Add source call audio playback to vocabulary suggestions

When the induction loop proposes a new vocabulary term, it now records which sampled call(s) most likely produced the suggestion. Admins see a collapsible "▶ source" player under each pending term showing the audio clip and transcript, so they can hear what was actually said before approving or dismissing.

- vocabulary_learner: track sampled call docs and attach source_call_ids to each pending term via a word-overlap search, with a fallback when nothing matches
- types: add VocabularyPendingTerm.source_call_ids?: string[]
- c2api: add getCall(id) using the existing GET /calls/{call_id} endpoint
- VocabularyPanel: add SourceCallPlayer component — lazy-loads the call on first expand, shows audio controls + transcript snippet
2026-06-01 01:45:03 -04:00
parent 032eef311f
commit 913fe0cbee
4 changed files with 102 additions and 11 deletions
@@ -18,6 +18,7 @@ import asyncio
 import difflib
 import json
 import random
+import re
 from datetime import datetime, timezone, timedelta
 from typing import Optional
 from app.internal.logger import logger
@@ -297,6 +298,7 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
    random.shuffle(all_calls)
    char_budget = settings.vocabulary_induction_sample_tokens * 4
    transcript_block = ""
+    sampled_call_docs: list[dict] = []
    sampled = 0
    for call in all_calls:
        text = call.get("transcript_corrected") or call.get("transcript") or ""
@@ -306,6 +308,7 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
            break
        tg = call.get("talkgroup_name") or f"TGID {call.get('talkgroup_id', '?')}"
        transcript_block += f"[{tg}] {text}\n"
+        sampled_call_docs.append(call)
        sampled += 1

    if sampled < 3:
@@ -322,11 +325,16 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
    pending_lower = {p["term"].lower() for p in existing_pending}
    vocab_lower   = {t.lower() for t in existing_vocab}

-    to_queue = [
-        {"term": t, "source": "induction", "added_at": now}
-        for t in new_terms
-        if t.lower() not in vocab_lower and t.lower() not in pending_lower
-    ]
+    to_queue = []
+    for t in new_terms:
+        if t.lower() in vocab_lower or t.lower() in pending_lower:
+            continue
+        to_queue.append({
+            "term": t,
+            "source": "induction",
+            "added_at": now,
+            "source_call_ids": _find_source_calls(t, sampled_call_docs),
+        })
    if not to_queue:
        return

@@ -343,6 +351,30 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
 # Internal sync helpers
 # ─────────────────────────────────────────────────────────────────────────────

+def _find_source_calls(term: str, sampled_calls: list[dict], max_results: int = 3) -> list[str]:
+    """
+    Find which sampled calls most likely produced this induction suggestion.
+    Splits the proposed term into tokens and searches call transcripts for overlap.
+    Falls back to the first two sampled calls when no token match is found
+    (e.g. fully garbled terms like "why vac" → "YVAC" have no word overlap).
+    """
+    tokens = [t.lower() for t in re.split(r"[^a-zA-Z0-9]+", term) if len(t) >= 2]
+    matched: list[str] = []
+    if tokens:
+        for call in sampled_calls:
+            call_id = call.get("call_id")
+            if not call_id:
+                continue
+            text = (call.get("transcript_corrected") or call.get("transcript") or "").lower()
+            if any(tok in text for tok in tokens):
+                matched.append(call_id)
+                if len(matched) >= max_results:
+                    break
+    if not matched:
+        matched = [c["call_id"] for c in sampled_calls[:2] if c.get("call_id")]
+    return matched
+
+
 _STOP_WORDS = {
    "the", "and", "for", "are", "was", "were", "this", "that", "with",
    "have", "has", "had", "but", "not", "from", "they", "will", "what",
@@ -829,6 +829,54 @@ function AiFlagsPanel({ systemId, initial }: { systemId: string; initial: System
  );
 }

+// ── Source call audio player ──────────────────────────────────────────────────
+
+/** Collapsible player for one source call: loads it via c2api.getCall on first expand (retried on a later expand after a failure) and shows audio + transcript. */
+function SourceCallPlayer({ callId }: { callId: string }) {
+  const [call, setCall] = useState<{ audio_url?: string | null; transcript?: string | null; transcript_corrected?: string | null } | null>(null);
+  const [loading, setLoading] = useState(false);
+  const [failed, setFailed] = useState(false);
+  const [open, setOpen] = useState(false);
+
+  async function toggle() {
+    if (!open && !call) {
+      setLoading(true);
+      setFailed(false);
+      try {
+        setCall((await c2api.getCall(callId)) as typeof call);
+      } catch {
+        // Show the failure in the panel instead of leaking an unhandled rejection from the click handler.
+        setFailed(true);
+      } finally {
+        setLoading(false);
+      }
+    }
+    setOpen((v) => !v);
+  }
+
+  const transcript = call?.transcript_corrected || call?.transcript;
+
+  return (
+    <div className="text-xs">
+      <button type="button" onClick={toggle} disabled={loading} aria-expanded={open} title={callId}
+        className="text-indigo-500 hover:text-indigo-400 transition-colors disabled:opacity-50">
+        {loading ? "loading…" : open ? "▲ source" : "▶ source"}
+      </button>
+      {open && failed && <p className="mt-1.5 pl-2 text-red-400 italic">Could not load call</p>}
+      {open && call && (
+        <div className="mt-1.5 space-y-1 pl-2 border-l border-gray-700">
+          {call.audio_url ? (
+            <audio src={call.audio_url} controls className="w-full" style={{ height: "1.75rem" }} />
+          ) : (
+            <p className="text-gray-600 italic">No audio</p>
+          )}
+          {transcript && <p className="text-gray-500 italic line-clamp-2">{transcript}</p>}
+        </div>
+      )}
+    </div>
+  );
+}
+
 // ── Vocabulary panel ──────────────────────────────────────────────────────────

 function VocabularyPanel({ systemId }: { systemId: string }) {
@@ -979,13 +1027,22 @@ function VocabularyPanel({ systemId }: { systemId: string }) {
                  <p className="text-gray-500 uppercase tracking-wider mb-1.5">
                    Induction suggestions ({pending.length})
                  </p>
-                  <div className="space-y-1">
+                  <div className="space-y-2">
                    {pending.map((p) => (
-                      <div key={p.term} className="flex items-center gap-2">
-                        <span className="text-gray-300 flex-1">{p.term}</span>
-                        <span className="text-gray-600">{p.source}</span>
-                        <button onClick={() => handleApprove(p.term)} className="text-green-500 hover:text-green-400 transition-colors px-1">✓</button>
-                        <button onClick={() => handleDismiss(p.term)} className="text-gray-600 hover:text-red-400 transition-colors px-1">✕</button>
+                      <div key={p.term} className="space-y-1">
+                        <div className="flex items-center gap-2">
+                          <span className="text-gray-300 flex-1">{p.term}</span>
+                          <span className="text-gray-600">{p.source}</span>
+                          <button onClick={() => handleApprove(p.term)} className="text-green-500 hover:text-green-400 transition-colors px-1">✓</button>
+                          <button onClick={() => handleDismiss(p.term)} className="text-gray-600 hover:text-red-400 transition-colors px-1">✕</button>
+                        </div>
+                        {p.source_call_ids && p.source_call_ids.length > 0 && (
+                          <div className="pl-1 space-y-1">
+                            {p.source_call_ids.map((id) => (
+                              <SourceCallPlayer key={id} callId={id} />
+                            ))}
+                          </div>
+                        )}
                      </div>
                    ))}
                  </div>
@@ -56,6 +56,7 @@ export const c2api = {
    request(`/nodes/${id}`, { method: "DELETE" }),

  // Calls
+  getCall: (callId: string) => request<import("@/lib/types").CallRecord>(`/calls/${callId}`),
  getCalls: (params?: Record<string, string>) => {
    const qs = params ? "?" + new URLSearchParams(params).toString() : "";
    return request<unknown[]>(`/calls${qs}`);
@@ -19,6 +19,7 @@ export interface VocabularyPendingTerm {
  term: string;
  source: "induction" | "correction";
  added_at: string;
+  source_call_ids?: string[];
 }

 export interface SystemRecord {