Add source call audio playback to vocabulary suggestions

When the induction loop proposes a new vocabulary term, it now records which sampled call(s) most likely produced the suggestion. Admins see a collapsible "▶ source" player under each pending term showing the audio clip and transcript, so they can hear what was actually said before approving or dismissing.

- vocabulary_learner: track sampled call docs; attach source_call_ids to each pending term via a word-overlap search, falling back to the first sampled calls when nothing matches
- types: add optional VocabularyPendingTerm.source_call_ids (string[])
- c2api: add getCall(id) using the existing GET /calls/{call_id} endpoint
- VocabularyPanel: add SourceCallPlayer component — lazy-loads the call on first expand, shows audio controls + transcript snippet
2026-06-01 01:45:03 -04:00
parent 032eef311f
commit 913fe0cbee
4 changed files with 102 additions and 11 deletions
@@ -18,6 +18,7 @@ import asyncio
 import difflib
 import json
 import random
 import re
 from datetime import datetime, timezone, timedelta
 from typing import Optional
 from app.internal.logger import logger
@@ -297,6 +298,7 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
    random.shuffle(all_calls)
    char_budget = settings.vocabulary_induction_sample_tokens * 4
    transcript_block = ""
    sampled_call_docs: list[dict] = []
    sampled = 0
    for call in all_calls:
        text = call.get("transcript_corrected") or call.get("transcript") or ""
@@ -306,6 +308,7 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
            break
        tg = call.get("talkgroup_name") or f"TGID {call.get('talkgroup_id', '?')}"
        transcript_block += f"[{tg}] {text}\n"
        sampled_call_docs.append(call)
        sampled += 1
    if sampled < 3:
@@ -322,11 +325,16 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
    pending_lower = {p["term"].lower() for p in existing_pending}
    vocab_lower   = {t.lower() for t in existing_vocab}
-    to_queue = [
+    to_queue = []
-        {"term": t, "source": "induction", "added_at": now}
+    for t in new_terms:
-        for t in new_terms
+        if t.lower() in vocab_lower or t.lower() in pending_lower:
-        if t.lower() not in vocab_lower and t.lower() not in pending_lower
+            continue
-    ]
+        to_queue.append({
            "term": t,
            "source": "induction",
            "added_at": now,
            "source_call_ids": _find_source_calls(t, sampled_call_docs),
        })
    if not to_queue:
        return
@@ -343,6 +351,30 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
 # Internal sync helpers
 # ─────────────────────────────────────────────────────────────────────────────
 def _find_source_calls(term: str, sampled_calls: list[dict], max_results: int = 3) -> list[str]:
    """
    Find which sampled calls most likely produced this induction suggestion.
    Splits the proposed term into tokens and searches call transcripts for overlap.
    Falls back to the first two sampled calls when no token match is found
    (e.g. fully garbled terms like "why vac" → "YVAC" have no word overlap).
    """
    tokens = [t.lower() for t in re.split(r"[^a-zA-Z0-9]+", term) if len(t) >= 2]
    matched: list[str] = []
    if tokens:
        for call in sampled_calls:
            call_id = call.get("call_id")
            if not call_id:
                continue
            text = (call.get("transcript_corrected") or call.get("transcript") or "").lower()
            if any(tok in text for tok in tokens):
                matched.append(call_id)
                if len(matched) >= max_results:
                    break
    if not matched:
        matched = [c["call_id"] for c in sampled_calls[:2] if c.get("call_id")]
    return matched
 _STOP_WORDS = {
    "the", "and", "for", "are", "was", "were", "this", "that", "with",
    "have", "has", "had", "but", "not", "from", "they", "will", "what",
@@ -829,6 +829,54 @@ function AiFlagsPanel({ systemId, initial }: { systemId: string; initial: System
  );
 }
 // ── Source call audio player ──────────────────────────────────────────────────
 /**
  * Collapsible preview of one source call behind a vocabulary suggestion.
  *
  * The call is fetched via `c2api.getCall` the first time the row is expanded
  * and kept in component state, so collapsing and re-expanding does not
  * refetch. A failed fetch leaves the row collapsed, shows an inline error,
  * and is retried on the next click.
  */
 function SourceCallPlayer({ callId }: { callId: string }) {
  const [call, setCall] = useState<{ audio_url?: string | null; transcript?: string | null; transcript_corrected?: string | null } | null>(null);
  const [loading, setLoading] = useState(false);
  const [failed, setFailed] = useState(false);
  const [open, setOpen] = useState(false);

  async function toggle() {
    // Collapsing never needs network access.
    if (open) {
      setOpen(false);
      return;
    }
    if (!call) {
      setLoading(true);
      setFailed(false);
      try {
        const c = await c2api.getCall(callId);
        setCall(c as typeof call);
      } catch (err: unknown) {
        // This handler is invoked from onClick and never awaited, so a
        // rejection escaping here would be an unhandled promise rejection.
        console.error(`Failed to load source call ${callId}`, err);
        setFailed(true);
        return;
      } finally {
        setLoading(false);
      }
    }
    setOpen(true);
  }

  // Prefer the corrected transcript when one exists.
  const transcript = call?.transcript_corrected || call?.transcript;

  return (
    <div className="text-xs">
      <button
        type="button"
        onClick={toggle}
        disabled={loading}
        aria-expanded={open}
        className="text-indigo-500 hover:text-indigo-400 transition-colors disabled:opacity-50"
        title={callId}
      >
        {loading ? "loading…" : open ? "▲ source" : "▶ source"}
      </button>
      {failed && !loading && (
        <span className="ml-2 text-red-400">failed to load — click to retry</span>
      )}
      {open && call && (
        <div className="mt-1.5 space-y-1 pl-2 border-l border-gray-700">
          {call.audio_url ? (
            <audio src={call.audio_url} controls className="w-full" style={{ height: "1.75rem" }} />
          ) : (
            <p className="text-gray-600 italic">No audio</p>
          )}
          {transcript && (
            <p className="text-gray-500 italic line-clamp-2">{transcript}</p>
          )}
        </div>
      )}
    </div>
  );
 }
 // ── Vocabulary panel ──────────────────────────────────────────────────────────
 function VocabularyPanel({ systemId }: { systemId: string }) {
@@ -979,13 +1027,22 @@ function VocabularyPanel({ systemId }: { systemId: string }) {
                  <p className="text-gray-500 uppercase tracking-wider mb-1.5">
                    Induction suggestions ({pending.length})
                  </p>
-                  <div className="space-y-1">
+                  <div className="space-y-2">
                    {pending.map((p) => (
-                      <div key={p.term} className="flex items-center gap-2">
+                      <div key={p.term} className="space-y-1">
-                        <span className="text-gray-300 flex-1">{p.term}</span>
+                        <div className="flex items-center gap-2">
-                        <span className="text-gray-600">{p.source}</span>
+                          <span className="text-gray-300 flex-1">{p.term}</span>
-                        <button onClick={() => handleApprove(p.term)} className="text-green-500 hover:text-green-400 transition-colors px-1">✓</button>
+                          <span className="text-gray-600">{p.source}</span>
-                        <button onClick={() => handleDismiss(p.term)} className="text-gray-600 hover:text-red-400 transition-colors px-1">✕</button>
+                          <button onClick={() => handleApprove(p.term)} className="text-green-500 hover:text-green-400 transition-colors px-1">✓</button>
                          <button onClick={() => handleDismiss(p.term)} className="text-gray-600 hover:text-red-400 transition-colors px-1">✕</button>
                        </div>
                        {p.source_call_ids && p.source_call_ids.length > 0 && (
                          <div className="pl-1 space-y-1">
                            {p.source_call_ids.map((id) => (
                              <SourceCallPlayer key={id} callId={id} />
                            ))}
                          </div>
                        )}
                      </div>
                    ))}
                  </div>
@@ -56,6 +56,7 @@ export const c2api = {
    request(`/nodes/${id}`, { method: "DELETE" }),
  // Calls
  getCall: (callId: string) => request<import("@/lib/types").CallRecord>(`/calls/${callId}`),
  getCalls: (params?: Record<string, string>) => {
    const qs = params ? "?" + new URLSearchParams(params).toString() : "";
    return request<unknown[]>(`/calls${qs}`);
@@ -19,6 +19,7 @@ export interface VocabularyPendingTerm {
  term: string;
  source: "induction" | "correction";
  added_at: string;
  source_call_ids?: string[];
 }
 export interface SystemRecord {