From 913fe0cbee9a2fedb5583dd9257f230e5fad46a1 Mon Sep 17 00:00:00 2001
From: Logan <Logan@simplestepsolutions.com>
Date: Mon, 1 Jun 2026 01:45:03 -0400
Subject: [PATCH] Add source call audio playback to vocabulary suggestions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the induction loop proposes a new vocabulary term, it now records
which sampled call(s) most likely produced the suggestion. Admins see
a collapsible "▶ source" player under each pending term showing the
audio clip and transcript, so they can hear what was actually said
before approving or dismissing.

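- vocabulary_learner: track sampled call docs, attach source_call_ids
  to each pending term via a case-insensitive token-substring search
  of the sampled transcripts (up to 3 calls), falling back to the
  first two sampled calls when no token matches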
- types: VocabularyPendingTerm.source_call_ids?: string[]
- c2api: add getCall(id) using existing GET /calls/{call_id} endpoint
- VocabularyPanel: SourceCallPlayer component — lazy-loads the call on
  first expand, then shows audio controls (or "No audio" when the call
  has no audio_url) plus a two-line transcript snippet
---
 .../app/internal/vocabulary_learner.py        | 42 +++++++++--
 drb-frontend/app/systems/page.tsx             | 69 +++++++++++++++++--
 drb-frontend/lib/c2api.ts                     |  1 +
 drb-frontend/lib/types.ts                     |  1 +
 4 files changed, 102 insertions(+), 11 deletions(-)

diff --git a/drb-c2-core/app/internal/vocabulary_learner.py b/drb-c2-core/app/internal/vocabulary_learner.py
index d90df7b..3b6e84d 100644
--- a/drb-c2-core/app/internal/vocabulary_learner.py
+++ b/drb-c2-core/app/internal/vocabulary_learner.py
@@ -18,6 +18,7 @@ import asyncio
 import difflib
 import json
 import random
+import re
 from datetime import datetime, timezone, timedelta
 from typing import Optional
 from app.internal.logger import logger
@@ -297,6 +298,7 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
     random.shuffle(all_calls)
     char_budget = settings.vocabulary_induction_sample_tokens * 4
     transcript_block = ""
+    sampled_call_docs: list[dict] = []
     sampled = 0
     for call in all_calls:
         text = call.get("transcript_corrected") or call.get("transcript") or ""
@@ -306,6 +308,7 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
             break
         tg = call.get("talkgroup_name") or f"TGID {call.get('talkgroup_id', '?')}"
         transcript_block += f"[{tg}] {text}\n"
+        sampled_call_docs.append(call)
         sampled += 1
 
     if sampled < 3:
@@ -322,11 +325,16 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
     pending_lower = {p["term"].lower() for p in existing_pending}
     vocab_lower   = {t.lower() for t in existing_vocab}
 
-    to_queue = [
-        {"term": t, "source": "induction", "added_at": now}
-        for t in new_terms
-        if t.lower() not in vocab_lower and t.lower() not in pending_lower
-    ]
+    to_queue = []
+    for t in new_terms:
+        if t.lower() in vocab_lower or t.lower() in pending_lower:
+            continue
+        to_queue.append({
+            "term": t,
+            "source": "induction",
+            "added_at": now,
+            "source_call_ids": _find_source_calls(t, sampled_call_docs),
+        })
     if not to_queue:
         return
 
@@ -343,6 +351,30 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
 # Internal sync helpers
 # ─────────────────────────────────────────────────────────────────────────────
 
+def _find_source_calls(term: str, sampled_calls: list[dict], max_results: int = 3) -> list[str]:
+    """
+    Find which sampled calls most likely produced this induction suggestion.
+    Splits the proposed term into tokens and searches call transcripts for overlap.
+    Falls back to the first two sampled calls when no token match is found
+    (e.g. fully garbled terms like "why vac" → "YVAC" have no word overlap).
+    """
+    tokens = [t.lower() for t in re.split(r"[^a-zA-Z0-9]+", term) if len(t) >= 2]
+    matched: list[str] = []
+    if tokens:
+        for call in sampled_calls:
+            call_id = call.get("call_id")
+            if not call_id:
+                continue
+            text = (call.get("transcript_corrected") or call.get("transcript") or "").lower()
+            if any(tok in text for tok in tokens):
+                matched.append(call_id)
+                if len(matched) >= max_results:
+                    break
+    if not matched:
+        matched = [c["call_id"] for c in sampled_calls[:2] if c.get("call_id")]
+    return matched
+
+
 _STOP_WORDS = {
     "the", "and", "for", "are", "was", "were", "this", "that", "with",
     "have", "has", "had", "but", "not", "from", "they", "will", "what",
diff --git a/drb-frontend/app/systems/page.tsx b/drb-frontend/app/systems/page.tsx
index ba4b0e8..427ce2b 100644
--- a/drb-frontend/app/systems/page.tsx
+++ b/drb-frontend/app/systems/page.tsx
@@ -829,6 +829,54 @@ function AiFlagsPanel({ systemId, initial }: { systemId: string; initial: System
   );
 }
 
+// ── Source call audio player ──────────────────────────────────────────────────
+
+function SourceCallPlayer({ callId }: { callId: string }) {
+  const [call, setCall] = useState<{ audio_url?: string | null; transcript?: string | null; transcript_corrected?: string | null } | null>(null);
+  const [loading, setLoading] = useState(false);
+  const [open, setOpen] = useState(false);
+
+  async function toggle() {
+    if (!open && !call) {
+      setLoading(true);
+      try {
+        const c = await c2api.getCall(callId);
+        setCall(c as typeof call);
+      } finally {
+        setLoading(false);
+      }
+    }
+    setOpen((v) => !v);
+  }
+
+  const transcript = call?.transcript_corrected || call?.transcript;
+
+  return (
+    <div className="text-xs">
+      <button
+        onClick={toggle}
+        disabled={loading}
+        className="text-indigo-500 hover:text-indigo-400 transition-colors disabled:opacity-50"
+        title={callId}
+      >
+        {loading ? "loading…" : open ? "▲ source" : "▶ source"}
+      </button>
+      {open && call && (
+        <div className="mt-1.5 space-y-1 pl-2 border-l border-gray-700">
+          {call.audio_url ? (
+            <audio src={call.audio_url} controls className="w-full" style={{ height: "1.75rem" }} />
+          ) : (
+            <p className="text-gray-600 italic">No audio</p>
+          )}
+          {transcript && (
+            <p className="text-gray-500 italic line-clamp-2">{transcript}</p>
+          )}
+        </div>
+      )}
+    </div>
+  );
+}
+
 // ── Vocabulary panel ──────────────────────────────────────────────────────────
 
 function VocabularyPanel({ systemId }: { systemId: string }) {
@@ -979,13 +1027,22 @@ function VocabularyPanel({ systemId }: { systemId: string }) {
                   <p className="text-gray-500 uppercase tracking-wider mb-1.5">
                     Induction suggestions ({pending.length})
                   </p>
-                  <div className="space-y-1">
+                  <div className="space-y-2">
                     {pending.map((p) => (
-                      <div key={p.term} className="flex items-center gap-2">
-                        <span className="text-gray-300 flex-1">{p.term}</span>
-                        <span className="text-gray-600">{p.source}</span>
-                        <button onClick={() => handleApprove(p.term)} className="text-green-500 hover:text-green-400 transition-colors px-1">✓</button>
-                        <button onClick={() => handleDismiss(p.term)} className="text-gray-600 hover:text-red-400 transition-colors px-1">✕</button>
+                      <div key={p.term} className="space-y-1">
+                        <div className="flex items-center gap-2">
+                          <span className="text-gray-300 flex-1">{p.term}</span>
+                          <span className="text-gray-600">{p.source}</span>
+                          <button onClick={() => handleApprove(p.term)} className="text-green-500 hover:text-green-400 transition-colors px-1">✓</button>
+                          <button onClick={() => handleDismiss(p.term)} className="text-gray-600 hover:text-red-400 transition-colors px-1">✕</button>
+                        </div>
+                        {p.source_call_ids && p.source_call_ids.length > 0 && (
+                          <div className="pl-1 space-y-1">
+                            {p.source_call_ids.map((id) => (
+                              <SourceCallPlayer key={id} callId={id} />
+                            ))}
+                          </div>
+                        )}
                       </div>
                     ))}
                   </div>
diff --git a/drb-frontend/lib/c2api.ts b/drb-frontend/lib/c2api.ts
index 8136c8f..6fb44bb 100644
--- a/drb-frontend/lib/c2api.ts
+++ b/drb-frontend/lib/c2api.ts
@@ -56,6 +56,7 @@ export const c2api = {
     request(`/nodes/${id}`, { method: "DELETE" }),
 
   // Calls
+  getCall: (callId: string) => request<import("@/lib/types").CallRecord>(`/calls/${callId}`),
   getCalls: (params?: Record<string, string>) => {
     const qs = params ? "?" + new URLSearchParams(params).toString() : "";
     return request<unknown[]>(`/calls${qs}`);
diff --git a/drb-frontend/lib/types.ts b/drb-frontend/lib/types.ts
index e6d6561..e9520f5 100644
--- a/drb-frontend/lib/types.ts
+++ b/drb-frontend/lib/types.ts
@@ -19,6 +19,7 @@ export interface VocabularyPendingTerm {
   term: string;
   source: "induction" | "correction";
   added_at: string;
+  source_call_ids?: string[];
 }
 
 export interface SystemRecord {