From 913fe0cbee9a2fedb5583dd9257f230e5fad46a1 Mon Sep 17 00:00:00 2001 From: Logan Date: Mon, 1 Jun 2026 01:45:03 -0400 Subject: [PATCH] Add source call audio playback to vocabulary suggestions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the induction loop proposes a new vocabulary term, it now records which sampled call(s) most likely produced the suggestion. Admins see a collapsible "▶ source" player under each pending term showing the audio clip and transcript, so they can hear what was actually said before approving or dismissing. - vocabulary_learner: track sampled call docs, attach source_call_ids to each pending term via word-overlap search with fallback - types: VocabularyPendingTerm.source_call_ids?: string[] - c2api: add getCall(id) using existing GET /calls/{call_id} endpoint - VocabularyPanel: SourceCallPlayer component — lazy-loads call on first expand, shows audio controls + transcript snippet --- .../app/internal/vocabulary_learner.py | 42 +++++++++-- drb-frontend/app/systems/page.tsx | 69 +++++++++++++++++-- drb-frontend/lib/c2api.ts | 1 + drb-frontend/lib/types.ts | 1 + 4 files changed, 102 insertions(+), 11 deletions(-) diff --git a/drb-c2-core/app/internal/vocabulary_learner.py b/drb-c2-core/app/internal/vocabulary_learner.py index d90df7b..3b6e84d 100644 --- a/drb-c2-core/app/internal/vocabulary_learner.py +++ b/drb-c2-core/app/internal/vocabulary_learner.py @@ -18,6 +18,7 @@ import asyncio import difflib import json import random +import re from datetime import datetime, timezone, timedelta from typing import Optional from app.internal.logger import logger @@ -297,6 +298,7 @@ async def _induct_system(system_id: str, system_doc: dict) -> None: random.shuffle(all_calls) char_budget = settings.vocabulary_induction_sample_tokens * 4 transcript_block = "" + sampled_call_docs: list[dict] = [] sampled = 0 for call in all_calls: text = call.get("transcript_corrected") or call.get("transcript") or "" @@ -306,6 +308,7 @@ async def _induct_system(system_id: str, system_doc: dict) -> None: break tg = call.get("talkgroup_name") or f"TGID {call.get('talkgroup_id', '?')}" transcript_block += f"[{tg}] {text}\n" + sampled_call_docs.append(call) sampled += 1 if sampled < 3: @@ -322,11 +325,16 @@ async def _induct_system(system_id: str, system_doc: dict) -> None: pending_lower = {p["term"].lower() for p in existing_pending} vocab_lower = {t.lower() for t in existing_vocab} - to_queue = [ - {"term": t, "source": "induction", "added_at": now} - for t in new_terms - if t.lower() not in vocab_lower and t.lower() not in pending_lower - ] + to_queue = [] + for t in new_terms: + if t.lower() in vocab_lower or t.lower() in pending_lower: + continue + to_queue.append({ + "term": t, + "source": "induction", + "added_at": now, + "source_call_ids": _find_source_calls(t, sampled_call_docs), + }) if not to_queue: return @@ -343,6 +351,30 @@ async def _induct_system(system_id: str, system_doc: dict) -> None: # Internal sync helpers # ───────────────────────────────────────────────────────────────────────────── +def _find_source_calls(term: str, sampled_calls: list[dict], max_results: int = 3) -> list[str]: + """ + Find which sampled calls most likely produced this induction suggestion. + Splits the proposed term into tokens and searches call transcripts for overlap. + Falls back to the first two sampled calls when no token match is found + (e.g. fully garbled terms like "why vac" → "YVAC" have no word overlap). + """ + tokens = [t.lower() for t in re.split(r"[^a-zA-Z0-9]+", term) if len(t) >= 2] + matched: list[str] = [] + if tokens: + for call in sampled_calls: + call_id = call.get("call_id") + if not call_id: + continue + text = (call.get("transcript_corrected") or call.get("transcript") or "").lower() + if any(tok in text for tok in tokens): + matched.append(call_id) + if len(matched) >= max_results: + break + if not matched: + matched = [c["call_id"] for c in sampled_calls[:2] if c.get("call_id")] + return matched + + _STOP_WORDS = { "the", "and", "for", "are", "was", "were", "this", "that", "with", "have", "has", "had", "but", "not", "from", "they", "will", "what", diff --git a/drb-frontend/app/systems/page.tsx b/drb-frontend/app/systems/page.tsx index ba4b0e8..427ce2b 100644 --- a/drb-frontend/app/systems/page.tsx +++ b/drb-frontend/app/systems/page.tsx @@ -829,6 +829,54 @@ function AiFlagsPanel({ systemId, initial }: { systemId: string; initial: System ); } +// ── Source call audio player ────────────────────────────────────────────────── + +function SourceCallPlayer({ callId }: { callId: string }) { + const [call, setCall] = useState<{ audio_url?: string | null; transcript?: string | null; transcript_corrected?: string | null } | null>(null); + const [loading, setLoading] = useState(false); + const [open, setOpen] = useState(false); + + async function toggle() { + if (!open && !call) { + setLoading(true); + try { + const c = await c2api.getCall(callId); + setCall(c as typeof call); + } finally { + setLoading(false); + } + } + setOpen((v) => !v); + } + + const transcript = call?.transcript_corrected || call?.transcript; + + return ( +
+ + {open && call && ( +
+ {call.audio_url ? ( +
+ )} +
+ ); +} + // ── Vocabulary panel ────────────────────────────────────────────────────────── function VocabularyPanel({ systemId }: { systemId: string }) { @@ -979,13 +1027,22 @@ function VocabularyPanel({ systemId }: { systemId: string }) {

Induction suggestions ({pending.length})

-
+
{pending.map((p) => ( -
- {p.term} - {p.source} - - +
+
+ {p.term} + {p.source} + + +
+ {p.source_call_ids && p.source_call_ids.length > 0 && ( +
+ {p.source_call_ids.map((id) => ( + + ))} +
+ )}
))}
diff --git a/drb-frontend/lib/c2api.ts b/drb-frontend/lib/c2api.ts index 8136c8f..6fb44bb 100644 --- a/drb-frontend/lib/c2api.ts +++ b/drb-frontend/lib/c2api.ts @@ -56,6 +56,7 @@ export const c2api = { request(`/nodes/${id}`, { method: "DELETE" }), // Calls + getCall: (callId: string) => request(`/calls/${callId}`), getCalls: (params?: Record) => { const qs = params ? "?" + new URLSearchParams(params).toString() : ""; return request(`/calls${qs}`); diff --git a/drb-frontend/lib/types.ts b/drb-frontend/lib/types.ts index e6d6561..e9520f5 100644 --- a/drb-frontend/lib/types.ts +++ b/drb-frontend/lib/types.ts @@ -19,6 +19,7 @@ export interface VocabularyPendingTerm { term: string; source: "induction" | "correction"; added_at: string; + source_call_ids?: string[]; } export interface SystemRecord {