Add source call audio playback to vocabulary suggestions
When the induction loop proposes a new vocabulary term, it now records
which sampled call(s) most likely produced the suggestion. Admins see
a collapsible "▶ source" player under each pending term showing the
audio clip and transcript, so they can hear what was actually said
before approving or dismissing.
- vocabulary_learner: track sampled call docs, attach source_call_ids
  to each pending term via word-overlap search with fallback
- types: VocabularyPendingTerm.source_call_ids?: string[]
- c2api: add getCall(id) using existing GET /calls/{call_id} endpoint
- VocabularyPanel: SourceCallPlayer component — lazy-loads call on
  first expand, shows audio controls + transcript snippet
This commit is contained in:
@@ -18,6 +18,7 @@ import asyncio
|
||||
import difflib
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from typing import Optional
|
||||
from app.internal.logger import logger
|
||||
@@ -297,6 +298,7 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
|
||||
random.shuffle(all_calls)
|
||||
char_budget = settings.vocabulary_induction_sample_tokens * 4
|
||||
transcript_block = ""
|
||||
sampled_call_docs: list[dict] = []
|
||||
sampled = 0
|
||||
for call in all_calls:
|
||||
text = call.get("transcript_corrected") or call.get("transcript") or ""
|
||||
@@ -306,6 +308,7 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
|
||||
break
|
||||
tg = call.get("talkgroup_name") or f"TGID {call.get('talkgroup_id', '?')}"
|
||||
transcript_block += f"[{tg}] {text}\n"
|
||||
sampled_call_docs.append(call)
|
||||
sampled += 1
|
||||
|
||||
if sampled < 3:
|
||||
@@ -322,11 +325,16 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
|
||||
pending_lower = {p["term"].lower() for p in existing_pending}
|
||||
vocab_lower = {t.lower() for t in existing_vocab}
|
||||
|
||||
to_queue = [
|
||||
{"term": t, "source": "induction", "added_at": now}
|
||||
for t in new_terms
|
||||
if t.lower() not in vocab_lower and t.lower() not in pending_lower
|
||||
]
|
||||
to_queue = []
|
||||
for t in new_terms:
|
||||
if t.lower() in vocab_lower or t.lower() in pending_lower:
|
||||
continue
|
||||
to_queue.append({
|
||||
"term": t,
|
||||
"source": "induction",
|
||||
"added_at": now,
|
||||
"source_call_ids": _find_source_calls(t, sampled_call_docs),
|
||||
})
|
||||
if not to_queue:
|
||||
return
|
||||
|
||||
@@ -343,6 +351,30 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
|
||||
# Internal sync helpers
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
def _find_source_calls(term: str, sampled_calls: list[dict], max_results: int = 3) -> list[str]:
|
||||
"""
|
||||
Find which sampled calls most likely produced this induction suggestion.
|
||||
Splits the proposed term into tokens and searches call transcripts for overlap.
|
||||
Falls back to the first two sampled calls when no token match is found
|
||||
(e.g. fully garbled terms like "why vac" → "YVAC" have no word overlap).
|
||||
"""
|
||||
tokens = [t.lower() for t in re.split(r"[^a-zA-Z0-9]+", term) if len(t) >= 2]
|
||||
matched: list[str] = []
|
||||
if tokens:
|
||||
for call in sampled_calls:
|
||||
call_id = call.get("call_id")
|
||||
if not call_id:
|
||||
continue
|
||||
text = (call.get("transcript_corrected") or call.get("transcript") or "").lower()
|
||||
if any(tok in text for tok in tokens):
|
||||
matched.append(call_id)
|
||||
if len(matched) >= max_results:
|
||||
break
|
||||
if not matched:
|
||||
matched = [c["call_id"] for c in sampled_calls[:2] if c.get("call_id")]
|
||||
return matched
|
||||
|
||||
|
||||
_STOP_WORDS = {
|
||||
"the", "and", "for", "are", "was", "were", "this", "that", "with",
|
||||
"have", "has", "had", "but", "not", "from", "they", "will", "what",
|
||||
|
||||
@@ -829,6 +829,54 @@ function AiFlagsPanel({ systemId, initial }: { systemId: string; initial: System
|
||||
);
|
||||
}
|
||||
|
||||
// ── Source call audio player ──────────────────────────────────────────────────
|
||||
|
||||
/**
 * Collapsible "source" player for a single call referenced by a pending
 * vocabulary term.
 *
 * The call is fetched through `c2api.getCall` the first time the player is
 * expanded and kept in component state afterwards, so collapsing and
 * re-expanding does not refetch. When the fetch fails the player stays
 * collapsed, an inline error is rendered, and the next click retries.
 *
 * @param callId - Identifier of the call to load; also shown as the button tooltip.
 */
function SourceCallPlayer({ callId }: { callId: string }) {
  const [call, setCall] = useState<{ audio_url?: string | null; transcript?: string | null; transcript_corrected?: string | null } | null>(null);
  const [loading, setLoading] = useState(false);
  const [open, setOpen] = useState(false);
  const [error, setError] = useState<string | null>(null);

  async function toggle() {
    // Collapsing never needs network access.
    if (open) {
      setOpen(false);
      return;
    }

    if (!call) {
      setLoading(true);
      setError(null);
      try {
        const c = await c2api.getCall(callId);
        setCall(c as typeof call);
      } catch (e: unknown) {
        // Surface the failure instead of letting the click handler's promise
        // reject unhandled; leave the player collapsed so a later click retries.
        setError(e instanceof Error ? e.message : "Failed to load call");
        return;
      } finally {
        setLoading(false);
      }
    }

    setOpen(true);
  }

  // Prefer the corrected transcript; fall back to the raw one.
  const transcript = call?.transcript_corrected || call?.transcript;

  return (
    <div className="text-xs">
      <button
        type="button"
        onClick={toggle}
        disabled={loading}
        aria-expanded={open}
        className="text-indigo-500 hover:text-indigo-400 transition-colors disabled:opacity-50"
        title={callId}
      >
        {loading ? "loading…" : open ? "▲ source" : "▶ source"}
      </button>
      {error && !open && (
        <p className="mt-1 text-red-400 italic">{error}</p>
      )}
      {open && call && (
        <div className="mt-1.5 space-y-1 pl-2 border-l border-gray-700">
          {call.audio_url ? (
            <audio src={call.audio_url} controls className="w-full" style={{ height: "1.75rem" }} />
          ) : (
            <p className="text-gray-600 italic">No audio</p>
          )}
          {transcript && (
            <p className="text-gray-500 italic line-clamp-2">{transcript}</p>
          )}
        </div>
      )}
    </div>
  );
}
|
||||
|
||||
// ── Vocabulary panel ──────────────────────────────────────────────────────────
|
||||
|
||||
function VocabularyPanel({ systemId }: { systemId: string }) {
|
||||
@@ -979,13 +1027,22 @@ function VocabularyPanel({ systemId }: { systemId: string }) {
|
||||
<p className="text-gray-500 uppercase tracking-wider mb-1.5">
|
||||
Induction suggestions ({pending.length})
|
||||
</p>
|
||||
<div className="space-y-1">
|
||||
<div className="space-y-2">
|
||||
{pending.map((p) => (
|
||||
<div key={p.term} className="flex items-center gap-2">
|
||||
<span className="text-gray-300 flex-1">{p.term}</span>
|
||||
<span className="text-gray-600">{p.source}</span>
|
||||
<button onClick={() => handleApprove(p.term)} className="text-green-500 hover:text-green-400 transition-colors px-1">✓</button>
|
||||
<button onClick={() => handleDismiss(p.term)} className="text-gray-600 hover:text-red-400 transition-colors px-1">✕</button>
|
||||
<div key={p.term} className="space-y-1">
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-gray-300 flex-1">{p.term}</span>
|
||||
<span className="text-gray-600">{p.source}</span>
|
||||
<button onClick={() => handleApprove(p.term)} className="text-green-500 hover:text-green-400 transition-colors px-1">✓</button>
|
||||
<button onClick={() => handleDismiss(p.term)} className="text-gray-600 hover:text-red-400 transition-colors px-1">✕</button>
|
||||
</div>
|
||||
{p.source_call_ids && p.source_call_ids.length > 0 && (
|
||||
<div className="pl-1 space-y-1">
|
||||
{p.source_call_ids.map((id) => (
|
||||
<SourceCallPlayer key={id} callId={id} />
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
|
||||
@@ -56,6 +56,7 @@ export const c2api = {
|
||||
request(`/nodes/${id}`, { method: "DELETE" }),
|
||||
|
||||
// Calls
|
||||
getCall: (callId: string) => request<import("@/lib/types").CallRecord>(`/calls/${callId}`),
|
||||
getCalls: (params?: Record<string, string>) => {
|
||||
const qs = params ? "?" + new URLSearchParams(params).toString() : "";
|
||||
return request<unknown[]>(`/calls${qs}`);
|
||||
|
||||
@@ -19,6 +19,7 @@ export interface VocabularyPendingTerm {
|
||||
term: string;
|
||||
source: "induction" | "correction";
|
||||
added_at: string;
|
||||
source_call_ids?: string[];
|
||||
}
|
||||
|
||||
export interface SystemRecord {
|
||||
|
||||
Reference in New Issue
Block a user