Add source call audio playback to vocabulary suggestions
When the induction loop proposes a new vocabulary term, it now records
which sampled call(s) most likely produced the suggestion. Admins see
a collapsible "▶ source" player under each pending term showing the
audio clip and transcript, so they can hear what was actually said
before approving or dismissing.
- vocabulary_learner: track sampled call docs, attach source_call_ids
to each pending term via word-overlap search with fallback
- types: VocabularyPendingTerm.source_call_ids?: string[]
- c2api: add getCall(id) using existing GET /calls/{call_id} endpoint
- VocabularyPanel: SourceCallPlayer component — lazy-loads call on
first expand, shows audio controls + transcript snippet
This commit is contained in:
@@ -18,6 +18,7 @@ import asyncio
|
|||||||
import difflib
|
import difflib
|
||||||
import json
|
import json
|
||||||
import random
|
import random
|
||||||
|
import re
|
||||||
from datetime import datetime, timezone, timedelta
|
from datetime import datetime, timezone, timedelta
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from app.internal.logger import logger
|
from app.internal.logger import logger
|
||||||
@@ -297,6 +298,7 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
|
|||||||
random.shuffle(all_calls)
|
random.shuffle(all_calls)
|
||||||
char_budget = settings.vocabulary_induction_sample_tokens * 4
|
char_budget = settings.vocabulary_induction_sample_tokens * 4
|
||||||
transcript_block = ""
|
transcript_block = ""
|
||||||
|
sampled_call_docs: list[dict] = []
|
||||||
sampled = 0
|
sampled = 0
|
||||||
for call in all_calls:
|
for call in all_calls:
|
||||||
text = call.get("transcript_corrected") or call.get("transcript") or ""
|
text = call.get("transcript_corrected") or call.get("transcript") or ""
|
||||||
@@ -306,6 +308,7 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
|
|||||||
break
|
break
|
||||||
tg = call.get("talkgroup_name") or f"TGID {call.get('talkgroup_id', '?')}"
|
tg = call.get("talkgroup_name") or f"TGID {call.get('talkgroup_id', '?')}"
|
||||||
transcript_block += f"[{tg}] {text}\n"
|
transcript_block += f"[{tg}] {text}\n"
|
||||||
|
sampled_call_docs.append(call)
|
||||||
sampled += 1
|
sampled += 1
|
||||||
|
|
||||||
if sampled < 3:
|
if sampled < 3:
|
||||||
@@ -322,11 +325,16 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
|
|||||||
pending_lower = {p["term"].lower() for p in existing_pending}
|
pending_lower = {p["term"].lower() for p in existing_pending}
|
||||||
vocab_lower = {t.lower() for t in existing_vocab}
|
vocab_lower = {t.lower() for t in existing_vocab}
|
||||||
|
|
||||||
to_queue = [
|
to_queue = []
|
||||||
{"term": t, "source": "induction", "added_at": now}
|
for t in new_terms:
|
||||||
for t in new_terms
|
if t.lower() in vocab_lower or t.lower() in pending_lower:
|
||||||
if t.lower() not in vocab_lower and t.lower() not in pending_lower
|
continue
|
||||||
]
|
to_queue.append({
|
||||||
|
"term": t,
|
||||||
|
"source": "induction",
|
||||||
|
"added_at": now,
|
||||||
|
"source_call_ids": _find_source_calls(t, sampled_call_docs),
|
||||||
|
})
|
||||||
if not to_queue:
|
if not to_queue:
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -343,6 +351,30 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
|
|||||||
# Internal sync helpers
|
# Internal sync helpers
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _find_source_calls(term: str, sampled_calls: list[dict], max_results: int = 3) -> list[str]:
|
||||||
|
"""
|
||||||
|
Find which sampled calls most likely produced this induction suggestion.
|
||||||
|
Splits the proposed term into tokens and searches call transcripts for overlap.
|
||||||
|
Falls back to the first two sampled calls when no token match is found
|
||||||
|
(e.g. fully garbled terms like "why vac" → "YVAC" have no word overlap).
|
||||||
|
"""
|
||||||
|
tokens = [t.lower() for t in re.split(r"[^a-zA-Z0-9]+", term) if len(t) >= 2]
|
||||||
|
matched: list[str] = []
|
||||||
|
if tokens:
|
||||||
|
for call in sampled_calls:
|
||||||
|
call_id = call.get("call_id")
|
||||||
|
if not call_id:
|
||||||
|
continue
|
||||||
|
text = (call.get("transcript_corrected") or call.get("transcript") or "").lower()
|
||||||
|
if any(tok in text for tok in tokens):
|
||||||
|
matched.append(call_id)
|
||||||
|
if len(matched) >= max_results:
|
||||||
|
break
|
||||||
|
if not matched:
|
||||||
|
matched = [c["call_id"] for c in sampled_calls[:2] if c.get("call_id")]
|
||||||
|
return matched
|
||||||
|
|
||||||
|
|
||||||
_STOP_WORDS = {
|
_STOP_WORDS = {
|
||||||
"the", "and", "for", "are", "was", "were", "this", "that", "with",
|
"the", "and", "for", "are", "was", "were", "this", "that", "with",
|
||||||
"have", "has", "had", "but", "not", "from", "they", "will", "what",
|
"have", "has", "had", "but", "not", "from", "they", "will", "what",
|
||||||
|
|||||||
@@ -829,6 +829,54 @@ function AiFlagsPanel({ systemId, initial }: { systemId: string; initial: System
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Source call audio player ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Collapsible player for one source call of a pending vocabulary term.
 *
 * The call is fetched through `c2api.getCall` the first time the section is
 * expanded and kept in component state afterwards. When the fetch fails the
 * section still opens and shows an error line; collapsing and expanding again
 * retries the request.
 */
function SourceCallPlayer({ callId }: { callId: string }) {
  const [call, setCall] = useState<{ audio_url?: string | null; transcript?: string | null; transcript_corrected?: string | null } | null>(null);
  const [loading, setLoading] = useState(false);
  const [open, setOpen] = useState(false);
  const [failed, setFailed] = useState(false);

  async function toggle() {
    if (!open && !call) {
      setLoading(true);
      setFailed(false);
      try {
        const c = await c2api.getCall(callId);
        setCall(c as typeof call);
      } catch {
        // Swallow the rejection here so it does not escape the click handler
        // as an unhandled promise rejection; surface it in the UI instead.
        setFailed(true);
      } finally {
        setLoading(false);
      }
    }
    setOpen((v) => !v);
  }

  // Prefer the corrected transcript when one exists.
  const transcript = call?.transcript_corrected || call?.transcript;

  return (
    <div className="text-xs">
      <button
        onClick={toggle}
        disabled={loading}
        className="text-indigo-500 hover:text-indigo-400 transition-colors disabled:opacity-50"
        title={callId}
      >
        {loading ? "loading…" : open ? "▲ source" : "▶ source"}
      </button>
      {open && !call && failed && (
        <p className="mt-1.5 pl-2 border-l border-gray-700 text-red-400 italic">Could not load call</p>
      )}
      {open && call && (
        <div className="mt-1.5 space-y-1 pl-2 border-l border-gray-700">
          {call.audio_url ? (
            <audio src={call.audio_url} controls className="w-full" style={{ height: "1.75rem" }} />
          ) : (
            <p className="text-gray-600 italic">No audio</p>
          )}
          {transcript && (
            <p className="text-gray-500 italic line-clamp-2">{transcript}</p>
          )}
        </div>
      )}
    </div>
  );
}
|
||||||
|
|
||||||
// ── Vocabulary panel ──────────────────────────────────────────────────────────
|
// ── Vocabulary panel ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
function VocabularyPanel({ systemId }: { systemId: string }) {
|
function VocabularyPanel({ systemId }: { systemId: string }) {
|
||||||
@@ -979,13 +1027,22 @@ function VocabularyPanel({ systemId }: { systemId: string }) {
|
|||||||
<p className="text-gray-500 uppercase tracking-wider mb-1.5">
|
<p className="text-gray-500 uppercase tracking-wider mb-1.5">
|
||||||
Induction suggestions ({pending.length})
|
Induction suggestions ({pending.length})
|
||||||
</p>
|
</p>
|
||||||
<div className="space-y-1">
|
<div className="space-y-2">
|
||||||
{pending.map((p) => (
|
{pending.map((p) => (
|
||||||
<div key={p.term} className="flex items-center gap-2">
|
<div key={p.term} className="space-y-1">
|
||||||
<span className="text-gray-300 flex-1">{p.term}</span>
|
<div className="flex items-center gap-2">
|
||||||
<span className="text-gray-600">{p.source}</span>
|
<span className="text-gray-300 flex-1">{p.term}</span>
|
||||||
<button onClick={() => handleApprove(p.term)} className="text-green-500 hover:text-green-400 transition-colors px-1">✓</button>
|
<span className="text-gray-600">{p.source}</span>
|
||||||
<button onClick={() => handleDismiss(p.term)} className="text-gray-600 hover:text-red-400 transition-colors px-1">✕</button>
|
<button onClick={() => handleApprove(p.term)} className="text-green-500 hover:text-green-400 transition-colors px-1">✓</button>
|
||||||
|
<button onClick={() => handleDismiss(p.term)} className="text-gray-600 hover:text-red-400 transition-colors px-1">✕</button>
|
||||||
|
</div>
|
||||||
|
{p.source_call_ids && p.source_call_ids.length > 0 && (
|
||||||
|
<div className="pl-1 space-y-1">
|
||||||
|
{p.source_call_ids.map((id) => (
|
||||||
|
<SourceCallPlayer key={id} callId={id} />
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
))}
|
))}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -56,6 +56,7 @@ export const c2api = {
|
|||||||
request(`/nodes/${id}`, { method: "DELETE" }),
|
request(`/nodes/${id}`, { method: "DELETE" }),
|
||||||
|
|
||||||
// Calls
|
// Calls
|
||||||
|
getCall: (callId: string) => request<import("@/lib/types").CallRecord>(`/calls/${callId}`),
|
||||||
getCalls: (params?: Record<string, string>) => {
|
getCalls: (params?: Record<string, string>) => {
|
||||||
const qs = params ? "?" + new URLSearchParams(params).toString() : "";
|
const qs = params ? "?" + new URLSearchParams(params).toString() : "";
|
||||||
return request<unknown[]>(`/calls${qs}`);
|
return request<unknown[]>(`/calls${qs}`);
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ export interface VocabularyPendingTerm {
|
|||||||
term: string;
|
term: string;
|
||||||
source: "induction" | "correction";
|
source: "induction" | "correction";
|
||||||
added_at: string;
|
added_at: string;
|
||||||
|
source_call_ids?: string[];
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface SystemRecord {
|
export interface SystemRecord {
|
||||||
|
|||||||
Reference in New Issue
Block a user