Add source call audio playback to vocabulary suggestions
When the induction loop proposes a new vocabulary term, it now records
which sampled call(s) most likely produced the suggestion. Admins see
a collapsible "▶ source" player under each pending term showing the
audio clip and transcript, so they can hear what was actually said
before approving or dismissing.
- vocabulary_learner: track sampled call docs, attach source_call_ids
to each pending term via word-overlap search with fallback
- types: VocabularyPendingTerm.source_call_ids?: string[]
- c2api: add getCall(id) using existing GET /calls/{call_id} endpoint
- VocabularyPanel: SourceCallPlayer component — lazy-loads call on
first expand, shows audio controls + transcript snippet
This commit is contained in:
@@ -18,6 +18,7 @@ import asyncio
|
||||
import difflib
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from typing import Optional
|
||||
from app.internal.logger import logger
|
||||
@@ -297,6 +298,7 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
|
||||
random.shuffle(all_calls)
|
||||
char_budget = settings.vocabulary_induction_sample_tokens * 4
|
||||
transcript_block = ""
|
||||
sampled_call_docs: list[dict] = []
|
||||
sampled = 0
|
||||
for call in all_calls:
|
||||
text = call.get("transcript_corrected") or call.get("transcript") or ""
|
||||
@@ -306,6 +308,7 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
|
||||
break
|
||||
tg = call.get("talkgroup_name") or f"TGID {call.get('talkgroup_id', '?')}"
|
||||
transcript_block += f"[{tg}] {text}\n"
|
||||
sampled_call_docs.append(call)
|
||||
sampled += 1
|
||||
|
||||
if sampled < 3:
|
||||
@@ -322,11 +325,16 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
|
||||
pending_lower = {p["term"].lower() for p in existing_pending}
|
||||
vocab_lower = {t.lower() for t in existing_vocab}
|
||||
|
||||
to_queue = [
|
||||
{"term": t, "source": "induction", "added_at": now}
|
||||
for t in new_terms
|
||||
if t.lower() not in vocab_lower and t.lower() not in pending_lower
|
||||
]
|
||||
to_queue = []
|
||||
for t in new_terms:
|
||||
if t.lower() in vocab_lower or t.lower() in pending_lower:
|
||||
continue
|
||||
to_queue.append({
|
||||
"term": t,
|
||||
"source": "induction",
|
||||
"added_at": now,
|
||||
"source_call_ids": _find_source_calls(t, sampled_call_docs),
|
||||
})
|
||||
if not to_queue:
|
||||
return
|
||||
|
||||
@@ -343,6 +351,30 @@ async def _induct_system(system_id: str, system_doc: dict) -> None:
|
||||
# Internal sync helpers
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
def _find_source_calls(term: str, sampled_calls: list[dict], max_results: int = 3) -> list[str]:
|
||||
"""
|
||||
Find which sampled calls most likely produced this induction suggestion.
|
||||
Splits the proposed term into tokens and searches call transcripts for overlap.
|
||||
Falls back to the first two sampled calls when no token match is found
|
||||
(e.g. fully garbled terms like "why vac" → "YVAC" have no word overlap).
|
||||
"""
|
||||
tokens = [t.lower() for t in re.split(r"[^a-zA-Z0-9]+", term) if len(t) >= 2]
|
||||
matched: list[str] = []
|
||||
if tokens:
|
||||
for call in sampled_calls:
|
||||
call_id = call.get("call_id")
|
||||
if not call_id:
|
||||
continue
|
||||
text = (call.get("transcript_corrected") or call.get("transcript") or "").lower()
|
||||
if any(tok in text for tok in tokens):
|
||||
matched.append(call_id)
|
||||
if len(matched) >= max_results:
|
||||
break
|
||||
if not matched:
|
||||
matched = [c["call_id"] for c in sampled_calls[:2] if c.get("call_id")]
|
||||
return matched
|
||||
|
||||
|
||||
_STOP_WORDS = {
|
||||
"the", "and", "for", "are", "was", "were", "this", "that", "with",
|
||||
"have", "has", "had", "but", "not", "from", "they", "will", "what",
|
||||
|
||||
Reference in New Issue
Block a user