fix: raise garbage-transcript threshold to avoid false positives on plate reads
Raise the phonetic-run threshold from 5 to 12: a plate spellout ("Foxtrot Alpha
Uniform Lima Kilo...") produces 6–8 consecutive phonetic words, which at the old
threshold triggered false positives and blocked intelligence extraction on
legitimate calls. 12 is safely above any real spellout (~8 words max) while
still catching the full-alphabet hallucination (26 words). Also write
skip_reason="garbage_transcript" to the call doc and surface it in the admin
correlation debug endpoint.
This commit is contained in:
@@ -94,11 +94,13 @@ def _is_garbage_transcript(transcript: str) -> bool:
|
||||
if not words:
|
||||
return False
|
||||
|
||||
# Threshold of 12: well above any legitimate plate/name spellout (~6–8 words)
|
||||
# but catches the full-alphabet hallucination (26 words in sequence).
|
||||
run = 0
|
||||
for w in words:
|
||||
if w in _PHONETIC_ALPHA_WORDS:
|
||||
run += 1
|
||||
if run >= 5:
|
||||
if run >= 12:
|
||||
return True
|
||||
else:
|
||||
run = 0
|
||||
@@ -154,6 +156,10 @@ async def extract_scenes(
|
||||
f"Intelligence: call {call_id} — garbage transcript detected "
|
||||
f"(Whisper hallucination), skipping extraction"
|
||||
)
|
||||
try:
|
||||
await fstore.doc_set("calls", call_id, {"skip_reason": "garbage_transcript"})
|
||||
except Exception:
|
||||
pass
|
||||
return []
|
||||
|
||||
raw_scenes: list[dict] = await asyncio.to_thread(
|
||||
|
||||
@@ -68,6 +68,7 @@ async def debug_correlation(
|
||||
"corr_candidates": call.get("corr_candidates"),
|
||||
"corr_shared_units": call.get("corr_shared_units"),
|
||||
"corr_sweep_count": call.get("corr_sweep_count"),
|
||||
"skip_reason": call.get("skip_reason"),
|
||||
}
|
||||
|
||||
# ── Fetch recent incidents ────────────────────────────────────────────────
|
||||
|
||||
Reference in New Issue
Block a user