fix: raise garbage-transcript threshold to avoid false positives on plate reads
Raise the phonetic-run threshold from 5 to 12. A plate spellout ("Foxtrot Alpha
Uniform Lima Kilo...") produces 6–8 consecutive phonetic words, which tripped
the old threshold of 5, causing false positives that blocked intelligence
extraction on legitimate calls. 12 is safely above any real spellout (~8 max)
while still catching the full-alphabet hallucination (26 words). Also writes
skip_reason="garbage_transcript" to the call doc and surfaces it in the admin
correlation debug endpoint.
This commit is contained in:
@@ -94,11 +94,13 @@ def _is_garbage_transcript(transcript: str) -> bool:
|
|||||||
if not words:
|
if not words:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# Threshold of 12: well above any legitimate plate/name spellout (~6–8 words)
|
||||||
|
# but catches the full-alphabet hallucination (26 words in sequence).
|
||||||
run = 0
|
run = 0
|
||||||
for w in words:
|
for w in words:
|
||||||
if w in _PHONETIC_ALPHA_WORDS:
|
if w in _PHONETIC_ALPHA_WORDS:
|
||||||
run += 1
|
run += 1
|
||||||
if run >= 5:
|
if run >= 12:
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
run = 0
|
run = 0
|
||||||
@@ -154,6 +156,10 @@ async def extract_scenes(
|
|||||||
f"Intelligence: call {call_id} — garbage transcript detected "
|
f"Intelligence: call {call_id} — garbage transcript detected "
|
||||||
f"(Whisper hallucination), skipping extraction"
|
f"(Whisper hallucination), skipping extraction"
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
|
await fstore.doc_set("calls", call_id, {"skip_reason": "garbage_transcript"})
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
return []
|
return []
|
||||||
|
|
||||||
raw_scenes: list[dict] = await asyncio.to_thread(
|
raw_scenes: list[dict] = await asyncio.to_thread(
|
||||||
|
|||||||
@@ -68,6 +68,7 @@ async def debug_correlation(
|
|||||||
"corr_candidates": call.get("corr_candidates"),
|
"corr_candidates": call.get("corr_candidates"),
|
||||||
"corr_shared_units": call.get("corr_shared_units"),
|
"corr_shared_units": call.get("corr_shared_units"),
|
||||||
"corr_sweep_count": call.get("corr_sweep_count"),
|
"corr_sweep_count": call.get("corr_sweep_count"),
|
||||||
|
"skip_reason": call.get("skip_reason"),
|
||||||
}
|
}
|
||||||
|
|
||||||
# ── Fetch recent incidents ────────────────────────────────────────────────
|
# ── Fetch recent incidents ────────────────────────────────────────────────
|
||||||
|
|||||||
Reference in New Issue
Block a user