From 7dd090e8b2b4437e57e801e79498a9b35b45c2b4 Mon Sep 17 00:00:00 2001 From: Logan Date: Mon, 25 May 2026 03:31:43 -0400 Subject: [PATCH] fix: raise garbage-transcript threshold to avoid false positives on plate reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phonetic run threshold 5 → 12: a plate spellout ("Foxtrot Alpha Uniform Lima Kilo...") produces 6–8 consecutive phonetic words, triggering false positives and blocking intelligence extraction on legitimate calls. 12 is safely above any real spellout (~8 max) while still catching the full-alphabet hallucination (26 words). Also writes skip_reason="garbage_transcript" to the call doc and surfaces it in the admin correlation debug endpoint. --- drb-c2-core/app/internal/intelligence.py | 8 +++++++- drb-c2-core/app/routers/admin.py | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drb-c2-core/app/internal/intelligence.py b/drb-c2-core/app/internal/intelligence.py index 956aaf1..e5c1624 100644 --- a/drb-c2-core/app/internal/intelligence.py +++ b/drb-c2-core/app/internal/intelligence.py @@ -94,11 +94,13 @@ def _is_garbage_transcript(transcript: str) -> bool: if not words: return False + # Threshold of 12: well above any legitimate plate/name spellout (~6–8 words) + # but catches the full-alphabet hallucination (26 words in sequence). run = 0 for w in words: if w in _PHONETIC_ALPHA_WORDS: run += 1 - if run >= 5: + if run >= 12: return True else: run = 0 @@ -154,6 +156,10 @@ async def extract_scenes( f"Intelligence: call {call_id} — garbage transcript detected " f"(Whisper hallucination), skipping extraction" ) + try: + await fstore.doc_set("calls", call_id, {"skip_reason": "garbage_transcript"}) + except Exception: + pass return [] raw_scenes: list[dict] = await asyncio.to_thread( diff --git a/drb-c2-core/app/routers/admin.py b/drb-c2-core/app/routers/admin.py index abcb9d1..966fe09 100644 --- a/drb-c2-core/app/routers/admin.py +++ b/drb-c2-core/app/routers/admin.py @@ -68,6 +68,7 @@ async def debug_correlation( "corr_candidates": call.get("corr_candidates"), "corr_shared_units": call.get("corr_shared_units"), "corr_sweep_count": call.get("corr_sweep_count"), + "skip_reason": call.get("skip_reason"), } # ── Fetch recent incidents ────────────────────────────────────────────────