diff --git a/drb-c2-core/app/internal/intelligence.py b/drb-c2-core/app/internal/intelligence.py index 956aaf1..e5c1624 100644 --- a/drb-c2-core/app/internal/intelligence.py +++ b/drb-c2-core/app/internal/intelligence.py @@ -94,11 +94,13 @@ def _is_garbage_transcript(transcript: str) -> bool: if not words: return False + # Threshold of 12: well above any legitimate plate/name spellout (~6–8 words) + # but catches the full-alphabet hallucination (26 words in sequence). run = 0 for w in words: if w in _PHONETIC_ALPHA_WORDS: run += 1 - if run >= 5: + if run >= 12: return True else: run = 0 @@ -154,6 +156,10 @@ async def extract_scenes( f"Intelligence: call {call_id} — garbage transcript detected " f"(Whisper hallucination), skipping extraction" ) + try: + await fstore.doc_set("calls", call_id, {"skip_reason": "garbage_transcript"}) + except Exception: + pass return [] raw_scenes: list[dict] = await asyncio.to_thread( diff --git a/drb-c2-core/app/routers/admin.py b/drb-c2-core/app/routers/admin.py index abcb9d1..966fe09 100644 --- a/drb-c2-core/app/routers/admin.py +++ b/drb-c2-core/app/routers/admin.py @@ -68,6 +68,7 @@ async def debug_correlation( "corr_candidates": call.get("corr_candidates"), "corr_shared_units": call.get("corr_shared_units"), "corr_sweep_count": call.get("corr_sweep_count"), + "skip_reason": call.get("skip_reason"), } # ── Fetch recent incidents ────────────────────────────────────────────────