Fix tags, titles, and hallucinations

2026-05-04 01:13:18 -04:00
parent 531ce64eeb
commit f6897566f8
2 changed files with 19 additions and 7 deletions
@@ -123,8 +123,6 @@ def _sync_transcribe(
                response_format="verbose_json",
                temperature=0,
            )
-        text = response.text.strip() or None
-
        # Filter hallucinated segments.  Two sources of hallucination in P25 recordings:
        #
        # 1. Trailing silence / static — Whisper fills silence past real content with
@@ -142,6 +140,12 @@ def _sync_transcribe(
            and s.start < audio_duration
            and getattr(s, "no_speech_prob", 0.0) < 0.8
        ]
+        # Reconstruct text from non-hallucinated segments only so the two stay
+        # in sync.  If every segment was filtered (e.g. pure static or repeated
+        # prompt-word hallucination like "Standby. Standby. Standby..."), text
+        # becomes None, which prevents the intelligence pipeline from running on
+        # hallucinated content.
+        text = " ".join(s["text"] for s in segments) or None
        return text, segments
    finally:
        try: