From f6897566f86c0020e3411012a398eb42890ff50e Mon Sep 17 00:00:00 2001
From: Logan <Logan@simplestepsolutions.com>
Date: Mon, 4 May 2026 01:13:18 -0400
Subject: [PATCH] Fix tags, titles, and hallucinations

---
 .../app/internal/incident_correlator.py        | 18 +++++++++++++-----
 drb-c2-core/app/internal/transcription.py      |  8 ++++++--
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/drb-c2-core/app/internal/incident_correlator.py b/drb-c2-core/app/internal/incident_correlator.py
index b602c43..8ac0fb0 100644
--- a/drb-c2-core/app/internal/incident_correlator.py
+++ b/drb-c2-core/app/internal/incident_correlator.py
@@ -49,6 +49,14 @@ from app.config import settings
 _DISPATCH_TG_RE = re.compile(r"\bdispatch\b|\bdisp\b", re.IGNORECASE)
 
 
+def _tag_to_title(tag: str) -> str:
+    """
+    Convert a hyphenated tag to a title: hyphens become spaces and each word gets str.capitalize()
+    (first letter upper, rest lower), avoiding the str.title() apostrophe bug: "lower-macy's" → "Lower Macy's", not "Lower Macy'S".
+    """
+    return " ".join(w.capitalize() for w in tag.replace("-", " ").split())
+
+
 def _is_dispatch_channel(talkgroup_name: Optional[str]) -> bool:
     """True when the talkgroup is a shared dispatch backbone (not a tactical/working channel)."""
     if not talkgroup_name:
@@ -474,12 +482,12 @@ async def _update_incident(
     # Routine status calls (type=None) do not clobber the title.
     if incident_type:
         content_tags = [t for t in tags if t != "auto-generated"]
-        primary_tag = content_tags[0].replace("-", " ").title() if content_tags else None
+        primary_tag = _tag_to_title(content_tags[0]) if content_tags else None
         tg_label = (
             talkgroup_name
             or (f"TGID {talkgroup_id}" if talkgroup_id else inc.get("title", "").split(" — ")[-1])
         )
-        if primary_tag and best_location:
+        if primary_tag and best_location and primary_tag.lower() != best_location.lower():
             updates["title"] = f"{primary_tag} at {best_location}"
         elif primary_tag and tg_label:
             updates["title"] = f"{primary_tag} — {tg_label}"
@@ -513,13 +521,13 @@ async def _create_incident(
 
     # Build a descriptive title from tags + location when available
     content_tags = [t for t in tags if t != "auto-generated"]
-    primary_tag  = content_tags[0].replace("-", " ").title() if content_tags else None
-    if primary_tag and location:
+    primary_tag  = _tag_to_title(content_tags[0]) if content_tags else None
+    if primary_tag and location and primary_tag.lower() != location.lower():
         title = f"{primary_tag} at {location}"
     elif primary_tag:
         title = f"{primary_tag} — {tg_label}"
     else:
-        title = f"{incident_type.title()} — {tg_label}"
+        title = f"{_tag_to_title(incident_type)} — {tg_label}"
 
     doc = {
         "incident_id":       incident_id,
diff --git a/drb-c2-core/app/internal/transcription.py b/drb-c2-core/app/internal/transcription.py
index 7a20feb..3dff828 100644
--- a/drb-c2-core/app/internal/transcription.py
+++ b/drb-c2-core/app/internal/transcription.py
@@ -123,8 +123,6 @@ def _sync_transcribe(
                 response_format="verbose_json",
                 temperature=0,
             )
-        text = response.text.strip() or None
-
         # Filter hallucinated segments.  Two sources of hallucination in P25 recordings:
         #
         # 1. Trailing silence / static — Whisper fills silence past real content with
@@ -142,6 +140,12 @@ def _sync_transcribe(
             and s.start < audio_duration
             and getattr(s, "no_speech_prob", 0.0) < 0.8
         ]
+        # Reconstruct text from non-hallucinated segments only so the two stay
+        # in sync.  If every segment was filtered (e.g. pure static or repeated
+        # prompt-word hallucination like "Standby. Standby. Standby..."), text
+        # becomes None, which prevents the intelligence pipeline from running on
+        # hallucinated content.
+        text = " ".join(s["text"] for s in segments) or None
         return text, segments
     finally:
         try: