Fix tags, titles, and hallucinations

This commit is contained in:
Logan
2026-05-04 01:13:18 -04:00
parent 531ce64eeb
commit f6897566f8
2 changed files with 19 additions and 7 deletions
@@ -49,6 +49,14 @@ from app.config import settings
_DISPATCH_TG_RE = re.compile(r"\bdispatch\b|\bdisp\b", re.IGNORECASE) _DISPATCH_TG_RE = re.compile(r"\bdispatch\b|\bdisp\b", re.IGNORECASE)
def _tag_to_title(tag: str) -> str:
"""
Convert a hyphenated tag to title case without the str.title() apostrophe bug.
e.g. "lower-macy's""Lower Macy's" (not "Lower Macy'S")
"""
return " ".join(w.capitalize() for w in tag.replace("-", " ").split())
def _is_dispatch_channel(talkgroup_name: Optional[str]) -> bool: def _is_dispatch_channel(talkgroup_name: Optional[str]) -> bool:
"""True when the talkgroup is a shared dispatch backbone (not a tactical/working channel).""" """True when the talkgroup is a shared dispatch backbone (not a tactical/working channel)."""
if not talkgroup_name: if not talkgroup_name:
@@ -474,12 +482,12 @@ async def _update_incident(
# Routine status calls (type=None) do not clobber the title. # Routine status calls (type=None) do not clobber the title.
if incident_type: if incident_type:
content_tags = [t for t in tags if t != "auto-generated"] content_tags = [t for t in tags if t != "auto-generated"]
primary_tag = content_tags[0].replace("-", " ").title() if content_tags else None primary_tag = _tag_to_title(content_tags[0]) if content_tags else None
tg_label = ( tg_label = (
talkgroup_name talkgroup_name
or (f"TGID {talkgroup_id}" if talkgroup_id else inc.get("title", "").split("")[-1]) or (f"TGID {talkgroup_id}" if talkgroup_id else inc.get("title", "").split("")[-1])
) )
if primary_tag and best_location: if primary_tag and best_location and primary_tag.lower() != best_location.lower():
updates["title"] = f"{primary_tag} at {best_location}" updates["title"] = f"{primary_tag} at {best_location}"
elif primary_tag and tg_label: elif primary_tag and tg_label:
updates["title"] = f"{primary_tag}{tg_label}" updates["title"] = f"{primary_tag}{tg_label}"
@@ -513,13 +521,13 @@ async def _create_incident(
# Build a descriptive title from tags + location when available # Build a descriptive title from tags + location when available
content_tags = [t for t in tags if t != "auto-generated"] content_tags = [t for t in tags if t != "auto-generated"]
primary_tag = content_tags[0].replace("-", " ").title() if content_tags else None primary_tag = _tag_to_title(content_tags[0]) if content_tags else None
if primary_tag and location: if primary_tag and location and primary_tag.lower() != location.lower():
title = f"{primary_tag} at {location}" title = f"{primary_tag} at {location}"
elif primary_tag: elif primary_tag:
title = f"{primary_tag}{tg_label}" title = f"{primary_tag}{tg_label}"
else: else:
title = f"{incident_type.title()}{tg_label}" title = f"{_tag_to_title(incident_type)}{tg_label}"
doc = { doc = {
"incident_id": incident_id, "incident_id": incident_id,
+6 -2
View File
@@ -123,8 +123,6 @@ def _sync_transcribe(
response_format="verbose_json", response_format="verbose_json",
temperature=0, temperature=0,
) )
text = response.text.strip() or None
# Filter hallucinated segments. Two sources of hallucination in P25 recordings: # Filter hallucinated segments. Two sources of hallucination in P25 recordings:
# #
# 1. Trailing silence / static — Whisper fills silence past real content with # 1. Trailing silence / static — Whisper fills silence past real content with
@@ -142,6 +140,12 @@ def _sync_transcribe(
and s.start < audio_duration and s.start < audio_duration
and getattr(s, "no_speech_prob", 0.0) < 0.8 and getattr(s, "no_speech_prob", 0.0) < 0.8
] ]
# Reconstruct text from non-hallucinated segments only so the two stay
# in sync. If every segment was filtered (e.g. pure static or repeated
# prompt-word hallucination like "Standby. Standby. Standby..."), text
# becomes None which prevents the intelligence pipeline from running on
# hallucinated content.
text = " ".join(s["text"] for s in segments) or None
return text, segments return text, segments
finally: finally:
try: try: