Fix tags, titles, and hallucinations
This commit is contained in:
@@ -49,6 +49,14 @@ from app.config import settings
|
|||||||
_DISPATCH_TG_RE = re.compile(r"\bdispatch\b|\bdisp\b", re.IGNORECASE)
|
_DISPATCH_TG_RE = re.compile(r"\bdispatch\b|\bdisp\b", re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
def _tag_to_title(tag: str) -> str:
|
||||||
|
"""
|
||||||
|
Convert a hyphenated tag to title case without the str.title() apostrophe bug.
|
||||||
|
e.g. "lower-macy's" → "Lower Macy's" (not "Lower Macy'S")
|
||||||
|
"""
|
||||||
|
return " ".join(w.capitalize() for w in tag.replace("-", " ").split())
|
||||||
|
|
||||||
|
|
||||||
def _is_dispatch_channel(talkgroup_name: Optional[str]) -> bool:
|
def _is_dispatch_channel(talkgroup_name: Optional[str]) -> bool:
|
||||||
"""True when the talkgroup is a shared dispatch backbone (not a tactical/working channel)."""
|
"""True when the talkgroup is a shared dispatch backbone (not a tactical/working channel)."""
|
||||||
if not talkgroup_name:
|
if not talkgroup_name:
|
||||||
@@ -474,12 +482,12 @@ async def _update_incident(
|
|||||||
# Routine status calls (type=None) do not clobber the title.
|
# Routine status calls (type=None) do not clobber the title.
|
||||||
if incident_type:
|
if incident_type:
|
||||||
content_tags = [t for t in tags if t != "auto-generated"]
|
content_tags = [t for t in tags if t != "auto-generated"]
|
||||||
primary_tag = content_tags[0].replace("-", " ").title() if content_tags else None
|
primary_tag = _tag_to_title(content_tags[0]) if content_tags else None
|
||||||
tg_label = (
|
tg_label = (
|
||||||
talkgroup_name
|
talkgroup_name
|
||||||
or (f"TGID {talkgroup_id}" if talkgroup_id else inc.get("title", "").split(" — ")[-1])
|
or (f"TGID {talkgroup_id}" if talkgroup_id else inc.get("title", "").split(" — ")[-1])
|
||||||
)
|
)
|
||||||
if primary_tag and best_location:
|
if primary_tag and best_location and primary_tag.lower() != best_location.lower():
|
||||||
updates["title"] = f"{primary_tag} at {best_location}"
|
updates["title"] = f"{primary_tag} at {best_location}"
|
||||||
elif primary_tag and tg_label:
|
elif primary_tag and tg_label:
|
||||||
updates["title"] = f"{primary_tag} — {tg_label}"
|
updates["title"] = f"{primary_tag} — {tg_label}"
|
||||||
@@ -513,13 +521,13 @@ async def _create_incident(
|
|||||||
|
|
||||||
# Build a descriptive title from tags + location when available
|
# Build a descriptive title from tags + location when available
|
||||||
content_tags = [t for t in tags if t != "auto-generated"]
|
content_tags = [t for t in tags if t != "auto-generated"]
|
||||||
primary_tag = content_tags[0].replace("-", " ").title() if content_tags else None
|
primary_tag = _tag_to_title(content_tags[0]) if content_tags else None
|
||||||
if primary_tag and location:
|
if primary_tag and location and primary_tag.lower() != location.lower():
|
||||||
title = f"{primary_tag} at {location}"
|
title = f"{primary_tag} at {location}"
|
||||||
elif primary_tag:
|
elif primary_tag:
|
||||||
title = f"{primary_tag} — {tg_label}"
|
title = f"{primary_tag} — {tg_label}"
|
||||||
else:
|
else:
|
||||||
title = f"{incident_type.title()} — {tg_label}"
|
title = f"{_tag_to_title(incident_type)} — {tg_label}"
|
||||||
|
|
||||||
doc = {
|
doc = {
|
||||||
"incident_id": incident_id,
|
"incident_id": incident_id,
|
||||||
|
|||||||
@@ -123,8 +123,6 @@ def _sync_transcribe(
|
|||||||
response_format="verbose_json",
|
response_format="verbose_json",
|
||||||
temperature=0,
|
temperature=0,
|
||||||
)
|
)
|
||||||
text = response.text.strip() or None
|
|
||||||
|
|
||||||
# Filter hallucinated segments. Two sources of hallucination in P25 recordings:
|
# Filter hallucinated segments. Two sources of hallucination in P25 recordings:
|
||||||
#
|
#
|
||||||
# 1. Trailing silence / static — Whisper fills silence past real content with
|
# 1. Trailing silence / static — Whisper fills silence past real content with
|
||||||
@@ -142,6 +140,12 @@ def _sync_transcribe(
|
|||||||
and s.start < audio_duration
|
and s.start < audio_duration
|
||||||
and getattr(s, "no_speech_prob", 0.0) < 0.8
|
and getattr(s, "no_speech_prob", 0.0) < 0.8
|
||||||
]
|
]
|
||||||
|
# Reconstruct text from non-hallucinated segments only so the two stay
|
||||||
|
# in sync. If every segment was filtered (e.g. pure static or repeated
|
||||||
|
# prompt-word hallucination like "Standby. Standby. Standby..."), text
|
||||||
|
# becomes None which prevents the intelligence pipeline from running on
|
||||||
|
# hallucinated content.
|
||||||
|
text = " ".join(s["text"] for s in segments) or None
|
||||||
return text, segments
|
return text, segments
|
||||||
finally:
|
finally:
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user