Fix correlation false-merge, switch STT to whisper-1 without vocab prompt
- correlator: unit_overlap on dispatch channels now applies the content-divergence check when the call has geocoded coordinates but the incident does not; previously this gap caused unrelated calls to merge into stale incidents (e.g. a patrol officer at a second scene 70 minutes later)
- STT: switch the default model from gpt-4o-transcribe to whisper-1, which faithfully transcribes all exchanges in multi-PTT recordings; gpt-4o-transcribe was silently dropping utterances, starving the correlation engine
- STT: remove vocabulary from the Whisper prompt; whisper-1 echoes prompted terms into noise/silence, skewing extracted incident data; vocabulary context is now applied exclusively in the GPT extraction step (build_gpt_vocab_block), where it is used as reference only
This commit is contained in:
@@ -19,7 +19,7 @@ class Settings(BaseSettings):
|
||||
|
||||
# OpenAI (STT + intelligence)
|
||||
openai_api_key: Optional[str] = None
|
||||
stt_model: str = "gpt-4o-transcribe" # whisper-1 | gpt-4o-mini-transcribe | gpt-4o-transcribe
|
||||
stt_model: str = "whisper-1" # whisper-1 | gpt-4o-mini-transcribe | gpt-4o-transcribe
|
||||
|
||||
# Google Maps (geocoding)
|
||||
google_maps_api_key: Optional[str] = None
|
||||
|
||||
@@ -1030,6 +1030,18 @@ def _call_fits_incident(
|
||||
if dist_km > proximity_km:
|
||||
logger.info(f" fits[{inc_id}]: unit_overlap({matched_units}) but location_conflict dist={dist_km:.2f}km → unit_loc_conflict")
|
||||
return False, "unit_loc_conflict"
|
||||
elif call_embedding and idle_min >= 15:
|
||||
# Call has geocode but incident doesn't — fall back to content
|
||||
# divergence as a location proxy. Without this, stale incidents
|
||||
# that never geocoded absorb unrelated calls purely on unit
|
||||
# overlap (e.g. a patrol officer working a second scene 70 min
|
||||
# after the original call).
|
||||
inc_emb_u = inc.get("embedding")
|
||||
if inc_emb_u:
|
||||
sim = _cosine_similarity(call_embedding, inc_emb_u)
|
||||
if sim < 0.82:
|
||||
logger.info(f" fits[{inc_id}]: unit_overlap({matched_units}) but content_divergence (has_call_coords/no_inc_coords) sim={sim:.3f} → content_divergence")
|
||||
return False, "content_divergence"
|
||||
elif call_embedding and idle_min >= 15:
|
||||
# No geocode available AND old incident: use content divergence as a
|
||||
# location-proxy veto. After 15+ minutes an officer at a completely
|
||||
|
||||
@@ -40,16 +40,9 @@ async def transcribe_call(
|
||||
if not gcs_uri or not gcs_uri.startswith("gs://"):
|
||||
return None, []
|
||||
|
||||
# Load vocabulary for this system (empty list if none yet)
|
||||
vocabulary: list[str] = []
|
||||
if system_id:
|
||||
from app.internal.vocabulary_learner import get_vocabulary
|
||||
vocab_data = await get_vocabulary(system_id)
|
||||
vocabulary = vocab_data.get("vocabulary") or []
|
||||
|
||||
try:
|
||||
transcript, segments = await asyncio.to_thread(
|
||||
_sync_transcribe, gcs_uri, talkgroup_name, vocabulary
|
||||
_sync_transcribe, gcs_uri, talkgroup_name
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Transcription failed for call {call_id}: {e}")
|
||||
@@ -74,7 +67,6 @@ async def transcribe_call(
|
||||
def _sync_transcribe(
|
||||
gcs_uri: str,
|
||||
talkgroup_name: Optional[str] = None,
|
||||
vocabulary: Optional[list[str]] = None,
|
||||
) -> tuple[Optional[str], list[dict]]:
|
||||
"""Download audio from GCS and transcribe with OpenAI Whisper."""
|
||||
from google.cloud import storage as gcs
|
||||
@@ -108,13 +100,16 @@ def _sync_transcribe(
|
||||
try:
|
||||
blob.download_to_filename(tmp_path)
|
||||
|
||||
from app.internal.vocabulary_learner import build_whisper_vocab_prompt
|
||||
vocab_prefix = build_whisper_vocab_prompt(vocabulary or [])
|
||||
tg_prefix = f"Talkgroup: {talkgroup_name}. " if talkgroup_name else ""
|
||||
prompt = tg_prefix + vocab_prefix + _WHISPER_PROMPT
|
||||
tg_prefix = f"Talkgroup: {talkgroup_name}. " if talkgroup_name else ""
|
||||
# Vocabulary is intentionally excluded from the Whisper prompt.
|
||||
# whisper-1 treats the prompt as a transcription prior and echoes
|
||||
# vocabulary terms into noise/silence, polluting downstream extraction.
|
||||
# Vocabulary context is applied in the GPT extraction step instead,
|
||||
# where it is used as reference rather than a transcription prior.
|
||||
prompt = tg_prefix + _WHISPER_PROMPT
|
||||
|
||||
# Only whisper-1 supports verbose_json (per-segment timestamps + no_speech_prob).
|
||||
# Newer models (gpt-4o-transcribe, gpt-4o-mini-transcribe) only accept json/text.
|
||||
# gpt-4o-transcribe and gpt-4o-mini-transcribe only support json/text.
|
||||
use_verbose = settings.stt_model == "whisper-1"
|
||||
|
||||
openai_client = OpenAI(api_key=settings.openai_api_key)
|
||||
|
||||
Reference in New Issue
Block a user