# `app/internal/incident_correlator.py`
- *`correlate_call`* — added optional `units` and `vehicles` params; when provided (per-scene, from intelligence extraction), they take priority over the merged call-document values, preventing multi-scene unit contamination
- *Cross-TGID correlation path (2.5)* — *new path between the location and slow paths*: when a call shares 2+ unit IDs with a recent same-system, same-type incident AND their embedding similarity is ≥ 0.85 (`embedding_cross_tg_threshold`), the call is linked to the highest-similarity such incident — catches multi-talkgroup pursuits like the bicycle search that split across dispatch/tactical/geographic channels
# `app/internal/intelligence.py`
- *`reassignment` field* — added to the GPT-4o-mini prompt schema and rules; `true` when dispatch is actively pulling a unit to a new, different call (not a status update or en route acknowledgement); returned in every processed scene dict
- *Tag location rule* — added explicit instruction to the prompt: tags must describe what happened, not where; place names, road names, and talkgroup names are explicitly forbidden as tags
# `app/routers/upload.py`
- Both scene correlation call sites (`_run_extraction_pipeline` and `_run_intelligence_pipeline`) now pass `units=corr_units`, where `corr_units = [] if scene.get("reassignment") else scene.get("units")` — this suppresses unit-overlap matching when a unit is being reassigned to a new call, preventing the new scene from chaining into the unit's previous incident
- Both sites also pass `vehicles=scene.get("vehicles")` (per-scene vehicles, from the multi-scene units fix)
# `app/config.py`
- `embedding_cross_tg_threshold: float = 0.85` — threshold for the new cross-TGID path
This commit is contained in:
@@ -26,6 +26,7 @@ class Settings(BaseSettings):
|
|||||||
correlation_window_hours: int = 2 # slow/location path: max hours since last call
|
correlation_window_hours: int = 2 # slow/location path: max hours since last call
|
||||||
embedding_similarity_threshold: float = 0.93 # slow-path: requires location corroboration
|
embedding_similarity_threshold: float = 0.93 # slow-path: requires location corroboration
|
||||||
embedding_no_location_threshold: float = 0.97 # slow-path: match without location (very high bar)
|
embedding_no_location_threshold: float = 0.97 # slow-path: match without location (very high bar)
|
||||||
|
embedding_cross_tg_threshold: float = 0.85 # cross-TG path: same dept + 2+ shared units
|
||||||
location_proximity_km: float = 0.5 # radius for location-proximity matching
|
location_proximity_km: float = 0.5 # radius for location-proximity matching
|
||||||
incident_auto_resolve_minutes: int = 90 # auto-resolve after N minutes with no new calls
|
incident_auto_resolve_minutes: int = 90 # auto-resolve after N minutes with no new calls
|
||||||
recorrelation_scan_minutes: int = 60 # re-examine orphaned calls ended within this window
|
recorrelation_scan_minutes: int = 60 # re-examine orphaned calls ended within this window
|
||||||
|
|||||||
@@ -92,6 +92,8 @@ async def correlate_call(
|
|||||||
location_coords: Optional[dict] = None,
|
location_coords: Optional[dict] = None,
|
||||||
reference_time: Optional[datetime] = None,
|
reference_time: Optional[datetime] = None,
|
||||||
create_if_new: bool = True,
|
create_if_new: bool = True,
|
||||||
|
units: Optional[list[str]] = None,
|
||||||
|
vehicles: Optional[list[str]] = None,
|
||||||
) -> Optional[str]:
|
) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
Link call_id to an existing incident or create a new one.
|
Link call_id to an existing incident or create a new one.
|
||||||
@@ -115,8 +117,12 @@ async def correlate_call(
|
|||||||
# Fetch call doc once — reused for disambiguation, embedding merge, unit accumulation
|
# Fetch call doc once — reused for disambiguation, embedding merge, unit accumulation
|
||||||
call_doc = await fstore.doc_get("calls", call_id) or {}
|
call_doc = await fstore.doc_get("calls", call_id) or {}
|
||||||
call_embedding: Optional[list] = call_doc.get("embedding")
|
call_embedding: Optional[list] = call_doc.get("embedding")
|
||||||
call_units: list[str] = call_doc.get("units") or []
|
# Prefer explicitly passed units/vehicles (per-scene, from intelligence extraction)
|
||||||
call_vehicles: list[str] = call_doc.get("vehicles") or []
|
# over the call doc, which merges units from ALL scenes in a multi-scene recording.
|
||||||
|
# Falling back to the call doc is correct for recorrelation sweeps where we have no
|
||||||
|
# scene-level breakdown.
|
||||||
|
call_units: list[str] = units if units is not None else (call_doc.get("units") or [])
|
||||||
|
call_vehicles: list[str] = vehicles if vehicles is not None else (call_doc.get("vehicles") or [])
|
||||||
call_severity: str = call_doc.get("severity") or "unknown"
|
call_severity: str = call_doc.get("severity") or "unknown"
|
||||||
# Use passed coords first (freshly geocoded), fall back to what's on the call doc
|
# Use passed coords first (freshly geocoded), fall back to what's on the call doc
|
||||||
coords: Optional[dict] = location_coords or call_doc.get("location_coords")
|
coords: Optional[dict] = location_coords or call_doc.get("location_coords")
|
||||||
@@ -210,6 +216,42 @@ async def correlate_call(
|
|||||||
)
|
)
|
||||||
break
|
break
|
||||||
|
|
||||||
|
# ── 2.5. Cross-TG path: same department, overlapping units, moderate similarity ──
|
||||||
|
#
|
||||||
|
# Catches pursuits / searches that span multiple talkgroup IDs within the same
|
||||||
|
# department (e.g. dispatch → tactical → geographic channel). The fast path
|
||||||
|
# is TGID-scoped so it never links these. Two conditions together provide
|
||||||
|
# strong evidence of the same scene without needing location:
|
||||||
|
# • 2+ shared unit IDs (same officers working the same call)
|
||||||
|
# • embedding similarity >= cross-TG threshold (same subject matter)
|
||||||
|
# Requiring 2+ shared units prevents single-officer false positives.
|
||||||
|
if not matched_incident and call_embedding and incident_type and call_units and system_id:
|
||||||
|
call_unit_set = set(call_units)
|
||||||
|
best_cross_score = 0.0
|
||||||
|
best_cross_inc: Optional[dict] = None
|
||||||
|
for inc in recent:
|
||||||
|
if inc.get("type") != incident_type:
|
||||||
|
continue
|
||||||
|
if system_id not in (inc.get("system_ids") or []):
|
||||||
|
continue
|
||||||
|
inc_units_set = set(inc.get("units") or [])
|
||||||
|
if len(call_unit_set & inc_units_set) < 2:
|
||||||
|
continue
|
||||||
|
inc_embedding = inc.get("embedding")
|
||||||
|
if not inc_embedding:
|
||||||
|
continue
|
||||||
|
sim = _cosine_similarity(call_embedding, inc_embedding)
|
||||||
|
if sim > best_cross_score:
|
||||||
|
best_cross_score = sim
|
||||||
|
best_cross_inc = inc
|
||||||
|
if best_cross_inc and best_cross_score >= settings.embedding_cross_tg_threshold:
|
||||||
|
matched_incident = best_cross_inc
|
||||||
|
logger.info(
|
||||||
|
f"Correlator cross-TG path: call {call_id} → {best_cross_inc['incident_id']} "
|
||||||
|
f"(sim={best_cross_score:.3f}, "
|
||||||
|
f"shared_units={len(call_unit_set & set(best_cross_inc.get('units') or []))})"
|
||||||
|
)
|
||||||
|
|
||||||
# ── 3. Slow path: embedding similarity (time-limited, same type) ──────────
|
# ── 3. Slow path: embedding similarity (time-limited, same type) ──────────
|
||||||
#
|
#
|
||||||
# Two tiers:
|
# Two tiers:
|
||||||
|
|||||||
@@ -31,16 +31,18 @@ Response format — a JSON object with a "scenes" array. Each scene:
|
|||||||
units: list of unit IDs or officer numbers explicitly mentioned
|
units: list of unit IDs or officer numbers explicitly mentioned
|
||||||
severity: one of "minor" | "moderate" | "major" | "unknown"
|
severity: one of "minor" | "moderate" | "major" | "unknown"
|
||||||
resolved: true if this scene explicitly signals incident closure, false otherwise
|
resolved: true if this scene explicitly signals incident closure, false otherwise
|
||||||
|
reassignment: true if dispatch is actively pulling a unit away from their current assignment to respond to a new, different call — e.g. "Baker, can you clear and respond to...", "Adam, break from that and go to...". False if the unit is simply reporting in, updating status, or continuing their current assignment.
|
||||||
transcript_corrected: corrected text for this scene's transmissions only, or null
|
transcript_corrected: corrected text for this scene's transmissions only, or null
|
||||||
|
|
||||||
Rules:
|
Rules:
|
||||||
- location: prefer intersections > addresses > mile markers > route+town > route alone > town alone. Empty string if none.
|
- location: prefer intersections > addresses > mile markers > route+town > route alone > town alone. Empty string if none.
|
||||||
- tags: specific, lowercase, hyphenated. Do not repeat incident_type as a tag.
|
- tags: describe WHAT happened, not WHERE. Specific, lowercase, hyphenated. Do not use location names, road names, talkgroup names, or place names as tags (wrong: "lower-macy's", "canvas-route-6", "route-202"; right: "suspect-search", "shoplifting", "vehicle-pursuit"). Do not repeat incident_type as a tag.
|
||||||
- units: only identifiers explicitly mentioned, not inferred.
|
- units: only identifiers explicitly mentioned, not inferred.
|
||||||
- Do not invent details not present in the transcript.
|
- Do not invent details not present in the transcript.
|
||||||
- incident_type: let the talkgroup channel be your primary signal. Use "fire" ONLY if the talkgroup is clearly a fire/rescue channel OR the transcript explicitly describes active fire, smoke, flames, or structure fire activation. Police or EMS referencing a fire scene → use "police" or "ems". When uncertain, prefer "other" over "fire".
|
- incident_type: let the talkgroup channel be your primary signal. Use "fire" ONLY if the talkgroup is clearly a fire/rescue channel OR the transcript explicitly describes active fire, smoke, flames, or structure fire activation. Police or EMS referencing a fire scene → use "police" or "ems". When uncertain, prefer "other" over "fire".
|
||||||
- ten_codes: interpret radio codes using the department reference provided below. Do not guess codes not listed.
|
- ten_codes: interpret radio codes using the department reference provided below. Do not guess codes not listed.
|
||||||
- resolved: true only when the scene explicitly signals "Code 4", "all clear", "10-42", "in custody", "patient transported", "fire out", "GOA", "negative contact", "scene clear".
|
- resolved: true only when the scene explicitly signals "Code 4", "all clear", "10-42", "in custody", "patient transported", "fire out", "GOA", "negative contact", "scene clear".
|
||||||
|
- reassignment: only true when a unit is explicitly being pulled to a completely new call or location. A unit going en route to their first dispatch is NOT a reassignment. Routine status updates, acknowledgements, and scene updates are NOT reassignments.
|
||||||
- transcript_corrected: fix only clear STT/vocoder errors (e.g. "Several" → "10-4", misheard street names, garbled unit IDs). Keep all radio language as-is — do NOT decode codes into plain English. Return null if accurate.
|
- transcript_corrected: fix only clear STT/vocoder errors (e.g. "Several" → "10-4", misheard street names, garbled unit IDs). Keep all radio language as-is — do NOT decode codes into plain English. Return null if accurate.
|
||||||
|
|
||||||
System: {system_id}
|
System: {system_id}
|
||||||
@@ -130,6 +132,7 @@ async def extract_scenes(
|
|||||||
units: list[str] = scene.get("units") or []
|
units: list[str] = scene.get("units") or []
|
||||||
severity: str = scene.get("severity") or "unknown"
|
severity: str = scene.get("severity") or "unknown"
|
||||||
resolved: bool = bool(scene.get("resolved", False))
|
resolved: bool = bool(scene.get("resolved", False))
|
||||||
|
reassignment: bool = bool(scene.get("reassignment", False))
|
||||||
transcript_corrected: Optional[str]= scene.get("transcript_corrected") or None
|
transcript_corrected: Optional[str]= scene.get("transcript_corrected") or None
|
||||||
segment_indices: Optional[list] = scene.get("segment_indices")
|
segment_indices: Optional[list] = scene.get("segment_indices")
|
||||||
|
|
||||||
@@ -160,6 +163,7 @@ async def extract_scenes(
|
|||||||
"units": units,
|
"units": units,
|
||||||
"severity": severity,
|
"severity": severity,
|
||||||
"resolved": resolved,
|
"resolved": resolved,
|
||||||
|
"reassignment": reassignment,
|
||||||
"transcript_corrected": transcript_corrected,
|
"transcript_corrected": transcript_corrected,
|
||||||
"segment_indices": segment_indices,
|
"segment_indices": segment_indices,
|
||||||
"embedding": embedding,
|
"embedding": embedding,
|
||||||
|
|||||||
@@ -110,6 +110,9 @@ async def _run_extraction_pipeline(
|
|||||||
all_tags: list[str] = []
|
all_tags: list[str] = []
|
||||||
for scene in scenes:
|
for scene in scenes:
|
||||||
all_tags.extend(scene["tags"])
|
all_tags.extend(scene["tags"])
|
||||||
|
# When dispatch is pulling a unit to a NEW call (reassignment), suppress unit
|
||||||
|
# overlap so the new scene doesn't chain into the unit's previous incident.
|
||||||
|
corr_units = [] if scene.get("reassignment") else scene.get("units")
|
||||||
incident_id = await incident_correlator.correlate_call(
|
incident_id = await incident_correlator.correlate_call(
|
||||||
call_id=call_id,
|
call_id=call_id,
|
||||||
node_id=node_id,
|
node_id=node_id,
|
||||||
@@ -120,6 +123,8 @@ async def _run_extraction_pipeline(
|
|||||||
incident_type=scene["incident_type"],
|
incident_type=scene["incident_type"],
|
||||||
location=scene["location"],
|
location=scene["location"],
|
||||||
location_coords=scene["location_coords"],
|
location_coords=scene["location_coords"],
|
||||||
|
units=corr_units,
|
||||||
|
vehicles=scene.get("vehicles"),
|
||||||
)
|
)
|
||||||
if incident_id and incident_id not in incident_ids:
|
if incident_id and incident_id not in incident_ids:
|
||||||
incident_ids.append(incident_id)
|
incident_ids.append(incident_id)
|
||||||
@@ -206,6 +211,7 @@ async def _run_intelligence_pipeline(
|
|||||||
if flags["correlation_enabled"]:
|
if flags["correlation_enabled"]:
|
||||||
for scene in scenes:
|
for scene in scenes:
|
||||||
all_tags.extend(scene["tags"])
|
all_tags.extend(scene["tags"])
|
||||||
|
corr_units = [] if scene.get("reassignment") else scene.get("units")
|
||||||
incident_id = await incident_correlator.correlate_call(
|
incident_id = await incident_correlator.correlate_call(
|
||||||
call_id=call_id,
|
call_id=call_id,
|
||||||
node_id=node_id,
|
node_id=node_id,
|
||||||
@@ -216,6 +222,8 @@ async def _run_intelligence_pipeline(
|
|||||||
incident_type=scene["incident_type"],
|
incident_type=scene["incident_type"],
|
||||||
location=scene["location"],
|
location=scene["location"],
|
||||||
location_coords=scene["location_coords"],
|
location_coords=scene["location_coords"],
|
||||||
|
units=corr_units,
|
||||||
|
vehicles=scene.get("vehicles"),
|
||||||
)
|
)
|
||||||
if incident_id and incident_id not in incident_ids:
|
if incident_id and incident_id not in incident_ids:
|
||||||
incident_ids.append(incident_id)
|
incident_ids.append(incident_id)
|
||||||
|
|||||||
Reference in New Issue
Block a user