diff --git a/drb-c2-core/app/config.py b/drb-c2-core/app/config.py index d74a1d2..aacc68e 100644 --- a/drb-c2-core/app/config.py +++ b/drb-c2-core/app/config.py @@ -26,6 +26,7 @@ class Settings(BaseSettings): correlation_window_hours: int = 2 # slow/location path: max hours since last call embedding_similarity_threshold: float = 0.93 # slow-path: requires location corroboration embedding_no_location_threshold: float = 0.97 # slow-path: match without location (very high bar) + embedding_cross_tg_threshold: float = 0.85 # cross-TG path: same dept + 2+ shared units location_proximity_km: float = 0.5 # radius for location-proximity matching incident_auto_resolve_minutes: int = 90 # auto-resolve after N minutes with no new calls recorrelation_scan_minutes: int = 60 # re-examine orphaned calls ended within this window diff --git a/drb-c2-core/app/internal/incident_correlator.py b/drb-c2-core/app/internal/incident_correlator.py index 8ac0fb0..a91660c 100644 --- a/drb-c2-core/app/internal/incident_correlator.py +++ b/drb-c2-core/app/internal/incident_correlator.py @@ -92,6 +92,8 @@ async def correlate_call( location_coords: Optional[dict] = None, reference_time: Optional[datetime] = None, create_if_new: bool = True, + units: Optional[list[str]] = None, + vehicles: Optional[list[str]] = None, ) -> Optional[str]: """ Link call_id to an existing incident or create a new one. @@ -115,8 +117,12 @@ async def correlate_call( # Fetch call doc once — reused for disambiguation, embedding merge, unit accumulation call_doc = await fstore.doc_get("calls", call_id) or {} call_embedding: Optional[list] = call_doc.get("embedding") - call_units: list[str] = call_doc.get("units") or [] - call_vehicles: list[str] = call_doc.get("vehicles") or [] + # Prefer explicitly passed units/vehicles (per-scene, from intelligence extraction) + # over the call doc, which merges units from ALL scenes in a multi-scene recording. + # Falling back to the call doc is correct for recorrelation sweeps where we have no + # scene-level breakdown. + call_units: list[str] = units if units is not None else (call_doc.get("units") or []) + call_vehicles: list[str] = vehicles if vehicles is not None else (call_doc.get("vehicles") or []) call_severity: str = call_doc.get("severity") or "unknown" # Use passed coords first (freshly geocoded), fall back to what's on the call doc coords: Optional[dict] = location_coords or call_doc.get("location_coords") @@ -210,6 +216,42 @@ async def correlate_call( ) break + # ── 2.5. Cross-TG path: same department, overlapping units, moderate similarity ── + # + # Catches pursuits / searches that span multiple talkgroup IDs within the same + # department (e.g. dispatch → tactical → geographic channel). The fast path + # is TGID-scoped so it never links these. Two conditions together provide + # strong evidence of the same scene without needing location: + # • 2+ shared unit IDs (same officers working the same call) + # • embedding similarity >= cross-TG threshold (same subject matter) + # Requiring 2+ shared units prevents single-officer false positives. + if not matched_incident and call_embedding and incident_type and call_units and system_id: + call_unit_set = set(call_units) + best_cross_score = 0.0 + best_cross_inc: Optional[dict] = None + for inc in recent: + if inc.get("type") != incident_type: + continue + if system_id not in (inc.get("system_ids") or []): + continue + inc_units_set = set(inc.get("units") or []) + if len(call_unit_set & inc_units_set) < 2: + continue + inc_embedding = inc.get("embedding") + if not inc_embedding: + continue + sim = _cosine_similarity(call_embedding, inc_embedding) + if sim > best_cross_score: + best_cross_score = sim + best_cross_inc = inc + if best_cross_inc and best_cross_score >= settings.embedding_cross_tg_threshold: + matched_incident = best_cross_inc + logger.info( + f"Correlator cross-TG path: call {call_id} → {best_cross_inc['incident_id']} " + f"(sim={best_cross_score:.3f}, " + f"shared_units={len(call_unit_set & set(best_cross_inc.get('units') or []))})" + ) + # ── 3. Slow path: embedding similarity (time-limited, same type) ────────── # # Two tiers: diff --git a/drb-c2-core/app/internal/intelligence.py b/drb-c2-core/app/internal/intelligence.py index f674fe6..6395ef8 100644 --- a/drb-c2-core/app/internal/intelligence.py +++ b/drb-c2-core/app/internal/intelligence.py @@ -31,16 +31,18 @@ Response format — a JSON object with a "scenes" array. Each scene: units: list of unit IDs or officer numbers explicitly mentioned severity: one of "minor" | "moderate" | "major" | "unknown" resolved: true if this scene explicitly signals incident closure, false otherwise + reassignment: true if dispatch is actively pulling a unit away from their current assignment to respond to a new, different call — e.g. "Baker, can you clear and respond to...", "Adam, break from that and go to...". False if the unit is simply reporting in, updating status, or continuing their current assignment. transcript_corrected: corrected text for this scene's transmissions only, or null Rules: - location: prefer intersections > addresses > mile markers > route+town > route alone > town alone. Empty string if none. -- tags: specific, lowercase, hyphenated. Do not repeat incident_type as a tag. +- tags: describe WHAT happened, not WHERE. Specific, lowercase, hyphenated. Do not use location names, road names, talkgroup names, or place names as tags (wrong: "lower-macy's", "canvas-route-6", "route-202"; right: "suspect-search", "shoplifting", "vehicle-pursuit"). Do not repeat incident_type as a tag. - units: only identifiers explicitly mentioned, not inferred. - Do not invent details not present in the transcript. - incident_type: let the talkgroup channel be your primary signal. Use "fire" ONLY if the talkgroup is clearly a fire/rescue channel OR the transcript explicitly describes active fire, smoke, flames, or structure fire activation. Police or EMS referencing a fire scene → use "police" or "ems". When uncertain, prefer "other" over "fire". - ten_codes: interpret radio codes using the department reference provided below. Do not guess codes not listed. - resolved: true only when the scene explicitly signals "Code 4", "all clear", "10-42", "in custody", "patient transported", "fire out", "GOA", "negative contact", "scene clear". +- reassignment: only true when a unit is explicitly being pulled to a completely new call or location. A unit going en route to their first dispatch is NOT a reassignment. Routine status updates, acknowledgements, and scene updates are NOT reassignments. - transcript_corrected: fix only clear STT/vocoder errors (e.g. "Several" → "10-4", misheard street names, garbled unit IDs). Keep all radio language as-is — do NOT decode codes into plain English. Return null if accurate. System: {system_id} @@ -130,6 +132,7 @@ async def extract_scenes( units: list[str] = scene.get("units") or [] severity: str = scene.get("severity") or "unknown" resolved: bool = bool(scene.get("resolved", False)) + reassignment: bool = bool(scene.get("reassignment", False)) transcript_corrected: Optional[str]= scene.get("transcript_corrected") or None segment_indices: Optional[list] = scene.get("segment_indices") @@ -160,6 +163,7 @@ async def extract_scenes( "units": units, "severity": severity, "resolved": resolved, + "reassignment": reassignment, "transcript_corrected": transcript_corrected, "segment_indices": segment_indices, "embedding": embedding, diff --git a/drb-c2-core/app/routers/upload.py b/drb-c2-core/app/routers/upload.py index f43a75a..566aa22 100644 --- a/drb-c2-core/app/routers/upload.py +++ b/drb-c2-core/app/routers/upload.py @@ -110,6 +110,9 @@ async def _run_extraction_pipeline( all_tags: list[str] = [] for scene in scenes: all_tags.extend(scene["tags"]) + # When dispatch is pulling a unit to a NEW call (reassignment), suppress unit + # overlap so the new scene doesn't chain into the unit's previous incident. + corr_units = [] if scene.get("reassignment") else scene.get("units") incident_id = await incident_correlator.correlate_call( call_id=call_id, node_id=node_id, @@ -120,6 +123,8 @@ async def _run_extraction_pipeline( incident_type=scene["incident_type"], location=scene["location"], location_coords=scene["location_coords"], + units=corr_units, + vehicles=scene.get("vehicles"), ) if incident_id and incident_id not in incident_ids: incident_ids.append(incident_id) @@ -206,6 +211,7 @@ async def _run_intelligence_pipeline( if flags["correlation_enabled"]: for scene in scenes: all_tags.extend(scene["tags"]) + corr_units = [] if scene.get("reassignment") else scene.get("units") incident_id = await incident_correlator.correlate_call( call_id=call_id, node_id=node_id, @@ -216,6 +222,8 @@ async def _run_intelligence_pipeline( incident_type=scene["incident_type"], location=scene["location"], location_coords=scene["location_coords"], + units=corr_units, + vehicles=scene.get("vehicles"), ) if incident_id and incident_id not in incident_ids: incident_ids.append(incident_id)