diff --git a/drb-c2-core/app/internal/incident_correlator.py b/drb-c2-core/app/internal/incident_correlator.py index c873129..29974d5 100644 --- a/drb-c2-core/app/internal/incident_correlator.py +++ b/drb-c2-core/app/internal/incident_correlator.py @@ -281,6 +281,7 @@ async def correlate_call( if _call_fits_incident( candidate, call_units, call_vehicles, coords, settings.location_proximity_km, is_dispatch=is_dispatch, + call_embedding=call_embedding, now=now, ): matched_incident = candidate corr_debug = { @@ -297,7 +298,7 @@ async def correlate_call( ) elif len(tg_recent) > 1: candidate = _disambiguate( - tg_recent, call_units, call_vehicles, coords, call_embedding + tg_recent, call_units, call_vehicles, coords, call_embedding, now=now ) # Disambiguate picks the best candidate, but still verify the call # actually fits before committing — a new unrelated call on a busy @@ -305,6 +306,7 @@ async def correlate_call( if _call_fits_incident( candidate, call_units, call_vehicles, coords, settings.location_proximity_km, is_dispatch=is_dispatch, + call_embedding=call_embedding, now=now, ): matched_incident = candidate corr_debug = { @@ -346,7 +348,25 @@ async def correlate_call( and inc.get("updated_at", "") > best_unit_inc.get("updated_at", "") for inc in all_active ) - if not reassigned_away: + # Location conflict check: same unit at a clearly different location + # means the officer has moved to a new scene (e.g. dispatch sent + # them to a second call without an explicit reassignment signal). 
+ location_conflict = False + if coords and not reassigned_away: + inc_coords_uc = best_unit_inc.get("location_coords") + if inc_coords_uc: + uc_dist = _haversine_km( + coords["lat"], coords["lng"], + inc_coords_uc["lat"], inc_coords_uc["lng"], + ) + if uc_dist > settings.location_proximity_km: + location_conflict = True + logger.info( + f"Correlator unit-continuity skipped: call {call_id} — " + f"unit match but location conflict ({uc_dist:.2f}km) with " + f"{best_unit_inc['incident_id']}" + ) + if not reassigned_away and not location_conflict: matched_incident = best_unit_inc corr_debug = { "corr_path": "unit-continuity", @@ -611,11 +631,20 @@ def _disambiguate( call_vehicles: list[str], call_coords: Optional[dict], call_embedding: Optional[list], + now: Optional[datetime] = None, ) -> dict: """ Score each talkgroup-matched candidate and return the best. - Signals (descending weight): unit overlap, vehicle overlap, - location proximity, embedding similarity. + + Signals (listed by role, not strictly by weight): + 1. Time recency — how long ago was this incident last active on this TGID? + Very recent activity (< 2 min idle) is a strong context signal on a + shared dispatch channel: up to +8.0, tapering to no bonus at 15 min. + 2. Unit overlap — same officer = same call (strong positive, +10.0 — outweighs recency). + 3. Vehicle overlap + 4. Location proximity + 5. Embedding similarity (content tiebreaker) + Ties broken by most-recently-updated. """ best = candidates[0] @@ -624,6 +653,15 @@ def _disambiguate( for inc in candidates: score = 0.0 + # Time recency: recently-active incidents are much more likely targets + # for a follow-up transmission than idle ones. 
+ if now is not None: + idle_min = _incident_idle_minutes(inc, now) + if idle_min < 2: score += 8.0 + elif idle_min < 5: score += 4.0 + elif idle_min < 15: score += 1.0 + # ≥ 15 min: no bonus — older incidents compete on content/units only + inc_units = set(inc.get("units") or []) if inc_units and call_units and any(u in inc_units for u in call_units): score += 10.0 @@ -661,37 +699,87 @@ def _call_fits_incident( call_coords: Optional[dict], proximity_km: float, is_dispatch: bool = False, + call_embedding: Optional[list] = None, + now: Optional[datetime] = None, ) -> bool: """ Return True if this call plausibly belongs to the given incident. - Positive signals (unit/vehicle overlap, location proximity) are always - respected. The fallback — when there is no evidence either way — depends - on channel type: + Evaluation order for dispatch channels (is_dispatch=True): + ───────────────────────────────────────────────────────── + 1. Unit overlap + Same officer = same call. On dispatch channels, also run a location + conflict guard: if both sides carry geocoded coords and they differ + significantly, the officer has moved to a new scene and the unit match + is a false positive. + When the call has NO geocoded coordinates AND the incident is old + (≥ 15 min), use content divergence as a location proxy: an officer at + a genuinely different scene will be talking about clearly different + things. For recent incidents (< 15 min) we skip this proxy — the + officer may simply be giving an update without mentioning the address. - • Tactical / working channel (is_dispatch=False): default True (link). - A working channel is dedicated to one scene; no evidence of separation - means they're probably the same call. + 2. Vehicle overlap → True - • Dispatch channel (is_dispatch=True): default False (create new). - A dispatch channel carries every incident for a department. 
Linking - without positive evidence would merge unrelated incidents whenever - geocoding fails (which is common for partial street addresses). + 3. Location proximity + Both sides geocoded and close → True. + Both sides geocoded and far apart (no other positive signal) → False. + + 4. No positive signals fired → dispatch fallback + a. Conversational continuity: idle < 2 min → True. + A call arriving within 2 minutes of the last incident activity almost + certainly belongs to the same dispatch thread. "Baker, head over + there too" or "copy that" carries no incident-specific vocabulary but + is unambiguously a response to what was just said. We do not require + embedding similarity here — embeddings capture word meaning, not + conversational context, and short operational messages will always + have lower similarity than the incident's accumulated content. + b. Older incident, no positive signals → False. + A shared dispatch channel must not absorb calls by default. + + Tactical / working channel (is_dispatch=False): + ──────────────────────────────────────────────── + Channel is dedicated to one scene. No evidence of separation ≈ same call. + Default → True. Thin calls (no units/vehicles/coords) never reach this function — - they're intercepted by the is_thin_call branch above. + they are intercepted before it in correlate_call. """ - # Unit overlap is the strongest positive signal: same officers = same call. + idle_min = _incident_idle_minutes(inc, now) if now is not None else 9999.0 + + # ── 1. Unit overlap ─────────────────────────────────────────────────────── inc_units = set(inc.get("units") or []) if inc_units and call_units and any(u in inc_units for u in call_units): + if is_dispatch: + if call_coords: + # Hard location conflict: geocoded on both sides and clearly different. 
+ inc_coords_u = inc.get("location_coords") + if inc_coords_u: + dist_km = _haversine_km( + call_coords["lat"], call_coords["lng"], + inc_coords_u["lat"], inc_coords_u["lng"], + ) + if dist_km > proximity_km: + return False + elif call_embedding and idle_min >= 15: + # No geocode available AND old incident: use content divergence as a + # location-proxy veto. After 15+ minutes an officer at a completely + # different scene will be discussing clearly different content. + # Skip this for recent incidents — an officer updating on the same + # scene without re-stating the address is normal and their update + # won't share much vocabulary with the original dispatch. + inc_emb_u = inc.get("embedding") + if inc_emb_u: + sim = _cosine_similarity(call_embedding, inc_emb_u) + if sim < 0.82: + return False return True - # Vehicle overlap: same vehicle description across calls → same scene. + # ── 2. Vehicle overlap ──────────────────────────────────────────────────── inc_vehicles = set(inc.get("vehicles") or []) if inc_vehicles and call_vehicles and any(v in inc_vehicles for v in call_vehicles): return True - # When both sides have geocoded coordinates, distance is the tiebreaker. + # ── 3. Location proximity ───────────────────────────────────────────────── inc_coords = inc.get("location_coords") if call_coords and inc_coords: dist_km = _haversine_km( @@ -700,15 +788,20 @@ def _call_fits_incident( ) if dist_km <= proximity_km: return True - # Different location AND no unit/vehicle overlap → different incident. + # Conflicting location, no other positive signal → different scene. return False - # No geocoded location on one or both sides. - # On a tactical/working channel, default to linking (conservative — channel - # is dedicated to one scene so no evidence of separation ≈ same scene). - # On a dispatch channel, require positive evidence — without it we risk - # pulling every ungeocoded call in a shift into the same incident. - return not is_dispatch + # ── 4. 
No positive signals ──────────────────────────────────────────────── + if is_dispatch: + # Conversational continuity: the call arrived during the same conversation + # thread (< 2 min since last incident activity) with no contradicting evidence. + if idle_min < 2: + return True + # Shared dispatch channel — do not link without at least one positive signal. + return False + + # Tactical channel: one scene per channel → link by default. + return True async def _update_incident(