Correlation updates

This commit is contained in:
Logan
2026-05-23 22:55:50 -04:00
parent 5a18a66d77
commit 6397e24035
+118 -25
View File
@@ -281,6 +281,7 @@ async def correlate_call(
if _call_fits_incident( if _call_fits_incident(
candidate, call_units, call_vehicles, coords, candidate, call_units, call_vehicles, coords,
settings.location_proximity_km, is_dispatch=is_dispatch, settings.location_proximity_km, is_dispatch=is_dispatch,
call_embedding=call_embedding, now=now,
): ):
matched_incident = candidate matched_incident = candidate
corr_debug = { corr_debug = {
@@ -297,7 +298,7 @@ async def correlate_call(
) )
elif len(tg_recent) > 1: elif len(tg_recent) > 1:
candidate = _disambiguate( candidate = _disambiguate(
tg_recent, call_units, call_vehicles, coords, call_embedding tg_recent, call_units, call_vehicles, coords, call_embedding, now=now
) )
# Disambiguate picks the best candidate, but still verify the call # Disambiguate picks the best candidate, but still verify the call
# actually fits before committing — a new unrelated call on a busy # actually fits before committing — a new unrelated call on a busy
@@ -305,6 +306,7 @@ async def correlate_call(
if _call_fits_incident( if _call_fits_incident(
candidate, call_units, call_vehicles, coords, candidate, call_units, call_vehicles, coords,
settings.location_proximity_km, is_dispatch=is_dispatch, settings.location_proximity_km, is_dispatch=is_dispatch,
call_embedding=call_embedding, now=now,
): ):
matched_incident = candidate matched_incident = candidate
corr_debug = { corr_debug = {
@@ -346,7 +348,25 @@ async def correlate_call(
and inc.get("updated_at", "") > best_unit_inc.get("updated_at", "") and inc.get("updated_at", "") > best_unit_inc.get("updated_at", "")
for inc in all_active for inc in all_active
) )
if not reassigned_away: # Location conflict check: same unit at a clearly different location
# means the officer has moved to a new scene (e.g. dispatch sent
# them to a second call without an explicit reassignment signal).
location_conflict = False
if coords and not reassigned_away:
inc_coords_uc = best_unit_inc.get("location_coords")
if inc_coords_uc:
uc_dist = _haversine_km(
coords["lat"], coords["lng"],
inc_coords_uc["lat"], inc_coords_uc["lng"],
)
if uc_dist > settings.location_proximity_km:
location_conflict = True
logger.info(
f"Correlator unit-continuity skipped: call {call_id} "
f"unit match but location conflict ({uc_dist:.2f}km) with "
f"{best_unit_inc['incident_id']}"
)
if not reassigned_away and not location_conflict:
matched_incident = best_unit_inc matched_incident = best_unit_inc
corr_debug = { corr_debug = {
"corr_path": "unit-continuity", "corr_path": "unit-continuity",
@@ -611,11 +631,20 @@ def _disambiguate(
call_vehicles: list[str], call_vehicles: list[str],
call_coords: Optional[dict], call_coords: Optional[dict],
call_embedding: Optional[list], call_embedding: Optional[list],
now: Optional[datetime] = None,
) -> dict: ) -> dict:
""" """
Score each talkgroup-matched candidate and return the best. Score each talkgroup-matched candidate and return the best.
Signals (descending weight): unit overlap, vehicle overlap,
location proximity, embedding similarity. Signals (descending weight):
1. Time recency — how long ago was this incident last active on this TGID?
Very recent activity is the strongest context signal on a
shared dispatch channel.
2. Unit overlap — same officer = same call (strong positive: +10, which outweighs the maximum time-recency bonus of +8).
3. Vehicle overlap
4. Location proximity
5. Embedding similarity (content tiebreaker)
Ties broken by most-recently-updated. Ties broken by most-recently-updated.
""" """
best = candidates[0] best = candidates[0]
@@ -624,6 +653,15 @@ def _disambiguate(
for inc in candidates: for inc in candidates:
score = 0.0 score = 0.0
# Time recency: recently-active incidents are much more likely targets
# for a follow-up transmission than idle ones.
if now is not None:
idle_min = _incident_idle_minutes(inc, now)
if idle_min < 2: score += 8.0
elif idle_min < 5: score += 4.0
elif idle_min < 15: score += 1.0
# ≥ 15 min: no bonus — older incidents compete on content/units only
inc_units = set(inc.get("units") or []) inc_units = set(inc.get("units") or [])
if inc_units and call_units and any(u in inc_units for u in call_units): if inc_units and call_units and any(u in inc_units for u in call_units):
score += 10.0 score += 10.0
@@ -661,37 +699,87 @@ def _call_fits_incident(
call_coords: Optional[dict], call_coords: Optional[dict],
proximity_km: float, proximity_km: float,
is_dispatch: bool = False, is_dispatch: bool = False,
call_embedding: Optional[list] = None,
now: Optional[datetime] = None,
) -> bool: ) -> bool:
""" """
Return True if this call plausibly belongs to the given incident. Return True if this call plausibly belongs to the given incident.
Positive signals (unit/vehicle overlap, location proximity) are always Evaluation order for dispatch channels (is_dispatch=True):
respected. The fallback — when there is no evidence either way — depends ─────────────────────────────────────────────────────────
on channel type: 1. Unit overlap
Same officer = same call. On dispatch channels, also run a location
conflict guard: if both sides carry geocoded coords and they differ
significantly, the officer has moved to a new scene and the unit match
is a false positive.
When the call has NO geocoded coordinates AND the incident is old
(≥ 15 min), use content divergence as a location proxy: an officer at
a genuinely different scene will be talking about clearly different
things. For recent incidents (< 15 min) we skip this proxy — the
officer may simply be giving an update without mentioning the address.
• Tactical / working channel (is_dispatch=False): default True (link). 2. Vehicle overlap → True
A working channel is dedicated to one scene; no evidence of separation
means they're probably the same call.
• Dispatch channel (is_dispatch=True): default False (create new). 3. Location proximity
A dispatch channel carries every incident for a department. Linking Both sides geocoded and close → True.
without positive evidence would merge unrelated incidents whenever Both sides geocoded and far apart (no other positive signal) → False.
geocoding fails (which is common for partial street addresses).
4. No positive signals fired → dispatch fallback
a. Conversational continuity: idle < 2 min → True.
A call arriving within 2 minutes of the last incident activity almost
certainly belongs to the same dispatch thread. "Baker, head over
there too" or "copy that" carries no incident-specific vocabulary but
is unambiguously a response to what was just said. We do not require
embedding similarity here — embeddings capture word meaning, not
conversational context, and short operational messages will always
have lower similarity than the incident's accumulated content.
b. Older incident, no positive signals → False.
A shared dispatch channel must not absorb calls by default.
Tactical / working channel (is_dispatch=False):
────────────────────────────────────────────────
Channel is dedicated to one scene. No evidence of separation ≈ same call.
Default → True.
Thin calls (no units/vehicles/coords) never reach this function — Thin calls (no units/vehicles/coords) never reach this function —
they're intercepted by the is_thin_call branch above. they are intercepted before it in correlate_call.
""" """
# Unit overlap is the strongest positive signal: same officers = same call. idle_min = _incident_idle_minutes(inc, now) if now is not None else 9999.0
# ── 1. Unit overlap ───────────────────────────────────────────────────────
inc_units = set(inc.get("units") or []) inc_units = set(inc.get("units") or [])
if inc_units and call_units and any(u in inc_units for u in call_units): if inc_units and call_units and any(u in inc_units for u in call_units):
if is_dispatch:
if call_coords:
# Hard location conflict: geocoded on both sides and clearly different.
inc_coords_u = inc.get("location_coords")
if inc_coords_u:
dist_km = _haversine_km(
call_coords["lat"], call_coords["lng"],
inc_coords_u["lat"], inc_coords_u["lng"],
)
if dist_km > proximity_km:
return False
elif call_embedding and idle_min >= 15:
# No geocode available AND old incident: use content divergence as a
# location-proxy veto. After 15+ minutes an officer at a completely
# different scene will be discussing clearly different content.
# Skip this for recent incidents — an officer updating on the same
# scene without re-stating the address is normal and their update
# won't share much vocabulary with the original dispatch.
inc_emb_u = inc.get("embedding")
if inc_emb_u:
sim = _cosine_similarity(call_embedding, inc_emb_u)
if sim < 0.82:
return False
return True return True
# Vehicle overlap: same vehicle description across calls → same scene. # ── 2. Vehicle overlap ────────────────────────────────────────────────────
inc_vehicles = set(inc.get("vehicles") or []) inc_vehicles = set(inc.get("vehicles") or [])
if inc_vehicles and call_vehicles and any(v in inc_vehicles for v in call_vehicles): if inc_vehicles and call_vehicles and any(v in inc_vehicles for v in call_vehicles):
return True return True
# When both sides have geocoded coordinates, distance is the tiebreaker. # ── 3. Location proximity ─────────────────────────────────────────────────
inc_coords = inc.get("location_coords") inc_coords = inc.get("location_coords")
if call_coords and inc_coords: if call_coords and inc_coords:
dist_km = _haversine_km( dist_km = _haversine_km(
@@ -700,15 +788,20 @@ def _call_fits_incident(
) )
if dist_km <= proximity_km: if dist_km <= proximity_km:
return True return True
# Different location AND no unit/vehicle overlap → different incident. # Conflicting location, no other positive signal → different scene.
return False return False
# No geocoded location on one or both sides. # ── 4. No positive signals ────────────────────────────────────────────────
# On a tactical/working channel, default to linking (conservative — channel if is_dispatch:
# is dedicated to one scene so no evidence of separation ≈ same scene). # Conversational continuity: the call arrived during the same conversation
# On a dispatch channel, require positive evidence — without it we risk # thread (< 2 min since last incident activity) with no contradicting evidence.
# pulling every ungeocoded call in a shift into the same incident. if idle_min < 2:
return not is_dispatch return True
# Shared dispatch channel — do not link without at least one positive signal.
return False
# Tactical channel: one scene per channel → link by default.
return True
async def _update_incident( async def _update_incident(