From e704df1a62eb9ea7a643ab1cba826b93b9e15ee4 Mon Sep 17 00:00:00 2001
From: Logan <Logan@simplestepsolutions.com>
Date: Mon, 4 May 2026 01:33:03 -0400
Subject: [PATCH] =?UTF-8?q?#=20`app/internal/incident=5Fcorrelator.py`=20-?=
 =?UTF-8?q?=20*`correlate=5Fcall`*=20=E2=80=94=20added=20units=20and=20veh?=
 =?UTF-8?q?icles=20optional=20params;=20when=20provided=20(per-scene=20fro?=
 =?UTF-8?q?m=20intelligence=20extraction),=20they=20take=20priority=20over?=
 =?UTF-8?q?=20the=20merged=20call-document=20values,=20preventing=20multi-?=
 =?UTF-8?q?scene=20unit=20contamination=20-=20*Cross-TGID=20correlation=20?=
 =?UTF-8?q?path=20(2.5)*=20=E2=80=94=20*new=20path=20between=20location=20?=
 =?UTF-8?q?and=20slow=20paths*:=20when=20a=20call=20shares=202+=20unit=20I?=
 =?UTF-8?q?Ds=20with=20a=20recent=20same-system,=20same-type=20incident=20?=
 =?UTF-8?q?AND=20embedding=20similarity=20=E2=89=A5=200.85,=20it=20links?=
 =?UTF-8?q?=20them=20=E2=80=94=20catches=20multi-talkgroup=20pursuits=20li?=
 =?UTF-8?q?ke=20the=20bicycle=20search=20that=20split=20across=20dispatch/?=
 =?UTF-8?q?tactical/geographic=20channels=20#=20`app/internal/intelligence?=
 =?UTF-8?q?.py`=20-=20*`reassignment`=20field*=20=E2=80=94=20added=20to=20?=
 =?UTF-8?q?the=20GPT-4o-mini=20prompt=20schema=20and=20rules;=20`true`=20w?=
 =?UTF-8?q?hen=20dispatch=20is=20actively=20pulling=20a=20unit=20to=20a=20?=
 =?UTF-8?q?new,=20different=20call=20(not=20a=20status=20update=20or=20en?=
 =?UTF-8?q?=20route=20acknowledgement);=20returned=20in=20every=20processe?=
 =?UTF-8?q?d=20scene=20dict=20-=20*Tag=20location=20rule*=20=E2=80=94=20ad?=
 =?UTF-8?q?ded=20explicit=20instruction=20to=20the=20prompt:=20tags=20must?=
 =?UTF-8?q?=20describe=20what=20happened,=20not=20where;=20place=20names,?=
 =?UTF-8?q?=20road=20names,=20and=20talkgroup=20names=20are=20explicitly?=
 =?UTF-8?q?=20forbidden=20as=20tags=20#=20`app/routers/upload.py`=20-=20Bo?=
 =?UTF-8?q?th=20scene=20correlation=20call=20sites=20(`=5Frun=5Fextraction?=
 =?UTF-8?q?=5Fpipeline`=20and=20`=5Frun=5Fintelligence=5Fpipeline`)=20now?=
 =?UTF-8?q?=20pass=20`units=3Dcorr=5Funits`=20where=20`corr=5Funits=20=3D?=
 =?UTF-8?q?=20[]=20if=20scene.get("reassignment")=20else=20scene.get("unit?=
 =?UTF-8?q?s")=20`=E2=80=94=20suppresses=20unit=20overlap=20matching=20whe?=
 =?UTF-8?q?n=20a=20unit=20is=20being=20reassigned=20to=20a=20new=20call,?=
 =?UTF-8?q?=20preventing=20chaining=20into=20their=20previous=20incident?=
 =?UTF-8?q?=20-=20Both=20sites=20also=20pass=20`vehicles=3Dscene.get("vehi?=
 =?UTF-8?q?cles")`=20(per-scene=20vehicles,=20from=20the=20multi-scene=20u?=
 =?UTF-8?q?nits=20fix)=20#=20`app/config.py`=20-=20`embedding=5Fcross=5Ftg?=
 =?UTF-8?q?=5Fthreshold:=20float=20=3D=200.85`=20=E2=80=94=20threshold=20f?=
 =?UTF-8?q?or=20the=20new=20cross-TGID=20path?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 drb-c2-core/app/config.py                     |  1 +
 .../app/internal/incident_correlator.py       | 46 ++++++++++++++++++-
 drb-c2-core/app/internal/intelligence.py      |  6 ++-
 drb-c2-core/app/routers/upload.py             |  8 ++++
 4 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/drb-c2-core/app/config.py b/drb-c2-core/app/config.py
index d74a1d2..aacc68e 100644
--- a/drb-c2-core/app/config.py
+++ b/drb-c2-core/app/config.py
@@ -26,6 +26,7 @@ class Settings(BaseSettings):
     correlation_window_hours: int = 2          # slow/location path: max hours since last call
     embedding_similarity_threshold: float = 0.93  # slow-path: requires location corroboration
     embedding_no_location_threshold: float = 0.97 # slow-path: match without location (very high bar)
+    embedding_cross_tg_threshold: float = 0.85    # cross-TG path: same dept + 2+ shared units
     location_proximity_km: float = 0.5        # radius for location-proximity matching
     incident_auto_resolve_minutes: int = 90   # auto-resolve after N minutes with no new calls
     recorrelation_scan_minutes: int = 60      # re-examine orphaned calls ended within this window
diff --git a/drb-c2-core/app/internal/incident_correlator.py b/drb-c2-core/app/internal/incident_correlator.py
index 8ac0fb0..a91660c 100644
--- a/drb-c2-core/app/internal/incident_correlator.py
+++ b/drb-c2-core/app/internal/incident_correlator.py
@@ -92,6 +92,8 @@ async def correlate_call(
     location_coords: Optional[dict] = None,
     reference_time: Optional[datetime] = None,
     create_if_new: bool = True,
+    units: Optional[list[str]] = None,
+    vehicles: Optional[list[str]] = None,
 ) -> Optional[str]:
     """
     Link call_id to an existing incident or create a new one.
@@ -115,8 +117,12 @@ async def correlate_call(
     # Fetch call doc once — reused for disambiguation, embedding merge, unit accumulation
     call_doc      = await fstore.doc_get("calls", call_id) or {}
     call_embedding: Optional[list] = call_doc.get("embedding")
-    call_units:    list[str]       = call_doc.get("units")    or []
-    call_vehicles: list[str]       = call_doc.get("vehicles") or []
+    # Prefer explicitly passed units/vehicles (per-scene, from intelligence extraction)
+    # over the call doc, which merges units from ALL scenes in a multi-scene recording.
+    # Falling back to the call doc is correct for recorrelation sweeps where we have no
+    # scene-level breakdown.
+    call_units:    list[str] = units    if units    is not None else (call_doc.get("units")    or [])
+    call_vehicles: list[str] = vehicles if vehicles is not None else (call_doc.get("vehicles") or [])
     call_severity: str             = call_doc.get("severity") or "unknown"
     # Use passed coords first (freshly geocoded), fall back to what's on the call doc
     coords: Optional[dict] = location_coords or call_doc.get("location_coords")
@@ -210,6 +216,42 @@ async def correlate_call(
                 )
                 break
 
+    # ── 2.5. Cross-TG path: same department, overlapping units, moderate similarity ──
+    #
+    # Catches pursuits / searches that span multiple talkgroup IDs within the same
+    # department (e.g. dispatch → tactical → geographic channel).  The fast path
+    # is TGID-scoped so it never links these.  Two conditions together provide
+    # strong evidence of the same scene without needing location:
+    #   • 2+ shared unit IDs  (same officers working the same call)
+    #   • embedding similarity >= cross-TG threshold (same subject matter)
+    # Requiring 2+ shared units prevents single-officer false positives.
+    if not matched_incident and call_embedding and incident_type and call_units and system_id:
+        call_unit_set = set(call_units)
+        best_cross_score = 0.0
+        best_cross_inc: Optional[dict] = None
+        for inc in recent:
+            if inc.get("type") != incident_type:
+                continue
+            if system_id not in (inc.get("system_ids") or []):
+                continue
+            inc_units_set = set(inc.get("units") or [])
+            if len(call_unit_set & inc_units_set) < 2:
+                continue
+            inc_embedding = inc.get("embedding")
+            if not inc_embedding:
+                continue
+            sim = _cosine_similarity(call_embedding, inc_embedding)
+            if sim > best_cross_score:
+                best_cross_score = sim
+                best_cross_inc   = inc
+        if best_cross_inc and best_cross_score >= settings.embedding_cross_tg_threshold:
+            matched_incident = best_cross_inc
+            logger.info(
+                f"Correlator cross-TG path: call {call_id} → {best_cross_inc['incident_id']} "
+                f"(sim={best_cross_score:.3f}, "
+                f"shared_units={len(call_unit_set & set(best_cross_inc.get('units') or []))})"
+            )
+
     # ── 3. Slow path: embedding similarity (time-limited, same type) ──────────
     #
     # Two tiers:
diff --git a/drb-c2-core/app/internal/intelligence.py b/drb-c2-core/app/internal/intelligence.py
index f674fe6..6395ef8 100644
--- a/drb-c2-core/app/internal/intelligence.py
+++ b/drb-c2-core/app/internal/intelligence.py
@@ -31,16 +31,18 @@ Response format — a JSON object with a "scenes" array. Each scene:
   units: list of unit IDs or officer numbers explicitly mentioned
   severity: one of "minor" | "moderate" | "major" | "unknown"
   resolved: true if this scene explicitly signals incident closure, false otherwise
+  reassignment: true if dispatch is actively pulling a unit away from their current assignment to respond to a new, different call — e.g. "Baker, can you clear and respond to...", "Adam, break from that and go to...". False if the unit is simply reporting in, updating status, or continuing their current assignment.
   transcript_corrected: corrected text for this scene's transmissions only, or null
 
 Rules:
 - location: prefer intersections > addresses > mile markers > route+town > route alone > town alone. Empty string if none.
-- tags: specific, lowercase, hyphenated. Do not repeat incident_type as a tag.
+- tags: describe WHAT happened, not WHERE. Specific, lowercase, hyphenated. Do not use location names, road names, talkgroup names, or place names as tags (wrong: "lower-macy's", "canvas-route-6", "route-202"; right: "suspect-search", "shoplifting", "vehicle-pursuit"). Do not repeat incident_type as a tag.
 - units: only identifiers explicitly mentioned, not inferred.
 - Do not invent details not present in the transcript.
 - incident_type: let the talkgroup channel be your primary signal. Use "fire" ONLY if the talkgroup is clearly a fire/rescue channel OR the transcript explicitly describes active fire, smoke, flames, or structure fire activation. Police or EMS referencing a fire scene → use "police" or "ems". When uncertain, prefer "other" over "fire".
 - ten_codes: interpret radio codes using the department reference provided below. Do not guess codes not listed.
 - resolved: true only when the scene explicitly signals "Code 4", "all clear", "10-42", "in custody", "patient transported", "fire out", "GOA", "negative contact", "scene clear".
+- reassignment: only true when a unit is explicitly being pulled to a completely new call or location. A unit going en route to their first dispatch is NOT a reassignment. Routine status updates, acknowledgements, and scene updates are NOT reassignments.
 - transcript_corrected: fix only clear STT/vocoder errors (e.g. "Several" → "10-4", misheard street names, garbled unit IDs). Keep all radio language as-is — do NOT decode codes into plain English. Return null if accurate.
 
 System: {system_id}
@@ -130,6 +132,7 @@ async def extract_scenes(
         units:              list[str]      = scene.get("units") or []
         severity:           str            = scene.get("severity") or "unknown"
         resolved:           bool           = bool(scene.get("resolved", False))
+        reassignment:       bool           = bool(scene.get("reassignment", False))
         transcript_corrected: Optional[str]= scene.get("transcript_corrected") or None
         segment_indices:    Optional[list] = scene.get("segment_indices")
 
@@ -160,6 +163,7 @@ async def extract_scenes(
             "units":                units,
             "severity":             severity,
             "resolved":             resolved,
+            "reassignment":         reassignment,
             "transcript_corrected": transcript_corrected,
             "segment_indices":      segment_indices,
             "embedding":            embedding,
diff --git a/drb-c2-core/app/routers/upload.py b/drb-c2-core/app/routers/upload.py
index f43a75a..566aa22 100644
--- a/drb-c2-core/app/routers/upload.py
+++ b/drb-c2-core/app/routers/upload.py
@@ -110,6 +110,9 @@ async def _run_extraction_pipeline(
     all_tags: list[str] = []
     for scene in scenes:
         all_tags.extend(scene["tags"])
+        # When dispatch is pulling a unit to a NEW call (reassignment), suppress unit
+        # overlap so the new scene doesn't chain into the unit's previous incident.
+        corr_units = [] if scene.get("reassignment") else scene.get("units")
         incident_id = await incident_correlator.correlate_call(
             call_id=call_id,
             node_id=node_id,
@@ -120,6 +123,8 @@ async def _run_extraction_pipeline(
             incident_type=scene["incident_type"],
             location=scene["location"],
             location_coords=scene["location_coords"],
+            units=corr_units,
+            vehicles=scene.get("vehicles"),
         )
         if incident_id and incident_id not in incident_ids:
             incident_ids.append(incident_id)
@@ -206,6 +211,7 @@ async def _run_intelligence_pipeline(
     if flags["correlation_enabled"]:
         for scene in scenes:
             all_tags.extend(scene["tags"])
+            corr_units = [] if scene.get("reassignment") else scene.get("units")
             incident_id = await incident_correlator.correlate_call(
                 call_id=call_id,
                 node_id=node_id,
@@ -216,6 +222,8 @@ async def _run_intelligence_pipeline(
                 incident_type=scene["incident_type"],
                 location=scene["location"],
                 location_coords=scene["location_coords"],
+                units=corr_units,
+                vehicles=scene.get("vehicles"),
             )
             if incident_id and incident_id not in incident_ids:
                 incident_ids.append(incident_id)