Updates to intel and correlation

2026-04-23 01:26:41 -04:00
parent bcd3406ae8
commit 317f9d2a9d
12 changed files with 468 additions and 150 deletions
@@ -83,6 +83,28 @@ async def patch_transcript(
        "embedding": None,
    })

+    # Unlink from ALL current incidents so re-correlation starts clean.
+    # Uses the new incident_ids list, falling back to the legacy single incident_id when that list is missing or empty.
+    old_ids: list[str] = call.get("incident_ids") or (
+        [call["incident_id"]] if call.get("incident_id") else []
+    )
+    for old_incident_id in old_ids:
+        old_incident = await fstore.doc_get("incidents", old_incident_id)
+        if old_incident:
+            remaining = [c for c in (old_incident.get("call_ids") or []) if c != call_id]
+            if remaining:
+                await fstore.doc_set("incidents", old_incident_id, {
+                    "call_ids": remaining,
+                    "summary_stale": True,
+                })
+            else:
+                await fstore.doc_set("incidents", old_incident_id, {
+                    "call_ids": [],
+                    "status": "resolved",
+                    "summary_stale": True,
+                })
+    await fstore.doc_set("calls", call_id, {"incident_ids": [], "incident_id": None})
+
    # Learn from the correction: diff original → corrected and add new tokens to vocabulary
    system_id = call.get("system_id")
    original_text = call.get("transcript_corrected") or call.get("transcript") or ""
@@ -1,7 +1,7 @@
 import uuid
 from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel
-from typing import Optional
+from typing import Dict, Optional
 from app.models import SystemCreate, SystemRecord
 from app.internal import firestore as fstore

@@ -12,6 +12,10 @@ class VocabularyTermBody(BaseModel):
    term: str


+class TenCodesBody(BaseModel):
+    """Request body carrying a complete ten-code dictionary for a system."""
+    # String keys mapped to string values; presumably code -> plain-language
+    # meaning (e.g. "10-4") — confirm against the clients that send it.
+    ten_codes: Dict[str, str]
+
+
@router.get("")
 async def list_systems():
    return await fstore.collection_list("systems")
@@ -50,6 +54,27 @@ async def delete_system(system_id: str):
    await fstore.doc_delete("systems", system_id)


+# ── Ten-codes endpoints ────────────────────────────────────────────────────────
+
+@router.get("/{system_id}/ten-codes")
+async def get_ten_codes(system_id: str):
+    """Return the ten-code dictionary for a system.
+
+    Looks up the ``systems`` document for ``system_id`` and returns
+    ``{"ten_codes": ...}``. A missing or falsy ``ten_codes`` field is
+    returned as an empty dict.
+
+    Raises:
+        HTTPException: 404 when no system document is found.
+    """
+    system = await fstore.doc_get("systems", system_id)
+    if not system:
+        raise HTTPException(404, f"System '{system_id}' not found.")
+    # ``or {}`` normalises an absent (or empty) stored value to an empty dict.
+    return {"ten_codes": system.get("ten_codes") or {}}
+
+
+@router.put("/{system_id}/ten-codes")
+async def update_ten_codes(system_id: str, body: TenCodesBody):
+    """Replace the ten-code dictionary for a system.
+
+    Verifies that the ``systems`` document for ``system_id`` exists, writes
+    ``body.ten_codes`` to its ``ten_codes`` field, and echoes the submitted
+    dictionary back as ``{"ok": True, "ten_codes": ...}``.
+
+    Raises:
+        HTTPException: 404 when no system document is found.
+    """
+    existing = await fstore.doc_get("systems", system_id)
+    if not existing:
+        raise HTTPException(404, f"System '{system_id}' not found.")
+    # NOTE(review): "replace" assumes fstore.doc_update overwrites the whole
+    # ten_codes field rather than merging individual map keys — confirm
+    # against the fstore helper.
+    await fstore.doc_update("systems", system_id, {"ten_codes": body.ten_codes})
+    return {"ok": True, "ten_codes": body.ten_codes}
+
+
 # ── Vocabulary endpoints ───────────────────────────────────────────────────────

@router.get("/{system_id}/vocabulary")
@@ -96,35 +96,47 @@ async def _run_extraction_pipeline(
    """Run steps 2-4 of the intelligence pipeline using an existing transcript."""
    from app.internal import intelligence, incident_correlator, alerter

-    tags, incident_type, location, location_coords, resolved = await intelligence.extract_tags(
+    # Step 2: Scene detection + intelligence extraction.
+    # Returns one scene per distinct incident detected in the recording.
+    scenes = await intelligence.extract_scenes(
        call_id, transcript, talkgroup_name,
        talkgroup_id=talkgroup_id, system_id=system_id, segments=segments,
        node_id=node_id,
        preserve_transcript_correction=preserve_transcript_correction,
    )

-    incident_id = await incident_correlator.correlate_call(
-        call_id=call_id,
-        node_id=node_id,
-        system_id=system_id,
-        talkgroup_id=talkgroup_id,
-        talkgroup_name=talkgroup_name,
-        tags=tags,
-        incident_type=incident_type,
-        location=location,
-        location_coords=location_coords,
-    )
+    # Step 3: Correlate each scene to an incident independently.
+    incident_ids: list[str] = []
+    all_tags: list[str] = []
+    for scene in scenes:
+        all_tags.extend(scene["tags"])
+        incident_id = await incident_correlator.correlate_call(
+            call_id=call_id,
+            node_id=node_id,
+            system_id=system_id,
+            talkgroup_id=talkgroup_id,
+            talkgroup_name=talkgroup_name,
+            tags=scene["tags"],
+            incident_type=scene["incident_type"],
+            location=scene["location"],
+            location_coords=scene["location_coords"],
+        )
+        if incident_id and incident_id not in incident_ids:
+            incident_ids.append(incident_id)
+        if scene["resolved"] and incident_id:
+            await fstore.doc_set("incidents", incident_id, {"status": "resolved"})
+            logger.info(f"Auto-resolved incident {incident_id} (LLM closure detection)")

-    if resolved and incident_id:
-        await fstore.doc_set("incidents", incident_id, {"status": "resolved"})
-        logger.info(f"Auto-resolved incident {incident_id} (LLM closure detection)")
+    if incident_ids:
+        await fstore.doc_set("calls", call_id, {"incident_ids": incident_ids})

+    # Step 4: Alert dispatch — run once with the de-duplicated tags merged from all scenes.
    await alerter.check_and_dispatch(
        call_id=call_id,
        node_id=node_id,
        talkgroup_id=talkgroup_id,
        talkgroup_name=talkgroup_name,
-        tags=tags,
+        tags=list(dict.fromkeys(all_tags)),
        transcript=transcript,
    )

@@ -140,8 +152,8 @@ async def _run_intelligence_pipeline(
    """
    Post-upload intelligence pipeline (runs as a background task):
      1. Transcribe audio via Google STT
-      2. Extract tags/incident type from transcript
-      3. Correlate with existing incidents (or create new one)
+      2. Detect scenes + extract intelligence (one result per incident in recording)
+      3. Correlate each scene with existing incidents (or create new ones)
      4. Check alert rules and dispatch notifications
    """
    from app.internal import transcription, intelligence, incident_correlator, alerter
@@ -155,35 +167,57 @@ async def _run_intelligence_pipeline(
            call_id, gcs_uri, talkgroup_name, system_id=system_id
        )

-    # Step 2: Intelligence extraction
-    tags: list[str] = []
-    incident_type: Optional[str] = None
-    location: Optional[str] = None
-    location_coords: Optional[dict] = None
-    resolved: bool = False
+    # Step 2: Scene detection + intelligence extraction
+    scenes: list[dict] = []
    if transcript:
-        tags, incident_type, location, location_coords, resolved = await intelligence.extract_tags(
+        scenes = await intelligence.extract_scenes(
            call_id, transcript, talkgroup_name,
            talkgroup_id=talkgroup_id, system_id=system_id, segments=segments,
            node_id=node_id,
        )

-    # Step 3: Incident correlation (always runs — unclassified calls can still link via talkgroup)
-    incident_id = await incident_correlator.correlate_call(
-        call_id=call_id,
-        node_id=node_id,
-        system_id=system_id,
-        talkgroup_id=talkgroup_id,
-        talkgroup_name=talkgroup_name,
-        tags=tags,
-        incident_type=incident_type,
-        location=location,
-        location_coords=location_coords,
-    )
+    # Step 3: Correlate each scene independently.
+    # A single recording can produce multiple incidents on a busy channel.
+    incident_ids: list[str] = []
+    all_tags: list[str] = []
+    for scene in scenes:
+        all_tags.extend(scene["tags"])
+        incident_id = await incident_correlator.correlate_call(
+            call_id=call_id,
+            node_id=node_id,
+            system_id=system_id,
+            talkgroup_id=talkgroup_id,
+            talkgroup_name=talkgroup_name,
+            tags=scene["tags"],
+            incident_type=scene["incident_type"],
+            location=scene["location"],
+            location_coords=scene["location_coords"],
+        )
+        if incident_id and incident_id not in incident_ids:
+            incident_ids.append(incident_id)
+        if scene["resolved"] and incident_id:
+            await fstore.doc_set("incidents", incident_id, {"status": "resolved"})
+            logger.info(f"Auto-resolved incident {incident_id} (LLM closure detection)")

-    if resolved and incident_id:
-        await fstore.doc_set("incidents", incident_id, {"status": "resolved"})
-        logger.info(f"Auto-resolved incident {incident_id} (LLM closure detection)")
+    # Calls that produced no scenes (no transcript, or none detected) are still
+    # correlated once with empty tags so talkgroup-based linking can be attempted.
+    if not scenes:
+        incident_id = await incident_correlator.correlate_call(
+            call_id=call_id,
+            node_id=node_id,
+            system_id=system_id,
+            talkgroup_id=talkgroup_id,
+            talkgroup_name=talkgroup_name,
+            tags=[],
+            incident_type=None,
+            location=None,
+            location_coords=None,
+        )
+        if incident_id:
+            incident_ids.append(incident_id)
+
+    if incident_ids:
+        await fstore.doc_set("calls", call_id, {"incident_ids": incident_ids})

    # Step 4: Alert dispatch (always runs — talkgroup ID rules don't need a transcript)
    await alerter.check_and_dispatch(
@@ -191,6 +225,6 @@ async def _run_intelligence_pipeline(
        node_id=node_id,
        talkgroup_id=talkgroup_id,
        talkgroup_name=talkgroup_name,
-        tags=tags,
+        tags=list(dict.fromkeys(all_tags)),
        transcript=transcript,
    )