Updates to intel and correlation

2026-04-23 01:26:41 -04:00
parent bcd3406ae8
commit 317f9d2a9d
12 changed files with 468 additions and 150 deletions
@@ -1,8 +1,10 @@
 """
 GPT-4o-mini intelligence extraction from call transcripts.

-Sends the transcript to GPT-4o mini with a tight JSON schema prompt.
-Returns structured data: incident type, tags, location, vehicles, units, severity.
+Sends the transcript to GPT-4o-mini with a structured prompt that detects
+whether the recording contains one or multiple distinct scenes (back-to-back
+dispatch conversations on a busy channel).  Returns a list of scene dicts —
+one per detected incident.  Most calls produce a single scene.

 Falls back gracefully if the API is unavailable or returns malformed output.
 """
@@ -13,30 +15,37 @@ from typing import Optional
 from app.internal.logger import logger
 from app.internal import firestore as fstore

-_PROMPT_TEMPLATE = """You are analyzing a P25 public safety radio recording. The audio was transcribed by Whisper through a digital radio vocoder, which introduces errors. Each numbered transmission is a separate PTT press from a different radio. Extract structured information and respond ONLY with a single valid JSON object — no markdown, no explanation.
+_PROMPT_TEMPLATE = """You are analyzing a P25 public safety radio recording. The audio was transcribed by Whisper through a digital radio vocoder, which introduces errors. Each numbered transmission is a separate PTT press from a different radio.

-Schema:
-{{
-  "incident_type": one of "fire" | "ems" | "police" | "accident" | "other" | "unknown",
-  "tags": [list of specific descriptive tags, max 6, e.g. "two-car mva", "property-damage-only", "working fire", "shots-fired"],
-  "location": "most specific location string found, or empty string",
-  "vehicles": [vehicle descriptions mentioned, e.g. "Hyundai Tucson", "black sedan"],
-  "units": [unit IDs or officer numbers mentioned, e.g. "Unit 511", "Car 4"],
-  "severity": one of "minor" | "moderate" | "major" | "unknown",
-  "resolved": true if this call explicitly signals the incident is over ("Code 4", "in custody", "all clear", "fire out", "patient transported", "GOA", "scene clear", "10-42", "negative contact", "clear the scene"), false otherwise,
-  "transcript_corrected": "corrected full transcript string, or null if no corrections needed"
-}}
+SCENE DETECTION:
+A busy dispatch channel sometimes captures back-to-back conversations about multiple concurrent incidents in a single recording. Detect whether this recording contains ONE scene (all transmissions relate to a single event) or MULTIPLE scenes (clearly distinct dispatch conversations with different units being assigned, different locations, different event types). Assign short status transmissions (10-4, en route, acknowledgements) with no clear scene context to the most recent scene before them in the list.
+
+Always respond with the scenes array, even for a single scene.
+
+Response format — a JSON object with a "scenes" array. Each scene:
+  segment_indices: list of 0-based indices into the numbered transmissions (or null if no segments)
+  incident_type: one of "fire" | "ems" | "police" | "accident" | "other" | "unknown"
+  tags: list of specific descriptive tags, max 6, e.g. "two-car mva", "working fire", "shots-fired"
+  location: most specific location string found, or empty string
+  vehicles: list of vehicle descriptions mentioned
+  units: list of unit IDs or officer numbers explicitly mentioned
+  severity: one of "minor" | "moderate" | "major" | "unknown"
+  resolved: true if this scene explicitly signals incident closure, false otherwise
+  transcript_corrected: corrected text for this scene's transmissions only, or null

 Rules:
 - location: prefer intersections > addresses > mile markers > route+town > route alone > town alone. Empty string if none.
- tags: be specific and lowercase, hyphenated. Do not repeat incident_type as a tag.
+- tags: specific, lowercase, hyphenated. Do not repeat incident_type as a tag.
 - units: only identifiers explicitly mentioned, not inferred.
 - Do not invent details not present in the transcript.
- transcript_corrected: fix only clear STT errors caused by vocoder distortion (e.g. "Several" → "10-4", misheard street names, garbled unit IDs). Use the back-and-forth context between transmissions to resolve ambiguities. Keep all radio language as-is — do NOT decode codes into plain English. Return null if the transcript looks accurate.
+- incident_type: let the talkgroup channel be your primary signal. Use "fire" ONLY if the talkgroup is clearly a fire/rescue channel OR the transcript explicitly describes active fire, smoke, flames, or structure fire activation. Police or EMS referencing a fire scene → use "police" or "ems". When uncertain, prefer "other" over "fire".
+- ten_codes: interpret radio codes using the department reference provided below. Do not guess codes not listed.
+- resolved: true only when the scene explicitly signals "Code 4", "all clear", "10-42", "in custody", "patient transported", "fire out", "GOA", "negative contact", "scene clear".
+- transcript_corrected: fix only clear STT/vocoder errors (e.g. "Several" → "10-4", misheard street names, garbled unit IDs). Keep all radio language as-is — do NOT decode codes into plain English. Return null if accurate.

 System: {system_id}
 Talkgroup: {talkgroup_name}
-{vocabulary_block}{transcript_block}"""
+{ten_codes_block}{vocabulary_block}{transcript_block}"""

 # Nominatim viewbox half-width in degrees (~11 km at mid-latitudes)
 _GEO_DELTA = 0.1
@@ -54,7 +63,14 @@ _TG_SUFFIX_RE = re.compile(
 )


-async def extract_tags(
+def _build_ten_codes_block(ten_codes: dict[str, str]) -> str:
+    """Render the department ten-code reference section of the prompt.
+
+    Returns an empty string when ``ten_codes`` is empty.  Otherwise returns
+    a "Department ten-codes:" header followed by one indented
+    ``code: meaning`` line per entry (entries ordered by sorting the
+    ``(code, meaning)`` pairs) and a trailing blank line, so the block can
+    be concatenated directly in front of the next prompt section.
+    """
+    if ten_codes:
+        entries = [
+            f"  {code}: {meaning}"
+            for code, meaning in sorted(ten_codes.items())
+        ]
+        body = "\n".join(entries)
+        return f"Department ten-codes:\n{body}\n\n"
+    return ""
+
+
+async def extract_scenes(
    call_id: str,
    transcript: str,
    talkgroup_name: Optional[str] = None,
@@ -63,84 +79,128 @@ async def extract_tags(
    segments: Optional[list[dict]] = None,
    node_id: Optional[str] = None,
    preserve_transcript_correction: bool = False,
-) -> tuple[list[str], Optional[str], Optional[str], Optional[dict], bool]:
+) -> list[dict]:
    """
-    Extract incident tags, type, location, corrected transcript, and closure signal via GPT-4o mini.
-    Geocodes the extracted location string via Nominatim using the node's position as bias.
+    Split the transcript into one or more scenes and extract structured
+    intelligence for each.  Most calls return a single scene; a busy dispatch
+    channel capturing back-to-back conversations returns multiple.

-    Returns:
-        (tags, primary_type, location_str, location_coords, resolved)
-        where location_coords is {"lat": float, "lng": float} or None,
-        and resolved is True when the transcript signals incident closure.
+    Each scene dict contains:
+        tags, incident_type, location, location_coords, resolved,
+        severity, vehicles, units, transcript_corrected,
+        segment_indices, embedding

-    Side-effect: updates calls/{call_id} in Firestore with tags, location,
-    location_coords, vehicles, units, severity, transcript_corrected; also stores embedding.
+    Side-effect: updates calls/{call_id} in Firestore with merged tags,
+    location (primary scene), units/vehicles, severity, embedding, and
+    optionally transcript_corrected.
    """
-    # Load per-system vocabulary for prompt injection
    vocabulary: list[str] = []
+    ten_codes: dict[str, str] = {}
    if system_id:
        from app.internal.vocabulary_learner import get_vocabulary
        vocab_data = await get_vocabulary(system_id)
        vocabulary = vocab_data.get("vocabulary") or []
+        system_doc = await fstore.doc_get("systems", system_id)
+        if system_doc:
+            ten_codes = system_doc.get("ten_codes") or {}

-    result = await asyncio.to_thread(
-        _sync_extract, transcript, talkgroup_name, talkgroup_id, system_id, segments, vocabulary
+    raw_scenes: list[dict] = await asyncio.to_thread(
+        _sync_extract,
+        transcript, talkgroup_name, talkgroup_id, system_id, segments, vocabulary, ten_codes,
    )

-    tags: list[str]              = result.get("tags") or []
-    incident_type: Optional[str] = result.get("incident_type") or None
-    location: Optional[str]      = result.get("location") or None
-    vehicles: list[str]          = result.get("vehicles") or []
-    units: list[str]             = result.get("units") or []
-    severity: str                = result.get("severity") or "unknown"
-    resolved: bool               = bool(result.get("resolved", False))
-    transcript_corrected: Optional[str] = result.get("transcript_corrected") or None
+    if not raw_scenes:
+        return []

-    if incident_type in ("unknown", "other", ""):
-        incident_type = None
-
-    # Geocode the location string if we have one and a node to bias toward
-    location_coords: Optional[dict] = None
-    if location and node_id:
+    # Resolve node position once for geocoding all scenes
+    node_lat: Optional[float] = None
+    node_lon: Optional[float] = None
+    if node_id:
        node_doc = await fstore.doc_get("nodes", node_id)
        if node_doc:
            node_lat = node_doc.get("lat")
            node_lon = node_doc.get("lon")
-            if node_lat is not None and node_lon is not None:
-                state      = await _get_node_state(node_id, node_lat, node_lon)
-                muni       = _municipality_from_tg(talkgroup_name)
-                hint_parts = [p for p in [muni, state] if p]
-                query      = f"{location}, {', '.join(hint_parts)}" if hint_parts else location
-                location_coords = await _geocode_location(query, node_lat, node_lon)

-    # Store embedding alongside structured data
-    embedding = await asyncio.to_thread(_sync_embed, _embed_text(transcript, incident_type))
+    processed: list[dict] = []
+    for scene in raw_scenes:
+        tags:               list[str]      = scene.get("tags") or []
+        incident_type:      Optional[str]  = scene.get("incident_type") or None
+        location:           Optional[str]  = scene.get("location") or None
+        vehicles:           list[str]      = scene.get("vehicles") or []
+        units:              list[str]      = scene.get("units") or []
+        severity:           str            = scene.get("severity") or "unknown"
+        resolved:           bool           = bool(scene.get("resolved", False))
+        transcript_corrected: Optional[str]= scene.get("transcript_corrected") or None
+        segment_indices:    Optional[list] = scene.get("segment_indices")

-    updates: dict = {"tags": tags, "severity": severity}
-    if location:
-        updates["location"] = location
-    if location_coords:
-        updates["location_coords"] = location_coords
-    if vehicles:
-        updates["vehicles"] = vehicles
-    if units:
-        updates["units"] = units
-    if embedding:
-        updates["embedding"] = embedding
-    if transcript_corrected and not preserve_transcript_correction:
-        updates["transcript_corrected"] = transcript_corrected
+        if incident_type in ("unknown", "other", ""):
+            incident_type = None
+
+        # Geocode this scene's location
+        location_coords: Optional[dict] = None
+        if location and node_lat is not None and node_lon is not None:
+            state = await _get_node_state(node_id, node_lat, node_lon)
+            muni  = _municipality_from_tg(talkgroup_name)
+            hint_parts = [p for p in [muni, state] if p]
+            query = f"{location}, {', '.join(hint_parts)}" if hint_parts else location
+            location_coords = await _geocode_location(query, node_lat, node_lon)
+
+        # Embed this scene's content
+        scene_text = _build_scene_embed_text(
+            transcript, segments, segment_indices, incident_type, transcript_corrected
+        )
+        embedding = await asyncio.to_thread(_sync_embed, scene_text)
+
+        processed.append({
+            "tags":                 tags,
+            "incident_type":        incident_type,
+            "location":             location,
+            "location_coords":      location_coords,
+            "vehicles":             vehicles,
+            "units":                units,
+            "severity":             severity,
+            "resolved":             resolved,
+            "transcript_corrected": transcript_corrected,
+            "segment_indices":      segment_indices,
+            "embedding":            embedding,
+        })
+
+    # Merge across scenes for the call-level Firestore document.
+    # Primary scene (first) owns location, severity, transcript_corrected.
+    # Tags/units/vehicles are union-merged from all scenes.
+    primary = processed[0]
+    all_tags     = list(dict.fromkeys(t for s in processed for t in s["tags"]))
+    all_units    = list(dict.fromkeys(u for s in processed for u in s["units"]))
+    all_vehicles = list(dict.fromkeys(v for s in processed for v in s["vehicles"]))
+
+    updates: dict = {"tags": all_tags, "severity": primary["severity"]}
+    if primary["location"]:
+        updates["location"] = primary["location"]
+    if primary["location_coords"]:
+        updates["location_coords"] = primary["location_coords"]
+    if all_units:
+        updates["units"] = all_units
+    if all_vehicles:
+        updates["vehicles"] = all_vehicles
+    if primary["embedding"]:
+        updates["embedding"] = primary["embedding"]
+    if primary["transcript_corrected"] and not preserve_transcript_correction:
+        updates["transcript_corrected"] = primary["transcript_corrected"]

    try:
        await fstore.doc_set("calls", call_id, updates)
    except Exception as e:
        logger.warning(f"Could not save intelligence for call {call_id}: {e}")

-    logger.info(
-        f"Intelligence: call {call_id} → type={incident_type}, "
-        f"tags={tags}, location={location!r}, coords={location_coords}, severity={severity}, "
-        f"corrected={transcript_corrected is not None}"
+    scene_summary = (
+        f"{len(processed)} scene(s): "
+        + ", ".join(
+            f"[{s['incident_type'] or 'unclassified'} tags={s['tags'][:2]}]"
+            for s in processed
+        )
    )
-    return tags, incident_type, location, location_coords, resolved
+    logger.info(f"Intelligence: call {call_id} → {scene_summary}")
+    return processed


 async def _geocode_location(
@@ -220,7 +280,6 @@ def _municipality_from_tg(tg_name: Optional[str]) -> Optional[str]:
    if not tg_name:
        return None
    cleaned = _TG_SUFFIX_RE.sub("", tg_name).strip()
-    # Discard if nothing left, purely numeric, or a short all-caps abbreviation (e.g. "WC", "TAC")
    if not cleaned or cleaned.isdigit() or (len(cleaned) <= 3 and cleaned.isupper()):
        return None
    return cleaned
@@ -234,6 +293,23 @@ def _build_transcript_block(transcript: str, segments: Optional[list[dict]]) ->
    return f"Transcript:\n{transcript}"


+def _build_scene_embed_text(
+    transcript: str,
+    segments: Optional[list[dict]],
+    segment_indices: Optional[list[int]],
+    incident_type: Optional[str],
+    transcript_corrected: Optional[str],
+) -> str:
+    """Build the text string to embed for a specific scene.
+
+    The text is chosen in priority order:
+      1. ``transcript_corrected`` when it is non-empty;
+      2. the ``"text"`` of the segments selected by ``segment_indices``,
+         joined with single spaces;
+      3. the full ``transcript``.
+    The result is prefixed with ``"[<incident_type>] "`` when
+    ``incident_type`` is truthy.
+
+    ``segment_indices`` originates from parsed model output, so each entry
+    is validated: anything that is not an in-range, non-negative ``int`` is
+    ignored, as are segments without usable text.  If nothing usable
+    remains, the full transcript is used instead of embedding an empty
+    string.
+    """
+    prefix = f"[{incident_type}] " if incident_type else ""
+    if transcript_corrected:
+        return f"{prefix}{transcript_corrected}"
+    if segments and segment_indices:
+        texts = []
+        for i in segment_indices:
+            # bool is an int subclass; True/False are never meant as indices.
+            if not isinstance(i, int) or isinstance(i, bool):
+                continue
+            # Reject negatives explicitly: segments[-1] would silently pick
+            # the last transmission instead of failing.
+            if not 0 <= i < len(segments):
+                continue
+            text = segments[i].get("text")
+            if text:
+                texts.append(text)
+        if texts:
+            return f"{prefix}{' '.join(texts)}"
+    return f"{prefix}{transcript}"
+
+
 def _sync_extract(
    transcript: str,
    talkgroup_name: Optional[str],
@@ -241,14 +317,15 @@ def _sync_extract(
    system_id: Optional[str],
    segments: Optional[list[dict]],
    vocabulary: Optional[list[str]] = None,
-) -> dict:
-    """Call GPT-4o mini and parse the JSON response."""
+    ten_codes: Optional[dict[str, str]] = None,
+) -> list[dict]:
+    """Call GPT-4o-mini and return a list of scene dicts."""
    from app.config import settings
    from openai import OpenAI

    if not settings.openai_api_key:
        logger.warning("OPENAI_API_KEY not set — intelligence extraction disabled.")
-        return {}
+        return []

    from app.internal.vocabulary_learner import build_gpt_vocab_block
    tg = f"{talkgroup_name} (TGID {talkgroup_id})" if talkgroup_id else (talkgroup_name or "unknown")
@@ -256,6 +333,7 @@ def _sync_extract(
        transcript_block=_build_transcript_block(transcript, segments),
        talkgroup_name=tg,
        system_id=system_id or "unknown",
+        ten_codes_block=_build_ten_codes_block(ten_codes or {}),
        vocabulary_block=build_gpt_vocab_block(vocabulary or []),
    )

@@ -266,13 +344,22 @@ def _sync_extract(
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"},
        )
-        return json.loads(response.choices[0].message.content)
+        raw = json.loads(response.choices[0].message.content)
+
+        # New format: {"scenes": [...]}
+        if "scenes" in raw and isinstance(raw["scenes"], list):
+            return raw["scenes"]
+
+        # Fallback: GPT returned the old flat single-scene format
+        logger.warning("GPT returned flat format instead of scenes array — wrapping")
+        return [raw]
+
    except json.JSONDecodeError as e:
-        logger.warning(f"GPT-4o mini returned non-JSON: {e}")
-        return {}
+        logger.warning(f"GPT-4o-mini returned non-JSON: {e}")
+        return []
    except Exception as e:
-        logger.warning(f"GPT-4o mini extraction failed: {e}")
-        return {}
+        logger.warning(f"GPT-4o-mini extraction failed: {e}")
+        return []


 def _sync_embed(text: str) -> Optional[list[float]]:
@@ -290,8 +377,3 @@ def _sync_embed(text: str) -> Optional[list[float]]:
    except Exception as e:
        logger.warning(f"Embedding generation failed: {e}")
        return None
-
-
-def _embed_text(transcript: str, incident_type: Optional[str]) -> str:
-    prefix = f"[{incident_type}] " if incident_type else ""
-    return f"{prefix}{transcript}"