big ui and intel updates

This commit is contained in:
Logan
2026-04-19 16:48:55 -04:00
parent 0df53df92e
commit 303c5b13cf
11 changed files with 527 additions and 169 deletions
@@ -44,6 +44,7 @@ async def correlate_call(
tags: list[str],
incident_type: Optional[str],
location: Optional[str] = None,
location_coords: Optional[dict] = None,
) -> Optional[str]:
"""
Link call_id to an existing incident or create a new one.
@@ -113,10 +114,10 @@ async def correlate_call(
# ----------------------------------------------------------------
if matched_incident:
incident_id = matched_incident["incident_id"]
await _update_incident(matched_incident, call_id, talkgroup_id, system_id, tags, location, now)
await _update_incident(matched_incident, call_id, talkgroup_id, system_id, tags, location, location_coords, now)
else:
incident_id = await _create_incident(
call_id, incident_type, talkgroup_id, talkgroup_name, system_id, tags, location, now
call_id, incident_type, talkgroup_id, talkgroup_name, system_id, tags, location, location_coords, now
)
# Back-link the call
@@ -131,6 +132,7 @@ async def _update_incident(
system_id: Optional[str],
tags: list[str],
location: Optional[str],
location_coords: Optional[dict],
now: datetime,
) -> None:
incident_id = inc["incident_id"]
@@ -151,11 +153,11 @@ async def _update_incident(
merged_tags = list(dict.fromkeys(inc.get("tags", []) + tags))
# Location — append to mentions; update display location if new one is non-null
# TODO: replace "newest wins" with Maps geocoding bbox comparison for true specificity
location_mentions = inc.get("location_mentions", [])
if location and location not in location_mentions:
location_mentions.append(location)
best_location = location if location else inc.get("location")
best_coords = location_coords if location_coords else inc.get("location_coords")
# Update centroid embedding
embedding_updates = await _merge_embedding(inc, call_id)
@@ -172,6 +174,8 @@ async def _update_incident(
}
if best_location:
updates["location"] = best_location
if best_coords:
updates["location_coords"] = best_coords
await fstore.doc_set("incidents", incident_id, updates)
logger.info(f"Correlator: linked call {call_id} to existing incident {incident_id}")
@@ -185,10 +189,11 @@ async def _create_incident(
system_id: Optional[str],
tags: list[str],
location: Optional[str],
location_coords: Optional[dict],
now: datetime,
) -> str:
incident_id = str(uuid.uuid4())
tg_label = talkgroup_name or "Unknown Talkgroup"
tg_label = talkgroup_name or (f"TGID {talkgroup_id}" if talkgroup_id else "Unknown Talkgroup")
call_doc = await fstore.doc_get("calls", call_id)
call_embedding = call_doc.get("embedding") if call_doc else None
@@ -202,6 +207,7 @@ async def _create_incident(
"type": incident_type,
"status": "active",
"location": location,
"location_coords": location_coords,
"location_mentions": [location] if location else [],
"call_ids": [call_id],
"talkgroup_ids": [str(talkgroup_id)] if talkgroup_id is not None else [],
+76 -24
View File
@@ -1,10 +1,10 @@
"""
Gemini-powered intelligence extraction from call transcripts.
GPT-4o-mini intelligence extraction from call transcripts.
Sends the transcript to Gemini Flash with a tight JSON schema prompt.
Sends the transcript to GPT-4o mini with a tight JSON schema prompt.
Returns structured data: incident type, tags, location, vehicles, units, severity.
Falls back gracefully if Gemini is unavailable or returns malformed output.
Falls back gracefully if the API is unavailable or returns malformed output.
"""
import asyncio
import json
@@ -36,6 +36,9 @@ System: {system_id}
Talkgroup: {talkgroup_name}
{transcript_block}"""
# Nominatim viewbox half-width in degrees (~35 km at mid-latitudes)
_GEO_DELTA = 0.3
async def extract_tags(
call_id: str,
@@ -44,38 +47,52 @@ async def extract_tags(
talkgroup_id: Optional[int] = None,
system_id: Optional[str] = None,
segments: Optional[list[dict]] = None,
) -> tuple[list[str], Optional[str], Optional[str]]:
node_id: Optional[str] = None,
) -> tuple[list[str], Optional[str], Optional[str], Optional[dict]]:
"""
Extract incident tags, type, location, and corrected transcript via Gemini.
Extract incident tags, type, location, and corrected transcript via GPT-4o mini.
Geocodes the extracted location string via Nominatim using the node's position as bias.
Returns:
(tags, primary_type, location)
(tags, primary_type, location_str, location_coords)
where location_coords is {"lat": float, "lng": float} or None.
Side-effect: updates calls/{call_id} in Firestore with tags, location,
vehicles, units, severity, transcript_corrected; also stores the call embedding.
location_coords, vehicles, units, severity, transcript_corrected; also stores embedding.
"""
result = await asyncio.to_thread(_sync_extract, transcript, talkgroup_name, talkgroup_id, system_id, segments)
result = await asyncio.to_thread(
_sync_extract, transcript, talkgroup_name, talkgroup_id, system_id, segments
)
tags: list[str] = result.get("tags") or []
tags: list[str] = result.get("tags") or []
incident_type: Optional[str] = result.get("incident_type") or None
location: Optional[str] = result.get("location") or None
vehicles: list[str] = result.get("vehicles") or []
units: list[str] = result.get("units") or []
severity: str = result.get("severity") or "unknown"
location: Optional[str] = result.get("location") or None
vehicles: list[str] = result.get("vehicles") or []
units: list[str] = result.get("units") or []
severity: str = result.get("severity") or "unknown"
transcript_corrected: Optional[str] = result.get("transcript_corrected") or None
if incident_type in ("unknown", "other", ""):
incident_type = None
# Geocode the location string if we have one and a node to bias toward
location_coords: Optional[dict] = None
if location and node_id:
node_doc = await fstore.doc_get("nodes", node_id)
if node_doc:
node_lat = node_doc.get("lat")
node_lon = node_doc.get("lon")
if node_lat is not None and node_lon is not None:
location_coords = await _geocode_location(location, node_lat, node_lon)
# Store embedding alongside structured data
embedding = await asyncio.to_thread(_sync_embed, _embed_text(transcript, incident_type))
updates: dict = {
"tags": tags,
"severity": severity,
}
updates: dict = {"tags": tags, "severity": severity}
if location:
updates["location"] = location
if location_coords:
updates["location_coords"] = location_coords
if vehicles:
updates["vehicles"] = vehicles
if units:
@@ -92,10 +109,49 @@ async def extract_tags(
logger.info(
f"Intelligence: call {call_id} → type={incident_type}, "
f"tags={tags}, location={location!r}, severity={severity}, "
f"tags={tags}, location={location!r}, coords={location_coords}, severity={severity}, "
f"corrected={transcript_corrected is not None}"
)
return tags, incident_type, location
return tags, incident_type, location, location_coords
async def _geocode_location(
    location_str: str, node_lat: float, node_lon: float
) -> Optional[dict]:
    """
    Geocode a location string using Nominatim, biased toward the node's area.

    Builds a bounding box of +/- _GEO_DELTA degrees around the node position
    and queries Nominatim with bounded=1, so only hits inside that box are
    returned (keeps street-name matches local to the monitored region).

    Args:
        location_str: Free-text location extracted from the transcript.
        node_lat: Latitude of the receiving node (bias center).
        node_lon: Longitude of the receiving node (bias center).

    Returns:
        {"lat": float, "lng": float} for the best match, or None if the
        request fails, times out, or returns no results. Never raises.
    """
    # Local import: keeps httpx out of the module import path for callers
    # that never geocode.
    import httpx

    # Nominatim expects viewbox as "lon_min,lat_min,lon_max,lat_max".
    viewbox = (
        f"{node_lon - _GEO_DELTA},{node_lat - _GEO_DELTA},"
        f"{node_lon + _GEO_DELTA},{node_lat + _GEO_DELTA}"
    )
    params = {
        "q": location_str,
        "format": "json",
        "limit": 1,
        "viewbox": viewbox,
        "bounded": 1,  # hard-restrict results to the viewbox
    }
    # Nominatim usage policy requires an identifying User-Agent.
    headers = {"User-Agent": "DRB-Dispatch/1.0 (public-safety radio monitor)"}
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            r = await client.get(
                "https://nominatim.openstreetmap.org/search",
                params=params,
                headers=headers,
            )
            r.raise_for_status()
            results = r.json()
            if results:
                coords = {"lat": float(results[0]["lat"]), "lng": float(results[0]["lon"])}
                # Fix: original log line had no separator between the query
                # string and the coords dict, producing run-together output.
                logger.info(f"Geocoded '{location_str}' -> {coords}")
                return coords
    except Exception as e:
        # Best-effort: geocoding is an enhancement, never fatal to the pipeline.
        logger.warning(f"Geocoding failed for '{location_str}': {e}")
    return None
def _build_transcript_block(transcript: str, segments: Optional[list[dict]]) -> str:
@@ -154,10 +210,7 @@ def _sync_embed(text: str) -> Optional[list[float]]:
try:
client = OpenAI(api_key=settings.openai_api_key)
result = client.embeddings.create(
model="text-embedding-3-small",
input=text,
)
result = client.embeddings.create(model="text-embedding-3-small", input=text)
return result.data[0].embedding
except Exception as e:
logger.warning(f"Embedding generation failed: {e}")
@@ -165,6 +218,5 @@ def _sync_embed(text: str) -> Optional[list[float]]:
def _embed_text(transcript: str, incident_type: Optional[str]) -> str:
"""Build the text string to embed — transcript + type context."""
prefix = f"[{incident_type}] " if incident_type else ""
return f"{prefix}{transcript}"
+7 -2
View File
@@ -95,9 +95,10 @@ async def _run_extraction_pipeline(
"""Run steps 2-4 of the intelligence pipeline using an existing transcript."""
from app.internal import intelligence, incident_correlator, alerter
tags, incident_type, location = await intelligence.extract_tags(
tags, incident_type, location, location_coords = await intelligence.extract_tags(
call_id, transcript, talkgroup_name,
talkgroup_id=talkgroup_id, system_id=system_id, segments=segments,
node_id=node_id,
)
if incident_type:
@@ -110,6 +111,7 @@ async def _run_extraction_pipeline(
tags=tags,
incident_type=incident_type,
location=location,
location_coords=location_coords,
)
await alerter.check_and_dispatch(
@@ -150,10 +152,12 @@ async def _run_intelligence_pipeline(
tags: list[str] = []
incident_type: Optional[str] = None
location: Optional[str] = None
location_coords: Optional[dict] = None
if transcript:
tags, incident_type, location = await intelligence.extract_tags(
tags, incident_type, location, location_coords = await intelligence.extract_tags(
call_id, transcript, talkgroup_name,
talkgroup_id=talkgroup_id, system_id=system_id, segments=segments,
node_id=node_id,
)
# Step 3: Incident correlation
@@ -167,6 +171,7 @@ async def _run_intelligence_pipeline(
tags=tags,
incident_type=incident_type,
location=location,
location_coords=location_coords,
)
# Step 4: Alert dispatch (always runs — talkgroup ID rules don't need a transcript)