Big updates

This commit is contained in:
Logan
2026-04-21 01:51:23 -04:00
parent 788afca339
commit 6612e4b683
13 changed files with 168 additions and 49 deletions
@@ -145,6 +145,7 @@ async def correlate_call(
await _update_incident(
matched_incident, call_id, talkgroup_id, system_id, tags,
location, location_coords, call_units, call_vehicles, call_embedding, now,
talkgroup_name=talkgroup_name, incident_type=incident_type,
)
elif incident_type:
incident_id = await _create_incident(
@@ -250,6 +251,8 @@ async def _update_incident(
call_vehicles: list[str],
call_embedding: Optional[list],
now: datetime,
talkgroup_name: Optional[str] = None,
incident_type: Optional[str] = None,
) -> None:
incident_id = inc["incident_id"]
@@ -295,6 +298,22 @@ async def _update_incident(
if best_coords:
updates["location_coords"] = best_coords
# Re-evaluate title when a substantive call (classified incident_type) brings new tags.
# Routine status calls (type=None) do not clobber the title.
if incident_type:
content_tags = [t for t in tags if t != "auto-generated"]
primary_tag = content_tags[0].replace("-", " ").title() if content_tags else None
tg_label = (
talkgroup_name
or (f"TGID {talkgroup_id}" if talkgroup_id else inc.get("title", "").split(" — ")[-1])
)
if primary_tag and best_location:
updates["title"] = f"{primary_tag} at {best_location}"
elif primary_tag and tg_label:
updates["title"] = f"{primary_tag} — {tg_label}"
elif primary_tag:
updates["title"] = primary_tag
await fstore.doc_set("incidents", incident_id, updates)
logger.info(f"Correlator: linked call {call_id} to incident {incident_id}")
@@ -315,14 +334,24 @@ async def _create_incident(
now: datetime,
) -> str:
incident_id = str(uuid.uuid4())
tg_label = (
tg_label = (
talkgroup_name
or (f"TGID {talkgroup_id}" if talkgroup_id else "Unknown Talkgroup")
)
# Build a descriptive title from tags + location when available
content_tags = [t for t in tags if t != "auto-generated"]
primary_tag = content_tags[0].replace("-", " ").title() if content_tags else None
if primary_tag and location:
title = f"{primary_tag} at {location}"
elif primary_tag:
title = f"{primary_tag} — {tg_label}"
else:
title = f"{incident_type.title()} — {tg_label}"
doc = {
"incident_id": incident_id,
"title": f"{incident_type.title()}{tg_label}",
"title": title,
"type": incident_type,
"status": "active",
"location": location,
+2 -1
View File
@@ -62,6 +62,7 @@ async def extract_tags(
system_id: Optional[str] = None,
segments: Optional[list[dict]] = None,
node_id: Optional[str] = None,
preserve_transcript_correction: bool = False,
) -> tuple[list[str], Optional[str], Optional[str], Optional[dict], bool]:
"""
Extract incident tags, type, location, corrected transcript, and closure signal via GPT-4o mini.
@@ -119,7 +120,7 @@ async def extract_tags(
updates["units"] = units
if embedding:
updates["embedding"] = embedding
if transcript_corrected:
if transcript_corrected and not preserve_transcript_correction:
updates["transcript_corrected"] = transcript_corrected
try:
+12 -1
View File
@@ -154,12 +154,23 @@ class MQTTHandler:
else datetime.now(timezone.utc)
)
# Prefer the name from OP25 metadata; fall back to the system config
tgid_name = payload.get("tgid_name") or ""
if not tgid_name and system_id and payload.get("tgid"):
system_doc = await fstore.doc_get("systems", system_id)
if system_doc:
tgid_int = int(payload["tgid"])
for tg in system_doc.get("config", {}).get("talkgroups", []):
if int(tg.get("id", -1)) == tgid_int:
tgid_name = tg.get("name", "")
break
doc = {
"call_id": call_id,
"node_id": node_id,
"system_id": system_id,
"talkgroup_id": payload.get("tgid"),
"talkgroup_name": payload.get("tgid_name") or "",
"talkgroup_name": tgid_name,
"freq": payload.get("freq"),
"srcaddr": payload.get("srcaddr"),
"started_at": started_at,
+5 -3
View File
@@ -40,7 +40,7 @@ async def transcribe_call(
return None, []
try:
transcript, segments = await asyncio.to_thread(_sync_transcribe, gcs_uri)
transcript, segments = await asyncio.to_thread(_sync_transcribe, gcs_uri, talkgroup_name)
except Exception as e:
logger.warning(f"Transcription failed for call {call_id}: {e}")
return None, []
@@ -61,7 +61,7 @@ async def transcribe_call(
return transcript, segments
def _sync_transcribe(gcs_uri: str) -> tuple[Optional[str], list[dict]]:
def _sync_transcribe(gcs_uri: str, talkgroup_name: Optional[str] = None) -> tuple[Optional[str], list[dict]]:
"""Download audio from GCS and transcribe with OpenAI Whisper."""
from google.cloud import storage as gcs
from google.oauth2 import service_account
@@ -94,13 +94,15 @@ def _sync_transcribe(gcs_uri: str) -> tuple[Optional[str], list[dict]]:
try:
blob.download_to_filename(tmp_path)
prompt = (f"Talkgroup: {talkgroup_name}. " + _WHISPER_PROMPT) if talkgroup_name else _WHISPER_PROMPT
openai_client = OpenAI(api_key=settings.openai_api_key)
with open(tmp_path, "rb") as f:
response = openai_client.audio.transcriptions.create(
model="whisper-1",
file=f,
language="en",
prompt=_WHISPER_PROMPT,
prompt=prompt,
response_format="verbose_json",
)
text = response.text.strip() or None
+4 -3
View File
@@ -71,10 +71,10 @@ async def patch_transcript(
if not call:
raise HTTPException(404, f"Call '{call_id}' not found.")
# Save new transcript, clear stale intelligence fields
# Save user correction as transcript_corrected; leave original transcript intact.
# Clear stale intelligence fields so re-extraction runs fresh.
await fstore.doc_set("calls", call_id, {
"transcript": body.transcript,
"transcript_corrected": None,
"transcript_corrected": body.transcript,
"tags": [],
"severity": "unknown",
"location": None,
@@ -93,5 +93,6 @@ async def patch_transcript(
talkgroup_name=call.get("talkgroup_name"),
transcript=body.transcript,
segments=call.get("segments"),
preserve_transcript_correction=True,
)
return {"ok": True, "call_id": call_id}
+2
View File
@@ -91,6 +91,7 @@ async def _run_extraction_pipeline(
talkgroup_name: Optional[str],
transcript: str,
segments: Optional[list] = None,
preserve_transcript_correction: bool = False,
) -> None:
"""Run steps 2-4 of the intelligence pipeline using an existing transcript."""
from app.internal import intelligence, incident_correlator, alerter
@@ -99,6 +100,7 @@ async def _run_extraction_pipeline(
call_id, transcript, talkgroup_name,
talkgroup_id=talkgroup_id, system_id=system_id, segments=segments,
node_id=node_id,
preserve_transcript_correction=preserve_transcript_correction,
)
incident_id = await incident_correlator.correlate_call(