Intel update

2026-04-19 08:00:09 -04:00
parent 2d606add75
commit 1e3d691dbd
3 changed files with 58 additions and 26 deletions
@@ -12,7 +12,7 @@ from typing import Optional
 from app.internal.logger import logger
 from app.internal import firestore as fstore

-_PROMPT_TEMPLATE = """You are analyzing a P25 public safety radio transcript. The audio was transcribed by Whisper through a digital radio vocoder, which introduces errors. Extract structured information and respond ONLY with a single valid JSON object — no markdown, no explanation.
+_PROMPT_TEMPLATE = """You are analyzing a P25 public safety radio recording. The audio was transcribed by Whisper through a digital radio vocoder, which introduces errors. Each numbered transmission is a separate PTT press from a different radio. Extract structured information and respond ONLY with a single valid JSON object — no markdown, no explanation.

 Schema:
 {{
@@ -22,7 +22,7 @@ Schema:
  "vehicles": [vehicle descriptions mentioned, e.g. "Hyundai Tucson", "black sedan"],
  "units": [unit IDs or officer numbers mentioned, e.g. "Unit 511", "Car 4"],
  "severity": one of "minor" | "moderate" | "major" | "unknown",
-  "transcript_corrected": "corrected transcript string, or null if no corrections needed"
+  "transcript_corrected": "corrected full transcript string, or null if no corrections needed"
 }}

 Rules:
@@ -30,17 +30,20 @@ Rules:
 - tags: be specific and lowercase, hyphenated. Do not repeat incident_type as a tag.
 - units: only identifiers explicitly mentioned, not inferred.
 - Do not invent details not present in the transcript.
- transcript_corrected: fix only clear STT errors caused by vocoder distortion (e.g. "Several" → "10-4", misheard street names, garbled unit IDs). Keep all radio language as-is — do NOT decode codes into plain English. Return null if the transcript looks accurate.
+- transcript_corrected: fix only clear STT errors caused by vocoder distortion (e.g. "Several" → "10-4", misheard street names, garbled unit IDs). Use the back-and-forth context between transmissions to resolve ambiguities. Keep all radio language as-is — do NOT decode codes into plain English. Return null if the transcript looks accurate.

+System: {system_id}
 Talkgroup: {talkgroup_name}
-Transcript:
-{transcript}"""
+{transcript_block}"""


 async def extract_tags(
    call_id: str,
    transcript: str,
    talkgroup_name: Optional[str] = None,
+    talkgroup_id: Optional[int] = None,
+    system_id: Optional[str] = None,
+    segments: Optional[list[dict]] = None,
 ) -> tuple[list[str], Optional[str], Optional[str]]:
    """
    Extract incident tags, type, location, and corrected transcript via Gemini.
@@ -51,7 +54,7 @@ async def extract_tags(
    Side-effect: updates calls/{call_id} in Firestore with tags, location,
    vehicles, units, severity, transcript_corrected; also stores the call embedding.
    """
-    result = await asyncio.to_thread(_sync_extract, transcript, talkgroup_name)
+    result = await asyncio.to_thread(_sync_extract, transcript, talkgroup_name, talkgroup_id, system_id, segments)

    tags: list[str] = result.get("tags") or []
    incident_type: Optional[str] = result.get("incident_type") or None
@@ -95,7 +98,21 @@ async def extract_tags(
    return tags, incident_type, location


-def _sync_extract(transcript: str, talkgroup_name: Optional[str]) -> dict:
+def _build_transcript_block(transcript: str, segments: Optional[list[dict]]) -> str:
+    """Format transcript as numbered transmissions if segments are available."""
+    if segments and len(segments) > 1:
+        lines = [f"{i+1}. [{s['start']}s] {s['text']}" for i, s in enumerate(segments)]
+        return f"Transmissions ({len(segments)}):\n" + "\n".join(lines)
+    return f"Transcript:\n{transcript}"
+
+
+def _sync_extract(
+    transcript: str,
+    talkgroup_name: Optional[str],
+    talkgroup_id: Optional[int],
+    system_id: Optional[str],
+    segments: Optional[list[dict]],
+) -> dict:
    """Call Gemini Flash and parse the JSON response."""
    from app.config import settings
    import google.generativeai as genai
@@ -110,9 +127,11 @@ def _sync_extract(transcript: str, talkgroup_name: Optional[str]) -> dict:
        generation_config={"response_mime_type": "application/json"},
    )

+    tg = f"{talkgroup_name} (TGID {talkgroup_id})" if talkgroup_id else (talkgroup_name or "unknown")
    prompt = _PROMPT_TEMPLATE.format(
-        transcript=transcript,
-        talkgroup_name=talkgroup_name or "unknown",
+        transcript_block=_build_transcript_block(transcript, segments),
+        talkgroup_name=tg,
+        system_id=system_id or "unknown",
    )

    try:
@@ -28,38 +28,40 @@ async def transcribe_call(
    call_id: str,
    gcs_uri: str,
    talkgroup_name: Optional[str] = None,
-) -> Optional[str]:
+) -> tuple[Optional[str], list[dict]]:
    """
    Transcribe audio at the given GCS URI and store the result in Firestore.

-    Args:
-        call_id:        Firestore document ID in the 'calls' collection.
-        gcs_uri:        GCS URI of the audio file, e.g. gs://bucket/calls/xyz.mp3
-        talkgroup_name: Passed through to the intelligence layer; unused here.
-
    Returns:
-        The raw Whisper transcript string, or None if transcription failed.
+        (transcript, segments) — segments is a list of {start, end, text} dicts,
+        one per detected transmission. Empty list if transcription failed.
    """
    if not gcs_uri or not gcs_uri.startswith("gs://"):
-        return None
+        return None, []

    try:
-        transcript = await asyncio.to_thread(_sync_transcribe, gcs_uri)
+        transcript, segments = await asyncio.to_thread(_sync_transcribe, gcs_uri)
    except Exception as e:
        logger.warning(f"Transcription failed for call {call_id}: {e}")
-        return None
+        return None, []

    if transcript:
+        updates: dict = {"transcript": transcript}
+        if segments:
+            updates["segments"] = segments
        try:
-            await fstore.doc_set("calls", call_id, {"transcript": transcript})
-            logger.info(f"Transcript saved for call {call_id} ({len(transcript)} chars)")
+            await fstore.doc_set("calls", call_id, updates)
+            logger.info(
+                f"Transcript saved for call {call_id} "
+                f"({len(transcript)} chars, {len(segments)} segment(s))"
+            )
        except Exception as e:
            logger.warning(f"Could not save transcript for {call_id}: {e}")

-    return transcript
+    return transcript, segments


-def _sync_transcribe(gcs_uri: str) -> Optional[str]:
+def _sync_transcribe(gcs_uri: str) -> tuple[Optional[str], list[dict]]:
    """Download audio from GCS and transcribe with OpenAI Whisper."""
    from google.cloud import storage as gcs
    from google.oauth2 import service_account
@@ -99,8 +101,15 @@ def _sync_transcribe(gcs_uri: str) -> Optional[str]:
                file=f,
                language="en",
                prompt=_WHISPER_PROMPT,
+                response_format="verbose_json",
            )
-        return response.text.strip() or None
+        text = response.text.strip() or None
+        segments = [
+            {"start": round(s.start, 2), "end": round(s.end, 2), "text": s.text.strip()}
+            for s in (response.segments or [])
+            if s.text.strip()
+        ]
+        return text, segments
    finally:
        try:
            os.unlink(tmp_path)
@@ -101,17 +101,21 @@ async def _run_intelligence_pipeline(
    from app.internal import transcription, intelligence, incident_correlator, alerter

    transcript: Optional[str] = None
+    segments: list[dict] = []

    # Step 1: Transcription
    if gcs_uri:
-        transcript = await transcription.transcribe_call(call_id, gcs_uri, talkgroup_name)
+        transcript, segments = await transcription.transcribe_call(call_id, gcs_uri, talkgroup_name)

    # Step 2: Intelligence extraction
    tags: list[str] = []
    incident_type: Optional[str] = None
    location: Optional[str] = None
    if transcript:
-        tags, incident_type, location = await intelligence.extract_tags(call_id, transcript, talkgroup_name)
+        tags, incident_type, location = await intelligence.extract_tags(
+            call_id, transcript, talkgroup_name,
+            talkgroup_id=talkgroup_id, system_id=system_id, segments=segments,
+        )

    # Step 3: Incident correlation
    if incident_type: