STT updates and intelligence updates

This commit is contained in:
Logan
2026-04-13 00:01:19 -04:00
parent 7b6fd640d9
commit 616c06f09c
6 changed files with 76 additions and 24 deletions
+2 -2
View File
@@ -22,8 +22,8 @@ class Settings(BaseSettings):
# Gemini (intelligence extraction, embeddings, incident summaries)
gemini_api_key: Optional[str] = None
summary_interval_minutes: int = 15 # how often the summary loop runs
correlation_window_hours: int = 4 # how far back to look for matching incidents
summary_interval_minutes: int = 2 # how often the summary loop runs
correlation_window_hours: int = 1 # how far back to look for matching incidents
embedding_similarity_threshold: float = 0.82 # cosine similarity cutoff for slow-path match
# Internal service key — allows server-side services (discord bot) to call C2 without Firebase
+21 -8
View File
@@ -12,7 +12,7 @@ from typing import Optional
from app.internal.logger import logger
from app.internal import firestore as fstore
_PROMPT_TEMPLATE = """You are analyzing a P25 public safety radio transcript. Extract structured information and respond ONLY with a single valid JSON object — no markdown, no explanation.
_PROMPT_TEMPLATE = """You are analyzing a P25 public safety radio transcript. The audio was transcribed by Whisper through a digital radio vocoder, which introduces errors. Extract structured information and respond ONLY with a single valid JSON object — no markdown, no explanation.
Schema:
{{
@@ -21,7 +21,8 @@ Schema:
"location": "most specific location string found, or empty string",
"vehicles": [vehicle descriptions mentioned, e.g. "Hyundai Tucson", "black sedan"],
"units": [unit IDs or officer numbers mentioned, e.g. "Unit 511", "Car 4"],
"severity": one of "minor" | "moderate" | "major" | "unknown"
"severity": one of "minor" | "moderate" | "major" | "unknown",
"transcript_corrected": "corrected transcript string, or null if no corrections needed"
}}
Rules:
@@ -29,7 +30,9 @@ Rules:
- tags: be specific and lowercase, hyphenated. Do not repeat incident_type as a tag.
- units: only identifiers explicitly mentioned, not inferred.
- Do not invent details not present in the transcript.
- transcript_corrected: fix only clear STT errors caused by vocoder distortion (e.g. "Several" → "10-4", misheard street names, garbled unit IDs). Keep all radio language as-is — do NOT decode codes into plain English. Return null if the transcript looks accurate.
Talkgroup: {talkgroup_name}
Transcript:
{transcript}"""
@@ -37,17 +40,18 @@ Transcript:
async def extract_tags(
call_id: str,
transcript: str,
talkgroup_name: Optional[str] = None,
) -> tuple[list[str], Optional[str], Optional[str]]:
"""
Extract incident tags, type, and location from a transcript via Gemini.
Extract incident tags, type, location, and corrected transcript via Gemini.
Returns:
(tags, primary_type, location)
Side-effect: updates calls/{call_id} in Firestore with tags, location,
vehicles, units, severity; also stores the call embedding.
vehicles, units, severity, transcript_corrected; also stores the call embedding.
"""
result = await asyncio.to_thread(_sync_extract, transcript)
result = await asyncio.to_thread(_sync_extract, transcript, talkgroup_name)
tags: list[str] = result.get("tags") or []
incident_type: Optional[str] = result.get("incident_type") or None
@@ -55,6 +59,7 @@ async def extract_tags(
vehicles: list[str] = result.get("vehicles") or []
units: list[str] = result.get("units") or []
severity: str = result.get("severity") or "unknown"
transcript_corrected: Optional[str] = result.get("transcript_corrected") or None
if incident_type in ("unknown", "other", ""):
incident_type = None
@@ -74,6 +79,8 @@ async def extract_tags(
updates["units"] = units
if embedding:
updates["embedding"] = embedding
if transcript_corrected:
updates["transcript_corrected"] = transcript_corrected
try:
await fstore.doc_set("calls", call_id, updates)
@@ -82,12 +89,13 @@ async def extract_tags(
logger.info(
f"Intelligence: call {call_id} → type={incident_type}, "
f"tags={tags}, location={location!r}, severity={severity}"
f"tags={tags}, location={location!r}, severity={severity}, "
f"corrected={transcript_corrected is not None}"
)
return tags, incident_type, location
def _sync_extract(transcript: str) -> dict:
def _sync_extract(transcript: str, talkgroup_name: Optional[str]) -> dict:
"""Call Gemini Flash and parse the JSON response."""
from app.config import settings
import google.generativeai as genai
@@ -102,8 +110,13 @@ def _sync_extract(transcript: str) -> dict:
generation_config={"response_mime_type": "application/json"},
)
prompt = _PROMPT_TEMPLATE.format(
transcript=transcript,
talkgroup_name=talkgroup_name or "unknown",
)
try:
response = model.generate_content(_PROMPT_TEMPLATE.format(transcript=transcript))
response = model.generate_content(prompt)
return json.loads(response.text)
except json.JSONDecodeError as e:
logger.warning(f"Gemini returned non-JSON: {e}")
+22 -7
View File
@@ -11,17 +11,34 @@ from typing import Optional
from app.internal.logger import logger
from app.internal import firestore as fstore
# Whisper treats `prompt` as preceding transcript text, not instructions.
# Writing it as actual radio speech primes the vocabulary toward P25 codes
# and phrasing before the model hears the audio.
_WHISPER_PROMPT = (
"10-4. 10-23. 10-20. 10-97. 10-8. 10-7. 10-34. 10-50. 10-52. "
"Post 4, I'm out. Post 3. En route. On scene. In route. "
"Copy. Negative. Stand by. Be advised. Go ahead. "
"Units responding. Dispatch. Talkgroup. "
"Engine. Ladder. Medic. Rescue. Car. Unit. "
"MVA. MVC. Structure fire. Working fire."
)
async def transcribe_call(call_id: str, gcs_uri: str) -> Optional[str]:
async def transcribe_call(
call_id: str,
gcs_uri: str,
talkgroup_name: Optional[str] = None,
) -> Optional[str]:
"""
Transcribe audio at the given GCS URI and store the result in Firestore.
Args:
call_id: Firestore document ID in the 'calls' collection.
gcs_uri: GCS URI of the audio file, e.g. gs://bucket/calls/xyz.mp3
call_id: Firestore document ID in the 'calls' collection.
gcs_uri: GCS URI of the audio file, e.g. gs://bucket/calls/xyz.mp3
talkgroup_name: Passed through to the intelligence layer; unused here.
Returns:
The transcript string, or None if transcription failed / was skipped.
The raw Whisper transcript string, or None if transcription failed.
"""
if not gcs_uri or not gcs_uri.startswith("gs://"):
return None
@@ -53,11 +70,9 @@ def _sync_transcribe(gcs_uri: str) -> Optional[str]:
logger.warning("OPENAI_API_KEY not set — transcription disabled.")
return None
# Parse gs://bucket/path/to/file.mp3
without_scheme = gcs_uri[len("gs://"):]
bucket_name, blob_path = without_scheme.split("/", 1)
# Download to a temp file
if settings.gcp_credentials_path:
creds = service_account.Credentials.from_service_account_file(
settings.gcp_credentials_path,
@@ -83,7 +98,7 @@ def _sync_transcribe(gcs_uri: str) -> Optional[str]:
model="whisper-1",
file=f,
language="en",
prompt="Public safety radio communication. May include police codes, fire, EMS, talkgroup IDs, unit numbers, addresses.",
prompt=_WHISPER_PROMPT,
)
return response.text.strip() or None
finally:
+2 -2
View File
@@ -104,14 +104,14 @@ async def _run_intelligence_pipeline(
# Step 1: Transcription
if gcs_uri:
transcript = await transcription.transcribe_call(call_id, gcs_uri)
transcript = await transcription.transcribe_call(call_id, gcs_uri, talkgroup_name)
# Step 2: Intelligence extraction
tags: list[str] = []
incident_type: Optional[str] = None
location: Optional[str] = None
if transcript:
tags, incident_type, location = await intelligence.extract_tags(call_id, transcript)
tags, incident_type, location = await intelligence.extract_tags(call_id, transcript, talkgroup_name)
# Step 3: Incident correlation
if incident_type:
+28 -5
View File
@@ -24,8 +24,11 @@ const TAG_COLORS: Record<string, string> = {
export function CallRow({ call, systemName }: Props) {
const [expanded, setExpanded] = useState(false);
const [showOriginal, setShowOriginal] = useState(false);
const isActive = call.status === "active";
const hasDetails = call.transcript || (call.tags && call.tags.length > 0) || call.incident_id;
const hasDetails = call.transcript || call.transcript_corrected || (call.tags && call.tags.length > 0) || call.incident_id;
const displayTranscript = (!showOriginal && call.transcript_corrected) ? call.transcript_corrected : call.transcript;
const hasBoth = !!(call.transcript && call.transcript_corrected);
return (
<>
@@ -101,10 +104,30 @@ export function CallRow({ call, systemName }: Props) {
)}
{/* Transcript */}
{call.transcript ? (
<pre className="text-xs text-gray-300 bg-gray-800 rounded-lg px-4 py-3 whitespace-pre-wrap font-mono leading-relaxed max-h-40 overflow-y-auto">
{call.transcript}
</pre>
{displayTranscript ? (
<div className="space-y-1">
<div className="flex items-center gap-2">
{hasBoth && (
<span className="text-xs text-gray-600 font-mono">
{showOriginal ? "original" : "corrected"}
</span>
)}
{!hasBoth && call.transcript_corrected && (
<span className="text-xs text-gray-600 font-mono">corrected</span>
)}
</div>
<pre className="text-xs text-gray-300 bg-gray-800 rounded-lg px-4 py-3 whitespace-pre-wrap font-mono leading-relaxed max-h-40 overflow-y-auto">
{displayTranscript}
</pre>
{hasBoth && (
<button
onClick={(e) => { e.stopPropagation(); setShowOriginal((v) => !v); }}
className="text-xs text-gray-600 hover:text-gray-400 font-mono transition-colors"
>
{showOriginal ? "show corrected ↑" : "show original ↓"}
</button>
)}
</div>
) : (
<p className="text-xs text-gray-600 font-mono italic">No transcript available.</p>
)}
+1
View File
@@ -31,6 +31,7 @@ export interface CallRecord {
ended_at: string | null;
audio_url: string | null;
transcript: string | null;
transcript_corrected: string | null;
incident_id: string | null;
location: { lat: number; lng: number } | null;
tags: string[];