diff --git a/drb-c2-core/app/config.py b/drb-c2-core/app/config.py index 018632d..fa7b138 100644 --- a/drb-c2-core/app/config.py +++ b/drb-c2-core/app/config.py @@ -22,8 +22,8 @@ class Settings(BaseSettings): # Gemini (intelligence extraction, embeddings, incident summaries) gemini_api_key: Optional[str] = None - summary_interval_minutes: int = 15 # how often the summary loop runs - correlation_window_hours: int = 4 # how far back to look for matching incidents + summary_interval_minutes: int = 2 # how often the summary loop runs + correlation_window_hours: int = 1 # how far back to look for matching incidents embedding_similarity_threshold: float = 0.82 # cosine similarity cutoff for slow-path match # Internal service key — allows server-side services (discord bot) to call C2 without Firebase diff --git a/drb-c2-core/app/internal/intelligence.py b/drb-c2-core/app/internal/intelligence.py index e3966e9..ec9512e 100644 --- a/drb-c2-core/app/internal/intelligence.py +++ b/drb-c2-core/app/internal/intelligence.py @@ -12,7 +12,7 @@ from typing import Optional from app.internal.logger import logger from app.internal import firestore as fstore -_PROMPT_TEMPLATE = """You are analyzing a P25 public safety radio transcript. Extract structured information and respond ONLY with a single valid JSON object — no markdown, no explanation. +_PROMPT_TEMPLATE = """You are analyzing a P25 public safety radio transcript. The audio was transcribed by Whisper through a digital radio vocoder, which introduces errors. Extract structured information and respond ONLY with a single valid JSON object — no markdown, no explanation. Schema: {{ @@ -21,7 +21,8 @@ Schema: "location": "most specific location string found, or empty string", "vehicles": [vehicle descriptions mentioned, e.g. "Hyundai Tucson", "black sedan"], "units": [unit IDs or officer numbers mentioned, e.g. "Unit 511", "Car 4"], - "severity": one of "minor" | "moderate" | "major" | "unknown" + "severity": one of "minor" | "moderate" | "major" | "unknown", + "transcript_corrected": "corrected transcript string, or null if no corrections needed" }} Rules: @@ -29,7 +30,9 @@ Rules: - tags: be specific and lowercase, hyphenated. Do not repeat incident_type as a tag. - units: only identifiers explicitly mentioned, not inferred. - Do not invent details not present in the transcript. +- transcript_corrected: fix only clear STT errors caused by vocoder distortion (e.g. "Several" → "10-4", misheard street names, garbled unit IDs). Keep all radio language as-is — do NOT decode codes into plain English. Return null if the transcript looks accurate. +Talkgroup: {talkgroup_name} Transcript: {transcript}""" @@ -37,17 +40,18 @@ Transcript: async def extract_tags( call_id: str, transcript: str, + talkgroup_name: Optional[str] = None, ) -> tuple[list[str], Optional[str], Optional[str]]: """ - Extract incident tags, type, and location from a transcript via Gemini. + Extract incident tags, type, location, and corrected transcript via Gemini. Returns: (tags, primary_type, location) Side-effect: updates calls/{call_id} in Firestore with tags, location, - vehicles, units, severity; also stores the call embedding. + vehicles, units, severity, transcript_corrected; also stores the call embedding. """ - result = await asyncio.to_thread(_sync_extract, transcript) + result = await asyncio.to_thread(_sync_extract, transcript, talkgroup_name) tags: list[str] = result.get("tags") or [] incident_type: Optional[str] = result.get("incident_type") or None @@ -55,6 +59,7 @@ async def extract_tags( vehicles: list[str] = result.get("vehicles") or [] units: list[str] = result.get("units") or [] severity: str = result.get("severity") or "unknown" + transcript_corrected: Optional[str] = result.get("transcript_corrected") or None if incident_type in ("unknown", "other", ""): incident_type = None @@ -74,6 +79,8 @@ async def extract_tags( updates["units"] = units if embedding: updates["embedding"] = embedding + if transcript_corrected: + updates["transcript_corrected"] = transcript_corrected try: await fstore.doc_set("calls", call_id, updates) @@ -82,12 +89,13 @@ async def extract_tags( logger.info( f"Intelligence: call {call_id} → type={incident_type}, " - f"tags={tags}, location={location!r}, severity={severity}" + f"tags={tags}, location={location!r}, severity={severity}, " + f"corrected={transcript_corrected is not None}" ) return tags, incident_type, location -def _sync_extract(transcript: str) -> dict: +def _sync_extract(transcript: str, talkgroup_name: Optional[str]) -> dict: """Call Gemini Flash and parse the JSON response.""" from app.config import settings import google.generativeai as genai @@ -102,8 +110,13 @@ def _sync_extract(transcript: str) -> dict: generation_config={"response_mime_type": "application/json"}, ) + prompt = _PROMPT_TEMPLATE.format( + transcript=transcript, + talkgroup_name=talkgroup_name or "unknown", + ) + try: - response = model.generate_content(_PROMPT_TEMPLATE.format(transcript=transcript)) + response = model.generate_content(prompt) return json.loads(response.text) except json.JSONDecodeError as e: logger.warning(f"Gemini returned non-JSON: {e}") diff --git a/drb-c2-core/app/internal/transcription.py b/drb-c2-core/app/internal/transcription.py index 47fe9ed..37e4bd1 100644 --- a/drb-c2-core/app/internal/transcription.py +++ b/drb-c2-core/app/internal/transcription.py @@ -11,17 +11,34 @@ from typing import Optional from app.internal.logger import logger from app.internal import firestore as fstore +# Whisper treats `prompt` as preceding transcript text, not instructions. +# Writing it as actual radio speech primes the vocabulary toward P25 codes +# and phrasing before the model hears the audio. +_WHISPER_PROMPT = ( + "10-4. 10-23. 10-20. 10-97. 10-8. 10-7. 10-34. 10-50. 10-52. " + "Post 4, I'm out. Post 3. En route. On scene. In route. " + "Copy. Negative. Stand by. Be advised. Go ahead. " + "Units responding. Dispatch. Talkgroup. " + "Engine. Ladder. Medic. Rescue. Car. Unit. " + "MVA. MVC. Structure fire. Working fire." +) -async def transcribe_call(call_id: str, gcs_uri: str) -> Optional[str]: + +async def transcribe_call( + call_id: str, + gcs_uri: str, + talkgroup_name: Optional[str] = None, +) -> Optional[str]: """ Transcribe audio at the given GCS URI and store the result in Firestore. Args: - call_id: Firestore document ID in the 'calls' collection. - gcs_uri: GCS URI of the audio file, e.g. gs://bucket/calls/xyz.mp3 + call_id: Firestore document ID in the 'calls' collection. + gcs_uri: GCS URI of the audio file, e.g. gs://bucket/calls/xyz.mp3 + talkgroup_name: Passed through to the intelligence layer; unused here. Returns: - The transcript string, or None if transcription failed / was skipped. + The raw Whisper transcript string, or None if transcription failed. """ if not gcs_uri or not gcs_uri.startswith("gs://"): return None @@ -53,11 +70,9 @@ def _sync_transcribe(gcs_uri: str) -> Optional[str]: logger.warning("OPENAI_API_KEY not set — transcription disabled.") return None - # Parse gs://bucket/path/to/file.mp3 without_scheme = gcs_uri[len("gs://"):] bucket_name, blob_path = without_scheme.split("/", 1) - # Download to a temp file if settings.gcp_credentials_path: creds = service_account.Credentials.from_service_account_file( settings.gcp_credentials_path, @@ -83,7 +98,7 @@ def _sync_transcribe(gcs_uri: str) -> Optional[str]: model="whisper-1", file=f, language="en", - prompt="Public safety radio communication. May include police codes, fire, EMS, talkgroup IDs, unit numbers, addresses.", + prompt=_WHISPER_PROMPT, ) return response.text.strip() or None finally: diff --git a/drb-c2-core/app/routers/upload.py b/drb-c2-core/app/routers/upload.py index 7934f31..188f1e0 100644 --- a/drb-c2-core/app/routers/upload.py +++ b/drb-c2-core/app/routers/upload.py @@ -104,14 +104,14 @@ async def _run_intelligence_pipeline( # Step 1: Transcription if gcs_uri: - transcript = await transcription.transcribe_call(call_id, gcs_uri) + transcript = await transcription.transcribe_call(call_id, gcs_uri, talkgroup_name) # Step 2: Intelligence extraction tags: list[str] = [] incident_type: Optional[str] = None location: Optional[str] = None if transcript: - tags, incident_type, location = await intelligence.extract_tags(call_id, transcript) + tags, incident_type, location = await intelligence.extract_tags(call_id, transcript, talkgroup_name) # Step 3: Incident correlation if incident_type: diff --git a/drb-frontend/components/CallRow.tsx b/drb-frontend/components/CallRow.tsx index 4298063..ebe7b92 100644 --- a/drb-frontend/components/CallRow.tsx +++ b/drb-frontend/components/CallRow.tsx @@ -24,8 +24,11 @@ const TAG_COLORS: Record = { export function CallRow({ call, systemName }: Props) { const [expanded, setExpanded] = useState(false); + const [showOriginal, setShowOriginal] = useState(false); const isActive = call.status === "active"; - const hasDetails = call.transcript || (call.tags && call.tags.length > 0) || call.incident_id; + const hasDetails = call.transcript || call.transcript_corrected || (call.tags && call.tags.length > 0) || call.incident_id; + const displayTranscript = (!showOriginal && call.transcript_corrected) ? call.transcript_corrected : call.transcript; + const hasBoth = !!(call.transcript && call.transcript_corrected); return ( <> @@ -101,10 +104,30 @@ export function CallRow({ call, systemName }: Props) { )} {/* Transcript */} - {call.transcript ? ( -
-                {call.transcript}
-              
+ {displayTranscript ? ( +
+
+ {hasBoth && ( + + {showOriginal ? "original" : "corrected"} + + )} + {!hasBoth && call.transcript_corrected && ( + corrected + )} +
+
+                  {displayTranscript}
+                
+ {hasBoth && ( + + )} +
) : (

No transcript available.

)} diff --git a/drb-frontend/lib/types.ts b/drb-frontend/lib/types.ts index a00231f..903f1ff 100644 --- a/drb-frontend/lib/types.ts +++ b/drb-frontend/lib/types.ts @@ -31,6 +31,7 @@ export interface CallRecord { ended_at: string | null; audio_url: string | null; transcript: string | null; + transcript_corrected: string | null; incident_id: string | null; location: { lat: number; lng: number } | null; tags: string[];