stt updates and intelligence updates

This commit is contained in:
Logan
2026-04-13 00:01:19 -04:00
parent 7b6fd640d9
commit 616c06f09c
6 changed files with 76 additions and 24 deletions
+2 -2
View File
@@ -22,8 +22,8 @@ class Settings(BaseSettings):
# Gemini (intelligence extraction, embeddings, incident summaries) # Gemini (intelligence extraction, embeddings, incident summaries)
gemini_api_key: Optional[str] = None gemini_api_key: Optional[str] = None
summary_interval_minutes: int = 15 # how often the summary loop runs summary_interval_minutes: int = 2 # how often the summary loop runs
correlation_window_hours: int = 4 # how far back to look for matching incidents correlation_window_hours: int = 1 # how far back to look for matching incidents
embedding_similarity_threshold: float = 0.82 # cosine similarity cutoff for slow-path match embedding_similarity_threshold: float = 0.82 # cosine similarity cutoff for slow-path match
# Internal service key — allows server-side services (discord bot) to call C2 without Firebase # Internal service key — allows server-side services (discord bot) to call C2 without Firebase
+21 -8
View File
@@ -12,7 +12,7 @@ from typing import Optional
from app.internal.logger import logger from app.internal.logger import logger
from app.internal import firestore as fstore from app.internal import firestore as fstore
_PROMPT_TEMPLATE = """You are analyzing a P25 public safety radio transcript. Extract structured information and respond ONLY with a single valid JSON object — no markdown, no explanation. _PROMPT_TEMPLATE = """You are analyzing a P25 public safety radio transcript. The audio was transcribed by Whisper through a digital radio vocoder, which introduces errors. Extract structured information and respond ONLY with a single valid JSON object — no markdown, no explanation.
Schema: Schema:
{{ {{
@@ -21,7 +21,8 @@ Schema:
"location": "most specific location string found, or empty string", "location": "most specific location string found, or empty string",
"vehicles": [vehicle descriptions mentioned, e.g. "Hyundai Tucson", "black sedan"], "vehicles": [vehicle descriptions mentioned, e.g. "Hyundai Tucson", "black sedan"],
"units": [unit IDs or officer numbers mentioned, e.g. "Unit 511", "Car 4"], "units": [unit IDs or officer numbers mentioned, e.g. "Unit 511", "Car 4"],
"severity": one of "minor" | "moderate" | "major" | "unknown" "severity": one of "minor" | "moderate" | "major" | "unknown",
"transcript_corrected": "corrected transcript string, or null if no corrections needed"
}} }}
Rules: Rules:
@@ -29,7 +30,9 @@ Rules:
- tags: be specific and lowercase, hyphenated. Do not repeat incident_type as a tag. - tags: be specific and lowercase, hyphenated. Do not repeat incident_type as a tag.
- units: only identifiers explicitly mentioned, not inferred. - units: only identifiers explicitly mentioned, not inferred.
- Do not invent details not present in the transcript. - Do not invent details not present in the transcript.
- transcript_corrected: fix only clear STT errors caused by vocoder distortion (e.g. "Several" → "10-4", misheard street names, garbled unit IDs). Keep all radio language as-is — do NOT decode codes into plain English. Return null if the transcript looks accurate.
Talkgroup: {talkgroup_name}
Transcript: Transcript:
{transcript}""" {transcript}"""
@@ -37,17 +40,18 @@ Transcript:
async def extract_tags( async def extract_tags(
call_id: str, call_id: str,
transcript: str, transcript: str,
talkgroup_name: Optional[str] = None,
) -> tuple[list[str], Optional[str], Optional[str]]: ) -> tuple[list[str], Optional[str], Optional[str]]:
""" """
Extract incident tags, type, and location from a transcript via Gemini. Extract incident tags, type, location, and corrected transcript via Gemini.
Returns: Returns:
(tags, primary_type, location) (tags, primary_type, location)
Side-effect: updates calls/{call_id} in Firestore with tags, location, Side-effect: updates calls/{call_id} in Firestore with tags, location,
vehicles, units, severity; also stores the call embedding. vehicles, units, severity, transcript_corrected; also stores the call embedding.
""" """
result = await asyncio.to_thread(_sync_extract, transcript) result = await asyncio.to_thread(_sync_extract, transcript, talkgroup_name)
tags: list[str] = result.get("tags") or [] tags: list[str] = result.get("tags") or []
incident_type: Optional[str] = result.get("incident_type") or None incident_type: Optional[str] = result.get("incident_type") or None
@@ -55,6 +59,7 @@ async def extract_tags(
vehicles: list[str] = result.get("vehicles") or [] vehicles: list[str] = result.get("vehicles") or []
units: list[str] = result.get("units") or [] units: list[str] = result.get("units") or []
severity: str = result.get("severity") or "unknown" severity: str = result.get("severity") or "unknown"
transcript_corrected: Optional[str] = result.get("transcript_corrected") or None
if incident_type in ("unknown", "other", ""): if incident_type in ("unknown", "other", ""):
incident_type = None incident_type = None
@@ -74,6 +79,8 @@ async def extract_tags(
updates["units"] = units updates["units"] = units
if embedding: if embedding:
updates["embedding"] = embedding updates["embedding"] = embedding
if transcript_corrected:
updates["transcript_corrected"] = transcript_corrected
try: try:
await fstore.doc_set("calls", call_id, updates) await fstore.doc_set("calls", call_id, updates)
@@ -82,12 +89,13 @@ async def extract_tags(
logger.info( logger.info(
f"Intelligence: call {call_id} → type={incident_type}, " f"Intelligence: call {call_id} → type={incident_type}, "
f"tags={tags}, location={location!r}, severity={severity}" f"tags={tags}, location={location!r}, severity={severity}, "
f"corrected={transcript_corrected is not None}"
) )
return tags, incident_type, location return tags, incident_type, location
def _sync_extract(transcript: str) -> dict: def _sync_extract(transcript: str, talkgroup_name: Optional[str]) -> dict:
"""Call Gemini Flash and parse the JSON response.""" """Call Gemini Flash and parse the JSON response."""
from app.config import settings from app.config import settings
import google.generativeai as genai import google.generativeai as genai
@@ -102,8 +110,13 @@ def _sync_extract(transcript: str) -> dict:
generation_config={"response_mime_type": "application/json"}, generation_config={"response_mime_type": "application/json"},
) )
prompt = _PROMPT_TEMPLATE.format(
transcript=transcript,
talkgroup_name=talkgroup_name or "unknown",
)
try: try:
response = model.generate_content(_PROMPT_TEMPLATE.format(transcript=transcript)) response = model.generate_content(prompt)
return json.loads(response.text) return json.loads(response.text)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
logger.warning(f"Gemini returned non-JSON: {e}") logger.warning(f"Gemini returned non-JSON: {e}")
+22 -7
View File
@@ -11,17 +11,34 @@ from typing import Optional
from app.internal.logger import logger from app.internal.logger import logger
from app.internal import firestore as fstore from app.internal import firestore as fstore
# Whisper treats `prompt` as preceding transcript text, not instructions.
# Writing it as actual radio speech primes the vocabulary toward P25 codes
# and phrasing before the model hears the audio.
_WHISPER_PROMPT = (
"10-4. 10-23. 10-20. 10-97. 10-8. 10-7. 10-34. 10-50. 10-52. "
"Post 4, I'm out. Post 3. En route. On scene. In route. "
"Copy. Negative. Stand by. Be advised. Go ahead. "
"Units responding. Dispatch. Talkgroup. "
"Engine. Ladder. Medic. Rescue. Car. Unit. "
"MVA. MVC. Structure fire. Working fire."
)
async def transcribe_call(call_id: str, gcs_uri: str) -> Optional[str]:
async def transcribe_call(
call_id: str,
gcs_uri: str,
talkgroup_name: Optional[str] = None,
) -> Optional[str]:
""" """
Transcribe audio at the given GCS URI and store the result in Firestore. Transcribe audio at the given GCS URI and store the result in Firestore.
Args: Args:
call_id: Firestore document ID in the 'calls' collection. call_id: Firestore document ID in the 'calls' collection.
gcs_uri: GCS URI of the audio file, e.g. gs://bucket/calls/xyz.mp3 gcs_uri: GCS URI of the audio file, e.g. gs://bucket/calls/xyz.mp3
talkgroup_name: Passed through to the intelligence layer; unused here.
Returns: Returns:
The transcript string, or None if transcription failed / was skipped. The raw Whisper transcript string, or None if transcription failed.
""" """
if not gcs_uri or not gcs_uri.startswith("gs://"): if not gcs_uri or not gcs_uri.startswith("gs://"):
return None return None
@@ -53,11 +70,9 @@ def _sync_transcribe(gcs_uri: str) -> Optional[str]:
logger.warning("OPENAI_API_KEY not set — transcription disabled.") logger.warning("OPENAI_API_KEY not set — transcription disabled.")
return None return None
# Parse gs://bucket/path/to/file.mp3
without_scheme = gcs_uri[len("gs://"):] without_scheme = gcs_uri[len("gs://"):]
bucket_name, blob_path = without_scheme.split("/", 1) bucket_name, blob_path = without_scheme.split("/", 1)
# Download to a temp file
if settings.gcp_credentials_path: if settings.gcp_credentials_path:
creds = service_account.Credentials.from_service_account_file( creds = service_account.Credentials.from_service_account_file(
settings.gcp_credentials_path, settings.gcp_credentials_path,
@@ -83,7 +98,7 @@ def _sync_transcribe(gcs_uri: str) -> Optional[str]:
model="whisper-1", model="whisper-1",
file=f, file=f,
language="en", language="en",
prompt="Public safety radio communication. May include police codes, fire, EMS, talkgroup IDs, unit numbers, addresses.", prompt=_WHISPER_PROMPT,
) )
return response.text.strip() or None return response.text.strip() or None
finally: finally:
+2 -2
View File
@@ -104,14 +104,14 @@ async def _run_intelligence_pipeline(
# Step 1: Transcription # Step 1: Transcription
if gcs_uri: if gcs_uri:
transcript = await transcription.transcribe_call(call_id, gcs_uri) transcript = await transcription.transcribe_call(call_id, gcs_uri, talkgroup_name)
# Step 2: Intelligence extraction # Step 2: Intelligence extraction
tags: list[str] = [] tags: list[str] = []
incident_type: Optional[str] = None incident_type: Optional[str] = None
location: Optional[str] = None location: Optional[str] = None
if transcript: if transcript:
tags, incident_type, location = await intelligence.extract_tags(call_id, transcript) tags, incident_type, location = await intelligence.extract_tags(call_id, transcript, talkgroup_name)
# Step 3: Incident correlation # Step 3: Incident correlation
if incident_type: if incident_type:
+28 -5
View File
@@ -24,8 +24,11 @@ const TAG_COLORS: Record<string, string> = {
export function CallRow({ call, systemName }: Props) { export function CallRow({ call, systemName }: Props) {
const [expanded, setExpanded] = useState(false); const [expanded, setExpanded] = useState(false);
const [showOriginal, setShowOriginal] = useState(false);
const isActive = call.status === "active"; const isActive = call.status === "active";
const hasDetails = call.transcript || (call.tags && call.tags.length > 0) || call.incident_id; const hasDetails = call.transcript || call.transcript_corrected || (call.tags && call.tags.length > 0) || call.incident_id;
const displayTranscript = (!showOriginal && call.transcript_corrected) ? call.transcript_corrected : call.transcript;
const hasBoth = !!(call.transcript && call.transcript_corrected);
return ( return (
<> <>
@@ -101,10 +104,30 @@ export function CallRow({ call, systemName }: Props) {
)} )}
{/* Transcript */} {/* Transcript */}
{call.transcript ? ( {displayTranscript ? (
<pre className="text-xs text-gray-300 bg-gray-800 rounded-lg px-4 py-3 whitespace-pre-wrap font-mono leading-relaxed max-h-40 overflow-y-auto"> <div className="space-y-1">
{call.transcript} <div className="flex items-center gap-2">
</pre> {hasBoth && (
<span className="text-xs text-gray-600 font-mono">
{showOriginal ? "original" : "corrected"}
</span>
)}
{!hasBoth && call.transcript_corrected && (
<span className="text-xs text-gray-600 font-mono">corrected</span>
)}
</div>
<pre className="text-xs text-gray-300 bg-gray-800 rounded-lg px-4 py-3 whitespace-pre-wrap font-mono leading-relaxed max-h-40 overflow-y-auto">
{displayTranscript}
</pre>
{hasBoth && (
<button
onClick={(e) => { e.stopPropagation(); setShowOriginal((v) => !v); }}
className="text-xs text-gray-600 hover:text-gray-400 font-mono transition-colors"
>
{showOriginal ? "show corrected ↑" : "show original ↓"}
</button>
)}
</div>
) : ( ) : (
<p className="text-xs text-gray-600 font-mono italic">No transcript available.</p> <p className="text-xs text-gray-600 font-mono italic">No transcript available.</p>
)} )}
+1
View File
@@ -31,6 +31,7 @@ export interface CallRecord {
ended_at: string | null; ended_at: string | null;
audio_url: string | null; audio_url: string | null;
transcript: string | null; transcript: string | null;
transcript_corrected: string | null;
incident_id: string | null; incident_id: string | null;
location: { lat: number; lng: number } | null; location: { lat: number; lng: number } | null;
tags: string[]; tags: string[];