stt updates and intelligence updates
This commit is contained in:
@@ -22,8 +22,8 @@ class Settings(BaseSettings):
|
|||||||
|
|
||||||
# Gemini (intelligence extraction, embeddings, incident summaries)
|
# Gemini (intelligence extraction, embeddings, incident summaries)
|
||||||
gemini_api_key: Optional[str] = None
|
gemini_api_key: Optional[str] = None
|
||||||
summary_interval_minutes: int = 15 # how often the summary loop runs
|
summary_interval_minutes: int = 2 # how often the summary loop runs
|
||||||
correlation_window_hours: int = 4 # how far back to look for matching incidents
|
correlation_window_hours: int = 1 # how far back to look for matching incidents
|
||||||
embedding_similarity_threshold: float = 0.82 # cosine similarity cutoff for slow-path match
|
embedding_similarity_threshold: float = 0.82 # cosine similarity cutoff for slow-path match
|
||||||
|
|
||||||
# Internal service key — allows server-side services (discord bot) to call C2 without Firebase
|
# Internal service key — allows server-side services (discord bot) to call C2 without Firebase
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ from typing import Optional
|
|||||||
from app.internal.logger import logger
|
from app.internal.logger import logger
|
||||||
from app.internal import firestore as fstore
|
from app.internal import firestore as fstore
|
||||||
|
|
||||||
_PROMPT_TEMPLATE = """You are analyzing a P25 public safety radio transcript. Extract structured information and respond ONLY with a single valid JSON object — no markdown, no explanation.
|
_PROMPT_TEMPLATE = """You are analyzing a P25 public safety radio transcript. The audio was transcribed by Whisper through a digital radio vocoder, which introduces errors. Extract structured information and respond ONLY with a single valid JSON object — no markdown, no explanation.
|
||||||
|
|
||||||
Schema:
|
Schema:
|
||||||
{{
|
{{
|
||||||
@@ -21,7 +21,8 @@ Schema:
|
|||||||
"location": "most specific location string found, or empty string",
|
"location": "most specific location string found, or empty string",
|
||||||
"vehicles": [vehicle descriptions mentioned, e.g. "Hyundai Tucson", "black sedan"],
|
"vehicles": [vehicle descriptions mentioned, e.g. "Hyundai Tucson", "black sedan"],
|
||||||
"units": [unit IDs or officer numbers mentioned, e.g. "Unit 511", "Car 4"],
|
"units": [unit IDs or officer numbers mentioned, e.g. "Unit 511", "Car 4"],
|
||||||
"severity": one of "minor" | "moderate" | "major" | "unknown"
|
"severity": one of "minor" | "moderate" | "major" | "unknown",
|
||||||
|
"transcript_corrected": "corrected transcript string, or null if no corrections needed"
|
||||||
}}
|
}}
|
||||||
|
|
||||||
Rules:
|
Rules:
|
||||||
@@ -29,7 +30,9 @@ Rules:
|
|||||||
- tags: be specific and lowercase, hyphenated. Do not repeat incident_type as a tag.
|
- tags: be specific and lowercase, hyphenated. Do not repeat incident_type as a tag.
|
||||||
- units: only identifiers explicitly mentioned, not inferred.
|
- units: only identifiers explicitly mentioned, not inferred.
|
||||||
- Do not invent details not present in the transcript.
|
- Do not invent details not present in the transcript.
|
||||||
|
- transcript_corrected: fix only clear STT errors caused by vocoder distortion (e.g. "Several" → "10-4", misheard street names, garbled unit IDs). Keep all radio language as-is — do NOT decode codes into plain English. Return null if the transcript looks accurate.
|
||||||
|
|
||||||
|
Talkgroup: {talkgroup_name}
|
||||||
Transcript:
|
Transcript:
|
||||||
{transcript}"""
|
{transcript}"""
|
||||||
|
|
||||||
@@ -37,17 +40,18 @@ Transcript:
|
|||||||
async def extract_tags(
|
async def extract_tags(
|
||||||
call_id: str,
|
call_id: str,
|
||||||
transcript: str,
|
transcript: str,
|
||||||
|
talkgroup_name: Optional[str] = None,
|
||||||
) -> tuple[list[str], Optional[str], Optional[str]]:
|
) -> tuple[list[str], Optional[str], Optional[str]]:
|
||||||
"""
|
"""
|
||||||
Extract incident tags, type, and location from a transcript via Gemini.
|
Extract incident tags, type, location, and corrected transcript via Gemini.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
(tags, primary_type, location)
|
(tags, primary_type, location)
|
||||||
|
|
||||||
Side-effect: updates calls/{call_id} in Firestore with tags, location,
|
Side-effect: updates calls/{call_id} in Firestore with tags, location,
|
||||||
vehicles, units, severity; also stores the call embedding.
|
vehicles, units, severity, transcript_corrected; also stores the call embedding.
|
||||||
"""
|
"""
|
||||||
result = await asyncio.to_thread(_sync_extract, transcript)
|
result = await asyncio.to_thread(_sync_extract, transcript, talkgroup_name)
|
||||||
|
|
||||||
tags: list[str] = result.get("tags") or []
|
tags: list[str] = result.get("tags") or []
|
||||||
incident_type: Optional[str] = result.get("incident_type") or None
|
incident_type: Optional[str] = result.get("incident_type") or None
|
||||||
@@ -55,6 +59,7 @@ async def extract_tags(
|
|||||||
vehicles: list[str] = result.get("vehicles") or []
|
vehicles: list[str] = result.get("vehicles") or []
|
||||||
units: list[str] = result.get("units") or []
|
units: list[str] = result.get("units") or []
|
||||||
severity: str = result.get("severity") or "unknown"
|
severity: str = result.get("severity") or "unknown"
|
||||||
|
transcript_corrected: Optional[str] = result.get("transcript_corrected") or None
|
||||||
|
|
||||||
if incident_type in ("unknown", "other", ""):
|
if incident_type in ("unknown", "other", ""):
|
||||||
incident_type = None
|
incident_type = None
|
||||||
@@ -74,6 +79,8 @@ async def extract_tags(
|
|||||||
updates["units"] = units
|
updates["units"] = units
|
||||||
if embedding:
|
if embedding:
|
||||||
updates["embedding"] = embedding
|
updates["embedding"] = embedding
|
||||||
|
if transcript_corrected:
|
||||||
|
updates["transcript_corrected"] = transcript_corrected
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await fstore.doc_set("calls", call_id, updates)
|
await fstore.doc_set("calls", call_id, updates)
|
||||||
@@ -82,12 +89,13 @@ async def extract_tags(
|
|||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Intelligence: call {call_id} → type={incident_type}, "
|
f"Intelligence: call {call_id} → type={incident_type}, "
|
||||||
f"tags={tags}, location={location!r}, severity={severity}"
|
f"tags={tags}, location={location!r}, severity={severity}, "
|
||||||
|
f"corrected={transcript_corrected is not None}"
|
||||||
)
|
)
|
||||||
return tags, incident_type, location
|
return tags, incident_type, location
|
||||||
|
|
||||||
|
|
||||||
def _sync_extract(transcript: str) -> dict:
|
def _sync_extract(transcript: str, talkgroup_name: Optional[str]) -> dict:
|
||||||
"""Call Gemini Flash and parse the JSON response."""
|
"""Call Gemini Flash and parse the JSON response."""
|
||||||
from app.config import settings
|
from app.config import settings
|
||||||
import google.generativeai as genai
|
import google.generativeai as genai
|
||||||
@@ -102,8 +110,13 @@ def _sync_extract(transcript: str) -> dict:
|
|||||||
generation_config={"response_mime_type": "application/json"},
|
generation_config={"response_mime_type": "application/json"},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
prompt = _PROMPT_TEMPLATE.format(
|
||||||
|
transcript=transcript,
|
||||||
|
talkgroup_name=talkgroup_name or "unknown",
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = model.generate_content(_PROMPT_TEMPLATE.format(transcript=transcript))
|
response = model.generate_content(prompt)
|
||||||
return json.loads(response.text)
|
return json.loads(response.text)
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
logger.warning(f"Gemini returned non-JSON: {e}")
|
logger.warning(f"Gemini returned non-JSON: {e}")
|
||||||
|
|||||||
@@ -11,17 +11,34 @@ from typing import Optional
|
|||||||
from app.internal.logger import logger
|
from app.internal.logger import logger
|
||||||
from app.internal import firestore as fstore
|
from app.internal import firestore as fstore
|
||||||
|
|
||||||
|
# Whisper treats `prompt` as preceding transcript text, not instructions.
|
||||||
|
# Writing it as actual radio speech primes the vocabulary toward P25 codes
|
||||||
|
# and phrasing before the model hears the audio.
|
||||||
|
_WHISPER_PROMPT = (
|
||||||
|
"10-4. 10-23. 10-20. 10-97. 10-8. 10-7. 10-34. 10-50. 10-52. "
|
||||||
|
"Post 4, I'm out. Post 3. En route. On scene. In route. "
|
||||||
|
"Copy. Negative. Stand by. Be advised. Go ahead. "
|
||||||
|
"Units responding. Dispatch. Talkgroup. "
|
||||||
|
"Engine. Ladder. Medic. Rescue. Car. Unit. "
|
||||||
|
"MVA. MVC. Structure fire. Working fire."
|
||||||
|
)
|
||||||
|
|
||||||
async def transcribe_call(call_id: str, gcs_uri: str) -> Optional[str]:
|
|
||||||
|
async def transcribe_call(
|
||||||
|
call_id: str,
|
||||||
|
gcs_uri: str,
|
||||||
|
talkgroup_name: Optional[str] = None,
|
||||||
|
) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
Transcribe audio at the given GCS URI and store the result in Firestore.
|
Transcribe audio at the given GCS URI and store the result in Firestore.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
call_id: Firestore document ID in the 'calls' collection.
|
call_id: Firestore document ID in the 'calls' collection.
|
||||||
gcs_uri: GCS URI of the audio file, e.g. gs://bucket/calls/xyz.mp3
|
gcs_uri: GCS URI of the audio file, e.g. gs://bucket/calls/xyz.mp3
|
||||||
|
talkgroup_name: Passed through to the intelligence layer; unused here.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The transcript string, or None if transcription failed / was skipped.
|
The raw Whisper transcript string, or None if transcription failed or was skipped (e.g. gcs_uri is empty or not a gs:// URI).
|
||||||
"""
|
"""
|
||||||
if not gcs_uri or not gcs_uri.startswith("gs://"):
|
if not gcs_uri or not gcs_uri.startswith("gs://"):
|
||||||
return None
|
return None
|
||||||
@@ -53,11 +70,9 @@ def _sync_transcribe(gcs_uri: str) -> Optional[str]:
|
|||||||
logger.warning("OPENAI_API_KEY not set — transcription disabled.")
|
logger.warning("OPENAI_API_KEY not set — transcription disabled.")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Parse gs://bucket/path/to/file.mp3
|
|
||||||
without_scheme = gcs_uri[len("gs://"):]
|
without_scheme = gcs_uri[len("gs://"):]
|
||||||
bucket_name, blob_path = without_scheme.split("/", 1)
|
bucket_name, blob_path = without_scheme.split("/", 1)
|
||||||
|
|
||||||
# Download to a temp file
|
|
||||||
if settings.gcp_credentials_path:
|
if settings.gcp_credentials_path:
|
||||||
creds = service_account.Credentials.from_service_account_file(
|
creds = service_account.Credentials.from_service_account_file(
|
||||||
settings.gcp_credentials_path,
|
settings.gcp_credentials_path,
|
||||||
@@ -83,7 +98,7 @@ def _sync_transcribe(gcs_uri: str) -> Optional[str]:
|
|||||||
model="whisper-1",
|
model="whisper-1",
|
||||||
file=f,
|
file=f,
|
||||||
language="en",
|
language="en",
|
||||||
prompt="Public safety radio communication. May include police codes, fire, EMS, talkgroup IDs, unit numbers, addresses.",
|
prompt=_WHISPER_PROMPT,
|
||||||
)
|
)
|
||||||
return response.text.strip() or None
|
return response.text.strip() or None
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
@@ -104,14 +104,14 @@ async def _run_intelligence_pipeline(
|
|||||||
|
|
||||||
# Step 1: Transcription
|
# Step 1: Transcription
|
||||||
if gcs_uri:
|
if gcs_uri:
|
||||||
transcript = await transcription.transcribe_call(call_id, gcs_uri)
|
transcript = await transcription.transcribe_call(call_id, gcs_uri, talkgroup_name)
|
||||||
|
|
||||||
# Step 2: Intelligence extraction
|
# Step 2: Intelligence extraction
|
||||||
tags: list[str] = []
|
tags: list[str] = []
|
||||||
incident_type: Optional[str] = None
|
incident_type: Optional[str] = None
|
||||||
location: Optional[str] = None
|
location: Optional[str] = None
|
||||||
if transcript:
|
if transcript:
|
||||||
tags, incident_type, location = await intelligence.extract_tags(call_id, transcript)
|
tags, incident_type, location = await intelligence.extract_tags(call_id, transcript, talkgroup_name)
|
||||||
|
|
||||||
# Step 3: Incident correlation
|
# Step 3: Incident correlation
|
||||||
if incident_type:
|
if incident_type:
|
||||||
|
|||||||
@@ -24,8 +24,11 @@ const TAG_COLORS: Record<string, string> = {
|
|||||||
|
|
||||||
export function CallRow({ call, systemName }: Props) {
|
export function CallRow({ call, systemName }: Props) {
|
||||||
const [expanded, setExpanded] = useState(false);
|
const [expanded, setExpanded] = useState(false);
|
||||||
|
const [showOriginal, setShowOriginal] = useState(false);
|
||||||
const isActive = call.status === "active";
|
const isActive = call.status === "active";
|
||||||
const hasDetails = call.transcript || (call.tags && call.tags.length > 0) || call.incident_id;
|
const hasDetails = call.transcript || call.transcript_corrected || (call.tags && call.tags.length > 0) || call.incident_id;
|
||||||
|
const displayTranscript = (!showOriginal && call.transcript_corrected) ? call.transcript_corrected : call.transcript;
|
||||||
|
const hasBoth = !!(call.transcript && call.transcript_corrected);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
@@ -101,10 +104,30 @@ export function CallRow({ call, systemName }: Props) {
|
|||||||
)}
|
)}
|
||||||
|
|
||||||
{/* Transcript */}
|
{/* Transcript */}
|
||||||
{call.transcript ? (
|
{displayTranscript ? (
|
||||||
<pre className="text-xs text-gray-300 bg-gray-800 rounded-lg px-4 py-3 whitespace-pre-wrap font-mono leading-relaxed max-h-40 overflow-y-auto">
|
<div className="space-y-1">
|
||||||
{call.transcript}
|
<div className="flex items-center gap-2">
|
||||||
</pre>
|
{hasBoth && (
|
||||||
|
<span className="text-xs text-gray-600 font-mono">
|
||||||
|
{showOriginal ? "original" : "corrected"}
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
{!hasBoth && call.transcript_corrected && (
|
||||||
|
<span className="text-xs text-gray-600 font-mono">corrected</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<pre className="text-xs text-gray-300 bg-gray-800 rounded-lg px-4 py-3 whitespace-pre-wrap font-mono leading-relaxed max-h-40 overflow-y-auto">
|
||||||
|
{displayTranscript}
|
||||||
|
</pre>
|
||||||
|
{hasBoth && (
|
||||||
|
<button
|
||||||
|
onClick={(e) => { e.stopPropagation(); setShowOriginal((v) => !v); }}
|
||||||
|
className="text-xs text-gray-600 hover:text-gray-400 font-mono transition-colors"
|
||||||
|
>
|
||||||
|
{showOriginal ? "show corrected ↑" : "show original ↓"}
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
) : (
|
) : (
|
||||||
<p className="text-xs text-gray-600 font-mono italic">No transcript available.</p>
|
<p className="text-xs text-gray-600 font-mono italic">No transcript available.</p>
|
||||||
)}
|
)}
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ export interface CallRecord {
|
|||||||
ended_at: string | null;
|
ended_at: string | null;
|
||||||
audio_url: string | null;
|
audio_url: string | null;
|
||||||
transcript: string | null;
|
transcript: string | null;
|
||||||
|
transcript_corrected: string | null;
|
||||||
incident_id: string | null;
|
incident_id: string | null;
|
||||||
location: { lat: number; lng: number } | null;
|
location: { lat: number; lng: number } | null;
|
||||||
tags: string[];
|
tags: string[];
|
||||||
|
|||||||
Reference in New Issue
Block a user