8b660d8e10
Correlator
- Raise fast-path idle gate 30 → 90 min (tg_fast_path_idle_minutes)
- Fix disambiguate always-commits bug: run _call_fits_incident on winner
before committing; fall through to new-incident creation if it fails
- Add unit-continuity path (path 1.5): matches all_active by shared unit
IDs with a reassignment guard, bridges calls past the idle gate
- Add tag-based incident_type inference (_TAG_TYPE_HINTS) as GPT fallback,
rescuing tagged calls that would have been dropped (616 observed orphans)
- Add master/child incident model: _create_master_incident, _demote_to_child,
_add_child_to_master; new incidents stamped incident_type="master"
- Add cross-system parent detection (_find_cross_system_parent): two-signal
scoring (road overlap=0.4, embedding≥0.78=0.3, proximity=0.3, threshold=0.5)
wired into create-if-new path; creates master shell on first cross-system match
- Add maybe_resolve_parent: auto-resolves master when all children close;
called from upload pipeline (LLM closure) and summarizer stale sweep
- Add signal-based auto-resolve via units_active/units_cleared tracking:
GPT now extracts cleared_units per scene; _update_incident moves units
between active/cleared lists and resolves the incident when active empties;
stored on call doc for re-correlation sweep reuse
- Add _create_incident initialization of units_active/units_cleared fields
Re-correlation sweep
- Add corr_sweep_count + MAX_SWEEP_ATTEMPTS=3: orphans get 3 attempts
then are tombstoned as corr_path="unlinked", ending the re-sweep loop
(previously hammering each orphan 29-31 times per shift)
Intelligence extraction
- Add cleared_units to GPT prompt schema and rules
- Extract and propagate cleared_units per scene; merge across scenes;
store on call doc for re-correlation sweep
Token management
- Fix token release bug: remove release_token call on discord_connected=False
in MQTT checkin (transient Discord drops were orphaning bots mid-shift)
- Add PUT /tokens/{id}/prefer/{system_id} endpoint: lock a bot token to a
system; pass _none as system_id to clear; stored bidirectionally on both
token and system documents
- discord_join handler resolves preferred_token_id from system doc and passes
system_name in MQTT payload
267 lines · 10 KiB · Python
import secrets
from typing import Optional

from fastapi import APIRouter, BackgroundTasks, UploadFile, File, Form, HTTPException, Security
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials

from app.internal import firestore as fstore
from app.internal.logger import logger
from app.internal.storage import upload_audio
|
|
|
|
router = APIRouter(tags=["upload"])
|
|
|
|
_bearer = HTTPBearer(auto_error=False)
|
|
|
|
|
|
@router.post("/upload")
|
|
async def upload_call_audio(
|
|
background_tasks: BackgroundTasks,
|
|
file: UploadFile = File(...),
|
|
call_id: str = Form(...),
|
|
node_id: str = Form(...),
|
|
talkgroup_id: Optional[int] = Form(None),
|
|
talkgroup_name: Optional[str] = Form(None),
|
|
system_id: Optional[str] = Form(None),
|
|
credentials: Optional[HTTPAuthorizationCredentials] = Security(_bearer),
|
|
):
|
|
"""
|
|
Receive an audio recording from an edge node.
|
|
Upload to GCS, update the call document in Firestore with the audio URL,
|
|
then kick off the intelligence pipeline as a background task.
|
|
"""
|
|
# Verify the per-node API key
|
|
if not credentials:
|
|
raise HTTPException(401, "Missing authorization")
|
|
key_doc = await fstore.doc_get("node_keys", node_id)
|
|
if not key_doc:
|
|
logger.warning(f"Upload 401: no key_doc in Firestore for node_id={node_id!r}")
|
|
raise HTTPException(401, "Invalid node API key")
|
|
if key_doc.get("api_key") != credentials.credentials:
|
|
logger.warning(
|
|
f"Upload 401: key mismatch for node_id={node_id!r} "
|
|
f"(received prefix: {credentials.credentials[:8]}...)"
|
|
)
|
|
raise HTTPException(401, "Invalid node API key")
|
|
|
|
data = await file.read()
|
|
if not data:
|
|
raise HTTPException(400, "Empty file.")
|
|
|
|
filename = file.filename
|
|
audio_url = await upload_audio(data, filename)
|
|
|
|
if audio_url:
|
|
try:
|
|
await fstore.doc_set("calls", call_id, {"audio_url": audio_url})
|
|
except Exception as e:
|
|
logger.warning(f"Could not update call {call_id} with audio_url: {e}")
|
|
|
|
# Convert public GCS URL to gs:// URI for Speech-to-Text
|
|
gcs_uri = _public_url_to_gcs_uri(audio_url)
|
|
|
|
background_tasks.add_task(
|
|
_run_intelligence_pipeline,
|
|
call_id=call_id,
|
|
node_id=node_id,
|
|
system_id=system_id,
|
|
talkgroup_id=talkgroup_id,
|
|
talkgroup_name=talkgroup_name,
|
|
gcs_uri=gcs_uri,
|
|
)
|
|
|
|
return {"url": audio_url}
|
|
|
|
|
|
def _public_url_to_gcs_uri(url: str) -> Optional[str]:
|
|
"""
|
|
Convert a public GCS URL (possibly signed) like
|
|
https://storage.googleapis.com/bucket/calls/file.mp3?Expires=...
|
|
to a gs:// URI usable by Speech-to-Text.
|
|
Returns None if the URL doesn't look like a GCS URL.
|
|
"""
|
|
prefix = "https://storage.googleapis.com/"
|
|
if url and url.startswith(prefix):
|
|
path = url[len(prefix):].split("?")[0] # strip signed-URL query params
|
|
return "gs://" + path
|
|
return None
|
|
|
|
|
|
async def _run_extraction_pipeline(
|
|
call_id: str,
|
|
node_id: str,
|
|
system_id: Optional[str],
|
|
talkgroup_id: Optional[int],
|
|
talkgroup_name: Optional[str],
|
|
transcript: str,
|
|
segments: Optional[list] = None,
|
|
preserve_transcript_correction: bool = False,
|
|
) -> None:
|
|
"""Run steps 2-4 of the intelligence pipeline using an existing transcript."""
|
|
from app.internal import intelligence, incident_correlator, alerter
|
|
|
|
# Step 2: Scene detection + intelligence extraction.
|
|
# Returns one scene per distinct incident detected in the recording.
|
|
scenes = await intelligence.extract_scenes(
|
|
call_id, transcript, talkgroup_name,
|
|
talkgroup_id=talkgroup_id, system_id=system_id, segments=segments,
|
|
node_id=node_id,
|
|
preserve_transcript_correction=preserve_transcript_correction,
|
|
)
|
|
|
|
# Step 3: Correlate each scene to an incident independently.
|
|
incident_ids: list[str] = []
|
|
all_tags: list[str] = []
|
|
for scene in scenes:
|
|
all_tags.extend(scene["tags"])
|
|
# When dispatch is pulling a unit to a NEW call (reassignment), suppress unit
|
|
# overlap so the new scene doesn't chain into the unit's previous incident.
|
|
corr_units = [] if scene.get("reassignment") else scene.get("units")
|
|
incident_id = await incident_correlator.correlate_call(
|
|
call_id=call_id,
|
|
node_id=node_id,
|
|
system_id=system_id,
|
|
talkgroup_id=talkgroup_id,
|
|
talkgroup_name=talkgroup_name,
|
|
tags=scene["tags"],
|
|
incident_type=scene["incident_type"],
|
|
location=scene["location"],
|
|
location_coords=scene["location_coords"],
|
|
units=corr_units,
|
|
vehicles=scene.get("vehicles"),
|
|
cleared_units=scene.get("cleared_units"),
|
|
)
|
|
if incident_id and incident_id not in incident_ids:
|
|
incident_ids.append(incident_id)
|
|
if scene["resolved"] and incident_id:
|
|
await fstore.doc_set("incidents", incident_id, {"status": "resolved"})
|
|
await incident_correlator.maybe_resolve_parent(incident_id)
|
|
logger.info(f"Auto-resolved incident {incident_id} (LLM closure detection)")
|
|
|
|
if incident_ids:
|
|
await fstore.doc_set("calls", call_id, {"incident_ids": incident_ids})
|
|
|
|
# Step 4: Alert dispatch — run once with merged tags from all scenes.
|
|
await alerter.check_and_dispatch(
|
|
call_id=call_id,
|
|
node_id=node_id,
|
|
talkgroup_id=talkgroup_id,
|
|
talkgroup_name=talkgroup_name,
|
|
tags=list(dict.fromkeys(all_tags)),
|
|
transcript=transcript,
|
|
)
|
|
|
|
|
|
async def _run_intelligence_pipeline(
    call_id: str,
    node_id: str,
    system_id: Optional[str],
    talkgroup_id: Optional[int],
    talkgroup_name: Optional[str],
    gcs_uri: Optional[str],
) -> None:
    """
    Post-upload intelligence pipeline (runs as a background task):

    1. Transcribe audio via Google STT
    2. Detect scenes + extract intelligence (one result per incident in recording)
    3. Correlate each scene with existing incidents (or create new ones)
    4. Check alert rules and dispatch notifications
    """
    from app.internal import transcription, intelligence, incident_correlator, alerter
    from app.internal.feature_flags import get_flags

    flags = await get_flags()

    # Resolve per-system overrides: system flag=False beats global flag=True,
    # but global flag=False beats everything (master switch).
    system_ai_flags: dict = {}
    if system_id:
        sys_doc = await fstore.doc_get_cached("systems", system_id)
        system_ai_flags = (sys_doc or {}).get("ai_flags") or {}

    def _flag(name: str) -> bool:
        """Effective flag value: global master AND per-system override."""
        if not flags[name]:  # global master off
            return False
        return system_ai_flags.get(name, True)  # system override, default inherit

    transcript: Optional[str] = None
    segments: list[dict] = []

    # Step 1: Transcription
    if gcs_uri:
        if _flag("stt_enabled"):
            transcript, segments = await transcription.transcribe_call(
                call_id, gcs_uri, talkgroup_name, system_id=system_id
            )
        else:
            scope = "globally" if not flags["stt_enabled"] else f"system {system_id}"
            logger.info(f"STT disabled ({scope}) — skipping transcription for call {call_id}")

    # Step 2: Scene detection + intelligence extraction
    scenes: list[dict] = []
    if _flag("correlation_enabled"):
        if transcript:
            scenes = await intelligence.extract_scenes(
                call_id, transcript, talkgroup_name,
                talkgroup_id=talkgroup_id, system_id=system_id, segments=segments,
                node_id=node_id,
            )
    else:
        scope = "globally" if not flags["correlation_enabled"] else f"system {system_id}"
        logger.info(f"Correlation disabled ({scope}) — skipping scene extraction and correlation for call {call_id}")

    # Step 3: Correlate each scene independently.
    # A single recording can produce multiple incidents on a busy channel.
    incident_ids: list[str] = []
    all_tags: list[str] = []
    # BUGFIX: gate on _flag(...) (global AND per-system), not the raw global
    # flags["correlation_enabled"]. With the raw flag, a system whose override
    # disabled correlation produced no scenes in step 2 but still fell into
    # the no-scenes talkgroup-linking branch below, correlating anyway.
    if _flag("correlation_enabled"):
        for scene in scenes:
            all_tags.extend(scene["tags"])
            # Suppress unit overlap on reassignment so the new scene doesn't
            # chain into the unit's previous incident.
            corr_units = [] if scene.get("reassignment") else scene.get("units")
            incident_id = await incident_correlator.correlate_call(
                call_id=call_id,
                node_id=node_id,
                system_id=system_id,
                talkgroup_id=talkgroup_id,
                talkgroup_name=talkgroup_name,
                tags=scene["tags"],
                incident_type=scene["incident_type"],
                location=scene["location"],
                location_coords=scene["location_coords"],
                units=corr_units,
                vehicles=scene.get("vehicles"),
                cleared_units=scene.get("cleared_units"),
            )
            if incident_id and incident_id not in incident_ids:
                incident_ids.append(incident_id)
            if scene["resolved"] and incident_id:
                await fstore.doc_set("incidents", incident_id, {"status": "resolved"})
                await incident_correlator.maybe_resolve_parent(incident_id)
                logger.info(f"Auto-resolved incident {incident_id} (LLM closure detection)")

        # Correlator also runs for calls with no scenes (unclassified) to attempt
        # talkgroup-based linking even when no transcript could be produced.
        if not scenes:
            incident_id = await incident_correlator.correlate_call(
                call_id=call_id,
                node_id=node_id,
                system_id=system_id,
                talkgroup_id=talkgroup_id,
                talkgroup_name=talkgroup_name,
                tags=[],
                incident_type=None,
                location=None,
                location_coords=None,
            )
            if incident_id:
                incident_ids.append(incident_id)

    if incident_ids:
        await fstore.doc_set("calls", call_id, {"incident_ids": incident_ids})

    # Step 4: Alert dispatch (always runs — talkgroup ID rules don't need a transcript)
    await alerter.check_and_dispatch(
        call_id=call_id,
        node_id=node_id,
        talkgroup_id=talkgroup_id,
        talkgroup_name=talkgroup_name,
        tags=list(dict.fromkeys(all_tags)),
        transcript=transcript,
    )
|