Updates to intel and correlation
@@ -24,7 +24,8 @@ class Settings(BaseSettings):
    gemini_api_key: Optional[str] = None
    summary_interval_minutes: int = 2  # how often the summary loop runs
    correlation_window_hours: int = 2  # slow/location path: max hours since last call
    embedding_similarity_threshold: float = 0.93  # slow-path cosine threshold (tiebreaker only)
    embedding_similarity_threshold: float = 0.93  # slow-path: requires location corroboration
    embedding_no_location_threshold: float = 0.97  # slow-path: match without location (very high bar)
    location_proximity_km: float = 0.5  # radius for location-proximity matching
    incident_auto_resolve_minutes: int = 90  # auto-resolve after N minutes with no new calls
    recorrelation_scan_minutes: int = 15  # re-examine orphaned calls ended within this window
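
Worth noting: since these knobs live on a pydantic BaseSettings class, each of them can presumably be overridden through an environment variable of the same name (pydantic-settings matches env vars to fields case-insensitively by default). A sketch with illustrative values, not recommendations:

# Hypothetical environment overrides (names map to the fields above; values illustrative only):
#   EMBEDDING_SIMILARITY_THRESHOLD=0.90   # loosen the corroborated tier
#   EMBEDDING_NO_LOCATION_THRESHOLD=0.98  # tighten the no-location tier
#   LOCATION_PROXIMITY_KM=1.0             # widen the same-scene radius
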
@@ -79,7 +79,25 @@ async def correlate_call(

    matched_incident: Optional[dict] = None

    # A "thin" call carries no scene-identifying information — it is a pure
    # status transmission (10-4, en route, acknowledgement). Detected by the
    # absence of extracted units, vehicles, AND geocoded coordinates. Thin
    # calls should link to wherever the last active conversation on this TGID
    # was happening rather than running the full scene-verification logic.
    is_thin_call = not call_units and not call_vehicles and not coords

    # ── 1. Fast path: talkgroup match (any type, no time limit) ──────────────
    #
    # Two distinct behaviours depending on call substance:
    #
    #   • Thin call → link to the most-recently-updated active incident on this
    #     TGID (i.e. the last conversation in progress).
    #
    #   • Substantive call (has location / units / vehicles) → verify the call
    #     actually belongs to the matched incident before linking. When a busy
    #     dispatch channel runs multiple concurrent scenes (different addresses,
    #     different units) we split them into separate incidents rather than
    #     merging everything because they share a talkgroup.
    if talkgroup_id is not None and system_id:
        tg_str = str(talkgroup_id)
        tg_matches = [
@@ -87,11 +105,30 @@ async def correlate_call(
            if system_id in (inc.get("system_ids") or [])
            and tg_str in (inc.get("talkgroup_ids") or [])
        ]
        if len(tg_matches) == 1:
            matched_incident = tg_matches[0]

        if tg_matches and is_thin_call:
            # Status/ack call — no scene data to reason about.
            # Attach to whichever incident was most recently active on this TGID.
            matched_incident = max(tg_matches, key=lambda inc: inc.get("updated_at", ""))
            logger.info(
                f"Correlator fast-path: call {call_id} → {tg_matches[0]['incident_id']}"
                f"Correlator fast-path (thin→last TGID incident): "
                f"call {call_id} → {matched_incident['incident_id']}"
            )
        elif len(tg_matches) == 1:
            candidate = tg_matches[0]
            if _call_fits_incident(
                candidate, call_units, call_vehicles, coords, settings.location_proximity_km
            ):
                matched_incident = candidate
                logger.info(
                    f"Correlator fast-path: call {call_id} → {candidate['incident_id']}"
                )
            else:
                logger.info(
                    f"Correlator fast-path skipped: call {call_id} — different scene "
                    f"from {candidate['incident_id']} (no unit overlap + distant location); "
                    f"will attempt new incident"
                )
        elif len(tg_matches) > 1:
            matched_incident = _disambiguate(
                tg_matches, call_units, call_vehicles, coords, call_embedding
@@ -119,7 +156,13 @@ async def correlate_call(
            )
            break

    # ── 3. Slow path: embedding + location corroboration (time-limited, same type) ──
    # ── 3. Slow path: embedding similarity (time-limited, same type) ──────────
    #
    # Two tiers:
    #   ① embedding_similarity_threshold + location corroboration (standard)
    #   ② embedding_no_location_threshold alone — when geocoding failed on
    #      either side but the transcript content is semantically very close.
    #      A strong embedding match beats a missing geocode.
    if not matched_incident and call_embedding and incident_type:
        best_score = 0.0
        best_inc: Optional[dict] = None
@@ -147,7 +190,14 @@ async def correlate_call(
                f"Correlator slow-path: call {call_id} → {best_inc['incident_id']} "
                f"(sim={best_score:.3f}, dist={dist_km:.2f}km)"
            )
        # No coords available → slow path alone is not enough; skip
        elif best_score >= settings.embedding_no_location_threshold:
            # High-confidence semantic match; geocode unavailable on one or
            # both sides — content similarity alone is sufficient evidence.
            matched_incident = best_inc
            logger.info(
                f"Correlator slow-path (high-confidence, no location): "
                f"call {call_id} → {best_inc['incident_id']} (sim={best_score:.3f})"
            )
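
The hunk above elides the candidate loop, so here is a minimal sketch of the two-tier acceptance rule the comments describe. The helper names and the use of location_proximity_km as the corroboration radius are assumptions for illustration, not the shipped code:

import math
from typing import Optional

def _cosine(a: list[float], b: list[float]) -> float:
    # Plain cosine similarity between two embedding vectors.
    dot = sum(x * y for x, y in zip(a, b))
    na = math.sqrt(sum(x * x for x in a))
    nb = math.sqrt(sum(y * y for y in b))
    return dot / (na * nb) if na and nb else 0.0

def _slow_path_accepts(sim: float, dist_km: Optional[float], settings) -> bool:
    # Tier ①: embedding match corroborated by a nearby geocoded location.
    # (Corroboration radius assumed to be settings.location_proximity_km.)
    if dist_km is not None:
        return (sim >= settings.embedding_similarity_threshold
                and dist_km <= settings.location_proximity_km)
    # Tier ②: geocode missing on one or both sides; require near-identical content.
    return sim >= settings.embedding_no_location_threshold
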
    # ── Update existing or create new ────────────────────────────────────────
    if matched_incident:
@@ -167,7 +217,6 @@ async def correlate_call(
        # No match and either no type or creation suppressed — nothing to do
        return None

    await fstore.doc_set("calls", call_id, {"incident_id": incident_id})
    return incident_id


@@ -254,6 +303,62 @@ def _disambiguate(
    return best


def _call_fits_incident(
    inc: dict,
    call_units: list[str],
    call_vehicles: list[str],
    call_coords: Optional[dict],
    proximity_km: float,
) -> bool:
    """
    Return True if this call plausibly belongs to the given incident.

    This guards the single-talkgroup-match fast path on busy dispatch channels
    where multiple concurrent scenes share one talkgroup. We only return False
    (→ create a new incident) when there is *positive evidence* of a different
    scene: a geocoded location that is too far away AND no unit or vehicle
    overlap. In all ambiguous cases we default to True (link) to avoid
    fragmenting short status calls that carry no location or unit information.

    Examples that correctly split:
      - Police dispatch sends units to two separate MVAs miles apart
      - EMS handles overlapping aided cases at different addresses

    Examples that correctly stay together (domestic with split parties):
      - Units at 10 Main St and 12 Main St — within proximity radius → True
      - Same unit mentioned in both the call and the incident → True
    """
    # Unit overlap is the strongest positive signal: same officers = same call.
    inc_units = set(inc.get("units") or [])
    if inc_units and call_units and any(u in inc_units for u in call_units):
        return True

    # Vehicle overlap: same vehicle description across calls → same scene.
    inc_vehicles = set(inc.get("vehicles") or [])
    if inc_vehicles and call_vehicles and any(v in inc_vehicles for v in call_vehicles):
        return True

    # When both sides have geocoded coordinates, distance is the tiebreaker.
    inc_coords = inc.get("location_coords")
    if call_coords and inc_coords:
        dist_km = _haversine_km(
            call_coords["lat"], call_coords["lng"],
            inc_coords["lat"], inc_coords["lng"],
        )
        # Within proximity radius → same scene (handles domestics with nearby split parties).
        if dist_km <= proximity_km:
            return True
        # Different location AND no unit/vehicle overlap → different incident.
        return False

    # No geocoded location on one or both sides but the call IS substantive
    # (has units or vehicles — otherwise is_thin_call would have caught it).
    # Unit overlap already returned True above if present. If we reach here
    # there is no overlap and no coords to compare — we cannot prove it is a
    # different scene, so default to linking rather than fragmenting.
    return True
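
_haversine_km is called above but defined outside this hunk; the standard great-circle formula it presumably implements looks like this sketch:

import math

def _haversine_km(lat1: float, lng1: float, lat2: float, lng2: float) -> float:
    """Great-circle distance in km between two WGS84 points (haversine formula)."""
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlmb = math.radians(lng2 - lng1)
    a = math.sin(dphi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlmb / 2) ** 2
    return 2 * 6371.0 * math.asin(math.sqrt(a))  # 6371 km is the mean Earth radius

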
async def _update_incident(
    inc: dict,
    call_id: str,
@@ -313,6 +418,11 @@ async def _update_incident(
    if best_coords:
        updates["location_coords"] = best_coords

    # Update incident type when a re-classified call provides a concrete type.
    # This handles the case where admin correction changes fire→police, etc.
    if incident_type and incident_type != inc.get("type"):
        updates["type"] = incident_type

    # Re-evaluate title when a substantive call (classified incident_type) brings new tags.
    # Routine status calls (type=None) do not clobber the title.
    if incident_type:

@@ -1,8 +1,10 @@
"""
GPT-4o-mini intelligence extraction from call transcripts.

Sends the transcript to GPT-4o mini with a tight JSON schema prompt.
Returns structured data: incident type, tags, location, vehicles, units, severity.
Sends the transcript to GPT-4o-mini with a structured prompt that detects
whether the recording contains one or multiple distinct scenes (back-to-back
dispatch conversations on a busy channel). Returns a list of scene dicts —
one per detected incident. Most calls produce a single scene.

Falls back gracefully if the API is unavailable or returns malformed output.
"""
@@ -13,30 +15,37 @@ from typing import Optional
from app.internal.logger import logger
from app.internal import firestore as fstore

_PROMPT_TEMPLATE = """You are analyzing a P25 public safety radio recording. The audio was transcribed by Whisper through a digital radio vocoder, which introduces errors. Each numbered transmission is a separate PTT press from a different radio. Extract structured information and respond ONLY with a single valid JSON object — no markdown, no explanation.
_PROMPT_TEMPLATE = """You are analyzing a P25 public safety radio recording. The audio was transcribed by Whisper through a digital radio vocoder, which introduces errors. Each numbered transmission is a separate PTT press from a different radio.

Schema:
{{
  "incident_type": one of "fire" | "ems" | "police" | "accident" | "other" | "unknown",
  "tags": [list of specific descriptive tags, max 6, e.g. "two-car mva", "property-damage-only", "working fire", "shots-fired"],
  "location": "most specific location string found, or empty string",
  "vehicles": [vehicle descriptions mentioned, e.g. "Hyundai Tucson", "black sedan"],
  "units": [unit IDs or officer numbers mentioned, e.g. "Unit 511", "Car 4"],
  "severity": one of "minor" | "moderate" | "major" | "unknown",
  "resolved": true if this call explicitly signals the incident is over ("Code 4", "in custody", "all clear", "fire out", "patient transported", "GOA", "scene clear", "10-42", "negative contact", "clear the scene"), false otherwise,
  "transcript_corrected": "corrected full transcript string, or null if no corrections needed"
}}
SCENE DETECTION:
A busy dispatch channel sometimes captures back-to-back conversations about multiple concurrent incidents in a single recording. Detect whether this recording contains ONE scene (all transmissions relate to a single event) or MULTIPLE scenes (clearly distinct dispatch conversations with different units being assigned, different locations, different event types). Assign short status transmissions (10-4, en route, acknowledgements) with no clear scene context to the most recent scene before them in the list.

Always respond with the scenes array, even for a single scene.

Response format — a JSON object with a "scenes" array. Each scene:
  segment_indices: list of 0-based indices into the numbered transmissions (or null if no segments)
  incident_type: one of "fire" | "ems" | "police" | "accident" | "other" | "unknown"
  tags: list of specific descriptive tags, max 6, e.g. "two-car mva", "working fire", "shots-fired"
  location: most specific location string found, or empty string
  vehicles: list of vehicle descriptions mentioned
  units: list of unit IDs or officer numbers explicitly mentioned
  severity: one of "minor" | "moderate" | "major" | "unknown"
  resolved: true if this scene explicitly signals incident closure, false otherwise
  transcript_corrected: corrected text for this scene's transmissions only, or null

Rules:
- location: prefer intersections > addresses > mile markers > route+town > route alone > town alone. Empty string if none.
- tags: be specific and lowercase, hyphenated. Do not repeat incident_type as a tag.
- tags: specific, lowercase, hyphenated. Do not repeat incident_type as a tag.
- units: only identifiers explicitly mentioned, not inferred.
- Do not invent details not present in the transcript.
- transcript_corrected: fix only clear STT errors caused by vocoder distortion (e.g. "Several" → "10-4", misheard street names, garbled unit IDs). Use the back-and-forth context between transmissions to resolve ambiguities. Keep all radio language as-is — do NOT decode codes into plain English. Return null if the transcript looks accurate.
- incident_type: let the talkgroup channel be your primary signal. Use "fire" ONLY if the talkgroup is clearly a fire/rescue channel OR the transcript explicitly describes active fire, smoke, flames, or structure fire activation. Police or EMS referencing a fire scene → use "police" or "ems". When uncertain, prefer "other" over "fire".
- ten_codes: interpret radio codes using the department reference provided below. Do not guess codes not listed.
- resolved: true only when the scene explicitly signals "Code 4", "all clear", "10-42", "in custody", "patient transported", "fire out", "GOA", "negative contact", "scene clear".
- transcript_corrected: fix only clear STT/vocoder errors (e.g. "Several" → "10-4", misheard street names, garbled unit IDs). Keep all radio language as-is — do NOT decode codes into plain English. Return null if accurate.

System: {system_id}
Talkgroup: {talkgroup_name}
{vocabulary_block}{transcript_block}"""
{ten_codes_block}{vocabulary_block}{transcript_block}"""
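
An illustrative (made-up) response in the scenes format above, for a busy channel carrying an MVA plus an unrelated police scene:

{"scenes": [
    {"segment_indices": [0, 1, 3], "incident_type": "accident",
     "tags": ["two-car mva", "property-damage-only"], "location": "Main St and Route 9",
     "vehicles": ["black sedan"], "units": ["Car 4"], "severity": "minor",
     "resolved": false, "transcript_corrected": null},
    {"segment_indices": [2, 4], "incident_type": "police",
     "tags": ["shots-fired"], "location": "", "vehicles": [], "units": ["Unit 511"],
     "severity": "major", "resolved": false, "transcript_corrected": null}
]}
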
# Nominatim viewbox half-width in degrees (~11 km at mid-latitudes)
_GEO_DELTA = 0.1
@@ -54,7 +63,14 @@ _TG_SUFFIX_RE = re.compile(
)


async def extract_tags(
def _build_ten_codes_block(ten_codes: dict[str, str]) -> str:
    if not ten_codes:
        return ""
    lines = "\n".join(f" {code}: {meaning}" for code, meaning in sorted(ten_codes.items()))
    return f"Department ten-codes:\n{lines}\n\n"


async def extract_scenes(
    call_id: str,
    transcript: str,
    talkgroup_name: Optional[str] = None,
@@ -63,84 +79,128 @@ async def extract_tags(
    segments: Optional[list[dict]] = None,
    node_id: Optional[str] = None,
    preserve_transcript_correction: bool = False,
) -> tuple[list[str], Optional[str], Optional[str], Optional[dict], bool]:
) -> list[dict]:
    """
    Extract incident tags, type, location, corrected transcript, and closure signal via GPT-4o mini.
    Geocodes the extracted location string via Nominatim using the node's position as bias.
    Split the transcript into one or more scenes and extract structured
    intelligence for each. Most calls return a single scene; a busy dispatch
    channel capturing back-to-back conversations returns multiple.

    Returns:
        (tags, primary_type, location_str, location_coords, resolved)
        where location_coords is {"lat": float, "lng": float} or None,
        and resolved is True when the transcript signals incident closure.
        Each scene dict contains:
            tags, incident_type, location, location_coords, resolved,
            severity, vehicles, units, transcript_corrected,
            segment_indices, embedding

    Side-effect: updates calls/{call_id} in Firestore with tags, location,
    location_coords, vehicles, units, severity, transcript_corrected; also stores embedding.
    Side-effect: updates calls/{call_id} in Firestore with merged tags,
    location (primary scene), units/vehicles, severity, embedding, and
    optionally transcript_corrected.
    """
    # Load per-system vocabulary for prompt injection
    vocabulary: list[str] = []
    ten_codes: dict[str, str] = {}
    if system_id:
        from app.internal.vocabulary_learner import get_vocabulary
        vocab_data = await get_vocabulary(system_id)
        vocabulary = vocab_data.get("vocabulary") or []
        system_doc = await fstore.doc_get("systems", system_id)
        if system_doc:
            ten_codes = system_doc.get("ten_codes") or {}

    result = await asyncio.to_thread(
        _sync_extract, transcript, talkgroup_name, talkgroup_id, system_id, segments, vocabulary
    raw_scenes: list[dict] = await asyncio.to_thread(
        _sync_extract,
        transcript, talkgroup_name, talkgroup_id, system_id, segments, vocabulary, ten_codes,
    )

    tags: list[str] = result.get("tags") or []
    incident_type: Optional[str] = result.get("incident_type") or None
    location: Optional[str] = result.get("location") or None
    vehicles: list[str] = result.get("vehicles") or []
    units: list[str] = result.get("units") or []
    severity: str = result.get("severity") or "unknown"
    resolved: bool = bool(result.get("resolved", False))
    transcript_corrected: Optional[str] = result.get("transcript_corrected") or None
    if not raw_scenes:
        return []

    if incident_type in ("unknown", "other", ""):
        incident_type = None

    # Geocode the location string if we have one and a node to bias toward
    location_coords: Optional[dict] = None
    if location and node_id:
    # Resolve node position once for geocoding all scenes
    node_lat: Optional[float] = None
    node_lon: Optional[float] = None
    if node_id:
        node_doc = await fstore.doc_get("nodes", node_id)
        if node_doc:
            node_lat = node_doc.get("lat")
            node_lon = node_doc.get("lon")
    if node_lat is not None and node_lon is not None:
        state = await _get_node_state(node_id, node_lat, node_lon)
        muni = _municipality_from_tg(talkgroup_name)
        hint_parts = [p for p in [muni, state] if p]
        query = f"{location}, {', '.join(hint_parts)}" if hint_parts else location
        location_coords = await _geocode_location(query, node_lat, node_lon)

    # Store embedding alongside structured data
    embedding = await asyncio.to_thread(_sync_embed, _embed_text(transcript, incident_type))
    processed: list[dict] = []
    for scene in raw_scenes:
        tags: list[str] = scene.get("tags") or []
        incident_type: Optional[str] = scene.get("incident_type") or None
        location: Optional[str] = scene.get("location") or None
        vehicles: list[str] = scene.get("vehicles") or []
        units: list[str] = scene.get("units") or []
        severity: str = scene.get("severity") or "unknown"
        resolved: bool = bool(scene.get("resolved", False))
        transcript_corrected: Optional[str] = scene.get("transcript_corrected") or None
        segment_indices: Optional[list] = scene.get("segment_indices")

    updates: dict = {"tags": tags, "severity": severity}
    if location:
        updates["location"] = location
    if location_coords:
        updates["location_coords"] = location_coords
    if vehicles:
        updates["vehicles"] = vehicles
    if units:
        updates["units"] = units
    if embedding:
        updates["embedding"] = embedding
    if transcript_corrected and not preserve_transcript_correction:
        updates["transcript_corrected"] = transcript_corrected
        if incident_type in ("unknown", "other", ""):
            incident_type = None

        # Geocode this scene's location
        location_coords: Optional[dict] = None
        if location and node_lat is not None and node_lon is not None:
            state = await _get_node_state(node_id, node_lat, node_lon)
            muni = _municipality_from_tg(talkgroup_name)
            hint_parts = [p for p in [muni, state] if p]
            query = f"{location}, {', '.join(hint_parts)}" if hint_parts else location
            location_coords = await _geocode_location(query, node_lat, node_lon)

        # Embed this scene's content
        scene_text = _build_scene_embed_text(
            transcript, segments, segment_indices, incident_type, transcript_corrected
        )
        embedding = await asyncio.to_thread(_sync_embed, scene_text)

        processed.append({
            "tags": tags,
            "incident_type": incident_type,
            "location": location,
            "location_coords": location_coords,
            "vehicles": vehicles,
            "units": units,
            "severity": severity,
            "resolved": resolved,
            "transcript_corrected": transcript_corrected,
            "segment_indices": segment_indices,
            "embedding": embedding,
        })

    # Merge across scenes for the call-level Firestore document.
    # Primary scene (first) owns location, severity, transcript_corrected.
    # Tags/units/vehicles are union-merged from all scenes.
    primary = processed[0]
    all_tags = list(dict.fromkeys(t for s in processed for t in s["tags"]))
    all_units = list(dict.fromkeys(u for s in processed for u in s["units"]))
    all_vehicles = list(dict.fromkeys(v for s in processed for v in s["vehicles"]))

    updates: dict = {"tags": all_tags, "severity": primary["severity"]}
    if primary["location"]:
        updates["location"] = primary["location"]
    if primary["location_coords"]:
        updates["location_coords"] = primary["location_coords"]
    if all_units:
        updates["units"] = all_units
    if all_vehicles:
        updates["vehicles"] = all_vehicles
    if primary["embedding"]:
        updates["embedding"] = primary["embedding"]
    if primary["transcript_corrected"] and not preserve_transcript_correction:
        updates["transcript_corrected"] = primary["transcript_corrected"]

    try:
        await fstore.doc_set("calls", call_id, updates)
    except Exception as e:
        logger.warning(f"Could not save intelligence for call {call_id}: {e}")

    logger.info(
        f"Intelligence: call {call_id} → type={incident_type}, "
        f"tags={tags}, location={location!r}, coords={location_coords}, severity={severity}, "
        f"corrected={transcript_corrected is not None}"
    scene_summary = (
        f"{len(processed)} scene(s): "
        + ", ".join(
            f"[{s['incident_type'] or 'unclassified'} tags={s['tags'][:2]}]"
            for s in processed
        )
    )
    return tags, incident_type, location, location_coords, resolved
    logger.info(f"Intelligence: call {call_id} → {scene_summary}")
    return processed


async def _geocode_location(
@@ -220,7 +280,6 @@ def _municipality_from_tg(tg_name: Optional[str]) -> Optional[str]:
    if not tg_name:
        return None
    cleaned = _TG_SUFFIX_RE.sub("", tg_name).strip()
    # Discard if nothing left, purely numeric, or a short all-caps abbreviation (e.g. "WC", "TAC")
    if not cleaned or cleaned.isdigit() or (len(cleaned) <= 3 and cleaned.isupper()):
        return None
    return cleaned
@@ -234,6 +293,23 @@ def _build_transcript_block(transcript: str, segments: Optional[list[dict]]) ->
    return f"Transcript:\n{transcript}"


def _build_scene_embed_text(
    transcript: str,
    segments: Optional[list[dict]],
    segment_indices: Optional[list[int]],
    incident_type: Optional[str],
    transcript_corrected: Optional[str],
) -> str:
    """Build the text string to embed for a specific scene."""
    prefix = f"[{incident_type}] " if incident_type else ""
    if transcript_corrected:
        return f"{prefix}{transcript_corrected}"
    if segments and segment_indices:
        texts = [segments[i]["text"] for i in segment_indices if i < len(segments)]
        return f"{prefix}{' '.join(texts)}"
    return f"{prefix}{transcript}"


def _sync_extract(
    transcript: str,
    talkgroup_name: Optional[str],
@@ -241,14 +317,15 @@ def _sync_extract(
    system_id: Optional[str],
    segments: Optional[list[dict]],
    vocabulary: Optional[list[str]] = None,
) -> dict:
    """Call GPT-4o mini and parse the JSON response."""
    ten_codes: Optional[dict[str, str]] = None,
) -> list[dict]:
    """Call GPT-4o-mini and return a list of scene dicts."""
    from app.config import settings
    from openai import OpenAI

    if not settings.openai_api_key:
        logger.warning("OPENAI_API_KEY not set — intelligence extraction disabled.")
        return {}
        return []

    from app.internal.vocabulary_learner import build_gpt_vocab_block
    tg = f"{talkgroup_name} (TGID {talkgroup_id})" if talkgroup_id else (talkgroup_name or "unknown")
@@ -256,6 +333,7 @@ def _sync_extract(
        transcript_block=_build_transcript_block(transcript, segments),
        talkgroup_name=tg,
        system_id=system_id or "unknown",
        ten_codes_block=_build_ten_codes_block(ten_codes or {}),
        vocabulary_block=build_gpt_vocab_block(vocabulary or []),
    )

@@ -266,13 +344,22 @@ def _sync_extract(
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"},
        )
        return json.loads(response.choices[0].message.content)
        raw = json.loads(response.choices[0].message.content)

        # New format: {"scenes": [...]}
        if "scenes" in raw and isinstance(raw["scenes"], list):
            return raw["scenes"]

        # Fallback: GPT returned the old flat single-scene format
        logger.warning("GPT returned flat format instead of scenes array — wrapping")
        return [raw]

    except json.JSONDecodeError as e:
        logger.warning(f"GPT-4o mini returned non-JSON: {e}")
        return {}
        logger.warning(f"GPT-4o-mini returned non-JSON: {e}")
        return []
    except Exception as e:
        logger.warning(f"GPT-4o mini extraction failed: {e}")
        return {}
        logger.warning(f"GPT-4o-mini extraction failed: {e}")
        return []


def _sync_embed(text: str) -> Optional[list[float]]:
@@ -290,8 +377,3 @@ def _sync_embed(text: str) -> Optional[list[float]]:
    except Exception as e:
        logger.warning(f"Embedding generation failed: {e}")
        return None
|
||||
prefix = f"[{incident_type}] " if incident_type else ""
|
||||
return f"{prefix}{transcript}"
|
||||
|
||||
@@ -46,7 +46,10 @@ async def _run_sweep_pass() -> None:
|
||||
("status", "==", "ended"),
|
||||
("ended_at", ">=", cutoff),
|
||||
])
|
||||
orphans = [c for c in recent_ended if not c.get("incident_id")]
|
||||
orphans = [
|
||||
c for c in recent_ended
|
||||
if not c.get("incident_ids") and not c.get("incident_id")
|
||||
]
|
||||
|
||||
if not orphans:
|
||||
return
|
||||
@@ -89,6 +92,7 @@ async def _recorrelate_orphan(call: dict) -> bool:
|
||||
)
|
||||
|
||||
if incident_id:
|
||||
await fstore.doc_set("calls", call_id, {"incident_ids": [incident_id]})
|
||||
logger.info(
|
||||
f"Re-correlation: linked orphaned call {call_id} → incident {incident_id}"
|
||||
)
|
||||
|
||||
@@ -33,12 +33,14 @@ class SystemRecord(BaseModel):
|
||||
name: str
|
||||
type: str # P25 / DMR / NBFM
|
||||
config: Dict[str, Any] = {} # OP25-compatible config blob
|
||||
ten_codes: Dict[str, str] = {} # {"10-10": "Commercial Alarm", ...}
|
||||
|
||||
|
||||
class SystemCreate(BaseModel):
|
||||
name: str
|
||||
type: str
|
||||
config: Dict[str, Any] = {}
|
||||
ten_codes: Dict[str, str] = {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -56,11 +58,11 @@ class CallRecord(BaseModel):
|
||||
started_at: datetime
|
||||
ended_at: Optional[datetime] = None
|
||||
audio_url: Optional[str] = None
|
||||
transcript: Optional[str] = None # populated later by STT
|
||||
incident_id: Optional[str] = None # populated later by intelligence layer
|
||||
transcript: Optional[str] = None # populated later by STT
|
||||
incident_ids: List[str] = [] # one per scene detected in the recording
|
||||
location: Optional[Dict[str, float]] = None # {lat, lng}
|
||||
tags: List[str] = []
|
||||
status: str = "active" # active / ended
|
||||
status: str = "active" # active / ended
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -83,6 +83,28 @@ async def patch_transcript(
        "embedding": None,
    })

    # Unlink from ALL current incidents so re-correlation starts clean.
    # Handles both old single incident_id and new incident_ids list.
    old_ids: list[str] = call.get("incident_ids") or (
        [call["incident_id"]] if call.get("incident_id") else []
    )
    for old_incident_id in old_ids:
        old_incident = await fstore.doc_get("incidents", old_incident_id)
        if old_incident:
            remaining = [c for c in (old_incident.get("call_ids") or []) if c != call_id]
            if remaining:
                await fstore.doc_set("incidents", old_incident_id, {
                    "call_ids": remaining,
                    "summary_stale": True,
                })
            else:
                await fstore.doc_set("incidents", old_incident_id, {
                    "call_ids": [],
                    "status": "resolved",
                    "summary_stale": True,
                })
    await fstore.doc_set("calls", call_id, {"incident_ids": [], "incident_id": None})

    # Learn from the correction: diff original → corrected and add new tokens to vocabulary
    system_id = call.get("system_id")
    original_text = call.get("transcript_corrected") or call.get("transcript") or ""

@@ -1,7 +1,7 @@
import uuid
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from typing import Optional
from typing import Dict, Optional
from app.models import SystemCreate, SystemRecord
from app.internal import firestore as fstore

@@ -12,6 +12,10 @@ class VocabularyTermBody(BaseModel):
    term: str


class TenCodesBody(BaseModel):
    ten_codes: Dict[str, str]


@router.get("")
async def list_systems():
    return await fstore.collection_list("systems")
@@ -50,6 +54,27 @@ async def delete_system(system_id: str):
    await fstore.doc_delete("systems", system_id)


# ── Ten-codes endpoints ────────────────────────────────────────────────────────

@router.get("/{system_id}/ten-codes")
async def get_ten_codes(system_id: str):
    """Return the ten-code dictionary for a system."""
    system = await fstore.doc_get("systems", system_id)
    if not system:
        raise HTTPException(404, f"System '{system_id}' not found.")
    return {"ten_codes": system.get("ten_codes") or {}}


@router.put("/{system_id}/ten-codes")
async def update_ten_codes(system_id: str, body: TenCodesBody):
    """Replace the ten-code dictionary for a system."""
    existing = await fstore.doc_get("systems", system_id)
    if not existing:
        raise HTTPException(404, f"System '{system_id}' not found.")
    await fstore.doc_update("systems", system_id, {"ten_codes": body.ten_codes})
    return {"ok": True, "ten_codes": body.ten_codes}
# ── Vocabulary endpoints ───────────────────────────────────────────────────────

@router.get("/{system_id}/vocabulary")

@@ -96,35 +96,47 @@ async def _run_extraction_pipeline(
    """Run steps 2-4 of the intelligence pipeline using an existing transcript."""
    from app.internal import intelligence, incident_correlator, alerter

    tags, incident_type, location, location_coords, resolved = await intelligence.extract_tags(
    # Step 2: Scene detection + intelligence extraction.
    # Returns one scene per distinct incident detected in the recording.
    scenes = await intelligence.extract_scenes(
        call_id, transcript, talkgroup_name,
        talkgroup_id=talkgroup_id, system_id=system_id, segments=segments,
        node_id=node_id,
        preserve_transcript_correction=preserve_transcript_correction,
    )

    incident_id = await incident_correlator.correlate_call(
        call_id=call_id,
        node_id=node_id,
        system_id=system_id,
        talkgroup_id=talkgroup_id,
        talkgroup_name=talkgroup_name,
        tags=tags,
        incident_type=incident_type,
        location=location,
        location_coords=location_coords,
    )
    # Step 3: Correlate each scene to an incident independently.
    incident_ids: list[str] = []
    all_tags: list[str] = []
    for scene in scenes:
        all_tags.extend(scene["tags"])
        incident_id = await incident_correlator.correlate_call(
            call_id=call_id,
            node_id=node_id,
            system_id=system_id,
            talkgroup_id=talkgroup_id,
            talkgroup_name=talkgroup_name,
            tags=scene["tags"],
            incident_type=scene["incident_type"],
            location=scene["location"],
            location_coords=scene["location_coords"],
        )
        if incident_id and incident_id not in incident_ids:
            incident_ids.append(incident_id)
        if scene["resolved"] and incident_id:
            await fstore.doc_set("incidents", incident_id, {"status": "resolved"})
            logger.info(f"Auto-resolved incident {incident_id} (LLM closure detection)")

    if resolved and incident_id:
        await fstore.doc_set("incidents", incident_id, {"status": "resolved"})
        logger.info(f"Auto-resolved incident {incident_id} (LLM closure detection)")
    if incident_ids:
        await fstore.doc_set("calls", call_id, {"incident_ids": incident_ids})

    # Step 4: Alert dispatch — run once with merged tags from all scenes.
    await alerter.check_and_dispatch(
        call_id=call_id,
        node_id=node_id,
        talkgroup_id=talkgroup_id,
        talkgroup_name=talkgroup_name,
        tags=tags,
        tags=list(dict.fromkeys(all_tags)),
        transcript=transcript,
    )

@@ -140,8 +152,8 @@ async def _run_intelligence_pipeline(
    """
    Post-upload intelligence pipeline (runs as a background task):
      1. Transcribe audio via Google STT
      2. Extract tags/incident type from transcript
      3. Correlate with existing incidents (or create new one)
      2. Detect scenes + extract intelligence (one result per incident in recording)
      3. Correlate each scene with existing incidents (or create new ones)
      4. Check alert rules and dispatch notifications
    """
    from app.internal import transcription, intelligence, incident_correlator, alerter
@@ -155,35 +167,57 @@ async def _run_intelligence_pipeline(
        call_id, gcs_uri, talkgroup_name, system_id=system_id
    )

    # Step 2: Intelligence extraction
    tags: list[str] = []
    incident_type: Optional[str] = None
    location: Optional[str] = None
    location_coords: Optional[dict] = None
    resolved: bool = False
    # Step 2: Scene detection + intelligence extraction
    scenes: list[dict] = []
    if transcript:
        tags, incident_type, location, location_coords, resolved = await intelligence.extract_tags(
        scenes = await intelligence.extract_scenes(
            call_id, transcript, talkgroup_name,
            talkgroup_id=talkgroup_id, system_id=system_id, segments=segments,
            node_id=node_id,
        )

    # Step 3: Incident correlation (always runs — unclassified calls can still link via talkgroup)
    incident_id = await incident_correlator.correlate_call(
        call_id=call_id,
        node_id=node_id,
        system_id=system_id,
        talkgroup_id=talkgroup_id,
        talkgroup_name=talkgroup_name,
        tags=tags,
        incident_type=incident_type,
        location=location,
        location_coords=location_coords,
    )
    # Step 3: Correlate each scene independently.
    # A single recording can produce multiple incidents on a busy channel.
    incident_ids: list[str] = []
    all_tags: list[str] = []
    for scene in scenes:
        all_tags.extend(scene["tags"])
        incident_id = await incident_correlator.correlate_call(
            call_id=call_id,
            node_id=node_id,
            system_id=system_id,
            talkgroup_id=talkgroup_id,
            talkgroup_name=talkgroup_name,
            tags=scene["tags"],
            incident_type=scene["incident_type"],
            location=scene["location"],
            location_coords=scene["location_coords"],
        )
        if incident_id and incident_id not in incident_ids:
            incident_ids.append(incident_id)
        if scene["resolved"] and incident_id:
            await fstore.doc_set("incidents", incident_id, {"status": "resolved"})
            logger.info(f"Auto-resolved incident {incident_id} (LLM closure detection)")

    if resolved and incident_id:
        await fstore.doc_set("incidents", incident_id, {"status": "resolved"})
        logger.info(f"Auto-resolved incident {incident_id} (LLM closure detection)")
    # Correlator also runs for calls with no scenes (unclassified) to attempt
    # talkgroup-based linking even when no transcript could be produced.
    if not scenes:
        incident_id = await incident_correlator.correlate_call(
            call_id=call_id,
            node_id=node_id,
            system_id=system_id,
            talkgroup_id=talkgroup_id,
            talkgroup_name=talkgroup_name,
            tags=[],
            incident_type=None,
            location=None,
            location_coords=None,
        )
        if incident_id:
            incident_ids.append(incident_id)

    if incident_ids:
        await fstore.doc_set("calls", call_id, {"incident_ids": incident_ids})

    # Step 4: Alert dispatch (always runs — talkgroup ID rules don't need a transcript)
    await alerter.check_and_dispatch(
@@ -191,6 +225,6 @@ async def _run_intelligence_pipeline(
        node_id=node_id,
        talkgroup_id=talkgroup_id,
        talkgroup_name=talkgroup_name,
        tags=tags,
        tags=list(dict.fromkeys(all_tags)),
        transcript=transcript,
    )