107 lines
3.6 KiB
Python
107 lines
3.6 KiB
Python
"""
Rules-based intelligence extraction from call transcripts.

Scans a transcript for known incident keywords, categorises the call, and
extracts rough location hints (street/intersection mentions).

No external ML dependencies — fast and always available even when STT is
disabled. Designed to run as part of the post-upload background pipeline.
"""
|
|
import re
|
|
from typing import Optional
|
|
from app.internal.logger import logger
|
|
from app.internal import firestore as fstore
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Keyword taxonomy
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Maps each incident category to the words/phrases that signal it.
#
# * Every keyword is lowercase because it is compared against a lowercased
#   transcript.
# * Ordering matters: tags are emitted in category order, then keyword order,
#   and a tie for the primary type goes to the category listed first.
INCIDENT_KEYWORDS: dict[str, list[str]] = {
    # Fire service
    "fire": [
        "fire", "smoke", "flames", "burning",
        "structure fire", "brush fire", "wildfire", "arson",
        "working fire", "fully involved",
    ],
    # Emergency medical
    "ems": [
        "cardiac", "unconscious", "breathing", "overdose",
        "trauma", "injury", "ambulance", "ems",
        "medic", "chest pain", "stroke", "unresponsive",
        "fall", "laceration",
    ],
    # Law enforcement
    "police": [
        "pursuit", "chase", "shots fired", "weapon",
        "suspect", "robbery", "assault", "burglary",
        "stolen", "fleeing", "armed", "shooting",
        "stabbing", "domestic",
    ],
    # Traffic incidents
    "accident": [
        "accident", "collision", "crash", "mvr",
        "vehicle", "rollover", "hit and run", "ped",
        "pedestrian", "pi", "property damage",
    ],
}
|
|
|
|
# Street suffix patterns for location extraction
|
|
# Matches "<optional house number> <name word(s)> <street suffix>".
#
# NOTE: re.IGNORECASE also applies to the [A-Z] classes, so a "name word" is
# any alphabetic word of two or more letters, capitalised or not. On text such
# as "respond to main street" the match therefore starts at the first word of
# the run ("respond"), not at the street name itself.
_STREET_RE = re.compile(
    r'\b(?:\d+\s+)?'                            # optional house number
    r'[A-Z][a-zA-Z]+(?: [A-Z][a-zA-Z]+)*'       # name words, single-space separated
    r'\s+(?:Street|St|Avenue|Ave|Boulevard|Blvd|Drive|Dr'
    r'|Road|Rd|Lane|Ln|Court|Ct|Place|Pl|Way'
    r'|Circle|Cir|Highway|Hwy|Route|Rt)\b',     # street-type suffix
    flags=re.IGNORECASE,
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Public API
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _keyword_hits(text: str, keywords: list[str]) -> list[str]:
    """Return the entries of *keywords* that occur in *text* as whole words.

    A keyword (or multi-word phrase) counts as a hit only when it is not
    embedded in a longer word; a trailing plain plural ("s"/"es") is
    tolerated. Plain substring containment is deliberately avoided because
    short keywords would otherwise fire inside unrelated words, e.g. "pi" in
    "hospital", "ped" in "stopped", "ems" in "items" or "fire" in
    "shots fired".

    Trade-off: other inflections ("crashed", "medical") are not matched; add
    them to the taxonomy explicitly if they should count.

    *text* is expected to be lowercased already, like the keywords.
    """
    return [
        kw
        for kw in keywords
        # Lookarounds instead of \b so the boundary check does not depend on
        # the keyword itself starting/ending with a word character.
        if re.search(rf"(?<!\w){re.escape(kw)}(?:e?s)?(?!\w)", text)
    ]


async def extract_tags(
    call_id: str,
    transcript: str,
) -> tuple[list[str], Optional[str]]:
    """
    Extract incident tags from a transcript.

    The transcript is lowercased and checked against every keyword in
    INCIDENT_KEYWORDS using whole-word matching (see ``_keyword_hits``).

    Args:
        call_id: Identifier of the call; used for the Firestore update and
            in log messages.
        transcript: Transcript text to scan.

    Returns:
        (tags, primary_type) — e.g. (["fire", "structure fire"], "fire")
        tags lists each matched category followed by its matched keywords,
        in taxonomy order and without duplicates.
        primary_type is the category with the most keyword hits (the first
        such category in taxonomy order on a tie), or None if nothing matched.

    Side-effect: when at least one tag was found, calls
    ``fstore.doc_update("calls", call_id, {"tags": tags})``. A failure of
    that call is logged as a warning and does not propagate.
    """
    lower = transcript.lower()

    # category -> keywords of that category found in the transcript
    matched: dict[str, list[str]] = {}
    for category, keywords in INCIDENT_KEYWORDS.items():
        hits = _keyword_hits(lower, keywords)
        if hits:
            matched[category] = hits

    tags: list[str] = []
    for category, hits in matched.items():
        tags.append(category)
        # The category name may itself be a keyword; don't list it twice.
        tags.extend(h for h in hits if h != category)

    # Deduplicate while preserving first-seen order.
    unique_tags = list(dict.fromkeys(tags))

    # Primary type = category with most keyword hits; max() keeps the first
    # maximal entry, so ties resolve in taxonomy order.
    primary_type: Optional[str] = None
    if matched:
        primary_type = max(matched, key=lambda c: len(matched[c]))

    if unique_tags:
        # Best-effort persistence: tagging must not fail the pipeline just
        # because the store is unavailable.
        try:
            await fstore.doc_update("calls", call_id, {"tags": unique_tags})
        except Exception as e:
            logger.warning(f"Could not save tags for call {call_id}: {e}")

    logger.info(f"Intelligence: call {call_id} → tags={unique_tags}, type={primary_type}")
    return unique_tags, primary_type
|
|
|
|
|
|
def extract_location_hint(transcript: str) -> Optional[str]:
    """Find a rough street-level location in *transcript*.

    Returns the full text of the leftmost ``_STREET_RE`` match, or None when
    the pattern does not match anywhere.
    """
    found = _STREET_RE.search(transcript)
    if found is None:
        return None
    return found.group()
|