"""
Rules-based intelligence extraction from call transcripts.

Scans a transcript for known incident keywords, categorises the call, and
extracts rough location hints (street/intersection mentions).

No external ML dependencies — fast and always available even when STT is
disabled. Designed to run as part of the post-upload background pipeline.
"""

import re
from functools import lru_cache
from typing import Optional

from app.internal.logger import logger
from app.internal import firestore as fstore

# ---------------------------------------------------------------------------
# Keyword taxonomy
# ---------------------------------------------------------------------------

# Category name -> lowercase keywords/phrases that indicate that category.
# Keywords are matched as whole words (see _keyword_pattern), so short radio
# abbreviations such as "pi", "ped" and "ems" are safe to list here.
INCIDENT_KEYWORDS: dict[str, list[str]] = {
    "fire": [
        "fire", "smoke", "flames", "burning", "structure fire",
        "brush fire", "wildfire", "arson", "working fire", "fully involved",
    ],
    "ems": [
        "cardiac", "unconscious", "breathing", "overdose", "trauma",
        "injury", "ambulance", "ems", "medic", "chest pain", "stroke",
        "unresponsive", "fall", "laceration",
    ],
    "police": [
        "pursuit", "chase", "shots fired", "weapon", "suspect", "robbery",
        "assault", "burglary", "stolen", "fleeing", "armed", "shooting",
        "stabbing", "domestic",
    ],
    "accident": [
        "accident", "collision", "crash", "mvr", "vehicle", "rollover",
        "hit and run", "ped", "pedestrian", "pi", "property damage",
    ],
}

# Street suffix patterns for location extraction.
# NOTE(review): re.IGNORECASE applies to the whole pattern, so the leading
# "[A-Z]" does not actually require a capital letter. Any run of alphabetic
# words directly before a suffix word is therefore part of the match (e.g.
# "on my way" matches). Left unchanged because transcripts may be lowercase;
# confirm the expected casing before tightening this.
_STREET_RE = re.compile(
    r'\b(?:\d+\s+)?[A-Z][a-zA-Z]+(?: [A-Z][a-zA-Z]+)*'
    r'\s+(?:Street|St|Avenue|Ave|Boulevard|Blvd|Drive|Dr|Road|Rd|Lane|Ln'
    r'|Court|Ct|Place|Pl|Way|Circle|Cir|Highway|Hwy|Route|Rt)\b',
    re.IGNORECASE,
)


# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------

@lru_cache(maxsize=512)
def _keyword_pattern(keyword: str) -> "re.Pattern[str]":
    """Return the cached whole-word matcher for a single keyword.

    The keyword must not be embedded in a larger word: "ems" must not match
    "seems", and "fire" must not match "fired". A trailing plural "s" is
    tolerated so that e.g. "suspects" still counts as "suspect".
    """
    return re.compile(rf"(?<!\w){re.escape(keyword)}s?(?!\w)")


def _match_keywords(text: str) -> dict[str, list[str]]:
    """Map each category to the keywords found in already-lowercased *text*.

    Categories without any hit are omitted. Category order and keyword order
    follow INCIDENT_KEYWORDS, which is read at call time so entries added at
    runtime are honoured.
    """
    matched: dict[str, list[str]] = {}
    for category, keywords in INCIDENT_KEYWORDS.items():
        hits = [kw for kw in keywords if _keyword_pattern(kw).search(text)]
        if hits:
            matched[category] = hits
    return matched


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------

async def extract_tags(
    call_id: str,
    transcript: str,
) -> tuple[list[str], Optional[str]]:
    """
    Extract incident tags from a transcript.

    Args:
        call_id: ID of the document in the "calls" collection to update.
        transcript: Transcript text. An empty (or None) transcript yields
            no tags.

    Returns:
        (tags, primary_type) — e.g. (["fire", "structure fire"], "fire").
        tags lists each matched category followed by its matched keywords,
        without duplicates. primary_type is the category with the most
        keyword hits (the first such category in INCIDENT_KEYWORDS order on
        a tie), or None when nothing matched.

    Side-effect: when at least one tag was found, writes {"tags": tags} to
    calls/{call_id} via fstore.doc_update. A failure of that write is logged
    as a warning and does not propagate.
    """
    # Keywords are stored lowercase, so compare against lowercased text.
    # Treat a missing transcript as empty instead of failing on .lower().
    lowered = (transcript or "").lower()
    matched = _match_keywords(lowered)

    tags: list[str] = []
    for category, hits in matched.items():
        tags.append(category)
        # A category may list its own name as a keyword ("fire", "ems");
        # do not repeat it right after the category tag.
        tags.extend(hit for hit in hits if hit != category)

    # Deduplicate while preserving order
    unique_tags = list(dict.fromkeys(tags))

    # Primary type = category with most keyword hits
    primary_type: Optional[str] = None
    if matched:
        primary_type = max(matched, key=lambda c: len(matched[c]))

    if unique_tags:
        try:
            await fstore.doc_update("calls", call_id, {"tags": unique_tags})
        except Exception as e:
            # Best-effort persistence: tagging must not break the pipeline.
            logger.warning(f"Could not save tags for call {call_id}: {e}")

    logger.info(f"Intelligence: call {call_id} → tags={unique_tags}, type={primary_type}")
    return unique_tags, primary_type


def extract_location_hint(transcript: str) -> Optional[str]:
    """Return the first street-level location mention found in the transcript, or None."""
    match = _STREET_RE.search(transcript)
    return match.group(0) if match else None