From 788afca339a197be72c0973efd1201595faaf5ea Mon Sep 17 00:00:00 2001 From: Logan Date: Sun, 19 Apr 2026 23:27:51 -0400 Subject: [PATCH] Update geocoding intel --- drb-c2-core/app/internal/intelligence.py | 67 ++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 3 deletions(-) diff --git a/drb-c2-core/app/internal/intelligence.py b/drb-c2-core/app/internal/intelligence.py index 9653145..355f41f 100644 --- a/drb-c2-core/app/internal/intelligence.py +++ b/drb-c2-core/app/internal/intelligence.py @@ -8,6 +8,7 @@ Falls back gracefully if the API is unavailable or returns malformed output. """ import asyncio import json +import re from typing import Optional from app.internal.logger import logger from app.internal import firestore as fstore @@ -37,8 +38,20 @@ System: {system_id} Talkgroup: {talkgroup_name} {transcript_block}""" -# Nominatim viewbox half-width in degrees (~35 km at mid-latitudes) -_GEO_DELTA = 0.3 +# Nominatim viewbox half-width in degrees (~11 km at mid-latitudes) +_GEO_DELTA = 0.1 + +# node_id → state abbreviation/name from one-time reverse geocode +_node_state_cache: dict[str, str] = {} + +# Strip P25 service suffixes to extract the municipality name from a talkgroup +_TG_SUFFIX_RE = re.compile( + r"\s*\b(police\s*dep(t|artment)?|pd|fire\s*(dep(t|artment)|district)?|" + r"ems|rescue|dispatch|fd|tac(tical)?|ops|operations?|command|" + r"(fire\s*)?ground|mutual\s*aid|channel|ch\b|car[-\s]to[-\s]car|" + r"division|unit)\b.*", + re.IGNORECASE, +) async def extract_tags( @@ -86,7 +99,11 @@ async def extract_tags( node_lat = node_doc.get("lat") node_lon = node_doc.get("lon") if node_lat is not None and node_lon is not None: - location_coords = await _geocode_location(location, node_lat, node_lon) + state = await _get_node_state(node_id, node_lat, node_lon) + muni = _municipality_from_tg(talkgroup_name) + hint_parts = [p for p in [muni, state] if p] + query = f"{location}, {', '.join(hint_parts)}" if hint_parts else location + location_coords = await _geocode_location(query, node_lat, node_lon) # Store embedding alongside structured data embedding = await asyncio.to_thread(_sync_embed, _embed_text(transcript, incident_type)) @@ -157,6 +174,50 @@ async def _geocode_location( return None +async def _get_node_state(node_id: str, lat: float, lon: float) -> Optional[str]: + """ + Reverse geocode the node's position once to extract its state. + Result is cached for the process lifetime — nodes don't move. + """ + if node_id in _node_state_cache: + return _node_state_cache[node_id] + + import httpx + headers = {"User-Agent": "DRB-Dispatch/1.0 (public-safety radio monitor)"} + try: + async with httpx.AsyncClient(timeout=5.0) as client: + r = await client.get( + "https://nominatim.openstreetmap.org/reverse", + params={"lat": lat, "lon": lon, "format": "json", "zoom": 5}, + headers=headers, + ) + r.raise_for_status() + data = r.json() + state = data.get("address", {}).get("state", "") + if state: + _node_state_cache[node_id] = state + logger.info(f"Node {node_id} reverse-geocoded to state: {state!r}") + return state + except Exception as e: + logger.warning(f"Node state reverse geocode failed: {e}") + return None + + +def _municipality_from_tg(tg_name: Optional[str]) -> Optional[str]: + """ + Extract the municipality name from a talkgroup name. + e.g. "Ossining PD" → "Ossining", "Westchester County Fire" → "Westchester County" + Returns None for tactical/operational channels with no useful location info. + """ + if not tg_name: + return None + cleaned = _TG_SUFFIX_RE.sub("", tg_name).strip() + # Discard if nothing left, purely numeric, or a short all-caps abbreviation (e.g. "WC", "TAC") + if not cleaned or cleaned.isdigit() or (len(cleaned) <= 3 and cleaned.isupper()): + return None + return cleaned + + def _build_transcript_block(transcript: str, segments: Optional[list[dict]]) -> str: """Format transcript as numbered transmissions if segments are available.""" if segments and len(segments) > 1: