Update geocoding intel

This commit is contained in:
Logan
2026-04-19 23:27:51 -04:00
parent 0fb97c3b3c
commit 788afca339
+64 -3
View File
@@ -8,6 +8,7 @@ Falls back gracefully if the API is unavailable or returns malformed output.
"""
import asyncio
import json
import re
from typing import Optional
from app.internal.logger import logger
from app.internal import firestore as fstore
@@ -37,8 +38,20 @@ System: {system_id}
Talkgroup: {talkgroup_name}
{transcript_block}"""
# Nominatim viewbox half-width in degrees (~35 km at mid-latitudes)
_GEO_DELTA = 0.3
# Nominatim viewbox half-width in degrees (~11 km at mid-latitudes)
_GEO_DELTA = 0.1
# node_id → state abbreviation/name from one-time reverse geocode
_node_state_cache: dict[str, str] = {}
# Strip P25 service suffixes to extract the municipality name from a talkgroup
_TG_SUFFIX_RE = re.compile(
r"\s*\b(police\s*dep(t|artment)?|pd|fire\s*(dep(t|artment)|district)?|"
r"ems|rescue|dispatch|fd|tac(tical)?|ops|operations?|command|"
r"(fire\s*)?ground|mutual\s*aid|channel|ch\b|car[-\s]to[-\s]car|"
r"division|unit)\b.*",
re.IGNORECASE,
)
async def extract_tags(
@@ -86,7 +99,11 @@ async def extract_tags(
node_lat = node_doc.get("lat")
node_lon = node_doc.get("lon")
if node_lat is not None and node_lon is not None:
location_coords = await _geocode_location(location, node_lat, node_lon)
state = await _get_node_state(node_id, node_lat, node_lon)
muni = _municipality_from_tg(talkgroup_name)
hint_parts = [p for p in [muni, state] if p]
query = f"{location}, {', '.join(hint_parts)}" if hint_parts else location
location_coords = await _geocode_location(query, node_lat, node_lon)
# Store embedding alongside structured data
embedding = await asyncio.to_thread(_sync_embed, _embed_text(transcript, incident_type))
@@ -157,6 +174,50 @@ async def _geocode_location(
return None
async def _get_node_state(node_id: str, lat: float, lon: float) -> Optional[str]:
"""
Reverse geocode the node's position once to extract its state.
Result is cached for the process lifetime — nodes don't move.
"""
if node_id in _node_state_cache:
return _node_state_cache[node_id]
import httpx
headers = {"User-Agent": "DRB-Dispatch/1.0 (public-safety radio monitor)"}
try:
async with httpx.AsyncClient(timeout=5.0) as client:
r = await client.get(
"https://nominatim.openstreetmap.org/reverse",
params={"lat": lat, "lon": lon, "format": "json", "zoom": 5},
headers=headers,
)
r.raise_for_status()
data = r.json()
state = data.get("address", {}).get("state", "")
if state:
_node_state_cache[node_id] = state
logger.info(f"Node {node_id} reverse-geocoded to state: {state!r}")
return state
except Exception as e:
logger.warning(f"Node state reverse geocode failed: {e}")
return None
def _municipality_from_tg(tg_name: Optional[str]) -> Optional[str]:
"""
Extract the municipality name from a talkgroup name.
e.g. "Ossining PD""Ossining", "Westchester County Fire""Westchester County"
Returns None for tactical/operational channels with no useful location info.
"""
if not tg_name:
return None
cleaned = _TG_SUFFIX_RE.sub("", tg_name).strip()
# Discard if nothing left, purely numeric, or a short all-caps abbreviation (e.g. "WC", "TAC")
if not cleaned or cleaned.isdigit() or (len(cleaned) <= 3 and cleaned.isupper()):
return None
return cleaned
def _build_transcript_block(transcript: str, segments: Optional[list[dict]]) -> str:
"""Format transcript as numbered transmissions if segments are available."""
if segments and len(segments) > 1: