Update geocoding intel
This commit is contained in:
@@ -8,6 +8,7 @@ Falls back gracefully if the API is unavailable or returns malformed output.
|
|||||||
"""
|
"""
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
|
import re
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from app.internal.logger import logger
|
from app.internal.logger import logger
|
||||||
from app.internal import firestore as fstore
|
from app.internal import firestore as fstore
|
||||||
@@ -37,8 +38,20 @@ System: {system_id}
|
|||||||
Talkgroup: {talkgroup_name}
|
Talkgroup: {talkgroup_name}
|
||||||
{transcript_block}"""
|
{transcript_block}"""
|
||||||
|
|
||||||
# Nominatim viewbox half-width in degrees (~35 km at mid-latitudes)
|
# Nominatim viewbox half-width in degrees (~11 km at mid-latitudes)
|
||||||
_GEO_DELTA = 0.3
|
_GEO_DELTA = 0.1
|
||||||
|
|
||||||
|
# node_id → state abbreviation/name from one-time reverse geocode
|
||||||
|
_node_state_cache: dict[str, str] = {}
|
||||||
|
|
||||||
|
# Strip P25 service suffixes to extract the municipality name from a talkgroup
|
||||||
|
_TG_SUFFIX_RE = re.compile(
|
||||||
|
r"\s*\b(police\s*dep(t|artment)?|pd|fire\s*(dep(t|artment)|district)?|"
|
||||||
|
r"ems|rescue|dispatch|fd|tac(tical)?|ops|operations?|command|"
|
||||||
|
r"(fire\s*)?ground|mutual\s*aid|channel|ch\b|car[-\s]to[-\s]car|"
|
||||||
|
r"division|unit)\b.*",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
async def extract_tags(
|
async def extract_tags(
|
||||||
@@ -86,7 +99,11 @@ async def extract_tags(
|
|||||||
node_lat = node_doc.get("lat")
|
node_lat = node_doc.get("lat")
|
||||||
node_lon = node_doc.get("lon")
|
node_lon = node_doc.get("lon")
|
||||||
if node_lat is not None and node_lon is not None:
|
if node_lat is not None and node_lon is not None:
|
||||||
location_coords = await _geocode_location(location, node_lat, node_lon)
|
state = await _get_node_state(node_id, node_lat, node_lon)
|
||||||
|
muni = _municipality_from_tg(talkgroup_name)
|
||||||
|
hint_parts = [p for p in [muni, state] if p]
|
||||||
|
query = f"{location}, {', '.join(hint_parts)}" if hint_parts else location
|
||||||
|
location_coords = await _geocode_location(query, node_lat, node_lon)
|
||||||
|
|
||||||
# Store embedding alongside structured data
|
# Store embedding alongside structured data
|
||||||
embedding = await asyncio.to_thread(_sync_embed, _embed_text(transcript, incident_type))
|
embedding = await asyncio.to_thread(_sync_embed, _embed_text(transcript, incident_type))
|
||||||
@@ -157,6 +174,50 @@ async def _geocode_location(
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_node_state(node_id: str, lat: float, lon: float) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Reverse geocode the node's position once to extract its state.
|
||||||
|
Result is cached for the process lifetime — nodes don't move.
|
||||||
|
"""
|
||||||
|
if node_id in _node_state_cache:
|
||||||
|
return _node_state_cache[node_id]
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
headers = {"User-Agent": "DRB-Dispatch/1.0 (public-safety radio monitor)"}
|
||||||
|
try:
|
||||||
|
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||||
|
r = await client.get(
|
||||||
|
"https://nominatim.openstreetmap.org/reverse",
|
||||||
|
params={"lat": lat, "lon": lon, "format": "json", "zoom": 5},
|
||||||
|
headers=headers,
|
||||||
|
)
|
||||||
|
r.raise_for_status()
|
||||||
|
data = r.json()
|
||||||
|
state = data.get("address", {}).get("state", "")
|
||||||
|
if state:
|
||||||
|
_node_state_cache[node_id] = state
|
||||||
|
logger.info(f"Node {node_id} reverse-geocoded to state: {state!r}")
|
||||||
|
return state
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Node state reverse geocode failed: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _municipality_from_tg(tg_name: Optional[str]) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Extract the municipality name from a talkgroup name.
|
||||||
|
e.g. "Ossining PD" → "Ossining", "Westchester County Fire" → "Westchester County"
|
||||||
|
Returns None for tactical/operational channels with no useful location info.
|
||||||
|
"""
|
||||||
|
if not tg_name:
|
||||||
|
return None
|
||||||
|
cleaned = _TG_SUFFIX_RE.sub("", tg_name).strip()
|
||||||
|
# Discard if nothing left, purely numeric, or a short all-caps abbreviation (e.g. "WC", "TAC")
|
||||||
|
if not cleaned or cleaned.isdigit() or (len(cleaned) <= 3 and cleaned.isupper()):
|
||||||
|
return None
|
||||||
|
return cleaned
|
||||||
|
|
||||||
|
|
||||||
def _build_transcript_block(transcript: str, segments: Optional[list[dict]]) -> str:
|
def _build_transcript_block(transcript: str, segments: Optional[list[dict]]) -> str:
|
||||||
"""Format transcript as numbered transmissions if segments are available."""
|
"""Format transcript as numbered transmissions if segments are available."""
|
||||||
if segments and len(segments) > 1:
|
if segments and len(segments) > 1:
|
||||||
|
|||||||
Reference in New Issue
Block a user