From 7d6e97fd4a870296f012a6eb5b67984d523b57e3 Mon Sep 17 00:00:00 2001 From: Logan Date: Mon, 25 May 2026 14:49:02 -0400 Subject: [PATCH] fix: improve geocoding specificity and increase distance threshold for repeater systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit geocode_max_km: 25 → 75 km. The node is a physical receiver, not the system boundary; digital repeaters extend coverage well beyond 25 km (North White Plains, at 35.5 km from the Yorktown node, is a legitimate Westchester County location). The geocoding query is now fully qualified: "High Street" → "High Street, Yorktown, New York". Added _get_node_state(), which reverse-geocodes the node position with Google Maps to get the state name; a successful lookup is cached per node for the process lifetime, and the state is appended to the query after the municipality. Generic street names (High Street, Main Street) no longer resolve to results in the wrong country. --- drb-c2-core/app/config.py | 2 +- drb-c2-core/app/internal/intelligence.py | 57 ++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 4 deletions(-) diff --git a/drb-c2-core/app/config.py b/drb-c2-core/app/config.py index 098ff2d..58db6d3 100644 --- a/drb-c2-core/app/config.py +++ b/drb-c2-core/app/config.py @@ -32,7 +32,7 @@ class Settings(BaseSettings): embedding_no_location_threshold: float = 0.97 # slow-path: match without location (very high bar) embedding_cross_tg_threshold: float = 0.85 # cross-TG path: same dept + 2+ shared units location_proximity_km: float = 0.5 # radius for location-proximity matching - geocode_max_km: float = 25.0 # reject geocode results farther than this from the node + geocode_max_km: float = 75.0 # reject geocode results farther than this from the node incident_auto_resolve_minutes: int = 90 # auto-resolve after N minutes with no new calls recorrelation_scan_minutes: int = 60 # re-examine orphaned calls ended within this window tg_fast_path_idle_minutes: int = 90 # fast path: max minutes since incident last updated diff --git 
a/drb-c2-core/app/internal/intelligence.py b/drb-c2-core/app/internal/intelligence.py index 6a833de..d3e0f0a 100644 --- a/drb-c2-core/app/internal/intelligence.py +++ b/drb-c2-core/app/internal/intelligence.py @@ -55,6 +55,9 @@ Talkgroup: {talkgroup_name} # Geographic bias radius for geocoding — half-width in degrees (~55 km) _GEO_DELTA = 0.5 +# Cache node state (e.g. "New York") so we only reverse-geocode once per node +_node_state_cache: dict[str, str] = {} + # Police/law-enforcement phonetic alphabet words (APCO + NATO). # A run of 5+ of these in a transcript is a strong Whisper hallucination signal. _PHONETIC_ALPHA_WORDS = frozenset({ @@ -195,12 +198,19 @@ async def extract_scenes( incident_type = None # Geocode this scene's location. - # Include the municipality from the talkgroup name to help Google - # resolve ambiguous local names (e.g. "Pinebrook" → "Pinebrook, Yorktown"). + # Build the most specific query possible: location + municipality + state. + # e.g. "High Street" → "High Street, Yorktown, New York" + # This prevents generic street names from resolving to wrong-country results. location_coords: Optional[dict] = None if location and node_lat is not None and node_lon is not None: muni = _municipality_from_tg(talkgroup_name) - query = f"{location}, {muni}" if muni else location + state = await _get_node_state(node_id or "", node_lat, node_lon) if node_id else "" + parts = [location] + if muni: + parts.append(muni) + if state: + parts.append(state) + query = ", ".join(parts) location_coords = await _geocode_location(query, node_lat, node_lon) # Embed this scene's content @@ -275,6 +285,47 @@ def _geo_dist_km(lat1: float, lon1: float, lat2: float, lon2: float) -> float: return R * 2 * math.asin(math.sqrt(a)) +async def _get_node_state(node_id: str, lat: float, lon: float) -> str: + """ + Return the US state name (e.g. "New York") for a node's position. + Uses Google Maps Reverse Geocoding; cached for the process lifetime since nodes don't move. 
+ """ + if node_id in _node_state_cache: + return _node_state_cache[node_id] + + import httpx + from app.config import settings + + if not settings.google_maps_api_key: + return "" + + state = "" + try: + async with httpx.AsyncClient(timeout=5.0) as client: + r = await client.get( + "https://maps.googleapis.com/maps/api/geocode/json", + params={ + "latlng": f"{lat},{lon}", + "result_type": "administrative_area_level_1", + "key": settings.google_maps_api_key, + }, + ) + r.raise_for_status() + data = r.json() + if data.get("status") == "OK" and data.get("results"): + for comp in data["results"][0].get("address_components", []): + if "administrative_area_level_1" in comp.get("types", []): + state = comp.get("long_name", "") + break + except Exception as e: + logger.warning(f"Node state lookup failed for {node_id}: {e}") + + if state: + _node_state_cache[node_id] = state + logger.info(f"Node {node_id} state resolved: {state!r}") + return state + + async def _geocode_location( location_str: str, node_lat: float, node_lon: float ) -> Optional[dict]: