fix: improve geocoding specificity and increase distance threshold for repeater systems

geocode_max_km: 25 → 75 km. The node is a physical receiver, not the system boundary;
digital repeaters extend coverage well beyond 25km (North White Plains at 35.5km from
the Yorktown node is a legitimate Westchester County location).

Query now fully qualified: "High Street" → "High Street, Yorktown, New York".
Added _get_node_state(), which reverse-geocodes the node position once (cached) using
Google Maps to get the state name; the state is appended to the query after the municipality.
Generic street names (High Street, Main Street) no longer resolve to wrong-country results.
This commit is contained in:
Logan
2026-05-25 14:49:02 -04:00
parent ef8e0d1bfa
commit 7d6e97fd4a
2 changed files with 55 additions and 4 deletions
+54 -3
View File
@@ -55,6 +55,9 @@ Talkgroup: {talkgroup_name}
# Geographic bias radius for geocoding — half-width in degrees (~55 km)
_GEO_DELTA = 0.5
# Cache node state (e.g. "New York") so we only reverse-geocode once per node.
# Keyed by node_id; _get_node_state() stores an entry only after a successful
# lookup, so failed lookups are retried on the next call.
_node_state_cache: dict[str, str] = {}
# Police/law-enforcement phonetic alphabet words (APCO + NATO).
# A run of 5+ of these in a transcript is a strong Whisper hallucination signal.
_PHONETIC_ALPHA_WORDS = frozenset({
@@ -195,12 +198,19 @@ async def extract_scenes(
incident_type = None
# Geocode this scene's location.
# Include the municipality from the talkgroup name to help Google
# resolve ambiguous local names (e.g. "Pinebrook" → "Pinebrook, Yorktown").
# Build the most specific query possible: location + municipality + state.
# e.g. "High Street" → "High Street, Yorktown, New York"
# This prevents generic street names from resolving to wrong-country results.
location_coords: Optional[dict] = None
if location and node_lat is not None and node_lon is not None:
muni = _municipality_from_tg(talkgroup_name)
query = f"{location}, {muni}" if muni else location
state = await _get_node_state(node_id or "", node_lat, node_lon) if node_id else ""
parts = [location]
if muni:
parts.append(muni)
if state:
parts.append(state)
query = ", ".join(parts)
location_coords = await _geocode_location(query, node_lat, node_lon)
# Embed this scene's content
@@ -275,6 +285,47 @@ def _geo_dist_km(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
return R * 2 * math.asin(math.sqrt(a))
async def _get_node_state(node_id: str, lat: float, lon: float) -> str:
    """
    Return the US state name (e.g. "New York") for a node's position.

    Uses Google Maps Reverse Geocoding. A successful result is cached in
    ``_node_state_cache`` for the process lifetime since nodes don't move.
    Failed or empty lookups are NOT cached, so a later call retries.

    Args:
        node_id: Identifier used as the cache key and in log messages. An
            empty value bypasses the cache entirely.
        lat: Node latitude in decimal degrees.
        lon: Node longitude in decimal degrees.

    Returns:
        The ``long_name`` of the ``administrative_area_level_1`` component of
        the first result, or ``""`` when no API key is configured, the request
        fails, or no such component is returned. Never raises: this lookup is
        best-effort and all errors are logged and swallowed.
    """
    # An empty node_id must not share one cache slot across different nodes.
    if node_id and node_id in _node_state_cache:
        return _node_state_cache[node_id]

    import httpx
    from app.config import settings

    api_key = settings.google_maps_api_key
    if not api_key:
        return ""

    state = ""
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            r = await client.get(
                "https://maps.googleapis.com/maps/api/geocode/json",
                params={
                    "latlng": f"{lat},{lon}",
                    "result_type": "administrative_area_level_1",
                    "key": api_key,
                },
            )
            r.raise_for_status()
            data = r.json()

        status = data.get("status")
        if status == "OK" and data.get("results"):
            for comp in data["results"][0].get("address_components", []):
                if "administrative_area_level_1" in comp.get("types", []):
                    state = comp.get("long_name", "")
                    break
        elif status not in ("OK", "ZERO_RESULTS"):
            # REQUEST_DENIED, OVER_QUERY_LIMIT, etc. indicate a configuration
            # or quota problem; surface it instead of silently returning "".
            logger.warning(
                "Node state lookup for %s returned status %s: %s",
                node_id,
                status,
                data.get("error_message", ""),
            )
    except httpx.HTTPStatusError as e:
        # str(e) embeds the full request URL, including the ``key=`` query
        # parameter. Log only the status code so the API key stays out of logs.
        logger.warning(
            "Node state lookup failed for %s: HTTP %s",
            node_id,
            e.response.status_code,
        )
    except Exception as e:
        # Best-effort lookup: never propagate. Redact the key in case the
        # exception text carries the request URL.
        detail = str(e).replace(api_key, "<redacted>")
        logger.warning(
            "Node state lookup failed for %s: %s: %s",
            node_id,
            type(e).__name__,
            detail,
        )

    if state:
        if node_id:
            _node_state_cache[node_id] = state
        logger.info("Node %s state resolved: %r", node_id, state)
    return state
async def _geocode_location(
location_str: str, node_lat: float, node_lon: float
) -> Optional[dict]: