fix: improve geocoding specificity and increase distance threshold for repeater systems
geocode_max_km: 25 → 75 km. The node is a physical receiver, not the system boundary; digital repeaters extend coverage well beyond 25 km (for example, North White Plains, 35.5 km from the Yorktown node, is a legitimate Westchester County location). The geocoding query is now fully qualified: "High Street" → "High Street, Yorktown, New York". Added _get_node_state(), which reverse-geocodes the node position once (cached) using Google Maps to get the state name; the state is appended to the query alongside the municipality. Generic street names (High Street, Main Street) no longer resolve to wrong-country results.
This commit is contained in:
@@ -55,6 +55,9 @@ Talkgroup: {talkgroup_name}
|
||||
# Geographic bias half-width for geocoding, in degrees (0.5° ≈ 55 km north–south; narrower east–west away from the equator)
|
||||
_GEO_DELTA = 0.5
|
||||
|
||||
# Cache node state (e.g. "New York") so we only reverse-geocode once per node
|
||||
_node_state_cache: dict[str, str] = {}
|
||||
|
||||
# Police/law-enforcement phonetic alphabet words (APCO + NATO).
|
||||
# A run of 5+ of these in a transcript is a strong Whisper hallucination signal.
|
||||
_PHONETIC_ALPHA_WORDS = frozenset({
|
||||
@@ -195,12 +198,19 @@ async def extract_scenes(
|
||||
incident_type = None
|
||||
|
||||
# Geocode this scene's location.
|
||||
# Include the municipality from the talkgroup name to help Google
|
||||
# resolve ambiguous local names (e.g. "Pinebrook" → "Pinebrook, Yorktown").
|
||||
# Build the most specific query possible: location + municipality + state.
|
||||
# e.g. "High Street" → "High Street, Yorktown, New York"
|
||||
# This prevents generic street names from resolving to wrong-country results.
|
||||
location_coords: Optional[dict] = None
|
||||
if location and node_lat is not None and node_lon is not None:
|
||||
muni = _municipality_from_tg(talkgroup_name)
|
||||
query = f"{location}, {muni}" if muni else location
|
||||
state = await _get_node_state(node_id or "", node_lat, node_lon) if node_id else ""
|
||||
parts = [location]
|
||||
if muni:
|
||||
parts.append(muni)
|
||||
if state:
|
||||
parts.append(state)
|
||||
query = ", ".join(parts)
|
||||
location_coords = await _geocode_location(query, node_lat, node_lon)
|
||||
|
||||
# Embed this scene's content
|
||||
@@ -275,6 +285,47 @@ def _geo_dist_km(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
|
||||
return R * 2 * math.asin(math.sqrt(a))
|
||||
|
||||
|
||||
async def _get_node_state(node_id: str, lat: float, lon: float) -> str:
    """
    Look up the first-level administrative area (e.g. "New York") for a node.

    Sends the node's coordinates to the Google Maps Geocoding API as a reverse
    lookup restricted to ``administrative_area_level_1`` results. A non-empty
    answer is stored in ``_node_state_cache`` under ``node_id``, so later calls
    for the same node return without issuing another request.

    Returns an empty string when no API key is configured, when the request or
    response handling raises (logged as a warning), or when no matching address
    component is found. Empty results are not cached, so those cases are
    attempted again on the next call.
    """
    # Fast path: this node has already been resolved during this process.
    try:
        return _node_state_cache[node_id]
    except KeyError:
        pass

    import httpx
    from app.config import settings

    api_key = settings.google_maps_api_key
    if not api_key:
        return ""

    state = ""
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            r = await client.get(
                "https://maps.googleapis.com/maps/api/geocode/json",
                params={
                    "latlng": f"{lat},{lon}",
                    "result_type": "administrative_area_level_1",
                    "key": api_key,
                },
            )
            r.raise_for_status()
            payload = r.json()
            if payload.get("status") == "OK" and payload.get("results"):
                components = payload["results"][0].get("address_components", [])
                # First component tagged as a level-1 administrative area wins.
                state = next(
                    (
                        component.get("long_name", "")
                        for component in components
                        if "administrative_area_level_1" in component.get("types", [])
                    ),
                    "",
                )
    except Exception as e:
        # Best-effort lookup: any failure degrades to "no state".
        logger.warning(f"Node state lookup failed for {node_id}: {e}")

    if not state:
        return state

    _node_state_cache[node_id] = state
    logger.info(f"Node {node_id} state resolved: {state!r}")
    return state
|
||||
|
||||
|
||||
async def _geocode_location(
|
||||
location_str: str, node_lat: float, node_lon: float
|
||||
) -> Optional[dict]:
|
||||
|
||||
Reference in New Issue
Block a user