fix: improve geocoding specificity and increase distance threshold for repeater systems
geocode_max_km: 25 → 75 km. The node is a physical receiver, not the system boundary; digital repeaters extend coverage well beyond 25 km (North White Plains, 35.5 km from the Yorktown node, is a legitimate Westchester County location). The geocoding query is now fully qualified: "High Street" → "High Street, Yorktown, New York". Added _get_node_state(), which reverse-geocodes the node position once (cached) via Google Maps to get the state name; the state is appended to the query alongside the municipality. Generic street names (High Street, Main Street) no longer resolve to wrong-country results.
This commit is contained in:
@@ -32,7 +32,7 @@ class Settings(BaseSettings):
|
|||||||
embedding_no_location_threshold: float = 0.97 # slow-path: match without location (very high bar)
|
embedding_no_location_threshold: float = 0.97 # slow-path: match without location (very high bar)
|
||||||
embedding_cross_tg_threshold: float = 0.85 # cross-TG path: same dept + 2+ shared units
|
embedding_cross_tg_threshold: float = 0.85 # cross-TG path: same dept + 2+ shared units
|
||||||
location_proximity_km: float = 0.5 # radius for location-proximity matching
|
location_proximity_km: float = 0.5 # radius for location-proximity matching
|
||||||
geocode_max_km: float = 25.0 # reject geocode results farther than this from the node
|
geocode_max_km: float = 75.0 # reject geocode results farther than this from the node
|
||||||
incident_auto_resolve_minutes: int = 90 # auto-resolve after N minutes with no new calls
|
incident_auto_resolve_minutes: int = 90 # auto-resolve after N minutes with no new calls
|
||||||
recorrelation_scan_minutes: int = 60 # re-examine orphaned calls ended within this window
|
recorrelation_scan_minutes: int = 60 # re-examine orphaned calls ended within this window
|
||||||
tg_fast_path_idle_minutes: int = 90 # fast path: max minutes since incident last updated
|
tg_fast_path_idle_minutes: int = 90 # fast path: max minutes since incident last updated
|
||||||
|
|||||||
@@ -55,6 +55,9 @@ Talkgroup: {talkgroup_name}
|
|||||||
# Geographic bias radius for geocoding — half-width in degrees (~55 km)
|
# Geographic bias radius for geocoding — half-width in degrees (~55 km)
|
||||||
_GEO_DELTA = 0.5
|
_GEO_DELTA = 0.5
|
||||||
|
|
||||||
|
# Cache node state (e.g. "New York") so we only reverse-geocode once per node
|
||||||
|
_node_state_cache: dict[str, str] = {}
|
||||||
|
|
||||||
# Police/law-enforcement phonetic alphabet words (APCO + NATO).
|
# Police/law-enforcement phonetic alphabet words (APCO + NATO).
|
||||||
# A run of 5+ of these in a transcript is a strong Whisper hallucination signal.
|
# A run of 5+ of these in a transcript is a strong Whisper hallucination signal.
|
||||||
_PHONETIC_ALPHA_WORDS = frozenset({
|
_PHONETIC_ALPHA_WORDS = frozenset({
|
||||||
@@ -195,12 +198,19 @@ async def extract_scenes(
|
|||||||
incident_type = None
|
incident_type = None
|
||||||
|
|
||||||
# Geocode this scene's location.
|
# Geocode this scene's location.
|
||||||
# Include the municipality from the talkgroup name to help Google
|
# Build the most specific query possible: location + municipality + state.
|
||||||
# resolve ambiguous local names (e.g. "Pinebrook" → "Pinebrook, Yorktown").
|
# e.g. "High Street" → "High Street, Yorktown, New York"
|
||||||
|
# This prevents generic street names from resolving to wrong-country results.
|
||||||
location_coords: Optional[dict] = None
|
location_coords: Optional[dict] = None
|
||||||
if location and node_lat is not None and node_lon is not None:
|
if location and node_lat is not None and node_lon is not None:
|
||||||
muni = _municipality_from_tg(talkgroup_name)
|
muni = _municipality_from_tg(talkgroup_name)
|
||||||
query = f"{location}, {muni}" if muni else location
|
state = await _get_node_state(node_id or "", node_lat, node_lon) if node_id else ""
|
||||||
|
parts = [location]
|
||||||
|
if muni:
|
||||||
|
parts.append(muni)
|
||||||
|
if state:
|
||||||
|
parts.append(state)
|
||||||
|
query = ", ".join(parts)
|
||||||
location_coords = await _geocode_location(query, node_lat, node_lon)
|
location_coords = await _geocode_location(query, node_lat, node_lon)
|
||||||
|
|
||||||
# Embed this scene's content
|
# Embed this scene's content
|
||||||
@@ -275,6 +285,47 @@ def _geo_dist_km(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
|
|||||||
return R * 2 * math.asin(math.sqrt(a))
|
return R * 2 * math.asin(math.sqrt(a))
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_node_state(node_id: str, lat: float, lon: float) -> str:
    """
    Return the state name (e.g. "New York") for a node's position.

    Reverse-geocodes ``lat``/``lon`` with the Google Maps Geocoding API and
    returns the ``long_name`` of the ``administrative_area_level_1`` component
    of the first result.  Successful lookups are stored in
    ``_node_state_cache`` keyed by ``node_id``, so later calls for the same
    node skip the network request.  Failed or empty lookups are NOT cached,
    so a later call will retry.

    Args:
        node_id: Cache key for the node.  An empty id is never cached, so
            unrelated nodes cannot share a cache slot.
        lat: Node latitude, sent to Google as the first half of ``latlng``.
        lon: Node longitude, sent to Google as the second half of ``latlng``.

    Returns:
        The state name, or "" when no API key is configured, the request
        fails, or the response has no matching address component.
    """
    if node_id and node_id in _node_state_cache:
        return _node_state_cache[node_id]

    import httpx
    from app.config import settings

    api_key = settings.google_maps_api_key
    if not api_key:
        return ""

    state = ""
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            r = await client.get(
                "https://maps.googleapis.com/maps/api/geocode/json",
                params={
                    "latlng": f"{lat},{lon}",
                    # Ask Google to return only state-level results.
                    "result_type": "administrative_area_level_1",
                    "key": api_key,
                },
            )
            r.raise_for_status()
            data = r.json()
            if data.get("status") == "OK" and data.get("results"):
                for comp in data["results"][0].get("address_components", []):
                    if "administrative_area_level_1" in comp.get("types", []):
                        state = comp.get("long_name", "")
                        break
    except Exception as e:
        # Best-effort lookup: never let a geocoding failure break the caller.
        # httpx status errors embed the full request URL (including the
        # ``key`` query parameter) in their message, so redact the API key
        # before it reaches the logs.
        detail = str(e).replace(str(api_key), "***")
        logger.warning("Node state lookup failed for %s: %s", node_id, detail)

    if state:
        if node_id:
            _node_state_cache[node_id] = state
        logger.info("Node %s state resolved: %r", node_id, state)
    return state
|
||||||
|
|
||||||
|
|
||||||
async def _geocode_location(
|
async def _geocode_location(
|
||||||
location_str: str, node_lat: float, node_lon: float
|
location_str: str, node_lat: float, node_lon: float
|
||||||
) -> Optional[dict]:
|
) -> Optional[dict]:
|
||||||
|
|||||||
Reference in New Issue
Block a user