Fix over-correlation: geocoding precision, thin path ambiguity, skip_reason propagation
- Geocoding: reject GEOMETRIC_CENTER/APPROXIMATE results — vague location strings (regions, city centroids) were resolving to node-area coords and creating false proximity matches that merged unrelated incidents.
- Thin path: on dispatch channels with multiple active incidents, skip attachment rather than guessing — a "10-4" with 3 active incidents is genuinely ambiguous.
- Short transcripts (≤5 words) now write skip_reason="transcript_too_short" to the call doc, matching the garbage-transcript behavior.
- upload.py: the no-scenes fallback now checks skip_reason before running correlation — flagged calls (garbage, too short) no longer attach via the thin path.
- Update the Server README to reflect the current project purpose, goals, and pipeline.
This commit is contained in:
@@ -272,6 +272,15 @@ async def correlate_call(
|
||||
inc for inc in tg_recent
|
||||
if _incident_idle_minutes(inc, now) <= settings.tg_dispatch_thin_idle_minutes
|
||||
]
|
||||
# A shared dispatch channel may have multiple concurrent incidents.
|
||||
# If more than one is active in the thin window, we cannot know which
|
||||
# incident this "10-4" or "Copy" belongs to — skip rather than guess.
|
||||
if len(thin_pool) > 1:
|
||||
logger.info(
|
||||
f"Correlator fast-path thin: {len(thin_pool)} active incidents on "
|
||||
f"dispatch channel — ambiguous, skipping thin call {call_id}"
|
||||
)
|
||||
thin_pool = []
|
||||
else:
|
||||
thin_pool = tg_recent
|
||||
|
||||
|
||||
@@ -171,6 +171,10 @@ async def extract_scenes(
|
||||
f"Intelligence: call {call_id} — transcript too short for extraction "
|
||||
f"({len(transcript.split())} words), skipping"
|
||||
)
|
||||
try:
|
||||
await fstore.doc_set("calls", call_id, {"skip_reason": "transcript_too_short"})
|
||||
except Exception:
|
||||
pass
|
||||
return []
|
||||
|
||||
raw_scenes: list[dict] = await asyncio.to_thread(
|
||||
@@ -382,7 +386,19 @@ async def _geocode_location(
|
||||
data = r.json()
|
||||
if data.get("status") != "OK" or not data.get("results"):
|
||||
return None
|
||||
loc = data["results"][0]["geometry"]["location"]
|
||||
result = data["results"][0]
|
||||
location_type = result.get("geometry", {}).get("location_type", "")
|
||||
# Only accept address-level precision. GEOMETRIC_CENTER (city/neighborhood
|
||||
# centroid) and APPROXIMATE (region boundary) produce coordinates that look
|
||||
# valid but are too vague for 0.5km proximity matching — they often resolve
|
||||
# to the same point as the node's position and create false proximity matches.
|
||||
if location_type not in ("ROOFTOP", "RANGE_INTERPOLATED"):
|
||||
logger.info(
|
||||
f"Geocoding rejected '{location_str}' — imprecise result "
|
||||
f"(location_type={location_type!r}), returning None"
|
||||
)
|
||||
return None
|
||||
loc = result["geometry"]["location"]
|
||||
lat, lng = float(loc["lat"]), float(loc["lng"])
|
||||
dist_km = _geo_dist_km(node_lat, node_lon, lat, lng)
|
||||
if dist_km > settings.geocode_max_km:
|
||||
@@ -392,7 +408,7 @@ async def _geocode_location(
|
||||
)
|
||||
return None
|
||||
coords = {"lat": lat, "lng": lng}
|
||||
logger.info(f"Geocoded '{location_str}' → {coords} ({dist_km:.1f}km from node)")
|
||||
logger.info(f"Geocoded '{location_str}' → {coords} ({dist_km:.1f}km from node) [{location_type}]")
|
||||
return coords
|
||||
except Exception as e:
|
||||
logger.warning(f"Geocoding failed for '{location_str}': {e}")
|
||||
|
||||
@@ -241,20 +241,24 @@ async def _run_intelligence_pipeline(
|
||||
|
||||
# Correlator also runs for calls with no scenes (unclassified) to attempt
|
||||
# talkgroup-based linking even when no transcript could be produced.
|
||||
# Skip when extraction flagged the call — garbage or too-short transcripts
|
||||
# carry no signal and would only attach spuriously via the thin path.
|
||||
if not scenes:
|
||||
incident_id = await incident_correlator.correlate_call(
|
||||
call_id=call_id,
|
||||
node_id=node_id,
|
||||
system_id=system_id,
|
||||
talkgroup_id=talkgroup_id,
|
||||
talkgroup_name=talkgroup_name,
|
||||
tags=[],
|
||||
incident_type=None,
|
||||
location=None,
|
||||
location_coords=None,
|
||||
)
|
||||
if incident_id:
|
||||
incident_ids.append(incident_id)
|
||||
_call_doc = await fstore.doc_get("calls", call_id)
|
||||
if not (_call_doc or {}).get("skip_reason"):
|
||||
incident_id = await incident_correlator.correlate_call(
|
||||
call_id=call_id,
|
||||
node_id=node_id,
|
||||
system_id=system_id,
|
||||
talkgroup_id=talkgroup_id,
|
||||
talkgroup_name=talkgroup_name,
|
||||
tags=[],
|
||||
incident_type=None,
|
||||
location=None,
|
||||
location_coords=None,
|
||||
)
|
||||
if incident_id:
|
||||
incident_ids.append(incident_id)
|
||||
|
||||
if incident_ids:
|
||||
await fstore.doc_set("calls", call_id, {"incident_ids": incident_ids})
|
||||
|
||||
Reference in New Issue
Block a user