Fix over-correlation: geocoding precision, thin path ambiguity, skip_reason propagation

- Geocoding: reject GEOMETRIC_CENTER/APPROXIMATE results — vague location strings
  (regions, city centroids) were resolving to coordinates at or near the node's
  own position, creating false proximity matches that merged unrelated incidents
- Thin path: on dispatch channels with multiple active incidents, skip attachment
  rather than guessing — "10-4" with 3 active incidents is genuinely ambiguous
- Short transcripts (≤5 words) now write skip_reason="transcript_too_short" to
  the call doc, matching garbage transcript behavior
- upload.py no-scenes fallback now checks skip_reason before running correlation —
  flagged calls (garbage, too short) no longer attach via thin path
- Update Server README to reflect current project purpose, goals, and pipeline
This commit is contained in:
Logan
2026-05-31 23:51:46 -04:00
parent f774be12b8
commit b77d2cce36
4 changed files with 111 additions and 152 deletions
+18 -2
View File
@@ -171,6 +171,10 @@ async def extract_scenes(
f"Intelligence: call {call_id} — transcript too short for extraction "
f"({len(transcript.split())} words), skipping"
)
try:
await fstore.doc_set("calls", call_id, {"skip_reason": "transcript_too_short"})
except Exception:
pass
return []
raw_scenes: list[dict] = await asyncio.to_thread(
@@ -382,7 +386,19 @@ async def _geocode_location(
data = r.json()
if data.get("status") != "OK" or not data.get("results"):
return None
loc = data["results"][0]["geometry"]["location"]
result = data["results"][0]
location_type = result.get("geometry", {}).get("location_type", "")
# Only accept address-level precision. GEOMETRIC_CENTER (the center of a street
# or region, e.g. a city/neighborhood centroid) and APPROXIMATE (an approximate
# position, e.g. a broad region) produce coordinates that look valid but are
# too vague for 0.5km proximity matching — they often resolve to the same point
# as the node's position and create false proximity matches.
if location_type not in ("ROOFTOP", "RANGE_INTERPOLATED"):
logger.info(
f"Geocoding rejected '{location_str}' — imprecise result "
f"(location_type={location_type!r}), returning None"
)
return None
loc = result["geometry"]["location"]
lat, lng = float(loc["lat"]), float(loc["lng"])
dist_km = _geo_dist_km(node_lat, node_lon, lat, lng)
if dist_km > settings.geocode_max_km:
@@ -392,7 +408,7 @@ async def _geocode_location(
)
return None
coords = {"lat": lat, "lng": lng}
logger.info(f"Geocoded '{location_str}'{coords} ({dist_km:.1f}km from node)")
logger.info(f"Geocoded '{location_str}'{coords} ({dist_km:.1f}km from node) [{location_type}]")
return coords
except Exception as e:
logger.warning(f"Geocoding failed for '{location_str}': {e}")