feat: incident correlation overhaul, signal-based auto-resolve, token fixes

Correlator
- Raise fast-path idle gate 30 → 90 min (tg_fast_path_idle_minutes)
- Fix disambiguate always-commits bug: run _call_fits_incident on the winner before committing; fall through to new-incident creation if it fails
- Add unit-continuity path (path 1.5): matches all_active by shared unit IDs with a reassignment guard, bridges calls past the idle gate
- Add tag-based incident_type inference (_TAG_TYPE_HINTS) as GPT fallback, rescuing tagged calls that would have been dropped (616 observed orphans)
- Add master/child incident model: _create_master_incident, _demote_to_child, _add_child_to_master; new incidents stamped incident_type="master"
- Add cross-system parent detection (_find_cross_system_parent): two-signal scoring (road overlap=0.4, embedding≥0.78=0.3, proximity=0.3, threshold=0.5) wired into create-if-new path; creates master shell on first cross-system match
- Add maybe_resolve_parent: auto-resolves master when all children close; called from upload pipeline (LLM closure) and summarizer stale sweep
- Add signal-based auto-resolve via units_active/units_cleared tracking: GPT now extracts cleared_units per scene; _update_incident moves units between active/cleared lists and resolves the incident when active empties; stored on call doc for re-correlation sweep reuse
- Add _create_incident initialization of units_active/units_cleared fields

Re-correlation sweep
- Add corr_sweep_count + MAX_SWEEP_ATTEMPTS=3: orphans get 3 attempts then are tombstoned as corr_path="unlinked", ending the re-sweep loop (previously hammering each orphan 29-31 times per shift)

Intelligence extraction
- Add cleared_units to GPT prompt schema and rules
- Extract and propagate cleared_units per scene; merge across scenes; store on call doc for re-correlation sweep

Token management
- Fix token release bug: remove release_token call on discord_connected=False in MQTT checkin (transient Discord drops were orphaning bots mid-shift)
- Add PUT /tokens/{id}/prefer/{system_id} endpoint: lock a bot token to a system; pass _none as system_id to clear; stored bidirectionally on both token and system documents
- discord_join handler resolves preferred_token_id from system doc and passes system_name in MQTT payload
2026-05-10 19:49:05 -04:00
parent 7e1b01a275
commit 8b660d8e10
9 changed files with 509 additions and 23 deletions
@@ -53,12 +53,24 @@ async def send_command(node_id: str, cmd: CommandPayload):
    payload = cmd.model_dump(exclude_none=True)

    if cmd.action == "discord_join":
-        preferred = payload.pop("preferred_token_id", None)
+        # Resolve system doc once — used for preferred token and presence name.
+        system_doc = None
+        system_id = node.get("assigned_system_id")
+        if system_id:
+            system_doc = await fstore.doc_get_cached("systems", system_id)
+
+        # Explicit preferred_token_id in the request beats the system-level preference.
+        preferred = payload.pop("preferred_token_id", None) or (system_doc or {}).get("preferred_token_id")
        token = await assign_token(node_id, preferred_token_id=preferred)
        if not token:
            raise HTTPException(503, "No Discord bot tokens available in the pool.")
        payload["token"] = token

+        # Pass system name so the bot can set its Discord presence on join.
+        system_name = (system_doc or {}).get("name")
+        if system_name:
+            payload["system_name"] = system_name
+
    elif cmd.action == "discord_leave":
        await release_token(node_id)

@@ -60,6 +60,34 @@ async def flush_tokens():
    return {"released": len(results)}


+@router.put("/{token_id}/prefer/{system_id}", status_code=200)
+async def set_preferred_system(token_id: str, system_id: str) -> dict:
+    """
+    Mark this token as the preferred bot for a system.
+
+    The preference is stored on both documents: ``preferred_token_id`` on
+    the system and ``preferred_for_system_id`` on the token. The
+    discord_join handler reads the system-side field to pick which token
+    to request first.
+
+    Pass system_id="_none" to clear the preference.
+
+    Any link this call replaces is detached on both sides, so a token is
+    never left preferred by two systems and a system never keeps a stale
+    back-reference from a token it no longer prefers.
+
+    Returns:
+        {"ok": True, "preferred_for_system_id": <system id or None>}
+
+    Raises:
+        HTTPException(404): the token, or the target system, does not exist.
+    """
+    existing = await fstore.doc_get("bot_tokens", token_id)
+    if not existing:
+        raise HTTPException(404, "Token not found.")
+
+    clearing = system_id == "_none"
+    # May be missing (never set) or None (previously cleared); both mean
+    # there is no old link and must not be used as a document id.
+    previous_system_id = existing.get("preferred_for_system_id")
+
+    # Validate the target before writing anything so a 404 leaves both
+    # collections untouched.
+    system_doc = None
+    if not clearing:
+        system_doc = await fstore.doc_get("systems", system_id)
+        if not system_doc:
+            raise HTTPException(404, "System not found.")
+
+    # Detach this token from the system that previously preferred it, but
+    # only if that system still points here: it may have been re-pointed
+    # at another token since, and that newer preference must survive.
+    if previous_system_id and previous_system_id != system_id:
+        previous_system = await fstore.doc_get("systems", previous_system_id)
+        if previous_system and previous_system.get("preferred_token_id") == token_id:
+            await fstore.doc_set("systems", previous_system_id, {"preferred_token_id": None})
+
+    if clearing:
+        await fstore.doc_set("bot_tokens", token_id, {"preferred_for_system_id": None})
+        return {"ok": True, "preferred_for_system_id": None}
+
+    # Detach whichever token the target system preferred before, so that
+    # token does not keep a back-reference to a system that dropped it.
+    # The existence check avoids writing to a token document that has
+    # since been deleted.
+    displaced_token_id = system_doc.get("preferred_token_id")
+    if displaced_token_id and displaced_token_id != token_id:
+        displaced_token = await fstore.doc_get("bot_tokens", displaced_token_id)
+        if displaced_token and displaced_token.get("preferred_for_system_id") == system_id:
+            await fstore.doc_set("bot_tokens", displaced_token_id, {"preferred_for_system_id": None})
+
+    # Set preference on both sides for easy lookup in either direction.
+    await fstore.doc_set("systems", system_id, {"preferred_token_id": token_id})
+    await fstore.doc_set("bot_tokens", token_id, {"preferred_for_system_id": system_id})
+    return {"ok": True, "preferred_for_system_id": system_id}
+
+
@router.delete("/{token_id}", status_code=204)
 async def delete_token(token_id: str):
    existing = await fstore.doc_get("bot_tokens", token_id)
@@ -125,11 +125,13 @@ async def _run_extraction_pipeline(
            location_coords=scene["location_coords"],
            units=corr_units,
            vehicles=scene.get("vehicles"),
+            cleared_units=scene.get("cleared_units"),
        )
        if incident_id and incident_id not in incident_ids:
            incident_ids.append(incident_id)
        if scene["resolved"] and incident_id:
            await fstore.doc_set("incidents", incident_id, {"status": "resolved"})
+            await incident_correlator.maybe_resolve_parent(incident_id)
            logger.info(f"Auto-resolved incident {incident_id} (LLM closure detection)")

    if incident_ids:
@@ -224,11 +226,13 @@ async def _run_intelligence_pipeline(
                location_coords=scene["location_coords"],
                units=corr_units,
                vehicles=scene.get("vehicles"),
+                cleared_units=scene.get("cleared_units"),
            )
            if incident_id and incident_id not in incident_ids:
                incident_ids.append(incident_id)
            if scene["resolved"] and incident_id:
                await fstore.doc_set("incidents", incident_id, {"status": "resolved"})
+                await incident_correlator.maybe_resolve_parent(incident_id)
                logger.info(f"Auto-resolved incident {incident_id} (LLM closure detection)")

        # Correlator also runs for calls with no scenes (unclassified) to attempt