Files
server-26/drb-c2-core/app/routers/nodes.py
T
Logan 8b660d8e10 feat: incident correlation overhaul, signal-based auto-resolve, token fixes
Correlator
- Raise fast-path idle gate 30 → 90 min (tg_fast_path_idle_minutes)
- Fix disambiguate always-commits bug: run _call_fits_incident on winner
  before committing; fall through to new-incident creation if it fails
- Add unit-continuity path (path 1.5): matches all_active by shared unit
  IDs with a reassignment guard, bridges calls past the idle gate
- Add tag-based incident_type inference (_TAG_TYPE_HINTS) as GPT fallback,
  rescuing tagged calls that would have been dropped (616 observed orphans)
- Add master/child incident model: _create_master_incident, _demote_to_child,
  _add_child_to_master; new incidents stamped incident_type="master"
- Add cross-system parent detection (_find_cross_system_parent): two-signal
  scoring (road overlap=0.4, embedding≥0.78=0.3, proximity=0.3, threshold=0.5)
  wired into create-if-new path; creates master shell on first cross-system match
- Add maybe_resolve_parent: auto-resolves master when all children close;
  called from upload pipeline (LLM closure) and summarizer stale sweep
- Add signal-based auto-resolve via units_active/units_cleared tracking:
  GPT now extracts cleared_units per scene; _update_incident moves units
  between active/cleared lists and resolves the incident when active empties;
  stored on call doc for re-correlation sweep reuse
- Add _create_incident initialization of units_active/units_cleared fields

Re-correlation sweep
- Add corr_sweep_count + MAX_SWEEP_ATTEMPTS=3: orphans get 3 attempts
  then are tombstoned as corr_path="unlinked", ending the re-sweep loop
  (previously hammering each orphan 29-31 times per shift)

Intelligence extraction
- Add cleared_units to GPT prompt schema and rules
- Extract and propagate cleared_units per scene; merge across scenes;
  store on call doc for re-correlation sweep

Token management
- Fix token release bug: remove release_token call on discord_connected=False
  in MQTT checkin (transient Discord drops were orphaning bots mid-shift)
- Add PUT /tokens/{id}/prefer/{system_id} endpoint: lock a bot token to a
  system; pass _none as system_id to clear; stored bidirectionally on both
  token and system documents
- discord_join handler resolves preferred_token_id from system doc and passes
  system_name in MQTT payload
2026-05-10 19:49:05 -04:00

119 lines
4.3 KiB
Python

import secrets
from fastapi import APIRouter, HTTPException, Depends
from app.models import CommandPayload
from app.internal import firestore as fstore
from app.internal.mqtt_handler import mqtt_handler
from app.internal.auth import require_admin_token
from app.routers.tokens import assign_token, release_token
router = APIRouter(prefix="/nodes", tags=["nodes"])
@router.get("")
async def list_nodes():
return await fstore.collection_list("nodes")
@router.get("/{node_id}")
async def get_node(node_id: str):
node = await fstore.doc_get("nodes", node_id)
if not node:
raise HTTPException(404, f"Node '{node_id}' not found.")
return node
@router.post("/{node_id}/approve")
async def approve_node(node_id: str, _: dict = Depends(require_admin_token)):
node = await fstore.doc_get("nodes", node_id)
if not node:
raise HTTPException(404, f"Node '{node_id}' not found.")
api_key = secrets.token_hex(32)
await fstore.doc_set("node_keys", node_id, {"node_id": node_id, "api_key": api_key}, merge=False)
await fstore.doc_update("nodes", node_id, {"approval_status": "approved"})
mqtt_handler.publish_node_key(node_id, api_key)
return {"ok": True}
@router.post("/{node_id}/reject")
async def reject_node(node_id: str, _: dict = Depends(require_admin_token)):
node = await fstore.doc_get("nodes", node_id)
if not node:
raise HTTPException(404, f"Node '{node_id}' not found.")
await fstore.doc_update("nodes", node_id, {"approval_status": "rejected"})
return {"ok": True}
@router.post("/{node_id}/command")
async def send_command(node_id: str, cmd: CommandPayload):
node = await fstore.doc_get("nodes", node_id)
if not node:
raise HTTPException(404, f"Node '{node_id}' not found.")
payload = cmd.model_dump(exclude_none=True)
if cmd.action == "discord_join":
# Resolve system doc once — used for preferred token and presence name.
system_doc = None
system_id = node.get("assigned_system_id")
if system_id:
system_doc = await fstore.doc_get_cached("systems", system_id)
# Explicit preferred_token_id in the request beats the system-level preference.
preferred = payload.pop("preferred_token_id", None) or (system_doc or {}).get("preferred_token_id")
token = await assign_token(node_id, preferred_token_id=preferred)
if not token:
raise HTTPException(503, "No Discord bot tokens available in the pool.")
payload["token"] = token
# Pass system name so the bot can set its Discord presence on join.
system_name = (system_doc or {}).get("name")
if system_name:
payload["system_name"] = system_name
elif cmd.action == "discord_leave":
await release_token(node_id)
if not mqtt_handler.send_command(node_id, payload):
raise HTTPException(503, "MQTT broker unavailable — command not delivered.")
return {"ok": True}
@router.post("/{node_id}/reissue-key")
async def reissue_node_key(node_id: str, _: dict = Depends(require_admin_token)):
"""Generate a new API key for the node and push it via MQTT (retained).
Use this to rotate a key or recover a node whose key was lost."""
node = await fstore.doc_get("nodes", node_id)
if not node:
raise HTTPException(404, f"Node '{node_id}' not found.")
api_key = secrets.token_hex(32)
await fstore.doc_set("node_keys", node_id, {"node_id": node_id, "api_key": api_key}, merge=False)
mqtt_handler.publish_node_key(node_id, api_key)
return {"ok": True}
@router.post("/{node_id}/config/{system_id}")
async def assign_system(node_id: str, system_id: str):
"""
Assign a system to a node. Fetches the system config from Firestore
and pushes it to the node via MQTT, then marks the node as configured.
"""
node = await fstore.doc_get("nodes", node_id)
if not node:
raise HTTPException(404, f"Node '{node_id}' not found.")
system = await fstore.doc_get("systems", system_id)
if not system:
raise HTTPException(404, f"System '{system_id}' not found.")
# Push config to the node via MQTT
mqtt_handler.push_config(node_id, system)
# Update Firestore
await fstore.doc_update("nodes", node_id, {
"assigned_system_id": system_id,
"configured": True,
})
return {"ok": True}