Files
server-26/drb-c2-core/app/internal/mqtt_handler.py
T
Logan 7e1b01a275 Updates to reduce firestore calls to try and stay in free tier
### Firestore read reductions

**1. `doc_get_cached()` in `firestore.py` — new 5-min TTL cache**
One place, benefits everything. System and node config documents almost never change during a monitoring session.

**2. System doc: 4 reads → 1 per call**
| Before | After |
|---|---|
| `upload.py` — `doc_get("systems")` for ai_flags | `doc_get_cached` |
| `transcription.py` — `get_vocabulary()` → `doc_get("systems")` | cache hit |
| `intelligence.py` — `get_vocabulary()` → `doc_get("systems")` | cache hit |
| `intelligence.py` — `doc_get("systems")` again for ten_codes | eliminated (reads same cached doc) |

**3. Node doc: cached in `_on_call_start` and `intelligence.py`**
The node doc is read on every call event to get `assigned_system_id` and lat/lon for geocoding. Both call sites now use the cache — node assignments and positions essentially never change at runtime.

**4. Node sweeper: 30s → 90s interval**
The sweeper was doing a full node collection scan 3× more often than necessary — the offline threshold is already 90s. Cuts sweeper reads by 66%.

**5. Vocabulary induction: scans all-time calls → last 7 days**
Previously fetched every ended call for a system (could be thousands). Now scoped to the last 7 days.

> **Note:** The vocabulary induction query `(system_id == X, ended_at >= cutoff)` needs a Firestore
> composite index on `(system_id ASC, ended_at ASC)`. When the induction loop first fires it will log
> an error with a Firebase Console link to create it in one click.
2026-05-04 02:05:00 -04:00

288 lines
11 KiB
Python

import asyncio
import json
from datetime import datetime, timezone
from typing import Optional
import paho.mqtt.client as mqtt
from app.config import settings
from app.internal.logger import logger
from app.internal import firestore as fstore
class MQTTHandler:
    """Bridge between the paho-mqtt client (which runs its own network
    thread) and the asyncio application: inbound node messages are decoded
    and scheduled onto the app's event loop; outbound commands/config/keys
    are published from synchronous call sites.
    """

    def __init__(self):
        # paho client instance; built lazily in connect()
        self._client: Optional[mqtt.Client] = None
        # asyncio loop used to schedule handlers from the paho thread
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        # True only while the broker session is up (set in callbacks)
        self._connected = False
def _build_client(self) -> mqtt.Client:
    """Construct a v2-callback paho client wired to this handler's callbacks."""
    c = mqtt.Client(
        callback_api_version=mqtt.CallbackAPIVersion.VERSION2,
        client_id="drb-c2-core",
    )
    # Credentials are optional — an anonymous broker is allowed.
    if settings.mqtt_user:
        c.username_pw_set(settings.mqtt_user, settings.mqtt_pass)
    c.on_connect = self._on_connect
    c.on_disconnect = self._on_disconnect
    c.on_message = self._on_message
    return c
def _on_connect(self, client, userdata, flags, reason_code, properties):
    """paho v2 connect callback (runs on the network thread).

    On success, subscribe to every per-node inbound topic family.
    """
    if reason_code != 0:
        logger.error(f"MQTT connect refused: {reason_code}")
        return
    self._connected = True
    for kind in ("checkin", "status", "metadata", "key_request"):
        client.subscribe(f"nodes/+/{kind}", qos=1)
    logger.info("MQTT connected — subscribed to node topics.")
def _on_disconnect(self, client, userdata, disconnect_flags, reason_code, properties):
    """paho v2 disconnect callback — mark the session down and log why."""
    logger.warning(f"MQTT disconnected: {reason_code}")
    self._connected = False
def _on_message(self, client, userdata, msg):
    """Decode an inbound publish and hand it to the asyncio loop.

    Runs on the paho network thread, so the coroutine is scheduled
    thread-safely onto the app's event loop rather than awaited here.
    """
    try:
        payload = json.loads(msg.payload.decode())
    except Exception:
        logger.warning(f"Non-JSON MQTT message on {msg.topic}")
        return
    asyncio.run_coroutine_threadsafe(self._dispatch(msg.topic, payload), self._loop)
async def _dispatch(self, topic: str, payload: dict):
parts = topic.split("/")
# Expected: nodes/{node_id}/{type}
if len(parts) != 3 or parts[0] != "nodes":
return
node_id = parts[1]
msg_type = parts[2]
try:
if msg_type == "checkin":
await self._handle_checkin(node_id, payload)
elif msg_type == "status":
await self._handle_status(node_id, payload)
elif msg_type == "metadata":
await self._handle_metadata(node_id, payload)
elif msg_type == "key_request":
await self._handle_key_request(node_id)
except Exception as e:
logger.error(f"MQTT dispatch error [{msg_type}] from {node_id}: {e}")
# ------------------------------------------------------------------
# Checkin — upsert node; flag new unconfigured nodes
# ------------------------------------------------------------------
async def _handle_checkin(self, node_id: str, payload: dict):
    """Upsert the node document on a periodic checkin.

    First contact creates an unconfigured node pending admin approval;
    subsequent checkins refresh last_seen/name/position and promote
    configured nodes back to "online".
    """
    prior = await fstore.doc_get("nodes", node_id)
    seen_at = datetime.now(timezone.utc).isoformat()
    if not prior:
        # Never seen before — register as unconfigured, pending approval.
        await fstore.doc_set(
            "nodes",
            node_id,
            {
                "node_id": node_id,
                "name": payload.get("name", node_id),
                "lat": payload.get("lat", 0.0),
                "lon": payload.get("lon", 0.0),
                "status": "unconfigured",
                "configured": False,
                "last_seen": seen_at,
                "assigned_system_id": None,
                "approval_status": "pending",
            },
            merge=False,
        )
        logger.info(f"New node registered: {node_id} — pending admin approval.")
    else:
        updates = {
            "last_seen": seen_at,
            "name": payload.get("name", prior.get("name", node_id)),
            "lat": payload.get("lat", prior.get("lat", 0.0)),
            "lon": payload.get("lon", prior.get("lon", 0.0)),
        }
        # Only promote to online if already configured, and never clobber
        # an explicit "recording" status.
        if prior.get("configured") and prior.get("status") not in ("recording",):
            updates["status"] = "online"
        await fstore.doc_update("nodes", node_id, updates)
    # Release any orphaned Discord token when the node explicitly reports
    # disconnected (local import avoids a module cycle with the router).
    if payload.get("discord_connected") is False:
        from app.routers.tokens import release_token
        await release_token(node_id)
# ------------------------------------------------------------------
# Status update
# ------------------------------------------------------------------
async def _handle_status(self, node_id: str, payload: dict):
status = payload.get("status")
if not status:
return
await fstore.doc_update("nodes", node_id, {
"status": status,
"last_seen": datetime.now(timezone.utc).isoformat(),
})
# ------------------------------------------------------------------
# Metadata — call_start / call_end events
# ------------------------------------------------------------------
async def _handle_metadata(self, node_id: str, payload: dict):
event = payload.get("event")
if event == "call_start":
await self._on_call_start(node_id, payload)
elif event == "call_end":
await self._on_call_end(node_id, payload)
async def _on_call_start(self, node_id: str, payload: dict):
call_id = payload.get("call_id")
if not call_id:
return
# Look up assigned system for this node (cached — assignment rarely changes)
node = await fstore.doc_get_cached("nodes", node_id)
system_id = node.get("assigned_system_id") if node else None
started_at_raw = payload.get("started_at")
started_at = (
datetime.fromisoformat(started_at_raw)
if started_at_raw
else datetime.now(timezone.utc)
)
# Prefer the name from OP25 metadata; fall back to the system config
tgid_name = payload.get("tgid_name") or ""
if not tgid_name and system_id and payload.get("tgid"):
system_doc = await fstore.doc_get_cached("systems", system_id)
if system_doc:
tgid_int = int(payload["tgid"])
for tg in system_doc.get("config", {}).get("talkgroups", []):
if int(tg.get("id", -1)) == tgid_int:
tgid_name = tg.get("name", "")
break
doc = {
"call_id": call_id,
"node_id": node_id,
"system_id": system_id,
"talkgroup_id": payload.get("tgid"),
"talkgroup_name": tgid_name,
"freq": payload.get("freq"),
"srcaddr": payload.get("srcaddr"),
"started_at": started_at,
"ended_at": None,
"audio_url": None,
"transcript": None,
"incident_id": None,
"location": None,
"tags": [],
"status": "active",
}
await fstore.doc_set("calls", call_id, doc, merge=False)
logger.info(f"Call start: {call_id} (node={node_id}, tgid={payload.get('tgid')})")
async def _on_call_end(self, node_id: str, payload: dict):
call_id = payload.get("call_id")
if not call_id:
return
ended_at_raw = payload.get("ended_at")
ended_at = (
datetime.fromisoformat(ended_at_raw)
if ended_at_raw
else datetime.now(timezone.utc)
)
updates = {
"ended_at": ended_at,
"status": "ended",
}
if payload.get("audio_url"):
updates["audio_url"] = payload["audio_url"]
await fstore.doc_set("calls", call_id, updates)
logger.info(f"Call end: {call_id}")
# ------------------------------------------------------------------
# Key request — re-deliver an existing approved key to a node that
# lost its credentials (e.g. after a directory move / fresh volume)
# ------------------------------------------------------------------
async def _handle_key_request(self, node_id: str):
    """Re-send a previously provisioned API key to a node that lost it."""
    key_doc = await fstore.doc_get("node_keys", node_id)
    api_key = (key_doc or {}).get("api_key")
    if not api_key:
        logger.warning(f"Key request from {node_id} but no key found in Firestore — node may not be approved yet.")
        return
    self.publish_node_key(node_id, api_key)
    logger.info(f"Re-delivered API key to {node_id} on request.")
# ------------------------------------------------------------------
# Outbound — send a command to a specific node
# ------------------------------------------------------------------
def send_command(self, node_id: str, payload: dict) -> bool:
    """Publish a command payload to one node's command topic.

    Returns True when the publish was handed to the client, False when
    the broker connection is down.
    """
    if not (self._client and self._connected):
        logger.warning(f"MQTT not connected — could not send command to {node_id}")
        return False
    self._client.publish(f"nodes/{node_id}/commands", json.dumps(payload), qos=1)
    logger.info(f"Command sent to {node_id}: {payload.get('action')}")
    return True
def push_config(self, node_id: str, system_config: dict):
    """Publish a full system configuration to one node (best-effort)."""
    if not (self._client and self._connected):
        logger.warning(f"MQTT not connected — could not push config to {node_id}")
        return
    self._client.publish(f"nodes/{node_id}/config", json.dumps(system_config), qos=1)
    logger.info(f"Config pushed to {node_id}")
def publish_node_key(self, node_id: str, api_key: str):
    """Publish the provisioned API key to the node (retained so it survives reconnects)."""
    if not (self._client and self._connected):
        logger.warning(f"MQTT not connected — could not provision key to {node_id}")
        return
    # qos=2 + retain: exactly-once delivery, and late subscribers still get it.
    self._client.publish(
        f"nodes/{node_id}/api_key",
        json.dumps({"api_key": api_key}),
        qos=2,
        retain=True,
    )
    logger.info(f"API key provisioned to {node_id}")
# ------------------------------------------------------------------
# Lifecycle
# ------------------------------------------------------------------
async def connect(self):
    """Start the MQTT client and begin connecting in the background."""
    # get_running_loop(): we are inside a coroutine, and get_event_loop()
    # is deprecated for this use (and slated for removal) — the result is
    # identical here: the currently running loop.
    self._loop = asyncio.get_running_loop()
    self._client = self._build_client()
    # Start the paho network loop first so it drives reconnects automatically,
    # then keep attempting the initial TCP connect until it succeeds.
    self._client.loop_start()
    # Keep a reference to the task: the event loop holds only weak refs to
    # tasks, so an unreferenced create_task() result can be GC'd mid-flight.
    self._retry_task = asyncio.create_task(self._connect_with_retry())
async def _connect_with_retry(self):
    """Retry the initial TCP connect forever with capped exponential backoff."""
    delay = 5
    logger.info(f"MQTT connecting to {settings.mqtt_broker}:{settings.mqtt_port}")
    while True:
        try:
            self._client.connect(settings.mqtt_broker, settings.mqtt_port, keepalive=60)
        except Exception as e:
            logger.warning(f"MQTT connect failed ({e}) — retrying in {delay}s")
            await asyncio.sleep(delay)
            delay = min(delay * 2, 60)  # back off up to a 60s ceiling
        else:
            # Once the TCP connect succeeds, paho's own network loop
            # (started in connect()) takes over reconnect handling.
            return
async def disconnect(self):
    """Stop the paho network thread and close the broker connection."""
    if self._client is None:
        return  # never connected — nothing to tear down
    self._client.loop_stop()
    self._client.disconnect()
@property
def is_connected(self) -> bool:
    # True only between a successful CONNECT ack and the next disconnect.
    return self._connected
# Module-level singleton instance of the handler.
mqtt_handler = MQTTHandler()