Files
server-26/drb-c2-core/app/config.py
T
Logan 9842b18799 Fix correlation false-merge, switch STT to whisper-1 without vocab prompt
- correlator: unit_overlap on dispatch channels now applies content
  divergence check when the call has geocoded coords but the incident
  doesn't; previously this gap caused unrelated calls to merge into
  stale incidents (e.g. patrol officer at a second scene 70 min later)
- STT: switch default model from gpt-4o-transcribe to whisper-1, which
  faithfully transcribes all exchanges in multi-PTT recordings; gpt-4o
  was silently dropping utterances, starving the correlation engine
- STT: remove vocabulary from the Whisper prompt; whisper-1 echoes
  prompted terms into noise/silence, skewing extracted incident data;
  vocabulary context is now applied exclusively in the GPT extraction
  step (build_gpt_vocab_block) where it is used as reference only
2026-06-03 00:51:25 -04:00

62 lines
2.9 KiB
Python

from pydantic_settings import BaseSettings
from typing import Optional
class Settings(BaseSettings):
# MQTT
mqtt_broker: str = "localhost"
mqtt_port: int = 1883
mqtt_user: Optional[str] = None
mqtt_pass: Optional[str] = None
# GCP
gcp_credentials_path: Optional[str] = None # None → uses ADC
gcs_bucket: Optional[str] = None # None → audio upload disabled
firestore_database: str = "(default)"
# Node health
node_offline_threshold: int = 90 # seconds without checkin before marking offline
# OpenAI (STT + intelligence)
openai_api_key: Optional[str] = None
stt_model: str = "whisper-1" # whisper-1 | gpt-4o-mini-transcribe | gpt-4o-transcribe
# Google Maps (geocoding)
google_maps_api_key: Optional[str] = None
# Gemini (intelligence extraction, embeddings, incident summaries)
gemini_api_key: Optional[str] = None
# Correlation consensus models
# corr_cheap_model — first-pass LLM correlator (runs on every call)
# corr_smart_model — tiebreaker (only fires when rules and cheap LLM disagree)
corr_cheap_model: str = "gemini-2.0-flash"
corr_smart_model: str = "gemini-1.5-pro"
summary_interval_minutes: int = 2 # how often the summary loop runs
correlation_window_hours: int = 2 # slow/location path: max hours since last call
embedding_similarity_threshold: float = 0.93 # slow-path: requires location corroboration
embedding_no_location_threshold: float = 0.97 # slow-path: match without location (very high bar)
embedding_cross_tg_threshold: float = 0.85 # cross-TG path: same dept + 2+ shared units
location_proximity_km: float = 0.5 # radius for location-proximity matching
geocode_max_km: float = 40.0 # reject geocode results farther than this from the node
incident_auto_resolve_minutes: int = 90 # auto-resolve after N minutes with no new calls
unit_continuity_max_idle_minutes: int = 20 # unit-continuity path: skip if incident idle > this
recorrelation_scan_minutes: int = 60 # re-examine orphaned calls ended within this window
tg_fast_path_idle_minutes: int = 90 # fast path: max minutes since incident last updated
tg_dispatch_thin_idle_minutes: int = 10 # dispatch channels only: thin calls only attach to incidents idle < this many minutes
# Vocabulary learning
vocabulary_induction_interval_hours: int = 24 # how often the induction loop runs
vocabulary_induction_sample_tokens: int = 4000 # ~tokens of transcript text sampled per system
# Internal service key — allows server-side services (discord bot) to call C2 without Firebase
service_key: Optional[str] = None
# CORS — comma-separated list of allowed origins, or "*" for all
cors_origins: list[str] = ["*"]
class Config:
env_file = ".env"
settings = Settings()