9842b18799
- correlator: unit_overlap on dispatch channels now applies content divergence check when the call has geocoded coords but the incident doesn't; previously this gap caused unrelated calls to merge into stale incidents (e.g. patrol officer at a second scene 70 min later) - STT: switch default model from gpt-4o-transcribe to whisper-1, which faithfully transcribes all exchanges in multi-PTT recordings; gpt-4o was silently dropping utterances, starving the correlation engine - STT: remove vocabulary from the Whisper prompt; whisper-1 echoes prompted terms into noise/silence, skewing extracted incident data; vocabulary context is now applied exclusively in the GPT extraction step (build_gpt_vocab_block) where it is used as reference only
62 lines
2.9 KiB
Python
62 lines
2.9 KiB
Python
from typing import Optional

from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
|
|
|
|
class Settings(BaseSettings):
    """Runtime configuration for the service.

    Each attribute below is a setting with a built-in default. Values are
    loaded by pydantic-settings from the process environment and from the
    ``.env`` file named in ``model_config``.
    """

    # Pydantic v2 style configuration (replaces the deprecated nested
    # ``class Config``); loads the same ``.env`` file as before.
    model_config = SettingsConfigDict(env_file=".env")

    # MQTT
    mqtt_broker: str = "localhost"
    mqtt_port: int = 1883
    mqtt_user: Optional[str] = None
    mqtt_pass: Optional[str] = None

    # GCP
    gcp_credentials_path: Optional[str] = None  # None → uses ADC
    gcs_bucket: Optional[str] = None  # None → audio upload disabled
    firestore_database: str = "(default)"

    # Node health
    # Seconds without a check-in before a node is marked offline.
    node_offline_threshold: int = 90

    # OpenAI (STT + intelligence)
    openai_api_key: Optional[str] = None
    # Accepted values: whisper-1 | gpt-4o-mini-transcribe | gpt-4o-transcribe
    stt_model: str = "whisper-1"

    # Google Maps (geocoding)
    google_maps_api_key: Optional[str] = None

    # Gemini (intelligence extraction, embeddings, incident summaries)
    gemini_api_key: Optional[str] = None

    # Correlation consensus models
    #   corr_cheap_model — first-pass LLM correlator (runs on every call)
    #   corr_smart_model — tiebreaker (only fires when rules and cheap LLM
    #                      disagree)
    corr_cheap_model: str = "gemini-2.0-flash"
    corr_smart_model: str = "gemini-1.5-pro"

    # How often the summary loop runs.
    summary_interval_minutes: int = 2
    # Slow/location path: max hours since last call.
    correlation_window_hours: int = 2
    # Slow path: requires location corroboration.
    embedding_similarity_threshold: float = 0.93
    # Slow path: match without location (very high bar).
    embedding_no_location_threshold: float = 0.97
    # Cross-TG path: same dept + 2+ shared units.
    embedding_cross_tg_threshold: float = 0.85
    # Radius for location-proximity matching.
    location_proximity_km: float = 0.5
    # Reject geocode results farther than this from the node.
    geocode_max_km: float = 40.0
    # Auto-resolve after N minutes with no new calls.
    incident_auto_resolve_minutes: int = 90
    # Unit-continuity path: skip if the incident has been idle longer than this.
    unit_continuity_max_idle_minutes: int = 20
    # Re-examine orphaned calls that ended within this window.
    recorrelation_scan_minutes: int = 60
    # Fast path: max minutes since the incident was last updated.
    tg_fast_path_idle_minutes: int = 90
    # Dispatch channels only: thin calls only attach to incidents idle for
    # fewer than this many minutes.
    tg_dispatch_thin_idle_minutes: int = 10

    # Vocabulary learning
    # How often the induction loop runs.
    vocabulary_induction_interval_hours: int = 24
    # ~tokens of transcript text sampled per system.
    vocabulary_induction_sample_tokens: int = 4000

    # Internal service key — allows server-side services (discord bot) to
    # call C2 without Firebase.
    service_key: Optional[str] = None

    # CORS — list of allowed origins; the default ["*"] allows all.
    # NOTE: pydantic-settings decodes list-typed fields from the environment
    # as JSON, so the env value must be a JSON array, e.g.
    #   CORS_ORIGINS='["https://a.example", "https://b.example"]'
    # A plain comma-separated string (or a bare *) is not valid JSON and is
    # rejected when the settings are loaded.
    cors_origins: list[str] = ["*"]
|
|
|
|
|
|
# Shared module-level instance, constructed when this module is first imported.
settings = Settings()
|