Start to learn vocab from talkgroups to improve accuracy of STT
This commit is contained in:
@@ -36,7 +36,7 @@ Rules:
|
||||
|
||||
System: {system_id}
|
||||
Talkgroup: {talkgroup_name}
|
||||
{transcript_block}"""
|
||||
{vocabulary_block}{transcript_block}"""
|
||||
|
||||
# Nominatim viewbox half-width in degrees (~11 km at mid-latitudes)
|
||||
_GEO_DELTA = 0.1
|
||||
@@ -76,8 +76,15 @@ async def extract_tags(
|
||||
Side-effect: updates calls/{call_id} in Firestore with tags, location,
|
||||
location_coords, vehicles, units, severity, transcript_corrected; also stores embedding.
|
||||
"""
|
||||
# Load per-system vocabulary for prompt injection
|
||||
vocabulary: list[str] = []
|
||||
if system_id:
|
||||
from app.internal.vocabulary_learner import get_vocabulary
|
||||
vocab_data = await get_vocabulary(system_id)
|
||||
vocabulary = vocab_data.get("vocabulary") or []
|
||||
|
||||
result = await asyncio.to_thread(
|
||||
_sync_extract, transcript, talkgroup_name, talkgroup_id, system_id, segments
|
||||
_sync_extract, transcript, talkgroup_name, talkgroup_id, system_id, segments, vocabulary
|
||||
)
|
||||
|
||||
tags: list[str] = result.get("tags") or []
|
||||
@@ -233,6 +240,7 @@ def _sync_extract(
|
||||
talkgroup_id: Optional[int],
|
||||
system_id: Optional[str],
|
||||
segments: Optional[list[dict]],
|
||||
vocabulary: Optional[list[str]] = None,
|
||||
) -> dict:
|
||||
"""Call GPT-4o mini and parse the JSON response."""
|
||||
from app.config import settings
|
||||
@@ -242,11 +250,13 @@ def _sync_extract(
|
||||
logger.warning("OPENAI_API_KEY not set — intelligence extraction disabled.")
|
||||
return {}
|
||||
|
||||
from app.internal.vocabulary_learner import build_gpt_vocab_block
|
||||
tg = f"{talkgroup_name} (TGID {talkgroup_id})" if talkgroup_id else (talkgroup_name or "unknown")
|
||||
prompt = _PROMPT_TEMPLATE.format(
|
||||
transcript_block=_build_transcript_block(transcript, segments),
|
||||
talkgroup_name=tg,
|
||||
system_id=system_id or "unknown",
|
||||
vocabulary_block=build_gpt_vocab_block(vocabulary or []),
|
||||
)
|
||||
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user