Start learning vocabulary from talkgroups to improve speech-to-text (STT) accuracy

This commit is contained in:
Logan
2026-04-21 22:17:30 -04:00
parent 6612e4b683
commit 338b946ba3
11 changed files with 759 additions and 8 deletions
+20 -3
View File
@@ -28,6 +28,7 @@ async def transcribe_call(
call_id: str,
gcs_uri: str,
talkgroup_name: Optional[str] = None,
system_id: Optional[str] = None,
) -> tuple[Optional[str], list[dict]]:
"""
Transcribe audio at the given GCS URI and store the result in Firestore.
@@ -39,8 +40,17 @@ async def transcribe_call(
if not gcs_uri or not gcs_uri.startswith("gs://"):
return None, []
# Load vocabulary for this system (empty list if none yet)
vocabulary: list[str] = []
if system_id:
from app.internal.vocabulary_learner import get_vocabulary
vocab_data = await get_vocabulary(system_id)
vocabulary = vocab_data.get("vocabulary") or []
try:
transcript, segments = await asyncio.to_thread(_sync_transcribe, gcs_uri, talkgroup_name)
transcript, segments = await asyncio.to_thread(
_sync_transcribe, gcs_uri, talkgroup_name, vocabulary
)
except Exception as e:
logger.warning(f"Transcription failed for call {call_id}: {e}")
return None, []
@@ -61,7 +71,11 @@ async def transcribe_call(
return transcript, segments
def _sync_transcribe(gcs_uri: str, talkgroup_name: Optional[str] = None) -> tuple[Optional[str], list[dict]]:
def _sync_transcribe(
gcs_uri: str,
talkgroup_name: Optional[str] = None,
vocabulary: Optional[list[str]] = None,
) -> tuple[Optional[str], list[dict]]:
"""Download audio from GCS and transcribe with OpenAI Whisper."""
from google.cloud import storage as gcs
from google.oauth2 import service_account
@@ -94,7 +108,10 @@ def _sync_transcribe(gcs_uri: str, talkgroup_name: Optional[str] = None) -> tupl
try:
blob.download_to_filename(tmp_path)
prompt = (f"Talkgroup: {talkgroup_name}. " + _WHISPER_PROMPT) if talkgroup_name else _WHISPER_PROMPT
from app.internal.vocabulary_learner import build_whisper_vocab_prompt
vocab_prefix = build_whisper_vocab_prompt(vocabulary or [])
tg_prefix = f"Talkgroup: {talkgroup_name}. " if talkgroup_name else ""
prompt = tg_prefix + vocab_prefix + _WHISPER_PROMPT
openai_client = OpenAI(api_key=settings.openai_api_key)
with open(tmp_path, "rb") as f: