Intel update
This commit is contained in:
async def transcribe_call(
    call_id: str,
    gcs_uri: str,
    talkgroup_name: Optional[str] = None,
) -> tuple[Optional[str], list[dict]]:
    """
    Transcribe audio at the given GCS URI and store the result in Firestore.

    The call is best-effort: transcription and persistence failures are
    logged as warnings and never raised to the caller.

    Args:
        call_id: Firestore document ID in the 'calls' collection.
        gcs_uri: GCS URI of the audio file, e.g. gs://bucket/calls/xyz.mp3
        talkgroup_name: Passed through to the intelligence layer; unused here.

    Returns:
        (transcript, segments) — transcript is the raw Whisper transcript
        string, or None if transcription failed or produced no text.
        segments is a list of {start, end, text} dicts, one per detected
        transmission. Empty list if transcription failed.
    """
    # Reject missing or non-GCS URIs up front; every exit path returns the
    # same 2-tuple shape so callers can always unpack the result.
    if not gcs_uri or not gcs_uri.startswith("gs://"):
        return None, []

    try:
        # _sync_transcribe is blocking, so run it in a worker thread to keep
        # the event loop responsive.
        transcript, segments = await asyncio.to_thread(_sync_transcribe, gcs_uri)
    except Exception as e:
        logger.warning(f"Transcription failed for call {call_id}: {e}")
        return None, []

    if transcript:
        # Only persist when there is text; "segments" is written only when
        # at least one segment was returned.
        updates: dict = {"transcript": transcript}
        if segments:
            updates["segments"] = segments
        try:
            # NOTE(review): assumes fstore.doc_set merges into the existing
            # call document rather than replacing it — confirm against fstore.
            await fstore.doc_set("calls", call_id, updates)
            logger.info(
                f"Transcript saved for call {call_id} "
                f"({len(transcript)} chars, {len(segments)} segment(s))"
            )
        except Exception as e:
            # A failed save must not discard the transcript: log and still
            # return it to the caller.
            logger.warning(f"Could not save transcript for {call_id}: {e}")

    return transcript, segments
|
||||
|
||||
|
||||
def _sync_transcribe(gcs_uri: str) -> Optional[str]:
|
||||
def _sync_transcribe(gcs_uri: str) -> tuple[Optional[str], list[dict]]:
|
||||
"""Download audio from GCS and transcribe with OpenAI Whisper."""
|
||||
from google.cloud import storage as gcs
|
||||
from google.oauth2 import service_account
|
||||
@@ -99,8 +101,15 @@ def _sync_transcribe(gcs_uri: str) -> Optional[str]:
|
||||
file=f,
|
||||
language="en",
|
||||
prompt=_WHISPER_PROMPT,
|
||||
response_format="verbose_json",
|
||||
)
|
||||
return response.text.strip() or None
|
||||
text = response.text.strip() or None
|
||||
segments = [
|
||||
{"start": round(s.start, 2), "end": round(s.end, 2), "text": s.text.strip()}
|
||||
for s in (response.segments or [])
|
||||
if s.text.strip()
|
||||
]
|
||||
return text, segments
|
||||
finally:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
|
||||
Reference in New Issue
Block a user