Massive update

2026-04-11 13:44:08 -04:00
parent fd6c2fd8bf
commit 3b3a136d04
31 changed files with 1919 additions and 94 deletions
@@ -0,0 +1,70 @@
+"""
+Speech-to-text transcription for recorded calls.
+
+Uses Google Cloud Speech-to-Text v1 (authenticated via the same ADC / service
+account used by firebase-admin and google-cloud-storage).
+
+Triggered as a background task from the upload endpoint after a call audio
+file has been successfully stored in GCS.
+"""
+import asyncio
+from typing import Optional
+from app.internal.logger import logger
+from app.internal import firestore as fstore
+
+
+async def transcribe_call(call_id: str, gcs_uri: str) -> Optional[str]:
+    """
+    Transcribe audio at the given GCS URI and store the result in Firestore.
+
+    Args:
+        call_id: Firestore document ID in the 'calls' collection.
+        gcs_uri:  GCS URI of the audio file, e.g. gs://bucket/calls/xyz.mp3
+
+    Returns:
+        The transcript string, or None if transcription failed / was skipped.
+    """
+    if not gcs_uri or not gcs_uri.startswith("gs://"):
+        return None
+
+    try:
+        transcript = await asyncio.to_thread(_sync_transcribe, gcs_uri)
+    except Exception as e:
+        logger.warning(f"Transcription failed for call {call_id}: {e}")
+        return None
+
+    if transcript:
+        try:
+            await fstore.doc_update("calls", call_id, {"transcript": transcript})
+            logger.info(f"Transcript saved for call {call_id} ({len(transcript)} chars)")
+        except Exception as e:
+            logger.warning(f"Could not save transcript for {call_id}: {e}")
+
+    return transcript
+
+
+def _sync_transcribe(gcs_uri: str) -> Optional[str]:
+    """Synchronous STT call — run in a thread via asyncio.to_thread."""
+    from google.cloud import speech
+
+    client = speech.SpeechClient()
+
+    audio = speech.RecognitionAudio(uri=gcs_uri)
+    config = speech.RecognitionConfig(
+        encoding=speech.RecognitionConfig.AudioEncoding.MP3,
+        sample_rate_hertz=22050,
+        language_code="en-US",
+        enable_automatic_punctuation=True,
+        model="latest_long",
+    )
+
+    # Use long_running_recognize for reliability; it handles both short and long audio
+    operation = client.long_running_recognize(config=config, audio=audio)
+    response = operation.result(timeout=120)
+
+    parts = [
+        result.alternatives[0].transcript
+        for result in response.results
+        if result.alternatives
+    ]
+    return " ".join(parts).strip() or None