Start learning vocabulary from talkgroups to improve STT accuracy

2026-04-21 22:17:30 -04:00
parent 6612e4b683
commit 338b946ba3
11 changed files with 759 additions and 8 deletions
@@ -83,6 +83,13 @@ async def patch_transcript(
        "embedding": None,
    })

+    # Learn from the correction: diff original → corrected and add new tokens to vocabulary
+    system_id = call.get("system_id")
+    original_text = call.get("transcript_corrected") or call.get("transcript") or ""
+    if system_id and original_text:
+        from app.internal.vocabulary_learner import learn_from_correction
+        await learn_from_correction(system_id, original_text, body.transcript)
+
    from app.routers.upload import _run_extraction_pipeline
    background_tasks.add_task(
        _run_extraction_pipeline,