Correlation updates
This commit is contained in:
@@ -121,12 +121,26 @@ def _sync_transcribe(
|
||||
language="en",
|
||||
prompt=prompt,
|
||||
response_format="verbose_json",
|
||||
temperature=0,
|
||||
)
|
||||
text = response.text.strip() or None
|
||||
|
||||
# Filter hallucinated segments. Two sources of hallucination in P25 recordings:
|
||||
#
|
||||
# 1. Trailing silence / static — Whisper fills silence past real content with
|
||||
# sequential radio codes (10-4, 10-5...). Segments that start at or after the audio duration are dropped.
|
||||
#
|
||||
# 2. Leading silence — OP25 recordings typically have a short silence at the
|
||||
# start before the first PTT press. Whisper sometimes hallucinates filler
|
||||
# words or codes over this silence. Detected via no_speech_prob >= 0.8
|
||||
# (Whisper's own confidence that a segment contains no real speech).
|
||||
audio_duration: float = getattr(response, "duration", None) or float("inf")
|
||||
segments = [
|
||||
{"start": round(s.start, 2), "end": round(s.end, 2), "text": s.text.strip()}
|
||||
for s in (response.segments or [])
|
||||
if s.text.strip()
|
||||
and s.start < audio_duration
|
||||
and getattr(s, "no_speech_prob", 0.0) < 0.8
|
||||
]
|
||||
return text, segments
|
||||
finally:
|
||||
|
||||
Reference in New Issue
Block a user