Include detected language and confidence in transcript events

Unpacks the info object returned by model.transcribe() instead of
discarding it. Adds language and language_probability (rounded to three
decimal places) fields to transcript events, and includes them in
verbose log output.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-07 09:21:44 +00:00
parent bdb1aac885
commit 0afe761625

View File

@@ -195,7 +195,7 @@ def transcription_worker():
break break
samples, duration = item samples, duration = item
try: try:
segments, _ = model.transcribe( segments, info = model.transcribe(
samples, samples,
language=args.language, language=args.language,
task=args.task, task=args.task,
@@ -213,9 +213,11 @@ def transcription_worker():
'end': round(float(w.end), 4), 'end': round(float(w.end), 4),
'probability': round(float(w.probability), 4), 'probability': round(float(w.probability), 4),
}) })
log(f'transcript: {json.dumps(text.strip())} ({len(words)} words)') language = info.language
lang_prob = round(float(info.language_probability), 3)
log(f'transcript [{language} {lang_prob}]: {json.dumps(text.strip())} ({len(words)} words)')
if text.strip(): if text.strip():
emit({'event': 'transcript', 'text': text.strip(), 'words': words, 'duration': round(duration, 3)}) emit({'event': 'transcript', 'text': text.strip(), 'words': words, 'duration': round(duration, 3), 'language': language, 'language_probability': lang_prob})
except Exception: except Exception:
msg = traceback.format_exc() msg = traceback.format_exc()
log(f'transcription error:\n{msg}', error=True) log(f'transcription error:\n{msg}', error=True)