forked from efforting.tech/stt-server
Include detected language and confidence in transcript events
Unpacks the transcription info returned by `model.transcribe()` instead of discarding it. Adds `language` and `language_probability` fields to transcript events, and includes them in verbose log output.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -195,7 +195,7 @@ def transcription_worker():
|
||||
break
|
||||
samples, duration = item
|
||||
try:
|
||||
segments, _ = model.transcribe(
|
||||
segments, info = model.transcribe(
|
||||
samples,
|
||||
language=args.language,
|
||||
task=args.task,
|
||||
@@ -213,9 +213,11 @@ def transcription_worker():
|
||||
'end': round(float(w.end), 4),
|
||||
'probability': round(float(w.probability), 4),
|
||||
})
|
||||
log(f'transcript: {json.dumps(text.strip())} ({len(words)} words)')
|
||||
language = info.language
|
||||
lang_prob = round(float(info.language_probability), 3)
|
||||
log(f'transcript [{language} {lang_prob}]: {json.dumps(text.strip())} ({len(words)} words)')
|
||||
if text.strip():
|
||||
emit({'event': 'transcript', 'text': text.strip(), 'words': words, 'duration': round(duration, 3)})
|
||||
emit({'event': 'transcript', 'text': text.strip(), 'words': words, 'duration': round(duration, 3), 'language': language, 'language_probability': lang_prob})
|
||||
except Exception:
|
||||
msg = traceback.format_exc()
|
||||
log(f'transcription error:\n{msg}', error=True)
|
||||
|
||||
Reference in New Issue
Block a user