Add README, faster-whisper backend, and session fixes
- README explaining experimental/transparency purpose
- faster-whisper STT backend (fw-stt.mjs, faster-whisper-server.py, install-faster-whisper.sh)
- Bug fixes: Buffer alignment in on_audio, --debug-waveform URL parsing, silent fetch errors, instant dispatch timer leak
- Global uncaughtException/unhandledRejection handlers in query-demo.mjs
- Design docs: CHANGELOG, COMMAND-DISPATCH, INTERFACE-THEORY, VOICE-POLICY
- Systemd service unit templates

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
81
faster-whisper-server.py
Executable file
81
faster-whisper-server.py
Executable file
@@ -0,0 +1,81 @@
|
||||
"""
|
||||
faster-whisper transcription server.
|
||||
|
||||
Protocol (stdin/stdout, one JSON line per exchange):
|
||||
Request: {"audio_b64": "<base64 float32 LE>", "sample_rate": 16000}
|
||||
Response: {"text": "...", "words": [{"word": "...", "start": 0.0, "end": 0.1, "probability": 0.99}]}
|
||||
|
||||
Startup: writes "ready\n" to stdout when the model is loaded.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import base64
|
||||
import argparse
|
||||
import traceback
|
||||
import numpy as np
|
||||
|
||||
# Command-line options, each an optional flag with a string default.
# argparse exposes '--compute-type' as ``args.compute_type``.
parser = argparse.ArgumentParser()
for _flag, _default in (
    ('--model', 'base.en'),
    ('--device', 'cuda'),
    ('--compute-type', 'int8'),
):
    parser.add_argument(_flag, default=_default)
args = parser.parse_args()
|
||||
|
||||
# Report the selected configuration on stderr and flush it immediately.
print(
    f'[fw-server] loading {args.model} ({args.device}, {args.compute_type})...\n',
    end='',
    file=sys.stderr,
    flush=True,
)

# Guarded import: when the package is missing, print an install hint to
# stderr and exit with status 1.
try:
    from faster_whisper import WhisperModel
except ImportError:
    print(
        '[fw-server] faster-whisper not installed — run: pip install faster-whisper\n',
        end='',
        file=sys.stderr,
    )
    raise SystemExit(1)
|
||||
|
||||
# Load the model with the requested device and compute type. Any failure
# triggers a single retry on CPU with int8; a failure of that retry
# propagates and terminates the process.
_requested = {'device': args.device, 'compute_type': args.compute_type}
_cpu_fallback = {'device': 'cpu', 'compute_type': 'int8'}

try:
    model = WhisperModel(args.model, **_requested)
    print(f'[fw-server] ready on {args.device}\n', end='', file=sys.stderr)
except Exception as load_error:
    print(
        f'[fw-server] {args.device} failed ({load_error}), falling back to cpu\n',
        end='',
        file=sys.stderr,
    )
    model = WhisperModel(args.model, **_cpu_fallback)
    print('[fw-server] ready on cpu\n', end='', file=sys.stderr)

# Startup handshake: a single "ready" line on stdout, flushed immediately.
print('ready\n', end='', flush=True)
|
||||
|
||||
# Sample rate that raw waveforms are normalized to before transcription.
# faster-whisper expects a waveform passed as an array to be 16 kHz audio.
_TARGET_SAMPLE_RATE = 16000


def _decode_audio(req):
    """Return the request's audio as native float32 samples at 16 kHz.

    Args:
        req: Decoded JSON request. Must contain 'audio_b64' (base64 of
            little-endian float32 samples); may contain 'sample_rate'
            (defaults to 16000 when missing or null).

    Returns:
        A one-dimensional float32 numpy array sampled at 16 kHz.

    Raises:
        KeyError / TypeError: the request has no usable 'audio_b64' field.
        ValueError: the payload is not a whole number of float32 values, or
            'sample_rate' is not a positive finite number.
    """
    # The protocol specifies little-endian float32, so decode with an
    # explicit byte order rather than the host's native one. On
    # little-endian hosts the astype() below is a no-op (no copy).
    samples = np.frombuffer(base64.b64decode(req['audio_b64']), dtype='<f4')
    samples = samples.astype(np.float32, copy=False)

    rate = req.get('sample_rate')
    if rate is None:
        rate = _TARGET_SAMPLE_RATE
    if (
        isinstance(rate, bool)
        or not isinstance(rate, (int, float))
        or not np.isfinite(rate)
        or rate <= 0
    ):
        raise ValueError(f'invalid sample_rate: {rate!r}')

    if rate != _TARGET_SAMPLE_RATE and samples.size:
        # Linear-interpolation resample to 16 kHz. There is no anti-alias
        # filter, so callers that care about quality should send 16 kHz.
        n_out = max(1, int(round(samples.size * _TARGET_SAMPLE_RATE / rate)))
        src_pos = np.arange(samples.size)
        dst_pos = np.arange(n_out) * (rate / _TARGET_SAMPLE_RATE)
        samples = np.interp(dst_pos, src_pos, samples).astype(np.float32)

    return samples


def _transcribe(audio):
    """Run the loaded model on *audio* and return ``(text, words)``.

    ``text`` is the concatenation of all segment texts; ``words`` is a list
    of dicts with 'word', 'start', 'end' and 'probability' (numeric values
    rounded to 4 decimal places).
    """
    segments, _ = model.transcribe(
        audio,
        language='en',
        word_timestamps=True,
        vad_filter=False,  # VAD already done upstream
    )

    pieces = []
    words = []
    # Segments are consumed inside this function, so any error raised while
    # iterating them surfaces to the caller's error handling.
    for seg in segments:
        pieces.append(seg.text)
        words.extend(
            {
                'word': w.word,
                'start': round(float(w.start), 4),
                'end': round(float(w.end), 4),
                'probability': round(float(w.probability), 4),
            }
            for w in (seg.words or [])
        )
    return ''.join(pieces), words


# Request loop: one JSON request per stdin line, exactly one JSON response
# line on stdout per non-blank request. Failures are reported in-band via an
# 'error' field so the loop keeps serving later requests.
for raw_line in sys.stdin:
    line = raw_line.strip()
    if not line:
        continue
    try:
        req = json.loads(line)
        text, words = _transcribe(_decode_audio(req))

        sys.stderr.write(f'[fw-server] {json.dumps(text)} ({len(words)} words)\n')
        sys.stderr.flush()
        reply_line = json.dumps({'text': text, 'words': words})

    except Exception:
        # Format the traceback once and reuse it for the log and the reply.
        trace = traceback.format_exc()
        sys.stderr.write(f'[fw-server] error:\n{trace}\n')
        sys.stderr.flush()
        reply_line = json.dumps({'text': '', 'words': [], 'error': trace})

    sys.stdout.write(reply_line + '\n')
    sys.stdout.flush()
|
||||
Reference in New Issue
Block a user