WebSocket server, language/task args, verbose flag, misc improvements #2

Merged
mikael-lovqvist merged 13 commits from mikael-lovqvists-claude-agent/stt-server:websocket-server into main 2026-06-07 09:27:02 +00:00
Showing only changes of commit f18330608d - Show all commits

View File

@@ -16,13 +16,15 @@ Every WebSocket connection receives the full event stream from the moment it
connects — no subscription handshake required.
All log/status messages go to stderr. Stdout is machine-readable events only.
Pass --verbose to enable info logging (startup, VAD events, transcripts).
Errors always go to stderr regardless of verbosity.
Environment:
STT_PORT WebSocket port (default: 11501)
Usage:
./stt-server.py
./stt-server.py --model large-v3 --device cuda --compute-type int8_float16
./stt-server.py --model large-v3 --device cuda --compute-type int8_float16 --verbose
"""
import sys
@@ -45,9 +47,10 @@ HISTORY_SAMPLES = 960000 # 60s ring buffer for pre-roll
PORT = int(__import__('os').environ.get('STT_PORT', 11501))
def log(msg):
sys.stderr.write(f'[stt] {msg}\n')
sys.stderr.flush()
def log(msg, error=False):
if error or verbose:
sys.stderr.write(f'[stt] {msg}\n')
sys.stderr.flush()
# --- WebSocket broadcast ---
@@ -118,7 +121,9 @@ parser = argparse.ArgumentParser()
parser.add_argument('--model', default='base.en')
parser.add_argument('--device', default='cuda')
parser.add_argument('--compute-type', default='int8_float16')
args = parser.parse_args()
parser.add_argument('--verbose', '-v', action='store_true')
args = parser.parse_args()
verbose = args.verbose
log(f'loading faster-whisper {args.model} ({args.device}, {args.compute_type})...')
from faster_whisper import WhisperModel
@@ -126,7 +131,7 @@ try:
model = WhisperModel(args.model, device=args.device, compute_type=args.compute_type)
log(f'model ready on {args.device}')
except Exception as e:
log(f'{args.device} failed ({e}), falling back to cpu')
log(f'{args.device} failed ({e}), falling back to cpu', error=True)
model = WhisperModel(args.model, device='cpu', compute_type='int8')
log('model ready on cpu')
@@ -197,7 +202,7 @@ def transcription_worker():
emit({'event': 'transcript', 'text': text.strip(), 'words': words, 'duration': round(duration, 3)})
except Exception:
msg = traceback.format_exc()
log(f'transcription error:\n{msg}')
log(f'transcription error:\n{msg}', error=True)
emit({'event': 'error', 'message': msg})
finally:
transcription_queue.task_done()