Initial commit — voice pipeline experiment

STT (Silero VAD + Whisper via sherpa-onnx), Chatterbox TTS HTTP server, query completeness classifier (Ollama), multi-voice demo scripts, and planning docs. Kept as reference; clean rewrite planned in separate repos. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-30 04:48:54 +00:00
commit db8889aeed
35 changed files with 2782 additions and 0 deletions
--- a/tts-server.mjs
+++ b/tts-server.mjs
@@ -0,0 +1,133 @@
+/**
+ * TTS HTTP server — wraps chatterbox-server.py and exposes a simple HTTP API.
+ * Requests are serialized so generation and playback stay in order.
+ *
+ * Usage:
+ *   node tts-server.mjs
+ *   TTS_PORT=11500 node tts-server.mjs
+ *
+ * API:
+ *   POST /speak   { "text": "...", "audio_prompt": "/path/to/voice.wav", ... }
+ *                 → 200 { "ok": true }  (after generation, playback continues in background)
+ *   GET  /voices  → 200 { "voices": [{ "name": "rommie", "description": "...", "active": true }, ...], "current": "rommie" | null }
+ *   POST /voice   { "name": "rommie" }
+ *                 → 200 { "ok": true, "name": "rommie", "path": "..." }
+ *   GET  /health  → 200 { "ok": true }
+ */
+
+import * as http from 'node:http'
+import * as fs   from 'node:fs'
+import * as path from 'node:path'
+import yaml from 'js-yaml'
+import { Chatterbox_Tts } from './lib/chatterbox-tts.mjs'
+
+/**
+ * Parse a TCP port from its decimal string form.
+ *
+ * Only plain decimal digits are accepted, so values such as "0x2ee0",
+ * "11500abc" or "" are rejected instead of being partially parsed.
+ *
+ * @param {string} raw - port as text, surrounding whitespace allowed
+ * @returns {number} integer in the range 0..65535
+ * @throws {RangeError} when raw is not a decimal integer in that range
+ */
+function parse_port(raw) {
+	const digits = String(raw).trim()
+	const port = /^\d+$/.test(digits) ? Number.parseInt(digits, 10) : NaN
+	if (!Number.isInteger(port) || port > 65535) {
+		throw new RangeError(`invalid TTS_PORT: ${JSON.stringify(raw)}`)
+	}
+	return port
+}
+
+// Listening port: TTS_PORT when set, otherwise 11500. Validated here so that a
+// bad value fails immediately rather than at listen() time.
+const PORT        = parse_port(process.env.TTS_PORT ?? '11500')
+// Voice definitions are read from voices.yaml in this module's directory.
+const VOICES_FILE = path.join(import.meta.dirname, 'voices.yaml')
+
+/**
+ * Read the voice table from VOICES_FILE.
+ *
+ * Best-effort by design: every failure yields an empty table instead of
+ * throwing, so a missing or broken voices file never takes the caller down.
+ * Failures other than "file does not exist" are reported on stderr so that a
+ * malformed file is not silently mistaken for "no voices configured".
+ *
+ * @returns {object} the `voices` mapping of the YAML document, or {} when the
+ *   file is missing, unreadable, unparsable, or `voices` is not a mapping
+ */
+function reload_voices() {
+	try {
+		const doc = yaml.load(fs.readFileSync(VOICES_FILE, 'utf8'))
+		const table = doc?.voices
+		if (table == null) return {}
+		// A list or scalar under `voices` cannot be looked up by voice name.
+		if (typeof table !== 'object' || Array.isArray(table)) {
+			process.stderr.write(`[tts-server] ${VOICES_FILE}: "voices" is not a mapping, ignoring it\n`)
+			return {}
+		}
+		return table
+	} catch (err) {
+		// A missing file is the normal "no voices yet" case; stay quiet for it.
+		if (err?.code !== 'ENOENT') {
+			process.stderr.write(`[tts-server] cannot load ${VOICES_FILE}: ${err?.message ?? err}\n`)
+		}
+		return {}
+	}
+}
+
+// Voice table as loaded at startup; the GET /voices and POST /voice handlers
+// replace it with a fresh reload_voices() result on each request.
+let voices = reload_voices()
+// Assigned by the POST /voice handler; POST /speak reads it to pick a default
+// audio_prompt when the request does not carry one.
+let current_voice = null  // name of active voice, or null
+
+// --- TTS setup ---
+// Top-level await: the statements after tts.init() (including creating and
+// starting the HTTP server) do not run until init() has resolved.
+const tts = new Chatterbox_Tts()
+process.stderr.write('[tts-server] starting chatterbox...\n')
+await tts.init()
+process.stderr.write('[tts-server] chatterbox ready\n')
+
+// Tail of the job chain: every queued job starts only after this promise settles.
+let queue = Promise.resolve()
+
+/**
+ * Append a job to the chain so jobs run strictly one after another.
+ *
+ * @param {Function} fn - job to run once all earlier jobs have settled
+ * @returns {Promise} settles with the job's own result or rejection
+ */
+function enqueue(fn) {
+	const outcome = queue.then(fn)
+	// The tail absorbs a rejection so later jobs still run; the caller still
+	// observes the failure through the returned promise.
+	const ignore_failure = () => {}
+	queue = outcome.then(undefined, ignore_failure)
+	return outcome
+}
+
+/**
+ * Collect a request body and parse it as JSON.
+ *
+ * Chunks are gathered as bytes and decoded once at the end. Decoding each
+ * chunk on its own would corrupt any multi-byte UTF-8 character that happens
+ * to be split across two chunks.
+ *
+ * @param {object} req - readable stream emitting 'data', 'end' and 'error'
+ * @returns {Promise<*>} the parsed JSON value (not necessarily an object)
+ * @throws rejects with the SyntaxError from JSON.parse, or with the stream error
+ */
+function read_body(req) {
+	return new Promise((resolve, reject) => {
+		const chunks = []
+		req.on('data', chunk => {
+			// Chunks are strings when an encoding was set on the stream.
+			chunks.push(typeof chunk === 'string' ? Buffer.from(chunk, 'utf8') : chunk)
+		})
+		req.on('end', () => {
+			try {
+				resolve(JSON.parse(Buffer.concat(chunks).toString('utf8')))
+			} catch (e) {
+				reject(e)
+			}
+		})
+		req.on('error', reject)
+	})
+}
+
+/**
+ * Write a JSON response and finish it.
+ *
+ * @param {object} res - response object providing writeHead() and end()
+ * @param {number} status - HTTP status code
+ * @param {*} body - value to serialize as the response payload
+ */
+function send(res, status, body) {
+	// Serialize first: if that throws, nothing has been written to res yet.
+	const json = JSON.stringify(body)
+	const headers = { 'Content-Type': 'application/json' }
+	res.writeHead(status, headers)
+	res.end(json)
+}
+
+/**
+ * HTTP front end.
+ *
+ *   GET  /health → 200 { ok: true }
+ *   GET  /voices → reloads the voice table and lists it, flagging the active voice
+ *   POST /voice  → selects the default voice by name (404 for unknown names)
+ *   POST /speak  → queues the text behind earlier speak requests and answers
+ *                  once tts.speak_streaming() has settled
+ *   anything else → 404
+ *
+ * The whole handler runs inside one try/catch. An exception escaping an async
+ * request handler would otherwise surface as an unhandled promise rejection
+ * and leave the client without a response.
+ */
+const server = http.createServer(async (req, res) => {
+	// Match on the path only, so a query string cannot turn a known route into a 404.
+	const route = `${req.method} ${(req.url ?? '').split('?', 1)[0]}`
+
+	// Reads the body and returns it only when it is a JSON object. JSON.parse
+	// also yields null, numbers, strings and arrays; destructuring null would
+	// throw, so those are answered with 400. Returns undefined once a 400 has
+	// been sent.
+	const read_object = async () => {
+		let body
+		try {
+			body = await read_body(req)
+		} catch {
+			send(res, 400, { error: 'invalid JSON' })
+			return undefined
+		}
+		if (typeof body !== 'object' || body === null || Array.isArray(body)) {
+			send(res, 400, { error: 'JSON object required' })
+			return undefined
+		}
+		return body
+	}
+
+	// Own-property lookup: names such as "constructor" or "toString" must not
+	// resolve to members inherited from Object.prototype.
+	const lookup_voice = name =>
+		(voices != null && Object.hasOwn(voices, name) && voices[name]) || null
+
+	try {
+		if (route === 'GET /health') {
+			return send(res, 200, { ok: true })
+		}
+
+		if (route === 'GET /voices') {
+			voices = reload_voices()
+			const list = Object.entries(voices).map(([name, v]) => ({
+				name,
+				// An entry without a body parses as null; treat it as "no description".
+				description: v?.description ?? '',
+				active: name === current_voice,
+			}))
+			return send(res, 200, { voices: list, current: current_voice })
+		}
+
+		if (route === 'POST /voice') {
+			const body = await read_object()
+			if (body === undefined) return
+			const { name } = body
+			if (!name || typeof name !== 'string') {
+				return send(res, 400, { error: 'name required' })
+			}
+			voices = reload_voices()
+			const voice = lookup_voice(name)
+			if (!voice) return send(res, 404, { error: `unknown voice: ${name}` })
+			current_voice = name
+			process.stderr.write(`[tts-server] voice switched to: ${name}\n`)
+			return send(res, 200, { ok: true, name, path: voice.path })
+		}
+
+		if (route === 'POST /speak') {
+			const body = await read_object()
+			if (body === undefined) return
+
+			const { text, ...opts } = body
+			if (!text) {
+				return send(res, 400, { error: 'text required' })
+			}
+
+			// Inject current voice as default audio_prompt if none provided.
+			// NOTE(review): a client-supplied audio_prompt is forwarded as-is;
+			// confirm that accepting arbitrary paths from clients is intended.
+			const active = current_voice ? lookup_voice(current_voice) : null
+			if (!opts.audio_prompt && active) {
+				opts.audio_prompt = active.path
+			}
+
+			try {
+				await enqueue(() => tts.speak_streaming(text, { preprocess: false, ...opts }))
+			} catch (err) {
+				// A rejection value is not guaranteed to be an Error instance.
+				return send(res, 500, { error: err?.message ?? String(err) })
+			}
+			return send(res, 200, { ok: true })
+		}
+
+		return send(res, 404, { error: 'not found' })
+	} catch (err) {
+		process.stderr.write(`[tts-server] request failed: ${err?.stack ?? err}\n`)
+		// Once headers are out, a second writeHead would throw; just close.
+		if (res.headersSent) res.end()
+		else send(res, 500, { error: 'internal error' })
+	}
+})
+
+// Announce on stderr once the server has started listening on PORT.
+const announce_listening = () => {
+	process.stderr.write(`[tts-server] listening on port ${PORT}\n`)
+}
+server.once('listening', announce_listening)
+server.listen(PORT)