/**
 * TTS HTTP server — wraps chatterbox-server.py and exposes a simple HTTP API.
 * Requests are serialized so generation and playback stay in order.
 *
 * Usage:
 *   node tts-server.mjs
 *   TTS_PORT=11500 node tts-server.mjs
 *
 * API (all responses are JSON):
 *   POST /speak   { "text": "...", "audio_prompt": "/path/to/voice.wav", "wait": true, ... }
 *                 → 200 { "ok": true, "queued": true }  sent immediately (default)
 *                 → 200 { "ok": true }                  with "wait": true, sent once the
 *                                                       queued speak call has settled
 *                 → 500 { "error": "..." }              with "wait": true, if it failed
 *   GET  /voices  → 200 { "voices": [{ "name", "description", "active" }, ...],
 *                         "current": "rommie" | null }
 *   POST /voice   { "name": "rommie" }
 *                 → 200 { "ok": true, "name": "rommie", "path": "..." }
 *   POST /chime   { "name": "ready" }
 *                 → 200 { "ok": true }  plays the file mapped to that name under
 *                                       `chimes` in voices.yaml
 *   GET  /activity → 200 { "last_speak_at": <ms since epoch, 0 if none yet> }
 *   GET  /health  → 200 { "ok": true }
 *
 * Malformed JSON or a non-object JSON body → 400; unknown voice/chime or route → 404.
 */
import * as http from 'node:http'
import * as fs from 'node:fs'
import * as path from 'node:path'
import yaml from 'js-yaml'
import { Chatterbox_Tts } from './lib/chatterbox-tts.mjs'

const PORT = Number.parseInt(process.env.TTS_PORT ?? '11500', 10)
const VOICES_FILE = path.join(import.meta.dirname, 'voices.yaml')

/**
 * True for non-null, non-array objects (the only shape the handlers can
 * safely destructure or index by name).
 * @param {unknown} value
 * @returns {boolean}
 */
function is_plain_object(value) {
  return value !== null && typeof value === 'object' && !Array.isArray(value)
}

/**
 * Read voices.yaml and return its `voices` and `chimes` mappings.
 * Best-effort: any failure yields empty mappings so the server keeps running.
 * A missing file is treated as "no config" and stays silent; every other
 * failure (parse error, permissions, ...) is logged to stderr.
 * @returns {{ voices: object, chimes: object }}
 */
function reload_config() {
  try {
    const doc = yaml.load(fs.readFileSync(VOICES_FILE, 'utf8'))
    return {
      voices: is_plain_object(doc?.voices) ? doc.voices : {},
      chimes: is_plain_object(doc?.chimes) ? doc.chimes : {},
    }
  } catch (err) {
    if (err?.code !== 'ENOENT') {
      process.stderr.write(`[tts-server] failed to load ${VOICES_FILE}: ${err?.message ?? err}\n`)
    }
    return { voices: {}, chimes: {} }
  }
}

let { voices, chimes } = reload_config()
let current_voice = null // name of active voice, or null
let last_speak_at = 0 // Date.now() of the latest accepted /speak request, 0 until then

// --- TTS setup ---
const tts = new Chatterbox_Tts()
process.stderr.write('[tts-server] starting chatterbox...\n')
await tts.init()
process.stderr.write('[tts-server] chatterbox ready\n')

// Preload all configured chimes so first play has no decode latency.
// Failures are logged but never block startup.
for (const [name, file] of Object.entries(chimes)) {
  tts.preload_chime(file).catch(err =>
    process.stderr.write(`[tts-server] preload failed for chime '${name}': ${err.message}\n`)
  )
}

// Serialize all speak/chime requests through a promise chain
let queue = Promise.resolve()

/**
 * Run `fn` after every previously enqueued task has settled.
 * @param {() => unknown} fn task to run; may return a promise
 * @returns {Promise<unknown>} settles with the outcome of `fn`
 */
function enqueue(fn) {
  const result = queue.then(fn)
  // Don't let a failed request poison the queue
  queue = result.catch(() => {})
  return result
}

/**
 * Collect the request body and parse it as JSON.
 * Chunks are kept as Buffers and decoded once at the end so multi-byte UTF-8
 * sequences that straddle a chunk boundary are not corrupted.
 * @param {http.IncomingMessage} req
 * @returns {Promise<unknown>} the parsed JSON value; rejects on stream error
 *   or invalid JSON (an empty body is invalid JSON)
 */
function read_body(req) {
  return new Promise((resolve, reject) => {
    const chunks = []
    req.on('data', chunk => { chunks.push(chunk) })
    req.on('end', () => {
      try {
        resolve(JSON.parse(Buffer.concat(chunks).toString('utf8')))
      } catch (err) {
        reject(err)
      }
    })
    req.on('error', reject)
  })
}

/**
 * Write a JSON response and end it.
 * @param {http.ServerResponse} res
 * @param {number} status HTTP status code
 * @param {unknown} body value to serialize as the response payload
 */
function send(res, status, body) {
  const payload = JSON.stringify(body)
  res.writeHead(status, { 'Content-Type': 'application/json' })
  res.end(payload)
}

/**
 * Read the request body and require it to be a JSON object.
 * On failure the 400 response has already been sent and `null` is returned,
 * so callers only need `if (body === null) return`.
 * @param {http.IncomingMessage} req
 * @param {http.ServerResponse} res
 * @returns {Promise<object | null>}
 */
async function read_json_object(req, res) {
  let body
  try {
    body = await read_body(req)
  } catch {
    send(res, 400, { error: 'invalid JSON' })
    return null
  }
  // `null`, arrays and scalars are valid JSON but cannot carry named fields;
  // destructuring `null` would throw inside the handler.
  if (!is_plain_object(body)) {
    send(res, 400, { error: 'JSON object required' })
    return null
  }
  return body
}

/**
 * Route one request. Any rejection is handled by the caller in createServer.
 * @param {http.IncomingMessage} req
 * @param {http.ServerResponse} res
 * @returns {Promise<void>}
 */
async function handle_request(req, res) {
  const { method, url } = req

  if (method === 'GET' && url === '/health') {
    return send(res, 200, { ok: true })
  }

  if (method === 'GET' && url === '/activity') {
    return send(res, 200, { last_speak_at })
  }

  if (method === 'GET' && url === '/voices') {
    ;({ voices, chimes } = reload_config())
    const list = Object.entries(voices).map(([name, v]) => ({
      name,
      // `v` may be null when the YAML entry has no fields
      description: v?.description ?? '',
      active: name === current_voice,
    }))
    return send(res, 200, { voices: list, current: current_voice })
  }

  if (method === 'POST' && url === '/voice') {
    const body = await read_json_object(req, res)
    if (body === null) return
    const { name } = body
    if (!name) return send(res, 400, { error: 'name required' })
    ;({ voices, chimes } = reload_config())
    // Own-property check: inherited keys such as "constructor" are not voices
    if (!Object.hasOwn(voices, name) || !voices[name]) {
      return send(res, 404, { error: `unknown voice: ${name}` })
    }
    current_voice = name
    process.stderr.write(`[tts-server] voice switched to: ${name}\n`)
    return send(res, 200, { ok: true, name, path: voices[name].path })
  }

  if (method === 'POST' && url === '/chime') {
    const body = await read_json_object(req, res)
    if (body === null) return
    const { name } = body
    if (!name) return send(res, 400, { error: 'name required' })
    ;({ voices, chimes } = reload_config())
    const file = Object.hasOwn(chimes, name) ? chimes[name] : null
    if (!file) return send(res, 404, { error: `chime not found: ${name}` })
    try {
      await enqueue(() => tts.play_chime(file))
      return send(res, 200, { ok: true })
    } catch (err) {
      return send(res, 500, { error: err.message })
    }
  }

  if (method === 'POST' && url === '/speak') {
    const body = await read_json_object(req, res)
    if (body === null) return
    const { text, ...opts } = body
    if (!text) {
      return send(res, 400, { error: 'text required' })
    }
    last_speak_at = Date.now()

    // Inject current voice as default audio_prompt if none provided
    const active_voice =
      current_voice !== null && Object.hasOwn(voices, current_voice)
        ? voices[current_voice]
        : null
    if (!opts.audio_prompt && active_voice) {
      opts.audio_prompt = active_voice.path
    }

    const wait = opts.wait === true
    const promise = enqueue(() => tts.speak_streaming(text, { preprocess: false, ...opts }))
    if (wait) {
      try {
        await promise
        send(res, 200, { ok: true })
      } catch (err) {
        send(res, 500, { error: err.message })
      }
    } else {
      // Fire-and-forget: the client already has its answer, so just log failures
      promise.catch(err => console.error('speak error:', err.message))
      send(res, 200, { ok: true, queued: true })
    }
    return
  }

  send(res, 404, { error: 'not found' })
}

const server = http.createServer((req, res) => {
  // The handler is async; without this catch an unexpected throw would become
  // an unhandled rejection (hanging the request and possibly killing the process).
  handle_request(req, res).catch(err => {
    const message = err?.message ?? String(err)
    process.stderr.write(`[tts-server] request failed: ${message}\n`)
    if (!res.headersSent) {
      send(res, 500, { error: message })
    } else {
      res.end()
    }
  })
})

server.listen(PORT, () => {
  process.stderr.write(`[tts-server] listening on port ${PORT}\n`)
})