/**
 * TTS HTTP server — wraps chatterbox-server.py and exposes a simple HTTP API.
 * Requests are serialized so generation and playback stay in order.
 *
 * Usage:
 *   node tts-server.mjs
 *   TTS_PORT=11500 node tts-server.mjs
 *
 * API:
 *   POST /speak   { "text": "...", "audio_prompt": "/path/to/voice.wav", ... }
 *                 → 200 { "ok": true }  (after generation, playback continues in background)
 *   GET  /voices  → 200 { "voices": [{ "name": "rommie", "description": "...", "active": true }, ...],
 *                         "current": "rommie" | null }
 *   POST /voice   { "name": "rommie" }
 *                 → 200 { "ok": true, "name": "rommie", "path": "..." }
 *   GET  /health  → 200 { "ok": true }
 *
 * Failures are reported as { "error": "..." } with status 400, 404 or 500.
 */

import * as http from 'node:http'
import * as fs from 'node:fs'
import * as path from 'node:path'
import yaml from 'js-yaml'
import { Chatterbox_Tts } from './lib/chatterbox-tts.mjs'

const PORT = Number.parseInt(process.env.TTS_PORT ?? '11500', 10)
const VOICES_FILE = path.join(import.meta.dirname, 'voices.yaml')

/**
 * Read the `voices` mapping from VOICES_FILE.
 *
 * @returns {object} the `voices` value of the YAML document, or `{}` when the
 *   file cannot be read or parsed, or has no `voices` key.
 */
function reload_voices() {
  try {
    const doc = yaml.load(fs.readFileSync(VOICES_FILE, 'utf8'))
    return doc?.voices ?? {}
  } catch {
    // Best effort: an unreadable or malformed voices file means "no voices".
    return {}
  }
}

let voices = reload_voices()
let current_voice = null // name of active voice, or null

// --- TTS setup ---

const tts = new Chatterbox_Tts()
process.stderr.write('[tts-server] starting chatterbox...\n')
await tts.init()
process.stderr.write('[tts-server] chatterbox ready\n')

// Serialize all speak requests through a promise chain
let queue = Promise.resolve()

/**
 * Run `fn` after every previously enqueued task has settled.
 *
 * @param {() => unknown} fn - task to run; may return a promise.
 * @returns {Promise<unknown>} settles with the outcome of `fn`.
 */
function enqueue(fn) {
  const result = queue.then(fn)
  // Don't let a failed request poison the queue
  queue = result.catch(() => {})
  return result
}

/**
 * Collect the full request body and parse it as JSON.
 *
 * @param {http.IncomingMessage} req
 * @returns {Promise<unknown>} the parsed JSON value; rejects when the stream
 *   errors or the body is not valid JSON (an empty body included).
 */
function read_body(req) {
  return new Promise((resolve, reject) => {
    // Decode in the stream so a multi-byte UTF-8 character that is split
    // across two chunks is not corrupted by per-chunk string conversion.
    req.setEncoding('utf8')
    let buf = ''
    req.on('data', chunk => { buf += chunk })
    req.on('end', () => {
      try {
        resolve(JSON.parse(buf))
      } catch (e) {
        reject(e)
      }
    })
    req.on('error', reject)
  })
}

/**
 * Send `body` as a JSON response and end it.
 *
 * @param {http.ServerResponse} res
 * @param {number} status - HTTP status code.
 * @param {unknown} body - value to serialize with JSON.stringify.
 */
function send(res, status, body) {
  const payload = JSON.stringify(body)
  res.writeHead(status, { 'Content-Type': 'application/json' })
  res.end(payload)
}

/**
 * Route a single request to the matching endpoint.
 *
 * @param {http.IncomingMessage} req
 * @param {http.ServerResponse} res
 * @returns {Promise<void>}
 */
async function handle_request(req, res) {
  if (req.method === 'GET' && req.url === '/health') {
    return send(res, 200, { ok: true })
  }

  if (req.method === 'GET' && req.url === '/voices') {
    voices = reload_voices()
    const list = Object.entries(voices).map(([name, v]) => ({
      name,
      // `v` is null for a YAML entry without a value.
      description: v?.description ?? '',
      active: name === current_voice,
    }))
    return send(res, 200, { voices: list, current: current_voice })
  }

  if (req.method === 'POST' && req.url === '/voice') {
    let body
    try {
      body = await read_body(req)
    } catch {
      return send(res, 400, { error: 'invalid JSON' })
    }
    // A literal JSON `null` body cannot be destructured; treat it as empty.
    const { name } = body ?? {}
    if (!name) return send(res, 400, { error: 'name required' })
    voices = reload_voices()
    // Own-property check so inherited keys such as "constructor" or
    // "__proto__" are not accepted as voice names.
    if (!Object.hasOwn(voices, name) || !voices[name]) {
      return send(res, 404, { error: `unknown voice: ${name}` })
    }
    current_voice = name
    process.stderr.write(`[tts-server] voice switched to: ${name}\n`)
    return send(res, 200, { ok: true, name, path: voices[name].path })
  }

  if (req.method === 'POST' && req.url === '/speak') {
    let body
    try {
      body = await read_body(req)
    } catch {
      return send(res, 400, { error: 'invalid JSON' })
    }
    // A literal JSON `null` body cannot be destructured; treat it as empty.
    const { text, ...opts } = body ?? {}
    if (!text) {
      return send(res, 400, { error: 'text required' })
    }
    // Inject current voice as default audio_prompt if none provided
    if (!opts.audio_prompt && current_voice && voices[current_voice]) {
      opts.audio_prompt = voices[current_voice].path
    }
    try {
      await enqueue(() => tts.speak_streaming(text, { preprocess: false, ...opts }))
      send(res, 200, { ok: true })
    } catch (err) {
      send(res, 500, { error: err?.message ?? String(err) })
    }
    return
  }

  send(res, 404, { error: 'not found' })
}

const server = http.createServer((req, res) => {
  // An exception escaping the async handler would otherwise surface as an
  // unhandled promise rejection; answer with a 500 instead.
  handle_request(req, res).catch(err => {
    process.stderr.write(`[tts-server] request failed: ${err?.stack ?? err}\n`)
    if (res.headersSent) {
      res.destroy()
    } else {
      send(res, 500, { error: 'internal error' })
    }
  })
})

server.listen(PORT, () => {
  process.stderr.write(`[tts-server] listening on port ${PORT}\n`)
})