Initial commit — voice pipeline experiment
STT (Silero VAD + Whisper via sherpa-onnx), Chatterbox TTS HTTP server, query completeness classifier (Ollama), multi-voice demo scripts, and planning docs. Kept as reference; clean rewrite planned in separate repos. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
133
tts-server.mjs
Normal file
133
tts-server.mjs
Normal file
@@ -0,0 +1,133 @@
|
||||
/**
|
||||
* TTS HTTP server — wraps chatterbox-server.py and exposes a simple HTTP API.
|
||||
* Requests are serialized so generation and playback stay in order.
|
||||
*
|
||||
* Usage:
|
||||
* node tts-server.mjs
|
||||
* TTS_PORT=11500 node tts-server.mjs
|
||||
*
|
||||
* API:
|
||||
* POST /speak { "text": "...", "audio_prompt": "/path/to/voice.wav", ... }
|
||||
* → 200 { "ok": true } (after generation, playback continues in background)
|
||||
* GET /voices → 200 { "voices": [{ "name": "rommie", "description": "...", "active": true }, ...], "current": "rommie" | null }
|
||||
* POST /voice { "name": "rommie" }
|
||||
* → 200 { "ok": true, "name": "rommie", "path": "..." }
|
||||
* GET /health → 200 { "ok": true }
|
||||
*/
|
||||
|
||||
import * as http from 'node:http'
|
||||
import * as fs from 'node:fs'
|
||||
import * as path from 'node:path'
|
||||
import yaml from 'js-yaml'
|
||||
import { Chatterbox_Tts } from './lib/chatterbox-tts.mjs'
|
||||
|
||||
const PORT = parseInt(process.env.TTS_PORT ?? '11500')
|
||||
const VOICES_FILE = path.join(import.meta.dirname, 'voices.yaml')
|
||||
|
||||
/**
 * Read the voice catalogue from VOICES_FILE.
 *
 * Best-effort: every failure yields an empty catalogue so the caller can keep
 * going. A missing file is treated as "no voices configured" and stays
 * silent; any other failure (unreadable file, YAML syntax error, `voices`
 * not being a mapping) is reported on stderr so that a broken config is not
 * mistaken for an empty one.
 *
 * @returns {object} The top-level `voices` mapping from the file, or `{}`.
 */
function reload_voices() {
  let doc
  try {
    doc = yaml.load(fs.readFileSync(VOICES_FILE, 'utf8'))
  } catch (err) {
    if (err?.code !== 'ENOENT') {
      process.stderr.write(`[tts-server] failed to load ${VOICES_FILE}: ${err?.message ?? err}\n`)
    }
    return {}
  }

  const catalogue = doc?.voices ?? {}
  // Callers index this by voice name and iterate its entries, so anything
  // other than a mapping (a list, a scalar) is unusable.
  if (typeof catalogue !== 'object' || Array.isArray(catalogue)) {
    process.stderr.write(`[tts-server] ignoring ${VOICES_FILE}: "voices" must be a mapping\n`)
    return {}
  }
  return catalogue
}
|
||||
|
||||
// Name of the active voice; stays null until a client selects one via POST /voice.
let current_voice = null

// Voice catalogue as returned by reload_voices(); the request handlers refresh it.
let voices = reload_voices()

// --- TTS setup ---
// init() is awaited at module top level, so nothing after this point runs
// until it has resolved.
const tts = new Chatterbox_Tts()
process.stderr.write('[tts-server] starting chatterbox...\n')
await tts.init()
process.stderr.write('[tts-server] chatterbox ready\n')
|
||||
|
||||
// Tail of the job chain: every job starts only after the previously
// enqueued job has settled, which keeps speak requests strictly in order.
let queue = Promise.resolve()

/**
 * Append a job to the serial queue.
 *
 * @param {Function} fn - Job to run once all earlier jobs have settled.
 * @returns {Promise} Settles with the job's own outcome (value or rejection).
 */
function enqueue(fn) {
  const outcome = queue.then(fn)
  // The tail absorbs rejections so one failed job cannot block later ones;
  // the caller still sees the failure through the returned promise.
  queue = outcome.then(undefined, () => {})
  return outcome
}
|
||||
|
||||
/**
 * Collect a request body and parse it as JSON.
 *
 * Chunks are buffered as raw bytes and decoded once at the end, so a
 * multi-byte UTF-8 character that is split across two chunks is decoded
 * correctly instead of turning into replacement characters.
 *
 * @param {object} req - Readable request stream emitting 'data', 'end' and 'error'.
 * @returns {Promise<any>} Resolves with the parsed JSON value. Rejects with the
 *   error thrown by JSON.parse (this includes an empty body) or with the
 *   stream's 'error' event payload.
 */
function read_body(req) {
  return new Promise((resolve, reject) => {
    const chunks = []
    req.on('data', chunk => {
      // Chunks are strings when an encoding has been set on the stream.
      chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk)
    })
    req.on('end', () => {
      try {
        resolve(JSON.parse(Buffer.concat(chunks).toString('utf8')))
      } catch (err) {
        reject(err)
      }
    })
    req.on('error', reject)
  })
}
|
||||
|
||||
/**
 * Send `body` as a complete JSON response.
 *
 * @param {object} res - Response object providing writeHead() and end().
 * @param {number} status - HTTP status code.
 * @param {*} body - Value to serialize with JSON.stringify.
 */
function send(res, status, body) {
  // Serialize first: if the value cannot be stringified, the exception is
  // raised before any header has been written.
  const json = JSON.stringify(body)
  const headers = { 'Content-Type': 'application/json' }
  res.writeHead(status, headers)
  res.end(json)
}
|
||||
|
||||
/**
 * Whether a parsed JSON value is an object literal (not null, not an array,
 * not a primitive). Valid JSON bodies such as `null` or `42` must not be
 * destructured as if they were objects.
 */
function is_json_object(value) {
  return value !== null && typeof value === 'object' && !Array.isArray(value)
}

/**
 * HTTP front end.
 *
 * Routes (matched on exact method and URL):
 *   GET  /health → 200 { ok: true }
 *   GET  /voices → 200 { voices: [{ name, description, active }], current }
 *   POST /voice  → selects the active voice by name (400 / 404 on bad input)
 *   POST /speak  → queues a speak_streaming() call and responds once it
 *                  settles (200 on success, 500 with the error message)
 *   anything else → 404 { error: 'not found' }
 */
const server = http.createServer(async (req, res) => {
  try {
    if (req.method === 'GET' && req.url === '/health') {
      return send(res, 200, { ok: true })
    }

    if (req.method === 'GET' && req.url === '/voices') {
      voices = reload_voices()
      const list = Object.entries(voices).map(([name, v]) => ({
        name,
        // A voice key with no value parses to null, hence the optional chain.
        description: v?.description ?? '',
        active: name === current_voice,
      }))
      return send(res, 200, { voices: list, current: current_voice })
    }

    if (req.method === 'POST' && req.url === '/voice') {
      let body
      try {
        body = await read_body(req)
      } catch {
        return send(res, 400, { error: 'invalid JSON' })
      }

      const { name } = is_json_object(body) ? body : {}
      if (!name) return send(res, 400, { error: 'name required' })

      voices = reload_voices()
      // Own-property check so inherited keys such as "constructor" are not
      // accepted as voice names.
      if (!Object.hasOwn(voices, name) || !voices[name]) {
        return send(res, 404, { error: `unknown voice: ${name}` })
      }

      current_voice = name
      process.stderr.write(`[tts-server] voice switched to: ${name}\n`)
      return send(res, 200, { ok: true, name, path: voices[name].path })
    }

    if (req.method === 'POST' && req.url === '/speak') {
      let body
      try {
        body = await read_body(req)
      } catch {
        return send(res, 400, { error: 'invalid JSON' })
      }

      const { text, ...opts } = is_json_object(body) ? body : {}
      if (!text) return send(res, 400, { error: 'text required' })

      // Inject the current voice as the default audio_prompt if none was provided.
      const active =
        current_voice !== null && Object.hasOwn(voices, current_voice)
          ? voices[current_voice]
          : null
      if (!opts.audio_prompt && active) {
        opts.audio_prompt = active.path
      }

      try {
        // Client-supplied options are spread last, so they may override `preprocess`.
        await enqueue(() => tts.speak_streaming(text, { preprocess: false, ...opts }))
      } catch (err) {
        return send(res, 500, { error: err?.message ?? String(err) })
      }
      return send(res, 200, { ok: true })
    }

    return send(res, 404, { error: 'not found' })
  } catch (err) {
    // Last-resort guard: an exception escaping this async handler would
    // otherwise surface as an unhandled promise rejection and leave the
    // request without a response.
    process.stderr.write(`[tts-server] request failed: ${err?.stack ?? err}\n`)
    if (res.headersSent) {
      res.end()
    } else {
      send(res, 500, { error: 'internal error' })
    }
  }
})
|
||||
|
||||
// Start accepting connections on PORT; the callback reports the port on
// stderr once the server is listening.
server.listen(PORT, function on_listening() {
  process.stderr.write(`[tts-server] listening on port ${PORT}\n`)
})
|
||||
Reference in New Issue
Block a user