- lib/pending-query.mjs: new state machine for query accumulation wake word, silence timer, send/cancel/pause/resume, instant dispatch, mode toggle (always listen / stop listening), mode query - query-demo.mjs: refactored to use Pending_Query; wake word on by default with silence timer; chimes for dispatch/working/cancel/activate - tts-server.mjs: track last_speak_at, expose /activity endpoint, chime playback via Python queue (soundfile + librosa), preload on startup - chatterbox-server.py: chime and preload commands via stdin protocol - lib/chatterbox-tts.mjs: play_chime and preload_chime methods - test-chime.mjs: simple chime test script - voices.yaml: configured ready/cancel/working/dispatch chimes - CLEANUP-PLAN.md: updated with current state, command vocabulary, future plans Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
171 lines
5.0 KiB
JavaScript
171 lines
5.0 KiB
JavaScript
/**
 * TTS HTTP server — wraps chatterbox-server.py and exposes a simple HTTP API.
 * Speak and chime requests are serialized so generation and playback stay in order.
 *
 * Usage:
 *   node tts-server.mjs
 *   TTS_PORT=11500 node tts-server.mjs
 *
 * API:
 *   POST /speak     { "text": "...", "audio_prompt": "/path/to/voice.wav", ... }
 *                   → 200 { "ok": true }  (after generation, playback continues in background)
 *   GET  /voices    → 200 { "voices": [{ "name": "rommie", "description": "...", "active": true }, ...],
 *                           "current": "rommie" | null }
 *   POST /voice     { "name": "rommie" }
 *                   → 200 { "ok": true, "name": "rommie", "path": "..." }
 *   POST /chime     { "name": "ready" }
 *                   → 200 { "ok": true }  plays the chime file configured for <name> under
 *                     `chimes` in voices.yaml (e.g. chimes/<name>.wav or .ogg)
 *   GET  /activity  → 200 { "last_speak_at": <Date.now() of the latest /speak request, 0 if none yet> }
 *   GET  /health    → 200 { "ok": true }
 *
 * Errors are reported as { "error": "..." } with status 400 (bad request body),
 * 404 (unknown route, voice or chime) or 500 (TTS backend failure).
 */
|
|
|
|
import * as http from 'node:http'
|
|
import * as fs from 'node:fs'
|
|
import * as path from 'node:path'
|
|
import yaml from 'js-yaml'
|
|
import { Chatterbox_Tts } from './lib/chatterbox-tts.mjs'
|
|
|
|
const PORT = parseInt(process.env.TTS_PORT ?? '11500')
|
|
const VOICES_FILE = path.join(import.meta.dirname, 'voices.yaml')
|
|
|
|
/**
 * Read VOICES_FILE from disk and return its `voices` and `chimes` mappings.
 *
 * Loading is best-effort: any failure (missing file, unreadable file, YAML
 * syntax error) yields empty mappings so the server keeps running. Failures
 * other than a missing file are reported on stderr, so a broken config does
 * not silently make every voice and chime disappear.
 *
 * @returns {{ voices: object, chimes: object }} the configured mappings, or
 *   empty objects when the file is absent, empty or cannot be parsed
 */
function reload_config() {
  try {
    const doc = yaml.load(fs.readFileSync(VOICES_FILE, 'utf8'))
    // `doc` is null/undefined for an empty file; either section may be absent.
    return { voices: doc?.voices ?? {}, chimes: doc?.chimes ?? {} }
  } catch (err) {
    // A missing config file is treated as "nothing configured" and stays quiet;
    // everything else is worth telling the operator about.
    if (err?.code !== 'ENOENT') {
      const reason = err?.message ?? String(err)
      process.stderr.write(`[tts-server] could not load voice config: ${reason}\n`)
    }
    return { voices: {}, chimes: {} }
  }
}
|
|
|
|
// Mutable server state shared by the request handlers.
// `voices` / `chimes` hold the mappings most recently read by reload_config().
let voices, chimes
;({ voices, chimes } = reload_config())

// Name of the active voice, or null when none has been selected.
let current_voice = null

// Date.now() timestamp recorded for the latest /speak request; 0 until one arrives.
let last_speak_at = 0
|
|
|
|
// --- TTS setup ---
// The chatterbox backend is initialised with a top-level await, so nothing
// below this point runs until init() has resolved.
const tts = new Chatterbox_Tts()
{
  const log_status = message => process.stderr.write(`[tts-server] ${message}\n`)
  log_status('starting chatterbox...')
  await tts.init()
  log_status('chatterbox ready')
}
|
|
|
|
// Warm up every configured chime so the first playback has no decode latency.
// Preloads are started without being awaited; a failed preload is only logged.
{
  const chime_entries = Object.entries(reload_config().chimes)
  chime_entries.forEach(([chime_name, chime_file]) => {
    tts.preload_chime(chime_file).catch(err =>
      process.stderr.write(`[tts-server] preload failed for chime '${chime_name}': ${err.message}\n`)
    )
  })
}
|
|
|
|
// Tail of the work chain: each queued job starts only after the previous one settles.
let queue = Promise.resolve()

/**
 * Append a job to the serial work chain.
 *
 * @param {Function} fn - job to run once all previously queued jobs have settled
 * @returns {Promise} settles with the job's own result or rejection
 */
function enqueue(fn) {
  const job = queue.then(fn)
  // The chain continues from a copy that absorbs rejections, so one failed
  // job cannot block the jobs queued after it.
  queue = job.then(undefined, () => {})
  return job
}
|
|
|
|
/**
 * Collect a request body and parse it as JSON.
 *
 * Chunks are gathered as raw bytes and decoded once at the end. Decoding each
 * chunk on its own would corrupt any multi-byte UTF-8 character that happens
 * to be split across two chunks.
 *
 * @param {object} req - stream emitting 'data', 'end' and 'error' events
 * @returns {Promise<any>} the parsed JSON value; rejects with the parse error
 *   when the body is not valid JSON (including an empty body), or with the
 *   stream's error
 */
function read_body(req) {
  return new Promise((resolve, reject) => {
    const chunks = []
    req.on('data', chunk => {
      // Chunks arrive as strings if an encoding was set on the stream.
      chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk)
    })
    req.on('end', () => {
      try {
        resolve(JSON.parse(Buffer.concat(chunks).toString('utf8')))
      } catch (err) {
        reject(err)
      }
    })
    req.on('error', reject)
  })
}
|
|
|
|
/**
 * Write a JSON response and finish it.
 *
 * @param {object} res - response object providing writeHead() and end()
 * @param {number} status - HTTP status code
 * @param {*} body - value serialized with JSON.stringify as the response body
 */
function send(res, status, body) {
  // Serialize first so nothing is written if the body cannot be stringified.
  const json = JSON.stringify(body)
  const headers = { 'Content-Type': 'application/json' }
  res.writeHead(status, headers)
  res.end(json)
}
|
|
|
|
|
|
/** Best-effort message for anything that was thrown, Error or not. */
function error_message(err) {
  return err?.message ?? String(err)
}

/**
 * Read the request body and require it to be a JSON object.
 *
 * On failure a 400 response has already been sent and null is returned.
 * Rejecting non-objects matters because `null` is valid JSON and cannot be
 * destructured by the route handlers.
 *
 * @returns {Promise<object|null>} the parsed object, or null after a 400
 */
async function read_json_object(req, res) {
  let body
  try {
    body = await read_body(req)
  } catch {
    send(res, 400, { error: 'invalid JSON' })
    return null
  }
  if (body === null || typeof body !== 'object' || Array.isArray(body)) {
    send(res, 400, { error: 'JSON object required' })
    return null
  }
  return body
}

/** GET /voices — reload the config and list the voices with their active flag. */
function handle_voices(res) {
  ;({ voices, chimes } = reload_config())
  const list = Object.entries(voices).map(([name, v]) => ({
    name,
    // `v` is null for a voice key with no value in the YAML file.
    description: v?.description ?? '',
    active: name === current_voice,
  }))
  send(res, 200, { voices: list, current: current_voice })
}

/** POST /voice — make the named voice the default for later /speak requests. */
async function handle_voice(req, res) {
  const body = await read_json_object(req, res)
  if (!body) return
  const { name } = body
  if (typeof name !== 'string' || name === '') {
    return send(res, 400, { error: 'name required' })
  }
  ;({ voices, chimes } = reload_config())
  // Own-property lookup only: inherited keys such as "constructor" are not voices.
  const entry = Object.hasOwn(voices, name) ? voices[name] : null
  if (!entry) return send(res, 404, { error: `unknown voice: ${name}` })
  current_voice = name
  process.stderr.write(`[tts-server] voice switched to: ${name}\n`)
  send(res, 200, { ok: true, name, path: entry.path })
}

/** POST /chime — play the configured chime through the serial queue. */
async function handle_chime(req, res) {
  const body = await read_json_object(req, res)
  if (!body) return
  const { name } = body
  if (typeof name !== 'string' || name === '') {
    return send(res, 400, { error: 'name required' })
  }
  ;({ voices, chimes } = reload_config())
  const file = Object.hasOwn(chimes, name) ? chimes[name] : null
  if (!file) return send(res, 404, { error: `chime not found: ${name}` })
  try {
    await enqueue(() => tts.play_chime(file))
    send(res, 200, { ok: true })
  } catch (err) {
    send(res, 500, { error: error_message(err) })
  }
}

/** POST /speak — speak `text` through the serial queue; other body keys are passed on as options. */
async function handle_speak(req, res) {
  const body = await read_json_object(req, res)
  if (!body) return
  const { text, ...opts } = body
  if (!text) return send(res, 400, { error: 'text required' })
  if (typeof text !== 'string') return send(res, 400, { error: 'text must be a string' })
  // Recorded when the request is accepted, not when speech finishes.
  last_speak_at = Date.now()

  // Inject the current voice as the default audio_prompt if none was provided.
  const active =
    current_voice && Object.hasOwn(voices, current_voice) ? voices[current_voice] : null
  if (!opts.audio_prompt && active) {
    opts.audio_prompt = active.path
  }

  try {
    // Caller-supplied options come last, so they can override preprocess.
    await enqueue(() => tts.speak_streaming(text, { preprocess: false, ...opts }))
    send(res, 200, { ok: true })
  } catch (err) {
    send(res, 500, { error: error_message(err) })
  }
}

/** Dispatch one request by method and path; a query string is ignored. */
async function handle_request(req, res) {
  const { method } = req
  const route = (req.url ?? '').split('?', 1)[0]

  if (method === 'GET') {
    if (route === '/health') return send(res, 200, { ok: true })
    if (route === '/activity') return send(res, 200, { last_speak_at })
    if (route === '/voices') return handle_voices(res)
  } else if (method === 'POST') {
    if (route === '/voice') return handle_voice(req, res)
    if (route === '/chime') return handle_chime(req, res)
    if (route === '/speak') return handle_speak(req, res)
  }
  send(res, 404, { error: 'not found' })
}

// HTTP entry point. Any error that escapes a route handler is logged and turned
// into a 500 rather than being left as an unhandled promise rejection.
const server = http.createServer((req, res) => {
  handle_request(req, res).catch(err => {
    process.stderr.write(`[tts-server] request failed: ${error_message(err)}\n`)
    if (res.headersSent) {
      res.end()
    } else {
      send(res, 500, { error: 'internal error' })
    }
  })
})
|
|
|
|
// Start accepting connections on PORT and announce it on stderr once bound.
server.listen(PORT, function on_listening() {
  const notice = `[tts-server] listening on port ${PORT}\n`
  process.stderr.write(notice)
})
|