Files
claude-voice-experiment/tts-server.mjs
mikael-lovqvists-claude-agent 688549a6c3 Add chime endpoint to TTS server
POST /chime {name} plays chimes/<name>.wav or .ogg via pacat.
Goes through the same queue as speak so playback stays ordered.
chimes/ directory holds the audio files (not committed).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-30 05:23:18 +00:00

166 lines
4.9 KiB
JavaScript

/**
* TTS HTTP server — wraps chatterbox-server.py and exposes a simple HTTP API.
* Requests are serialized so generation and playback stay in order.
*
* Usage:
* node tts-server.mjs
* TTS_PORT=11500 node tts-server.mjs
*
* API:
* POST /speak { "text": "...", "audio_prompt": "/path/to/voice.wav", ... }
* → 200 { "ok": true } (after generation, playback continues in background)
* GET /voices → 200 { "voices": [{ "name": "rommie", "description": "...", "active": true }, ...], "current": "rommie" | null }
* POST /voice { "name": "rommie" }
* → 200 { "ok": true, "name": "rommie", "path": "..." }
* POST /chime { "name": "ready" }
* → 200 { "ok": true } plays chimes/<name>.wav (or .ogg)
* GET /health → 200 { "ok": true }
*/
import * as http from 'node:http'
import * as fs from 'node:fs'
import * as path from 'node:path'
import { spawn } from 'node:child_process'
import yaml from 'js-yaml'
import { Chatterbox_Tts } from './lib/chatterbox-tts.mjs'
const PORT = parseInt(process.env.TTS_PORT ?? '11500')
const VOICES_FILE = path.join(import.meta.dirname, 'voices.yaml')
const CHIMES_DIR = path.join(import.meta.dirname, 'chimes')
/**
 * Read the voice table from VOICES_FILE.
 *
 * Best-effort: every failure yields an empty table so the caller can carry on,
 * but failures other than a missing file are reported on stderr so that a
 * malformed or unreadable voices file is not mistaken for "no voices".
 *
 * @returns {object} the value of the top-level `voices` key, or {} when the
 *   file is missing, unreadable, unparsable, or `voices` is not an object.
 */
function reload_voices() {
  try {
    const doc = yaml.load(fs.readFileSync(VOICES_FILE, 'utf8'))
    const table = doc?.voices
    // A scalar under `voices:` cannot serve as a name → entry table.
    return table !== null && typeof table === 'object' ? table : {}
  } catch (err) {
    if (err?.code !== 'ENOENT') {
      process.stderr.write(`[tts-server] failed to load ${VOICES_FILE}: ${err?.message ?? err}\n`)
    }
    return {}
  }
}
// Voice table loaded at start-up; the GET /voices and POST /voice handlers
// replace it with a fresh reload_voices() result.
let voices = reload_voices()
// Set by POST /voice; POST /speak uses it to pick a default audio_prompt.
let current_voice = null // name of active voice, or null
// --- TTS setup ---
// NOTE(review): Chatterbox_Tts is defined in ./lib/chatterbox-tts.mjs; per the
// file header it wraps chatterbox-server.py — confirm what init() starts.
const tts = new Chatterbox_Tts()
process.stderr.write('[tts-server] starting chatterbox...\n')
// Top-level await: the rest of the module (including the HTTP server below)
// is not evaluated until init() has settled.
await tts.init()
process.stderr.write('[tts-server] chatterbox ready\n')
// Tail of the work queue: each queued task starts only after this settles.
let queue = Promise.resolve()

/**
 * Run `fn` after every previously enqueued task has settled.
 *
 * `fn` is invoked with the fulfilled value of the task before it (undefined
 * after a failed task or when the queue is fresh).
 *
 * @param {Function} fn - task to run; may return a promise
 * @returns {Promise} settles with the outcome of `fn` (rejections included)
 */
function enqueue(fn) {
  const outcome = queue.then(fn)
  // The tail always fulfills, so one failing task cannot block later ones.
  queue = outcome.then(value => value, () => undefined)
  return outcome
}
/**
 * Collect a request body and parse it as JSON.
 *
 * The raw bytes are gathered first and decoded once at the end: decoding each
 * chunk on its own would corrupt a multi-byte UTF-8 character that happens to
 * be split across two chunks.
 *
 * @param {object} req - readable request emitting 'data', 'end' and 'error'
 * @returns {Promise<*>} the parsed JSON value; rejects on a stream error or
 *   when the body is not valid JSON (an empty body included)
 */
function read_body(req) {
  return new Promise((resolve, reject) => {
    const chunks = []
    req.on('data', chunk => {
      // Chunks are strings when an encoding was set on the stream.
      chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk)
    })
    req.on('end', () => {
      try {
        resolve(JSON.parse(Buffer.concat(chunks).toString('utf8')))
      } catch (e) {
        reject(e)
      }
    })
    req.on('error', reject)
  })
}
/**
 * Write a JSON response and finish it.
 *
 * @param {object} res - response object exposing writeHead() and end()
 * @param {number} status - HTTP status code
 * @param {*} body - value serialized with JSON.stringify
 */
function send(res, status, body) {
  // Serialize first so a stringify failure happens before any header is written.
  const json = JSON.stringify(body)
  const headers = { 'Content-Type': 'application/json' }
  res.writeHead(status, headers)
  res.end(json)
}
/**
 * Play an audio file through PulseAudio using `pacat`.
 *
 * `--file-format` is passed because pacat, unlike paplay, treats its input as
 * raw PCM by default; without the flag a .wav header or an .ogg container
 * would be played as sample data instead of being decoded.
 *
 * @param {string} file_path - path of the audio file to play
 * @returns {Promise<void>} resolves when pacat exits with status 0; rejects
 *   when it cannot be spawned, exits non-zero, or is killed by a signal
 */
function play_file(file_path) {
  return new Promise((resolve, reject) => {
    const player = spawn('pacat', ['--playback', '--file-format', file_path])
    player.on('close', (code, signal) => {
      if (code === 0) {
        resolve()
        return
      }
      // `code` is null when the process was terminated by a signal.
      reject(new Error(signal ? `pacat killed by ${signal}` : `pacat exited ${code}`))
    })
    player.on('error', reject)
  })
}
/**
 * HTTP request router for the TTS server. Every response is JSON.
 *
 *   GET  /health → 200 { ok: true }
 *   GET  /voices → re-reads the voice table and lists each entry with its
 *                  description and whether it is the active voice
 *   POST /voice  → selects the active voice by name (404 when unknown)
 *   POST /chime  → queues playback of chimes/<name>.wav, falling back to .ogg
 *   POST /speak  → queues speech generation for `text`; the remaining body
 *                  fields are forwarded to tts.speak_streaming as options
 *
 * Anything else is answered with 404. Chime and speak share one queue, so
 * their work runs in arrival order. An unexpected exception is answered with
 * 500 instead of escaping the async handler as an unhandled rejection.
 */
const server = http.createServer(async (req, res) => {
  try {
    if (req.method === 'GET' && req.url === '/health') {
      return send(res, 200, { ok: true })
    }

    if (req.method === 'GET' && req.url === '/voices') {
      voices = reload_voices()
      const list = Object.entries(voices).map(([name, v]) => ({
        name,
        // Tolerate an empty (null) entry in the voice table.
        description: v?.description ?? '',
        active: name === current_voice,
      }))
      return send(res, 200, { voices: list, current: current_voice })
    }

    if (req.method === 'POST' && req.url === '/voice') {
      let body
      try { body = await read_body(req) } catch {
        return send(res, 400, { error: 'invalid JSON' })
      }
      // A JSON `null` body is valid JSON but cannot be destructured.
      const { name } = body ?? {}
      if (!name) return send(res, 400, { error: 'name required' })
      voices = reload_voices()
      // Own-property check: names such as "constructor" must not resolve to
      // members inherited from Object.prototype.
      if (!Object.hasOwn(voices, name) || !voices[name]) {
        return send(res, 404, { error: `unknown voice: ${name}` })
      }
      current_voice = name
      process.stderr.write(`[tts-server] voice switched to: ${name}\n`)
      return send(res, 200, { ok: true, name, path: voices[name].path })
    }

    if (req.method === 'POST' && req.url === '/chime') {
      let body
      try { body = await read_body(req) } catch {
        return send(res, 400, { error: 'invalid JSON' })
      }
      const { name } = body ?? {}
      if (!name) return send(res, 400, { error: 'name required' })
      // `name` is client-supplied and becomes part of a file path: accept only
      // a bare file name so it cannot reach outside CHIMES_DIR ("../../x").
      if (typeof name !== 'string' || path.basename(name) !== name) {
        return send(res, 400, { error: 'invalid chime name' })
      }
      // Try .wav then .ogg
      const wav = path.join(CHIMES_DIR, `${name}.wav`)
      const ogg = path.join(CHIMES_DIR, `${name}.ogg`)
      const file = fs.existsSync(wav) ? wav : fs.existsSync(ogg) ? ogg : null
      if (!file) return send(res, 404, { error: `chime not found: ${name}` })
      try {
        await enqueue(() => play_file(file))
        return send(res, 200, { ok: true })
      } catch (err) {
        return send(res, 500, { error: err.message })
      }
    }

    if (req.method === 'POST' && req.url === '/speak') {
      let body
      try {
        body = await read_body(req)
      } catch {
        return send(res, 400, { error: 'invalid JSON' })
      }
      const { text, ...opts } = body ?? {}
      if (!text) {
        return send(res, 400, { error: 'text required' })
      }
      // Inject current voice as default audio_prompt if none provided
      if (!opts.audio_prompt && current_voice && voices[current_voice]) {
        opts.audio_prompt = voices[current_voice].path
      }
      try {
        // Caller-supplied options are spread last, so they override the default.
        await enqueue(() => tts.speak_streaming(text, { preprocess: false, ...opts }))
        send(res, 200, { ok: true })
      } catch (err) {
        send(res, 500, { error: err.message })
      }
      return
    }

    send(res, 404, { error: 'not found' })
  } catch (err) {
    process.stderr.write(`[tts-server] unhandled error for ${req.method} ${req.url}: ${err?.stack ?? err}\n`)
    // Only answer if nothing has been written yet; otherwise the response is
    // already on its way and cannot be replaced.
    if (!res.headersSent) send(res, 500, { error: 'internal error' })
  }
})

server.listen(PORT, () => {
  process.stderr.write(`[tts-server] listening on port ${PORT}\n`)
})