Files
claude-voice-experiment/lib/chatterbox-tts.mjs
mikael-lovqvists-claude-agent db8889aeed Initial commit — voice pipeline experiment
STT (Silero VAD + Whisper via sherpa-onnx), Chatterbox TTS HTTP server,
query completeness classifier (Ollama), multi-voice demo scripts, and
planning docs. Kept as reference; clean rewrite planned in separate repos.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-30 04:48:54 +00:00

102 lines
3.3 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Chatterbox TTS — Node.js wrapper around chatterbox-server.py.
*
* Usage:
* const tts = new Chatterbox_Tts()
* await tts.init()
* await tts.speak('Hello! [chuckle] That is funny.')
* await tts.speak('Something intense.', { exaggeration: 0.8 }) // full model only
* tts.stop()
*
* Paralinguistic tags (embed in text):
* [laugh] [chuckle] [cough] [clear throat] [sigh] [shush] [groan] [sniff] [gasp]
*
* Generation options (passed as second arg to speak()):
* temperature 0.05-2.0 default 0.8
* top_p 0-1 default 0.95
* top_k 0-1000 default 1000
* repetition_penalty ≥1.0 default 1.2
* min_p 0-1 default 0.0
* audio_prompt string path to reference WAV for voice cloning
* exaggeration 0-1 emotion intensity (full model only)
* cfg_weight 0-1 classifier-free guidance (full model only)
*/
import { spawn } from 'node:child_process'
import * as path from 'node:path'
import * as readline from 'node:readline'
import { markdown_to_bark as markdown_to_speech, split_sentences } from './markdown.mjs'
const SERVER = path.join(import.meta.dirname, '..', 'chatterbox-server.py')
/**
 * Client for the Chatterbox TTS server script, driven over the child's stdio.
 *
 * Protocol as used by this class: the server prints `ready` once it can
 * accept work, receives one JSON object per line on stdin, and prints `ok`
 * once per request. Acknowledgements are matched to requests in FIFO order.
 * NOTE(review): this assumes the server answers requests in the order they
 * were written — confirm against chatterbox-server.py.
 */
export class Chatterbox_Tts {
  /**
   * @param {object} [options]
   * @param {string} [options.variant='turbo'] Passed to the server as its only
   *   command-line argument ('turbo' or 'full').
   */
  constructor({
    variant = 'turbo', // 'turbo' or 'full'
  } = {}) {
    this._variant = variant
    this._proc = null
    this._rl = null
    // Settlers ({ resolve, reject }) of requests that were written to the
    // server but not acknowledged yet, oldest first.
    this._pending = []
  }

  /**
   * Spawns the server and waits for its `ready` line.
   *
   * @returns {Promise<void>} Resolves on `ready`; rejects if the process
   *   cannot be spawned or closes before reporting `ready`.
   */
  init() {
    return new Promise((resolve, reject) => {
      const proc = spawn(SERVER, [this._variant], {
        stdio: ['pipe', 'pipe', 'inherit'],
      })
      this._proc = proc

      proc.on('error', (err) => {
        reject(err)
        this._fail_pending(err)
      })

      proc.on('close', (code, signal) => {
        if (code !== null && code !== 0) {
          process.stderr.write(`[chatterbox] server exited with code ${code}\n`)
        }
        const err = new Error(`Chatterbox server exited (code ${code}, signal ${signal})`)
        // Settling an already-settled promise is a no-op, so this only has an
        // effect when the server went away before printing `ready`.
        reject(err)
        // Nobody will acknowledge in-flight requests any more.
        this._fail_pending(err)
        // Only forget the process if it is still the current one; stop()
        // followed by a new init() may already have replaced it.
        if (this._proc === proc) {
          this._proc = null
        }
      })

      // Writing to a dead server raises 'error' on stdin (e.g. EPIPE); without
      // a listener that would surface as an uncaught exception.
      proc.stdin?.on('error', (err) => this._fail_pending(err))

      const rl = readline.createInterface({ input: proc.stdout })
      this._rl = rl
      rl.on('line', (line) => {
        if (line === 'ready') {
          resolve()
          return
        }
        this._on_line(line)
      })
    })
  }

  /**
   * Sends `text` to the server as a single request.
   *
   * @param {string} text
   * @param {object} [opts] Generation options forwarded to the server. The
   *   `preprocess` key is consumed here: only `preprocess: true` runs the text
   *   through `markdown_to_speech` first.
   * @returns {Promise<void>} Resolves when the server acknowledges the request;
   *   rejects if the instance is not initialized, is stopped, or the server
   *   goes away first.
   */
  async speak(text, opts = {}) {
    const clean = opts.preprocess === true ? markdown_to_speech(text) : text
    return this._send(clean, opts)
  }

  /**
   * Splits `text` into sentences and sends them one at a time, waiting for
   * each acknowledgement before sending the next.
   *
   * @param {string} text
   * @param {object} [opts] Generation options applied to every sentence.
   *   Unlike speak(), `markdown_to_speech` runs unless `preprocess` is `false`.
   * @returns {Promise<void>} Rejects as soon as one sentence fails.
   */
  async speak_streaming(text, opts = {}) {
    const clean = opts.preprocess !== false ? markdown_to_speech(text) : text
    const sentences = split_sentences(clean)
    for (const s of sentences) {
      await this._send(s, opts)
    }
  }

  /**
   * Writes one JSON request line to the server and queues its settlers.
   *
   * @param {string} text
   * @param {object} [opts] Every key except `preprocess` is sent to the server.
   * @returns {Promise<void>}
   */
  _send(text, opts = {}) {
    return new Promise((resolve, reject) => {
      if (!this._proc) {
        reject(new Error('Chatterbox_Tts not initialized — call init() first'))
        return
      }
      const { preprocess: _, ...gen_opts } = opts
      // Serialize before queueing so a JSON.stringify failure rejects this
      // promise without leaving an orphaned entry in the queue.
      const payload = JSON.stringify({ text, ...gen_opts }) + '\n'
      this._pending.push({ resolve, reject })
      this._proc.stdin.write(payload)
    })
  }

  /**
   * Handles one stdout line from the server after start-up: an `ok` line
   * resolves the oldest pending request; anything else is ignored.
   *
   * @param {string} line
   */
  _on_line(line) {
    if (line !== 'ok') {
      return
    }
    const oldest = this._pending.shift()
    oldest?.resolve()
  }

  /**
   * Rejects every pending request with `err` and empties the queue.
   *
   * @param {Error} err
   */
  _fail_pending(err) {
    const waiting = this._pending
    this._pending = []
    for (const { reject } of waiting) {
      reject(err)
    }
  }

  /**
   * Closes the line reader, kills the server process and rejects requests
   * that are still waiting for an acknowledgement. Safe to call repeatedly.
   */
  stop() {
    this._rl?.close()
    this._proc?.kill()
    this._proc = null
    this._rl = null
    this._fail_pending(new Error('Chatterbox_Tts stopped'))
  }
}