/**
 * Chatterbox TTS — Node.js wrapper around chatterbox-server.py.
 *
 * Usage:
 *   const tts = new Chatterbox_Tts()
 *   await tts.init()
 *   await tts.speak('Hello! [chuckle] That is funny.')
 *   await tts.speak('Something intense.', { exaggeration: 0.8 }) // full model only
 *   tts.stop()
 *
 * Paralinguistic tags (embed in text):
 *   [laugh] [chuckle] [cough] [clear throat] [sigh] [shush] [groan] [sniff] [gasp]
 *
 * Generation options (passed as second arg to speak()):
 *   temperature         0.05–2.0  default 0.8
 *   top_p               0–1       default 0.95
 *   top_k               0–1000    default 1000
 *   repetition_penalty  ≥1.0      default 1.2
 *   min_p               0–1       default 0.0
 *   audio_prompt        string    path to reference WAV for voice cloning
 *   exaggeration        0–1       emotion intensity (full model only)
 *   cfg_weight          0–1       classifier-free guidance (full model only)
 */

import { spawn } from 'node:child_process'
import * as path from 'node:path'
import * as readline from 'node:readline'
import { markdown_to_bark as markdown_to_speech, split_sentences } from './markdown.mjs'

const SERVER = path.join(import.meta.dirname, '..', 'chatterbox-server.py')

const NOT_INITIALIZED = 'Chatterbox_Tts not initialized — call init() first'

/**
 * Client for a chatterbox-server.py child process.
 *
 * Protocol as used by this class: one JSON object per line is written to the
 * server's stdin; the server prints `ready` once on stdout and then one `ok`
 * line per completed request.
 * NOTE(review): requests are assumed to be answered in the order they were
 * written — confirm against chatterbox-server.py.
 */
export class Chatterbox_Tts {
  /**
   * @param {object} [options]
   * @param {string} [options.variant='turbo'] passed to the server as its only argument
   */
  constructor({
    variant = 'turbo', // 'turbo' or 'full'
  } = {}) {
    this._variant = variant
    this._proc = null
    this._rl = null
    // FIFO of { resolve, reject } for requests written but not yet answered.
    // A queue (rather than a single slot) keeps overlapping calls from
    // clobbering each other's resolver.
    this._pending = []
  }

  /**
   * Spawn the server and wait for its `ready` line.
   *
   * @returns {Promise<void>} resolves on `ready`; rejects if the process
   *   cannot be spawned or exits before becoming ready.
   */
  init() {
    return new Promise((resolve, reject) => {
      const proc = spawn(SERVER, [this._variant], {
        stdio: ['pipe', 'pipe', 'inherit'],
      })
      this._proc = proc

      proc.on('error', reject)

      proc.on('close', (code, signal) => {
        if (code !== null && code !== 0) {
          process.stderr.write(`[chatterbox] server exited with code ${code}\n`)
        }
        const err = new Error(`chatterbox server exited (code ${code}, signal ${signal})`)
        // No-op once init() has already settled; otherwise stops init() from
        // waiting forever for a `ready` line that will never arrive.
        reject(err)
        // stop() or a later init() already replaced this process: leave the
        // current state alone.
        if (this._proc !== proc) return
        this._proc = null
        this._rl?.close()
        this._rl = null
        this._fail_pending(err)
      })

      // A failed write (e.g. EPIPE after the server died) is reported as an
      // 'error' event on stdin; without a listener it would crash the host.
      proc.stdin?.on('error', (err) => {
        if (this._proc === proc) this._fail_pending(err)
      })

      this._rl = readline.createInterface({ input: proc.stdout })
      this._rl.on('line', (line) => {
        if (line === 'ready') {
          resolve()
          return
        }
        if (line === 'ok' && this._proc === proc) {
          // Each `ok` completes the oldest outstanding request.
          this._pending.shift()?.resolve()
        }
      })
    })
  }

  /**
   * Send `text` to the server as a single request.
   *
   * Markdown preprocessing is opt-in here: it runs only when
   * `opts.preprocess === true` (speak_streaming() defaults the other way).
   *
   * @param {string} text
   * @param {object} [opts] generation options; `preprocess` is consumed locally
   * @returns {Promise<void>} resolves when the server answers `ok`
   */
  async speak(text, opts = {}) {
    const clean = opts.preprocess === true ? markdown_to_speech(text) : text
    return this._send(clean, opts)
  }

  /**
   * Split `text` with split_sentences() and send the pieces one at a time,
   * waiting for each `ok` before sending the next.
   *
   * Markdown preprocessing is on unless `opts.preprocess === false`.
   * If stop() is called (or the process is replaced) mid-stream, the
   * remaining sentences are dropped and the promise resolves.
   *
   * @param {string} text
   * @param {object} [opts] generation options; `preprocess` is consumed locally
   * @returns {Promise<void>}
   */
  async speak_streaming(text, opts = {}) {
    const clean = opts.preprocess !== false ? markdown_to_speech(text) : text
    const sentences = split_sentences(clean)
    const proc = this._proc
    for (const s of sentences) {
      // When never initialized both sides are null, so _send() still rejects
      // with the usual "not initialized" error.
      if (this._proc !== proc) return
      await this._send(s, opts)
    }
  }

  /**
   * Send one `{ text, ...options }` request.
   *
   * @param {string} text
   * @param {object} [opts] forwarded to the server, minus `preprocess`
   * @returns {Promise<void>}
   */
  _send(text, opts = {}) {
    const { preprocess: _, ...gen_opts } = opts
    return this._request({ text, ...gen_opts })
  }

  /**
   * Send a `{ chime: path }` request.
   *
   * @param {string} path forwarded verbatim to the server
   * @returns {Promise<void>} resolves when the server answers `ok`
   */
  play_chime(path) {
    return this._request({ chime: path })
  }

  /**
   * Send a `{ preload: path }` request.
   *
   * @param {string} path forwarded verbatim to the server
   * @returns {Promise<void>} resolves when the server answers `ok`
   */
  preload_chime(path) {
    return this._request({ preload: path })
  }

  /**
   * Kill the server and release the stdout reader. Requests still in flight
   * are resolved (treated as interrupted) so their callers do not wait
   * forever. The instance can be started again with init().
   */
  stop() {
    this._rl?.close()
    this._proc?.kill()
    this._proc = null
    this._rl = null
    const interrupted = this._pending
    this._pending = []
    for (const { resolve } of interrupted) resolve()
  }

  /**
   * Write one JSON line to the server and register a pending entry that the
   * next unmatched `ok` line will resolve.
   *
   * @param {object} message JSON-serialisable request
   * @returns {Promise<void>} rejects if not initialized, if `message` cannot
   *   be serialised, or if the server goes away before answering
   */
  _request(message) {
    return new Promise((resolve, reject) => {
      if (!this._proc) {
        return reject(new Error(NOT_INITIALIZED))
      }
      // Serialise before registering so a stringify failure rejects this
      // request without leaving a stale queue entry behind.
      const payload = `${JSON.stringify(message)}\n`
      this._pending.push({ resolve, reject })
      this._proc.stdin.write(payload)
    })
  }

  /**
   * Reject every outstanding request with `err` and empty the queue.
   *
   * @param {Error} err
   */
  _fail_pending(err) {
    const failed = this._pending
    this._pending = []
    for (const { reject } of failed) reject(err)
  }
}