- lib/pending-query.mjs: new state machine for query accumulation wake word, silence timer, send/cancel/pause/resume, instant dispatch, mode toggle (always listen / stop listening), mode query - query-demo.mjs: refactored to use Pending_Query; wake word on by default with silence timer; chimes for dispatch/working/cancel/activate - tts-server.mjs: track last_speak_at, expose /activity endpoint, chime playback via Python queue (soundfile + librosa), preload on startup - chatterbox-server.py: chime and preload commands via stdin protocol - lib/chatterbox-tts.mjs: play_chime and preload_chime methods - test-chime.mjs: simple chime test script - voices.yaml: configured ready/cancel/working/dispatch chimes - CLEANUP-PLAN.md: updated with current state, command vocabulary, future plans Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
122 lines
4.0 KiB
JavaScript
122 lines
4.0 KiB
JavaScript
/**
 * Chatterbox TTS — Node.js wrapper around chatterbox-server.py.
 *
 * Usage:
 *   const tts = new Chatterbox_Tts()
 *   await tts.init()
 *   await tts.speak('Hello! [chuckle] That is funny.')
 *   await tts.speak('Something intense.', { exaggeration: 0.8 })  // full model only
 *   tts.stop()
 *
 * Paralinguistic tags (embed in text):
 *   [laugh] [chuckle] [cough] [clear throat] [sigh] [shush] [groan] [sniff] [gasp]
 *
 * Generation options (passed as second arg to speak()):
 *   temperature         0.05–2.0  default 0.8
 *   top_p               0–1       default 0.95
 *   top_k               0–1000    default 1000
 *   repetition_penalty  ≥1.0      default 1.2
 *   min_p               0–1       default 0.0
 *   audio_prompt        string    path to reference WAV for voice cloning
 *   exaggeration        0–1       emotion intensity (full model only)
 *   cfg_weight          0–1       classifier-free guidance (full model only)
 */
|
||
|
||
import { spawn } from 'node:child_process'
|
||
import * as path from 'node:path'
|
||
import * as readline from 'node:readline'
|
||
import { markdown_to_bark as markdown_to_speech, split_sentences } from './markdown.mjs'
|
||
|
||
// Path to the chatterbox-server.py script, located one directory above this module.
// It is passed directly to spawn() as the command, so it must be executable.
const SERVER = path.join(import.meta.dirname, '..', 'chatterbox-server.py')
|
||
|
||
export class Chatterbox_Tts {
|
||
constructor({
|
||
variant = 'turbo', // 'turbo' or 'full'
|
||
} = {}) {
|
||
this._variant = variant
|
||
this._proc = null
|
||
this._rl = null
|
||
this._resolve = null
|
||
}
|
||
|
||
init() {
|
||
return new Promise((resolve, reject) => {
|
||
this._proc = spawn(SERVER, [this._variant], {
|
||
stdio: ['pipe', 'pipe', 'inherit'],
|
||
})
|
||
|
||
this._proc.on('error', reject)
|
||
this._proc.on('close', (code) => {
|
||
if (code !== null && code !== 0) {
|
||
process.stderr.write(`[chatterbox] server exited with code ${code}\n`)
|
||
}
|
||
})
|
||
|
||
this._rl = readline.createInterface({ input: this._proc.stdout })
|
||
this._rl.on('line', (line) => {
|
||
if (line === 'ready') {
|
||
resolve()
|
||
return
|
||
}
|
||
if (line === 'ok' && this._resolve) {
|
||
const res = this._resolve
|
||
this._resolve = null
|
||
res()
|
||
}
|
||
})
|
||
})
|
||
}
|
||
|
||
async speak(text, opts = {}) {
|
||
const clean = opts.preprocess === true ? markdown_to_speech(text) : text
|
||
return this._send(clean, opts)
|
||
}
|
||
|
||
async speak_streaming(text, opts = {}) {
|
||
const clean = opts.preprocess !== false ? markdown_to_speech(text) : text
|
||
const sentences = split_sentences(clean)
|
||
for (const s of sentences) {
|
||
await this._send(s, opts)
|
||
}
|
||
}
|
||
|
||
_send(text, opts = {}) {
|
||
return new Promise((resolve, reject) => {
|
||
if (!this._proc) {
|
||
return reject(new Error('Chatterbox_Tts not initialized — call init() first'))
|
||
}
|
||
this._resolve = resolve
|
||
const { preprocess: _, ...gen_opts } = opts
|
||
const payload = JSON.stringify({ text, ...gen_opts }) + '\n'
|
||
this._proc.stdin.write(payload)
|
||
})
|
||
}
|
||
|
||
play_chime(path) {
|
||
return new Promise((resolve, reject) => {
|
||
if (!this._proc) {
|
||
return reject(new Error('Chatterbox_Tts not initialized — call init() first'))
|
||
}
|
||
this._resolve = resolve
|
||
this._proc.stdin.write(JSON.stringify({ chime: path }) + '\n')
|
||
})
|
||
}
|
||
|
||
preload_chime(path) {
|
||
return new Promise((resolve, reject) => {
|
||
if (!this._proc) {
|
||
return reject(new Error('Chatterbox_Tts not initialized — call init() first'))
|
||
}
|
||
this._resolve = resolve
|
||
this._proc.stdin.write(JSON.stringify({ preload: path }) + '\n')
|
||
})
|
||
}
|
||
|
||
stop() {
|
||
this._rl?.close()
|
||
this._proc?.kill()
|
||
this._proc = null
|
||
this._rl = null
|
||
}
|
||
}
|