Files
claude-voice-experiment/lib/chatterbox-tts.mjs
mikael-lovqvists-claude-agent a7fa2fd218 Add Pending_Query class and voice interaction improvements
- lib/pending-query.mjs: new state machine for query accumulation
  wake word, silence timer, send/cancel/pause/resume, instant dispatch,
  mode toggle (always listen / stop listening), mode query
- query-demo.mjs: refactored to use Pending_Query; wake word on by default
  with silence timer; chimes for dispatch/working/cancel/activate
- tts-server.mjs: track last_speak_at, expose /activity endpoint,
  chime playback via Python queue (soundfile + librosa), preload on startup
- chatterbox-server.py: chime and preload commands via stdin protocol
- lib/chatterbox-tts.mjs: play_chime and preload_chime methods
- test-chime.mjs: simple chime test script
- voices.yaml: configured ready/cancel/working/dispatch chimes
- CLEANUP-PLAN.md: updated with current state, command vocabulary, future plans

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-31 03:59:11 +00:00

122 lines
4.0 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Chatterbox TTS — Node.js wrapper around chatterbox-server.py.
*
* Usage:
* const tts = new Chatterbox_Tts()
* await tts.init()
* await tts.speak('Hello! [chuckle] That is funny.')
* await tts.speak('Something intense.', { exaggeration: 0.8 }) // full model only
* tts.stop()
*
* Paralinguistic tags (embed in text):
* [laugh] [chuckle] [cough] [clear throat] [sigh] [shush] [groan] [sniff] [gasp]
*
* Generation options (passed as second arg to speak()):
* temperature 0.05–2.0 default 0.8
* top_p 0–1 default 0.95
* top_k 0–1000 default 1000
* repetition_penalty ≥1.0 default 1.2
* min_p 0–1 default 0.0
* audio_prompt string path to reference WAV for voice cloning
* exaggeration 0–1 emotion intensity (full model only)
* cfg_weight 0–1 classifier-free guidance (full model only)
*/
import { spawn } from 'node:child_process'
import * as path from 'node:path'
import * as readline from 'node:readline'
import { markdown_to_bark as markdown_to_speech, split_sentences } from './markdown.mjs'
// Path of the server script launched by Chatterbox_Tts#init():
// `chatterbox-server.py`, one directory above the directory containing this module.
// init() passes this path straight to spawn() as the command.
const SERVER = path.join(import.meta.dirname, '..', 'chatterbox-server.py')
/**
 * Client for the chatterbox server child process.
 *
 * Each request is written to the server's stdin as one JSON object per line.
 * The server's stdout is read line by line: a `ready` line settles init(), and
 * every `ok` line settles the oldest request that is still waiting.
 *
 * NOTE(review): this assumes the server answers every request line with exactly
 * one `ok`, in the order the requests were written — confirm against
 * chatterbox-server.py.
 */
export class Chatterbox_Tts {
  /**
   * @param {object} [options]
   * @param {string} [options.variant='turbo'] - 'turbo' or 'full'; handed to the
   *   server as its only command-line argument.
   */
  constructor({
    variant = 'turbo', // 'turbo' or 'full'
  } = {}) {
    this._variant = variant
    this._proc = null
    this._rl = null
    // FIFO of { resolve, reject } pairs, one per request written to the server
    // that has not been acknowledged yet. A queue (rather than a single slot)
    // keeps overlapping requests from overwriting each other's resolver.
    this._pending = []
  }

  /**
   * Spawn the server and wait for its `ready` line.
   *
   * @returns {Promise<void>} Resolves on `ready`. Rejects if the process cannot
   *   be spawned or exits before reporting `ready`.
   */
  init() {
    return new Promise((resolve, reject) => {
      const proc = spawn(SERVER, [this._variant], {
        stdio: ['pipe', 'pipe', 'inherit'],
      })
      this._proc = proc

      // The process is unusable: settle init() (a no-op once it has resolved)
      // and fail whatever is still waiting on it. The identity check keeps a
      // late event from an old process from disturbing a newer one.
      const on_gone = (err) => {
        reject(err)
        if (this._proc !== proc) {
          return
        }
        this._proc = null
        this._rl = null
        this._fail_pending(err)
      }

      proc.on('error', on_gone)
      proc.on('close', (code, signal) => {
        if (code !== null && code !== 0) {
          process.stderr.write(`[chatterbox] server exited with code ${code}\n`)
        }
        on_gone(new Error(`chatterbox server exited (code ${code}, signal ${signal})`))
      })

      // Without a listener, a failed write (e.g. EPIPE) would surface as an
      // unhandled 'error' event and take the host process down.
      proc.stdin?.on('error', (err) => {
        if (this._proc === proc) {
          this._fail_pending(err)
        }
      })

      this._rl = readline.createInterface({ input: proc.stdout })
      this._rl.on('line', (line) => {
        if (line === 'ready') {
          resolve()
          return
        }
        this._on_line(line)
      })
    })
  }

  /**
   * Speak `text` as a single request.
   *
   * Markdown preprocessing is opt-in here: it runs only when
   * `opts.preprocess === true` (unlike speak_streaming(), where it is opt-out).
   *
   * @param {string} text
   * @param {object} [opts] - Generation options forwarded to the server;
   *   `preprocess` is consumed locally and not forwarded.
   * @returns {Promise<void>} Settles when the server acknowledges the request.
   */
  async speak(text, opts = {}) {
    const clean = opts.preprocess === true ? markdown_to_speech(text) : text
    return this._send(clean, opts)
  }

  /**
   * Speak `text` sentence by sentence, waiting for each sentence to be
   * acknowledged before sending the next.
   *
   * Markdown preprocessing is opt-out here: it runs unless
   * `opts.preprocess === false`.
   *
   * @param {string} text
   * @param {object} [opts] - Same options as speak().
   * @returns {Promise<void>}
   */
  async speak_streaming(text, opts = {}) {
    const clean = opts.preprocess !== false ? markdown_to_speech(text) : text
    const sentences = split_sentences(clean)
    for (const s of sentences) {
      await this._send(s, opts)
    }
  }

  /**
   * Send one text request. `preprocess` is stripped from `opts`; every other
   * option is merged into the JSON payload next to `text`.
   *
   * @param {string} text
   * @param {object} [opts]
   * @returns {Promise<void>}
   */
  _send(text, opts = {}) {
    const { preprocess: _, ...gen_opts } = opts
    return this._request({ text, ...gen_opts })
  }

  /**
   * Ask the server to play a chime.
   *
   * @param {string} path - Sent to the server as the `chime` field.
   * @returns {Promise<void>}
   */
  play_chime(path) {
    return this._request({ chime: path })
  }

  /**
   * Ask the server to preload a chime.
   *
   * @param {string} path - Sent to the server as the `preload` field.
   * @returns {Promise<void>}
   */
  preload_chime(path) {
    return this._request({ preload: path })
  }

  /**
   * Stop the line reader, kill the server process and reject every request
   * that is still waiting with a "stopped" error.
   */
  stop() {
    this._rl?.close()
    this._proc?.kill()
    this._proc = null
    this._rl = null
    this._fail_pending(new Error('Chatterbox_Tts stopped'))
  }

  /**
   * Queue a request and write it to the server as a single JSON line.
   *
   * @param {object} message - JSON-serialisable payload.
   * @returns {Promise<void>} Resolves on the matching `ok`; rejects if the
   *   client is not initialized, is stopped, or the server goes away first.
   */
  _request(message) {
    return new Promise((resolve, reject) => {
      if (!this._proc) {
        return reject(new Error('Chatterbox_Tts not initialized — call init() first'))
      }
      // Enqueue before writing so the acknowledgement always finds its entry.
      this._pending.push({ resolve, reject })
      this._proc.stdin.write(`${JSON.stringify(message)}\n`)
    })
  }

  /**
   * Handle one stdout line from the server other than `ready`: an `ok`
   * resolves the oldest waiting request; any other line is ignored.
   *
   * @param {string} line
   */
  _on_line(line) {
    if (line !== 'ok') {
      return
    }
    // A stray `ok` with nothing waiting is ignored.
    this._pending.shift()?.resolve()
  }

  /**
   * Reject every waiting request with `err` and empty the queue.
   *
   * @param {Error} err
   */
  _fail_pending(err) {
    const abandoned = this._pending
    this._pending = []
    for (const { reject } of abandoned) {
      reject(err)
    }
  }
}