Files
claude-voice-experiment/acting-demo-chatterbox.mjs
mikael-lovqvists-claude-agent db8889aeed Initial commit — voice pipeline experiment
STT (Silero VAD + Whisper via sherpa-onnx), Chatterbox TTS HTTP server,
query completeness classifier (Ollama), multi-voice demo scripts, and
planning docs. Kept as reference; clean rewrite planned in separate repos.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-30 04:48:54 +00:00

110 lines
4.7 KiB
JavaScript

/**
* Acting demo for Chatterbox TTS.
*
* Run: node acting-demo-chatterbox.mjs [start-section] [turbo|full] [voice.wav]
*
* Paralinguistic tags:
* [laugh] [chuckle] [cough] [clear throat] [sigh] [shush] [groan] [sniff] [gasp]
*
* Exaggeration (full model only, 0.0-1.0):
* 0.0 = neutral, 1.0 = maximum emotion intensity
*/
import { Chatterbox_Tts } from './lib/chatterbox-tts.mjs'
/**
 * Parse the optional start-section command-line argument.
 *
 * @param {string | undefined} raw - Raw argument text; a missing argument selects section 1.
 * @returns {number} The section number to start from.
 * @throws {RangeError} When the text does not begin with an integer. Without this check
 *   the result would be NaN, every `START <= n` comparison would be false, and the
 *   demo would load the model and then silently play nothing.
 */
function parse_start_section(raw) {
	const parsed = Number.parseInt(raw ?? '1', 10)
	if (Number.isNaN(parsed)) {
		throw new RangeError(`start-section must be an integer, got ${JSON.stringify(raw)}`)
	}
	return parsed
}
// First section to play; sections with a lower number are skipped.
const START = parse_start_section(process.argv[2])
// Variant name handed to the TTS client; defaults to 'turbo' when not given.
const VARIANT = process.argv[3] ?? 'turbo'
// Create the TTS client for the selected variant and wait for init() to finish
// before any section runs. Status lines go to stderr; the demo text goes to stdout.
const tts = new Chatterbox_Tts({ variant: VARIANT })
const report = (status) => process.stderr.write(`[chatterbox] ${status}\n`)
report(`loading ${VARIANT} model...`)
await tts.init()
// The extra newline leaves a blank line between the status output and the demo.
report('ready\n')
/**
 * Print a section banner to stdout: a blank line, then the title framed by
 * two 50-character horizontal rules.
 *
 * @param {string} title - Heading text shown between the rules.
 */
function section(title) {
	const rule = '─'.repeat(50)
	const banner = ['', rule, `${title}`, rule, ''].join('\n')
	process.stdout.write(banner)
}
/**
 * Echo a line of dialogue (and any non-empty options) to stdout, then speak it.
 *
 * @param {string} text - Text to print and pass to tts.speak().
 * @param {object} [opts={}] - Extra speak options; they are merged over the
 *   default `{ preprocess: false }`, so a caller-supplied `preprocess` wins.
 * @returns {Promise<void>} Settles once tts.speak() has settled.
 */
async function say(text, opts = {}) {
	const out = process.stdout
	out.write(` "${text}"\n`)
	// Only show the options line when the caller actually passed some.
	const option_names = Object.keys(opts)
	if (option_names.length > 0) {
		out.write(` ${JSON.stringify(opts)}\n`)
	}
	out.write('\n')
	const speak_options = { preprocess: false, ...opts }
	await tts.speak(text, speak_options)
}
// Run the numbered demo sections in order. Every section header is printed, but a
// section's lines are only spoken when START is less than or equal to its number.
// The try/finally ensures tts.stop() is not skipped when a say() call rejects midway
// (presumably stop() releases the TTS backend — confirm in lib/chatterbox-tts.mjs).
try {
	// ─── 1. Baseline ───────────────────────────────────────
	section('1. Baseline')
	if (START <= 1) {
		await say('The package has arrived. Please sign here.')
	}
	// ─── 2. Paralinguistic tags ────────────────────────────
	section('2. Paralinguistic tags')
	if (START <= 2) {
		await say('[cough] Right, where were we.')
		await say('And then he just... [sigh] gave up.')
		await say('[gasp] I had no idea you were here!')
		await say('Ha! [laugh] That is the funniest thing I have heard all week.')
		await say('[chuckle] Well, that is certainly one way to look at it.')
		await say('[groan] Not this again.')
	}
	// ─── 3. Punctuation and rhythm ─────────────────────────
	section('3. Punctuation and rhythm')
	if (START <= 3) {
		await say('It was... quiet. Too quiet.')
		await say('He said he was fine — but his eyes told a different story.')
		await say('I told you. I told you this would happen. But did anyone listen? No.')
	}
	// ─── 4. Exaggeration — full model only ─────────────────
	section(`4. Exaggeration (${VARIANT === 'full' ? 'active' : 'ignored in turbo — run with: node acting-demo-chatterbox.mjs 4 full'})`)
	if (START <= 4) {
		// Same sentence at increasing exaggeration so the settings can be compared by ear.
		for (const exaggeration of [0.0, 0.5, 1.0]) {
			await say('I am so incredibly happy to see you today!', { exaggeration })
		}
	}
	// ─── 5. Temperature ────────────────────────────────────
	section('5. Temperature (lower = more predictable)')
	if (START <= 5) {
		// Same sentence at increasing temperature so the settings can be compared by ear.
		for (const temperature of [0.3, 0.8, 1.4]) {
			await say('Something is very wrong here and I think we should leave now.', { temperature })
		}
	}
	// ─── 6. Paragraph ──────────────────────────────────────
	section('6. Paragraph with tags')
	if (START <= 6) {
		await say(
			'It was a cold evening when the letter finally arrived. '
			+ '[sigh] She had been waiting for months... not knowing whether the news would be good — or bad. '
			+ 'Her hands trembled as she tore open the envelope. '
			+ '[gasp] Inside was a single sheet of paper with just three words written on it.'
		)
	}
	// ─── 7. Audio prompt / voice cloning ───────────────────
	// Pass a WAV file path as third argument:
	// node acting-demo-chatterbox.mjs 7 turbo /path/to/voice.wav
	section('7. Audio prompt (voice cloning)')
	if (START <= 7) {
		// argv[0] is node and argv[1] the script, so argv[4] is the third CLI argument.
		const audio_prompt = process.argv[4]
		if (!audio_prompt) {
			process.stdout.write(' Skipped — pass a WAV file as the 3rd argument:\n')
			process.stdout.write(' node acting-demo-chatterbox.mjs 7 turbo /path/to/voice.wav\n\n')
			process.stdout.write(' Requirements: WAV file, at least 5 seconds, clean speech, no music/noise\n')
		} else {
			process.stdout.write(` voice: ${audio_prompt}\n\n`)
			await say('Hello. This is a test of voice cloning.', { audio_prompt })
			await say(
				'It was a cold evening when the letter finally arrived. '
				+ '[sigh] She had been waiting for months... not knowing whether the news would be good — or bad.',
				{ audio_prompt }
			)
		}
	}
} finally {
	tts.stop()
}
section('Done')