// Repository snapshot note: STT (Silero VAD + Whisper via sherpa-onnx), Chatterbox TTS HTTP server,
// query completeness classifier (Ollama), multi-voice demo scripts, and planning docs.
// Kept as reference; clean rewrite planned in separate repos.
// Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
// File: 110 lines, 4.7 KiB, JavaScript
/**
 * Acting demo for Chatterbox TTS.
 *
 * Run: node acting-demo-chatterbox.mjs [start-section] [turbo|full] [voice.wav]
 *
 * Paralinguistic tags:
 *   [laugh] [chuckle] [cough] [clear throat] [sigh] [shush] [groan] [sniff] [gasp]
 *
 * Exaggeration (full model only, 0.0-1.0):
 *   0.0 = neutral, 1.0 = maximum emotion intensity
 */

import { Chatterbox_Tts } from './lib/chatterbox-tts.mjs'

// Command line: node acting-demo-chatterbox.mjs [start-section] [turbo|full]
// START is the first section whose lines are spoken; it defaults to 1.
const START = Number.parseInt(process.argv[2] ?? '1', 10)
if (Number.isNaN(START)) {
  // Every `START <= n` guard below is false for NaN, so without this check the
  // script would load the model, print the banners, and speak nothing.
  process.stderr.write(
    `[chatterbox] invalid start section "${process.argv[2]}" — expected a number\n`
  )
  process.exit(1)
}

// Model variant handed to the TTS wrapper; defaults to 'turbo'.
const VARIANT = process.argv[3] ?? 'turbo'

const tts = new Chatterbox_Tts({ variant: VARIANT })

// Progress messages go to stderr so stdout carries only the demo transcript.
process.stderr.write(`[chatterbox] loading ${VARIANT} model...\n`)
await tts.init()
process.stderr.write('[chatterbox] ready\n\n')

/**
 * Print a section banner to stdout: a blank line, then the title framed
 * above and below by a 50-character horizontal rule.
 *
 * @param {string} title - Heading text shown between the two rules.
 * @returns {void}
 */
function section(title) {
  const rule = '─'.repeat(50)
  process.stdout.write(`\n${rule}\n${title}\n${rule}\n`)
}

/**
 * Echo a line (and any non-empty options) to stdout, then speak it through
 * the module-level `tts` instance.
 *
 * @param {string} text - Text to synthesize; may contain paralinguistic tags.
 * @param {object} [opts={}] - Extra options forwarded to `tts.speak`. They are
 *   spread after the `preprocess: false` default, so a caller-supplied
 *   `preprocess` wins.
 * @returns {Promise<void>} Resolves once `tts.speak` has resolved.
 */
async function say(text, opts = {}) {
  process.stdout.write(` "${text}"\n`)
  if (Object.keys(opts).length > 0) {
    process.stdout.write(` ${JSON.stringify(opts)}\n`)
  }
  process.stdout.write('\n')
  // NOTE(review): preprocess is presumably disabled so tags and punctuation
  // reach the model unmodified — confirm against the TTS wrapper.
  await tts.speak(text, { preprocess: false, ...opts })
}

// ─── 1. Baseline ───────────────────────────────────────
// A plain sentence with no tags and no options.
// The banner is printed unconditionally; only the speech is gated on START.
section('1. Baseline')
if (START <= 1) {
  await say('The package has arrived. Please sign here.')
}

// ─── 2. Paralinguistic tags ────────────────────────────
// One line per bracketed tag, with the tag placed at the start or in the
// middle of the sentence. Spoken only when START is 2 or lower.
section('2. Paralinguistic tags')
if (START <= 2) {
  await say('[cough] Right, where were we.')
  await say('And then he just... [sigh] gave up.')
  await say('[gasp] I had no idea you were here!')
  await say('Ha! [laugh] That is the funniest thing I have heard all week.')
  await say('[chuckle] Well, that is certainly one way to look at it.')
  await say('[groan] Not this again.')
}

// ─── 3. Punctuation and rhythm ─────────────────────────
// Lines built around ellipses, an em dash, and short repeated sentences.
// Spoken only when START is 3 or lower.
section('3. Punctuation and rhythm')
if (START <= 3) {
  await say('It was... quiet. Too quiet.')
  await say('He said he was fine — but his eyes told a different story.')
  await say('I told you. I told you this would happen. But did anyone listen? No.')
}

// ─── 4. Exaggeration — full model only ─────────────────
// The same sentence at three exaggeration levels. The banner tells the user
// whether the selected variant honours the setting.
section(`4. Exaggeration (${
  VARIANT === 'full'
    ? 'active'
    : 'ignored in turbo — run with: node acting-demo-chatterbox.mjs 4 full'
})`)
if (START <= 4) {
  for (const exaggeration of [0.0, 0.5, 1.0]) {
    await say('I am so incredibly happy to see you today!', { exaggeration })
  }
}

// ─── 5. Temperature ────────────────────────────────────
// The same sentence at three temperature settings, lowest first.
section('5. Temperature (lower = more predictable)')
if (START <= 5) {
  for (const temperature of [0.3, 0.8, 1.4]) {
    await say('Something is very wrong here and I think we should leave now.', { temperature })
  }
}

// ─── 6. Paragraph ──────────────────────────────────────
// A multi-sentence passage with tags embedded mid-paragraph, sent to the
// synthesizer as a single string.
section('6. Paragraph with tags')
if (START <= 6) {
  await say(
    'It was a cold evening when the letter finally arrived. '
    + '[sigh] She had been waiting for months... not knowing whether the news would be good — or bad. '
    + 'Her hands trembled as she tore open the envelope. '
    + '[gasp] Inside was a single sheet of paper with just three words written on it.'
  )
}

// ─── 7. Audio prompt / voice cloning ───────────────────
// Pass a WAV file path as the third argument:
//   node acting-demo-chatterbox.mjs 7 turbo /path/to/voice.wav
section('7. Audio prompt (voice cloning)')
if (START <= 7) {
  // argv[0] is node and argv[1] is the script, so the third CLI argument is argv[4].
  const audio_prompt = process.argv[4]
  if (!audio_prompt) {
    // No reference voice supplied: explain how to run this section instead of speaking.
    process.stdout.write(' Skipped — pass a WAV file as the 3rd argument:\n')
    process.stdout.write(' node acting-demo-chatterbox.mjs 7 turbo /path/to/voice.wav\n\n')
    process.stdout.write(' Requirements: WAV file, at least 5 seconds, clean speech, no music/noise\n')
  } else {
    process.stdout.write(` voice: ${audio_prompt}\n\n`)
    // The path is forwarded to tts.speak under the `audio_prompt` option key.
    await say('Hello. This is a test of voice cloning.', { audio_prompt })
    await say(
      'It was a cold evening when the letter finally arrived. '
      + '[sigh] She had been waiting for months... not knowing whether the news would be good — or bad.',
      { audio_prompt }
    )
  }
}

// Shut the TTS instance down, then print the closing banner.
// NOTE(review): stop() is not awaited — confirm it is synchronous in the wrapper.
tts.stop()
section('Done')