// claude-voice-experiment/acting-demo-chatterbox.mjs

/**
 * Acting demo for Chatterbox TTS.
 *
 * Run: node acting-demo-chatterbox.mjs [start-section] [turbo|full] [voice.wav]
 *
 * Paralinguistic tags:
 *   [laugh] [chuckle] [cough] [clear throat] [sigh] [shush] [groan] [sniff] [gasp]
 *
 * Exaggeration (full model only, 0.0-1.0):
 *   0.0 = neutral, 1.0 = maximum emotion intensity
 */

import { Chatterbox_Tts } from './lib/chatterbox-tts.mjs'

// CLI arguments: [start-section] [turbo|full].
// START is the first section to run; the section guards further down compare
// against it (`START <= n`). VARIANT is forwarded to the TTS constructor.
const START   = Number.parseInt(process.argv[2] ?? '1', 10)
const VARIANT = process.argv[3] ?? 'turbo'

// A non-numeric start section parses to NaN, and every `START <= n` comparison
// against NaN is false. Fail fast here rather than loading the model and then
// silently running nothing.
if (Number.isNaN(START)) {
    throw new Error(
        `invalid start section "${process.argv[2]}" — `
        + 'usage: node acting-demo-chatterbox.mjs [start-section] [turbo|full]'
    )
}

const tts = new Chatterbox_Tts({ variant: VARIANT })

// Progress messages go to stderr so stdout carries only the demo transcript.
process.stderr.write(`[chatterbox] loading ${VARIANT} model...\n`)
await tts.init()
process.stderr.write('[chatterbox] ready\n\n')

/**
 * Print a section banner to stdout: a blank line, a 50-character horizontal
 * rule, the title, and a second rule, each terminated by a newline.
 *
 * @param {string} title - Heading text shown between the two rules.
 */
function section(title) {
    const rule = '─'.repeat(50)
    const banner = `\n${rule}\n${title}\n${rule}\n`
    process.stdout.write(banner)
}

/**
 * Echo a line of dialogue to stdout, then speak it through the module-level
 * `tts` instance.
 *
 * The quoted text is printed first; when `opts` has any own keys its JSON form
 * is printed on the next line, followed by a blank line. `preprocess: false`
 * is passed to `tts.speak` by default and can be overridden through `opts`.
 *
 * @param {string} text - Text to print and synthesize.
 * @param {object} [opts={}] - Extra options merged into the `tts.speak` call.
 * @returns {Promise<void>} Resolves once `tts.speak` has resolved.
 */
async function say(text, opts = {}) {
    const transcript = [`  "${text}"`]
    if (Object.keys(opts).length > 0) {
        transcript.push(`  ${JSON.stringify(opts)}`)
    }
    process.stdout.write(`${transcript.join('\n')}\n\n`)

    const speak_opts = { preprocess: false, ...opts }
    await tts.speak(text, speak_opts)
}

// The demo sections run inside try/finally so that tts.stop() is still called
// when a speak() call rejects part-way through. Each banner is printed only for
// a section that actually runs, so starting at a later section does not emit a
// column of empty headers for the skipped ones.
try {
    // ─── 1. Baseline ───────────────────────────────────────
    if (START <= 1) {
        section('1. Baseline')
        await say('The package has arrived. Please sign here.')
    }

    // ─── 2. Paralinguistic tags ────────────────────────────
    if (START <= 2) {
        section('2. Paralinguistic tags')
        await say('[cough] Right, where were we.')
        await say('And then he just... [sigh] gave up.')
        await say('[gasp] I had no idea you were here!')
        await say('Ha! [laugh] That is the funniest thing I have heard all week.')
        await say('[chuckle] Well, that is certainly one way to look at it.')
        await say('[groan] Not this again.')
    }

    // ─── 3. Punctuation and rhythm ─────────────────────────
    if (START <= 3) {
        section('3. Punctuation and rhythm')
        await say('It was... quiet. Too quiet.')
        await say('He said he was fine — but his eyes told a different story.')
        await say('I told you. I told you this would happen. But did anyone listen? No.')
    }

    // ─── 4. Exaggeration — full model only ─────────────────
    if (START <= 4) {
        // The banner doubles as a hint: outside the full variant it tells the
        // user how to re-run this section with exaggeration enabled.
        const exaggeration_note = VARIANT === 'full'
            ? 'active'
            : 'ignored in turbo — run with: node acting-demo-chatterbox.mjs 4 full'
        section(`4. Exaggeration (${exaggeration_note})`)
        await say('I am so incredibly happy to see you today!', { exaggeration: 0.0 })
        await say('I am so incredibly happy to see you today!', { exaggeration: 0.5 })
        await say('I am so incredibly happy to see you today!', { exaggeration: 1.0 })
    }

    // ─── 5. Temperature ────────────────────────────────────
    if (START <= 5) {
        section('5. Temperature (lower = more predictable)')
        await say('Something is very wrong here and I think we should leave now.', { temperature: 0.3 })
        await say('Something is very wrong here and I think we should leave now.', { temperature: 0.8 })
        await say('Something is very wrong here and I think we should leave now.', { temperature: 1.4 })
    }

    // ─── 6. Paragraph ──────────────────────────────────────
    if (START <= 6) {
        section('6. Paragraph with tags')
        await say(
            'It was a cold evening when the letter finally arrived. '
            + '[sigh] She had been waiting for months... not knowing whether the news would be good — or bad. '
            + 'Her hands trembled as she tore open the envelope. '
            + '[gasp] Inside was a single sheet of paper with just three words written on it.'
        )
    }

    // ─── 7. Audio prompt / voice cloning ───────────────────
    // Pass a WAV file path as third argument:
    //   node acting-demo-chatterbox.mjs 7 turbo /path/to/voice.wav
    if (START <= 7) {
        section('7. Audio prompt (voice cloning)')
        // argv[0] is node and argv[1] is this script, so the third
        // user-supplied argument lives at index 4.
        const audio_prompt = process.argv[4]
        if (!audio_prompt) {
            process.stdout.write('  Skipped — pass a WAV file as the 3rd argument:\n')
            process.stdout.write('  node acting-demo-chatterbox.mjs 7 turbo /path/to/voice.wav\n\n')
            process.stdout.write('  Requirements: WAV file, at least 5 seconds, clean speech, no music/noise\n')
        } else {
            process.stdout.write(`  voice: ${audio_prompt}\n\n`)
            await say('Hello. This is a test of voice cloning.', { audio_prompt })
            await say(
                'It was a cold evening when the letter finally arrived. '
                + '[sigh] She had been waiting for months... not knowing whether the news would be good — or bad.',
                { audio_prompt }
            )
        }
    }
} finally {
    // Runs on success and on failure alike. NOTE(review): stop() presumably
    // releases the TTS engine — confirm in lib/chatterbox-tts.mjs.
    tts.stop()
}
section('Done')