STT (Silero VAD + Whisper via sherpa-onnx), Chatterbox TTS HTTP server, query completeness classifier (Ollama), multi-voice demo scripts, and planning docs. Kept as reference; clean rewrite planned in separate repos. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
136 lines
5.0 KiB
JavaScript
136 lines
5.0 KiB
JavaScript
/**
 * Demonstrates acting and expressiveness with Bark TTS.
 *
 * Run: node acting-demo-bark.mjs [start-section] [voice ...]
 *
 *   start-section  number of the first section to play (default 1)
 *   voice ...      voice presets for the section 9 paragraph comparison
 *
 * Bark expressive techniques:
 *   UPPERCASE        — stress/emphasis on a word
 *   ...              — hesitation, trailing off
 *   [laughs]         — laughter
 *   [sighs]          — sigh
 *   [gasps]          — sharp intake of breath
 *   [clears throat]  — throat clearing
 *   ♪ text ♪         — sung phrase
 *   —                — abrupt break
 *   !                — raised energy
 *
 * Voice presets (English):
 *   v2/en_speaker_0  calm female
 *   v2/en_speaker_1  calm male
 *   v2/en_speaker_3  deep male
 *   v2/en_speaker_6  neutral/warm (default)
 *   v2/en_speaker_9  expressive
 */
|
|
|
|
import { Bark_Tts } from './lib/bark-tts.mjs'
|
|
|
|
// Number of the first section to play, taken from the first CLI argument
// (defaults to 1). Parsed before the model is loaded so that a bad argument
// fails immediately instead of after the load.
const START = Number.parseInt(process.argv[2] ?? '1', 10)
if (Number.isNaN(START)) {
  // A NaN start would make every `START <= n` guard false, so the script
  // would print headers and speak nothing without any hint as to why.
  process.stderr.write(`[bark] invalid start-section "${process.argv[2]}" (expected a number)\n`)
  process.exit(1)
}

// Shared TTS instance used by say() and stopped at the end of the script.
const tts = new Bark_Tts()

// Progress messages go to stderr so stdout carries only the demo transcript.
process.stderr.write('[bark] loading model...\n')
await tts.init()
process.stderr.write('[bark] ready\n\n')
|
|
|
|
/**
 * Prints a section banner to stdout: a blank line, a 50-character rule,
 * the title, and a second rule, emitted as a single write.
 *
 * @param {string} title - Heading text shown between the two rules.
 * @returns {void}
 */
function section(title) {
  const rule = '─'.repeat(50)
  process.stdout.write(`\n${rule}\n${title}\n${rule}\n`)
}
|
|
|
|
/**
 * Echoes one line of the demo to stdout (voice label, then the quoted text)
 * and then speaks it through the shared `tts` instance.
 *
 * @param {string} text - Text to speak; passed to `tts.speak` unchanged.
 * @param {string} [voice] - Voice preset name. When omitted or empty, no
 *   `voice` option is sent and the label printed is "default".
 * @returns {Promise<void>} Settles when `tts.speak` settles.
 */
async function say(text, voice = undefined) {
  // The label uses the same truthiness rule as the option below, so an empty
  // string is reported as "default" rather than as a blank label.
  const label = voice || 'default'
  // NOTE(review): `preprocess: false` presumably stops the library from
  // rewriting the Bark markup tokens — confirm against lib/bark-tts.mjs.
  const opts = voice ? { preprocess: false, voice } : { preprocess: false }
  process.stdout.write(` ${label}\n "${text}"\n\n`)
  await tts.speak(text, opts)
}
|
|
|
|
// Voices for the section 9 paragraph comparison (defaults: speakers 7-9).
// Pass voices as CLI args to override, e.g.:
//   node acting-demo-bark.mjs 9 v2/en_speaker_0 v2/en_speaker_2 v2/en_speaker_5
const cliVoices = process.argv.slice(3)
const PARA_VOICES = cliVoices.length > 0
  ? cliVoices
  : ['v2/en_speaker_7', 'v2/en_speaker_8', 'v2/en_speaker_9']

// Longer passage combining emphasis, hesitation and paralinguistic tokens.
const PARAGRAPH = 'It was a cold evening when the letter FINALLY arrived. '
  + '[sighs] She had been waiting for months... not knowing whether the news would be good — or bad. '
  + 'Her hands trembled as she tore open the envelope. '
  + 'Inside was a single sheet of paper with just THREE words written on it. '
  + '[gasps] She read them twice... then sat down slowly, and stared at the wall.'

// Each section prints its header unconditionally; its lines are only spoken
// when the section number is at or after START.
try {
  // ─── 1. Baseline ───────────────────────────────────────
  section('1. Baseline')
  if (START <= 1) {
    await say('The package has arrived. Please sign here.')
  }

  // ─── 2. Paralinguistic tokens ──────────────────────────
  section('2. Paralinguistic tokens')
  if (START <= 2) {
    await say('[clears throat] Right. Where were we.')
    await say('And then he just... [sighs] gave up.')
    await say('[gasps] I had no idea you were here!')
    await say("Ha. [laughs] That's the funniest thing I've heard all week.")
  }

  // ─── 3. Emphasis via UPPERCASE ─────────────────────────
  section('3. Emphasis via UPPERCASE')
  if (START <= 3) {
    await say('You need to stop. RIGHT NOW.')
    await say('I have told you THIS BEFORE.')
    await say('This is ABSOLUTELY unacceptable.')
  }

  // ─── 4. Hesitation and rhythm ──────────────────────────
  section('4. Hesitation and rhythm')
  if (START <= 4) {
    await say('It was... quiet. Too quiet.')
    await say("I just... I don't know what to say.")
    await say('He said he was fine — but his eyes told a different story.')
  }

  // ─── 5. Sung phrase ────────────────────────────────────
  section('5. Singing')
  if (START <= 5) {
    await say('♪ Happy birthday to you, happy birthday to you ♪')
  }

  // ─── 6. Acronyms ───────────────────────────────────────
  section('6. Acronyms')
  if (START <= 6) {
    // Without spacing — Bark may try to pronounce as a word
    await say('The FBI is investigating.')
    await say('NASA launched a new rocket.')
    await say('I work with the CPU and GPU.')
    // With letter spacing — forces spelling out
    await say('The F B I is investigating.')
    await say('N A S A launched a new rocket.')
    await say('I work with the C P U and G P U.')
  }

  // ─── 7. Voice character ────────────────────────────────
  section('7. Voice presets — same line')
  if (START <= 7) {
    const line = "Well, that's certainly one way to look at it."
    for (const voice of ['v2/en_speaker_0', 'v2/en_speaker_3', 'v2/en_speaker_6', 'v2/en_speaker_9']) {
      await say(line, voice)
    }
  }

  // ─── 8. Combined scene ─────────────────────────────────
  section('8. Combined — a tense scene')
  if (START <= 8) {
    await say('Something is wrong.', 'v2/en_speaker_9')
    await say('I can FEEL it.', 'v2/en_speaker_9')
    await say('[gasps] Run!', 'v2/en_speaker_9')
  }

  // ─── 9. Paragraph — voice comparison ───────────────────
  section('9. Paragraph — voice comparison')
  if (START <= 9) {
    for (const voice of PARA_VOICES) {
      process.stdout.write(`\n — ${voice} —\n`)
      await say(PARAGRAPH, voice)
    }
  }
} finally {
  // Always call tts.stop(), even when a section throws part-way through.
  tts.stop()
}

section('Done')