Files
claude-voice-experiment/lib/bark-tts.mjs
mikael-lovqvists-claude-agent db8889aeed Initial commit — voice pipeline experiment
STT (Silero VAD + Whisper via sherpa-onnx), Chatterbox TTS HTTP server,
query completeness classifier (Ollama), multi-voice demo scripts, and
planning docs. Kept as reference; clean rewrite planned in separate repos.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-30 04:48:54 +00:00

113 lines
3.6 KiB
JavaScript

/**
* Bark TTS — Node.js wrapper around bark-server.py.
*
* Spawns the Python server once, keeps it alive, sends requests as JSON lines.
* Markdown is preprocessed before sending: **bold** → UPPERCASE, etc.
*
* Usage:
* const tts = new Bark_Tts()
* await tts.init() // spawns server, waits for model load
* await tts.speak('Hello')
* await tts.speak('**Very** important point.') // emphasis via CAPS
* tts.stop()
*
* Environment variables:
* BARK_MODEL HuggingFace model id (default: suno/bark)
* BARK_VOICE voice preset (default: v2/en_speaker_6)
*
* Bark voice presets (English):
* v2/en_speaker_0 calm female
* v2/en_speaker_1 calm male
* v2/en_speaker_3 deep male
* v2/en_speaker_6 neutral/warm (default)
* v2/en_speaker_9 expressive
*/
import { spawn } from 'node:child_process'
import * as path from 'node:path'
import * as readline from 'node:readline'
import { markdown_to_bark, split_sentences } from './markdown.mjs'
const BARK_MODEL = process.env.BARK_MODEL || 'suno/bark'
const BARK_VOICE = process.env.BARK_VOICE || 'v2/en_speaker_6'
const SERVER = path.join(import.meta.dirname, '..', 'bark-server.py')
/**
 * Client for a long-lived bark-server.py child process.
 *
 * Requests are written to the child's stdin as one JSON object per line
 * (`{ text, voice }`); the child answers on stdout with the line `ready`
 * once after start-up and with the line `ok` once per request.
 *
 * Every promise handed out by this class settles: a request that cannot be
 * completed because the server exited, its pipe broke, or `stop()` was
 * called is rejected instead of being left pending forever.
 */
export class Bark_Tts {
  /**
   * @param {object} [options]
   * @param {string} [options.model] model id passed to the server as its first argument
   * @param {string} [options.voice] default voice preset, passed as the second argument
   */
  constructor({
    model = BARK_MODEL,
    voice = BARK_VOICE,
  } = {}) {
    this._model = model
    this._voice = voice
    this._proc = null
    this._rl = null
    // FIFO of { resolve, reject } pairs, one per request still waiting for "ok".
    // A queue (rather than a single slot) keeps overlapping calls from
    // overwriting each other's resolver.
    this._pending = []
  }

  /**
   * Spawn bark-server.py and wait until it signals "ready".
   *
   * @returns {Promise<void>} resolves on the first `ready` line; rejects if
   *   the process cannot be spawned, or exits / is stopped before `ready`.
   */
  init() {
    return new Promise((resolve, reject) => {
      const proc = spawn(SERVER, [this._model, this._voice], {
        stdio: ['pipe', 'pipe', 'inherit'],
      })
      let rl = null
      this._proc = proc

      // Shared failure path for spawn errors, process exit and broken pipes.
      const fail = (err) => {
        reject(err) // no-op once init() has already settled
        // If stop() ran, or a newer init() replaced this child, the shared
        // state no longer belongs to this process — leave it alone.
        if (this._proc !== proc) return
        this._proc = null
        this._rl = null
        rl?.close()
        this._reject_pending(err)
      }

      proc.on('error', fail)
      proc.on('close', (code, signal) => {
        if (code !== 0 && code !== null) {
          process.stderr.write(`[bark] server exited with code ${code}\n`)
        }
        const reason = signal ? `signal ${signal}` : `code ${code}`
        fail(new Error(`bark server exited (${reason})`))
      })
      // Without a listener, a write to a dead server's stdin (EPIPE) would
      // surface as an unhandled 'error' event and take the host process down.
      proc.stdin.on('error', (err) => {
        proc.kill()
        fail(err)
      })

      rl = readline.createInterface({ input: proc.stdout })
      this._rl = rl
      rl.on('line', (line) => {
        if (line === 'ready') {
          resolve()
          return
        }
        // One "ok" acknowledges the oldest outstanding request.
        if (line === 'ok' && this._proc === proc) {
          this._pending.shift()?.resolve()
        }
      })
    })
  }

  /**
   * Preprocess markdown and speak as a single request.
   *
   * @param {string} text
   * @param {object} [options]
   * @param {string} [options.voice] voice preset for this request (defaults to the instance voice)
   * @param {boolean} [options.preprocess=true] pass `false` to send `text` untouched
   * @returns {Promise<void>} resolves when the server acknowledges the request with `ok`
   */
  async speak(text, { voice = this._voice, preprocess = true } = {}) {
    const clean = preprocess ? markdown_to_bark(text) : text
    return this._send(clean, voice)
  }

  /**
   * Preprocess markdown and speak sentence by sentence.
   *
   * Sentences are sent one at a time, each awaited before the next is sent,
   * so the server only ever works on one short request.
   *
   * @param {string} text
   * @param {object} [opts]
   * @param {string} [opts.voice] voice preset (defaults to the instance voice)
   * @param {boolean} [opts.preprocess] pass `false` to skip markdown preprocessing
   * @returns {Promise<void>} resolves after the last sentence is acknowledged
   */
  async speak_streaming(text, opts = {}) {
    const clean = opts.preprocess !== false ? markdown_to_bark(text) : text
    const sentences = split_sentences(clean)
    for (const s of sentences) {
      await this._send(s, opts.voice ?? this._voice)
    }
  }

  /**
   * Write one request line to the server and wait for its `ok`.
   *
   * @param {string} text
   * @param {string} voice
   * @returns {Promise<void>} rejects if there is no running server, or if the
   *   server goes away before acknowledging
   */
  _send(text, voice) {
    return new Promise((resolve, reject) => {
      if (!this._proc) {
        return reject(new Error('Bark_Tts not initialized — call init() first'))
      }
      const payload = JSON.stringify({ text, voice }) + '\n'
      // Write first: if write() throws, the executor turns that into a
      // rejection and no stale entry is left in the queue.
      this._proc.stdin.write(payload)
      this._pending.push({ resolve, reject })
    })
  }

  /** Reject every request still waiting for an acknowledgement. */
  _reject_pending(err) {
    const waiting = this._pending
    this._pending = []
    for (const { reject } of waiting) {
      reject(err)
    }
  }

  /**
   * Kill the server and release the stdout reader. Requests still in flight
   * are rejected. Safe to call when nothing is running.
   */
  stop() {
    const proc = this._proc
    const rl = this._rl
    // Clear the shared state first so the child's late 'close' event is
    // recognised as stale by the handlers installed in init().
    this._proc = null
    this._rl = null
    rl?.close()
    proc?.kill()
    this._reject_pending(new Error('Bark_Tts stopped'))
  }
}