/**
 * Bark TTS — Node.js wrapper around bark-server.py.
 *
 * Spawns the Python server once, keeps it alive, sends requests as JSON lines.
 * Markdown is preprocessed before sending: **bold** → UPPERCASE, etc.
 *
 * Usage:
 *   const tts = new Bark_Tts()
 *   await tts.init()                              // spawns server, waits for model load
 *   await tts.speak('Hello')
 *   await tts.speak('**Very** important point.')  // emphasis via CAPS
 *   tts.stop()
 *
 * Environment variables:
 *   BARK_MODEL   HuggingFace model id  (default: suno/bark)
 *   BARK_VOICE   voice preset          (default: v2/en_speaker_6)
 *
 * Bark voice presets (English):
 *   v2/en_speaker_0  calm female
 *   v2/en_speaker_1  calm male
 *   v2/en_speaker_3  deep male
 *   v2/en_speaker_6  neutral/warm (default)
 *   v2/en_speaker_9  expressive
 */

import { spawn } from 'node:child_process'
import * as path from 'node:path'
import * as readline from 'node:readline'
import { markdown_to_bark, split_sentences } from './markdown.mjs'

const BARK_MODEL = process.env.BARK_MODEL || 'suno/bark'
const BARK_VOICE = process.env.BARK_VOICE || 'v2/en_speaker_6'

// NOTE(review): the script is spawned directly (no interpreter, no shell), so
// it presumably needs a shebang and the executable bit — confirm for your platform.
const SERVER = path.join(import.meta.dirname, '..', 'bark-server.py')

/**
 * Client for a long-lived bark-server.py child process.
 *
 * Wire protocol as used by this class: the server prints the line `ready`
 * once after start-up and the line `ok` after each request; each request is
 * a single JSON line `{ text, voice }` written to the server's stdin.
 */
export class Bark_Tts {
  /**
   * @param {object} [options]
   * @param {string} [options.model] model id passed to the server as argv[1]
   * @param {string} [options.voice] default voice preset, passed as argv[2]
   */
  constructor({
    model = BARK_MODEL,
    voice = BARK_VOICE,
  } = {}) {
    this._model = model
    this._voice = voice
    this._proc = null
    this._rl = null
    this._inflight = null            // { resolve, reject } of the request awaiting "ok"
    this._queue = Promise.resolve()  // tail of the request chain; never rejects
  }

  /**
   * Spawn bark-server.py and wait until it signals "ready".
   *
   * Calling init() while a server is already running stops the old server
   * first so the previous process is not orphaned.
   *
   * @returns {Promise<void>} resolves on the "ready" line; rejects if the
   *   process cannot be spawned or exits before becoming ready.
   */
  init() {
    if (this._proc) this.stop()

    return new Promise((resolve, reject) => {
      const proc = spawn(SERVER, [this._model, this._voice], {
        stdio: ['pipe', 'pipe', 'inherit'],
      })
      const rl = readline.createInterface({ input: proc.stdout })
      this._proc = proc
      this._rl = rl

      // Shared failure path for spawn errors, stdin errors and process exit.
      // Safe to call more than once: every step is a no-op after the first.
      const teardown = (err) => {
        rl.close()
        // Only touch instance state that still belongs to this process;
        // stop() or a later init() may already have replaced it.
        if (this._proc === proc) {
          this._proc = null
          this._rl = null
          this._fail_inflight(err)
        }
        reject(err) // no-op once "ready" has resolved the promise
      }

      proc.on('error', teardown)
      // Without this listener a write to a dead server (EPIPE) would surface
      // as an unhandled stream 'error' and crash the host process.
      proc.stdin.on('error', teardown)
      proc.on('close', (code, signal) => {
        if (code !== 0 && code !== null) {
          process.stderr.write(`[bark] server exited with code ${code}\n`)
        }
        teardown(new Error(`bark server exited (code ${code}, signal ${signal})`))
      })

      rl.on('line', (line) => {
        if (line === 'ready') {
          resolve()
          return
        }
        if (line === 'ok' && this._inflight) {
          const { resolve: done } = this._inflight
          this._inflight = null
          done()
        }
      })
    })
  }

  /**
   * Preprocess markdown and speak as a single request.
   *
   * @param {string} text
   * @param {object} [options]
   * @param {string} [options.voice] voice preset (defaults to the instance voice)
   * @param {boolean} [options.preprocess=true] run markdown_to_bark() first
   * @returns {Promise<void>} resolves when the server acknowledges with "ok"
   */
  async speak(text, { voice = this._voice, preprocess = true } = {}) {
    const clean = preprocess ? markdown_to_bark(text) : text
    return this._send(clean, voice)
  }

  /**
   * Preprocess markdown and speak sentence by sentence.
   *
   * Each sentence is sent as its own request and acknowledged before the
   * next one is sent, so the server receives short inputs instead of the
   * whole text at once.
   *
   * @param {string} text
   * @param {object} [opts]
   * @param {string} [opts.voice] voice preset (defaults to the instance voice)
   * @param {boolean} [opts.preprocess] pass `false` to skip markdown_to_bark()
   * @returns {Promise<void>} resolves after the last sentence is acknowledged
   */
  async speak_streaming(text, opts = {}) {
    const clean = opts.preprocess !== false ? markdown_to_bark(text) : text
    const sentences = split_sentences(clean)
    for (const s of sentences) {
      await this._send(s, opts.voice ?? this._voice)
    }
  }

  /**
   * Queue one request for the server.
   *
   * Requests are serialized: only one is on the wire at a time, so
   * overlapping speak() calls each get their own acknowledgement instead of
   * overwriting one another's resolver.
   *
   * @param {string} text
   * @param {string} voice
   * @returns {Promise<void>} resolves on "ok"; rejects if the server is not
   *   running, exits, or is stopped before acknowledging.
   */
  _send(text, voice) {
    if (!this._proc) {
      return Promise.reject(new Error('Bark_Tts not initialized — call init() first'))
    }
    const request = this._queue.then(() => this._write_request(text, voice))
    // Keep the chain usable after a failed request; the caller still
    // observes the failure through `request`.
    this._queue = request.catch(() => {})
    return request
  }

  /** Write a single JSON line to the server and register it as in flight. */
  _write_request(text, voice) {
    return new Promise((resolve, reject) => {
      const proc = this._proc
      // The server may have exited or been stopped while this request was queued.
      if (!proc) {
        reject(new Error('Bark_Tts not initialized — call init() first'))
        return
      }
      this._inflight = { resolve, reject }
      proc.stdin.write(`${JSON.stringify({ text, voice })}\n`)
    })
  }

  /** Reject the in-flight request, if any, so its caller does not hang. */
  _fail_inflight(err) {
    const inflight = this._inflight
    this._inflight = null
    inflight?.reject(err)
  }

  /**
   * Kill the server and release the line reader.
   *
   * A request still waiting for "ok" is rejected rather than left pending.
   */
  stop() {
    this._rl?.close()
    this._proc?.kill()
    this._proc = null
    this._rl = null
    this._fail_inflight(new Error('Bark_Tts stopped before the request completed'))
  }
}