STT (Silero VAD + Whisper via sherpa-onnx), Chatterbox TTS HTTP server, query completeness classifier (Ollama), multi-voice demo scripts, and planning docs. Kept as reference; clean rewrite planned in separate repos. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
48 lines
1.2 KiB
JavaScript
48 lines
1.2 KiB
JavaScript
/**
 * Read text from stdin and speak it using a voice reference clip.
 *
 * Usage:
 *   echo "Hello world" | node speak-as.mjs /path/to/voice.wav
 *   cat script.txt | node speak-as.mjs /path/to/voice.wav
 *   node speak-as.mjs /path/to/voice.wav   (then type, Ctrl+D when done)
 */
|
|
|
|
import * as fs from 'node:fs'
|
|
import * as readline from 'node:readline'
|
|
import { Chatterbox_Tts } from './lib/chatterbox-tts.mjs'
|
|
|
|
// The voice reference clip is the only positional argument.
const [, , audio_prompt] = process.argv

// Write `message` to stderr and terminate the process with exit status 1.
const exit_with_error = message => {
  process.stderr.write(message)
  process.exit(1)
}

// Guard clauses: a path must be given, and it must exist on disk.
if (!audio_prompt) exit_with_error('Usage: node speak-as.mjs /path/to/voice.wav\n')
if (!fs.existsSync(audio_prompt)) exit_with_error(`File not found: ${audio_prompt}\n`)
|
|
|
|
// Create the TTS client and wait for init() to settle before reading input.
const tts = new Chatterbox_Tts()
await tts.init()

// Line-oriented reader over stdin.
const rl = readline.createInterface({
  input: process.stdin,
  output: process.stdout,
  // Must be an explicit boolean. `process.stdin.isTTY` is undefined when stdin
  // is piped, and an undefined `terminal` makes readline fall back to
  // `output.isTTY`, so piped input would be handled as an interactive
  // terminal (and echoed) whenever stdout is a TTY.
  terminal: Boolean(process.stdin.isTTY),
})
|
|
|
|
/**
 * Build a comparison key for a line of text: lower-cased, with everything
 * except letters, combining marks, digits and whitespace removed, runs of
 * whitespace collapsed to a single space, and the ends trimmed.
 *
 * Letters and digits are matched with Unicode property escapes so that
 * non-Latin and accented text keeps its content instead of being erased.
 * Whitespace is kept until the collapse step so that tab-separated words
 * are not glued together.
 *
 * @param {string} s - Text to normalize.
 * @returns {string} The normalized key; empty when `s` has no letters or digits.
 */
const norm = s =>
  s
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\s]/gu, '')
    .replace(/\s+/g, ' ')
    .trim()
|
|
// Comparison key of the most recently spoken line; null until a line has been
// spoken, so the first line can never be mistaken for a repeat.
let last_norm = null

try {
  for await (const line of rl) {
    const text = line.trim()
    if (!text) continue

    // Consecutive lines with the same normalized form are spoken only once.
    // When normalization leaves nothing, fall back to the raw text so that
    // unrelated lines do not all share the same empty key.
    const n = norm(text) || text
    if (n === last_norm) continue
    last_norm = n

    // Awaited so that lines are spoken one at a time, in input order.
    await tts.speak(text, { audio_prompt })
  }
} finally {
  // Runs even when speak() or reading stdin fails part-way through.
  tts.stop()
}