STT (Silero VAD + Whisper via sherpa-onnx), Chatterbox TTS HTTP server, query completeness classifier (Ollama), multi-voice demo scripts, and planning docs. Kept as reference; clean rewrite planned in separate repos. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
67 lines
2.1 KiB
JavaScript
67 lines
2.1 KiB
JavaScript
/**
 * Convert markdown to Bark-friendly plain text.
 *
 * Bark emphasis conventions:
 *   UPPERCASE words   — stress/emphasis
 *   ...               — hesitation / trailing off
 *   [laughs] [sighs]  — paralinguistic tokens (no rule here touches bare brackets)
 *
 * Markdown mapping:
 *   ```block```                 → removed
 *   `code`                      → backticks dropped, content kept
 *   ![alt](url)                 → removed entirely
 *   [text](url)                 → text only
 *   # Heading                   → heading text followed by "."
 *   --- / *** / ___             → "..." (pause)
 *   - item / * item / 1. item   → item text (marker stripped)
 *   **bold** / __bold__         → UPPERCASE (strong emphasis)
 *   *italic* / _italic_         → markers stripped, text kept
 *
 * Rule order is significant:
 *   - images are removed before links, because the link pattern also matches
 *     the "[alt](url)" tail of an image and would leave "!alt" behind;
 *   - line-level markers (headings, rules, list prefixes) are handled before
 *     inline emphasis, because "*" bullets and "***" / "___" rules would
 *     otherwise be consumed as emphasis delimiters.
 *
 * @param {string} text - Markdown source.
 * @returns {string} Plain text, trimmed, with runs of three or more newlines
 *   collapsed to two.
 */
export function markdown_to_bark(text) {
  let out = text

  // Fenced code blocks are removed entirely
  out = out.replace(/```[\s\S]*?```/g, '')

  // Inline code — keep the content, drop backticks
  out = out.replace(/`([^`]+)`/g, '$1')

  // Images — drop, alt text included. Must precede the link rule (see above).
  out = out.replace(/!\[([^\]]*)\]\([^)]*\)/g, '')

  // Links — keep visible text
  out = out.replace(/\[([^\]]+)\]\([^)]*\)/g, '$1')

  // Headings — keep text, add a beat after
  out = out.replace(/^#{1,6}\s+(.+)$/gm, '$1.')

  // Horizontal rules → pause
  out = out.replace(/^[-*_]{3,}\s*$/gm, '...')

  // List bullets / numbers — strip prefix
  out = out.replace(/^\s*[-*+]\s+/gm, '')
  out = out.replace(/^\s*\d+\.\s+/gm, '')

  // **bold** / __bold__ → UPPERCASE (run before the single-marker italic rules
  // so the doubled markers are not split into two italic spans)
  out = out.replace(/\*\*([^*]+)\*\*/g, (_, w) => w.toUpperCase())
  out = out.replace(/__([^_]+)__/g, (_, w) => w.toUpperCase())

  // *italic* / _italic_ — strip markers, keep text
  out = out.replace(/\*([^*]+)\*/g, '$1')
  out = out.replace(/_([^_]+)_/g, '$1')

  // Collapse excess blank lines left behind by removed blocks
  out = out.replace(/\n{3,}/g, '\n\n')

  return out.trim()
}
|
|
|
|
/**
 * Break text into sentence-sized chunks for sequential TTS.
 *
 * A boundary is any run of whitespace that directly follows ".", "!" or "?";
 * the punctuation stays attached to the sentence it ends. Each chunk is
 * trimmed and empty chunks are discarded.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} Non-empty, trimmed sentences in their original order.
 */
export function split_sentences(text) {
  const sentences = []

  // The lookbehind keeps the terminator on the left-hand piece; only the
  // whitespace after it is consumed by the split.
  for (const piece of text.split(/(?<=[.!?])\s+/)) {
    const sentence = piece.trim()
    if (sentence.length > 0) {
      sentences.push(sentence)
    }
  }

  return sentences
}
|