Files
claude-voice-experiment/lib/markdown.mjs
mikael-lovqvists-claude-agent db8889aeed Initial commit — voice pipeline experiment
STT (Silero VAD + Whisper via sherpa-onnx), Chatterbox TTS HTTP server,
query completeness classifier (Ollama), multi-voice demo scripts, and
planning docs. Kept as reference; clean rewrite planned in separate repos.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-30 04:48:54 +00:00

67 lines
2.1 KiB
JavaScript

/**
 * Convert markdown to Bark-friendly plain text.
 *
 * Bark emphasis conventions:
 *   UPPERCASE words   — stress/emphasis
 *   ...               — hesitation / trailing off
 *   [laughs] [sighs]  — paralinguistic tokens (pass through as-is)
 *
 * Markdown mapping:
 *   **bold** / __bold__   → UPPERCASE (strong emphasis)
 *   *italic* / _italic_   → markers stripped, text kept
 *   # Heading             → heading text followed by "."
 *   `code`                → backticks stripped, content kept
 *   ```block```           → removed
 *   ![alt](url)           → removed
 *   [text](url)           → text only
 *   --- / *** / ___       → "..." (pause)
 *   - item / 1. item      → item text (bullet stripped)
 *
 * Rule order matters:
 *   - Images are handled before links, because the link pattern also matches
 *     the "[alt](url)" tail of an image and would leave "!alt" behind.
 *   - Line-level constructs (rules, headings, bullets) are handled before
 *     inline emphasis, so that "*" bullets and "***" / "___" rules are not
 *     mistaken for emphasis markers.
 *
 * Underscore emphasis is only recognised when the markers are not glued to a
 * letter, digit or underscore, so identifiers such as snake_case_name survive.
 *
 * @param {string} text - Markdown source.
 * @returns {string} Plain text with markdown syntax removed or mapped as above,
 *   runs of 3+ newlines collapsed to one blank line, and outer whitespace trimmed.
 */
export function markdown_to_bark(text) {
	return text
		// Remove fenced code blocks entirely
		.replace(/```[\s\S]*?```/g, '')
		// Inline code — keep the content, drop backticks
		.replace(/`([^`]+)`/g, '$1')
		// Images — drop (before links, see header comment)
		.replace(/!\[([^\]]*)\]\([^)]*\)/g, '')
		// Links — keep visible text
		.replace(/\[([^\]]+)\]\([^)]*\)/g, '$1')
		// Horizontal rules → pause. [ \t] rather than \s keeps every
		// line-level pattern from reaching across newlines.
		.replace(/^[-*_]{3,}[ \t]*$/gm, '...')
		// Headings — keep text, add a beat after
		.replace(/^#{1,6}[ \t]+(.+)$/gm, '$1.')
		// List bullets / numbers — strip prefix
		.replace(/^[ \t]*[-*+][ \t]+/gm, '')
		.replace(/^[ \t]*\d+\.[ \t]+/gm, '')
		// **bold** / __bold__ → UPPERCASE
		.replace(/\*\*([^*]+)\*\*/g, (_match, inner) => inner.toUpperCase())
		.replace(
			/(?<![\p{L}\p{N}_])__([^_]+)__(?![\p{L}\p{N}_])/gu,
			(_match, inner) => inner.toUpperCase(),
		)
		// *italic* / _italic_ — strip markers, keep text
		.replace(/\*([^*]+)\*/g, '$1')
		.replace(/(?<![\p{L}\p{N}_])_([^_]+)_(?![\p{L}\p{N}_])/gu, '$1')
		// Collapse excess blank lines
		.replace(/\n{3,}/g, '\n\n')
		.trim()
}
/**
 * Break text into sentence-sized chunks for sequential TTS.
 *
 * A break happens at any run of whitespace that directly follows ".", "!" or
 * "?"; the punctuation stays attached to the sentence it ends. Each chunk is
 * trimmed, and chunks that are empty after trimming are discarded.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} Non-empty, trimmed sentences in their original order.
 */
export function split_sentences(text) {
	const sentences = []
	for (const piece of text.split(/(?<=[.!?])\s+/)) {
		const sentence = piece.trim()
		if (sentence.length > 0) {
			sentences.push(sentence)
		}
	}
	return sentences
}