// claude-voice-experiment/lib/markdown.mjs

/**
 * Convert markdown to Bark-friendly plain text.
 *
 * Bark emphasis conventions:
 *   UPPERCASE words     — stress/emphasis
 *   ...                 — hesitation / trailing off
 *   [laughs] [sighs]    — paralinguistic tokens (pass through as-is)
 *
 * Markdown mapping:
 *   **bold** / __bold__ → UPPERCASE  (strong emphasis)
 *   *italic* / _italic_ → markers stripped  (soft emphasis, Bark handles naturally)
 *   # Heading           → text + "." (no extra "." if it already ends in punctuation)
 *   `code`              → content kept verbatim, backticks dropped
 *   ```block```         → skipped
 *   ![alt](url)         → dropped
 *   [text](url)         → text only
 *   --- / *** / ___     → "..." (pause)
 *   - item / 1. item    → item text (bullet stripped)
 *
 * Emphasis markers are only honoured within a single line, must hug their
 * content (no whitespace just inside the markers), and underscores inside a
 * word (snake_case) are left alone.
 *
 * @param {string} text - Markdown source.
 * @returns {string} Plain text with surrounding whitespace trimmed and runs of
 *   three or more newlines collapsed to a single blank line.
 */
export function markdown_to_bark(text) {
    // Remove fenced code blocks entirely
    text = text.replace(/```[\s\S]*?```/g, '')

    // Inline code — park the content behind a NUL-delimited placeholder so the
    // emphasis/list passes below cannot rewrite it (e.g. `my_var_name`, `__init__`).
    const code_spans = []
    text = text.replace(/`([^`]+)`/g, (_, code) => {
        code_spans.push(code)
        return `\u0000${code_spans.length - 1}\u0000`
    })

    // Images — drop. Must run before links, otherwise the link pattern eats the
    // "[alt](url)" part and leaves a stray "!alt" behind.
    text = text.replace(/!\[[^\]]*\]\([^)]*\)/g, '')

    // Links — keep visible text
    text = text.replace(/\[([^\]]+)\]\([^)]*\)/g, '$1')

    // Block-level markers are handled before inline emphasis so that "***" rules
    // and "* " bullets are not mistaken for emphasis delimiters. [ \t] (not \s)
    // keeps each pattern on its own line so blank lines survive.

    // Horizontal rules → pause
    text = text.replace(/^[-*_]{3,}[ \t]*$/gm, '...')

    // List bullets / numbers — strip prefix
    text = text.replace(/^[ \t]*[-*+][ \t]+/gm, '')
    text = text.replace(/^[ \t]*\d+\.[ \t]+/gm, '')

    // **bold** / __bold__ → UPPERCASE
    text = text.replace(/\*\*(?=\S)([^*\n]+)(?<=\S)\*\*/g, (_, w) => w.toUpperCase())
    text = text.replace(
        /(?<![\p{L}\p{N}_])__(?=\S)([^_\n]+)(?<=\S)__(?![\p{L}\p{N}_])/gu,
        (_, w) => w.toUpperCase()
    )

    // *italic* / _italic_ — strip markers, keep text
    text = text.replace(/\*(?=\S)([^*\n]+)(?<=\S)\*/g, '$1')
    text = text.replace(/(?<![\p{L}\p{N}])_(?=\S)([^_\n]+)(?<=\S)_(?![\p{L}\p{N}])/gu, '$1')

    // Headings — keep text, add a beat after. Runs after emphasis so the
    // punctuation check sees the final heading text rather than "**...**".
    text = text.replace(/^#{1,6}[ \t]+(.+)$/gm, (_, title) => {
        const heading = title.trimEnd()
        return /[.!?:;,]$/.test(heading) ? heading : `${heading}.`
    })

    // Put the parked inline code back; unknown indices are left untouched
    text = text.replace(/\u0000(\d+)\u0000/g, (match, index) =>
        Number(index) < code_spans.length ? code_spans[Number(index)] : match
    )

    // Collapse excess blank lines
    text = text.replace(/\n{3,}/g, '\n\n')

    return text.trim()
}

/**
 * Split text into sentences suitable for sequential TTS.
 * Keeps sentence-ending punctuation with the preceding sentence, together
 * with any closing quotes or brackets that directly follow it, so
 * `He said "Stop." Then he left.` yields two sentences.
 *
 * @param {string} text - Plain text to split.
 * @returns {string[]} Trimmed, non-empty sentences in their original order.
 */
export function split_sentences(text) {
    // Split on whitespace preceded by . ! or ? plus optional closers
    // (" ' \u201D \u2019 ) ]). The lookbehind consumes nothing, so the
    // punctuation stays attached to the sentence it ends.
    const sentence_boundary = /(?<=[.!?]["'\u201D\u2019)\]]*)\s+/

    return text
        .split(sentence_boundary)
        .map(sentence => sentence.trim())
        .filter(sentence => sentence.length > 0)
}