// claude-voice-experiment/lib/llm.mjs

/**
 * Optional LLM cleanup via Ollama REST API.
 *
 * Cleans up raw Whisper output: fixes punctuation, capitalization,
 * removes filler words, corrects obvious recognition errors.
 *
 * Set OLLAMA_MODEL env var to choose the model (default: phi3:mini).
 * Set OLLAMA_URL env var to change the base URL (default: http://localhost:11434).
 */

// Base URL of the Ollama server. Trailing slashes are stripped because every
// request appends a path such as `/api/tags`; a configured value like
// `http://host:11434/` would otherwise produce `//api/tags`, which may not be
// routed correctly.
const OLLAMA_URL   = (process.env.OLLAMA_URL || 'http://localhost:11434').replace(/\/+$/, '')
// Default model name; falls back to phi3:mini when the variable is unset or empty.
const OLLAMA_MODEL = process.env.OLLAMA_MODEL || 'phi3:mini'

/**
 * Probe the Ollama server to see whether it can be used.
 *
 * Sends a GET to `/api/tags` and gives up after 2 seconds.
 *
 * @returns {Promise<boolean>} `true` when the server answers with a 2xx
 *   status; `false` on any other status, on timeout, or on a network error.
 */
export async function llm_available() {
	let reachable = false
	try {
		const probe_url = `${OLLAMA_URL}/api/tags`
		const deadline  = AbortSignal.timeout(2000)
		const { ok }    = await fetch(probe_url, { signal: deadline })
		reachable = ok
	} catch {
		// Unreachable or too slow: report the optional LLM step as unavailable.
		reachable = false
	}
	return reachable
}

/**
 * List the names of the models the Ollama server reports.
 *
 * @returns {Promise<string[]>} The `name` of each entry in the `models` array
 *   returned by `GET /api/tags`; empty when that array is absent.
 * @throws {Error} When the server cannot be reached, does not answer within
 *   5 seconds, replies with a non-2xx status, or sends a body that is not JSON.
 */
export async function list_models() {
	const res = await fetch(`${OLLAMA_URL}/api/tags`, {
		// Bound the wait so an unresponsive server cannot hang the caller forever.
		signal: AbortSignal.timeout(5000),
	})
	// An error status must not be mistaken for "no models installed".
	if (!res.ok) {
		throw new Error(`Ollama /api/tags request failed: HTTP ${res.status}`)
	}
	const data   = await res.json()
	const models = Array.isArray(data?.models) ? data.models : []
	return models.map(m => m.name)
}

/**
 * Clean up a raw STT transcription with an Ollama model.
 *
 * Best-effort: request failures are swallowed and the original text is
 * returned instead, so the optional LLM step cannot break transcription.
 *
 * @param {string} raw_text - Raw speech-to-text output.
 * @param {string} [model=OLLAMA_MODEL] - Ollama model name to use.
 * @returns {Promise<string>} The cleaned text, or `raw_text` unchanged when the
 *   input is blank, the request fails or times out (10 s), the server answers
 *   with a non-2xx status, the reply is empty, or generation stopped because
 *   the token limit was reached.
 */
export async function cleanup(raw_text, model = OLLAMA_MODEL) {
	const text = String(raw_text ?? '')

	// Nothing to clean; a blank prompt only invites the model to invent text.
	if (text.trim() === '') return raw_text

	const prompt = [
		'You are a transcription editor. Clean up this speech-to-text output:',
		'- Fix punctuation and capitalization',
		'- Remove meaningless filler words (um, uh, like, you know)',
		'- Correct obvious word recognition errors based on context',
		'- Keep the meaning and phrasing intact',
		'- Return ONLY the cleaned text, no explanation, no quotes',
		'',
		text,
	].join('\n')

	// The output is about as long as the input, so a fixed cap would cut long
	// transcripts short. Keep 256 tokens as the floor and allow roughly one
	// token per two input characters beyond that (a generous heuristic).
	const num_predict = Math.max(256, Math.ceil(text.length / 2))

	try {
		const res = await fetch(`${OLLAMA_URL}/api/generate`, {
			method:  'POST',
			headers: { 'Content-Type': 'application/json' },
			body: JSON.stringify({
				model,
				prompt,
				stream:  false,
				options: { temperature: 0.1, num_predict },
			}),
			signal: AbortSignal.timeout(10_000),
		})
		if (!res.ok) return raw_text

		const data = await res.json()

		// A reply that ran into the token limit is incomplete; returning it
		// would silently drop the end of the transcript.
		if (data?.done_reason === 'length') return raw_text

		const cleaned = typeof data?.response === 'string' ? data.response.trim() : ''
		return cleaned || raw_text
	} catch {
		return raw_text
	}
}