claude-voice-experiment/query-demo.mjs

/**
 * Voice query demo — accumulates STT utterances into a query, checks
 * completeness with a local LLM classifier, then dispatches to Claude Code.
 *
 * Usage:
 *   node query-demo.mjs [--audio-prompt voice.wav] [--whisper model] [--tmux-container-id 0x...] [--claude-remote /path/to/claude-remote.mjs] [--wake-word off] [--silence-timeout 6000]
 *
 * If --tmux-container-id and --claude-remote are supplied, completed queries are
 * dispatched to Claude Code. Otherwise they are only logged to stderr.
 *
 * Wake word mode (default on): say "computer" to activate, then speak your query.
 * End with a send-word (go/done/send) to dispatch, or abort/cancel to discard.
 * Pass --wake-word off to disable and always accumulate.
 */


import { execFileSync }    from 'node:child_process'
import { Stt }              from './lib/stt.mjs'
import { Tts_Client }       from './lib/tts-client.mjs'
import { Pending_Query }    from './lib/pending-query.mjs'

/**
 * Look up the value that follows a command-line flag in `process.argv`.
 *
 * Only the space-separated form (`--flag value`) is recognised, and only the
 * first occurrence of the flag is considered.
 *
 * @param {string} name - Flag to search for, e.g. `'--whisper'`.
 * @returns {string|null} The token immediately after `name`, or `null` when
 *   the flag is absent or is the last token on the command line.
 */
function get_arg(name) {
	const flag_index = process.argv.indexOf(name)
	if (flag_index === -1) return null
	// A trailing flag has no value; report null rather than leaking undefined.
	return process.argv[flag_index + 1] ?? null
}

// --- Command-line configuration ---

// Voice sample path passed as `audio_prompt` with every tts.speak() call.
const audio_prompt   = get_arg('--audio-prompt') ?? '/home/devilholk/Documents/rommie-sample.wav'
// Whisper model name handed to the Stt constructor.
const whisper_name   = get_arg('--whisper')      ?? 'base.en'
// Dispatch target; queries are only forwarded when BOTH of these are supplied.
const container_id      = get_arg('--tmux-container-id')
const claude_remote  = get_arg('--claude-remote')

// Value forwarded to Pending_Query as `silence_timeout`
// (presumably milliseconds — TODO confirm against lib/pending-query.mjs).
const DEFAULT_SILENCE_TIMEOUT = 6000
const silence_timeout_arg = get_arg('--silence-timeout')
const requested_silence_timeout = Number.parseInt(silence_timeout_arg ?? '', 10)
const silence_timeout_is_valid = Number.isInteger(requested_silence_timeout) && requested_silence_timeout >= 0
if (silence_timeout_arg != null && !silence_timeout_is_valid) {
	// Don't let NaN or a negative number reach the timer logic; fall back instead.
	process.stderr.write(`[query-demo] ignoring invalid --silence-timeout ${JSON.stringify(silence_timeout_arg)}; using ${DEFAULT_SILENCE_TIMEOUT}\n`)
}
const SILENCE_TIMEOUT = silence_timeout_is_valid ? requested_silence_timeout : DEFAULT_SILENCE_TIMEOUT


const tts = new Tts_Client()
const stt = new Stt({ whisper_name })

// Wake-word mode is on unless the user passes exactly `--wake-word off`.
const USE_WAKE_WORD = get_arg('--wake-word') !== 'off'

// Start the STT engine.
// NOTE(review): init() is not awaited here — confirm it is synchronous.
stt.init()

// Report the effective configuration on stderr, one line per setting.
const dispatch_summary = container_id && claude_remote
	? `dispatch: container ${container_id} via ${claude_remote}`
	: 'no --tmux-container-id/--claude-remote — logging only'
const wake_word_summary = USE_WAKE_WORD ? 'on (say "computer" to activate)' : 'off'

for (const banner_line of [
	`whisper model: ${whisper_name}`,
	`voice: ${audio_prompt}`,
	dispatch_summary,
	`wake word: ${wake_word_summary}`,
]) {
	process.stderr.write(`[query-demo] ${banner_line}\n`)
}

// Spoken greeting; the wording depends on the starting mode.
const startup_greeting = USE_WAKE_WORD
	? 'Ready. Say computer to begin a query, or always listen for hands-free mode. Say help for usage.'
	: 'Always listening. Say go to send, cancel to discard, or always listen to switch to wake-word mode. Say help for usage.'
await tts.speak(startup_greeting, { audio_prompt })

/**
 * Build the prompt text that dispatch() pipes to the claude-remote script.
 *
 * The result is a fixed instruction preamble followed by the query, framed
 * between `--- Query ---` and `--- End of query ---` marker lines. Lines are
 * separated by '\n' and there is no trailing newline.
 *
 * @param {string} query - The accumulated voice query.
 * @returns {string} The complete prompt.
 */
function make_prompt(query) {
	const instructions = [
		'[voice-buddy] You have received a voice query.',
		'',
		'This is a voice interface. When you are done, use the `speak` shell command to inform the user of the outcome — keep it brief and spoken-word natural.',
		'If the task is a question, speak the answer directly.',
		'If the task is an action, carry it out and then speak a short confirmation.',
		'If the task will take more than a few seconds, use `speak` to say a brief acknowledgement BEFORE starting work, so the user knows you received it.',
		'',
	]
	const framed_query = ['--- Query ---', query, '--- End of query ---']

	// Joined as array elements (not interpolated) so a null/undefined query
	// renders as an empty line rather than the text "null"/"undefined".
	return instructions.concat(framed_query).join('\n')
}

/**
 * Hand a completed query to the claude-remote script.
 *
 * Synchronously runs `node <claude_remote> --tmux-container-id <container_id>`,
 * feeding it the prompt from make_prompt() on stdin while the child's stdout
 * and stderr are inherited from this process. The call blocks until the child
 * exits; execFileSync throws if the child cannot be spawned or exits non-zero.
 *
 * @param {string} query - The voice query to forward.
 */
function dispatch(query) {
	const child_args = [claude_remote, '--tmux-container-id', container_id]
	const child_options = {
		input: make_prompt(query),
		encoding: 'utf8',
		stdio: ['pipe', 'inherit', 'inherit'],
	}
	execFileSync('node', child_args, child_options)
}


// Query accumulator. The handlers below attach spoken/chime feedback and
// stderr logging to Pending_Query's callbacks, and forward submitted queries
// via dispatch() when a dispatch target was configured on the command line.
const pending = new Pending_Query({
	silence_timeout: SILENCE_TIMEOUT,
	use_wake_word: USE_WAKE_WORD,
	// The timer option simply mirrors the wake-word switch.
	use_timer: USE_WAKE_WORD,

	// Log the activation and play the 'ready' chime (chime failures are ignored).
	on_activate: async () => {
		process.stderr.write('[query-demo] activated\n')
		await tts.chime('ready').catch(() => {})
	},

	// Speak a description of the current mode/state.
	on_mode_query: async ({ use_wake_word, active }) => {
		let status = 'Always listening.'
		if (use_wake_word) {
			status = active ? 'Listening for your query.' : 'Wake word mode. Say computer to begin.'
		}
		await tts.speak(status, { audio_prompt })
	},

	// Log the new mode, then announce it.
	on_mode_change: async ({ use_wake_word }) => {
		const [mode_label, announcement] = use_wake_word
			? ['wake word', 'Wake word mode. Say computer to begin.']
			: ['always listening', 'Always listening.']
		process.stderr.write(`[query-demo] mode: ${mode_label}\n`)
		await tts.speak(announcement, { audio_prompt })
	},

	// Log the query; when a dispatch target is configured, forward it.
	on_submit: async (text) => {
		process.stderr.write(`[query-demo] query: ${JSON.stringify(text)}\n`)
		if (!container_id || !claude_remote) return

		const play_working_chime = () => tts.chime('working').catch(() => {})

		await tts.chime('dispatch').catch(() => {})
		const dispatch_time = Date.now()
		dispatch(text)

		// Four seconds after dispatch() returns, ask the TTS server for its
		// activity record. If its last_speak_at predates this dispatch — or the
		// lookup fails — play the 'working' chime.
		setTimeout(async () => {
			try {
				const response = await fetch(`${tts._url}/activity`)
				const activity = await response.json()
				if (activity.last_speak_at < dispatch_time) {
					await play_working_chime()
				}
			} catch {
				await play_working_chime()
			}
		}, 4000)
	},

	// Log the cancellation; fall back to speech if the 'cancel' chime fails.
	on_cancel: async () => {
		process.stderr.write('[query-demo] query cancelled\n')
		await tts.chime('cancel').catch(() => tts.speak('Cancelled.', { audio_prompt }))
	},

	// Log and tell the user there was nothing to send.
	on_empty_submit: async () => {
		process.stderr.write('[query-demo] submit with nothing accumulated\n')
		await tts.speak('Nothing to send.', { audio_prompt })
	},
})

// Wire the recogniser to the accumulator: each text result goes to
// pending.process_utterance(), and each `on_audio` chunk is forwarded to
// pending.on_audio() (presumably for silence/activity tracking — TODO confirm).
const listen_options = {
	on_audio: (chunk) => pending.on_audio(chunk),
}

stt.listen(async (utterance) => {
	await pending.process_utterance(utterance)
}, listen_options)