/**
 * Voice query demo — accumulates STT utterances into a query, checks
 * completeness with a local LLM classifier, then dispatches to Claude Code.
 *
 * Usage:
 *   node query-demo.mjs [--audio-prompt voice.wav] [--whisper model] [--tmux-container-id 0x...] [--claude-remote /path/to/claude-remote.mjs] [--silence-timeout ms] [--wake-word off]
 *
 * If --tmux-container-id and --claude-remote are supplied, completed queries are
 * dispatched to Claude Code. Otherwise they are only logged to stderr.
 *
 * Wake word mode (default on): say "computer" to activate, then speak your query.
 * End with a send-word (go/done/send) to dispatch, or abort/cancel to discard.
 * Pass --wake-word off to disable and always accumulate.
 */

import { execFileSync } from 'node:child_process'
import { Stt } from './lib/stt.mjs'
import { Tts_Client } from './lib/tts-client.mjs'
import { Pending_Query } from './lib/pending-query.mjs'

/**
 * Look up the value that follows a command-line flag.
 *
 * @param {string} name - Flag to search for in process.argv (e.g. '--whisper').
 * @returns {string|null|undefined} The argument after the flag, null when the
 *   flag is absent, or undefined when the flag is the last argument.
 */
function get_arg(name) {
  const i = process.argv.indexOf(name)
  return i !== -1 ? process.argv[i + 1] : null
}

// Silence timeout (ms) used when --silence-timeout is missing or unusable.
const DEFAULT_SILENCE_TIMEOUT_MS = 6000

/**
 * Parse the --silence-timeout value as a base-10 millisecond count.
 *
 * A value that does not parse to a non-negative integer would otherwise reach
 * Pending_Query as NaN (or a negative number), so it is replaced by the
 * default and a warning is written to stderr.
 *
 * @param {string|null|undefined} raw - Raw flag value from get_arg().
 * @returns {number} Timeout in milliseconds.
 */
function parse_silence_timeout(raw) {
  if (raw == null) return DEFAULT_SILENCE_TIMEOUT_MS
  const parsed = Number.parseInt(raw, 10)
  if (Number.isNaN(parsed) || parsed < 0) {
    process.stderr.write(
      `[query-demo] invalid --silence-timeout ${JSON.stringify(raw)}; using ${DEFAULT_SILENCE_TIMEOUT_MS}\n`
    )
    return DEFAULT_SILENCE_TIMEOUT_MS
  }
  return parsed
}

const audio_prompt = get_arg('--audio-prompt') ?? '/home/devilholk/Documents/rommie-sample.wav'
const whisper_name = get_arg('--whisper') ?? 'base.en'
const container_id = get_arg('--tmux-container-id')
const claude_remote = get_arg('--claude-remote')
const SILENCE_TIMEOUT = parse_silence_timeout(get_arg('--silence-timeout'))

const tts = new Tts_Client()
const stt = new Stt({ whisper_name })

// Wake-word mode stays on unless the flag value is exactly 'off'.
const USE_WAKE_WORD = get_arg('--wake-word') !== 'off'

stt.init()

process.stderr.write(`[query-demo] whisper model: ${whisper_name}\n`)
process.stderr.write(`[query-demo] voice: ${audio_prompt}\n`)
if (container_id && claude_remote) {
  process.stderr.write(`[query-demo] dispatch: container ${container_id} via ${claude_remote}\n`)
} else {
  process.stderr.write(`[query-demo] no --tmux-container-id/--claude-remote — logging only\n`)
}
process.stderr.write(`[query-demo] wake word: ${USE_WAKE_WORD ? 'on (say "computer" to activate)' : 'off'}\n`)

// Spoken start-up announcement; finishes before the listener is attached below.
if (USE_WAKE_WORD) {
  await tts.speak('Ready. Say computer to begin a query, or always listen for hands-free mode. Say help for usage.', { audio_prompt })
} else {
  await tts.speak('Always listening. Say go to send, cancel to discard, or always listen to switch to wake-word mode. Say help for usage.', { audio_prompt })
}

/**
 * Wrap a spoken query in the instruction preamble sent to Claude Code.
 *
 * @param {string} query - The accumulated query text.
 * @returns {string} Newline-joined prompt with the query between marker lines.
 */
function make_prompt(query) {
  return [
    '[voice-buddy] You have received a voice query.',
    '',
    'This is a voice interface. When you are done, use the `speak` shell command to inform the user of the outcome — keep it brief and spoken-word natural.',
    'If the task is a question, speak the answer directly.',
    'If the task is an action, carry it out and then speak a short confirmation.',
    'If the task will take more than a few seconds, use `speak` to say a brief acknowledgement BEFORE starting work, so the user knows you received it.',
    '',
    '--- Query ---',
    query,
    '--- End of query ---',
  ].join('\n')
}

/**
 * Send a query to Claude Code by running the claude-remote script with the
 * prompt on its stdin; the child's stdout and stderr are inherited.
 *
 * NOTE(review): execFileSync blocks the event loop until the child exits and
 * throws if the child cannot be spawned or exits non-zero; that error
 * propagates out of on_submit. Confirm this is what Pending_Query expects.
 *
 * @param {string} query - The accumulated query text.
 */
function dispatch(query) {
  execFileSync('node', [claude_remote, '--tmux-container-id', container_id], {
    input: make_prompt(query),
    encoding: 'utf8',
    stdio: ['pipe', 'inherit', 'inherit'],
  })
}

const pending = new Pending_Query({
  silence_timeout: SILENCE_TIMEOUT,
  use_wake_word: USE_WAKE_WORD,
  use_timer: USE_WAKE_WORD,

  on_activate: async () => {
    process.stderr.write('[query-demo] activated\n')
    // Chimes are best-effort: a failed chime is ignored.
    await tts.chime('ready').catch(() => {})
  },

  on_mode_query: async ({ use_wake_word, active }) => {
    const msg = use_wake_word
      ? (active ? 'Listening for your query.' : 'Wake word mode. Say computer to begin.')
      : 'Always listening.'
    await tts.speak(msg, { audio_prompt })
  },

  on_mode_change: async ({ use_wake_word }) => {
    const msg = use_wake_word ? 'Wake word mode. Say computer to begin.' : 'Always listening.'
    process.stderr.write(`[query-demo] mode: ${use_wake_word ? 'wake word' : 'always listening'}\n`)
    await tts.speak(msg, { audio_prompt })
  },

  on_submit: async (text) => {
    process.stderr.write(`[query-demo] query: ${JSON.stringify(text)}\n`)
    if (container_id && claude_remote) {
      await tts.chime('dispatch').catch(() => {})
      const dispatch_time = Date.now()
      dispatch(text)
      // Four seconds after dispatch, ask the TTS server when it last spoke.
      // If that was before this dispatch, or the lookup fails, play the
      // 'working' chime.
      // NOTE(review): presumably last_speak_at is an epoch-ms timestamp
      // comparable with Date.now() — confirm against the TTS server.
      // NOTE(review): this reads the underscore-prefixed tts._url field.
      setTimeout(async () => {
        try {
          const res = await fetch(`${tts._url}/activity`)
          const { last_speak_at } = await res.json()
          if (last_speak_at < dispatch_time) {
            await tts.chime('working').catch(() => {})
          }
        } catch {
          await tts.chime('working').catch(() => {})
        }
      }, 4000)
    }
  },

  on_cancel: async () => {
    process.stderr.write('[query-demo] query cancelled\n')
    // Fall back to a spoken confirmation when the chime fails.
    await tts.chime('cancel').catch(() => tts.speak('Cancelled.', { audio_prompt }))
  },

  on_empty_submit: async () => {
    process.stderr.write('[query-demo] submit with nothing accumulated\n')
    await tts.speak('Nothing to send.', { audio_prompt })
  },
})

// Feed every recognised utterance and every raw audio chunk into the pending query.
stt.listen(async (text) => {
  await pending.process_utterance(text)
}, { on_audio: (chunk) => pending.on_audio(chunk) })