Files
claude-voice-experiment/query-demo.mjs
mikael-lovqvists-claude-agent 20873be786 Add README, faster-whisper backend, and session fixes
- README explaining experimental/transparency purpose
- faster-whisper STT backend (fw-stt.mjs, faster-whisper-server.py, install-faster-whisper.sh)
- Bug fixes: Buffer alignment in on_audio, --debug-waveform URL parsing, silent fetch errors, instant dispatch timer leak
- Global uncaughtException/unhandledRejection handlers in query-demo.mjs
- Design docs: CHANGELOG, COMMAND-DISPATCH, INTERFACE-THEORY, VOICE-POLICY
- Systemd service unit templates

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-07 06:39:14 +00:00

153 lines
5.7 KiB
JavaScript
Raw Permalink Blame History

/**
* Voice query demo — accumulates STT utterances into a query, checks
* completeness with a local LLM classifier, then dispatches to Claude Code.
*
* Usage:
* node query-demo.mjs [--audio-prompt voice.wav] [--whisper model] [--tmux-container-id 0x...] [--claude-remote /path/to/claude-remote.mjs]
*
* If --tmux-container-id and --claude-remote are supplied, completed queries are
* dispatched to Claude Code. Otherwise they are only logged to stderr.
*
* Wake word mode (default on): say "computer" to activate, then speak your query.
* End with a send-word (go/done/send) to dispatch, or abort/cancel to discard.
* Pass --wake-word off to disable and always accumulate.
*/
import { execFileSync } from 'node:child_process'
// Last-resort handlers: report otherwise-unhandled failures on stderr.
// NOTE(review): with an 'uncaughtException' listener installed the process keeps
// running after the error is logged — confirm that is the intended policy.
process.on('uncaughtException', (err) => {
  // Any value can be thrown (a string, null, ...), so guard the property reads
  // and fall back to the raw value, mirroring the unhandledRejection handler.
  process.stderr.write(`[uncaughtException] ${err?.stack ?? err?.message ?? err}\n`)
})
process.on('unhandledRejection', (reason) => {
  // Prefer the stack trace when the rejection reason carries one.
  process.stderr.write(`[unhandledRejection] ${reason?.stack ?? reason}\n`)
})
import { Tts_Client } from './lib/tts-client.mjs'
import { Pending_Query } from './lib/pending-query.mjs'
/**
 * Look up the value that follows a command-line flag in process.argv.
 *
 * @param {string} name - Flag to search for, e.g. '--whisper'.
 * @returns {string|null} The argument immediately after the first occurrence
 *   of `name`, or null when the flag is absent or is the last argument.
 */
function get_arg(name) {
  const flag_index = process.argv.indexOf(name)
  if (flag_index === -1) return null
  // A trailing flag has no successor; normalize the out-of-range read to null
  // so callers always receive the same "no value" sentinel.
  return process.argv[flag_index + 1] ?? null
}
// --- Command-line configuration ---
// Voice sample passed along with every tts.speak() call.
// NOTE(review): the fallback is a machine-specific absolute path — confirm it
// should remain the default.
const audio_prompt = get_arg('--audio-prompt') ?? '/home/devilholk/Documents/rommie-sample.wav'
const whisper_name = get_arg('--whisper') ?? 'base.en'
// Both of these must be supplied for queries to be dispatched.
const container_id = get_arg('--tmux-container-id')
const claude_remote = get_arg('--claude-remote')
// --debug-waveform takes an optional URL: a following argument that is not
// itself a flag is used as the URL; a bare flag selects the default endpoint.
const _dw = get_arg('--debug-waveform')
const debug_waveform = (_dw && !_dw.startsWith('--'))
  ? _dw
  : (process.argv.includes('--debug-waveform') ? 'http://localhost:3888/emit/audio-debug' : null)
const stt_backend = get_arg('--stt') ?? 'sherpa-onnx'
// Silence timeout handed to Pending_Query (presumably milliseconds — confirm).
// Parsed as base 10; a non-numeric value falls back to the default instead of
// propagating NaN.
const DEFAULT_SILENCE_TIMEOUT = 6000
const _silence_timeout_arg = get_arg('--silence-timeout')
const _silence_timeout = _silence_timeout_arg == null
  ? DEFAULT_SILENCE_TIMEOUT
  : Number.parseInt(_silence_timeout_arg, 10)
if (Number.isNaN(_silence_timeout)) {
  process.stderr.write(`[query-demo] ignoring invalid --silence-timeout ${JSON.stringify(_silence_timeout_arg)}; using ${DEFAULT_SILENCE_TIMEOUT}\n`)
}
const SILENCE_TIMEOUT = Number.isNaN(_silence_timeout) ? DEFAULT_SILENCE_TIMEOUT : _silence_timeout
// Choose the speech-to-text implementation: 'faster-whisper' loads
// ./lib/fw-stt.mjs, any other --stt value loads ./lib/stt.mjs.
const stt_module_path = stt_backend === 'faster-whisper'
  ? './lib/fw-stt.mjs'
  : './lib/stt.mjs'
const { Stt } = await import(stt_module_path)

// Wake-word mode stays on unless the flag is given explicitly as 'off'.
const USE_WAKE_WORD = get_arg('--wake-word') !== 'off'

const tts = new Tts_Client()
const stt = new Stt({ whisper_name, debug_url: debug_waveform })
// NOTE(review): the return value of init() is not awaited — confirm it is synchronous.
stt.init()
// Startup banner: one stderr line per effective setting, in a fixed order.
const startup_lines = [
  `[query-demo] whisper model: ${whisper_name}\n`,
  `[query-demo] voice: ${audio_prompt}\n`,
  (container_id && claude_remote)
    ? `[query-demo] dispatch: container ${container_id} via ${claude_remote}\n`
    : `[query-demo] no --tmux-container-id/--claude-remote — logging only\n`,
  `[query-demo] wake word: ${USE_WAKE_WORD ? 'on (say "computer" to activate)' : 'off'}\n`,
  `[query-demo] stt backend: ${stt_backend}\n`,
]
if (debug_waveform) {
  startup_lines.push(`[query-demo] debug waveform: ${debug_waveform}\n`)
}
for (const startup_line of startup_lines) {
  process.stderr.write(startup_line)
}

// Spoken greeting; the wording depends on the starting mode.
const greeting = USE_WAKE_WORD
  ? 'Ready. Say computer to begin a query, or always listen for hands-free mode. Say help for usage.'
  : 'Always listening. Say go to send, cancel to discard, or always listen to switch to wake-word mode. Say help for usage.'
await tts.speak(greeting, { audio_prompt })
/**
 * Build the prompt text for a voice query: fixed instructions followed by the
 * query framed between '--- Query ---' and '--- End of query ---' markers.
 *
 * @param {string} query - The query text to embed.
 * @returns {string} All prompt lines joined with '\n'.
 */
function make_prompt(query) {
  const instructions = [
    '[voice-buddy] You have received a voice query.',
    '',
    'This is a voice interface. When you are done, use the `speak` shell command to inform the user of the outcome — keep it brief and spoken-word natural.',
    'If the task is a question, speak the answer directly.',
    'If the task is an action, carry it out and then speak a short confirmation.',
    'If the task will take more than a few seconds, use `speak` to say a brief acknowledgement BEFORE starting work, so the user knows you received it.',
  ]
  const framed_query = ['--- Query ---', query, '--- End of query ---']
  // A blank line separates the instructions from the framed query.
  return [...instructions, '', ...framed_query].join('\n')
}
/**
 * Send a completed query to the claude-remote script.
 *
 * Synchronously runs `node <claude_remote> --tmux-container-id <container_id>`
 * with the generated prompt as the child's stdin; the child's stdout and stderr
 * are inherited from this process.
 *
 * @param {string} query - The query text to wrap with make_prompt().
 */
function dispatch(query) {
  const prompt = make_prompt(query)
  const child_args = [claude_remote, '--tmux-container-id', container_id]
  const child_options = {
    input: prompt,
    encoding: 'utf8',
    stdio: ['pipe', 'inherit', 'inherit'],
  }
  execFileSync('node', child_args, child_options)
}
// Pending_Query is configured with callbacks that give audible feedback through
// tts and, when dispatch is configured, forward submitted queries via dispatch().
const pending = new Pending_Query({
  silence_timeout: SILENCE_TIMEOUT,
  use_wake_word: USE_WAKE_WORD,
  use_timer: USE_WAKE_WORD,

  // Log the activation and play the 'ready' chime (chime failures are ignored).
  async on_activate() {
    process.stderr.write('[query-demo] activated\n')
    await tts.chime('ready').catch(() => {})
  },

  // Speak the current mode.
  async on_mode_query({ use_wake_word, active }) {
    let msg = 'Always listening.'
    if (use_wake_word) {
      msg = active ? 'Listening for your query.' : 'Wake word mode. Say computer to begin.'
    }
    await tts.speak(msg, { audio_prompt })
  },

  // Log and announce a mode switch.
  async on_mode_change({ use_wake_word }) {
    const mode_label = use_wake_word ? 'wake word' : 'always listening'
    process.stderr.write(`[query-demo] mode: ${mode_label}\n`)
    const msg = use_wake_word ? 'Wake word mode. Say computer to begin.' : 'Always listening.'
    await tts.speak(msg, { audio_prompt })
  },

  // Log the query; dispatch it only when both dispatch options were given.
  async on_submit(text) {
    process.stderr.write(`[query-demo] query: ${JSON.stringify(text)}\n`)
    if (!(container_id && claude_remote)) return

    await tts.chime('dispatch').catch(() => {})
    const dispatch_time = Date.now()
    dispatch(text)

    // Resolves true only when the TTS activity endpoint reports a speak at or
    // after dispatch_time; a failed request, non-OK status, or missing field
    // all count as "nothing spoken yet".
    const spoke_since_dispatch = async () => {
      try {
        const res = await fetch(`${tts._url}/activity`)
        if (!res.ok) return false
        const { last_speak_at } = await res.json()
        return last_speak_at >= dispatch_time
      } catch {
        return false
      }
    }

    // After 4 s, play the 'working' chime unless something was spoken since dispatch.
    setTimeout(async () => {
      if (await spoke_since_dispatch()) return
      await tts.chime('working').catch(() => {})
    }, 4000)
  },

  // Log the cancellation; fall back to a spoken notice if the chime fails.
  async on_cancel() {
    process.stderr.write('[query-demo] query cancelled\n')
    const speak_fallback = () => tts.speak('Cancelled.', { audio_prompt })
    await tts.chime('cancel').catch(speak_fallback)
  },

  // Submit was requested with no accumulated text.
  async on_empty_submit() {
    process.stderr.write('[query-demo] submit with nothing accumulated\n')
    await tts.speak('Nothing to send.', { audio_prompt })
  },
})
// Start listening: each recognized utterance goes to pending.process_utterance(),
// and each audio chunk is forwarded to pending.on_audio().
const listen_options = {
  on_audio: (chunk) => pending.on_audio(chunk),
}
stt.listen(async (utterance) => {
  await pending.process_utterance(utterance)
}, listen_options)