Files
claude-voice-experiment/query-demo.mjs
mikael-lovqvists-claude-agent 9d2ffd1b0d Fix silence timeout never firing — gate on audio amplitude
on_audio was resetting the timer on every chunk including silence,
so the timeout never fired. Now passes the raw chunk and checks RMS;
only resets if energy is above 0.02 (speech, not ambient silence).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-30 05:09:20 +00:00

133 lines
4.2 KiB
JavaScript
Raw Blame History

/**
* Voice query demo — accumulates STT utterances into a query, checks
* completeness with a local LLM classifier, then dispatches to Claude Code.
*
* Usage:
* node query-demo.mjs [--audio-prompt voice.wav] [--whisper model] [--silence-timeout ms] [--window-id 0x...] [--claude-remote /path/to/claude-remote.mjs]
*
* If --window-id and --claude-remote are supplied, completed queries are
* dispatched to Claude Code. Otherwise they are only logged to stderr.
*
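* A query is considered complete when any of the following holds:
* - The classifier says the latest fragment completes it
* - The latest fragment consists solely of a send-word (go/done/send); the send-word itself is dropped from the query
* - No new fragment and no speech-level audio arrives within SILENCE_TIMEOUT ms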
*/
import { execFileSync } from 'node:child_process'
import { Stt } from './lib/stt.mjs'
import { Tts_Client } from './lib/tts-client.mjs'
import { is_query_complete } from './lib/local-query-complete.mjs'
/**
 * Look up the value that follows a command-line flag.
 *
 * @param {string} name - Flag to search for in process.argv, e.g. '--whisper'.
 * @returns {string|null} The argument immediately after the flag, or null
 *   when the flag is absent or is the last argument (nothing follows it).
 */
function get_arg(name) {
	const flag_index = process.argv.indexOf(name)
	if (flag_index === -1) {
		return null
	}
	// A trailing flag has no value after it; normalise that undefined to null
	// so callers always see a single "not provided" value.
	return process.argv[flag_index + 1] ?? null
}
// Command-line configuration; each option falls back to a default when absent.
const audio_prompt = get_arg('--audio-prompt') ?? '/home/devilholk/Documents/rommie-sample.wav'
const whisper_name = get_arg('--whisper') ?? 'base.en'
// Queries are dispatched only when both of these are supplied.
const window_id = get_arg('--window-id')
const claude_remote = get_arg('--claude-remote')
// Milliseconds of quiet after which an accumulated query is submitted.
const DEFAULT_SILENCE_TIMEOUT = 6000
const SILENCE_TIMEOUT = (() => {
	const raw = get_arg('--silence-timeout')
	if (raw == null) {
		return DEFAULT_SILENCE_TIMEOUT
	}
	const parsed = Number.parseInt(raw, 10)
	// A non-numeric value parses to NaN, which setTimeout treats as a 1 ms
	// delay — that would submit every fragment almost immediately.
	if (Number.isNaN(parsed) || parsed < 0) {
		process.stderr.write(`[query-demo] ignoring invalid --silence-timeout ${JSON.stringify(raw)}; using ${DEFAULT_SILENCE_TIMEOUT}\n`)
		return DEFAULT_SILENCE_TIMEOUT
	}
	return parsed
})()
// Speech output client and speech-to-text engine used for the whole session.
const tts = new Tts_Client()
const stt = new Stt({ whisper_name })
stt.init()

// Startup banner on stderr: model, voice sample, and whether completed
// queries will be dispatched or only logged.
for (const banner_line of [
	`[query-demo] whisper model: ${whisper_name}\n`,
	`[query-demo] voice: ${audio_prompt}\n`,
	window_id && claude_remote
		? `[query-demo] dispatch: window ${window_id} via ${claude_remote}\n`
		: `[query-demo] no --window-id/--claude-remote — logging only\n`,
]) {
	process.stderr.write(banner_line)
}

// Announce readiness out loud before listening starts.
await tts.speak('Ready for input.', { audio_prompt })
/**
 * Wrap a spoken query in the instruction text sent to Claude Code.
 *
 * @param {string} query - The accumulated voice query.
 * @returns {string} Newline-joined prompt: voice-interface instructions,
 *   a blank line, then the query framed by begin/end marker lines.
 */
function make_prompt(query) {
	const instructions = [
		'[voice-buddy] You have received a voice query.',
		'',
		'This is a voice interface. When you are done, use the `speak` shell command to inform the user of the outcome — keep it brief and spoken-word natural.',
		'If the task is a question, speak the answer directly.',
		'If the task is an action, carry it out and then speak a short confirmation.',
		'If the task will take more than a few seconds, use `speak` to say a brief acknowledgement BEFORE starting work, so the user knows you received it.',
	]
	const framed_query = ['--- Query ---', query, '--- End of query ---']
	return [...instructions, '', ...framed_query].join('\n')
}
/**
 * Send a query to Claude Code by running the claude-remote script
 * synchronously under `node`, targeting the configured window.
 *
 * The full prompt is written to the child's stdin; the child's stdout and
 * stderr are inherited from this process. Any error thrown by
 * execFileSync propagates to the caller.
 *
 * @param {string} query - The query text to wrap and deliver.
 */
function dispatch(query) {
	const child_args = [claude_remote, '--window-id', window_id]
	const child_options = {
		input: make_prompt(query),
		encoding: 'utf8',
		// stdin is a pipe so `input` can be delivered; output goes straight through.
		stdio: ['pipe', 'inherit', 'inherit'],
	}
	execFileSync('node', child_args, child_options)
}
/**
 * Log a finished query, dispatch it when a target is configured, and then
 * speak a short acknowledgement.
 *
 * Queries that are empty after trimming are ignored entirely.
 *
 * @param {string} query - Raw accumulated query text.
 * @returns {Promise<void>} Resolves once the acknowledgement has been spoken.
 */
async function submit(query) {
	const trimmed = query.trim()
	if (!trimmed) {
		return
	}

	process.stderr.write(`[query-demo] query: ${JSON.stringify(trimmed)}\n`)

	// Without both a window id and a remote script the query is only logged.
	const can_dispatch = window_id && claude_remote
	if (can_dispatch) {
		dispatch(trimmed)
	}

	await tts.speak('Efforting.', { audio_prompt })
}
// A fragment consisting solely of one of these words forces submission.
const SEND_WORDS = new Set(['go', 'done', 'send'])
// Fragments heard so far for the query in progress, joined by newlines.
let accumulated = ''
// Handle of the pending silence timeout, or null before the first one is armed.
let silence_timer = null

/**
 * (Re)arm the silence timeout.
 *
 * Cancels any pending timer and starts a new one. When it fires with a
 * non-empty accumulated query, the query is cleared and submitted.
 */
function reset_silence_timer() {
	clearTimeout(silence_timer)
	silence_timer = setTimeout(() => {
		if (!accumulated) {
			return
		}
		process.stderr.write('[query-demo] silence timeout — submitting\n')
		const query = accumulated
		accumulated = ''
		// Nothing awaits a timer callback, so a rejection here would be an
		// unhandled rejection and take the whole process down. Report it and
		// keep listening instead.
		submit(query).catch((error) => {
			process.stderr.write(`[query-demo] submit failed after silence timeout: ${error?.stack ?? error}\n`)
		})
	}, SILENCE_TIMEOUT)
}
// Normalised RMS level above which an audio chunk counts as speech rather
// than ambient silence.
const SPEECH_RMS_THRESHOLD = 0.02

/**
 * Root-mean-square level of an audio chunk, normalised by 32768.
 *
 * Samples are read through a DataView rather than an Int16Array view:
 * an Int16Array over (buffer, byteOffset) throws a RangeError when
 * byteOffset is odd, whereas DataView has no alignment requirement.
 * A trailing odd byte is ignored.
 *
 * NOTE(review): assumes signed 16-bit little-endian PCM, which matches
 * what the previous Int16Array view read on little-endian hosts — confirm
 * against the Stt audio source.
 *
 * @param {ArrayBufferView} chunk - Raw audio bytes.
 * @returns {number} RMS level; 0 for a chunk with no complete sample.
 */
function chunk_rms(chunk) {
	const sample_count = chunk.byteLength >> 1
	if (sample_count === 0) {
		return 0
	}
	const view = new DataView(chunk.buffer, chunk.byteOffset, chunk.byteLength)
	let sum_of_squares = 0
	for (let i = 0; i < sample_count; i++) {
		const sample = view.getInt16(i * 2, true)
		sum_of_squares += sample * sample
	}
	return Math.sqrt(sum_of_squares / sample_count) / 32768
}

// Main loop: each recognised fragment is appended to the query in progress,
// then the query is submitted if a send-word or the classifier says so.
stt.listen(async (text) => {
	accumulated = accumulated ? `${accumulated}\n${text}` : text
	process.stderr.write(`[query-demo] fragment: ${JSON.stringify(accumulated)}\n`)
	reset_silence_timer()

	const last_line = accumulated.split('\n').at(-1)
	// Lower-case and strip everything but a-z so "Go." matches "go".
	const last_norm = last_line.toLowerCase().replace(/[^a-z]/g, '')
	const forced = SEND_WORDS.has(last_norm)
	if (!forced) {
		// Only the newest fragment is shown to the classifier.
		const complete = await is_query_complete(last_line)
		if (!complete) {
			return
		}
	}

	clearTimeout(silence_timer)
	// Re-read the shared state: it may have changed while the classifier ran.
	const lines = accumulated.split('\n')
	// A send-word is a control token, not part of the query — drop it.
	const query = (forced ? lines.slice(0, -1) : lines).join('\n')
	accumulated = ''
	await submit(query)
}, {
	on_audio: (chunk) => {
		// Nothing pending means there is no timeout worth extending.
		if (!accumulated) {
			return
		}
		// Only audio with significant energy (speech, not silence) postpones
		// the silence timeout.
		if (chunk_rms(chunk) > SPEECH_RMS_THRESHOLD) {
			reset_silence_timer()
		}
	},
})