125 lines
3.9 KiB
JavaScript
125 lines
3.9 KiB
JavaScript
/**
 * Voice query demo — accumulates STT utterances into a query, checks
 * completeness with a local LLM classifier, then dispatches to Claude Code.
 *
 * Usage:
 *   node query-demo.mjs [--audio-prompt voice.wav] [--whisper model] [--window-id 0x...] [--claude-remote /path/to/claude-remote.mjs] [--silence-timeout ms]
 *
 * If --window-id and --claude-remote are supplied, completed queries are
 * dispatched to Claude Code. Otherwise they are only logged to stderr.
 *
 * A query is considered complete when any of the following holds:
 *   - The classifier says so
 *   - The latest fragment is just a send-word (go/done/send)
 *   - No new fragment arrives within SILENCE_TIMEOUT ms
 */
|
||
|
||
import { execFileSync } from 'node:child_process'
|
||
import { Stt } from './lib/stt.mjs'
|
||
import { Tts_Client } from './lib/tts-client.mjs'
|
||
import { is_query_complete } from './lib/local-query-complete.mjs'
|
||
|
||
/**
 * Look up the value that follows a command-line flag in `process.argv`.
 *
 * @param {string} name - Flag to search for, e.g. `'--whisper'`.
 * @returns {string|null} The argument immediately after the flag, or `null`
 *   when the flag is absent or is the last argument (so it has no value).
 */
function get_arg(name) {
  const index = process.argv.indexOf(name)
  if (index === -1) {
    return null
  }
  // A trailing flag has nothing after it; normalise that `undefined` to the
  // same `null` used for "flag not given" so callers see one sentinel.
  return process.argv[index + 1] ?? null
}
|
||
|
||
// Voice sample handed to the TTS client as `audio_prompt`.
// NOTE(review): the fallback is a machine-specific absolute path; pass
// --audio-prompt explicitly when running anywhere else.
const audio_prompt = get_arg('--audio-prompt') ?? '/home/devilholk/Documents/rommie-sample.wav'
// Whisper model name handed to the Stt constructor.
const whisper_name = get_arg('--whisper') ?? 'base.en'
// Dispatch to Claude Code only happens when BOTH of these are provided.
const window_id = get_arg('--window-id')
const claude_remote = get_arg('--claude-remote')

// Milliseconds without a new fragment before the accumulated query is submitted.
const DEFAULT_SILENCE_TIMEOUT = 6000
const requested_silence_timeout = Number.parseInt(get_arg('--silence-timeout') ?? '', 10)
// setTimeout() coerces NaN / negative delays to 1 ms, which would submit every
// fragment almost immediately; fall back to the default for unusable values.
const SILENCE_TIMEOUT = Number.isInteger(requested_silence_timeout) && requested_silence_timeout >= 0
  ? requested_silence_timeout
  : DEFAULT_SILENCE_TIMEOUT
|
||
|
||
// Speech output client and speech-to-text engine used for the whole session.
const tts = new Tts_Client()
const stt = new Stt({ whisper_name })

// NOTE(review): init() is not awaited here — confirm in lib/stt.mjs that it is
// synchronous (or that listen() waits for it) before relying on this order.
stt.init()

// Startup banner on stderr describing the effective configuration.
process.stderr.write(`[query-demo] whisper model: ${whisper_name}\n`)
process.stderr.write(`[query-demo] voice: ${audio_prompt}\n`)
if (window_id && claude_remote) {
  process.stderr.write(`[query-demo] dispatch: window ${window_id} via ${claude_remote}\n`)
} else {
  process.stderr.write(`[query-demo] no --window-id/--claude-remote — logging only\n`)
}

// Announce readiness; the speak() promise is awaited before listening starts below.
await tts.speak('Ready for input.', { audio_prompt })
|
||
|
||
/**
 * Wrap a spoken query in the instruction text that is sent to Claude Code.
 *
 * The result is a newline-joined block: a `[voice-buddy]` header, guidance on
 * using the `speak` shell command, then the query between
 * `--- Query ---` / `--- End of query ---` markers.
 *
 * @param {string} query - The user's query text, inserted verbatim.
 * @returns {string} The complete prompt.
 */
function make_prompt(query) {
  return [
    '[voice-buddy] You have received a voice query.',
    '',
    'This is a voice interface. When you are done, use the `speak` shell command to inform the user of the outcome — keep it brief and spoken-word natural.',
    'If the task is a question, speak the answer directly.',
    'If the task is an action, carry it out and then speak a short confirmation.',
    'If the task will take more than a few seconds, use `speak` to say a brief acknowledgement BEFORE starting work, so the user knows you received it.',
    '',
    '--- Query ---',
    query,
    '--- End of query ---',
  ].join('\n')
}
|
||
|
||
/**
 * Send a query to Claude Code by running the claude-remote script.
 *
 * Runs `node <claude_remote> --window-id <window_id>` synchronously and feeds
 * the generated prompt to the child on stdin; the child's stdout and stderr
 * are inherited from this process. Blocks until the child exits.
 *
 * @param {string} query - Query text to wrap with make_prompt() and send.
 * @throws {Error} Propagated from execFileSync, e.g. when the child cannot be
 *   spawned or exits with a non-zero status.
 */
function dispatch(query) {
  execFileSync('node', [claude_remote, '--window-id', window_id], {
    input: make_prompt(query),
    encoding: 'utf8',
    // stdin is a pipe so `input` can be written; output goes straight to our terminal.
    stdio: ['pipe', 'inherit', 'inherit'],
  })
}
|
||
|
||
/**
 * Submit a finished query: log it, dispatch it when dispatching is configured,
 * then speak a short acknowledgement.
 *
 * Whitespace-only queries are ignored entirely (nothing logged or spoken).
 *
 * @param {string} query - Accumulated query text; trimmed before use.
 * @returns {Promise<void>} Resolves once the acknowledgement speak() call resolves.
 */
async function submit(query) {
  const trimmed = query.trim()
  if (!trimmed) {
    return
  }
  process.stderr.write(`[query-demo] query: ${JSON.stringify(trimmed)}\n`)
  // Dispatch needs both the target window and the remote script; otherwise log only.
  if (window_id && claude_remote) {
    dispatch(trimmed)
  }
  await tts.speak('Efforting.', { audio_prompt })
}
|
||
|
||
// A fragment consisting solely of one of these words forces submission of
// everything accumulated before it.
const SEND_WORDS = new Set(['go', 'done', 'send'])

// Fragments collected so far for the current query, joined with '\n'.
let accumulated = ''
// Handle of the pending silence timeout, or null before the first one is armed.
let silence_timer = null
|
||
|
||
/**
 * (Re)arm the silence timeout.
 *
 * Cancels any pending timer and starts a new one. When it fires with text
 * still accumulated, that text is taken as the query, the accumulator is
 * cleared, and the query is submitted.
 */
function reset_silence_timer() {
  clearTimeout(silence_timer)
  silence_timer = setTimeout(async () => {
    if (!accumulated) {
      return
    }
    process.stderr.write('[query-demo] silence timeout — submitting\n')
    // Take the text and clear the accumulator before awaiting, so fragments
    // arriving during submission start a fresh query.
    const query = accumulated
    accumulated = ''
    try {
      await submit(query)
    } catch (err) {
      // setTimeout ignores the promise returned by this callback, so a
      // rejection here would be unhandled and terminate the process.
      process.stderr.write(`[query-demo] submit failed: ${err?.stack ?? err}\n`)
    }
  }, SILENCE_TIMEOUT)
}
|
||
|
||
// Main loop: every recognised fragment is appended to the accumulator, then the
// query is submitted if the fragment is a send-word or the classifier judges
// the latest fragment complete. Otherwise the silence timer is left to decide.
stt.listen(async (text) => {
  accumulated = accumulated ? `${accumulated}\n${text}` : text
  process.stderr.write(`[query-demo] fragment: ${JSON.stringify(accumulated)}\n`)

  reset_silence_timer()

  // Only the newest line is inspected; a send-word must be the whole line once
  // everything except a-z has been stripped (so "Go." matches, "let's go" does not).
  const last_line = accumulated.split('\n').at(-1)
  const last_norm = last_line.toLowerCase().replace(/[^a-z]/g, '')
  const forced = SEND_WORDS.has(last_norm)

  if (!forced) {
    // NOTE(review): `accumulated` can change while this await is pending (a new
    // fragment, or the silence timer submitting); the verdict for this fragment
    // is then applied to whatever is accumulated at that point.
    const complete = await is_query_complete(last_line)
    if (!complete) {
      return
    }
  }

  clearTimeout(silence_timer)

  // Re-read the accumulator after the await. A forced submit drops the
  // send-word line itself from the query.
  const lines = accumulated.split('\n')
  const query = (forced ? lines.slice(0, -1) : lines).join('\n')
  accumulated = ''

  await submit(query)
  // on_audio: presumably called by Stt while audio is being received (confirm in
  // lib/stt.mjs); it pushes the silence deadline back while a query is in progress.
}, { on_audio: () => { if (accumulated) reset_silence_timer() } })
|