- lib/pending-query.mjs: new state machine for query accumulation wake word, silence timer, send/cancel/pause/resume, instant dispatch, mode toggle (always listen / stop listening), mode query - query-demo.mjs: refactored to use Pending_Query; wake word on by default with silence timer; chimes for dispatch/working/cancel/activate - tts-server.mjs: track last_speak_at, expose /activity endpoint, chime playback via Python queue (soundfile + librosa), preload on startup - chatterbox-server.py: chime and preload commands via stdin protocol - lib/chatterbox-tts.mjs: play_chime and preload_chime methods - test-chime.mjs: simple chime test script - voices.yaml: configured ready/cancel/working/dispatch chimes - CLEANUP-PLAN.md: updated with current state, command vocabulary, future plans Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
131 lines
4.8 KiB
JavaScript
131 lines
4.8 KiB
JavaScript
/**
|
||
* Voice query demo — accumulates STT utterances into a query, checks
|
||
* completeness with a local LLM classifier, then dispatches to Claude Code.
|
||
*
|
||
* Usage:
|
||
* node query-demo.mjs [--audio-prompt voice.wav] [--whisper model] [--tmux-container-id 0x...] [--claude-remote /path/to/claude-remote.mjs]
|
||
*
|
||
* If tmux-container-id and --claude-remote are supplied, completed queries are
|
||
* dispatched to Claude Code. Otherwise they are only logged to stderr.
|
||
*
|
||
* Wake word mode (default on): say "computer" to activate, then speak your query.
|
||
* End with a send-word (go/done/send) to dispatch, or abort/cancel to discard.
|
||
* Pass --wake-word off to disable and always accumulate.
|
||
*/
|
||
|
||
|
||
|
||
import { execFileSync } from 'node:child_process'
|
||
import { Stt } from './lib/stt.mjs'
|
||
import { Tts_Client } from './lib/tts-client.mjs'
|
||
import { Pending_Query } from './lib/pending-query.mjs'
|
||
|
||
/**
 * Look up the value that follows a command-line flag.
 *
 * Scans `process.argv` for the first occurrence of `name` and returns the
 * token immediately after it.
 *
 * @param {string} name - Flag to search for, e.g. `'--whisper'`.
 * @returns {string | null} The token following the flag, or `null` when the
 *   flag is absent or is the last argument (so no value follows it).
 */
function get_arg(name) {
  const i = process.argv.indexOf(name)
  if (i === -1) return null
  // A trailing flag reads past the end of argv; normalise that `undefined`
  // to `null` so every "no value" case returns the same thing.
  return process.argv[i + 1] ?? null
}
|
||
|
||
// Voice sample passed as the `audio_prompt` option on every tts.speak() call.
const audio_prompt = get_arg('--audio-prompt') ?? '/home/devilholk/Documents/rommie-sample.wav'
// Whisper model name handed to the Stt constructor.
const whisper_name = get_arg('--whisper') ?? 'base.en'
// Dispatch target: queries are only dispatched when BOTH of these are given.
const container_id = get_arg('--tmux-container-id')
const claude_remote = get_arg('--claude-remote')

// Silence timeout forwarded to Pending_Query (presumably milliseconds — confirm
// against lib/pending-query.mjs).
const DEFAULT_SILENCE_TIMEOUT = 6000
const silence_timeout_arg = get_arg('--silence-timeout') ?? String(DEFAULT_SILENCE_TIMEOUT)
// Always parse as base 10, and never let NaN leak into the timer config.
const parsed_silence_timeout = Number.parseInt(silence_timeout_arg, 10)
if (Number.isNaN(parsed_silence_timeout)) {
  process.stderr.write(`[query-demo] invalid --silence-timeout ${JSON.stringify(silence_timeout_arg)}, using ${DEFAULT_SILENCE_TIMEOUT}\n`)
}
const SILENCE_TIMEOUT = Number.isNaN(parsed_silence_timeout)
  ? DEFAULT_SILENCE_TIMEOUT
  : parsed_silence_timeout

const tts = new Tts_Client()
const stt = new Stt({ whisper_name })

// Wake-word mode is on unless `--wake-word off` is passed explicitly.
const USE_WAKE_WORD = get_arg('--wake-word') !== 'off'
|
||
|
||
// Initialise the speech-to-text engine, then report the effective
// configuration on stderr before greeting the user.
stt.init()

const startup_report = [
  `[query-demo] whisper model: ${whisper_name}\n`,
  `[query-demo] voice: ${audio_prompt}\n`,
  // Dispatch needs both the container id and the claude-remote script path.
  container_id && claude_remote
    ? `[query-demo] dispatch: container ${container_id} via ${claude_remote}\n`
    : `[query-demo] no --tmux-container-id/--claude-remote — logging only\n`,
  `[query-demo] wake word: ${USE_WAKE_WORD ? 'on (say "computer" to activate)' : 'off'}\n`,
]
for (const report_line of startup_report) {
  process.stderr.write(report_line)
}

// Spoken greeting differs by mode so the user knows how to start a query.
const greeting = USE_WAKE_WORD
  ? 'Ready. Say computer to begin a query, or always listen for hands-free mode. Say help for usage.'
  : 'Always listening. Say go to send, cancel to discard, or always listen to switch to wake-word mode. Say help for usage.'
await tts.speak(greeting, { audio_prompt })
|
||
|
||
/**
 * Wrap a spoken query in the instruction text sent to the dispatch target.
 *
 * The result is a fixed preamble telling the recipient it is behind a voice
 * interface and should answer via the `speak` shell command, followed by the
 * query framed between `--- Query ---` and `--- End of query ---` markers.
 *
 * @param {string} query - The accumulated query text.
 * @returns {string} The newline-joined prompt.
 */
function make_prompt(query) {
  const instructions = [
    '[voice-buddy] You have received a voice query.',
    '',
    'This is a voice interface. When you are done, use the `speak` shell command to inform the user of the outcome — keep it brief and spoken-word natural.',
    'If the task is a question, speak the answer directly.',
    'If the task is an action, carry it out and then speak a short confirmation.',
    'If the task will take more than a few seconds, use `speak` to say a brief acknowledgement BEFORE starting work, so the user knows you received it.',
    '',
  ]
  const framed_query = ['--- Query ---', query, '--- End of query ---']
  return instructions.concat(framed_query).join('\n')
}
|
||
|
||
/**
 * Send a query to the configured dispatch target.
 *
 * Synchronously runs the `claude_remote` script under `node`, passing the
 * tmux container id as an argument and the prompt built by `make_prompt` on
 * the child's stdin. The child's stdout and stderr are inherited from this
 * process. Blocks until the child exits; `execFileSync` throws if the child
 * cannot be run or exits unsuccessfully.
 *
 * @param {string} query - The accumulated query text.
 */
function dispatch(query) {
  const child_args = [claude_remote, '--tmux-container-id', container_id]
  const child_options = {
    input: make_prompt(query),
    encoding: 'utf8',
    stdio: ['pipe', 'inherit', 'inherit'],
  }
  execFileSync('node', child_args, child_options)
}
|
||
|
||
|
||
// Play a named chime, ignoring a rejected chime promise: a missing or failed
// chime should not interrupt query handling.
const chime_quietly = (chime_name) => tts.chime(chime_name).catch(() => {})

// Ask the TTS server when it last spoke. If that was before `since` — or the
// check itself fails — play the "working" chime.
async function chime_if_silent_since(since) {
  try {
    const response = await fetch(`${tts._url}/activity`)
    const { last_speak_at } = await response.json()
    if (last_speak_at < since) {
      await chime_quietly('working')
    }
  } catch {
    await chime_quietly('working')
  }
}

// Query accumulator. The callbacks below are presumably invoked by
// Pending_Query on the events their names describe — confirm against
// lib/pending-query.mjs.
const pending = new Pending_Query({
  silence_timeout: SILENCE_TIMEOUT,
  use_wake_word: USE_WAKE_WORD,
  // The timer is enabled exactly when wake-word mode is.
  use_timer: USE_WAKE_WORD,

  on_activate: async () => {
    process.stderr.write('[query-demo] activated\n')
    await chime_quietly('ready')
  },

  // Speak the current mode back to the user.
  on_mode_query: async ({ use_wake_word, active }) => {
    let spoken
    if (!use_wake_word) {
      spoken = 'Always listening.'
    } else if (active) {
      spoken = 'Listening for your query.'
    } else {
      spoken = 'Wake word mode. Say computer to begin.'
    }
    await tts.speak(spoken, { audio_prompt })
  },

  // Log and announce a switch between wake-word and always-listening mode.
  on_mode_change: async ({ use_wake_word }) => {
    let spoken
    if (use_wake_word) {
      spoken = 'Wake word mode. Say computer to begin.'
    } else {
      spoken = 'Always listening.'
    }
    process.stderr.write(`[query-demo] mode: ${use_wake_word ? 'wake word' : 'always listening'}\n`)
    await tts.speak(spoken, { audio_prompt })
  },

  // Log the query; when a dispatch target is configured, chime, dispatch, and
  // schedule a follow-up "working" chime in case nothing gets spoken.
  on_submit: async (text) => {
    process.stderr.write(`[query-demo] query: ${JSON.stringify(text)}\n`)
    if (!(container_id && claude_remote)) return

    await chime_quietly('dispatch')
    // Taken before the (blocking) dispatch so any speech since counts.
    const dispatch_time = Date.now()
    dispatch(text)
    setTimeout(() => chime_if_silent_since(dispatch_time), 4000)
  },

  // Prefer the cancel chime; fall back to a spoken notice if the chime fails.
  on_cancel: async () => {
    process.stderr.write('[query-demo] query cancelled\n')
    const speak_fallback = () => tts.speak('Cancelled.', { audio_prompt })
    await tts.chime('cancel').catch(speak_fallback)
  },

  on_empty_submit: async () => {
    process.stderr.write('[query-demo] submit with nothing accumulated\n')
    await tts.speak('Nothing to send.', { audio_prompt })
  },
})
|
||
|
||
// Wire the speech-to-text engine to the pending query: each recognised
// utterance goes through process_utterance, and each audio chunk is forwarded
// to pending.on_audio.
async function handle_utterance(utterance) {
  await pending.process_utterance(utterance)
}

const listen_options = {
  on_audio: (audio_chunk) => pending.on_audio(audio_chunk),
}

stt.listen(handle_utterance, listen_options)
|