diff --git a/lib/stt.mjs b/lib/stt.mjs index bb5bc80..ac0c408 100644 --- a/lib/stt.mjs +++ b/lib/stt.mjs @@ -132,7 +132,7 @@ export class Stt { let pending = Buffer.alloc(0) mic.stdout.on('data', (chunk) => { - if (on_audio) on_audio() + if (on_audio) on_audio(chunk) pending = Buffer.concat([pending, chunk]) // Feed complete VAD windows diff --git a/query-demo.mjs b/query-demo.mjs index 7bf1d04..69684fd 100644 --- a/query-demo.mjs +++ b/query-demo.mjs @@ -121,4 +121,12 @@ stt.listen(async (text) => { accumulated = '' await submit(query) -}, { on_audio: () => { if (accumulated) reset_silence_timer() } }) +}, { on_audio: (chunk) => { + if (!accumulated) return + // Only reset silence timer if audio has significant energy (not silence) + const samples = new Int16Array(chunk.buffer, chunk.byteOffset, chunk.byteLength >> 1) + let sum = 0 + for (let i = 0; i < samples.length; i++) sum += samples[i] * samples[i] + const rms = Math.sqrt(sum / samples.length) / 32768 + if (rms > 0.02) reset_silence_timer() +} })