Debounce speech detection — require 3 consecutive loud chunks (~150ms)

A single transient noise no longer resets the silence timer.
Only sustained audio energy counts as speech.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-30 05:12:03 +00:00
parent 9d2ffd1b0d
commit 8604d7ea51

View File

@@ -83,6 +83,8 @@ const SEND_WORDS = new Set(['go', 'done', 'send'])
// Pending input for the current utterance; starts empty. The on_audio
// callback ignores incoming audio while this is falsy.
// NOTE(review): presumably the transcribed text gathered so far — confirm
// against the stt.listen text callback.
let accumulated = ''
// Handle of the pending silence timeout; cleared by reset_silence_timer().
let silence_timer = null
// Number of consecutive audio chunks whose RMS energy exceeded the loudness
// threshold. Any quiet chunk sets it back to 0.
let loud_chunk_streak = 0
const LOUD_STREAK_NEEDED = 3 // ~150ms of sustained loud audio to count as speech
function reset_silence_timer() {
clearTimeout(silence_timer)
@@ -123,10 +125,14 @@ stt.listen(async (text) => {
await submit(query)
}, { on_audio: (chunk) => {
if (!accumulated) return
// Only reset silence timer if audio has significant energy (not silence)
const samples = new Int16Array(chunk.buffer, chunk.byteOffset, chunk.byteLength >> 1)
let sum = 0
for (let i = 0; i < samples.length; i++) sum += samples[i] * samples[i]
const rms = Math.sqrt(sum / samples.length) / 32768
if (rms > 0.02) reset_silence_timer()
if (rms > 0.02) {
loud_chunk_streak++
if (loud_chunk_streak >= LOUD_STREAK_NEEDED) reset_silence_timer()
} else {
loud_chunk_streak = 0
}
} })