Debounce speech detection — require 3 consecutive loud chunks (~150ms)
A single transient noise no longer resets the silence timer. Only sustained audio energy counts as speech.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -81,8 +81,10 @@ async function submit(query) {
|
|||||||
|
|
||||||
const SEND_WORDS = new Set(['go', 'done', 'send'])
|
const SEND_WORDS = new Set(['go', 'done', 'send'])
|
||||||
|
|
||||||
let accumulated = ''
|
let accumulated = ''
|
||||||
let silence_timer = null
|
let silence_timer = null
|
||||||
|
let loud_chunk_streak = 0
|
||||||
|
const LOUD_STREAK_NEEDED = 3 // ~150ms of sustained loud audio to count as speech
|
||||||
|
|
||||||
function reset_silence_timer() {
|
function reset_silence_timer() {
|
||||||
clearTimeout(silence_timer)
|
clearTimeout(silence_timer)
|
||||||
@@ -123,10 +125,14 @@ stt.listen(async (text) => {
|
|||||||
await submit(query)
|
await submit(query)
|
||||||
}, { on_audio: (chunk) => {
|
}, { on_audio: (chunk) => {
|
||||||
if (!accumulated) return
|
if (!accumulated) return
|
||||||
// Only reset silence timer if audio has significant energy (not silence)
|
|
||||||
const samples = new Int16Array(chunk.buffer, chunk.byteOffset, chunk.byteLength >> 1)
|
const samples = new Int16Array(chunk.buffer, chunk.byteOffset, chunk.byteLength >> 1)
|
||||||
let sum = 0
|
let sum = 0
|
||||||
for (let i = 0; i < samples.length; i++) sum += samples[i] * samples[i]
|
for (let i = 0; i < samples.length; i++) sum += samples[i] * samples[i]
|
||||||
const rms = Math.sqrt(sum / samples.length) / 32768
|
const rms = Math.sqrt(sum / samples.length) / 32768
|
||||||
if (rms > 0.02) reset_silence_timer()
|
if (rms > 0.02) {
|
||||||
|
loud_chunk_streak++
|
||||||
|
if (loud_chunk_streak >= LOUD_STREAK_NEEDED) reset_silence_timer()
|
||||||
|
} else {
|
||||||
|
loud_chunk_streak = 0
|
||||||
|
}
|
||||||
} })
|
} })
|
||||||
|
|||||||
Reference in New Issue
Block a user