Add Pending_Query class and voice interaction improvements

- lib/pending-query.mjs: new state machine for query accumulation — wake word, silence timer, send/cancel/pause/resume, instant dispatch, mode toggle (always listen / stop listening), mode query
- query-demo.mjs: refactored to use Pending_Query; wake word on by default with silence timer; chimes for dispatch/working/cancel/activate
- tts-server.mjs: track last_speak_at, expose /activity endpoint, chime playback via Python queue (soundfile + librosa), preload on startup
- chatterbox-server.py: chime and preload commands via stdin protocol
- lib/chatterbox-tts.mjs: play_chime and preload_chime methods
- test-chime.mjs: simple chime test script
- voices.yaml: configured ready/cancel/working/dispatch chimes
- CLEANUP-PLAN.md: updated with current state, command vocabulary, future plans

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-31 03:59:11 +00:00
parent 4fdad055e4
commit a7fa2fd218
9 changed files with 487 additions and 95 deletions
--- a/chatterbox-server.py
+++ b/chatterbox-server.py
@@ -4,6 +4,8 @@ Chatterbox TTS server — keeps model loaded, reads JSON lines from stdin.

 Protocol:
  stdin:  {"text": "...", "temperature": 0.8, "top_p": 0.95}
+          {"chime": "/path/to/file.wav"}
+          {"preload": "/path/to/file.wav"}
  stdout: "ok\n" after each utterance is generated (playback may still be in progress)
  stderr: status/timing messages

@@ -169,18 +171,50 @@ def generate(text, opts):
    return samples


+_chime_cache = {}
+
+def load_chime(path):
+    if path in _chime_cache:
+        return _chime_cache[path]
+    samples, sr = sf.read(path, dtype='float32', always_2d=True)
+    samples = samples.mean(axis=1)  # stereo → mono
+    if sr != SAMPLE_RATE:
+        samples = _librosa.resample(samples, orig_sr=sr, target_sr=SAMPLE_RATE)
+    _chime_cache[path] = samples
+    return samples
+
+
 for line in sys.stdin:
    line = line.strip()
    if not line:
        continue

    try:
-        req  = json.loads(line)
-        text = req.pop('text', '')
-        opts = req  # remaining fields are generation options
+        req = json.loads(line)
    except json.JSONDecodeError:
-        text = line
-        opts = {}
+        req = {'text': line}
+
+    if 'preload' in req:
+        try:
+            load_chime(req['preload'])
+            log(f'preloaded chime: {req["preload"]}')
+        except Exception as e:
+            log(f'preload error: {e}')
+        print('ok', flush=True)
+        continue
+
+    if 'chime' in req:
+        try:
+            samples = load_chime(req['chime'])
+            playback_queue.put(samples)
+        except Exception as e:
+            log(f'chime error: {e}')
+            traceback.print_exc(file=sys.stderr)
+        print('ok', flush=True)
+        continue
+
+    text = req.pop('text', '')
+    opts = req

    if not text:
        print('ok', flush=True)