Compare commits
7 Commits
bdae4c047f
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| cfbbd528d3 | |||
| f4ae96c6b9 | |||
| b24414c3f3 | |||
| e15ba903c1 | |||
| 6357ff6a58 | |||
| 26837bec6a | |||
| f6ff8c72e8 |
@@ -10,6 +10,11 @@ This project aims to provide text to voice with voice cloning ability. It is usi
|
|||||||
This project started as a [vibe-coded](https://en.wikipedia.org/wiki/Vibe_coding) [experiment](https://gitea.efforting.tech/mikael-lovqvists-claude-agent/claude-voice-experiment) but this version is somewhat more hands on.
|
This project started as a [vibe-coded](https://en.wikipedia.org/wiki/Vibe_coding) [experiment](https://gitea.efforting.tech/mikael-lovqvists-claude-agent/claude-voice-experiment) but this version is somewhat more hands on.
|
||||||
|
|
||||||
|
|
||||||
|
## Running
|
||||||
|
|
||||||
|
The quickest way to test this is to set up according to the instructions below and then use the example scripts under [`examples/`](./examples).
|
||||||
|
|
||||||
|
|
||||||
## Setup
|
## Setup
|
||||||
|
|
||||||
### Setup [venv](https://docs.python.org/3/library/venv.html) for [python](https://www.python.org/)
|
### Setup [venv](https://docs.python.org/3/library/venv.html) for [python](https://www.python.org/)
|
||||||
|
|||||||
@@ -1,13 +1,39 @@
|
|||||||
#!/usr/bin/env -S bash -c 'exec "$(dirname "$0")/venv/bin/python3" "$0" "$@"'
|
#!/usr/bin/env -S bash -c 'exec "$(dirname "$0")/venv/bin/python3" "$0" "$@"'
|
||||||
"""
|
"""
|
||||||
Chatterbox TTS server — keeps model loaded, reads JSON lines from stdin.
|
Chatterbox TTS WebSocket server — keeps model loaded, exposes a JSON WebSocket API.
|
||||||
|
|
||||||
Protocol:
|
Connect to ws://host:TTS_PORT (default ws://localhost:11500).
|
||||||
stdin: {"text": "...", "temperature": 0.8, "top_p": 0.95}
|
|
||||||
{"chime": "/path/to/file.wav"}
|
Client → Server:
|
||||||
{"preload": "/path/to/file.wav"}
|
{"type": "speak", "id"?: N, "text": "...", ...generation_opts}
|
||||||
stdout: "ok\n" after each utterance is generated (playback may still be in progress)
|
{"type": "chime", "id"?: N, "path": "..."}
|
||||||
stderr: status/timing messages
|
{"type": "preload", "path": "..."}
|
||||||
|
{"type": "abort_current"} — kill active playback, advance to next queued item
|
||||||
|
{"type": "abort_all"} — kill active playback + drain all queues
|
||||||
|
{"type": "terminate"}
|
||||||
|
|
||||||
|
Server → requesting client:
|
||||||
|
{"status": "ok", "id": N} (speak/chime)
|
||||||
|
{"status": "ok"}
|
||||||
|
{"status": "error", "message": "..."}
|
||||||
|
|
||||||
|
Server → all clients (broadcast):
|
||||||
|
{"event": "queued", "id": N}
|
||||||
|
{"event": "started", "id": N}
|
||||||
|
{"event": "finished", "id": N}
|
||||||
|
{"event": "aborted", "id": N}
|
||||||
|
{"event": "error", "id": N, "message": "..."}
|
||||||
|
|
||||||
|
Generation options (speak):
|
||||||
|
temperature, top_p, top_k, repetition_penalty, min_p
|
||||||
|
audio_prompt — path to reference WAV for voice cloning
|
||||||
|
exaggeration — 0.0-1.0, full model only
|
||||||
|
cfg_weight — full model only
|
||||||
|
|
||||||
|
Environment:
|
||||||
|
TTS_PORT port to listen on (default: 11500)
|
||||||
|
HF_TOKEN_FILE path to HuggingFace token file (default: ~/.secrets/hugging-face.token)
|
||||||
|
HF_HUB_CACHE path to HuggingFace hub cache (default: ~/.cache/huggingface/hub)
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
./chatterbox-server.py
|
./chatterbox-server.py
|
||||||
@@ -16,9 +42,6 @@ Usage:
|
|||||||
|
|
||||||
Paralinguistic tags supported in text:
|
Paralinguistic tags supported in text:
|
||||||
[laugh] [chuckle] [cough] [clear throat] [sigh] [shush] [groan] [sniff] [gasp]
|
[laugh] [chuckle] [cough] [clear throat] [sigh] [shush] [groan] [sniff] [gasp]
|
||||||
|
|
||||||
Full model only:
|
|
||||||
exaggeration 0.0-1.0 emotion intensity (ignored in turbo)
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
@@ -28,6 +51,11 @@ import time
|
|||||||
import queue
|
import queue
|
||||||
import threading
|
import threading
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import traceback
|
||||||
|
import tempfile
|
||||||
|
import asyncio
|
||||||
|
import itertools
|
||||||
|
from pathlib import Path
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
TOKEN_FILE = os.environ.get('HF_TOKEN_FILE', os.path.expanduser('~/.secrets/hugging-face.token'))
|
TOKEN_FILE = os.environ.get('HF_TOKEN_FILE', os.path.expanduser('~/.secrets/hugging-face.token'))
|
||||||
@@ -37,35 +65,33 @@ try:
|
|||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def find_hf_cache(repo_id):
|
|
||||||
"""Return the local snapshot path if the model is already cached, else None."""
|
|
||||||
from pathlib import Path
|
|
||||||
cache_dir = Path(os.environ.get('HF_HUB_CACHE', os.path.expanduser('~/.cache/huggingface/hub')))
|
|
||||||
|
|
||||||
repo_dir = cache_dir / f"models--{repo_id.replace('/', '--')}" / 'snapshots'
|
def find_hf_cache(repo_id):
|
||||||
|
cache_dir = Path(os.environ.get('HF_HUB_CACHE', os.path.expanduser('~/.cache/huggingface/hub')))
|
||||||
|
repo_dir = cache_dir / f"models--{repo_id.replace('/', '--')}" / 'snapshots'
|
||||||
if repo_dir.exists():
|
if repo_dir.exists():
|
||||||
snapshots = sorted(repo_dir.iterdir(), key=lambda p: p.stat().st_mtime)
|
snapshots = sorted(repo_dir.iterdir(), key=lambda p: p.stat().st_mtime)
|
||||||
if snapshots:
|
if snapshots:
|
||||||
return str(snapshots[-1])
|
return str(snapshots[-1])
|
||||||
return None
|
return None
|
||||||
|
|
||||||
VARIANT = sys.argv[1] if len(sys.argv) > 1 else 'turbo'
|
|
||||||
|
VARIANT = sys.argv[1] if len(sys.argv) > 1 else 'turbo'
|
||||||
|
PORT = int(os.environ.get('TTS_PORT', 11500))
|
||||||
SAMPLE_RATE = 24000
|
SAMPLE_RATE = 24000
|
||||||
|
|
||||||
|
|
||||||
def log(msg):
|
def log(msg):
|
||||||
print(f'[chatterbox] {msg}', file=sys.stderr, flush=True)
|
print(f'[chatterbox] {msg}', file=sys.stderr, flush=True)
|
||||||
|
|
||||||
|
|
||||||
log(f'loading chatterbox-{VARIANT}...')
|
log(f'loading chatterbox-{VARIANT}...')
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
|
|
||||||
import tempfile
|
|
||||||
import traceback
|
|
||||||
import numpy as np
|
|
||||||
import torch
|
import torch
|
||||||
import soundfile as sf
|
import soundfile as sf
|
||||||
import librosa as _librosa
|
import librosa as _librosa
|
||||||
|
|
||||||
# librosa.resample returns float64 in newer numpy — patch it to always return float32
|
|
||||||
_orig_resample = _librosa.resample
|
_orig_resample = _librosa.resample
|
||||||
def _resample_float32(*args, **kwargs):
|
def _resample_float32(*args, **kwargs):
|
||||||
return _orig_resample(*args, **kwargs).astype(np.float32)
|
return _orig_resample(*args, **kwargs).astype(np.float32)
|
||||||
@@ -92,48 +118,59 @@ else:
|
|||||||
model = Model.from_pretrained(device=device)
|
model = Model.from_pretrained(device=device)
|
||||||
|
|
||||||
log(f'ready on {device} ({time.time() - t0:.1f}s load time)')
|
log(f'ready on {device} ({time.time() - t0:.1f}s load time)')
|
||||||
print('ready', flush=True)
|
|
||||||
|
_wav_cache = {}
|
||||||
|
_chime_cache = {}
|
||||||
|
_gen_lock = threading.Lock()
|
||||||
|
|
||||||
|
_SENTINEL = object()
|
||||||
|
|
||||||
|
_id_counter = itertools.count(1)
|
||||||
|
def _next_id():
    """Return the next value from the module-level `_id_counter` (an `itertools.count` starting at 1)."""
    return next(_id_counter)
|
||||||
|
|
||||||
|
_job_queue = queue.Queue() # dicts: {'id', 'type', ...}
|
||||||
|
_playback_queue = queue.Queue() # dicts: {'id', 'samples'}
|
||||||
|
|
||||||
|
_current_proc = None
|
||||||
|
_current_proc_lock = threading.Lock()
|
||||||
|
_abort_flag = threading.Event()
|
||||||
|
|
||||||
|
_ws_clients = set() # asyncio.Queue per connected client
|
||||||
|
_ws_clients_lock = threading.Lock()
|
||||||
|
_ws_loop = None
|
||||||
|
|
||||||
|
|
||||||
_wav_cache = {}
|
def broadcast(event):
    """Send `event` (JSON-encoded) to the outgoing queue of every connected client.

    Does nothing while `_ws_loop` is still None. The per-client queues are
    asyncio queues, so each `put_nowait` is handed to the event loop with
    `call_soon_threadsafe` instead of being called directly.
    """
    if _ws_loop is not None:
        payload = json.dumps(event)
        # Snapshot the client set under the lock, then schedule outside of it.
        with _ws_clients_lock:
            targets = tuple(_ws_clients)
        for client_queue in targets:
            _ws_loop.call_soon_threadsafe(client_queue.put_nowait, payload)
|
||||||
|
|
||||||
|
|
||||||
def ensure_float32_wav(path):
|
def ensure_float32_wav(path):
|
||||||
"""Re-save audio as float32 mono WAV to work around librosa/numpy float64 issue.
|
|
||||||
Result is cached by input path so repeated calls with the same file are free."""
|
|
||||||
if path in _wav_cache:
|
if path in _wav_cache:
|
||||||
return _wav_cache[path]
|
return _wav_cache[path]
|
||||||
wav, sr = sf.read(path, dtype='float32', always_2d=True)
|
wav, sr = sf.read(path, dtype='float32', always_2d=True)
|
||||||
wav = wav.mean(axis=1) # stereo → mono if needed
|
wav = wav.mean(axis=1)
|
||||||
tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
|
tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
|
||||||
sf.write(tmp.name, wav, sr, subtype='FLOAT')
|
sf.write(tmp.name, wav, sr, subtype='FLOAT')
|
||||||
_wav_cache[path] = tmp.name
|
_wav_cache[path] = tmp.name
|
||||||
return tmp.name
|
return tmp.name
|
||||||
|
|
||||||
|
|
||||||
_SENTINEL = object()
|
def load_chime(path):
|
||||||
|
if path in _chime_cache:
|
||||||
playback_queue = queue.Queue()
|
return _chime_cache[path]
|
||||||
|
samples, sr = sf.read(path, dtype='float32', always_2d=True)
|
||||||
|
samples = samples.mean(axis=1)
|
||||||
def playback_worker():
|
if sr != SAMPLE_RATE:
|
||||||
"""Plays audio samples in order. Runs in its own thread."""
|
samples = _librosa.resample(samples, orig_sr=sr, target_sr=SAMPLE_RATE)
|
||||||
while True:
|
_chime_cache[path] = samples
|
||||||
item = playback_queue.get()
|
return samples
|
||||||
if item is _SENTINEL:
|
|
||||||
break
|
|
||||||
samples = item
|
|
||||||
proc = subprocess.Popen(
|
|
||||||
['pacat', '--format=float32le', f'--rate={SAMPLE_RATE}', '--channels=1'],
|
|
||||||
stdin=subprocess.PIPE,
|
|
||||||
)
|
|
||||||
proc.stdin.write(samples.tobytes())
|
|
||||||
proc.stdin.close()
|
|
||||||
proc.wait()
|
|
||||||
playback_queue.task_done()
|
|
||||||
|
|
||||||
|
|
||||||
playback_thread = threading.Thread(target=playback_worker, daemon=True)
|
|
||||||
playback_thread.start()
|
|
||||||
|
|
||||||
|
|
||||||
def generate(text, opts):
|
def generate(text, opts):
|
||||||
@@ -168,68 +205,182 @@ def generate(text, opts):
|
|||||||
elapsed = time.time() - t1
|
elapsed = time.time() - t1
|
||||||
duration = len(samples) / SAMPLE_RATE
|
duration = len(samples) / SAMPLE_RATE
|
||||||
log(f'generated {duration:.1f}s audio in {elapsed:.1f}s rtf={elapsed/duration:.2f}')
|
log(f'generated {duration:.1f}s audio in {elapsed:.1f}s rtf={elapsed/duration:.2f}')
|
||||||
|
|
||||||
return samples
|
return samples
|
||||||
|
|
||||||
|
|
||||||
_chime_cache = {}
|
def generation_worker():
|
||||||
|
while True:
|
||||||
def load_chime(path):
|
item = _job_queue.get()
|
||||||
if path in _chime_cache:
|
if item is _SENTINEL:
|
||||||
return _chime_cache[path]
|
_job_queue.task_done()
|
||||||
samples, sr = sf.read(path, dtype='float32', always_2d=True)
|
break
|
||||||
samples = samples.mean(axis=1) # stereo → mono
|
job_id = item['id']
|
||||||
if sr != SAMPLE_RATE:
|
job_type = item['type']
|
||||||
samples = _librosa.resample(samples, orig_sr=sr, target_sr=SAMPLE_RATE)
|
|
||||||
_chime_cache[path] = samples
|
|
||||||
return samples
|
|
||||||
|
|
||||||
|
|
||||||
for line in sys.stdin:
|
|
||||||
line = line.strip()
|
|
||||||
if not line:
|
|
||||||
continue
|
|
||||||
|
|
||||||
try:
|
|
||||||
req = json.loads(line)
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
req = {'text': line}
|
|
||||||
|
|
||||||
if 'preload' in req:
|
|
||||||
try:
|
try:
|
||||||
load_chime(req['preload'])
|
if job_type == 'speak':
|
||||||
log(f'preloaded chime: {req["preload"]}')
|
with _gen_lock:
|
||||||
|
samples = generate(item['text'], item)
|
||||||
|
_playback_queue.put({'id': job_id, 'samples': samples})
|
||||||
|
elif job_type == 'chime':
|
||||||
|
samples = load_chime(item['path'])
|
||||||
|
_playback_queue.put({'id': job_id, 'samples': samples})
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log(f'preload error: {e}')
|
|
||||||
print('ok', flush=True)
|
|
||||||
continue
|
|
||||||
|
|
||||||
if 'chime' in req:
|
|
||||||
try:
|
|
||||||
samples = load_chime(req['chime'])
|
|
||||||
playback_queue.put(samples)
|
|
||||||
except Exception as e:
|
|
||||||
log(f'chime error: {e}')
|
|
||||||
traceback.print_exc(file=sys.stderr)
|
traceback.print_exc(file=sys.stderr)
|
||||||
print('ok', flush=True)
|
broadcast({'event': 'error', 'id': job_id, 'message': str(e)})
|
||||||
continue
|
_job_queue.task_done()
|
||||||
|
|
||||||
text = req.pop('text', '')
|
|
||||||
opts = req
|
|
||||||
|
|
||||||
if not text:
|
def playback_worker():
|
||||||
print('ok', flush=True)
|
global _current_proc
|
||||||
continue
|
while True:
|
||||||
|
item = _playback_queue.get()
|
||||||
|
if item is _SENTINEL:
|
||||||
|
_playback_queue.task_done()
|
||||||
|
break
|
||||||
|
job_id = item['id']
|
||||||
|
samples = item['samples']
|
||||||
|
|
||||||
|
_abort_flag.clear()
|
||||||
|
broadcast({'event': 'started', 'id': job_id})
|
||||||
|
|
||||||
|
proc = subprocess.Popen(
|
||||||
|
['pacat', '--format=float32le', f'--rate={SAMPLE_RATE}', '--channels=1'],
|
||||||
|
stdin=subprocess.PIPE,
|
||||||
|
)
|
||||||
|
with _current_proc_lock:
|
||||||
|
_current_proc = proc
|
||||||
|
|
||||||
|
try:
|
||||||
|
proc.stdin.write(samples.tobytes())
|
||||||
|
proc.stdin.close()
|
||||||
|
except BrokenPipeError:
|
||||||
|
pass
|
||||||
|
proc.wait()
|
||||||
|
|
||||||
|
with _current_proc_lock:
|
||||||
|
_current_proc = None
|
||||||
|
|
||||||
|
if _abort_flag.is_set():
|
||||||
|
broadcast({'event': 'aborted', 'id': job_id})
|
||||||
|
else:
|
||||||
|
broadcast({'event': 'finished', 'id': job_id})
|
||||||
|
|
||||||
|
_playback_queue.task_done()
|
||||||
|
|
||||||
|
|
||||||
|
def abort_current():
    """Kill the playback process that is currently registered, if there is one.

    The abort flag is only raised when a process is actually killed. Raising it
    unconditionally (as before) could mark an item as 'aborted' even though no
    process was registered yet and the audio then played to completion.
    When nothing is registered in `_current_proc` this is a no-op.
    """
    with _current_proc_lock:
        proc = _current_proc
        if proc is None:
            return
        # Set the flag before killing so playback_worker sees it once wait() returns.
        _abort_flag.set()
        proc.kill()
|
||||||
|
|
||||||
|
|
||||||
|
def abort_all():
    """Empty both work queues, abort the active playback, and report the dropped jobs.

    Every non-sentinel item removed from `_job_queue` and `_playback_queue` gets
    an 'aborted' event broadcast for its id, after `abort_current()` has run.
    """
    cancelled = []

    # Job queue first, then playback queue: ids are reported in that order.
    for pending in (_job_queue, _playback_queue):
        try:
            while True:
                entry = pending.get_nowait()
                if entry is not _SENTINEL:
                    cancelled.append(entry['id'])
                pending.task_done()
        except queue.Empty:
            pass

    abort_current()

    for job_id in cancelled:
        broadcast({'event': 'aborted', 'id': job_id})
|
||||||
|
|
||||||
|
|
||||||
|
threading.Thread(target=generation_worker, daemon=True).start()
|
||||||
|
threading.Thread(target=playback_worker, daemon=True).start()
|
||||||
|
|
||||||
|
|
||||||
|
async def _ws_handler(websocket):
|
||||||
|
q = asyncio.Queue()
|
||||||
|
with _ws_clients_lock:
|
||||||
|
_ws_clients.add(q)
|
||||||
|
|
||||||
|
async def sender():
|
||||||
|
while True:
|
||||||
|
msg = await q.get()
|
||||||
|
await websocket.send(msg)
|
||||||
|
|
||||||
|
sender_task = asyncio.create_task(sender())
|
||||||
|
|
||||||
try:
|
try:
|
||||||
samples = generate(text, opts)
|
async for raw in websocket:
|
||||||
playback_queue.put(samples)
|
try:
|
||||||
except Exception as e:
|
msg = json.loads(raw)
|
||||||
log(f'error: {e}')
|
except json.JSONDecodeError:
|
||||||
traceback.print_exc(file=sys.stderr)
|
await websocket.send(json.dumps({'status': 'error', 'message': 'invalid JSON'}))
|
||||||
|
continue
|
||||||
|
|
||||||
print('ok', flush=True)
|
msg_type = msg.get('type', '')
|
||||||
|
|
||||||
# Drain playback before exit
|
if msg_type in ('speak', 'chime'):
|
||||||
playback_queue.put(_SENTINEL)
|
job_id = msg.get('id') or _next_id()
|
||||||
playback_thread.join()
|
job = dict(msg)
|
||||||
|
job['id'] = job_id
|
||||||
|
_job_queue.put(job)
|
||||||
|
broadcast({'event': 'queued', 'id': job_id})
|
||||||
|
await websocket.send(json.dumps({'status': 'ok', 'id': job_id}))
|
||||||
|
|
||||||
|
elif msg_type == 'preload':
|
||||||
|
path = msg.get('path', '')
|
||||||
|
try:
|
||||||
|
await asyncio.get_running_loop().run_in_executor(None, load_chime, path)
|
||||||
|
log(f'preloaded: {path}')
|
||||||
|
await websocket.send(json.dumps({'status': 'ok'}))
|
||||||
|
except Exception as e:
|
||||||
|
await websocket.send(json.dumps({'status': 'error', 'message': str(e)}))
|
||||||
|
|
||||||
|
elif msg_type == 'abort_current':
|
||||||
|
abort_current()
|
||||||
|
await websocket.send(json.dumps({'status': 'ok'}))
|
||||||
|
|
||||||
|
elif msg_type == 'abort_all':
|
||||||
|
abort_all()
|
||||||
|
await websocket.send(json.dumps({'status': 'ok'}))
|
||||||
|
|
||||||
|
elif msg_type == 'terminate':
|
||||||
|
await websocket.send(json.dumps({'status': 'ok'}))
|
||||||
|
asyncio.get_running_loop().stop()
|
||||||
|
|
||||||
|
else:
|
||||||
|
await websocket.send(json.dumps({'status': 'error', 'message': f'unknown type: {msg_type}'}))
|
||||||
|
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
finally:
|
||||||
|
with _ws_clients_lock:
|
||||||
|
_ws_clients.discard(q)
|
||||||
|
sender_task.cancel()
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """Serve the WebSocket API until the event loop is stopped or the task is cancelled.

    Publishes the running loop in `_ws_loop` so that `broadcast()` can schedule
    work on it, then listens on `TTS_HOST`:`PORT`.

    Environment:
        TTS_HOST  interface to bind (default: 0.0.0.0, the previous hard-coded value)
    """
    global _ws_loop
    from websockets.asyncio.server import serve as ws_serve

    # NOTE(review/security): the API is unauthenticated, accepts server-side file
    # paths and a 'terminate' request. The default still binds every interface for
    # backward compatibility; set TTS_HOST=127.0.0.1 to restrict it to local clients.
    host = os.environ.get('TTS_HOST', '0.0.0.0')

    _ws_loop = asyncio.get_running_loop()
    async with ws_serve(_ws_handler, host, PORT, reuse_address=True):
        log(f'listening on port {PORT}')
        # Never completes on its own; keeps the server context open.
        await asyncio.Future()
|
||||||
|
|
||||||
|
|
||||||
|
# Run the server, then signal both worker threads to stop.
try:
    asyncio.run(main())
except KeyboardInterrupt:
    pass
except RuntimeError as e:
    # The 'terminate' request calls loop.stop() while main() is still awaiting,
    # which asyncio.run() reports as "Event loop stopped before Future completed."
    # That is the expected shutdown path, not a failure; re-raise anything else.
    if 'Event loop stopped' not in str(e):
        raise
finally:
    _job_queue.put(_SENTINEL)
    _playback_queue.put(_SENTINEL)
|
||||||
|
|||||||
34
examples/abort-demo.mjs
Normal file
34
examples/abort-demo.mjs
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
// Start speaking a long sentence, then abort a few seconds in.
// Usage: node abort-demo.mjs

const PORT = process.env.TTS_PORT ?? '11500'
const text = 'This is a very long sentence that will be cut off before it finishes, ' +
	'because a few seconds after playback starts we will send an abort command ' +
	'to demonstrate the abort current functionality of the server.'

// Job events are broadcast to every connected client. Tag our job with an id
// of our own so events belonging to other clients' jobs can be ignored.
const id = Math.floor(Math.random() * Number.MAX_SAFE_INTEGER) + 1

const ws = new WebSocket(`ws://localhost:${PORT}`)
let abortTimer = null

ws.addEventListener('open', () => {
	ws.send(JSON.stringify({ type: 'speak', id, text }))
})

ws.addEventListener('message', ({ data }) => {
	const msg = JSON.parse(data)
	console.log(msg)

	// Direct reply to one of our requests: only a failure needs handling.
	if (msg.status === 'error') {
		console.error('error:', msg.message)
		process.exit(1)
	}

	// Everything below concerns events for our own job only.
	if (msg.event === undefined || msg.id !== id) {
		return
	}

	if (msg.event === 'started') {
		abortTimer = setTimeout(() => {
			console.log('aborting...')
			ws.send(JSON.stringify({ type: 'abort_current' }))
		}, 3000)
	}

	if (msg.event === 'aborted' || msg.event === 'finished' || msg.event === 'error') {
		// The job is over; a pending abort would hit whatever plays next.
		clearTimeout(abortTimer)
		ws.close()
	}
})

ws.addEventListener('error', (e) => {
	console.error('error:', e.message)
	process.exit(1)
})
|
||||||
28
examples/chime.mjs
Normal file
28
examples/chime.mjs
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
// Play a chime WAV file via the Chatterbox TTS server.
// The server reads the path from its own filesystem.
// Usage: node chime.mjs /path/to/chime.wav

const PORT = process.env.TTS_PORT ?? '11500'
const path = process.argv[2]

if (!path) {
	console.error('usage: node chime.mjs /path/to/chime.wav')
	process.exit(1)
}

// Job events are broadcast to every connected client. Tag our job with an id
// of our own so events belonging to other clients' jobs can be ignored.
const id = Math.floor(Math.random() * Number.MAX_SAFE_INTEGER) + 1

const ws = new WebSocket(`ws://localhost:${PORT}`)

ws.addEventListener('open', () => {
	ws.send(JSON.stringify({ type: 'chime', id, path }))
})

ws.addEventListener('message', ({ data }) => {
	const msg = JSON.parse(data)

	// Direct reply to our request: a rejected request will never produce events.
	if (msg.status === 'error') {
		console.error('error:', msg.message)
		process.exit(1)
	}

	if (msg.event === undefined || msg.id !== id) {
		return
	}

	if (msg.event === 'error') {
		console.error('error:', msg.message)
		process.exitCode = 1
	}
	if (msg.event === 'finished' || msg.event === 'aborted' || msg.event === 'error') {
		ws.close()
	}
})

ws.addEventListener('error', (e) => {
	console.error('error:', e.message)
	process.exit(1)
})
|
||||||
23
examples/speak.mjs
Normal file
23
examples/speak.mjs
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
// Speak text via the Chatterbox TTS server.
// Usage: node speak.mjs "Hello world"

const PORT = process.env.TTS_PORT ?? '11500'
const text = process.argv[2] ?? 'Hello from Node.'

// Job events are broadcast to every connected client. Tag our job with an id
// of our own so events belonging to other clients' jobs can be ignored.
const id = Math.floor(Math.random() * Number.MAX_SAFE_INTEGER) + 1

const ws = new WebSocket(`ws://localhost:${PORT}`)

ws.addEventListener('open', () => {
	ws.send(JSON.stringify({ type: 'speak', id, text }))
})

ws.addEventListener('message', ({ data }) => {
	const msg = JSON.parse(data)

	// Direct reply to our request: a rejected request will never produce events.
	if (msg.status === 'error') {
		console.error('error:', msg.message)
		process.exit(1)
	}

	if (msg.event === undefined || msg.id !== id) {
		return
	}

	if (msg.event === 'error') {
		console.error('error:', msg.message)
		process.exitCode = 1
	}
	if (msg.event === 'finished' || msg.event === 'aborted' || msg.event === 'error') {
		ws.close()
	}
})

ws.addEventListener('error', (e) => {
	console.error('error:', e.message)
	process.exit(1)
})
|
||||||
24
examples/terminate.mjs
Normal file
24
examples/terminate.mjs
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
// Gracefully terminate the Chatterbox TTS server.
// Usage: node terminate.mjs

const PORT = process.env.TTS_PORT ?? '11500'

const ws = new WebSocket(`ws://localhost:${PORT}`)

ws.addEventListener('open', () => {
	ws.send(JSON.stringify({ type: 'terminate' }))
})

ws.addEventListener('message', ({ data }) => {
	const msg = JSON.parse(data)

	// Job events ({"event": ...}) are broadcast to every client and carry no
	// `status`; they are not the reply to our request, so skip them.
	if (msg.status === undefined) {
		return
	}

	if (msg.status !== 'ok') {
		console.error('error:', msg.message)
		process.exit(1)
	}
	ws.close()
})

ws.addEventListener('error', (e) => {
	console.error('error:', e.message)
	process.exit(1)
})
|
||||||
30
examples/voice-clone.mjs
Normal file
30
examples/voice-clone.mjs
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
// Speak text using a reference WAV for voice cloning.
// The server reads the audio_prompt path from its own filesystem.
// Usage: node voice-clone.mjs /path/to/reference.wav "Text to speak"

const PORT = process.env.TTS_PORT ?? '11500'
const audio_prompt = process.argv[2]
const text = process.argv[3] ?? 'Hello, this is a cloned voice.'

if (!audio_prompt) {
	console.error('usage: node voice-clone.mjs /path/to/reference.wav "text"')
	process.exit(1)
}

// Job events are broadcast to every connected client. Tag our job with an id
// of our own so events belonging to other clients' jobs can be ignored.
const id = Math.floor(Math.random() * Number.MAX_SAFE_INTEGER) + 1

const ws = new WebSocket(`ws://localhost:${PORT}`)

ws.addEventListener('open', () => {
	ws.send(JSON.stringify({ type: 'speak', id, text, audio_prompt }))
})

ws.addEventListener('message', ({ data }) => {
	const msg = JSON.parse(data)

	// Direct reply to our request: a rejected request will never produce events.
	if (msg.status === 'error') {
		console.error('error:', msg.message)
		process.exit(1)
	}

	if (msg.event === undefined || msg.id !== id) {
		return
	}

	if (msg.event === 'error') {
		console.error('error:', msg.message)
		process.exitCode = 1
	}
	if (msg.event === 'finished' || msg.event === 'aborted' || msg.event === 'error') {
		ws.close()
	}
})

ws.addEventListener('error', (e) => {
	console.error('error:', e.message)
	process.exit(1)
})
|
||||||
@@ -12,7 +12,7 @@ fi
|
|||||||
|
|
||||||
echo "==> installing Python dependencies"
|
echo "==> installing Python dependencies"
|
||||||
"${VENV}/bin/pip" install --upgrade pip --quiet
|
"${VENV}/bin/pip" install --upgrade pip --quiet
|
||||||
"${VENV}/bin/pip" install chatterbox-tts
|
"${VENV}/bin/pip" install chatterbox-tts websockets
|
||||||
|
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
|
|||||||
Reference in New Issue
Block a user