From 4fa4baee17dae86e5a6017b9f7034234542b8683 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikael=20L=C3=B6vqvist?=
Date: Sun, 7 Jun 2026 10:14:56 +0200
Subject: [PATCH] Initial commit

---
 README.md                 |  18 +++++
 setup-venv-local-build.sh | 145 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 163 insertions(+)
 create mode 100644 README.md
 create mode 100755 setup-venv-local-build.sh

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..d9874b7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,18 @@
+# Voice to text interface
+
+## Overview
+
+This project aims to provide voice to text using faster-whisper as backend.
+
+
+## Origin
+
+This project started as a [vibe-coded](https://en.wikipedia.org/wiki/Vibe_coding) [experiment](https://gitea.efforting.tech/mikael-lovqvists-claude-agent/claude-voice-experiment) but this version is somewhat more hands on.
+
+
+
+## Setup
+
+### Setup [venv](https://docs.python.org/3/library/venv.html) for [python](https://www.python.org/)
+
+We will have two different setups here depending on if you want to build ctranslate2 locally or not. This shall be documented.
\ No newline at end of file
diff --git a/setup-venv-local-build.sh b/setup-venv-local-build.sh
new file mode 100755
index 0000000..d86aa92
--- /dev/null
+++ b/setup-venv-local-build.sh
@@ -0,0 +1,145 @@
+#!/usr/bin/env bash
+#
+# Build CTranslate2 from source with CUDA support into a local Python venv,
+# install faster-whisper on top of it, and pre-download a Whisper model.
+#
+# Usage: setup-venv-local-build.sh [MODEL]   (MODEL defaults to base.en)
+#
+# Environment:
+#   CUDA_HOME  CUDA toolkit root; common locations are probed when unset.
+#
+# If ${HOME}/.secrets/hugging-face.token exists, its content is exported as
+# HF_TOKEN before the model download.
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VENV="${SCRIPT_DIR}/venv"
+BUILD_DIR="${SCRIPT_DIR}/build/ctranslate2"
+MODEL="${1:-base.en}"
+TOKEN_FILE="${HOME}/.secrets/hugging-face.token"
+
+# Locate CUDA: honour a preset CUDA_HOME, otherwise probe common prefixes.
+if [[ -z "${CUDA_HOME:-}" ]]; then
+  for candidate in /opt/cuda /usr/local/cuda /usr; do
+    if [[ -f "${candidate}/bin/nvcc" ]]; then
+      export CUDA_HOME="${candidate}"
+      break
+    fi
+  done
+fi
+
+if [[ -z "${CUDA_HOME:-}" ]]; then
+  echo "ERROR: CUDA not found. Set CUDA_HOME manually." >&2
+  exit 1
+fi
+
+echo "==> CUDA: ${CUDA_HOME}"
+# sed consumes all of its input, so nvcc cannot be killed by SIGPIPE (which
+# `head -1` could cause, aborting the script under `set -o pipefail`).
+"${CUDA_HOME}/bin/nvcc" --version | sed -n '1p'
+
+for tool in cmake git; do
+  if ! command -v "${tool}" &>/dev/null; then
+    echo "ERROR: ${tool} not found — install with: sudo pacman -S ${tool}" >&2
+    exit 1
+  fi
+done
+
+if [[ ! -d "${VENV}" ]]; then
+  echo "==> creating venv at ${VENV}"
+  python3 -m venv "${VENV}"
+fi
+
+echo "==> upgrading pip + build tools"
+"${VENV}/bin/pip" install --upgrade pip wheel setuptools pybind11 --quiet
+
+# --- clone (skipped if already done) ---
+if [[ ! -d "${BUILD_DIR}/src/.git" ]]; then
+  echo "==> cloning ctranslate2 from source..."
+  mkdir -p "${BUILD_DIR}"
+  git clone --recursive --depth 1 https://github.com/OpenNMT/CTranslate2 "${BUILD_DIR}/src"
+else
+  echo "==> ctranslate2 source already present, skipping clone"
+fi
+
+# --- cmake build (skipped if library already installed) ---
+# Collect libctranslate2.so and any versioned libctranslate2.so.* via a glob
+# (nullglob turns "no match" into an empty array) instead of parsing `ls`.
+shopt -s nullglob
+installed_libs=("${VENV}/lib/libctranslate2.so"*)
+shopt -u nullglob
+if (( ${#installed_libs[@]} == 0 )); then
+  echo "==> configuring ctranslate2 C++ library..."
+  mkdir -p "${BUILD_DIR}/cmake-build"
+  # CMAKE_INSTALL_LIBDIR=lib keeps the library in ${VENV}/lib even on distros
+  # whose default is lib64; the checks and the rpath below rely on that path.
+  cmake \
+    -S "${BUILD_DIR}/src" \
+    -B "${BUILD_DIR}/cmake-build" \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_INSTALL_PREFIX="${VENV}" \
+    -DCMAKE_INSTALL_LIBDIR=lib \
+    -DWITH_CUDA=ON \
+    -DCUDA_TOOLKIT_ROOT_DIR="${CUDA_HOME}" \
+    -DCMAKE_CUDA_COMPILER="${CUDA_HOME}/bin/nvcc" \
+    -DWITH_MKL=OFF \
+    -DBUILD_CLI=OFF \
+    -DWITH_TESTS=OFF \
+    -DCMAKE_POLICY_VERSION_MINIMUM=3.5
+
+  echo "==> building ctranslate2 C++ library (this takes 10-20 minutes)..."
+  cmake --build "${BUILD_DIR}/cmake-build" --parallel "$(nproc)"
+  cmake --install "${BUILD_DIR}/cmake-build"
+else
+  echo "==> libctranslate2 already installed, skipping cmake build"
+fi
+
+echo "==> verifying install..."
+ls "${VENV}/include/ctranslate2/" | sed -n '1,3p'
+ls "${VENV}/lib/libctranslate2"* 2>/dev/null || { echo "ERROR: libctranslate2 not found in venv/lib" >&2; exit 1; }
+grep "WITH_CUDA" "${BUILD_DIR}/cmake-build/CMakeCache.txt" | grep -v "^#" || true
+
+# --- Python bindings ---
+# Always reinstall from source so the bindings use the local CUDA build rather
+# than a prebuilt PyPI wheel.
+echo "==> removing any existing ctranslate2 install..."
+"${VENV}/bin/pip" uninstall -y ctranslate2 2>/dev/null || true
+
+echo "==> building ctranslate2 Python bindings from source..."
+CT2_ROOT="${VENV}" \
+LIBRARY_PATH="${VENV}/lib:${VENV}/lib64${LIBRARY_PATH:+:${LIBRARY_PATH}}" \
+LDFLAGS="-Wl,-rpath,${VENV}/lib" \
+  "${VENV}/bin/pip" install "${BUILD_DIR}/src/python" --no-build-isolation
+
+# --- faster-whisper ---
+# NOTE(review): --no-deps presumably stops pip from replacing the local
+# ctranslate2 build; nothing in this script installs faster-whisper's other
+# dependencies, so confirm they are provided elsewhere.
+if ! "${VENV}/bin/python3" -c "import faster_whisper" &>/dev/null; then
+  echo "==> installing faster-whisper"
+  "${VENV}/bin/pip" install faster-whisper --no-deps
+else
+  echo "==> faster-whisper already installed, skipping"
+fi
+
+if [[ -f "${TOKEN_FILE}" ]]; then
+  # Assign before exporting so a failed read is not masked by `export`.
+  HF_TOKEN="$(<"${TOKEN_FILE}")"
+  export HF_TOKEN
+  echo "==> HuggingFace token loaded from ${TOKEN_FILE}"
+else
+  echo "==> no token found at ${TOKEN_FILE} — unauthenticated download"
+fi
+
+echo "==> pre-downloading model: ${MODEL}"
+# NOTE(review): the original inline Python for this step was lost in this copy
+# of the patch; this is a minimal reconstruction — confirm against upstream.
+"${VENV}/bin/python3" - "${MODEL}" <<'PY'
+import sys
+
+from faster_whisper import download_model
+
+print(download_model(sys.argv[1]))
+PY
+
+echo "==> done. Run with: node query-demo.mjs --stt faster-whisper"