Initial commit

2026-06-07 10:14:56 +02:00
commit 4fa4baee17
2 changed files with 136 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -0,0 +1,18 @@
+# Voice-to-text interface
+
+## Overview
+
+This project aims to provide voice-to-text transcription using faster-whisper as the backend.
+
+
+## Origin
+
+This project started as a [vibe-coded](https://en.wikipedia.org/wiki/Vibe_coding) [experiment](https://gitea.efforting.tech/mikael-lovqvists-claude-agent/claude-voice-experiment), but this version is somewhat more hands-on.
+
+
+
+## Setup
+
+### Setup [venv](https://docs.python.org/3/library/venv.html) for [python](https://www.python.org/)
+
+There will be two setup variants, depending on whether you want to build ctranslate2 locally or not. Both are yet to be documented.
--- a/setup-venv-local-build.sh
+++ b/setup-venv-local-build.sh
@@ -0,0 +1,118 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VENV="${SCRIPT_DIR}/venv"
+BUILD_DIR="${SCRIPT_DIR}/build/ctranslate2"
+MODEL="${1:-base.en}"
+TOKEN_FILE="${HOME}/.secrets/hugging-face.token"
+
+# Locate CUDA: keep a caller-supplied CUDA_HOME, otherwise probe the usual install
+# prefixes for an executable nvcc and export the first match.
+if [[ -z "${CUDA_HOME:-}" ]]; then
+	for candidate in /opt/cuda /usr/local/cuda /usr; do
+		if [[ -x "${candidate}/bin/nvcc" ]]; then
+			export CUDA_HOME="${candidate}"
+			break
+		fi
+	done
+fi
+
+if [[ -z "${CUDA_HOME:-}" ]]; then
+	echo "ERROR: CUDA not found. Set CUDA_HOME manually." >&2
+	exit 1
+fi
+
+# A CUDA_HOME inherited from the environment is not probed above, so validate it here and
+# fail with a readable message instead of a bare "No such file or directory" from the shell.
+if [[ ! -x "${CUDA_HOME}/bin/nvcc" ]]; then
+	echo "ERROR: no executable nvcc at ${CUDA_HOME}/bin/nvcc. Fix CUDA_HOME." >&2
+	exit 1
+fi
+
+echo "==> CUDA: ${CUDA_HOME}"
+# Print only the first line of the version banner. sed consumes all of its input, so nvcc
+# cannot be killed by SIGPIPE (which `| head -1` permits and pipefail would turn into an abort).
+"${CUDA_HOME}/bin/nvcc" --version | sed -n '1p'
+
+for tool in cmake git; do
+	if ! command -v "${tool}" &>/dev/null; then
+		echo "ERROR: ${tool} not found — install with: sudo pacman -S ${tool}" >&2
+		exit 1
+	fi
+done
+
+if [ ! -d "${VENV}" ]; then
+	echo "==> creating venv at ${VENV}"
+	python3 -m venv "${VENV}"
+fi
+
+echo "==> upgrading pip + build tools"
+"${VENV}/bin/pip" install --upgrade pip wheel setuptools pybind11 --quiet
+
+# --- clone (skipped if already done) ---
+# A .git directory under src/ is taken as proof that an earlier run already cloned the repository.
+if [[ -d "${BUILD_DIR}/src/.git" ]]; then
+	echo "==> ctranslate2 source already present, skipping clone"
+else
+	echo "==> cloning ctranslate2 from source..."
+	mkdir -p "${BUILD_DIR}"
+	# Shallow clone of the default branch, including submodules.
+	git clone --recursive --depth 1 https://github.com/OpenNMT/CTranslate2 "${BUILD_DIR}/src"
+fi
+
+# --- cmake build (skipped if library already installed) ---
+# "Installed" means libctranslate2.so, or a versioned libctranslate2.so.*, exists in the venv's lib/.
+if [[ ! -f "${VENV}/lib/libctranslate2.so" ]] && ! ls "${VENV}/lib/libctranslate2.so."* >/dev/null 2>&1; then
+	echo "==> configuring ctranslate2 C++ library..."
+	mkdir -p "${BUILD_DIR}/cmake-build"
+	# CMAKE_INSTALL_LIBDIR is pinned to "lib": GNUInstallDirs selects lib64 on some distributions,
+	# which the check above, the verification step and the bindings' rpath (all ${VENV}/lib) would miss.
+	# NOTE(review): assumes CTranslate2's install rules honour GNUInstallDirs; the flag is harmless otherwise.
+	cmake \
+		-S "${BUILD_DIR}/src" \
+		-B "${BUILD_DIR}/cmake-build" \
+		-DCMAKE_BUILD_TYPE=Release \
+		-DCMAKE_INSTALL_PREFIX="${VENV}" \
+		-DCMAKE_INSTALL_LIBDIR=lib \
+		-DWITH_CUDA=ON \
+		-DCUDA_TOOLKIT_ROOT_DIR="${CUDA_HOME}" \
+		-DCMAKE_CUDA_COMPILER="${CUDA_HOME}/bin/nvcc" \
+		-DWITH_MKL=OFF \
+		-DBUILD_CLI=OFF \
+		-DWITH_TESTS=OFF \
+		-DCMAKE_POLICY_VERSION_MINIMUM=3.5
+
+	echo "==> building ctranslate2 C++ library (this takes 10-20 minutes)..."
+	cmake --build "${BUILD_DIR}/cmake-build" --parallel "$(nproc)"
+	# Installs headers and the shared library into the venv prefix.
+	cmake --install "${BUILD_DIR}/cmake-build"
+else
+	echo "==> libctranslate2 already installed, skipping cmake build"
+fi
+
+echo "==> verifying install..."
+ls "${VENV}/include/ctranslate2/" | head -3
+ls "${VENV}/lib/libctranslate2"* 2>/dev/null || { echo "ERROR: libctranslate2 not found in venv/lib" >&2; exit 1; }
+grep "WITH_CUDA" "${BUILD_DIR}/cmake-build/CMakeCache.txt" | grep -v "^#" || true
+
+# --- Python bindings ---
+# Always reinstall from source to ensure we use our CUDA 13 build, not a PyPI wheel
+echo "==> removing any existing ctranslate2 install..."
+"${VENV}/bin/pip" uninstall -y ctranslate2 2>/dev/null || true
+
+echo "==> building ctranslate2 Python bindings from source..."
+CT2_ROOT="${VENV}" \
+LIBRARY_PATH="${VENV}/lib:${VENV}/lib64${LIBRARY_PATH:+:${LIBRARY_PATH}}" \
+LDFLAGS="-Wl,-rpath,${VENV}/lib" \
+	"${VENV}/bin/pip" install "${BUILD_DIR}/src/python" --no-build-isolation
+
+# --- faster-whisper ---
+if ! "${VENV}/bin/python3" -c "import faster_whisper" &>/dev/null 2>&1; then
+	echo "==> installing faster-whisper"
+	"${VENV}/bin/pip" install faster-whisper --no-deps
+else
+	echo "==> faster-whisper already installed, skipping"
+fi
+
+# Optional Hugging Face credentials: when the token file exists, export its contents as
+# HF_TOKEN so that child processes started later inherit it.
+if [[ -f "${TOKEN_FILE}" ]]; then
+	# Assign first, export second: `export VAR="$(cmd)"` returns export's own status, so a failed
+	# read would go unnoticed and the script would still claim the token was loaded.
+	HF_TOKEN="$(<"${TOKEN_FILE}")"
+	export HF_TOKEN
+	echo "==> HuggingFace token loaded from ${TOKEN_FILE}"
+else
+	echo "==> no token found at ${TOKEN_FILE} — unauthenticated download"
+fi
+
+echo "==> pre-downloading model: ${MODEL}"
+# The model name is passed as argv[1] and the heredoc delimiter is quoted, so the shell never
+# pastes ${MODEL} into the Python source; a quote or backslash in the name can no longer break
+# or alter the program.
+"${VENV}/bin/python3" - "${MODEL}" <<'EOF'
+import sys
+
+from faster_whisper import WhisperModel
+
+model = sys.argv[1]
+print(f"downloading {model}...")
+WhisperModel(model, device="cuda", compute_type="int8_float16")
+print("done")
+EOF
+
+chmod +x "${SCRIPT_DIR}/faster-whisper-server.py"
+
+echo ""
+echo "==> done. Run with: node query-demo.mjs --stt faster-whisper"