stt-server/setup-venv-local-build.sh

#!/usr/bin/env bash
#
# setup-venv-local-build.sh — builds ctranslate2 from source and installs faster-whisper.
#
# USE THIS SCRIPT when the PyPI ctranslate2 wheel does not match your CUDA version.
# The PyPI wheel targets a specific CUDA major version (e.g. CUDA 12). If your system
# has a newer version (e.g. CUDA 13), the wheel will fail at runtime because it tries
# to dlopen libcublas.so.12 which does not exist. Building from source compiles against
# your actual installed CUDA and links correctly.
#
# For systems where the PyPI wheel works (CUDA version matches), use setup-venv.sh
# instead — it is much faster and simpler.
#
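# Usage: setup-venv-local-build.sh [MODEL]
#   MODEL           faster-whisper model name to pre-download (default: base.en)
#
# Environment overrides: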
#   PYTHON_ENV      path to venv (default: ./venv)
#   HF_TOKEN_FILE   path to HuggingFace token file (default: ~/.secrets/hugging-face.token)
#   HF_HUB_CACHE    path to HuggingFace hub cache (default: ~/.cache/huggingface/hub)
#   CUDA_HOME       path to CUDA toolkit (auto-detected if not set)
#
# Strict mode: abort on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# First positional argument selects the model; falls back to base.en.
MODEL="${1:-base.en}"

# HuggingFace token file, overridable through HF_TOKEN_FILE.
TOKEN_FILE="${HF_TOKEN_FILE:-${HOME}/.secrets/hugging-face.token}"

# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Both the venv (unless PYTHON_ENV overrides it) and the ctranslate2 build
# tree live next to the script.
VENV="${PYTHON_ENV:-${SCRIPT_DIR}/venv}"
BUILD_DIR="${SCRIPT_DIR}/build/ctranslate2"

# Resolve the CUDA toolkit root. An existing CUDA_HOME is used as-is; otherwise
# the first candidate prefix that contains bin/nvcc is exported as CUDA_HOME.
if [[ -z "${CUDA_HOME:-}" ]]; then
	for cuda_root in /opt/cuda /usr/local/cuda /usr; do
		[[ -f "${cuda_root}/bin/nvcc" ]] || continue
		export CUDA_HOME="${cuda_root}"
		break
	done
fi

# Nothing was preset and no candidate matched: give up.
[[ -n "${CUDA_HOME:-}" ]] || {
	echo "ERROR: CUDA not found. Set CUDA_HOME manually." >&2
	exit 1
}

# Report the toolkit in use, plus the first line of nvcc's version banner.
echo "==> CUDA: ${CUDA_HOME}"
"${CUDA_HOME}/bin/nvcc" --version | head -1

# Every build prerequisite must be resolvable by the shell; stop at the first
# one that is missing.
for required in cmake git python3; do
	command -v "${required}" >/dev/null 2>&1 && continue
	echo "ERROR: ${required} not found" >&2
	exit 1
done

# Create the virtual environment only when its directory does not exist yet.
if [[ ! -d "${VENV}" ]]; then
	echo "==> creating venv at ${VENV}"
	python3 -m venv "${VENV}"
fi

# Packaging/build tooling is upgraded quietly; the runtime packages are
# installed with normal pip output.
venv_pip="${VENV}/bin/pip"
build_tools=(pip wheel setuptools pybind11)
runtime_pkgs=(torch silero-vad websockets)

echo "==> upgrading pip + build tools"
"${venv_pip}" install --upgrade "${build_tools[@]}" --quiet
"${venv_pip}" install "${runtime_pkgs[@]}"

# --- clone (skipped if already done) ---
# A .git directory under the source path is taken as proof of an earlier clone.
ct2_src="${BUILD_DIR}/src"
if [[ -d "${ct2_src}/.git" ]]; then
	echo "==> ctranslate2 source already present, skipping clone"
else
	echo "==> cloning ctranslate2 from source..."
	mkdir -p "${BUILD_DIR}"
	# Shallow clone of the latest revision, including submodules.
	git clone --recursive --depth 1 https://github.com/OpenNMT/CTranslate2 "${ct2_src}"
fi

# --- cmake build (skipped if library already installed) ---
# Configures, builds and installs the ctranslate2 C++ library into the venv
# prefix. The build is skipped when ${VENV}/lib already holds libctranslate2.so
# or a versioned libctranslate2.so.* file.
#
# CMAKE_INSTALL_LIBDIR is pinned to "lib" because CMake's GNUInstallDirs can
# default to "lib64" on some 64-bit Linux distributions. This skip check, the
# later verification step, and the rpath handed to the Python bindings build
# all look in ${VENV}/lib, so the library must land there.
# NOTE(review): assumes the upstream install rules honor CMAKE_INSTALL_LIBDIR
# (they are expected to, via GNUInstallDirs) — confirm against the cloned tree.
ct2_lib_installed=0
# An unmatched glob stays a literal pattern, which simply fails the -e test.
for ct2_lib in "${VENV}/lib/libctranslate2.so" "${VENV}/lib/libctranslate2.so."*; do
	if [ -e "${ct2_lib}" ]; then
		ct2_lib_installed=1
		break
	fi
done

if [ "${ct2_lib_installed}" -eq 0 ]; then
	echo "==> configuring ctranslate2 C++ library..."
	mkdir -p "${BUILD_DIR}/cmake-build"
	cmake \
		-S "${BUILD_DIR}/src" \
		-B "${BUILD_DIR}/cmake-build" \
		-DCMAKE_BUILD_TYPE=Release \
		-DCMAKE_INSTALL_PREFIX="${VENV}" \
		-DCMAKE_INSTALL_LIBDIR=lib \
		-DWITH_CUDA=ON \
		-DCUDA_TOOLKIT_ROOT_DIR="${CUDA_HOME}" \
		-DCMAKE_CUDA_COMPILER="${CUDA_HOME}/bin/nvcc" \
		-DWITH_MKL=OFF \
		-DBUILD_CLI=OFF \
		-DWITH_TESTS=OFF \
		-DCMAKE_POLICY_VERSION_MINIMUM=3.5

	echo "==> building ctranslate2 C++ library (this takes 10-20 minutes)..."
	cmake --build "${BUILD_DIR}/cmake-build" --parallel "$(nproc)"
	cmake --install "${BUILD_DIR}/cmake-build"
else
	echo "==> libctranslate2 already installed, skipping cmake build"
fi

# Sanity-check the C++ install: headers and the shared library must be present
# in the venv prefix; the WITH_CUDA cache entry is shown when available.
echo "==> verifying install..."
if [ ! -d "${VENV}/include/ctranslate2" ]; then
	echo "ERROR: ctranslate2 headers not found in ${VENV}/include" >&2
	exit 1
fi
# Print a small sample of the installed headers. The listing is informational
# only, so `|| true` keeps an early pipe close by `head` from aborting the
# script under `set -o pipefail`.
ls "${VENV}/include/ctranslate2/" | head -3 || true
ls "${VENV}/lib/libctranslate2"* 2>/dev/null || { echo "ERROR: libctranslate2 not found in venv/lib" >&2; exit 1; }
# The cache file is absent when the build tree was removed after an earlier
# install; skip the report then instead of letting grep print an error.
if [ -f "${BUILD_DIR}/cmake-build/CMakeCache.txt" ]; then
	grep "WITH_CUDA" "${BUILD_DIR}/cmake-build/CMakeCache.txt" | grep -v "^#" || true
fi

# --- faster-whisper (with all deps, including PyPI ctranslate2) ---
# A regular pip install resolves every dependency of faster-whisper (av,
# huggingface_hub, etc.). It also drags in the PyPI ctranslate2 wheel, which
# the next step replaces. An import probe decides whether the install is needed.
if "${VENV}/bin/python3" -c "import faster_whisper" >/dev/null 2>&1; then
	echo "==> faster-whisper already installed, skipping"
else
	echo "==> installing faster-whisper"
	"${VENV}/bin/pip" install faster-whisper
fi

# --- Python bindings (always reinstalled from source) ---
# Replace the PyPI ctranslate2 wheel pulled in by faster-whisper with the
# bindings built from the cloned source. This is the purpose of the script: the
# PyPI wheel is tied to one CUDA major version (e.g. libcublas.so.12), whereas
# the local build links against the CUDA installed on this system.
echo "==> removing PyPI ctranslate2..."
# Best effort: a failing uninstall is ignored and its stderr is discarded.
"${VENV}/bin/pip" uninstall -y ctranslate2 2>/dev/null || :

echo "==> installing source-built ctranslate2 Python bindings..."
# CT2_ROOT, LIBRARY_PATH and LDFLAGS are set only for this one pip invocation;
# LDFLAGS adds an rpath entry for the venv's lib directory.
venv_lib="${VENV}/lib"
env \
	CT2_ROOT="${VENV}" \
	LIBRARY_PATH="${venv_lib}:${VENV}/lib64${LIBRARY_PATH:+:${LIBRARY_PATH}}" \
	LDFLAGS="-Wl,-rpath,${venv_lib}" \
	"${VENV}/bin/pip" install "${BUILD_DIR}/src/python" --no-build-isolation

# --- model download ---
# Loads the HuggingFace token (when the token file exists) into HF_TOKEN, then
# instantiates the requested model once so that it is downloaded ahead of time.
if [ -f "${TOKEN_FILE}" ]; then
	# Assign first, export second: `export VAR="$(cmd)"` would hide a failing
	# `cat` from `set -e`.
	HF_TOKEN="$(cat "${TOKEN_FILE}")"
	export HF_TOKEN
	echo "==> HuggingFace token loaded from ${TOKEN_FILE}"
else
	echo "==> no token found at ${TOKEN_FILE} — unauthenticated download"
fi

if [ -n "${HF_HUB_CACHE:-}" ]; then
	echo "==> HuggingFace cache: ${HF_HUB_CACHE}"
fi

echo "==> pre-downloading model: ${MODEL}"
# The model name is passed as an argument and the heredoc delimiter is quoted,
# so the shell never splices the name into the Python source. A name containing
# quotes or backslashes therefore cannot break or alter the program.
"${VENV}/bin/python3" - "${MODEL}" <<'EOF'
import sys

from faster_whisper import WhisperModel

model_name = sys.argv[1]
print(f"downloading {model_name}...")
WhisperModel(model_name, device="cuda", compute_type="int8_float16")
print("done")
EOF

echo ""
echo "==> done. Venv ready at ${VENV}"