Skip to content

chore: Migrate from Poetry to UV, add health checkpoint, and improve Docker image #15

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Paths excluded from the Docker build context.
# Lines starting with "!" re-include a path that an earlier pattern excluded.

# Version control
.git
.gitignore
.gitattributes

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
.env
.venv/
venv/
ENV/
env/
.pytest_cache/
.coverage
htmlcov/
.tox/
.nox/
.hypothesis/
.mypy_cache/

# IDE
.idea/
.vscode/
*.swp
*.swo
.DS_Store

# Project specific
tests/
docs/
*.md
# README.md must stay in the context: the Dockerfile in this change copies it
# together with pyproject.toml and uv.lock.
!README.md
Makefile
docker-compose*.yml
.dockerignore
Dockerfile*

# Cache and temp files
*.log
.cache/
tmp/
temp/

# Distribution / packaging
dist/
build/
*.egg-info/

# Local development
*.local
.env.local
.env.*.local

# Model cache and downloads
model_cache/
downloads/

# NOTE(review): the four entries below repeat patterns from the "Python"
# section (*.py[cod] already covers .pyc/.pyo/.pyd, and .env is listed there);
# they are harmless but redundant.
*.pyc
*.pyo
*.pyd
.env
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -18,3 +18,4 @@
.idea/misc.xml
.env
.DS_Store
__pycache__/
110 changes: 83 additions & 27 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,42 +1,98 @@
# Use a base image with CUDA support and the desired Python version
FROM python:3.12-slim-bookworm

FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder
ARG CPU_ONLY=false

WORKDIR /app

RUN apt-get update \
&& apt-get install -y redis-server libgl1 libglib2.0-0 curl wget git procps \
&& apt-get clean
# Install build dependencies
RUN apt-get update && \
apt-get install -y --no-install-recommends libgl1 libglib2.0-0 && \
rm -rf /var/lib/apt/lists/*

# Install Poetry and configure it
RUN pip install poetry \
&& poetry config virtualenvs.create false
# Enable bytecode compilation and set proper link mode for cache mounting
ENV UV_COMPILE_BYTECODE=1 \
UV_LINK_MODE=copy \
HF_HOME=/app/.cache/huggingface \
TORCH_HOME=/app/.cache/torch \
PYTHONPATH=/app \
OMP_NUM_THREADS=4

COPY pyproject.toml poetry.lock ./
# Copy dependency files and README
COPY pyproject.toml uv.lock README.md ./

# Install dependencies before torch
RUN poetry install --no-interaction --no-root
# Install dependencies but not the project itself
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --frozen --no-install-project

# Copy the rest of the project
COPY . .

# Install PyTorch separately based on CPU_ONLY flag
RUN if [ "$CPU_ONLY" = "true" ]; then \
pip install --no-cache-dir torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu; \
# Better GPU detection: Check both architecture and if NVIDIA is available
RUN ARCH=$(uname -m) && \
if [ "$CPU_ONLY" = "true" ] || [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ] || ! command -v nvidia-smi >/dev/null 2>&1; then \
USE_GPU=false; \
else \
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121; \
USE_GPU=true; \
fi && \
echo "Detected GPU availability: $USE_GPU" && \
# For PyTorch installation with architecture detection
uv pip uninstall -y torch torchvision torchaudio || true && \
if [ "$USE_GPU" = "false" ]; then \
# For CPU or ARM architectures or no NVIDIA
echo "Installing PyTorch for CPU" && \
uv pip install --no-cache-dir torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu; \
else \
# For x86_64 with GPU support
echo "Installing PyTorch with CUDA support" && \
uv pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121; \
fi

ENV HF_HOME=/tmp/ \
TORCH_HOME=/tmp/ \
OMP_NUM_THREADS=4
# Install the project in non-editable mode
# NOTE(review): this second `uv sync --frozen` runs after the PyTorch step
# above; confirm it does not replace the torch build selected there with the
# one pinned in uv.lock.
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --frozen --no-editable

# Download models for the pipeline
# Calls docling's StandardPdfPipeline.download_models_hf(force=True) at build time.
# NOTE(review): presumably the artifacts land under HF_HOME
# (/app/.cache/huggingface in this stage) — verify where they are written.
RUN uv run python -c "from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline; artifacts_path = StandardPdfPipeline.download_models_hf(force=True)"

# Pre-download EasyOCR models with better GPU detection
# Same CPU_ONLY / architecture / nvidia-smi test as the PyTorch step: the first
# branch builds the Reader with gpu=False, the other branch with gpu=True.
# Both branches request the fr, de, es, en, it and pt language models.
# NOTE(review): nvidia-smi is looked up inside the build container; confirm it
# can be present there, otherwise the gpu=True branch is never taken.
RUN ARCH=$(uname -m) && \
if [ "$CPU_ONLY" = "true" ] || [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ] || ! command -v nvidia-smi >/dev/null 2>&1; then \
echo "Downloading EasyOCR models for CPU" && \
uv run python -c "import easyocr; reader = easyocr.Reader(['fr', 'de', 'es', 'en', 'it', 'pt'], gpu=False); print('EasyOCR CPU models downloaded successfully')"; \
else \
echo "Downloading EasyOCR models with GPU support" && \
uv run python -c "import easyocr; reader = easyocr.Reader(['fr', 'de', 'es', 'en', 'it', 'pt'], gpu=True); print('EasyOCR GPU models downloaded successfully')"; \
fi

RUN python -c 'from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline; artifacts_path = StandardPdfPipeline.download_models_hf(force=True);'
# Production stage
FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim
WORKDIR /app

# Pre-download EasyOCR models in compatible groups
RUN python -c 'import easyocr; \
reader = easyocr.Reader(["fr", "de", "es", "en", "it", "pt"], gpu=True); \
print("EasyOCR models downloaded successfully")'
# Install runtime dependencies
RUN apt-get update && \
apt-get install -y --no-install-recommends redis-server libgl1 libglib2.0-0 curl && \
rm -rf /var/lib/apt/lists/*

COPY . .
# Set environment variables
ENV HF_HOME=/app/.cache/huggingface \
TORCH_HOME=/app/.cache/torch \
PYTHONPATH=/app \
OMP_NUM_THREADS=4 \
UV_COMPILE_BYTECODE=1

EXPOSE 8080
# Create a non-root user
RUN useradd --create-home app && \
mkdir -p /app && \
chown -R app:app /app /tmp

CMD ["poetry", "run", "uvicorn", "--port", "8080", "--host", "0.0.0.0", "main:app"]
# Copy the virtual environment from the builder stage
# NOTE(review): this COPY brings over /app/.venv only; confirm whether the
# model caches downloaded in the builder stage are also needed at runtime.
COPY --from=builder --chown=app:app /app/.venv /app/.venv
# Prepend the copied environment's bin directory so its executables are found first.
ENV PATH="/app/.venv/bin:$PATH"

# Copy necessary files for the application
# The build context is copied into the working directory, owned by the app user.
COPY --chown=app:app . .

# Switch to non-root user
USER app

# The server below listens on the same port that is exposed here.
EXPOSE 8080
# Serve main:app with uvicorn on all interfaces.
CMD ["uvicorn", "main:app", "--port", "8080", "--host", "0.0.0.0"]
125 changes: 125 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# Variables
# Simple (:=) assignment: each value is fixed once at parse time. All of them
# can still be overridden from the command line, e.g. `make PORT=9000 start`.

# Prefix used to run Python tools inside the project environment.
# NOTE(review): the Dockerfile in this change installs dependencies with uv
# (uv.lock); confirm whether this prefix is meant to stay on Poetry.
PYTHON := poetry run
# Compose invocations for the CPU and GPU service definitions.
DOCKER_CPU_COMPOSE := docker-compose -f docker-compose.cpu.yml
DOCKER_GPU_COMPOSE := docker-compose -f docker-compose.gpu.yml
# Image name used by the docker-build-* targets (tagged :cpu / :gpu).
DOCKER_IMAGE := docling-api
# Port the local FastAPI server listens on.
PORT := 8080
# NOTE(review): WORKERS is not referenced by any rule in this file.
WORKERS := 4

# Every target in this file is a command, not a file, so each one is declared
# phony. The docker targets are listed by name: make expands `docker-*` in a
# prerequisite list as a file glob (it would match docker-compose.*.yml), not
# as a pattern over target names, so a wildcard here marks none of them phony.
.PHONY: help install dev-setup start stop clean test lint format \
        docker-build-cpu docker-build-gpu \
        docker-start docker-start-cpu docker-start-gpu \
        docker-stop docker-clean

# help: print the list of available targets, grouped by area.
# A single printf emits one line per argument, so the output is the same text,
# in the same order, as a sequence of echo commands.
help:
	@printf '%s\n' \
		"Available commands:" \
		"Development:" \
		" install - Install project dependencies using Poetry" \
		" dev-setup - Setup development environment (install Redis, etc.)" \
		" start - Start all development services locally" \
		" stop - Stop all development services" \
		" clean - Clean up temporary files and caches" \
		"" \
		"Docker:" \
		" docker-build-cpu - Build Docker image (CPU version)" \
		" docker-build-gpu - Build Docker image (GPU version)" \
		" docker-start - Auto-detect system and start appropriate container (CPU/GPU)" \
		" docker-start-cpu - Start services in CPU mode" \
		" docker-start-gpu - Start services in GPU mode" \
		" docker-stop - Stop all Docker services" \
		" docker-clean - Clean Docker resources" \
		"" \
		"Code Quality:" \
		" format - Format code using black" \
		" lint - Run linter" \
		" test - Run tests"

# Development commands

# install: make sure Poetry is available, then install the project dependencies.
# The remote installer is only downloaded and executed when no `poetry`
# executable is found on PATH, so repeated runs do not re-run it.
# (`a || b | c` parses as `a || (b | c)`: the pipeline runs only if the
# lookup fails.)
install:
	command -v poetry >/dev/null 2>&1 || curl -sSL https://install.python-poetry.org | python3 -
	poetry install

# dev-setup: install and start a local Redis, then create a default .env.
#   - macOS (uname reports Darwin): Homebrew install + `brew services start`.
#   - Debian-based systems (/etc/debian_version exists): apt-get + `service`.
#   - Anything else: print a hint and continue, since Redis may be provided
#     some other way.
# Steps are chained with && so a failed install is reported instead of being
# followed by an attempt to start a service that is not there.
# An existing .env is left untouched so local settings are never overwritten.
dev-setup:
	@echo "Setting up development environment..."
	@if [ "$$(uname)" = "Darwin" ]; then \
		brew install redis && \
		brew services start redis; \
	elif [ -f /etc/debian_version ]; then \
		sudo apt-get update && \
		sudo apt-get install -y redis-server && \
		sudo service redis-server start; \
	else \
		echo "Unsupported platform: install and start Redis manually." >&2; \
	fi
	@if [ -e .env ]; then \
		echo ".env already exists; leaving it unchanged."; \
	else \
		echo "Creating .env file..."; \
		printf '%s\n' "REDIS_HOST=redis://localhost:6379/0" "ENV=development" > .env; \
	fi

# start: run the API server and the Celery worker in the background and the
# Flower dashboard in the foreground, all from one shell.
# The PIDs of the two background jobs are recorded, and a trap stops them when
# the shell exits or is interrupted; without it, Ctrl-C ends Flower but leaves
# uvicorn (holding $(PORT)) and the worker running.
# `make stop` remains available for processes started some other way.
start:
	@echo "Starting FastAPI server..."
	api_pid=; worker_pid=; \
	trap 'kill $$api_pid $$worker_pid 2>/dev/null' EXIT INT TERM; \
	$(PYTHON) uvicorn main:app --reload --port $(PORT) & api_pid=$$!; \
	echo "Starting Celery worker..."; \
	$(PYTHON) celery -A worker.celery_config worker --pool=solo -n worker_primary --loglevel=info & worker_pid=$$!; \
	echo "Starting Flower dashboard..."; \
	$(PYTHON) celery -A worker.celery_config flower --port=5555

# stop: terminate the locally started services and the Redis service.
# pkill exits non-zero when nothing matches; `|| :` keeps that from failing
# the target. Redis is stopped through Homebrew when uname reports Darwin,
# through `service` when /etc/debian_version exists, and left alone otherwise.
stop:
	@echo "Stopping services..."
	@pkill -f "uvicorn main:app" || :
	@pkill -f "celery" || :
	@case "$$(uname)" in \
		Darwin) \
			brew services stop redis ;; \
		*) \
			if [ -f /etc/debian_version ]; then \
				sudo service redis-server stop; \
			fi ;; \
	esac

# Docker commands

# Build the image tagged :cpu, passing CPU_ONLY=true as a build argument.
docker-build-cpu:
	docker build --tag $(DOCKER_IMAGE):cpu --build-arg CPU_ONLY=true .

# Build the image tagged :gpu; no build argument is passed.
docker-build-gpu:
	docker build --tag $(DOCKER_IMAGE):gpu .

# Bring up the CPU compose stack (rebuilding images) with one celery_worker.
docker-start-cpu:
	$(DOCKER_CPU_COMPOSE) up --build --scale celery_worker=1

# Bring up the GPU compose stack (rebuilding images) with three celery_workers.
docker-start-gpu:
	$(DOCKER_GPU_COMPOSE) up --build --scale celery_worker=3

# docker-start: choose between the CPU and GPU compose stacks automatically.
# GPU mode is used only when the machine type is neither arm64 nor aarch64
# and an nvidia-smi executable is found on PATH; every other case falls back
# to CPU mode. The chosen target is run through $(MAKE).
docker-start:
	@echo "Auto-detecting system architecture..."
	@mode=gpu; \
	case "$$(uname -m)" in \
		arm64|aarch64) mode=cpu ;; \
	esac; \
	command -v nvidia-smi >/dev/null 2>&1 || mode=cpu; \
	if [ "$$mode" = cpu ]; then \
		echo "ARM architecture or NVIDIA drivers not detected. Using CPU mode."; \
		$(MAKE) docker-start-cpu; \
	else \
		echo "NVIDIA GPU detected. Using GPU mode."; \
		$(MAKE) docker-start-gpu; \
	fi

# docker-stop: take down the CPU stack, then the GPU stack.
# The GPU stack is only attempted if the CPU one came down cleanly.
docker-stop:
	$(DOCKER_CPU_COMPOSE) down && $(DOCKER_GPU_COMPOSE) down

# docker-clean: prune unused Docker resources, then unused volumes, without
# asking for confirmation.
docker-clean:
	docker system prune --force
	docker volume prune --force

# Code quality commands
# Each tool is run through $(PYTHON) against the repository root.

# format: run black over the tree.
format:
	$(PYTHON) black .

# lint: run flake8, then mypy; mypy is skipped if flake8 fails.
lint:
	$(PYTHON) flake8 . && $(PYTHON) mypy .

# test: run the pytest suite.
test:
	$(PYTHON) pytest

# clean: remove Python caches, coverage data and packaging output.
# One traversal with three alternatives, tried in order for each path:
#   1. Version-control and virtualenv directories (.git, .venv, venv, env,
#      ENV) are pruned, so nothing inside them is deleted. Installed packages
#      may legitimately contain directories named "build" or "dist".
#   2. Cache and build directories are removed with rm -rf and pruned, so find
#      does not try to descend into what it is about to delete.
#   3. Stray compiled files and .coverage data files are removed.
# rm -f is used rather than -delete because -delete implies -depth, under
# which -prune has no effect.
clean:
	find . \
		\( -name .git -o -name .venv -o -name venv -o -name env -o -name ENV \) -prune \
		-o -type d \( \
			-name "__pycache__" -o -name "*.egg-info" -o -name "*.egg" \
			-o -name ".pytest_cache" -o -name ".mypy_cache" -o -name ".tox" \
			-o -name "build" -o -name "dist" \
		\) -prune -exec rm -rf {} + \
		-o -type f \( \
			-name "*.pyc" -o -name "*.pyo" -o -name "*.pyd" -o -name ".coverage" \
		\) -exec rm -f {} +
Loading