diff --git a/.github/workflows/mlflow-ci.yml b/.github/workflows/mlflow-ci.yml index 78d7913..40e1581 100644 --- a/.github/workflows/mlflow-ci.yml +++ b/.github/workflows/mlflow-ci.yml @@ -55,11 +55,10 @@ jobs: - name: Run Lint and Template working-directory: applications/mlflow run: | - # Use Taskfile to add Helm repos, lint charts and generate templates - task add:repos:helm - task update:deps:helm - task lint - task template + # Use Taskfile to lint charts and generate templates + task helm:update-deps + task helm:lint + task helm:template - name: Upload rendered templates if: failure() @@ -73,80 +72,12 @@ jobs: working-directory: applications/mlflow run: | # Ensure Chart.yaml and HelmChart versions are in sync - task check:versions + task versions:verify - helm-docs: - runs-on: ubuntu-22.04 - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Install Task - uses: arduino/setup-task@v1 - with: - version: 3.x - repo-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Set up Helm - uses: azure/setup-helm@v4.3.0 - with: - version: v3.13.3 - - - name: Install helm-docs - run: | - HELM_DOCS_VERSION=v1.12.0 - wget https://github.com/norwoodj/helm-docs/releases/download/${HELM_DOCS_VERSION}/helm-docs_${HELM_DOCS_VERSION#v}_Linux_x86_64.tar.gz -O - | tar -xz - sudo mv helm-docs /usr/local/bin/helm-docs - helm-docs --version - - - name: Check Helm Documentation - working-directory: applications/mlflow - run: | - # Use Taskfile to check if helm docs are up to date - task add:repos:helm - task update:deps:helm - task docs:helm:check - - - name: Generate Helm Documentation - if: github.event_name == 'push' && github.ref == 'refs/heads/main' - working-directory: applications/mlflow - run: | - # Only generate documentation on main branch pushes - task docs:helm:generate - - - name: Generate KOTS Manifest Guide - if: github.event_name == 'push' && github.ref == 'refs/heads/main' - working-directory: applications/mlflow - run: | - # Generate KOTS manifest guide - task docs:kots:summary - - - name: Create PR if docs changed - if: github.event_name == 'push' && github.ref == 'refs/heads/main' - uses: peter-evans/create-pull-request@v5 - with: - token: ${{ secrets.GITHUB_TOKEN }} - commit-message: "docs: update documentation" - title: "docs: update documentation" - body: | - This PR updates documentation: - - - Updated Helm chart documentation based on the current templates - - Generated KOTS manifest guide for platform engineers - - Automatically generated by the MLflow CI workflow. 
- branch: update-docs - base: main - labels: documentation - paths: | - applications/mlflow/charts/*/README.md - applications/mlflow/docs/KOTS_MANIFEST_GUIDE.md create-release: runs-on: ubuntu-22.04 - needs: [lint-and-template, helm-docs] + needs: [lint-and-template] outputs: customer-id: ${{ steps.create-customer.outputs.customer-id }} channel-slug: ${{ steps.create-release.outputs.channel-slug }} @@ -177,10 +108,9 @@ jobs: working-directory: applications/mlflow run: | # Update and package charts - task add:repos:helm - task update:deps:helm - task update:versions:chart - task package:charts + task helm:update-deps + task release:update-versions + task helm:package # Extract MLflow chart version for reference - name: Extract MLflow chart version @@ -198,7 +128,7 @@ jobs: with: app-slug: ${{ env.APP_SLUG }} api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }} - yaml-dir: applications/mlflow/kots/ + yaml-dir: applications/mlflow/release/ promote-channel: ci-automation-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }} version: ${{ steps.chart-version.outputs.chart_version }} @@ -275,7 +205,7 @@ jobs: # Get license ID using the task - capture only the last line of output echo "Getting license ID..." - INSTALLATION_ID=$(CUSTOMER_NAME="$CUSTOMER_NAME" task get:license-id:customer | tail -n 1) + INSTALLATION_ID=$(CUSTOMER_NAME="$CUSTOMER_NAME" task license:get-id | tail -n 1) # Check if we got a result if [ -z "$INSTALLATION_ID" ]; then @@ -309,7 +239,7 @@ jobs: echo "$KUBECONFIG" > "$KUBECONFIG_FILE" # Create namespace using taskfile - KUBECONFIG="$KUBECONFIG_FILE" task setup:namespaces + KUBECONFIG="$KUBECONFIG_FILE" task namespaces:create env: KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }} @@ -321,7 +251,7 @@ jobs: echo "$KUBECONFIG" > "$KUBECONFIG_FILE" # Run task to test Helm installation - KUBECONFIG="$KUBECONFIG_FILE" MLFLOW_VALUES="${{ matrix.config.values_file }}" task test:install:helm + KUBECONFIG="$KUBECONFIG_FILE" MLFLOW_VALUES="${{ matrix.config.values_file }}" task helm:test-install env: KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }} REPLICATED_APP: ${{ env.APP_SLUG }} @@ -345,7 +275,7 @@ jobs: python -m pip install mlflow numpy pandas scikit-learn pytest requests # Run tests - task run:tests:app + task test:app env: KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }} PORT: 5000 @@ -441,7 +371,7 @@ jobs: # Try to download license echo "Attempting to download license..." set +e - CUSTOMER_NAME="$CUSTOMER_NAME" task download:license:customer + CUSTOMER_NAME="$CUSTOMER_NAME" task license:download DOWNLOAD_RESULT=$? 
set -e @@ -616,7 +546,7 @@ jobs: python -m pip install mlflow numpy pandas scikit-learn pytest requests # Run tests - task run:tests:app + task test:app env: KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }} PORT: 5000 diff --git a/applications/mlflow/.docker/bashrc b/applications/mlflow/.docker/bashrc new file mode 100644 index 0000000..b90e63b --- /dev/null +++ b/applications/mlflow/.docker/bashrc @@ -0,0 +1,29 @@ +# MLflow development container .bashrc + +# Set important environment variables +export SHELL=/bin/bash +export HOME=/home/devuser + +# Ensure clean exit +trap 'exit 0' EXIT + +# Custom prompt +export PS1="\[\033[01;32m\]mlflow-dev\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]$ " + +# Useful aliases +alias k="kubectl" +alias kgp="kubectl get pods" +alias kgs="kubectl get services" +alias kgc="kubectl get configmaps" + +# Define a clean exit function +clean_exit() { + echo "Exiting development shell..." + exit 0 +} + +# Override the exit command to ensure it always exits cleanly +alias exit="clean_exit" + +# Welcome message +echo "Type 'task --list' to see available tasks" \ No newline at end of file diff --git a/applications/mlflow/.docker/entrypoint.sh b/applications/mlflow/.docker/entrypoint.sh new file mode 100644 index 0000000..8559e1b --- /dev/null +++ b/applications/mlflow/.docker/entrypoint.sh @@ -0,0 +1,61 @@ +#!/bin/bash +set -e + +# Set essential environment variables +export SHELL=/bin/bash +export HOME=${HOME:-/home/devuser} +export USER=${USER:-devuser} + +# Trap to ensure clean exit +trap 'exit 0' EXIT + +# Basic initialization +echo "Initializing development environment..." + +# Check for kube config and warn if none found +if [ ! -f ~/.kube/config ]; then + echo "Warning: No Kubernetes config file found." + echo "Ensure ~/.kube/config is mounted from host." +else + echo "Found Kubernetes config. Using existing configuration." +fi + +# Check for Helm config and warn if not found +if [ ! -d ~/.config/helm ]; then + echo "Warning: No Helm configuration found. If you need Helm repos, please ensure ~/.config/helm is mounted." +fi + +# Network mode information +NETWORK_MODE="Container network" +if [ "${HOST_NETWORK:-false}" = "true" ]; then + NETWORK_MODE="Host network (ports opened in container are accessible on host)" +fi + +# Print welcome message +cat << EOF +====================================================== +MLflow Development Environment +------------------------------------------------------ +- All required tools are pre-installed: + * task, helm, kubectl, yq, jq, etc. +- Use 'task --list' to see available tasks +- Directories from host are mounted in /workspace + +Kubernetes Setup: +- Using host's Kubernetes config + +Networking: +- ${NETWORK_MODE} +====================================================== +EOF + +# Print environment debug information +echo "Environment:" +echo "- SHELL: $SHELL" +echo "- HOME: $HOME" +echo "- USER: $USER" +echo "- PATH: $PATH" +echo "" + +# Run the command +exec "$@" \ No newline at end of file diff --git a/applications/mlflow/DEVELOPMENT.md b/applications/mlflow/DEVELOPMENT.md index 9717db6..5143105 100644 --- a/applications/mlflow/DEVELOPMENT.md +++ b/applications/mlflow/DEVELOPMENT.md @@ -2,42 +2,106 @@ This document provides information about developing, testing, and releasing the MLflow application using the included Taskfile. 
+## Development + +## Prerequisites + +- Docker +- [Task](https://taskfile.dev/installation/) - A task runner / simpler Make alternative + +Optional (for direct installation on your machine): +- go (1.20+) +- Helm +- kubectl + ## Development Workflow -The MLflow application includes a Taskfile.yml that provides tasks for developing, testing, and publishing the application. +We use a [Taskfile](https://taskfile.dev/) to manage development tasks. The recommended approach is to use our containerized development environment, which includes all necessary dependencies. + +### Containerized Development Environment + +Our Docker-based development environment provides: +* Consistent environment across team members +* All required dependencies pre-installed +* No need to install Go, Python, Helm, or other tools locally +* Works on any operating system with Docker support + +To get started: + +```bash +# Build the development image +task dev:build-image + +# Start a development container +# This creates and runs the container in the background +task dev:start + +# Enter a shell in an already running development container +task dev:shell +``` + +The tasks above will: +1. Build the development Docker image if needed +2. Start a container with the proper mounts and environment +3. Provide you with a shell inside the container +4. Allow you to run all task commands directly + +### Common Development Tasks + +Once you're inside the development container (or on your local machine with all prerequisites installed), you can run these common tasks: + +```bash +# Update Helm dependencies +task helm:update-deps + +# Lint the Helm chart +task helm:lint + +# Template the chart (no values overrides) +task helm:template -### Prerequisites +# Install the chart to your Kubernetes cluster +task helm:install-local -- [Task](https://taskfile.dev/#/installation) command line tool -- Kubernetes cluster configured in your current context -- kubectl, helm, and python3 installed +# Run application tests +task test:app +``` -### Local Development +### Development Workflow Steps -Follow this workflow for development: +1. Set up the development environment: + ```bash + task dev:build-image + ``` + +2. Start the development container: + ```bash + task dev:start + ``` -1. Add required Helm repositories and update dependencies: +3. Enter the development container: ```bash - task update:deps:helm + task dev:shell ``` -2. Lint charts to check for issues: +4. Update Helm dependencies: ```bash - task lint + task helm:update-deps ``` -3. Template charts to verify the rendered manifests: +5. Lint and template charts to check for issues: ```bash - task template + task helm:lint + task helm:template ``` -4. Install charts for development: +6. Install charts for development: ```bash # Installs with Replicated SDK disabled - task install:helm:local + task helm:install-local # Optionally specify a custom values file - MLFLOW_VALUES=./my-values.yaml task install:helm:local + MLFLOW_VALUES=./my-values.yaml task helm:install-local ``` > **Note:** For local development, the Replicated SDK is explicitly disabled (`replicated.enabled=false`). This allows development without requiring access to the Replicated platform. @@ -46,15 +110,23 @@ Follow this workflow for development: > > The Helm releases are created with names `infra` and `mlflow` in the `mlflow` namespace. -5. Run application tests: +7. Run application tests: ```bash - task run:tests:app + task test:app ``` -6. Make changes to your charts and repeat steps 2-5 as needed +8. 
Make changes to your charts and repeat steps 5-7 as needed This workflow allows rapid iteration without needing to publish to the Replicated registry. +### Container Management + +If you encounter issues with the container: + +- Stop the container: `task dev:stop` +- Restart it: `task dev:restart` +- Rebuild the image if needed: `task dev:build-image` + ## Creating a Release When you're ready to publish your changes to the Replicated platform: @@ -63,9 +135,8 @@ When you're ready to publish your changes to the Replicated platform: 2. Update documentation: ```bash - # If helm-docs is not installed - cd charts/mlflow - docker run --rm -v "$(pwd):/helm-docs" -u $(id -u) jnorwood/helm-docs:latest + # Generate Helm documentation + task helm:docs:generate ``` 3. Set up the required environment variables: @@ -82,13 +153,13 @@ When you're ready to publish your changes to the Replicated platform: ```bash # This updates KOTS manifests with the current chart versions # and packages the charts as .tgz files - task package:charts + task helm:package ``` 5. Create a release in Replicated: ```bash # This uploads the packaged charts and creates a new release - task create:release + task release:create ``` 6. Verify the release was created successfully in the Replicated vendor portal @@ -105,57 +176,56 @@ This workflow tests the full Replicated release and distribution process: export REPLICATED_CHANNEL=channel_name # Login to the registry - task login:registry + task registry:login ``` 2. Test the Helm installation from the Replicated registry: ```bash # This pulls charts from the Replicated registry with SDK enabled - task test:install:helm + task helm:test-install ``` > **Note:** This creates Helm releases named `infra` and `mlflow` in the `mlflow` namespace. 3. Verify the installation with application tests: ```bash - task run:tests:app + task test:app ``` You can also run the complete test suite after setting up environment variables: ```bash -task run:tests:all +task test:all ``` This workflow validates the entire release pipeline from publishing to installation, ensuring that your charts work correctly when distributed through the Replicated platform. -## Updating Documentation +## Troubleshooting -Before creating a release, ensure the documentation is up-to-date: +### Container Issues -1. Update version information in `charts/mlflow/Chart.yaml` if needed. +If you encounter issues with the container: -2. Update the changelog in `charts/mlflow/README_CHANGELOG.md.gotmpl` with details about the new release. +- Stop the container: `task dev:stop` +- Restart it: `task dev:restart` +- Rebuild the image if needed: `task dev:build-image` -3. Generate documentation using helm-docs: - ```bash - # From the mlflow chart directory - cd charts/mlflow - - # If helm-docs is installed locally - helm-docs - - # Or use Docker - docker run --rm -v "$(pwd):/helm-docs" -u $(id -u) jnorwood/helm-docs:latest - ``` +### Port Forwarding Issues -4. Verify the generated documentation: - - `README.md` - Main chart documentation - - `README_CHANGELOG.md` - Changelog - - `README_CONFIG.md` - Configuration reference +If you encounter issues with port forwarding: -## CI/CD Pipeline +1. Check if the port is already in use on your host machine +2. Try using a different port by specifying it when starting the service +3. The development environment automatically tries ports 5000-5004 and will use the first available one -This application includes a CI/CD pipeline implemented with GitHub Actions. 
The pipeline handles: +## CI/CD Pipeline Integration + +For CI, we push the development image to ttl.sh with: + +```bash +task ci:push-image +``` + +The MLflow application includes a CI/CD pipeline implemented with GitHub Actions. The pipeline handles: - Linting and validating Helm chart templates - Creating releases in Replicated @@ -169,8 +239,4 @@ The pipeline workflow: 4. `kots-install-test`: Tests KOTS installation 5. `cleanup-test-release`: Cleans up test resources -The pipeline is triggered on: -- Pull requests affecting the MLflow application -- Pushes to the main branch - For more details, see the workflow definition in [.github/workflows/mlflow-ci.yml](../../.github/workflows/mlflow-ci.yml). diff --git a/applications/mlflow/Dockerfile b/applications/mlflow/Dockerfile new file mode 100644 index 0000000..f796b79 --- /dev/null +++ b/applications/mlflow/Dockerfile @@ -0,0 +1,93 @@ +FROM ubuntu:22.04 + +# Set environment variables +ENV DEBIAN_FRONTEND=noninteractive \ + SHELL=/bin/bash \ + HOME=/home/devuser + +# Install common utilities and prerequisites +RUN apt-get update && apt-get install -y \ + curl \ + git \ + gnupg \ + jq \ + lsb-release \ + lsof \ + make \ + netcat \ + python3 \ + python3-pip \ + python3-venv \ + software-properties-common \ + sudo \ + tar \ + unzip \ + vim \ + wget \ + && rm -rf /var/lib/apt/lists/* \ + # Create a symlink from python3 to python for compatibility + && ln -sf /usr/bin/python3 /usr/bin/python + +# Install CLI tools +RUN curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash \ + # Install kubectl + && curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" \ + && chmod +x kubectl \ + && mv kubectl /usr/local/bin/ \ + # Install Kind + && curl -Lo /usr/local/bin/kind https://kind.sigs.k8s.io/dl/v0.20.0/kind-linux-amd64 \ + && chmod +x /usr/local/bin/kind \ + # Install yq + && wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq \ + && chmod +x /usr/local/bin/yq \ + # Install Task + && sh -c "$(curl --location https://taskfile.dev/install.sh)" -- -d -b /usr/local/bin \ + # Install Replicated CLI + && curl -s https://api.github.com/repos/replicatedhq/replicated/releases/latest \ + | grep "browser_download_url.*linux_amd64.tar.gz" \ + | cut -d : -f 2,3 \ + | tr -d \" \ + | wget -qi - \ + && tar xf replicated_*_linux_amd64.tar.gz replicated \ + && mv replicated /usr/local/bin/ \ + && rm replicated_*_linux_amd64.tar.gz + +# Install Python dependencies directly +RUN pip3 install --upgrade pip wheel setuptools \ + && pip3 install --no-cache-dir \ + "mlflow>=2.8.0,<3.0.0" \ + "numpy>=1.24.0" \ + "pandas>=2.0.0" \ + "pytest" \ + "requests" \ + "scikit-learn>=1.2.0" \ + --ignore-installed \ + || true + +# Create a non-root user for better security +RUN groupadd -r devuser \ + && useradd -r -g devuser -m -s /bin/bash devuser \ + && echo "devuser ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/devuser + +# Copy configuration files +COPY .docker/entrypoint.sh /entrypoint.sh +COPY .docker/bashrc /home/devuser/.bashrc + +# Set up fallback entrypoint if needed and fix permissions +RUN if [ ! 
-f /entrypoint.sh ]; then \ + echo '#!/bin/bash' > /entrypoint.sh && \ + echo 'set -e' >> /entrypoint.sh && \ + echo '# Run the command' >> /entrypoint.sh && \ + echo 'exec "$@"' >> /entrypoint.sh; \ + fi \ + && chmod +x /entrypoint.sh \ + && chown devuser:devuser /home/devuser/.bashrc + +# Set working directory +WORKDIR /app + +# Switch to non-root user +USER devuser + +ENTRYPOINT ["/entrypoint.sh"] +CMD ["bash"] \ No newline at end of file diff --git a/applications/mlflow/README.md b/applications/mlflow/README.md index d23b4ec..b8fd16e 100644 --- a/applications/mlflow/README.md +++ b/applications/mlflow/README.md @@ -32,6 +32,7 @@ helm install mlflow oci://registry.replicated.com/mlflow/stable - [MLflow Helm Chart Documentation](./charts/mlflow/README.md) - Installation and configuration details - [Configuration Reference](./charts/mlflow/README_CONFIG.md) - Detailed configuration options +- [Development Guide](./DEVELOPMENT.md) - Guide for development including containerized environment ## For Developers @@ -42,6 +43,19 @@ If you're looking to contribute to or customize this application, please refer t - Release process - CI/CD integration - Helm chart customization +- Containerized development environment + +For containerized development, we offer a Docker-based development environment: + +```bash +# Enter the development container shell +task dev:shell + +# For Kubernetes development with Kind +task dev:shell:kind +``` + +See the [Development Guide](./DEVELOPMENT.md) for more details. We use [helm-docs](https://github.com/norwoodj/helm-docs) for chart documentation. See the [Development Guide](./DEVELOPMENT.md) for details. @@ -103,12 +117,11 @@ cd platform-examples/applications/mlflow # Install Task CLI (if not already installed) # See https://taskfile.dev/#/installation -# Add required Helm repositories and update dependencies -task add:repos:helm -task update:deps:helm +# Update dependencies and install charts +task helm:update-deps # Install charts locally with Replicated SDK disabled -task install:helm:local +task helm:install-local # Access MLflow UI at http://localhost:5000 ``` diff --git a/applications/mlflow/Taskfile.yml b/applications/mlflow/Taskfile.yml index f1d5753..1646ca0 100644 --- a/applications/mlflow/Taskfile.yml +++ b/applications/mlflow/Taskfile.yml @@ -7,7 +7,7 @@ version: '3' vars: # Directory structure CHART_DIR: ./charts - KOTS_DIR: ./kots + RELEASE_DIR: ./release TESTS_DIR: ./tests # Testing configuration @@ -21,6 +21,14 @@ vars: CI: sh: echo "${CI:-false}" + # Docker configuration + DEV_IMAGE: ttl.sh/mlflow-dev:dev + CI_IMAGE_REGISTRY: ttl.sh + CI_IMAGE_NAME: mlflow-dev + CI_IMAGE_TAG: + sh: git rev-parse --short HEAD 2>/dev/null || echo "dev" + CI_IMAGE_TTL: 1h + # Resource-related parameters (adjustable for local/CI environments) TIMEOUT: '{{if eq .CI "true"}}5m{{else}}3m{{end}}' WAIT_RETRIES: '{{if eq .CI "true"}}30{{else}}15{{end}}' @@ -35,7 +43,7 @@ vars: # Release configuration # APP_NAME can be overridden by setting REPLICATED_APP environment variable APP_NAME: '{{.REPLICATED_APP | default "diamon-mlflow"}}' - YAML_DIR: "./kots" + YAML_DIR: "./release" # Default task shows help tasks: @@ -47,9 +55,131 @@ tasks: - task --list silent: true + # Development environment tasks + dev:build-image: + desc: Build development Docker image + silent: true + cmds: + - docker build -t {{.DEV_IMAGE}} -f Dockerfile . + + dev:start: + desc: Start development container in background + silent: true + cmds: + - echo "Starting development container..." 
+ - | + # Check if container is running + if docker ps | grep -q "mlflow-dev-container"; then + echo "Container mlflow-dev-container is already running" + # Check if container exists but is not running + elif docker ps -a | grep -q "mlflow-dev-container"; then + echo "Container mlflow-dev-container exists but is not running. Starting it..." + docker start mlflow-dev-container + # Create and start new container + else + # Try ports 5000-5004, use the first available one + PORT_FOUND=false + for PORT in {5000..5004}; do + if ! lsof -i :$PORT > /dev/null; then + PORT_ARG="-p $PORT:$PORT" + echo "Using port $PORT for MLflow UI" + PORT_FOUND=true + break + fi + done + + # If all standard ports are in use, use a random port + if [ "$PORT_FOUND" = "false" ]; then + PORT_ARG="-P" + echo "All standard ports are in use. Using a random port." + fi + + # Start container with host networking for kubectl port-forward compatibility + CONTAINER_ID=$(docker run --name mlflow-dev-container --network host -d \ + -v $(pwd):/workspace \ + -v ~/.kube:/home/devuser/.kube \ + -v ~/.helm:/home/devuser/.helm \ + -v ~/.replicated:/home/devuser/.replicated \ + -e SHELL=/bin/bash \ + -e HOME=/home/devuser \ + -e USER=devuser \ + -e HOST_NETWORK=true \ + -w /workspace \ + {{.DEV_IMAGE}} sleep infinity) + + if [ $? -eq 0 ]; then + echo "Development container started successfully with ID: $CONTAINER_ID" + echo "Ports opened within the container will be directly accessible on your host machine." + else + echo "Failed to start development container" + exit 1 + fi + fi + + dev:ensure-running: + desc: Ensure the development container is running + internal: true + silent: true + cmds: + - | + if ! docker ps | grep -q "mlflow-dev-container"; then + echo "Container mlflow-dev-container is not running. Starting it..." + task dev:start + else + echo "Container mlflow-dev-container is already running." + fi + status: + - docker ps | grep -q "mlflow-dev-container" + + dev:shell: + desc: Attach to development container shell + deps: [dev:ensure-running] + silent: true + cmds: + - echo "Connecting to mlflow-dev-container..." + - docker exec -it mlflow-dev-container /bin/bash + + dev:stop: + desc: Stop development container + silent: true + cmds: + - | + if docker ps | grep -q "mlflow-dev-container"; then + echo "Stopping development container..." + docker stop mlflow-dev-container + else + echo "Container mlflow-dev-container is not running" + fi + + dev:restart: + desc: Restart development container + silent: true + cmds: + - task: dev:stop + - task: dev:start + + # CI image tasks + ci:build-image: + desc: Build and tag a CI image with the current git SHA + silent: true + cmds: + - echo "Building CI Docker image {{.CI_IMAGE_REGISTRY}}/{{.CI_IMAGE_NAME}}-{{.CI_IMAGE_TAG}}:{{.CI_IMAGE_TTL}}..." + - docker build -t {{.CI_IMAGE_REGISTRY}}/{{.CI_IMAGE_NAME}}-{{.CI_IMAGE_TAG}}:{{.CI_IMAGE_TTL}} . + - echo "✅ CI image built successfully" + + ci:push-image: + desc: Push the CI image to a temporary registry (ttl.sh) + deps: [ci:build-image] + silent: true + cmds: + - echo "Pushing CI Docker image to {{.CI_IMAGE_REGISTRY}}..." 
+ - docker push {{.CI_IMAGE_REGISTRY}}/{{.CI_IMAGE_NAME}}-{{.CI_IMAGE_TAG}}:{{.CI_IMAGE_TTL}} + - echo "✅ CI image pushed successfully to {{.CI_IMAGE_REGISTRY}}/{{.CI_IMAGE_NAME}}-{{.CI_IMAGE_TAG}}:{{.CI_IMAGE_TTL}}" + # Version checking task - check:versions: + versions:verify: desc: Check if versions match between Chart.yaml files and HelmChart manifests + silent: true cmds: - echo "Checking chart versions consistency..." - | @@ -63,7 +193,7 @@ tasks: chart_yaml="{{.CHART_DIR}}/$chart/Chart.yaml" # Get the HelmChart resource path - helmchart="{{.KOTS_DIR}}/$chart-chart.yaml" + helmchart="{{.RELEASE_DIR}}/$chart-chart.yaml" # Check if both files exist if [ ! -f "$chart_yaml" ]; then @@ -110,22 +240,45 @@ tasks: if [ "$all_match" = true ]; then echo "✅ All chart versions match between Chart.yaml and HelmChart resources." else - echo "❌ Version mismatches found! Please run 'task update:versions' to synchronize them." + echo "❌ Version mismatches found! Please run 'task release:update-versions' to synchronize them." exit 1 fi + status: + - | + # For each available chart, check that versions match + for chart in {{.CHARTS}}; do + # Get the Chart.yaml path + chart_yaml="{{.CHART_DIR}}/$chart/Chart.yaml" + + # Get the HelmChart resource path + helmchart="{{.RELEASE_DIR}}/$chart-chart.yaml" + + # Check if both files exist + if [ ! -f "$chart_yaml" ] || [ ! -f "$helmchart" ]; then + exit 1 + fi + + # Get versions from both files + chart_yaml_version=$(yq e '.version' "$chart_yaml") + helmchart_version=$(yq e '.spec.chart.chartVersion' "$helmchart") + + # Check if versions could be extracted + if [ -z "$chart_yaml_version" ] || [ -z "$helmchart_version" ]; then + exit 1 + fi + + # Compare versions + if [ "$chart_yaml_version" != "$helmchart_version" ]; then + exit 1 + fi + done + # All versions match + exit 0 - # Repository setup - renamed - add:repos:helm: - desc: Add required Helm repositories - cmds: - - helm repo add cnpg https://cloudnative-pg.github.io/charts - - helm repo add minio https://operator.min.io/ - - helm repo update - # Dependency update - renamed - update:deps:helm: + helm:update-deps: desc: Update Helm chart dependencies - deps: [add:repos:helm] + silent: true cmds: - echo "Updating Helm chart dependencies..." - for: { var: CHARTS } @@ -134,77 +287,119 @@ tasks: helm dependency update {{.CHART_DIR}}/{{.ITEM}} - echo "Helm chart dependencies updated successfully." - # Chart linting - lint: + # Lint charts + helm:lint: desc: Lint Helm charts - deps: [add:repos:helm, update:deps:helm] + silent: true cmds: - echo "Linting Helm charts..." - - for: { var: CHARTS } - cmd: | - echo "Linting {{.ITEM}} chart..." - helm lint {{.CHART_DIR}}/{{.ITEM}} - - echo "Linting completed successfully." + - | + # For each chart, run helm lint with path + for chart in {{.CHARTS}}; do + echo "Linting $chart chart..." + helm lint {{.CHART_DIR}}/$chart + done + - echo "✅ Helm lint check complete." # Template rendering - template: + helm:template: desc: Template Helm charts with standard configuration and output to a directory - deps: [add:repos:helm, update:deps:helm] + silent: true + deps: [helm:update-deps] cmds: - - echo "Templating Helm charts..." + - echo "Rendering Helm chart templates..." - | - # Create templates directory if it doesn't exist - TEMPLATES_DIR="{{.CHART_DIR}}/.rendered-templates" - echo "Creating templates directory: $TEMPLATES_DIR" - mkdir -p "$TEMPLATES_DIR" - - # Clean up any previous templates - echo "Cleaning up previous templates..." 
- rm -rf "$TEMPLATES_DIR"/* - - for: { var: CHARTS } - cmd: | - echo "=== Rendering templates for {{.ITEM}} chart ===" - echo "===============================================" - - # Create directory for this chart - CHART_TEMPLATES_DIR="{{.CHART_DIR}}/.rendered-templates/{{.ITEM}}" - mkdir -p "$CHART_TEMPLATES_DIR" + # Create directory for rendered templates + mkdir -p {{.CHART_DIR}}/.rendered-templates + + # For each chart, render templates + for chart in {{.CHARTS}}; do + echo "Rendering templates for $chart chart..." - # Render templates to file with default values - helm template {{.CHART_DIR}}/{{.ITEM}} --output-dir "$CHART_TEMPLATES_DIR" --debug + # Create output directory + OUTPUT_DIR="{{.CHART_DIR}}/.rendered-templates/$chart" + mkdir -p "$OUTPUT_DIR" - # Also output to stdout for visibility - echo "Templates written to: $CHART_TEMPLATES_DIR" - echo "=== End of templates for {{.ITEM}} chart ===" - echo "" - - echo "All chart templates have been output to {{.CHART_DIR}}/.rendered-templates" + # Render templates + helm template "$chart" {{.CHART_DIR}}/$chart \ + --namespace {{.NAMESPACE}} \ + --set replicated.enabled=false \ + --output-dir "$OUTPUT_DIR" + done + - echo "✅ Templates rendered successfully to {{.CHART_DIR}}/.rendered-templates" - # Version update for packaged charts - update:versions:chart: - desc: Update chart version references in KOTS manifests + # Update chart versions + release:update-versions: + desc: Update chart version references in release manifests + silent: true cmds: - - for: { var: CHARTS } - cmd: | - sed -i 's|chartVersion: [0-9a-zA-Z.-]*|chartVersion: {{if eq .ITEM "mlflow"}}{{.MLFLOW_VERSION}}{{else}}{{.INFRA_VERSION}}{{end}}|g' {{.KOTS_DIR}}/{{.ITEM}}-chart.yaml - - echo "Chart versions updated in KOTS manifests." - - cmd: task check:versions || echo "⚠️ Version check failed after update. Please verify manually." + - echo "Updating chart version references in release manifests..." + - | + # Get chart versions + MLFLOW_VERSION="{{.MLFLOW_VERSION}}" + INFRA_VERSION="{{.INFRA_VERSION}}" + + echo "Working with chart versions:" + echo "MLflow chart version: $MLFLOW_VERSION" + echo "Infra chart version: $INFRA_VERSION" + + # Update MLflow HelmChart manifest + MLFLOW_HELMCHART="{{.RELEASE_DIR}}/mlflow-chart.yaml" + if [ -f "$MLFLOW_HELMCHART" ]; then + echo "Updating version in $MLFLOW_HELMCHART to $MLFLOW_VERSION..." + + if command -v yq &> /dev/null; then + # Use yq if available + yq eval ".spec.chart.chartVersion = \"$MLFLOW_VERSION\"" -i "$MLFLOW_HELMCHART" + else + # Fallback to sed + sed -i.bak "s/chartVersion:.*/chartVersion: \"$MLFLOW_VERSION\"/" "$MLFLOW_HELMCHART" && rm "${MLFLOW_HELMCHART}.bak" + fi + else + echo "⚠️ MLflow HelmChart not found at $MLFLOW_HELMCHART" + fi + + # Update Infra HelmChart manifest + INFRA_HELMCHART="{{.RELEASE_DIR}}/infra-chart.yaml" + if [ -f "$INFRA_HELMCHART" ]; then + echo "Updating version in $INFRA_HELMCHART to $INFRA_VERSION..." + + if command -v yq &> /dev/null; then + # Use yq if available + yq eval ".spec.chart.chartVersion = \"$INFRA_VERSION\"" -i "$INFRA_HELMCHART" + else + # Fallback to sed + sed -i.bak "s/chartVersion:.*/chartVersion: \"$INFRA_VERSION\"/" "$INFRA_HELMCHART" && rm "${INFRA_HELMCHART}.bak" + fi + else + echo "⚠️ Infra HelmChart not found at $INFRA_HELMCHART" + fi + + echo "✅ Release manifest versions updated successfully." + - cmd: task versions:verify || echo "⚠️ Version check failed after update. Please verify manually." 
+ ignore_error: true - # Packaging tasks - package:charts: + # Package charts + helm:package: desc: Package Helm charts for distribution - deps: [add:repos:helm, update:deps:helm, update:versions:chart] + silent: true + deps: [helm:update-deps, release:update-versions] cmds: - - echo "Packaging Helm charts..." - - for: { var: CHARTS } - cmd: | - echo "Packaging {{.ITEM}} chart..." - helm package {{.CHART_DIR}}/{{.ITEM}} -u -d {{.KOTS_DIR}} - - echo "Charts packaged successfully in {{.KOTS_DIR}} directory." + - echo "Packaging Helm charts for distribution..." + - | + # For each chart, package it using absolute paths instead of cd + for chart in {{.CHARTS}}; do + echo "Packaging $chart chart..." + # Use absolute paths in helm package command and specify destination directory + helm package {{.CHART_DIR}}/$chart -d {{.RELEASE_DIR}} + done + - echo "✅ Charts packaged successfully. Chart archives are in the release directory." - # Release creation - create:release: + # Create a release in the Replicated vendor portal + release:create: desc: Create a release in Replicated - deps: [check:versions, package:charts] + deps: [versions:verify, helm:package] + silent: true vars: VERSION: '{{.VERSION | default .MLFLOW_VERSION}}' REPLICATED_CHANNEL: '{{.REPLICATED_CHANNEL | default ""}}' @@ -232,179 +427,17 @@ tasks: exit 1 fi - # Namespace setup - setup:namespaces: - desc: Create and setup required namespaces - cmds: - - echo "Setting up required namespaces..." - - kubectl create namespace {{.NAMESPACE}} --dry-run=client -o yaml | kubectl apply -f - - - echo "Namespace setup complete" - - # Registry authentication/setup - login:registry: - desc: Login to Replicated registry (requires REPLICATED_LICENSE_ID env var) - cmds: - - echo "Authenticating with Replicated registry..." - - | - if [ -z "$REPLICATED_LICENSE_ID" ]; then - echo "ERROR: REPLICATED_LICENSE_ID environment variable must be set" - exit 1 - fi - helm registry login registry.replicated.com \ - --username="$REPLICATED_LICENSE_ID" \ - --password="$REPLICATED_LICENSE_ID" - - echo "Registry login successful." - - # Customer license ID retrieval - get:license-id:customer: - desc: Extract license ID from a customer (requires REPLICATED_API_TOKEN and customer name) - cmds: - - echo "Extracting license ID for customer {{.CUSTOMER_NAME}}..." - - | - # Validate required environment variables - if [ -z "$REPLICATED_API_TOKEN" ]; then - echo "ERROR: REPLICATED_API_TOKEN environment variable must be set" - exit 1 - fi - - if [ -z "{{.CUSTOMER_NAME}}" ]; then - echo "ERROR: CUSTOMER_NAME parameter is required" - exit 1 - fi - - # Run vendor-cli to inspect the customer and get the installation ID as JSON - echo "Running vendor-cli to inspect customer..." 
- CUSTOMER_JSON=$(docker run --rm \ - -e REPLICATED_API_TOKEN=$REPLICATED_API_TOKEN \ - -e REPLICATED_APP={{.APP_NAME}} \ - replicated/vendor-cli:latest \ - customer inspect --customer "{{.CUSTOMER_NAME}}" --output json) - - # Use jq to properly extract the installationId - INSTALLATION_ID=$(echo "$CUSTOMER_JSON" | jq -r '.installationId') - - # Check if we got a valid ID - if [ -z "$INSTALLATION_ID" ] || [ "$INSTALLATION_ID" = "null" ]; then - echo "Failed to extract installationId from customer JSON" - echo "JSON structure:" - echo "$CUSTOMER_JSON" | jq 'del(.installationId)' # Print JSON without the license ID - exit 1 - fi - - # Print the license ID so it can be captured - echo "$INSTALLATION_ID" - - # Download customer license - download:license:customer: - desc: Download license for a customer (requires REPLICATED_API_TOKEN and customer name) - cmds: - - echo "Downloading license for customer {{.CUSTOMER_NAME}}..." - - | - # Validate required environment variables - if [ -z "$REPLICATED_API_TOKEN" ]; then - echo "ERROR: REPLICATED_API_TOKEN environment variable must be set" - exit 1 - fi - - if [ -z "{{.CUSTOMER_NAME}}" ]; then - echo "ERROR: CUSTOMER_NAME parameter is required" - exit 1 - fi - - # Create a temporary directory for the license if it doesn't exist - mkdir -p /tmp/replicated - OUTPUT_FILE="/tmp/replicated/license-download-output.txt" - LICENSE_FILE="/tmp/replicated/license.yaml" - - # Run vendor-cli to download the customer license to a temporary file first - echo "Running vendor-cli to download license..." - TMP_LICENSE_FILE=$(mktemp) - set +e - docker run --rm \ - -e REPLICATED_API_TOKEN=$REPLICATED_API_TOKEN \ - -e REPLICATED_APP={{.APP_NAME}} \ - replicated/vendor-cli:latest \ - customer download-license --customer "{{.CUSTOMER_NAME}}" > "$TMP_LICENSE_FILE" 2>$OUTPUT_FILE - DOWNLOAD_EXIT_CODE=$? - set -e - - if [ $DOWNLOAD_EXIT_CODE -ne 0 ]; then - echo "ERROR: Failed to download license for customer {{.CUSTOMER_NAME}}" - echo "Error output:" - cat $OUTPUT_FILE - rm -f $OUTPUT_FILE "$TMP_LICENSE_FILE" - exit 1 - fi - - # Check if the file is empty - if [ ! -s "$TMP_LICENSE_FILE" ]; then - echo "ERROR: Downloaded license file is empty" - cat $OUTPUT_FILE - rm -f $OUTPUT_FILE "$TMP_LICENSE_FILE" - exit 1 - fi - - # Verify the license file is valid YAML - if command -v yq >/dev/null 2>&1; then - echo "Validating license file is proper YAML..." - if ! yq eval . "$TMP_LICENSE_FILE" > /dev/null 2>&1; then - echo "ERROR: Downloaded license file is not valid YAML" - echo "License file content:" - cat "$TMP_LICENSE_FILE" - rm -f $OUTPUT_FILE "$TMP_LICENSE_FILE" - exit 1 - fi - else - echo "WARNING: yq not found, skipping YAML validation" - fi - - # Remove any extra output or text before the YAML content - # This extracts content between first '---' and the end of file - if grep -q "^---" "$TMP_LICENSE_FILE"; then - echo "License appears to be in YAML format with document marker, extracting YAML content..." - sed -n '/^---/,$p' "$TMP_LICENSE_FILE" > "$LICENSE_FILE" - else - # If no '---' marker is found, check for '{' to identify JSON - if grep -q "{" "$TMP_LICENSE_FILE"; then - echo "License appears to be in JSON format, converting to YAML..." 
- if command -v yq >/dev/null 2>&1; then - cat "$TMP_LICENSE_FILE" | yq eval -P > "$LICENSE_FILE" - else - echo "ERROR: Cannot convert JSON to YAML without yq" - cat "$TMP_LICENSE_FILE" - rm -f $OUTPUT_FILE "$TMP_LICENSE_FILE" - exit 1 - fi - else - # If neither YAML nor JSON markers are found, just copy the file - echo "No YAML document marker or JSON found. Copying file as-is..." - cat "$TMP_LICENSE_FILE" > "$LICENSE_FILE" - fi - fi - - # Log some debug information - echo "License file content (first 5 lines):" - head -n 5 "$LICENSE_FILE" - - # Verify file exists and has content - if [ ! -s "$LICENSE_FILE" ]; then - echo "ERROR: Final license file is empty after processing" - rm -f $OUTPUT_FILE "$TMP_LICENSE_FILE" - exit 1 - fi - - echo "License successfully downloaded to $LICENSE_FILE" - rm -f $OUTPUT_FILE "$TMP_LICENSE_FILE" - # Cleanup tasks - clean:files:charts: - desc: Clean packaged charts from KOTS directory + clean:charts: + desc: Clean packaged charts from release directory + silent: true cmds: - - rm -f {{.KOTS_DIR}}/*.tgz - - echo "Chart packages cleaned from {{.KOTS_DIR}}" + - rm -f {{.RELEASE_DIR}}/*.tgz + - echo "Chart packages cleaned from {{.RELEASE_DIR}}" - clean:files:templates: + clean:templates: desc: Clean rendered templates directory + silent: true cmds: - rm -rf {{.CHART_DIR}}/.rendered-templates - echo "Rendered templates cleaned from {{.CHART_DIR}}/.rendered-templates" @@ -412,14 +445,16 @@ tasks: # Main clean task clean:all: desc: Clean all generated files - deps: [clean:files:charts, clean:files:templates] + deps: [clean:charts, clean:templates] + silent: true cmds: - echo "All generated files cleaned successfully" # Helm test task - test:install:helm: + helm:test-install: desc: Run Helm installation test from Replicated registry - deps: [login:registry, setup:namespaces] + deps: [registry:login, namespaces:create] + silent: true cmds: - echo "Running Helm installation test with custom values..." - | @@ -461,7 +496,7 @@ tasks: fi # Clean up any existing port forwarding first - task cleanup:port:forward || true + task helm:port:cleanup || true # Install infra chart from Replicated registry echo "Installing infra chart from Replicated registry..." @@ -488,14 +523,15 @@ tasks: } echo "Helm installation with custom values completed successfully." - - task: forward:port + - task: helm:port:forward - # KOTS test task - test:install:kots: - desc: Run KOTS installation test - deps: [setup:namespaces] + # Replicated KOTS test task + kots:test-install: + desc: Run Replicated KOTS installation test + deps: [namespaces:create] + silent: true cmds: - - echo "Running KOTS installation test..." + - echo "Running Replicated KOTS installation test..." - | if [ -z "$REPLICATED_LICENSE_ID" ]; then echo "ERROR: REPLICATED_LICENSE_ID environment variable must be set" @@ -513,7 +549,7 @@ tasks: fi # Clean up any existing port forwarding first - task cleanup:port:forward || true + task helm:port:cleanup || true # Create directory for license file if it doesn't exist mkdir -p /tmp/replicated @@ -522,7 +558,7 @@ tasks: # Validate license file exists and has content if [ ! -f "$LICENSE_FILE" ] || [ ! 
-s "$LICENSE_FILE" ]; then echo "ERROR: License file does not exist or is empty at $LICENSE_FILE" - echo "Please download the license file using the customer:download-license task first" + echo "Please download the license file using the license:download task first" exit 1 fi @@ -567,79 +603,21 @@ tasks: echo "❌ KOTS installation failed" echo "Checking app status:" kubectl get app -n default - echo "Checking pods:" - kubectl get pods -n default - echo "Checking pod logs:" - kubectl logs -n default -l app=kotsadm --tail=50 - exit 1 - fi - - echo "✅ KOTS installation completed. Setting up port forwarding for testing..." - - task: forward:port - - # Port forwarding task - now broken into smaller sub-tasks - find:mlflow:service: - desc: Find the MLflow service in the namespace - internal: true - cmds: - - echo "Looking for MLflow service in namespace {{.NAMESPACE}}..." - - | - # Wait for the MLflow service to be created - echo "Waiting for MLflow service to be created..." - MAX_RETRIES={{.WAIT_RETRIES}} - RETRY_INTERVAL={{.RETRY_INTERVAL}} - RETRY_COUNT=0 - SERVICE_FOUND=false - - while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do - echo "Check $((RETRY_COUNT+1))/$MAX_RETRIES: Looking for MLflow service..." - if kubectl get svc mlflow -n {{.NAMESPACE}} --no-headers 2>/dev/null; then - echo "✅ MLflow service found!" - SERVICE_FOUND=true - break - else - echo "MLflow service not found yet. Waiting $RETRY_INTERVAL seconds..." - RETRY_COUNT=$((RETRY_COUNT+1)) - sleep $RETRY_INTERVAL - fi - done - - if [ "$SERVICE_FOUND" != "true" ]; then - echo "❌ ERROR: MLflow service not found after $((MAX_RETRIES * RETRY_INTERVAL)) seconds." - echo "Showing all available services in the namespace:" - kubectl get svc -n {{.NAMESPACE}} - echo "Showing pod status in the namespace:" - kubectl get pods -n {{.NAMESPACE}} - echo "Showing pod details:" - kubectl describe pods -n {{.NAMESPACE}} -l app.kubernetes.io/name=mlflow - exit 1 - fi - - # Verify the services are present - echo "Verifying MLflow service exists..." - kubectl get svc -n {{.NAMESPACE}} - echo "✅ MLflow service verification completed." - - wait:mlflow:pods: - desc: Wait for MLflow pods to be ready - internal: true - cmds: - - echo "Checking MLflow pod status..." - - | - # Check pod status and wait for them to be running - echo "Checking pod status..." - kubectl get pods -n {{.NAMESPACE}} + echo "Checking pods:" + kubectl get pods -n default + echo "Checking pod logs:" + kubectl logs -n default -l app=kotsadm --tail=50 + exit 1 + fi - echo "Waiting for MLflow pods to be running..." - kubectl wait --for=condition=Ready pods --selector=app.kubernetes.io/name=mlflow -n {{.NAMESPACE}} --timeout={{.TIMEOUT}} || { - echo "WARNING: Timed out waiting for pods to be ready, will try port-forwarding anyway" - kubectl describe pods -n {{.NAMESPACE}} - } - echo "✅ Pod readiness check completed." + echo "✅ Replicated installation completed. Setting up port forwarding for testing..." + - task: helm:port:forward - setup:port:forward: + # Port forwarding task - now broken into smaller sub-tasks + helm:port:setup: desc: Set up port forwarding to MLflow service internal: true + silent: true cmds: - echo "Setting up port forwarding to MLflow service..." - | @@ -693,9 +671,10 @@ tasks: echo $PORT_FORWARD_PID > /tmp/mlflow-port-forward-main.pid echo "✅ Port forwarding initialized." - check:port:forward: + helm:port:check: desc: Check if port forwarding is working and restart if needed internal: true + silent: true cmds: - echo "Checking port forwarding status..." 
- | @@ -749,9 +728,10 @@ tasks: fi echo "✅ Port forwarding check completed." - test:connectivity: + helm:port:test: desc: Test connectivity to MLflow service internal: true + silent: true cmds: - echo "Testing connectivity to MLflow on localhost:{{.PORT}}..." - | @@ -802,19 +782,33 @@ tasks: fi echo "✅ Connectivity test completed." - forward:port: + helm:port:forward: desc: Setup port forwarding to MLflow service for testing internal: true - deps: [find:mlflow:service, wait:mlflow:pods] + deps: [helm:service:find, helm:pods:wait] + silent: true cmds: - echo "Setting up port forwarding to MLflow service..." - - task: setup:port:forward - - task: check:port:forward - - task: test:connectivity + - task: helm:port:setup + - task: helm:port:check + - task: helm:port:test - echo "✅ Port forwarding setup completed successfully." + status: + - | + # Check if port forwarding is already active + if [ -f "/tmp/mlflow-port-forward-main.pid" ]; then + PID=$(cat /tmp/mlflow-port-forward-main.pid) + if ps -p $PID > /dev/null && lsof -i:{{.PORT}} | grep -q LISTEN; then + # Port forwarding is active + exit 0 + fi + fi + # Port forwarding is not active + exit 1 - cleanup:port:forward: + helm:port:cleanup: desc: Clean up port forwarding processes + silent: true cmds: - echo "Cleaning up port forwarding processes..." - | @@ -861,11 +855,21 @@ tasks: fi echo "✅ Port forwarding cleanup completed" + status: + - | + # Check if there's no port forwarding to clean up + if [ ! -f "/tmp/mlflow-port-forward-main.pid" ] && ! ps aux | grep "port-forward.*:{{.PORT}}" | grep -v grep | grep -q .; then + # No port forwarding processes found + exit 0 + fi + # Port forwarding processes exist that need to be cleaned up + exit 1 - # Local installation task (renamed from test:install:local) - install:helm:local: + # Local installation task + helm:install-local: desc: Install MLflow with local Helm charts for development (with Replicated SDK disabled) - deps: [add:repos:helm, setup:namespaces] + deps: [namespaces:create] + silent: true cmds: - echo "Installing MLflow with local Helm charts (Replicated SDK disabled)..." - | @@ -885,7 +889,7 @@ tasks: fi # Clean up any existing port forwarding first - task cleanup:port:forward || true + task helm:port:cleanup || true # Install infra chart from local directory echo "Installing infra chart from local directory..." @@ -911,11 +915,12 @@ tasks: } echo "Local Helm installation completed successfully." - - task: forward:port + - task: helm:port:forward # App test task - run:tests:app: + test:app: desc: Run application tests against the running MLflow service + silent: true cmds: - echo "Running MLflow application tests against localhost:{{.PORT}}..." - | @@ -1006,8 +1011,8 @@ tasks: fi echo "💡 Environment is persistent for faster future runs." - echo " To force dependency updates: FORCE_DEPS_UPDATE=yes task run:tests:app" - echo " To clean up environment: task clean:venv" + echo " To force dependency updates: FORCE_DEPS_UPDATE=yes task test:app" + echo " To clean up environment: task venv:clean" # Exit early since we've already run the tests exit 0 @@ -1060,21 +1065,23 @@ tasks: # Note about cleaning up echo "💡 Environment is persistent for faster future runs." 
- echo " To force dependency updates: FORCE_DEPS_UPDATE=yes task run:tests:app" - echo " To clean up environment: task clean:venv" + echo " To force dependency updates: FORCE_DEPS_UPDATE=yes task test:app" + echo " To clean up environment: task venv:clean" fi # All tests task - run:tests:all: + test:all: desc: Run all tests - deps: [test:install:helm, run:tests:app] + deps: [helm:test-install, test:app] + silent: true cmds: - echo "All tests completed successfully" - - task: cleanup:port:forward + - task: helm:port:cleanup # Alternative test task with venv - run:tests:app:venv: + test:app:venv: desc: Run application tests using a virtual environment for better isolation + silent: true cmds: - echo "Running application tests in a virtual environment..." - | @@ -1157,16 +1164,18 @@ tasks: rm -f "$TMP_SCRIPT" # Task to clean the virtual environment - clean:venv: + venv:clean: desc: Clean up the Python virtual environment used for testing + silent: true cmds: - echo "Cleaning up Python virtual environment..." - rm -rf "{{.TESTS_DIR}}/.venv" - echo "✅ Python virtual environment cleaned" # Documentation generation tasks - docs:helm:generate: + helm:docs:generate: desc: Generate Helm chart documentation from templates + silent: true cmds: - echo "Generating Helm chart documentation..." - | @@ -1184,8 +1193,9 @@ tasks: echo "✅ Helm chart documentation generated successfully." - docs:helm:check: + helm:docs:verify: desc: Check if Helm chart documentation is up to date + silent: true cmds: - echo "Checking if Helm chart documentation is up to date..." - | @@ -1221,7 +1231,7 @@ tasks: # Compare with current docs if [ -f "$readme_path" ] && ! diff -q "$readme_path" "$tmp_readme" > /dev/null; then - echo "❌ Documentation for $chart chart is outdated. Run 'task docs:helm:generate' to update." + echo "❌ Documentation for $chart chart is outdated. Run 'task helm:docs:generate' to update." docs_outdated=true else echo "✅ Documentation for $chart chart is up to date." @@ -1230,15 +1240,248 @@ tasks: # Exit with error if any docs are outdated if [ "$docs_outdated" = true ]; then - echo "❌ Some chart documentation files are outdated. Run 'task docs:helm:generate' to update them." + echo "❌ Some chart documentation files are outdated. Run 'task helm:docs:generate' to update them." exit 1 else echo "✅ All chart documentation is up to date." fi + # Version extraction extract:version:chart: desc: Extract and print the MLflow chart version + silent: true cmds: - | echo "{{.MLFLOW_VERSION}}" + + # Namespace setup + namespaces:create: + desc: Create and setup required namespaces + silent: true + cmds: + - echo "Setting up required namespaces..." + - kubectl create namespace {{.NAMESPACE}} --dry-run=client -o yaml | kubectl apply -f - + - echo "Namespace setup complete" + status: + - kubectl get namespace {{.NAMESPACE}} --no-headers --output=name 2>/dev/null + + # Registry authentication/setup + registry:login: + desc: Login to Replicated registry (requires REPLICATED_LICENSE_ID env var) + silent: true + cmds: + - echo "Authenticating with Replicated registry..." + - | + if [ -z "$REPLICATED_LICENSE_ID" ]; then + echo "ERROR: REPLICATED_LICENSE_ID environment variable must be set" + exit 1 + fi + helm registry login registry.replicated.com \ + --username="$REPLICATED_LICENSE_ID" \ + --password="$REPLICATED_LICENSE_ID" + - echo "Registry login successful." 
+ + # License retrieval and management + license:get-id: + desc: Extract license ID from a customer (requires REPLICATED_API_TOKEN and customer name) + silent: true + cmds: + - echo "Extracting license ID for customer {{.CUSTOMER_NAME}}..." + - | + # Validate required environment variables + if [ -z "$REPLICATED_API_TOKEN" ]; then + echo "ERROR: REPLICATED_API_TOKEN environment variable must be set" + exit 1 + fi + + if [ -z "{{.CUSTOMER_NAME}}" ]; then + echo "ERROR: CUSTOMER_NAME parameter is required" + exit 1 + fi + + # Run vendor-cli to inspect the customer and get the installation ID as JSON + echo "Running vendor-cli to inspect customer..." + CUSTOMER_JSON=$(docker run --rm \ + -e REPLICATED_API_TOKEN=$REPLICATED_API_TOKEN \ + -e REPLICATED_APP={{.APP_NAME}} \ + replicated/vendor-cli:latest \ + customer inspect --customer "{{.CUSTOMER_NAME}}" --output json) + + # Use jq to properly extract the installationId + INSTALLATION_ID=$(echo "$CUSTOMER_JSON" | jq -r '.installationId') + + # Check if we got a valid ID + if [ -z "$INSTALLATION_ID" ] || [ "$INSTALLATION_ID" = "null" ]; then + echo "Failed to extract installationId from customer JSON" + echo "JSON structure:" + echo "$CUSTOMER_JSON" | jq 'del(.installationId)' # Print JSON without the license ID + exit 1 + fi + + # Print the license ID so it can be captured + echo "$INSTALLATION_ID" + + # Download customer license + license:download: + desc: Download license for a customer (requires REPLICATED_API_TOKEN and customer name) + silent: true + cmds: + - echo "Downloading license for customer {{.CUSTOMER_NAME}}..." + - | + # Validate required environment variables + if [ -z "$REPLICATED_API_TOKEN" ]; then + echo "ERROR: REPLICATED_API_TOKEN environment variable must be set" + exit 1 + fi + + if [ -z "{{.CUSTOMER_NAME}}" ]; then + echo "ERROR: CUSTOMER_NAME parameter is required" + exit 1 + fi + + # Create a temporary directory for the license if it doesn't exist + mkdir -p /tmp/replicated + OUTPUT_FILE="/tmp/replicated/license-download-output.txt" + LICENSE_FILE="/tmp/replicated/license.yaml" + + # Run vendor-cli to download the customer license to a temporary file first + echo "Running vendor-cli to download license..." + TMP_LICENSE_FILE=$(mktemp) + set +e + docker run --rm \ + -e REPLICATED_API_TOKEN=$REPLICATED_API_TOKEN \ + -e REPLICATED_APP={{.APP_NAME}} \ + replicated/vendor-cli:latest \ + customer download-license --customer "{{.CUSTOMER_NAME}}" > "$TMP_LICENSE_FILE" 2>$OUTPUT_FILE + DOWNLOAD_EXIT_CODE=$? + set -e + + if [ $DOWNLOAD_EXIT_CODE -ne 0 ]; then + echo "ERROR: Failed to download license for customer {{.CUSTOMER_NAME}}" + echo "Error output:" + cat $OUTPUT_FILE + rm -f $OUTPUT_FILE "$TMP_LICENSE_FILE" + exit 1 + fi + + # Check if the file is empty + if [ ! -s "$TMP_LICENSE_FILE" ]; then + echo "ERROR: Downloaded license file is empty" + cat $OUTPUT_FILE + rm -f $OUTPUT_FILE "$TMP_LICENSE_FILE" + exit 1 + fi + + # Verify the license file is valid YAML + if command -v yq >/dev/null 2>&1; then + echo "Validating license file is proper YAML..." + if ! yq eval . 
"$TMP_LICENSE_FILE" > /dev/null 2>&1; then + echo "ERROR: Downloaded license file is not valid YAML" + echo "License file content:" + cat "$TMP_LICENSE_FILE" + rm -f $OUTPUT_FILE "$TMP_LICENSE_FILE" + exit 1 + fi + else + echo "WARNING: yq not found, skipping YAML validation" + fi + + # Remove any extra output or text before the YAML content + # This extracts content between first '---' and the end of file + if grep -q "^---" "$TMP_LICENSE_FILE"; then + echo "License appears to be in YAML format with document marker, extracting YAML content..." + sed -n '/^---/,$p' "$TMP_LICENSE_FILE" > "$LICENSE_FILE" + else + # If no '---' marker is found, check for '{' to identify JSON + if grep -q "{" "$TMP_LICENSE_FILE"; then + echo "License appears to be in JSON format, converting to YAML..." + if command -v yq >/dev/null 2>&1; then + cat "$TMP_LICENSE_FILE" | yq eval -P > "$LICENSE_FILE" + else + echo "ERROR: Cannot convert JSON to YAML without yq" + cat "$TMP_LICENSE_FILE" + rm -f $OUTPUT_FILE "$TMP_LICENSE_FILE" + exit 1 + fi + else + # If neither YAML nor JSON markers are found, just copy the file + echo "No YAML document marker or JSON found. Copying file as-is..." + cat "$TMP_LICENSE_FILE" > "$LICENSE_FILE" + fi + fi + + # Log some debug information + echo "License file content (first 5 lines):" + head -n 5 "$LICENSE_FILE" + + # Verify file exists and has content + if [ ! -s "$LICENSE_FILE" ]; then + echo "ERROR: Final license file is empty after processing" + rm -f $OUTPUT_FILE "$TMP_LICENSE_FILE" + exit 1 + fi + + echo "License successfully downloaded to $LICENSE_FILE" + rm -f $OUTPUT_FILE "$TMP_LICENSE_FILE" + + # Add the renamed MLflow service tasks + helm:service:find: + desc: Find the MLflow service in the namespace + internal: true + silent: true + cmds: + - echo "Looking for MLflow service in namespace {{.NAMESPACE}}..." + - | + # Wait for the MLflow service to be created + echo "Waiting for MLflow service to be created..." + MAX_RETRIES={{.WAIT_RETRIES}} + RETRY_INTERVAL={{.RETRY_INTERVAL}} + RETRY_COUNT=0 + SERVICE_FOUND=false + + while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do + echo "Check $((RETRY_COUNT+1))/$MAX_RETRIES: Looking for MLflow service..." + if kubectl get svc mlflow -n {{.NAMESPACE}} --no-headers 2>/dev/null; then + echo "✅ MLflow service found!" + SERVICE_FOUND=true + break + else + echo "MLflow service not found yet. Waiting $RETRY_INTERVAL seconds..." + RETRY_COUNT=$((RETRY_COUNT+1)) + sleep $RETRY_INTERVAL + fi + done + + if [ "$SERVICE_FOUND" != "true" ]; then + echo "❌ ERROR: MLflow service not found after $((MAX_RETRIES * RETRY_INTERVAL)) seconds." + echo "Showing all available services in the namespace:" + kubectl get svc -n {{.NAMESPACE}} + echo "Showing pod status in the namespace:" + kubectl get pods -n {{.NAMESPACE}} + echo "Showing pod details:" + kubectl describe pods -n {{.NAMESPACE}} -l app.kubernetes.io/name=mlflow + exit 1 + fi + + # Verify the services are present + echo "Verifying MLflow service exists..." + kubectl get svc -n {{.NAMESPACE}} + echo "✅ MLflow service verification completed." + + helm:pods:wait: + desc: Wait for MLflow pods to be ready + internal: true silent: true + cmds: + - echo "Checking MLflow pod status..." + - | + # Check pod status and wait for them to be running + echo "Checking pod status..." + kubectl get pods -n {{.NAMESPACE}} + + echo "Waiting for MLflow pods to be running..." 
+        kubectl wait --for=condition=Ready pods --selector=app.kubernetes.io/name=mlflow -n {{.NAMESPACE}} --timeout={{.TIMEOUT}} || {
+          echo "WARNING: Timed out waiting for pods to be ready, will try port-forwarding anyway"
+          kubectl describe pods -n {{.NAMESPACE}}
+        }
+        echo "✅ Pod readiness check completed."
diff --git a/applications/mlflow/kots/ec.yaml b/applications/mlflow/release/ec.yaml
similarity index 100%
rename from applications/mlflow/kots/ec.yaml
rename to applications/mlflow/release/ec.yaml
diff --git a/applications/mlflow/kots/infra-chart.yaml b/applications/mlflow/release/infra-chart.yaml
similarity index 100%
rename from applications/mlflow/kots/infra-chart.yaml
rename to applications/mlflow/release/infra-chart.yaml
diff --git a/applications/mlflow/kots/k8s-app.yaml b/applications/mlflow/release/k8s-app.yaml
similarity index 100%
rename from applications/mlflow/kots/k8s-app.yaml
rename to applications/mlflow/release/k8s-app.yaml
diff --git a/applications/mlflow/kots/kots-app.yaml b/applications/mlflow/release/kots-app.yaml
similarity index 100%
rename from applications/mlflow/kots/kots-app.yaml
rename to applications/mlflow/release/kots-app.yaml
diff --git a/applications/mlflow/kots/kots-config.yaml b/applications/mlflow/release/kots-config.yaml
similarity index 100%
rename from applications/mlflow/kots/kots-config.yaml
rename to applications/mlflow/release/kots-config.yaml
diff --git a/applications/mlflow/kots/kots-preflight.yaml b/applications/mlflow/release/kots-preflight.yaml
similarity index 100%
rename from applications/mlflow/kots/kots-preflight.yaml
rename to applications/mlflow/release/kots-preflight.yaml
diff --git a/applications/mlflow/kots/kots-support-bundle.yaml b/applications/mlflow/release/kots-support-bundle.yaml
similarity index 100%
rename from applications/mlflow/kots/kots-support-bundle.yaml
rename to applications/mlflow/release/kots-support-bundle.yaml
diff --git a/applications/mlflow/kots/mlflow-chart.yaml b/applications/mlflow/release/mlflow-chart.yaml
similarity index 99%
rename from applications/mlflow/kots/mlflow-chart.yaml
rename to applications/mlflow/release/mlflow-chart.yaml
index 50da8d2..cfedd35 100644
--- a/applications/mlflow/kots/mlflow-chart.yaml
+++ b/applications/mlflow/release/mlflow-chart.yaml
@@ -47,7 +47,6 @@ spec:
       pools:
         pool0:
           size: repl{{ ConfigOption "embedded_s3_volume_size"}}
-  optionalValues:
   - when: 'repl{{ ConfigOptionEquals "mlflow_ingress_type" "load_balancer" }}'
     recursiveMerge: true
@@ -82,7 +81,7 @@ spec:
       s3:
         enabled: true
         accessKeyId: repl{{ ConfigOption "external_s3_access_key"}}
-        secretAccessKey: repl{{ ConfigOption "external_s3_secret_key"}} 
+        secretAccessKey: repl{{ ConfigOption "external_s3_secret_key"}}
       external:
         enabled: true
         host: repl{{ ConfigOption "external_s3_host"}}
diff --git a/applications/onlineboutique/release/cluster.yaml b/applications/onlineboutique/release/cluster.yaml
index b194f2c..84adfa4 100644
--- a/applications/onlineboutique/release/cluster.yaml
+++ b/applications/onlineboutique/release/cluster.yaml
@@ -1,7 +1,7 @@
 apiVersion: embeddedcluster.replicated.com/v1beta1
 kind: Config
 spec:
-  version: 1.19.0+k8s-1.30
+  version: 2.3.0+k8s-1.30
   roles:
     controller:
       name: controller
@@ -11,3 +11,11 @@ spec:
     - name: worker
       labels:
         worker: "true"
+  unsupportedOverrides:
+    k0s: |
+      config:
+        spec:
+          workerProfiles:
+          - name: increased-pod-limit
+            values:
+              maxPods: 250
diff --git a/applications/wg-easy/Taskfile.yaml b/applications/wg-easy/Taskfile.yaml
index ae079c6..5707be4 100644
--- a/applications/wg-easy/Taskfile.yaml
+++ b/applications/wg-easy/Taskfile.yaml
@@ -2,11 +2,12 @@ version: "3"
 
 includes:
   utils: ./taskfiles/utils.yml
+  dev: ./taskfiles/container.yml
 
 vars:
   # Application configuration
   APP_NAME: '{{.REPLICATED_APP | default "wg-easy"}}'
-  
+
   # Cluster configuration
   CLUSTER_NAME: '{{.CLUSTER_NAME | default "test-cluster"}}'
   K8S_VERSION: '{{.K8S_VERSION | default "1.32.2"}}'
@@ -14,19 +15,23 @@ vars:
   INSTANCE_TYPE: '{{.INSTANCE_TYPE | default "r1.small"}}'
   DISTRIBUTION: '{{.DISTRIBUTION | default "k3s"}}'
   KUBECONFIG_FILE: './{{.CLUSTER_NAME}}.kubeconfig'
-  
+
   # Ports configuration
   EXPOSE_PORTS:
     - port: 30443
       protocol: https
     - port: 30080
       protocol: http
-  
+
   # GCP default configuration
   GCP_PROJECT: '{{.GCP_PROJECT | default "replicated-qa"}}'
   GCP_ZONE: '{{.GCP_ZONE | default "us-central1-a"}}'
   VM_NAME: '{{.VM_NAME | default (printf "%s-dev" (or (env "GUSER") "user"))}}'
+  # Docker workflow configuration
+  IMAGE_NAME: ttl.sh/wg-easy-dev
+  CONTAINER_NAME: wg-easy-dev
+
 tasks:
   default:
     desc: Show available tasks
@@ -91,7 +96,7 @@ tasks:
         echo "Removing old kubeconfig file"
         rm -f {{.KUBECONFIG_FILE}}
       fi
-    fi 
+    fi
 
   setup-kubeconfig:
     desc: Get kubeconfig and prepare cluster for application deployment
@@ -112,7 +117,7 @@
           true
         fi
    deps:
-      - create-cluster
+      - cluster-create
      - verify-kubeconfig
 
  dependencies-update:
@@ -138,7 +143,7 @@
        if [ -z "$CLUSTER_ID" ]; then
          exit 1
        fi
-        
+
        # Check if all ports are already exposed
        expected_count={{len .EXPOSE_PORTS}}
        port_checks=""
@@ -147,7 +152,7 @@
        {{end}}
        # Remove trailing "or "
        port_checks="${port_checks% or }"
-        
+
        PORT_COUNT=$(replicated cluster port ls $CLUSTER_ID --output json | jq -r ".[] | select($port_checks) | .upstream_port" | wc -l | tr -d ' ')
        [ "$PORT_COUNT" -eq "$expected_count" ]
    cmds:
@@ -169,10 +174,10 @@
          echo "Error: Could not find cluster with name {{.CLUSTER_NAME}}"
          exit 1
        fi
-        
+
        # Get exposed URLs
        ENV_VARS=$(task utils:port-operations OPERATION=getenv CLUSTER_NAME={{.CLUSTER_NAME}})
-        
+
        # Deploy with helmfile
        echo "Using $ENV_VARS"
        eval "KUBECONFIG={{.KUBECONFIG_FILE}} $ENV_VARS helmfile sync --wait"
@@ -193,7 +198,7 @@
          echo "No clusters found with name {{.CLUSTER_NAME}}"
          exit 0
        fi
-        
+
        for id in $CLUSTER_IDS; do
          echo "Deleting cluster ID: $id"
          replicated cluster rm "$id"
@@ -213,7 +218,7 @@
      - echo "Preparing release files..."
      - rm -rf ./release
      - mkdir -p ./release
-      
+
      # Copy all non-config.yaml files
      - echo "Copying non-config YAML files to release folder..."
      - find . -path '*/replicated/*.yaml' -not -name 'config.yaml' -exec cp {} ./release/ \;
@@ -237,27 +242,27 @@
          yq '.spec.chart.chartVersion = strenv(version) | .spec.chart.chartVersion style="single"' $directory/$helmChartName | tee release/$helmChartName
        done < <(find . -maxdepth 2 -mindepth 2 -type d -name replicated)
-      
+
      # Merge config.yaml files
      - echo "Merging config.yaml files..."
      - |
        # Start with an empty config file
        echo "{}" > ./release/config.yaml
-        
+
        # Merge all app config.yaml files first (excluding root replicated)
        for config_file in $(find . -path '*/replicated/config.yaml' | grep -v "^./replicated/"); do
          echo "Merging $config_file..."
          yq eval-all '. as $item ireduce ({}; . * $item)' ./release/config.yaml "$config_file" > ./release/config.yaml.new
          mv ./release/config.yaml.new ./release/config.yaml
        done
-        
+
        # Merge root config.yaml last
        if [ -f "./replicated/config.yaml" ]; then
          echo "Merging root config.yaml last..."
          yq eval-all '. as $item ireduce ({}; . * $item)' ./release/config.yaml "./replicated/config.yaml" > ./release/config.yaml.new
          mv ./release/config.yaml.new ./release/config.yaml
        fi
-      
+
      # Package Helm charts
      - echo "Packaging Helm charts..."
      - |
@@ -267,7 +272,7 @@
          # Navigate to chart directory, package it, and move the resulting .tgz to release folder
          (cd "$chart_dir" && helm package . && mv *.tgz ../release/)
        done
-      
+
      - echo "Release files prepared in ./release/ directory"
    deps:
      - update-version
diff --git a/applications/wg-easy/container/Containerfile b/applications/wg-easy/container/Containerfile
new file mode 100644
index 0000000..7c62451
--- /dev/null
+++ b/applications/wg-easy/container/Containerfile
@@ -0,0 +1,67 @@
+# Base image for all shared Dockerfiles for taskfiles
+# Use this image as base image for app specific docker files
+FROM --platform=$BUILDPLATFORM ubuntu:24.04
+
+ARG TARGETOS
+ARG TARGETARCH
+
+WORKDIR /tools
+
+# Set environment variables
+ENV DEBIAN_FRONTEND=noninteractive \
+    HOME=/home/devuser
+
+# Install CLI tools
+RUN apt-get update && apt-get install -y \
+    curl \
+    jq \
+    yq \
+    gnupg \
+    sudo \
+
+    # Install Helm
+    && curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash \
+
+    # Install kubectl
+    && curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" \
+    && chmod +x kubectl \
+    && mv kubectl /usr/local/bin/ \
+
+    # Install Task
+    && sh -c "$(curl --location https://taskfile.dev/install.sh)" -- -d -b /usr/local/bin \
+
+    # Install Helmfile
+    && curl -Ls $(curl -s https://api.github.com/repos/helmfile/helmfile/releases/latest \
+    | grep "browser_download_url.*linux_amd64.tar.gz" \
+    | cut -d : -f 2,3 \
+    | tr -d \") -o helmfile.tar.gz \
+    && tar xf helmfile.tar.gz helmfile && rm helmfile.tar.gz \
+    && mv helmfile /usr/local/bin/helmfile \
+
+    # Install Replicated CLI
+    && curl -Ls $(curl -s https://api.github.com/repos/replicatedhq/replicated/releases/latest \
+    | grep "browser_download_url.*linux_amd64.tar.gz" \
+    | cut -d : -f 2,3 \
+    | tr -d \") -o replicated.tar.gz \
+    && tar xf replicated.tar.gz replicated && rm replicated.tar.gz \
+    && mv replicated /usr/local/bin/replicated \
+
+    # Install Google Cloud CLI
+    && echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list \
+    && curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg \
+    && apt-get update \
+    && apt-get install google-cloud-cli -y \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create a non-root user for better security
+RUN groupadd -r devuser \
+    && useradd -r -g devuser -m -s /bin/bash devuser \
+    && echo "devuser ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/devuser
+
+# Set working directory
+WORKDIR /app
+
+# Switch to non-root user
+USER devuser
+
+CMD ["bash"]
diff --git a/applications/wg-easy/taskfiles/container.yml b/applications/wg-easy/taskfiles/container.yml
new file mode 100644
index 0000000..7f4cd19
--- /dev/null
+++ b/applications/wg-easy/taskfiles/container.yml
@@ -0,0 +1,87 @@
+version: "3"
+
+# Development environment tasks
+tasks:
+  build-image:
+    desc: Build development Docker image
+    vars:
+      IMAGE_NAME: '{{.IMAGE_NAME}}'
+      IMAGE_TAG: '{{.IMAGE_TAG | default "latest"}}'
+      CONTAINERFILE: '{{.CONTAINERFILE | default "./container/Containerfile"}}'
+      BUILD_ARGS: '{{.BUILD_ARGS | default ""}}'
+    requires:
+      vars: [IMAGE_NAME, IMAGE_TAG, CONTAINERFILE]
+
+    cmds:
+      - docker build -t {{.IMAGE_NAME}}:{{.IMAGE_TAG}} -f {{.CONTAINERFILE}} .
+
+  start:
+    desc: Start development container in background
+    silent: true
+    cmds:
+      - task: start-implementation
+
+  start-implementation:
+    desc: Start development container in background
+    silent: true
+    run: once
+    internal: true
+    vars:
+      IMAGE_NAME: '{{.IMAGE_NAME}}'
+      CONTAINER_NAME: '{{.CONTAINER_NAME}}'
+      IMAGE_TAG: '{{.IMAGE_TAG | default "latest"}}'
+    requires:
+      vars: [IMAGE_NAME, CONTAINER_NAME]
+
+    status:
+      - docker ps | grep -q "{{.CONTAINER_NAME}}"
+    cmds:
+      - |
+        # Start container with host networking for kubectl port-forward compatibility
+        CONTAINER_ID=$(docker run --rm --name {{.CONTAINER_NAME}} -d \
+          -v $(pwd):/workspace \
+          -e HOME=/home/devuser \
+          -e USER=devuser \
+          -w /workspace \
+          {{.IMAGE_NAME}}:{{.IMAGE_TAG}} bash -c 'trap "exit" TERM; while :; do sleep 0.1; done')
+
+        if [ $? -eq 0 ]; then
+          echo "Development container started successfully with ID: $CONTAINER_ID"
+        else
+          echo "Failed to start development container"
+          exit 1
+        fi
+
+  shell:
+    desc: Attach to development container shell
+    silent: true
+    requires:
+      vars: [CONTAINER_NAME]
+    deps:
+      - start-implementation
+    cmds:
+      - echo "Connecting to {{.CONTAINER_NAME}}..."
+      - docker exec -it {{.CONTAINER_NAME}} /bin/bash
+
+  stop:
+    desc: Stop development container
+    silent: true
+    requires:
+      vars: [CONTAINER_NAME]
+    cmds:
+      - |
+        if docker ps | grep -q "{{.CONTAINER_NAME}}"; then
+          echo "Stopping {{.CONTAINER_NAME}} development container..."
+          docker stop {{.CONTAINER_NAME}}
+        else
+          echo "Container {{.CONTAINER_NAME}} is not running"
+        fi
+
+  restart:
+    desc: Restart development container
+    silent: true
+    requires:
+      vars: [CONTAINER_NAME]
+    cmds:
+      - task: stop
+      - task: start
diff --git a/patterns/embedded-cluster-kubelet-overrides/README.md b/patterns/embedded-cluster-kubelet-overrides/README.md
new file mode 100644
index 0000000..ac6c9ef
--- /dev/null
+++ b/patterns/embedded-cluster-kubelet-overrides/README.md
@@ -0,0 +1,46 @@
+# Kubelet overrides for Embedded Cluster
+
+If your application needs additional kernel features or specific kubelet settings to function correctly, you can configure these settings via a Worker Profile.
+
+To do this, specify a `workerProfile`, as described in the k0s documentation, in the `k0s` section of `unsupportedOverrides` within your Embedded Cluster Config.
+
+## Example 1: Increasing the number of pods schedulable to a single node
+
+This example benefits your Embedded Cluster releases if your application deploys a large number of workloads but your customers prefer to run it on a single node.
+
+```yaml
+apiVersion: embeddedcluster.replicated.com/v1beta1
+kind: Config
+spec:
+  unsupportedOverrides:
+    k0s: |
+      config:
+        spec:
+          workerProfiles:
+          - name: increased-pod-limit
+            values:
+              maxPods: 250
+```
+
+## Example 2: Set allowed unsafe sysctl settings
+
+This example allows pods to request the `net.ipv4.ip_forward` sysctl, which the kubelet rejects unless it is explicitly allow-listed.
+
+```yaml
+apiVersion: embeddedcluster.replicated.com/v1beta1
+kind: Config
+spec:
+  unsupportedOverrides:
+    k0s: |-
+      config:
+        spec:
+          workerProfiles:
+          - name: default
+            values:
+              allowedUnsafeSysctls:
+              - net.ipv4.ip_forward
+```
+
+## Further reading
+
+[K0s documentation on worker profiles](https://docs.k0sproject.io/stable/configuration/#specworkerprofiles)
+
+[Kubelet configuration options](https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/#kubelet-config-k8s-io-v1beta1-KubeletConfiguration)
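+
+## Verifying the override
+
+After installing with a modified Config, you may want to confirm that the kubelet actually picked up the Worker Profile values. One quick check (a sketch, assuming `kubectl` access to the cluster and `jq` installed locally; replace `<node-name>` with one of your node names) is to read the node's live kubelet configuration through the `configz` endpoint:
+
+```bash
+# Query the kubelet's active configuration via the API server proxy
+kubectl get --raw "/api/v1/nodes/<node-name>/proxy/configz" | jq '.kubeletconfig.maxPods'
+```
+
+With the Worker Profile from Example 1 applied, this should print `250`.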