MLflow Release Pipeline #46
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow header for the MLflow CI pipeline: display name, triggers,
# workflow-wide environment, and the start of the job map.
name: MLflow CI

# Triggered by pull requests, and by pushes to `main`, but only when one of
# the listed MLflow paths (or this workflow file itself) changes.
on:
  pull_request:
    paths:
      - 'applications/mlflow/charts/**'
      - 'applications/mlflow/kots/**'
      - 'applications/mlflow/tests/**'
      - 'applications/mlflow/Taskfile.yml'
      - '.github/workflows/mlflow-ci.yml'
  push:
    branches:
      - main
    paths:
      - 'applications/mlflow/charts/**'
      - 'applications/mlflow/kots/**'
      - 'applications/mlflow/tests/**'
      - 'applications/mlflow/Taskfile.yml'
      - '.github/workflows/mlflow-ci.yml'

env:
  # Application slug; steps read it as `env.APP_SLUG` when calling the
  # replicated-actions.
  APP_SLUG: diamon-mlflow

jobs:
# ----- job: lint-and-template (entry of the top-level `jobs:` map) -----
  # Installs Helm, Python, Task and yq, then runs the Taskfile tasks that add
  # Helm repos, lint, template, and check version consistency.
  lint-and-template:
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Helm
        # NOTE(review): the scraped copy showed `azure/[email protected]` here
        # (e-mail obfuscation swallowed `<action>@<tag>`). `setup-helm` is
        # inferred from the step name; tag v4.3.0 is assumed - confirm.
        uses: azure/setup-helm@v4.3.0
        with:
          version: v3.13.3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is a string, not a float (3.10 would
          # otherwise collapse to 3.1).
          python-version: '3.12'

      - name: Install Task
        uses: arduino/setup-task@v1
        with:
          version: 3.x
          repo-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Install yq
        run: |
          wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq
          chmod +x /usr/local/bin/yq

      - name: Run Lint and Template
        working-directory: applications/mlflow
        run: |
          # Use Taskfile to add Helm repos, lint charts and generate templates
          task add:helm:repos
          task lint
          task template

      - name: Check Version Consistency
        working-directory: applications/mlflow
        run: |
          # Ensure Chart.yaml and HelmChart versions are in sync
          task check:versions
# ----- job: create-release (entry of the top-level `jobs:` map) -----
  # Runs after lint-and-template. Packages the charts through the Taskfile,
  # creates a release promoted to a per-run channel, and creates a dev
  # customer on that channel. Downstream jobs read the three outputs below.
  create-release:
    runs-on: ubuntu-22.04
    needs: [lint-and-template]
    outputs:
      customer-id: ${{ steps.create-customer.outputs.customer-id }}
      channel-slug: ${{ steps.create-release.outputs.channel-slug }}
      chart-version: ${{ steps.chart-version.outputs.chart_version }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Helm
        # NOTE(review): the scraped copy showed `azure/[email protected]` here
        # (e-mail obfuscation swallowed `<action>@<tag>`). `setup-helm` is
        # inferred from the step name; tag v4.3.0 is assumed - confirm.
        uses: azure/setup-helm@v4.3.0
        with:
          version: v3.13.3

      - name: Install Task
        uses: arduino/setup-task@v1
        with:
          version: 3.x
          repo-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Install yq
        run: |
          wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq
          chmod +x /usr/local/bin/yq

      - name: Package and Update Versions
        working-directory: applications/mlflow
        run: |
          # Update and package charts
          task update:versions
          task package

      # Extract MLflow chart version for reference
      - name: Extract MLflow chart version
        id: chart-version
        working-directory: applications/mlflow
        run: |
          # Extract MLflow chart version using taskfile variable
          CHART_VERSION=$(task -s version:extract)
          echo "chart_version=$CHART_VERSION" >> $GITHUB_OUTPUT
          echo "Using MLflow chart version: $CHART_VERSION"

      - name: Create release
        id: create-release
        # NOTE(review): the scraped copy showed
        # `replicatedhq/replicated-actions/[email protected]`. The `create-release`
        # sub-action is inferred from the step name; tag v1.17.0 is assumed -
        # confirm.
        uses: replicatedhq/replicated-actions/create-release@v1.17.0
        with:
          app-slug: ${{ env.APP_SLUG }}
          api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
          yaml-dir: applications/mlflow/kots/
          # Channel name is unique per run, run number and attempt.
          promote-channel: ci-automation-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}
          version: ${{ steps.chart-version.outputs.chart_version }}

      - name: Create customer
        id: create-customer
        # NOTE(review): this is the only replicated-action referenced by
        # branch (`@main`) rather than by a version tag - consider pinning.
        uses: replicatedhq/replicated-actions/create-customer@main
        with:
          app-slug: ${{ env.APP_SLUG }}
          api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
          customer-name: automated-${{ github.run_id }}
          # NOTE(review): the original address was lost to e-mail obfuscation
          # in the scraped copy (it read `[email protected]`, which YAML would
          # parse as a flow sequence). Placeholder used - restore the real
          # address.
          customer-email: 'ci-automation@example.com'
          license-type: dev
          channel-slug: ${{ steps.create-release.outputs.channel-slug }}
          is-kots-install-enabled: "true"
# ----- job: helm-install-test (entry of the top-level `jobs:` map) -----
  # Runs after create-release, once per cluster x config matrix combination.
  # Creates a cluster, runs the Taskfile Helm test and the application tests,
  # uploads a support bundle on failure, and always removes the cluster if
  # one was created.
  helm-install-test:
    runs-on: ubuntu-22.04
    needs: [create-release]
    strategy:
      fail-fast: false
      matrix:
        cluster:
          - distribution: kind
            # Quoted so the version stays a string (an unquoted 1.30 would
            # be read as the float 1.3).
            version: '1.32'
        config:
          - name: nodeport-ingress-disabled
            values_file: tests/helm/nodeport-ingress-disabled.yaml
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Helm
        # NOTE(review): the scraped copy showed `azure/[email protected]` here
        # (e-mail obfuscation swallowed `<action>@<tag>`). `setup-helm` is
        # inferred from the step name; tag v4.3.0 is assumed - confirm.
        uses: azure/setup-helm@v4.3.0
        with:
          version: v3.13.3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is a string, not a float.
          python-version: '3.12'

      - name: Install Task
        uses: arduino/setup-task@v1
        with:
          version: 3.x
          repo-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Install yq
        run: |
          wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq
          chmod +x /usr/local/bin/yq

      # Install jq via apt-get
      - name: Install jq
        run: |
          sudo apt-get update
          sudo apt-get install -y jq

      # Get license ID from customer inspect
      - name: Get License ID
        id: get-license
        working-directory: applications/mlflow
        env:
          REPLICATED_API_TOKEN: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
        run: |
          # Get license ID using the Taskfile task
          INSTALLATION_ID=$(CUSTOMER_NAME="automated-${{ github.run_id }}" task customer:get-license-id)
          echo "license_id=$INSTALLATION_ID" >> $GITHUB_OUTPUT

      - name: Create Cluster
        id: create-cluster
        # NOTE(review): the scraped copy showed
        # `replicatedhq/replicated-actions/[email protected]`. The `create-cluster`
        # sub-action is inferred from the step name; tag v1.17.0 is assumed -
        # confirm.
        uses: replicatedhq/replicated-actions/create-cluster@v1.17.0
        with:
          api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
          kubernetes-distribution: ${{ matrix.cluster.distribution }}
          kubernetes-version: ${{ matrix.cluster.version }}
          cluster-name: mlflow-ci-${{ github.run_id }}-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}-${{ matrix.config.name }}
          disk: 100
          instance-type: r1.large
          ttl: 1h
          export-kubeconfig: true

      - name: Setup Namespace
        working-directory: applications/mlflow
        env:
          # Holds the kubeconfig *content*; the script writes it to a file and
          # re-points KUBECONFIG at that file for the task.
          KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
        run: |
          # Save kubeconfig to a file
          KUBECONFIG_FILE="/tmp/kubeconfig-helm-test-${{ github.run_id }}"
          echo "$KUBECONFIG" > "$KUBECONFIG_FILE"
          # Create namespace using taskfile
          KUBECONFIG="$KUBECONFIG_FILE" task setup:namespaces

      - name: Run Helm Test
        working-directory: applications/mlflow
        env:
          KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
          REPLICATED_APP: ${{ env.APP_SLUG }}
          REPLICATED_CHANNEL: ${{ needs.create-release.outputs.channel-slug }}
          REPLICATED_LICENSE_ID: ${{ steps.get-license.outputs.license_id }}
          TIMEOUT: 5m
          # Environment values are strings; quoted to make that explicit.
          WAIT_RETRIES: '30'
          RETRY_INTERVAL: '10'
        run: |
          # Save kubeconfig to a file
          KUBECONFIG_FILE="/tmp/kubeconfig-helm-test-${{ github.run_id }}"
          echo "$KUBECONFIG" > "$KUBECONFIG_FILE"
          # Run task to test Helm installation
          KUBECONFIG="$KUBECONFIG_FILE" MLFLOW_VALUES="${{ matrix.config.values_file }}" task test:helm

      - name: Run Application Tests
        working-directory: applications/mlflow
        env:
          # NOTE(review): unlike the steps above, KUBECONFIG here is the
          # kubeconfig content, not a file path - confirm the task does not
          # need a path.
          KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
          PORT: '5000'
        run: |
          # Run task to test application
          task test:run-app-tests

      # The three steps below run only when an earlier step has failed.
      - name: Install troubleshoot
        if: failure()
        run: curl -L https://github.com/replicatedhq/troubleshoot/releases/latest/download/support-bundle_linux_amd64.tar.gz | tar xzvf -

      - name: Collect bundle
        if: failure()
        env:
          KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
        run: |
          # Save kubeconfig to a file
          KUBECONFIG_FILE="/tmp/kubeconfig-helm-bundle-${{ github.run_id }}"
          echo "$KUBECONFIG" > "$KUBECONFIG_FILE"
          echo "Saved kubeconfig to $KUBECONFIG_FILE"
          ./support-bundle \
            --kubeconfig="$KUBECONFIG_FILE" \
            --interactive=false \
            -o ci-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}-${{ matrix.config.name }} \
            https://raw.githubusercontent.com/replicatedhq/troubleshoot-specs/main/in-cluster/default.yaml

      - name: Upload support bundle artifact
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: mlflow-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}-${{ matrix.config.name }}
          path: 'ci-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}-${{ matrix.config.name }}.tar.gz'

      - name: Remove Cluster
        # Runs regardless of job outcome, but only if a cluster ID was
        # produced.
        if: ${{ always() && steps.create-cluster.outputs.cluster-id != '' }}
        # NOTE(review): the scraped copy showed
        # `replicatedhq/replicated-actions/[email protected]`. The `remove-cluster`
        # sub-action is inferred from the step name; tag v1.17.0 is assumed -
        # confirm.
        uses: replicatedhq/replicated-actions/remove-cluster@v1.17.0
        with:
          api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
          cluster-id: ${{ steps.create-cluster.outputs.cluster-id }}
# ----- job: kots-install-test (entry of the top-level `jobs:` map) -----
  # Runs after create-release, once per cluster matrix entry. Creates a
  # cluster, downloads the customer license, installs through KOTS, waits for
  # the `mlflow` service and port-forwards it to localhost:5000, then runs the
  # application tests. On failure a support bundle is uploaded; the cluster is
  # always removed if one was created.
  kots-install-test:
    runs-on: ubuntu-22.04
    needs: [create-release]
    strategy:
      fail-fast: false
      matrix:
        cluster:
          - distribution: kind
            # Quoted so the version stays a string (an unquoted 1.30 would
            # be read as the float 1.3).
            version: '1.32'
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is a string, not a float.
          python-version: '3.12'

      - name: Install Task
        uses: arduino/setup-task@v1
        with:
          version: 3.x
          repo-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Install yq
        run: |
          wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq
          chmod +x /usr/local/bin/yq

      - name: Create Cluster
        id: create-cluster
        # NOTE(review): the scraped copy showed
        # `replicatedhq/replicated-actions/[email protected]`. The `create-cluster`
        # sub-action is inferred from the step name; tag v1.17.0 is assumed -
        # confirm.
        uses: replicatedhq/replicated-actions/create-cluster@v1.17.0
        with:
          api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
          kubernetes-distribution: ${{ matrix.cluster.distribution }}
          kubernetes-version: ${{ matrix.cluster.version }}
          cluster-name: mlflow-kots-${{ github.run_id }}-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}
          disk: 100
          instance-type: r1.large
          ttl: 1h
          export-kubeconfig: true

      # Download license using task
      - name: Download license
        id: download-license
        working-directory: applications/mlflow
        env:
          REPLICATED_API_TOKEN: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
        run: |
          # Create a temporary file to store the license
          mkdir -p /tmp/replicated
          # Download license using taskfile
          # NOTE(review): CUSTOMER_NAME receives the customer *ID* here, while
          # the Helm job passes "automated-<run_id>" - confirm the task
          # accepts an ID.
          CUSTOMER_NAME="${{ needs.create-release.outputs.customer-id }}" task customer:download-license > /tmp/replicated/license.yaml
          # Read the license and set it as an output
          LICENSE_CONTENT=$(cat /tmp/replicated/license.yaml)
          # Use EOF delimiter for multi-line output
          echo "license<<EOF" >> $GITHUB_OUTPUT
          echo "$LICENSE_CONTENT" >> $GITHUB_OUTPUT
          echo "EOF" >> $GITHUB_OUTPUT

      # Install using KOTS
      - name: KOTS Install
        # NOTE(review): the scraped copy showed
        # `replicatedhq/replicated-actions/[email protected]`. The `kots-install`
        # sub-action is inferred from the step name; tag v1.17.0 is assumed -
        # confirm.
        uses: replicatedhq/replicated-actions/kots-install@v1.17.0
        with:
          kubeconfig: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
          kots-version: latest
          app-slug: ${{ env.APP_SLUG }}/${{ needs.create-release.outputs.channel-slug }}
          app-version-label: ${{ needs.create-release.outputs.chart-version }}
          license-file: ${{ steps.download-license.outputs.license }}
          namespace: default
          wait-duration: 10m
          shared-password: 'replicatedmlflow'

      # Set up port forwarding after KOTS installation is complete
      - name: Set up port forwarding
        id: port-forward
        env:
          # Holds the kubeconfig *content*; the script writes it to a file and
          # passes that file to every kubectl call.
          KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
        run: |
          # Use kubeconfig file
          KUBECONFIG_FILE="/tmp/kubeconfig-kots-test-${{ github.run_id }}"
          echo "$KUBECONFIG" > "$KUBECONFIG_FILE"
          echo "Saved kubeconfig to $KUBECONFIG_FILE"

          # Hardcoded port 5000 for simplicity
          PORT="5000"
          echo "Using port: $PORT for testing"

          # Wait for the MLflow service to be created
          echo "Waiting for MLflow service to be created..."
          MAX_RETRIES=30
          RETRY_INTERVAL=10
          RETRY_COUNT=0
          SERVICE_FOUND=false
          while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do
            echo "Check $((RETRY_COUNT+1))/$MAX_RETRIES: Looking for MLflow service..."
            if KUBECONFIG="$KUBECONFIG_FILE" kubectl get svc mlflow -n default --no-headers 2>/dev/null; then
              echo "✅ MLflow service found!"
              SERVICE_FOUND=true
              break
            else
              echo "MLflow service not found yet. Waiting $RETRY_INTERVAL seconds..."
              RETRY_COUNT=$((RETRY_COUNT+1))
              sleep $RETRY_INTERVAL
            fi
          done

          # Fail the step, after dumping diagnostics, if the service never
          # appeared
          if [ "$SERVICE_FOUND" != "true" ]; then
            echo "❌ ERROR: MLflow service not found after $((MAX_RETRIES * RETRY_INTERVAL)) seconds."
            echo "Showing all available services in the namespace:"
            KUBECONFIG="$KUBECONFIG_FILE" kubectl get svc -n default
            echo "Showing KOTS application status:"
            KUBECONFIG="$KUBECONFIG_FILE" kubectl get app -n default
            echo "Showing all pods in the namespace:"
            KUBECONFIG="$KUBECONFIG_FILE" kubectl get pods -n default
            exit 1
          fi

          # Verify services are present
          echo "Verifying MLflow service exists..."
          KUBECONFIG="$KUBECONFIG_FILE" kubectl get svc -n default

          # Check pod status and wait for them to be running
          echo "Checking pod status..."
          KUBECONFIG="$KUBECONFIG_FILE" kubectl get pods -n default
          echo "Waiting for MLflow pods to be running..."
          # A readiness timeout only warns; the script carries on to the
          # port-forward
          KUBECONFIG="$KUBECONFIG_FILE" kubectl wait --for=condition=Ready pods --selector=app.kubernetes.io/name=mlflow -n default --timeout=2m || {
            echo "WARNING: Timed out waiting for pods to be ready, will try port-forwarding anyway"
            KUBECONFIG="$KUBECONFIG_FILE" kubectl describe pods -n default
          }

          # Set up port forwarding in the background
          echo "Setting up port forwarding to run in the background"
          nohup bash -c "KUBECONFIG='$KUBECONFIG_FILE' kubectl port-forward -n default svc/mlflow $PORT:5000 &>/tmp/port-forward-kots-${{ github.run_id }}.log" &
          PORT_FORWARD_PID=$!
          echo "port_forward_pid=$PORT_FORWARD_PID" >> $GITHUB_OUTPUT
          echo "Set up port forwarding with PID: $PORT_FORWARD_PID"

          # Set hostname for testing
          echo "hostname=localhost:$PORT" >> $GITHUB_OUTPUT
          echo "Test endpoint will be: localhost:$PORT"

          # Give port-forward more time to establish
          echo "Waiting for port-forward to establish..."
          sleep 15

          # Basic connectivity check (a failed attempt only prints
          # diagnostics; it does not fail the step)
          echo "Checking connectivity to MLflow..."
          if curl -s -o /dev/null -w "%{http_code}" http://localhost:$PORT/; then
            echo "Successfully connected to MLflow service!"
          else
            echo "Warning: Initial connection attempt failed, service may still be starting"
            # Show the port-forward log for debugging
            echo "Port-forward log:"
            cat /tmp/port-forward-kots-${{ github.run_id }}.log || true
            # If port-forward failed, check pod logs
            echo "Pod logs:"
            KUBECONFIG="$KUBECONFIG_FILE" kubectl logs -n default -l app.kubernetes.io/name=mlflow --tail=20 || true
          fi

      # Application testing with our consolidated test file
      - name: Run Application Tests
        working-directory: applications/mlflow
        env:
          # NOTE(review): KUBECONFIG here is the kubeconfig content, not a
          # file path - confirm the task does not need a path.
          KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
          PORT: '5000'
        run: |
          # Run task to test application
          task test:run-app-tests

      # The three steps below run only when an earlier step has failed.
      - name: Install troubleshoot
        if: failure()
        run: curl -L https://github.com/replicatedhq/troubleshoot/releases/latest/download/support-bundle_linux_amd64.tar.gz | tar xzvf -

      - name: Collect bundle
        if: failure()
        env:
          KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
        run: |
          # Save kubeconfig to a file
          KUBECONFIG_FILE="/tmp/kubeconfig-kots-bundle-${{ github.run_id }}"
          echo "$KUBECONFIG" > "$KUBECONFIG_FILE"
          echo "Saved kubeconfig to $KUBECONFIG_FILE"
          ./support-bundle \
            --kubeconfig="$KUBECONFIG_FILE" \
            --interactive=false \
            -o kots-ci-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }} \
            https://raw.githubusercontent.com/replicatedhq/troubleshoot-specs/main/in-cluster/default.yaml

      - name: Upload support bundle artifact
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: mlflow-kots-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}
          path: 'kots-ci-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}.tar.gz'

      - name: Remove Cluster
        # Runs regardless of job outcome, but only if a cluster ID was
        # produced.
        if: ${{ always() && steps.create-cluster.outputs.cluster-id != '' }}
        # NOTE(review): the scraped copy showed
        # `replicatedhq/replicated-actions/[email protected]`. The `remove-cluster`
        # sub-action is inferred from the step name; tag v1.17.0 is assumed -
        # confirm.
        uses: replicatedhq/replicated-actions/remove-cluster@v1.17.0
        with:
          api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
          cluster-id: ${{ steps.create-cluster.outputs.cluster-id }}
# ----- job: cleanup-test-release (entry of the top-level `jobs:` map) -----
  # Runs after the release and both install-test jobs, whatever their outcome
  # (`if: always()`). Archives the customer and the channel made by
  # create-release, each only when the matching output is non-empty.
  cleanup-test-release:
    runs-on: ubuntu-22.04
    needs: [create-release, kots-install-test, helm-install-test]
    if: always()
    steps:
      - name: Archive Customer
        if: ${{ always() && needs.create-release.outputs.customer-id != '' }}
        # NOTE(review): the scraped copy showed
        # `replicatedhq/replicated-actions/[email protected]`. The
        # `archive-customer` sub-action is inferred from the step name; tag
        # v1.17.0 is assumed - confirm.
        uses: replicatedhq/replicated-actions/archive-customer@v1.17.0
        with:
          api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
          customer-id: ${{ needs.create-release.outputs.customer-id }}

      - name: Archive Channel
        if: ${{ always() && needs.create-release.outputs.channel-slug != '' }}
        # NOTE(review): same reconstruction as above; `archive-channel` is
        # inferred from the step name and tag v1.17.0 is assumed - confirm.
        uses: replicatedhq/replicated-actions/archive-channel@v1.17.0
        with:
          app-slug: ${{ env.APP_SLUG }}
          api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
          channel-slug: ${{ needs.create-release.outputs.channel-slug }}