# Mlflow Release Pipeline #25 — workflow file for this run

# Workflow identity, triggers and shared environment for the MLflow CI jobs.
name: MLflow CI

# Triggered by pull requests, and by pushes to main, but only when the change
# touches the MLflow charts, the KOTS manifests, or this workflow file.
on:
  pull_request:
    paths:
      - "applications/mlflow/charts/**"
      - "applications/mlflow/kots/**"
      - ".github/workflows/mlflow-ci.yml"
  push:
    branches:
      - main
    paths:
      - "applications/mlflow/charts/**"
      - "applications/mlflow/kots/**"
      - ".github/workflows/mlflow-ci.yml"

env:
  # Application slug; steps read it back as `env.APP_SLUG`.
  APP_SLUG: diamon-mlflow

jobs:
# Lints the MLflow charts and renders their templates through the Makefile
# targets in applications/mlflow.
#
# NOTE(review): every "[email protected]" ref in this job looks like e-mail
# obfuscation residue from the page this file was captured from (the text
# around "@" was swallowed). Restore the real `<action>@<version>` from the
# repository before running.
lint-and-template:
  runs-on: ubuntu-22.04
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        # 0 fetches the complete history instead of a shallow clone.
        fetch-depth: 0

    - name: Set up Helm
      uses: azure/[email protected]
      with:
        version: v3.13.3

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        # Quoted so YAML keeps it a string; an unquoted version such as 3.10
        # would be loaded as the float 3.1.
        python-version: "3.12"

    - name: Set up chart-testing
      uses: helm/[email protected]

    - name: Add Helm repositories
      run: |
        cd applications/mlflow
        make add-helm-repositories

    - name: Lint charts
      run: |
        cd applications/mlflow
        make lint

    - name: Template charts with SDK disabled
      id: template
      run: |
        cd applications/mlflow
        make template

    # failure() is true when ANY earlier step of the job failed, so this also
    # runs after a lint failure; `if-no-files-found: warn` keeps that harmless
    # when nothing was rendered.
    - name: Upload templates (if templating failed)
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: failed-templates
        path: applications/mlflow/rendered-templates
        if-no-files-found: warn
# Packages both charts, creates a Replicated release on a per-run channel and
# creates a throw-away customer for the install tests. The customer id,
# license id and channel slug are exposed as job outputs.
#
# NOTE(review): every "[email protected]" ref/value in this job looks like
# e-mail obfuscation residue from the page this file was captured from (the
# text around "@" was swallowed). Restore the real values from the repository
# before running.
create-release:
  runs-on: ubuntu-22.04
  needs: [lint-and-template]
  outputs:
    license-id: ${{ steps.create-customer.outputs.license-id }}
    customer-id: ${{ steps.create-customer.outputs.customer-id }}
    channel-slug: ${{ steps.create-release.outputs.channel-slug }}
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Set up Helm
      uses: azure/[email protected]
      with:
        version: v3.13.3

    # Add required Helm repositories
    - name: Add Helm repositories
      run: |
        cd applications/mlflow
        make add-helm-repositories

    - name: Package infra chart
      run: |
        helm package applications/mlflow/charts/infra -d applications/mlflow/kots/ -u
        # compgen -G succeeds when the glob matches at least one file.
        # `[ -f glob ]` errors out as soon as several archives match.
        if ! compgen -G 'applications/mlflow/kots/infra-*.tgz' > /dev/null; then
          echo "Error: Infra chart packaging failed"
          exit 1
        fi

    - name: Package mlflow chart
      run: |
        helm package applications/mlflow/charts/mlflow -d applications/mlflow/kots/ -u
        if ! compgen -G 'applications/mlflow/kots/mlflow-*.tgz' > /dev/null; then
          echo "Error: MLflow chart packaging failed"
          exit 1
        fi

    # The following steps implement our versioning strategy:
    # 1. We extract the chart version from mlflow-chart.yaml
    # 2. We use this version for the Replicated release
    # This ensures that the Replicated release version always matches the
    # MLflow chart version.
    - name: Extract MLflow chart version
      id: chart-version
      run: |
        # Second field of the first line containing "chartVersion:".
        CHART_VERSION=$(awk '/chartVersion:/ {print $2; exit}' applications/mlflow/kots/mlflow-chart.yaml)
        # Fail here rather than create a release with an empty version label.
        if [ -z "$CHART_VERSION" ]; then
          echo "Error: no chartVersion found in applications/mlflow/kots/mlflow-chart.yaml"
          exit 1
        fi
        echo "chart_version=$CHART_VERSION" >> "$GITHUB_OUTPUT"
        echo "Using MLflow chart version: $CHART_VERSION"

    - name: Create release
      id: create-release
      uses: replicatedhq/replicated-actions/[email protected]
      with:
        app-slug: ${{ env.APP_SLUG }}
        api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
        yaml-dir: applications/mlflow/kots/
        # One channel per workflow run.
        promote-channel: ci-automation-${{ github.run_id }}
        version: ${{ steps.chart-version.outputs.chart_version }}

    # NOTE(review): this action tracks the mutable `main` branch, unlike the
    # tagged actions/checkout@v4 above — consider pinning it.
    - name: Create customer
      id: create-customer
      uses: replicatedhq/replicated-actions/create-customer@main
      with:
        app-slug: ${{ env.APP_SLUG }}
        api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
        customer-name: automated-${{ github.run_id }}
        # Quoted: a value starting with "[" is otherwise parsed as a YAML
        # flow sequence. NOTE(review): obfuscated placeholder, see job header.
        customer-email: '[email protected]'
        license-type: dev
        channel-slug: ${{ steps.create-release.outputs.channel-slug }}
        is-kots-install-enabled: "true"
# Creates a test cluster per matrix entry, logs in to the Replicated registry
# with the test customer's installation id and runs `make test-replicated-helm`.
# On failure a support bundle is collected and uploaded; the cluster is
# removed whenever one was created.
#
# NOTE(review): every "[email protected]" ref in this job looks like e-mail
# obfuscation residue from the page this file was captured from (the text
# around "@" was swallowed). Restore the real `<action>@<version>` from the
# repository before running.
helm-install-test:
  runs-on: ubuntu-22.04
  needs: [create-release]
  strategy:
    fail-fast: false
    matrix:
      cluster:
        # Versions are quoted so YAML keeps them as strings; an unquoted 1.30
        # would be loaded as the float 1.3.
        - distribution: kind
          version: "1.32"
        # - distribution: kind
        #   version: "1.31"
        # - distribution: kind
        #   version: "1.30"
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Set up Helm
      uses: azure/[email protected]
      with:
        version: v3.13.3

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: "3.12"

    - name: Set up chart-testing
      uses: helm/[email protected]

    # Install jq via apt-get
    - name: Install jq
      run: |
        sudo apt-get update
        sudo apt-get install -y jq

    # Get license ID from customer inspect
    - name: Get License ID
      id: get-license
      env:
        # Passed through the environment instead of being expanded into the
        # script text; `docker run -e NAME` forwards the variable as-is.
        REPLICATED_API_TOKEN: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
        REPLICATED_APP: ${{ env.APP_SLUG }}
        CUSTOMER_NAME: automated-${{ github.run_id }}
      run: |
        # Run vendor-cli to inspect the customer and get the installation ID
        CUSTOMER_JSON=$(docker run --rm \
          -e REPLICATED_API_TOKEN \
          -e REPLICATED_APP \
          replicated/vendor-cli:latest \
          customer inspect --customer "$CUSTOMER_NAME" --output json)
        # Use jq to properly extract the installationId
        INSTALLATION_ID=$(echo "$CUSTOMER_JSON" | jq -r '.installationId')
        if [ -z "$INSTALLATION_ID" ] || [ "$INSTALLATION_ID" = "null" ]; then
          echo "Failed to extract installationId from customer JSON"
          echo "JSON structure:"
          echo "$CUSTOMER_JSON" | jq 'del(.installationId)' # Print JSON without the license ID
          exit 1
        fi
        # Don't print the actual license ID, just indicate success
        echo "Successfully extracted installationId"
        echo "license_id=$INSTALLATION_ID" >> "$GITHUB_OUTPUT"

    - name: Create Cluster
      id: create-cluster
      uses: replicatedhq/replicated-actions/[email protected]
      with:
        api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
        kubernetes-distribution: ${{ matrix.cluster.distribution }}
        kubernetes-version: ${{ matrix.cluster.version }}
        cluster-name: mlflow-ci-${{ github.run_id }}-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}
        ttl: 1h
        export-kubeconfig: true

    - name: Add Helm repositories
      run: |
        cd applications/mlflow
        make add-helm-repositories

    # Authenticate with the Replicated registry
    - name: Login to Replicated registry
      env:
        REPLICATED_LICENSE_ID: ${{ steps.get-license.outputs.license_id }}
      run: |
        echo "Authenticating with Replicated registry using license ID from previous step"
        cd applications/mlflow
        # Check the variable is set without revealing its value
        if [ -n "$REPLICATED_LICENSE_ID" ]; then
          echo "REPLICATED_LICENSE_ID is set in the shell"
        else
          echo "ERROR: REPLICATED_LICENSE_ID is not set in the shell"
          exit 1
        fi
        # REPLICATED_LICENSE_ID is already exported to make by the step env.
        make registry-login

    - name: Run Helm installation test with charts from Replicated registry
      env:
        # Holds the kubeconfig CONTENT; the script writes it to a file.
        KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
        REPLICATED_APP: ${{ env.APP_SLUG }}
        REPLICATED_CHANNEL: ${{ needs.create-release.outputs.channel-slug }}
        REPLICATED_LICENSE_ID: ${{ steps.get-license.outputs.license_id }}
      run: |
        cd applications/mlflow
        # Save kubeconfig to a file
        KUBECONFIG_FILE="/tmp/kubeconfig-${{ github.run_id }}"
        echo "$KUBECONFIG" > "$KUBECONFIG_FILE"
        echo "Saved kubeconfig to $KUBECONFIG_FILE"
        # Only KUBECONFIG needs overriding (content -> file path); the
        # REPLICATED_* variables reach make through the step env.
        KUBECONFIG="$KUBECONFIG_FILE" make test-replicated-helm

    - name: Install troubleshoot
      if: failure()
      run: curl -L https://github.com/replicatedhq/troubleshoot/releases/latest/download/support-bundle_linux_amd64.tar.gz | tar xzvf -

    - name: Collect bundle
      if: failure()
      env:
        KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
      run: |
        # Save kubeconfig to a file
        KUBECONFIG_FILE="/tmp/kubeconfig-helm-bundle-${{ github.run_id }}"
        echo "$KUBECONFIG" > "$KUBECONFIG_FILE"
        echo "Saved kubeconfig to $KUBECONFIG_FILE"
        ./support-bundle --kubeconfig="$KUBECONFIG_FILE" --interactive=false -o ci-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }} https://raw.githubusercontent.com/replicatedhq/troubleshoot-specs/main/in-cluster/default.yaml

    - name: Upload support bundle artifact
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: mlflow-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}
        path: 'ci-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}.tar.gz'

    # always() keeps the teardown running after failures; the id check skips
    # it when no cluster was created.
    - name: Remove Cluster
      if: ${{ always() && steps.create-cluster.outputs.cluster-id != '' }}
      uses: replicatedhq/replicated-actions/[email protected]
      with:
        api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
        cluster-id: ${{ steps.create-cluster.outputs.cluster-id }}
# Creates a test cluster per matrix entry, downloads the test customer's
# license and installs the release with the kots-install action. On failure a
# support bundle is collected and uploaded; the cluster is removed whenever
# one was created.
#
# NOTE(review): every "[email protected]" ref in this job looks like e-mail
# obfuscation residue from the page this file was captured from (the text
# around "@" was swallowed). Restore the real `<action>@<version>` from the
# repository before running.
kots-install-test:
  runs-on: ubuntu-22.04
  needs: [create-release]
  strategy:
    fail-fast: false
    matrix:
      cluster:
        # Versions are quoted so YAML keeps them as strings; an unquoted 1.30
        # would be loaded as the float 1.3.
        - distribution: kind
          version: "1.32"
        # - distribution: kind
        #   version: "1.31"
        # - distribution: kind
        #   version: "1.30"
        # - distribution: aks
        #   version: "1.31"
        # - distribution: aks
        #   version: "1.30"
        # - distribution: gke
        #   version: "1.32"
        # - distribution: gke
        #   version: "1.31"
        # - distribution: gke
        #   version: "1.30"
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Create Cluster
      id: create-cluster
      uses: replicatedhq/replicated-actions/[email protected]
      with:
        api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
        kubernetes-distribution: ${{ matrix.cluster.distribution }}
        kubernetes-version: ${{ matrix.cluster.version }}
        cluster-name: mlflow-kots-${{ github.run_id }}-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}
        ttl: 1h
        export-kubeconfig: true

    # The following step extracts the chart version to use it for the KOTS installation
    - name: Extract MLflow chart version
      id: chart-version
      run: |
        # Second field of the first line containing "chartVersion:".
        CHART_VERSION=$(awk '/chartVersion:/ {print $2; exit}' applications/mlflow/kots/mlflow-chart.yaml)
        # Fail here rather than install with an empty version label.
        if [ -z "$CHART_VERSION" ]; then
          echo "Error: no chartVersion found in applications/mlflow/kots/mlflow-chart.yaml"
          exit 1
        fi
        echo "chart_version=$CHART_VERSION" >> "$GITHUB_OUTPUT"
        echo "Using MLflow chart version: $CHART_VERSION"

    # Download license using Replicated vendor-cli Docker container
    - name: Download license
      id: download-license
      env:
        # Passed through the environment instead of being expanded into the
        # script text; `docker run -e NAME` forwards the variable as-is.
        REPLICATED_API_TOKEN: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
        REPLICATED_APP: ${{ env.APP_SLUG }}
        CUSTOMER_ID: ${{ needs.create-release.outputs.customer-id }}
      run: |
        # Directory that holds the downloaded license
        mkdir -p /tmp/replicated
        # Run the vendor-cli command and capture its output
        docker run --rm \
          -e REPLICATED_API_TOKEN \
          -e REPLICATED_APP \
          replicated/vendor-cli:latest \
          customer download-license --customer "$CUSTOMER_ID" > /tmp/replicated/license.yaml
        if [ ! -s /tmp/replicated/license.yaml ]; then
          echo "Error: downloaded license is empty"
          exit 1
        fi
        # Read the license and set it as a multi-line output. The delimiter is
        # random so that no line of the license can terminate the value early.
        LICENSE_CONTENT=$(cat /tmp/replicated/license.yaml)
        DELIMITER=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
        {
          echo "license<<$DELIMITER"
          echo "$LICENSE_CONTENT"
          echo "$DELIMITER"
        } >> "$GITHUB_OUTPUT"

    # Install using KOTS
    - name: KOTS Install
      uses: replicatedhq/replicated-actions/[email protected]
      with:
        kubeconfig: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
        kots-version: latest
        # <app slug>/<channel>; the channel name matches the per-run
        # `ci-automation-<run id>` naming used for the release.
        app-slug: ${{ env.APP_SLUG }}/ci-automation-${{ github.run_id }}
        app-version-label: ${{ steps.chart-version.outputs.chart_version }}
        license-file: ${{ steps.download-license.outputs.license }}
        namespace: default
        wait-duration: 10m
        shared-password: 'replicatedmlflow'

    - name: Install troubleshoot
      if: failure()
      run: curl -L https://github.com/replicatedhq/troubleshoot/releases/latest/download/support-bundle_linux_amd64.tar.gz | tar xzvf -

    - name: Collect bundle
      if: failure()
      env:
        # Holds the kubeconfig CONTENT; the script writes it to a file.
        KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
      run: |
        # Save kubeconfig to a file
        KUBECONFIG_FILE="/tmp/kubeconfig-kots-bundle-${{ github.run_id }}"
        echo "$KUBECONFIG" > "$KUBECONFIG_FILE"
        echo "Saved kubeconfig to $KUBECONFIG_FILE"
        ./support-bundle --kubeconfig="$KUBECONFIG_FILE" --interactive=false -o kots-ci-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }} https://raw.githubusercontent.com/replicatedhq/troubleshoot-specs/main/in-cluster/default.yaml

    - name: Upload support bundle artifact
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: mlflow-kots-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}
        path: 'kots-ci-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}.tar.gz'

    # always() keeps the teardown running after failures; the id check skips
    # it when no cluster was created.
    - name: Remove Cluster
      if: ${{ always() && steps.create-cluster.outputs.cluster-id != '' }}
      uses: replicatedhq/replicated-actions/[email protected]
      with:
        api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
        cluster-id: ${{ steps.create-cluster.outputs.cluster-id }}
# Final teardown: archives the customer and the channel that the
# create-release job reported in its outputs. `always()` makes the job run
# even when the jobs it waits on failed.
#
# NOTE(review): every "[email protected]" ref in this job looks like e-mail
# obfuscation residue from the page this file was captured from (the text
# around "@" was swallowed). Restore the real `<action>@<version>` from the
# repository before running.
cleanup-test-release:
  if: always()
  needs:
    - create-release
    - kots-install-test
    - helm-install-test
  runs-on: ubuntu-22.04
  steps:
    # Skipped when create-release produced no customer id.
    - name: Archive Customer
      uses: replicatedhq/replicated-actions/[email protected]
      if: always() && needs.create-release.outputs.customer-id != ''
      with:
        customer-id: ${{ needs.create-release.outputs.customer-id }}
        api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}

    # Skipped when create-release produced no channel slug.
    - name: Archive Channel
      uses: replicatedhq/replicated-actions/[email protected]
      if: always() && needs.create-release.outputs.channel-slug != ''
      with:
        channel-slug: ${{ needs.create-release.outputs.channel-slug }}
        app-slug: ${{ env.APP_SLUG }}
        api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}