Skip to content

chore: dependency updates #99

chore: dependency updates

chore: dependency updates #99

Workflow file for this run

name: MLflow CI
on:
pull_request:
paths:
- 'applications/mlflow/charts/**'
- 'applications/mlflow/kots/**'
- 'applications/mlflow/tests/**'
- 'applications/mlflow/Taskfile.yml'
- '.github/workflows/mlflow-ci.yml'
push:
branches:
- main
paths:
- 'applications/mlflow/charts/**'
- 'applications/mlflow/kots/**'
- 'applications/mlflow/tests/**'
- 'applications/mlflow/Taskfile.yml'
- '.github/workflows/mlflow-ci.yml'
env:
APP_SLUG: diamon-mlflow
REPLICATED_APP: diamon-mlflow
jobs:
lint-and-template:
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Helm
uses: azure/[email protected]
with:
version: v3.13.3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.12
- name: Install Task
uses: arduino/setup-task@v1
with:
version: 3.x
repo-token: ${{ secrets.GITHUB_TOKEN }}
- name: Install yq
run: |
wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq
chmod +x /usr/local/bin/yq
- name: Run Lint and Template
working-directory: applications/mlflow
run: |
# Use Taskfile to lint charts and generate templates
task helm:update-deps
task helm:lint
task helm:template
- name: Upload rendered templates
if: failure()
uses: actions/upload-artifact@v4
with:
name: mlflow-rendered-templates
path: applications/mlflow/charts/.rendered-templates/
retention-days: 7
- name: Check Version Consistency
working-directory: applications/mlflow
run: |
# Ensure Chart.yaml and HelmChart versions are in sync
task versions:verify
create-release:
runs-on: ubuntu-22.04
needs: [lint-and-template]
outputs:
customer-id: ${{ steps.create-customer.outputs.customer-id }}
channel-slug: ${{ steps.create-release.outputs.channel-slug }}
chart-version: ${{ steps.chart-version.outputs.chart_version }}
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Helm
uses: azure/[email protected]
with:
version: v3.13.3
- name: Install Task
uses: arduino/setup-task@v1
with:
version: 3.x
repo-token: ${{ secrets.GITHUB_TOKEN }}
- name: Install yq
run: |
wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq
chmod +x /usr/local/bin/yq
- name: Package and Update Versions
working-directory: applications/mlflow
run: |
# Update and package charts
task helm:update-deps
task release:update-versions
task helm:package
# Extract MLflow chart version for reference
- name: Extract MLflow chart version
id: chart-version
working-directory: applications/mlflow
run: |
# Extract MLflow chart version using taskfile variable
CHART_VERSION=$(task -s extract:version:chart)
echo "chart_version=$CHART_VERSION" >> $GITHUB_OUTPUT
echo "Using MLflow chart version: $CHART_VERSION"
- name: Create release
id: create-release
uses: replicatedhq/replicated-actions/[email protected]
with:
app-slug: ${{ env.APP_SLUG }}
api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
yaml-dir: applications/mlflow/release/
promote-channel: ci-automation-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}
version: ${{ steps.chart-version.outputs.chart_version }}
- name: Create customer
id: create-customer
uses: replicatedhq/replicated-actions/create-customer@main
with:
app-slug: ${{ env.APP_SLUG }}
api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
customer-name: automated-${{ github.run_id }}
customer-email: [email protected]
license-type: dev
channel-slug: ${{ steps.create-release.outputs.channel-slug }}
is-kots-install-enabled: "true"
helm-install-test:
runs-on: ubuntu-22.04
needs: [create-release]
strategy:
fail-fast: false
matrix:
cluster:
- distribution: k3s
version: 1.32
- distribution: gke
version: 1.32
config:
- name: nodeport-ingress-disabled
values_file: tests/helm/nodeport-ingress-disabled.yaml
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Helm
uses: azure/[email protected]
with:
version: v3.13.3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.12
- name: Install Task
uses: arduino/setup-task@v1
with:
version: 3.x
repo-token: ${{ secrets.GITHUB_TOKEN }}
- name: Install yq
run: |
wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq
chmod +x /usr/local/bin/yq
# Install jq via apt-get
- name: Install jq
run: |
sudo apt-get update
sudo apt-get install -y jq
# Get license ID from customer inspect
- name: Get License ID
id: get-license
working-directory: applications/mlflow
run: |
# Create directory for license
mkdir -p /tmp/replicated
# Get customer name from previous step
CUSTOMER_NAME="${{ needs.create-release.outputs.customer-id }}"
echo "Using customer name: $CUSTOMER_NAME"
# Get license ID using the task - capture only the last line of output
echo "Getting license ID..."
INSTALLATION_ID=$(CUSTOMER_NAME="$CUSTOMER_NAME" task license:get-id | tail -n 1)
# Check if we got a result
if [ -z "$INSTALLATION_ID" ]; then
echo "ERROR: Got empty license ID"
exit 1
fi
echo "License ID retrieved successfully"
echo "license_id=$INSTALLATION_ID" >> $GITHUB_OUTPUT
env:
REPLICATED_API_TOKEN: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
- name: Create Cluster
id: create-cluster
uses: replicatedhq/replicated-actions/[email protected]
with:
api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
kubernetes-distribution: ${{ matrix.cluster.distribution }}
kubernetes-version: ${{ matrix.cluster.version }}
cluster-name: mlflow-ci-${{ github.run_id }}-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}-${{ matrix.config.name }}
disk: 100
nodes: 3
ttl: 1h
export-kubeconfig: true
- name: Setup Namespace
working-directory: applications/mlflow
run: |
# Save kubeconfig to a file
KUBECONFIG_FILE="/tmp/kubeconfig-helm-test-${{ github.run_id }}"
echo "$KUBECONFIG" > "$KUBECONFIG_FILE"
# Create namespace using taskfile
KUBECONFIG="$KUBECONFIG_FILE" task namespaces:create
env:
KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
- name: Run Helm Test
working-directory: applications/mlflow
run: |
# Save kubeconfig to a file
KUBECONFIG_FILE="/tmp/kubeconfig-helm-test-${{ github.run_id }}"
echo "$KUBECONFIG" > "$KUBECONFIG_FILE"
# Run task to test Helm installation
KUBECONFIG="$KUBECONFIG_FILE" MLFLOW_VALUES="${{ matrix.config.values_file }}" task helm:test-install
env:
KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
REPLICATED_APP: ${{ env.APP_SLUG }}
REPLICATED_CHANNEL: ${{ needs.create-release.outputs.channel-slug }}
REPLICATED_LICENSE_ID: ${{ steps.get-license.outputs.license_id }}
TIMEOUT: 5m
WAIT_RETRIES: 30
RETRY_INTERVAL: 10
- name: Run Application Tests
working-directory: applications/mlflow
run: |
# Create Python virtual environment
python -m venv ./venv
source ./venv/bin/activate
# Install dependencies directly
python -m pip install --upgrade pip wheel setuptools
# Install required packages
python -m pip install mlflow numpy pandas scikit-learn pytest requests
# Run tests
task test:app
env:
KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
PORT: 5000
- name: Install troubleshoot
run: curl -L https://github.com/replicatedhq/troubleshoot/releases/latest/download/support-bundle_linux_amd64.tar.gz | tar xzvf -
if: failure()
- name: Collect bundle
run: |
# Save kubeconfig to a file
KUBECONFIG_FILE="/tmp/kubeconfig-helm-bundle-${{ github.run_id }}"
echo "$KUBECONFIG" > "$KUBECONFIG_FILE"
echo "Saved kubeconfig to $KUBECONFIG_FILE"
./support-bundle --kubeconfig="$KUBECONFIG_FILE" --interactive=false -o ci-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}-${{ matrix.config.name }} https://raw.githubusercontent.com/replicatedhq/troubleshoot-specs/main/in-cluster/default.yaml
if: failure()
env:
KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
- name: Upload support bundle artifact
uses: actions/upload-artifact@v4
if: failure()
with:
name: mlflow-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}-${{ matrix.config.name }}
path: 'ci-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}-${{ matrix.config.name }}.tar.gz'
- name: Remove Cluster
uses: replicatedhq/replicated-actions/[email protected]
if: ${{ always() && steps.create-cluster.outputs.cluster-id != '' }}
with:
api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
cluster-id: ${{ steps.create-cluster.outputs.cluster-id }}
kots-install-test:
runs-on: ubuntu-22.04
needs: [create-release]
strategy:
fail-fast: false
matrix:
cluster:
- distribution: k3s
version: 1.32
- distribution: gke
version: 1.32
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.12
- name: Install Task
uses: arduino/setup-task@v1
with:
version: 3.x
repo-token: ${{ secrets.GITHUB_TOKEN }}
- name: Install yq
run: |
wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq
chmod +x /usr/local/bin/yq
- name: Create Cluster
id: create-cluster
uses: replicatedhq/replicated-actions/[email protected]
with:
api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
kubernetes-distribution: ${{ matrix.cluster.distribution }}
kubernetes-version: ${{ matrix.cluster.version }}
cluster-name: mlflow-kots-${{ github.run_id }}-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}
disk: 100
nodes: 3
ttl: 1h
export-kubeconfig: true
# Download license using task
- name: Download license
id: download-license
working-directory: applications/mlflow
run: |
# Create a temporary file to store the license
mkdir -p /tmp/replicated
# Set customer name for download
CUSTOMER_NAME="${{ needs.create-release.outputs.customer-id }}"
echo "Using customer name: $CUSTOMER_NAME for license download"
# Try to download license
echo "Attempting to download license..."
set +e
CUSTOMER_NAME="$CUSTOMER_NAME" task license:download
DOWNLOAD_RESULT=$?
set -e
if [ $DOWNLOAD_RESULT -ne 0 ]; then
echo "Failed to download license. Error code: $DOWNLOAD_RESULT"
# Diagnostic steps
echo "Checking if license file exists..."
ls -la /tmp/replicated || true
exit 1
fi
# Check that license file exists and has content
if [ ! -f "/tmp/replicated/license.yaml" ] || [ ! -s "/tmp/replicated/license.yaml" ]; then
echo "License file is missing or empty!"
exit 1
fi
echo "License file downloaded successfully to /tmp/replicated/license.yaml"
# Read the license and set it as an output
LICENSE_CONTENT=$(cat /tmp/replicated/license.yaml)
# Use EOF delimiter for multi-line output
echo "license<<EOF" >> $GITHUB_OUTPUT
echo "$LICENSE_CONTENT" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
env:
REPLICATED_API_TOKEN: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
# Verify license file is valid
- name: Verify License File
run: |
# Create a temporary file to check the license
echo "Saving license file for validation..."
LICENSE_FILE="/tmp/replicated/license-ci.yaml"
mkdir -p /tmp/replicated
# Save the license content to a file
cat << 'EOF' > $LICENSE_FILE
${{ steps.download-license.outputs.license }}
EOF
echo "License file content (first 10 lines):"
head -n 10 $LICENSE_FILE
# Check if the license file is valid YAML
echo "Validating license file..."
yq eval . $LICENSE_FILE > /dev/null
if [ $? -ne 0 ]; then
echo "ERROR: License file is not valid YAML"
exit 1
else
echo "✅ License file is valid YAML"
fi
# Install using KOTS
- name: KOTS Install
uses: replicatedhq/replicated-actions/[email protected]
with:
kubeconfig: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
kots-version: latest
app-slug: ${{ env.APP_SLUG }}/${{ needs.create-release.outputs.channel-slug }}
app-version-label: ${{ needs.create-release.outputs.chart-version }}
license-file: ${{ steps.download-license.outputs.license }}
namespace: default
wait-duration: 10m
shared-password: 'replicatedmlflow'
skip-preflights: true
debug: true
# Set up port forwarding after KOTS installation is complete
- name: Set up port forwarding
id: port-forward
run: |
# Use kubeconfig file
KUBECONFIG_FILE="/tmp/kubeconfig-kots-test-${{ github.run_id }}"
echo "$KUBECONFIG" > "$KUBECONFIG_FILE"
echo "Saved kubeconfig to $KUBECONFIG_FILE"
# Hardcoded port 5000 for simplicity
PORT="5000"
echo "Using port: $PORT for testing"
# Wait for the MLflow service to be created
echo "Waiting for MLflow service to be created..."
MAX_RETRIES=30
RETRY_INTERVAL=10
RETRY_COUNT=0
SERVICE_FOUND=false
while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do
echo "Check $((RETRY_COUNT+1))/$MAX_RETRIES: Looking for MLflow service..."
if KUBECONFIG="$KUBECONFIG_FILE" kubectl get svc mlflow -n default --no-headers 2>/dev/null; then
echo "✅ MLflow service found!"
SERVICE_FOUND=true
break
else
echo "MLflow service not found yet. Waiting $RETRY_INTERVAL seconds..."
RETRY_COUNT=$((RETRY_COUNT+1))
sleep $RETRY_INTERVAL
fi
done
if [ "$SERVICE_FOUND" != "true" ]; then
echo "❌ ERROR: MLflow service not found after $((MAX_RETRIES * RETRY_INTERVAL)) seconds."
echo "Showing all available services in the namespace:"
KUBECONFIG="$KUBECONFIG_FILE" kubectl get svc -n default
echo "Showing KOTS application status:"
KUBECONFIG="$KUBECONFIG_FILE" kubectl get app -n default
echo "Showing all pods in the namespace:"
KUBECONFIG="$KUBECONFIG_FILE" kubectl get pods -n default
exit 1
fi
# Verify services are present
echo "Verifying MLflow service exists..."
KUBECONFIG="$KUBECONFIG_FILE" kubectl get svc -n default
# Check pod status and wait for them to be running
echo "Checking pod status..."
KUBECONFIG="$KUBECONFIG_FILE" kubectl get pods -n default
echo "Waiting for MLflow pods to be running..."
KUBECONFIG="$KUBECONFIG_FILE" kubectl wait --for=condition=Ready pods --selector=app.kubernetes.io/name=mlflow -n default --timeout=2m || {
echo "WARNING: Timed out waiting for pods to be ready, will try port-forwarding anyway"
KUBECONFIG="$KUBECONFIG_FILE" kubectl describe pods -n default
}
# Set up port forwarding in the background
echo "Setting up port forwarding to run in the background"
nohup bash -c "KUBECONFIG='$KUBECONFIG_FILE' kubectl port-forward -n default svc/mlflow $PORT:5000 &>/tmp/port-forward-kots-${{ github.run_id }}.log" &
PORT_FORWARD_PID=$!
echo "port_forward_pid=$PORT_FORWARD_PID" >> $GITHUB_OUTPUT
echo "Set up port forwarding with PID: $PORT_FORWARD_PID"
# Set hostname for testing
echo "hostname=localhost:$PORT" >> $GITHUB_OUTPUT
echo "Test endpoint will be: localhost:$PORT"
# Give port-forward more time to establish
echo "Waiting for port-forward to establish..."
sleep 15
# Basic connectivity check
echo "Checking connectivity to MLflow..."
if curl -s -o /dev/null -w "%{http_code}" http://localhost:$PORT/; then
echo "Successfully connected to MLflow service!"
else
echo "Warning: Initial connection attempt failed, service may still be starting"
# Show the port-forward log for debugging
echo "Port-forward log:"
cat /tmp/port-forward-kots-${{ github.run_id }}.log || true
# If port-forward failed, check pod logs
echo "Pod logs:"
KUBECONFIG="$KUBECONFIG_FILE" kubectl logs -n default -l app.kubernetes.io/name=mlflow --tail=20 || true
fi
env:
KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
- name: Run Application Tests
working-directory: applications/mlflow
run: |
# Create Python virtual environment
python -m venv ./venv
source ./venv/bin/activate
# Install dependencies directly
python -m pip install --upgrade pip wheel setuptools
# Install required packages
python -m pip install mlflow numpy pandas scikit-learn pytest requests
# Run tests
task test:app
env:
KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
PORT: 5000
- name: Install troubleshoot
run: curl -L https://github.com/replicatedhq/troubleshoot/releases/latest/download/support-bundle_linux_amd64.tar.gz | tar xzvf -
if: failure()
- name: Collect bundle
run: |
# Save kubeconfig to a file
KUBECONFIG_FILE="/tmp/kubeconfig-kots-bundle-${{ github.run_id }}"
echo "$KUBECONFIG" > "$KUBECONFIG_FILE"
echo "Saved kubeconfig to $KUBECONFIG_FILE"
./support-bundle --kubeconfig="$KUBECONFIG_FILE" --interactive=false -o kots-ci-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }} https://raw.githubusercontent.com/replicatedhq/troubleshoot-specs/main/in-cluster/default.yaml
if: failure()
env:
KUBECONFIG: ${{ steps.create-cluster.outputs.cluster-kubeconfig }}
- name: Upload support bundle artifact
uses: actions/upload-artifact@v4
if: failure()
with:
name: mlflow-kots-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}
path: 'kots-ci-bundle-${{ matrix.cluster.distribution }}-${{ matrix.cluster.version }}.tar.gz'
- name: Remove Cluster
uses: replicatedhq/replicated-actions/[email protected]
if: ${{ always() && steps.create-cluster.outputs.cluster-id != '' }}
with:
api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
cluster-id: ${{ steps.create-cluster.outputs.cluster-id }}
cleanup-test-release:
runs-on: ubuntu-22.04
needs: [create-release, kots-install-test, helm-install-test]
if: always()
steps:
- name: Archive Customer
if: ${{ always() && needs.create-release.outputs.customer-id != '' }}
uses: replicatedhq/replicated-actions/[email protected]
with:
api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
customer-id: ${{ needs.create-release.outputs.customer-id }}
- name: Archive Channel
if: ${{ always() && needs.create-release.outputs.channel-slug != '' }}
uses: replicatedhq/replicated-actions/[email protected]
with:
app-slug: ${{ env.APP_SLUG }}
api-token: ${{ secrets.REPLICATED_PLATFORM_EXAMPLES_TOKEN }}
channel-slug: ${{ needs.create-release.outputs.channel-slug }}