39 changes: 35 additions & 4 deletions hack/e2e-util.sh
@@ -25,6 +25,9 @@ DUMP_LOGS="true"
export KUBEFLOW_VERSION=v1.7.0
export IMAGE_KUBEFLOW_OPERATOR="docker.io/kubeflow/training-operator:v1-855e096"

export KUBERAY_VERSION=1.1.0
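# Keep the operator image tag in lockstep with the chart version so the image
# preloaded into kind is the one the helm chart actually deploys.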
export IMAGE_KUBERAY_OPERATOR="quay.io/kuberay/operator:v${KUBERAY_VERSION}"

# These are small images used by the e2e tests.
# Pull and kind load to avoid long delays during testing
export IMAGE_ECHOSERVER="quay.io/project-codeflare/echo-server:1.0"
@@ -66,6 +69,18 @@ function update_test_host {
echo "Kind was sucessfully installed."
fi

which helm >/dev/null 2>&1
if [ $? -ne 0 ]
then
# Installing helm3
echo "Downloading and installing helm..."
curl -fsSL -o ${ROOT_DIR}/get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 &&
chmod 700 ${ROOT_DIR}/get_helm.sh && ${ROOT_DIR}/get_helm.sh
[ $? -ne 0 ] && echo "Failed to download and install helm" && exit 1
echo "Helm was sucessfully installed."
rm -f ${ROOT_DIR}/get_helm.sh
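# (get-helm-3 installs the latest stable Helm by default; if CI ever needs a
# pinned release, the upstream script also honors a DESIRED_VERSION environment
# variable, e.g. DESIRED_VERSION=v3.14.4. Noted here as an option, not used.)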
fi

kubectl kuttl version >/dev/null 2>&1
if [ $? -ne 0 ]
then
@@ -113,10 +128,19 @@ function check_prerequisites {
else
echo -n "found kuttl plugin for kubectl, " && kubectl kuttl version
fi

which helm >/dev/null 2>&1
if [ $? -ne 0 ]
then
echo "helm not installed, exiting."
exit 1
else
echo -n "found helm, " && helm version
fi
}

function pull_images {
for image in ${IMAGE_ECHOSERVER} ${IMAGE_BUSY_BOX_LATEST} ${IMAGE_KUBEFLOW_OPERATOR}
for image in ${IMAGE_ECHOSERVER} ${IMAGE_BUSY_BOX_LATEST} ${IMAGE_KUBEFLOW_OPERATOR} ${IMAGE_KUBERAY_OPERATOR}
do
docker pull $image
if [ $? -ne 0 ]
@@ -139,7 +163,7 @@ function kind_up_cluster {
fi
CLUSTER_STARTED="true"

for image in ${IMAGE_ECHOSERVER} ${IMAGE_BUSY_BOX_LATEST} ${IMAGE_KUBEFLOW_OPERATOR}
for image in ${IMAGE_ECHOSERVER} ${IMAGE_BUSY_BOX_LATEST} ${IMAGE_KUBEFLOW_OPERATOR} ${IMAGE_KUBERAY_OPERATOR}
do
kind load docker-image ${image} ${CLUSTER_CONTEXT}
if [ $? -ne 0 ]
@@ -153,14 +177,21 @@
function configure_cluster {
echo "Installing Kubeflow operator version $KUBEFLOW_VERSION"
kubectl apply -k "github.com/kubeflow/training-operator/manifests/overlays/standalone?ref=$KUBEFLOW_VERSION"

# Sleep until the kubeflow operator is running
echo "Waiting for pods in the kubeflow namespace to become ready"
while [[ $(kubectl get pods -n kubeflow -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}' | tr ' ' '\n' | sort -u) != "True" ]]
do
echo -n "." && sleep 1;
done
echo ""

echo "Installing Kuberay operator version $KUBERAY_VERSION"
helm install kuberay-operator kuberay-operator --repo https://ray-project.github.io/kuberay-helm/ --version $KUBERAY_VERSION --create-namespace -n kuberay-system
echo "Waiting for pods in the kuberay namespace to become ready"
while [[ $(kubectl get pods -n kuberay-system -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}' | tr ' ' '\n' | sort -u) != "True" ]]
do
echo -n "." && sleep 1;
done
echo ""
}

function wait_for_appwrapper_controller {
16 changes: 15 additions & 1 deletion test/e2e/appwrapper_test.go
@@ -82,7 +82,21 @@ var _ = Describe("AppWrapper E2E Test", func() {
})
})

// TODO: KubeRay GVKs (would have to deploy KubeRay operator on e2e test cluster)
Describe("Creation of Kuberay GVKs", Label("Kueue", "Standalone"), func() {
It("RayClusters", func() {
aw := createAppWrapper(ctx, raycluster(500, 2, 250))
appwrappers = append(appwrappers, aw)
// Non-functional RayCluster; will never reach the Running phase
Eventually(AppWrapperPhase(ctx, aw), 15*time.Second).Should(Equal(workloadv1beta2.AppWrapperResuming))
})

It("RayJobs", func() {
aw := createAppWrapper(ctx, rayjob(500, 2, 250))
appwrappers = append(appwrappers, aw)
// Non-functional RayJob; will never reach the Running phase
Eventually(AppWrapperPhase(ctx, aw), 15*time.Second).Should(Equal(workloadv1beta2.AppWrapperResuming))
})
})

// TODO: JobSets (would have to deploy JobSet controller on e2e test cluster)

122 changes: 122 additions & 0 deletions test/e2e/fixtures_test.go
@@ -341,6 +341,128 @@ func pytorchjob(replicasWorker int, milliCPUWorker int64) workloadv1beta2.AppWrapperComponent {
}
}

// This is not a functional RayCluster: it uses a dummy busybox image to avoid
// pulling a large, rate-limited image from Docker Hub, which means the command
// injected by the KubeRay operator will never succeed.
//
// The fixture is only useful for checking that we validate the PodSpecTemplates
// and can reach the Resuming state.
const rayclusterYAML = `
apiVersion: ray.io/v1
kind: RayCluster
metadata:
name: %v
spec:
rayVersion: '2.9.0'
headGroupSpec:
rayStartParams: {}
template:
spec:
containers:
- name: ray-head
image: quay.io/project-codeflare/busybox:1.36
command: ["sh", "-c", "sleep 10"]
resources:
requests:
cpu: %v

workerGroupSpecs:
- replicas: %v
minReplicas: %v
maxReplicas: %v
groupName: small-group
rayStartParams: {}
# Pod template
template:
spec:
containers:
- name: ray-worker
image: quay.io/project-codeflare/busybox:1.36
command: ["sh", "-c", "sleep 10"]
resources:
requests:
cpu: %v
`

func raycluster(milliCPUHead int64, replicasWorker int, milliCPUWorker int64) workloadv1beta2.AppWrapperComponent {
yamlString := fmt.Sprintf(rayclusterYAML,
randName("raycluster"),
resource.NewMilliQuantity(milliCPUHead, resource.DecimalSI),
replicasWorker, replicasWorker, replicasWorker,
resource.NewMilliQuantity(milliCPUWorker, resource.DecimalSI),
)
jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString))
Expect(err).NotTo(HaveOccurred())
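// DeclaredPodSets tell the AppWrapper controller where the PodTemplateSpecs
// live inside the wrapped resource: here, the head group template and the
// first worker group's template.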
return workloadv1beta2.AppWrapperComponent{
DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{
{Replicas: ptr.To(int32(1)), Path: "template.spec.headGroupSpec.template"},
{Replicas: ptr.To(int32(replicasWorker)), Path: "template.spec.workerGroupSpecs[0].template"},
},
Template: runtime.RawExtension{Raw: jsonBytes},
}
}

// This is not a functional RayJob: it uses a dummy busybox image to avoid
// pulling a large, rate-limited image from Docker Hub, which means the command
// injected by the KubeRay operator will never succeed.
//
// The fixture is only useful for checking that we validate the PodSpecTemplates
// and can reach the Resuming state.
const rayjobYAML = `
apiVersion: ray.io/v1
kind: RayJob
metadata:
name: %v
spec:
shutdownAfterJobFinishes: true
rayClusterSpec:
rayVersion: '2.9.0'
headGroupSpec:
rayStartParams: {}
template:
spec:
containers:
- name: ray-head
image: quay.io/project-codeflare/busybox:1.36
command: ["sh", "-c", "sleep 10"]
resources:
requests:
cpu: %v

workerGroupSpecs:
- replicas: %v
minReplicas: %v
maxReplicas: %v
groupName: small-group
rayStartParams: {}
# Pod template
template:
spec:
containers:
- name: ray-worker
image: quay.io/project-codeflare/busybox:1.36
command: ["sh", "-c", "sleep 10"]
resources:
requests:
cpu: %v
`

func rayjob(milliCPUHead int64, replicasWorker int, milliCPUWorker int64) workloadv1beta2.AppWrapperComponent {
yamlString := fmt.Sprintf(rayjobYAML,
randName("raycluster"),
resource.NewMilliQuantity(milliCPUHead, resource.DecimalSI),
replicasWorker, replicasWorker, replicasWorker,
resource.NewMilliQuantity(milliCPUWorker, resource.DecimalSI),
)
jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString))
Expect(err).NotTo(HaveOccurred())
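// Same PodSet declarations as raycluster(), but the Paths are nested one
// level deeper, under spec.rayClusterSpec.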
return workloadv1beta2.AppWrapperComponent{
DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{
{Replicas: ptr.To(int32(1)), Path: "template.spec.rayClusterSpec.headGroupSpec.template"},
{Replicas: ptr.To(int32(replicasWorker)), Path: "template.spec.rayClusterSpec.workerGroupSpecs[0].template"},
},
Template: runtime.RawExtension{Raw: jsonBytes},
}
}

const jobSetYAML = `
apiVersion: jobset.x-k8s.io/v1alpha2
kind: JobSet