From 940dd318684fe6bfc5085fb0d5c47b130ce92e24 Mon Sep 17 00:00:00 2001 From: David Grove Date: Wed, 19 Feb 2025 17:24:44 -0500 Subject: [PATCH 1/5] cleanup: workloadv1beta2 ==> awv1beta2 --- cmd/main.go | 4 +- .../appwrapper/appwrapper_controller.go | 236 +++++++++--------- .../appwrapper/appwrapper_controller_test.go | 148 +++++------ .../controller/appwrapper/fixtures_test.go | 38 +-- .../appwrapper/resource_management.go | 34 +-- internal/controller/appwrapper/suite_test.go | 4 +- .../workload/workload_controller.go | 24 +- internal/webhook/appwrapper_fixtures_test.go | 74 +++--- internal/webhook/appwrapper_webhook.go | 16 +- internal/webhook/appwrapper_webhook_test.go | 8 +- internal/webhook/suite_test.go | 4 +- pkg/utils/utils.go | 44 ++-- test/e2e/appwrapper_test.go | 80 +++--- test/e2e/fixtures_test.go | 96 +++---- test/e2e/util_test.go | 46 ++-- 15 files changed, 428 insertions(+), 428 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index df08717..5ecbe8f 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -49,7 +49,7 @@ import ( kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" - workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" + awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" "github.com/project-codeflare/appwrapper/internal/metrics" "github.com/project-codeflare/appwrapper/pkg/config" "github.com/project-codeflare/appwrapper/pkg/controller" @@ -67,7 +67,7 @@ var ( func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) utilruntime.Must(kueue.AddToScheme(scheme)) - utilruntime.Must(workloadv1beta2.AddToScheme(scheme)) + utilruntime.Must(awv1beta2.AddToScheme(scheme)) //+kubebuilder:scaffold:scheme } diff --git a/internal/controller/appwrapper/appwrapper_controller.go b/internal/controller/appwrapper/appwrapper_controller.go index eb01a0f..8f7ada1 100644 --- a/internal/controller/appwrapper/appwrapper_controller.go +++ b/internal/controller/appwrapper/appwrapper_controller.go @@ -41,7 +41,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/reconcile" - workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" + awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" "github.com/project-codeflare/appwrapper/internal/metrics" "github.com/project-codeflare/appwrapper/pkg/config" "github.com/project-codeflare/appwrapper/pkg/utils" @@ -98,13 +98,13 @@ type componentStatusSummary struct { // //gocyclo:ignore func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - aw := &workloadv1beta2.AppWrapper{} + aw := &awv1beta2.AppWrapper{} if err := r.Get(ctx, req.NamespacedName, aw); err != nil { return ctrl.Result{}, nil } // stop reconciliation if managed by another controller - if aw.Spec.ManagedBy != nil && *aw.Spec.ManagedBy != workloadv1beta2.AppWrapperControllerName { + if aw.Spec.ManagedBy != nil && *aw.Spec.ManagedBy != awv1beta2.AppWrapperControllerName { return ctrl.Result{}, nil } @@ -113,30 +113,30 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request) if controllerutil.ContainsFinalizer(aw, AppWrapperFinalizer) { statusUpdated := false orig := copyForStatusPatch(aw) - if meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed)) { + if meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed)) { if !r.deleteComponents(ctx, aw) { // one or more components are still terminating - if aw.Status.Phase != workloadv1beta2.AppWrapperTerminating { + if aw.Status.Phase != awv1beta2.AppWrapperTerminating { // Set Phase for better UX, but ignore errors. We still want to requeue after 5 seconds (not immediately) - aw.Status.Phase = workloadv1beta2.AppWrapperTerminating + aw.Status.Phase = awv1beta2.AppWrapperTerminating _ = r.Status().Patch(ctx, aw, client.MergeFrom(orig)) } return ctrl.Result{RequeueAfter: 5 * time.Second}, nil // check after a short while } meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.ResourcesDeployed), + Type: string(awv1beta2.ResourcesDeployed), Status: metav1.ConditionFalse, - Reason: string(workloadv1beta2.AppWrapperTerminating), + Reason: string(awv1beta2.AppWrapperTerminating), Message: "Resources successfully deleted", }) statusUpdated = true } - if meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.QuotaReserved)) { + if meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved)) { meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.QuotaReserved), + Type: string(awv1beta2.QuotaReserved), Status: metav1.ConditionFalse, - Reason: string(workloadv1beta2.AppWrapperTerminating), + Reason: string(awv1beta2.AppWrapperTerminating), Message: "No resources deployed", }) statusUpdated = true @@ -159,15 +159,15 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request) switch aw.Status.Phase { - case workloadv1beta2.AppWrapperEmpty: // initial state + case awv1beta2.AppWrapperEmpty: // initial state orig := copyForStatusPatch(aw) if err := utils.EnsureComponentStatusInitialized(aw); err != nil { return ctrl.Result{}, err } - return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, workloadv1beta2.AppWrapperSuspended) + return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, awv1beta2.AppWrapperSuspended) - case workloadv1beta2.AppWrapperSuspended: // no components deployed + case awv1beta2.AppWrapperSuspended: // no components deployed if aw.Spec.Suspend { return ctrl.Result{}, nil // remain suspended } @@ -183,40 +183,40 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request) // begin deployment orig := copyForStatusPatch(aw) meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.QuotaReserved), + Type: string(awv1beta2.QuotaReserved), Status: metav1.ConditionTrue, - Reason: string(workloadv1beta2.AppWrapperResuming), + Reason: string(awv1beta2.AppWrapperResuming), Message: "Suspend is false", }) meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.ResourcesDeployed), + Type: string(awv1beta2.ResourcesDeployed), Status: metav1.ConditionTrue, - Reason: string(workloadv1beta2.AppWrapperResuming), + Reason: string(awv1beta2.AppWrapperResuming), Message: "Suspend is false", }) meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.PodsReady), + Type: string(awv1beta2.PodsReady), Status: metav1.ConditionFalse, - Reason: string(workloadv1beta2.AppWrapperResuming), + Reason: string(awv1beta2.AppWrapperResuming), Message: "Suspend is false", }) meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.Unhealthy), + Type: string(awv1beta2.Unhealthy), Status: metav1.ConditionFalse, - Reason: string(workloadv1beta2.AppWrapperResuming), + Reason: string(awv1beta2.AppWrapperResuming), Message: "Suspend is false", }) - return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, workloadv1beta2.AppWrapperResuming) + return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, awv1beta2.AppWrapperResuming) - case workloadv1beta2.AppWrapperResuming: // deploying components + case awv1beta2.AppWrapperResuming: // deploying components if aw.Spec.Suspend { - return ctrl.Result{}, r.transitionToPhase(ctx, copyForStatusPatch(aw), aw, workloadv1beta2.AppWrapperSuspending) // abort deployment + return ctrl.Result{}, r.transitionToPhase(ctx, copyForStatusPatch(aw), aw, awv1beta2.AppWrapperSuspending) // abort deployment } err, fatal := r.createComponents(ctx, aw) // NOTE: createComponents applies patches to aw.Status incrementally as resources are created orig := copyForStatusPatch(aw) if err != nil { if !fatal { - startTime := meta.FindStatusCondition(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed)).LastTransitionTime + startTime := meta.FindStatusCondition(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed)).LastTransitionTime graceDuration := r.admissionGraceDuration(ctx, aw) if time.Now().Before(startTime.Add(graceDuration)) { // be patient; non-fatal error; requeue and keep trying @@ -225,24 +225,24 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request) } detailMsg := fmt.Sprintf("error creating components: %v", err) meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.Unhealthy), + Type: string(awv1beta2.Unhealthy), Status: metav1.ConditionTrue, Reason: "CreateFailed", Message: detailMsg, }) - r.Recorder.Event(aw, v1.EventTypeNormal, string(workloadv1beta2.Unhealthy), "CreateFailed: "+detailMsg) + r.Recorder.Event(aw, v1.EventTypeNormal, string(awv1beta2.Unhealthy), "CreateFailed: "+detailMsg) if fatal { - return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, workloadv1beta2.AppWrapperFailed) // always move to failed on fatal error + return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, awv1beta2.AppWrapperFailed) // always move to failed on fatal error } else { return ctrl.Result{}, r.resetOrFail(ctx, orig, aw, false, 1) } } - return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, workloadv1beta2.AppWrapperRunning) + return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, awv1beta2.AppWrapperRunning) - case workloadv1beta2.AppWrapperRunning: // components deployed + case awv1beta2.AppWrapperRunning: // components deployed orig := copyForStatusPatch(aw) if aw.Spec.Suspend { - return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, workloadv1beta2.AppWrapperSuspending) // begin undeployment + return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, awv1beta2.AppWrapperSuspending) // begin undeployment } // Gather status information at the Component and Pod level. @@ -259,13 +259,13 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request) detailMsg := fmt.Sprintf("Only found %v deployed components, but was expecting %v", compStatus.deployed, compStatus.expected) if compStatus.deployed != compStatus.expected { meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.Unhealthy), + Type: string(awv1beta2.Unhealthy), Status: metav1.ConditionTrue, Reason: "MissingComponent", Message: detailMsg, }) - r.Recorder.Event(aw, v1.EventTypeNormal, string(workloadv1beta2.Unhealthy), "MissingComponent: "+detailMsg) - return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, workloadv1beta2.AppWrapperFailed) + r.Recorder.Event(aw, v1.EventTypeNormal, string(awv1beta2.Unhealthy), "MissingComponent: "+detailMsg) + return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, awv1beta2.AppWrapperFailed) } // If a component's controller has put it into a failed state, we do not need @@ -273,12 +273,12 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request) detailMsg = fmt.Sprintf("Found %v failed components", compStatus.failed) if compStatus.failed > 0 { meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.Unhealthy), + Type: string(awv1beta2.Unhealthy), Status: metav1.ConditionTrue, Reason: "FailedComponent", Message: detailMsg, }) - r.Recorder.Event(aw, v1.EventTypeNormal, string(workloadv1beta2.Unhealthy), "FailedComponent: "+detailMsg) + r.Recorder.Event(aw, v1.EventTypeNormal, string(awv1beta2.Unhealthy), "FailedComponent: "+detailMsg) return ctrl.Result{}, r.resetOrFail(ctx, orig, aw, podStatus.terminalFailure, 1) } @@ -286,38 +286,38 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request) if podStatus.succeeded >= podStatus.expected && (podStatus.pending+podStatus.running+podStatus.failed == 0) { msg := fmt.Sprintf("%v pods succeeded and no running, pending, or failed pods", podStatus.succeeded) meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.QuotaReserved), + Type: string(awv1beta2.QuotaReserved), Status: metav1.ConditionFalse, - Reason: string(workloadv1beta2.AppWrapperSucceeded), + Reason: string(awv1beta2.AppWrapperSucceeded), Message: msg, }) meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.ResourcesDeployed), + Type: string(awv1beta2.ResourcesDeployed), Status: metav1.ConditionTrue, - Reason: string(workloadv1beta2.AppWrapperSucceeded), + Reason: string(awv1beta2.AppWrapperSucceeded), Message: msg, }) - return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, workloadv1beta2.AppWrapperSucceeded) + return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, awv1beta2.AppWrapperSucceeded) } // Handle Failed Pods if podStatus.failed > 0 { meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.Unhealthy), + Type: string(awv1beta2.Unhealthy), Status: metav1.ConditionTrue, Reason: "FoundFailedPods", // Intentionally no detailed message with failed pod count, since changing the message resets the transition time }) // Grace period to give the resource controller a chance to correct the failure - whenDetected := meta.FindStatusCondition(aw.Status.Conditions, string(workloadv1beta2.Unhealthy)).LastTransitionTime + whenDetected := meta.FindStatusCondition(aw.Status.Conditions, string(awv1beta2.Unhealthy)).LastTransitionTime gracePeriod := r.failureGraceDuration(ctx, aw) now := time.Now() deadline := whenDetected.Add(gracePeriod) if now.Before(deadline) { return requeueAfter(deadline.Sub(now), r.Status().Patch(ctx, aw, client.MergeFrom(orig))) } else { - r.Recorder.Eventf(aw, v1.EventTypeNormal, string(workloadv1beta2.Unhealthy), "FoundFailedPods: %v failed pods", podStatus.failed) + r.Recorder.Eventf(aw, v1.EventTypeNormal, string(awv1beta2.Unhealthy), "FoundFailedPods: %v failed pods", podStatus.failed) return ctrl.Result{}, r.resetOrFail(ctx, orig, aw, podStatus.terminalFailure, 1) } } @@ -326,20 +326,20 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request) detailMsg = fmt.Sprintf("Workload contains pods using NoExecute resources on Nodes: %v", podStatus.noExecuteNodes) if len(podStatus.noExecuteNodes) > 0 { meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.Unhealthy), + Type: string(awv1beta2.Unhealthy), Status: metav1.ConditionTrue, Reason: "AutopilotNoExecute", Message: detailMsg, }) - r.Recorder.Event(aw, v1.EventTypeNormal, string(workloadv1beta2.Unhealthy), detailMsg) + r.Recorder.Event(aw, v1.EventTypeNormal, string(awv1beta2.Unhealthy), detailMsg) return ctrl.Result{}, r.resetOrFail(ctx, orig, aw, false, 0) // Autopilot triggered evacuation does not increment retry count } - clearCondition(aw, workloadv1beta2.Unhealthy, "FoundNoFailedPods", "") + clearCondition(aw, awv1beta2.Unhealthy, "FoundNoFailedPods", "") if podStatus.running+podStatus.succeeded >= podStatus.expected { meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.PodsReady), + Type: string(awv1beta2.PodsReady), Status: metav1.ConditionTrue, Reason: "SufficientPodsReady", Message: fmt.Sprintf("%v pods running; %v pods succeeded", podStatus.running, podStatus.succeeded), @@ -349,8 +349,8 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Not ready yet; either continue to wait or giveup if the warmup period has expired podDetailsMessage := fmt.Sprintf("%v pods pending; %v pods running; %v pods succeeded", podStatus.pending, podStatus.running, podStatus.succeeded) - clearCondition(aw, workloadv1beta2.PodsReady, "InsufficientPodsReady", podDetailsMessage) - whenDeployed := meta.FindStatusCondition(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed)).LastTransitionTime + clearCondition(aw, awv1beta2.PodsReady, "InsufficientPodsReady", podDetailsMessage) + whenDeployed := meta.FindStatusCondition(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed)).LastTransitionTime var graceDuration time.Duration if podStatus.pending+podStatus.running+podStatus.succeeded >= podStatus.expected { graceDuration = r.warmupGraceDuration(ctx, aw) @@ -361,60 +361,60 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request) return requeueAfter(5*time.Second, r.Status().Patch(ctx, aw, client.MergeFrom(orig))) } else { meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.Unhealthy), + Type: string(awv1beta2.Unhealthy), Status: metav1.ConditionTrue, Reason: "InsufficientPodsReady", Message: podDetailsMessage, }) - r.Recorder.Event(aw, v1.EventTypeNormal, string(workloadv1beta2.Unhealthy), "InsufficientPodsReady: "+podDetailsMessage) + r.Recorder.Event(aw, v1.EventTypeNormal, string(awv1beta2.Unhealthy), "InsufficientPodsReady: "+podDetailsMessage) return ctrl.Result{}, r.resetOrFail(ctx, orig, aw, podStatus.terminalFailure, 1) } - case workloadv1beta2.AppWrapperSuspending: // undeploying components + case awv1beta2.AppWrapperSuspending: // undeploying components orig := copyForStatusPatch(aw) // finish undeploying components irrespective of desired state (suspend bit) - if meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed)) { + if meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed)) { if !r.deleteComponents(ctx, aw) { return requeueAfter(5*time.Second, r.Status().Patch(ctx, aw, client.MergeFrom(orig))) } meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.ResourcesDeployed), + Type: string(awv1beta2.ResourcesDeployed), Status: metav1.ConditionFalse, - Reason: string(workloadv1beta2.AppWrapperSuspended), + Reason: string(awv1beta2.AppWrapperSuspended), Message: "Suspend is true", }) } meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.QuotaReserved), + Type: string(awv1beta2.QuotaReserved), Status: metav1.ConditionFalse, - Reason: string(workloadv1beta2.AppWrapperSuspended), + Reason: string(awv1beta2.AppWrapperSuspended), Message: "Suspend is true", }) - clearCondition(aw, workloadv1beta2.PodsReady, string(workloadv1beta2.AppWrapperSuspended), "") - clearCondition(aw, workloadv1beta2.Unhealthy, string(workloadv1beta2.AppWrapperSuspended), "") - return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, workloadv1beta2.AppWrapperSuspended) + clearCondition(aw, awv1beta2.PodsReady, string(awv1beta2.AppWrapperSuspended), "") + clearCondition(aw, awv1beta2.Unhealthy, string(awv1beta2.AppWrapperSuspended), "") + return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, awv1beta2.AppWrapperSuspended) - case workloadv1beta2.AppWrapperResetting: + case awv1beta2.AppWrapperResetting: orig := copyForStatusPatch(aw) if aw.Spec.Suspend { - return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, workloadv1beta2.AppWrapperSuspending) // Suspending trumps Resetting + return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, awv1beta2.AppWrapperSuspending) // Suspending trumps Resetting } - clearCondition(aw, workloadv1beta2.PodsReady, string(workloadv1beta2.AppWrapperResetting), "") - if meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed)) { + clearCondition(aw, awv1beta2.PodsReady, string(awv1beta2.AppWrapperResetting), "") + if meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed)) { if !r.deleteComponents(ctx, aw) { return requeueAfter(5*time.Second, r.Status().Patch(ctx, aw, client.MergeFrom(orig))) } meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.ResourcesDeployed), + Type: string(awv1beta2.ResourcesDeployed), Status: metav1.ConditionFalse, - Reason: string(workloadv1beta2.AppWrapperResetting), + Reason: string(awv1beta2.AppWrapperResetting), Message: "Resources deleted for resetting AppWrapper", }) } // Pause before transitioning to Resuming to heuristically allow transient system problems to subside - whenReset := meta.FindStatusCondition(aw.Status.Conditions, string(workloadv1beta2.Unhealthy)).LastTransitionTime + whenReset := meta.FindStatusCondition(aw.Status.Conditions, string(awv1beta2.Unhealthy)).LastTransitionTime pauseDuration := r.retryPauseDuration(ctx, aw) now := time.Now() deadline := whenReset.Add(pauseDuration) @@ -423,14 +423,14 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request) } meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.ResourcesDeployed), + Type: string(awv1beta2.ResourcesDeployed), Status: metav1.ConditionTrue, - Reason: string(workloadv1beta2.AppWrapperResuming), + Reason: string(awv1beta2.AppWrapperResuming), Message: "Reset complete; resuming", }) - return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, workloadv1beta2.AppWrapperResuming) + return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, awv1beta2.AppWrapperResuming) - case workloadv1beta2.AppWrapperFailed: + case awv1beta2.AppWrapperFailed: // Support for debugging failed jobs. // When an appwrapper is annotated with a non-zero debugging delay, // we hold quota for the delay period and do not delete the resources of @@ -440,12 +440,12 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request) orig := copyForStatusPatch(aw) if deletionDelay > 0 && !aw.Spec.Suspend { meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.DeletingResources), + Type: string(awv1beta2.DeletingResources), Status: metav1.ConditionFalse, Reason: "DeletionPaused", - Message: fmt.Sprintf("%v has value %v", workloadv1beta2.DeletionOnFailureGracePeriodAnnotation, deletionDelay), + Message: fmt.Sprintf("%v has value %v", awv1beta2.DeletionOnFailureGracePeriodAnnotation, deletionDelay), }) - whenDelayed := meta.FindStatusCondition(aw.Status.Conditions, string(workloadv1beta2.DeletingResources)).LastTransitionTime + whenDelayed := meta.FindStatusCondition(aw.Status.Conditions, string(awv1beta2.DeletingResources)).LastTransitionTime now := time.Now() deadline := whenDelayed.Add(deletionDelay) @@ -454,7 +454,7 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request) } } - if meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed)) { + if meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed)) { if !r.deleteComponents(ctx, aw) { return requeueAfter(5*time.Second, r.Status().Patch(ctx, aw, client.MergeFrom(orig))) } @@ -463,24 +463,24 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request) msg = "Kueue forced resource deletion by suspending AppWrapper" } meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.ResourcesDeployed), + Type: string(awv1beta2.ResourcesDeployed), Status: metav1.ConditionFalse, - Reason: string(workloadv1beta2.AppWrapperFailed), + Reason: string(awv1beta2.AppWrapperFailed), Message: msg, }) } meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.QuotaReserved), + Type: string(awv1beta2.QuotaReserved), Status: metav1.ConditionFalse, - Reason: string(workloadv1beta2.AppWrapperFailed), + Reason: string(awv1beta2.AppWrapperFailed), Message: "No resources deployed", }) return ctrl.Result{}, r.Status().Patch(ctx, aw, client.MergeFrom(orig)) - case workloadv1beta2.AppWrapperSucceeded: - if meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed)) { + case awv1beta2.AppWrapperSucceeded: + if meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed)) { deletionDelay := r.timeToLiveAfterSucceededDuration(ctx, aw) - whenSucceeded := meta.FindStatusCondition(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed)).LastTransitionTime + whenSucceeded := meta.FindStatusCondition(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed)).LastTransitionTime now := time.Now() deadline := whenSucceeded.Add(deletionDelay) if now.Before(deadline) { @@ -492,9 +492,9 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request) return requeueAfter(5*time.Second, r.Status().Patch(ctx, aw, client.MergeFrom(orig))) } meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.ResourcesDeployed), + Type: string(awv1beta2.ResourcesDeployed), Status: metav1.ConditionFalse, - Reason: string(workloadv1beta2.AppWrapperSucceeded), + Reason: string(awv1beta2.AppWrapperSucceeded), Message: fmt.Sprintf("Time to live after success of %v expired", deletionDelay), }) return ctrl.Result{}, r.Status().Patch(ctx, aw, client.MergeFrom(orig)) @@ -505,7 +505,7 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, nil } -func (r *AppWrapperReconciler) transitionToPhase(ctx context.Context, orig *workloadv1beta2.AppWrapper, modified *workloadv1beta2.AppWrapper, phase workloadv1beta2.AppWrapperPhase) error { +func (r *AppWrapperReconciler) transitionToPhase(ctx context.Context, orig *awv1beta2.AppWrapper, modified *awv1beta2.AppWrapper, phase awv1beta2.AppWrapperPhase) error { modified.Status.Phase = phase if err := r.Status().Patch(ctx, modified, client.MergeFrom(orig)); err != nil { return err @@ -515,22 +515,22 @@ func (r *AppWrapperReconciler) transitionToPhase(ctx context.Context, orig *work return nil } -func (r *AppWrapperReconciler) resetOrFail(ctx context.Context, orig *workloadv1beta2.AppWrapper, aw *workloadv1beta2.AppWrapper, terminalFailure bool, retryIncrement int32) error { +func (r *AppWrapperReconciler) resetOrFail(ctx context.Context, orig *awv1beta2.AppWrapper, aw *awv1beta2.AppWrapper, terminalFailure bool, retryIncrement int32) error { maxRetries := r.retryLimit(ctx, aw) if !terminalFailure && aw.Status.Retries < maxRetries { aw.Status.Retries += retryIncrement - return r.transitionToPhase(ctx, orig, aw, workloadv1beta2.AppWrapperResetting) + return r.transitionToPhase(ctx, orig, aw, awv1beta2.AppWrapperResetting) } else { - return r.transitionToPhase(ctx, orig, aw, workloadv1beta2.AppWrapperFailed) + return r.transitionToPhase(ctx, orig, aw, awv1beta2.AppWrapperFailed) } } //gocyclo:ignore -func (r *AppWrapperReconciler) getPodStatus(ctx context.Context, aw *workloadv1beta2.AppWrapper) (*podStatusSummary, error) { +func (r *AppWrapperReconciler) getPodStatus(ctx context.Context, aw *awv1beta2.AppWrapper) (*podStatusSummary, error) { pods := &v1.PodList{} if err := r.List(ctx, pods, client.InNamespace(aw.Namespace), - client.MatchingLabels{workloadv1beta2.AppWrapperLabel: aw.Name}); err != nil { + client.MatchingLabels{awv1beta2.AppWrapperLabel: aw.Name}); err != nil { return nil, err } pc, err := utils.ExpectedPodCount(aw) @@ -621,7 +621,7 @@ func (r *AppWrapperReconciler) getPodStatus(ctx context.Context, aw *workloadv1b } //gocyclo:ignore -func (r *AppWrapperReconciler) getComponentStatus(ctx context.Context, aw *workloadv1beta2.AppWrapper) (*componentStatusSummary, error) { +func (r *AppWrapperReconciler) getComponentStatus(ctx context.Context, aw *awv1beta2.AppWrapper) (*componentStatusSummary, error) { summary := &componentStatusSummary{expected: int32(len(aw.Status.ComponentStatus))} for componentIdx := range aw.Status.ComponentStatus { @@ -765,8 +765,8 @@ func (r *AppWrapperReconciler) limitDuration(desired time.Duration) time.Duratio } } -func (r *AppWrapperReconciler) admissionGraceDuration(ctx context.Context, aw *workloadv1beta2.AppWrapper) time.Duration { - if userPeriod, ok := aw.Annotations[workloadv1beta2.AdmissionGracePeriodDurationAnnotation]; ok { +func (r *AppWrapperReconciler) admissionGraceDuration(ctx context.Context, aw *awv1beta2.AppWrapper) time.Duration { + if userPeriod, ok := aw.Annotations[awv1beta2.AdmissionGracePeriodDurationAnnotation]; ok { if duration, err := time.ParseDuration(userPeriod); err == nil { return r.limitDuration(duration) } else { @@ -776,8 +776,8 @@ func (r *AppWrapperReconciler) admissionGraceDuration(ctx context.Context, aw *w return r.limitDuration(r.Config.FaultTolerance.AdmissionGracePeriod) } -func (r *AppWrapperReconciler) warmupGraceDuration(ctx context.Context, aw *workloadv1beta2.AppWrapper) time.Duration { - if userPeriod, ok := aw.Annotations[workloadv1beta2.WarmupGracePeriodDurationAnnotation]; ok { +func (r *AppWrapperReconciler) warmupGraceDuration(ctx context.Context, aw *awv1beta2.AppWrapper) time.Duration { + if userPeriod, ok := aw.Annotations[awv1beta2.WarmupGracePeriodDurationAnnotation]; ok { if duration, err := time.ParseDuration(userPeriod); err == nil { return r.limitDuration(duration) } else { @@ -787,8 +787,8 @@ func (r *AppWrapperReconciler) warmupGraceDuration(ctx context.Context, aw *work return r.limitDuration(r.Config.FaultTolerance.WarmupGracePeriod) } -func (r *AppWrapperReconciler) failureGraceDuration(ctx context.Context, aw *workloadv1beta2.AppWrapper) time.Duration { - if userPeriod, ok := aw.Annotations[workloadv1beta2.FailureGracePeriodDurationAnnotation]; ok { +func (r *AppWrapperReconciler) failureGraceDuration(ctx context.Context, aw *awv1beta2.AppWrapper) time.Duration { + if userPeriod, ok := aw.Annotations[awv1beta2.FailureGracePeriodDurationAnnotation]; ok { if duration, err := time.ParseDuration(userPeriod); err == nil { return r.limitDuration(duration) } else { @@ -798,8 +798,8 @@ func (r *AppWrapperReconciler) failureGraceDuration(ctx context.Context, aw *wor return r.limitDuration(r.Config.FaultTolerance.FailureGracePeriod) } -func (r *AppWrapperReconciler) retryLimit(ctx context.Context, aw *workloadv1beta2.AppWrapper) int32 { - if userLimit, ok := aw.Annotations[workloadv1beta2.RetryLimitAnnotation]; ok { +func (r *AppWrapperReconciler) retryLimit(ctx context.Context, aw *awv1beta2.AppWrapper) int32 { + if userLimit, ok := aw.Annotations[awv1beta2.RetryLimitAnnotation]; ok { if limit, err := strconv.Atoi(userLimit); err == nil { return int32(limit) } else { @@ -809,8 +809,8 @@ func (r *AppWrapperReconciler) retryLimit(ctx context.Context, aw *workloadv1bet return r.Config.FaultTolerance.RetryLimit } -func (r *AppWrapperReconciler) retryPauseDuration(ctx context.Context, aw *workloadv1beta2.AppWrapper) time.Duration { - if userPeriod, ok := aw.Annotations[workloadv1beta2.RetryPausePeriodDurationAnnotation]; ok { +func (r *AppWrapperReconciler) retryPauseDuration(ctx context.Context, aw *awv1beta2.AppWrapper) time.Duration { + if userPeriod, ok := aw.Annotations[awv1beta2.RetryPausePeriodDurationAnnotation]; ok { if duration, err := time.ParseDuration(userPeriod); err == nil { return r.limitDuration(duration) } else { @@ -820,8 +820,8 @@ func (r *AppWrapperReconciler) retryPauseDuration(ctx context.Context, aw *workl return r.limitDuration(r.Config.FaultTolerance.RetryPausePeriod) } -func (r *AppWrapperReconciler) forcefulDeletionGraceDuration(ctx context.Context, aw *workloadv1beta2.AppWrapper) time.Duration { - if userPeriod, ok := aw.Annotations[workloadv1beta2.ForcefulDeletionGracePeriodAnnotation]; ok { +func (r *AppWrapperReconciler) forcefulDeletionGraceDuration(ctx context.Context, aw *awv1beta2.AppWrapper) time.Duration { + if userPeriod, ok := aw.Annotations[awv1beta2.ForcefulDeletionGracePeriodAnnotation]; ok { if duration, err := time.ParseDuration(userPeriod); err == nil { return r.limitDuration(duration) } else { @@ -831,8 +831,8 @@ func (r *AppWrapperReconciler) forcefulDeletionGraceDuration(ctx context.Context return r.limitDuration(r.Config.FaultTolerance.ForcefulDeletionGracePeriod) } -func (r *AppWrapperReconciler) deletionOnFailureGraceDuration(ctx context.Context, aw *workloadv1beta2.AppWrapper) time.Duration { - if userPeriod, ok := aw.Annotations[workloadv1beta2.DeletionOnFailureGracePeriodAnnotation]; ok { +func (r *AppWrapperReconciler) deletionOnFailureGraceDuration(ctx context.Context, aw *awv1beta2.AppWrapper) time.Duration { + if userPeriod, ok := aw.Annotations[awv1beta2.DeletionOnFailureGracePeriodAnnotation]; ok { if duration, err := time.ParseDuration(userPeriod); err == nil { return r.limitDuration(duration) } else { @@ -842,8 +842,8 @@ func (r *AppWrapperReconciler) deletionOnFailureGraceDuration(ctx context.Contex return 0 * time.Second } -func (r *AppWrapperReconciler) timeToLiveAfterSucceededDuration(ctx context.Context, aw *workloadv1beta2.AppWrapper) time.Duration { - if userPeriod, ok := aw.Annotations[workloadv1beta2.SuccessTTLAnnotation]; ok { +func (r *AppWrapperReconciler) timeToLiveAfterSucceededDuration(ctx context.Context, aw *awv1beta2.AppWrapper) time.Duration { + if userPeriod, ok := aw.Annotations[awv1beta2.SuccessTTLAnnotation]; ok { if duration, err := time.ParseDuration(userPeriod); err == nil { if duration > 0 && duration < r.Config.FaultTolerance.SuccessTTL { return duration @@ -855,9 +855,9 @@ func (r *AppWrapperReconciler) timeToLiveAfterSucceededDuration(ctx context.Cont return r.Config.FaultTolerance.SuccessTTL } -func (r *AppWrapperReconciler) terminalExitCodes(_ context.Context, aw *workloadv1beta2.AppWrapper) []int { +func (r *AppWrapperReconciler) terminalExitCodes(_ context.Context, aw *awv1beta2.AppWrapper) []int { ans := []int{} - if exitCodeAnn, ok := aw.Annotations[workloadv1beta2.TerminalExitCodesAnnotation]; ok { + if exitCodeAnn, ok := aw.Annotations[awv1beta2.TerminalExitCodesAnnotation]; ok { exitCodes := strings.Split(exitCodeAnn, ",") for _, str := range exitCodes { exitCode, err := strconv.Atoi(str) @@ -869,9 +869,9 @@ func (r *AppWrapperReconciler) terminalExitCodes(_ context.Context, aw *workload return ans } -func (r *AppWrapperReconciler) retryableExitCodes(_ context.Context, aw *workloadv1beta2.AppWrapper) []int { +func (r *AppWrapperReconciler) retryableExitCodes(_ context.Context, aw *awv1beta2.AppWrapper) []int { ans := []int{} - if exitCodeAnn, ok := aw.Annotations[workloadv1beta2.RetryableExitCodesAnnotation]; ok { + if exitCodeAnn, ok := aw.Annotations[awv1beta2.RetryableExitCodesAnnotation]; ok { exitCodes := strings.Split(exitCodeAnn, ",") for _, str := range exitCodes { exitCode, err := strconv.Atoi(str) @@ -883,7 +883,7 @@ func (r *AppWrapperReconciler) retryableExitCodes(_ context.Context, aw *workloa return ans } -func clearCondition(aw *workloadv1beta2.AppWrapper, condition workloadv1beta2.AppWrapperCondition, reason string, message string) { +func clearCondition(aw *awv1beta2.AppWrapper, condition awv1beta2.AppWrapperCondition, reason string, message string) { if meta.IsStatusConditionTrue(aw.Status.Conditions, string(condition)) { meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ Type: string(condition), @@ -897,7 +897,7 @@ func clearCondition(aw *workloadv1beta2.AppWrapper, condition workloadv1beta2.Ap // podMapFunc maps pods to appwrappers and generates reconcile.Requests for those whose Status.Phase is PodSucceeded func (r *AppWrapperReconciler) podMapFunc(ctx context.Context, obj client.Object) []reconcile.Request { pod := obj.(*v1.Pod) - if name, ok := pod.Labels[workloadv1beta2.AppWrapperLabel]; ok { + if name, ok := pod.Labels[awv1beta2.AppWrapperLabel]; ok { if pod.Status.Phase == v1.PodSucceeded { return []reconcile.Request{{NamespacedName: types.NamespacedName{Namespace: pod.Namespace, Name: name}}} } @@ -908,15 +908,15 @@ func (r *AppWrapperReconciler) podMapFunc(ctx context.Context, obj client.Object // SetupWithManager sets up the controller with the Manager. func (r *AppWrapperReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). - For(&workloadv1beta2.AppWrapper{}). + For(&awv1beta2.AppWrapper{}). Watches(&v1.Pod{}, handler.EnqueueRequestsFromMapFunc(r.podMapFunc)). Named("AppWrapper"). Complete(r) } // copyForStatusPatch returns an AppWrapper with an empty Spec and a DeepCopy of orig's Status for use in a subsequent Status().Patch(...) call -func copyForStatusPatch(orig *workloadv1beta2.AppWrapper) *workloadv1beta2.AppWrapper { - copy := workloadv1beta2.AppWrapper{ +func copyForStatusPatch(orig *awv1beta2.AppWrapper) *awv1beta2.AppWrapper { + copy := awv1beta2.AppWrapper{ TypeMeta: orig.TypeMeta, ObjectMeta: orig.ObjectMeta, Status: *orig.Status.DeepCopy(), diff --git a/internal/controller/appwrapper/appwrapper_controller_test.go b/internal/controller/appwrapper/appwrapper_controller_test.go index 8d44b3c..b9dd743 100644 --- a/internal/controller/appwrapper/appwrapper_controller_test.go +++ b/internal/controller/appwrapper/appwrapper_controller_test.go @@ -33,7 +33,7 @@ import ( "sigs.k8s.io/kueue/pkg/podset" utilslices "sigs.k8s.io/kueue/pkg/util/slices" - workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" + awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" "github.com/project-codeflare/appwrapper/internal/controller/workload" "github.com/project-codeflare/appwrapper/pkg/config" "github.com/project-codeflare/appwrapper/pkg/utils" @@ -51,7 +51,7 @@ var _ = Describe("AppWrapper Controller", func() { } var kueuePodSets []kueue.PodSet - advanceToResuming := func(components ...workloadv1beta2.AppWrapperComponent) { + advanceToResuming := func(components ...awv1beta2.AppWrapperComponent) { By("Create an AppWrapper") aw := toAppWrapper(components...) aw.Spec.Suspend = true @@ -81,7 +81,7 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw = getAppWrapper(awName) - Expect(aw.Status.Phase).Should(Equal(workloadv1beta2.AppWrapperSuspended)) + Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperSuspended)) By("Updating aw.Spec by invoking RunWithPodSetsInfo") Expect((*workload.AppWrapper)(aw).RunWithPodSetsInfo([]podset.PodSetInfo{markerPodSet, markerPodSet})).To(Succeed()) @@ -93,10 +93,10 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw = getAppWrapper(awName) - Expect(aw.Status.Phase).Should(Equal(workloadv1beta2.AppWrapperResuming)) + Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperResuming)) Expect(controllerutil.ContainsFinalizer(aw, AppWrapperFinalizer)).Should(BeTrue()) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed))).Should(BeTrue()) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.QuotaReserved))).Should(BeTrue()) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeTrue()) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeTrue()) Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeTrue()) Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) } @@ -107,10 +107,10 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw := getAppWrapper(awName) - Expect(aw.Status.Phase).Should(Equal(workloadv1beta2.AppWrapperRunning)) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed))).Should(BeTrue()) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.QuotaReserved))).Should(BeTrue()) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.PodsReady))).Should(BeFalse()) + Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperRunning)) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeTrue()) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeTrue()) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.PodsReady))).Should(BeFalse()) Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeTrue()) Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) podStatus, err := awReconciler.getPodStatus(ctx, aw) @@ -124,10 +124,10 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw = getAppWrapper(awName) - Expect(aw.Status.Phase).Should(Equal(workloadv1beta2.AppWrapperRunning)) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed))).Should(BeTrue()) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.QuotaReserved))).Should(BeTrue()) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.PodsReady))).Should(BeFalse()) + Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperRunning)) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeTrue()) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeTrue()) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.PodsReady))).Should(BeFalse()) Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeTrue()) Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) podStatus, err = awReconciler.getPodStatus(ctx, aw) @@ -147,10 +147,10 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw = getAppWrapper(awName) - Expect(aw.Status.Phase).Should(Equal(workloadv1beta2.AppWrapperRunning)) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed))).Should(BeTrue()) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.QuotaReserved))).Should(BeTrue()) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.PodsReady))).Should(BeTrue()) + Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperRunning)) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeTrue()) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeTrue()) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.PodsReady))).Should(BeTrue()) Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeTrue()) Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) Expect((*workload.AppWrapper)(aw).PodsReady()).Should(BeTrue()) @@ -214,7 +214,7 @@ var _ = Describe("AppWrapper Controller", func() { AfterEach(func() { By("Cleanup the AppWrapper and ensure no Pods remain") - aw := &workloadv1beta2.AppWrapper{} + aw := &awv1beta2.AppWrapper{} Expect(k8sClient.Get(ctx, awName, aw)).To(Succeed()) Expect(k8sClient.Delete(ctx, aw)).To(Succeed()) @@ -242,9 +242,9 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw = getAppWrapper(awName) - Expect(aw.Status.Phase).Should(Equal(workloadv1beta2.AppWrapperRunning)) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed))).Should(BeTrue()) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.QuotaReserved))).Should(BeTrue()) + Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperRunning)) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeTrue()) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeTrue()) Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeTrue()) Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) pc, err := utils.ExpectedPodCount(aw) @@ -262,9 +262,9 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw = getAppWrapper(awName) - Expect(aw.Status.Phase).Should(Equal(workloadv1beta2.AppWrapperSucceeded)) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed))).Should(BeTrue()) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.QuotaReserved))).Should(BeFalse()) + Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperSucceeded)) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeTrue()) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeFalse()) Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeFalse()) Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) _, _, finished := (*workload.AppWrapper)(aw).Finished() @@ -276,7 +276,7 @@ var _ = Describe("AppWrapper Controller", func() { _, err = awReconciler.Reconcile(ctx, reconcile.Request{NamespacedName: awName}) Expect(err).NotTo(HaveOccurred()) aw = getAppWrapper(awName) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed))).Should(BeFalse()) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeFalse()) }) It("Running Workloads can be Suspended", func() { @@ -295,9 +295,9 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw = getAppWrapper(awName) - Expect(aw.Status.Phase).Should(Equal(workloadv1beta2.AppWrapperSuspending)) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed))).Should(BeTrue()) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.QuotaReserved))).Should(BeTrue()) + Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperSuspending)) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeTrue()) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeTrue()) Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeTrue()) Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeTrue()) @@ -308,9 +308,9 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw = getAppWrapper(awName) - Expect(aw.Status.Phase).Should(Equal(workloadv1beta2.AppWrapperSuspended)) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed))).Should(BeFalse()) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.QuotaReserved))).Should(BeFalse()) + Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperSuspended)) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeFalse()) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeFalse()) Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeFalse()) Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeTrue()) podStatus, err := awReconciler.getPodStatus(ctx, aw) @@ -332,9 +332,9 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw = getAppWrapper(awName) - Expect(aw.Status.Phase).Should(Equal(workloadv1beta2.AppWrapperFailed)) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed))).Should(BeTrue()) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.QuotaReserved))).Should(BeTrue()) + Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperFailed)) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeTrue()) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeTrue()) Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeTrue()) Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) _, _, finished := (*workload.AppWrapper)(aw).Finished() @@ -346,10 +346,10 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw = getAppWrapper(awName) - Expect(aw.Status.Phase).Should(Equal(workloadv1beta2.AppWrapperFailed)) + Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperFailed)) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed))).Should(BeFalse()) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.QuotaReserved))).Should(BeFalse()) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeFalse()) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeFalse()) Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeFalse()) Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) _, _, finished = (*workload.AppWrapper)(aw).Finished() @@ -364,10 +364,10 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw := getAppWrapper(awName) - Expect(aw.Status.Phase).Should(Equal(workloadv1beta2.AppWrapperFailed)) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed))).Should(BeTrue()) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.QuotaReserved))).Should(BeTrue()) - Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.PodsReady))).Should(BeFalse()) + Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperFailed)) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeTrue()) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeTrue()) + Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.PodsReady))).Should(BeFalse()) Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeTrue()) Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) podStatus, err := awReconciler.getPodStatus(ctx, aw) @@ -384,7 +384,7 @@ var _ = Describe("AppWrapper Controller", func() { By("Validate expected markers and Autopilot anti-affinities were injected") for _, p := range pods { - Expect(p.Labels).Should(HaveKeyWithValue(workloadv1beta2.AppWrapperLabel, awName.Name)) + Expect(p.Labels).Should(HaveKeyWithValue(awv1beta2.AppWrapperLabel, awName.Name)) validateMarkers(&p) validateAutopilot(&p) } @@ -399,7 +399,7 @@ var _ = Describe("AppWrapper Controller", func() { By("Validate expected markers and Autopilot anti-affinities were injected") for _, p := range pods { - Expect(p.Labels).Should(HaveKeyWithValue(workloadv1beta2.AppWrapperLabel, awName.Name)) + Expect(p.Labels).Should(HaveKeyWithValue(awv1beta2.AppWrapperLabel, awName.Name)) validateMarkers(&p) validateAutopilot(&p) } @@ -438,7 +438,7 @@ var _ = Describe("AppWrapper Annotations", func() { }) It("Unannotated appwrappers use defaults", func() { - aw := &workloadv1beta2.AppWrapper{} + aw := &awv1beta2.AppWrapper{} Expect(awReconciler.admissionGraceDuration(ctx, aw)).Should(Equal(awReconciler.Config.FaultTolerance.AdmissionGracePeriod)) Expect(awReconciler.warmupGraceDuration(ctx, aw)).Should(Equal(awReconciler.Config.FaultTolerance.WarmupGracePeriod)) Expect(awReconciler.failureGraceDuration(ctx, aw)).Should(Equal(awReconciler.Config.FaultTolerance.FailureGracePeriod)) @@ -451,17 +451,17 @@ var _ = Describe("AppWrapper Annotations", func() { It("Valid annotations override defaults", func() { allowed := 10 * time.Second - aw := &workloadv1beta2.AppWrapper{ + aw := &awv1beta2.AppWrapper{ ObjectMeta: metav1.ObjectMeta{ Annotations: map[string]string{ - workloadv1beta2.AdmissionGracePeriodDurationAnnotation: allowed.String(), - workloadv1beta2.WarmupGracePeriodDurationAnnotation: allowed.String(), - workloadv1beta2.FailureGracePeriodDurationAnnotation: allowed.String(), - workloadv1beta2.RetryPausePeriodDurationAnnotation: allowed.String(), - workloadv1beta2.RetryLimitAnnotation: "101", - workloadv1beta2.ForcefulDeletionGracePeriodAnnotation: allowed.String(), - workloadv1beta2.DeletionOnFailureGracePeriodAnnotation: allowed.String(), - workloadv1beta2.SuccessTTLAnnotation: allowed.String(), + awv1beta2.AdmissionGracePeriodDurationAnnotation: allowed.String(), + awv1beta2.WarmupGracePeriodDurationAnnotation: allowed.String(), + awv1beta2.FailureGracePeriodDurationAnnotation: allowed.String(), + awv1beta2.RetryPausePeriodDurationAnnotation: allowed.String(), + awv1beta2.RetryLimitAnnotation: "101", + awv1beta2.ForcefulDeletionGracePeriodAnnotation: allowed.String(), + awv1beta2.DeletionOnFailureGracePeriodAnnotation: allowed.String(), + awv1beta2.SuccessTTLAnnotation: allowed.String(), }, }, } @@ -477,17 +477,17 @@ var _ = Describe("AppWrapper Annotations", func() { It("Malformed annotations use defaults", func() { malformed := "222badTime" - aw := &workloadv1beta2.AppWrapper{ + aw := &awv1beta2.AppWrapper{ ObjectMeta: metav1.ObjectMeta{ Annotations: map[string]string{ - workloadv1beta2.AdmissionGracePeriodDurationAnnotation: malformed, - workloadv1beta2.WarmupGracePeriodDurationAnnotation: malformed, - workloadv1beta2.FailureGracePeriodDurationAnnotation: malformed, - workloadv1beta2.RetryPausePeriodDurationAnnotation: malformed, - workloadv1beta2.RetryLimitAnnotation: "abc", - workloadv1beta2.ForcefulDeletionGracePeriodAnnotation: malformed, - workloadv1beta2.DeletionOnFailureGracePeriodAnnotation: malformed, - workloadv1beta2.SuccessTTLAnnotation: malformed, + awv1beta2.AdmissionGracePeriodDurationAnnotation: malformed, + awv1beta2.WarmupGracePeriodDurationAnnotation: malformed, + awv1beta2.FailureGracePeriodDurationAnnotation: malformed, + awv1beta2.RetryPausePeriodDurationAnnotation: malformed, + awv1beta2.RetryLimitAnnotation: "abc", + awv1beta2.ForcefulDeletionGracePeriodAnnotation: malformed, + awv1beta2.DeletionOnFailureGracePeriodAnnotation: malformed, + awv1beta2.SuccessTTLAnnotation: malformed, }, }, } @@ -504,16 +504,16 @@ var _ = Describe("AppWrapper Annotations", func() { It("Out of bounds annotations are clipped", func() { negative := -10 * time.Minute tooLong := 2 * awReconciler.Config.FaultTolerance.GracePeriodMaximum - aw := &workloadv1beta2.AppWrapper{ + aw := &awv1beta2.AppWrapper{ ObjectMeta: metav1.ObjectMeta{ Annotations: map[string]string{ - workloadv1beta2.AdmissionGracePeriodDurationAnnotation: negative.String(), - workloadv1beta2.WarmupGracePeriodDurationAnnotation: tooLong.String(), - workloadv1beta2.FailureGracePeriodDurationAnnotation: tooLong.String(), - workloadv1beta2.RetryPausePeriodDurationAnnotation: negative.String(), - workloadv1beta2.ForcefulDeletionGracePeriodAnnotation: tooLong.String(), - workloadv1beta2.DeletionOnFailureGracePeriodAnnotation: tooLong.String(), - workloadv1beta2.SuccessTTLAnnotation: (awReconciler.Config.FaultTolerance.SuccessTTL + 10*time.Second).String(), + awv1beta2.AdmissionGracePeriodDurationAnnotation: negative.String(), + awv1beta2.WarmupGracePeriodDurationAnnotation: tooLong.String(), + awv1beta2.FailureGracePeriodDurationAnnotation: tooLong.String(), + awv1beta2.RetryPausePeriodDurationAnnotation: negative.String(), + awv1beta2.ForcefulDeletionGracePeriodAnnotation: tooLong.String(), + awv1beta2.DeletionOnFailureGracePeriodAnnotation: tooLong.String(), + awv1beta2.SuccessTTLAnnotation: (awReconciler.Config.FaultTolerance.SuccessTTL + 10*time.Second).String(), }, }, } @@ -527,11 +527,11 @@ var _ = Describe("AppWrapper Annotations", func() { }) It("Parsing of terminal exits codes", func() { - aw := &workloadv1beta2.AppWrapper{ + aw := &awv1beta2.AppWrapper{ ObjectMeta: metav1.ObjectMeta{ Annotations: map[string]string{ - workloadv1beta2.TerminalExitCodesAnnotation: "3,10,abc,42", - workloadv1beta2.RetryableExitCodesAnnotation: "x,10,20", + awv1beta2.TerminalExitCodesAnnotation: "3,10,abc,42", + awv1beta2.RetryableExitCodesAnnotation: "x,10,20", }, }, } diff --git a/internal/controller/appwrapper/fixtures_test.go b/internal/controller/appwrapper/fixtures_test.go index aa90c32..f59551b 100644 --- a/internal/controller/appwrapper/fixtures_test.go +++ b/internal/controller/appwrapper/fixtures_test.go @@ -33,7 +33,7 @@ import ( kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" "sigs.k8s.io/yaml" - workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" + awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" ) const charset = "abcdefghijklmnopqrstuvwxyz0123456789" @@ -47,16 +47,16 @@ func randName(baseName string) string { return fmt.Sprintf("%s-%s", baseName, string(b)) } -func toAppWrapper(components ...workloadv1beta2.AppWrapperComponent) *workloadv1beta2.AppWrapper { - return &workloadv1beta2.AppWrapper{ - TypeMeta: metav1.TypeMeta{APIVersion: workloadv1beta2.GroupVersion.String(), Kind: "AppWrapper"}, +func toAppWrapper(components ...awv1beta2.AppWrapperComponent) *awv1beta2.AppWrapper { + return &awv1beta2.AppWrapper{ + TypeMeta: metav1.TypeMeta{APIVersion: awv1beta2.GroupVersion.String(), Kind: "AppWrapper"}, ObjectMeta: metav1.ObjectMeta{Name: randName("aw"), Namespace: "default"}, - Spec: workloadv1beta2.AppWrapperSpec{Components: components}, + Spec: awv1beta2.AppWrapperSpec{Components: components}, } } -func getAppWrapper(typeNamespacedName types.NamespacedName) *workloadv1beta2.AppWrapper { - aw := &workloadv1beta2.AppWrapper{} +func getAppWrapper(typeNamespacedName types.NamespacedName) *awv1beta2.AppWrapper { + aw := &awv1beta2.AppWrapper{} err := k8sClient.Get(ctx, typeNamespacedName, aw) Expect(err).NotTo(HaveOccurred()) return aw @@ -69,13 +69,13 @@ func getNode(name string) *v1.Node { return node } -func getPods(aw *workloadv1beta2.AppWrapper) []v1.Pod { +func getPods(aw *awv1beta2.AppWrapper) []v1.Pod { result := []v1.Pod{} podList := &v1.PodList{} err := k8sClient.List(ctx, podList, &client.ListOptions{Namespace: aw.Namespace}) Expect(err).NotTo(HaveOccurred()) for _, pod := range podList.Items { - if awn, found := pod.Labels[workloadv1beta2.AppWrapperLabel]; found && awn == aw.Name { + if awn, found := pod.Labels[awv1beta2.AppWrapperLabel]; found && awn == aw.Name { result = append(result, pod) } } @@ -83,7 +83,7 @@ func getPods(aw *workloadv1beta2.AppWrapper) []v1.Pod { } // envTest doesn't have a Pod controller; so simulate it -func setPodStatus(aw *workloadv1beta2.AppWrapper, phase v1.PodPhase, numToChange int32) error { +func setPodStatus(aw *awv1beta2.AppWrapper, phase v1.PodPhase, numToChange int32) error { podList := &v1.PodList{} err := k8sClient.List(ctx, podList, &client.ListOptions{Namespace: aw.Namespace}) if err != nil { @@ -93,7 +93,7 @@ func setPodStatus(aw *workloadv1beta2.AppWrapper, phase v1.PodPhase, numToChange if numToChange <= 0 { return nil } - if awn, found := pod.Labels[workloadv1beta2.AppWrapperLabel]; found && awn == aw.Name { + if awn, found := pod.Labels[awv1beta2.AppWrapperLabel]; found && awn == aw.Name { pod.Status.Phase = phase err = k8sClient.Status().Update(ctx, &pod) if err != nil { @@ -123,7 +123,7 @@ spec: limits: nvidia.com/gpu: %v` -func pod(milliCPU int64, numGPU int64, declarePodSets bool) workloadv1beta2.AppWrapperComponent { +func pod(milliCPU int64, numGPU int64, declarePodSets bool) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(podYAML, randName("pod"), resource.NewMilliQuantity(milliCPU, resource.DecimalSI), @@ -132,11 +132,11 @@ func pod(milliCPU int64, numGPU int64, declarePodSets bool) workloadv1beta2.AppW jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - awc := &workloadv1beta2.AppWrapperComponent{ + awc := &awv1beta2.AppWrapperComponent{ Template: runtime.RawExtension{Raw: jsonBytes}, } if declarePodSets { - awc.DeclaredPodSets = []workloadv1beta2.AppWrapperPodSet{{Replicas: ptr.To(int32(1)), Path: "template"}} + awc.DeclaredPodSets = []awv1beta2.AppWrapperPodSet{{Replicas: ptr.To(int32(1)), Path: "template"}} } return *awc } @@ -181,11 +181,11 @@ spec: limits: nvidia.com/gpu: 1` -func complexPodYaml() workloadv1beta2.AppWrapperComponent { +func complexPodYaml() awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(complexPodYAML, randName("pod")) jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - awc := &workloadv1beta2.AppWrapperComponent{ + awc := &awv1beta2.AppWrapperComponent{ Template: runtime.RawExtension{Raw: jsonBytes}, } return *awc @@ -205,15 +205,15 @@ spec: requests: cpu: %v` -func malformedPod(milliCPU int64) workloadv1beta2.AppWrapperComponent { +func malformedPod(milliCPU int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(malformedPodYAML, randName("pod"), resource.NewMilliQuantity(milliCPU, resource.DecimalSI)) jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{{Replicas: ptr.To(int32(1)), Path: "template"}}, + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{{Replicas: ptr.To(int32(1)), Path: "template"}}, Template: runtime.RawExtension{Raw: jsonBytes}, } } diff --git a/internal/controller/appwrapper/resource_management.go b/internal/controller/appwrapper/resource_management.go index 51e4261..37deb7d 100644 --- a/internal/controller/appwrapper/resource_management.go +++ b/internal/controller/appwrapper/resource_management.go @@ -22,7 +22,7 @@ import ( "fmt" "time" - workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" + awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" "github.com/project-codeflare/appwrapper/pkg/utils" v1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -188,7 +188,7 @@ func addNodeSelectorsToAffinity(spec map[string]interface{}, exprsToAdd []v1.Nod } //gocyclo:ignore -func (r *AppWrapperReconciler) createComponent(ctx context.Context, aw *workloadv1beta2.AppWrapper, componentIdx int) (error, bool) { +func (r *AppWrapperReconciler) createComponent(ctx context.Context, aw *awv1beta2.AppWrapper, componentIdx int) (error, bool) { component := aw.Spec.Components[componentIdx] componentStatus := aw.Status.ComponentStatus[componentIdx] toMap := func(x interface{}) map[string]string { @@ -218,11 +218,11 @@ func (r *AppWrapperReconciler) createComponent(ctx context.Context, aw *workload if err != nil { return err, true } - awLabels := map[string]string{workloadv1beta2.AppWrapperLabel: aw.Name} + awLabels := map[string]string{awv1beta2.AppWrapperLabel: aw.Name} obj.SetLabels(utilmaps.MergeKeepFirst(obj.GetLabels(), awLabels)) for podSetsIdx, podSet := range componentStatus.PodSets { - toInject := &workloadv1beta2.AppWrapperPodSetInfo{} + toInject := &awv1beta2.AppWrapperPodSetInfo{} if r.Config.EnableKueueIntegrations { if podSetsIdx < len(component.PodSetInfos) { toInject = &component.PodSetInfos[podSetsIdx] @@ -354,12 +354,12 @@ func (r *AppWrapperReconciler) createComponent(ctx context.Context, aw *workload log.FromContext(ctx).Info("After injection", "obj", obj) orig := copyForStatusPatch(aw) - if meta.FindStatusCondition(aw.Status.ComponentStatus[componentIdx].Conditions, string(workloadv1beta2.ResourcesDeployed)) == nil { + if meta.FindStatusCondition(aw.Status.ComponentStatus[componentIdx].Conditions, string(awv1beta2.ResourcesDeployed)) == nil { aw.Status.ComponentStatus[componentIdx].Name = obj.GetName() aw.Status.ComponentStatus[componentIdx].Kind = obj.GetKind() aw.Status.ComponentStatus[componentIdx].APIVersion = obj.GetAPIVersion() meta.SetStatusCondition(&aw.Status.ComponentStatus[componentIdx].Conditions, metav1.Condition{ - Type: string(workloadv1beta2.ResourcesDeployed), + Type: string(awv1beta2.ResourcesDeployed), Status: metav1.ConditionUnknown, Reason: "ComponentCreationInitiated", }) @@ -383,7 +383,7 @@ func (r *AppWrapperReconciler) createComponent(ctx context.Context, aw *workload // resource not actually created; patch status to reflect that orig := copyForStatusPatch(aw) meta.SetStatusCondition(&aw.Status.ComponentStatus[componentIdx].Conditions, metav1.Condition{ - Type: string(workloadv1beta2.ResourcesDeployed), + Type: string(awv1beta2.ResourcesDeployed), Status: metav1.ConditionFalse, Reason: "ComponentCreationErrored", }) @@ -399,7 +399,7 @@ func (r *AppWrapperReconciler) createComponent(ctx context.Context, aw *workload orig = copyForStatusPatch(aw) aw.Status.ComponentStatus[componentIdx].Name = obj.GetName() // Update name to support usage of GenerateName meta.SetStatusCondition(&aw.Status.ComponentStatus[componentIdx].Conditions, metav1.Condition{ - Type: string(workloadv1beta2.ResourcesDeployed), + Type: string(awv1beta2.ResourcesDeployed), Status: metav1.ConditionTrue, Reason: "ComponentCreatedSuccessfully", }) @@ -411,9 +411,9 @@ func (r *AppWrapperReconciler) createComponent(ctx context.Context, aw *workload } // createComponents incrementally patches aw.Status -- MUST NOT CARRY STATUS PATCHES ACROSS INVOCATIONS -func (r *AppWrapperReconciler) createComponents(ctx context.Context, aw *workloadv1beta2.AppWrapper) (error, bool) { +func (r *AppWrapperReconciler) createComponents(ctx context.Context, aw *awv1beta2.AppWrapper) (error, bool) { for componentIdx := range aw.Spec.Components { - if !meta.IsStatusConditionTrue(aw.Status.ComponentStatus[componentIdx].Conditions, string(workloadv1beta2.ResourcesDeployed)) { + if !meta.IsStatusConditionTrue(aw.Status.ComponentStatus[componentIdx].Conditions, string(awv1beta2.ResourcesDeployed)) { if err, fatal := r.createComponent(ctx, aw, componentIdx); err != nil { return err, fatal } @@ -422,10 +422,10 @@ func (r *AppWrapperReconciler) createComponents(ctx context.Context, aw *workloa return nil, false } -func (r *AppWrapperReconciler) deleteComponents(ctx context.Context, aw *workloadv1beta2.AppWrapper) bool { +func (r *AppWrapperReconciler) deleteComponents(ctx context.Context, aw *awv1beta2.AppWrapper) bool { deleteIfPresent := func(idx int, opts ...client.DeleteOption) bool { cs := &aw.Status.ComponentStatus[idx] - rd := meta.FindStatusCondition(cs.Conditions, string(workloadv1beta2.ResourcesDeployed)) + rd := meta.FindStatusCondition(cs.Conditions, string(awv1beta2.ResourcesDeployed)) if rd == nil || rd.Status == metav1.ConditionFalse || (rd.Status == metav1.ConditionUnknown && cs.Name == "") { return false // not present } @@ -437,7 +437,7 @@ func (r *AppWrapperReconciler) deleteComponents(ctx context.Context, aw *workloa if apierrors.IsNotFound(err) { // Has already been undeployed; update componentStatus and return not present meta.SetStatusCondition(&cs.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.ResourcesDeployed), + Type: string(awv1beta2.ResourcesDeployed), Status: metav1.ConditionFalse, Reason: "CompononetDeleted", }) @@ -451,7 +451,7 @@ func (r *AppWrapperReconciler) deleteComponents(ctx context.Context, aw *workloa } meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{ - Type: string(workloadv1beta2.DeletingResources), + Type: string(awv1beta2.DeletingResources), Status: metav1.ConditionTrue, Reason: "DeletionInitiated", }) @@ -462,7 +462,7 @@ func (r *AppWrapperReconciler) deleteComponents(ctx context.Context, aw *workloa } deletionGracePeriod := r.forcefulDeletionGraceDuration(ctx, aw) - whenInitiated := meta.FindStatusCondition(aw.Status.Conditions, string(workloadv1beta2.DeletingResources)).LastTransitionTime + whenInitiated := meta.FindStatusCondition(aw.Status.Conditions, string(awv1beta2.DeletingResources)).LastTransitionTime gracePeriodExpired := time.Now().After(whenInitiated.Time.Add(deletionGracePeriod)) if componentsRemaining && !gracePeriodExpired { @@ -474,13 +474,13 @@ func (r *AppWrapperReconciler) deleteComponents(ctx context.Context, aw *workloa if err := r.List(ctx, pods, client.UnsafeDisableDeepCopy, client.InNamespace(aw.Namespace), - client.MatchingLabels{workloadv1beta2.AppWrapperLabel: aw.Name}); err != nil { + client.MatchingLabels{awv1beta2.AppWrapperLabel: aw.Name}); err != nil { log.FromContext(ctx).Error(err, "Pod list error") } if !componentsRemaining && len(pods.Items) == 0 { // no resources or pods left; deletion is complete - clearCondition(aw, workloadv1beta2.DeletingResources, "DeletionComplete", "") + clearCondition(aw, awv1beta2.DeletingResources, "DeletionComplete", "") return true } diff --git a/internal/controller/appwrapper/suite_test.go b/internal/controller/appwrapper/suite_test.go index 1f7aaba..d332cf4 100644 --- a/internal/controller/appwrapper/suite_test.go +++ b/internal/controller/appwrapper/suite_test.go @@ -38,7 +38,7 @@ import ( logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" - workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" + awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" ) @@ -85,7 +85,7 @@ var _ = BeforeSuite(func() { Expect(cfg).NotTo(BeNil()) scheme := apimachineryruntime.NewScheme() - err = workloadv1beta2.AddToScheme(scheme) + err = awv1beta2.AddToScheme(scheme) Expect(err).NotTo(HaveOccurred()) err = admissionv1.AddToScheme(scheme) diff --git a/internal/controller/workload/workload_controller.go b/internal/controller/workload/workload_controller.go index ea75e78..fa74860 100644 --- a/internal/controller/workload/workload_controller.go +++ b/internal/controller/workload/workload_controller.go @@ -27,7 +27,7 @@ import ( "sigs.k8s.io/kueue/pkg/controller/jobframework" "sigs.k8s.io/kueue/pkg/podset" - workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" + awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" "github.com/project-codeflare/appwrapper/pkg/utils" ) @@ -39,10 +39,10 @@ import ( // +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=resourceflavors,verbs=get;list;watch // +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=workloadpriorityclasses,verbs=get;list;watch -type AppWrapper workloadv1beta2.AppWrapper +type AppWrapper awv1beta2.AppWrapper var ( - GVK = workloadv1beta2.GroupVersion.WithKind("AppWrapper") + GVK = awv1beta2.GroupVersion.WithKind("AppWrapper") WorkloadReconciler = jobframework.NewGenericReconcilerFactory( func() jobframework.GenericJob { return &AppWrapper{} }, func(b *builder.Builder, c client.Client) *builder.Builder { @@ -52,7 +52,7 @@ var ( ) func (aw *AppWrapper) Object() client.Object { - return (*workloadv1beta2.AppWrapper)(aw) + return (*awv1beta2.AppWrapper)(aw) } func (aw *AppWrapper) IsSuspended() bool { @@ -60,7 +60,7 @@ func (aw *AppWrapper) IsSuspended() bool { } func (aw *AppWrapper) IsActive() bool { - return meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.QuotaReserved)) + return meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved)) } func (aw *AppWrapper) Suspend() { @@ -72,7 +72,7 @@ func (aw *AppWrapper) GVK() schema.GroupVersionKind { } func (aw *AppWrapper) PodSets() []kueue.PodSet { - podSets, err := utils.GetPodSets((*workloadv1beta2.AppWrapper)(aw)) + podSets, err := utils.GetPodSets((*awv1beta2.AppWrapper)(aw)) if err != nil { // Kueue will raise an error on zero length PodSet; the Kueue GenericJob API prevents propagating the actual error. return []kueue.PodSet{} @@ -85,7 +85,7 @@ func (aw *AppWrapper) PodSets() []kueue.PodSet { } func (aw *AppWrapper) RunWithPodSetsInfo(podSetsInfo []podset.PodSetInfo) error { - if err := utils.SetPodSetInfos((*workloadv1beta2.AppWrapper)(aw), podSetsInfo); err != nil { + if err := utils.SetPodSetInfos((*awv1beta2.AppWrapper)(aw), podSetsInfo); err != nil { return err } aw.Spec.Suspend = false @@ -93,16 +93,16 @@ func (aw *AppWrapper) RunWithPodSetsInfo(podSetsInfo []podset.PodSetInfo) error } func (aw *AppWrapper) RestorePodSetsInfo(podSetsInfo []podset.PodSetInfo) bool { - return utils.ClearPodSetInfos((*workloadv1beta2.AppWrapper)(aw)) + return utils.ClearPodSetInfos((*awv1beta2.AppWrapper)(aw)) } func (aw *AppWrapper) Finished() (message string, success, finished bool) { switch aw.Status.Phase { - case workloadv1beta2.AppWrapperSucceeded: + case awv1beta2.AppWrapperSucceeded: return "AppWrapper finished successfully", true, true - case workloadv1beta2.AppWrapperFailed: - if meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed)) { + case awv1beta2.AppWrapperFailed: + if meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed)) { return "Still deleting resources for failed AppWrapper", false, false } else { return "AppWrapper failed", false, true @@ -112,5 +112,5 @@ func (aw *AppWrapper) Finished() (message string, success, finished bool) { } func (aw *AppWrapper) PodsReady() bool { - return meta.IsStatusConditionTrue(aw.Status.Conditions, string(workloadv1beta2.PodsReady)) + return meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.PodsReady)) } diff --git a/internal/webhook/appwrapper_fixtures_test.go b/internal/webhook/appwrapper_fixtures_test.go index ea5182e..586d102 100644 --- a/internal/webhook/appwrapper_fixtures_test.go +++ b/internal/webhook/appwrapper_fixtures_test.go @@ -23,7 +23,7 @@ import ( . "github.com/onsi/gomega" - workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" + awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -43,16 +43,16 @@ func randName(baseName string) string { return fmt.Sprintf("%s-%s", baseName, string(b)) } -func toAppWrapper(components ...workloadv1beta2.AppWrapperComponent) *workloadv1beta2.AppWrapper { - return &workloadv1beta2.AppWrapper{ - TypeMeta: metav1.TypeMeta{APIVersion: workloadv1beta2.GroupVersion.String(), Kind: "AppWrapper"}, +func toAppWrapper(components ...awv1beta2.AppWrapperComponent) *awv1beta2.AppWrapper { + return &awv1beta2.AppWrapper{ + TypeMeta: metav1.TypeMeta{APIVersion: awv1beta2.GroupVersion.String(), Kind: "AppWrapper"}, ObjectMeta: metav1.ObjectMeta{Name: randName("aw"), Namespace: "default"}, - Spec: workloadv1beta2.AppWrapperSpec{Components: components}, + Spec: awv1beta2.AppWrapperSpec{Components: components}, } } -func getAppWrapper(typeNamespacedName types.NamespacedName) *workloadv1beta2.AppWrapper { - aw := &workloadv1beta2.AppWrapper{} +func getAppWrapper(typeNamespacedName types.NamespacedName) *awv1beta2.AppWrapper { + aw := &awv1beta2.AppWrapper{} err := k8sClient.Get(ctx, typeNamespacedName, aw) Expect(err).NotTo(HaveOccurred()) return aw @@ -73,27 +73,27 @@ spec: requests: cpu: %v` -func pod(milliCPU int64) workloadv1beta2.AppWrapperComponent { +func pod(milliCPU int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(podYAML, randName("pod"), resource.NewMilliQuantity(milliCPU, resource.DecimalSI)) jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{{Path: "template"}}, + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{{Path: "template"}}, Template: runtime.RawExtension{Raw: jsonBytes}, } } -func podForInference(milliCPU int64) workloadv1beta2.AppWrapperComponent { +func podForInference(milliCPU int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(podYAML, randName("pod"), resource.NewMilliQuantity(milliCPU, resource.DecimalSI)) jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ + return awv1beta2.AppWrapperComponent{ Template: runtime.RawExtension{Raw: jsonBytes}, } } @@ -114,7 +114,7 @@ spec: requests: cpu: %v` -func namespacedPod(namespace string, milliCPU int64) workloadv1beta2.AppWrapperComponent { +func namespacedPod(namespace string, milliCPU int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(namespacedPodYAML, randName("pod"), namespace, @@ -122,8 +122,8 @@ func namespacedPod(namespace string, milliCPU int64) workloadv1beta2.AppWrapperC jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{{Path: "template"}}, + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{{Path: "template"}}, Template: runtime.RawExtension{Raw: jsonBytes}, } } @@ -141,12 +141,12 @@ spec: port: 80 targetPort: 8080` -func service() workloadv1beta2.AppWrapperComponent { +func service() awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(serviceYAML, randName("service")) jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{}, + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{}, Template: runtime.RawExtension{Raw: jsonBytes}, } } @@ -177,7 +177,7 @@ spec: requests: cpu: %v` -func deployment(replicaCount int, milliCPU int64) workloadv1beta2.AppWrapperComponent { +func deployment(replicaCount int, milliCPU int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(deploymentYAML, randName("deployment"), replicaCount, @@ -185,13 +185,13 @@ func deployment(replicaCount int, milliCPU int64) workloadv1beta2.AppWrapperComp jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{{Replicas: ptr.To(int32(replicaCount)), Path: "template.spec.template"}}, + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{{Replicas: ptr.To(int32(replicaCount)), Path: "template.spec.template"}}, Template: runtime.RawExtension{Raw: jsonBytes}, } } -func deploymentForInference(replicaCount int, milliCPU int64) workloadv1beta2.AppWrapperComponent { +func deploymentForInference(replicaCount int, milliCPU int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(deploymentYAML, randName("deployment"), replicaCount, @@ -199,7 +199,7 @@ func deploymentForInference(replicaCount int, milliCPU int64) workloadv1beta2.Ap jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ + return awv1beta2.AppWrapperComponent{ Template: runtime.RawExtension{Raw: jsonBytes}, } } @@ -376,7 +376,7 @@ spec: name: odh-ca-cert ` -func rayCluster(workerCount int, milliCPU int64) workloadv1beta2.AppWrapperComponent { +func rayCluster(workerCount int, milliCPU int64) awv1beta2.AppWrapperComponent { workerCPU := resource.NewMilliQuantity(milliCPU, resource.DecimalSI) yamlString := fmt.Sprintf(rayClusterYAML, randName("raycluster"), @@ -385,8 +385,8 @@ func rayCluster(workerCount int, milliCPU int64) workloadv1beta2.AppWrapperCompo jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{ + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{ {Replicas: ptr.To(int32(1)), Path: "template.spec.headGroupSpec.template"}, {Replicas: ptr.To(int32(workerCount)), Path: "template.spec.workerGroupSpecs[0].template"}, }, @@ -394,7 +394,7 @@ func rayCluster(workerCount int, milliCPU int64) workloadv1beta2.AppWrapperCompo } } -func rayClusterForInference(workerCount int, milliCPU int64) workloadv1beta2.AppWrapperComponent { +func rayClusterForInference(workerCount int, milliCPU int64) awv1beta2.AppWrapperComponent { workerCPU := resource.NewMilliQuantity(milliCPU, resource.DecimalSI) yamlString := fmt.Sprintf(rayClusterYAML, randName("raycluster"), @@ -403,7 +403,7 @@ func rayClusterForInference(workerCount int, milliCPU int64) workloadv1beta2.App jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ + return awv1beta2.AppWrapperComponent{ Template: runtime.RawExtension{Raw: jsonBytes}, } } @@ -447,7 +447,7 @@ spec: cpu: %v ` -func jobSet(replicasWorker int, milliCPUWorker int64) workloadv1beta2.AppWrapperComponent { +func jobSet(replicasWorker int, milliCPUWorker int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(jobSetYAML, randName("jobset"), replicasWorker, replicasWorker, @@ -455,8 +455,8 @@ func jobSet(replicasWorker int, milliCPUWorker int64) workloadv1beta2.AppWrapper ) jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{ + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{ {Path: "template.spec.replicatedJobs[0].template.spec.template"}, {Replicas: ptr.To(int32(replicasWorker)), Path: "template.spec.replicatedJobs[1].template.spec.template"}, }, @@ -483,7 +483,7 @@ spec: requests: cpu: %v` -func jobForInference(parallelism int, completions int, milliCPU int64) workloadv1beta2.AppWrapperComponent { +func jobForInference(parallelism int, completions int, milliCPU int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(jobYAML, randName("job"), parallelism, @@ -492,7 +492,7 @@ func jobForInference(parallelism int, completions int, milliCPU int64) workloadv jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ + return awv1beta2.AppWrapperComponent{ Template: runtime.RawExtension{Raw: jsonBytes}, } } @@ -534,7 +534,7 @@ spec: requests: cpu: %v` -func pytorchJobForInference(masterMilliCPU int64, workerReplicas int, workerMilliCPU int64) workloadv1beta2.AppWrapperComponent { +func pytorchJobForInference(masterMilliCPU int64, workerReplicas int, workerMilliCPU int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(pytorchJobYAML, randName("pytorch-job"), resource.NewMilliQuantity(masterMilliCPU, resource.DecimalSI), @@ -543,7 +543,7 @@ func pytorchJobForInference(masterMilliCPU int64, workerReplicas int, workerMill jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ + return awv1beta2.AppWrapperComponent{ Template: runtime.RawExtension{Raw: jsonBytes}, } } @@ -576,7 +576,7 @@ spec: cpu: %v ` -func rayJobForInference(workerCount int, milliCPU int64) workloadv1beta2.AppWrapperComponent { +func rayJobForInference(workerCount int, milliCPU int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(rayJobYAML, randName("rayjob"), workerCount, @@ -584,7 +584,7 @@ func rayJobForInference(workerCount int, milliCPU int64) workloadv1beta2.AppWrap jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ + return awv1beta2.AppWrapperComponent{ Template: runtime.RawExtension{Raw: jsonBytes}, } } diff --git a/internal/webhook/appwrapper_webhook.go b/internal/webhook/appwrapper_webhook.go index 9ec4908..35a1f61 100644 --- a/internal/webhook/appwrapper_webhook.go +++ b/internal/webhook/appwrapper_webhook.go @@ -42,7 +42,7 @@ import ( "sigs.k8s.io/kueue/pkg/controller/jobframework" - workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" + awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" wlc "github.com/project-codeflare/appwrapper/internal/controller/workload" "github.com/project-codeflare/appwrapper/pkg/config" "github.com/project-codeflare/appwrapper/pkg/utils" @@ -81,7 +81,7 @@ var _ webhook.CustomDefaulter = &appWrapperWebhook{} // 2. Ensure Suspend is set appropriately // 3. Add labels with the user name and id func (w *appWrapperWebhook) Default(ctx context.Context, obj runtime.Object) error { - aw := obj.(*workloadv1beta2.AppWrapper) + aw := obj.(*awv1beta2.AppWrapper) log.FromContext(ctx).V(2).Info("Applying defaults", "job", aw) // Queue name and Suspend @@ -113,7 +113,7 @@ var _ webhook.CustomValidator = &appWrapperWebhook{} // ValidateCreate validates invariants when an AppWrapper is created func (w *appWrapperWebhook) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error) { - aw := obj.(*workloadv1beta2.AppWrapper) + aw := obj.(*awv1beta2.AppWrapper) log.FromContext(ctx).V(2).Info("Validating create", "job", aw) allErrors := w.validateAppWrapperCreate(ctx, aw) if w.enableKueueIntegrations { @@ -124,8 +124,8 @@ func (w *appWrapperWebhook) ValidateCreate(ctx context.Context, obj runtime.Obje // ValidateUpdate validates invariants when an AppWrapper is updated func (w *appWrapperWebhook) ValidateUpdate(ctx context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error) { - oldAW := oldObj.(*workloadv1beta2.AppWrapper) - newAW := newObj.(*workloadv1beta2.AppWrapper) + oldAW := oldObj.(*awv1beta2.AppWrapper) + newAW := newObj.(*awv1beta2.AppWrapper) log.FromContext(ctx).V(2).Info("Validating update", "job", newAW) allErrors := w.validateAppWrapperUpdate(oldAW, newAW) if w.enableKueueIntegrations { @@ -149,7 +149,7 @@ func (w *appWrapperWebhook) ValidateDelete(context.Context, runtime.Object) (adm // 3. AppWrappers must not contain any resources that the user could not create directly // 4. Every PodSet must be well-formed: the Path must exist and must be parseable as a PodSpecTemplate // 5. AppWrappers must contain between 1 and 8 PodSets (Kueue invariant) -func (w *appWrapperWebhook) validateAppWrapperCreate(ctx context.Context, aw *workloadv1beta2.AppWrapper) field.ErrorList { +func (w *appWrapperWebhook) validateAppWrapperCreate(ctx context.Context, aw *awv1beta2.AppWrapper) field.ErrorList { allErrors := field.ErrorList{} components := aw.Spec.Components componentsPath := field.NewPath("spec").Child("components") @@ -251,7 +251,7 @@ func (w *appWrapperWebhook) validateAppWrapperCreate(ctx context.Context, aw *wo } // validateAppWrapperUpdate enforces deep immutablity of all fields that were validated by validateAppWrapperCreate -func (w *appWrapperWebhook) validateAppWrapperUpdate(old *workloadv1beta2.AppWrapper, new *workloadv1beta2.AppWrapper) field.ErrorList { +func (w *appWrapperWebhook) validateAppWrapperUpdate(old *awv1beta2.AppWrapper, new *awv1beta2.AppWrapper) field.ErrorList { allErrors := field.ErrorList{} msg := "attempt to change immutable field" componentsPath := field.NewPath("spec").Child("components") @@ -340,7 +340,7 @@ func SetupAppWrapperWebhook(mgr ctrl.Manager, awConfig *config.AppWrapperConfig) } return ctrl.NewWebhookManagedBy(mgr). - For(&workloadv1beta2.AppWrapper{}). + For(&awv1beta2.AppWrapper{}). WithDefaulter(wh). WithValidator(wh). Complete() diff --git a/internal/webhook/appwrapper_webhook_test.go b/internal/webhook/appwrapper_webhook_test.go index 5cb7205..31ca724 100644 --- a/internal/webhook/appwrapper_webhook_test.go +++ b/internal/webhook/appwrapper_webhook_test.go @@ -22,7 +22,7 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" + awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -149,8 +149,8 @@ var _ = Describe("AppWrapper Webhook Tests", func() { child := toAppWrapper(pod(100)) childBytes, err := json.Marshal(child) Expect(err).ShouldNot(HaveOccurred()) - aw := toAppWrapper(pod(100), workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{}, + aw := toAppWrapper(pod(100), awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{}, Template: runtime.RawExtension{Raw: childBytes}, }) Expect(k8sClient.Create(ctx, aw)).ShouldNot(Succeed()) @@ -197,7 +197,7 @@ var _ = Describe("AppWrapper Webhook Tests", func() { Expect(k8sClient.Update(ctx, aw)).Should(Succeed()) aw = getAppWrapper(awName) - aw.Spec.Components[1].PodSetInfos = make([]workloadv1beta2.AppWrapperPodSetInfo, 1) + aw.Spec.Components[1].PodSetInfos = make([]awv1beta2.AppWrapperPodSetInfo, 1) Expect(k8sClient.Update(ctx, aw)).Should(Succeed()) }) diff --git a/internal/webhook/suite_test.go b/internal/webhook/suite_test.go index 6b532d0..db1ae23 100644 --- a/internal/webhook/suite_test.go +++ b/internal/webhook/suite_test.go @@ -45,7 +45,7 @@ import ( metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" "sigs.k8s.io/controller-runtime/pkg/webhook" - workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" + awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" "github.com/project-codeflare/appwrapper/pkg/config" kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" ) @@ -101,7 +101,7 @@ var _ = BeforeSuite(func() { Expect(cfg).NotTo(BeNil()) scheme := apimachineryruntime.NewScheme() - err = workloadv1beta2.AddToScheme(scheme) + err = awv1beta2.AddToScheme(scheme) Expect(err).NotTo(HaveOccurred()) err = admissionv1.AddToScheme(scheme) diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index c526d26..5c967a8 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -41,7 +41,7 @@ import ( kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" "sigs.k8s.io/kueue/pkg/podset" - workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" + awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" ) const templateString = "template" @@ -299,7 +299,7 @@ func getValueAtPath(obj map[string]interface{}, path string) (interface{}, error return cursor, nil } -func Replicas(ps workloadv1beta2.AppWrapperPodSet) int32 { +func Replicas(ps awv1beta2.AppWrapperPodSet) int32 { if ps.Replicas == nil { return 1 } else { @@ -307,7 +307,7 @@ func Replicas(ps workloadv1beta2.AppWrapperPodSet) int32 { } } -func ExpectedPodCount(aw *workloadv1beta2.AppWrapper) (int32, error) { +func ExpectedPodCount(aw *awv1beta2.AppWrapper) (int32, error) { if err := EnsureComponentStatusInitialized(aw); err != nil { return 0, err } @@ -321,13 +321,13 @@ func ExpectedPodCount(aw *workloadv1beta2.AppWrapper) (int32, error) { } // EnsureComponentStatusInitialized initializes aw.Status.ComponenetStatus, including performing PodSet inference for known GVKs -func EnsureComponentStatusInitialized(aw *workloadv1beta2.AppWrapper) error { +func EnsureComponentStatusInitialized(aw *awv1beta2.AppWrapper) error { if len(aw.Status.ComponentStatus) == len(aw.Spec.Components) { return nil } // Construct definitive PodSets from the Spec + InferPodSets and cache in the Status (to avoid clashing with user updates to the Spec via apply) - compStatus := make([]workloadv1beta2.AppWrapperComponentStatus, len(aw.Spec.Components)) + compStatus := make([]awv1beta2.AppWrapperComponentStatus, len(aw.Spec.Components)) for idx := range aw.Spec.Components { if len(aw.Spec.Components[idx].DeclaredPodSets) > 0 { compStatus[idx].PodSets = aw.Spec.Components[idx].DeclaredPodSets @@ -350,7 +350,7 @@ func EnsureComponentStatusInitialized(aw *workloadv1beta2.AppWrapper) error { } // GetPodSets constructs the kueue.PodSets for an AppWrapper -func GetPodSets(aw *workloadv1beta2.AppWrapper) ([]kueue.PodSet, error) { +func GetPodSets(aw *awv1beta2.AppWrapper) ([]kueue.PodSet, error) { podSets := []kueue.PodSet{} if err := EnsureComponentStatusInitialized(aw); err != nil { return nil, err @@ -378,21 +378,21 @@ func GetPodSets(aw *workloadv1beta2.AppWrapper) ([]kueue.PodSet, error) { } // SetPodSetInfos propagates podSetsInfo into the PodSetInfos of aw.Spec.Components -func SetPodSetInfos(aw *workloadv1beta2.AppWrapper, podSetsInfo []podset.PodSetInfo) error { +func SetPodSetInfos(aw *awv1beta2.AppWrapper, podSetsInfo []podset.PodSetInfo) error { if err := EnsureComponentStatusInitialized(aw); err != nil { return err } podSetsInfoIndex := 0 for idx := range aw.Spec.Components { if len(aw.Spec.Components[idx].PodSetInfos) != len(aw.Status.ComponentStatus[idx].PodSets) { - aw.Spec.Components[idx].PodSetInfos = make([]workloadv1beta2.AppWrapperPodSetInfo, len(aw.Status.ComponentStatus[idx].PodSets)) + aw.Spec.Components[idx].PodSetInfos = make([]awv1beta2.AppWrapperPodSetInfo, len(aw.Status.ComponentStatus[idx].PodSets)) } for podSetIdx := range aw.Status.ComponentStatus[idx].PodSets { podSetsInfoIndex += 1 if podSetsInfoIndex > len(podSetsInfo) { continue // we will return an error below...continuing to get an accurate count for the error message } - aw.Spec.Components[idx].PodSetInfos[podSetIdx] = workloadv1beta2.AppWrapperPodSetInfo{ + aw.Spec.Components[idx].PodSetInfos[podSetIdx] = awv1beta2.AppWrapperPodSetInfo{ Annotations: podSetsInfo[podSetsInfoIndex-1].Annotations, Labels: podSetsInfo[podSetsInfoIndex-1].Labels, NodeSelector: podSetsInfo[podSetsInfoIndex-1].NodeSelector, @@ -409,7 +409,7 @@ func SetPodSetInfos(aw *workloadv1beta2.AppWrapper, podSetsInfo []podset.PodSetI } // ClearPodSetInfos clears the PodSetInfos saved by SetPodSetInfos -func ClearPodSetInfos(aw *workloadv1beta2.AppWrapper) bool { +func ClearPodSetInfos(aw *awv1beta2.AppWrapper) bool { for idx := range aw.Spec.Components { aw.Spec.Components[idx].PodSetInfos = nil } @@ -462,10 +462,10 @@ var templatesForGVK = map[schema.GroupVersionKind][]resourceTemplate{ } // inferPodSets infers PodSets for RayJobs and RayClusters -func inferRayPodSets(obj *unstructured.Unstructured, clusterSpecPrefix string) ([]workloadv1beta2.AppWrapperPodSet, error) { - podSets := []workloadv1beta2.AppWrapperPodSet{} +func inferRayPodSets(obj *unstructured.Unstructured, clusterSpecPrefix string) ([]awv1beta2.AppWrapperPodSet, error) { + podSets := []awv1beta2.AppWrapperPodSet{} - podSets = append(podSets, workloadv1beta2.AppWrapperPodSet{Replicas: ptr.To(int32(1)), Path: clusterSpecPrefix + "headGroupSpec.template"}) + podSets = append(podSets, awv1beta2.AppWrapperPodSet{Replicas: ptr.To(int32(1)), Path: clusterSpecPrefix + "headGroupSpec.template"}) if workers, err := getValueAtPath(obj.UnstructuredContent(), clusterSpecPrefix+"workerGroupSpecs"); err == nil { if workers, ok := workers.([]interface{}); ok { for i := range workers { @@ -477,7 +477,7 @@ func inferRayPodSets(obj *unstructured.Unstructured, clusterSpecPrefix string) ( if err != nil { return nil, err } - podSets = append(podSets, workloadv1beta2.AppWrapperPodSet{Replicas: ptr.To(replicas), Path: workerGroupSpecPrefix + templateString}) + podSets = append(podSets, awv1beta2.AppWrapperPodSet{Replicas: ptr.To(replicas), Path: workerGroupSpecPrefix + templateString}) } } } @@ -486,9 +486,9 @@ func inferRayPodSets(obj *unstructured.Unstructured, clusterSpecPrefix string) ( } // InferPodSets infers PodSets for known GVKs -func InferPodSets(obj *unstructured.Unstructured) ([]workloadv1beta2.AppWrapperPodSet, error) { +func InferPodSets(obj *unstructured.Unstructured) ([]awv1beta2.AppWrapperPodSet, error) { gvk := obj.GroupVersionKind() - podSets := []workloadv1beta2.AppWrapperPodSet{} + podSets := []awv1beta2.AppWrapperPodSet{} switch gvk { case schema.GroupVersionKind{Group: "batch", Version: "v1", Kind: "Job"}: @@ -499,7 +499,7 @@ func InferPodSets(obj *unstructured.Unstructured) ([]workloadv1beta2.AppWrapperP if completions, err := GetReplicas(obj, "template.spec.completions"); err == nil && completions < replicas { replicas = completions } - podSets = append(podSets, workloadv1beta2.AppWrapperPodSet{ + podSets = append(podSets, awv1beta2.AppWrapperPodSet{ Replicas: ptr.To(replicas), Path: "template.spec.template", Annotations: map[string]string{ @@ -525,7 +525,7 @@ func InferPodSets(obj *unstructured.Unstructured) ([]workloadv1beta2.AppWrapperP if r, err := GetReplicas(obj, jobSpecPrefix+"replicas"); err == nil { replicas = r } - podSets = append(podSets, workloadv1beta2.AppWrapperPodSet{ + podSets = append(podSets, awv1beta2.AppWrapperPodSet{ Replicas: ptr.To(replicas * podCount), Path: jobSpecPrefix + "template.spec.template", Annotations: map[string]string{ @@ -549,7 +549,7 @@ func InferPodSets(obj *unstructured.Unstructured) ([]workloadv1beta2.AppWrapperP if err != nil { return nil, err } - podSets = append(podSets, workloadv1beta2.AppWrapperPodSet{ + podSets = append(podSets, awv1beta2.AppWrapperPodSet{ Replicas: ptr.To(replicas), Path: prefix + templateString, Annotations: map[string]string{ @@ -582,7 +582,7 @@ func InferPodSets(obj *unstructured.Unstructured) ([]workloadv1beta2.AppWrapperP if err != nil { return nil, err } - podSets = append(podSets, workloadv1beta2.AppWrapperPodSet{Replicas: ptr.To(replicas), Path: template.path}) + podSets = append(podSets, awv1beta2.AppWrapperPodSet{Replicas: ptr.To(replicas), Path: template.path}) } } } @@ -597,13 +597,13 @@ func InferPodSets(obj *unstructured.Unstructured) ([]workloadv1beta2.AppWrapperP } // ValidatePodSets validates the declared and inferred PodSets -func ValidatePodSets(declared []workloadv1beta2.AppWrapperPodSet, inferred []workloadv1beta2.AppWrapperPodSet) error { +func ValidatePodSets(declared []awv1beta2.AppWrapperPodSet, inferred []awv1beta2.AppWrapperPodSet) error { if len(declared) == 0 { return nil } // Validate that there are no duplicate paths in declared - declaredPaths := map[string]workloadv1beta2.AppWrapperPodSet{} + declaredPaths := map[string]awv1beta2.AppWrapperPodSet{} for _, p := range declared { if _, ok := declaredPaths[p.Path]; ok { return fmt.Errorf("multiple DeclaredPodSets with path '%v'", p.Path) diff --git a/test/e2e/appwrapper_test.go b/test/e2e/appwrapper_test.go index d2a1572..a07943a 100644 --- a/test/e2e/appwrapper_test.go +++ b/test/e2e/appwrapper_test.go @@ -31,14 +31,14 @@ import ( "k8s.io/apimachinery/pkg/util/wait" "k8s.io/utils/ptr" - workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" + awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" ) var _ = Describe("AppWrapper E2E Test", func() { - var appwrappers []*workloadv1beta2.AppWrapper + var appwrappers []*awv1beta2.AppWrapper BeforeEach(func() { - appwrappers = []*workloadv1beta2.AppWrapper{} + appwrappers = []*awv1beta2.AppWrapper{} }) AfterEach(func() { @@ -51,32 +51,32 @@ var _ = Describe("AppWrapper E2E Test", func() { aw := createAppWrapper(ctx, pod(250), pod(250)) appwrappers = append(appwrappers, aw) Expect(waitAWPodsReady(ctx, aw)).Should(Succeed()) - Consistently(AppWrapperPhase(ctx, aw), 5*time.Second).Should(Equal(workloadv1beta2.AppWrapperRunning)) + Consistently(AppWrapperPhase(ctx, aw), 5*time.Second).Should(Equal(awv1beta2.AppWrapperRunning)) }) It("Deployments", func() { aw := createAppWrapper(ctx, deployment(2, 200)) appwrappers = append(appwrappers, aw) Expect(waitAWPodsReady(ctx, aw)).Should(Succeed()) - Consistently(AppWrapperPhase(ctx, aw), 5*time.Second).Should(Equal(workloadv1beta2.AppWrapperRunning)) + Consistently(AppWrapperPhase(ctx, aw), 5*time.Second).Should(Equal(awv1beta2.AppWrapperRunning)) }) It("StatefulSets", func() { aw := createAppWrapper(ctx, statefulset(2, 200)) appwrappers = append(appwrappers, aw) Expect(waitAWPodsReady(ctx, aw)).Should(Succeed()) - Consistently(AppWrapperPhase(ctx, aw), 5*time.Second).Should(Equal(workloadv1beta2.AppWrapperRunning)) + Consistently(AppWrapperPhase(ctx, aw), 5*time.Second).Should(Equal(awv1beta2.AppWrapperRunning)) }) It("Batch Jobs", func() { aw := createAppWrapper(ctx, batchjob(250)) appwrappers = append(appwrappers, aw) Expect(waitAWPodsReady(ctx, aw)).Should(Succeed()) - Consistently(AppWrapperPhase(ctx, aw), 5*time.Second).Should(Equal(workloadv1beta2.AppWrapperRunning)) + Consistently(AppWrapperPhase(ctx, aw), 5*time.Second).Should(Equal(awv1beta2.AppWrapperRunning)) }) It("Mixed Basic Resources", func() { aw := createAppWrapper(ctx, pod(100), deployment(2, 100), statefulset(2, 100), service(), batchjob(100)) appwrappers = append(appwrappers, aw) Expect(waitAWPodsReady(ctx, aw)).Should(Succeed()) - Consistently(AppWrapperPhase(ctx, aw), 10*time.Second).Should(Equal(workloadv1beta2.AppWrapperRunning)) + Consistently(AppWrapperPhase(ctx, aw), 10*time.Second).Should(Equal(awv1beta2.AppWrapperRunning)) }) }) @@ -93,14 +93,14 @@ var _ = Describe("AppWrapper E2E Test", func() { aw := createAppWrapper(ctx, raycluster(500, 2, 250)) appwrappers = append(appwrappers, aw) // Non-functonal RayCluster; will never reach Running Phase - Eventually(AppWrapperPhase(ctx, aw), 15*time.Second).Should(Equal(workloadv1beta2.AppWrapperResuming)) + Eventually(AppWrapperPhase(ctx, aw), 15*time.Second).Should(Equal(awv1beta2.AppWrapperResuming)) }) It("RayJobs", func() { aw := createAppWrapper(ctx, rayjob(500, 2, 250)) appwrappers = append(appwrappers, aw) // Non-functonal RayJob; will never reach Running Phase - Eventually(AppWrapperPhase(ctx, aw), 15*time.Second).Should(Equal(workloadv1beta2.AppWrapperResuming)) + Eventually(AppWrapperPhase(ctx, aw), 15*time.Second).Should(Equal(awv1beta2.AppWrapperResuming)) }) }) @@ -111,7 +111,7 @@ var _ = Describe("AppWrapper E2E Test", func() { // TODO: Need dev versions of kueue/jobset to get correct handling of ownership // Once those are released change the test to: // Expect(waitAWPodsReady(ctx, aw)).Should(Succeed()) - Eventually(AppWrapperPhase(ctx, aw), 15*time.Second).Should(Equal(workloadv1beta2.AppWrapperResuming)) + Eventually(AppWrapperPhase(ctx, aw), 15*time.Second).Should(Equal(awv1beta2.AppWrapperResuming)) }) }) @@ -195,8 +195,8 @@ var _ = Describe("AppWrapper E2E Test", func() { child := toAppWrapper(pod(100)) childBytes, err := json.Marshal(child) Expect(err).ShouldNot(HaveOccurred()) - aw := toAppWrapper(pod(100), workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{}, + aw := toAppWrapper(pod(100), awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{}, Template: runtime.RawExtension{Raw: childBytes}, }) Expect(getClient(ctx).Create(ctx, aw)).ShouldNot(Succeed()) @@ -207,23 +207,23 @@ var _ = Describe("AppWrapper E2E Test", func() { appwrappers = append(appwrappers, aw) awName := types.NamespacedName{Name: aw.Name, Namespace: aw.Namespace} - Expect(updateAppWrapper(ctx, awName, func(aw *workloadv1beta2.AppWrapper) { + Expect(updateAppWrapper(ctx, awName, func(aw *awv1beta2.AppWrapper) { aw.Spec.Components[0].Template = aw.Spec.Components[1].Template })).ShouldNot(Succeed()) - Expect(updateAppWrapper(ctx, awName, func(aw *workloadv1beta2.AppWrapper) { + Expect(updateAppWrapper(ctx, awName, func(aw *awv1beta2.AppWrapper) { aw.Spec.Components = append(aw.Spec.Components, aw.Spec.Components[0]) })).ShouldNot(Succeed()) - Expect(updateAppWrapper(ctx, awName, func(aw *workloadv1beta2.AppWrapper) { + Expect(updateAppWrapper(ctx, awName, func(aw *awv1beta2.AppWrapper) { aw.Spec.Components[0].DeclaredPodSets = append(aw.Spec.Components[0].DeclaredPodSets, aw.Spec.Components[0].DeclaredPodSets...) })).ShouldNot(Succeed()) - Expect(updateAppWrapper(ctx, awName, func(aw *workloadv1beta2.AppWrapper) { + Expect(updateAppWrapper(ctx, awName, func(aw *awv1beta2.AppWrapper) { aw.Spec.Components[0].DeclaredPodSets[0].Path = "bad" })).ShouldNot(Succeed()) - Expect(updateAppWrapper(ctx, awName, func(aw *workloadv1beta2.AppWrapper) { + Expect(updateAppWrapper(ctx, awName, func(aw *awv1beta2.AppWrapper) { aw.Spec.Components[0].DeclaredPodSets[0].Replicas = ptr.To(int32(12)) })).ShouldNot(Succeed()) }) @@ -255,12 +255,12 @@ var _ = Describe("AppWrapper E2E Test", func() { By("Jobs should be queued when quota is exhausted") aw3 := createAppWrapper(ctx, deployment(2, 250)) appwrappers = append(appwrappers, aw3) - Eventually(AppWrapperPhase(ctx, aw3), 30*time.Second).Should(Equal(workloadv1beta2.AppWrapperSuspended)) - Consistently(AppWrapperPhase(ctx, aw3), 20*time.Second).Should(Equal(workloadv1beta2.AppWrapperSuspended)) + Eventually(AppWrapperPhase(ctx, aw3), 30*time.Second).Should(Equal(awv1beta2.AppWrapperSuspended)) + Consistently(AppWrapperPhase(ctx, aw3), 20*time.Second).Should(Equal(awv1beta2.AppWrapperSuspended)) By("Queued job is admitted when quota becomes available") Expect(deleteAppWrapper(ctx, aw.Name, aw.Namespace)).Should(Succeed()) - appwrappers = []*workloadv1beta2.AppWrapper{aw2, aw3} + appwrappers = []*awv1beta2.AppWrapper{aw2, aw3} Expect(waitAWPodsReady(ctx, aw3)).Should(Succeed()) }) }) @@ -291,7 +291,7 @@ var _ = Describe("AppWrapper E2E Test", func() { aw := createAppWrapper(ctx, succeedingBatchjob(500)) appwrappers = append(appwrappers, aw) Expect(waitAWPodsReady(ctx, aw)).Should(Succeed()) - Eventually(AppWrapperPhase(ctx, aw), 60*time.Second).Should(Equal(workloadv1beta2.AppWrapperSucceeded)) + Eventually(AppWrapperPhase(ctx, aw), 60*time.Second).Should(Equal(awv1beta2.AppWrapperSucceeded)) }) It("A failed Batch Job will be Reset up to retryLimit and then Failed", func() { @@ -299,14 +299,14 @@ var _ = Describe("AppWrapper E2E Test", func() { if aw.Annotations == nil { aw.Annotations = make(map[string]string) } - aw.Annotations[workloadv1beta2.FailureGracePeriodDurationAnnotation] = "0s" - aw.Annotations[workloadv1beta2.RetryLimitAnnotation] = "2" - aw.Annotations[workloadv1beta2.RetryPausePeriodDurationAnnotation] = "5s" + aw.Annotations[awv1beta2.FailureGracePeriodDurationAnnotation] = "0s" + aw.Annotations[awv1beta2.RetryLimitAnnotation] = "2" + aw.Annotations[awv1beta2.RetryPausePeriodDurationAnnotation] = "5s" Expect(getClient(ctx).Create(ctx, aw)).To(Succeed()) appwrappers = append(appwrappers, aw) Expect(waitAWPodsReady(ctx, aw)).Should(Succeed()) - Eventually(AppWrapperPhase(ctx, aw), 90*time.Second).Should(Equal(workloadv1beta2.AppWrapperResetting)) - Eventually(AppWrapperPhase(ctx, aw), 180*time.Second).Should(Equal(workloadv1beta2.AppWrapperFailed)) + Eventually(AppWrapperPhase(ctx, aw), 90*time.Second).Should(Equal(awv1beta2.AppWrapperResetting)) + Eventually(AppWrapperPhase(ctx, aw), 180*time.Second).Should(Equal(awv1beta2.AppWrapperFailed)) aw = getAppWrapper(ctx, types.NamespacedName{Name: aw.Name, Namespace: aw.Namespace}) Expect(aw.Status.Retries).Should(Equal(int32(2))) }) @@ -314,14 +314,14 @@ var _ = Describe("AppWrapper E2E Test", func() { It("Deleting a Running Component yields a failed AppWrapper", func() { aw := createAppWrapper(ctx, pytorchjob(2, 500)) appwrappers = append(appwrappers, aw) - Eventually(AppWrapperPhase(ctx, aw), 60*time.Second).Should(Equal(workloadv1beta2.AppWrapperRunning)) + Eventually(AppWrapperPhase(ctx, aw), 60*time.Second).Should(Equal(awv1beta2.AppWrapperRunning)) aw = getAppWrapper(ctx, types.NamespacedName{Name: aw.Name, Namespace: aw.Namespace}) toDelete := &metav1.PartialObjectMetadata{ TypeMeta: metav1.TypeMeta{Kind: aw.Status.ComponentStatus[0].Kind, APIVersion: aw.Status.ComponentStatus[0].APIVersion}, ObjectMeta: metav1.ObjectMeta{Name: aw.Status.ComponentStatus[0].Name, Namespace: aw.Namespace}, } Expect(getClient(ctx).Delete(ctx, toDelete)).Should(Succeed()) - Eventually(AppWrapperPhase(ctx, aw), 60*time.Second).Should(Equal(workloadv1beta2.AppWrapperFailed)) + Eventually(AppWrapperPhase(ctx, aw), 60*time.Second).Should(Equal(awv1beta2.AppWrapperFailed)) }) }) @@ -342,9 +342,9 @@ var _ = Describe("AppWrapper E2E Test", func() { err = updateNode(ctx, nodeName, func(n *v1.Node) { n.Labels["autopilot.ibm.com/gpuhealth"] = "EVICT" }) Expect(err).ShouldNot(HaveOccurred()) By("workload is reset") - Eventually(AppWrapperPhase(ctx, aw), 120*time.Second).Should(Equal(workloadv1beta2.AppWrapperResetting)) + Eventually(AppWrapperPhase(ctx, aw), 120*time.Second).Should(Equal(awv1beta2.AppWrapperResetting)) By("workload is running again") - Eventually(AppWrapperPhase(ctx, aw), 120*time.Second).Should(Equal(workloadv1beta2.AppWrapperRunning)) + Eventually(AppWrapperPhase(ctx, aw), 120*time.Second).Should(Equal(awv1beta2.AppWrapperRunning)) Expect(waitAWPodsReady(ctx, aw)).Should(Succeed()) }) }) @@ -355,15 +355,15 @@ var _ = Describe("AppWrapper E2E Test", func() { if aw.Annotations == nil { aw.Annotations = make(map[string]string) } - aw.Annotations[workloadv1beta2.FailureGracePeriodDurationAnnotation] = "10s" - aw.Annotations[workloadv1beta2.WarmupGracePeriodDurationAnnotation] = "10s" - aw.Annotations[workloadv1beta2.RetryLimitAnnotation] = "1" - aw.Annotations[workloadv1beta2.RetryPausePeriodDurationAnnotation] = "5s" + aw.Annotations[awv1beta2.FailureGracePeriodDurationAnnotation] = "10s" + aw.Annotations[awv1beta2.WarmupGracePeriodDurationAnnotation] = "10s" + aw.Annotations[awv1beta2.RetryLimitAnnotation] = "1" + aw.Annotations[awv1beta2.RetryPausePeriodDurationAnnotation] = "5s" Expect(getClient(ctx).Create(ctx, aw)).To(Succeed()) appwrappers = append(appwrappers, aw) - Eventually(AppWrapperPhase(ctx, aw), 30*time.Second).Should(Equal(workloadv1beta2.AppWrapperRunning)) - Eventually(AppWrapperPhase(ctx, aw), 30*time.Second).Should(Equal(workloadv1beta2.AppWrapperResetting)) - Eventually(AppWrapperPhase(ctx, aw), 180*time.Second).Should(Equal(workloadv1beta2.AppWrapperFailed)) + Eventually(AppWrapperPhase(ctx, aw), 30*time.Second).Should(Equal(awv1beta2.AppWrapperRunning)) + Eventually(AppWrapperPhase(ctx, aw), 30*time.Second).Should(Equal(awv1beta2.AppWrapperResetting)) + Eventually(AppWrapperPhase(ctx, aw), 180*time.Second).Should(Equal(awv1beta2.AppWrapperFailed)) aw = getAppWrapper(ctx, types.NamespacedName{Name: aw.Name, Namespace: aw.Namespace}) Expect(aw.Status.Retries).Should(Equal(int32(1))) }) @@ -387,7 +387,7 @@ var _ = Describe("AppWrapper E2E Test", func() { By("Polling for all AppWrappers to be Running") err := wait.PollUntilContextTimeout(ctx, 500*time.Millisecond, 1*time.Minute, false, func(ctx context.Context) (done bool, err error) { t := time.Now() - toCheckAWS := make([]*workloadv1beta2.AppWrapper, 0, len(appwrappers)) + toCheckAWS := make([]*awv1beta2.AppWrapper, 0, len(appwrappers)) for _, aw := range nonRunningAWs { if !checkAppWrapperRunning(ctx, aw) { toCheckAWS = append(toCheckAWS, aw) @@ -413,7 +413,7 @@ var _ = Describe("AppWrapper E2E Test", func() { nonReadyAWs := appwrappers err = wait.PollUntilContextTimeout(ctx, 500*time.Millisecond, 3*time.Minute, false, func(ctx context.Context) (done bool, err error) { t := time.Now() - toCheckAWS := make([]*workloadv1beta2.AppWrapper, 0, len(appwrappers)) + toCheckAWS := make([]*awv1beta2.AppWrapper, 0, len(appwrappers)) for _, aw := range nonReadyAWs { if !checkAllAWPodsReady(ctx, aw) { toCheckAWS = append(toCheckAWS, aw) diff --git a/test/e2e/fixtures_test.go b/test/e2e/fixtures_test.go index 9a87109..191ff05 100644 --- a/test/e2e/fixtures_test.go +++ b/test/e2e/fixtures_test.go @@ -28,7 +28,7 @@ import ( "k8s.io/utils/ptr" "sigs.k8s.io/yaml" - workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" + awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" ) const charset = "abcdefghijklmnopqrstuvwxyz0123456789" @@ -58,15 +58,15 @@ spec: requests: cpu: %v` -func pod(milliCPU int64) workloadv1beta2.AppWrapperComponent { +func pod(milliCPU int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(podYAML, randName("pod"), resource.NewMilliQuantity(milliCPU, resource.DecimalSI)) jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{{Path: "template"}}, + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{{Path: "template"}}, Template: runtime.RawExtension{Raw: jsonBytes}, } } @@ -87,7 +87,7 @@ spec: requests: cpu: %v` -func namespacedPod(namespace string, milliCPU int64) workloadv1beta2.AppWrapperComponent { +func namespacedPod(namespace string, milliCPU int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(namespacedPodYAML, randName("pod"), namespace, @@ -95,8 +95,8 @@ func namespacedPod(namespace string, milliCPU int64) workloadv1beta2.AppWrapperC jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{{Path: "template"}}, + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{{Path: "template"}}, Template: runtime.RawExtension{Raw: jsonBytes}, } } @@ -114,12 +114,12 @@ spec: port: 80 targetPort: 8080` -func service() workloadv1beta2.AppWrapperComponent { +func service() awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(serviceYAML, randName("service")) jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{}, + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{}, Template: runtime.RawExtension{Raw: jsonBytes}, } } @@ -150,7 +150,7 @@ spec: requests: cpu: %v` -func deployment(replicaCount int, milliCPU int64) workloadv1beta2.AppWrapperComponent { +func deployment(replicaCount int, milliCPU int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(deploymentYAML, randName("deployment"), replicaCount, @@ -158,8 +158,8 @@ func deployment(replicaCount int, milliCPU int64) workloadv1beta2.AppWrapperComp jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{{Replicas: ptr.To(int32(replicaCount)), Path: "template.spec.template"}}, + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{{Replicas: ptr.To(int32(replicaCount)), Path: "template.spec.template"}}, Template: runtime.RawExtension{Raw: jsonBytes}, } } @@ -190,7 +190,7 @@ spec: requests: cpu: %v` -func statefulset(replicaCount int, milliCPU int64) workloadv1beta2.AppWrapperComponent { +func statefulset(replicaCount int, milliCPU int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(statefulesetYAML, randName("statefulset"), replicaCount, @@ -198,8 +198,8 @@ func statefulset(replicaCount int, milliCPU int64) workloadv1beta2.AppWrapperCom jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{{Replicas: ptr.To(int32(replicaCount)), Path: "template.spec.template"}}, + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{{Replicas: ptr.To(int32(replicaCount)), Path: "template.spec.template"}}, Template: runtime.RawExtension{Raw: jsonBytes}, } } @@ -223,15 +223,15 @@ spec: cpu: %v ` -func batchjob(milliCPU int64) workloadv1beta2.AppWrapperComponent { +func batchjob(milliCPU int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(batchJobYAML, "batchjob-", resource.NewMilliQuantity(milliCPU, resource.DecimalSI)) jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{{Path: "template.spec.template"}}, + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{{Path: "template.spec.template"}}, Template: runtime.RawExtension{Raw: jsonBytes}, } } @@ -255,15 +255,15 @@ spec: cpu: %v ` -func failingBatchjob(milliCPU int64) workloadv1beta2.AppWrapperComponent { +func failingBatchjob(milliCPU int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(failingBatchJobYAML, randName("batchjob"), resource.NewMilliQuantity(milliCPU, resource.DecimalSI)) jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{{Path: "template.spec.template"}}, + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{{Path: "template.spec.template"}}, Template: runtime.RawExtension{Raw: jsonBytes}, } } @@ -287,15 +287,15 @@ spec: cpu: %v ` -func succeedingBatchjob(milliCPU int64) workloadv1beta2.AppWrapperComponent { +func succeedingBatchjob(milliCPU int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(succeedingBatchJobYAML, "batchjob-", resource.NewMilliQuantity(milliCPU, resource.DecimalSI)) jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{{Path: "template.spec.template"}}, + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{{Path: "template.spec.template"}}, Template: runtime.RawExtension{Raw: jsonBytes}, } } @@ -323,15 +323,15 @@ spec: cpu: %v ` -func stuckInitBatchjob(milliCPU int64) workloadv1beta2.AppWrapperComponent { +func stuckInitBatchjob(milliCPU int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(stuckInitBatchJobYAML, randName("batchjob"), resource.NewMilliQuantity(milliCPU, resource.DecimalSI)) jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{{Path: "template.spec.template"}}, + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{{Path: "template.spec.template"}}, Template: runtime.RawExtension{Raw: jsonBytes}, } } @@ -361,15 +361,15 @@ spec: cpu: %v ` -func jobset(milliCPU int64) workloadv1beta2.AppWrapperComponent { +func jobset(milliCPU int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(jobsetYAML, "jobset-", resource.NewMilliQuantity(milliCPU, resource.DecimalSI)) jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{{Path: "template.spec.replicatedJobs[0].template.spec.template"}}, + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{{Path: "template.spec.replicatedJobs[0].template.spec.template"}}, Template: runtime.RawExtension{Raw: jsonBytes}, } } @@ -399,7 +399,7 @@ spec: cpu: %v ` -func pytorchjob(replicasWorker int, milliCPUWorker int64) workloadv1beta2.AppWrapperComponent { +func pytorchjob(replicasWorker int, milliCPUWorker int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(pytorchYAML, randName("pytorchjob"), replicasWorker, @@ -407,8 +407,8 @@ func pytorchjob(replicasWorker int, milliCPUWorker int64) workloadv1beta2.AppWra ) jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{ + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{ {Replicas: ptr.To(int32(replicasWorker)), Path: "template.spec.pytorchReplicaSpecs.Worker.template"}, }, Template: runtime.RawExtension{Raw: jsonBytes}, @@ -457,7 +457,7 @@ spec: cpu: %v ` -func raycluster(milliCPUHead int64, replicasWorker int, milliCPUWorker int64) workloadv1beta2.AppWrapperComponent { +func raycluster(milliCPUHead int64, replicasWorker int, milliCPUWorker int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(rayclusterYAML, randName("raycluster"), resource.NewMilliQuantity(milliCPUHead, resource.DecimalSI), @@ -466,8 +466,8 @@ func raycluster(milliCPUHead int64, replicasWorker int, milliCPUWorker int64) wo ) jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{ + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{ {Replicas: ptr.To(int32(1)), Path: "template.spec.headGroupSpec.template"}, {Replicas: ptr.To(int32(replicasWorker)), Path: "template.spec.workerGroupSpecs[0].template"}, }, @@ -519,7 +519,7 @@ spec: cpu: %v ` -func rayjob(milliCPUHead int64, replicasWorker int, milliCPUWorker int64) workloadv1beta2.AppWrapperComponent { +func rayjob(milliCPUHead int64, replicasWorker int, milliCPUWorker int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(rayjobYAML, randName("raycluster"), resource.NewMilliQuantity(milliCPUHead, resource.DecimalSI), @@ -528,8 +528,8 @@ func rayjob(milliCPUHead int64, replicasWorker int, milliCPUWorker int64) worklo ) jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{ + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{ {Replicas: ptr.To(int32(1)), Path: "template.spec.rayClusterSpec.headGroupSpec.template"}, {Replicas: ptr.To(int32(replicasWorker)), Path: "template.spec.rayClusterSpec.workerGroupSpecs[0].template"}, }, @@ -576,7 +576,7 @@ spec: cpu: %v ` -func jobSet(replicasWorker int, milliCPUWorker int64) workloadv1beta2.AppWrapperComponent { +func jobSet(replicasWorker int, milliCPUWorker int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(jobSetYAML, randName("jobset"), replicasWorker, replicasWorker, @@ -584,8 +584,8 @@ func jobSet(replicasWorker int, milliCPUWorker int64) workloadv1beta2.AppWrapper ) jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{ + return awv1beta2.AppWrapperComponent{ + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{ {Path: "template.spec.replicatedJobs[0].template.spec.template"}, {Replicas: ptr.To(int32(replicasWorker)), Path: "template.spec.replicatedJobs[1].template.spec.template"}, }, @@ -615,7 +615,7 @@ spec: nvidia.com/gpu: %v ` -func autopilotjob(milliCPU int64, gpus int64) workloadv1beta2.AppWrapperComponent { +func autopilotjob(milliCPU int64, gpus int64) awv1beta2.AppWrapperComponent { yamlString := fmt.Sprintf(autopilotJobYAML, "apjob-", resource.NewMilliQuantity(milliCPU, resource.DecimalSI), @@ -624,12 +624,12 @@ func autopilotjob(milliCPU int64, gpus int64) workloadv1beta2.AppWrapperComponen jsonBytes, err := yaml.YAMLToJSON([]byte(yamlString)) Expect(err).NotTo(HaveOccurred()) - return workloadv1beta2.AppWrapperComponent{ + return awv1beta2.AppWrapperComponent{ Annotations: map[string]string{ - workloadv1beta2.RetryPausePeriodDurationAnnotation: "5s", - workloadv1beta2.FailureGracePeriodDurationAnnotation: "5s", + awv1beta2.RetryPausePeriodDurationAnnotation: "5s", + awv1beta2.FailureGracePeriodDurationAnnotation: "5s", }, - DeclaredPodSets: []workloadv1beta2.AppWrapperPodSet{{Path: "template.spec.template"}}, + DeclaredPodSets: []awv1beta2.AppWrapperPodSet{{Path: "template.spec.template"}}, Template: runtime.RawExtension{Raw: jsonBytes}, } } diff --git a/test/e2e/util_test.go b/test/e2e/util_test.go index 60a7e47..50ee3c0 100644 --- a/test/e2e/util_test.go +++ b/test/e2e/util_test.go @@ -40,7 +40,7 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" + awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" "github.com/project-codeflare/appwrapper/pkg/utils" ) @@ -68,7 +68,7 @@ func extendContextWithClient(ctx context.Context) context.Context { baseConfig := ctrl.GetConfigOrDie() scheme := runtime.NewScheme() Expect(clientgoscheme.AddToScheme(scheme)).To(Succeed()) - Expect(workloadv1beta2.AddToScheme(scheme)).To(Succeed()) + Expect(awv1beta2.AddToScheme(scheme)).To(Succeed()) Expect(kueue.AddToScheme(scheme)).To(Succeed()) // Create a client with full permissions @@ -145,7 +145,7 @@ func ensureTestQueuesExist(ctx context.Context) { Expect(client.IgnoreAlreadyExists(err)).To(Succeed()) } -func cleanupTestObjects(ctx context.Context, appwrappers []*workloadv1beta2.AppWrapper) { +func cleanupTestObjects(ctx context.Context, appwrappers []*awv1beta2.AppWrapper) { if appwrappers == nil { return } @@ -162,32 +162,32 @@ func cleanupTestObjects(ctx context.Context, appwrappers []*workloadv1beta2.AppW func deleteAppWrapper(ctx context.Context, name string, namespace string) error { foreground := metav1.DeletePropagationForeground - aw := &workloadv1beta2.AppWrapper{ObjectMeta: metav1.ObjectMeta{ + aw := &awv1beta2.AppWrapper{ObjectMeta: metav1.ObjectMeta{ Name: name, Namespace: namespace, }} return getClient(ctx).Delete(ctx, aw, &client.DeleteOptions{PropagationPolicy: &foreground}) } -func createAppWrapper(ctx context.Context, components ...workloadv1beta2.AppWrapperComponent) *workloadv1beta2.AppWrapper { +func createAppWrapper(ctx context.Context, components ...awv1beta2.AppWrapperComponent) *awv1beta2.AppWrapper { aw := toAppWrapper(components...) Expect(getClient(ctx).Create(ctx, aw)).To(Succeed()) return aw } -func toAppWrapper(components ...workloadv1beta2.AppWrapperComponent) *workloadv1beta2.AppWrapper { - return &workloadv1beta2.AppWrapper{ - TypeMeta: metav1.TypeMeta{APIVersion: workloadv1beta2.GroupVersion.String(), Kind: "AppWrapper"}, +func toAppWrapper(components ...awv1beta2.AppWrapperComponent) *awv1beta2.AppWrapper { + return &awv1beta2.AppWrapper{ + TypeMeta: metav1.TypeMeta{APIVersion: awv1beta2.GroupVersion.String(), Kind: "AppWrapper"}, ObjectMeta: metav1.ObjectMeta{ Name: randName("aw"), Namespace: testNamespace, Labels: map[string]string{kc.QueueLabel: testQueueName}, }, - Spec: workloadv1beta2.AppWrapperSpec{Components: components}, + Spec: awv1beta2.AppWrapperSpec{Components: components}, } } -func updateAppWrapper(ctx context.Context, awName types.NamespacedName, update func(*workloadv1beta2.AppWrapper)) error { +func updateAppWrapper(ctx context.Context, awName types.NamespacedName, update func(*awv1beta2.AppWrapper)) error { for { aw := getAppWrapper(ctx, awName) update(aw) @@ -201,8 +201,8 @@ func updateAppWrapper(ctx context.Context, awName types.NamespacedName, update f } } -func getAppWrapper(ctx context.Context, awName types.NamespacedName) *workloadv1beta2.AppWrapper { - aw := &workloadv1beta2.AppWrapper{} +func getAppWrapper(ctx context.Context, awName types.NamespacedName) *awv1beta2.AppWrapper { + aw := &awv1beta2.AppWrapper{} err := getClient(ctx).Get(ctx, awName, aw) Expect(err).NotTo(HaveOccurred()) return aw @@ -215,7 +215,7 @@ func getNodeForAppwrapper(ctx context.Context, awName types.NamespacedName) (str return "", err } for _, pod := range podList.Items { - if awn, found := pod.Labels[workloadv1beta2.AppWrapperLabel]; found && awn == awName.Name { + if awn, found := pod.Labels[awv1beta2.AppWrapperLabel]; found && awn == awName.Name { return pod.Spec.NodeName, nil } } @@ -248,7 +248,7 @@ func podsInPhase(awNamespace string, awName string, phase []v1.PodPhase, minimum matchingPodCount := int32(0) for _, pod := range podList.Items { - if awn, found := pod.Labels[workloadv1beta2.AppWrapperLabel]; found && awn == awName { + if awn, found := pod.Labels[awv1beta2.AppWrapperLabel]; found && awn == awName { for _, p := range phase { if pod.Status.Phase == p { matchingPodCount++ @@ -271,7 +271,7 @@ func noPodsExist(awNamespace string, awName string) wait.ConditionWithContextFun } for _, podFromPodList := range podList.Items { - if awn, found := podFromPodList.Labels[workloadv1beta2.AppWrapperLabel]; found && awn == awName { + if awn, found := podFromPodList.Labels[awv1beta2.AppWrapperLabel]; found && awn == awName { return false, nil } } @@ -283,7 +283,7 @@ func waitAWPodsDeleted(ctx context.Context, awNamespace string, awName string) e return wait.PollUntilContextTimeout(ctx, 100*time.Millisecond, 90*time.Second, true, noPodsExist(awNamespace, awName)) } -func waitAWPodsReady(ctx context.Context, aw *workloadv1beta2.AppWrapper) error { +func waitAWPodsReady(ctx context.Context, aw *awv1beta2.AppWrapper) error { numExpected, err := utils.ExpectedPodCount(aw) if err != nil { return err @@ -292,7 +292,7 @@ func waitAWPodsReady(ctx context.Context, aw *workloadv1beta2.AppWrapper) error return wait.PollUntilContextTimeout(ctx, 100*time.Millisecond, 120*time.Second, true, podsInPhase(aw.Namespace, aw.Name, phases, numExpected)) } -func checkAllAWPodsReady(ctx context.Context, aw *workloadv1beta2.AppWrapper) bool { +func checkAllAWPodsReady(ctx context.Context, aw *awv1beta2.AppWrapper) bool { numExpected, err := utils.ExpectedPodCount(aw) if err != nil { return false @@ -302,18 +302,18 @@ func checkAllAWPodsReady(ctx context.Context, aw *workloadv1beta2.AppWrapper) bo return err == nil } -func checkAppWrapperRunning(ctx context.Context, aw *workloadv1beta2.AppWrapper) bool { - aw2 := &workloadv1beta2.AppWrapper{} +func checkAppWrapperRunning(ctx context.Context, aw *awv1beta2.AppWrapper) bool { + aw2 := &awv1beta2.AppWrapper{} err := getClient(ctx).Get(ctx, client.ObjectKey{Namespace: aw.Namespace, Name: aw.Name}, aw2) Expect(err).NotTo(HaveOccurred()) - return aw2.Status.Phase == workloadv1beta2.AppWrapperRunning + return aw2.Status.Phase == awv1beta2.AppWrapperRunning } -func AppWrapperPhase(ctx context.Context, aw *workloadv1beta2.AppWrapper) func(g Gomega) workloadv1beta2.AppWrapperPhase { +func AppWrapperPhase(ctx context.Context, aw *awv1beta2.AppWrapper) func(g Gomega) awv1beta2.AppWrapperPhase { name := aw.Name namespace := aw.Namespace - return func(g Gomega) workloadv1beta2.AppWrapperPhase { - aw := &workloadv1beta2.AppWrapper{} + return func(g Gomega) awv1beta2.AppWrapperPhase { + aw := &awv1beta2.AppWrapper{} err := getClient(ctx).Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, aw) g.Expect(err).NotTo(HaveOccurred()) return aw.Status.Phase From 3829595dc75050ba349698861c885c0a5430f11e Mon Sep 17 00:00:00 2001 From: David Grove Date: Wed, 19 Feb 2025 17:50:45 -0500 Subject: [PATCH 2/5] cleanup: start reducing dependency on internal workload controller --- .../appwrapper/appwrapper_controller_test.go | 58 +++++-------------- .../appwrapper/resource_management.go | 8 +-- 2 files changed, 18 insertions(+), 48 deletions(-) diff --git a/internal/controller/appwrapper/appwrapper_controller_test.go b/internal/controller/appwrapper/appwrapper_controller_test.go index b9dd743..b7fea07 100644 --- a/internal/controller/appwrapper/appwrapper_controller_test.go +++ b/internal/controller/appwrapper/appwrapper_controller_test.go @@ -29,12 +29,9 @@ import ( "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/reconcile" - kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" "sigs.k8s.io/kueue/pkg/podset" - utilslices "sigs.k8s.io/kueue/pkg/util/slices" awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" - "github.com/project-codeflare/appwrapper/internal/controller/workload" "github.com/project-codeflare/appwrapper/pkg/config" "github.com/project-codeflare/appwrapper/pkg/utils" ) @@ -49,7 +46,6 @@ var _ = Describe("AppWrapper Controller", func() { Tolerations: []v1.Toleration{{Key: "aKey", Operator: "Exists", Effect: "NoSchedule"}}, SchedulingGates: []v1.PodSchedulingGate{{Name: "aGate"}}, } - var kueuePodSets []kueue.PodSet advanceToResuming := func(components ...awv1beta2.AppWrapperComponent) { By("Create an AppWrapper") @@ -74,7 +70,6 @@ var _ = Describe("AppWrapper Controller", func() { Scheme: k8sClient.Scheme(), Config: awConfig, } - kueuePodSets = (*workload.AppWrapper)(aw).PodSets() By("Reconciling: Empty -> Suspended") _, err := awReconciler.Reconcile(ctx, reconcile.Request{NamespacedName: awName}) @@ -83,9 +78,9 @@ var _ = Describe("AppWrapper Controller", func() { aw = getAppWrapper(awName) Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperSuspended)) - By("Updating aw.Spec by invoking RunWithPodSetsInfo") - Expect((*workload.AppWrapper)(aw).RunWithPodSetsInfo([]podset.PodSetInfo{markerPodSet, markerPodSet})).To(Succeed()) - Expect(aw.Spec.Suspend).To(BeFalse()) + By("Updating aw.Spec by invoking utils.SetPodSetInfos and setting suspend to false") + Expect(utils.SetPodSetInfos(aw, []podset.PodSetInfo{markerPodSet, markerPodSet})).To(Succeed()) + aw.Spec.Suspend = false Expect(k8sClient.Update(ctx, aw)).To(Succeed()) By("Reconciling: Suspended -> Resuming") @@ -93,12 +88,11 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw = getAppWrapper(awName) + Expect(aw.Spec.Suspend).Should(BeFalse()) Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperResuming)) Expect(controllerutil.ContainsFinalizer(aw, AppWrapperFinalizer)).Should(BeTrue()) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeTrue()) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeTrue()) - Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeTrue()) - Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) } beginRunning := func() { @@ -107,12 +101,11 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw := getAppWrapper(awName) + Expect(aw.Spec.Suspend).Should(BeFalse()) Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperRunning)) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeTrue()) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeTrue()) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.PodsReady))).Should(BeFalse()) - Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeTrue()) - Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) podStatus, err := awReconciler.getPodStatus(ctx, aw) Expect(err).NotTo(HaveOccurred()) Expect(utils.ExpectedPodCount(aw)).Should(Equal(podStatus.pending)) @@ -124,12 +117,11 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw = getAppWrapper(awName) + Expect(aw.Spec.Suspend).Should(BeFalse()) Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperRunning)) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeTrue()) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeTrue()) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.PodsReady))).Should(BeFalse()) - Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeTrue()) - Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) podStatus, err = awReconciler.getPodStatus(ctx, aw) Expect(err).NotTo(HaveOccurred()) Expect(podStatus.running).Should(Equal(int32(1))) @@ -147,18 +139,14 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw = getAppWrapper(awName) + Expect(aw.Spec.Suspend).Should(BeFalse()) Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperRunning)) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeTrue()) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeTrue()) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.PodsReady))).Should(BeTrue()) - Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeTrue()) - Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) - Expect((*workload.AppWrapper)(aw).PodsReady()).Should(BeTrue()) podStatus, err := awReconciler.getPodStatus(ctx, aw) Expect(err).NotTo(HaveOccurred()) Expect(podStatus.running).Should(Equal(pc)) - _, _, finished := (*workload.AppWrapper)(aw).Finished() - Expect(finished).Should(BeFalse()) } validateMarkers := func(p *v1.Pod) { @@ -242,11 +230,10 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw = getAppWrapper(awName) + Expect(aw.Spec.Suspend).Should(BeFalse()) Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperRunning)) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeTrue()) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeTrue()) - Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeTrue()) - Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) pc, err := utils.ExpectedPodCount(aw) Expect(err).NotTo(HaveOccurred()) Expect(pc).Should(Equal(int32(2))) @@ -262,13 +249,10 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw = getAppWrapper(awName) + Expect(aw.Spec.Suspend).Should(BeFalse()) Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperSucceeded)) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeTrue()) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeFalse()) - Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeFalse()) - Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) - _, _, finished := (*workload.AppWrapper)(aw).Finished() - Expect(finished).Should(BeTrue()) By("Resources are Removed after TTL expires") _, err = awReconciler.Reconcile(ctx, reconcile.Request{NamespacedName: awName}) @@ -286,8 +270,8 @@ var _ = Describe("AppWrapper Controller", func() { By("Invoking Suspend and RestorePodSetsInfo") aw := getAppWrapper(awName) - (*workload.AppWrapper)(aw).Suspend() - Expect((*workload.AppWrapper)(aw).RestorePodSetsInfo(utilslices.Map(kueuePodSets, podset.FromPodSet))).To(BeTrue()) + aw.Spec.Suspend = true + Expect(utils.ClearPodSetInfos(aw)).To(BeTrue()) Expect(k8sClient.Update(ctx, aw)).To(Succeed()) By("Reconciling: Running -> Suspending") @@ -295,11 +279,10 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw = getAppWrapper(awName) + Expect(aw.Spec.Suspend).Should(BeTrue()) Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperSuspending)) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeTrue()) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeTrue()) - Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeTrue()) - Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeTrue()) By("Reconciling: Suspending -> Suspended") _, err = awReconciler.Reconcile(ctx, reconcile.Request{NamespacedName: awName}) // initiate deletion @@ -308,11 +291,10 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw = getAppWrapper(awName) + Expect(aw.Spec.Suspend).Should(BeTrue()) Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperSuspended)) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeFalse()) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeFalse()) - Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeFalse()) - Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeTrue()) podStatus, err := awReconciler.getPodStatus(ctx, aw) Expect(err).NotTo(HaveOccurred()) Expect(podStatus.failed + podStatus.succeeded + podStatus.running + podStatus.pending).Should(Equal(int32(0))) @@ -332,13 +314,10 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw = getAppWrapper(awName) + Expect(aw.Spec.Suspend).Should(BeFalse()) Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperFailed)) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeTrue()) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeTrue()) - Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeTrue()) - Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) - _, _, finished := (*workload.AppWrapper)(aw).Finished() - Expect(finished).Should(BeFalse()) _, err = awReconciler.Reconcile(ctx, reconcile.Request{NamespacedName: awName}) // initiate deletion Expect(err).NotTo(HaveOccurred()) @@ -347,13 +326,9 @@ var _ = Describe("AppWrapper Controller", func() { aw = getAppWrapper(awName) Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperFailed)) - + Expect(aw.Spec.Suspend).Should(BeFalse()) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeFalse()) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeFalse()) - Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeFalse()) - Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) - _, _, finished = (*workload.AppWrapper)(aw).Finished() - Expect(finished).Should(BeTrue()) }) It("Failure during resource creation leads to a failed AppWrapper", func() { @@ -364,12 +339,11 @@ var _ = Describe("AppWrapper Controller", func() { Expect(err).NotTo(HaveOccurred()) aw := getAppWrapper(awName) + Expect(aw.Spec.Suspend).Should(BeFalse()) Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperFailed)) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed))).Should(BeTrue()) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved))).Should(BeTrue()) Expect(meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.PodsReady))).Should(BeFalse()) - Expect((*workload.AppWrapper)(aw).IsActive()).Should(BeTrue()) - Expect((*workload.AppWrapper)(aw).IsSuspended()).Should(BeFalse()) podStatus, err := awReconciler.getPodStatus(ctx, aw) Expect(err).NotTo(HaveOccurred()) Expect(podStatus.pending).Should(Equal(int32(1))) diff --git a/internal/controller/appwrapper/resource_management.go b/internal/controller/appwrapper/resource_management.go index 37deb7d..618f3ba 100644 --- a/internal/controller/appwrapper/resource_management.go +++ b/internal/controller/appwrapper/resource_management.go @@ -223,12 +223,8 @@ func (r *AppWrapperReconciler) createComponent(ctx context.Context, aw *awv1beta for podSetsIdx, podSet := range componentStatus.PodSets { toInject := &awv1beta2.AppWrapperPodSetInfo{} - if r.Config.EnableKueueIntegrations { - if podSetsIdx < len(component.PodSetInfos) { - toInject = &component.PodSetInfos[podSetsIdx] - } else { - return fmt.Errorf("missing podSetInfo %v for component %v", podSetsIdx, componentIdx), true - } + if podSetsIdx < len(component.PodSetInfos) { + toInject = &component.PodSetInfos[podSetsIdx] } p, err := utils.GetRawTemplate(obj.UnstructuredContent(), podSet.Path) From f27059eec24211a78218b436b3de66dc510c4589 Mon Sep 17 00:00:00 2001 From: David Grove Date: Wed, 19 Feb 2025 18:14:51 -0500 Subject: [PATCH 3/5] adjust appwrapper/kueue api to push kueue types into workload controller --- .../appwrapper/appwrapper_controller_test.go | 5 +-- .../workload/workload_controller.go | 28 +++++++++++--- pkg/utils/utils.go | 37 ++++++------------- 3 files changed, 36 insertions(+), 34 deletions(-) diff --git a/internal/controller/appwrapper/appwrapper_controller_test.go b/internal/controller/appwrapper/appwrapper_controller_test.go index b7fea07..3bf75d0 100644 --- a/internal/controller/appwrapper/appwrapper_controller_test.go +++ b/internal/controller/appwrapper/appwrapper_controller_test.go @@ -29,7 +29,6 @@ import ( "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/reconcile" - "sigs.k8s.io/kueue/pkg/podset" awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" "github.com/project-codeflare/appwrapper/pkg/config" @@ -39,7 +38,7 @@ import ( var _ = Describe("AppWrapper Controller", func() { var awReconciler *AppWrapperReconciler var awName types.NamespacedName - markerPodSet := podset.PodSetInfo{ + markerPodSet := awv1beta2.AppWrapperPodSetInfo{ Labels: map[string]string{"testkey1": "value1"}, Annotations: map[string]string{"test2": "test2"}, NodeSelector: map[string]string{"nodeName": "myNode"}, @@ -79,7 +78,7 @@ var _ = Describe("AppWrapper Controller", func() { Expect(aw.Status.Phase).Should(Equal(awv1beta2.AppWrapperSuspended)) By("Updating aw.Spec by invoking utils.SetPodSetInfos and setting suspend to false") - Expect(utils.SetPodSetInfos(aw, []podset.PodSetInfo{markerPodSet, markerPodSet})).To(Succeed()) + Expect(utils.SetPodSetInfos(aw, []awv1beta2.AppWrapperPodSetInfo{markerPodSet, markerPodSet})).To(Succeed()) aw.Spec.Suspend = false Expect(k8sClient.Update(ctx, aw)).To(Succeed()) diff --git a/internal/controller/workload/workload_controller.go b/internal/controller/workload/workload_controller.go index fa74860..a81dba5 100644 --- a/internal/controller/workload/workload_controller.go +++ b/internal/controller/workload/workload_controller.go @@ -17,6 +17,8 @@ limitations under the License. package workload import ( + "fmt" + "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/runtime/schema" @@ -72,21 +74,35 @@ func (aw *AppWrapper) GVK() schema.GroupVersionKind { } func (aw *AppWrapper) PodSets() []kueue.PodSet { - podSets, err := utils.GetPodSets((*awv1beta2.AppWrapper)(aw)) + podSpecTemplates, awPodSets, err := utils.GetComponentPodSpecs((*awv1beta2.AppWrapper)(aw)) if err != nil { // Kueue will raise an error on zero length PodSet; the Kueue GenericJob API prevents propagating the actual error. return []kueue.PodSet{} } - for psIndex := range podSets { - podSets[psIndex].TopologyRequest = jobframework.PodSetTopologyRequest(&podSets[psIndex].Template.ObjectMeta, nil, nil, nil) + podSets := []kueue.PodSet{} + for psIndex := range podSpecTemplates { + podSets = append(podSets, kueue.PodSet{ + Name: fmt.Sprintf("%s-%v", aw.Name, psIndex), + Template: *podSpecTemplates[psIndex], + Count: utils.Replicas(awPodSets[psIndex]), + TopologyRequest: jobframework.PodSetTopologyRequest(&(podSpecTemplates[psIndex].ObjectMeta), nil, nil, nil), + }) } - return podSets } func (aw *AppWrapper) RunWithPodSetsInfo(podSetsInfo []podset.PodSetInfo) error { - if err := utils.SetPodSetInfos((*awv1beta2.AppWrapper)(aw), podSetsInfo); err != nil { - return err + awPodSetsInfo := make([]awv1beta2.AppWrapperPodSetInfo, len(podSetsInfo)) + for idx := range podSetsInfo { + awPodSetsInfo[idx].Annotations = podSetsInfo[idx].Annotations + awPodSetsInfo[idx].Labels = podSetsInfo[idx].Labels + awPodSetsInfo[idx].NodeSelector = podSetsInfo[idx].NodeSelector + awPodSetsInfo[idx].Tolerations = podSetsInfo[idx].Tolerations + awPodSetsInfo[idx].SchedulingGates = podSetsInfo[idx].SchedulingGates + } + + if err := utils.SetPodSetInfos((*awv1beta2.AppWrapper)(aw), awPodSetsInfo); err != nil { + return fmt.Errorf("%w: %v", podset.ErrInvalidPodsetInfo, err) } aw.Spec.Suspend = false return nil diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index 5c967a8..6df1ffd 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -38,9 +38,6 @@ import ( "k8s.io/utils/ptr" jobsetapi "sigs.k8s.io/jobset/api/jobset/v1alpha2" - kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" - "sigs.k8s.io/kueue/pkg/podset" - awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" ) @@ -349,36 +346,32 @@ func EnsureComponentStatusInitialized(aw *awv1beta2.AppWrapper) error { return nil } -// GetPodSets constructs the kueue.PodSets for an AppWrapper -func GetPodSets(aw *awv1beta2.AppWrapper) ([]kueue.PodSet, error) { - podSets := []kueue.PodSet{} +func GetComponentPodSpecs(aw *awv1beta2.AppWrapper) ([]*v1.PodTemplateSpec, []awv1beta2.AppWrapperPodSet, error) { + templates := []*v1.PodTemplateSpec{} + podSets := []awv1beta2.AppWrapperPodSet{} if err := EnsureComponentStatusInitialized(aw); err != nil { - return nil, err + return nil, nil, err } for idx := range aw.Status.ComponentStatus { if len(aw.Status.ComponentStatus[idx].PodSets) > 0 { obj := &unstructured.Unstructured{} if _, _, err := unstructured.UnstructuredJSONScheme.Decode(aw.Spec.Components[idx].Template.Raw, nil, obj); err != nil { // Should be unreachable; Template.Raw validated by AppWrapper AdmissionController - return nil, err + return nil, nil, err } - for psIdx, podSet := range aw.Status.ComponentStatus[idx].PodSets { - replicas := Replicas(podSet) + for _, podSet := range aw.Status.ComponentStatus[idx].PodSets { if template, err := GetPodTemplateSpec(obj, podSet.Path); err == nil { - podSets = append(podSets, kueue.PodSet{ - Name: fmt.Sprintf("%s-%v-%v", aw.Name, idx, psIdx), - Template: *template, - Count: replicas, - }) + templates = append(templates, template) + podSets = append(podSets, podSet) } } } } - return podSets, nil + return templates, podSets, nil } // SetPodSetInfos propagates podSetsInfo into the PodSetInfos of aw.Spec.Components -func SetPodSetInfos(aw *awv1beta2.AppWrapper, podSetsInfo []podset.PodSetInfo) error { +func SetPodSetInfos(aw *awv1beta2.AppWrapper, podSetsInfo []awv1beta2.AppWrapperPodSetInfo) error { if err := EnsureComponentStatusInitialized(aw); err != nil { return err } @@ -392,18 +385,12 @@ func SetPodSetInfos(aw *awv1beta2.AppWrapper, podSetsInfo []podset.PodSetInfo) e if podSetsInfoIndex > len(podSetsInfo) { continue // we will return an error below...continuing to get an accurate count for the error message } - aw.Spec.Components[idx].PodSetInfos[podSetIdx] = awv1beta2.AppWrapperPodSetInfo{ - Annotations: podSetsInfo[podSetsInfoIndex-1].Annotations, - Labels: podSetsInfo[podSetsInfoIndex-1].Labels, - NodeSelector: podSetsInfo[podSetsInfoIndex-1].NodeSelector, - Tolerations: podSetsInfo[podSetsInfoIndex-1].Tolerations, - SchedulingGates: podSetsInfo[podSetsInfoIndex-1].SchedulingGates, - } + aw.Spec.Components[idx].PodSetInfos[podSetIdx] = podSetsInfo[podSetIdx] } } if podSetsInfoIndex != len(podSetsInfo) { - return podset.BadPodSetsInfoLenError(podSetsInfoIndex, len(podSetsInfo)) + return fmt.Errorf("expecting %d podsets, got %d", podSetsInfoIndex, len(podSetsInfo)) } return nil } From 20d938a409a0ada8c42f24e8f11cedc7d359903b Mon Sep 17 00:00:00 2001 From: David Grove Date: Thu, 20 Feb 2025 13:03:03 -0500 Subject: [PATCH 4/5] vendor 2 map util functions from Kueue --- .../appwrapper/resource_management.go | 2 +- internal/util/maps.go | 63 +++++++++++++++++++ internal/webhook/appwrapper_webhook.go | 4 +- internal/webhook/appwrapper_webhook_test.go | 7 +-- 4 files changed, 68 insertions(+), 8 deletions(-) create mode 100644 internal/util/maps.go diff --git a/internal/controller/appwrapper/resource_management.go b/internal/controller/appwrapper/resource_management.go index 618f3ba..12774ea 100644 --- a/internal/controller/appwrapper/resource_management.go +++ b/internal/controller/appwrapper/resource_management.go @@ -23,6 +23,7 @@ import ( "time" awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" + utilmaps "github.com/project-codeflare/appwrapper/internal/util" "github.com/project-codeflare/appwrapper/pkg/utils" v1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -35,7 +36,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/kueue/pkg/podset" - utilmaps "sigs.k8s.io/kueue/pkg/util/maps" ) func parseComponent(raw []byte, expectedNamespace string) (*unstructured.Unstructured, error) { diff --git a/internal/util/maps.go b/internal/util/maps.go new file mode 100644 index 0000000..35c855c --- /dev/null +++ b/internal/util/maps.go @@ -0,0 +1,63 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +/* + This file contains a copy of the functions from https://github.com/kubernetes-sigs/kueue/blob/main/pkg/util/maps/maps.go + that are used by the AppWrapper controlller. + + We copy the used functions to eliminate our go dependency on Kueue, which simplifies bundling AppWrapper + in the codeflare-operator in RedHat OpenShift AI. +*/ + +package maps + +import ( + "fmt" + "maps" +) + +// merge merges a and b while resolving the conflicts by calling commonKeyValue +func merge[K comparable, V any, S ~map[K]V](a, b S, commonKeyValue func(a, b V) V) S { + if a == nil { + return maps.Clone(b) + } + + ret := maps.Clone(a) + + for k, v := range b { + if _, found := a[k]; found { + ret[k] = commonKeyValue(a[k], v) + } else { + ret[k] = v + } + } + return ret +} + +// MergeKeepFirst merges a and b keeping the values in a in case of conflict +func MergeKeepFirst[K comparable, V any, S ~map[K]V](a, b S) S { + return merge(a, b, func(v, _ V) V { return v }) +} + +// HaveConflict checks if a and b have the same key, but different value +func HaveConflict[K comparable, V comparable, S ~map[K]V](a, b S) error { + for k, av := range a { + if bv, found := b[k]; found && av != bv { + return fmt.Errorf("conflict for key=%v, value1=%v, value2=%v", k, av, bv) + } + } + return nil +} diff --git a/internal/webhook/appwrapper_webhook.go b/internal/webhook/appwrapper_webhook.go index 35a1f61..68800b5 100644 --- a/internal/webhook/appwrapper_webhook.go +++ b/internal/webhook/appwrapper_webhook.go @@ -32,18 +32,16 @@ import ( "k8s.io/client-go/kubernetes" authClientv1 "k8s.io/client-go/kubernetes/typed/authorization/v1" "k8s.io/utils/ptr" - utilmaps "sigs.k8s.io/kueue/pkg/util/maps" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/webhook" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" - "sigs.k8s.io/kueue/pkg/controller/jobframework" awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" wlc "github.com/project-codeflare/appwrapper/internal/controller/workload" + utilmaps "github.com/project-codeflare/appwrapper/internal/util" "github.com/project-codeflare/appwrapper/pkg/config" "github.com/project-codeflare/appwrapper/pkg/utils" ) diff --git a/internal/webhook/appwrapper_webhook_test.go b/internal/webhook/appwrapper_webhook_test.go index 31ca724..cf0cd98 100644 --- a/internal/webhook/appwrapper_webhook_test.go +++ b/internal/webhook/appwrapper_webhook_test.go @@ -21,13 +21,12 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - - awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" - "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/utils/ptr" - utilmaps "sigs.k8s.io/kueue/pkg/util/maps" + + awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" + utilmaps "github.com/project-codeflare/appwrapper/internal/util" ) var _ = Describe("AppWrapper Webhook Tests", func() { From 13ae9aa9675ea137af3bc132d98269610a0abbd8 Mon Sep 17 00:00:00 2001 From: David Grove Date: Thu, 20 Feb 2025 13:11:35 -0500 Subject: [PATCH 5/5] eliminate imports of Kueue's podset package --- internal/controller/appwrapper/resource_management.go | 7 +++---- internal/util/maps.go | 7 ++++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/internal/controller/appwrapper/resource_management.go b/internal/controller/appwrapper/resource_management.go index 12774ea..2abfb0a 100644 --- a/internal/controller/appwrapper/resource_management.go +++ b/internal/controller/appwrapper/resource_management.go @@ -35,7 +35,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/kueue/pkg/podset" ) func parseComponent(raw []byte, expectedNamespace string) (*unstructured.Unstructured, error) { @@ -241,7 +240,7 @@ func (r *AppWrapperReconciler) createComponent(ctx context.Context, aw *awv1beta if len(toInject.Annotations) > 0 { existing := toMap(metadata["annotations"]) if err := utilmaps.HaveConflict(existing, toInject.Annotations); err != nil { - return podset.BadPodSetsUpdateError("annotations", err), true + return fmt.Errorf("conflict updating annotations: %w", err), true } metadata["annotations"] = utilmaps.MergeKeepFirst(existing, toInject.Annotations) } @@ -250,7 +249,7 @@ func (r *AppWrapperReconciler) createComponent(ctx context.Context, aw *awv1beta mergedLabels := utilmaps.MergeKeepFirst(toInject.Labels, awLabels) existing := toMap(metadata["labels"]) if err := utilmaps.HaveConflict(existing, mergedLabels); err != nil { - return podset.BadPodSetsUpdateError("labels", err), true + return fmt.Errorf("conflict updating labels: %w", err), true } metadata["labels"] = utilmaps.MergeKeepFirst(existing, mergedLabels) @@ -258,7 +257,7 @@ func (r *AppWrapperReconciler) createComponent(ctx context.Context, aw *awv1beta if len(toInject.NodeSelector) > 0 { existing := toMap(spec["nodeSelector"]) if err := utilmaps.HaveConflict(existing, toInject.NodeSelector); err != nil { - return podset.BadPodSetsUpdateError("nodeSelector", err), true + return fmt.Errorf("conflict updating nodeSelector: %w", err), true } spec["nodeSelector"] = utilmaps.MergeKeepFirst(existing, toInject.NodeSelector) } diff --git a/internal/util/maps.go b/internal/util/maps.go index 35c855c..8040163 100644 --- a/internal/util/maps.go +++ b/internal/util/maps.go @@ -15,11 +15,12 @@ limitations under the License. */ /* - This file contains a copy of the functions from https://github.com/kubernetes-sigs/kueue/blob/main/pkg/util/maps/maps.go + This file contains a copy of the two map utility functions from + https://github.com/kubernetes-sigs/kueue/blob/main/pkg/util/maps/maps.go that are used by the AppWrapper controlller. - We copy the used functions to eliminate our go dependency on Kueue, which simplifies bundling AppWrapper - in the codeflare-operator in RedHat OpenShift AI. + We "vendor" the used functions to eliminate our go dependency on Kueue. + This simplifies bundling AppWrapper in the codeflare-operator in RedHat OpenShift AI. */ package maps