From c9ea1076d6d73c2d894bd9cba566f9ea8ce5a318 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Wed, 30 Oct 2024 22:18:39 +0000 Subject: [PATCH 1/4] Add event recorder and leader election support Signed-off-by: Brad Davidson --- e2e/framework/controller/deployment.go | 7 ++ main.go | 19 ++++- manifests/clusterrole.yaml | 24 ++++++ manifests/system-upgrade-controller.yaml | 22 ++++++ pkg/upgrade/controller.go | 66 ++++++++++++++-- pkg/upgrade/handle_batch.go | 3 + pkg/upgrade/handle_core.go | 3 + pkg/upgrade/handle_upgrade.go | 97 ++++++++++++++++++++---- pkg/upgrade/job/job.go | 12 +-- pkg/version/version.go | 1 + 10 files changed, 225 insertions(+), 29 deletions(-) diff --git a/e2e/framework/controller/deployment.go b/e2e/framework/controller/deployment.go index c5c09672..ae4d6a88 100644 --- a/e2e/framework/controller/deployment.go +++ b/e2e/framework/controller/deployment.go @@ -53,6 +53,13 @@ func NewDeployment(name string, opt ...DeploymentOption) *appsv1.Deployment { FieldPath: "metadata.namespace", }, }, + }, { + Name: "SYSTEM_UPGRADE_CONTROLLER_NODE_NAME", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "spec.nodeName", + }, + }, }} container.VolumeMounts = []corev1.VolumeMount{{ Name: `tmp`, diff --git a/main.go b/main.go index fb985d57..19eab155 100644 --- a/main.go +++ b/main.go @@ -21,15 +21,15 @@ import ( ) var ( - debug bool - kubeConfig, masterURL string + debug, leaderElect bool + kubeConfig, masterURL, nodeName string namespace, name, serviceAccountName string threads int ) func main() { app := cli.NewApp() - app.Name = "system-upgrade-controller" + app.Name = version.Program app.Usage = "in ur system controllin ur upgradez" app.Version = fmt.Sprintf("%s (%s)", version.Version, version.GitCommit) app.Flags = []cli.Flag{ @@ -38,6 +38,11 @@ func main() { EnvVar: "SYSTEM_UPGRADE_CONTROLLER_DEBUG", Destination: &debug, }, + cli.BoolFlag{ + Name: "leader-elect", + EnvVar: "SYSTEM_UPGRADE_CONTROLLER_LEADER_ELECT", + Destination: &leaderElect, + }, cli.StringFlag{ Name: "kubeconfig", EnvVar: "SYSTEM_UPGRADE_CONTROLLER_KUBE_CONFIG", @@ -55,6 +60,12 @@ func main() { Required: true, Destination: &name, }, + cli.StringFlag{ + Name: "node-name", + EnvVar: "SYSTEM_UPGRADE_CONTROLLER_NODE_NAME", + Required: false, + Destination: &nodeName, + }, cli.StringFlag{ Name: "namespace", EnvVar: "SYSTEM_UPGRADE_CONTROLLER_NAMESPACE", @@ -93,7 +104,7 @@ func Run(_ *cli.Context) { if err != nil { logrus.Fatal(err) } - ctl, err := upgrade.NewController(cfg, namespace, name, 2*time.Hour) + ctl, err := upgrade.NewController(cfg, namespace, name, nodeName, leaderElect, 2*time.Hour) if err != nil { logrus.Fatal(err) } diff --git a/manifests/clusterrole.yaml b/manifests/clusterrole.yaml index 78ffd6b1..1accfc76 100644 --- a/manifests/clusterrole.yaml +++ b/manifests/clusterrole.yaml @@ -37,6 +37,30 @@ rules: - nodes verbs: - update +- apiGroups: + - "" + resources: + - events + verbs: + - get + - create + - patch + - update +- apiGroups: + - "coordination.k8s.io" + resources: + - leases + verbs: + - create +- apiGroups: + - "coordination.k8s.io" + resources: + - leases + resourceNames: + - "system-upgrade-controller" + verbs: + - get + - update - apiGroups: - upgrade.cattle.io resources: diff --git a/manifests/system-upgrade-controller.yaml b/manifests/system-upgrade-controller.yaml index 955f5497..8749102e 100644 --- a/manifests/system-upgrade-controller.yaml +++ b/manifests/system-upgrade-controller.yaml @@ -19,6 +19,7 @@ metadata: data: 
SYSTEM_UPGRADE_CONTROLLER_DEBUG: "false" SYSTEM_UPGRADE_CONTROLLER_THREADS: "2" + SYSTEM_UPGRADE_CONTROLLER_LEADER_ELECT: "true" SYSTEM_UPGRADE_JOB_ACTIVE_DEADLINE_SECONDS: "900" SYSTEM_UPGRADE_JOB_BACKOFF_LIMIT: "99" SYSTEM_UPGRADE_JOB_IMAGE_PULL_POLICY: "Always" @@ -33,12 +34,16 @@ metadata: name: system-upgrade-controller namespace: system-upgrade spec: + strategy: + type: Recreate selector: matchLabels: upgrade.cattle.io/controller: system-upgrade-controller template: metadata: labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: system-upgrade-controller upgrade.cattle.io/controller: system-upgrade-controller # necessary to avoid drain spec: affinity: @@ -48,6 +53,19 @@ spec: - matchExpressions: - key: "node-role.kubernetes.io/control-plane" operator: "Exists" + - key: "kubernetes.io/os" + operator: "In" + values: + - "linux" + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - topologyKey: "kubernetes.io/hostname" + labelSelector: + matchExpressions: + - key: "app.kubernetes.io/name" + operator: "In" + values: + - "system-upgrade-controller" serviceAccountName: system-upgrade tolerations: - key: "CriticalAddonsOnly" @@ -90,6 +108,10 @@ spec: valueFrom: fieldRef: fieldPath: metadata.namespace + - name: SYSTEM_UPGRADE_CONTROLLER_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName volumeMounts: - name: etc-ssl mountPath: /etc/ssl diff --git a/pkg/upgrade/controller.go b/pkg/upgrade/controller.go index 4afce479..77987e5b 100644 --- a/pkg/upgrade/controller.go +++ b/pkg/upgrade/controller.go @@ -4,21 +4,31 @@ import ( "context" "errors" "fmt" + "os" "time" upgradectl "github.com/rancher/system-upgrade-controller/pkg/generated/controllers/upgrade.cattle.io" upgradeplan "github.com/rancher/system-upgrade-controller/pkg/upgrade/plan" + "github.com/rancher/system-upgrade-controller/pkg/version" "github.com/rancher/wrangler/v3/pkg/apply" "github.com/rancher/wrangler/v3/pkg/crd" batchctl "github.com/rancher/wrangler/v3/pkg/generated/controllers/batch" corectl "github.com/rancher/wrangler/v3/pkg/generated/controllers/core" + "github.com/rancher/wrangler/v3/pkg/leader" + "github.com/rancher/wrangler/v3/pkg/schemes" "github.com/rancher/wrangler/v3/pkg/start" + "github.com/sirupsen/logrus" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes" + typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1" "k8s.io/client-go/rest" + "k8s.io/client-go/tools/record" ) var ( + ErrPlanNotReady = errors.New("plan is not valid and resolved") ErrControllerNameRequired = errors.New("controller name is required") ErrControllerNamespaceRequired = errors.New("controller namespace is required") ) @@ -26,20 +36,23 @@ var ( type Controller struct { Namespace string Name string + NodeName string cfg *rest.Config kcs *kubernetes.Clientset - clusterID string + clusterID string + leaderElect bool coreFactory *corectl.Factory batchFactory *batchctl.Factory upgradeFactory *upgradectl.Factory - apply apply.Apply + apply apply.Apply + recorder record.EventRecorder } -func NewController(cfg *rest.Config, namespace, name string, resync time.Duration) (ctl *Controller, err error) { +func NewController(cfg *rest.Config, namespace, name, nodeName string, leaderElect bool, resync 
time.Duration) (ctl *Controller, err error) { if namespace == "" { return nil, ErrControllerNamespaceRequired } @@ -47,6 +60,13 @@ func NewController(cfg *rest.Config, namespace, name string, resync time.Duratio return nil, ErrControllerNameRequired } + if nodeName == "" { + nodeName, err = os.Hostname() + if err != nil { + return nil, err + } + } + if cfg == nil { cfg, err = rest.InClusterConfig() if err != nil { @@ -55,9 +75,11 @@ func NewController(cfg *rest.Config, namespace, name string, resync time.Duratio } ctl = &Controller{ - Namespace: namespace, - Name: name, - cfg: cfg, + Namespace: namespace, + Name: name, + NodeName: nodeName, + cfg: cfg, + leaderElect: leaderElect, } ctl.kcs, err = kubernetes.NewForConfig(cfg) @@ -90,10 +112,24 @@ func NewController(cfg *rest.Config, namespace, name string, resync time.Duratio return nil, err } + eventBroadcaster := record.NewBroadcaster() + eventBroadcaster.StartStructuredLogging(0) + eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: ctl.kcs.CoreV1().Events(metav1.NamespaceAll)}) + ctl.recorder = eventBroadcaster.NewRecorder(schemes.All, corev1.EventSource{Component: ctl.Name, Host: ctl.NodeName}) + return ctl, nil } func (ctl *Controller) Start(ctx context.Context, threads int) error { + // This is consistent with events attached to the node generated by the kubelet + // https://github.com/kubernetes/kubernetes/blob/612130dd2f4188db839ea5c2dea07a96b0ad8d1c/pkg/kubelet/kubelet.go#L479-L485 + nodeRef := &corev1.ObjectReference{ + Kind: "Node", + Name: ctl.NodeName, + UID: types.UID(ctl.NodeName), + Namespace: "", + } + // cluster id hack: see https://groups.google.com/forum/#!msg/kubernetes-sig-architecture/mVGobfD4TpY/nkdbkX1iBwAJ systemNS, err := ctl.kcs.CoreV1().Namespaces().Get(ctx, metav1.NamespaceSystem, metav1.GetOptions{}) if err != nil { @@ -119,7 +155,23 @@ func (ctl *Controller) Start(ctx context.Context, threads int) error { return err } - return start.All(ctx, threads, ctl.coreFactory, ctl.batchFactory, ctl.upgradeFactory) + appName := fmt.Sprintf("%s %s (%s)", version.Program, version.Version, version.GitCommit) + run := func(ctx context.Context) { + if err := start.All(ctx, threads, ctl.coreFactory, ctl.batchFactory, ctl.upgradeFactory); err != nil { + ctl.recorder.Eventf(nodeRef, corev1.EventTypeWarning, "StartFailed", "%s failed to start controllers for %s/%s: %v", appName, ctl.Namespace, ctl.Name, err) + logrus.Panicf("Failed to start controllers: %v", err) + } + ctl.recorder.Eventf(nodeRef, corev1.EventTypeNormal, "Started", "%s running as %s/%s", appName, ctl.Namespace, ctl.Name) + } + + if ctl.leaderElect { + ctl.recorder.Eventf(nodeRef, corev1.EventTypeNormal, "Starting", "%s starting leader election for %s/%s", appName, ctl.Namespace, ctl.Name) + leader.RunOrDie(ctx, ctl.Namespace, ctl.Name, ctl.kcs, run) + } else { + run(ctx) + } + + return nil } func (ctl *Controller) registerCRD(ctx context.Context) error { diff --git a/pkg/upgrade/handle_batch.go b/pkg/upgrade/handle_batch.go index 28a237a2..843e1125 100644 --- a/pkg/upgrade/handle_batch.go +++ b/pkg/upgrade/handle_batch.go @@ -11,6 +11,7 @@ import ( upgradeapi "github.com/rancher/system-upgrade-controller/pkg/apis/upgrade.cattle.io" upgradejob "github.com/rancher/system-upgrade-controller/pkg/upgrade/job" batchctlv1 "github.com/rancher/wrangler/v3/pkg/generated/controllers/batch/v1" + "github.com/sirupsen/logrus" batchv1 
"k8s.io/api/batch/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -61,6 +62,7 @@ func (ctl *Controller) handleJobs(ctx context.Context) error { return obj, deleteJob(jobs, obj, metav1.DeletePropagationBackground) } // trigger the plan when we're done, might free up a concurrency slot + logrus.Debugf("Enqueing sync of Plan %s/%s from Job %s/%s", obj.Namespace, planName, obj.Namespace, obj.Name) defer plans.Enqueue(obj.Namespace, planName) // identify the node that this job is targeting nodeName, ok := obj.Labels[upgradeapi.LabelNode] @@ -127,6 +129,7 @@ func enqueueOrDelete(jobController batchctlv1.JobController, job *batchv1.Job, d ttlSecondsAfterFinished = time.Second * time.Duration(*job.Spec.TTLSecondsAfterFinished) } if interval := time.Now().Sub(lastTransitionTime); interval < ttlSecondsAfterFinished { + logrus.Debugf("Enqueing sync of Job %s/%s in %v", job.Namespace, job.Name, ttlSecondsAfterFinished-interval) jobController.EnqueueAfter(job.Namespace, job.Name, ttlSecondsAfterFinished-interval) return nil } diff --git a/pkg/upgrade/handle_core.go b/pkg/upgrade/handle_core.go index 1d357f7c..4ed6f6e6 100644 --- a/pkg/upgrade/handle_core.go +++ b/pkg/upgrade/handle_core.go @@ -3,6 +3,7 @@ package upgrade import ( "context" + "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" @@ -24,6 +25,7 @@ func (ctl *Controller) handleNodes(ctx context.Context) error { if selector, err := metav1.LabelSelectorAsSelector(plan.Spec.NodeSelector); err != nil { return obj, err } else if selector.Matches(labels.Set(obj.Labels)) { + logrus.Debugf("Enqueing sync of Plan %s/%s from Node %s", plan.Namespace, plan.Name, obj.Name) plans.Enqueue(plan.Namespace, plan.Name) } } @@ -49,6 +51,7 @@ func (ctl *Controller) handleSecrets(ctx context.Context) error { for _, secret := range plan.Spec.Secrets { if obj.Name == secret.Name { if !secret.IgnoreUpdates { + logrus.Debugf("Enqueing sync of Plan %s/%s from Secret %s/%s", plan.Namespace, plan.Name, obj.Namespace, obj.Name) plans.Enqueue(plan.Namespace, plan.Name) continue } diff --git a/pkg/upgrade/handle_upgrade.go b/pkg/upgrade/handle_upgrade.go index bb8f8722..841db0ee 100644 --- a/pkg/upgrade/handle_upgrade.go +++ b/pkg/upgrade/handle_upgrade.go @@ -2,6 +2,8 @@ package upgrade import ( "context" + "slices" + "strings" "time" upgradeapiv1 "github.com/rancher/system-upgrade-controller/pkg/apis/upgrade.cattle.io/v1" @@ -11,6 +13,7 @@ import ( upgradeplan "github.com/rancher/system-upgrade-controller/pkg/upgrade/plan" "github.com/rancher/wrangler/v3/pkg/generic" "github.com/sirupsen/logrus" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" ) @@ -20,6 +23,7 @@ func (ctl *Controller) handlePlans(ctx context.Context) error { plans := ctl.upgradeFactory.Upgrade().V1().Plan() secrets := ctl.coreFactory.Core().V1().Secret() secretsCache := secrets.Cache() + recorder := ctl.recorder // process plan events, mutating status accordingly upgradectlv1.RegisterPlanStatusHandler(ctx, plans, "", ctl.Name, @@ -29,27 +33,52 @@ func (ctl *Controller) handlePlans(ctx context.Context) error { } logrus.Debugf("PLAN STATUS HANDLER: plan=%s/%s@%s, status=%+v", obj.Namespace, obj.Name, obj.ResourceVersion, status) + // ensure that the complete status is present + complete := upgradeapiv1.PlanComplete + complete.CreateUnknownIfNotExists(obj) + + // 
validate plan, and generate events for transitions validated := upgradeapiv1.PlanSpecValidated validated.CreateUnknownIfNotExists(obj) if err := upgradeplan.Validate(obj); err != nil { + if !validated.IsFalse(obj) { + recorder.Eventf(obj, corev1.EventTypeWarning, "ValidateFailed", "Failed to validate plan: %v", err) + } validated.SetError(obj, "Error", err) return upgradeplan.DigestStatus(obj, secretsCache) } - validated.False(obj) + if !validated.IsTrue(obj) { + recorder.Event(obj, corev1.EventTypeNormal, "Validated", "Plan is valid") + } validated.SetError(obj, "PlanIsValid", nil) + // resolve version from spec or channel, and generate events for transitions resolved := upgradeapiv1.PlanLatestResolved resolved.CreateUnknownIfNotExists(obj) + // raise error if neither version nor channel are set. this is handled separate from other validation. if obj.Spec.Version == "" && obj.Spec.Channel == "" { + if !resolved.IsFalse(obj) { + recorder.Event(obj, corev1.EventTypeWarning, "ResolveFailed", upgradeapiv1.ErrPlanUnresolvable.Error()) + } resolved.SetError(obj, "Error", upgradeapiv1.ErrPlanUnresolvable) return upgradeplan.DigestStatus(obj, secretsCache) } + // use static version from spec if set if obj.Spec.Version != "" { - resolved.False(obj) + latest := upgradeplan.MungeVersion(obj.Spec.Version) + if !resolved.IsTrue(obj) || obj.Status.LatestVersion != latest { + // Version has changed, set complete to false and emit event + recorder.Eventf(obj, corev1.EventTypeNormal, "Resolved", "Resolved latest version from Spec.Version: %s", latest) + complete.False(obj) + complete.Message(obj, "") + complete.Reason(obj, "Resolved") + } + obj.Status.LatestVersion = latest resolved.SetError(obj, "Version", nil) - obj.Status.LatestVersion = upgradeplan.MungeVersion(obj.Spec.Version) return upgradeplan.DigestStatus(obj, secretsCache) } + // re-enqueue a sync at the next channel polling interval, if the LastUpdated time + // on the resolved status indicates that the interval has not been reached. 
if resolved.IsTrue(obj) { if lastUpdated, err := time.Parse(time.RFC3339, resolved.GetLastUpdated(obj)); err == nil { if interval := time.Now().Sub(lastUpdated); interval < upgradeplan.PollingInterval { @@ -58,13 +87,24 @@ func (ctl *Controller) handlePlans(ctx context.Context) error { } } } + // no static version, poll the channel to get latest version latest, err := upgradeplan.ResolveChannel(ctx, obj.Spec.Channel, obj.Status.LatestVersion, ctl.clusterID) if err != nil { + if !resolved.IsFalse(obj) { + recorder.Eventf(obj, corev1.EventTypeWarning, "ResolveFailed", "Failed to resolve latest version from Spec.Channel: %v", err) + } return status, err } - resolved.False(obj) + latest = upgradeplan.MungeVersion(latest) + if !resolved.IsTrue(obj) || obj.Status.LatestVersion != latest { + // Version has changed, set complete to false and emit event + recorder.Eventf(obj, corev1.EventTypeNormal, "Resolved", "Resolved latest version from Spec.Channel: %s", latest) + complete.False(obj) + complete.Message(obj, "") + complete.Reason(obj, "Resolved") + } + obj.Status.LatestVersion = latest resolved.SetError(obj, "Channel", nil) - obj.Status.LatestVersion = upgradeplan.MungeVersion(latest) return upgradeplan.DigestStatus(obj, secretsCache) }, ) @@ -75,30 +115,61 @@ func (ctl *Controller) handlePlans(ctx context.Context) error { if obj == nil { return objects, status, nil } + logrus.Debugf("PLAN GENERATING HANDLER: plan=%s/%s@%s, status=%+v", obj.Namespace, obj.Name, obj.ResourceVersion, status) - if !upgradeapiv1.PlanSpecValidated.IsTrue(obj) { - return objects, status, nil - } - if !upgradeapiv1.PlanLatestResolved.IsTrue(obj) { + // return early without selecting nodes if the plan is not validated and resolved + complete := upgradeapiv1.PlanComplete + if !upgradeapiv1.PlanSpecValidated.IsTrue(obj) || !upgradeapiv1.PlanLatestResolved.IsTrue(obj) { + complete.SetError(obj, "NotReady", ErrPlanNotReady) return objects, status, nil } + + // select nodes to apply the plan on based on nodeSelector, plan hash, and concurrency concurrentNodes, err := upgradeplan.SelectConcurrentNodes(obj, nodes.Cache()) if err != nil { + recorder.Eventf(obj, corev1.EventTypeWarning, "SelectNodesFailed", "Failed to select Nodes: %v", err) + complete.SetError(obj, "SelectNodesFailed", err) return objects, status, err } + + // Create an upgrade job for each node, and add the node name to Status.Applying + // Note that this initially creates paused jobs, and then on a second pass once + // the node has been added to Status.Applying the job parallelism is patched to 1 + // to unpause the job. Ref: https://github.com/rancher/system-upgrade-controller/issues/134 concurrentNodeNames := make([]string, len(concurrentNodes)) for i := range concurrentNodes { node := concurrentNodes[i] objects = append(objects, upgradejob.New(obj, node, ctl.Name)) concurrentNodeNames[i] = upgradenode.Hostname(node) } - obj.Status.Applying = concurrentNodeNames[:] - upgradeapiv1.PlanComplete.SetStatusBool(obj, len(concurrentNodeNames) == 0) + + if len(concurrentNodeNames) > 0 { + // If the node list has changed, update Applying status with new node list and emit an event + if !slices.Equal(obj.Status.Applying, concurrentNodeNames) { + recorder.Eventf(obj, corev1.EventTypeNormal, "SyncJob", "Jobs synced for version %s on Nodes %s. 
Hash: %s", + obj.Status.LatestVersion, strings.Join(concurrentNodeNames, ","), obj.Status.LatestHash) + } + obj.Status.Applying = concurrentNodeNames[:] + complete.False(obj) + complete.Message(obj, "") + complete.Reason(obj, "SyncJob") + } else { + // set PlanComplete to true when no nodes have been selected, + // and emit an event if the plan just completed + if !complete.IsTrue(obj) { + recorder.Eventf(obj, corev1.EventTypeNormal, "Complete", "Jobs complete for version %s. Hash: %s", + obj.Status.LatestVersion, obj.Status.LatestHash) + } + obj.Status.Applying = nil + complete.SetError(obj, "Complete", nil) + } + return objects, obj.Status, nil }, &generic.GeneratingHandlerOptions{ - AllowClusterScoped: true, - NoOwnerReference: true, + AllowClusterScoped: true, + NoOwnerReference: true, + UniqueApplyForResourceVersion: true, }, ) diff --git a/pkg/upgrade/job/job.go b/pkg/upgrade/job/job.go index 3a0bf1ae..df5aa95e 100644 --- a/pkg/upgrade/job/job.go +++ b/pkg/upgrade/job/job.go @@ -2,7 +2,7 @@ package job import ( "os" - "sort" + "slices" "strconv" "strings" @@ -18,6 +18,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/selection" + "k8s.io/utils/pointer" ) const ( @@ -240,13 +241,14 @@ func New(plan *upgradeapiv1.Plan, node *corev1.Node, controllerName string) *bat ImagePullSecrets: plan.Spec.ImagePullSecrets, }, }, - Completions: new(int32), - Parallelism: new(int32), + Completions: pointer.Int32(1), // Run only once + Parallelism: pointer.Int32(0), // Create Job paused }, } - *job.Spec.Completions = 1 - if i := sort.SearchStrings(plan.Status.Applying, nodeHostname); i < len(plan.Status.Applying) && plan.Status.Applying[i] == nodeHostname { + // After the Job has been created and registered as in-progress in the Plan Status, + // update parallelism to 1 to unpause it. 
Ref: https://github.com/rancher/system-upgrade-controller/issues/134 + if slices.Contains(plan.Status.Applying, nodeHostname) { *job.Spec.Parallelism = 1 } diff --git a/pkg/version/version.go b/pkg/version/version.go index a645beef..3c52397c 100644 --- a/pkg/version/version.go +++ b/pkg/version/version.go @@ -1,6 +1,7 @@ package version var ( + Program = "system-upgrade-controller" Version = "dev" GitCommit = "HEAD" ) From 4904a0c3d1da0d268ebb58ca40b81adbab4a5ea1 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Wed, 30 Oct 2024 22:19:20 +0000 Subject: [PATCH 2/4] Add support for Spec.Window to set time window Signed-off-by: Brad Davidson --- go.mod | 21 ++++++---- go.sum | 42 +++++++++++-------- pkg/apis/upgrade.cattle.io/v1/types.go | 18 ++++++++ .../v1/zz_generated_deepcopy.go | 26 ++++++++++++ pkg/upgrade/controller.go | 1 + pkg/upgrade/handle_upgrade.go | 14 +++++++ pkg/upgrade/plan/plan.go | 7 ++++ 7 files changed, 102 insertions(+), 27 deletions(-) diff --git a/go.mod b/go.mod index da9d150a..8604dfed 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ toolchain go1.23.2 require ( github.com/docker/distribution v2.8.3+incompatible + github.com/kubereboot/kured v0.0.0-20241022191328-c77090d5fdd3 github.com/onsi/ginkgo/v2 v2.20.0 github.com/onsi/gomega v1.34.1 github.com/rancher/lasso v0.0.0-20240805175815-a40054127062 @@ -63,6 +64,8 @@ require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/compress v1.17.9 // indirect + github.com/kylelemons/godebug v1.1.0 // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/moby/spdystream v0.4.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect @@ -71,9 +74,9 @@ require ( github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/pkg/errors v0.9.1 // indirect - github.com/prometheus/client_golang v1.19.1 // indirect + github.com/prometheus/client_golang v1.20.5 // indirect github.com/prometheus/client_model v0.6.1 // indirect - github.com/prometheus/common v0.55.0 // indirect + github.com/prometheus/common v0.60.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/spf13/cobra v1.8.1 // indirect @@ -94,16 +97,16 @@ require ( go.opentelemetry.io/proto/otlp v1.3.1 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.26.0 // indirect - golang.org/x/crypto v0.26.0 // indirect + golang.org/x/crypto v0.27.0 // indirect golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect golang.org/x/mod v0.20.0 // indirect - golang.org/x/net v0.28.0 // indirect - golang.org/x/oauth2 v0.21.0 // indirect + golang.org/x/net v0.29.0 // indirect + golang.org/x/oauth2 v0.23.0 // indirect golang.org/x/sync v0.8.0 // indirect - golang.org/x/sys v0.23.0 // indirect - golang.org/x/term v0.23.0 // indirect - golang.org/x/text 
v0.17.0 // indirect - golang.org/x/time v0.3.0 // indirect + golang.org/x/sys v0.25.0 // indirect + golang.org/x/term v0.24.0 // indirect + golang.org/x/text v0.18.0 // indirect + golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.24.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 // indirect diff --git a/go.sum b/go.sum index 411f5514..9deed6be 100644 --- a/go.sum +++ b/go.sum @@ -112,6 +112,8 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= @@ -120,6 +122,10 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kubereboot/kured v0.0.0-20241022191328-c77090d5fdd3 h1:RSZdBiSSvHYHOouL39wfEY1M/rmohcZwKtbXXMVn/GQ= +github.com/kubereboot/kured v0.0.0-20241022191328-c77090d5fdd3/go.mod h1:p+N3H/+P5wIFG2ogglMjjhTvRC+aAt3saVrp8IA782U= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/moby/spdystream v0.4.0 h1:Vy79D6mHeJJjiPdFEL2yku1kl0chZpJfZcPpb16BRl8= @@ -144,12 +150,12 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= -github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= +github.com/prometheus/client_golang v1.20.5 
h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= +github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= -github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= +github.com/prometheus/common v0.60.0 h1:+V9PAREWNvJMAuJ1x1BaWl9dewMW4YrHZQbx0sJNllA= +github.com/prometheus/common v0.60.0/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/rancher/lasso v0.0.0-20240805175815-a40054127062 h1:5bnfZ50IoWO25tjbPy8OFbs3TaGDm/9vZJzTba++t94= @@ -236,8 +242,8 @@ go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= -golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= +golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A= +golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= @@ -248,10 +254,10 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= -golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= -golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= -golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo= +golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0= +golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= +golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod 
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -261,16 +267,16 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM= -golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= -golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= +golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= +golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM= +golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= -golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= -golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= -golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= +golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= +golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200505023115-26f46d2f7ef8/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= diff --git a/pkg/apis/upgrade.cattle.io/v1/types.go b/pkg/apis/upgrade.cattle.io/v1/types.go index 1a19898c..f3c26c0f 100644 --- a/pkg/apis/upgrade.cattle.io/v1/types.go +++ b/pkg/apis/upgrade.cattle.io/v1/types.go @@ -6,6 +6,7 @@ package v1 import ( "time" + "github.com/kubereboot/kured/pkg/timewindow" "github.com/rancher/system-upgrade-controller/pkg/apis/condition" "github.com/rancher/wrangler/v3/pkg/genericcondition" corev1 "k8s.io/api/core/v1" @@ -48,6 +49,7 @@ type PlanSpec struct { Exclusive bool `json:"exclusive,omitempty"` + Window *TimeWindowSpec `json:"window,omitempty"` Prepare *ContainerSpec `json:"prepare,omitempty"` Cordon bool `json:"cordon,omitempty"` Drain *DrainSpec `json:"drain,omitempty"` @@ -99,3 +101,19 @@ type SecretSpec struct { Path string `json:"path,omitempty"` IgnoreUpdates bool `json:"ignoreUpdates,omitempty"` } + +// TimeWindowSpec describes a time window in which a Plan should be processed. 
+type TimeWindowSpec struct { + Days []string `json:"days,omitempty"` + StartTime string `json:"startTime,omitempty"` + EndTime string `json:"endTime,omitempty"` + TimeZone string `json:"timeZone,omitempty"` +} + +func (tws *TimeWindowSpec) Contains(t time.Time) bool { + tw, err := timewindow.New(tws.Days, tws.StartTime, tws.EndTime, tws.TimeZone) + if err != nil { + return false + } + return tw.Contains(t) +} diff --git a/pkg/apis/upgrade.cattle.io/v1/zz_generated_deepcopy.go b/pkg/apis/upgrade.cattle.io/v1/zz_generated_deepcopy.go index 11c70cb7..7cfe7e71 100644 --- a/pkg/apis/upgrade.cattle.io/v1/zz_generated_deepcopy.go +++ b/pkg/apis/upgrade.cattle.io/v1/zz_generated_deepcopy.go @@ -207,6 +207,11 @@ func (in *PlanSpec) DeepCopyInto(out *PlanSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.Window != nil { + in, out := &in.Window, &out.Window + *out = new(TimeWindowSpec) + (*in).DeepCopyInto(*out) + } if in.Prepare != nil { in, out := &in.Prepare, &out.Prepare *out = new(ContainerSpec) @@ -282,6 +287,27 @@ func (in *SecretSpec) DeepCopy() *SecretSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *TimeWindowSpec) DeepCopyInto(out *TimeWindowSpec) { + *out = *in + if in.Days != nil { + in, out := &in.Days, &out.Days + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TimeWindowSpec. +func (in *TimeWindowSpec) DeepCopy() *TimeWindowSpec { + if in == nil { + return nil + } + out := new(TimeWindowSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *VolumeSpec) DeepCopyInto(out *VolumeSpec) { *out = *in diff --git a/pkg/upgrade/controller.go b/pkg/upgrade/controller.go index 77987e5b..08d4aca9 100644 --- a/pkg/upgrade/controller.go +++ b/pkg/upgrade/controller.go @@ -29,6 +29,7 @@ import ( var ( ErrPlanNotReady = errors.New("plan is not valid and resolved") + ErrOutsideWindow = errors.New("current time is not within configured window") ErrControllerNameRequired = errors.New("controller name is required") ErrControllerNamespaceRequired = errors.New("controller namespace is required") ) diff --git a/pkg/upgrade/handle_upgrade.go b/pkg/upgrade/handle_upgrade.go index 841db0ee..810544d6 100644 --- a/pkg/upgrade/handle_upgrade.go +++ b/pkg/upgrade/handle_upgrade.go @@ -144,6 +144,20 @@ func (ctl *Controller) handlePlans(ctx context.Context) error { } if len(concurrentNodeNames) > 0 { + // Don't start creating Jobs for the Plan if we're outside the window; just + // enqueue the plan to check again in a minute to see if we're within the window yet. + // The Plan is allowed to continue processing as long as there are nodes in progress. + if window := obj.Spec.Window; window != nil { + if len(obj.Status.Applying) == 0 && !window.Contains(time.Now()) { + if complete.GetReason(obj) != "Waiting" { + recorder.Eventf(obj, corev1.EventTypeNormal, "Waiting", "Waiting for start of Spec.Window to sync Jobs for version %s. 
Hash: %s", obj.Status.LatestVersion, obj.Status.LatestHash) + } + plans.EnqueueAfter(obj.Namespace, obj.Name, time.Minute) + complete.SetError(obj, "Waiting", ErrOutsideWindow) + return nil, status, nil + } + } + // If the node list has changed, update Applying status with new node list and emit an event if !slices.Equal(obj.Status.Applying, concurrentNodeNames) { recorder.Eventf(obj, corev1.EventTypeNormal, "SyncJob", "Jobs synced for version %s on Nodes %s. Hash: %s", diff --git a/pkg/upgrade/plan/plan.go b/pkg/upgrade/plan/plan.go index 32aeb8bc..9dd8249c 100644 --- a/pkg/upgrade/plan/plan.go +++ b/pkg/upgrade/plan/plan.go @@ -12,6 +12,7 @@ import ( "strings" "time" + "github.com/kubereboot/kured/pkg/timewindow" upgradeapi "github.com/rancher/system-upgrade-controller/pkg/apis/upgrade.cattle.io" upgradeapiv1 "github.com/rancher/system-upgrade-controller/pkg/apis/upgrade.cattle.io/v1" "github.com/rancher/wrangler/v3/pkg/crd" @@ -33,6 +34,7 @@ const ( var ( ErrDrainDeleteConflict = fmt.Errorf("spec.drain cannot specify both deleteEmptydirData and deleteLocalData") ErrDrainPodSelectorNotSelectable = fmt.Errorf("spec.drain.podSelector is not selectable") + ErrInvalidWindow = fmt.Errorf("spec.window is invalid") PollingInterval = func(defaultValue time.Duration) time.Duration { if str, ok := os.LookupEnv("SYSTEM_UPGRADE_PLAN_POLLING_INTERVAL"); ok { @@ -250,5 +252,10 @@ func Validate(plan *upgradeapiv1.Plan) error { } } } + if windowSpec := plan.Spec.Window; windowSpec != nil { + if _, err := timewindow.New(windowSpec.Days, windowSpec.StartTime, windowSpec.EndTime, windowSpec.TimeZone); err != nil { + return ErrInvalidWindow + } + } return nil } From 741b0135e7f68e40e956053ab44a07740b029a0e Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Fri, 1 Nov 2024 20:11:32 +0000 Subject: [PATCH 3/4] Add support for Spec.PostCompleteDelay to set delay after job completion Signed-off-by: Brad Davidson --- pkg/apis/upgrade.cattle.io/v1/types.go | 13 +++--- .../v1/zz_generated_deepcopy.go | 5 +++ pkg/upgrade/handle_batch.go | 41 ++++++++++++++----- pkg/upgrade/job/job.go | 17 +++++++- pkg/upgrade/plan/plan.go | 4 ++ 5 files changed, 62 insertions(+), 18 deletions(-) diff --git a/pkg/apis/upgrade.cattle.io/v1/types.go b/pkg/apis/upgrade.cattle.io/v1/types.go index f3c26c0f..2b106159 100644 --- a/pkg/apis/upgrade.cattle.io/v1/types.go +++ b/pkg/apis/upgrade.cattle.io/v1/types.go @@ -49,12 +49,13 @@ type PlanSpec struct { Exclusive bool `json:"exclusive,omitempty"` - Window *TimeWindowSpec `json:"window,omitempty"` - Prepare *ContainerSpec `json:"prepare,omitempty"` - Cordon bool `json:"cordon,omitempty"` - Drain *DrainSpec `json:"drain,omitempty"` - Upgrade *ContainerSpec `json:"upgrade,omitempty" wrangler:"required"` - ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"` + Window *TimeWindowSpec `json:"window,omitempty"` + Prepare *ContainerSpec `json:"prepare,omitempty"` + Cordon bool `json:"cordon,omitempty"` + Drain *DrainSpec `json:"drain,omitempty"` + Upgrade *ContainerSpec `json:"upgrade,omitempty" wrangler:"required"` + ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"` + PostCompleteDelay *metav1.Duration `json:"postCompleteDelay,omitempty"` } // PlanStatus represents the resulting state from processing Plan events. 
diff --git a/pkg/apis/upgrade.cattle.io/v1/zz_generated_deepcopy.go b/pkg/apis/upgrade.cattle.io/v1/zz_generated_deepcopy.go index 7cfe7e71..b379098a 100644 --- a/pkg/apis/upgrade.cattle.io/v1/zz_generated_deepcopy.go +++ b/pkg/apis/upgrade.cattle.io/v1/zz_generated_deepcopy.go @@ -232,6 +232,11 @@ func (in *PlanSpec) DeepCopyInto(out *PlanSpec) { *out = make([]corev1.LocalObjectReference, len(*in)) copy(*out, *in) } + if in.PostCompleteDelay != nil { + in, out := &in.PostCompleteDelay, &out.PostCompleteDelay + *out = new(metav1.Duration) + **out = **in + } return } diff --git a/pkg/upgrade/handle_batch.go b/pkg/upgrade/handle_batch.go index 843e1125..97692e82 100644 --- a/pkg/upgrade/handle_batch.go +++ b/pkg/upgrade/handle_batch.go @@ -7,12 +7,12 @@ import ( "strconv" "time" - "github.com/rancher/system-upgrade-controller/pkg/apis/condition" upgradeapi "github.com/rancher/system-upgrade-controller/pkg/apis/upgrade.cattle.io" upgradejob "github.com/rancher/system-upgrade-controller/pkg/upgrade/job" batchctlv1 "github.com/rancher/wrangler/v3/pkg/generated/controllers/batch/v1" "github.com/sirupsen/logrus" batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" @@ -81,13 +81,39 @@ func (ctl *Controller) handleJobs(ctx context.Context) error { } // if the job has failed enqueue-or-delete it depending on the TTL window if upgradejob.ConditionFailed.IsTrue(obj) { - return obj, enqueueOrDelete(jobs, obj, upgradejob.ConditionFailed) + failedTime := upgradejob.ConditionFailed.GetLastTransitionTime(obj) + if failedTime.IsZero() { + return obj, fmt.Errorf("condition %q missing field %q", upgradejob.ConditionFailed, "LastTransitionTime") + } + ctl.recorder.Eventf(plan, corev1.EventTypeWarning, "JobFailed", "Job failed on Node %s", node.Name) + return obj, enqueueOrDelete(jobs, obj, failedTime) } // if the job has completed tag the node then enqueue-or-delete depending on the TTL window if upgradejob.ConditionComplete.IsTrue(obj) { + completeTime := upgradejob.ConditionComplete.GetLastTransitionTime(obj) + if completeTime.IsZero() { + return obj, fmt.Errorf("condition %q missing field %q", upgradejob.ConditionComplete, "LastTransitionTime") + } planLabel := upgradeapi.LabelPlanName(planName) if planHash, ok := obj.Labels[planLabel]; ok { - node.Labels[planLabel] = planHash + var delay time.Duration + if plan.Spec.PostCompleteDelay != nil { + delay = plan.Spec.PostCompleteDelay.Duration + } + // if the job has not been completed for the configured delay, re-enqueue + // it for processing once the delay has elapsed. + // the job's TTLSecondsAfterFinished is guaranteed to be set to a larger value + // than the plan's requested delay. + if interval := time.Now().Sub(completeTime); interval < delay { + logrus.Debugf("Enqueing sync of Job %s/%s in %v", obj.Namespace, obj.Name, delay-interval) + ctl.recorder.Eventf(plan, corev1.EventTypeNormal, "JobCompleteWaiting", "Job completed on Node %s, waiting %s PostCompleteDelay", node.Name, delay) + jobs.EnqueueAfter(obj.Namespace, obj.Name, delay-interval) + } else { + ctl.recorder.Eventf(plan, corev1.EventTypeNormal, "JobComplete", "Job completed on Node %s", node.Name) + node.Labels[planLabel] = planHash + } + // mark the node as schedulable even if the delay has not elapsed, so that + // workloads can resume scheduling. 
if node.Spec.Unschedulable && (plan.Spec.Cordon || plan.Spec.Drain != nil) { node.Spec.Unschedulable = false } @@ -95,7 +121,7 @@ func (ctl *Controller) handleJobs(ctx context.Context) error { return obj, err } } - return obj, enqueueOrDelete(jobs, obj, upgradejob.ConditionComplete) + return obj, enqueueOrDelete(jobs, obj, completeTime) } // if the job is hasn't failed or completed but the job Node is not on the applying list, consider it running out-of-turn and delete it if i := sort.SearchStrings(plan.Status.Applying, nodeName); i == len(plan.Status.Applying) || @@ -108,12 +134,7 @@ func (ctl *Controller) handleJobs(ctx context.Context) error { return nil } -func enqueueOrDelete(jobController batchctlv1.JobController, job *batchv1.Job, done condition.Cond) error { - lastTransitionTime := done.GetLastTransitionTime(job) - if lastTransitionTime.IsZero() { - return fmt.Errorf("condition %q missing field %q", done, "LastTransitionTime") - } - +func enqueueOrDelete(jobController batchctlv1.JobController, job *batchv1.Job, lastTransitionTime time.Time) error { var ttlSecondsAfterFinished time.Duration if job.Spec.TTLSecondsAfterFinished == nil { diff --git a/pkg/upgrade/job/job.go b/pkg/upgrade/job/job.go index df5aa95e..fa17f260 100644 --- a/pkg/upgrade/job/job.go +++ b/pkg/upgrade/job/job.go @@ -5,6 +5,7 @@ import ( "slices" "strconv" "strings" + "time" "github.com/rancher/system-upgrade-controller/pkg/apis/condition" upgradeapi "github.com/rancher/system-upgrade-controller/pkg/apis/upgrade.cattle.io" @@ -133,9 +134,21 @@ func New(plan *upgradeapiv1.Plan, node *corev1.Node, controllerName string) *bat labelPlanName := upgradeapi.LabelPlanName(plan.Name) nodeHostname := upgradenode.Hostname(node) shortNodeName := strings.SplitN(node.Name, ".", 2)[0] + ttlSecondsAfterFinished := TTLSecondsAfterFinished + + // Ensure that the job's TTLSecondsAfterFinished is at least 1 minute longer than + // the requested post-upgrade delay, so that the controller has time to see that + // it has been completed for the requested duration. 
+ if delay := plan.Spec.PostCompleteDelay; delay != nil { + ttlPostCompleteDelay := delay.Duration + time.Minute + ttlAfterFinished := time.Duration(ttlSecondsAfterFinished) * time.Second + if ttlAfterFinished < ttlPostCompleteDelay { + ttlSecondsAfterFinished = int32(ttlPostCompleteDelay.Seconds()) + } + } jobAnnotations := labels.Set{ - upgradeapi.AnnotationTTLSecondsAfterFinished: strconv.FormatInt(int64(TTLSecondsAfterFinished), 10), + upgradeapi.AnnotationTTLSecondsAfterFinished: strconv.FormatInt(int64(ttlSecondsAfterFinished), 10), } podAnnotations := labels.Set{} @@ -171,7 +184,7 @@ func New(plan *upgradeapiv1.Plan, node *corev1.Node, controllerName string) *bat Spec: batchv1.JobSpec{ PodReplacementPolicy: &PodReplacementPolicy, BackoffLimit: &BackoffLimit, - TTLSecondsAfterFinished: &TTLSecondsAfterFinished, + TTLSecondsAfterFinished: &ttlSecondsAfterFinished, Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Annotations: podAnnotations, diff --git a/pkg/upgrade/plan/plan.go b/pkg/upgrade/plan/plan.go index 9dd8249c..92a2448f 100644 --- a/pkg/upgrade/plan/plan.go +++ b/pkg/upgrade/plan/plan.go @@ -35,6 +35,7 @@ var ( ErrDrainDeleteConflict = fmt.Errorf("spec.drain cannot specify both deleteEmptydirData and deleteLocalData") ErrDrainPodSelectorNotSelectable = fmt.Errorf("spec.drain.podSelector is not selectable") ErrInvalidWindow = fmt.Errorf("spec.window is invalid") + ErrInvalidDelay = fmt.Errorf("spec.postCompleteDelay is negative") PollingInterval = func(defaultValue time.Duration) time.Duration { if str, ok := os.LookupEnv("SYSTEM_UPGRADE_PLAN_POLLING_INTERVAL"); ok { @@ -257,5 +258,8 @@ func Validate(plan *upgradeapiv1.Plan) error { return ErrInvalidWindow } } + if delay := plan.Spec.PostCompleteDelay; delay != nil && delay.Duration < 0 { + return ErrInvalidDelay + } return nil } From 373ed65b1f026487e30674a53a65b522673e2557 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Mon, 4 Nov 2024 20:00:51 +0000 Subject: [PATCH 4/4] Update e2e for new features Signed-off-by: Brad Davidson --- e2e/framework/controller/deployment.go | 7 +- e2e/suite/job_generate_test.go | 151 ++++++++++++++++++++++++- 2 files changed, 156 insertions(+), 2 deletions(-) diff --git a/e2e/framework/controller/deployment.go b/e2e/framework/controller/deployment.go index ae4d6a88..8c5bb803 100644 --- a/e2e/framework/controller/deployment.go +++ b/e2e/framework/controller/deployment.go @@ -19,7 +19,9 @@ type DeploymentOption func(*appsv1.Deployment) func NewDeployment(name string, opt ...DeploymentOption) *appsv1.Deployment { labels := map[string]string{ - upgradeapi.LabelController: name, + upgradeapi.LabelController: name, + "app.kubernetes.io/name": name, + "app.kubernetes.io/component": "controller", } securityContext := &corev1.SecurityContext{ AllowPrivilegeEscalation: pointer.Bool(false), @@ -46,6 +48,9 @@ func NewDeployment(name string, opt ...DeploymentOption) *appsv1.Deployment { container.Env = []corev1.EnvVar{{ Name: "SYSTEM_UPGRADE_CONTROLLER_NAME", Value: name, + }, { + Name: "SYSTEM_UPGRADE_CONTROLLER_LEADER_ELECT", + Value: "true", }, { Name: "SYSTEM_UPGRADE_CONTROLLER_NAMESPACE", ValueFrom: &corev1.EnvVarSource{ diff --git a/e2e/suite/job_generate_test.go b/e2e/suite/job_generate_test.go index 9516cdd2..008d06ce 100644 --- a/e2e/suite/job_generate_test.go +++ b/e2e/suite/job_generate_test.go @@ -114,7 +114,7 @@ var _ = Describe("Job Generation", func() { plan, err = e2e.WaitForPlanCondition(plan.Name, upgradeapiv1.PlanSpecValidated, 30*time.Second) 
Expect(err).ToNot(HaveOccurred()) Expect(upgradeapiv1.PlanSpecValidated.IsTrue(plan)).To(BeFalse()) - Expect(upgradeapiv1.PlanSpecValidated.GetMessage(plan)).To(ContainSubstring("cannot specify both deleteEmptydirData and deleteLocalData")) + Expect(upgradeapiv1.PlanSpecValidated.GetMessage(plan)).To(ContainSubstring("spec.drain cannot specify both deleteEmptydirData and deleteLocalData")) plan.Spec.Drain.DeleteLocalData = nil plan, err = e2e.UpdatePlan(plan) @@ -162,6 +162,155 @@ var _ = Describe("Job Generation", func() { }) }) + When("fails because of invalid time window", func() { + var ( + err error + plan *upgradeapiv1.Plan + jobs []batchv1.Job + ) + BeforeEach(func() { + plan = e2e.NewPlan("fail-window-", "library/alpine:3.18", []string{"sh", "-c"}, "exit 0") + plan.Spec.Version = "latest" + plan.Spec.Concurrency = 1 + plan.Spec.ServiceAccountName = e2e.Namespace.Name + plan.Spec.Window = &upgradeapiv1.TimeWindowSpec{ + Days: []string{"never"}, + StartTime: "00:00:00", + EndTime: "23:59:59", + TimeZone: "UTC", + } + plan.Spec.NodeSelector = &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{{ + Key: "node-role.kubernetes.io/control-plane", + Operator: metav1.LabelSelectorOpDoesNotExist, + }}, + } + plan, err = e2e.CreatePlan(plan) + Expect(err).ToNot(HaveOccurred()) + + plan, err = e2e.WaitForPlanCondition(plan.Name, upgradeapiv1.PlanSpecValidated, 30*time.Second) + Expect(err).ToNot(HaveOccurred()) + Expect(upgradeapiv1.PlanSpecValidated.IsTrue(plan)).To(BeFalse()) + Expect(upgradeapiv1.PlanSpecValidated.GetMessage(plan)).To(ContainSubstring("spec.window is invalid")) + + plan.Spec.Window.Days = []string{"su", "mo", "tu", "we", "th", "fr", "sa"} + plan, err = e2e.UpdatePlan(plan) + Expect(err).ToNot(HaveOccurred()) + + plan, err = e2e.WaitForPlanCondition(plan.Name, upgradeapiv1.PlanSpecValidated, 30*time.Second) + Expect(err).ToNot(HaveOccurred()) + Expect(upgradeapiv1.PlanSpecValidated.IsTrue(plan)).To(BeTrue()) + + jobs, err = e2e.WaitForPlanJobs(plan, 1, 120*time.Second) + Expect(err).ToNot(HaveOccurred()) + Expect(jobs).To(HaveLen(1)) + }) + It("should apply successfully after edit", func() { + Expect(jobs).To(HaveLen(1)) + Expect(jobs[0].Status.Succeeded).To(BeNumerically("==", 1)) + Expect(jobs[0].Status.Active).To(BeNumerically("==", 0)) + Expect(jobs[0].Status.Failed).To(BeNumerically("==", 0)) + Expect(jobs[0].Spec.Template.Spec.InitContainers).To(HaveLen(1)) + Expect(jobs[0].Spec.Template.Spec.InitContainers[0].Args).To(ContainElement(ContainSubstring("!upgrade.cattle.io/controller"))) + Expect(jobs[0].Spec.Template.Spec.InitContainers[0].Args).To(ContainElement(ContainSubstring("component notin (sonobuoy)"))) + }) + AfterEach(func() { + if CurrentSpecReport().Failed() { + podList, _ := e2e.ClientSet.CoreV1().Pods(e2e.Namespace.Name).List(context.Background(), metav1.ListOptions{}) + for _, pod := range podList.Items { + containerNames := []string{} + for _, container := range pod.Spec.InitContainers { + containerNames = append(containerNames, container.Name) + } + for _, container := range pod.Spec.Containers { + containerNames = append(containerNames, container.Name) + } + for _, container := range containerNames { + reportName := fmt.Sprintf("podlogs-%s-%s", pod.Name, container) + logs := e2e.ClientSet.CoreV1().Pods(e2e.Namespace.Name).GetLogs(pod.Name, &v1.PodLogOptions{Container: container}) + if logStreamer, err := logs.Stream(context.Background()); err == nil { + if podLogs, err := io.ReadAll(logStreamer); err == nil { + 
AddReportEntry(reportName, string(podLogs)) + } + } + } + } + } + }) + }) + + When("fails because of invalid post complete delay", func() { + var ( + err error + plan *upgradeapiv1.Plan + jobs []batchv1.Job + ) + BeforeEach(func() { + plan = e2e.NewPlan("fail-post-complete-delay-", "library/alpine:3.18", []string{"sh", "-c"}, "exit 0") + plan.Spec.Version = "latest" + plan.Spec.Concurrency = 1 + plan.Spec.ServiceAccountName = e2e.Namespace.Name + plan.Spec.PostCompleteDelay = &metav1.Duration{Duration: -30 * time.Second} + plan.Spec.NodeSelector = &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{{ + Key: "node-role.kubernetes.io/control-plane", + Operator: metav1.LabelSelectorOpDoesNotExist, + }}, + } + plan, err = e2e.CreatePlan(plan) + Expect(err).ToNot(HaveOccurred()) + + plan, err = e2e.WaitForPlanCondition(plan.Name, upgradeapiv1.PlanSpecValidated, 30*time.Second) + Expect(err).ToNot(HaveOccurred()) + Expect(upgradeapiv1.PlanSpecValidated.IsTrue(plan)).To(BeFalse()) + Expect(upgradeapiv1.PlanSpecValidated.GetMessage(plan)).To(ContainSubstring("spec.postCompleteDelay is negative")) + + plan.Spec.PostCompleteDelay.Duration = time.Second + plan, err = e2e.UpdatePlan(plan) + Expect(err).ToNot(HaveOccurred()) + + plan, err = e2e.WaitForPlanCondition(plan.Name, upgradeapiv1.PlanSpecValidated, 30*time.Second) + Expect(err).ToNot(HaveOccurred()) + Expect(upgradeapiv1.PlanSpecValidated.IsTrue(plan)).To(BeTrue()) + + jobs, err = e2e.WaitForPlanJobs(plan, 1, 120*time.Second) + Expect(err).ToNot(HaveOccurred()) + Expect(jobs).To(HaveLen(1)) + }) + It("should apply successfully after edit", func() { + Expect(jobs).To(HaveLen(1)) + Expect(jobs[0].Status.Succeeded).To(BeNumerically("==", 1)) + Expect(jobs[0].Status.Active).To(BeNumerically("==", 0)) + Expect(jobs[0].Status.Failed).To(BeNumerically("==", 0)) + Expect(jobs[0].Spec.Template.Spec.InitContainers).To(HaveLen(1)) + Expect(jobs[0].Spec.Template.Spec.InitContainers[0].Args).To(ContainElement(ContainSubstring("!upgrade.cattle.io/controller"))) + Expect(jobs[0].Spec.Template.Spec.InitContainers[0].Args).To(ContainElement(ContainSubstring("component notin (sonobuoy)"))) + }) + AfterEach(func() { + if CurrentSpecReport().Failed() { + podList, _ := e2e.ClientSet.CoreV1().Pods(e2e.Namespace.Name).List(context.Background(), metav1.ListOptions{}) + for _, pod := range podList.Items { + containerNames := []string{} + for _, container := range pod.Spec.InitContainers { + containerNames = append(containerNames, container.Name) + } + for _, container := range pod.Spec.Containers { + containerNames = append(containerNames, container.Name) + } + for _, container := range containerNames { + reportName := fmt.Sprintf("podlogs-%s-%s", pod.Name, container) + logs := e2e.ClientSet.CoreV1().Pods(e2e.Namespace.Name).GetLogs(pod.Name, &v1.PodLogOptions{Container: container}) + if logStreamer, err := logs.Stream(context.Background()); err == nil { + if podLogs, err := io.ReadAll(logStreamer); err == nil { + AddReportEntry(reportName, string(podLogs)) + } + } + } + } + } + }) + }) + When("updated secret should not change hash", func() { var ( err error