Skip to content

Commit f346511

Browse files
authored
Merge pull request #8474 from MaximilianoUribe/muribefalcon/cherry-pick-force-delete-failed-instance
chore: cherry-pick forceDeleteFailedNodes into release branch 1.33
2 parents 0fafd9e + 26c59e8 commit f346511

File tree

4 files changed

+374
-366
lines changed

4 files changed

+374
-366
lines changed

cluster-autoscaler/config/autoscaling_options.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,8 @@ type AutoscalingOptions struct {
305305
CheckCapacityProvisioningRequestBatchTimebox time.Duration
306306
// ForceDeleteLongUnregisteredNodes is used to enable/disable ignoring min size constraints during removal of long unregistered nodes
307307
ForceDeleteLongUnregisteredNodes bool
308+
// ForceDeleteFailedNodes is used to enable/disable ignoring min size constraints during removal of failed nodes
309+
ForceDeleteFailedNodes bool
308310
// DynamicResourceAllocationEnabled configures whether logic for handling DRA objects is enabled.
309311
DynamicResourceAllocationEnabled bool
310312
// ClusterSnapshotParallelism is the maximum parallelism of cluster snapshot creation.

cluster-autoscaler/config/flags/flags.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ var (
223223
checkCapacityProvisioningRequestMaxBatchSize = flag.Int("check-capacity-provisioning-request-max-batch-size", 10, "Maximum number of provisioning requests to process in a single batch.")
224224
checkCapacityProvisioningRequestBatchTimebox = flag.Duration("check-capacity-provisioning-request-batch-timebox", 10*time.Second, "Maximum time to process a batch of provisioning requests.")
225225
forceDeleteLongUnregisteredNodes = flag.Bool("force-delete-unregistered-nodes", false, "Whether to enable force deletion of long unregistered nodes, regardless of the min size of the node group they belong to.")
226+
forceDeleteFailedNodes = flag.Bool("force-delete-failed-nodes", false, "Whether to enable force deletion of failed nodes, regardless of the min size of the node group they belong to.")
226227
enableDynamicResourceAllocation = flag.Bool("enable-dynamic-resource-allocation", false, "Whether logic for handling DRA (Dynamic Resource Allocation) objects is enabled.")
227228
clusterSnapshotParallelism = flag.Int("cluster-snapshot-parallelism", 16, "Maximum parallelism of cluster snapshot creation.")
228229
checkCapacityProcessorInstance = flag.String("check-capacity-processor-instance", "", "Name of the processor instance. Only ProvisioningRequests that define this name in their parameters with the key \"processorInstance\" will be processed by this CA instance. It only refers to check capacity ProvisioningRequests, but if not empty, best-effort atomic ProvisioningRequests processing is disabled in this instance. Not recommended: Until CA 1.35, ProvisioningRequests with this name as prefix in their class will be also processed.")
@@ -391,6 +392,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
391392
CheckCapacityProvisioningRequestMaxBatchSize: *checkCapacityProvisioningRequestMaxBatchSize,
392393
CheckCapacityProvisioningRequestBatchTimebox: *checkCapacityProvisioningRequestBatchTimebox,
393394
ForceDeleteLongUnregisteredNodes: *forceDeleteLongUnregisteredNodes,
395+
ForceDeleteFailedNodes: *forceDeleteFailedNodes,
394396
DynamicResourceAllocationEnabled: *enableDynamicResourceAllocation,
395397
ClusterSnapshotParallelism: *clusterSnapshotParallelism,
396398
CheckCapacityProcessorInstance: *checkCapacityProcessorInstance,

cluster-autoscaler/core/static_autoscaler.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -879,8 +879,15 @@ func (a *StaticAutoscaler) deleteCreatedNodesWithErrors() {
879879
nodeGroup := nodeGroups[nodeGroupId]
880880
if nodeGroup == nil {
881881
err = fmt.Errorf("node group %s not found", nodeGroupId)
882-
} else if nodesToDelete, err = overrideNodesToDeleteForZeroOrMax(a.NodeGroupDefaults, nodeGroup, nodesToDelete); err == nil {
883-
err = nodeGroup.DeleteNodes(nodesToDelete)
882+
} else if nodesToDelete, err = overrideNodesToDeleteForZeroOrMax(a.NodeGroupDefaults, nodeGroup, nodesToDelete); err == nil && len(nodesToDelete) > 0 {
883+
if a.ForceDeleteFailedNodes {
884+
err = nodeGroup.ForceDeleteNodes(nodesToDelete)
885+
if errors.Is(err, cloudprovider.ErrNotImplemented) {
886+
err = nodeGroup.DeleteNodes(nodesToDelete)
887+
}
888+
} else {
889+
err = nodeGroup.DeleteNodes(nodesToDelete)
890+
}
884891
}
885892

886893
if err != nil {

0 commit comments

Comments
 (0)