@@ -743,7 +743,7 @@ func (a *StaticAutoscaler) removeOldUnregisteredNodes(allUnregisteredNodes []clu
743
743
csr * clusterstate.ClusterStateRegistry , currentTime time.Time , logRecorder * utils.LogEventRecorder ) (bool , error ) {
744
744
745
745
nodeGroups := a .nodeGroupsById ()
746
- nodesToBeDeletedByNodeGroupId := make (map [string ][]clusterstate.UnregisteredNode )
746
+ nodesToDeleteByNodeGroupId := make (map [string ][]clusterstate.UnregisteredNode )
747
747
for _ , unregisteredNode := range allUnregisteredNodes {
748
748
nodeGroup , err := a .CloudProvider .NodeGroupForNode (unregisteredNode .Node )
749
749
if err != nil {
@@ -762,12 +762,12 @@ func (a *StaticAutoscaler) removeOldUnregisteredNodes(allUnregisteredNodes []clu
762
762
763
763
if unregisteredNode .UnregisteredSince .Add (maxNodeProvisionTime ).Before (currentTime ) {
764
764
klog .V (0 ).Infof ("Marking unregistered node %v for removal" , unregisteredNode .Node .Name )
765
- nodesToBeDeletedByNodeGroupId [nodeGroup .Id ()] = append (nodesToBeDeletedByNodeGroupId [nodeGroup .Id ()], unregisteredNode )
765
+ nodesToDeleteByNodeGroupId [nodeGroup .Id ()] = append (nodesToDeleteByNodeGroupId [nodeGroup .Id ()], unregisteredNode )
766
766
}
767
767
}
768
768
769
769
removedAny := false
770
- for nodeGroupId , unregisteredNodesToDelete := range nodesToBeDeletedByNodeGroupId {
770
+ for nodeGroupId , unregisteredNodesToDelete := range nodesToDeleteByNodeGroupId {
771
771
nodeGroup := nodeGroups [nodeGroupId ]
772
772
773
773
klog .V (0 ).Infof ("Removing %v unregistered nodes for node group %v" , len (unregisteredNodesToDelete ), nodeGroupId )
@@ -787,21 +787,11 @@ func (a *StaticAutoscaler) removeOldUnregisteredNodes(allUnregisteredNodes []clu
787
787
}
788
788
nodesToDelete := toNodes (unregisteredNodesToDelete )
789
789
790
- opts , err := nodeGroup . GetOptions (a .NodeGroupDefaults )
791
- if err != nil && err != cloudprovider . ErrNotImplemented {
792
- klog .Warningf ("Failed to get node group options for %s: %s " , nodeGroupId , err )
790
+ nodesToDelete , err = overrideNodesToDeleteForZeroOrMax (a .NodeGroupDefaults , nodeGroup , nodesToDelete )
791
+ if err != nil {
792
+ klog .Warningf ("Failed to remove unregistered nodes from node group %s: %v " , nodeGroupId , err )
793
793
continue
794
794
}
795
- // If a scale-up of "ZeroOrMaxNodeScaling" node group failed, the cleanup
796
- // should stick to the all-or-nothing principle. Deleting all nodes.
797
- if opts != nil && opts .ZeroOrMaxNodeScaling {
798
- instances , err := nodeGroup .Nodes ()
799
- if err != nil {
800
- klog .Warningf ("Failed to fill in unregistered nodes from group %s based on ZeroOrMaxNodeScaling option: %s" , nodeGroupId , err )
801
- continue
802
- }
803
- nodesToDelete = instancesToFakeNodes (instances )
804
- }
805
795
806
796
err = nodeGroup .DeleteNodes (nodesToDelete )
807
797
csr .InvalidateNodeInstancesCacheEntry (nodeGroup )
@@ -835,35 +825,19 @@ func (a *StaticAutoscaler) deleteCreatedNodesWithErrors() bool {
835
825
// We always schedule deletion of incoming erroneous nodes
836
826
// TODO[lukaszos] Consider adding logic to not retry delete every loop iteration
837
827
nodeGroups := a .nodeGroupsById ()
838
- nodesToBeDeletedByNodeGroupId := a .clusterStateRegistry .GetCreatedNodesWithErrors ()
828
+ nodesToDeleteByNodeGroupId := a .clusterStateRegistry .GetCreatedNodesWithErrors ()
839
829
840
830
deletedAny := false
841
831
842
- for nodeGroupId , nodesToBeDeleted := range nodesToBeDeletedByNodeGroupId {
832
+ for nodeGroupId , nodesToDelete := range nodesToDeleteByNodeGroupId {
843
833
var err error
844
- klog .V (1 ).Infof ("Deleting %v from %v node group because of create errors" , len (nodesToBeDeleted ), nodeGroupId )
834
+ klog .V (1 ).Infof ("Deleting %v from %v node group because of create errors" , len (nodesToDelete ), nodeGroupId )
845
835
846
836
nodeGroup := nodeGroups [nodeGroupId ]
847
837
if nodeGroup == nil {
848
838
err = fmt .Errorf ("node group %s not found" , nodeGroupId )
849
- } else {
850
- var opts * config.NodeGroupAutoscalingOptions
851
- opts , err = nodeGroup .GetOptions (a .NodeGroupDefaults )
852
- if err != nil && err != cloudprovider .ErrNotImplemented {
853
- klog .Warningf ("Failed to get node group options for %s: %s" , nodeGroupId , err )
854
- continue
855
- }
856
- // If a scale-up of "ZeroOrMaxNodeScaling" node group failed, the cleanup
857
- // should stick to the all-or-nothing principle. Deleting all nodes.
858
- if opts != nil && opts .ZeroOrMaxNodeScaling {
859
- instances , err := nodeGroup .Nodes ()
860
- if err != nil {
861
- klog .Warningf ("Failed to fill in failed nodes from group %s based on ZeroOrMaxNodeScaling option: %s" , nodeGroupId , err )
862
- continue
863
- }
864
- nodesToBeDeleted = instancesToFakeNodes (instances )
865
- }
866
- err = nodeGroup .DeleteNodes (nodesToBeDeleted )
839
+ } else if nodesToDelete , err = overrideNodesToDeleteForZeroOrMax (a .NodeGroupDefaults , nodeGroup , nodesToDelete ); err == nil {
840
+ err = nodeGroup .DeleteNodes (nodesToDelete )
867
841
}
868
842
869
843
if err != nil {
@@ -877,6 +851,27 @@ func (a *StaticAutoscaler) deleteCreatedNodesWithErrors() bool {
877
851
return deletedAny
878
852
}
879
853
854
+ // overrideNodesToDeleteForZeroOrMax returns a list of nodes to delete, taking into account that
855
+ // node deletion for a "ZeroOrMaxNodeScaling" node group is atomic and should delete all nodes.
856
+ // For a non-"ZeroOrMaxNodeScaling" node group it returns the unchanged list of nodes to delete.
857
+ func overrideNodesToDeleteForZeroOrMax (defaults config.NodeGroupAutoscalingOptions , nodeGroup cloudprovider.NodeGroup , nodesToDelete []* apiv1.Node ) ([]* apiv1.Node , error ) {
858
+ opts , err := nodeGroup .GetOptions (defaults )
859
+ if err != nil && err != cloudprovider .ErrNotImplemented {
860
+ return []* apiv1.Node {}, fmt .Errorf ("Failed to get node group options for %s: %s" , nodeGroup .Id (), err )
861
+ }
862
+ // If a scale-up of "ZeroOrMaxNodeScaling" node group failed, the cleanup
863
+ // should stick to the all-or-nothing principle. Deleting all nodes.
864
+ if opts != nil && opts .ZeroOrMaxNodeScaling {
865
+ instances , err := nodeGroup .Nodes ()
866
+ if err != nil {
867
+ return []* apiv1.Node {}, fmt .Errorf ("Failed to fill in nodes to delete from group %s based on ZeroOrMaxNodeScaling option: %s" , nodeGroup .Id (), err )
868
+ }
869
+ return instancesToFakeNodes (instances ), nil
870
+ }
871
+ // No override needed.
872
+ return nodesToDelete , nil
873
+ }
874
+
880
875
// instancesToFakeNodes returns a list of fake nodes with just names populated,
881
876
// so that they can be passed as nodes to delete
882
877
func instancesToFakeNodes (instances []cloudprovider.Instance ) []* apiv1.Node {
0 commit comments