Skip to content

Commit f9d508e

Browse files
Add upcoming node groups state checker
1 parent 6b0f412 commit f9d508e

File tree

16 files changed

+311
-107
lines changed

16 files changed

+311
-107
lines changed

cluster-autoscaler/cloudprovider/test/test_cloud_provider.go

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -216,15 +216,14 @@ func (tcp *TestCloudProvider) NewNodeGroup(machineType string, labels map[string
216216

217217
// NewNodeGroupWithId creates a new node group with custom ID suffix.
218218
func (tcp *TestCloudProvider) NewNodeGroupWithId(machineType string, labels map[string]string, systemLabels map[string]string,
219-
taints []apiv1.Taint, extraResources map[string]resource.Quantity, upcoming bool, id string) (cloudprovider.NodeGroup, error) {
219+
taints []apiv1.Taint, extraResources map[string]resource.Quantity, id string) (cloudprovider.NodeGroup, error) {
220220
return &TestNodeGroup{
221221
cloudProvider: tcp,
222222
id: "autoprovisioned-" + machineType + "-" + id,
223223
minSize: 0,
224224
maxSize: 1000,
225225
targetSize: 0,
226226
exist: false,
227-
upcoming: upcoming,
228227
autoprovisioned: true,
229228
machineType: machineType,
230229
labels: labels,
@@ -256,15 +255,14 @@ func (tcp *TestCloudProvider) BuildNodeGroup(id string, min, max, size int, auto
256255
}
257256

258257
// BuildUpcomingNodeGroup returns an upcoming test node group.
259-
func (tcp *TestCloudProvider) BuildUpcomingNodeGroup(id string, min, max, size int, autoprovisioned bool, machineType string, opts *config.NodeGroupAutoscalingOptions) *TestNodeGroup {
258+
func (tcp *TestCloudProvider) BuildUpcomingNodeGroup(id string, min, max, size int, autoprovisioned bool, machineType string, opts *config.NodeGroupAutoscalingOptions, exists bool) *TestNodeGroup {
260259
return &TestNodeGroup{
261260
cloudProvider: tcp,
262261
id: id,
263262
minSize: min,
264263
maxSize: max,
265264
targetSize: size,
266-
exist: false,
267-
upcoming: true,
265+
exist: exists,
268266
autoprovisioned: autoprovisioned,
269267
machineType: machineType,
270268
opts: opts,
@@ -278,8 +276,8 @@ func (tcp *TestCloudProvider) AddNodeGroup(id string, min int, max int, size int
278276
}
279277

280278
// AddUpcomingNodeGroup adds upcoming node group to test cloud provider.
281-
func (tcp *TestCloudProvider) AddUpcomingNodeGroup(id string, min int, max int, size int) {
282-
nodeGroup := tcp.BuildUpcomingNodeGroup(id, min, max, size, false, "", nil)
279+
func (tcp *TestCloudProvider) AddUpcomingNodeGroup(id string, min int, max int, size int, exists bool) {
280+
nodeGroup := tcp.BuildUpcomingNodeGroup(id, min, max, size, false, "", nil, exists)
283281
tcp.InsertNodeGroup(nodeGroup)
284282
}
285283

cluster-autoscaler/clusterstate/clusterstate.go

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ import (
2929
"k8s.io/autoscaler/cluster-autoscaler/clusterstate/utils"
3030
"k8s.io/autoscaler/cluster-autoscaler/metrics"
3131
"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupconfig"
32+
"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroups/asyncnodegroups"
3233
"k8s.io/autoscaler/cluster-autoscaler/utils/backoff"
3334
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
3435
kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
@@ -141,6 +142,7 @@ type ClusterStateRegistry struct {
141142
cloudProviderNodeInstancesCache *utils.CloudProviderNodeInstancesCache
142143
interrupt chan struct{}
143144
nodeGroupConfigProcessor nodegroupconfig.NodeGroupConfigProcessor
145+
asyncNodeGroupStateChecker asyncnodegroups.AsyncNodeGroupStateChecker
144146

145147
// scaleUpFailures contains information about scale-up failures for each node group. It should be
146148
// cleared periodically to avoid unnecessary accumulation.
@@ -155,7 +157,7 @@ type NodeGroupScalingSafety struct {
155157
}
156158

157159
// NewClusterStateRegistry creates new ClusterStateRegistry.
158-
func NewClusterStateRegistry(cloudProvider cloudprovider.CloudProvider, config ClusterStateRegistryConfig, logRecorder *utils.LogEventRecorder, backoff backoff.Backoff, nodeGroupConfigProcessor nodegroupconfig.NodeGroupConfigProcessor) *ClusterStateRegistry {
160+
func NewClusterStateRegistry(cloudProvider cloudprovider.CloudProvider, config ClusterStateRegistryConfig, logRecorder *utils.LogEventRecorder, backoff backoff.Backoff, nodeGroupConfigProcessor nodegroupconfig.NodeGroupConfigProcessor, asyncNodeGroupStateChecker asyncnodegroups.AsyncNodeGroupStateChecker) *ClusterStateRegistry {
159161
return &ClusterStateRegistry{
160162
scaleUpRequests: make(map[string]*ScaleUpRequest),
161163
scaleDownRequests: make([]*ScaleDownRequest, 0),
@@ -175,6 +177,7 @@ func NewClusterStateRegistry(cloudProvider cloudprovider.CloudProvider, config C
175177
interrupt: make(chan struct{}),
176178
scaleUpFailures: make(map[string][]ScaleUpFailure),
177179
nodeGroupConfigProcessor: nodeGroupConfigProcessor,
180+
asyncNodeGroupStateChecker: asyncNodeGroupStateChecker,
178181
}
179182
}
180183

@@ -684,7 +687,7 @@ func (csr *ClusterStateRegistry) updateIncorrectNodeGroupSizes(currentTime time.
684687
klog.Warningf("Acceptable range for node group %s not found", nodeGroup.Id())
685688
continue
686689
}
687-
if nodeGroup.IsUpcoming() {
690+
if csr.asyncNodeGroupStateChecker.IsUpcoming(nodeGroup) {
688691
// Nodes for upcoming node groups reside in-memory and wait for node group to be fully
689692
// created. There is no need to mark their sizes incorrect.
690693
continue
@@ -986,7 +989,7 @@ func (csr *ClusterStateRegistry) GetUpcomingNodes() (upcomingCounts map[string]i
986989
registeredNodeNames = map[string][]string{}
987990
for _, nodeGroup := range csr.cloudProvider.NodeGroups() {
988991
id := nodeGroup.Id()
989-
if nodeGroup.IsUpcoming() {
992+
if csr.asyncNodeGroupStateChecker.IsUpcoming(nodeGroup) {
990993
size, err := nodeGroup.TargetSize()
991994
if size >= 0 || err != nil {
992995
upcomingCounts[id] = size

0 commit comments

Comments
 (0)