Skip to content

Commit efb8f9c

Browse files
Add upcoming node groups state checker
1 parent 6b0f412 commit efb8f9c

File tree

16 files changed

+380
-152
lines changed

16 files changed

+380
-152
lines changed

cluster-autoscaler/cloudprovider/test/test_cloud_provider.go

Lines changed: 13 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -216,83 +216,66 @@ func (tcp *TestCloudProvider) NewNodeGroup(machineType string, labels map[string
216216

217217
// NewNodeGroupWithId creates a new node group with a custom ID suffix.
218218
func (tcp *TestCloudProvider) NewNodeGroupWithId(machineType string, labels map[string]string, systemLabels map[string]string,
219-
taints []apiv1.Taint, extraResources map[string]resource.Quantity, upcoming bool, id string) (cloudprovider.NodeGroup, error) {
219+
taints []apiv1.Taint, extraResources map[string]resource.Quantity, id string) (cloudprovider.NodeGroup, error) {
220220
return &TestNodeGroup{
221221
cloudProvider: tcp,
222222
id: "autoprovisioned-" + machineType + "-" + id,
223223
minSize: 0,
224224
maxSize: 1000,
225225
targetSize: 0,
226226
exist: false,
227-
upcoming: upcoming,
228227
autoprovisioned: true,
229228
machineType: machineType,
230229
labels: labels,
231230
taints: taints,
232231
}, nil
233232
}
234233

235-
// InsertNodeGroup adds already created node group to test cloud provider.
236-
func (tcp *TestCloudProvider) InsertNodeGroup(nodeGroup cloudprovider.NodeGroup) {
237-
tcp.Lock()
238-
defer tcp.Unlock()
239-
240-
tcp.groups[nodeGroup.Id()] = nodeGroup
241-
}
242-
243234
// BuildNodeGroup returns a test node group.
244-
func (tcp *TestCloudProvider) BuildNodeGroup(id string, min, max, size int, autoprovisioned bool, machineType string, opts *config.NodeGroupAutoscalingOptions) *TestNodeGroup {
235+
func (tcp *TestCloudProvider) BuildNodeGroup(id string, min, max, size int, autoprovisioned bool, machineType string, opts *config.NodeGroupAutoscalingOptions, exists bool) *TestNodeGroup {
245236
return &TestNodeGroup{
246237
cloudProvider: tcp,
247238
id: id,
248239
minSize: min,
249240
maxSize: max,
250241
targetSize: size,
251-
exist: true,
242+
exist: exists,
252243
autoprovisioned: autoprovisioned,
253244
machineType: machineType,
254245
opts: opts,
255246
}
256247
}
257248

258-
// BuildUpcomingNodeGroup returns an upcoming test node group.
259-
func (tcp *TestCloudProvider) BuildUpcomingNodeGroup(id string, min, max, size int, autoprovisioned bool, machineType string, opts *config.NodeGroupAutoscalingOptions) *TestNodeGroup {
260-
return &TestNodeGroup{
261-
cloudProvider: tcp,
262-
id: id,
263-
minSize: min,
264-
maxSize: max,
265-
targetSize: size,
266-
exist: false,
267-
upcoming: true,
268-
autoprovisioned: autoprovisioned,
269-
machineType: machineType,
270-
opts: opts,
271-
}
249+
// InsertNodeGroup adds already created node group to test cloud provider.
250+
func (tcp *TestCloudProvider) InsertNodeGroup(nodeGroup cloudprovider.NodeGroup) {
251+
tcp.Lock()
252+
defer tcp.Unlock()
253+
254+
tcp.groups[nodeGroup.Id()] = nodeGroup
272255
}
273256

274257
// AddNodeGroup adds a node group to the test cloud provider.
275258
func (tcp *TestCloudProvider) AddNodeGroup(id string, min int, max int, size int) {
276-
nodeGroup := tcp.BuildNodeGroup(id, min, max, size, false, "", nil)
259+
nodeGroup := tcp.BuildNodeGroup(id, min, max, size, false, "", nil, true)
277260
tcp.InsertNodeGroup(nodeGroup)
278261
}
279262

280263
// AddUpcomingNodeGroup adds an upcoming node group to the test cloud provider.
281264
func (tcp *TestCloudProvider) AddUpcomingNodeGroup(id string, min int, max int, size int) {
282-
nodeGroup := tcp.BuildUpcomingNodeGroup(id, min, max, size, false, "", nil)
265+
nodeGroup := tcp.BuildNodeGroup(id, min, max, size, false, "", nil, false)
283266
tcp.InsertNodeGroup(nodeGroup)
284267
}
285268

286269
// AddNodeGroupWithCustomOptions adds node group with custom options
287270
// to test cloud provider.
288271
func (tcp *TestCloudProvider) AddNodeGroupWithCustomOptions(id string, min int, max int, size int, opts *config.NodeGroupAutoscalingOptions) {
289-
nodeGroup := tcp.BuildNodeGroup(id, min, max, size, false, "", opts)
272+
nodeGroup := tcp.BuildNodeGroup(id, min, max, size, false, "", opts, true)
290273
tcp.InsertNodeGroup(nodeGroup)
291274
}
292275

293276
// AddAutoprovisionedNodeGroup adds an autoprovisioned node group to the test cloud provider.
294277
func (tcp *TestCloudProvider) AddAutoprovisionedNodeGroup(id string, min int, max int, size int, machineType string) *TestNodeGroup {
295-
nodeGroup := tcp.BuildNodeGroup(id, min, max, size, true, machineType, nil)
278+
nodeGroup := tcp.BuildNodeGroup(id, min, max, size, true, machineType, nil, true)
296279
tcp.InsertNodeGroup(nodeGroup)
297280
return nodeGroup
298281
}

cluster-autoscaler/clusterstate/clusterstate.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"k8s.io/autoscaler/cluster-autoscaler/clusterstate/utils"
3030
"k8s.io/autoscaler/cluster-autoscaler/metrics"
3131
"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupconfig"
32+
"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroups/asyncnodegroups"
3233
"k8s.io/autoscaler/cluster-autoscaler/utils/backoff"
3334
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
3435
kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
@@ -141,6 +142,7 @@ type ClusterStateRegistry struct {
141142
cloudProviderNodeInstancesCache *utils.CloudProviderNodeInstancesCache
142143
interrupt chan struct{}
143144
nodeGroupConfigProcessor nodegroupconfig.NodeGroupConfigProcessor
145+
asyncNodeGroupStateChecker asyncnodegroups.AsyncNodeGroupStateChecker
144146

145147
// scaleUpFailures contains information about scale-up failures for each node group. It should be
146148
// cleared periodically to avoid unnecessary accumulation.
@@ -155,7 +157,7 @@ type NodeGroupScalingSafety struct {
155157
}
156158

157159
// NewClusterStateRegistry creates new ClusterStateRegistry.
158-
func NewClusterStateRegistry(cloudProvider cloudprovider.CloudProvider, config ClusterStateRegistryConfig, logRecorder *utils.LogEventRecorder, backoff backoff.Backoff, nodeGroupConfigProcessor nodegroupconfig.NodeGroupConfigProcessor) *ClusterStateRegistry {
160+
func NewClusterStateRegistry(cloudProvider cloudprovider.CloudProvider, config ClusterStateRegistryConfig, logRecorder *utils.LogEventRecorder, backoff backoff.Backoff, nodeGroupConfigProcessor nodegroupconfig.NodeGroupConfigProcessor, asyncNodeGroupStateChecker asyncnodegroups.AsyncNodeGroupStateChecker) *ClusterStateRegistry {
159161
return &ClusterStateRegistry{
160162
scaleUpRequests: make(map[string]*ScaleUpRequest),
161163
scaleDownRequests: make([]*ScaleDownRequest, 0),
@@ -175,6 +177,7 @@ func NewClusterStateRegistry(cloudProvider cloudprovider.CloudProvider, config C
175177
interrupt: make(chan struct{}),
176178
scaleUpFailures: make(map[string][]ScaleUpFailure),
177179
nodeGroupConfigProcessor: nodeGroupConfigProcessor,
180+
asyncNodeGroupStateChecker: asyncNodeGroupStateChecker,
178181
}
179182
}
180183

@@ -684,7 +687,7 @@ func (csr *ClusterStateRegistry) updateIncorrectNodeGroupSizes(currentTime time.
684687
klog.Warningf("Acceptable range for node group %s not found", nodeGroup.Id())
685688
continue
686689
}
687-
if nodeGroup.IsUpcoming() {
690+
if csr.asyncNodeGroupStateChecker.IsUpcoming(nodeGroup) {
688691
// Nodes for upcoming node groups reside in-memory and wait for node group to be fully
689692
// created. There is no need to mark their sizes incorrect.
690693
continue
@@ -986,7 +989,7 @@ func (csr *ClusterStateRegistry) GetUpcomingNodes() (upcomingCounts map[string]i
986989
registeredNodeNames = map[string][]string{}
987990
for _, nodeGroup := range csr.cloudProvider.NodeGroups() {
988991
id := nodeGroup.Id()
989-
if nodeGroup.IsUpcoming() {
992+
if csr.asyncNodeGroupStateChecker.IsUpcoming(nodeGroup) {
990993
size, err := nodeGroup.TargetSize()
991994
if size >= 0 || err != nil {
992995
upcomingCounts[id] = size

0 commit comments

Comments
 (0)