Skip to content

Commit 6d32a56

Browse files
authored
fix: not failing the main loop when one NodeGroup fails on TemplateNodeInfo() (#8402)
* fix: not failing the main loop when one NodeGroup fails on TemplateNodeInfo()
* test: add a unit test
1 parent 2289138 commit 6d32a56

File tree

2 files changed

+31
-4
lines changed

2 files changed

+31
-4
lines changed

cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -140,12 +140,10 @@ func (p *MixedTemplateNodeInfoProvider) Process(ctx *context.AutoscalingContext,
140140
// working nodes in the node groups. By default CA tries to use a real-world example.
141141
nodeInfo, err := simulator.SanitizedTemplateNodeInfoFromNodeGroup(nodeGroup, daemonsets, taintConfig)
142142
if err != nil {
143-
if errors.Is(err, cloudprovider.ErrNotImplemented) {
144-
continue
145-
} else {
143+
if !errors.Is(err, cloudprovider.ErrNotImplemented) {
146144
klog.Errorf("Unable to build proper template node for %s: %v", id, err)
147-
return map[string]*framework.NodeInfo{}, caerror.ToAutoscalerError(caerror.CloudProviderError, err)
148145
}
146+
continue
149147
}
150148
result[id] = nodeInfo
151149
}

cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,35 @@ func TestGetNodeInfosCacheExpired(t *testing.T) {
302302

303303
}
304304

305+
// TestProcessHandlesTemplateNodeInfoErrors verifies that Process returns no
// error and a one-entry result containing "ng2" but not "ng1" when two node
// groups are registered and only "ng2" has a machine template.
func TestProcessHandlesTemplateNodeInfoErrors(t *testing.T) {
306+
now := time.Now()
307+
308+
tn := BuildTestNode("tn", 1000, 1000)
309+
tni := framework.NewTestNodeInfo(tn)
310+
311+
// Only "ng2" gets a machine template; "ng1" is deliberately left without one.
// NOTE(review): presumably this makes the template lookup for "ng1" return a
// non-ErrNotImplemented error - confirm against the test cloud provider.
provider := testprovider.NewTestCloudProviderBuilder().WithMachineTemplates(
312+
map[string]*framework.NodeInfo{"ng2": tni}).Build()
313+
314+
provider.AddNodeGroup("ng1", 0, 10, 0)
315+
provider.AddNodeGroup("ng2", 0, 10, 0)
316+
317+
ctx := context.AutoscalingContext{
318+
CloudProvider: provider,
319+
ClusterSnapshot: testsnapshot.NewTestSnapshotOrDie(t),
320+
}
321+
322+
// Empty node and DaemonSet lists are passed, so no real nodes are supplied
// for either group.
res, err := NewMixedTemplateNodeInfoProvider(&cacheTtl, false).Process(&ctx, []*apiv1.Node{}, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now)
323+
324+
// Should not fail despite ng1 error - continues processing
325+
assert.NoError(t, err)
326+
assert.Equal(t, 1, len(res))
327+
328+
_, found := res["ng2"]
329+
assert.True(t, found)
330+
_, found = res["ng1"]
331+
assert.False(t, found) // ng1 skipped due to template error
332+
}
333+
305334
func assertEqualNodeCapacities(t *testing.T, expected, actual *apiv1.Node) {
306335
t.Helper()
307336
assert.NotEqual(t, actual.Status, nil, "")

0 commit comments

Comments
 (0)