Skip to content

Commit 256eb8c

Browse files
committed
Merge branch 'master' into utho-autoscaler
2 parents 395afaa + 24ba0e7 commit 256eb8c

File tree

13 files changed

+279
-68
lines changed

13 files changed

+279
-68
lines changed

charts/cluster-autoscaler/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,4 @@ name: cluster-autoscaler
1111
sources:
1212
- https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler
1313
type: application
14-
version: 9.48.0
14+
version: 9.49.0

charts/cluster-autoscaler/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,7 @@ vpa:
463463
| containerSecurityContext | object | `{}` | [Security context for container](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/) |
464464
| customArgs | list | `[]` | Additional custom container arguments. Refer to https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#what-are-the-parameters-to-ca for the full list of cluster autoscaler parameters and their default values. List of arguments as strings. |
465465
| deployment.annotations | object | `{}` | Annotations to add to the Deployment object. |
466+
| dnsConfig | object | `{}` | [Pod's DNS Config](https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#pod-dns-config) |
466467
| dnsPolicy | string | `"ClusterFirst"` | Defaults to `ClusterFirst`. Valid values are: `ClusterFirstWithHostNet`, `ClusterFirst`, `Default` or `None`. If autoscaler does not depend on cluster DNS, recommended to set this to `Default`. |
467468
| envFromConfigMap | string | `""` | ConfigMap name to use as envFrom. |
468469
| envFromSecret | string | `""` | Secret name to use as envFrom. |

charts/cluster-autoscaler/templates/deployment.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ spec:
4040
{{- if .Values.priorityClassName }}
4141
priorityClassName: "{{ .Values.priorityClassName }}"
4242
{{- end }}
43+
{{- with .Values.dnsConfig }}
44+
dnsConfig:
45+
{{- toYaml . | nindent 8 }}
46+
{{- end }}
4347
{{- if .Values.dnsPolicy }}
4448
dnsPolicy: "{{ .Values.dnsPolicy }}"
4549
{{- end }}

charts/cluster-autoscaler/values.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,18 @@ deployment:
170170
# deployment.annotations -- Annotations to add to the Deployment object.
171171
annotations: {}
172172

173+
# dnsConfig -- [Pod's DNS Config](https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#pod-dns-config)
174+
dnsConfig: {}
175+
# nameservers:
176+
# - 1.2.3.4
177+
# searches:
178+
# - ns1.svc.cluster-domain.example
179+
# - my.dns.search.suffix
180+
# options:
181+
# - name: ndots
182+
# value: "2"
183+
# - name: edns0
184+
173185
# dnsPolicy -- Defaults to `ClusterFirst`. Valid values are:
174186
# `ClusterFirstWithHostNet`, `ClusterFirst`, `Default` or `None`.
175187
# If autoscaler does not depend on cluster DNS, recommended to set this to `Default`.

cluster-autoscaler/config/autoscaling_options.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ type NodeGroupAutoscalingOptions struct {
5252
MaxNodeProvisionTime time.Duration
5353
// ZeroOrMaxNodeScaling means that a node group should be scaled up to maximum size or down to zero nodes all at once instead of one-by-one.
5454
ZeroOrMaxNodeScaling bool
55+
// AllowNonAtomicScaleUpToMax indicates that partially failing scale-ups of ZeroOrMaxNodeScaling node groups should not be cancelled.
56+
AllowNonAtomicScaleUpToMax bool
5557
// IgnoreDaemonSetsUtilization sets if daemonsets utilization should be considered during node scale-down
5658
IgnoreDaemonSetsUtilization bool
5759
}

cluster-autoscaler/core/autoscaler.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ type Autoscaler interface {
7878
ExitCleanUp()
7979
// LastScaleUpTime is the time of the last scale up
8080
LastScaleUpTime() time.Time
81-
// LastScaleUpTime is a time of the last scale down
81+
// LastScaleDownDeleteTime is the time of the last scale down
8282
LastScaleDownDeleteTime() time.Time
8383
}
8484

cluster-autoscaler/core/static_autoscaler.go

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -801,6 +801,10 @@ func (a *StaticAutoscaler) removeOldUnregisteredNodes(allUnregisteredNodes []clu
801801
continue
802802
}
803803

804+
if len(nodesToDelete) == 0 {
805+
continue
806+
}
807+
804808
if a.ForceDeleteLongUnregisteredNodes {
805809
err = nodeGroup.ForceDeleteNodes(nodesToDelete)
806810
if err == cloudprovider.ErrNotImplemented {
@@ -880,12 +884,14 @@ func (a *StaticAutoscaler) deleteCreatedNodesWithErrors() {
880884
if nodeGroup == nil {
881885
err = fmt.Errorf("node group %s not found", nodeGroupId)
882886
} else if nodesToDelete, err = overrideNodesToDeleteForZeroOrMax(a.NodeGroupDefaults, nodeGroup, nodesToDelete); err == nil {
883-
err = nodeGroup.DeleteNodes(nodesToDelete)
887+
if len(nodesToDelete) > 0 {
888+
err = nodeGroup.DeleteNodes(nodesToDelete)
889+
}
884890
}
885891

886892
if err != nil {
887893
klog.Warningf("Error while trying to delete nodes from %v: %v", nodeGroupId, err)
888-
} else {
894+
} else if len(nodesToDelete) > 0 {
889895
deletedAny = true
890896
a.clusterStateRegistry.InvalidateNodeInstancesCacheEntry(nodeGroup)
891897
}
@@ -898,21 +904,29 @@ func (a *StaticAutoscaler) deleteCreatedNodesWithErrors() {
898904
}
899905

900906
// overrideNodesToDeleteForZeroOrMax returns a list of nodes to delete, taking into account that
901-
// node deletion for a "ZeroOrMaxNodeScaling" node group is atomic and should delete all nodes.
907+
// node deletion for a "ZeroOrMaxNodeScaling" node group should either keep or remove all the nodes.
902908
// For a non-"ZeroOrMaxNodeScaling" node group it returns the unchanged list of nodes to delete.
903909
func overrideNodesToDeleteForZeroOrMax(defaults config.NodeGroupAutoscalingOptions, nodeGroup cloudprovider.NodeGroup, nodesToDelete []*apiv1.Node) ([]*apiv1.Node, error) {
904910
opts, err := nodeGroup.GetOptions(defaults)
905911
if err != nil && err != cloudprovider.ErrNotImplemented {
906912
return []*apiv1.Node{}, fmt.Errorf("Failed to get node group options for %s: %s", nodeGroup.Id(), err)
907913
}
908914
// If a scale-up of "ZeroOrMaxNodeScaling" node group failed, the cleanup
909-
// should stick to the all-or-nothing principle. Deleting all nodes.
915+
// should stick to the all-or-nothing principle: delete either all nodes or none.
910916
if opts != nil && opts.ZeroOrMaxNodeScaling {
911917
instances, err := nodeGroup.Nodes()
912918
if err != nil {
913919
return []*apiv1.Node{}, fmt.Errorf("Failed to fill in nodes to delete from group %s based on ZeroOrMaxNodeScaling option: %s", nodeGroup.Id(), err)
914920
}
915-
return instancesToFakeNodes(instances), nil
921+
922+
// Remove all nodes in case when either:
923+
// 1. All nodes are failing
924+
// 2. AllowNonAtomicScaleUpToMax is false which means we want to atomically remove partially failed node groups
925+
if len(instances) == len(nodesToDelete) || !opts.AllowNonAtomicScaleUpToMax {
926+
// Remove all nodes
927+
return instancesToFakeNodes(instances), nil
928+
}
929+
return []*apiv1.Node{}, nil
916930
}
917931
// No override needed.
918932
return nodesToDelete, nil

0 commit comments

Comments
 (0)