Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
86796b9
feat(recommender): add OOMMinBumpUp&OOMBumpUpRatio to CRD
omerap12 Apr 7, 2025
8228c32
lint
omerap12 Apr 7, 2025
4c6bdfa
Add to test
omerap12 Apr 7, 2025
7605f81
fmt
omerap12 Apr 7, 2025
e0eeaaf
align values with defaults
omerap12 Apr 7, 2025
7ccaf49
fixed functions
omerap12 Apr 8, 2025
6787e30
Add e2e test and fixed typos
omerap12 Apr 9, 2025
0d00a1e
fmt: fixed function name
omerap12 Apr 11, 2025
85df968
merged master
omerap12 Jun 14, 2025
c2d0b5a
run generate flags
omerap12 Jun 14, 2025
d3211d6
in-progress
omerap12 Jun 18, 2025
95c22fa
in-progress
omerap12 Jun 18, 2025
53c610e
update e2e test
omerap12 Jul 26, 2025
bf5cb23
update e2e
omerap12 Jul 27, 2025
8463a5d
update e2e
omerap12 Jul 27, 2025
fad9615
migrate to quantity
omerap12 Jul 27, 2025
1679398
lint OOM -> Oom
omerap12 Jul 31, 2025
87f8bd6
fmt & fix e2e
omerap12 Jul 31, 2025
52110a7
OomBumpUpRatio->OOMBumpRatio
omerap12 Jul 31, 2025
6dbfc45
OOMBumpRatio -> OOMBumpUpRatio
omerap12 Aug 1, 2025
bdfdc9f
fixed typo
omerap12 Aug 1, 2025
58aec44
fixed comment
omerap12 Aug 4, 2025
fa913df
fixed typo
omerap12 Aug 5, 2025
7ebd1d0
fixed comment
omerap12 Aug 5, 2025
561c1b2
changed controller-gen version
omerap12 Aug 5, 2025
c04e788
fixed error message
omerap12 Aug 6, 2025
b73a5b5
Fixed feature flag default
omerap12 Aug 6, 2025
91ac55d
Minimum OOMBumpUpRatio is 1
omerap12 Aug 8, 2025
c843b16
update flags
omerap12 Aug 8, 2025
68b52f8
fixed e2e tests for PerVPAConfig
omerap12 Aug 9, 2025
e979364
Merge branch 'master' into oom-feat
omerap12 Aug 12, 2025
72889dd
update flags
omerap12 Aug 12, 2025
151ee25
Add logs when feature gate is disabled
omerap12 Aug 12, 2025
6cbd449
fixed e2e
omerap12 Aug 12, 2025
697934c
Add feature flag to admission controller
omerap12 Aug 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,22 @@ spec:
- Auto
- "Off"
type: string
oomBumpUpRatio:
anyOf:
- type: integer
- type: string
description: oomBumpUpRatio is the ratio to increase memory
when OOM is detected.
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
oomMinBumpUp:
anyOf:
- type: integer
- type: string
description: oomMinBumpUp is the minimum increase in memory
when OOM is detected.
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
type: array
type: object
Expand Down
2 changes: 2 additions & 0 deletions vertical-pod-autoscaler/docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ _Appears in:_
| `maxAllowed` _[ResourceList](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#resourcelist-v1-core)_ | Specifies the maximum amount of resources that will be recommended<br />for the container. The default is no maximum. | | |
| `controlledResources` _[ResourceName](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#resourcename-v1-core)_ | Specifies the type of recommendations that will be computed<br />(and possibly applied) by VPA.<br />If not specified, the default of [ResourceCPU, ResourceMemory] will be used. | | |
| `controlledValues` _[ContainerControlledValues](#containercontrolledvalues)_ | Specifies which resource values should be controlled.<br />The default is "RequestsAndLimits". | | Enum: [RequestsAndLimits RequestsOnly] <br /> |
| `oomBumpUpRatio` _float_ | OOMBumpUpRatio is the ratio to increase memory when OOM is detected. | | Minimum: 1 <br /> |
| `oomMinBumpUp` _float_ | OOMMinBumpUp is the minimum increase in memory when OOM is detected. | | Minimum: 0 <br /> |


#### ContainerScalingMode
Expand Down
10 changes: 5 additions & 5 deletions vertical-pod-autoscaler/docs/flags.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ This document is auto-generated from the flag definitions in the VPA admission-c
| `address` | string | ":8944" | The address to expose Prometheus metrics. |
| `alsologtostderr` | | | log to standard error as well as files (no effect when -logtostderr=true) |
| `client-ca-file` | string | "/etc/tls-certs/caCert.pem" | Path to CA PEM file. |
| `feature-gates` | mapStringBool | | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:<br>AllAlpha=true\|false (ALPHA - default=false)<br>AllBeta=true\|false (BETA - default=false)<br>InPlaceOrRecreate=true\|false (BETA - default=true) |
| `feature-gates` | mapStringBool | | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:<br>AllAlpha=true\|false (ALPHA - default=false)<br>AllBeta=true\|false (BETA - default=false)<br>InPlaceOrRecreate=true\|false (BETA - default=true)<br>PerVPAConfig=true\|false (ALPHA - default=false) |
| `ignored-vpa-object-namespaces` | string | | A comma-separated list of namespaces to ignore when searching for VPA objects. Leave empty to avoid ignoring any namespaces. These namespaces will not be cleaned by the garbage collector. |
| `kube-api-burst` | float | 100 | QPS burst limit when making requests to Kubernetes apiserver |
| `kube-api-qps` | float | 50 | QPS limit when making requests to Kubernetes apiserver |
Expand Down Expand Up @@ -68,7 +68,7 @@ This document is auto-generated from the flag definitions in the VPA recommender
| `cpu-integer-post-processor-enabled` | | | Enable the cpu-integer recommendation post processor. The post processor will round up CPU recommendations to a whole CPU for pods which were opted in by setting an appropriate label on VPA object (experimental) |
| `external-metrics-cpu-metric` | string | | ALPHA. Metric to use with external metrics provider for CPU usage. |
| `external-metrics-memory-metric` | string | | ALPHA. Metric to use with external metrics provider for memory usage. |
| `feature-gates` | mapStringBool | | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:<br>AllAlpha=true\|false (ALPHA - default=false)<br>AllBeta=true\|false (BETA - default=false)<br>InPlaceOrRecreate=true\|false (BETA - default=true) |
| `feature-gates` | mapStringBool | | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:<br>AllAlpha=true\|false (ALPHA - default=false)<br>AllBeta=true\|false (BETA - default=false)<br>InPlaceOrRecreate=true\|false (BETA - default=true)<br>PerVPAConfig=true\|false (ALPHA - default=false) |
| `history-length` | string | "8d" | How far back Prometheus has to be queried to get historical metrics |
| `history-resolution` | string | "1h" | Resolution at which Prometheus is queried for historical metrics |
| `humanize-memory` | | | DEPRECATED: Convert memory values in recommendations to the highest appropriate SI unit with up to 2 decimal places for better readability. This flag is deprecated and will be removed in a future version. Use --round-memory-bytes instead. |
Expand All @@ -95,8 +95,8 @@ This document is auto-generated from the flag definitions in the VPA recommender
| `metric-for-pod-labels` | string | "up{job=\"kubernetes-pods\"}" | Which metric to look for pod labels in metrics |
| `min-checkpoints` | int | 10 | Minimum number of checkpoints to write per recommender's main loop. WARNING: this flag is deprecated and doesn't have any effect. It will be removed in a future release. Refer to update-worker-count to influence the minimum number of checkpoints written per loop. |
| `one-output` | severity | | If true, only write logs to their native level (vs also writing to each lower severity level; no effect when -logtostderr=true) |
| `oom-bump-up-ratio` | float | 1.2 | The memory bump up ratio when OOM occurred, default is 1.2. |
| `oom-min-bump-up-bytes` | float | 1.048576e+08 | The minimal increase of memory when OOM occurred in bytes, default is 100 * 1024 * 1024 |
| `oom-bump-up-ratio` | float | 1.2 | Default memory bump up ratio when OOM occurs. This value applies to all VPAs unless overridden in the VPA spec. Default is 1.2. |
| `oom-min-bump-up-bytes` | float | 1.048576e+08 | Default minimal increase of memory (in bytes) when OOM occurs. This value applies to all VPAs unless overridden in the VPA spec. Default is 100 * 1024 * 1024 (100Mi). |
| `password` | string | | The password used in the prometheus server basic auth |
| `pod-label-prefix` | string | "pod_label_" | Which prefix to look for pod labels in metrics |
| `pod-name-label` | string | "kubernetes_pod_name" | Label name to look for pod names |
Expand Down Expand Up @@ -144,7 +144,7 @@ This document is auto-generated from the flag definitions in the VPA updater cod
| `eviction-rate-burst` | int | 1 | Burst of pods that can be evicted. |
| `eviction-rate-limit` | float | | Number of pods that can be evicted per second. A rate limit set to 0 or -1 will disable<br>the rate limiter. (default -1) |
| `eviction-tolerance` | float | 0.5 | Fraction of replica count that can be evicted for update, if more than one pod can be evicted. |
| `feature-gates` | mapStringBool | | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:<br>AllAlpha=true\|false (ALPHA - default=false)<br>AllBeta=true\|false (BETA - default=false)<br>InPlaceOrRecreate=true\|false (BETA - default=true) |
| `feature-gates` | mapStringBool | | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:<br>AllAlpha=true\|false (ALPHA - default=false)<br>AllBeta=true\|false (BETA - default=false)<br>InPlaceOrRecreate=true\|false (BETA - default=true)<br>PerVPAConfig=true\|false (ALPHA - default=false) |
| `ignored-vpa-object-namespaces` | string | | A comma-separated list of namespaces to ignore when searching for VPA objects. Leave empty to avoid ignoring any namespaces. These namespaces will not be cleaned by the garbage collector. |
| `in-recommendation-bounds-eviction-lifetime-threshold` | duration | 12h0m0s | Pods that live for at least that long can be evicted even if their request is within the [MinRecommended...MaxRecommended] range |
| `kube-api-burst` | float | 100 | QPS burst limit when making requests to Kubernetes apiserver |
Expand Down
163 changes: 143 additions & 20 deletions vertical-pod-autoscaler/e2e/v1/admission_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -882,26 +882,149 @@ var _ = AdmissionControllerE2eDescribe("Admission-controller", func() {
err := InstallRawVPA(f, validVPA)
gomega.Expect(err).NotTo(gomega.HaveOccurred(), "Valid VPA object rejected")

ginkgo.By("Setting up invalid VPA object")
// The invalid object differs by name and minAllowed - there is an invalid "requests" field.
invalidVPA := []byte(`{
"kind": "VerticalPodAutoscaler",
"apiVersion": "autoscaling.k8s.io/v1",
"metadata": {"name": "hamster-vpa-invalid"},
"spec": {
"targetRef": {
"apiVersion": "apps/v1",
"kind": "Deployment",
"name":"hamster"
},
"resourcePolicy": {
"containerPolicies": [{"containerName": "*", "minAllowed":{"requests":{"cpu":"50m"}}}]
}
}
}`)
err2 := InstallRawVPA(f, invalidVPA)
gomega.Expect(err2).To(gomega.HaveOccurred(), "Invalid VPA object accepted")
gomega.Expect(err2.Error()).To(gomega.MatchRegexp(`.*admission webhook .*vpa.* denied the request: .*`))
ginkgo.By("Setting up invalid VPA objects")
testCases := []struct {
name string
vpaJSON string
expectedErr string
}{
{
name: "Invalid oomBumpUpRatio (negative value)",
vpaJSON: `{
"apiVersion": "autoscaling.k8s.io/v1",
"kind": "VerticalPodAutoscaler",
"metadata": {"name": "oom-test-vpa"},
"spec": {
"targetRef": {
"apiVersion": "apps/v1",
"kind": "Deployment",
"name": "oom-test"
},
"updatePolicy": {
"updateMode": "Auto"
},
"resourcePolicy": {
"containerPolicies": [{
"containerName": "*",
"oomBumpUpRatio": -1,
"oomMinBumpUp": 104857600
}]
}
}
}`,
expectedErr: "spec.resourcePolicy.containerPolicies[0].oomBumpUpRatio: Invalid value: -1: spec.resourcePolicy.containerPolicies[0].oomBumpUpRatio in body should be greater than or equal to 1",
},
{
name: "Invalid oomBumpUpRatio (string value)",
vpaJSON: `{
"apiVersion": "autoscaling.k8s.io/v1",
"kind": "VerticalPodAutoscaler",
"metadata": {"name": "oom-test-vpa"},
"spec": {
"targetRef": {
"apiVersion": "apps/v1",
"kind": "Deployment",
"name": "oom-test"
},
"updatePolicy": {
"updateMode": "Auto"
},
"resourcePolicy": {
"containerPolicies": [{
"containerName": "*",
"oomBumpUpRatio": "12",
"oomMinBumpUp": 104857600
}]
}
}
}`,
expectedErr: "json: cannot unmarshal string into Go struct field ContainerResourcePolicy.spec.resourcePolicy.containerPolicies.oomBumpUpRatio of type float64",
},
{
name: "Invalid oomBumpUpRatio (less than 1)",
vpaJSON: `{
"apiVersion": "autoscaling.k8s.io/v1",
"kind": "VerticalPodAutoscaler",
"metadata": {"name": "oom-test-vpa"},
"spec": {
"targetRef": {
"apiVersion": "apps/v1",
"kind": "Deployment",
"name": "oom-test"
},
"updatePolicy": {
"updateMode": "Auto"
},
"resourcePolicy": {
"containerPolicies": [{
"containerName": "*",
"oomBumpUpRatio": 0.5,
"oomMinBumpUp": 104857600
}]
}
}
}`,
expectedErr: "spec.resourcePolicy.containerPolicies[0].oomBumpUpRatio: Invalid value: 0.5: spec.resourcePolicy.containerPolicies[0].oomBumpUpRatio in body should be greater than or equal to 1",
},
{
name: "Invalid oomMinBumpUp (negative value)",
vpaJSON: `{
"apiVersion": "autoscaling.k8s.io/v1",
"kind": "VerticalPodAutoscaler",
"metadata": {"name": "oom-test-vpa"},
"spec": {
"targetRef": {
"apiVersion": "apps/v1",
"kind": "Deployment",
"name": "oom-test"
},
"updatePolicy": {
"updateMode": "Auto"
},
"resourcePolicy": {
"containerPolicies": [{
"containerName": "*",
"oomBumpUpRatio": 2,
"oomMinBumpUp": -1
}]
}
}
}`,
expectedErr: "spec.resourcePolicy.containerPolicies[0].oomMinBumpUp: Invalid value: -1: spec.resourcePolicy.containerPolicies[0].oomMinBumpUp in body should be greater than or equal to 0",
},
{
name: "Invalid minAllowed (invalid requests field)",
vpaJSON: `{
"apiVersion": "autoscaling.k8s.io/v1",
"kind": "VerticalPodAutoscaler",
"metadata": {"name": "hamster-vpa-invalid"},
"spec": {
"targetRef": {
"apiVersion": "apps/v1",
"kind": "Deployment",
"name": "hamster"
},
"resourcePolicy": {
"containerPolicies": [{
"containerName": "*",
"minAllowed": {
"requests": {
"cpu": "50m"
}
}
}]
}
}
}`,
expectedErr: "admission webhook .*vpa.* denied the request:",
},
}
for _, tc := range testCases {
ginkgo.By(fmt.Sprintf("Testing %s", tc.name))
err := InstallRawVPA(f, []byte(tc.vpaJSON))
gomega.Expect(err).To(gomega.HaveOccurred(), "Invalid VPA object accepted")
gomega.Expect(err.Error()).To(gomega.MatchRegexp(tc.expectedErr))
}
})

ginkgo.It("reloads the webhook leaf and CA certificate", func(ctx ginkgo.SpecContext) {
Expand Down
15 changes: 15 additions & 0 deletions vertical-pod-autoscaler/e2e/v1/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
vpa_clientset "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/clientset/versioned"
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/framework"
framework_deployment "k8s.io/kubernetes/test/e2e/framework/deployment"
Expand Down Expand Up @@ -612,6 +613,20 @@ func WaitForPodsUpdatedWithoutEviction(f *framework.Framework, initialPods *apiv
return err
}

// checkPerVPAConfigTestsEnabled skips the current spec unless the
// "vpa-recommender" Deployment in VpaNamespace has a container argument
// containing "<PerVPAConfig gate name>=true".
//
// Before looking at the arguments it asserts that the Deployment can be
// fetched, that its pod template has exactly one container, and that this
// container is named "recommender".
func checkPerVPAConfigTestsEnabled(f *framework.Framework) {
	ginkgo.By("Checking PerVPAConfig feature gate is enabled for recommender")

	recommenderDeployment, err := f.ClientSet.AppsV1().Deployments(VpaNamespace).Get(context.TODO(), "vpa-recommender", metav1.GetOptions{})
	gomega.Expect(err).NotTo(gomega.HaveOccurred())

	containers := recommenderDeployment.Spec.Template.Spec.Containers
	gomega.Expect(containers).To(gomega.HaveLen(1))

	recommender := containers[0]
	gomega.Expect(recommender.Name).To(gomega.Equal("recommender"))

	// The gate counts as enabled when any argument mentions "<gate>=true".
	gateEnabledArg := fmt.Sprintf("%s=true", string(features.PerVPAConfig))
	if anyContainsSubstring(recommender.Args, gateEnabledArg) {
		return
	}
	ginkgo.Skip("Skipping suite: PerVPAConfig feature gate is not enabled for the VPA recommender")
}

func anyContainsSubstring(arr []string, substr string) bool {
for _, s := range arr {
if strings.Contains(s, substr) {
Expand Down
59 changes: 59 additions & 0 deletions vertical-pod-autoscaler/e2e/v1/recommender.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (

autoscaling "k8s.io/api/autoscaling/v1"
apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
Expand Down Expand Up @@ -411,6 +412,64 @@ var _ = RecommenderE2eDescribe("VPA CRD object", func() {
})
})

// This suite checks that a per-VPA oomBumpUpRatio configured on the VPA object
// leads to a memory recommendation larger than what the default bump up ratio
// would give. Every spec first calls checkPerVPAConfigTestsEnabled, so the
// suite only runs when the PerVPAConfig feature gate is enabled.
var _ = RecommenderE2eDescribe("OOM with custom config", ginkgo.Label("FG:PerVPAConfig"), func() {
	const (
		replicas = 3
		// defaultOOMBumpUpRatio mirrors the recommender's default
		// --oom-bump-up-ratio value (DefaultOOMBumpUpRatio).
		defaultOOMBumpUpRatio = 1.2
		// customOOMBumpUpRatio is the per-VPA override installed in BeforeEach.
		// It has to be larger than defaultOOMBumpUpRatio for the assertion
		// below to tell the two configurations apart.
		customOOMBumpUpRatio = 2
	)
	f := framework.NewDefaultFramework("vertical-pod-autoscaling")
	f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline
	var (
		vpaCRD       *vpa_types.VerticalPodAutoscaler
		vpaClientSet vpa_clientset.Interface
	)
	ginkgo.BeforeEach(func() {
		checkPerVPAConfigTestsEnabled(f)
		ns := f.Namespace.Name
		vpaClientSet = getVpaClientSet(f)
		ginkgo.By("Setting up a hamster deployment")
		runOomingReplicationController(
			f.ClientSet,
			ns,
			"hamster",
			replicas)
		ginkgo.By("Setting up a VPA CRD")
		targetRef := &autoscaling.CrossVersionObjectReference{
			APIVersion: "v1",
			Kind:       "Deployment",
			Name:       "hamster",
		}
		containerName := GetHamsterContainerNameByIndex(0)
		vpaCRD = test.VerticalPodAutoscaler().
			WithName("hamster-vpa").
			WithNamespace(f.Namespace.Name).
			WithTargetRef(targetRef).
			WithContainer(containerName).
			WithOOMBumpUpRatio(resource.NewQuantity(customOOMBumpUpRatio, resource.DecimalSI)).
			Get()
		InstallVPA(f, vpaCRD)
	})
	ginkgo.It("have memory requests growing with OOMs more than the default", func() {
		listOptions := metav1.ListOptions{
			LabelSelector: "name=hamster",
			FieldSelector: getPodSelectorExcludingDonePodsOrDie(),
		}
		err := waitForResourceRequestInRangeInPods(
			f, oomTestTimeout, listOptions, apiv1.ResourceMemory,
			ParseQuantityOrDie("1024Mi"), ParseQuantityOrDie("1024Mi"))
		gomega.Expect(err).NotTo(gomega.HaveOccurred())
		ginkgo.By("Waiting for recommendation to be filled")
		vpa, err := WaitForRecommendationPresent(vpaClientSet, vpaCRD)
		gomega.Expect(err).NotTo(gomega.HaveOccurred())
		gomega.Expect(vpa.Status.Recommendation.ContainerRecommendations).Should(gomega.HaveLen(1))

		currentMemory := vpa.Status.Recommendation.ContainerRecommendations[0].Target.Memory().Value()
		oomReplicationControllerRequestLimit := int64(1024 * 1024 * 1024) // from runOomingReplicationController
		// Threshold the recommendation must exceed: the request bumped by the
		// default ratio only.
		defaultBumpMemory := int64(float64(oomReplicationControllerRequestLimit) * defaultOOMBumpUpRatio)

		// The failure message reports the threshold that is actually compared,
		// so a failing run is not misread as a comparison against the custom ratio.
		gomega.Expect(currentMemory).Should(gomega.BeNumerically(">", defaultBumpMemory),
			fmt.Sprintf("Memory recommendation should be bigger than what the default bump up ratio (%.1fx) yields; custom ratio is %dx. Got: %d, Expected: > %d",
				defaultOOMBumpUpRatio, customOOMBumpUpRatio, currentMemory, defaultBumpMemory))
	})
})

func deleteRecommender(c clientset.Interface) error {
namespace := "kube-system"
listOptions := metav1.ListOptions{}
Expand Down
Loading
Loading