kubernetes · omerap12 · Apr 7, 2025 · Apr 7, 2025 · Apr 7, 2025 · Apr 7, 2025
diff --git a/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml b/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml
@@ -372,6 +372,22 @@ spec:
                           - Auto
                           - "Off"
                           type: string
+                        oomBumpUpRatio:
+                          anyOf:
+                          - type: integer
+                          - type: string
+                          description: oomBumpUpRatio is the ratio to increase memory
+                            when OOM is detected.
+                          pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                          x-kubernetes-int-or-string: true
+                        oomMinBumpUp:
+                          anyOf:
+                          - type: integer
+                          - type: string
+                          description: oomMinBumpUp is the minimum increase in memory
+                            when OOM is detected.
+                          pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                          x-kubernetes-int-or-string: true
                       type: object
                     type: array
                 type: object

diff --git a/vertical-pod-autoscaler/docs/api.md b/vertical-pod-autoscaler/docs/api.md
@@ -48,6 +48,8 @@ _Appears in:_
 | `maxAllowed` _[ResourceList](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#resourcelist-v1-core)_ | Specifies the maximum amount of resources that will be recommended<br />for the container. The default is no maximum. |  |  |
 | `controlledResources` _[ResourceName](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#resourcename-v1-core)_ | Specifies the type of recommendations that will be computed<br />(and possibly applied) by VPA.<br />If not specified, the default of [ResourceCPU, ResourceMemory] will be used. |  |  |
 | `controlledValues` _[ContainerControlledValues](#containercontrolledvalues)_ | Specifies which resource values should be controlled.<br />The default is "RequestsAndLimits". |  | Enum: [RequestsAndLimits RequestsOnly] <br /> |
+| `oomBumpUpRatio` _float_ | OOMBumpUpRatio is the ratio to increase resources when OOM is detected. |  | Minimum: 1 <br /> |
+| `oomMinBumpUp` _float_ | OOMMinBumpUp is the minimum increase in resources when OOM is detected. |  | Minimum: 0 <br /> |
 
 
 #### ContainerScalingMode

diff --git a/vertical-pod-autoscaler/docs/flags.md b/vertical-pod-autoscaler/docs/flags.md
@@ -14,7 +14,7 @@ This document is auto-generated from the flag definitions in the VPA admission-c
 | `address` | string |  ":8944" | The address to expose Prometheus metrics.  |
 | `alsologtostderr` |  |  | log to standard error as well as files (no effect when -logtostderr=true) |
 | `client-ca-file` | string |  "/etc/tls-certs/caCert.pem" | Path to CA PEM file.  |
-| `feature-gates` | mapStringBool |  | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:<br>AllAlpha=true\|false (ALPHA - default=false)<br>AllBeta=true\|false (BETA - default=false)<br>InPlaceOrRecreate=true\|false (BETA - default=true) |
+| `feature-gates` | mapStringBool |  | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:<br>AllAlpha=true\|false (ALPHA - default=false)<br>AllBeta=true\|false (BETA - default=false)<br>InPlaceOrRecreate=true\|false (BETA - default=true)<br>PerVPAConfig=true\|false (ALPHA - default=false) |
 | `ignored-vpa-object-namespaces` | string |  | A comma-separated list of namespaces to ignore when searching for VPA objects. Leave empty to avoid ignoring any namespaces. These namespaces will not be cleaned by the garbage collector. |
 | `kube-api-burst` | float |  100 | QPS burst limit when making requests to Kubernetes apiserver  |
 | `kube-api-qps` | float |  50 | QPS limit when making requests to Kubernetes apiserver  |
@@ -68,7 +68,7 @@ This document is auto-generated from the flag definitions in the VPA recommender
 | `cpu-integer-post-processor-enabled` |  |  | Enable the cpu-integer recommendation post processor. The post processor will round up CPU recommendations to a whole CPU for pods which were opted in by setting an appropriate label on VPA object (experimental) |
 | `external-metrics-cpu-metric` | string |  | ALPHA.  Metric to use with external metrics provider for CPU usage. |
 | `external-metrics-memory-metric` | string |  | ALPHA.  Metric to use with external metrics provider for memory usage. |
-| `feature-gates` | mapStringBool |  | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:<br>AllAlpha=true\|false (ALPHA - default=false)<br>AllBeta=true\|false (BETA - default=false)<br>InPlaceOrRecreate=true\|false (BETA - default=true) |
+| `feature-gates` | mapStringBool |  | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:<br>AllAlpha=true\|false (ALPHA - default=false)<br>AllBeta=true\|false (BETA - default=false)<br>InPlaceOrRecreate=true\|false (BETA - default=true)<br>PerVPAConfig=true\|false (ALPHA - default=false) |
 | `history-length` | string |  "8d" | How much time back prometheus have to be queried to get historical metrics  |
 | `history-resolution` | string |  "1h" | Resolution at which Prometheus is queried for historical metrics  |
 | `humanize-memory` |  |  | DEPRECATED: Convert memory values in recommendations to the highest appropriate SI unit with up to 2 decimal places for better readability. This flag is deprecated and will be removed in a future version. Use --round-memory-bytes instead. |
@@ -95,8 +95,8 @@ This document is auto-generated from the flag definitions in the VPA recommender
 | `metric-for-pod-labels` | string |  "up{job=\"kubernetes-pods\"}" | Which metric to look for pod labels in metrics  |
 | `min-checkpoints` | int |  10 | Minimum number of checkpoints to write per recommender's main loop. WARNING: this flag is deprecated and doesn't have any effect. It will be removed in a future release. Refer to update-worker-count to influence the minimum number of checkpoints written per loop.  |
 | `one-output` | severity |  | If true, only write logs to their native level (vs also writing to each lower severity level; no effect when -logtostderr=true) |
-| `oom-bump-up-ratio` | float |  1.2 | The memory bump up ratio when OOM occurred, default is 1.2.  |
-| `oom-min-bump-up-bytes` | float |  1.048576e+08 | The minimal increase of memory when OOM occurred in bytes, default is 100 * 1024 * 1024  |
+| `oom-bump-up-ratio` | float |  1.2 | Default memory bump up ratio when OOM occurs. This value applies to all VPAs unless overridden in the VPA spec. Default is 1.2.  |
+| `oom-min-bump-up-bytes` | float |  1.048576e+08 | Default minimal increase of memory (in bytes) when OOM occurs. This value applies to all VPAs unless overridden in the VPA spec. Default is 100 * 1024 * 1024 (100Mi).  |
 | `password` | string |  | The password used in the prometheus server basic auth |
 | `pod-label-prefix` | string |  "pod_label_" | Which prefix to look for pod labels in metrics  |
 | `pod-name-label` | string |  "kubernetes_pod_name" | Label name to look for pod names  |
@@ -144,7 +144,7 @@ This document is auto-generated from the flag definitions in the VPA updater cod
 | `eviction-rate-burst` | int |  1 | Burst of pods that can be evicted.  |
 | `eviction-rate-limit` | float |  | Number of pods that can be evicted per seconds. A rate limit set to 0 or -1 will disable<br>the rate limiter. (default -1) |
 | `eviction-tolerance` | float |  0.5 | Fraction of replica count that can be evicted for update, if more than one pod can be evicted.  |
-| `feature-gates` | mapStringBool |  | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:<br>AllAlpha=true\|false (ALPHA - default=false)<br>AllBeta=true\|false (BETA - default=false)<br>InPlaceOrRecreate=true\|false (BETA - default=true) |
+| `feature-gates` | mapStringBool |  | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:<br>AllAlpha=true\|false (ALPHA - default=false)<br>AllBeta=true\|false (BETA - default=false)<br>InPlaceOrRecreate=true\|false (BETA - default=true)<br>PerVPAConfig=true\|false (ALPHA - default=false) |
 | `ignored-vpa-object-namespaces` | string |  | A comma-separated list of namespaces to ignore when searching for VPA objects. Leave empty to avoid ignoring any namespaces. These namespaces will not be cleaned by the garbage collector. |
 | `in-recommendation-bounds-eviction-lifetime-threshold` |  |  12h0m0s | duration   Pods that live for at least that long can be evicted even if their request is within the [MinRecommended...MaxRecommended] range  |
 | `kube-api-burst` | float |  100 | QPS burst limit when making requests to Kubernetes apiserver  |

diff --git a/vertical-pod-autoscaler/e2e/v1/admission_controller.go b/vertical-pod-autoscaler/e2e/v1/admission_controller.go
@@ -882,26 +882,149 @@ var _ = AdmissionControllerE2eDescribe("Admission-controller", func() {
 		err := InstallRawVPA(f, validVPA)
 		gomega.Expect(err).NotTo(gomega.HaveOccurred(), "Valid VPA object rejected")
 
-		ginkgo.By("Setting up invalid VPA object")
-		// The invalid object differs by name and minAllowed - there is an invalid "requests" field.
-		invalidVPA := []byte(`{
-			"kind": "VerticalPodAutoscaler",
-			"apiVersion": "autoscaling.k8s.io/v1",
-			"metadata": {"name": "hamster-vpa-invalid"},
-			"spec": {
-				"targetRef": {
-					"apiVersion": "apps/v1",
-					"kind": "Deployment",
-					"name":"hamster"
-				},
-		   	"resourcePolicy": {
-		  		"containerPolicies": [{"containerName": "*", "minAllowed":{"requests":{"cpu":"50m"}}}]
-		  	}
-		  }
-		}`)
-		err2 := InstallRawVPA(f, invalidVPA)
-		gomega.Expect(err2).To(gomega.HaveOccurred(), "Invalid VPA object accepted")
-		gomega.Expect(err2.Error()).To(gomega.MatchRegexp(`.*admission webhook .*vpa.* denied the request: .*`))
+		ginkgo.By("Setting up invalid VPA objects")
+		testCases := []struct {
+			name        string
+			vpaJSON     string
+			expectedErr string
+		}{
+			{
+				name: "Invalid oomBumpUpRatio (negative value)",
+				vpaJSON: `{
+            "apiVersion": "autoscaling.k8s.io/v1",
+            "kind": "VerticalPodAutoscaler",
+            "metadata": {"name": "oom-test-vpa"},
+            "spec": {
+                "targetRef": {
+                    "apiVersion": "apps/v1",
+                    "kind": "Deployment",
+                    "name": "oom-test"
+                },
+                "updatePolicy": {
+                    "updateMode": "Auto"
+                },
+                "resourcePolicy": {
+                    "containerPolicies": [{
+                        "containerName": "*",
+                        "oomBumpUpRatio": -1,
+                        "oomMinBumpUp": 104857600
+                    }]
+                }
+            }
+        }`,
+				expectedErr: `spec\.resourcePolicy\.containerPolicies\[0\]\.oomBumpUpRatio: Invalid value: -1: spec\.resourcePolicy\.containerPolicies\[0\]\.oomBumpUpRatio in body should be greater than or equal to 1`, // matched with MatchRegexp: "[0]" and "." are escaped so they match literally
+			},
+			{
+				name: "Invalid oomBumpUpRatio (string value)",
+				vpaJSON: `{
+            "apiVersion": "autoscaling.k8s.io/v1",
+            "kind": "VerticalPodAutoscaler",
+            "metadata": {"name": "oom-test-vpa"},
+            "spec": {
+                "targetRef": {
+                    "apiVersion": "apps/v1",
+                    "kind": "Deployment",
+                    "name": "oom-test"
+                },
+                "updatePolicy": {
+                    "updateMode": "Auto"
+                },
+                "resourcePolicy": {
+                    "containerPolicies": [{
+                        "containerName": "*",
+                        "oomBumpUpRatio": "12",
+                        "oomMinBumpUp": 104857600
+                    }]
+                }
+            }
+        }`,
+				expectedErr: "json: cannot unmarshal string into Go struct field ContainerResourcePolicy.spec.resourcePolicy.containerPolicies.oomBumpUpRatio of type float64",
+			},
+			{
+				name: "Invalid oomBumpUpRatio (less than 1)",
+				vpaJSON: `{
+            "apiVersion": "autoscaling.k8s.io/v1",
+            "kind": "VerticalPodAutoscaler",
+            "metadata": {"name": "oom-test-vpa"},
+            "spec": {
+                "targetRef": {
+                    "apiVersion": "apps/v1",
+                    "kind": "Deployment",
+                    "name": "oom-test"
+                },
+                "updatePolicy": {
+                    "updateMode": "Auto"
+                },
+                "resourcePolicy": {
+                    "containerPolicies": [{
+                        "containerName": "*",
+                        "oomBumpUpRatio": 0.5,
+                        "oomMinBumpUp": 104857600
+                    }]
+                }
+            }
+        }`,
+				expectedErr: `spec\.resourcePolicy\.containerPolicies\[0\]\.oomBumpUpRatio: Invalid value: 0\.5: spec\.resourcePolicy\.containerPolicies\[0\]\.oomBumpUpRatio in body should be greater than or equal to 1`, // matched with MatchRegexp: "[0]" and "." are escaped so they match literally
+			},
+			{
+				name: "Invalid oomMinBumpUp (negative value)",
+				vpaJSON: `{
+            "apiVersion": "autoscaling.k8s.io/v1",
+            "kind": "VerticalPodAutoscaler",
+            "metadata": {"name": "oom-test-vpa"},
+            "spec": {
+                "targetRef": {
+                    "apiVersion": "apps/v1",
+                    "kind": "Deployment",
+                    "name": "oom-test"
+                },
+                "updatePolicy": {
+                    "updateMode": "Auto"
+                },
+                "resourcePolicy": {
+                    "containerPolicies": [{
+                        "containerName": "*",
+                        "oomBumpUpRatio": 2,
+                        "oomMinBumpUp": -1
+                    }]
+                }
+            }
+        }`,
+				expectedErr: `spec\.resourcePolicy\.containerPolicies\[0\]\.oomMinBumpUp: Invalid value: -1: spec\.resourcePolicy\.containerPolicies\[0\]\.oomMinBumpUp in body should be greater than or equal to 0`, // matched with MatchRegexp: "[0]" and "." are escaped so they match literally
+			},
+			{
+				name: "Invalid minAllowed (invalid requests field)",
+				vpaJSON: `{
+            "apiVersion": "autoscaling.k8s.io/v1",
+            "kind": "VerticalPodAutoscaler",
+            "metadata": {"name": "hamster-vpa-invalid"},
+            "spec": {
+                "targetRef": {
+                    "apiVersion": "apps/v1",
+                    "kind": "Deployment",
+                    "name": "hamster"
+                },
+                "resourcePolicy": {
+                    "containerPolicies": [{
+                        "containerName": "*",
+                        "minAllowed": {
+                            "requests": {
+                                "cpu": "50m"
+                            }
+                        }
+                    }]
+                }
+            }
+        }`,
+				expectedErr: "admission webhook .*vpa.* denied the request:",
+			},
+		}
+		for _, tc := range testCases {
+			ginkgo.By(fmt.Sprintf("Testing %s", tc.name))
+			err := InstallRawVPA(f, []byte(tc.vpaJSON))
+			gomega.Expect(err).To(gomega.HaveOccurred(), "Invalid VPA object accepted")
+			gomega.Expect(err.Error()).To(gomega.MatchRegexp(tc.expectedErr))
+		}
 	})
 
 	ginkgo.It("reloads the webhook leaf and CA certificate", func(ctx ginkgo.SpecContext) {

diff --git a/vertical-pod-autoscaler/e2e/v1/common.go b/vertical-pod-autoscaler/e2e/v1/common.go
@@ -37,6 +37,7 @@ import (
 	"k8s.io/apimachinery/pkg/util/wait"
 	vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
 	vpa_clientset "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/clientset/versioned"
+	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/kubernetes/test/e2e/framework"
 	framework_deployment "k8s.io/kubernetes/test/e2e/framework/deployment"
@@ -612,6 +613,20 @@ func WaitForPodsUpdatedWithoutEviction(f *framework.Framework, initialPods *apiv
 	return err
 }
 
+// checkPerVPAConfigTestsEnabled skips the running spec unless an argument of the single
+// "recommender" container in the vpa-recommender deployment contains "<PerVPAConfig gate>=true".
+func checkPerVPAConfigTestsEnabled(f *framework.Framework) {
+	ginkgo.By("Checking PerVPAConfig feature gate is enabled for recommender")
+	recommender, err := f.ClientSet.AppsV1().Deployments(VpaNamespace).Get(context.TODO(), "vpa-recommender", metav1.GetOptions{})
+	gomega.Expect(err).NotTo(gomega.HaveOccurred())
+	containers := recommender.Spec.Template.Spec.Containers
+	gomega.Expect(containers).To(gomega.HaveLen(1))
+	gomega.Expect(containers[0].Name).To(gomega.Equal("recommender"))
+	if gateArg := fmt.Sprintf("%s=true", string(features.PerVPAConfig)); !anyContainsSubstring(containers[0].Args, gateArg) {
+		ginkgo.Skip("Skipping suite: PerVPAConfig feature gate is not enabled for the VPA recommender")
+	}
+}
+
 func anyContainsSubstring(arr []string, substr string) bool {
 	for _, s := range arr {
 		if strings.Contains(s, substr) {

diff --git a/vertical-pod-autoscaler/e2e/v1/recommender.go b/vertical-pod-autoscaler/e2e/v1/recommender.go
@@ -24,6 +24,7 @@ import (
 
 	autoscaling "k8s.io/api/autoscaling/v1"
 	apiv1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/fields"
 	vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
@@ -411,6 +412,64 @@ var _ = RecommenderE2eDescribe("VPA CRD object", func() {
 	})
 })
 
+// Exercises OOM handling for a VPA built with a custom OOM bump-up ratio of 2 and checks that
+// the recommended memory target exceeds 1.2x the 1Gi request of the ooming workload. Every
+// spec first runs checkPerVPAConfigTestsEnabled, which may skip it.
+var _ = RecommenderE2eDescribe("OOM with custom config", ginkgo.Label("FG:PerVPAConfig"), func() {
+	const replicas = 3
+	f := framework.NewDefaultFramework("vertical-pod-autoscaling")
+	f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline
+	var (
+		vpaCRD       *vpa_types.VerticalPodAutoscaler
+		vpaClientSet vpa_clientset.Interface
+	)
+	ginkgo.BeforeEach(func() {
+		checkPerVPAConfigTestsEnabled(f)
+		ns := f.Namespace.Name
+		vpaClientSet = getVpaClientSet(f)
+		ginkgo.By("Setting up a hamster deployment")
+		runOomingReplicationController(f.ClientSet, ns, "hamster", replicas)
+		ginkgo.By("Setting up a VPA CRD")
+		targetRef := &autoscaling.CrossVersionObjectReference{
+			APIVersion: "v1",
+			Kind:       "Deployment",
+			Name:       "hamster",
+		}
+		containerName := GetHamsterContainerNameByIndex(0)
+		vpaCRD = test.VerticalPodAutoscaler().
+			WithName("hamster-vpa").
+			WithNamespace(f.Namespace.Name).
+			WithTargetRef(targetRef).
+			WithContainer(containerName).
+			WithOOMBumpUpRatio(resource.NewQuantity(2, resource.DecimalSI)).
+			Get()
+		InstallVPA(f, vpaCRD)
+	})
+	ginkgo.It("have memory requests growing with OOMs more than the default", func() {
+		listOptions := metav1.ListOptions{
+			LabelSelector: "name=hamster",
+			FieldSelector: getPodSelectorExcludingDonePodsOrDie(),
+		}
+		err := waitForResourceRequestInRangeInPods(
+			f, oomTestTimeout, listOptions, apiv1.ResourceMemory,
+			ParseQuantityOrDie("1024Mi"), ParseQuantityOrDie("1024Mi"))
+		gomega.Expect(err).NotTo(gomega.HaveOccurred())
+		ginkgo.By("Waiting for recommendation to be filled")
+		vpa, err := WaitForRecommendationPresent(vpaClientSet, vpaCRD)
+		gomega.Expect(err).NotTo(gomega.HaveOccurred())
+		gomega.Expect(vpa.Status.Recommendation.ContainerRecommendations).Should(gomega.HaveLen(1))
+
+		currentMemory := vpa.Status.Recommendation.ContainerRecommendations[0].Target.Memory().Value()
+		oomReplicationControllerRequestLimit := int64(1024 * 1024 * 1024)        // from runOomingReplicationController
+		defaultBumpMemory := float64(oomReplicationControllerRequestLimit) * 1.2 // DefaultOOMBumpUpRatio
+		customBumpMemory := float64(oomReplicationControllerRequestLimit) * 2.0  // Custom ratio from VPA config
+
+		// The failure message reports the bound that is actually asserted (the default 1.2x bump).
+		gomega.Expect(currentMemory).Should(gomega.BeNumerically(">", int64(defaultBumpMemory)),
+			fmt.Sprintf("Memory recommendation should be bigger than the default bump up (1.2x). Got: %d, Expected: > %d (custom 2x ratio gives %d)", currentMemory, int64(defaultBumpMemory), int64(customBumpMemory)))
+	})
+})
+
 func deleteRecommender(c clientset.Interface) error {
 	namespace := "kube-system"
 	listOptions := metav1.ListOptions{}