Merge pull request #8320 from adrianmoisey/fix-race

k8s-ci-robot · web-flow · commit 13dde43b10f2 · 2025-08-13T14:47:07.000-07:00
Add some race-condition protection to VPA recommender
diff --git a/vertical-pod-autoscaler/docs/flags.md b/vertical-pod-autoscaler/docs/flags.md
@@ -125,7 +125,7 @@ This document is auto-generated from the flag definitions in the VPA recommender
 | `storage` | string |  | Specifies storage mode. Supported values: prometheus, checkpoint  |
 | `target-cpu-percentile` | float |  0.9 | CPU usage percentile that will be used as a base for CPU target recommendation. Doesn't affect CPU lower bound, CPU upper bound nor memory recommendations.  |
 | `target-memory-percentile` | float |  0.9 | Memory usage percentile that will be used as a base for memory target recommendation. Doesn't affect memory lower bound nor memory upper bound.  |
-| `update-worker-count` |  |  10 | kube-api-qps                       Number of concurrent workers to update VPA recommendations and checkpoints. When increasing this setting, make sure the client-side rate limits (kube-api-qps and `kube-api-burst`) are either increased or turned off as well. Determines the minimum number of VPA checkpoints written per recommender loop.  |
+| `update-worker-count` | int |  10 | Number of concurrent workers to update VPA recommendations and checkpoints. When increasing this setting, make sure the client-side rate limits ('kube-api-qps' and 'kube-api-burst') are either increased or turned off as well. Determines the minimum number of VPA checkpoints written per recommender loop.  |
 | `use-external-metrics` |  |  | ALPHA.  Use an external metrics provider instead of metrics_server. |
 | `username` | string |  | The username used in the prometheus server basic auth |
 | `v,` |  | : 4 | , --v Level                                                set the log level verbosity  (default 4) |
diff --git a/vertical-pod-autoscaler/pkg/recommender/main.go b/vertical-pod-autoscaler/pkg/recommender/main.go
@@ -66,7 +66,7 @@ var (
 	address                = flag.String("address", ":8942", "The address to expose Prometheus metrics.")
 	storage                = flag.String("storage", "", `Specifies storage mode. Supported values: prometheus, checkpoint (default)`)
 	memorySaver            = flag.Bool("memory-saver", false, `If true, only track pods which have an associated VPA`)
-	updateWorkerCount      = flag.Int("update-worker-count", 10, "Number of concurrent workers to update VPA recommendations and checkpoints. When increasing this setting, make sure the client-side rate limits (`kube-api-qps` and `kube-api-burst`) are either increased or turned off as well. Determines the minimum number of VPA checkpoints written per recommender loop.")
+	updateWorkerCount      = flag.Int("update-worker-count", 10, "Number of concurrent workers to update VPA recommendations and checkpoints. When increasing this setting, make sure the client-side rate limits ('kube-api-qps' and 'kube-api-burst') are either increased or turned off as well. Determines the minimum number of VPA checkpoints written per recommender loop.")
 )
 
 // Prometheus history provider flags
diff --git a/vertical-pod-autoscaler/pkg/recommender/model/aggregate_container_state.go b/vertical-pod-autoscaler/pkg/recommender/model/aggregate_container_state.go
@@ -37,6 +37,7 @@ package model
 
 import (
 	"fmt"
+	"sync"
 	"time"
 
 	corev1 "k8s.io/api/core/v1"
@@ -105,16 +106,27 @@ type AggregateContainerState struct {
 	// we want to know if it needs recommendation, if the recommendation
 	// is present and if the automatic updates are on (are we able to
 	// apply the recommendation to the pods).
-	LastRecommendation  corev1.ResourceList
+	lastRecommendation  corev1.ResourceList
 	IsUnderVPA          bool
 	UpdateMode          *vpa_types.UpdateMode
 	ScalingMode         *vpa_types.ContainerScalingMode
 	ControlledResources *[]ResourceName
+
+	mutex sync.RWMutex
 }
 
-// GetLastRecommendation returns last recorded recommendation.
+// GetLastRecommendation returns last recorded recommendation in a thread-safe manner.
 func (a *AggregateContainerState) GetLastRecommendation() corev1.ResourceList {
-	return a.LastRecommendation
+	a.mutex.RLock()
+	defer a.mutex.RUnlock()
+	return a.lastRecommendation
+}
+
+// SetLastRecommendation sets the last recorded recommendation in a thread-safe manner.
+func (a *AggregateContainerState) SetLastRecommendation(recommendation corev1.ResourceList) {
+	a.mutex.Lock()
+	defer a.mutex.Unlock()
+	a.lastRecommendation = recommendation
 }
 
 // NeedsRecommendation returns true if the state should have recommendation calculated.
@@ -147,7 +159,7 @@ func (a *AggregateContainerState) GetControlledResources() []ResourceName {
 // a VPA object.
 func (a *AggregateContainerState) MarkNotAutoscaled() {
 	a.IsUnderVPA = false
-	a.LastRecommendation = nil
+	a.SetLastRecommendation(nil)
 	a.UpdateMode = nil
 	a.ScalingMode = nil
 	a.ControlledResources = nil
diff --git a/vertical-pod-autoscaler/pkg/recommender/model/cluster.go b/vertical-pod-autoscaler/pkg/recommender/model/cluster.go
@@ -19,6 +19,7 @@ package model
 import (
 	"context"
 	"fmt"
+	"sync"
 	"time"
 
 	apiv1 "k8s.io/api/core/v1"
@@ -70,6 +71,7 @@ type clusterState struct {
 	// VPA objects in the cluster that have no recommendation mapped to the first
 	// time we've noticed the recommendation missing or last time we logged
 	// a warning about it.
+	// TODO consider switching to a sync.Map for emptyVPAs
 	emptyVPAs map[VpaID]time.Time
 	// Observed VPAs. Used to check if there are updates needed.
 	observedVPAs []*vpa_types.VerticalPodAutoscaler
@@ -82,6 +84,9 @@ type clusterState struct {
 
 	lastAggregateContainerStateGC time.Time
 	gcInterval                    time.Duration
+
+	// Mutex to protect concurrent access to maps
+	mutex sync.RWMutex
 }
 
 // StateMapSize is the number of pods being tracked by the VPA
@@ -319,6 +324,8 @@ func (cluster *clusterState) DeleteVpa(vpaID VpaID) error {
 		state.MarkNotAutoscaled()
 	}
 	delete(cluster.vpas, vpaID)
+	cluster.mutex.Lock()
+	defer cluster.mutex.Unlock()
 	delete(cluster.emptyVPAs, vpaID)
 	return nil
 }
@@ -464,6 +471,8 @@ func (cluster *clusterState) getContributiveAggregateStateKeys(ctx context.Conte
 // keep track of empty recommendations and log information about them
 // periodically.
 func (cluster *clusterState) RecordRecommendation(vpa *Vpa, now time.Time) error {
+	cluster.mutex.Lock()
+	defer cluster.mutex.Unlock()
 	if vpa.Recommendation != nil && len(vpa.Recommendation.ContainerRecommendations) > 0 {
 		delete(cluster.emptyVPAs, vpa.ID)
 		return nil
diff --git a/vertical-pod-autoscaler/pkg/recommender/model/vpa.go b/vertical-pod-autoscaler/pkg/recommender/model/vpa.go
@@ -166,8 +166,8 @@ func (vpa *Vpa) UpdateRecommendation(recommendation *vpa_types.RecommendedPodRes
 	for _, containerRecommendation := range recommendation.ContainerRecommendations {
 		for container, state := range vpa.aggregateContainerStates {
 			if container.ContainerName() == containerRecommendation.ContainerName {
-				metrics_quality.ObserveRecommendationChange(state.LastRecommendation, containerRecommendation.UncappedTarget, vpa.UpdateMode, vpa.PodCount)
-				state.LastRecommendation = containerRecommendation.UncappedTarget
+				metrics_quality.ObserveRecommendationChange(state.GetLastRecommendation(), containerRecommendation.UncappedTarget, vpa.UpdateMode, vpa.PodCount)
+				state.SetLastRecommendation(containerRecommendation.UncappedTarget)
 			}
 		}
 	}
diff --git a/vertical-pod-autoscaler/pkg/recommender/model/vpa_test.go b/vertical-pod-autoscaler/pkg/recommender/model/vpa_test.go
@@ -193,7 +193,7 @@ func TestUpdateRecommendation(t *testing.T) {
 			for container, rec := range tc.containers {
 				state := &AggregateContainerState{}
 				if rec != nil {
-					state.LastRecommendation = corev1.ResourceList{
+					state.lastRecommendation = corev1.ResourceList{
 						corev1.ResourceCPU:    resource.MustParse(rec.cpu),
 						corev1.ResourceMemory: resource.MustParse(rec.mem),
 					}
@@ -209,9 +209,9 @@ func TestUpdateRecommendation(t *testing.T) {
 			for key, state := range vpa.aggregateContainerStates {
 				expected, ok := tc.expectedLast[key.ContainerName()]
 				if !ok {
-					assert.Nil(t, state.LastRecommendation)
+					assert.Nil(t, state.lastRecommendation)
 				} else {
-					assert.Equal(t, expected, state.LastRecommendation)
+					assert.Equal(t, expected, state.lastRecommendation)
 				}
 			}
 		})
diff --git a/vertical-pod-autoscaler/pkg/recommender/routines/recommender_test.go b/vertical-pod-autoscaler/pkg/recommender/routines/recommender_test.go
@@ -0,0 +1,137 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package routines
+
+import (
+	"fmt"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/runtime"
+
+	v1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
+	vpa_fake "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/clientset/versioned/fake"
+	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/logic"
+	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/model"
+	metrics_recommender "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics/recommender"
+	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/test"
+)
+
+type mockPodResourceRecommender struct{}
+
+func (m *mockPodResourceRecommender) GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap) logic.RecommendedPodResources {
+	return logic.RecommendedPodResources{}
+}
+
+// TestProcessUpdateVPAsConcurrency tests processVPAUpdate for race conditions when run concurrently
+func TestProcessUpdateVPAsConcurrency(t *testing.T) {
+	updateWorkerCount := 10
+
+	vpaCount := 1000
+	vpas := make(map[model.VpaID]*model.Vpa, vpaCount)
+	apiObjectVPAs := make([]*v1.VerticalPodAutoscaler, vpaCount)
+	fakedClient := make([]runtime.Object, vpaCount)
+
+	for i := range vpaCount {
+		vpaName := fmt.Sprintf("test-vpa-%d", i)
+		vpaID := model.VpaID{
+			Namespace: "default",
+			VpaName:   vpaName,
+		}
+		selector, err := labels.Parse("app=test")
+		assert.NoError(t, err, "Failed to parse label selector")
+		vpas[vpaID] = model.NewVpa(vpaID, selector, time.Now())
+
+		apiObjectVPAs[i] = test.VerticalPodAutoscaler().
+			WithName(vpaName).
+			WithNamespace("default").
+			WithContainer("test-container").
+			Get()
+
+		fakedClient[i] = apiObjectVPAs[i]
+	}
+
+	fakeClient := vpa_fake.NewSimpleClientset(fakedClient...).AutoscalingV1()
+	r := &recommender{
+		clusterState:                model.NewClusterState(time.Minute),
+		vpaClient:                   fakeClient,
+		podResourceRecommender:      &mockPodResourceRecommender{},
+		recommendationPostProcessor: []RecommendationPostProcessor{},
+	}
+
+	labelSelector, err := metav1.ParseToLabelSelector("app=test")
+	assert.NoError(t, err, "Failed to parse label selector")
+	parsedSelector, err := metav1.LabelSelectorAsSelector(labelSelector)
+	assert.NoError(t, err, "Failed to convert label selector to selector")
+
+	// Inject into clusterState
+	for _, vpa := range apiObjectVPAs {
+		err := r.clusterState.AddOrUpdateVpa(vpa, parsedSelector)
+		assert.NoError(t, err, "Failed to add or update VPA in cluster state")
+	}
+	r.clusterState.SetObservedVPAs(apiObjectVPAs)
+
+	// Run processVPAUpdate concurrently for all VPAs
+	var wg sync.WaitGroup
+
+	cnt := metrics_recommender.NewObjectCounter()
+	defer cnt.Observe()
+
+	// Create a channel to send VPA updates to workers
+	vpaUpdates := make(chan *v1.VerticalPodAutoscaler, len(apiObjectVPAs))
+
+	var counter int64
+
+	// Start workers
+	for range updateWorkerCount {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			for observedVpa := range vpaUpdates {
+				key := model.VpaID{
+					Namespace: observedVpa.Namespace,
+					VpaName:   observedVpa.Name,
+				}
+
+				vpa, found := r.clusterState.VPAs()[key]
+				if !found {
+					return
+				}
+
+				atomic.AddInt64(&counter, 1)
+
+				processVPAUpdate(r, vpa, observedVpa)
+				cnt.Add(vpa)
+			}
+		}()
+	}
+
+	// Send VPA updates to the workers
+	for _, observedVpa := range apiObjectVPAs {
+		vpaUpdates <- observedVpa
+	}
+
+	close(vpaUpdates)
+	wg.Wait()
+
+	assert.Equal(t, int64(vpaCount), atomic.LoadInt64(&counter), "Not all VPAs were processed")
+}
diff --git a/vertical-pod-autoscaler/pkg/utils/metrics/recommender/recommender.go b/vertical-pod-autoscaler/pkg/utils/metrics/recommender/recommender.go
@@ -21,6 +21,7 @@ import (
 	"fmt"
 	"net/http"
 	"strconv"
+	"sync"
 	"time"
 
 	"github.com/prometheus/client_golang/prometheus"
@@ -119,7 +120,8 @@ type objectCounterKey struct {
 
 // ObjectCounter helps split all VPA objects into buckets
 type ObjectCounter struct {
-	cnt map[objectCounterKey]int
+	cnt   map[objectCounterKey]int
+	mutex sync.RWMutex
 }
 
 // Register initializes all metrics for VPA Recommender
@@ -189,11 +191,15 @@ func (oc *ObjectCounter) Add(vpa *model.Vpa) {
 		matchesPods:       vpa.HasMatchedPods(),
 		unsupportedConfig: vpa.Conditions.ConditionActive(vpa_types.ConfigUnsupported),
 	}
+	oc.mutex.Lock()
 	oc.cnt[key]++
+	oc.mutex.Unlock()
 }
 
 // Observe passes all the computed bucket values to metrics
 func (oc *ObjectCounter) Observe() {
+	oc.mutex.RLock()
+	defer oc.mutex.RUnlock()
 	for k, v := range oc.cnt {
 		vpaObjectCount.WithLabelValues(
 			k.mode,

Original file line number	Diff line number	Diff line change
`@@ -66,7 +66,7 @@ var (`
`66`	`66`	`address = flag.String("address", ":8942", "The address to expose Prometheus metrics.")`
`67`	`67`	``storage = flag.String("storage", "", `Specifies storage mode. Supported values: prometheus, checkpoint (default)`)``
`68`	`68`	``memorySaver = flag.Bool("memory-saver", false, `If true, only track pods which have an associated VPA`)``
`69`		``- updateWorkerCount = flag.Int("update-worker-count", 10, "Number of concurrent workers to update VPA recommendations and checkpoints. When increasing this setting, make sure the client-side rate limits (`kube-api-qps` and `kube-api-burst`) are either increased or turned off as well. Determines the minimum number of VPA checkpoints written per recommender loop.")``
	`69`	`+ updateWorkerCount = flag.Int("update-worker-count", 10, "Number of concurrent workers to update VPA recommendations and checkpoints. When increasing this setting, make sure the client-side rate limits ('kube-api-qps' and 'kube-api-burst') are either increased or turned off as well. Determines the minimum number of VPA checkpoints written per recommender loop.")`
`70`	`70`	`)`
`71`	`71`
`72`	`72`	`// Prometheus history provider flags`
Original file line number	Diff line number	Diff line change
`@@ -166,8 +166,8 @@ func (vpa *Vpa) UpdateRecommendation(recommendation *vpa_types.RecommendedPodRes`
`166`	`166`	`for _, containerRecommendation := range recommendation.ContainerRecommendations {`
`167`	`167`	`for container, state := range vpa.aggregateContainerStates {`
`168`	`168`	`if container.ContainerName() == containerRecommendation.ContainerName {`
`169`		`- metrics_quality.ObserveRecommendationChange(state.LastRecommendation, containerRecommendation.UncappedTarget, vpa.UpdateMode, vpa.PodCount)`
`170`		`- state.LastRecommendation = containerRecommendation.UncappedTarget`
	`169`	`+ metrics_quality.ObserveRecommendationChange(state.GetLastRecommendation(), containerRecommendation.UncappedTarget, vpa.UpdateMode, vpa.PodCount)`
	`170`	`+ state.SetLastRecommendation(containerRecommendation.UncappedTarget)`
`171`	`171`	`}`
`172`	`172`	`}`
`173`	`173`	`}`