Skip to content

Commit 2e528f9

Browse files
authored
Merge pull request #8445 from vitanovs/feat/vpa-updater-enrich-counters-with-vpa-context-labels
feat(vpa/updater): Add `VPA` resource `name` and `namespace` to success and fail resource updates counter metrics
2 parents a9cb59f + be88f4f commit 2e528f9

File tree

3 files changed

+98
-72
lines changed

3 files changed

+98
-72
lines changed

vertical-pod-autoscaler/pkg/updater/logic/updater.go

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -289,17 +289,18 @@ func (u *updater) RunOnce(ctx context.Context) {
289289
err = u.inPlaceRateLimiter.Wait(ctx)
290290
if err != nil {
291291
klog.V(0).InfoS("In-place rate limiter wait failed for in-place resize", "error", err)
292+
metrics_updater.RecordFailedInPlaceUpdate(vpaSize, vpa.Name, vpa.Namespace, "InPlaceUpdateRateLimiterWaitFailed")
292293
return
293294
}
294295
err := inPlaceLimiter.InPlaceUpdate(pod, vpa, u.eventRecorder)
295296
if err != nil {
296297
klog.V(0).InfoS("In-place resize failed, falling back to eviction", "error", err, "pod", klog.KObj(pod))
297-
metrics_updater.RecordFailedInPlaceUpdate(vpaSize, "InPlaceUpdateError")
298+
metrics_updater.RecordFailedInPlaceUpdate(vpaSize, vpa.Name, vpa.Namespace, "InPlaceUpdateError")
298299
podsForEviction = append(podsForEviction, pod)
299300
continue
300301
}
301302
withInPlaceUpdated = true
302-
metrics_updater.AddInPlaceUpdatedPod(vpaSize)
303+
metrics_updater.AddInPlaceUpdatedPod(vpaSize, vpa.Name, vpa.Namespace)
303304
}
304305

305306
for _, pod := range podsForEviction {
@@ -310,16 +311,17 @@ func (u *updater) RunOnce(ctx context.Context) {
310311
err = u.evictionRateLimiter.Wait(ctx)
311312
if err != nil {
312313
klog.V(0).InfoS("Eviction rate limiter wait failed", "error", err)
314+
metrics_updater.RecordFailedEviction(vpaSize, vpa.Name, vpa.Namespace, updateMode, "EvictionRateLimiterWaitFailed")
313315
return
314316
}
315317
klog.V(2).InfoS("Evicting pod", "pod", klog.KObj(pod))
316318
evictErr := evictionLimiter.Evict(pod, vpa, u.eventRecorder)
317319
if evictErr != nil {
318320
klog.V(0).InfoS("Eviction failed", "error", evictErr, "pod", klog.KObj(pod))
319-
metrics_updater.RecordFailedEviction(vpaSize, updateMode, "EvictionError")
321+
metrics_updater.RecordFailedEviction(vpaSize, vpa.Name, vpa.Namespace, updateMode, "EvictionError")
320322
} else {
321323
withEvicted = true
322-
metrics_updater.AddEvictedPod(vpaSize, updateMode)
324+
metrics_updater.AddEvictedPod(vpaSize, vpa.Name, vpa.Namespace, updateMode)
323325
}
324326
}
325327

vertical-pod-autoscaler/pkg/utils/metrics/updater/updater.go

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ var (
6565
Namespace: metricsNamespace,
6666
Name: "evicted_pods_total",
6767
Help: "Number of Pods evicted by Updater to apply a new recommendation.",
68-
}, []string{"vpa_size_log2", "update_mode"},
68+
}, []string{"vpa_size_log2", "update_mode", "vpa_name", "vpa_namespace"},
6969
)
7070

7171
vpasWithEvictablePodsCount = prometheus.NewGaugeVec(
@@ -89,7 +89,7 @@ var (
8989
Namespace: metricsNamespace,
9090
Name: "failed_eviction_attempts_total",
9191
Help: "Number of failed attempts to update Pods by eviction",
92-
}, []string{"vpa_size_log2", "update_mode", "reason"},
92+
}, []string{"vpa_size_log2", "update_mode", "reason", "vpa_name", "vpa_namespace"},
9393
)
9494

9595
inPlaceUpdatableCount = prometheus.NewGaugeVec(
@@ -105,7 +105,7 @@ var (
105105
Namespace: metricsNamespace,
106106
Name: "in_place_updated_pods_total",
107107
Help: "Number of Pods updated in-place by Updater to apply a new recommendation.",
108-
}, []string{"vpa_size_log2"},
108+
}, []string{"vpa_size_log2", "vpa_name", "vpa_namespace"},
109109
)
110110

111111
vpasWithInPlaceUpdatablePodsCount = prometheus.NewGaugeVec(
@@ -129,7 +129,7 @@ var (
129129
Namespace: metricsNamespace,
130130
Name: "failed_in_place_update_attempts_total",
131131
Help: "Number of failed attempts to update Pods in-place.",
132-
}, []string{"vpa_size_log2", "reason"},
132+
}, []string{"vpa_size_log2", "reason", "vpa_name", "vpa_namespace"},
133133
)
134134

135135
functionLatency = metrics.CreateExecutionTimeMetric(metricsNamespace,
@@ -200,15 +200,15 @@ func NewVpasWithEvictedPodsCounter() *UpdateModeAndSizeBasedGauge {
200200
}
201201

202202
// AddEvictedPod increases the counter of pods evicted by Updater, by given VPA size
203-
func AddEvictedPod(vpaSize int, mode vpa_types.UpdateMode) {
203+
func AddEvictedPod(vpaSize int, vpaName string, vpaNamespace string, mode vpa_types.UpdateMode) {
204204
log2 := metrics.GetVpaSizeLog2(vpaSize)
205-
evictedCount.WithLabelValues(strconv.Itoa(log2), string(mode)).Inc()
205+
evictedCount.WithLabelValues(strconv.Itoa(log2), string(mode), vpaName, vpaNamespace).Inc()
206206
}
207207

208-
// RecordFailedEviction increases the counter of failed eviction attempts by given VPA size, update mode and reason
209-
func RecordFailedEviction(vpaSize int, mode vpa_types.UpdateMode, reason string) {
208+
// RecordFailedEviction increases the counter of failed eviction attempts by given VPA size, name, namespace, update mode and reason
209+
func RecordFailedEviction(vpaSize int, vpaName string, vpaNamespace string, mode vpa_types.UpdateMode, reason string) {
210210
log2 := metrics.GetVpaSizeLog2(vpaSize)
211-
failedEvictionAttempts.WithLabelValues(strconv.Itoa(log2), string(mode), reason).Inc()
211+
failedEvictionAttempts.WithLabelValues(strconv.Itoa(log2), string(mode), reason, vpaName, vpaNamespace).Inc()
212212
}
213213

214214
// NewInPlaceUpdatablePodsCounter returns a wrapper for counting Pods which are matching in-place update criteria
@@ -227,15 +227,15 @@ func NewVpasWithInPlaceUpdatedPodsCounter() *SizeBasedGauge {
227227
}
228228

229229
// AddInPlaceUpdatedPod increases the counter of pods updated in place by Updater, by given VPA size
230-
func AddInPlaceUpdatedPod(vpaSize int) {
230+
func AddInPlaceUpdatedPod(vpaSize int, vpaName string, vpaNamespace string) {
231231
log2 := metrics.GetVpaSizeLog2(vpaSize)
232-
inPlaceUpdatedCount.WithLabelValues(strconv.Itoa(log2)).Inc()
232+
inPlaceUpdatedCount.WithLabelValues(strconv.Itoa(log2), vpaName, vpaNamespace).Inc()
233233
}
234234

235-
// RecordFailedInPlaceUpdate increases the counter of failed in-place update attempts by given VPA size and reason
236-
func RecordFailedInPlaceUpdate(vpaSize int, reason string) {
235+
// RecordFailedInPlaceUpdate increases the counter of failed in-place update attempts by given VPA size, name, namespace and reason
236+
func RecordFailedInPlaceUpdate(vpaSize int, vpaName string, vpaNamespace string, reason string) {
237237
log2 := metrics.GetVpaSizeLog2(vpaSize)
238-
failedInPlaceUpdateAttempts.WithLabelValues(strconv.Itoa(log2), reason).Inc()
238+
failedInPlaceUpdateAttempts.WithLabelValues(strconv.Itoa(log2), reason, vpaName, vpaNamespace).Inc()
239239
}
240240

241241
// Add increases the counter for the given VPA size

vertical-pod-autoscaler/pkg/utils/metrics/updater/updater_test.go

Lines changed: 78 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -27,30 +27,36 @@ import (
2727

2828
func TestAddEvictedPod(t *testing.T) {
2929
testCases := []struct {
30-
desc string
31-
vpaSize int
32-
mode vpa_types.UpdateMode
33-
log2 string
30+
desc string
31+
vpaSize int
32+
mode vpa_types.UpdateMode
33+
log2 string
34+
vpaName string
35+
vpaNamespace string
3436
}{
3537
{
36-
desc: "VPA size 5, mode Auto",
37-
vpaSize: 5,
38-
mode: vpa_types.UpdateModeAuto,
39-
log2: "2",
38+
desc: "VPA size 5, mode Auto",
39+
vpaSize: 5,
40+
mode: vpa_types.UpdateModeAuto,
41+
log2: "2",
42+
vpaName: "vpa-5",
43+
vpaNamespace: "vpa-ns-5",
4044
},
4145
{
42-
desc: "VPA size 10, mode Off",
43-
vpaSize: 10,
44-
mode: vpa_types.UpdateModeOff,
45-
log2: "3",
46+
desc: "VPA size 10, mode Off",
47+
vpaSize: 10,
48+
mode: vpa_types.UpdateModeOff,
49+
log2: "3",
50+
vpaName: "vpa-10",
51+
vpaNamespace: "vpa-ns-10",
4652
},
4753
}
4854

4955
for _, tc := range testCases {
5056
t.Run(tc.desc, func(t *testing.T) {
5157
t.Cleanup(evictedCount.Reset)
52-
AddEvictedPod(tc.vpaSize, tc.mode)
53-
val := testutil.ToFloat64(evictedCount.WithLabelValues(tc.log2, string(tc.mode)))
58+
AddEvictedPod(tc.vpaSize, tc.vpaName, tc.vpaNamespace, tc.mode)
59+
val := testutil.ToFloat64(evictedCount.WithLabelValues(tc.log2, string(tc.mode), tc.vpaName, tc.vpaNamespace))
5460
if val != 1 {
5561
t.Errorf("Unexpected value for evictedCount metric with labels (%s, %s): got %v, want 1", tc.log2, string(tc.mode), val)
5662
}
@@ -60,31 +66,37 @@ func TestAddEvictedPod(t *testing.T) {
6066

6167
func TestRecordFailedEviction(t *testing.T) {
6268
testCases := []struct {
63-
desc string
64-
vpaSize int
65-
mode vpa_types.UpdateMode
66-
reason string
67-
log2 string
69+
desc string
70+
vpaSize int
71+
mode vpa_types.UpdateMode
72+
reason string
73+
log2 string
74+
vpaName string
75+
vpaNamespace string
6876
}{
6977
{
70-
desc: "VPA size 2, some reason",
71-
vpaSize: 2,
72-
reason: "some_reason",
73-
log2: "1",
78+
desc: "VPA size 2, some reason",
79+
vpaSize: 2,
80+
reason: "some_reason",
81+
log2: "1",
82+
vpaName: "vpa-2",
83+
vpaNamespace: "vpa-2-ns",
7484
},
7585
{
76-
desc: "VPA size 20, another reason",
77-
vpaSize: 20,
78-
reason: "another_reason",
79-
log2: "4",
86+
desc: "VPA size 20, another reason",
87+
vpaSize: 20,
88+
reason: "another_reason",
89+
log2: "4",
90+
vpaName: "vpa-20",
91+
vpaNamespace: "vpa-20-ns",
8092
},
8193
}
8294

8395
for _, tc := range testCases {
8496
t.Run(tc.desc, func(t *testing.T) {
8597
t.Cleanup(failedEvictionAttempts.Reset)
86-
RecordFailedEviction(tc.vpaSize, tc.mode, tc.reason)
87-
val := testutil.ToFloat64(failedEvictionAttempts.WithLabelValues(tc.log2, string(tc.mode), tc.reason))
98+
RecordFailedEviction(tc.vpaSize, tc.vpaName, tc.vpaNamespace, tc.mode, tc.reason)
99+
val := testutil.ToFloat64(failedEvictionAttempts.WithLabelValues(tc.log2, string(tc.mode), tc.reason, tc.vpaName, tc.vpaNamespace))
88100
if val != 1 {
89101
t.Errorf("Unexpected value for FailedEviction metric with labels (%s, %s): got %v, want 1", tc.log2, tc.reason, val)
90102
}
@@ -94,27 +106,33 @@ func TestRecordFailedEviction(t *testing.T) {
94106

95107
func TestAddInPlaceUpdatedPod(t *testing.T) {
96108
testCases := []struct {
97-
desc string
98-
vpaSize int
99-
log2 string
109+
desc string
110+
vpaSize int
111+
log2 string
112+
vpaName string
113+
vpaNamespace string
100114
}{
101115
{
102-
desc: "VPA size 10",
103-
vpaSize: 10,
104-
log2: "3",
116+
desc: "VPA size 10",
117+
vpaSize: 10,
118+
log2: "3",
119+
vpaName: "vpa-10",
120+
vpaNamespace: "vpa-ns-10",
105121
},
106122
{
107-
desc: "VPA size 1",
108-
vpaSize: 1,
109-
log2: "0",
123+
desc: "VPA size 1",
124+
vpaSize: 1,
125+
log2: "0",
126+
vpaName: "vpa-1",
127+
vpaNamespace: "vpa-ns-1",
110128
},
111129
}
112130

113131
for _, tc := range testCases {
114132
t.Run(tc.desc, func(t *testing.T) {
115133
t.Cleanup(inPlaceUpdatedCount.Reset)
116-
AddInPlaceUpdatedPod(tc.vpaSize)
117-
val := testutil.ToFloat64(inPlaceUpdatedCount.WithLabelValues(tc.log2))
134+
AddInPlaceUpdatedPod(tc.vpaSize, tc.vpaName, tc.vpaNamespace)
135+
val := testutil.ToFloat64(inPlaceUpdatedCount.WithLabelValues(tc.log2, tc.vpaName, tc.vpaNamespace))
118136
if val != 1 {
119137
t.Errorf("Unexpected value for InPlaceUpdatedPod metric with labels (%s): got %v, want 1", tc.log2, val)
120138
}
@@ -124,30 +142,36 @@ func TestAddInPlaceUpdatedPod(t *testing.T) {
124142

125143
func TestRecordFailedInPlaceUpdate(t *testing.T) {
126144
testCases := []struct {
127-
desc string
128-
vpaSize int
129-
reason string
130-
log2 string
145+
desc string
146+
vpaSize int
147+
reason string
148+
log2 string
149+
vpaName string
150+
vpaNamespace string
131151
}{
132152
{
133-
desc: "VPA size 2, some reason",
134-
vpaSize: 2,
135-
reason: "some_reason",
136-
log2: "1",
153+
desc: "VPA size 2, some reason",
154+
vpaSize: 2,
155+
reason: "some_reason",
156+
log2: "1",
157+
vpaName: "vpa-2",
158+
vpaNamespace: "vpa-2-ns",
137159
},
138160
{
139-
desc: "VPA size 20, another reason",
140-
vpaSize: 20,
141-
reason: "another_reason",
142-
log2: "4",
161+
desc: "VPA size 20, another reason",
162+
vpaSize: 20,
163+
reason: "another_reason",
164+
log2: "4",
165+
vpaName: "vpa-20",
166+
vpaNamespace: "vpa-20-ns",
143167
},
144168
}
145169

146170
for _, tc := range testCases {
147171
t.Run(tc.desc, func(t *testing.T) {
148172
t.Cleanup(failedInPlaceUpdateAttempts.Reset)
149-
RecordFailedInPlaceUpdate(tc.vpaSize, tc.reason)
150-
val := testutil.ToFloat64(failedInPlaceUpdateAttempts.WithLabelValues(tc.log2, tc.reason))
173+
RecordFailedInPlaceUpdate(tc.vpaSize, tc.vpaName, tc.vpaNamespace, tc.reason)
174+
val := testutil.ToFloat64(failedInPlaceUpdateAttempts.WithLabelValues(tc.log2, tc.reason, tc.vpaName, tc.vpaNamespace))
151175
if val != 1 {
152176
t.Errorf("Unexpected value for FailedInPlaceUpdate metric with labels (%s, %s): got %v, want 1", tc.log2, tc.reason, val)
153177
}

0 commit comments

Comments
 (0)