Skip to content

Commit b259f89

Browse files
authored
test: fix metrics flake in TestSurfaceFightError (#1812)
The previous change fixed the flakiness with the RootSync status validation, but now the metrics validation is still flaky because the async status updates may stop before the expected number of fight metrics are emitted. This change validates the RootSync status and metric in parallel and only stops the async status updates once both conditions succeed/timeout. Also adds an initial assertion to verify there are no fight errors at the beginning.
1 parent 894c374 commit b259f89

File tree

1 file changed

+28
-21
lines changed

1 file changed

+28
-21
lines changed

e2e/testcases/remediator_test.go

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222

2323
"kpt.dev/configsync/e2e/nomostest"
2424
"kpt.dev/configsync/e2e/nomostest/metrics"
25+
"kpt.dev/configsync/e2e/nomostest/taskgroup"
2526
nomostesting "kpt.dev/configsync/e2e/nomostest/testing"
2627
"kpt.dev/configsync/pkg/api/configsync"
2728
"kpt.dev/configsync/pkg/core"
@@ -45,9 +46,23 @@ func TestSurfaceFightError(t *testing.T) {
4546
nt.Must(rootSyncGitRepo.CommitAndPush("Add Namespace and RoleBinding"))
4647
nt.Must(nt.WatchForAllSyncs())
4748

49+
rootSyncNN := nomostest.RootSyncNN(configsync.RootSyncName)
50+
rootSyncLabels, err := nomostest.MetricLabelsForRootSync(nt, rootSyncNN)
51+
if err != nil {
52+
nt.T.Fatal(err)
53+
}
54+
commitHash := rootSyncGitRepo.MustHash(nt.T)
55+
nt.T.Log("Validate there are no error metrics initially")
56+
nt.Must(nomostest.ValidateMetrics(nt,
57+
nomostest.ReconcilerErrorMetrics(nt, rootSyncLabels, commitHash, metrics.ErrorSummary{})))
58+
4859
// Make the # of updates exceed the fightThreshold defined in pkg/syncer/reconcile/fight_detector.go
4960
ctx, cancel := context.WithCancel(t.Context())
61+
nt.T.Cleanup(func() {
62+
cancel() // cancel in case an assertion failed early
63+
})
5064
go func() {
65+
nt.T.Log("Initialize async status updates")
5166
for {
5267
select {
5368
case <-ctx.Done():
@@ -62,28 +77,20 @@ func TestSurfaceFightError(t *testing.T) {
6277
}()
6378

6479
nt.T.Log("The RootSync reports a fight error")
65-
err := nt.Watcher.WatchForRootSyncSyncError(configsync.RootSyncName, status.FightErrorCode,
66-
"This may indicate Config Sync is fighting with another controller over the object.", nil)
67-
cancel()
68-
if err != nil {
69-
nt.T.Fatal(err)
70-
}
71-
72-
rootSyncNN := nomostest.RootSyncNN(configsync.RootSyncName)
73-
rootSyncLabels, err := nomostest.MetricLabelsForRootSync(nt, rootSyncNN)
74-
if err != nil {
75-
nt.T.Fatal(err)
76-
}
77-
commitHash := rootSyncGitRepo.MustHash(nt.T)
78-
79-
err = nomostest.ValidateMetrics(nt,
80-
nomostest.ReconcilerErrorMetrics(nt, rootSyncLabels, commitHash, metrics.ErrorSummary{
81-
Fights: 5,
82-
}))
83-
if err != nil {
84-
nt.T.Fatal(err)
85-
}
80+
tg := taskgroup.New()
81+
tg.Go(func() error {
82+
return nt.Watcher.WatchForRootSyncSyncError(configsync.RootSyncName, status.FightErrorCode,
83+
"This may indicate Config Sync is fighting with another controller over the object.", nil)
84+
})
85+
tg.Go(func() error {
86+
return nomostest.ValidateMetrics(nt,
87+
nomostest.ReconcilerErrorMetrics(nt, rootSyncLabels, commitHash, metrics.ErrorSummary{
88+
Fights: 5, // Validate at least 5 - increases with remediation attempts
89+
}))
90+
})
91+
nt.Must(tg.Wait())
8692

93+
cancel() // Stop async status updates
8794
nt.T.Log("The fight error should be auto-resolved if no more fights")
8895
nt.Must(nt.WatchForAllSyncs())
8996
}

0 commit comments

Comments
 (0)