Skip to content

Commit d3d7f7d

Browse files
committed
libct/cg: improve cgroup removal logic
The current code is only doing retries in RemovePaths, which is only used for cgroup v1 (cgroup v2 uses RemovePath, which makes no retries). Let's remove all retry logic and logging from RemovePaths, together with: - os.Stat check from RemovePaths (its usage probably made sense before commit 19be8e5 but not after); - error/warning logging from RemovePaths (this was added by commit 19be8e5 in 2020 and so far we've seen no errors other than EBUSY, so reporting the actual error proved to be useless). Add the retry logic to rmdir, and the second retry bool argument. Decrease the initial delay and increase the number of retries from the old implementation so it can take up to ~1 sec before returning EBUSY (was about 0.3 sec). Hopefully, as a result, we'll have less "failed to remove cgroup paths" errors. Signed-off-by: Kir Kolyshkin <[email protected]>
1 parent 29283bb commit d3d7f7d

File tree

1 file changed

+32
-42
lines changed

1 file changed

+32
-42
lines changed

libcontainer/cgroups/utils.go

Lines changed: 32 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -217,79 +217,69 @@ func PathExists(path string) bool {
217217
return true
218218
}
219219

220-
func rmdir(path string) error {
220+
// rmdir tries to remove a directory, optionally retrying on EBUSY.
221+
func rmdir(path string, retry bool) error {
222+
delay := time.Millisecond
223+
tries := 10
224+
225+
again:
221226
err := unix.Rmdir(path)
222-
if err == nil || err == unix.ENOENT {
227+
switch err { // nolint:errorlint // unix errors are bare
228+
case nil, unix.ENOENT:
223229
return nil
230+
case unix.EINTR:
231+
goto again
232+
case unix.EBUSY:
233+
if retry && tries > 0 {
234+
time.Sleep(delay)
235+
delay *= 2
236+
tries--
237+
goto again
238+
239+
}
224240
}
225241
return &os.PathError{Op: "rmdir", Path: path, Err: err}
226242
}
227243

228244
// RemovePath aims to remove cgroup path. It does so recursively,
229245
// by removing any subdirectories (sub-cgroups) first.
230246
func RemovePath(path string) error {
231-
// try the fast path first
232-
if err := rmdir(path); err == nil {
247+
// Try the fast path first.
248+
if err := rmdir(path, false); err == nil {
233249
return nil
234250
}
235251

236252
infos, err := os.ReadDir(path)
237-
if err != nil {
238-
if os.IsNotExist(err) {
239-
err = nil
240-
}
253+
if err != nil && !os.IsNotExist(err) {
241254
return err
242255
}
243256
for _, info := range infos {
244257
if info.IsDir() {
245-
// We should remove subcgroups dir first
258+
// We should remove subcgroup first.
246259
if err = RemovePath(filepath.Join(path, info.Name())); err != nil {
247260
break
248261
}
249262
}
250263
}
251264
if err == nil {
252-
err = rmdir(path)
265+
err = rmdir(path, true)
253266
}
254267
return err
255268
}
256269

257270
// RemovePaths iterates over the provided paths removing them.
258-
// We trying to remove all paths five times with increasing delay between tries.
259-
// If after all there are not removed cgroups - appropriate error will be
260-
// returned.
261271
func RemovePaths(paths map[string]string) (err error) {
262-
const retries = 5
263-
delay := 10 * time.Millisecond
264-
for i := 0; i < retries; i++ {
265-
if i != 0 {
266-
time.Sleep(delay)
267-
delay *= 2
268-
}
269-
for s, p := range paths {
270-
if err := RemovePath(p); err != nil {
271-
// do not log intermediate iterations
272-
switch i {
273-
case 0:
274-
logrus.WithError(err).Warnf("Failed to remove cgroup (will retry)")
275-
case retries - 1:
276-
logrus.WithError(err).Error("Failed to remove cgroup")
277-
}
278-
}
279-
_, err := os.Stat(p)
280-
// We need this strange way of checking cgroups existence because
281-
// RemoveAll almost always returns error, even on already removed
282-
// cgroups
283-
if os.IsNotExist(err) {
284-
delete(paths, s)
285-
}
286-
}
287-
if len(paths) == 0 {
288-
//nolint:ineffassign,staticcheck // done to help garbage collecting: opencontainers/runc#2506
289-
paths = make(map[string]string)
290-
return nil
272+
for s, p := range paths {
273+
if err := RemovePath(p); err == nil {
274+
delete(paths, s)
291275
}
292276
}
277+
if len(paths) == 0 {
278+
//nolint:ineffassign,staticcheck // done to help garbage collecting: opencontainers/runc#2506
279+
// TODO: switch to clear once Go < 1.21 is not supported.
280+
paths = make(map[string]string)
281+
return nil
282+
}
293283
return fmt.Errorf("Failed to remove paths: %v", paths)
294284
}
295285

0 commit comments

Comments
 (0)