@@ -16,6 +16,7 @@ import (
1616 "strconv"
1717 "strings"
1818 "sync"
19+ "syscall"
1920 "time"
2021
2122 "github.com/opencontainers/runtime-spec/specs-go"
@@ -310,18 +311,78 @@ func (p *setnsProcess) addIntoCgroupV2() error {
310311}
311312
312313func (p * setnsProcess ) addIntoCgroup () error {
314+ if p .cmd .SysProcAttr .UseCgroupFD {
315+ // We've used cgroupfd successfully, so the process is
316+ // already in the proper cgroup, nothing to do here.
317+ return nil
318+ }
313319 if cgroups .IsCgroup2UnifiedMode () {
314320 return p .addIntoCgroupV2 ()
315321 }
316322 return p .addIntoCgroupV1 ()
317323}
318324
325+ // prepareCgroupFD sets up p.cmd to use clone3 with CLONE_INTO_CGROUP
326+ // to join cgroup early, in p.cmd.Start. Returns an *os.File which
327+ // must be closed by the caller after p.Cmd.Start return.
328+ func (p * setnsProcess ) prepareCgroupFD () (* os.File , error ) {
329+ if ! cgroups .IsCgroup2UnifiedMode () {
330+ return nil , nil
331+ }
332+
333+ base := p .manager .Path ("" )
334+ if base == "" { // No cgroup to join.
335+ return nil , nil
336+ }
337+ sub := ""
338+ if p .process .SubCgroupPaths != nil {
339+ sub = p .process .SubCgroupPaths ["" ]
340+ }
341+ cgroup := path .Join (base , sub )
342+ if ! strings .HasPrefix (cgroup , base ) {
343+ return nil , fmt .Errorf ("bad sub cgroup path: %s" , sub )
344+ }
345+
346+ fd , err := cgroups .OpenFile (base , sub , unix .O_PATH | unix .O_DIRECTORY | unix .O_CLOEXEC )
347+ if err != nil {
348+ if p .rootlessCgroups {
349+ return nil , nil
350+ }
351+ return nil , fmt .Errorf ("can't open cgroup: %w" , err )
352+ }
353+
354+ logrus .Debugf ("using CLONE_INTO_CGROUP %q" , cgroup )
355+ if p .cmd .SysProcAttr == nil {
356+ p .cmd .SysProcAttr = & syscall.SysProcAttr {}
357+ }
358+ p .cmd .SysProcAttr .UseCgroupFD = true
359+ p .cmd .SysProcAttr .CgroupFD = int (fd .Fd ())
360+
361+ return fd , nil
362+ }
363+
319364func (p * setnsProcess ) start () (retErr error ) {
320365 defer p .comm .closeParent ()
321366
367+ fd , err := p .prepareCgroupFD ()
368+ if err != nil {
369+ return err
370+ }
371+
322372 // Get the "before" value of oom kill count.
323373 oom , _ := p .manager .OOMKillCount ()
324- err := p .startWithCPUAffinity ()
374+
375+ err = p .startWithCPUAffinity ()
376+ if fd != nil {
377+ fd .Close ()
378+ }
379+ if err != nil && p .cmd .SysProcAttr .UseCgroupFD {
380+ logrus .Debugf ("exec with CLONE_INTO_CGROUP failed: %v; retrying without" , err )
381+ // SysProcAttr.CgroupFD is never used when UseCgroupFD is unset.
382+ p .cmd .SysProcAttr .UseCgroupFD = false
383+ err = p .startWithCPUAffinity ()
384+ }
385+
325386 // Close the child-side of the pipes (controlled by child).
326387 p .comm .closeChild ()
327388 if err != nil {
0 commit comments