Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions events.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
if intelrdt.IsCMTEnabled() {
s.IntelRdt.CMTStats = is.CMTStats
}

s.IntelRdt.Schemata = is.Schemata
}

s.NetworkInterfaces = ls.Interfaces
Expand Down
7 changes: 6 additions & 1 deletion features.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,12 @@ var featuresCommand = cli.Command{
Enabled: &t,
},
IntelRdt: &features.IntelRdt{
Enabled: &t,
Enabled: &t,
Schemata: &t,
},
MemoryPolicy: &features.MemoryPolicy{
Modes: specconv.KnownMemoryPolicyModes(),
Flags: specconv.KnownMemoryPolicyFlags(),
},
MountExtensions: &features.MountExtensions{
IDMap: &features.IDMap{
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ require (
github.com/moby/sys/userns v0.1.0
github.com/mrunalp/fileutils v0.5.1
github.com/opencontainers/cgroups v0.0.4
github.com/opencontainers/runtime-spec v1.2.2-0.20250401095657-e935f995dd67
github.com/opencontainers/runtime-spec v1.2.2-0.20250818071321-383cadbf08c0
github.com/opencontainers/selinux v1.12.0
github.com/seccomp/libseccomp-golang v0.11.1
github.com/sirupsen/logrus v1.9.3
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm
github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/opencontainers/cgroups v0.0.4 h1:XVj8P/IHVms/j+7eh8ggdkTLAxjz84ZzuFyGoE28DR4=
github.com/opencontainers/cgroups v0.0.4/go.mod h1:s8lktyhlGUqM7OSRL5P7eAW6Wb+kWPNvt4qvVfzA5vs=
github.com/opencontainers/runtime-spec v1.2.2-0.20250401095657-e935f995dd67 h1:Q+KewUGTMamIe6Q39xCD/T1NC1POmaTlWnhjikCrZHA=
github.com/opencontainers/runtime-spec v1.2.2-0.20250401095657-e935f995dd67/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.2.2-0.20250818071321-383cadbf08c0 h1:RLn0YfUWkiqPGtgUANvJrcjIkCHGRl3jcz/c557M28M=
github.com/opencontainers/runtime-spec v1.2.2-0.20250818071321-383cadbf08c0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/selinux v1.12.0 h1:6n5JV4Cf+4y0KNXW48TLj5DwfXpvWlxXplUkdTrmPb8=
github.com/opencontainers/selinux v1.12.0/go.mod h1:BTPX+bjVbWGXw7ZZWUbdENt8w0htPSrlgOOysQaU62U=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
Expand Down
13 changes: 13 additions & 0 deletions internal/linux/linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package linux

import (
"os"
"unsafe"

"golang.org/x/sys/unix"
)
Expand Down Expand Up @@ -72,3 +73,15 @@ func Sendmsg(fd int, p, oob []byte, to unix.Sockaddr, flags int) error {
})
return os.NewSyscallError("sendmsg", err)
}

// SetMempolicy invokes the set_mempolicy system call for the calling thread,
// retrying while the call is interrupted (EINTR).
//
// mode is passed to the kernel unchanged. mask is used as the node bitmask,
// and the number of bits in a unix.CPUSet is passed as maxnode. A failure is
// returned wrapped in an *os.SyscallError named "set_mempolicy"; success
// yields nil.
//
// NOTE(review): the kernel reportedly treats maxnode as one more than the
// number of usable bits, so the highest bit of mask may be ignored — confirm
// against set_mempolicy(2).
func SetMempolicy(mode uint, mask *unix.CPUSet) error {
	// Sizeof is resolved at compile time, so a nil mask is not dereferenced.
	maxNode := unsafe.Sizeof(*mask) * 8

	call := func() error {
		// The pointer-to-uintptr conversion must stay in the argument list
		// of the syscall so that the mask is kept alive for its duration.
		if _, _, errno := unix.Syscall(unix.SYS_SET_MEMPOLICY, uintptr(mode), uintptr(unsafe.Pointer(mask)), maxNode); errno != 0 {
			return errno
		}
		return nil
	}

	return os.NewSyscallError("set_mempolicy", retryOnEINTR(call))
}
12 changes: 8 additions & 4 deletions libcontainer/configs/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,9 @@ type Config struct {
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`

// MemoryPolicy specifies NUMA memory policy for the container.
MemoryPolicy *LinuxMemoryPolicy `json:"memory_policy,omitempty"`

// RootlessEUID is set when the runc was launched with non-zero EUID.
// Note that RootlessEUID is set to false when launched with EUID=0 in userns.
// When RootlessEUID is set, runc creates a new userns for the container.
Expand Down Expand Up @@ -305,7 +308,8 @@ type CPUAffinity struct {
Initial, Final *unix.CPUSet
}

func toCPUSet(str string) (*unix.CPUSet, error) {
// ToCPUSet parses a string in list format into a unix.CPUSet, e.g. "0-3,5,7-9".
func ToCPUSet(str string) (*unix.CPUSet, error) {
if str == "" {
return nil, nil
}
Expand Down Expand Up @@ -356,7 +360,7 @@ func toCPUSet(str string) (*unix.CPUSet, error) {
}
}
if s.Count() == 0 {
return nil, fmt.Errorf("no CPUs found in %q", str)
return nil, fmt.Errorf("no members found in set %q", str)
}

return s, nil
Expand All @@ -367,11 +371,11 @@ func ConvertCPUAffinity(sa *specs.CPUAffinity) (*CPUAffinity, error) {
if sa == nil {
return nil, nil
}
initial, err := toCPUSet(sa.Initial)
initial, err := ToCPUSet(sa.Initial)
if err != nil {
return nil, fmt.Errorf("bad CPUAffinity.Initial: %w", err)
}
final, err := toCPUSet(sa.Final)
final, err := ToCPUSet(sa.Final)
if err != nil {
return nil, fmt.Errorf("bad CPUAffinity.Final: %w", err)
}
Expand Down
4 changes: 4 additions & 0 deletions libcontainer/configs/intelrdt.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ type IntelRdt struct {
// The identity for RDT Class of Service
ClosID string `json:"closID,omitempty"`

// Schemata is a generic field to specify schemata file in the resctrl
// filesystem. Each element represents one line written to the schemata file.
Schemata []string `json:"schemata,omitempty"`

// The schema for L3 cache id and capacity bitmask (CBM)
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
Expand Down
31 changes: 31 additions & 0 deletions libcontainer/configs/memorypolicy.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package configs

import "golang.org/x/sys/unix"

// Memory policy modes and flags, mirroring the values defined in
// /usr/include/linux/mempolicy.h. The values are part of the kernel ABI and
// are therefore spelled out explicitly.
//
//nolint:revive,staticcheck,nolintlint // ignore ALL_CAPS errors in consts from numaif.h, will match unix.* in the future
const (
	// Policy modes.
	MPOL_DEFAULT             = 0
	MPOL_PREFERRED           = 1
	MPOL_BIND                = 2
	MPOL_INTERLEAVE          = 3
	MPOL_LOCAL               = 4
	MPOL_PREFERRED_MANY      = 5
	MPOL_WEIGHTED_INTERLEAVE = 6

	// Mode flags, OR-ed with a policy mode to form the mode argument of
	// set_mempolicy. Listed by ascending bit position.
	MPOL_F_NUMA_BALANCING = 1 << 13
	MPOL_F_RELATIVE_NODES = 1 << 14
	MPOL_F_STATIC_NODES   = 1 << 15
)

// LinuxMemoryPolicy contains the NUMA memory policy configuration of a
// container. Mode and Flags are OR-ed together and handed to set_mempolicy
// along with Nodes. Fields holding their zero value are omitted from the
// JSON encoding.
type LinuxMemoryPolicy struct {
// Mode specifies the memory policy mode (one of the MPOL_* mode
// constants) without mode flags. See set_mempolicy() documentation for
// details.
Mode uint `json:"mode,omitempty"`
// Flags contains the mode flags (MPOL_F_* constants) to combine with Mode.
Flags uint `json:"flags,omitempty"`
// Nodes contains the NUMA nodes to which the mode applies. The set is
// stored in a unix.CPUSet, used here as a bitmask of node IDs rather than
// CPU IDs; it may be nil when no nodes are given.
Nodes *unix.CPUSet `json:"nodes,omitempty"`
}
4 changes: 2 additions & 2 deletions libcontainer/configs/tocpuset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ func TestToCPUSet(t *testing.T) {

for _, tc := range testCases {
t.Run(tc.in, func(t *testing.T) {
out, err := toCPUSet(tc.in)
t.Logf("toCPUSet(%q) = %v (error: %v)", tc.in, out, err)
out, err := ToCPUSet(tc.in)
t.Logf("ToCPUSet(%q) = %v (error: %v)", tc.in, out, err)
// Check the error.
if tc.isErr {
if err == nil {
Expand Down
24 changes: 24 additions & 0 deletions libcontainer/configs/validate/validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ func Validate(config *configs.Config) error {
mountsStrict,
scheduler,
ioPriority,
memoryPolicy,
}
for _, c := range checks {
if err := c(config); err != nil {
Expand Down Expand Up @@ -482,3 +483,26 @@ func ioPriority(config *configs.Config) error {

return nil
}

// memoryPolicy validates the container's NUMA memory policy, if one is set.
//
// It checks that the mode is one of the known MPOL_* modes and that the
// number of configured nodes fits the mode: MPOL_DEFAULT and MPOL_LOCAL take
// no nodes; MPOL_BIND, MPOL_INTERLEAVE, MPOL_PREFERRED_MANY and
// MPOL_WEIGHTED_INTERLEAVE need at least one; MPOL_PREFERRED accepts any
// number. A nil policy is valid.
func memoryPolicy(config *configs.Config) error {
	policy := config.MemoryPolicy
	if policy == nil {
		return nil
	}

	// A missing node set counts as an empty one.
	nodeCount := 0
	if policy.Nodes != nil {
		nodeCount = policy.Nodes.Count()
	}

	switch policy.Mode {
	case configs.MPOL_PREFERRED:
		// Zero or more nodes are allowed by the kernel.
		return nil
	case configs.MPOL_DEFAULT, configs.MPOL_LOCAL:
		if nodeCount != 0 {
			return fmt.Errorf("memory policy mode requires 0 nodes but got %d", nodeCount)
		}
		return nil
	case configs.MPOL_BIND, configs.MPOL_INTERLEAVE,
		configs.MPOL_PREFERRED_MANY, configs.MPOL_WEIGHTED_INTERLEAVE:
		if nodeCount == 0 {
			return fmt.Errorf("memory policy mode requires at least one node but got 0")
		}
		return nil
	}

	return fmt.Errorf("invalid memory policy mode: %d", policy.Mode)
}
8 changes: 8 additions & 0 deletions libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,14 @@ func setupIOPriority(config *initConfig) error {
return nil
}

// setupMemoryPolicy applies the configured NUMA memory policy, if any, by
// calling linux.SetMempolicy with the mode and flags OR-ed together and the
// configured node set. It does nothing and returns nil when no policy is set.
func setupMemoryPolicy(config *configs.Config) error {
	if mpol := config.MemoryPolicy; mpol != nil {
		return linux.SetMempolicy(mpol.Mode|mpol.Flags, mpol.Nodes)
	}
	return nil
}

// setupPersonality passes the configured personality domain to
// system.SetLinuxPersonality and returns its result.
func setupPersonality(config *configs.Config) error {
	domain := config.Personality.Domain
	return system.SetLinuxPersonality(domain)
}
Expand Down
47 changes: 14 additions & 33 deletions libcontainer/intelrdt/intelrdt.go
Original file line number Diff line number Diff line change
Expand Up @@ -326,16 +326,6 @@ func getIntelRdtParamString(path, file string) (string, error) {
return string(bytes.TrimSpace(contents)), nil
}

// writeFile writes data followed by a newline to the named file inside dir,
// using mode 0o600. An empty dir is rejected with an error. A failed write is
// wrapped by newLastCmdError.
func writeFile(dir, file, data string) error {
	if dir == "" {
		return fmt.Errorf("no such directory for %s", file)
	}

	target := filepath.Join(dir, file)
	payload := []byte(data + "\n")

	err := os.WriteFile(target, payload, 0o600)
	if err == nil {
		return nil
	}
	return newLastCmdError(fmt.Errorf("intelrdt: unable to write %v: %w", data, err))
}

// Get the read-only L3 cache information
func getL3CacheInfo() (*L3CacheInfo, error) {
l3CacheInfo := &L3CacheInfo{}
Expand Down Expand Up @@ -462,11 +452,11 @@ func (m *Manager) Apply(pid int) (err error) {
m.mu.Lock()
defer m.mu.Unlock()

if m.config.IntelRdt.ClosID != "" && m.config.IntelRdt.L3CacheSchema == "" && m.config.IntelRdt.MemBwSchema == "" {
if m.config.IntelRdt.ClosID != "" && m.config.IntelRdt.L3CacheSchema == "" && m.config.IntelRdt.MemBwSchema == "" && len(m.config.IntelRdt.Schemata) == 0 {
// Check that the CLOS exists, i.e. it has been pre-configured to
// conform with the runtime spec
if _, err := os.Stat(path); err != nil {
return fmt.Errorf("clos dir not accessible (must be pre-created when l3CacheSchema and memBwSchema are empty): %w", err)
return fmt.Errorf("clos dir not accessible (must be pre-created when schemata, l3CacheSchema and memBwSchema are empty): %w", err)
}
}

Expand Down Expand Up @@ -534,6 +524,8 @@ func (m *Manager) GetStats() (*Stats, error) {
}
schemaStrings := strings.Split(tmpStrings, "\n")

stats.Schemata = schemaStrings

if IsCATEnabled() {
// The read-only L3 cache information
l3CacheInfo, err := getL3CacheInfo()
Expand Down Expand Up @@ -637,35 +629,24 @@ func (m *Manager) Set(container *configs.Config) error {
// For example, on a two-socket machine, the schema line could be
// "MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on
// socket 0 and 7000 MBps memory bandwidth limit on socket 1.
if container.IntelRdt != nil {
path := m.GetPath()
l3CacheSchema := container.IntelRdt.L3CacheSchema
memBwSchema := container.IntelRdt.MemBwSchema

if r := container.IntelRdt; r != nil {
// TODO: verify that l3CacheSchema and/or memBwSchema match the
// existing schemata if ClosID has been specified. This is more
// involved than reading the file and doing plain string comparison as
// the value written in does not necessarily match what gets read out
// (leading zeros, cache id ordering etc).

// Write a single joint schema string to schemata file
if l3CacheSchema != "" && memBwSchema != "" {
if err := writeFile(path, "schemata", l3CacheSchema+"\n"+memBwSchema); err != nil {
return err
}
}

// Write only L3 cache schema string to schemata file
if l3CacheSchema != "" && memBwSchema == "" {
if err := writeFile(path, "schemata", l3CacheSchema); err != nil {
return err
var schemata strings.Builder
for _, s := range append([]string{r.L3CacheSchema, r.MemBwSchema}, r.Schemata...) {
if s != "" {
schemata.WriteString(s)
schemata.WriteString("\n")
}
}

// Write only memory bandwidth schema string to schemata file
if l3CacheSchema == "" && memBwSchema != "" {
if err := writeFile(path, "schemata", memBwSchema); err != nil {
return err
if schemata.Len() > 0 {
path := filepath.Join(m.GetPath(), "schemata")
if err := os.WriteFile(path, []byte(schemata.String()), 0o600); err != nil {
return newLastCmdError(fmt.Errorf("intelrdt: unable to write %q: %w", schemata.String(), err))
}
}
}
Expand Down
Loading