Skip to content

Commit b58fe51

Browse files
committed
Add memory policy support
Implement support for Linux memory policy in OCI spec PR: opencontainers/runtime-spec#1282 Signed-off-by: Antti Kervinen <[email protected]>
1 parent 9902a3d commit b58fe51

File tree

15 files changed

+489
-10
lines changed

15 files changed

+489
-10
lines changed

features.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@ var featuresCommand = cli.Command{
5858
IntelRdt: &features.IntelRdt{
5959
Enabled: &t,
6060
},
61+
MemoryPolicy: &features.MemoryPolicy{
62+
Modes: specconv.KnownMemoryPolicyModes(),
63+
Flags: specconv.KnownMemoryPolicyFlags(),
64+
},
6165
MountExtensions: &features.MountExtensions{
6266
IDMap: &features.IDMap{
6367
Enabled: &t,

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ require (
1515
github.com/moby/sys/userns v0.1.0
1616
github.com/mrunalp/fileutils v0.5.1
1717
github.com/opencontainers/cgroups v0.0.4
18-
github.com/opencontainers/runtime-spec v1.2.2-0.20250401095657-e935f995dd67
18+
github.com/opencontainers/runtime-spec v1.2.2-0.20250804081626-bfdffd548aa6
1919
github.com/opencontainers/selinux v1.12.0
2020
github.com/seccomp/libseccomp-golang v0.11.0
2121
github.com/sirupsen/logrus v1.9.3

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm
4747
github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
4848
github.com/opencontainers/cgroups v0.0.4 h1:XVj8P/IHVms/j+7eh8ggdkTLAxjz84ZzuFyGoE28DR4=
4949
github.com/opencontainers/cgroups v0.0.4/go.mod h1:s8lktyhlGUqM7OSRL5P7eAW6Wb+kWPNvt4qvVfzA5vs=
50-
github.com/opencontainers/runtime-spec v1.2.2-0.20250401095657-e935f995dd67 h1:Q+KewUGTMamIe6Q39xCD/T1NC1POmaTlWnhjikCrZHA=
51-
github.com/opencontainers/runtime-spec v1.2.2-0.20250401095657-e935f995dd67/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
50+
github.com/opencontainers/runtime-spec v1.2.2-0.20250804081626-bfdffd548aa6 h1:6S6r1L8VO9b1UfgIQi+nteqlElma9KDlzZw/nM3ctI0=
51+
github.com/opencontainers/runtime-spec v1.2.2-0.20250804081626-bfdffd548aa6/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
5252
github.com/opencontainers/selinux v1.12.0 h1:6n5JV4Cf+4y0KNXW48TLj5DwfXpvWlxXplUkdTrmPb8=
5353
github.com/opencontainers/selinux v1.12.0/go.mod h1:BTPX+bjVbWGXw7ZZWUbdENt8w0htPSrlgOOysQaU62U=
5454
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=

internal/linux/linux.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package linux
22

33
import (
44
"os"
5+
"unsafe"
56

67
"golang.org/x/sys/unix"
78
)
@@ -72,3 +73,28 @@ func Sendmsg(fd int, p, oob []byte, to unix.Sockaddr, flags int) error {
7273
})
7374
return os.NewSyscallError("sendmsg", err)
7475
}
76+
77+
// bitmaskFromInts packs a list of bit numbers into a little-endian array of
// 64-bit words: bit number b is stored in word b/64 at position b%64.
//
// The returned slice always has at least one word, and is exactly long
// enough to hold the highest bit number in bits. Duplicate entries are
// harmless. Bit numbers are expected to be non-negative.
func bitmaskFromInts(bits []int) []uint64 {
	// First pass: find the highest bit so the mask can be sized up front.
	highest := 0
	for _, b := range bits {
		if b > highest {
			highest = b
		}
	}

	// Second pass: set each requested bit in its word.
	words := make([]uint64, highest/64+1)
	for _, b := range bits {
		word, offset := b/64, b%64
		words[word] |= uint64(1) << offset
	}
	return words
}
90+
91+
// SetMempolicy wraps set_mempolicy.
92+
func SetMempolicy(mode uint, nodes []int) error {
93+
nodemask := bitmaskFromInts(nodes)
94+
nodemaskPtr := unsafe.Pointer(&nodemask[0])
95+
_, _, errno := unix.Syscall(unix.SYS_SET_MEMPOLICY, uintptr(mode), uintptr(nodemaskPtr), uintptr(len(nodemask)*64))
96+
if errno != 0 {
97+
return os.NewSyscallError("set_mempolicy", errno)
98+
}
99+
return nil
100+
}

libcontainer/configs/config.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,9 @@ type Config struct {
214214
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
215215
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
216216

217+
// MemoryPolicy specifies NUMA memory policy for the container.
218+
MemoryPolicy *LinuxMemoryPolicy `json:"memoryPolicy,omitempty"`
219+
217220
// RootlessEUID is set when the runc was launched with non-zero EUID.
218221
// Note that RootlessEUID is set to false when launched with EUID=0 in userns.
219222
// When RootlessEUID is set, runc creates a new userns for the container.

libcontainer/configs/memorypolicy.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
package configs
2+
3+
// LinuxMemoryPolicy contains memory policy configuration.
4+
type LinuxMemoryPolicy struct {
5+
// Mode combines memory policy mode and mode flags. Refer to
6+
// set_mempolicy() documentation for details.
7+
Mode uint
8+
// Nodes contains the NUMA nodes to which the mode applies.
9+
Nodes []int
10+
}

libcontainer/init_linux.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,13 @@ func setupIOPriority(config *initConfig) error {
659659
return nil
660660
}
661661

662+
func setupMemoryPolicy(config *configs.Config) error {
663+
if config.MemoryPolicy == nil {
664+
return nil
665+
}
666+
return linux.SetMempolicy(config.MemoryPolicy.Mode, config.MemoryPolicy.Nodes)
667+
}
668+
662669
// setupPersonality sets the Linux personality to the domain given in the
// container config.
func setupPersonality(config *configs.Config) error {
	personality := config.Personality
	return system.SetLinuxPersonality(personality.Domain)
}

libcontainer/setns_init_linux.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@ func (l *linuxSetnsInit) Init() error {
8080
if err := setupIOPriority(l.config); err != nil {
8181
return err
8282
}
83+
84+
if err := setupMemoryPolicy(l.config.Config); err != nil {
85+
return err
86+
}
87+
8388
// Tell our parent that we're ready to exec. This must be done before the
8489
// Seccomp rules have been applied, because we need to be able to read and
8590
// write to a socket.

libcontainer/specconv/spec_linux.go

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"os"
1010
"path/filepath"
1111
"sort"
12+
"strconv"
1213
"strings"
1314
"sync"
1415
"time"
@@ -41,6 +42,14 @@ var (
4142
flag int
4243
}
4344
complexFlags map[string]func(*configs.Mount)
45+
mpolModeMap map[specs.MemoryPolicyModeType]uint
46+
mpolModeFMap map[specs.MemoryPolicyFlagType]uint
47+
)
48+
49+
const (
50+
// maxNumaNode is the maximum NUMA node number supported.
51+
// Must be large enough to cover what the kernel supports.
52+
maxNumaNode = 4095
4453
)
4554

4655
func initMaps() {
@@ -148,6 +157,22 @@ func initMaps() {
148157
m.IDMapping.Recursive = true
149158
},
150159
}
160+
161+
mpolModeMap = map[specs.MemoryPolicyModeType]uint{
162+
specs.MpolDefault: 0,
163+
specs.MpolPreferred: 1,
164+
specs.MpolBind: 2,
165+
specs.MpolInterleave: 3,
166+
specs.MpolLocal: 4,
167+
specs.MpolPreferredMany: 5,
168+
specs.MpolWeightedInterleave: 6,
169+
}
170+
171+
mpolModeFMap = map[specs.MemoryPolicyFlagType]uint{
172+
specs.MpolFStaticNodes: 1 << 15,
173+
specs.MpolFRelativeNodes: 1 << 14,
174+
specs.MpolFNumaBalancing: 1 << 13,
175+
}
151176
})
152177
}
153178

@@ -184,6 +209,30 @@ func KnownMountOptions() []string {
184209
return res
185210
}
186211

212+
// KnownMemoryPolicyModes returns the list of the known memory policy modes.
213+
// Used by `runc features`.
214+
func KnownMemoryPolicyModes() []string {
215+
initMaps()
216+
var res []string
217+
for k := range mpolModeMap {
218+
res = append(res, string(k))
219+
}
220+
sort.Strings(res)
221+
return res
222+
}
223+
224+
// KnownMemoryPolicyFlags returns the list of the known memory policy mode flags.
225+
// Used by `runc features`.
226+
func KnownMemoryPolicyFlags() []string {
227+
initMaps()
228+
var res []string
229+
for k := range mpolModeFMap {
230+
res = append(res, string(k))
231+
}
232+
sort.Strings(res)
233+
return res
234+
}
235+
187236
// AllowedDevices is the set of devices which are automatically included for
188237
// all containers.
189238
//
@@ -467,6 +516,28 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
467516
MemBwSchema: spec.Linux.IntelRdt.MemBwSchema,
468517
}
469518
}
519+
if !isEmptyMemoryPolicy(spec.Linux.MemoryPolicy) {
520+
var ok bool
521+
var err error
522+
specMp := spec.Linux.MemoryPolicy
523+
confMp := &configs.LinuxMemoryPolicy{}
524+
confMp.Mode, ok = mpolModeMap[specMp.Mode]
525+
if !ok {
526+
return nil, fmt.Errorf("invalid memory policy mode %q", specMp.Mode)
527+
}
528+
confMp.Nodes, err = parseListSet(specMp.Nodes, 0, maxNumaNode)
529+
if err != nil {
530+
return nil, fmt.Errorf("invalid memory policy nodes %q: %w", specMp.Nodes, err)
531+
}
532+
for _, specFlag := range specMp.Flags {
533+
confModeFlag, ok := mpolModeFMap[specFlag]
534+
if !ok {
535+
return nil, fmt.Errorf("invalid memory policy flag %q", specFlag)
536+
}
537+
confMp.Mode |= confModeFlag
538+
}
539+
config.MemoryPolicy = confMp
540+
}
470541
if spec.Linux.Personality != nil {
471542
if len(spec.Linux.Personality.Flags) > 0 {
472543
logrus.Warnf("ignoring unsupported personality flags: %+v because personality flag has not supported at this time", spec.Linux.Personality.Flags)
@@ -1135,6 +1206,67 @@ func parseMountOptions(options []string) *configs.Mount {
11351206
return &m
11361207
}
11371208

1209+
// parseListSet expands "list set" syntax such as "0,61-63,2" into the
// individual values it names ([0, 61, 62, 63, 2]).
//
// The input is a comma-separated list whose items are either a single
// decimal number or an inclusive "start-end" range. Empty items are skipped,
// so an empty string yields a nil slice. Values are returned in input order
// and are not de-duplicated.
//
// An error is returned if an item is not a valid number or range, if a range
// has start > end, or if any value falls outside [minValue, maxValue].
func parseListSet(listSet string, minValue, maxValue int) ([]int, error) {
	var values []int
	for _, item := range strings.Split(listSet, ",") {
		if item == "" {
			continue
		}

		dash := strings.Index(item, "-")
		if dash < 0 {
			// Single value.
			n, err := strconv.Atoi(item)
			if err != nil {
				return nil, err
			}
			if n < minValue || n > maxValue {
				return nil, fmt.Errorf("invalid value %d: out of range %d-%d", n, minValue, maxValue)
			}
			values = append(values, n)
			continue
		}

		// Range: exactly one dash separating two numbers.
		lo, hi := item[:dash], item[dash+1:]
		if strings.Contains(hi, "-") {
			return nil, fmt.Errorf("invalid range: %s", item)
		}
		first, err := strconv.Atoi(lo)
		if err != nil {
			return nil, err
		}
		last, err := strconv.Atoi(hi)
		if err != nil {
			return nil, err
		}
		if first > last {
			return nil, fmt.Errorf("invalid range %s: start > end", item)
		}
		if first < minValue || last > maxValue {
			return nil, fmt.Errorf("invalid range %s: out of range %d-%d", item, minValue, maxValue)
		}
		for n := first; n <= last; n++ {
			values = append(values, n)
		}
	}
	return values, nil
}
1252+
1253+
// isEmptyMemoryPolicy checks if a LinuxMemoryPolicy is empty (nil or all zero values).
1254+
func isEmptyMemoryPolicy(mp *specs.LinuxMemoryPolicy) bool {
1255+
if mp == nil {
1256+
return true
1257+
}
1258+
if mp.Mode != "" {
1259+
return false
1260+
}
1261+
if mp.Nodes != "" {
1262+
return false
1263+
}
1264+
if len(mp.Flags) > 0 {
1265+
return false
1266+
}
1267+
return true
1268+
}
1269+
11381270
func SetupSeccomp(config *specs.LinuxSeccomp) (*configs.Seccomp, error) {
11391271
if config == nil {
11401272
return nil, nil

0 commit comments

Comments
 (0)