Skip to content

Commit 7120976

Browse files
committed
Implement none, private, and shareable ipc modes
Since the commit d88fe44 ("Add support for sharing /dev/shm/ and /dev/mqueue between containers") container's /dev/shm is mounted on the host first, then bind-mounted inside the container. This is done that way in order to be able to share this container's IPC namespace (and the /dev/shm mount point) with another container. Unfortunately, this functionality breaks container checkpoint/restore (even if IPC is not shared). Since /dev/shm is an external mount, its contents is not saved by `criu checkpoint`, and so upon restore any application that tries to access data under /dev/shm is severily disappointed (which usually results in a fatal crash). This commit solves the issue by introducing new IPC modes for containers (in addition to 'host' and 'container:ID'). The new modes are: - 'shareable': enables sharing this container's IPC with others (this used to be the implicit default); - 'private': disables sharing this container's IPC. In 'private' mode, container's /dev/shm is truly mounted inside the container, without any bind-mounting from the host, which solves the issue. While at it, let's also implement 'none' mode. The motivation, as eloquently put by Justin Cormack, is: > I wondered a while back about having a none shm mode, as currently it is > not possible to have a totally unwriteable container as there is always > a /dev/shm writeable mount. It is a bit of a niche case (and clearly > should never be allowed to be daemon default) but it would be trivial to > add now so maybe we should... ...so here's yet yet another mode: - 'none': no /dev/shm mount inside the container (though it still has its own private IPC namespace). Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd need to make 'private' the default mode, but unfortunately it breaks the backward compatibility. So, let's make the default container IPC mode per-daemon configurable (with the built-in default set to 'shareable' for now). The default can be changed either via a daemon CLI option (--default-shm-mode) or a daemon.json configuration file parameter of the same name. Note one can only set either 'shareable' or 'private' IPC modes as a daemon default (i.e. in this context 'host', 'container', or 'none' do not make much sense). Some other changes this patch introduces are: 1. A mount for /dev/shm is added to default OCI Linux spec. 2. IpcMode.Valid() is simplified to remove duplicated code that parsed 'container:ID' form. Note the old version used to check that ID does not contain a semicolon -- this is no longer the case (tests are modified accordingly). The motivation is we should either do a proper check for container ID validity, or don't check it at all (since it is checked in other places anyway). I chose the latter. 3. IpcMode.Container() is modified to not return container ID if the mode value does not start with "container:", unifying the check to be the same as in IpcMode.IsContainer(). 3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified to add checks for newly added values. [v2: addressed review at moby#34087 (review)] [v3: addressed review at moby#34087 (review)] [v4: addressed the case of upgrading from older daemon, in this case container.HostConfig.IpcMode is unset and this is valid] [v5: document old and new IpcMode values in api/swagger.yaml] [v6: add the 'none' mode, changelog entry to docs/api/version-history.md] Signed-off-by: Kir Kolyshkin <[email protected]>
1 parent 0fb1fb1 commit 7120976

19 files changed

+246
-112
lines changed

api/swagger.yaml

+11-1
Original file line numberDiff line numberDiff line change
@@ -665,7 +665,17 @@ definitions:
665665
type: "string"
666666
IpcMode:
667667
type: "string"
668-
description: "IPC namespace to use for the container."
668+
description: |
669+
IPC sharing mode for the container. Possible values are:
670+
671+
- `"none"`: own private IPC namespace, with /dev/shm not mounted
672+
- `"private"`: own private IPC namespace
673+
- `"shareable"`: own private IPC namespace, with a possibility to share it with other containers
674+
- `"container:<name|id>"`: join another (shareable) container's IPC namespace
675+
- `"host"`: use the host system's IPC namespace
676+
677+
If not specified, daemon default is used, which can either be `"private"`
678+
or `"shareable"`, depending on daemon version and configuration.
669679
Cgroup:
670680
type: "string"
671681
description: "Cgroup to use for the container."

api/types/container/host_config.go

+22-17
Original file line numberDiff line numberDiff line change
@@ -23,41 +23,46 @@ func (i Isolation) IsDefault() bool {
2323
// IpcMode represents the container ipc stack.
2424
type IpcMode string
2525

26-
// IsPrivate indicates whether the container uses its private ipc stack.
26+
// IsPrivate indicates whether the container uses its own private ipc namespace which can not be shared.
2727
func (n IpcMode) IsPrivate() bool {
28-
return !(n.IsHost() || n.IsContainer())
28+
return n == "private"
2929
}
3030

31-
// IsHost indicates whether the container uses the host's ipc stack.
31+
// IsHost indicates whether the container shares the host's ipc namespace.
3232
func (n IpcMode) IsHost() bool {
3333
return n == "host"
3434
}
3535

36-
// IsContainer indicates whether the container uses a container's ipc stack.
36+
// IsShareable indicates whether the container's ipc namespace can be shared with another container.
37+
func (n IpcMode) IsShareable() bool {
38+
return n == "shareable"
39+
}
40+
41+
// IsContainer indicates whether the container uses another container's ipc namespace.
3742
func (n IpcMode) IsContainer() bool {
3843
parts := strings.SplitN(string(n), ":", 2)
3944
return len(parts) > 1 && parts[0] == "container"
4045
}
4146

42-
// Valid indicates whether the ipc stack is valid.
47+
// IsNone indicates whether container IpcMode is set to "none".
48+
func (n IpcMode) IsNone() bool {
49+
return n == "none"
50+
}
51+
52+
// IsEmpty indicates whether container IpcMode is empty
53+
func (n IpcMode) IsEmpty() bool {
54+
return n == ""
55+
}
56+
57+
// Valid indicates whether the ipc mode is valid.
4358
func (n IpcMode) Valid() bool {
44-
parts := strings.Split(string(n), ":")
45-
switch mode := parts[0]; mode {
46-
case "", "host":
47-
case "container":
48-
if len(parts) != 2 || parts[1] == "" {
49-
return false
50-
}
51-
default:
52-
return false
53-
}
54-
return true
59+
return n.IsEmpty() || n.IsNone() || n.IsPrivate() || n.IsHost() || n.IsShareable() || n.IsContainer()
5560
}
5661

5762
// Container returns the name of the container ipc stack is going to be used.
5863
func (n IpcMode) Container() string {
5964
parts := strings.SplitN(string(n), ":", 2)
60-
if len(parts) > 1 {
65+
if len(parts) > 1 && parts[0] == "container" {
6166
return parts[1]
6267
}
6368
return ""

cmd/dockerd/config_unix.go

+1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ func installConfigFlags(conf *config.Config, flags *pflag.FlagSet) {
4747
flags.StringVar(&conf.SeccompProfile, "seccomp-profile", "", "Path to seccomp profile")
4848
flags.Var(&conf.ShmSize, "default-shm-size", "Default shm size for containers")
4949
flags.BoolVar(&conf.NoNewPrivileges, "no-new-privileges", false, "Set no-new-privileges by default for new containers")
50+
flags.StringVar(&conf.IpcMode, "default-ipc-mode", config.DefaultIpcMode, `Default mode for containers ipc ("shareable" | "private")`)
5051

5152
attachExperimentalFlags(conf, flags)
5253
}

container/container_unix.go

+31-31
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import (
77
"io/ioutil"
88
"os"
99
"path/filepath"
10-
"strings"
1110

1211
"github.com/docker/docker/api/types"
1312
containertypes "github.com/docker/docker/api/types/container"
@@ -19,6 +18,7 @@ import (
1918
"github.com/docker/docker/pkg/system"
2019
"github.com/docker/docker/volume"
2120
"github.com/opencontainers/selinux/go-selinux/label"
21+
"github.com/pkg/errors"
2222
"github.com/sirupsen/logrus"
2323
"golang.org/x/sys/unix"
2424
)
@@ -171,48 +171,48 @@ func (container *Container) HasMountFor(path string) bool {
171171
return exists
172172
}
173173

174-
// UnmountIpcMounts uses the provided unmount function to unmount shm and mqueue if they were mounted
175-
func (container *Container) UnmountIpcMounts(unmount func(pth string) error) {
176-
if container.HostConfig.IpcMode.IsContainer() || container.HostConfig.IpcMode.IsHost() {
177-
return
174+
// UnmountIpcMount uses the provided unmount function to unmount shm if it was mounted
175+
func (container *Container) UnmountIpcMount(unmount func(pth string) error) error {
176+
if container.HasMountFor("/dev/shm") {
177+
return nil
178178
}
179179

180-
var warnings []string
181-
182-
if !container.HasMountFor("/dev/shm") {
183-
shmPath, err := container.ShmResourcePath()
184-
if err != nil {
185-
logrus.Error(err)
186-
warnings = append(warnings, err.Error())
187-
} else if shmPath != "" {
188-
if err := unmount(shmPath); err != nil && !os.IsNotExist(err) {
189-
if mounted, mErr := mount.Mounted(shmPath); mounted || mErr != nil {
190-
warnings = append(warnings, fmt.Sprintf("failed to umount %s: %v", shmPath, err))
191-
}
192-
}
193-
194-
}
180+
// container.ShmPath should not be used here as it may point
181+
// to the host's or other container's /dev/shm
182+
shmPath, err := container.ShmResourcePath()
183+
if err != nil {
184+
return err
195185
}
196-
197-
if len(warnings) > 0 {
198-
logrus.Warnf("failed to cleanup ipc mounts:\n%v", strings.Join(warnings, "\n"))
186+
if shmPath == "" {
187+
return nil
199188
}
189+
if err = unmount(shmPath); err != nil && !os.IsNotExist(err) {
190+
if mounted, mErr := mount.Mounted(shmPath); mounted || mErr != nil {
191+
return errors.Wrapf(err, "umount %s", shmPath)
192+
}
193+
}
194+
return nil
200195
}
201196

202197
// IpcMounts returns the list of IPC mounts
203198
func (container *Container) IpcMounts() []Mount {
204199
var mounts []Mount
205200

206-
if !container.HasMountFor("/dev/shm") {
207-
label.SetFileLabel(container.ShmPath, container.MountLabel)
208-
mounts = append(mounts, Mount{
209-
Source: container.ShmPath,
210-
Destination: "/dev/shm",
211-
Writable: true,
212-
Propagation: string(volume.DefaultPropagationMode),
213-
})
201+
if container.HasMountFor("/dev/shm") {
202+
return mounts
203+
}
204+
if container.ShmPath == "" {
205+
return mounts
214206
}
215207

208+
label.SetFileLabel(container.ShmPath, container.MountLabel)
209+
mounts = append(mounts, Mount{
210+
Source: container.ShmPath,
211+
Destination: "/dev/shm",
212+
Writable: true,
213+
Propagation: string(volume.DefaultPropagationMode),
214+
})
215+
216216
return mounts
217217
}
218218

container/container_windows.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,10 @@ type ExitStatus struct {
2424
ExitCode int
2525
}
2626

27-
// UnmountIpcMounts unmounts Ipc related mounts.
27+
// UnmountIpcMount unmounts Ipc related mounts.
2828
// This is a NOOP on windows.
29-
func (container *Container) UnmountIpcMounts(unmount func(pth string) error) {
29+
func (container *Container) UnmountIpcMount(unmount func(pth string) error) error {
30+
return nil
3031
}
3132

3233
// IpcMounts returns the list of Ipc related mounts.

daemon/config/config.go

+5
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,11 @@ func Validate(config *Config) error {
513513
}
514514
}
515515

516+
// validate platform-specific settings
517+
if err := config.ValidatePlatformConfig(); err != nil {
518+
return err
519+
}
520+
516521
return nil
517522
}
518523

daemon/config/config_solaris.go

+5
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,8 @@ type BridgeConfig struct {
2727
func (conf *Config) IsSwarmCompatible() error {
2828
return nil
2929
}
30+
31+
// ValidatePlatformConfig checks if any platform-specific configuration settings are invalid.
32+
func (conf *Config) ValidatePlatformConfig() error {
33+
return nil
34+
}

daemon/config/config_unix.go

+25
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,16 @@ package config
55
import (
66
"fmt"
77

8+
containertypes "github.com/docker/docker/api/types/container"
89
"github.com/docker/docker/opts"
910
units "github.com/docker/go-units"
1011
)
1112

13+
const (
14+
// DefaultIpcMode is default for container's IpcMode, if not set otherwise
15+
DefaultIpcMode = "shareable" // TODO: change to private
16+
)
17+
1218
// Config defines the configuration of a docker daemon.
1319
// It includes json tags to deserialize configuration from a file
1420
// using the same names that the flags in the command line uses.
@@ -31,6 +37,7 @@ type Config struct {
3137
SeccompProfile string `json:"seccomp-profile,omitempty"`
3238
ShmSize opts.MemBytes `json:"default-shm-size,omitempty"`
3339
NoNewPrivileges bool `json:"no-new-privileges,omitempty"`
40+
IpcMode string `json:"default-ipc-mode,omitempty"`
3441
}
3542

3643
// BridgeConfig stores all the bridge driver specific
@@ -61,3 +68,21 @@ func (conf *Config) IsSwarmCompatible() error {
6168
}
6269
return nil
6370
}
71+
72+
func verifyDefaultIpcMode(mode string) error {
73+
const hint = "Use \"shareable\" or \"private\"."
74+
75+
dm := containertypes.IpcMode(mode)
76+
if !dm.Valid() {
77+
return fmt.Errorf("Default IPC mode setting (%v) is invalid. "+hint, dm)
78+
}
79+
if dm != "" && !dm.IsPrivate() && !dm.IsShareable() {
80+
return fmt.Errorf("IPC mode \"%v\" is not supported as default value. "+hint, dm)
81+
}
82+
return nil
83+
}
84+
85+
// ValidatePlatformConfig checks if any platform-specific configuration settings are invalid.
86+
func (conf *Config) ValidatePlatformConfig() error {
87+
return verifyDefaultIpcMode(conf.IpcMode)
88+
}

daemon/config/config_windows.go

+5
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,8 @@ func (conf *Config) GetExecRoot() string {
5050
func (conf *Config) IsSwarmCompatible() error {
5151
return nil
5252
}
53+
54+
// ValidatePlatformConfig checks if any platform-specific configuration settings are invalid.
55+
func (conf *Config) ValidatePlatformConfig() error {
56+
return nil
57+
}

daemon/container_operations_unix.go

+40-15
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,27 @@ func (daemon *Daemon) setupLinkedContainers(container *container.Container) ([]s
5757
return env, nil
5858
}
5959

60-
func (daemon *Daemon) getIpcContainer(container *container.Container) (*container.Container, error) {
61-
containerID := container.HostConfig.IpcMode.Container()
62-
container, err := daemon.GetContainer(containerID)
60+
func (daemon *Daemon) getIpcContainer(id string) (*container.Container, error) {
61+
errMsg := "can't join IPC of container " + id
62+
// Check the container exists
63+
container, err := daemon.GetContainer(id)
6364
if err != nil {
64-
return nil, errors.Wrapf(err, "cannot join IPC of a non running container: %s", container.ID)
65+
return nil, errors.Wrap(err, errMsg)
6566
}
66-
return container, daemon.checkContainer(container, containerIsRunning, containerIsNotRestarting)
67+
// Check the container is running and not restarting
68+
if err := daemon.checkContainer(container, containerIsRunning, containerIsNotRestarting); err != nil {
69+
return nil, errors.Wrap(err, errMsg)
70+
}
71+
// Check the container ipc is shareable
72+
if st, err := os.Stat(container.ShmPath); err != nil || !st.IsDir() {
73+
if err == nil || os.IsNotExist(err) {
74+
return nil, errors.New(errMsg + ": non-shareable IPC")
75+
}
76+
// stat() failed?
77+
return nil, errors.Wrap(err, errMsg+": unexpected error from stat "+container.ShmPath)
78+
}
79+
80+
return container, nil
6781
}
6882

6983
func (daemon *Daemon) getPidContainer(container *container.Container) (*container.Container, error) {
@@ -90,25 +104,33 @@ func containerIsNotRestarting(c *container.Container) error {
90104
}
91105

92106
func (daemon *Daemon) setupIpcDirs(c *container.Container) error {
93-
var err error
107+
ipcMode := c.HostConfig.IpcMode
94108

95-
c.ShmPath, err = c.ShmResourcePath()
96-
if err != nil {
97-
return err
98-
}
99-
100-
if c.HostConfig.IpcMode.IsContainer() {
101-
ic, err := daemon.getIpcContainer(c)
109+
switch {
110+
case ipcMode.IsContainer():
111+
ic, err := daemon.getIpcContainer(ipcMode.Container())
102112
if err != nil {
103113
return err
104114
}
105115
c.ShmPath = ic.ShmPath
106-
} else if c.HostConfig.IpcMode.IsHost() {
116+
117+
case ipcMode.IsHost():
107118
if _, err := os.Stat("/dev/shm"); err != nil {
108119
return fmt.Errorf("/dev/shm is not mounted, but must be for --ipc=host")
109120
}
110121
c.ShmPath = "/dev/shm"
111-
} else {
122+
123+
case ipcMode.IsPrivate(), ipcMode.IsNone():
124+
// c.ShmPath will/should not be used, so make it empty.
125+
// Container's /dev/shm mount comes from OCI spec.
126+
c.ShmPath = ""
127+
128+
case ipcMode.IsEmpty():
129+
// A container was created by an older version of the daemon.
130+
// The default behavior used to be what is now called "shareable".
131+
fallthrough
132+
133+
case ipcMode.IsShareable():
112134
rootIDs := daemon.idMappings.RootPair()
113135
if !c.HasMountFor("/dev/shm") {
114136
shmPath, err := c.ShmResourcePath()
@@ -127,8 +149,11 @@ func (daemon *Daemon) setupIpcDirs(c *container.Container) error {
127149
if err := os.Chown(shmPath, rootIDs.UID, rootIDs.GID); err != nil {
128150
return err
129151
}
152+
c.ShmPath = shmPath
130153
}
131154

155+
default:
156+
return fmt.Errorf("invalid IPC mode: %v", ipcMode)
132157
}
133158

134159
return nil

daemon/daemon_solaris.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,8 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.
308308

309309
// reloadPlatform updates configuration with platform specific options
310310
// and updates the passed attributes
311-
func (daemon *Daemon) reloadPlatform(conf *config.Config, attributes map[string]string) {
311+
func (daemon *Daemon) reloadPlatform(conf *config.Config, attributes map[string]string) error {
312+
return nil
312313
}
313314

314315
// verifyDaemonSettings performs validation of daemon config struct

0 commit comments

Comments
 (0)