diff --git a/libpod/container_internal_common.go b/libpod/container_internal_common.go index 017a01e5b3..38a4042fc1 100644 --- a/libpod/container_internal_common.go +++ b/libpod/container_internal_common.go @@ -374,6 +374,27 @@ func (c *Container) generateSpec(ctx context.Context) (s *spec.Spec, cleanupFunc // Podman decided for --no-dereference as many // bin-utils tools (e..g, touch, chown, cp) do. options = append(options, "copy-symlink") + // TODO: this also ends up checking non-user mounts + case "ro", "rro": + // There are 2 cases: + // 1. User requests `rro` + // * Return error if runtime does not support `rro` + // 2. User requests `ro` + // * Use `rro` if runtime supports `rro` + // * Use `ro` if runtime does not support `rro` + rro := true + if err := util.SupportsRecursiveReadonly(c.ociRuntime.Features()); err != nil { + rro = false + if o == "rro" { + return nil, nil, err + } + } + + if rro { + options = append(options, "rro") + } else { + options = append(options, "ro") + } default: options = append(options, o) } diff --git a/libpod/oci.go b/libpod/oci.go index f14b4acf83..f0de47e783 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -8,6 +8,7 @@ import ( "github.com/containers/common/pkg/resize" "github.com/containers/podman/v5/libpod/define" "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/runtime-spec/specs-go/features" ) // OCIRuntime is an implementation of an OCI runtime. @@ -131,6 +132,9 @@ type OCIRuntime interface { //nolint:interfacebloat // without KVM separation SupportsKVM() bool + // Features returns the features struct from the OCI runtime + Features() *features.Features + // AttachSocketPath is the path to the socket to attach to a given // container. // TODO: If we move Attach code in here, this should be made internal. 
diff --git a/libpod/oci_conmon_common.go b/libpod/oci_conmon_common.go
index 1b0f4c42da..7b95808f91 100644
--- a/libpod/oci_conmon_common.go
+++ b/libpod/oci_conmon_common.go
@@ -36,6 +36,7 @@ import (
 	"github.com/containers/podman/v5/utils"
 	"github.com/containers/storage/pkg/idtools"
 	spec "github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/opencontainers/runtime-spec/specs-go/features"
 	"github.com/sirupsen/logrus"
 	"golang.org/x/sys/unix"
 )
@@ -66,6 +67,7 @@ type ConmonOCIRuntime struct {
 	supportsNoCgroups bool
 	enableKeyring     bool
 	persistDir        string
+	features          *features.Features
 }
 
 // Make a new Conmon-based OCI runtime with the given options.
@@ -131,6 +133,16 @@ func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtime
 		break
 	}
 
+	// Probing the runtime's features is best effort: the runtime path may
+	// not be resolved yet (see the $PATH fallback below) and not every OCI
+	// runtime implements the "features" subcommand. On failure the features
+	// stay nil, which callers treat as "optional feature not supported".
+	features, err := runtime.getOCIRuntimeFeatures()
+	if err != nil {
+		logrus.Debugf("Getting %s features: %v", runtime.name, err)
+	}
+	runtime.features = features
+
 	// Search the $PATH as last fallback
 	if !foundPath {
 		if foundRuntime, err := exec.LookPath(name); err == nil {
@@ -839,6 +851,11 @@ func (r *ConmonOCIRuntime) SupportsKVM() bool {
 	return r.supportsKVM
 }
 
+// Features returns the features struct from the OCI runtime
+func (r *ConmonOCIRuntime) Features() *features.Features {
+	return r.features
+}
+
 // AttachSocketPath is the path to a single container's attach socket.
 func (r *ConmonOCIRuntime) AttachSocketPath(ctr *Container) (string, error) {
 	if ctr == nil {
@@ -1485,6 +1502,23 @@ func (r *ConmonOCIRuntime) getConmonVersion() (string, error) {
 	return strings.TrimSuffix(strings.Replace(output, "\n", ", ", 1), "\n"), nil
 }
 
+// getOCIRuntimeFeatures invokes the OCI runtime binary with the "features"
+// argument and decodes its JSON output. It returns a nil struct together with
+// an error when the command fails or its output cannot be parsed.
+func (r *ConmonOCIRuntime) getOCIRuntimeFeatures() (*features.Features, error) {
+	output, err := utils.ExecCmd(r.path, "features")
+	if err != nil {
+		return nil, err
+	}
+
+	feat := &features.Features{}
+	if err := json.Unmarshal([]byte(output), feat); err != nil {
+		return nil, fmt.Errorf("parsing %s features output: %w", r.path, err)
+	}
+
+	return feat, nil
+}
+
 // getOCIRuntimeVersion returns a string representation of the OCI runtime's
 // version.
 func (r *ConmonOCIRuntime) getOCIRuntimeVersion() (string, error) {
diff --git a/libpod/oci_missing.go b/libpod/oci_missing.go
index bfdbbf228e..93eb45b697 100644
--- a/libpod/oci_missing.go
+++ b/libpod/oci_missing.go
@@ -11,6 +11,7 @@ import (
 	"github.com/containers/common/pkg/resize"
 	"github.com/containers/podman/v5/libpod/define"
 	spec "github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/opencontainers/runtime-spec/specs-go/features"
 	"github.com/sirupsen/logrus"
 )
 
@@ -194,6 +195,11 @@ func (r *MissingRuntime) SupportsKVM() bool {
 	return false
 }
 
+// Features returns nil since this is a missing runtime
+func (r *MissingRuntime) Features() *features.Features {
+	return nil
+}
+
 // AttachSocketPath does not work as there is no runtime to attach to.
 // (Theoretically we could follow ExitFilePath but there is no guarantee the
 // container is running and thus has an attach socket...)
diff --git a/pkg/specgen/volumes.go b/pkg/specgen/volumes.go
index dbe987d439..516e207611 100644
--- a/pkg/specgen/volumes.go
+++ b/pkg/specgen/volumes.go
@@ -43,8 +43,7 @@ type OverlayVolume struct {
 }
 
 // ImageVolume is a volume based on a container image. The container image is
-// first mounted on the host and is then bind-mounted into the container.
An
-// ImageVolume is always mounted read-only.
+// first mounted on the host and is then bind-mounted into the container.
 type ImageVolume struct {
 	// Source is the source of the image volume. The image can be referred
 	// to by name and by ID.
diff --git a/pkg/specgenutil/volumes.go b/pkg/specgenutil/volumes.go
index d8d190578c..f13ce32171 100644
--- a/pkg/specgenutil/volumes.go
+++ b/pkg/specgenutil/volumes.go
@@ -345,34 +345,44 @@ func parseMountOptions(mountType string, args []string) (*universalMount, error)
 			} else {
 				mnt.mount.Options = append(mnt.mount.Options, "idmap")
 			}
-		case "readonly", "ro", "rw":
+		case "readonly", "ro", "recursivereadonly", "rro", "rw":
 			if setRORW {
-				return nil, fmt.Errorf("cannot pass 'readonly', 'ro', or 'rw' mnt.Options more than once: %w", errOptionArg)
+				return nil, fmt.Errorf("cannot pass 'readonly', 'ro', 'rro', 'recursivereadonly' or 'rw' options more than once: %w", errOptionArg)
 			}
 			setRORW = true
+
 			// Can be formatted as one of:
 			// readonly
 			// readonly=[true|false]
 			// ro
 			// ro=[true|false]
+			// recursivereadonly
+			// recursivereadonly=[true|false]
+			// rro
+			// rro=[true|false]
 			// rw
 			// rw=[true|false]
-			if name == "readonly" {
+			switch name {
+			case "readonly":
 				name = "ro"
+			case "recursivereadonly":
+				name = "rro"
 			}
 			if hasValue {
 				switch strings.ToLower(value) {
 				case "true":
 					mnt.mount.Options = append(mnt.mount.Options, name)
 				case "false":
-					// Set the opposite only for rw
-					// ro's opposite is the default
+					// Set the opposite only for rw; the opposite of
+					// ro/rro is the writable default. Plain "ro" is
+					// used so that spec generation can still upgrade
+					// it to "rro" when the runtime supports that.
 					if name == "rw" {
 						mnt.mount.Options = append(mnt.mount.Options, "ro")
 					}
 				}
 			} else {
 				mnt.mount.Options = append(mnt.mount.Options, name)
 			}
 		case "nodev", "dev":
 			if
setDev {
diff --git a/pkg/util/mount_opts.go b/pkg/util/mount_opts.go
index c9a773093e..b867927cac 100644
--- a/pkg/util/mount_opts.go
+++ b/pkg/util/mount_opts.go
@@ -94,7 +94,7 @@ func processOptionsInternal(options []string, isTmpfs bool, sourcePath string, g
 				return nil, fmt.Errorf("only one of 'nodev' and 'dev' can be used: %w", ErrDupeMntOption)
 			}
 			foundDev = true
-		case "rw", "ro":
+		case "rw", "ro", "rro":
 			if foundWrite {
 				return nil, fmt.Errorf("only one of 'rw' and 'ro' can be used: %w", ErrDupeMntOption)
 			}
diff --git a/pkg/util/utils_linux.go b/pkg/util/utils_linux.go
index 9130d1f404..29e778cee7 100644
--- a/pkg/util/utils_linux.go
+++ b/pkg/util/utils_linux.go
@@ -6,21 +6,28 @@ import (
 	"io/fs"
 	"os"
 	"path/filepath"
+	"slices"
 	"strconv"
 	"strings"
+	"sync"
 	"syscall"
 
 	"github.com/containers/podman/v5/libpod/define"
 	"github.com/containers/podman/v5/pkg/rootless"
 	"github.com/containers/psgo"
 	spec "github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/opencontainers/runtime-spec/specs-go/features"
 	"github.com/opencontainers/runtime-tools/generate"
 	"github.com/sirupsen/logrus"
 	"golang.org/x/sys/unix"
 )
 
 var (
-	errNotADevice = errors.New("not a device node")
+	errNotADevice               = errors.New("not a device node")
+	errKernelDoesNotSupportRRO  = errors.New("kernel does not support recursive readonly mount option `rro`")
+	errRuntimeDoesNotSupportRRO = errors.New("runtime does not support recursive readonly mount option `rro`")
+
+	kernelSupportsRROOnce sync.Once
 )
 
 // GetContainerPidInformationDescriptors returns a string slice of all supported
@@ -258,3 +265,91 @@ func DeviceFromPath(path string) (*spec.LinuxDevice, error) {
 		Minor:    int64(unix.Minor(devNumber)),
 	}, nil
 }
+
+// kernelRROErr caches the result of the one-time probe done by
+// kernelSupportsRecursivelyReadOnly. It is written only from within
+// kernelSupportsRROOnce.Do.
+var kernelRROErr error
+
+// kernelSupportsRecursivelyReadOnly returns nil if the kernel supports
+// recursive read-only mounts, and an error wrapping
+// errKernelDoesNotSupportRRO otherwise. The kernel is probed once per process
+// by mounting a scratch tmpfs and calling mount_setattr(2) with AT_RECURSIVE
+// on it; later calls return the cached result.
+// from https://github.com/moby/moby/blob/master/daemon/daemon_linux.go#L222
+func kernelSupportsRecursivelyReadOnly() error {
+	probe := func() error {
+		tmpMnt, err := os.MkdirTemp("", "podman-detect-rro")
+		if err != nil {
+			return fmt.Errorf("failed to create a temp directory: %w", err)
+		}
+		// Registered before the unmount so that it runs after it and the
+		// scratch directory does not leak.
+		defer func() {
+			if rmErr := os.Remove(tmpMnt); rmErr != nil {
+				logrus.Errorf("Failed to remove %q: %v", tmpMnt, rmErr)
+			}
+		}()
+		for {
+			err = unix.Mount("", tmpMnt, "tmpfs", 0, "")
+			if !errors.Is(err, unix.EINTR) {
+				break
+			}
+		}
+		if err != nil {
+			return fmt.Errorf("failed to mount tmpfs on %q: %w", tmpMnt, err)
+		}
+		defer func() {
+			var umErr error
+			for {
+				umErr = unix.Unmount(tmpMnt, 0)
+				if !errors.Is(umErr, unix.EINTR) {
+					break
+				}
+			}
+			if umErr != nil {
+				logrus.Errorf("Failed to unmount %q: %v", tmpMnt, umErr)
+			}
+		}()
+		attr := &unix.MountAttr{
+			Attr_set: unix.MOUNT_ATTR_RDONLY,
+		}
+		for {
+			err = unix.MountSetattr(-1, tmpMnt, unix.AT_RECURSIVE, attr)
+			if !errors.Is(err, unix.EINTR) {
+				break
+			}
+		}
+		// ENOSYS on kernel < 5.12
+		if err != nil {
+			return fmt.Errorf("failed to call mount_setattr with AT_RECURSIVE: %w", err)
+		}
+		return nil
+	}
+
+	kernelSupportsRROOnce.Do(func() {
+		// Keep the sentinel intact and store the outcome separately.
+		if err := probe(); err != nil {
+			kernelRROErr = fmt.Errorf("%w: %w", errKernelDoesNotSupportRRO, err)
+		}
+	})
+	return kernelRROErr
+}
+
+// SupportsRecursiveReadonly returns nil if both the kernel and the OCI runtime
+// described by features support recursive read-only (`rro`) mounts. Otherwise
+// it returns the kernel probe error or errRuntimeDoesNotSupportRRO; a nil
+// features struct or one that does not list "rro" among its mount options
+// counts as unsupported.
+func SupportsRecursiveReadonly(features *features.Features) error {
+	if err := kernelSupportsRecursivelyReadOnly(); err != nil {
+		return err
+	}
+
+	// slices.Contains on a nil MountOptions slice is simply false.
+	if features == nil || !slices.Contains(features.MountOptions, "rro") {
+		return errRuntimeDoesNotSupportRRO
+	}
+
+	return nil
+}
diff --git a/test/system/060-mount.bats b/test/system/060-mount.bats
index 3d5c09cd02..438583fb7d 100644
--- a/test/system/060-mount.bats
+++ b/test/system/060-mount.bats
@@ -573,3 +573,28 @@ glob | /* | /mountroot/ | in
 
     run_podman rmi -f $img
 }
+
+# bats test_tags=ci:parallel
+@test "podman bind mount rro" {
+    skip_if_rootless
+
+    volName="vol-$(safename)"
+    volPath=${PODMAN_TMPDIR}/$volName
+    mkdir -p $volPath
+
+    mount -t tmpfs tmpfs $volPath
+    mkdir -p $volPath/foo $volPath/bar
+    mount -t tmpfs tmpfs $volPath/foo
+    mount -t tmpfs tmpfs $volPath/bar
+
run_podman 1 run --rm -it -v $volPath/:/tmp/mounts:rro $IMAGE touch /tmp/mounts/foo/test + assert "$output" =~ "Read-only file system" "Error should indicate read-only filesystem" + + run_podman 1 run --rm -it --mount type=bind,source=$volPath,destination=/tmp/mounts,recursivereadonly=true $IMAGE touch /tmp/mounts/bar/test + assert "$output" =~ "Read-only file system" "Error should indicate read-only filesystem" + + umount $volPath/foo + umount $volPath/bar + umount $volPath + rm -rf $volPath +} \ No newline at end of file diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/features/features.go b/vendor/github.com/opencontainers/runtime-spec/specs-go/features/features.go new file mode 100644 index 0000000000..949f532b65 --- /dev/null +++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/features/features.go @@ -0,0 +1,145 @@ +// Package features provides the Features struct. +package features + +// Features represents the supported features of the runtime. +type Features struct { + // OCIVersionMin is the minimum OCI Runtime Spec version recognized by the runtime, e.g., "1.0.0". + OCIVersionMin string `json:"ociVersionMin,omitempty"` + + // OCIVersionMax is the maximum OCI Runtime Spec version recognized by the runtime, e.g., "1.0.2-dev". + OCIVersionMax string `json:"ociVersionMax,omitempty"` + + // Hooks is the list of the recognized hook names, e.g., "createRuntime". + // Nil value means "unknown", not "no support for any hook". + Hooks []string `json:"hooks,omitempty"` + + // MountOptions is the list of the recognized mount options, e.g., "ro". + // Nil value means "unknown", not "no support for any mount option". + // This list does not contain filesystem-specific options passed to mount(2) syscall as (const void *). + MountOptions []string `json:"mountOptions,omitempty"` + + // Linux is specific to Linux. 
+ Linux *Linux `json:"linux,omitempty"` + + // Annotations contains implementation-specific annotation strings, + // such as the implementation version, and third-party extensions. + Annotations map[string]string `json:"annotations,omitempty"` + + // PotentiallyUnsafeConfigAnnotations the list of the potential unsafe annotations + // that may appear in `config.json`. + // + // A value that ends with "." is interpreted as a prefix of annotations. + PotentiallyUnsafeConfigAnnotations []string `json:"potentiallyUnsafeConfigAnnotations,omitempty"` +} + +// Linux is specific to Linux. +type Linux struct { + // Namespaces is the list of the recognized namespaces, e.g., "mount". + // Nil value means "unknown", not "no support for any namespace". + Namespaces []string `json:"namespaces,omitempty"` + + // Capabilities is the list of the recognized capabilities , e.g., "CAP_SYS_ADMIN". + // Nil value means "unknown", not "no support for any capability". + Capabilities []string `json:"capabilities,omitempty"` + + Cgroup *Cgroup `json:"cgroup,omitempty"` + Seccomp *Seccomp `json:"seccomp,omitempty"` + Apparmor *Apparmor `json:"apparmor,omitempty"` + Selinux *Selinux `json:"selinux,omitempty"` + IntelRdt *IntelRdt `json:"intelRdt,omitempty"` + MountExtensions *MountExtensions `json:"mountExtensions,omitempty"` +} + +// Cgroup represents the "cgroup" field. +type Cgroup struct { + // V1 represents whether Cgroup v1 support is compiled in. + // Unrelated to whether the host uses cgroup v1 or not. + // Nil value means "unknown", not "false". + V1 *bool `json:"v1,omitempty"` + + // V2 represents whether Cgroup v2 support is compiled in. + // Unrelated to whether the host uses cgroup v2 or not. + // Nil value means "unknown", not "false". + V2 *bool `json:"v2,omitempty"` + + // Systemd represents whether systemd-cgroup support is compiled in. + // Unrelated to whether the host uses systemd or not. + // Nil value means "unknown", not "false". 
+ Systemd *bool `json:"systemd,omitempty"` + + // SystemdUser represents whether user-scoped systemd-cgroup support is compiled in. + // Unrelated to whether the host uses systemd or not. + // Nil value means "unknown", not "false". + SystemdUser *bool `json:"systemdUser,omitempty"` + + // Rdma represents whether RDMA cgroup support is compiled in. + // Unrelated to whether the host supports RDMA or not. + // Nil value means "unknown", not "false". + Rdma *bool `json:"rdma,omitempty"` +} + +// Seccomp represents the "seccomp" field. +type Seccomp struct { + // Enabled is true if seccomp support is compiled in. + // Nil value means "unknown", not "false". + Enabled *bool `json:"enabled,omitempty"` + + // Actions is the list of the recognized actions, e.g., "SCMP_ACT_NOTIFY". + // Nil value means "unknown", not "no support for any action". + Actions []string `json:"actions,omitempty"` + + // Operators is the list of the recognized operators, e.g., "SCMP_CMP_NE". + // Nil value means "unknown", not "no support for any operator". + Operators []string `json:"operators,omitempty"` + + // Archs is the list of the recognized archs, e.g., "SCMP_ARCH_X86_64". + // Nil value means "unknown", not "no support for any arch". + Archs []string `json:"archs,omitempty"` + + // KnownFlags is the list of the recognized filter flags, e.g., "SECCOMP_FILTER_FLAG_LOG". + // Nil value means "unknown", not "no flags are recognized". + KnownFlags []string `json:"knownFlags,omitempty"` + + // SupportedFlags is the list of the supported filter flags, e.g., "SECCOMP_FILTER_FLAG_LOG". + // This list may be a subset of KnownFlags due to some flags + // not supported by the current kernel and/or libseccomp. + // Nil value means "unknown", not "no flags are supported". + SupportedFlags []string `json:"supportedFlags,omitempty"` +} + +// Apparmor represents the "apparmor" field. +type Apparmor struct { + // Enabled is true if AppArmor support is compiled in. 
+ // Unrelated to whether the host supports AppArmor or not. + // Nil value means "unknown", not "false". + Enabled *bool `json:"enabled,omitempty"` +} + +// Selinux represents the "selinux" field. +type Selinux struct { + // Enabled is true if SELinux support is compiled in. + // Unrelated to whether the host supports SELinux or not. + // Nil value means "unknown", not "false". + Enabled *bool `json:"enabled,omitempty"` +} + +// IntelRdt represents the "intelRdt" field. +type IntelRdt struct { + // Enabled is true if Intel RDT support is compiled in. + // Unrelated to whether the host supports Intel RDT or not. + // Nil value means "unknown", not "false". + Enabled *bool `json:"enabled,omitempty"` +} + +// MountExtensions represents the "mountExtensions" field. +type MountExtensions struct { + // IDMap represents the status of idmap mounts support. + IDMap *IDMap `json:"idmap,omitempty"` +} + +type IDMap struct { + // Enabled represents whether idmap mounts supports is compiled in. + // Unrelated to whether the host supports it or not. + // Nil value means "unknown", not "false". + Enabled *bool `json:"enabled,omitempty"` +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 44c0e193e6..5089f71463 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -921,6 +921,7 @@ github.com/opencontainers/runc/libcontainer/utils # github.com/opencontainers/runtime-spec v1.2.1 ## explicit github.com/opencontainers/runtime-spec/specs-go +github.com/opencontainers/runtime-spec/specs-go/features # github.com/opencontainers/runtime-tools v0.9.1-0.20241108202711-f7e3563b0271 ## explicit; go 1.19 github.com/opencontainers/runtime-tools/generate