Skip to content

Commit 6bc2a31

Browse files
kolyshkinlifubang
andcommitted
runc kill: fix sending KILL to non-pidns container
Commit f8ad20f made it impossible to kill leftover processes in a stopped container that does not have its own PID namespace. In other words, if a container init is gone, it is no longer possible to use `runc kill` to kill the leftover processes. Fix this by moving the check if container init exists to after the special case of handling the container without own PID namespace. While at it, fix the minor issue introduced by commit 9583b3d: if signalAllProcesses is used, there is no need to thaw the container (as freeze/thaw is either done in signalAllProcesses already, or not needed at all). Also, make signalAllProcesses return an error early if the container cgroup does not exist (as it relies on it to do its job). This way, the error message returned is more generic and easier to understand ("container not running" instead of "can't open file"). Finally, add a test case. Fixes: f8ad20f Fixes: 9583b3d Co-authored-by: lifubang <[email protected]> Signed-off-by: Kir Kolyshkin <[email protected]>
1 parent 055dfab commit 6bc2a31

File tree

3 files changed

+58
-15
lines changed

3 files changed

+58
-15
lines changed

libcontainer/container_linux.go

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -364,25 +364,26 @@ func (c *Container) start(process *Process) (retErr error) {
364364
func (c *Container) Signal(s os.Signal) error {
365365
c.m.Lock()
366366
defer c.m.Unlock()
367-
// To avoid a PID reuse attack, don't kill non-running container.
368-
if !c.hasInit() {
369-
return ErrNotRunning
370-
}
371367

372368
// When a container has its own PID namespace, inside it the init PID
373369
// is 1, and thus it is handled specially by the kernel. In particular,
374370
// killing init with SIGKILL from an ancestor namespace will also kill
375371
// all other processes in that PID namespace (see pid_namespaces(7)).
376372
//
377373
// OTOH, if PID namespace is shared, we should kill all pids to avoid
378-
// leftover processes.
379-
var err error
374+
// leftover processes. Handle this special case here.
380375
if s == unix.SIGKILL && !c.config.Namespaces.IsPrivate(configs.NEWPID) {
381-
err = signalAllProcesses(c.cgroupManager, unix.SIGKILL)
382-
} else {
383-
err = c.initProcess.signal(s)
376+
if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); err != nil {
377+
return fmt.Errorf("unable to kill all processes: %w", err)
378+
}
379+
return nil
384380
}
385-
if err != nil {
381+
382+
// To avoid a PID reuse attack, don't kill non-running container.
383+
if !c.hasInit() {
384+
return ErrNotRunning
385+
}
386+
if err := c.initProcess.signal(s); err != nil {
386387
return fmt.Errorf("unable to signal init: %w", err)
387388
}
388389
if s == unix.SIGKILL {

libcontainer/init_linux.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -661,6 +661,9 @@ func setupPersonality(config *configs.Config) error {
661661
// signalAllProcesses freezes then iterates over all the processes inside the
662662
// manager's cgroups sending the signal s to them.
663663
func signalAllProcesses(m cgroups.Manager, s unix.Signal) error {
664+
if !m.Exists() {
665+
return ErrNotRunning
666+
}
664667
// Use cgroup.kill, if available.
665668
if s == unix.SIGKILL {
666669
if p := m.Path(""); p != "" { // Either cgroup v2 or hybrid.

tests/integration/kill.bats

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ function teardown() {
1010
teardown_bundle
1111
}
1212

13+
# shellcheck disable=SC2030
1314
@test "kill detached busybox" {
1415
# run busybox detached
1516
runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
@@ -35,15 +36,12 @@ function teardown() {
3536
[ "$status" -eq 0 ]
3637
}
3738

38-
# This is roughly the same as TestPIDHostInitProcessWait in libcontainer/integration.
39-
@test "kill KILL [host pidns]" {
40-
# kill -a currently requires cgroup freezer.
39+
test_host_pidns_kill() {
4140
requires cgroups_freezer
4241

4342
update_config ' .linux.namespaces -= [{"type": "pid"}]'
4443
set_cgroups_path
4544
if [ $EUID -ne 0 ]; then
46-
# kill --all requires working cgroups.
4745
requires rootless_cgroup
4846
update_config ' .mounts |= map((select(.type == "proc")
4947
| .type = "none"
@@ -53,14 +51,23 @@ function teardown() {
5351
fi
5452

5553
runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
54+
# shellcheck disable=SC2031
5655
[ "$status" -eq 0 ]
5756

5857
# Start a few more processes.
5958
for _ in 1 2 3 4 5; do
6059
__runc exec -d test_busybox sleep 1h
61-
[ "$status" -eq 0 ]
6260
done
6361

62+
if [ -v KILL_INIT ]; then
63+
# Now kill the container's init process (sh). Since the container do
64+
# not have own PID ns, its init is no special and the container will
65+
# still be up and running (except for rootless container AND
66+
# systemd cgroup driver AND systemd > v245, when systemd kills
67+
# the container; see "kill KILL [host pidns + init gone]" below).
68+
__runc exec test_busybox killall -9 sh
69+
fi
70+
6471
# Get the list of all container processes.
6572
path=$(get_cgroup_path "pids")
6673
pids=$(cat "$path"/cgroup.procs)
@@ -71,6 +78,7 @@ function teardown() {
7178
done
7279

7380
runc kill test_busybox KILL
81+
# shellcheck disable=SC2031
7482
[ "$status" -eq 0 ]
7583
wait_for_container 10 1 test_busybox stopped
7684

@@ -79,6 +87,37 @@ function teardown() {
7987
echo "pids: $pids"
8088
for p in $pids; do
8189
run ! kill -0 "$p"
90+
# shellcheck disable=SC2031
8291
[[ "$output" = *"No such process" ]]
8392
done
8493
}
94+
95+
# This is roughly the same as TestPIDHostInitProcessWait in libcontainer/integration.
96+
# The differences are:
97+
#
98+
# 1. Here we use separate processes to create and to kill a container, so the
99+
# processes inside a container are not children of "runc kill".
100+
#
101+
# 2. We hit different codepaths (nonChildProcess.signal rather than initProcess.signal).
102+
@test "kill KILL [host pidns]" {
103+
unset KILL_INIT
104+
test_host_pidns_kill
105+
}
106+
107+
# Same as above plus:
108+
# 3. Test runc kill on a container whose init process is gone (issue 4047).
109+
@test "kill KILL [host pidns + init gone]" {
110+
# Apparently, for rootless test, when using systemd cgroup manager,
111+
# newer versions of systemd clean up the container as soon as its init
112+
# process is gone. This is all fine and dandy, except it prevents us to
113+
# test this case, thus we skip the test.
114+
#
115+
# It is not entirely clear which systemd version got this feature:
116+
# v245 works fine, and v249 does not.
117+
if [ $EUID -ne 0 ] && [ -v RUNC_USE_SYSTEMD ] && [ "$(systemd_version)" -gt 245 ]; then
118+
skip "rootless+systemd conflicts with systemd > 245"
119+
fi
120+
KILL_INIT=1
121+
test_host_pidns_kill
122+
unset KILL_INIT
123+
}

0 commit comments

Comments
 (0)