Message ID | 20230911184310.1937349-5-cascardo@canonical.com |
---|---|
State | New |
Headers | show |
Series | [SRU,Mantic] io_uring: add a sysctl to disable io_uring system-wide | expand |
On 11.09.23 20:43, Thadeu Lima de Souza Cascardo wrote: > From: Matteo Rizzo <matteorizzo@google.com> > > BugLink: https://bugs.launchpad.net/bugs/2035116 > > Introduce a new sysctl (io_uring_disabled) which can be either 0, 1, or > 2. When 0 (the default), all processes are allowed to create io_uring > instances, which is the current behavior. When 1, io_uring creation is > disabled (io_uring_setup() will fail with -EPERM) for unprivileged > processes not in the kernel.io_uring_group group. When 2, calls to > io_uring_setup() fail with -EPERM regardless of privilege. > > Signed-off-by: Matteo Rizzo <matteorizzo@google.com> > [JEM: modified to add io_uring_group] > Signed-off-by: Jeff Moyer <jmoyer@redhat.com> > Link: https://lore.kernel.org/r/x49y1i42j1z.fsf@segfault.boston.devel.redhat.com > Signed-off-by: Jens Axboe <axboe@kernel.dk> > (backported from commit 76d3ccecfa186af3120e206d62f03db1a94a535f) > [cascardo: conflict due to missing b97f96e22f051d59d07a527dbd7d90408b661ca8] > Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com> > --- Applied to mantic:linux/master-next. Thanks. -Stefan > Documentation/admin-guide/sysctl/kernel.rst | 29 ++++++++++++ > io_uring/io_uring.c | 51 +++++++++++++++++++++ > 2 files changed, 80 insertions(+) > > diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst > index 3800fab1619b..0795d790cc56 100644 > --- a/Documentation/admin-guide/sysctl/kernel.rst > +++ b/Documentation/admin-guide/sysctl/kernel.rst > @@ -450,6 +450,35 @@ this allows system administrators to override the > ``IA64_THREAD_UAC_NOPRINT`` ``prctl`` and avoid logs being flooded. > > > +io_uring_disabled > +================= > + > +Prevents all processes from creating new io_uring instances. Enabling this > +shrinks the kernel's attack surface. > + > += ====================================================================== > +0 All processes can create io_uring instances as normal. This is the > + default setting. > +1 io_uring creation is disabled (io_uring_setup() will fail with > + -EPERM) for unprivileged processes not in the io_uring_group group. > + Existing io_uring instances can still be used. See the > + documentation for io_uring_group for more information. > +2 io_uring creation is disabled for all processes. io_uring_setup() > + always fails with -EPERM. Existing io_uring instances can still be > + used. > += ====================================================================== > + > + > +io_uring_group > +============== > + > +When io_uring_disabled is set to 1, a process must either be > +privileged (CAP_SYS_ADMIN) or be in the io_uring_group group in order > +to create an io_uring instance. If io_uring_group is set to -1 (the > +default), only processes with the CAP_SYS_ADMIN capability may create > +io_uring instances. > + > + > kexec_load_disabled > =================== > > diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c > index 93db3e4e7b68..8beb362356fd 100644 > --- a/io_uring/io_uring.c > +++ b/io_uring/io_uring.c > @@ -152,6 +152,31 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx); > > struct kmem_cache *req_cachep; > > +static int __read_mostly sysctl_io_uring_disabled; > +static int __read_mostly sysctl_io_uring_group = -1; > + > +#ifdef CONFIG_SYSCTL > +static struct ctl_table kernel_io_uring_disabled_table[] = { > + { > + .procname = "io_uring_disabled", > + .data = &sysctl_io_uring_disabled, > + .maxlen = sizeof(sysctl_io_uring_disabled), > + .mode = 0644, > + .proc_handler = proc_dointvec_minmax, > + .extra1 = SYSCTL_ZERO, > + .extra2 = SYSCTL_TWO, > + }, > + { > + .procname = "io_uring_group", > + .data = &sysctl_io_uring_group, > + .maxlen = sizeof(gid_t), > + .mode = 0644, > + .proc_handler = proc_dointvec, > + }, > + {}, > +}; > +#endif > + > struct sock *io_uring_get_socket(struct file *file) > { > #if defined(CONFIG_UNIX) > @@ -4040,9 +4065,30 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params) > return io_uring_create(entries, &p, params); > } > > +static inline bool io_uring_allowed(void) > +{ > + int disabled = READ_ONCE(sysctl_io_uring_disabled); > + kgid_t io_uring_group; > + > + if (disabled == 2) > + return false; > + > + if (disabled == 0 || capable(CAP_SYS_ADMIN)) > + return true; > + > + io_uring_group = make_kgid(&init_user_ns, sysctl_io_uring_group); > + if (!gid_valid(io_uring_group)) > + return false; > + > + return in_group_p(io_uring_group); > +} > + > SYSCALL_DEFINE2(io_uring_setup, u32, entries, > struct io_uring_params __user *, params) > { > + if (!io_uring_allowed()) > + return -EPERM; > + > return io_uring_setup(entries, params); > } > > @@ -4617,6 +4663,11 @@ static int __init io_uring_init(void) > > req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC | > SLAB_ACCOUNT | SLAB_TYPESAFE_BY_RCU); > + > +#ifdef CONFIG_SYSCTL > + register_sysctl_init("kernel", kernel_io_uring_disabled_table); > +#endif > + > return 0; > }; > __initcall(io_uring_init);
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 3800fab1619b..0795d790cc56 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -450,6 +450,35 @@ this allows system administrators to override the ``IA64_THREAD_UAC_NOPRINT`` ``prctl`` and avoid logs being flooded. +io_uring_disabled +================= + +Prevents all processes from creating new io_uring instances. Enabling this +shrinks the kernel's attack surface. + += ====================================================================== +0 All processes can create io_uring instances as normal. This is the + default setting. +1 io_uring creation is disabled (io_uring_setup() will fail with + -EPERM) for unprivileged processes not in the io_uring_group group. + Existing io_uring instances can still be used. See the + documentation for io_uring_group for more information. +2 io_uring creation is disabled for all processes. io_uring_setup() + always fails with -EPERM. Existing io_uring instances can still be + used. += ====================================================================== + + +io_uring_group +============== + +When io_uring_disabled is set to 1, a process must either be +privileged (CAP_SYS_ADMIN) or be in the io_uring_group group in order +to create an io_uring instance. If io_uring_group is set to -1 (the +default), only processes with the CAP_SYS_ADMIN capability may create +io_uring instances. + + kexec_load_disabled =================== diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 93db3e4e7b68..8beb362356fd 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -152,6 +152,31 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx); struct kmem_cache *req_cachep; +static int __read_mostly sysctl_io_uring_disabled; +static int __read_mostly sysctl_io_uring_group = -1; + +#ifdef CONFIG_SYSCTL +static struct ctl_table kernel_io_uring_disabled_table[] = { + { + .procname = "io_uring_disabled", + .data = &sysctl_io_uring_disabled, + .maxlen = sizeof(sysctl_io_uring_disabled), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_TWO, + }, + { + .procname = "io_uring_group", + .data = &sysctl_io_uring_group, + .maxlen = sizeof(gid_t), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + {}, +}; +#endif + struct sock *io_uring_get_socket(struct file *file) { #if defined(CONFIG_UNIX) @@ -4040,9 +4065,30 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params) return io_uring_create(entries, &p, params); } +static inline bool io_uring_allowed(void) +{ + int disabled = READ_ONCE(sysctl_io_uring_disabled); + kgid_t io_uring_group; + + if (disabled == 2) + return false; + + if (disabled == 0 || capable(CAP_SYS_ADMIN)) + return true; + + io_uring_group = make_kgid(&init_user_ns, sysctl_io_uring_group); + if (!gid_valid(io_uring_group)) + return false; + + return in_group_p(io_uring_group); +} + SYSCALL_DEFINE2(io_uring_setup, u32, entries, struct io_uring_params __user *, params) { + if (!io_uring_allowed()) + return -EPERM; + return io_uring_setup(entries, params); } @@ -4617,6 +4663,11 @@ static int __init io_uring_init(void) req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT | SLAB_TYPESAFE_BY_RCU); + +#ifdef CONFIG_SYSCTL + register_sysctl_init("kernel", kernel_io_uring_disabled_table); +#endif + return 0; }; __initcall(io_uring_init);