Message ID | 1350479712-15082-3-git-send-email-otubo@linux.vnet.ibm.com |
---|---|
State | New |
Headers | show |
On Wed, Oct 17, 2012 at 1:15 PM, Eduardo Otubo <otubo@linux.vnet.ibm.com> wrote: > This patch includes a second whitelist right before the main loop. It's > a smaller and more restricted whitelist, excluding execve() among many > others. > > Signed-off-by: Eduardo Otubo <otubo@linux.vnet.ibm.com> > --- > qemu-seccomp.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ > qemu-seccomp.h | 7 ++++- > vl.c | 13 +++++++- > 3 files changed, 103 insertions(+), 11 deletions(-) > > diff --git a/qemu-seccomp.c b/qemu-seccomp.c > index a25f2fa..9c68af5 100644 > --- a/qemu-seccomp.c > +++ b/qemu-seccomp.c > @@ -13,6 +13,7 @@ > * GNU GPL, version 2 or (at your option) any later version. > */ > #include <stdio.h> > +#include <stdlib.h> > #include <seccomp.h> > #include "qemu-seccomp.h" > > @@ -21,7 +22,7 @@ struct QemuSeccompSyscall { > uint8_t priority; > }; > > -static const struct QemuSeccompSyscall seccomp_whitelist[] = { > +static const struct QemuSeccompSyscall seccomp_whitelist_init[] = { > { SCMP_SYS(timer_settime), 255 }, > { SCMP_SYS(timer_gettime), 254 }, > { SCMP_SYS(futex), 253 }, > @@ -118,27 +119,102 @@ static const struct QemuSeccompSyscall seccomp_whitelist[] = { > { SCMP_SYS(accept4), 242 } > }; > > -int seccomp_start(void) > +static const struct QemuSeccompSyscall seccomp_whitelist_main_loop[] = { > + { SCMP_SYS(timer_settime), 255 }, > + { SCMP_SYS(timer_gettime), 254 }, > + { SCMP_SYS(futex), 253 }, > + { SCMP_SYS(select), 252 }, > + { SCMP_SYS(recvfrom), 251 }, > + { SCMP_SYS(sendto), 250 }, > + { SCMP_SYS(read), 249 }, > + { SCMP_SYS(brk), 248 }, > + { SCMP_SYS(mmap), 247 }, > +#if defined(__i386__) > + { SCMP_SYS(fcntl64), 245 }, > + { SCMP_SYS(fstat64), 245 }, > + { SCMP_SYS(stat64), 245 }, > + { SCMP_SYS(getgid32), 245 }, > + { SCMP_SYS(getegid32), 245 }, > + { SCMP_SYS(getuid32), 245 }, > + { SCMP_SYS(geteuid32), 245 }, > + { SCMP_SYS(sigreturn), 245 }, > + { SCMP_SYS(_newselect), 245 }, > + { SCMP_SYS(_llseek), 245 }, > + { 
SCMP_SYS(mmap2), 245}, > + { SCMP_SYS(sigprocmask), 245 }, > +#endif > + { SCMP_SYS(exit), 245 }, > + { SCMP_SYS(timer_delete), 245 }, > + { SCMP_SYS(exit_group), 245 }, > + { SCMP_SYS(rt_sigreturn), 245 }, > + { SCMP_SYS(madvise), 245 }, > + { SCMP_SYS(write), 244 }, > + { SCMP_SYS(fcntl), 243 }, > + { SCMP_SYS(tgkill), 242 }, > + { SCMP_SYS(rt_sigaction), 242 }, > + { SCMP_SYS(pipe2), 242 }, > + { SCMP_SYS(munmap), 242 }, > + { SCMP_SYS(mremap), 242 }, > + { SCMP_SYS(getsockname), 242 }, > + { SCMP_SYS(getpeername), 242 }, > + { SCMP_SYS(close), 242 }, > + { SCMP_SYS(accept4), 242 } It's nice to see that for example open, creat, unlink, socket, bind, mprotect, setrlimit and kill are not present. > +}; > + > +static int > +process_whitelist(const struct QemuSeccompSyscall *whitelist, > + unsigned int size, scmp_filter_ctx *ctx) > { > int rc = 0; > + > unsigned int i = 0; > - scmp_filter_ctx ctx; > + > + for (i = 0; i < size; i++) { > + rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, whitelist[i].num, 0); > + if (rc < 0) { > + return -1; > + } > + > + rc = seccomp_syscall_priority(ctx, whitelist[i].num, > + whitelist[i].priority); > + if (rc < 0) { > + return -1; > + } > + } > + return 0; > +} > + > +int > +seccomp_start(enum whitelist_mode mode, scmp_filter_ctx *ctx) > +{ > + int rc = 0; > > ctx = seccomp_init(SCMP_ACT_KILL); > if (ctx == NULL) { > + rc = -1; > goto seccomp_return; > } > > - for (i = 0; i < ARRAY_SIZE(seccomp_whitelist); i++) { > - rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, seccomp_whitelist[i].num, 0); > - if (rc < 0) { > + switch (mode) { > + case INIT: > + if (process_whitelist > + (seccomp_whitelist_init, > + ARRAY_SIZE(seccomp_whitelist_init), ctx) < 0) { > + rc = -1; > goto seccomp_return; > } > - rc = seccomp_syscall_priority(ctx, seccomp_whitelist[i].num, > - seccomp_whitelist[i].priority); > - if (rc < 0) { > + break; > + case MAIN_LOOP: > + if (process_whitelist > + (seccomp_whitelist_main_loop, > + ARRAY_SIZE(seccomp_whitelist_main_loop), 
ctx) < 0) { > + rc = -1; > goto seccomp_return; > } > + break; > + default: > + rc = -1; > + goto seccomp_return; > } > > rc = seccomp_load(ctx); > diff --git a/qemu-seccomp.h b/qemu-seccomp.h > index b2fc3f8..1c97978 100644 > --- a/qemu-seccomp.h > +++ b/qemu-seccomp.h > @@ -18,5 +18,10 @@ > #include <seccomp.h> > #include "osdep.h" > > -int seccomp_start(void); > +enum whitelist_mode { > + INIT = 0, > + MAIN_LOOP = 1, > +}; > + > +int seccomp_start(enum whitelist_mode mode, scmp_filter_ctx *ctx); > #endif > diff --git a/vl.c b/vl.c > index bec68cd..773d488 100644 > --- a/vl.c > +++ b/vl.c > @@ -278,6 +278,7 @@ static int default_vga = 1; > > #ifdef CONFIG_SECCOMP > bool seccomp_on = true; > +scmp_filter_ctx ctx; This should be a variable local to main(), perhaps named 'main_loop_ctx', so that we can add further contexts later. > #endif > > static struct { > @@ -777,7 +778,7 @@ static int bt_parse(const char *opt) > static int install_seccomp_filters(void) > { > #ifdef CONFIG_SECCOMP > - if (seccomp_start() < 0) { > + if (seccomp_start(INIT, &ctx) < 0) { > qerror_report(ERROR_CLASS_GENERIC_ERROR, > "failed to install seccomp syscall filter in the kernel"); > return -1; > @@ -3794,6 +3795,16 @@ int main(int argc, char **argv, char **envp) > > os_setup_post(); > > + if (seccomp_on) { 'seccomp_on' is only declared when CONFIG_SECCOMP is defined, so this would break the build without it. > +#ifdef CONFIG_SECCOMP > + if (seccomp_start(MAIN_LOOP, &ctx) < 0) { > + qerror_report(ERROR_CLASS_GENERIC_ERROR, > + "failed to install seccomp syscall filter in the kernel"); This error message could be different from the first one. > + return -1; > + } > +#endif > + } > + > resume_all_vcpus(); > main_loop(); > bdrv_close_all(); > -- > 1.7.12 > >
On 10/17/2012 09:15 AM, Eduardo Otubo wrote: > This patch includes a second whitelist right before the main loop. It's > a smaller and more restricted whitelist, excluding execve() among many > others. > > Signed-off-by: Eduardo Otubo <otubo@linux.vnet.ibm.com> > --- > qemu-seccomp.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ > qemu-seccomp.h | 7 ++++- > vl.c | 13 +++++++- > 3 files changed, 103 insertions(+), 11 deletions(-) > > diff --git a/qemu-seccomp.c b/qemu-seccomp.c > index a25f2fa..9c68af5 100644 > --- a/qemu-seccomp.c > +++ b/qemu-seccomp.c > @@ -13,6 +13,7 @@ > * GNU GPL, version 2 or (at your option) any later version. > */ > #include <stdio.h> > +#include <stdlib.h> > #include <seccomp.h> > #include "qemu-seccomp.h" > > @@ -21,7 +22,7 @@ struct QemuSeccompSyscall { > uint8_t priority; > }; > > -static const struct QemuSeccompSyscall seccomp_whitelist[] = { > +static const struct QemuSeccompSyscall seccomp_whitelist_init[] = { > { SCMP_SYS(timer_settime), 255 }, > { SCMP_SYS(timer_gettime), 254 }, > { SCMP_SYS(futex), 253 }, > @@ -118,27 +119,102 @@ static const struct QemuSeccompSyscall seccomp_whitelist[] = { > { SCMP_SYS(accept4), 242 } > }; > > -int seccomp_start(void) > +static const struct QemuSeccompSyscall seccomp_whitelist_main_loop[] = { > + { SCMP_SYS(timer_settime), 255 }, > + { SCMP_SYS(timer_gettime), 254 }, > + { SCMP_SYS(futex), 253 }, > + { SCMP_SYS(select), 252 }, > + { SCMP_SYS(recvfrom), 251 }, > + { SCMP_SYS(sendto), 250 }, > + { SCMP_SYS(read), 249 }, > + { SCMP_SYS(brk), 248 }, > + { SCMP_SYS(mmap), 247 }, > +#if defined(__i386__) > + { SCMP_SYS(fcntl64), 245 }, > + { SCMP_SYS(fstat64), 245 }, > + { SCMP_SYS(stat64), 245 }, > + { SCMP_SYS(getgid32), 245 }, > + { SCMP_SYS(getegid32), 245 }, > + { SCMP_SYS(getuid32), 245 }, > + { SCMP_SYS(geteuid32), 245 }, > + { SCMP_SYS(sigreturn), 245 }, > + { SCMP_SYS(_newselect), 245 }, > + { SCMP_SYS(_llseek), 245 }, > + { SCMP_SYS(mmap2), 245}, > + { 
SCMP_SYS(sigprocmask), 245 }, > +#endif > + { SCMP_SYS(exit), 245 }, > + { SCMP_SYS(timer_delete), 245 }, > + { SCMP_SYS(exit_group), 245 }, > + { SCMP_SYS(rt_sigreturn), 245 }, > + { SCMP_SYS(madvise), 245 }, > + { SCMP_SYS(write), 244 }, > + { SCMP_SYS(fcntl), 243 }, > + { SCMP_SYS(tgkill), 242 }, > + { SCMP_SYS(rt_sigaction), 242 }, > + { SCMP_SYS(pipe2), 242 }, > + { SCMP_SYS(munmap), 242 }, > + { SCMP_SYS(mremap), 242 }, > + { SCMP_SYS(getsockname), 242 }, > + { SCMP_SYS(getpeername), 242 }, > + { SCMP_SYS(close), 242 }, > + { SCMP_SYS(accept4), 242 } > +}; This list also needs: eventfd2, recvmsg, ioctl, rt_sigprocmask. > + > +static int > +process_whitelist(const struct QemuSeccompSyscall *whitelist, > + unsigned int size, scmp_filter_ctx *ctx) > { > int rc = 0; > + > unsigned int i = 0; > - scmp_filter_ctx ctx; > + > + for (i = 0; i < size; i++) { > + rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, whitelist[i].num, 0); > + if (rc < 0) { > + return -1; > + } > + > + rc = seccomp_syscall_priority(ctx, whitelist[i].num, > + whitelist[i].priority); > + if (rc < 0) { > + return -1; > + } > + } > + return 0; > +} > + > +int > +seccomp_start(enum whitelist_mode mode, scmp_filter_ctx *ctx) > +{ > + int rc = 0; > > ctx = seccomp_init(SCMP_ACT_KILL); > if (ctx == NULL) { > + rc = -1; > goto seccomp_return; > } > > - for (i = 0; i < ARRAY_SIZE(seccomp_whitelist); i++) { > - rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, seccomp_whitelist[i].num, 0); > - if (rc < 0) { > + switch (mode) { > + case INIT: > + if (process_whitelist > + (seccomp_whitelist_init, > + ARRAY_SIZE(seccomp_whitelist_init), ctx) < 0) { > + rc = -1; > goto seccomp_return; > } > - rc = seccomp_syscall_priority(ctx, seccomp_whitelist[i].num, > - seccomp_whitelist[i].priority); > - if (rc < 0) { > + break; > + case MAIN_LOOP: > + if (process_whitelist > + (seccomp_whitelist_main_loop, > + ARRAY_SIZE(seccomp_whitelist_main_loop), ctx) < 0) { > + rc = -1; > goto seccomp_return; > } > + break; > + default: > + rc 
= -1; > + goto seccomp_return; > } > > rc = seccomp_load(ctx); > diff --git a/qemu-seccomp.h b/qemu-seccomp.h > index b2fc3f8..1c97978 100644 > --- a/qemu-seccomp.h > +++ b/qemu-seccomp.h > @@ -18,5 +18,10 @@ > #include <seccomp.h> > #include "osdep.h" > > -int seccomp_start(void); > +enum whitelist_mode { > + INIT = 0, > + MAIN_LOOP = 1, > +}; > + > +int seccomp_start(enum whitelist_mode mode, scmp_filter_ctx *ctx); > #endif > diff --git a/vl.c b/vl.c > index bec68cd..773d488 100644 > --- a/vl.c > +++ b/vl.c > @@ -278,6 +278,7 @@ static int default_vga = 1; > > #ifdef CONFIG_SECCOMP > bool seccomp_on = true; > +scmp_filter_ctx ctx; > #endif > > static struct { > @@ -777,7 +778,7 @@ static int bt_parse(const char *opt) > static int install_seccomp_filters(void) > { > #ifdef CONFIG_SECCOMP > - if (seccomp_start() < 0) { > + if (seccomp_start(INIT, &ctx) < 0) { > qerror_report(ERROR_CLASS_GENERIC_ERROR, > "failed to install seccomp syscall filter in the kernel"); > return -1; > @@ -3794,6 +3795,16 @@ int main(int argc, char **argv, char **envp) > > os_setup_post(); > > + if (seccomp_on) { > +#ifdef CONFIG_SECCOMP > + if (seccomp_start(MAIN_LOOP, &ctx) < 0) { The first list is installed with install_seccomp_filters() and this one is installed with seccomp_start(). One thing you could do to make it more consistent is to add a 'whitelist_mode mode' parameter to install_seccomp_filters() and call install_seccomp_filters(INIT) and install_seccomp_filters(MAIN_LOOP). > + qerror_report(ERROR_CLASS_GENERIC_ERROR, > + "failed to install seccomp syscall filter in the kernel"); > + return -1; > + } > +#endif > + } > + > resume_all_vcpus(); > main_loop(); > bdrv_close_all(); >
On 10/19/2012 01:04 PM, Blue Swirl wrote: > On Wed, Oct 17, 2012 at 1:15 PM, Eduardo Otubo <otubo@linux.vnet.ibm.com> wrote: >> This patch includes a second whitelist right before the main loop. It's >> a smaller and more restricted whitelist, excluding execve() among many >> others. >> >> Signed-off-by: Eduardo Otubo <otubo@linux.vnet.ibm.com> >> --- >> qemu-seccomp.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ >> qemu-seccomp.h | 7 ++++- >> vl.c | 13 +++++++- >> 3 files changed, 103 insertions(+), 11 deletions(-) >> >> diff --git a/qemu-seccomp.c b/qemu-seccomp.c >> index a25f2fa..9c68af5 100644 >> --- a/qemu-seccomp.c >> +++ b/qemu-seccomp.c >> @@ -13,6 +13,7 @@ >> * GNU GPL, version 2 or (at your option) any later version. >> */ >> #include <stdio.h> >> +#include <stdlib.h> >> #include <seccomp.h> >> #include "qemu-seccomp.h" >> >> @@ -21,7 +22,7 @@ struct QemuSeccompSyscall { >> uint8_t priority; >> }; >> >> -static const struct QemuSeccompSyscall seccomp_whitelist[] = { >> +static const struct QemuSeccompSyscall seccomp_whitelist_init[] = { >> { SCMP_SYS(timer_settime), 255 }, >> { SCMP_SYS(timer_gettime), 254 }, >> { SCMP_SYS(futex), 253 }, >> @@ -118,27 +119,102 @@ static const struct QemuSeccompSyscall seccomp_whitelist[] = { >> { SCMP_SYS(accept4), 242 } >> }; >> >> -int seccomp_start(void) >> +static const struct QemuSeccompSyscall seccomp_whitelist_main_loop[] = { >> + { SCMP_SYS(timer_settime), 255 }, >> + { SCMP_SYS(timer_gettime), 254 }, >> + { SCMP_SYS(futex), 253 }, >> + { SCMP_SYS(select), 252 }, >> + { SCMP_SYS(recvfrom), 251 }, >> + { SCMP_SYS(sendto), 250 }, >> + { SCMP_SYS(read), 249 }, >> + { SCMP_SYS(brk), 248 }, >> + { SCMP_SYS(mmap), 247 }, >> +#if defined(__i386__) >> + { SCMP_SYS(fcntl64), 245 }, >> + { SCMP_SYS(fstat64), 245 }, >> + { SCMP_SYS(stat64), 245 }, >> + { SCMP_SYS(getgid32), 245 }, >> + { SCMP_SYS(getegid32), 245 }, >> + { SCMP_SYS(getuid32), 245 }, >> + { SCMP_SYS(geteuid32), 245 }, >> + { 
SCMP_SYS(sigreturn), 245 }, >> + { SCMP_SYS(_newselect), 245 }, >> + { SCMP_SYS(_llseek), 245 }, >> + { SCMP_SYS(mmap2), 245}, >> + { SCMP_SYS(sigprocmask), 245 }, >> +#endif >> + { SCMP_SYS(exit), 245 }, >> + { SCMP_SYS(timer_delete), 245 }, >> + { SCMP_SYS(exit_group), 245 }, >> + { SCMP_SYS(rt_sigreturn), 245 }, >> + { SCMP_SYS(madvise), 245 }, >> + { SCMP_SYS(write), 244 }, >> + { SCMP_SYS(fcntl), 243 }, >> + { SCMP_SYS(tgkill), 242 }, >> + { SCMP_SYS(rt_sigaction), 242 }, >> + { SCMP_SYS(pipe2), 242 }, >> + { SCMP_SYS(munmap), 242 }, >> + { SCMP_SYS(mremap), 242 }, >> + { SCMP_SYS(getsockname), 242 }, >> + { SCMP_SYS(getpeername), 242 }, >> + { SCMP_SYS(close), 242 }, >> + { SCMP_SYS(accept4), 242 } > > It's nice to see that for example open, creat, unlink, socket, bind, > mprotect, setrlimit and kill are not present. > Hmm, well open minimally needs to be added to this list so that drives can be hotplugged. >> +}; >> + >> +static int >> +process_whitelist(const struct QemuSeccompSyscall *whitelist, >> + unsigned int size, scmp_filter_ctx *ctx) >> { >> int rc = 0; >> + >> unsigned int i = 0; >> - scmp_filter_ctx ctx; >> + >> + for (i = 0; i < size; i++) { >> + rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, whitelist[i].num, 0); >> + if (rc < 0) { >> + return -1; >> + } >> + >> + rc = seccomp_syscall_priority(ctx, whitelist[i].num, >> + whitelist[i].priority); >> + if (rc < 0) { >> + return -1; >> + } >> + } >> + return 0; >> +} >> + >> +int >> +seccomp_start(enum whitelist_mode mode, scmp_filter_ctx *ctx) >> +{ >> + int rc = 0; >> >> ctx = seccomp_init(SCMP_ACT_KILL); >> if (ctx == NULL) { >> + rc = -1; >> goto seccomp_return; >> } >> >> - for (i = 0; i < ARRAY_SIZE(seccomp_whitelist); i++) { >> - rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, seccomp_whitelist[i].num, 0); >> - if (rc < 0) { >> + switch (mode) { >> + case INIT: >> + if (process_whitelist >> + (seccomp_whitelist_init, >> + ARRAY_SIZE(seccomp_whitelist_init), ctx) < 0) { >> + rc = -1; >> goto 
seccomp_return; >> } >> - rc = seccomp_syscall_priority(ctx, seccomp_whitelist[i].num, >> - seccomp_whitelist[i].priority); >> - if (rc < 0) { >> + break; >> + case MAIN_LOOP: >> + if (process_whitelist >> + (seccomp_whitelist_main_loop, >> + ARRAY_SIZE(seccomp_whitelist_main_loop), ctx) < 0) { >> + rc = -1; >> goto seccomp_return; >> } >> + break; >> + default: >> + rc = -1; >> + goto seccomp_return; >> } >> >> rc = seccomp_load(ctx); >> diff --git a/qemu-seccomp.h b/qemu-seccomp.h >> index b2fc3f8..1c97978 100644 >> --- a/qemu-seccomp.h >> +++ b/qemu-seccomp.h >> @@ -18,5 +18,10 @@ >> #include <seccomp.h> >> #include "osdep.h" >> >> -int seccomp_start(void); >> +enum whitelist_mode { >> + INIT = 0, >> + MAIN_LOOP = 1, >> +}; >> + >> +int seccomp_start(enum whitelist_mode mode, scmp_filter_ctx *ctx); >> #endif >> diff --git a/vl.c b/vl.c >> index bec68cd..773d488 100644 >> --- a/vl.c >> +++ b/vl.c >> @@ -278,6 +278,7 @@ static int default_vga = 1; >> >> #ifdef CONFIG_SECCOMP >> bool seccomp_on = true; >> +scmp_filter_ctx ctx; > > This should be a local variable to main(), maybe also named > 'main_loop_ctx' so we can add further contexts. > >> #endif >> >> static struct { >> @@ -777,7 +778,7 @@ static int bt_parse(const char *opt) >> static int install_seccomp_filters(void) >> { >> #ifdef CONFIG_SECCOMP >> - if (seccomp_start() < 0) { >> + if (seccomp_start(INIT, &ctx) < 0) { >> qerror_report(ERROR_CLASS_GENERIC_ERROR, >> "failed to install seccomp syscall filter in the kernel"); >> return -1; >> @@ -3794,6 +3795,16 @@ int main(int argc, char **argv, char **envp) >> >> os_setup_post(); >> >> + if (seccomp_on) { > > 'seccomp_on' is only available with CONFIG_SECCOMP, so this would break build. > >> +#ifdef CONFIG_SECCOMP >> + if (seccomp_start(MAIN_LOOP, &ctx) < 0) { >> + qerror_report(ERROR_CLASS_GENERIC_ERROR, >> + "failed to install seccomp syscall filter in the kernel"); > > This error message could be different from the first one. 
> >> + return -1; >> + } >> +#endif >> + } >> + >> resume_all_vcpus(); >> main_loop(); >> bdrv_close_all(); >> -- >> 1.7.12 >> >> > >
On 10/19/2012 02:08 PM, Corey Bryant wrote: > > > On 10/19/2012 01:04 PM, Blue Swirl wrote: >> On Wed, Oct 17, 2012 at 1:15 PM, Eduardo Otubo >> <otubo@linux.vnet.ibm.com> wrote: >>> This patch includes a second whitelist right before the main loop. It's >>> a smaller and more restricted whitelist, excluding execve() among many >>> others. >>> >> It's nice to see that for example open, creat, unlink, socket, bind, >> mprotect, setrlimit and kill are not present. >> > > Hmm, well open minimally needs to be added to this list so that drives > can be hotplugged. Unless we enforce the use of add-fd for hot-plugging drives, but that in turn requires that we have -blockdev semantics for telling qemu how to open backing chains.
On 10/19/2012 04:36 PM, Eric Blake wrote: > On 10/19/2012 02:08 PM, Corey Bryant wrote: >> >> >> On 10/19/2012 01:04 PM, Blue Swirl wrote: >>> On Wed, Oct 17, 2012 at 1:15 PM, Eduardo Otubo >>> <otubo@linux.vnet.ibm.com> wrote: >>>> This patch includes a second whitelist right before the main loop. It's >>>> a smaller and more restricted whitelist, excluding execve() among many >>>> others. >>>> > >>> It's nice to see that for example open, creat, unlink, socket, bind, >>> mprotect, setrlimit and kill are not present. >>> >> >> Hmm, well open minimally needs to be added to this list so that drives >> can be hotplugged. > > Unless we enforce the use of add-fd for hot-plugging drives, but that in > turn requires that we have -blockdev semantics for telling qemu how to > open backing chains. > True, that would be nice. But for now we don't have a complete fd-passing solution, so maybe we can add that restriction in the future.
diff --git a/qemu-seccomp.c b/qemu-seccomp.c index a25f2fa..9c68af5 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -13,6 +13,7 @@ * GNU GPL, version 2 or (at your option) any later version. */ #include <stdio.h> +#include <stdlib.h> #include <seccomp.h> #include "qemu-seccomp.h" @@ -21,7 +22,7 @@ struct QemuSeccompSyscall { uint8_t priority; }; -static const struct QemuSeccompSyscall seccomp_whitelist[] = { +static const struct QemuSeccompSyscall seccomp_whitelist_init[] = { { SCMP_SYS(timer_settime), 255 }, { SCMP_SYS(timer_gettime), 254 }, { SCMP_SYS(futex), 253 }, @@ -118,27 +119,102 @@ static const struct QemuSeccompSyscall seccomp_whitelist[] = { { SCMP_SYS(accept4), 242 } }; -int seccomp_start(void) +static const struct QemuSeccompSyscall seccomp_whitelist_main_loop[] = { + { SCMP_SYS(timer_settime), 255 }, + { SCMP_SYS(timer_gettime), 254 }, + { SCMP_SYS(futex), 253 }, + { SCMP_SYS(select), 252 }, + { SCMP_SYS(recvfrom), 251 }, + { SCMP_SYS(sendto), 250 }, + { SCMP_SYS(read), 249 }, + { SCMP_SYS(brk), 248 }, + { SCMP_SYS(mmap), 247 }, +#if defined(__i386__) + { SCMP_SYS(fcntl64), 245 }, + { SCMP_SYS(fstat64), 245 }, + { SCMP_SYS(stat64), 245 }, + { SCMP_SYS(getgid32), 245 }, + { SCMP_SYS(getegid32), 245 }, + { SCMP_SYS(getuid32), 245 }, + { SCMP_SYS(geteuid32), 245 }, + { SCMP_SYS(sigreturn), 245 }, + { SCMP_SYS(_newselect), 245 }, + { SCMP_SYS(_llseek), 245 }, + { SCMP_SYS(mmap2), 245}, + { SCMP_SYS(sigprocmask), 245 }, +#endif + { SCMP_SYS(exit), 245 }, + { SCMP_SYS(timer_delete), 245 }, + { SCMP_SYS(exit_group), 245 }, + { SCMP_SYS(rt_sigreturn), 245 }, + { SCMP_SYS(madvise), 245 }, + { SCMP_SYS(write), 244 }, + { SCMP_SYS(fcntl), 243 }, + { SCMP_SYS(tgkill), 242 }, + { SCMP_SYS(rt_sigaction), 242 }, + { SCMP_SYS(pipe2), 242 }, + { SCMP_SYS(munmap), 242 }, + { SCMP_SYS(mremap), 242 }, + { SCMP_SYS(getsockname), 242 }, + { SCMP_SYS(getpeername), 242 }, + { SCMP_SYS(close), 242 }, + { SCMP_SYS(accept4), 242 } +}; + +static int +process_whitelist(const 
struct QemuSeccompSyscall *whitelist, + unsigned int size, scmp_filter_ctx *ctx) { int rc = 0; + unsigned int i = 0; - scmp_filter_ctx ctx; + + for (i = 0; i < size; i++) { + rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, whitelist[i].num, 0); + if (rc < 0) { + return -1; + } + + rc = seccomp_syscall_priority(ctx, whitelist[i].num, + whitelist[i].priority); + if (rc < 0) { + return -1; + } + } + return 0; +} + +int +seccomp_start(enum whitelist_mode mode, scmp_filter_ctx *ctx) +{ + int rc = 0; ctx = seccomp_init(SCMP_ACT_KILL); if (ctx == NULL) { + rc = -1; goto seccomp_return; } - for (i = 0; i < ARRAY_SIZE(seccomp_whitelist); i++) { - rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, seccomp_whitelist[i].num, 0); - if (rc < 0) { + switch (mode) { + case INIT: + if (process_whitelist + (seccomp_whitelist_init, + ARRAY_SIZE(seccomp_whitelist_init), ctx) < 0) { + rc = -1; goto seccomp_return; } - rc = seccomp_syscall_priority(ctx, seccomp_whitelist[i].num, - seccomp_whitelist[i].priority); - if (rc < 0) { + break; + case MAIN_LOOP: + if (process_whitelist + (seccomp_whitelist_main_loop, + ARRAY_SIZE(seccomp_whitelist_main_loop), ctx) < 0) { + rc = -1; goto seccomp_return; } + break; + default: + rc = -1; + goto seccomp_return; } rc = seccomp_load(ctx); diff --git a/qemu-seccomp.h b/qemu-seccomp.h index b2fc3f8..1c97978 100644 --- a/qemu-seccomp.h +++ b/qemu-seccomp.h @@ -18,5 +18,10 @@ #include <seccomp.h> #include "osdep.h" -int seccomp_start(void); +enum whitelist_mode { + INIT = 0, + MAIN_LOOP = 1, +}; + +int seccomp_start(enum whitelist_mode mode, scmp_filter_ctx *ctx); #endif diff --git a/vl.c b/vl.c index bec68cd..773d488 100644 --- a/vl.c +++ b/vl.c @@ -278,6 +278,7 @@ static int default_vga = 1; #ifdef CONFIG_SECCOMP bool seccomp_on = true; +scmp_filter_ctx ctx; #endif static struct { @@ -777,7 +778,7 @@ static int bt_parse(const char *opt) static int install_seccomp_filters(void) { #ifdef CONFIG_SECCOMP - if (seccomp_start() < 0) { + if (seccomp_start(INIT, &ctx) < 0) 
{ qerror_report(ERROR_CLASS_GENERIC_ERROR, "failed to install seccomp syscall filter in the kernel"); return -1; @@ -3794,6 +3795,16 @@ int main(int argc, char **argv, char **envp) os_setup_post(); + if (seccomp_on) { +#ifdef CONFIG_SECCOMP + if (seccomp_start(MAIN_LOOP, &ctx) < 0) { + qerror_report(ERROR_CLASS_GENERIC_ERROR, + "failed to install seccomp syscall filter in the kernel"); + return -1; + } +#endif + } + resume_all_vcpus(); main_loop(); bdrv_close_all();
This patch includes a second whitelist right before the main loop. It's a smaller and more restricted whitelist, excluding execve() among many others. Signed-off-by: Eduardo Otubo <otubo@linux.vnet.ibm.com> --- qemu-seccomp.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ qemu-seccomp.h | 7 ++++- vl.c | 13 +++++++- 3 files changed, 103 insertions(+), 11 deletions(-)