Patchwork [3/4] Support for "double whitelist" filters

login
register
mail settings
Submitter Eduardo Otubo
Date Oct. 17, 2012, 1:15 p.m.
Message ID <1350479712-15082-3-git-send-email-otubo@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/192038/
State New
Headers show

Comments

Eduardo Otubo - Oct. 17, 2012, 1:15 p.m.
This patch includes a second whitelist right before the main loop. It's
a smaller and more restricted whitelist, excluding execve() among many
others.

Signed-off-by: Eduardo Otubo <otubo@linux.vnet.ibm.com>
---
 qemu-seccomp.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
 qemu-seccomp.h |  7 ++++-
 vl.c           | 13 +++++++-
 3 files changed, 103 insertions(+), 11 deletions(-)
Blue Swirl - Oct. 19, 2012, 5:04 p.m.
On Wed, Oct 17, 2012 at 1:15 PM, Eduardo Otubo <otubo@linux.vnet.ibm.com> wrote:
> This patch includes a second whitelist right before the main loop. It's
> a smaller and more restricted whitelist, excluding execve() among many
> others.
>
> Signed-off-by: Eduardo Otubo <otubo@linux.vnet.ibm.com>
> ---
>  qemu-seccomp.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
>  qemu-seccomp.h |  7 ++++-
>  vl.c           | 13 +++++++-
>  3 files changed, 103 insertions(+), 11 deletions(-)
>
> diff --git a/qemu-seccomp.c b/qemu-seccomp.c
> index a25f2fa..9c68af5 100644
> --- a/qemu-seccomp.c
> +++ b/qemu-seccomp.c
> @@ -13,6 +13,7 @@
>   * GNU GPL, version 2 or (at your option) any later version.
>   */
>  #include <stdio.h>
> +#include <stdlib.h>
>  #include <seccomp.h>
>  #include "qemu-seccomp.h"
>
> @@ -21,7 +22,7 @@ struct QemuSeccompSyscall {
>      uint8_t priority;
>  };
>
> -static const struct QemuSeccompSyscall seccomp_whitelist[] = {
> +static const struct QemuSeccompSyscall seccomp_whitelist_init[] = {
>      { SCMP_SYS(timer_settime), 255 },
>      { SCMP_SYS(timer_gettime), 254 },
>      { SCMP_SYS(futex), 253 },
> @@ -118,27 +119,102 @@ static const struct QemuSeccompSyscall seccomp_whitelist[] = {
>      { SCMP_SYS(accept4), 242 }
>  };
>
> -int seccomp_start(void)
> +static const struct QemuSeccompSyscall seccomp_whitelist_main_loop[] = {
> +    { SCMP_SYS(timer_settime), 255 },
> +    { SCMP_SYS(timer_gettime), 254 },
> +    { SCMP_SYS(futex), 253 },
> +    { SCMP_SYS(select), 252 },
> +    { SCMP_SYS(recvfrom), 251 },
> +    { SCMP_SYS(sendto), 250 },
> +    { SCMP_SYS(read), 249 },
> +    { SCMP_SYS(brk), 248 },
> +    { SCMP_SYS(mmap), 247 },
> +#if defined(__i386__)
> +    { SCMP_SYS(fcntl64), 245 },
> +    { SCMP_SYS(fstat64), 245 },
> +    { SCMP_SYS(stat64), 245 },
> +    { SCMP_SYS(getgid32), 245 },
> +    { SCMP_SYS(getegid32), 245 },
> +    { SCMP_SYS(getuid32), 245 },
> +    { SCMP_SYS(geteuid32), 245 },
> +    { SCMP_SYS(sigreturn), 245 },
> +    { SCMP_SYS(_newselect), 245 },
> +    { SCMP_SYS(_llseek), 245 },
> +    { SCMP_SYS(mmap2), 245},
> +    { SCMP_SYS(sigprocmask), 245 },
> +#endif
> +    { SCMP_SYS(exit), 245 },
> +    { SCMP_SYS(timer_delete), 245 },
> +    { SCMP_SYS(exit_group), 245 },
> +    { SCMP_SYS(rt_sigreturn), 245 },
> +    { SCMP_SYS(madvise), 245 },
> +    { SCMP_SYS(write), 244 },
> +    { SCMP_SYS(fcntl), 243 },
> +    { SCMP_SYS(tgkill), 242 },
> +    { SCMP_SYS(rt_sigaction), 242 },
> +    { SCMP_SYS(pipe2), 242 },
> +    { SCMP_SYS(munmap), 242 },
> +    { SCMP_SYS(mremap), 242 },
> +    { SCMP_SYS(getsockname), 242 },
> +    { SCMP_SYS(getpeername), 242 },
> +    { SCMP_SYS(close), 242 },
> +    { SCMP_SYS(accept4), 242 }

It's nice to see that for example open, creat, unlink, socket, bind,
mprotect, setrlimit and kill are not present.

> +};
> +
> +static int
> +process_whitelist(const struct QemuSeccompSyscall *whitelist,
> +                  unsigned int size, scmp_filter_ctx *ctx)
>  {
>      int rc = 0;
> +
>      unsigned int i = 0;
> -    scmp_filter_ctx ctx;
> +
> +    for (i = 0; i < size; i++) {
> +        rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, whitelist[i].num, 0);
> +        if (rc < 0) {
> +            return -1;
> +        }
> +
> +        rc = seccomp_syscall_priority(ctx, whitelist[i].num,
> +                                      whitelist[i].priority);
> +        if (rc < 0) {
> +            return -1;
> +        }
> +    }
> +    return 0;
> +}
> +
> +int
> +seccomp_start(enum whitelist_mode mode, scmp_filter_ctx *ctx)
> +{
> +    int rc = 0;
>
>      ctx = seccomp_init(SCMP_ACT_KILL);
>      if (ctx == NULL) {
> +        rc = -1;
>          goto seccomp_return;
>      }
>
> -    for (i = 0; i < ARRAY_SIZE(seccomp_whitelist); i++) {
> -        rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, seccomp_whitelist[i].num, 0);
> -        if (rc < 0) {
> +    switch (mode) {
> +    case INIT:
> +        if (process_whitelist
> +            (seccomp_whitelist_init,
> +             ARRAY_SIZE(seccomp_whitelist_init), ctx) < 0) {
> +            rc = -1;
>              goto seccomp_return;
>          }
> -        rc = seccomp_syscall_priority(ctx, seccomp_whitelist[i].num,
> -                                      seccomp_whitelist[i].priority);
> -        if (rc < 0) {
> +        break;
> +    case MAIN_LOOP:
> +        if (process_whitelist
> +            (seccomp_whitelist_main_loop,
> +             ARRAY_SIZE(seccomp_whitelist_main_loop), ctx) < 0) {
> +            rc = -1;
>              goto seccomp_return;
>          }
> +        break;
> +    default:
> +        rc = -1;
> +        goto seccomp_return;
>      }
>
>      rc = seccomp_load(ctx);
> diff --git a/qemu-seccomp.h b/qemu-seccomp.h
> index b2fc3f8..1c97978 100644
> --- a/qemu-seccomp.h
> +++ b/qemu-seccomp.h
> @@ -18,5 +18,10 @@
>  #include <seccomp.h>
>  #include "osdep.h"
>
> -int seccomp_start(void);
> +enum whitelist_mode {
> +    INIT = 0,
> +    MAIN_LOOP = 1,
> +};
> +
> +int seccomp_start(enum whitelist_mode mode, scmp_filter_ctx *ctx);
>  #endif
> diff --git a/vl.c b/vl.c
> index bec68cd..773d488 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -278,6 +278,7 @@ static int default_vga = 1;
>
>  #ifdef CONFIG_SECCOMP
>  bool seccomp_on = true;
> +scmp_filter_ctx ctx;

This should be a local variable to main(), maybe also named
'main_loop_ctx' so we can add further contexts.

>  #endif
>
>  static struct {
> @@ -777,7 +778,7 @@ static int bt_parse(const char *opt)
>  static int install_seccomp_filters(void)
>  {
>  #ifdef CONFIG_SECCOMP
> -    if (seccomp_start() < 0) {
> +    if (seccomp_start(INIT, &ctx) < 0) {
>          qerror_report(ERROR_CLASS_GENERIC_ERROR,
>                  "failed to install seccomp syscall filter in the kernel");
>          return -1;
> @@ -3794,6 +3795,16 @@ int main(int argc, char **argv, char **envp)
>
>      os_setup_post();
>
> +    if (seccomp_on) {

'seccomp_on' is only declared when CONFIG_SECCOMP is defined, so using it
outside the #ifdef would break the build when seccomp is not configured.

> +#ifdef CONFIG_SECCOMP
> +        if (seccomp_start(MAIN_LOOP, &ctx) < 0) {
> +            qerror_report(ERROR_CLASS_GENERIC_ERROR,
> +                          "failed to install seccomp syscall filter in the kernel");

This error message could be different from the first one.

> +            return -1;
> +        }
> +#endif
> +    }
> +
>      resume_all_vcpus();
>      main_loop();
>      bdrv_close_all();
> --
> 1.7.12
>
>
Corey Bryant - Oct. 19, 2012, 8:03 p.m.
On 10/17/2012 09:15 AM, Eduardo Otubo wrote:
> This patch includes a second whitelist right before the main loop. It's
> a smaller and more restricted whitelist, excluding execve() among many
> others.
>
> Signed-off-by: Eduardo Otubo <otubo@linux.vnet.ibm.com>
> ---
>   qemu-seccomp.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
>   qemu-seccomp.h |  7 ++++-
>   vl.c           | 13 +++++++-
>   3 files changed, 103 insertions(+), 11 deletions(-)
>
> diff --git a/qemu-seccomp.c b/qemu-seccomp.c
> index a25f2fa..9c68af5 100644
> --- a/qemu-seccomp.c
> +++ b/qemu-seccomp.c
> @@ -13,6 +13,7 @@
>    * GNU GPL, version 2 or (at your option) any later version.
>    */
>   #include <stdio.h>
> +#include <stdlib.h>
>   #include <seccomp.h>
>   #include "qemu-seccomp.h"
>
> @@ -21,7 +22,7 @@ struct QemuSeccompSyscall {
>       uint8_t priority;
>   };
>
> -static const struct QemuSeccompSyscall seccomp_whitelist[] = {
> +static const struct QemuSeccompSyscall seccomp_whitelist_init[] = {
>       { SCMP_SYS(timer_settime), 255 },
>       { SCMP_SYS(timer_gettime), 254 },
>       { SCMP_SYS(futex), 253 },
> @@ -118,27 +119,102 @@ static const struct QemuSeccompSyscall seccomp_whitelist[] = {
>       { SCMP_SYS(accept4), 242 }
>   };
>
> -int seccomp_start(void)
> +static const struct QemuSeccompSyscall seccomp_whitelist_main_loop[] = {
> +    { SCMP_SYS(timer_settime), 255 },
> +    { SCMP_SYS(timer_gettime), 254 },
> +    { SCMP_SYS(futex), 253 },
> +    { SCMP_SYS(select), 252 },
> +    { SCMP_SYS(recvfrom), 251 },
> +    { SCMP_SYS(sendto), 250 },
> +    { SCMP_SYS(read), 249 },
> +    { SCMP_SYS(brk), 248 },
> +    { SCMP_SYS(mmap), 247 },
> +#if defined(__i386__)
> +    { SCMP_SYS(fcntl64), 245 },
> +    { SCMP_SYS(fstat64), 245 },
> +    { SCMP_SYS(stat64), 245 },
> +    { SCMP_SYS(getgid32), 245 },
> +    { SCMP_SYS(getegid32), 245 },
> +    { SCMP_SYS(getuid32), 245 },
> +    { SCMP_SYS(geteuid32), 245 },
> +    { SCMP_SYS(sigreturn), 245 },
> +    { SCMP_SYS(_newselect), 245 },
> +    { SCMP_SYS(_llseek), 245 },
> +    { SCMP_SYS(mmap2), 245},
> +    { SCMP_SYS(sigprocmask), 245 },
> +#endif
> +    { SCMP_SYS(exit), 245 },
> +    { SCMP_SYS(timer_delete), 245 },
> +    { SCMP_SYS(exit_group), 245 },
> +    { SCMP_SYS(rt_sigreturn), 245 },
> +    { SCMP_SYS(madvise), 245 },
> +    { SCMP_SYS(write), 244 },
> +    { SCMP_SYS(fcntl), 243 },
> +    { SCMP_SYS(tgkill), 242 },
> +    { SCMP_SYS(rt_sigaction), 242 },
> +    { SCMP_SYS(pipe2), 242 },
> +    { SCMP_SYS(munmap), 242 },
> +    { SCMP_SYS(mremap), 242 },
> +    { SCMP_SYS(getsockname), 242 },
> +    { SCMP_SYS(getpeername), 242 },
> +    { SCMP_SYS(close), 242 },
> +    { SCMP_SYS(accept4), 242 }
> +};

This list also needs: eventfd2, recvmsg, ioctl, rt_sigprocmask.

> +
> +static int
> +process_whitelist(const struct QemuSeccompSyscall *whitelist,
> +                  unsigned int size, scmp_filter_ctx *ctx)
>   {
>       int rc = 0;
> +
>       unsigned int i = 0;
> -    scmp_filter_ctx ctx;
> +
> +    for (i = 0; i < size; i++) {
> +        rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, whitelist[i].num, 0);
> +        if (rc < 0) {
> +            return -1;
> +        }
> +
> +        rc = seccomp_syscall_priority(ctx, whitelist[i].num,
> +                                      whitelist[i].priority);
> +        if (rc < 0) {
> +            return -1;
> +        }
> +    }
> +    return 0;
> +}
> +
> +int
> +seccomp_start(enum whitelist_mode mode, scmp_filter_ctx *ctx)
> +{
> +    int rc = 0;
>
>       ctx = seccomp_init(SCMP_ACT_KILL);
>       if (ctx == NULL) {
> +        rc = -1;
>           goto seccomp_return;
>       }
>
> -    for (i = 0; i < ARRAY_SIZE(seccomp_whitelist); i++) {
> -        rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, seccomp_whitelist[i].num, 0);
> -        if (rc < 0) {
> +    switch (mode) {
> +    case INIT:
> +        if (process_whitelist
> +            (seccomp_whitelist_init,
> +             ARRAY_SIZE(seccomp_whitelist_init), ctx) < 0) {
> +            rc = -1;
>               goto seccomp_return;
>           }
> -        rc = seccomp_syscall_priority(ctx, seccomp_whitelist[i].num,
> -                                      seccomp_whitelist[i].priority);
> -        if (rc < 0) {
> +        break;
> +    case MAIN_LOOP:
> +        if (process_whitelist
> +            (seccomp_whitelist_main_loop,
> +             ARRAY_SIZE(seccomp_whitelist_main_loop), ctx) < 0) {
> +            rc = -1;
>               goto seccomp_return;
>           }
> +        break;
> +    default:
> +        rc = -1;
> +        goto seccomp_return;
>       }
>
>       rc = seccomp_load(ctx);
> diff --git a/qemu-seccomp.h b/qemu-seccomp.h
> index b2fc3f8..1c97978 100644
> --- a/qemu-seccomp.h
> +++ b/qemu-seccomp.h
> @@ -18,5 +18,10 @@
>   #include <seccomp.h>
>   #include "osdep.h"
>
> -int seccomp_start(void);
> +enum whitelist_mode {
> +    INIT = 0,
> +    MAIN_LOOP = 1,
> +};
> +
> +int seccomp_start(enum whitelist_mode mode, scmp_filter_ctx *ctx);
>   #endif
> diff --git a/vl.c b/vl.c
> index bec68cd..773d488 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -278,6 +278,7 @@ static int default_vga = 1;
>
>   #ifdef CONFIG_SECCOMP
>   bool seccomp_on = true;
> +scmp_filter_ctx ctx;
>   #endif
>
>   static struct {
> @@ -777,7 +778,7 @@ static int bt_parse(const char *opt)
>   static int install_seccomp_filters(void)
>   {
>   #ifdef CONFIG_SECCOMP
> -    if (seccomp_start() < 0) {
> +    if (seccomp_start(INIT, &ctx) < 0) {
>           qerror_report(ERROR_CLASS_GENERIC_ERROR,
>                   "failed to install seccomp syscall filter in the kernel");
>           return -1;
> @@ -3794,6 +3795,16 @@ int main(int argc, char **argv, char **envp)
>
>       os_setup_post();
>
> +    if (seccomp_on) {
> +#ifdef CONFIG_SECCOMP
> +        if (seccomp_start(MAIN_LOOP, &ctx) < 0) {

The first list is installed with install_seccomp_filters() and this one
is installed with seccomp_start().  One thing you could do to make it more
consistent is to add a whitelist_mode parameter to
install_seccomp_filters() and call install_seccomp_filters(INIT) and
install_seccomp_filters(MAIN_LOOP).

> +            qerror_report(ERROR_CLASS_GENERIC_ERROR,
> +                          "failed to install seccomp syscall filter in the kernel");
> +            return -1;
> +        }
> +#endif
> +    }
> +
>       resume_all_vcpus();
>       main_loop();
>       bdrv_close_all();
>
Corey Bryant - Oct. 19, 2012, 8:08 p.m.
On 10/19/2012 01:04 PM, Blue Swirl wrote:
> On Wed, Oct 17, 2012 at 1:15 PM, Eduardo Otubo <otubo@linux.vnet.ibm.com> wrote:
>> This patch includes a second whitelist right before the main loop. It's
>> a smaller and more restricted whitelist, excluding execve() among many
>> others.
>>
>> Signed-off-by: Eduardo Otubo <otubo@linux.vnet.ibm.com>
>> ---
>>   qemu-seccomp.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
>>   qemu-seccomp.h |  7 ++++-
>>   vl.c           | 13 +++++++-
>>   3 files changed, 103 insertions(+), 11 deletions(-)
>>
>> diff --git a/qemu-seccomp.c b/qemu-seccomp.c
>> index a25f2fa..9c68af5 100644
>> --- a/qemu-seccomp.c
>> +++ b/qemu-seccomp.c
>> @@ -13,6 +13,7 @@
>>    * GNU GPL, version 2 or (at your option) any later version.
>>    */
>>   #include <stdio.h>
>> +#include <stdlib.h>
>>   #include <seccomp.h>
>>   #include "qemu-seccomp.h"
>>
>> @@ -21,7 +22,7 @@ struct QemuSeccompSyscall {
>>       uint8_t priority;
>>   };
>>
>> -static const struct QemuSeccompSyscall seccomp_whitelist[] = {
>> +static const struct QemuSeccompSyscall seccomp_whitelist_init[] = {
>>       { SCMP_SYS(timer_settime), 255 },
>>       { SCMP_SYS(timer_gettime), 254 },
>>       { SCMP_SYS(futex), 253 },
>> @@ -118,27 +119,102 @@ static const struct QemuSeccompSyscall seccomp_whitelist[] = {
>>       { SCMP_SYS(accept4), 242 }
>>   };
>>
>> -int seccomp_start(void)
>> +static const struct QemuSeccompSyscall seccomp_whitelist_main_loop[] = {
>> +    { SCMP_SYS(timer_settime), 255 },
>> +    { SCMP_SYS(timer_gettime), 254 },
>> +    { SCMP_SYS(futex), 253 },
>> +    { SCMP_SYS(select), 252 },
>> +    { SCMP_SYS(recvfrom), 251 },
>> +    { SCMP_SYS(sendto), 250 },
>> +    { SCMP_SYS(read), 249 },
>> +    { SCMP_SYS(brk), 248 },
>> +    { SCMP_SYS(mmap), 247 },
>> +#if defined(__i386__)
>> +    { SCMP_SYS(fcntl64), 245 },
>> +    { SCMP_SYS(fstat64), 245 },
>> +    { SCMP_SYS(stat64), 245 },
>> +    { SCMP_SYS(getgid32), 245 },
>> +    { SCMP_SYS(getegid32), 245 },
>> +    { SCMP_SYS(getuid32), 245 },
>> +    { SCMP_SYS(geteuid32), 245 },
>> +    { SCMP_SYS(sigreturn), 245 },
>> +    { SCMP_SYS(_newselect), 245 },
>> +    { SCMP_SYS(_llseek), 245 },
>> +    { SCMP_SYS(mmap2), 245},
>> +    { SCMP_SYS(sigprocmask), 245 },
>> +#endif
>> +    { SCMP_SYS(exit), 245 },
>> +    { SCMP_SYS(timer_delete), 245 },
>> +    { SCMP_SYS(exit_group), 245 },
>> +    { SCMP_SYS(rt_sigreturn), 245 },
>> +    { SCMP_SYS(madvise), 245 },
>> +    { SCMP_SYS(write), 244 },
>> +    { SCMP_SYS(fcntl), 243 },
>> +    { SCMP_SYS(tgkill), 242 },
>> +    { SCMP_SYS(rt_sigaction), 242 },
>> +    { SCMP_SYS(pipe2), 242 },
>> +    { SCMP_SYS(munmap), 242 },
>> +    { SCMP_SYS(mremap), 242 },
>> +    { SCMP_SYS(getsockname), 242 },
>> +    { SCMP_SYS(getpeername), 242 },
>> +    { SCMP_SYS(close), 242 },
>> +    { SCMP_SYS(accept4), 242 }
>
> It's nice to see that for example open, creat, unlink, socket, bind,
> mprotect, setrlimit and kill are not present.
>

Hmm, well open minimally needs to be added to this list so that drives 
can be hotplugged.

>> +};
>> +
>> +static int
>> +process_whitelist(const struct QemuSeccompSyscall *whitelist,
>> +                  unsigned int size, scmp_filter_ctx *ctx)
>>   {
>>       int rc = 0;
>> +
>>       unsigned int i = 0;
>> -    scmp_filter_ctx ctx;
>> +
>> +    for (i = 0; i < size; i++) {
>> +        rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, whitelist[i].num, 0);
>> +        if (rc < 0) {
>> +            return -1;
>> +        }
>> +
>> +        rc = seccomp_syscall_priority(ctx, whitelist[i].num,
>> +                                      whitelist[i].priority);
>> +        if (rc < 0) {
>> +            return -1;
>> +        }
>> +    }
>> +    return 0;
>> +}
>> +
>> +int
>> +seccomp_start(enum whitelist_mode mode, scmp_filter_ctx *ctx)
>> +{
>> +    int rc = 0;
>>
>>       ctx = seccomp_init(SCMP_ACT_KILL);
>>       if (ctx == NULL) {
>> +        rc = -1;
>>           goto seccomp_return;
>>       }
>>
>> -    for (i = 0; i < ARRAY_SIZE(seccomp_whitelist); i++) {
>> -        rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, seccomp_whitelist[i].num, 0);
>> -        if (rc < 0) {
>> +    switch (mode) {
>> +    case INIT:
>> +        if (process_whitelist
>> +            (seccomp_whitelist_init,
>> +             ARRAY_SIZE(seccomp_whitelist_init), ctx) < 0) {
>> +            rc = -1;
>>               goto seccomp_return;
>>           }
>> -        rc = seccomp_syscall_priority(ctx, seccomp_whitelist[i].num,
>> -                                      seccomp_whitelist[i].priority);
>> -        if (rc < 0) {
>> +        break;
>> +    case MAIN_LOOP:
>> +        if (process_whitelist
>> +            (seccomp_whitelist_main_loop,
>> +             ARRAY_SIZE(seccomp_whitelist_main_loop), ctx) < 0) {
>> +            rc = -1;
>>               goto seccomp_return;
>>           }
>> +        break;
>> +    default:
>> +        rc = -1;
>> +        goto seccomp_return;
>>       }
>>
>>       rc = seccomp_load(ctx);
>> diff --git a/qemu-seccomp.h b/qemu-seccomp.h
>> index b2fc3f8..1c97978 100644
>> --- a/qemu-seccomp.h
>> +++ b/qemu-seccomp.h
>> @@ -18,5 +18,10 @@
>>   #include <seccomp.h>
>>   #include "osdep.h"
>>
>> -int seccomp_start(void);
>> +enum whitelist_mode {
>> +    INIT = 0,
>> +    MAIN_LOOP = 1,
>> +};
>> +
>> +int seccomp_start(enum whitelist_mode mode, scmp_filter_ctx *ctx);
>>   #endif
>> diff --git a/vl.c b/vl.c
>> index bec68cd..773d488 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -278,6 +278,7 @@ static int default_vga = 1;
>>
>>   #ifdef CONFIG_SECCOMP
>>   bool seccomp_on = true;
>> +scmp_filter_ctx ctx;
>
> This should be a local variable to main(), maybe also named
> 'main_loop_ctx' so we can add further contexts.
>
>>   #endif
>>
>>   static struct {
>> @@ -777,7 +778,7 @@ static int bt_parse(const char *opt)
>>   static int install_seccomp_filters(void)
>>   {
>>   #ifdef CONFIG_SECCOMP
>> -    if (seccomp_start() < 0) {
>> +    if (seccomp_start(INIT, &ctx) < 0) {
>>           qerror_report(ERROR_CLASS_GENERIC_ERROR,
>>                   "failed to install seccomp syscall filter in the kernel");
>>           return -1;
>> @@ -3794,6 +3795,16 @@ int main(int argc, char **argv, char **envp)
>>
>>       os_setup_post();
>>
>> +    if (seccomp_on) {
>
> 'seccomp_on' is only available with CONFIG_SECCOMP, so this would break build.
>
>> +#ifdef CONFIG_SECCOMP
>> +        if (seccomp_start(MAIN_LOOP, &ctx) < 0) {
>> +            qerror_report(ERROR_CLASS_GENERIC_ERROR,
>> +                          "failed to install seccomp syscall filter in the kernel");
>
> This error message could be different from the first one.
>
>> +            return -1;
>> +        }
>> +#endif
>> +    }
>> +
>>       resume_all_vcpus();
>>       main_loop();
>>       bdrv_close_all();
>> --
>> 1.7.12
>>
>>
>
>
Eric Blake - Oct. 19, 2012, 8:36 p.m.
On 10/19/2012 02:08 PM, Corey Bryant wrote:
> 
> 
> On 10/19/2012 01:04 PM, Blue Swirl wrote:
>> On Wed, Oct 17, 2012 at 1:15 PM, Eduardo Otubo
>> <otubo@linux.vnet.ibm.com> wrote:
>>> This patch includes a second whitelist right before the main loop. It's
>>> a smaller and more restricted whitelist, excluding execve() among many
>>> others.
>>>

>> It's nice to see that for example open, creat, unlink, socket, bind,
>> mprotect, setrlimit and kill are not present.
>>
> 
> Hmm, well open minimally needs to be added to this list so that drives
> can be hotplugged.

Unless we enforce the use of add-fd for hot-plugging drives, but that in
turn requires that we have -blockdev semantics for telling qemu how to
open backing chains.
Corey Bryant - Oct. 19, 2012, 8:46 p.m.
On 10/19/2012 04:36 PM, Eric Blake wrote:
> On 10/19/2012 02:08 PM, Corey Bryant wrote:
>>
>>
>> On 10/19/2012 01:04 PM, Blue Swirl wrote:
>>> On Wed, Oct 17, 2012 at 1:15 PM, Eduardo Otubo
>>> <otubo@linux.vnet.ibm.com> wrote:
>>>> This patch includes a second whitelist right before the main loop. It's
>>>> a smaller and more restricted whitelist, excluding execve() among many
>>>> others.
>>>>
>
>>> It's nice to see that for example open, creat, unlink, socket, bind,
>>> mprotect, setrlimit and kill are not present.
>>>
>>
>> Hmm, well open minimally needs to be added to this list so that drives
>> can be hotplugged.
>
> Unless we enforce the use of add-fd for hot-plugging drives, but that in
> turn requires that we have -blockdev semantics for telling qemu how to
> open backing chains.
>

True, that would be nice.  But for now we don't have a complete fd-passing
solution, so maybe we can add that restriction in the future.

Patch

diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index a25f2fa..9c68af5 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -13,6 +13,7 @@ 
  * GNU GPL, version 2 or (at your option) any later version.
  */
 #include <stdio.h>
+#include <stdlib.h>
 #include <seccomp.h>
 #include "qemu-seccomp.h"
 
@@ -21,7 +22,7 @@  struct QemuSeccompSyscall {
     uint8_t priority;
 };
 
-static const struct QemuSeccompSyscall seccomp_whitelist[] = {
+static const struct QemuSeccompSyscall seccomp_whitelist_init[] = {
     { SCMP_SYS(timer_settime), 255 },
     { SCMP_SYS(timer_gettime), 254 },
     { SCMP_SYS(futex), 253 },
@@ -118,27 +119,102 @@  static const struct QemuSeccompSyscall seccomp_whitelist[] = {
     { SCMP_SYS(accept4), 242 }
 };
 
-int seccomp_start(void)
+static const struct QemuSeccompSyscall seccomp_whitelist_main_loop[] = {
+    { SCMP_SYS(timer_settime), 255 },
+    { SCMP_SYS(timer_gettime), 254 },
+    { SCMP_SYS(futex), 253 },
+    { SCMP_SYS(select), 252 },
+    { SCMP_SYS(recvfrom), 251 },
+    { SCMP_SYS(sendto), 250 },
+    { SCMP_SYS(read), 249 },
+    { SCMP_SYS(brk), 248 },
+    { SCMP_SYS(mmap), 247 },
+#if defined(__i386__)
+    { SCMP_SYS(fcntl64), 245 },
+    { SCMP_SYS(fstat64), 245 },
+    { SCMP_SYS(stat64), 245 },
+    { SCMP_SYS(getgid32), 245 },
+    { SCMP_SYS(getegid32), 245 },
+    { SCMP_SYS(getuid32), 245 },
+    { SCMP_SYS(geteuid32), 245 },
+    { SCMP_SYS(sigreturn), 245 },
+    { SCMP_SYS(_newselect), 245 },
+    { SCMP_SYS(_llseek), 245 },
+    { SCMP_SYS(mmap2), 245},
+    { SCMP_SYS(sigprocmask), 245 },
+#endif
+    { SCMP_SYS(exit), 245 },
+    { SCMP_SYS(timer_delete), 245 },
+    { SCMP_SYS(exit_group), 245 },
+    { SCMP_SYS(rt_sigreturn), 245 },
+    { SCMP_SYS(madvise), 245 },
+    { SCMP_SYS(write), 244 },
+    { SCMP_SYS(fcntl), 243 },
+    { SCMP_SYS(tgkill), 242 },
+    { SCMP_SYS(rt_sigaction), 242 },
+    { SCMP_SYS(pipe2), 242 },
+    { SCMP_SYS(munmap), 242 },
+    { SCMP_SYS(mremap), 242 },
+    { SCMP_SYS(getsockname), 242 },
+    { SCMP_SYS(getpeername), 242 },
+    { SCMP_SYS(close), 242 },
+    { SCMP_SYS(accept4), 242 }
+};
+
+static int
+process_whitelist(const struct QemuSeccompSyscall *whitelist,
+                  unsigned int size, scmp_filter_ctx *ctx)
 {
     int rc = 0;
+
     unsigned int i = 0;
-    scmp_filter_ctx ctx;
+
+    for (i = 0; i < size; i++) {
+        rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, whitelist[i].num, 0);
+        if (rc < 0) {
+            return -1;
+        }
+
+        rc = seccomp_syscall_priority(ctx, whitelist[i].num,
+                                      whitelist[i].priority);
+        if (rc < 0) {
+            return -1;
+        }
+    }
+    return 0;
+}
+
+int
+seccomp_start(enum whitelist_mode mode, scmp_filter_ctx *ctx)
+{
+    int rc = 0;
 
     ctx = seccomp_init(SCMP_ACT_KILL);
     if (ctx == NULL) {
+        rc = -1;
         goto seccomp_return;
     }
 
-    for (i = 0; i < ARRAY_SIZE(seccomp_whitelist); i++) {
-        rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, seccomp_whitelist[i].num, 0);
-        if (rc < 0) {
+    switch (mode) {
+    case INIT:
+        if (process_whitelist
+            (seccomp_whitelist_init,
+             ARRAY_SIZE(seccomp_whitelist_init), ctx) < 0) {
+            rc = -1;
             goto seccomp_return;
         }
-        rc = seccomp_syscall_priority(ctx, seccomp_whitelist[i].num,
-                                      seccomp_whitelist[i].priority);
-        if (rc < 0) {
+        break;
+    case MAIN_LOOP:
+        if (process_whitelist
+            (seccomp_whitelist_main_loop,
+             ARRAY_SIZE(seccomp_whitelist_main_loop), ctx) < 0) {
+            rc = -1;
             goto seccomp_return;
         }
+        break;
+    default:
+        rc = -1;
+        goto seccomp_return;
     }
 
     rc = seccomp_load(ctx);
diff --git a/qemu-seccomp.h b/qemu-seccomp.h
index b2fc3f8..1c97978 100644
--- a/qemu-seccomp.h
+++ b/qemu-seccomp.h
@@ -18,5 +18,10 @@ 
 #include <seccomp.h>
 #include "osdep.h"
 
-int seccomp_start(void);
+enum whitelist_mode {
+    INIT = 0,
+    MAIN_LOOP = 1,
+};
+
+int seccomp_start(enum whitelist_mode mode, scmp_filter_ctx *ctx);
 #endif
diff --git a/vl.c b/vl.c
index bec68cd..773d488 100644
--- a/vl.c
+++ b/vl.c
@@ -278,6 +278,7 @@  static int default_vga = 1;
 
 #ifdef CONFIG_SECCOMP
 bool seccomp_on = true;
+scmp_filter_ctx ctx;
 #endif
 
 static struct {
@@ -777,7 +778,7 @@  static int bt_parse(const char *opt)
 static int install_seccomp_filters(void)
 {
 #ifdef CONFIG_SECCOMP
-    if (seccomp_start() < 0) {
+    if (seccomp_start(INIT, &ctx) < 0) {
         qerror_report(ERROR_CLASS_GENERIC_ERROR,
                 "failed to install seccomp syscall filter in the kernel");
         return -1;
@@ -3794,6 +3795,16 @@  int main(int argc, char **argv, char **envp)
 
     os_setup_post();
 
+    if (seccomp_on) {
+#ifdef CONFIG_SECCOMP
+        if (seccomp_start(MAIN_LOOP, &ctx) < 0) {
+            qerror_report(ERROR_CLASS_GENERIC_ERROR,
+                          "failed to install seccomp syscall filter in the kernel");
+            return -1;
+        }
+#endif
+    }
+
     resume_all_vcpus();
     main_loop();
     bdrv_close_all();