diff mbox

[v2] linux-user: Simplify timerid checks on g_posix_timers range

Message ID 1408708578-53362-1-git-send-email-agraf@suse.de
State New
Headers show

Commit Message

Alexander Graf Aug. 22, 2014, 11:56 a.m. UTC
We check whether the passed in timer id is negative on all calls
that involve g_posix_timers.

However, these checks are bogus. First off we limit the timer_id to
16 bits which is not what Linux does. Then we check whether it's negative
which it can't be because we masked it.

We can safely remove the masking. For the negativity check we can just
treat the timerid as unsigned and only check for upper boundaries.

Signed-off-by: Alexander Graf <agraf@suse.de>

---

v1 -> v2:

  - drop 0xffff mask
  - explicitly cast to unsigned because the mask is missing now

---
 linux-user/syscall.c | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

Comments

Peter Maydell Aug. 22, 2014, 12:07 p.m. UTC | #1
On 22 August 2014 12:56, Alexander Graf <agraf@suse.de> wrote:
> We check whether the passed in timer id is negative on all calls
> that involve g_posix_timers.
>
> However, these checks are bogus. First off we limit the timer_id to
> 16 bits which is not what Linux does. Then we check whether it's negative
> which it can't be because we masked it.
>
> We can safely remove the masking. For the negativity check we can just
> treat the timerid as unsigned and only check for upper boundaries.

Timer IDs aren't unsigned for the kernel; why not just drop
the mask and keep the <0 checks?

-- PMM
Laurent Vivier Aug. 22, 2014, 12:09 p.m. UTC | #2
Hi,

as in the kernel timer_t is an "int" (as said PMM), you should cast to "int" to
remove garbage on 64bit hosts and check sign ...

Regards,
Laurent

> Le 22 août 2014 à 13:56, Alexander Graf <agraf@suse.de> a écrit :
>
>
> We check whether the passed in timer id is negative on all calls
> that involve g_posix_timers.
>
> However, these checks are bogus. First off we limit the timer_id to
> 16 bits which is not what Linux does. Then we check whether it's negative
> which it can't be because we masked it.
>
> We can safely remove the masking. For the negativity check we can just
> treat the timerid as unsigned and only check for upper boundaries.
>
> Signed-off-by: Alexander Graf <agraf@suse.de>
>
> ---
>
> v1 -> v2:
>
> - drop 0xffff mask
> - explicitly cast to unsigned because the mask is missing now
>
> ---
> linux-user/syscall.c | 30 +++++++++++++++++-------------
> 1 file changed, 17 insertions(+), 13 deletions(-)
>
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index f6c887f..92b6a38 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -9508,11 +9508,12 @@ abi_long do_syscall(void *cpu_env, int num, abi_long
> arg1,
> {
> /* args: timer_t timerid, int flags, const struct itimerspec *new_value,
> * struct itimerspec * old_value */
> - arg1 &= 0xffff;
> - if (arg3 == 0 || arg1 < 0 || arg1 >= ARRAY_SIZE(g_posix_timers)) {
> + target_ulong timerid = arg1;
> +
> + if (arg3 == 0 || timerid >= ARRAY_SIZE(g_posix_timers)) {
> ret = -TARGET_EINVAL;
> } else {
> - timer_t htimer = g_posix_timers[arg1];
> + timer_t htimer = g_posix_timers[timerid];
> struct itimerspec hspec_new = {{0},}, hspec_old = {{0},};
>
> target_to_host_itimerspec(&hspec_new, arg3);
> @@ -9528,13 +9529,14 @@ abi_long do_syscall(void *cpu_env, int num, abi_long
> arg1,
> case TARGET_NR_timer_gettime:
> {
> /* args: timer_t timerid, struct itimerspec *curr_value */
> - arg1 &= 0xffff;
> + target_ulong timerid = arg1;
> +
> if (!arg2) {
> return -TARGET_EFAULT;
> - } else if (arg1 < 0 || arg1 >= ARRAY_SIZE(g_posix_timers)) {
> + } else if (timerid >= ARRAY_SIZE(g_posix_timers)) {
> ret = -TARGET_EINVAL;
> } else {
> - timer_t htimer = g_posix_timers[arg1];
> + timer_t htimer = g_posix_timers[timerid];
> struct itimerspec hspec;
> ret = get_errno(timer_gettime(htimer, &hspec));
>
> @@ -9550,11 +9552,12 @@ abi_long do_syscall(void *cpu_env, int num, abi_long
> arg1,
> case TARGET_NR_timer_getoverrun:
> {
> /* args: timer_t timerid */
> - arg1 &= 0xffff;
> - if (arg1 < 0 || arg1 >= ARRAY_SIZE(g_posix_timers)) {
> + target_ulong timerid = arg1;
> +
> + if (timerid >= ARRAY_SIZE(g_posix_timers)) {
> ret = -TARGET_EINVAL;
> } else {
> - timer_t htimer = g_posix_timers[arg1];
> + timer_t htimer = g_posix_timers[timerid];
> ret = get_errno(timer_getoverrun(htimer));
> }
> break;
> @@ -9565,13 +9568,14 @@ abi_long do_syscall(void *cpu_env, int num, abi_long
> arg1,
> case TARGET_NR_timer_delete:
> {
> /* args: timer_t timerid */
> - arg1 &= 0xffff;
> - if (arg1 < 0 || arg1 >= ARRAY_SIZE(g_posix_timers)) {
> + target_ulong timerid = arg1;
> +
> + if (timerid >= ARRAY_SIZE(g_posix_timers)) {
> ret = -TARGET_EINVAL;
> } else {
> - timer_t htimer = g_posix_timers[arg1];
> + timer_t htimer = g_posix_timers[timerid];
> ret = get_errno(timer_delete(htimer));
> - g_posix_timers[arg1] = 0;
> + g_posix_timers[timerid] = 0;
> }
> break;
> }
> --
> 1.7.12.4
>
>
Alexander Graf Aug. 22, 2014, 12:12 p.m. UTC | #3
On 22.08.14 14:07, Peter Maydell wrote:
> On 22 August 2014 12:56, Alexander Graf <agraf@suse.de> wrote:
>> We check whether the passed in timer id is negative on all calls
>> that involve g_posix_timers.
>>
>> However, these checks are bogus. First off we limit the timer_id to
>> 16 bits which is not what Linux does. Then we check whether it's negative
>> which it can't be because we masked it.
>>
>> We can safely remove the masking. For the negativity check we can just
>> treat the timerid as unsigned and only check for upper boundaries.
> 
> Timer IDs aren't unsigned for the kernel; why not just drop
> the mask and keep the <0 checks?

Because I'd then have to carry yet another local patch.

In Linux, the timer id is a "key" into a hash table that the kernel
searches to find its timer. In QEMU it's an offset into an array.

In both cases the syscall user receives it as a token from a create
function and should treat it as opaque.

So in the QEMU case it is unsigned, regardless of what the kernel allows
it to be, because it's an array offset.


Alex
Peter Maydell Aug. 22, 2014, 12:25 p.m. UTC | #4
On 22 August 2014 13:12, Alexander Graf <agraf@suse.de> wrote:
> In Linux, the timer id is a "key" into a hash table that the kernel
> searches to find its timer. In QEMU it's an offset into an array.
>
> In both cases the syscall user receives it as a token from a create
> function and should treat it as opaque.
>
> So in the QEMU case it is unsigned, regardless of what the kernel allows
> it to be, because it's an array offset.

It's a number between 0 and 32. That doesn't imply that it has
to be an unsigned variable, and we already have it in a
signed variable arg1...

-- PMM
Alexander Graf Aug. 22, 2014, 12:29 p.m. UTC | #5
On 22.08.14 14:25, Peter Maydell wrote:
> On 22 August 2014 13:12, Alexander Graf <agraf@suse.de> wrote:
>> In Linux, the timer id is a "key" into a hash table that the kernel
>> searches to find its timer. In QEMU it's an offset into an array.
>>
>> In both cases the syscall user receives it as a token from a create
>> function and should treat it as opaque.
>>
>> So in the QEMU case it is unsigned, regardless of what the kernel allows
>> it to be, because it's an array offset.
> 
> It's a number between 0 and 32. That doesn't imply that it has
> to be an unsigned variable, and we already have it in a
> signed variable arg1...

Yes, so the end result will be the same. What's the point of this bike
shedding?


Alex
Laurent Vivier Aug. 22, 2014, 1 p.m. UTC | #6
> Le 22 août 2014 à 14:29, Alexander Graf <agraf@suse.de> a écrit :
>
>
>
>
> On 22.08.14 14:25, Peter Maydell wrote:
> > On 22 August 2014 13:12, Alexander Graf <agraf@suse.de> wrote:
> >> In Linux, the timer id is a "key" into a hash table that the kernel
> >> searches to find its timer. In QEMU it's an offset into an array.
> >>
> >> In both cases the syscall user receives it as a token from a create
> >> function and should treat it as opaque.
> >>
> >> So in the QEMU case it is unsigned, regardless of what the kernel allows
> >> it to be, because it's an array offset.
> >
> > It's a number between 0 and 32. That doesn't imply that it has
> > to be an unsigned variable, and we already have it in a
> > signed variable arg1...
>
> Yes, so the end result will be the same. What's the point of this bike
> shedding?

On some archs, we can imagine libc/gcc filling only the 32 lower bits (= int) of
the register during the syscall, and without modifying the 32 upper bits (=
garbage). You must ignore the 32 upper bits (but you can ignore the sign too). I
think you can let the mask but remove the sign checking -> your patch v1 was
good ...

Regards,
Laurent
Peter Maydell Aug. 22, 2014, 1:09 p.m. UTC | #7
On 22 August 2014 14:00, Laurent Vivier <laurent@vivier.eu> wrote:
>> Le 22 août 2014 à 14:29, Alexander Graf <agraf@suse.de> a écrit :
>> On 22.08.14 14:25, Peter Maydell wrote:
>> > It's a number between 0 and 32. That doesn't imply that it has
>> > to be an unsigned variable, and we already have it in a
>> > signed variable arg1...
>>
>> Yes, so the end result will be the same. What's the point of this bike
>> shedding?

Not much, except that it's a smaller and simpler patch if you
just remove the bogus masking.

> On some archs, we can imagine libc/gcc filling only the 32 lower bits (=
> int) of the register during the syscall, and without modifying the 32 upper
> bits (= garbage). You must ignore the 32 upper bits (but you can ignore the
> sign too). I think you can let the mask but remove the sign checking -> your
> patch v1 was good ...

No, this is wrong I think. do_syscall() is passed a set of arguments
of type "abi_long". It's true that the calling convention might be
such that  if abi_long is 32 bits and host registers are 64 bits then
the upper half of the host register might be garbage. But in that
case the compiler is obliged to implement casts and other operations
on the variable so they behave correctly. So you never need to worry
about it.

v1 was was definitely wrong. v2 is correct but gratuitously fiddly.
But I don't care enough to actually demand a v3.

thanks
-- PMM
Andreas Färber Aug. 22, 2014, 1:27 p.m. UTC | #8
Hi,

Am 22.08.2014 14:09, schrieb Laurent Vivier:
> as in the kernel timer_t is an "int" (as said PMM), you should cast to
> "int" to remove garbage on 64bit hosts and check sign ...

So maybe that's the bug Alex was trying to fix downstream with the use
of unsigned types? If as you say the upper 32 bits may be garbage, then
casting from long to int would put garbage into bit 31 unless you cast
to unsigned long first. Maybe we need cast macros to fix that?
TARGET_TIMER_T() or something?

Regards,
Andreas
Peter Maydell Aug. 22, 2014, 1:34 p.m. UTC | #9
On 22 August 2014 14:27, Andreas Färber <afaerber@suse.de> wrote:
> Am 22.08.2014 14:09, schrieb Laurent Vivier:
>> as in the kernel timer_t is an "int" (as said PMM), you should cast to
>> "int" to remove garbage on 64bit hosts and check sign ...
>
> So maybe that's the bug Alex was trying to fix downstream with the use
> of unsigned types?

I imagine the reason the SuSE tree switches to abi_ulong
for the arg* is that it fixes a bunch of bugs we have where we're
incorrectly casting a (probably 32 bit) abi_long to a 64 bit signed
host type and getting a sign-extension, when the semantics of
those particular syscalls require unsigned values. But conversely
the change probably means that places which wanted the
sign-extension are no longer getting it.

If we were writing this code from scratch then there's probably
a good argument for making the arg* be the unsigned type
rather than signed. Unfortunately at this point it's basically
impossible to change over, because we'd have to audit every
use of them in a 10,000 line file to determine whether we needed
to put a cast back in to get sign extension or not. I'd rather we
just fixed the places that don't want sign-extension, because
presumably we at least have examples of failing guest programs
we can use to tell us what the problematic syscalls are...

-- PMM
Andreas Färber Aug. 22, 2014, 1:41 p.m. UTC | #10
Am 22.08.2014 15:34, schrieb Peter Maydell:
> On 22 August 2014 14:27, Andreas Färber <afaerber@suse.de> wrote:
>> Am 22.08.2014 14:09, schrieb Laurent Vivier:
>>> as in the kernel timer_t is an "int" (as said PMM), you should cast to
>>> "int" to remove garbage on 64bit hosts and check sign ...
>>
>> So maybe that's the bug Alex was trying to fix downstream with the use
>> of unsigned types?
> 
> I imagine the reason the SuSE tree switches to abi_ulong
> for the arg* is that it fixes a bunch of bugs we have where we're
> incorrectly casting a (probably 32 bit) abi_long to a 64 bit signed
> host type and getting a sign-extension, when the semantics of
> those particular syscalls require unsigned values. But conversely
> the change probably means that places which wanted the
> sign-extension are no longer getting it.
> 
> If we were writing this code from scratch then there's probably
> a good argument for making the arg* be the unsigned type
> rather than signed. Unfortunately at this point it's basically
> impossible to change over, because we'd have to audit every
> use of them in a 10,000 line file to determine whether we needed
> to put a cast back in to get sign extension or not. I'd rather we
> just fixed the places that don't want sign-extension, because
> presumably we at least have examples of failing guest programs
> we can use to tell us what the problematic syscalls are...

You snipped the part of my message where I was clearly *not* advocating
to apply Alex' type change but suggested to annotate arg uses with the
actual type, so that intermediate casts can be inserted. If we were to
agree on some system, that could be done incrementally.

Andreas
Peter Maydell Aug. 22, 2014, 1:43 p.m. UTC | #11
On 22 August 2014 14:41, Andreas Färber <afaerber@suse.de> wrote:
> You snipped the part of my message where I was clearly *not* advocating
> to apply Alex' type change but suggested to annotate arg uses with the
> actual type, so that intermediate casts can be inserted. If we were to
> agree on some system, that could be done incrementally.

I snipped the other half of your message because it seemed
to be predicated on Laurent's incorrect analysis (see my
other email in reply to him).

-- PMM
diff mbox

Patch

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index f6c887f..92b6a38 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -9508,11 +9508,12 @@  abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
     {
         /* args: timer_t timerid, int flags, const struct itimerspec *new_value,
          * struct itimerspec * old_value */
-        arg1 &= 0xffff;
-        if (arg3 == 0 || arg1 < 0 || arg1 >= ARRAY_SIZE(g_posix_timers)) {
+        target_ulong timerid = arg1;
+
+        if (arg3 == 0 || timerid >= ARRAY_SIZE(g_posix_timers)) {
             ret = -TARGET_EINVAL;
         } else {
-            timer_t htimer = g_posix_timers[arg1];
+            timer_t htimer = g_posix_timers[timerid];
             struct itimerspec hspec_new = {{0},}, hspec_old = {{0},};
 
             target_to_host_itimerspec(&hspec_new, arg3);
@@ -9528,13 +9529,14 @@  abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
     case TARGET_NR_timer_gettime:
     {
         /* args: timer_t timerid, struct itimerspec *curr_value */
-        arg1 &= 0xffff;
+        target_ulong timerid = arg1;
+
         if (!arg2) {
             return -TARGET_EFAULT;
-        } else if (arg1 < 0 || arg1 >= ARRAY_SIZE(g_posix_timers)) {
+        } else if (timerid >= ARRAY_SIZE(g_posix_timers)) {
             ret = -TARGET_EINVAL;
         } else {
-            timer_t htimer = g_posix_timers[arg1];
+            timer_t htimer = g_posix_timers[timerid];
             struct itimerspec hspec;
             ret = get_errno(timer_gettime(htimer, &hspec));
 
@@ -9550,11 +9552,12 @@  abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
     case TARGET_NR_timer_getoverrun:
     {
         /* args: timer_t timerid */
-        arg1 &= 0xffff;
-        if (arg1 < 0 || arg1 >= ARRAY_SIZE(g_posix_timers)) {
+        target_ulong timerid = arg1;
+
+        if (timerid >= ARRAY_SIZE(g_posix_timers)) {
             ret = -TARGET_EINVAL;
         } else {
-            timer_t htimer = g_posix_timers[arg1];
+            timer_t htimer = g_posix_timers[timerid];
             ret = get_errno(timer_getoverrun(htimer));
         }
         break;
@@ -9565,13 +9568,14 @@  abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
     case TARGET_NR_timer_delete:
     {
         /* args: timer_t timerid */
-        arg1 &= 0xffff;
-        if (arg1 < 0 || arg1 >= ARRAY_SIZE(g_posix_timers)) {
+        target_ulong timerid = arg1;
+
+        if (timerid >= ARRAY_SIZE(g_posix_timers)) {
             ret = -TARGET_EINVAL;
         } else {
-            timer_t htimer = g_posix_timers[arg1];
+            timer_t htimer = g_posix_timers[timerid];
             ret = get_errno(timer_delete(htimer));
-            g_posix_timers[arg1] = 0;
+            g_posix_timers[timerid] = 0;
         }
         break;
     }