[2/2] target/i386: trap on instructions longer than 15 bytes

Message ID 20171012143548.18581-3-pbonzini@redhat.com
State New
Headers show
Series
  • target/i386: trap on instructions longer than 15 bytes
Related show

Commit Message

Paolo Bonzini Oct. 12, 2017, 2:35 p.m.
Besides being more correct, this change is needed because arbitrarily long
instructions allow the generation of a translation block that spans three
pages.  This confuses the generator and even allows ring 3 code to poison
the translation block cache and inject code into other processes that are
in guest ring 3.

This is an improved (and more invasive) fix for the bug fixed in commit
30663fd ("tcg/i386: Check the size of instruction being translated",
2017-03-24).  In addition to being more precise (and generating the
right exception, which is #GP rather than #UD), it distinguishes better
between page faults and too long instructions, as shown by this test case:

    #include <sys/mman.h>
    #include <string.h>
    #include <stdio.h>

    int main()
    {
            char *x = mmap(NULL, 8192, PROT_READ|PROT_WRITE|PROT_EXEC,
                           MAP_PRIVATE|MAP_ANON, -1, 0);
            memset(x, 0x66, 4096);
            x[4096] = 0x90;
            x[4097] = 0xc3;
            char *i = x + 4096 - 15;
            mprotect(x + 4096, 4096, PROT_READ|PROT_WRITE);
            ((void(*)(void)) i) ();
    }

... which produces a #GP without the mprotect, and a #PF with it.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/translate.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

Comments

Richard Henderson Oct. 12, 2017, 7:30 p.m. | #1
On 10/12/2017 07:35 AM, Paolo Bonzini wrote:
> Besides being more correct, arbitrarily long instruction allow the
> generation of a translation block that spans three pages.  This
> confuses the generator and even allows ring 3 code to poison the
> translation block cache and inject code into other processes that are
> in guest ring 3.
> 
> This is an improved (and more invasive) fix for the bug fixed in commit
> 30663fd ("tcg/i386: Check the size of instruction being translated",
> 2017-03-24).  In addition to being more precise (and generating the
> right exception, which is #GP rather than #UD), it distinguishes better
> between page faults and too long instructions, as shown by this test case:
> 
>     #include <sys/mman.h>
>     #include <string.h>
>     #include <stdio.h>
> 
>     int main()
>     {
>             char *x = mmap(NULL, 8192, PROT_READ|PROT_WRITE|PROT_EXEC,
>                            MAP_PRIVATE|MAP_ANON, -1, 0);
>             memset(x, 0x66, 4096);
>             x[4096] = 0x90;
>             x[4097] = 0xc3;
>             char *i = x + 4096 - 15;
>             mprotect(x + 4096, 4096, PROT_READ|PROT_WRITE);
>             ((void(*)(void)) i) ();
>     }
> 
> ... which produces a #GP without the mprotect, and a #PF with it.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  target/i386/translate.c | 29 ++++++++++++++++++++++-------
>  1 file changed, 22 insertions(+), 7 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

> +    if (sigsetjmp(s->jmpbuf, 0) != 0) {

Any particular reason to use sigsetjmp(x, 0) instead of setjmp(x)?
Certainly there are no signal frames that the longjmp will pass...


r~
Paolo Bonzini Oct. 12, 2017, 9:43 p.m. | #2
On 12/10/2017 21:30, Richard Henderson wrote:
> On 10/12/2017 07:35 AM, Paolo Bonzini wrote:
>> Besides being more correct, arbitrarily long instruction allow the
>> generation of a translation block that spans three pages.  This
>> confuses the generator and even allows ring 3 code to poison the
>> translation block cache and inject code into other processes that are
>> in guest ring 3.
>>
>> This is an improved (and more invasive) fix for the bug fixed in commit
>> 30663fd ("tcg/i386: Check the size of instruction being translated",
>> 2017-03-24).  In addition to being more precise (and generating the
>> right exception, which is #GP rather than #UD), it distinguishes better
>> between page faults and too long instructions, as shown by this test case:
>>
>>     #include <sys/mman.h>
>>     #include <string.h>
>>     #include <stdio.h>
>>
>>     int main()
>>     {
>>             char *x = mmap(NULL, 8192, PROT_READ|PROT_WRITE|PROT_EXEC,
>>                            MAP_PRIVATE|MAP_ANON, -1, 0);
>>             memset(x, 0x66, 4096);
>>             x[4096] = 0x90;
>>             x[4097] = 0xc3;
>>             char *i = x + 4096 - 15;
>>             mprotect(x + 4096, 4096, PROT_READ|PROT_WRITE);
>>             ((void(*)(void)) i) ();
>>     }
>>
>> ... which produces a #GP without the mprotect, and a #PF with it.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>  target/i386/translate.c | 29 ++++++++++++++++++++++-------
>>  1 file changed, 22 insertions(+), 7 deletions(-)
> 
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> 
>> +    if (sigsetjmp(s->jmpbuf, 0) != 0) {
> 
> Any particular reason to use sigsetjmp(x, 0) instead of setjmp(x)?
> Certainly there are no signal frames that the longjmp will pass...

sigsetjmp is used to _not_ save the signal mask.  On OS X setjmp saves
the signal mask by default, which is slower.

Paolo
Richard Henderson Oct. 13, 2017, 6:12 p.m. | #3
On 10/12/2017 02:43 PM, Paolo Bonzini wrote:
>> Any particular reason to use sigsetjmp(x, 0) instead of setjmp(x)?
>> Certainly there are no signal frames that the longjmp will pass...
> 
> sigsetjmp is used to _not_ save the signal mask.  On OS X setjmp saves
> the signal mask by default, which is slower.

Oh, bizarro.  Thanks,


r~

Patch

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 4a938c21a0..5f24a2de3c 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -136,6 +136,7 @@  typedef struct DisasContext {
     int cpuid_ext3_features;
     int cpuid_7_0_ebx_features;
     int cpuid_xsave_features;
+    sigjmp_buf jmpbuf;
 } DisasContext;
 
 static void gen_eob(DisasContext *s);
@@ -1863,11 +1864,27 @@  static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
     }
 }
 
+#define X86_MAX_INSN_LENGTH 15
+
 static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
 {
     uint64_t pc = s->pc;
 
     s->pc += num_bytes;
+    if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
+        /* If the instruction's 16th byte is on a different page than the 1st, a
+         * page fault on the second page wins over the general protection fault
+         * caused by the instruction being too long.
+         * This can happen even if the operand is only one byte long!
+         */
+        if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
+            volatile uint8_t unused =
+                cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
+            (void) unused;
+        }
+        siglongjmp(s->jmpbuf, 1);
+    }
+
     return pc;
 }
 
@@ -4463,14 +4480,12 @@  static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     s->rip_offset = 0; /* for relative ip address */
     s->vex_l = 0;
     s->vex_v = 0;
- next_byte:
-    /* x86 has an upper limit of 15 bytes for an instruction. Since we
-     * do not want to decode and generate IR for an illegal
-     * instruction, the following check limits the instruction size to
-     * 25 bytes: 14 prefix + 1 opc + 6 (modrm+sib+ofs) + 4 imm */
-    if (s->pc - pc_start > 14) {
-        goto illegal_op;
+    if (sigsetjmp(s->jmpbuf, 0) != 0) {
+        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+        return s->pc;
     }
+
+ next_byte:
     b = x86_ldub_code(env, s);
     /* Collect prefixes.  */
     switch (b) {