Patchwork [RFT] monitor: implement x86 info tlb for PAE and long modes

login
register
mail settings
Submitter Blue Swirl
Date Dec. 5, 2010, 4:25 p.m.
Message ID <AANLkTi=03bcmdBjyMJ9y7CB=pPvd5YDg0=+mDaDLkDK5@mail.gmail.com>
Download mbox | patch
Permalink /patch/74302/
State New
Headers show

Comments

Blue Swirl - Dec. 5, 2010, 4:25 p.m.
'info tlb' didn't show correct information for PAE mode and
x86_64 long mode.

Implement the missing modes. Also print NX bit for PAE and long modes.
Fix off-by-one error in 32 bit mode mask.

Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---

I didn't find an OS that enabled PAE, please test and report.

---
 monitor.c |  149 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 138 insertions(+), 11 deletions(-)

                    pte & PG_DIRTY_MASK ? 'D' : '-',
@@ -1863,25 +1872,19 @@ static void print_pte(Monitor *mon, uint32_t addr, uint32_t pte, uint32_t mask)
                    pte & PG_RW_MASK ? 'W' : '-');
 }

-static void tlb_info(Monitor *mon)
+static void tlb_info_32(Monitor *mon, CPUState *env)
 {
-    CPUState *env;
     int l1, l2;
     uint32_t pgd, pde, pte;

-    env = mon_get_cpu();
-
-    if (!(env->cr[0] & CR0_PG_MASK)) {
-        monitor_printf(mon, "PG disabled\n");
-        return;
-    }
     pgd = env->cr[3] & ~0xfff;
     for(l1 = 0; l1 < 1024; l1++) {
         cpu_physical_memory_read(pgd + l1 * 4, (uint8_t *)&pde, 4);
         pde = le32_to_cpu(pde);
         if (pde & PG_PRESENT_MASK) {
             if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
-                print_pte(mon, (l1 << 22), pde, ~((1 << 20) - 1));
+                /* 4M pages */
+                print_pte(mon, (l1 << 22), pde, ~((1 << 21) - 1));
             } else {
                 for(l2 = 0; l2 < 1024; l2++) {
                     cpu_physical_memory_read((pde & ~0xfff) + l2 * 4,
@@ -1898,6 +1901,130 @@ static void tlb_info(Monitor *mon)
     }
 }

+static void tlb_info_pae32(Monitor *mon, CPUState *env)
+{
+    int l1, l2, l3;
+    uint64_t pdpe, pde, pte;
+    uint64_t pdp_addr, pd_addr, pt_addr;
+
+    pdp_addr = env->cr[3] & ~0x1f;
+    for (l1 = 0; l1 < 4; l1++) {
+        cpu_physical_memory_read(pdp_addr + l1 * 8, (uint8_t *)&pdpe, 8);
+        pdpe = le64_to_cpu(pdpe);
+        if (pdpe & PG_PRESENT_MASK) {
+            pd_addr = pdpe & 0x3fffffffff000ULL;
+            for (l2 = 0; l2 < 512; l2++) {
+                cpu_physical_memory_read(pd_addr + l2 * 8,
+                                         (uint8_t *)&pde, 8);
+                pde = le64_to_cpu(pde);
+                if (pde & PG_PSE_MASK) {
+                    /* 2M pages with PAE, CR4.PSE is ignored */
+                    print_pte(mon, (l1 << 30 ) + (l2 << 21), pde,
+                              ~((target_phys_addr_t)(1 << 20) - 1));
+                } else {
+                    pt_addr = pde & 0x3fffffffff000ULL;
+                    for (l3 = 0; l3 < 512; l3++) {
+                        cpu_physical_memory_read(pt_addr + l3 * 8,
+                                                 (uint8_t *)&pte, 8);
+                        pte = le64_to_cpu(pte);
+                        if (pte & PG_PRESENT_MASK) {
+                            print_pte(mon, (l1 << 30 ) + (l2 << 21)
+                                      + (l3 << 12),
+                                      pte & ~PG_PSE_MASK,
+                                      ~(target_phys_addr_t)0xfff);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+#ifdef TARGET_X86_64
+static void tlb_info_64(Monitor *mon, CPUState *env)
+{
+    uint64_t l1, l2, l3, l4;
+    uint64_t pml4e, pdpe, pde, pte;
+    uint64_t pml4_addr, pdp_addr, pd_addr, pt_addr;
+
+    pml4_addr = env->cr[3] & 0x3fffffffff000ULL;
+    for (l1 = 0; l1 < 512; l1++) {
+        cpu_physical_memory_read(pml4_addr + l1 * 8, (uint8_t *)&pml4e, 8);
+        pml4e = le64_to_cpu(pml4e);
+        if (pml4e & PG_PRESENT_MASK) {
+            pdp_addr = pml4e & 0x3fffffffff000ULL;
+            for (l2 = 0; l2 < 512; l2++) {
+                cpu_physical_memory_read(pdp_addr + l2 * 8, (uint8_t *)&pdpe,
+                                         8);
+                pdpe = le64_to_cpu(pdpe);
+                if (pdpe & PG_PRESENT_MASK) {
+                    if (pdpe & PG_PSE_MASK) {
+                        /* 1G pages, CR4.PSE is ignored */
+                        print_pte(mon, (l1 << 39) + (l2 << 30), pdpe,
+                                  0x3ffffc0000000ULL);
+                    } else {
+                        pd_addr = pdpe & 0x3fffffffff000ULL;
+                        for (l3 = 0; l3 < 512; l3++) {
+                            cpu_physical_memory_read(pd_addr + l3 * 8,
+                                                     (uint8_t *)&pde, 8);
+                            pde = le64_to_cpu(pde);
+                            if (pde & PG_PRESENT_MASK) {
+                                if (pde & PG_PSE_MASK) {
+                                    /* 2M pages, CR4.PSE is ignored */
+                                    print_pte(mon, (l1 << 39) + (l2 << 30) +
+                                              (l3 << 21), pde,
+                                              0x3ffffffe00000ULL);
+                                } else {
+                                    pt_addr = pde & 0x3fffffffff000ULL;
+                                    for (l4 = 0; l4 < 512; l4++) {
+                                        cpu_physical_memory_read(pt_addr
+                                                                 + l4 * 8,
+                                                                 (uint8_t *)&pte,
+                                                                 8);
+                                        pte = le64_to_cpu(pte);
+                                        if (pte & PG_PRESENT_MASK) {
+                                            print_pte(mon, (l1 << 39) +
+                                                      (l2 << 30) +
+                                                      (l3 << 21) + (l4 << 12),
+                                                      pte & ~PG_PSE_MASK,
+                                                      0x3fffffffff000ULL);
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+#endif
+
+static void tlb_info(Monitor *mon)
+{
+    CPUState *env;
+
+    env = mon_get_cpu();
+
+    if (!(env->cr[0] & CR0_PG_MASK)) {
+        monitor_printf(mon, "PG disabled\n");
+        return;
+    }
+    if (env->cr[4] & CR4_PAE_MASK) {
+#ifdef TARGET_X86_64
+        if (env->hflags & HF_LMA_MASK) {
+            tlb_info_64(mon, env);
+        } else
+#endif
+        {
+            tlb_info_pae32(mon, env);
+        }
+    } else {
+        tlb_info_32(mon, env);
+    }
+}
+
 static void mem_print(Monitor *mon, uint32_t *pstart, int *plast_prot,
                       uint32_t end, int prot)
 {
Alexander Graf - Dec. 6, 2010, 11:12 a.m.
On 05.12.2010, at 17:25, Blue Swirl wrote:

> 'info tlb' didn't show correct information for PAE mode and
> x86_64 long mode.
> 
> Implement the missing modes. Also print NX bit for PAE and long modes.
> Fix off-by-one error in 32 bit mode mask.
> 
> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
> ---
> 
> I didn't find an OS that enabled PAE, please test and report.

Xen does. Just take a random recent xen kernel and run it with -kernel :).


Alex
Ian Campbell - Dec. 6, 2010, 1:05 p.m.
On Mon, 2010-12-06 at 12:12 +0100, Alexander Graf wrote:
> On 05.12.2010, at 17:25, Blue Swirl wrote:
> 
> > 'info tlb' didn't show correct information for PAE mode and
> > x86_64 long mode.
> > 
> > Implement the missing modes. Also print NX bit for PAE and long modes.
> > Fix off-by-one error in 32 bit mode mask.
> > 
> > Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
> > ---
> > 
> > I didn't find an OS that enabled PAE, please test and report.
> 
> Xen does. Just take a random recent xen kernel and run it with -kernel :).

In addition AFAIK recent 32 bit Fedora is PAE enabled by default (so is
RHEL6?). Debian also supplies a -686-bigmem kernel flavour which is
their name for PAE enabled.

Ian.
Blue Swirl - Dec. 6, 2010, 6:38 p.m.
On Mon, Dec 6, 2010 at 11:12 AM, Alexander Graf <agraf@suse.de> wrote:
>
> On 05.12.2010, at 17:25, Blue Swirl wrote:
>
>> 'info tlb' didn't show correct information for PAE mode and
>> x86_64 long mode.
>>
>> Implement the missing modes. Also print NX bit for PAE and long modes.
>> Fix off-by-one error in 32 bit mode mask.
>>
>> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
>> ---
>>
>> I didn't find an OS that enabled PAE, please test and report.
>
> Xen does. Just take a random recent xen kernel and run it with -kernel :).

Thanks, it does (CR4 is 0xb0). I'm not sure all is correct, after
0x1000000 the dump looks messy:
0000000000c00000: 0000000000c00000 --PDA---W
0000000000e00000: 0000000000e00000 --PDA---W
0000000001000000: f000ff53f000f000 XG-D-C--W
0000000001001000: f000ff53f000e000 X--D----W
0000000001002000: f000ff53f000f000 XG-D-C--W
0000000001003000: f000ff53f000f000 XG-D-C--W
0000000001004000: f000e987f000f000 X---A--U-
Blue Swirl - Dec. 6, 2010, 6:41 p.m.
On Mon, Dec 6, 2010 at 1:05 PM, Ian Campbell <ijc@hellion.org.uk> wrote:
> On Mon, 2010-12-06 at 12:12 +0100, Alexander Graf wrote:
>> On 05.12.2010, at 17:25, Blue Swirl wrote:
>>
>> > 'info tlb' didn't show correct information for PAE mode and
>> > x86_64 long mode.
>> >
>> > Implement the missing modes. Also print NX bit for PAE and long modes.
>> > Fix off-by-one error in 32 bit mode mask.
>> >
>> > Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
>> > ---
>> >
>> > I didn't find an OS that enabled PAE, please test and report.
>>
>> Xen does. Just take a random recent xen kernel and run it with -kernel :).
>
> In addition AFAIK recent 32 bit Fedora is PAE enabled by default (so is
> RHEL6?). Debian also supplies a -686-bigmem kernel flavour which is
> their name for PAE enabled.

At least Fedora 14 installation CD kernel does not enable PAE with 5G
of memory (enabled for i386 by adjusting target_phys_bits in
configure).
Ian Campbell - Dec. 7, 2010, 10:05 a.m.
On Mon, 2010-12-06 at 18:41 +0000, Blue Swirl wrote:
> On Mon, Dec 6, 2010 at 1:05 PM, Ian Campbell <ijc@hellion.org.uk> wrote:
> > On Mon, 2010-12-06 at 12:12 +0100, Alexander Graf wrote:
> >> On 05.12.2010, at 17:25, Blue Swirl wrote:
> >>
> >> > 'info tlb' didn't show correct information for PAE mode and
> >> > x86_64 long mode.
> >> >
> >> > Implement the missing modes. Also print NX bit for PAE and long modes.
> >> > Fix off-by-one error in 32 bit mode mask.
> >> >
> >> > Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
> >> > ---
> >> >
> >> > I didn't find an OS that enabled PAE, please test and report.
> >>
> >> Xen does. Just take a random recent xen kernel and run it with -kernel :).
> >
> > In addition AFAIK recent 32 bit Fedora is PAE enabled by default (so is
> > RHEL6?). Debian also supplies a -686-bigmem kernel flavour which is
> > their name for PAE enabled.
> 
> At least Fedora 14 installation CD kernel does not enable PAE with 5G
> of memory (enabled for i386 by adjusting target_phys_bits in
> configure).

Hm, seems like I was mistaken and it was just RHEL6 which changed the
default.

FWIW Fedora-14-i386-DVD.iso contains /images/pxeboot/vmlinuz-PAE and
initrd-PAE.img which might be useful to you, see also
http://download.fedora.redhat.com/pub/fedora/linux/releases/14/Fedora/i386/os/images/pxeboot/ .

Ian.
Alexander Graf - Dec. 7, 2010, 10:08 a.m.
On 06.12.2010, at 19:38, Blue Swirl wrote:

> On Mon, Dec 6, 2010 at 11:12 AM, Alexander Graf <agraf@suse.de> wrote:
>> 
>> On 05.12.2010, at 17:25, Blue Swirl wrote:
>> 
>>> 'info tlb' didn't show correct information for PAE mode and
>>> x86_64 long mode.
>>> 
>>> Implement the missing modes. Also print NX bit for PAE and long modes.
>>> Fix off-by-one error in 32 bit mode mask.
>>> 
>>> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
>>> ---
>>> 
>>> I didn't find an OS that enabled PAE, please test and report.
>> 
>> Xen does. Just take a random recent xen kernel and run it with -kernel :).
> 
> Thanks, it does (CR4 is 0xb0). I'm not sure all is correct, after
> 0x1000000 the dump looks messy:
> 0000000000c00000: 0000000000c00000 --PDA---W
> 0000000000e00000: 0000000000e00000 --PDA---W
> 0000000001000000: f000ff53f000f000 XG-D-C--W
> 0000000001001000: f000ff53f000e000 X--D----W
> 0000000001002000: f000ff53f000f000 XG-D-C--W
> 0000000001003000: f000ff53f000f000 XG-D-C--W
> 0000000001004000: f000e987f000f000 X---A--U-

Good question XD. I guess the best way to find out if those numbers are right would be to trace the page tables manually. info tlb shows PA : VA, right?


Alex
Blue Swirl - Dec. 11, 2010, 9:34 a.m.
On Tue, Dec 7, 2010 at 10:08 AM, Alexander Graf <agraf@suse.de> wrote:
>
> On 06.12.2010, at 19:38, Blue Swirl wrote:
>
>> On Mon, Dec 6, 2010 at 11:12 AM, Alexander Graf <agraf@suse.de> wrote:
>>>
>>> On 05.12.2010, at 17:25, Blue Swirl wrote:
>>>
>>>> 'info tlb' didn't show correct information for PAE mode and
>>>> x86_64 long mode.
>>>>
>>>> Implement the missing modes. Also print NX bit for PAE and long modes.
>>>> Fix off-by-one error in 32 bit mode mask.
>>>>
>>>> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
>>>> ---
>>>>
>>>> I didn't find an OS that enabled PAE, please test and report.
>>>
>>> Xen does. Just take a random recent xen kernel and run it with -kernel :).
>>
>> Thanks, it does (CR4 is 0xb0). I'm not sure all is correct, after
>> 0x1000000 the dump looks messy:
>> 0000000000c00000: 0000000000c00000 --PDA---W
>> 0000000000e00000: 0000000000e00000 --PDA---W
>> 0000000001000000: f000ff53f000f000 XG-D-C--W
>> 0000000001001000: f000ff53f000e000 X--D----W
>> 0000000001002000: f000ff53f000f000 XG-D-C--W
>> 0000000001003000: f000ff53f000f000 XG-D-C--W
>> 0000000001004000: f000e987f000f000 X---A--U-
>
> Good question XD. I guess the best way to find out if those numbers are right would be to trace the page tables manually.

Those were actually caused by a bug of not checking the P bit for
intermediate tables, this was fixed in the second version.

> info tlb shows PA : VA, right?

No, VA : PA.

Patch

diff --git a/monitor.c b/monitor.c
index ec31eac..b18516c 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1848,11 +1848,20 @@  static int do_system_powerdown(Monitor *mon, const QDict *qdict,
 }

 #if defined(TARGET_I386)
-static void print_pte(Monitor *mon, uint32_t addr, uint32_t pte, uint32_t mask)
+static void print_pte(Monitor *mon, target_phys_addr_t addr,
+                      target_phys_addr_t pte,
+                      target_phys_addr_t mask)
 {
-    monitor_printf(mon, "%08x: %08x %c%c%c%c%c%c%c%c\n",
+#ifdef TARGET_X86_64
+    if (addr & (1ULL << 47)) {
+        addr |= -1LL << 48;
+    }
+#endif
+    monitor_printf(mon, TARGET_FMT_plx ": " TARGET_FMT_plx
+                   " %c%c%c%c%c%c%c%c%c\n",
                    addr,
                    pte & mask,
+                   pte & PG_NX_MASK ? 'X' : '-',
                    pte & PG_GLOBAL_MASK ? 'G' : '-',
                    pte & PG_PSE_MASK ? 'P' : '-',