diff mbox

[PULL,11/13] target-i386: forward CPUID cache leaves when -cpu host is used

Message ID 528B561C.9070200@redhat.com
State New
Headers show

Commit Message

Paolo Bonzini Nov. 19, 2013, 12:14 p.m. UTC
Il 19/11/2013 13:03, Peter Lieven ha scritto:
>>
>> Can you test which of these two work?  But I agree it's best to disable
>> cache-leaf forwarding.
> The first does make windows boot again and it calculates a
> correct combination of cpus, threads, cores and sockets. But
> I think the reason it boots is because cores=threads=1.
> 
> As its more intuitive (I think) I would prefer your "cores over threads
> over socket ".
> The last thing I would think of is emulating more than 1 socket. -smp N
> would then mean, N cores, no hyper-threading, 1 socket.

After looking more at the docs, I think I found the bug.  Can you test this?


Paolo

Comments

Peter Lieven Nov. 19, 2013, 12:32 p.m. UTC | #1
On 19.11.2013 13:14, Paolo Bonzini wrote:
> Il 19/11/2013 13:03, Peter Lieven ha scritto:
>>> Can you test which of these two work?  But I agree it's best to disable
>>> cache-leaf forwarding.
>> The first does make windows boot again and it calculates a
>> correct combination of cpus, threads, cores and sockets. But
>> I think the reason it boots is because cores=threads=1.
>>
>> As its more intuitive (I think) I would prefer your "cores over threads
>> over socket ".
>> The last thing I would think of is emulating more than 1 socket. -smp N
>> would then mean, N cores, no hyper-threading, 1 socket.
> After looking more at the docs, I think I found the bug.  Can you test this?
>
> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> index 864c80e..16d4db1 100644
> --- a/target-i386/cpu.c
> +++ b/target-i386/cpu.c
> @@ -2086,14 +2086,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>           /* cache info: needed for Core compatibility */
>           if (cpu->cache_info_passthrough) {
>               host_cpuid(index, count, eax, ebx, ecx, edx);
> -            break;
> -        }
> -        if (cs->nr_cores > 1) {
> -            *eax = (cs->nr_cores - 1) << 26;
> +            *eax &= ~0xFC000000;
>           } else {
>               *eax = 0;
> -        }
> -        switch (count) {
> +            switch (count) {
>               case 0: /* L1 dcache info */
>                   *eax |= CPUID_4_TYPE_DCACHE | \
>                           CPUID_4_LEVEL(1) | \
> @@ -2118,9 +2114,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>                   *eax |= CPUID_4_TYPE_UNIFIED | \
>                           CPUID_4_LEVEL(2) | \
>                           CPUID_4_SELF_INIT_LEVEL;
> -                if (cs->nr_threads > 1) {
> -                    *eax |= (cs->nr_threads - 1) << 14;
> -                }
>                   *ebx = (L2_LINE_SIZE - 1) | \
>                          ((L2_PARTITIONS - 1) << 12) | \
>                          ((L2_ASSOCIATIVITY - 1) << 22);
> @@ -2133,6 +2126,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>                   *ecx = 0;
>                   *edx = 0;
>                   break;
> +            }
> +        }
> +
> +        /* We give out APIC IDs ourselves, so force bits 31..26 even for "-cpu host".  */
> +        if (cs->nr_cores > 1) {
> +            *eax |= (cs->nr_cores - 1) << 26;
>           }
>           break;
>       case 5:
>
> Paolo
I already tried exactly this fix. It's reading index 0x0000004 for ever-increasing sub-indexes (ecx) until qemu aborts:

~/git/qemu$ x86_64-softmmu/qemu-system-x86_64 -m 2048 -drive if=virtio,file=iscsi://172.21.200.45/iqn.2001-05.com.equallogic:0-8a0906-9d95c510a-344001d54795289f-2012-r2-1-7-0/0,format=raw,cache=writeback,aio=native -cpu host -monitor stdio -vnc :1 
-enable-kvm -usb -usbdevice tablet -vga cirrus -global virtio-blk-pci.scsi=off -smp 4,cores=4,threads=1,sockets=1  -serial null  -parallel null -boot c

(qemu) cpuid_data is full, no space for cpuid(eax:0x4,ecx:0x5d)
Abgebrochen (Speicherabzug geschrieben)

If you really want to have this feature:

a) fix smp_parse and leave it at "prefer sockets over cores over threads", but use your new code:

         if (cpus == 0) {
             sockets = sockets > 0 ? sockets : 1;
             cores = cores > 0 ? cores : 1;
             threads = threads > 0 ? threads : 1;
             cpus = cores * threads * sockets;
         } else if (sockets == 0) {
             cores = cores > 0 ? cores : 1;
             threads = threads > 0 ? threads : 1;
             sockets = cpus / (cores * threads);
         } else if (cores == 0) {
             threads = threads > 0 ? threads : 1;
             cores = cpus / (sockets * threads);
         } else {
             threads = cpus / (sockets * cores);
         }


b) disable cache leaf pass-through as soon as threads*cores > 1. It seems to work as long as there is only one core with one thread per socket.

Peter
Paolo Bonzini Nov. 19, 2013, 1:21 p.m. UTC | #2
Il 19/11/2013 13:32, Peter Lieven ha scritto:
>>
>> +
>> +        /* We give out APIC IDs ourselves, so force bits 31..26 even
>> for "-cpu host".  */
>> +        if (cs->nr_cores > 1) {
>> +            *eax |= (cs->nr_cores - 1) << 26;
>>           }
>>           break;
>>       case 5:
>
> I already tried exactly this fix. Its reading index 0x0000004 for
> increasing indexes until qemu aborts:

Oops, I guess it should be "if ((*eax & 31) && cs->nr_cores > 1)".

Paolo

> ~/git/qemu$ x86_64-softmmu/qemu-system-x86_64 -m 2048 -drive
> if=virtio,file=iscsi://172.21.200.45/iqn.2001-05.com.equallogic:0-8a0906-9d95c510a-344001d54795289f-2012-r2-1-7-0/0,format=raw,cache=writeback,aio=native
> -cpu host -monitor stdio -vnc :1 -enable-kvm -usb -usbdevice tablet -vga
> cirrus -global virtio-blk-pci.scsi=off -smp
> 4,cores=4,threads=1,sockets=1  -serial null  -parallel null -boot c
Peter Lieven Nov. 19, 2013, 2:11 p.m. UTC | #3
On 19.11.2013 14:21, Paolo Bonzini wrote:
> Il 19/11/2013 13:32, Peter Lieven ha scritto:
>>> +
>>> +        /* We give out APIC IDs ourselves, so force bits 31..26 even
>>> for "-cpu host".  */
>>> +        if (cs->nr_cores > 1) {
>>> +            *eax |= (cs->nr_cores - 1) << 26;
>>>            }
>>>            break;
>>>        case 5:
>> I already tried exactly this fix. Its reading index 0x0000004 for
>> increasing indexes until qemu aborts:
> Oops, it should be I guess "if ((*eax & 31) && cs->nr_cores > 1)".
Maybe. How should we continue? This should be fixed before 1.7 comes out.

Peter
Paolo Bonzini Nov. 19, 2013, 2:14 p.m. UTC | #4
Il 19/11/2013 15:11, Peter Lieven ha scritto:
>>>>
>>> I already tried exactly this fix. Its reading index 0x0000004 for
>>> increasing indexes until qemu aborts:
>> Oops, it should be I guess "if ((*eax & 31) && cs->nr_cores > 1)".
> Maybe, how should we continue. This should be fixed before 1.7 comes out.

If this works, I'll post a patch.

Paolo
Peter Lieven Nov. 19, 2013, 2:17 p.m. UTC | #5
On 19.11.2013 15:14, Paolo Bonzini wrote:
> if ((*eax & 31) && cs->nr_cores > 1)
At which position exactly do you want to put this condition, and which action should it take?

Peter
diff mbox

Patch

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 864c80e..16d4db1 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2086,14 +2086,10 @@  void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         /* cache info: needed for Core compatibility */
         if (cpu->cache_info_passthrough) {
             host_cpuid(index, count, eax, ebx, ecx, edx);
-            break;
-        }
-        if (cs->nr_cores > 1) {
-            *eax = (cs->nr_cores - 1) << 26;
+            *eax &= ~0xFC000000;
         } else {
             *eax = 0;
-        }
-        switch (count) {
+            switch (count) {
             case 0: /* L1 dcache info */
                 *eax |= CPUID_4_TYPE_DCACHE | \
                         CPUID_4_LEVEL(1) | \
@@ -2118,9 +2114,6 @@  void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
                 *eax |= CPUID_4_TYPE_UNIFIED | \
                         CPUID_4_LEVEL(2) | \
                         CPUID_4_SELF_INIT_LEVEL;
-                if (cs->nr_threads > 1) {
-                    *eax |= (cs->nr_threads - 1) << 14;
-                }
                 *ebx = (L2_LINE_SIZE - 1) | \
                        ((L2_PARTITIONS - 1) << 12) | \
                        ((L2_ASSOCIATIVITY - 1) << 22);
@@ -2133,6 +2126,12 @@  void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
                 *ecx = 0;
                 *edx = 0;
                 break;
+            }
+        }
+
+        /* We give out APIC IDs ourselves, so force bits 31..26 even for "-cpu host".  */
+        if (cs->nr_cores > 1) {
+            *eax |= (cs->nr_cores - 1) << 26;
         }
         break;
     case 5: