Message ID | 1339963951-7798-1-git-send-email-chegu_vinod@hp.com
---|---
State | New
On Sun, Jun 17, 2012 at 01:12:31PM -0700, Chegu Vinod wrote:
> The -numa option to qemu is used to create [fake] numa nodes
> and expose them to the guest OS instance.
>
> There are a couple of issues with the -numa option:
>
> a) Max VCPU's that can be specified for a guest while using
>    the qemu's -numa option is 64. Due to a typecasting issue
>    when the number of VCPUs is > 32 the VCPUs don't show up
>    under the specified [fake] numa nodes.
>
> b) KVM currently has support for 160VCPUs per guest. The
>    qemu's -numa option has only support for upto 64VCPUs
>    per guest.
>
> This patch addresses these two issues. [ Note: This
> patch has been verified by Eduardo Habkost ].

I was going to add a Tested-by line, but this patch breaks the automatic
round-robin assignment when no CPU is added to any node on the
command-line.

Additional questions below:

[...]

> diff --git a/cpus.c b/cpus.c
> index b182b3d..f9cee60 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -1145,7 +1145,7 @@ void set_numa_modes(void)
>
>      for (env = first_cpu; env != NULL; env = env->next_cpu) {
>          for (i = 0; i < nb_numa_nodes; i++) {
> -            if (node_cpumask[i] & (1 << env->cpu_index)) {
> +            if (CPU_ISSET_S(env->cpu_index, cpumask_size, node_cpumask[i])) {
>                  env->numa_node = i;
>              }
>          }
> diff --git a/hw/pc.c b/hw/pc.c
> index 8368701..f0c3665 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -639,7 +639,7 @@ static void *bochs_bios_init(void)
>      numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
>      for (i = 0; i < max_cpus; i++) {
>          for (j = 0; j < nb_numa_nodes; j++) {
> -            if (node_cpumask[j] & (1 << i)) {
> +            if (CPU_ISSET_S(i, cpumask_size, node_cpumask[j])) {
>                  numa_fw_cfg[i + 1] = cpu_to_le64(j);
>                  break;
>              }
> diff --git a/sysemu.h b/sysemu.h
> index bc2c788..6e4d342 100644
> --- a/sysemu.h
> +++ b/sysemu.h
> @@ -9,6 +9,7 @@
>  #include "qapi-types.h"
>  #include "notify.h"
>  #include "main-loop.h"
> +#include <sched.h>
>
>  /* vl.c */
>
> @@ -133,9 +134,11 @@ extern uint8_t qemu_extra_params_fw[2];
>  extern QEMUClock *rtc_clock;
>
>  #define MAX_NODES 64
> +#define KVM_MAX_VCPUS 254

Do we really need this constant? Why not just use max_cpus when
allocating the CPU sets, instead?

>  extern int nb_numa_nodes;
>  extern uint64_t node_mem[MAX_NODES];
> -extern uint64_t node_cpumask[MAX_NODES];
> +extern cpu_set_t *node_cpumask[MAX_NODES];
> +extern size_t cpumask_size;
>
>  #define MAX_OPTION_ROMS 16
>  typedef struct QEMUOptionRom {
> diff --git a/vl.c b/vl.c
> index 204d85b..1906412 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -28,6 +28,7 @@
>  #include <errno.h>
>  #include <sys/time.h>
>  #include <zlib.h>
> +#include <sched.h>
>
>  /* Needed early for CONFIG_BSD etc. */
>  #include "config-host.h"
> @@ -240,7 +241,8 @@ QTAILQ_HEAD(, FWBootEntry) fw_boot_order = QTAILQ_HEAD_INITIALIZER(fw_boot_order
>
>  int nb_numa_nodes;
>  uint64_t node_mem[MAX_NODES];
> -uint64_t node_cpumask[MAX_NODES];
> +cpu_set_t *node_cpumask[MAX_NODES];
> +size_t cpumask_size;
>
>  uint8_t qemu_uuid[16];
>
> @@ -950,6 +952,9 @@ static void numa_add(const char *optarg)
>      char *endptr;
>      unsigned long long value, endvalue;
>      int nodenr;
> +    int i;
> +
> +    value = endvalue = 0;
>
>      optarg = get_opt_name(option, 128, optarg, ',') + 1;
>      if (!strcmp(option, "node")) {
> @@ -970,27 +975,17 @@ static void numa_add(const char *optarg)
>          }
>          node_mem[nodenr] = sval;
>      }
> -    if (get_param_value(option, 128, "cpus", optarg) == 0) {
> -        node_cpumask[nodenr] = 0;
> -    } else {
> +    if (get_param_value(option, 128, "cpus", optarg) != 0) {
>          value = strtoull(option, &endptr, 10);
> -        if (value >= 64) {
> -            value = 63;
> -            fprintf(stderr, "only 64 CPUs in NUMA mode supported.\n");
> +        if (*endptr == '-') {
> +            endvalue = strtoull(endptr+1, &endptr, 10);
>          } else {
> -            if (*endptr == '-') {
> -                endvalue = strtoull(endptr+1, &endptr, 10);
> -                if (endvalue >= 63) {
> -                    endvalue = 62;
> -                    fprintf(stderr,
> -                            "only 63 CPUs in NUMA mode supported.\n");
> -                }
> -                value = (2ULL << endvalue) - (1ULL << value);
> -            } else {
> -                value = 1ULL << value;
> -            }
> +            endvalue = value;
> +        }
> +
> +        for (i = value; i <= endvalue; ++i) {
> +            CPU_SET_S(i, cpumask_size, node_cpumask[nodenr]);

What if endvalue is larger than the cpu mask size we allocated?

>          }
> -        node_cpumask[nodenr] = value;
>      }
>      nb_numa_nodes++;
>  }
> @@ -2331,7 +2326,9 @@ int main(int argc, char **argv, char **envp)
>
>      for (i = 0; i < MAX_NODES; i++) {
>          node_mem[i] = 0;
> -        node_cpumask[i] = 0;
> +        node_cpumask[i] = CPU_ALLOC(KVM_MAX_VCPUS);
> +        cpumask_size = CPU_ALLOC_SIZE(KVM_MAX_VCPUS);
> +        CPU_ZERO_S(cpumask_size, node_cpumask[i]);
>      }
>
>      nb_numa_nodes = 0;
> @@ -3465,8 +3462,9 @@ int main(int argc, char **argv, char **envp)
>      }
>
>      for (i = 0; i < nb_numa_nodes; i++) {
> -        if (node_cpumask[i] != 0)
> +        if (node_cpumask[i] != NULL) {

This will be true for every node (as you preallocate all the CPU sets
in the beginning), so the loop will always end with i==0, in turn
unconditionally disabling the round-robin CPU assignment code below.
You can use CPU_COUNT_S, instead.

>              break;
> +        }
>      }
>      /* assigning the VCPUs round-robin is easier to implement, guest OSes
>       * must cope with this anyway, because there are BIOSes out there in
> @@ -3474,7 +3472,7 @@ int main(int argc, char **argv, char **envp)
>       */
>      if (i == nb_numa_nodes) {
>          for (i = 0; i < max_cpus; i++) {
> -            node_cpumask[i % nb_numa_nodes] |= 1 << i;
> +            CPU_SET_S(i, cpumask_size, node_cpumask[i % nb_numa_nodes]);
>          }
>      }
>  }
> --
> 1.7.1
>
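Background on the "typecasting issue" the review above quotes: in C, the literal 1 in `1 << env->cpu_index` is a plain 32-bit int, so the shift is undefined behavior once the index reaches 32. A minimal standalone sketch of the bug, not part of the patch; the "wrong" value printed reflects typical x86 behavior (the shift count wraps modulo 32) and is not guaranteed by the standard:

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    volatile int cpu_index = 40;  /* volatile: force a runtime shift */
    uint64_t mask;

    /* Buggy: "1" is a 32-bit int, so 1 << 40 is undefined behavior.
     * On x86 the hardware shift count wraps, giving 1 << 8 instead,
     * which is why VCPUs above 31 landed in the wrong NUMA node. */
    mask = 1 << cpu_index;
    printf("1    << 40 -> %#llx (wrong)\n", (unsigned long long)mask);

    /* Fixed shift: force 64-bit arithmetic. Correct, but a single
     * 64-bit mask still caps the scheme at 64 VCPUs, hence the move
     * to dynamically sized CPU sets in the patch. */
    mask = 1ULL << cpu_index;
    printf("1ULL << 40 -> %#llx (right, but limited to 64 CPUs)\n",
           (unsigned long long)mask);
    return 0;
}
```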
On 6/18/2012 1:29 PM, Eduardo Habkost wrote:
> On Sun, Jun 17, 2012 at 01:12:31PM -0700, Chegu Vinod wrote:
>> The -numa option to qemu is used to create [fake] numa nodes
>> and expose them to the guest OS instance.
>>
>> There are a couple of issues with the -numa option:
>>
>> a) Max VCPU's that can be specified for a guest while using
>>    the qemu's -numa option is 64. Due to a typecasting issue
>>    when the number of VCPUs is > 32 the VCPUs don't show up
>>    under the specified [fake] numa nodes.
>>
>> b) KVM currently has support for 160VCPUs per guest. The
>>    qemu's -numa option has only support for upto 64VCPUs
>>    per guest.
>>
>> This patch addresses these two issues. [ Note: This
>> patch has been verified by Eduardo Habkost ].
> I was going to add a Tested-by line, but this patch breaks the automatic
> round-robin assignment when no CPU is added to any node on the
> command-line.

Please see below.

> Additional questions below:
>
> [...]
>> diff --git a/cpus.c b/cpus.c
>> index b182b3d..f9cee60 100644
>> --- a/cpus.c
>> +++ b/cpus.c
>> @@ -1145,7 +1145,7 @@ void set_numa_modes(void)
>>
>>      for (env = first_cpu; env != NULL; env = env->next_cpu) {
>>          for (i = 0; i < nb_numa_nodes; i++) {
>> -            if (node_cpumask[i] & (1 << env->cpu_index)) {
>> +            if (CPU_ISSET_S(env->cpu_index, cpumask_size, node_cpumask[i])) {
>>                  env->numa_node = i;
>>              }
>>          }
>> diff --git a/hw/pc.c b/hw/pc.c
>> index 8368701..f0c3665 100644
>> --- a/hw/pc.c
>> +++ b/hw/pc.c
>> @@ -639,7 +639,7 @@ static void *bochs_bios_init(void)
>>      numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
>>      for (i = 0; i < max_cpus; i++) {
>>          for (j = 0; j < nb_numa_nodes; j++) {
>> -            if (node_cpumask[j] & (1 << i)) {
>> +            if (CPU_ISSET_S(i, cpumask_size, node_cpumask[j])) {
>>                  numa_fw_cfg[i + 1] = cpu_to_le64(j);
>>                  break;
>>              }
>> diff --git a/sysemu.h b/sysemu.h
>> index bc2c788..6e4d342 100644
>> --- a/sysemu.h
>> +++ b/sysemu.h
>> @@ -9,6 +9,7 @@
>>  #include "qapi-types.h"
>>  #include "notify.h"
>>  #include "main-loop.h"
>> +#include <sched.h>
>>
>>  /* vl.c */
>>
>> @@ -133,9 +134,11 @@ extern uint8_t qemu_extra_params_fw[2];
>>  extern QEMUClock *rtc_clock;
>>
>>  #define MAX_NODES 64
>> +#define KVM_MAX_VCPUS 254
> Do we really need this constant? Why not just use max_cpus when
> allocating the CPU sets, instead?

Hmm... I had thought about this earlier too, but max_cpus was not
getting set at the point where the allocations were being done. I have
now moved that code a bit later and switched to using max_cpus.

>>  extern int nb_numa_nodes;
>>  extern uint64_t node_mem[MAX_NODES];
>> -extern uint64_t node_cpumask[MAX_NODES];
>> +extern cpu_set_t *node_cpumask[MAX_NODES];
>> +extern size_t cpumask_size;
>>
>>  #define MAX_OPTION_ROMS 16
>>  typedef struct QEMUOptionRom {
>> diff --git a/vl.c b/vl.c
>> index 204d85b..1906412 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -28,6 +28,7 @@
>>  #include <errno.h>
>>  #include <sys/time.h>
>>  #include <zlib.h>
>> +#include <sched.h>
>>
>>  /* Needed early for CONFIG_BSD etc. */
>>  #include "config-host.h"
>> @@ -240,7 +241,8 @@ QTAILQ_HEAD(, FWBootEntry) fw_boot_order = QTAILQ_HEAD_INITIALIZER(fw_boot_order
>>
>>  int nb_numa_nodes;
>>  uint64_t node_mem[MAX_NODES];
>> -uint64_t node_cpumask[MAX_NODES];
>> +cpu_set_t *node_cpumask[MAX_NODES];
>> +size_t cpumask_size;
>>
>>  uint8_t qemu_uuid[16];
>>
>> @@ -950,6 +952,9 @@ static void numa_add(const char *optarg)
>>      char *endptr;
>>      unsigned long long value, endvalue;
>>      int nodenr;
>> +    int i;
>> +
>> +    value = endvalue = 0;
>>
>>      optarg = get_opt_name(option, 128, optarg, ',') + 1;
>>      if (!strcmp(option, "node")) {
>> @@ -970,27 +975,17 @@ static void numa_add(const char *optarg)
>>          }
>>          node_mem[nodenr] = sval;
>>      }
>> -    if (get_param_value(option, 128, "cpus", optarg) == 0) {
>> -        node_cpumask[nodenr] = 0;
>> -    } else {
>> +    if (get_param_value(option, 128, "cpus", optarg) != 0) {
>>          value = strtoull(option, &endptr, 10);
>> -        if (value >= 64) {
>> -            value = 63;
>> -            fprintf(stderr, "only 64 CPUs in NUMA mode supported.\n");
>> +        if (*endptr == '-') {
>> +            endvalue = strtoull(endptr+1, &endptr, 10);
>>          } else {
>> -            if (*endptr == '-') {
>> -                endvalue = strtoull(endptr+1, &endptr, 10);
>> -                if (endvalue >= 63) {
>> -                    endvalue = 62;
>> -                    fprintf(stderr,
>> -                            "only 63 CPUs in NUMA mode supported.\n");
>> -                }
>> -                value = (2ULL << endvalue) - (1ULL << value);
>> -            } else {
>> -                value = 1ULL << value;
>> -            }
>> +            endvalue = value;
>> +        }
>> +
>> +        for (i = value; i <= endvalue; ++i) {
>> +            CPU_SET_S(i, cpumask_size, node_cpumask[nodenr]);
> What if endvalue is larger than the cpu mask size we allocated?

I can add a check (endvalue >= max_cpus) and print an error. Should we
force-set endvalue to max_cpus - 1 in that case?

>>          }
>> -        node_cpumask[nodenr] = value;
>>      }
>>      nb_numa_nodes++;
>>  }
>> @@ -2331,7 +2326,9 @@ int main(int argc, char **argv, char **envp)
>>
>>      for (i = 0; i < MAX_NODES; i++) {
>>          node_mem[i] = 0;
>> -        node_cpumask[i] = 0;
>> +        node_cpumask[i] = CPU_ALLOC(KVM_MAX_VCPUS);
>> +        cpumask_size = CPU_ALLOC_SIZE(KVM_MAX_VCPUS);
>> +        CPU_ZERO_S(cpumask_size, node_cpumask[i]);
>>      }
>>
>>      nb_numa_nodes = 0;
>> @@ -3465,8 +3462,9 @@ int main(int argc, char **argv, char **envp)
>>      }
>>
>>      for (i = 0; i < nb_numa_nodes; i++) {
>> -        if (node_cpumask[i] != 0)
>> +        if (node_cpumask[i] != NULL) {
> This will be true for every node (as you preallocate all the CPU
> sets in the beginning), so the loop will always end with i==0, in turn
> unconditionally disabling the round-robin CPU assignment code below.
> You can use CPU_COUNT_S, instead.

Argh... I should have tested this. My bad! Thanks for catching it. I
made the change and tested it, and it is doing the round-robin
assignment now. Will post the next version of the patch soon.

Thanks for your feedback.
Vinod

>>              break;
>> +        }
>>      }
>>      /* assigning the VCPUs round-robin is easier to implement, guest OSes
>>       * must cope with this anyway, because there are BIOSes out there in
>> @@ -3474,7 +3472,7 @@ int main(int argc, char **argv, char **envp)
>>       */
>>      if (i == nb_numa_nodes) {
>>          for (i = 0; i < max_cpus; i++) {
>> -            node_cpumask[i % nb_numa_nodes] |= 1 << i;
>> +            CPU_SET_S(i, cpumask_size, node_cpumask[i % nb_numa_nodes]);
>>          }
>>      }
>>  }
>> --
>> 1.7.1
>>
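A rough sketch of the two changes agreed to above, allocating with max_cpus once it is known and bounding the parsed cpus= range; this is a guess at the shape of the follow-up patch (the exact placement and error message in v2 may differ):

```c
/* Hypothetical v2: allocate the per-node sets only after max_cpus is
 * known, instead of sizing them with a hardcoded KVM_MAX_VCPUS. */
for (i = 0; i < MAX_NODES; i++) {
    node_mem[i] = 0;
    node_cpumask[i] = CPU_ALLOC(max_cpus);
    cpumask_size = CPU_ALLOC_SIZE(max_cpus);
    CPU_ZERO_S(cpumask_size, node_cpumask[i]);
}

/* ...and in numa_add(), clamp the parsed range so CPU_SET_S never
 * writes past the allocated mask. */
if (endvalue >= max_cpus) {
    endvalue = max_cpus - 1;
    fprintf(stderr, "A max of %d CPUs are supported in a guest\n",
            max_cpus);
}
```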
On 17.06.2012 22:12, Chegu Vinod wrote:
> diff --git a/vl.c b/vl.c
> index 204d85b..1906412 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -28,6 +28,7 @@
>  #include <errno.h>
>  #include <sys/time.h>
>  #include <zlib.h>
> +#include <sched.h>

Did you check whether this and the macros you're using are available on
POSIX and mingw32? vl.c is a pretty central file.

Andreas
On 06/18/2012 04:05 PM, Andreas Färber wrote:
> On 17.06.2012 22:12, Chegu Vinod wrote:
>> diff --git a/vl.c b/vl.c
>> index 204d85b..1906412 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -28,6 +28,7 @@
>>  #include <errno.h>
>>  #include <sys/time.h>
>>  #include <zlib.h>
>> +#include <sched.h>
>
> Did you check whether this and the macros you're using are available on
> POSIX and mingw32? vl.c is a pretty central file.

POSIX, yes. mingw32, no. Use of preprocessor conditionals is probably
in order.
On 6/18/2012 3:11 PM, Eduardo Habkost wrote:
> On 06/18/2012 04:05 PM, Andreas Färber wrote:
>> On 17.06.2012 22:12, Chegu Vinod wrote:
>>> diff --git a/vl.c b/vl.c
>>> index 204d85b..1906412 100644
>>> --- a/vl.c
>>> +++ b/vl.c
>>> @@ -28,6 +28,7 @@
>>>  #include <errno.h>
>>>  #include <sys/time.h>
>>>  #include <zlib.h>
>>> +#include <sched.h>
>> Did you check whether this and the macros you're using are available on
>> POSIX and mingw32? vl.c is a pretty central file.
> POSIX, yes. mingw32, no. Use of preprocessor conditionals is probably
> in order.

Thanks. I will look into this.
Vinod
qemu-devel was no longer cc'ed, let us add it again.

Cheers,
Stefan

On 19.06.2012 22:35, Eduardo Habkost wrote:
> On Tue, Jun 19, 2012 at 04:33:35AM -0700, Chegu Vinod wrote:
> [...]
>>>>> +#include <sched.h>
>>>> Did you check whether this and the macros you're using are available on
>>>> POSIX and mingw32? vl.c is a pretty central file.
>>> POSIX, yes. mingw32, no. Use of preprocessor conditionals is probably
>>> in order.
> [...]
>> I started looking at this yesterday and quickly realized that even if
>> I did what I think Eric suggested (see below), we won't be solving the
>> underlying issue of going beyond 64 VCPUs:
>>
>> #ifndef __MINGW32__
>> // use the macros
>> #else
>> // do without the macros, i.e. use the current way (which goes up to
>> // 64-way after fixing the typecasting issue)
>> #endif
>>
>> Looking for other suggestions (besides having to write our own flavor
>> of the macros from scratch).
>
> You can use the bitmap functions at bitmap.{h,c} instead of cpu_set_t.
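For context on Eduardo's suggestion: QEMU ships portable bitmap helpers in bitmap.{h,c} that operate on plain arrays of unsigned long and therefore need no <sched.h>. The standalone sketch below only mimics their general shape (names borrowed, signatures approximate, not QEMU's actual headers) to show why this approach sidesteps both the 64-bit mask limit and the mingw32 portability problem:

```c
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>

#define BITS_PER_LONG (CHAR_BIT * sizeof(unsigned long))

/* Minimal stand-ins for QEMU's bitmap helpers. */
static unsigned long *bitmap_new(int nbits)
{
    int nlongs = (nbits + BITS_PER_LONG - 1) / BITS_PER_LONG;
    return calloc(nlongs, sizeof(unsigned long));  /* zero-initialized */
}

static void set_bit(int nr, unsigned long *map)
{
    map[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
}

static int test_bit(int nr, const unsigned long *map)
{
    return (map[nr / BITS_PER_LONG] >> (nr % BITS_PER_LONG)) & 1;
}

int main(void)
{
    int max_cpus = 160;  /* sized by the guest, no fixed 64-CPU ceiling */
    unsigned long *node_cpumask = bitmap_new(max_cpus);

    /* Equivalent of CPU_SET_S for a cpus=100-109 range. */
    for (int i = 100; i <= 109; i++) {
        set_bit(i, node_cpumask);
    }
    printf("cpu 105 in node: %d\n", test_bit(105, node_cpumask));
    free(node_cpumask);
    return 0;
}
```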
diff --git a/cpus.c b/cpus.c
index b182b3d..f9cee60 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1145,7 +1145,7 @@ void set_numa_modes(void)
 
     for (env = first_cpu; env != NULL; env = env->next_cpu) {
         for (i = 0; i < nb_numa_nodes; i++) {
-            if (node_cpumask[i] & (1 << env->cpu_index)) {
+            if (CPU_ISSET_S(env->cpu_index, cpumask_size, node_cpumask[i])) {
                 env->numa_node = i;
             }
         }
diff --git a/hw/pc.c b/hw/pc.c
index 8368701..f0c3665 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -639,7 +639,7 @@ static void *bochs_bios_init(void)
     numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
     for (i = 0; i < max_cpus; i++) {
         for (j = 0; j < nb_numa_nodes; j++) {
-            if (node_cpumask[j] & (1 << i)) {
+            if (CPU_ISSET_S(i, cpumask_size, node_cpumask[j])) {
                 numa_fw_cfg[i + 1] = cpu_to_le64(j);
                 break;
             }
diff --git a/sysemu.h b/sysemu.h
index bc2c788..6e4d342 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -9,6 +9,7 @@
 #include "qapi-types.h"
 #include "notify.h"
 #include "main-loop.h"
+#include <sched.h>
 
 /* vl.c */
 
@@ -133,9 +134,11 @@ extern uint8_t qemu_extra_params_fw[2];
 extern QEMUClock *rtc_clock;
 
 #define MAX_NODES 64
+#define KVM_MAX_VCPUS 254
 extern int nb_numa_nodes;
 extern uint64_t node_mem[MAX_NODES];
-extern uint64_t node_cpumask[MAX_NODES];
+extern cpu_set_t *node_cpumask[MAX_NODES];
+extern size_t cpumask_size;
 
 #define MAX_OPTION_ROMS 16
 typedef struct QEMUOptionRom {
diff --git a/vl.c b/vl.c
index 204d85b..1906412 100644
--- a/vl.c
+++ b/vl.c
@@ -28,6 +28,7 @@
 #include <errno.h>
 #include <sys/time.h>
 #include <zlib.h>
+#include <sched.h>
 
 /* Needed early for CONFIG_BSD etc. */
 #include "config-host.h"
@@ -240,7 +241,8 @@ QTAILQ_HEAD(, FWBootEntry) fw_boot_order = QTAILQ_HEAD_INITIALIZER(fw_boot_order
 
 int nb_numa_nodes;
 uint64_t node_mem[MAX_NODES];
-uint64_t node_cpumask[MAX_NODES];
+cpu_set_t *node_cpumask[MAX_NODES];
+size_t cpumask_size;
 
 uint8_t qemu_uuid[16];
 
@@ -950,6 +952,9 @@ static void numa_add(const char *optarg)
     char *endptr;
     unsigned long long value, endvalue;
     int nodenr;
+    int i;
+
+    value = endvalue = 0;
 
     optarg = get_opt_name(option, 128, optarg, ',') + 1;
     if (!strcmp(option, "node")) {
@@ -970,27 +975,17 @@ static void numa_add(const char *optarg)
         }
         node_mem[nodenr] = sval;
     }
-    if (get_param_value(option, 128, "cpus", optarg) == 0) {
-        node_cpumask[nodenr] = 0;
-    } else {
+    if (get_param_value(option, 128, "cpus", optarg) != 0) {
         value = strtoull(option, &endptr, 10);
-        if (value >= 64) {
-            value = 63;
-            fprintf(stderr, "only 64 CPUs in NUMA mode supported.\n");
+        if (*endptr == '-') {
+            endvalue = strtoull(endptr+1, &endptr, 10);
         } else {
-            if (*endptr == '-') {
-                endvalue = strtoull(endptr+1, &endptr, 10);
-                if (endvalue >= 63) {
-                    endvalue = 62;
-                    fprintf(stderr,
-                            "only 63 CPUs in NUMA mode supported.\n");
-                }
-                value = (2ULL << endvalue) - (1ULL << value);
-            } else {
-                value = 1ULL << value;
-            }
+            endvalue = value;
+        }
+
+        for (i = value; i <= endvalue; ++i) {
+            CPU_SET_S(i, cpumask_size, node_cpumask[nodenr]);
         }
-        node_cpumask[nodenr] = value;
     }
     nb_numa_nodes++;
 }
@@ -2331,7 +2326,9 @@ int main(int argc, char **argv, char **envp)
 
     for (i = 0; i < MAX_NODES; i++) {
         node_mem[i] = 0;
-        node_cpumask[i] = 0;
+        node_cpumask[i] = CPU_ALLOC(KVM_MAX_VCPUS);
+        cpumask_size = CPU_ALLOC_SIZE(KVM_MAX_VCPUS);
+        CPU_ZERO_S(cpumask_size, node_cpumask[i]);
     }
 
     nb_numa_nodes = 0;
@@ -3465,8 +3462,9 @@ int main(int argc, char **argv, char **envp)
     }
 
     for (i = 0; i < nb_numa_nodes; i++) {
-        if (node_cpumask[i] != 0)
+        if (node_cpumask[i] != NULL) {
             break;
+        }
     }
     /* assigning the VCPUs round-robin is easier to implement, guest OSes
      * must cope with this anyway, because there are BIOSes out there in
@@ -3474,7 +3472,7 @@ int main(int argc, char **argv, char **envp)
      */
     if (i == nb_numa_nodes) {
         for (i = 0; i < max_cpus; i++) {
-            node_cpumask[i % nb_numa_nodes] |= 1 << i;
+            CPU_SET_S(i, cpumask_size, node_cpumask[i % nb_numa_nodes]);
         }
     }
 }
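Note that the `node_cpumask[i] != NULL` test in the last hunk above always succeeds once every set is preallocated, which is the regression Eduardo points out in the review. A sketch of the repaired fallback check using glibc's CPU_COUNT_S, written against the variables in the patch (the posted v2 may phrase it differently):

```c
/* Fall back to round-robin only when no node received an explicit
 * cpus= list on the command line. */
for (i = 0; i < nb_numa_nodes; i++) {
    if (CPU_COUNT_S(cpumask_size, node_cpumask[i]) != 0) {
        break;  /* at least one node was populated explicitly */
    }
}
if (i == nb_numa_nodes) {
    for (i = 0; i < max_cpus; i++) {
        CPU_SET_S(i, cpumask_size, node_cpumask[i % nb_numa_nodes]);
    }
}
```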
The -numa option to qemu is used to create [fake] numa nodes
and expose them to the guest OS instance.

There are a couple of issues with the -numa option:

a) The maximum number of VCPUs that can be specified for a guest
   while using qemu's -numa option is 64. Due to a typecasting
   issue, when the number of VCPUs is > 32 the VCPUs don't show up
   under the specified [fake] numa nodes.

b) KVM currently has support for 160 VCPUs per guest, but qemu's
   -numa option only has support for up to 64 VCPUs per guest.

This patch addresses these two issues. [ Note: This patch has been
verified by Eduardo Habkost. ]

Below are examples of (a) and (b).

a) >32 VCPUs are specified with the -numa option:

/usr/local/bin/qemu-system-x86_64 \
-enable-kvm \
-cpu Westmere,+rdtscp,+pdpe1gb,+dca,+pdcm,+xtpr,+tm2,+est,+smx,+vmx,+ds_cpl,+monitor,+dtes64,+pclmuldq,+pbe,+tm,+ht,+ss,+acpi,+ds,+vme \
-smp sockets=6,cores=10,threads=1 \
-numa node,nodeid=0,cpus=0-9,mem=128g \
-numa node,nodeid=1,cpus=10-19,mem=128g \
-numa node,nodeid=2,cpus=20-29,mem=128g \
-numa node,nodeid=3,cpus=30-39,mem=128g \
-numa node,nodeid=4,cpus=40-49,mem=128g \
-numa node,nodeid=5,cpus=50-59,mem=128g \
-m 786432 \
-name vm1 \
-chardev socket,id=charmonitor,path=/var/lib/libvirt/qemu/vm1.monitor,server,nowait \
-drive file=/dev/libvirt_lvm/vm.img,if=none,id=drive-virtio-disk0,format=raw,cache=none,aio=native \
-device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x5,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1 \
-monitor stdio \
-net nic,macaddr=52:54:00:71:01:01 \
-net tap,ifname=tap0,script=no,downscript=no \
-vnc :4
...

Upstream qemu:
--------------
QEMU 1.1.50 monitor - type 'help' for more information
(qemu) info numa
6 nodes
node 0 cpus: 0 1 2 3 4 5 6 7 8 9 32 33 34 35 36 37 38 39 40 41
node 0 size: 131072 MB
node 1 cpus: 10 11 12 13 14 15 16 17 18 19 42 43 44 45 46 47 48 49 50 51
node 1 size: 131072 MB
node 2 cpus: 20 21 22 23 24 25 26 27 28 29 52 53 54 55 56 57 58 59
node 2 size: 131072 MB
node 3 cpus: 30
node 3 size: 131072 MB
node 4 cpus:
node 4 size: 131072 MB
node 5 cpus: 31
node 5 size: 131072 MB

With the patch applied:
-----------------------
QEMU 1.1.50 monitor - type 'help' for more information
(qemu) info numa
6 nodes
node 0 cpus: 0 1 2 3 4 5 6 7 8 9
node 0 size: 131072 MB
node 1 cpus: 10 11 12 13 14 15 16 17 18 19
node 1 size: 131072 MB
node 2 cpus: 20 21 22 23 24 25 26 27 28 29
node 2 size: 131072 MB
node 3 cpus: 30 31 32 33 34 35 36 37 38 39
node 3 size: 131072 MB
node 4 cpus: 40 41 42 43 44 45 46 47 48 49
node 4 size: 131072 MB
node 5 cpus: 50 51 52 53 54 55 56 57 58 59
node 5 size: 131072 MB

b) >64 VCPUs specified with the -numa option:

/usr/local/bin/qemu-system-x86_64 \
-enable-kvm \
-cpu Westmere,+rdtscp,+pdpe1gb,+dca,+pdcm,+xtpr,+tm2,+est,+smx,+vmx,+ds_cpl,+monitor,+dtes64,+pclmuldq,+pbe,+tm,+ht,+ss,+acpi,+ds,+vme \
-smp sockets=8,cores=10,threads=1 \
-numa node,nodeid=0,cpus=0-9,mem=64g \
-numa node,nodeid=1,cpus=10-19,mem=64g \
-numa node,nodeid=2,cpus=20-29,mem=64g \
-numa node,nodeid=3,cpus=30-39,mem=64g \
-numa node,nodeid=4,cpus=40-49,mem=64g \
-numa node,nodeid=5,cpus=50-59,mem=64g \
-numa node,nodeid=6,cpus=60-69,mem=64g \
-numa node,nodeid=7,cpus=70-79,mem=64g \
-m 524288 \
-name vm1 \
-chardev socket,id=charmonitor,path=/var/lib/libvirt/qemu/vm1.monitor,server,nowait \
-drive file=/dev/libvirt_lvm/vm.img,if=none,id=drive-virtio-disk0,format=raw,cache=none,aio=native \
-device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x5,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1 \
-monitor stdio \
-net nic,macaddr=52:54:00:71:01:01 \
-net tap,ifname=tap0,script=no,downscript=no \
-vnc :4
...

Upstream qemu:
--------------
only 63 CPUs in NUMA mode supported.
only 64 CPUs in NUMA mode supported.
QEMU 1.1.50 monitor - type 'help' for more information
(qemu) info numa
8 nodes
node 0 cpus: 6 7 8 9 38 39 40 41 70 71 72 73
node 0 size: 65536 MB
node 1 cpus: 10 11 12 13 14 15 16 17 18 19 42 43 44 45 46 47 48 49 50 51 74 75 76 77 78 79
node 1 size: 65536 MB
node 2 cpus: 20 21 22 23 24 25 26 27 28 29 52 53 54 55 56 57 58 59 60 61
node 2 size: 65536 MB
node 3 cpus: 30 62
node 3 size: 65536 MB
node 4 cpus:
node 4 size: 65536 MB
node 5 cpus:
node 5 size: 65536 MB
node 6 cpus: 31 63
node 6 size: 65536 MB
node 7 cpus: 0 1 2 3 4 5 32 33 34 35 36 37 64 65 66 67 68 69
node 7 size: 65536 MB

With the patch applied:
-----------------------
QEMU 1.1.50 monitor - type 'help' for more information
(qemu) info numa
8 nodes
node 0 cpus: 0 1 2 3 4 5 6 7 8 9
node 0 size: 65536 MB
node 1 cpus: 10 11 12 13 14 15 16 17 18 19
node 1 size: 65536 MB
node 2 cpus: 20 21 22 23 24 25 26 27 28 29
node 2 size: 65536 MB
node 3 cpus: 30 31 32 33 34 35 36 37 38 39
node 3 size: 65536 MB
node 4 cpus: 40 41 42 43 44 45 46 47 48 49
node 4 size: 65536 MB
node 5 cpus: 50 51 52 53 54 55 56 57 58 59
node 5 size: 65536 MB
node 6 cpus: 60 61 62 63 64 65 66 67 68 69
node 6 size: 65536 MB
node 7 cpus: 70 71 72 73 74 75 76 77 78 79
node 7 size: 65536 MB

Signed-off-by: Chegu Vinod <chegu_vinod@hp.com>, Jim Hull <jim.hull@hp.com>, Craig Hada <craig.hada@hp.com>
---
 cpus.c   |    2 +-
 hw/pc.c  |    2 +-
 sysemu.h |    5 ++++-
 vl.c     |   42 ++++++++++++++++++++----------------------
 4 files changed, 26 insertions(+), 25 deletions(-)
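For readers unfamiliar with the dynamically sized CPU-set API the patch builds on (the CPU_ALLOC family, glibc 2.7 or later, guarded by _GNU_SOURCE), a small self-contained usage example; the 160-CPU figure simply mirrors the KVM limit quoted in the commit message:

```c
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
    int max_cpus = 160;                      /* e.g. KVM's per-guest limit */
    cpu_set_t *set = CPU_ALLOC(max_cpus);    /* heap-allocated, any width  */
    size_t size = CPU_ALLOC_SIZE(max_cpus);  /* bytes actually allocated   */

    CPU_ZERO_S(size, set);
    CPU_SET_S(100, size, set);  /* impossible with a fixed 64-bit mask */

    printf("cpu 100 set: %d, total CPUs set: %d\n",
           CPU_ISSET_S(100, size, set), CPU_COUNT_S(size, set));

    CPU_FREE(set);
    return 0;
}
```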