Message ID | 1297522467-5975-16-git-send-email-david@gibson.dropbear.id.au |
---|---|
State | New |
Headers | show |
On 12.02.2011, at 15:54, David Gibson wrote: > This extends the "pseries" (PAPR) machine to include a virtual IO bus > supporting the PAPR defined hypercall based virtual IO mechanisms. > > So far only one VIO device is provided, the vty / vterm, providing > a full console (polled only, for now). > > Signed-off-by: David Gibson <dwg@au1.ibm.com> > --- > Makefile.target | 3 +- > hw/spapr.c | 31 +++++++++- > hw/spapr.h | 10 +++ > hw/spapr_hcall.c | 19 ++---- > hw/spapr_vio.c | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ > hw/spapr_vio.h | 49 ++++++++++++++ > hw/spapr_vty.c | 132 +++++++++++++++++++++++++++++++++++++ > 7 files changed, 419 insertions(+), 16 deletions(-) > create mode 100644 hw/spapr_vio.c > create mode 100644 hw/spapr_vio.h > create mode 100644 hw/spapr_vty.c > > diff --git a/Makefile.target b/Makefile.target > index e0796ba..fe232da 100644 > --- a/Makefile.target > +++ b/Makefile.target > @@ -232,7 +232,8 @@ obj-ppc-y += ppc_oldworld.o > # NewWorld PowerMac > obj-ppc-y += ppc_newworld.o > # IBM pSeries (sPAPR) > -obj-ppc-y += spapr.o spapr_hcall.o > +obj-ppc-y += spapr.o spapr_hcall.o spapr_vio.o > +obj-ppc-y += spapr_vty.o > # PowerPC 4xx boards > obj-ppc-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o > obj-ppc-y += ppc440.o ppc440_bamboo.o > diff --git a/hw/spapr.c b/hw/spapr.c > index 8aca4e0..da61061 100644 > --- a/hw/spapr.c > +++ b/hw/spapr.c > @@ -37,6 +37,7 @@ > #include "net.h" > #include "blockdev.h" > #include "hw/spapr.h" > +#include "hw/spapr_vio.h" > > #include <libfdt.h> > > @@ -49,6 +50,7 @@ > > static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize, > const char *cpu_model, CPUState *envs[], > + sPAPREnvironment *spapr, > target_phys_addr_t initrd_base, > target_phys_addr_t initrd_size, > const char *kernel_cmdline) > @@ -59,6 +61,7 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize, > uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size); > int i; > char *modelname; > + int ret; > > #define _FDT(exp) \ > do { \ > @@ -151,9 +154,28 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize, > > _FDT((fdt_end_node(fdt))); > > + /* vdevice */ > + _FDT((fdt_begin_node(fdt, "vdevice"))); > + > + _FDT((fdt_property_string(fdt, "device_type", "vdevice"))); > + _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice"))); > + _FDT((fdt_property_cell(fdt, "#address-cells", 0x1))); > + _FDT((fdt_property_cell(fdt, "#size-cells", 0x0))); > + > + _FDT((fdt_end_node(fdt))); > + > _FDT((fdt_end_node(fdt))); /* close root node */ > _FDT((fdt_finish(fdt))); > > + /* re-expand to allow for further tweaks */ > + _FDT((fdt_open_into(fdt, fdt, FDT_MAX_SIZE))); > + > + ret = spapr_populate_vdevice(spapr->vio_bus, fdt); > + if (ret < 0) Braces.. > + fprintf(stderr, "couldn't setup vio devices in fdt\n"); > + > + _FDT((fdt_pack(fdt))); > + > if (fdt_size) > *fdt_size = fdt_totalsize(fdt); > > @@ -211,6 +233,12 @@ static void ppc_spapr_init (ram_addr_t ram_size, > ram_offset = qemu_ram_alloc(NULL, "ppc_spapr.ram", ram_size); > cpu_register_physical_memory(0, ram_size, ram_offset); > > + spapr->vio_bus = spapr_vio_bus_init(); > + > + for (i = 0; i < MAX_SERIAL_PORTS; i++) Braces.. > + if (serial_hds[i]) Braces.. > + spapr_vty_create(spapr->vio_bus, i, serial_hds[i]); There might be a qdev way to do this. Blue? > + > if (kernel_filename) { > uint64_t lowaddr = 0; > > @@ -242,7 +270,7 @@ static void ppc_spapr_init (ram_addr_t ram_size, > } > > /* load fdt */ > - fdt = spapr_create_fdt(&fdt_size, ram_size, cpu_model, &env, > + fdt = spapr_create_fdt(&fdt_size, ram_size, cpu_model, &env, spapr, > initrd_base, initrd_size, > kernel_cmdline); > if (!fdt) { > @@ -267,6 +295,7 @@ static QEMUMachine spapr_machine = { > .desc = "pSeries Logical Partition (PAPR compliant)", > .init = ppc_spapr_init, > .max_cpus = 1, > + .no_parallel = 1, duplicate? > .no_vga = 1, > .no_parallel = 1, > }; > diff --git a/hw/spapr.h b/hw/spapr.h > index dae9617..168511f 100644 > --- a/hw/spapr.h > +++ b/hw/spapr.h > @@ -1,7 +1,10 @@ > #if !defined (__HW_SPAPR_H__) > #define __HW_SPAPR_H__ > > +struct VIOsPAPRBus; > + > typedef struct sPAPREnvironment { > + struct VIOsPAPRBus *vio_bus; > } sPAPREnvironment; > > #define H_SUCCESS 0 > @@ -237,4 +240,11 @@ typedef struct sPAPREnvironment { > target_ulong spapr_hypercall(CPUState *env, sPAPREnvironment *spapr, > target_ulong token, target_ulong *args); > > +target_ulong h_put_term_char(sPAPREnvironment *spapr, > + target_ulong termno, target_ulong len, > + target_ulong char0_7, target_ulong char8_15); > +target_ulong h_get_term_char(sPAPREnvironment *spapr, > + target_ulong termno, target_ulong *len, > + target_ulong *char0_7, target_ulong *char8_15); > + > #endif /* !defined (__HW_SPAPR_H__) */ > diff --git a/hw/spapr_hcall.c b/hw/spapr_hcall.c > index c99c345..e2ed9cf 100644 > --- a/hw/spapr_hcall.c > +++ b/hw/spapr_hcall.c > @@ -3,19 +3,6 @@ > #include "qemu-char.h" > #include "hw/spapr.h" > > -static target_ulong h_put_term_char(target_ulong termno, target_ulong len, > - target_ulong char0_7, target_ulong char8_15) > -{ > - uint8_t buf[16]; > - > - *((uint64_t *)buf) = cpu_to_be64(char0_7); > - *((uint64_t *)buf + 1) = cpu_to_be64(char8_15); > - > - qemu_chr_write(serial_hds[0], buf, len); > - > - return 0; > -} > - > target_ulong spapr_hypercall(CPUState *env, sPAPREnvironment *spapr, > target_ulong token, target_ulong *args) > { > @@ -29,7 +16,11 @@ target_ulong spapr_hypercall(CPUState *env, sPAPREnvironment *spapr, > > switch (token) { > case H_PUT_TERM_CHAR: > - r = h_put_term_char(args[0], args[1], args[2], args[3]); > + r = h_put_term_char(spapr, args[0], args[1], args[2], args[3]); > + break; > + > + case H_GET_TERM_CHAR: > + r = h_get_term_char(spapr, args[0], &args[0], &args[1], &args[2]); Slick and simple. Blue, do you think there's some random abstraction layer necessary? > break; > > default: > diff --git a/hw/spapr_vio.c b/hw/spapr_vio.c > new file mode 100644 > index 0000000..d9c7292 > --- /dev/null > +++ b/hw/spapr_vio.c > @@ -0,0 +1,191 @@ > +/* > + * QEMU sPAPR VIO code > + * > + * Copyright (c) 2010 David Gibson, IBM Corporation <david@gibson.dropbear.id.au> > + * Based on the s390 virtio bus code: > + * Copyright (c) 2009 Alexander Graf <agraf@suse.de> > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library; if not, see <http://www.gnu.org/licenses/>. > + */ > + > +#include "hw.h" > +#include "sysemu.h" > +#include "boards.h" > +#include "monitor.h" > +#include "loader.h" > +#include "elf.h" > +#include "hw/sysbus.h" > +#include "kvm.h" > +#include "device_tree.h" > + > +#include "hw/spapr.h" > +#include "hw/spapr_vio.h" > + > +#ifdef CONFIG_FDT > +#include <libfdt.h> > +#endif /* CONFIG_FDT */ > + > +/* #define DEBUG_SPAPR */ > + > +#ifdef DEBUG_SPAPR > +#define dprintf(fmt, ...) \ > + do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) > +#else > +#define dprintf(fmt, ...) \ > + do { } while (0) > +#endif > + > +struct BusInfo spapr_vio_bus_info = { > + .name = "spapr-vio", > + .size = sizeof(VIOsPAPRBus), > +}; > + > +VIOsPAPRDevice *spapr_vio_find_by_reg(VIOsPAPRBus *bus, uint32_t reg) > +{ > + DeviceState *qdev; > + VIOsPAPRDevice *dev = NULL; > + > + QLIST_FOREACH(qdev, &bus->bus.children, sibling) { > + dev = (VIOsPAPRDevice *)qdev; > + if (dev->reg == reg) Braces > + break; > + } > + > + return dev; What if the device doesn't exist? > +} > + > +VIOsPAPRBus *spapr_vio_bus_init(void) > +{ > + VIOsPAPRBus *bus; > + BusState *_bus; > + DeviceState *dev; > + > + /* Create bridge device */ > + dev = qdev_create(NULL, "spapr-vio-bridge"); > + qdev_init_nofail(dev); > + > + /* Create bus on bridge device */ > + > + _bus = qbus_create(&spapr_vio_bus_info, dev, "spapr-vio"); > + bus = DO_UPCAST(VIOsPAPRBus, bus, _bus); > + > + return bus; > +} > + > +#ifdef CONFIG_FDT > +static int vio_make_devnode(VIOsPAPRDevice *dev, > + void *fdt) > +{ > + VIOsPAPRDeviceInfo *info = (VIOsPAPRDeviceInfo *)dev->qdev.info; > + int vdevice_off, node_off; > + int ret; > + > + vdevice_off = fdt_path_offset(fdt, "/vdevice"); > + if (vdevice_off < 0) Braces > + return vdevice_off; > + > + node_off = fdt_add_subnode(fdt, vdevice_off, dev->qdev.id); > + if (node_off < 0) Braces > + return node_off; > + > + ret = fdt_setprop_cell(fdt, node_off, "reg", dev->reg); > + if (ret < 0) Braces > + return ret; > + > + if (info->dt_type) { > + ret = fdt_setprop_string(fdt, node_off, "device_type", > + info->dt_type); > + if (ret < 0) Braces I'll stop complaining about braces now. Please go through the patch yourself and just fix them up :) > + return ret; > + } > + > + if (info->dt_compatible) { > + ret = fdt_setprop_string(fdt, node_off, "compatible", > + info->dt_compatible); > + if (ret < 0) > + return ret; > + } > + > + if (info->devnode) { > + ret = (info->devnode)(dev, fdt, node_off); > + if (ret < 0) > + return ret; > + } > + > + return node_off; > +} > +#endif /* CONFIG_FDT */ > + > +static int spapr_vio_busdev_init(DeviceState *dev, DeviceInfo *info) > +{ > + VIOsPAPRDeviceInfo *_info = (VIOsPAPRDeviceInfo *)info; > + VIOsPAPRDevice *_dev = (VIOsPAPRDevice *)dev; > + char *id; > + > + if (asprintf(&id, "%s@%x", _info->dt_name, _dev->reg) < 0) > + return -1; > + > + _dev->qdev.id = id; > + > + return _info->init(_dev); > +} > + > +void spapr_vio_bus_register_withprop(VIOsPAPRDeviceInfo *info) > +{ > + info->qdev.init = spapr_vio_busdev_init; > + info->qdev.bus_info = &spapr_vio_bus_info; > + > + assert(info->qdev.size >= sizeof(VIOsPAPRDevice)); > + qdev_register(&info->qdev); > +} > + > +static int spapr_vio_bridge_init(SysBusDevice *dev) > +{ > + /* nothing */ > + return 0; > +} > + > +static SysBusDeviceInfo spapr_vio_bridge_info = { > + .init = spapr_vio_bridge_init, > + .qdev.name = "spapr-vio-bridge", > + .qdev.size = sizeof(SysBusDevice), > + .qdev.no_user = 1, > +}; > + > +static void spapr_vio_register_devices(void) > +{ > + sysbus_register_withprop(&spapr_vio_bridge_info); > +} > + > +device_init(spapr_vio_register_devices) > + > +#ifdef CONFIG_FDT > + > +int spapr_populate_vdevice(VIOsPAPRBus *bus, void *fdt) > +{ > + DeviceState *qdev; > + int ret = 0; > + > + QLIST_FOREACH(qdev, &bus->bus.children, sibling) { > + VIOsPAPRDevice *dev = (VIOsPAPRDevice *)qdev; > + > + ret = vio_make_devnode(dev, fdt); > + > + if (ret < 0) > + return ret; > + } > + > + return 0; > +} > +#endif /* CONFIG_FDT */ > diff --git a/hw/spapr_vio.h b/hw/spapr_vio.h > new file mode 100644 > index 0000000..fb5e301 > --- /dev/null > +++ b/hw/spapr_vio.h > @@ -0,0 +1,49 @@ > +#ifndef _HW_SPAPR_VIO_H > +#define _HW_SPAPR_VIO_H > +/* > + * QEMU sPAPR VIO bus definitions > + * > + * Copyright (c) 2010 David Gibson, IBM Corporation <david@gibson.dropbear.id.au> > + * Based on the s390 virtio bus definitions: > + * Copyright (c) 2009 Alexander Graf <agraf@suse.de> > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library; if not, see <http://www.gnu.org/licenses/>. > + */ > + > +typedef struct VIOsPAPRDevice { > + DeviceState qdev; > + uint32_t reg; > +} VIOsPAPRDevice; > + > +typedef struct VIOsPAPRBus { > + BusState bus; > +} VIOsPAPRBus; > + > +typedef struct { > + DeviceInfo qdev; > + const char *dt_name, *dt_type, *dt_compatible; > + int (*init)(VIOsPAPRDevice *dev); > + int (*devnode)(VIOsPAPRDevice *dev, void *fdt, int node_off); > +} VIOsPAPRDeviceInfo; > + > +extern VIOsPAPRBus *spapr_vio_bus_init(void); > +extern VIOsPAPRDevice *spapr_vio_find_by_reg(VIOsPAPRBus *bus, uint32_t reg); > +extern void spapr_vio_bus_register_withprop(VIOsPAPRDeviceInfo *info); > +extern int spapr_populate_vdevice(VIOsPAPRBus *bus, void *fdt); > + > +void vty_putchars(VIOsPAPRDevice *sdev, uint8_t *buf, int len); > +void spapr_vty_create(VIOsPAPRBus *bus, > + uint32_t reg, CharDriverState *chardev); > + > +#endif /* _HW_SPAPR_VIO_H */ > diff --git a/hw/spapr_vty.c b/hw/spapr_vty.c > new file mode 100644 > index 0000000..9a2dc0b > --- /dev/null > +++ b/hw/spapr_vty.c > @@ -0,0 +1,132 @@ > +#include "qdev.h" > +#include "qemu-char.h" > +#include "hw/spapr.h" > +#include "hw/spapr_vio.h" > + > +#define VTERM_BUFSIZE 16 > + > +typedef struct VIOsPAPRVTYDevice { > + VIOsPAPRDevice sdev; > + CharDriverState *chardev; > + uint32_t in, out; > + uint8_t buf[VTERM_BUFSIZE]; > +} VIOsPAPRVTYDevice; > + > +static int vty_can_receive(void *opaque) > +{ > + VIOsPAPRVTYDevice *dev = (VIOsPAPRVTYDevice *)opaque; > + > + return (dev->in - dev->out) < VTERM_BUFSIZE; > +} > + > +static void vty_receive(void *opaque, const uint8_t *buf, int size) > +{ > + VIOsPAPRVTYDevice *dev = (VIOsPAPRVTYDevice *)opaque; > + int i; > + > + for (i = 0; i < size; i++) { > + assert((dev->in - dev->out) < VTERM_BUFSIZE); > + dev->buf[dev->in++ % VTERM_BUFSIZE] = buf[i]; > + } > +} > + > +static int vty_getchars(VIOsPAPRDevice *sdev, uint8_t *buf, int max) > +{ > + VIOsPAPRVTYDevice *dev = (VIOsPAPRVTYDevice *)sdev; > + int n = 0; > + > + while ((n < max) && (dev->out != dev->in)) > + buf[n++] = dev->buf[dev->out++ % VTERM_BUFSIZE]; > + > + return n; > +} > + > +void vty_putchars(VIOsPAPRDevice *sdev, uint8_t *buf, int len) > +{ > + VIOsPAPRVTYDevice *dev = (VIOsPAPRVTYDevice *)sdev; > + > + /* FIXME: should check the qemu_chr_write() return value */ > + qemu_chr_write(dev->chardev, buf, len); > +} > + > +static int spapr_vty_init(VIOsPAPRDevice *sdev) > +{ > + VIOsPAPRVTYDevice *dev = (VIOsPAPRVTYDevice *)sdev; > + > + qemu_chr_add_handlers(dev->chardev, vty_can_receive, > + vty_receive, NULL, dev); > + > + return 0; > +} > + > +target_ulong h_put_term_char(sPAPREnvironment *spapr, > + target_ulong termno, target_ulong len, > + target_ulong char0_7, target_ulong char8_15) > +{ > + VIOsPAPRDevice *sdev = spapr_vio_find_by_reg(spapr->vio_bus, termno); > + uint8_t buf[16]; > + > + if (!sdev) > + return H_PARAMETER; > + > + if (len > 16) > + return H_PARAMETER; > + > + *((uint64_t *)buf) = cpu_to_be64(char0_7); > + *((uint64_t *)buf + 1) = cpu_to_be64(char8_15); > + > + vty_putchars(sdev, buf, len); > + > + return 0; > +} > + > +target_ulong h_get_term_char(sPAPREnvironment *spapr, > + target_ulong termno, target_ulong *len, > + target_ulong *char0_7, target_ulong *char8_15) > +{ > + VIOsPAPRDevice *sdev = spapr_vio_find_by_reg(spapr->vio_bus, termno); > + uint8_t buf[16]; > + > + if (!sdev) > + return H_PARAMETER; > + > + *len = vty_getchars(sdev, buf, sizeof(buf)); > + if (*len < 16) > + memset(buf + *len, 0, 16 - *len); > + > + *char0_7 = be64_to_cpu(*((uint64_t *)buf)); > + *char8_15 = be64_to_cpu(*((uint64_t *)buf + 1)); > + > + return H_SUCCESS; > +} > + > +void spapr_vty_create(VIOsPAPRBus *bus, > + uint32_t reg, CharDriverState *chardev) > +{ > + DeviceState *dev; > + > + dev = qdev_create(&bus->bus, "spapr-vty"); > + qdev_prop_set_uint32(dev, "reg", reg); > + qdev_prop_set_chr(dev, "chardev", chardev); > + qdev_init_nofail(dev); > +} > + > +static VIOsPAPRDeviceInfo spapr_vty = { > + .init = spapr_vty_init, > + .dt_name = "vty", > + .dt_type = "serial", > + .dt_compatible = "hvterm1", > + .qdev.name = "spapr-vty", > + .qdev.size = sizeof(VIOsPAPRVTYDevice), > + .qdev.props = (Property[]) { > + DEFINE_PROP_UINT32("reg", VIOsPAPRDevice, reg, 0), > + DEFINE_PROP_CHR("chardev", VIOsPAPRVTYDevice, chardev), > + DEFINE_PROP_END_OF_LIST(), > + }, > +}; > + > +static void spapr_vty_register(void) > +{ > + spapr_vio_bus_register_withprop(&spapr_vty); > +} > +device_init(spapr_vty_register); > -- > 1.7.1 > Alex
On Sat, Feb 12, 2011 at 6:47 PM, Alexander Graf <agraf@suse.de> wrote: > > On 12.02.2011, at 15:54, David Gibson wrote: > >> This extends the "pseries" (PAPR) machine to include a virtual IO bus >> supporting the PAPR defined hypercall based virtual IO mechanisms. >> >> So far only one VIO device is provided, the vty / vterm, providing >> a full console (polled only, for now). >> >> Signed-off-by: David Gibson <dwg@au1.ibm.com> >> --- >> Makefile.target | 3 +- >> hw/spapr.c | 31 +++++++++- >> hw/spapr.h | 10 +++ >> hw/spapr_hcall.c | 19 ++---- >> hw/spapr_vio.c | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ >> hw/spapr_vio.h | 49 ++++++++++++++ >> hw/spapr_vty.c | 132 +++++++++++++++++++++++++++++++++++++ >> 7 files changed, 419 insertions(+), 16 deletions(-) >> create mode 100644 hw/spapr_vio.c >> create mode 100644 hw/spapr_vio.h >> create mode 100644 hw/spapr_vty.c >> >> diff --git a/Makefile.target b/Makefile.target >> index e0796ba..fe232da 100644 >> --- a/Makefile.target >> +++ b/Makefile.target >> @@ -232,7 +232,8 @@ obj-ppc-y += ppc_oldworld.o >> # NewWorld PowerMac >> obj-ppc-y += ppc_newworld.o >> # IBM pSeries (sPAPR) >> -obj-ppc-y += spapr.o spapr_hcall.o >> +obj-ppc-y += spapr.o spapr_hcall.o spapr_vio.o >> +obj-ppc-y += spapr_vty.o >> # PowerPC 4xx boards >> obj-ppc-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o >> obj-ppc-y += ppc440.o ppc440_bamboo.o >> diff --git a/hw/spapr.c b/hw/spapr.c >> index 8aca4e0..da61061 100644 >> --- a/hw/spapr.c >> +++ b/hw/spapr.c >> @@ -37,6 +37,7 @@ >> #include "net.h" >> #include "blockdev.h" >> #include "hw/spapr.h" >> +#include "hw/spapr_vio.h" >> >> #include <libfdt.h> >> >> @@ -49,6 +50,7 @@ >> >> static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize, >> const char *cpu_model, CPUState *envs[], >> + sPAPREnvironment *spapr, >> target_phys_addr_t initrd_base, >> target_phys_addr_t initrd_size, >> const char *kernel_cmdline) >> @@ -59,6 +61,7 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize, >> uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size); >> int i; >> char *modelname; >> + int ret; >> >> #define _FDT(exp) \ >> do { \ >> @@ -151,9 +154,28 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize, >> >> _FDT((fdt_end_node(fdt))); >> >> + /* vdevice */ >> + _FDT((fdt_begin_node(fdt, "vdevice"))); >> + >> + _FDT((fdt_property_string(fdt, "device_type", "vdevice"))); >> + _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice"))); >> + _FDT((fdt_property_cell(fdt, "#address-cells", 0x1))); >> + _FDT((fdt_property_cell(fdt, "#size-cells", 0x0))); >> + >> + _FDT((fdt_end_node(fdt))); >> + >> _FDT((fdt_end_node(fdt))); /* close root node */ >> _FDT((fdt_finish(fdt))); >> >> + /* re-expand to allow for further tweaks */ >> + _FDT((fdt_open_into(fdt, fdt, FDT_MAX_SIZE))); >> + >> + ret = spapr_populate_vdevice(spapr->vio_bus, fdt); >> + if (ret < 0) > > Braces.. > >> + fprintf(stderr, "couldn't setup vio devices in fdt\n"); >> + >> + _FDT((fdt_pack(fdt))); >> + >> if (fdt_size) >> *fdt_size = fdt_totalsize(fdt); >> >> @@ -211,6 +233,12 @@ static void ppc_spapr_init (ram_addr_t ram_size, >> ram_offset = qemu_ram_alloc(NULL, "ppc_spapr.ram", ram_size); >> cpu_register_physical_memory(0, ram_size, ram_offset); >> >> + spapr->vio_bus = spapr_vio_bus_init(); >> + >> + for (i = 0; i < MAX_SERIAL_PORTS; i++) > > Braces.. > >> + if (serial_hds[i]) > > Braces.. > >> + spapr_vty_create(spapr->vio_bus, i, serial_hds[i]); > > There might be a qdev way to do this. Blue? Actually I don't quite understand the need for vty layer, why not use the chardev here directly? > >> + >> if (kernel_filename) { >> uint64_t lowaddr = 0; >> >> @@ -242,7 +270,7 @@ static void ppc_spapr_init (ram_addr_t ram_size, >> } >> >> /* load fdt */ >> - fdt = spapr_create_fdt(&fdt_size, ram_size, cpu_model, &env, >> + fdt = spapr_create_fdt(&fdt_size, ram_size, cpu_model, &env, spapr, >> initrd_base, initrd_size, >> kernel_cmdline); >> if (!fdt) { >> @@ -267,6 +295,7 @@ static QEMUMachine spapr_machine = { >> .desc = "pSeries Logical Partition (PAPR compliant)", >> .init = ppc_spapr_init, >> .max_cpus = 1, >> + .no_parallel = 1, > > duplicate? > >> .no_vga = 1, >> .no_parallel = 1, >> }; >> diff --git a/hw/spapr.h b/hw/spapr.h >> index dae9617..168511f 100644 >> --- a/hw/spapr.h >> +++ b/hw/spapr.h >> @@ -1,7 +1,10 @@ >> #if !defined (__HW_SPAPR_H__) >> #define __HW_SPAPR_H__ >> >> +struct VIOsPAPRBus; >> + >> typedef struct sPAPREnvironment { >> + struct VIOsPAPRBus *vio_bus; >> } sPAPREnvironment; >> >> #define H_SUCCESS 0 >> @@ -237,4 +240,11 @@ typedef struct sPAPREnvironment { >> target_ulong spapr_hypercall(CPUState *env, sPAPREnvironment *spapr, >> target_ulong token, target_ulong *args); >> >> +target_ulong h_put_term_char(sPAPREnvironment *spapr, >> + target_ulong termno, target_ulong len, >> + target_ulong char0_7, target_ulong char8_15); >> +target_ulong h_get_term_char(sPAPREnvironment *spapr, >> + target_ulong termno, target_ulong *len, >> + target_ulong *char0_7, target_ulong *char8_15); >> + >> #endif /* !defined (__HW_SPAPR_H__) */ >> diff --git a/hw/spapr_hcall.c b/hw/spapr_hcall.c >> index c99c345..e2ed9cf 100644 >> --- a/hw/spapr_hcall.c >> +++ b/hw/spapr_hcall.c >> @@ -3,19 +3,6 @@ >> #include "qemu-char.h" >> #include "hw/spapr.h" >> >> -static target_ulong h_put_term_char(target_ulong termno, target_ulong len, >> - target_ulong char0_7, target_ulong char8_15) >> -{ >> - uint8_t buf[16]; >> - >> - *((uint64_t *)buf) = cpu_to_be64(char0_7); >> - *((uint64_t *)buf + 1) = cpu_to_be64(char8_15); >> - >> - qemu_chr_write(serial_hds[0], buf, len); >> - >> - return 0; >> -} >> - >> target_ulong spapr_hypercall(CPUState *env, sPAPREnvironment *spapr, >> target_ulong token, target_ulong *args) >> { >> @@ -29,7 +16,11 @@ target_ulong spapr_hypercall(CPUState *env, sPAPREnvironment *spapr, >> >> switch (token) { >> case H_PUT_TERM_CHAR: >> - r = h_put_term_char(args[0], args[1], args[2], args[3]); >> + r = h_put_term_char(spapr, args[0], args[1], args[2], args[3]); >> + break; >> + >> + case H_GET_TERM_CHAR: >> + r = h_get_term_char(spapr, args[0], &args[0], &args[1], &args[2]); > > Slick and simple. Blue, do you think there's some random abstraction layer necessary? Same here.
On Sat, 2011-02-12 at 18:59 +0200, Blue Swirl wrote: > > Actually I don't quite understand the need for vty layer, why not use > the chardev here directly? I'm not sure what you mean here... Basically, the interface presented to guests is sPAPR compliant, so virtual devices come with a bunch of stuff such as standard device-tree properties, but also hcalls for interrupt control etc... which are common to most of these guys including vty. Some of it isn't present in David current patch just yet, but I don't see how using an existing chardev would provide the same semantics, especially when we start adding interrupts etc... Also eventually, VTY's will be hot-pluggable (when we get to do that) and will use the same mechanisms as the other sPAPR VIO devices for that. Cheers, Ben.
On Sat, Feb 12, 2011 at 11:00 PM, Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote: > On Sat, 2011-02-12 at 18:59 +0200, Blue Swirl wrote: >> >> Actually I don't quite understand the need for vty layer, why not use >> the chardev here directly? > > I'm not sure what you mean here... Maybe it would be reasonable to leave h_put_term_char to spapr_hcall.c instead of moving those to a separate file.
On Sun, 2011-02-13 at 00:52 +0200, Blue Swirl wrote: > On Sat, Feb 12, 2011 at 11:00 PM, Benjamin Herrenschmidt > <benh@kernel.crashing.org> wrote: > > On Sat, 2011-02-12 at 18:59 +0200, Blue Swirl wrote: > >> > >> Actually I don't quite understand the need for vty layer, why not use > >> the chardev here directly? > > > > I'm not sure what you mean here... > > Maybe it would be reasonable to leave h_put_term_char to spapr_hcall.c > instead of moving those to a separate file. Well, the VIO device instance gives the chardev instance which is all nicely encapsulated inside spapr-vty... Also VIO devices tend to have dedicated hcalls, not only VTY, so it makes a lot of sense to keep them close to the rest of the VIO driver they belong to don't you think ? (Actually veth does, vscsi uses the more "generic" CRQ stuff which we've added to the core VIO but you'll see that when we get those patches ready, hopefully soon). Actually, one thing I noticed is that the current patches David posted still have a single function with a switch/case statement for hcalls. I'm not 100% certain what David long term plans are here, but in our internal "WIP" tree, we've subsequently turned that into a mechanism where any module can call powerpc_register_hypercall() to add hcalls. So if David intends to move the "upstream candidate" tree in that direction, then naturally, the calls in spapr_hcall.c are going to disappear in favor of a pair of powerpc_register_hypercall() locally in the vty module. Cheers, Ben.
On Sun, Feb 13, 2011 at 1:15 AM, Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote: > On Sun, 2011-02-13 at 00:52 +0200, Blue Swirl wrote: >> On Sat, Feb 12, 2011 at 11:00 PM, Benjamin Herrenschmidt >> <benh@kernel.crashing.org> wrote: >> > On Sat, 2011-02-12 at 18:59 +0200, Blue Swirl wrote: >> >> >> >> Actually I don't quite understand the need for vty layer, why not use >> >> the chardev here directly? >> > >> > I'm not sure what you mean here... >> >> Maybe it would be reasonable to leave h_put_term_char to spapr_hcall.c >> instead of moving those to a separate file. > > Well, the VIO device instance gives the chardev instance which is all > nicely encapsulated inside spapr-vty... Also VIO devices tend to have > dedicated hcalls, not only VTY, so it makes a lot of sense to keep them > close to the rest of the VIO driver they belong to don't you think ? > > (Actually veth does, vscsi uses the more "generic" CRQ stuff which we've > added to the core VIO but you'll see that when we get those patches > ready, hopefully soon). This is a bit of a special case, much like semihosting modes for m68k or ARM, or like MOL hacks which were removed recently. From QEMU point of view, the most natural way of handling this would be hypervisor implemented in the guest side (for example BIOS). Then the hypervisor would use normal IO (or virtio) to communicate with the host. If inside QEMU, the interface of the hypervisor to the devices needs some thought. We'd like to avoid ugly interfaces like vmmouse where a device probes CPU registers directly or spaghetti interfaces like APIC. > Actually, one thing I noticed is that the current patches David posted > still have a single function with a switch/case statement for hcalls. > > I'm not 100% certain what David long term plans are here, but in our > internal "WIP" tree, we've subsequently turned that into a mechanism > where any module can call powerpc_register_hypercall() to add hcalls. > > So if David intends to move the "upstream candidate" tree in that > direction, then naturally, the calls in spapr_hcall.c are going to > disappear in favor of a pair of powerpc_register_hypercall() locally in > the vty module. Is the interface new design, or are you implementing what is used also on real HW?
On Sat, Feb 12, 2011 at 05:47:53PM +0100, Alexander Graf wrote: > On 12.02.2011, at 15:54, David Gibson wrote: [snip] > > @@ -267,6 +295,7 @@ static QEMUMachine spapr_machine = { > > .desc = "pSeries Logical Partition (PAPR compliant)", > > .init = ppc_spapr_init, > > .max_cpus = 1, > > + .no_parallel = 1, > > duplicate? Oops, rebasing mistake. Fixed now. [snip] > > +VIOsPAPRDevice *spapr_vio_find_by_reg(VIOsPAPRBus *bus, uint32_t reg) > > +{ > > + DeviceState *qdev; > > + VIOsPAPRDevice *dev = NULL; > > + > > + QLIST_FOREACH(qdev, &bus->bus.children, sibling) { > > + dev = (VIOsPAPRDevice *)qdev; > > + if (dev->reg == reg) > > Braces > > > + break; > > + } > > + > > + return dev; > > What if the device doesn't exist? This returns NULL, the caller returns an error...
On Sun, Feb 13, 2011 at 10:08:23AM +0200, Blue Swirl wrote: > On Sun, Feb 13, 2011 at 1:15 AM, Benjamin Herrenschmidt > <benh@kernel.crashing.org> wrote: > > On Sun, 2011-02-13 at 00:52 +0200, Blue Swirl wrote: > >> On Sat, Feb 12, 2011 at 11:00 PM, Benjamin Herrenschmidt > >> <benh@kernel.crashing.org> wrote: > >> > On Sat, 2011-02-12 at 18:59 +0200, Blue Swirl wrote: > >> >> > >> >> Actually I don't quite understand the need for vty layer, why not use > >> >> the chardev here directly? > >> > > >> > I'm not sure what you mean here... > >> > >> Maybe it would be reasonable to leave h_put_term_char to spapr_hcall.c > >> instead of moving those to a separate file. > > > > Well, the VIO device instance gives the chardev instance which is all > > nicely encapsulated inside spapr-vty... Also VIO devices tend to have > > dedicated hcalls, not only VTY, so it makes a lot of sense to keep them > > close to the rest of the VIO driver they belong to don't you think ? > > > > (Actually veth does, vscsi uses the more "generic" CRQ stuff which we've > > added to the core VIO but you'll see that when we get those patches > > ready, hopefully soon). > > This is a bit of a special case, much like semihosting modes for m68k > or ARM, or like MOL hacks which were removed recently. From QEMU point > of view, the most natural way of handling this would be hypervisor > implemented in the guest side (for example BIOS). Then the hypervisor > would use normal IO (or virtio) to communicate with the host. If > inside QEMU, the interface of the hypervisor to the devices needs some > thought. We'd like to avoid ugly interfaces like vmmouse where a > device probes CPU registers directly or spaghetti interfaces like > APIC. I really don't follow what you're saying here. Running the hypervisor in the guest, rather than emulating its effect in qemu seems like an awful lot of complexity for no clear reason. > > Actually, one thing I noticed is that the current patches David posted > > still have a single function with a switch/case statement for hcalls. > > > > I'm not 100% certain what David long term plans are here, but in our > > internal "WIP" tree, we've subsequently turned that into a mechanism > > where any module can call powerpc_register_hypercall() to add hcalls. > > > > So if David intends to move the "upstream candidate" tree in that > > direction, then naturally, the calls in spapr_hcall.c are going to > > disappear in favor of a pair of powerpc_register_hypercall() locally in > > the vty module. > > Is the interface new design, or are you implementing what is used also > on real HW? The interface already exists on real HW. It's described in the PAPR document we keep mentioning.
On Sun, Feb 13, 2011 at 10:15:03AM +1100, Benjamin Herrenschmidt wrote: > On Sun, 2011-02-13 at 00:52 +0200, Blue Swirl wrote: > > On Sat, Feb 12, 2011 at 11:00 PM, Benjamin Herrenschmidt [snip] > Actually, one thing I noticed is that the current patches David posted > still have a single function with a switch/case statement for hcalls. > > I'm not 100% certain what David long term plans are here, but in our > internal "WIP" tree, we've subsequently turned that into a mechanism > where any module can call powerpc_register_hypercall() to add hcalls. > > So if David intends to move the "upstream candidate" tree in that > direction, then naturally, the calls in spapr_hcall.c are going to > disappear in favor of a pair of powerpc_register_hypercall() locally in > the vty module. Ah, yeah. I'm still not sure what to do about it. I was going to fold the dynamic hcall registration into the patch set before upstreaming. But then something paulus said made me rethink whether the dynamic registration was a good idea. Still need to sort this out before the series is really ready.
On Sun, Feb 13, 2011 at 1:12 PM, David Gibson <david@gibson.dropbear.id.au> wrote: > On Sun, Feb 13, 2011 at 10:08:23AM +0200, Blue Swirl wrote: >> On Sun, Feb 13, 2011 at 1:15 AM, Benjamin Herrenschmidt >> <benh@kernel.crashing.org> wrote: >> > On Sun, 2011-02-13 at 00:52 +0200, Blue Swirl wrote: >> >> On Sat, Feb 12, 2011 at 11:00 PM, Benjamin Herrenschmidt >> >> <benh@kernel.crashing.org> wrote: >> >> > On Sat, 2011-02-12 at 18:59 +0200, Blue Swirl wrote: >> >> >> >> >> >> Actually I don't quite understand the need for vty layer, why not use >> >> >> the chardev here directly? >> >> > >> >> > I'm not sure what you mean here... >> >> >> >> Maybe it would be reasonable to leave h_put_term_char to spapr_hcall.c >> >> instead of moving those to a separate file. >> > >> > Well, the VIO device instance gives the chardev instance which is all >> > nicely encapsulated inside spapr-vty... Also VIO devices tend to have >> > dedicated hcalls, not only VTY, so it makes a lot of sense to keep them >> > close to the rest of the VIO driver they belong to don't you think ? >> > >> > (Actually veth does, vscsi uses the more "generic" CRQ stuff which we've >> > added to the core VIO but you'll see that when we get those patches >> > ready, hopefully soon). >> >> This is a bit of a special case, much like semihosting modes for m68k >> or ARM, or like MOL hacks which were removed recently. From QEMU point >> of view, the most natural way of handling this would be hypervisor >> implemented in the guest side (for example BIOS). Then the hypervisor >> would use normal IO (or virtio) to communicate with the host. If >> inside QEMU, the interface of the hypervisor to the devices needs some >> thought. We'd like to avoid ugly interfaces like vmmouse where a >> device probes CPU registers directly or spaghetti interfaces like >> APIC. > > I really don't follow what you're saying here. Running the hypervisor > in the guest, rather than emulating its effect in qemu seems like an > awful lot of complexity for no clear reason. Maybe it would be more complex but also emulation accuracy would be increased and the interfaces would be saner. We don't shortcut BIOS and implement its services to OS in QEMU for other machines either. I'd expect one problem with that approach though, the interface used on real HW between the hypervisor and the underlying HW may be undocumented, but then it could use for example existing virtio devices. One way to handle this could be to add the hypervisor interface now to QEMU and switch to guest hypervisor when (if) it becomes available. I'd just like to avoid duplication with virtio or messy interfaces like vmport.
On 13.02.2011, at 09:08, Blue Swirl wrote: > On Sun, Feb 13, 2011 at 1:15 AM, Benjamin Herrenschmidt > <benh@kernel.crashing.org> wrote: >> On Sun, 2011-02-13 at 00:52 +0200, Blue Swirl wrote: >>> On Sat, Feb 12, 2011 at 11:00 PM, Benjamin Herrenschmidt >>> <benh@kernel.crashing.org> wrote: >>>> On Sat, 2011-02-12 at 18:59 +0200, Blue Swirl wrote: >>>>> >>>>> Actually I don't quite understand the need for vty layer, why not use >>>>> the chardev here directly? >>>> >>>> I'm not sure what you mean here... >>> >>> Maybe it would be reasonable to leave h_put_term_char to spapr_hcall.c >>> instead of moving those to a separate file. >> >> Well, the VIO device instance gives the chardev instance which is all >> nicely encapsulated inside spapr-vty... Also VIO devices tend to have >> dedicated hcalls, not only VTY, so it makes a lot of sense to keep them >> close to the rest of the VIO driver they belong to don't you think ? >> >> (Actually veth does, vscsi uses the more "generic" CRQ stuff which we've >> added to the core VIO but you'll see that when we get those patches >> ready, hopefully soon). > > This is a bit of a special case, much like semihosting modes for m68k > or ARM, or like MOL hacks which were removed recently. From QEMU point > of view, the most natural way of handling this would be hypervisor > implemented in the guest side (for example BIOS). Then the hypervisor > would use normal IO (or virtio) to communicate with the host. If > inside QEMU, the interface of the hypervisor to the devices needs some > thought. We'd like to avoid ugly interfaces like vmmouse where a > device probes CPU registers directly or spaghetti interfaces like > APIC. In this case I disagree. While the "emulate real hardware" case would be to have a full proprietary binary blob of firmware running in the guest that would handle all the hypervisor stuff, this is not feasible. Implementing PAPR in Qemu is hard (and slow) enough - doing it all emulated simply is overkill for what we're trying to achieve here. The PAPR interfaces are well specified (in the PAPR spec - seems to be power.org member restricted) and are the only thing you ever get to see on recent POWER hardware. The real hardware interface is simply inaccessible to you. It's basically the same as the S390 machine we have. On those machine we simply don't have access to real hw, so emulating it is moot. All interfaces that the OS sees are PV interfaces. > >> Actually, one thing I noticed is that the current patches David posted >> still have a single function with a switch/case statement for hcalls. >> >> I'm not 100% certain what David long term plans are here, but in our >> internal "WIP" tree, we've subsequently turned that into a mechanism >> where any module can call powerpc_register_hypercall() to add hcalls. >> >> So if David intends to move the "upstream candidate" tree in that >> direction, then naturally, the calls in spapr_hcall.c are going to >> disappear in favor of a pair of powerpc_register_hypercall() locally in >> the vty module. > > Is the interface new design, or are you implementing what is used also > on real HW? PAPR is the spec that defines the PV interface in use on POWER. Outside of IBM, nobody can run Linux on POWER without going through phyp which is their hypervisor. So this implements exactly what the OS sees on real HW :). Alex
On 13.02.2011, at 12:09, David Gibson wrote: > On Sat, Feb 12, 2011 at 05:47:53PM +0100, Alexander Graf wrote: >> On 12.02.2011, at 15:54, David Gibson wrote: > [snip] >>> @@ -267,6 +295,7 @@ static QEMUMachine spapr_machine = { >>> .desc = "pSeries Logical Partition (PAPR compliant)", >>> .init = ppc_spapr_init, >>> .max_cpus = 1, >>> + .no_parallel = 1, >> >> duplicate? > > Oops, rebasing mistake. Fixed now. > > [snip] >>> +VIOsPAPRDevice *spapr_vio_find_by_reg(VIOsPAPRBus *bus, uint32_t reg) >>> +{ >>> + DeviceState *qdev; >>> + VIOsPAPRDevice *dev = NULL; >>> + >>> + QLIST_FOREACH(qdev, &bus->bus.children, sibling) { >>> + dev = (VIOsPAPRDevice *)qdev; >>> + if (dev->reg == reg) >> >> Braces >> >>> + break; >>> + } >>> + >>> + return dev; >> >> What if the device doesn't exist? > > This returns NULL, the caller returns an error... Makes sense :). Alex
On 13.02.2011, at 12:14, David Gibson wrote: > On Sun, Feb 13, 2011 at 10:15:03AM +1100, Benjamin Herrenschmidt wrote: >> On Sun, 2011-02-13 at 00:52 +0200, Blue Swirl wrote: >>> On Sat, Feb 12, 2011 at 11:00 PM, Benjamin Herrenschmidt > [snip] >> Actually, one thing I noticed is that the current patches David posted >> still have a single function with a switch/case statement for hcalls. >> >> I'm not 100% certain what David long term plans are here, but in our >> internal "WIP" tree, we've subsequently turned that into a mechanism >> where any module can call powerpc_register_hypercall() to add hcalls. >> >> So if David intends to move the "upstream candidate" tree in that >> direction, then naturally, the calls in spapr_hcall.c are going to >> disappear in favor of a pair of powerpc_register_hypercall() locally in >> the vty module. > > Ah, yeah. I'm still not sure what to do about it. I was going to > fold the dynamic hcall registration into the patch set before > upstreaming. But then something paulus said made me rethink whether > the dynamic registration was a good idea. Still need to sort this out > before the series is really ready. We can surely move it to dynamic later on. I think the "proper" way would be to populate a qdev bus and have the individual hypercall receivers register themselves through -device creations. But Blue really is the expert here :). Alex
On Sun, Feb 13, 2011 at 01:40:14PM +0100, Alexander Graf wrote: > > On 13.02.2011, at 12:14, David Gibson wrote: > > > On Sun, Feb 13, 2011 at 10:15:03AM +1100, Benjamin Herrenschmidt wrote: > >> On Sun, 2011-02-13 at 00:52 +0200, Blue Swirl wrote: > >>> On Sat, Feb 12, 2011 at 11:00 PM, Benjamin Herrenschmidt > > [snip] > >> Actually, one thing I noticed is that the current patches David posted > >> still have a single function with a switch/case statement for hcalls. > >> > >> I'm not 100% certain what David long term plans are here, but in our > >> internal "WIP" tree, we've subsequently turned that into a mechanism > >> where any module can call powerpc_register_hypercall() to add hcalls. > >> > >> So if David intends to move the "upstream candidate" tree in that > >> direction, then naturally, the calls in spapr_hcall.c are going to > >> disappear in favor of a pair of powerpc_register_hypercall() locally in > >> the vty module. > > > > Ah, yeah. I'm still not sure what to do about it. I was going to > > fold the dynamic hcall registration into the patch set before > > upstreaming. But then something paulus said made me rethink whether > > the dynamic registration was a good idea. Still need to sort this out > > before the series is really ready. > > We can surely move it to dynamic later on. I think the "proper" way > would be to populate a qdev bus and have the individual hypercall > receivers register themselves through -device creations. But Blue > really is the expert here :). Ok, not sure what you mean here. I already have a qdev bus for the VIO devices. With my tentative dynamic model as devices are created on the bus they may register hypercalls as well. Is that what you mean, or do you mean have a separate "hypercall" bus. That sounds like serious overkill for a simple number->function translation.
On Sun, Feb 13, 2011 at 2:31 PM, Alexander Graf <agraf@suse.de> wrote: > > On 13.02.2011, at 09:08, Blue Swirl wrote: > >> On Sun, Feb 13, 2011 at 1:15 AM, Benjamin Herrenschmidt >> <benh@kernel.crashing.org> wrote: >>> On Sun, 2011-02-13 at 00:52 +0200, Blue Swirl wrote: >>>> On Sat, Feb 12, 2011 at 11:00 PM, Benjamin Herrenschmidt >>>> <benh@kernel.crashing.org> wrote: >>>>> On Sat, 2011-02-12 at 18:59 +0200, Blue Swirl wrote: >>>>>> >>>>>> Actually I don't quite understand the need for vty layer, why not use >>>>>> the chardev here directly? >>>>> >>>>> I'm not sure what you mean here... >>>> >>>> Maybe it would be reasonable to leave h_put_term_char to spapr_hcall.c >>>> instead of moving those to a separate file. >>> >>> Well, the VIO device instance gives the chardev instance which is all >>> nicely encapsulated inside spapr-vty... Also VIO devices tend to have >>> dedicated hcalls, not only VTY, so it makes a lot of sense to keep them >>> close to the rest of the VIO driver they belong to don't you think ? >>> >>> (Actually veth does, vscsi uses the more "generic" CRQ stuff which we've >>> added to the core VIO but you'll see that when we get those patches >>> ready, hopefully soon). >> >> This is a bit of a special case, much like semihosting modes for m68k >> or ARM, or like MOL hacks which were removed recently. From QEMU point >> of view, the most natural way of handling this would be hypervisor >> implemented in the guest side (for example BIOS). Then the hypervisor >> would use normal IO (or virtio) to communicate with the host. If >> inside QEMU, the interface of the hypervisor to the devices needs some >> thought. We'd like to avoid ugly interfaces like vmmouse where a >> device probes CPU registers directly or spaghetti interfaces like >> APIC. > > In this case I disagree. While the "emulate real hardware" case would be to have a full proprietary binary blob of firmware running in the guest that would handle all the hypervisor stuff, this is not feasible. Implementing PAPR in Qemu is hard (and slow) enough - doing it all emulated simply is overkill for what we're trying to achieve here. > > The PAPR interfaces are well specified (in the PAPR spec - seems to be power.org member restricted) and are the only thing you ever get to see on recent POWER hardware. The real hardware interface is simply inaccessible to you. Hah, I'm sure that if sufficiently motivated bunch of cool hackers got access to a truckload (or several, aren't these big machines?) of POWER hardware, which they could open and brick at leisure, like it is done with other firmware reverse engineering efforts, we'd have open hypervisor at no time. ;-) It's not impossible to imagine also IBM open sourcing theirs. > It's basically the same as the S390 machine we have. On those machine we simply don't have access to real hw, so emulating it is moot. All interfaces that the OS sees are PV interfaces. > >> >>> Actually, one thing I noticed is that the current patches David posted >>> still have a single function with a switch/case statement for hcalls. >>> >>> I'm not 100% certain what David long term plans are here, but in our >>> internal "WIP" tree, we've subsequently turned that into a mechanism >>> where any module can call powerpc_register_hypercall() to add hcalls. >>> >>> So if David intends to move the "upstream candidate" tree in that >>> direction, then naturally, the calls in spapr_hcall.c are going to >>> disappear in favor of a pair of powerpc_register_hypercall() locally in >>> the vty module. >> >> Is the interface new design, or are you implementing what is used also >> on real HW? > > PAPR is the spec that defines the PV interface in use on POWER. Outside of IBM, nobody can run Linux on POWER without going through phyp which is their hypervisor. > > So this implements exactly what the OS sees on real HW :). Right. As long as the resulting spaghetti is well contained, I'm OK with this approach. But should ever the millionaire hacker team (or IBM) appear with their open hypervisor, this shall make room for it.
On 13.02.2011, at 13:44, David Gibson wrote: > On Sun, Feb 13, 2011 at 01:40:14PM +0100, Alexander Graf wrote: >> >> On 13.02.2011, at 12:14, David Gibson wrote: >> >>> On Sun, Feb 13, 2011 at 10:15:03AM +1100, Benjamin Herrenschmidt wrote: >>>> On Sun, 2011-02-13 at 00:52 +0200, Blue Swirl wrote: >>>>> On Sat, Feb 12, 2011 at 11:00 PM, Benjamin Herrenschmidt >>> [snip] >>>> Actually, one thing I noticed is that the current patches David posted >>>> still have a single function with a switch/case statement for hcalls. >>>> >>>> I'm not 100% certain what David long term plans are here, but in our >>>> internal "WIP" tree, we've subsequently turned that into a mechanism >>>> where any module can call powerpc_register_hypercall() to add hcalls. >>>> >>>> So if David intends to move the "upstream candidate" tree in that >>>> direction, then naturally, the calls in spapr_hcall.c are going to >>>> disappear in favor of a pair of powerpc_register_hypercall() locally in >>>> the vty module. >>> >>> Ah, yeah. I'm still not sure what to do about it. I was going to >>> fold the dynamic hcall registration into the patch set before >>> upstreaming. But then something paulus said made me rethink whether >>> the dynamic registration was a good idea. Still need to sort this out >>> before the series is really ready. >> >> We can surely move it to dynamic later on. I think the "proper" way >> would be to populate a qdev bus and have the individual hypercall >> receivers register themselves through -device creations. But Blue >> really is the expert here :). > > Ok, not sure what you mean here. I already have a qdev bus for the > VIO devices. With my tentative dynamic model as devices are created > on the bus they may register hypercalls as well. Oh, guess I just overlooked that then, sorry :). > Is that what you mean, or do you mean have a separate "hypercall" > bus. That sounds like serious overkill for a simple number->function > translation. Yup. Alex
On 02/13/2011 05:12 AM, David Gibson wrote: > On Sun, Feb 13, 2011 at 10:08:23AM +0200, Blue Swirl wrote: > >> On Sun, Feb 13, 2011 at 1:15 AM, Benjamin Herrenschmidt >> <benh@kernel.crashing.org> wrote: >> >>> On Sun, 2011-02-13 at 00:52 +0200, Blue Swirl wrote: >>> >>>> On Sat, Feb 12, 2011 at 11:00 PM, Benjamin Herrenschmidt >>>> <benh@kernel.crashing.org> wrote: >>>> >>>>> On Sat, 2011-02-12 at 18:59 +0200, Blue Swirl wrote: >>>>> >>>>>> Actually I don't quite understand the need for vty layer, why not use >>>>>> the chardev here directly? >>>>>> >>>>> I'm not sure what you mean here... >>>>> >>>> Maybe it would be reasonable to leave h_put_term_char to spapr_hcall.c >>>> instead of moving those to a separate file. >>>> >>> Well, the VIO device instance gives the chardev instance which is all >>> nicely encapsulated inside spapr-vty... Also VIO devices tend to have >>> dedicated hcalls, not only VTY, so it makes a lot of sense to keep them >>> close to the rest of the VIO driver they belong to don't you think ? >>> >>> (Actually veth does, vscsi uses the more "generic" CRQ stuff which we've >>> added to the core VIO but you'll see that when we get those patches >>> ready, hopefully soon). >>> >> This is a bit of a special case, much like semihosting modes for m68k >> or ARM, or like MOL hacks which were removed recently. From QEMU point >> of view, the most natural way of handling this would be hypervisor >> implemented in the guest side (for example BIOS). Then the hypervisor >> would use normal IO (or virtio) to communicate with the host. If >> inside QEMU, the interface of the hypervisor to the devices needs some >> thought. We'd like to avoid ugly interfaces like vmmouse where a >> device probes CPU registers directly or spaghetti interfaces like >> APIC. >> > I really don't follow what you're saying here. Running the hypervisor > in the guest, rather than emulating its effect in qemu seems like an > awful lot of complexity for no clear reason. > In KVM for x86, instead of using a secondary interface (like vmmcall/vmcall), we do all of our paravirtualization using native hardware interfaces that we can trap (PIO/MMIO). IIUC, on Power, trapping MMIO is not possible due to the MMU mode that is currently used (PFs are delivered directly to the guest). So it's not really possible to switch from using hypercalls to using MMIO. What I would suggest is modelling hypercalls as another I/O address space for the processor. So instead of having a function pointer in the CPUState, introduce a: typedef void (HypercallFunc)(CPUState *env, void *opaque); /* register a hypercall handler */ void register_hypercall(target_ulong index, HypercallFunc *handler, void *opaque); void unregister_hypercall(target_ulong index); /* dispatch a hypercall */ void cpu_hypercall(CPUState *env, target_ulong index); This interface could also be used to implement hypercall based interfaces on s390 and x86. The arguments will have to be extracted from the CPU state but I don't think we'll really ever have common hypercall implementations anyway so that's not a huge problem. >> on real HW? >> > The interface already exists on real HW. It's described in the PAPR > document we keep mentioning. > Another thing to note is that the hypercall based I/O devices the interfaces that the current Power hypervisor uses so implementing this interface is necessary to support existing guests. Regards, Anthony Liguori
On 02/13/2011 06:40 AM, Alexander Graf wrote: > >> Ah, yeah. I'm still not sure what to do about it. I was going to >> fold the dynamic hcall registration into the patch set before >> upstreaming. But then something paulus said made me rethink whether >> the dynamic registration was a good idea. Still need to sort this out >> before the series is really ready. >> > We can surely move it to dynamic later on. I think the "proper" way would be to populate a qdev bus and have the individual hypercall receivers register themselves through -device creations. > Ignore the qdev bit for a moment. Hypercalls could be plausibly implemented as another I/O space from a processor so the thing to model off of would be PIO dispatch (cpu_outb and friends). From a qdev perspective, having a VIO bus makes sense. The details of which I/O spaces are uses are not as important from a device tree perspective. Regards, Anthony Liguori > Alex > > >
On 13.02.2011, at 16:08, Anthony Liguori wrote: > On 02/13/2011 05:12 AM, David Gibson wrote: >> On Sun, Feb 13, 2011 at 10:08:23AM +0200, Blue Swirl wrote: >> >>> On Sun, Feb 13, 2011 at 1:15 AM, Benjamin Herrenschmidt >>> <benh@kernel.crashing.org> wrote: >>> >>>> On Sun, 2011-02-13 at 00:52 +0200, Blue Swirl wrote: >>>> >>>>> On Sat, Feb 12, 2011 at 11:00 PM, Benjamin Herrenschmidt >>>>> <benh@kernel.crashing.org> wrote: >>>>> >>>>>> On Sat, 2011-02-12 at 18:59 +0200, Blue Swirl wrote: >>>>>> >>>>>>> Actually I don't quite understand the need for vty layer, why not use >>>>>>> the chardev here directly? >>>>>>> >>>>>> I'm not sure what you mean here... >>>>>> >>>>> Maybe it would be reasonable to leave h_put_term_char to spapr_hcall.c >>>>> instead of moving those to a separate file. >>>>> >>>> Well, the VIO device instance gives the chardev instance which is all >>>> nicely encapsulated inside spapr-vty... Also VIO devices tend to have >>>> dedicated hcalls, not only VTY, so it makes a lot of sense to keep them >>>> close to the rest of the VIO driver they belong to don't you think ? >>>> >>>> (Actually veth does, vscsi uses the more "generic" CRQ stuff which we've >>>> added to the core VIO but you'll see that when we get those patches >>>> ready, hopefully soon). >>>> >>> This is a bit of a special case, much like semihosting modes for m68k >>> or ARM, or like MOL hacks which were removed recently. From QEMU point >>> of view, the most natural way of handling this would be hypervisor >>> implemented in the guest side (for example BIOS). Then the hypervisor >>> would use normal IO (or virtio) to communicate with the host. If >>> inside QEMU, the interface of the hypervisor to the devices needs some >>> thought. We'd like to avoid ugly interfaces like vmmouse where a >>> device probes CPU registers directly or spaghetti interfaces like >>> APIC. >>> >> I really don't follow what you're saying here. Running the hypervisor >> in the guest, rather than emulating its effect in qemu seems like an >> awful lot of complexity for no clear reason. >> > > In KVM for x86, instead of using a secondary interface (like vmmcall/vmcall), we do all of our paravirtualization using native hardware interfaces that we can trap (PIO/MMIO). > > IIUC, on Power, trapping MMIO is not possible due to the MMU mode that is currently used (PFs are delivered directly to the guest). > > So it's not really possible to switch from using hypercalls to using MMIO. > > What I would suggest is modelling hypercalls as another I/O address space for the processor. So instead of having a function pointer in the CPUState, introduce a: > > typedef void (HypercallFunc)(CPUState *env, void *opaque); > > /* register a hypercall handler */ > void register_hypercall(target_ulong index, HypercallFunc *handler, void *opaque); > void unregister_hypercall(target_ulong index); > > /* dispatch a hypercall */ > void cpu_hypercall(CPUState *env, target_ulong index); > > This interface could also be used to implement hypercall based interfaces on s390 and x86. > > The arguments will have to be extracted from the CPU state but I don't think we'll really ever have common hypercall implementations anyway so that's not a huge problem. Is there enough common ground between the hypercall interfaces that this makes sense? It sounds nice on paper, but in the end the "hypercall not implemented" return codes differ, the argument interpretation differs and even the amount of return values differ. So at the end of the day, all this interface could do is call a machine helper function to evaluate the hypercall id from its register state and dispatch to that, leaving the rest to the individual hypercall function implementation. The implementations themselves also couldn't be reused. A PAPR hypercall simply doesn't work on x86. PIO and MMIO interfaces make sense to share - devices implemented using them could potentially be reused later by other platforms. For the hypercall devices, that doesn't work. > >>> on real HW? >>> >> The interface already exists on real HW. It's described in the PAPR >> document we keep mentioning. >> > > Another thing to note is that the hypercall based I/O devices the interfaces that the current Power hypervisor uses so implementing this interface is necessary to support existing guests. Yes, implementing the existing PAPR interfaces is crucial to run existing guests. Implementing it at the hypercall level is required if we ever want to run it with hardware accelerated KVM on ppc, as there hypercalls simply get forwarded to the hypervisor (kvm) which would pass them on to qemu. And since the interface is not nesting capable, running a hypervisor inside the guest doesn't work. Alex
On Sun, 2011-02-13 at 10:08 +0200, Blue Swirl wrote: > This is a bit of a special case, much like semihosting modes for m68k > or ARM, or like MOL hacks which were removed recently. From QEMU point > of view, the most natural way of handling this would be hypervisor > implemented in the guest side (for example BIOS). Then the hypervisor > would use normal IO (or virtio) to communicate with the host. If > inside QEMU, the interface of the hypervisor to the devices needs some > thought. We'd like to avoid ugly interfaces like vmmouse where a > device probes CPU registers directly or spaghetti interfaces like > APIC. I'm not sure I understand your point. We are emulating a guest running under pHyp, ie, qemu in that case is the hypervisor, and provides the same interfaces as pHyp does (the IBM proprietary hypervisor, aka sPAPR). This is how we enable booting existing kernels and distributions inside qemu/kvm. > Is the interface new design, or are you implementing what is used also > on real HW? We are implementing a subset of the interfaces implemented by pHyp today on "real HW". In the long run, when you throw KVM into the picture, on machines (hypothetical machines at this stage) where one can run a KVM has a hypervisor (currently you can only run pHyp on all released IBM machines), this will give you an environment that is compatible with pHyp and thus supports existing distributions and kernels. We -will- add support for the "real" virtio on top of that, but those will require modified guest kernels (and will provide better performances than the sPAPR emulation). But the sPAPR emulation is a necessary step to enable existing stuff to run. Cheers, Ben.
On Sun, 2011-02-13 at 14:15 +0200, Blue Swirl wrote: > > Maybe it would be more complex but also emulation accuracy would be > increased and the interfaces would be saner. We don't shortcut BIOS > and implement its services to OS in QEMU for other machines either. But that is not comparable. BIOS is comparable for example to Open Firmware and we do not 'emulate' OF, we will provide an implementation that runs inside the guest, just like you do for BIOS (SLOF based, tho people are welcome to play with OpenBIOS if they want, but SLOF is what we will provide and support). In this case, we are talking about a hypervisor which is somewhat a different beast. Sure you -could- run it into the guest, I suppose, if emulation accuracy was your ultimate goal. That would entail at least the followings: - Implement support for the complete "hypervisor" mode inside qemu - Re-implement a complete hypervisor compatible with pHyp An enormous amount of work, for a result that would have low performances and about zero interest to anybody. The goal here is to provide a runtime environment for kernels and distributions that is -compatible- with sPAPR/pHyp to enable existing distributions to operate in KVM. > I'd expect one problem with that approach though, the interface used > on real HW between the hypervisor and the underlying HW may be > undocumented, but then it could use for example existing virtio > devices. But what would be the point ? > One way to handle this could be to add the hypervisor interface now to > QEMU and switch to guest hypervisor when (if) it becomes available. > I'd just like to avoid duplication with virtio or messy interfaces > like vmport. Again, what would be the point ? Eventually, KVM will be available as an "alternate" hypervisor to pHyp which I suppose one could run entirely inside qemu once we add support for the HV mode to it, and that would somewhat do what you describe but that isn't what we are trying to get at here. Cheers, Ben.
On Sun, 2011-02-13 at 13:40 +0100, Alexander Graf wrote: > > We can surely move it to dynamic later on. I think the "proper" way > would be to populate a qdev bus and have the individual hypercall > receivers register themselves through -device creations. But Blue > really is the expert here :). Why would you want to go through a bus for all hcalls ? (ie including the ones that aren't device related ?). That doesn't quite "tick" but I'm sure I'm missing part of the picture here :-) A simple dispatch table based approach is the most efficient and simple way to do that (after a switch/case) in my opinion, this is more/less what we have done internally, with a pair of calls for "modules" to register hcalls if they need to. The hcalls numbers are fixed and specified in sPAPR. BTW. We are still missing in this picture RTAS. I suppose David is still cleaning up those patches. Basically, we use a 5 instruction trampoline that calls a private h-call, the RTAS emulation is entirely in qemu. This has to be done that way for various reasons, but essentially RTAS under pHyp also more/less turns into private pHyp calls under the hood. Cheers, Ben.
On 02/13/2011 09:56 AM, Alexander Graf wrote: > >> This interface could also be used to implement hypercall based interfaces on s390 and x86. >> >> The arguments will have to be extracted from the CPU state but I don't think we'll really ever have common hypercall implementations anyway so that's not a huge problem. >> > Is there enough common ground between the hypercall interfaces that this makes sense? Between the dispatch, registration, and tracing, yes. > It sounds nice on paper, but in the end the "hypercall not implemented" return codes differ, the argument interpretation differs and even the amount of return values differ. > Yes, that's why we pass CPUState. But keep in mind, CPUState needs to be sync'd before and after the invocation. > So at the end of the day, all this interface could do is call a machine helper function to evaluate the hypercall id from its register state and dispatch to that, leaving the rest to the individual hypercall function implementation. > > The implementations themselves also couldn't be reused. A PAPR hypercall simply doesn't work on x86. PIO and MMIO interfaces make sense to share - devices implemented using them could potentially be reused later by other platforms. For the hypercall devices, that doesn't work. > Yes, which is why I'm not advocating trying to unmarshal the calls or anything like that. But the dispatch, registration, tracing, and CPU sync'ing bits can all be reused. Regards, Anthony Liguori
On 02/13/2011 10:07 AM, Benjamin Herrenschmidt wrote: > On Sun, 2011-02-13 at 10:08 +0200, Blue Swirl wrote: > >> This is a bit of a special case, much like semihosting modes for m68k >> or ARM, or like MOL hacks which were removed recently. From QEMU point >> of view, the most natural way of handling this would be hypervisor >> implemented in the guest side (for example BIOS). Then the hypervisor >> would use normal IO (or virtio) to communicate with the host. If >> inside QEMU, the interface of the hypervisor to the devices needs some >> thought. We'd like to avoid ugly interfaces like vmmouse where a >> device probes CPU registers directly or spaghetti interfaces like >> APIC. >> > I'm not sure I understand your point. We are emulating a guest running > under pHyp, ie, qemu in that case is the hypervisor, and provides the > same interfaces as pHyp does (the IBM proprietary hypervisor, aka > sPAPR). This is how we enable booting existing kernels and distributions > inside qemu/kvm. > > >> Is the interface new design, or are you implementing what is used also >> on real HW? >> > We are implementing a subset of the interfaces implemented by pHyp today > on "real HW". In the long run, when you throw KVM into the picture, on > machines (hypothetical machines at this stage) where one can run a KVM > has a hypervisor (currently you can only run pHyp on all released IBM > machines), this will give you an environment that is compatible with > pHyp and thus supports existing distributions and kernels. > We try very, very hard to make our paravirtualization look like real hardware. When the paravirtualization interfaces are already defined, we have no choice in supporting those interfaces although sometimes we try to push that to firmware (like with Xenner). It's better from a security PoV. But in this case, we have no choice but to implement the paravirtualization interfaces in QEMU because of the way the hardware works and because these interfaces are already well defined. Regards, Anthony Liguori > We -will- add support for the "real" virtio on top of that, but those > will require modified guest kernels (and will provide better > performances than the sPAPR emulation). But the sPAPR emulation is a > necessary step to enable existing stuff to run. > > Cheers, > Ben. > > >
On 02/13/2011 10:17 AM, Benjamin Herrenschmidt wrote: > On Sun, 2011-02-13 at 13:40 +0100, Alexander Graf wrote: > >> We can surely move it to dynamic later on. I think the "proper" way >> would be to populate a qdev bus and have the individual hypercall >> receivers register themselves through -device creations. But Blue >> really is the expert here :). >> > Why would you want to go through a bus for all hcalls ? (ie including > the ones that aren't device related ?). That doesn't quite "tick" but > I'm sure I'm missing part of the picture here :-) > A virtual bus is just an interface. If all virtual devices that interact via hcalls would all reside on the same virtual bus, then having hypercalls registered through that interface makes sense because you can associate hypercalls with particular devices. This means that you can automatically deregister on device removal and things like that. But I don't think this will work out well. I think treating the hypercalls as a simple dispatch table just like ioport would make sense. Regards, Anthony Liguori > A simple dispatch table based approach is the most efficient and simple > way to do that (after a switch/case) in my opinion, this is more/less > what we have done internally, with a pair of calls for "modules" to > register hcalls if they need to. The hcalls numbers are fixed and > specified in sPAPR. > > BTW. We are still missing in this picture RTAS. I suppose David is still > cleaning up those patches. Basically, we use a 5 instruction trampoline > that calls a private h-call, the RTAS emulation is entirely in qemu. > This has to be done that way for various reasons, but essentially RTAS > under pHyp also more/less turns into private pHyp calls under the hood. > > Cheers, > Ben. > > > >
On Sun, 2011-02-13 at 10:48 -0600, Anthony Liguori wrote: > > We try very, very hard to make our paravirtualization look like real > hardware. Sure, that makes sense when you invent new paravirt interfaces, but that isn't the case. Note also that our current processors do not have the ability to emulate MMIOs in all cases, ie, when doing "real" KVM in HV mode, we cannot trap MMIO unless we redirect all page faults to the hypervisor, which comes at a cost. > When the paravirtualization interfaces are already defined, we have no > choice in supporting those interfaces although sometimes we try to > push that to firmware (like with Xenner). It's better from a > security PoV. > > But in this case, we have no choice but to implement the > paravirtualization interfaces in QEMU because of the way the hardware > works and because these interfaces are already well defined. Right. Now, in the long run, we might decide to "reflect" some of these back into some guest co-located FW or whatever of that kind, especially if we get to a point where linux virt-io becomes more prevalent and the sPAPR style IOs become purely legacy backward compat stubs, but we aren't there yet. Cheers, Ben.
On Sun, 2011-02-13 at 10:52 -0600, Anthony Liguori wrote: > > A virtual bus is just an interface. If all virtual devices that > interact via hcalls would all reside on the same virtual bus, then > having hypercalls registered through that interface makes sense > because > you can associate hypercalls with particular devices. This means > that > you can automatically deregister on device removal and things like > that. I see. Well, VIO related h-calls are only part of the picture here, I think we can live with having explicit de-registration if needed ;-) Besides the h-call is still implemented even if no device -instance- is currently plugged into the partition anyways. It just returns a (well defined per-hcall) error code if the instance handle passed to it is bogus. > But I don't think this will work out well. I think treating the > hypercalls as a simple dispatch table just like ioport would make > sense. Yup. Cheers, Ben.
On Sun, Feb 13, 2011 at 5:08 PM, Anthony Liguori <anthony@codemonkey.ws> wrote: > On 02/13/2011 05:12 AM, David Gibson wrote: >> >> On Sun, Feb 13, 2011 at 10:08:23AM +0200, Blue Swirl wrote: >> >>> >>> On Sun, Feb 13, 2011 at 1:15 AM, Benjamin Herrenschmidt >>> <benh@kernel.crashing.org> wrote: >>> >>>> >>>> On Sun, 2011-02-13 at 00:52 +0200, Blue Swirl wrote: >>>> >>>>> >>>>> On Sat, Feb 12, 2011 at 11:00 PM, Benjamin Herrenschmidt >>>>> <benh@kernel.crashing.org> wrote: >>>>> >>>>>> >>>>>> On Sat, 2011-02-12 at 18:59 +0200, Blue Swirl wrote: >>>>>> >>>>>>> >>>>>>> Actually I don't quite understand the need for vty layer, why not use >>>>>>> the chardev here directly? >>>>>>> >>>>>> >>>>>> I'm not sure what you mean here... >>>>>> >>>>> >>>>> Maybe it would be reasonable to leave h_put_term_char to spapr_hcall.c >>>>> instead of moving those to a separate file. >>>>> >>>> >>>> Well, the VIO device instance gives the chardev instance which is all >>>> nicely encapsulated inside spapr-vty... Also VIO devices tend to have >>>> dedicated hcalls, not only VTY, so it makes a lot of sense to keep them >>>> close to the rest of the VIO driver they belong to don't you think ? >>>> >>>> (Actually veth does, vscsi uses the more "generic" CRQ stuff which we've >>>> added to the core VIO but you'll see that when we get those patches >>>> ready, hopefully soon). >>>> >>> >>> This is a bit of a special case, much like semihosting modes for m68k >>> or ARM, or like MOL hacks which were removed recently. From QEMU point >>> of view, the most natural way of handling this would be hypervisor >>> implemented in the guest side (for example BIOS). Then the hypervisor >>> would use normal IO (or virtio) to communicate with the host. If >>> inside QEMU, the interface of the hypervisor to the devices needs some >>> thought. We'd like to avoid ugly interfaces like vmmouse where a >>> device probes CPU registers directly or spaghetti interfaces like >>> APIC. >>> >> >> I really don't follow what you're saying here. Running the hypervisor >> in the guest, rather than emulating its effect in qemu seems like an >> awful lot of complexity for no clear reason. >> > > In KVM for x86, instead of using a secondary interface (like > vmmcall/vmcall), we do all of our paravirtualization using native hardware > interfaces that we can trap (PIO/MMIO). > > IIUC, on Power, trapping MMIO is not possible due to the MMU mode that is > currently used (PFs are delivered directly to the guest). > > So it's not really possible to switch from using hypercalls to using MMIO. > > What I would suggest is modelling hypercalls as another I/O address space > for the processor. So instead of having a function pointer in the CPUState, > introduce a: > > typedef void (HypercallFunc)(CPUState *env, void *opaque); > > /* register a hypercall handler */ > void register_hypercall(target_ulong index, HypercallFunc *handler, void > *opaque); > void unregister_hypercall(target_ulong index); > > /* dispatch a hypercall */ > void cpu_hypercall(CPUState *env, target_ulong index); > > This interface could also be used to implement hypercall based interfaces on > s390 and x86. > > The arguments will have to be extracted from the CPU state but I don't think > we'll really ever have common hypercall implementations anyway so that's not > a huge problem. Nice idea. Then the part handling CPUState probably should belong to target-ppc/ rather than hw/.
On 02/13/2011 12:29 PM, Blue Swirl wrote: > On Sun, Feb 13, 2011 at 5:08 PM, Anthony Liguori<anthony@codemonkey.ws> wrote: > >> >> In KVM for x86, instead of using a secondary interface (like >> vmmcall/vmcall), we do all of our paravirtualization using native hardware >> interfaces that we can trap (PIO/MMIO). >> >> IIUC, on Power, trapping MMIO is not possible due to the MMU mode that is >> currently used (PFs are delivered directly to the guest). >> >> So it's not really possible to switch from using hypercalls to using MMIO. >> >> What I would suggest is modelling hypercalls as another I/O address space >> for the processor. So instead of having a function pointer in the CPUState, >> introduce a: >> >> typedef void (HypercallFunc)(CPUState *env, void *opaque); >> >> /* register a hypercall handler */ >> void register_hypercall(target_ulong index, HypercallFunc *handler, void >> *opaque); >> void unregister_hypercall(target_ulong index); >> >> /* dispatch a hypercall */ >> void cpu_hypercall(CPUState *env, target_ulong index); >> >> This interface could also be used to implement hypercall based interfaces on >> s390 and x86. >> >> The arguments will have to be extracted from the CPU state but I don't think >> we'll really ever have common hypercall implementations anyway so that's not >> a huge problem. >> > Nice idea. Then the part handling CPUState probably should belong to > target-ppc/ rather than hw/. > Would be nice to have the target-ppc/ hypercall handlers call into a higher level VIO interface that didn't deal directly with CPUState. The actual hardware emulation would then be implemented in hw/ and would not be compiled for a specific target. That helps avoid future sloppiness. Regards, Anthony Liguori
On Sun, Feb 13, 2011 at 09:08:22AM -0600, Anthony Liguori wrote: > On 02/13/2011 05:12 AM, David Gibson wrote: > >On Sun, Feb 13, 2011 at 10:08:23AM +0200, Blue Swirl wrote: > >>On Sun, Feb 13, 2011 at 1:15 AM, Benjamin Herrenschmidt [snip] > In KVM for x86, instead of using a secondary interface (like > vmmcall/vmcall), we do all of our paravirtualization using native > hardware interfaces that we can trap (PIO/MMIO). > > IIUC, on Power, trapping MMIO is not possible due to the MMU mode > that is currently used (PFs are delivered directly to the guest). > > So it's not really possible to switch from using hypercalls to using MMIO. That's correct. > What I would suggest is modelling hypercalls as another I/O address > space for the processor. So instead of having a function pointer in > the CPUState, introduce a: > > typedef void (HypercallFunc)(CPUState *env, void *opaque); > > /* register a hypercall handler */ > void register_hypercall(target_ulong index, HypercallFunc *handler, > void *opaque); > void unregister_hypercall(target_ulong index); > > /* dispatch a hypercall */ > void cpu_hypercall(CPUState *env, target_ulong index); Well, I can certainly implement dynamic registration - in fact I've done that, I just need to fold it into the earlier part of the patch series. But the only "address" we have for this hypercall address space is the hypercall number, and it's not architected where that will be supplied. So we'd still need a per-platform hook to extract the index from the CPUState. > This interface could also be used to implement hypercall based > interfaces on s390 and x86. > > The arguments will have to be extracted from the CPU state but I > don't think we'll really ever have common hypercall implementations > anyway so that's not a huge problem. > > >>on real HW? > >The interface already exists on real HW. It's described in the PAPR > >document we keep mentioning. > > Another thing to note is that the hypercall based I/O devices the > interfaces that the current Power hypervisor uses so implementing > this interface is necessary to support existing guests.
On Sun, Feb 13, 2011 at 08:29:05PM +0200, Blue Swirl wrote: > On Sun, Feb 13, 2011 at 5:08 PM, Anthony Liguori <anthony@codemonkey.ws> wrote: > > On 02/13/2011 05:12 AM, David Gibson wrote: [snip] > > The arguments will have to be extracted from the CPU state but I don't think > > we'll really ever have common hypercall implementations anyway so that's not > > a huge problem. > > Nice idea. Then the part handling CPUState probably should belong to > target-ppc/ rather than hw/. Doesn't work. Different hypervisors may have arguments - even the hcall number itself - arranged differently in the registers. My earlier drafts had this in target-ppc/; I moved it to hw/ for a reason.
diff --git a/Makefile.target b/Makefile.target index e0796ba..fe232da 100644 --- a/Makefile.target +++ b/Makefile.target @@ -232,7 +232,8 @@ obj-ppc-y += ppc_oldworld.o # NewWorld PowerMac obj-ppc-y += ppc_newworld.o # IBM pSeries (sPAPR) -obj-ppc-y += spapr.o spapr_hcall.o +obj-ppc-y += spapr.o spapr_hcall.o spapr_vio.o +obj-ppc-y += spapr_vty.o # PowerPC 4xx boards obj-ppc-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o obj-ppc-y += ppc440.o ppc440_bamboo.o diff --git a/hw/spapr.c b/hw/spapr.c index 8aca4e0..da61061 100644 --- a/hw/spapr.c +++ b/hw/spapr.c @@ -37,6 +37,7 @@ #include "net.h" #include "blockdev.h" #include "hw/spapr.h" +#include "hw/spapr_vio.h" #include <libfdt.h> @@ -49,6 +50,7 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize, const char *cpu_model, CPUState *envs[], + sPAPREnvironment *spapr, target_phys_addr_t initrd_base, target_phys_addr_t initrd_size, const char *kernel_cmdline) @@ -59,6 +61,7 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize, uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size); int i; char *modelname; + int ret; #define _FDT(exp) \ do { \ @@ -151,9 +154,28 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize, _FDT((fdt_end_node(fdt))); + /* vdevice */ + _FDT((fdt_begin_node(fdt, "vdevice"))); + + _FDT((fdt_property_string(fdt, "device_type", "vdevice"))); + _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice"))); + _FDT((fdt_property_cell(fdt, "#address-cells", 0x1))); + _FDT((fdt_property_cell(fdt, "#size-cells", 0x0))); + + _FDT((fdt_end_node(fdt))); + _FDT((fdt_end_node(fdt))); /* close root node */ _FDT((fdt_finish(fdt))); + /* re-expand to allow for further tweaks */ + _FDT((fdt_open_into(fdt, fdt, FDT_MAX_SIZE))); + + ret = spapr_populate_vdevice(spapr->vio_bus, fdt); + if (ret < 0) + fprintf(stderr, "couldn't setup vio devices in fdt\n"); + + _FDT((fdt_pack(fdt))); + if (fdt_size) *fdt_size = fdt_totalsize(fdt); @@ -211,6 +233,12 @@ static void ppc_spapr_init (ram_addr_t ram_size, ram_offset = qemu_ram_alloc(NULL, "ppc_spapr.ram", ram_size); cpu_register_physical_memory(0, ram_size, ram_offset); + spapr->vio_bus = spapr_vio_bus_init(); + + for (i = 0; i < MAX_SERIAL_PORTS; i++) + if (serial_hds[i]) + spapr_vty_create(spapr->vio_bus, i, serial_hds[i]); + if (kernel_filename) { uint64_t lowaddr = 0; @@ -242,7 +270,7 @@ static void ppc_spapr_init (ram_addr_t ram_size, } /* load fdt */ - fdt = spapr_create_fdt(&fdt_size, ram_size, cpu_model, &env, + fdt = spapr_create_fdt(&fdt_size, ram_size, cpu_model, &env, spapr, initrd_base, initrd_size, kernel_cmdline); if (!fdt) { @@ -267,6 +295,7 @@ static QEMUMachine spapr_machine = { .desc = "pSeries Logical Partition (PAPR compliant)", .init = ppc_spapr_init, .max_cpus = 1, + .no_parallel = 1, .no_vga = 1, .no_parallel = 1, }; diff --git a/hw/spapr.h b/hw/spapr.h index dae9617..168511f 100644 --- a/hw/spapr.h +++ b/hw/spapr.h @@ -1,7 +1,10 @@ #if !defined (__HW_SPAPR_H__) #define __HW_SPAPR_H__ +struct VIOsPAPRBus; + typedef struct sPAPREnvironment { + struct VIOsPAPRBus *vio_bus; } sPAPREnvironment; #define H_SUCCESS 0 @@ -237,4 +240,11 @@ typedef struct sPAPREnvironment { target_ulong spapr_hypercall(CPUState *env, sPAPREnvironment *spapr, target_ulong token, target_ulong *args); +target_ulong h_put_term_char(sPAPREnvironment *spapr, + target_ulong termno, target_ulong len, + target_ulong char0_7, target_ulong char8_15); +target_ulong h_get_term_char(sPAPREnvironment *spapr, + target_ulong termno, target_ulong *len, + target_ulong *char0_7, target_ulong *char8_15); + #endif /* !defined (__HW_SPAPR_H__) */ diff --git a/hw/spapr_hcall.c b/hw/spapr_hcall.c index c99c345..e2ed9cf 100644 --- a/hw/spapr_hcall.c +++ b/hw/spapr_hcall.c @@ -3,19 +3,6 @@ #include "qemu-char.h" #include "hw/spapr.h" -static target_ulong h_put_term_char(target_ulong termno, target_ulong len, - target_ulong char0_7, target_ulong char8_15) -{ - uint8_t buf[16]; - - *((uint64_t *)buf) = cpu_to_be64(char0_7); - *((uint64_t *)buf + 1) = cpu_to_be64(char8_15); - - qemu_chr_write(serial_hds[0], buf, len); - - return 0; -} - target_ulong spapr_hypercall(CPUState *env, sPAPREnvironment *spapr, target_ulong token, target_ulong *args) { @@ -29,7 +16,11 @@ target_ulong spapr_hypercall(CPUState *env, sPAPREnvironment *spapr, switch (token) { case H_PUT_TERM_CHAR: - r = h_put_term_char(args[0], args[1], args[2], args[3]); + r = h_put_term_char(spapr, args[0], args[1], args[2], args[3]); + break; + + case H_GET_TERM_CHAR: + r = h_get_term_char(spapr, args[0], &args[0], &args[1], &args[2]); break; default: diff --git a/hw/spapr_vio.c b/hw/spapr_vio.c new file mode 100644 index 0000000..d9c7292 --- /dev/null +++ b/hw/spapr_vio.c @@ -0,0 +1,191 @@ +/* + * QEMU sPAPR VIO code + * + * Copyright (c) 2010 David Gibson, IBM Corporation <david@gibson.dropbear.id.au> + * Based on the s390 virtio bus code: + * Copyright (c) 2009 Alexander Graf <agraf@suse.de> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "hw.h" +#include "sysemu.h" +#include "boards.h" +#include "monitor.h" +#include "loader.h" +#include "elf.h" +#include "hw/sysbus.h" +#include "kvm.h" +#include "device_tree.h" + +#include "hw/spapr.h" +#include "hw/spapr_vio.h" + +#ifdef CONFIG_FDT +#include <libfdt.h> +#endif /* CONFIG_FDT */ + +/* #define DEBUG_SPAPR */ + +#ifdef DEBUG_SPAPR +#define dprintf(fmt, ...) \ + do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) +#else +#define dprintf(fmt, ...) \ + do { } while (0) +#endif + +struct BusInfo spapr_vio_bus_info = { + .name = "spapr-vio", + .size = sizeof(VIOsPAPRBus), +}; + +VIOsPAPRDevice *spapr_vio_find_by_reg(VIOsPAPRBus *bus, uint32_t reg) +{ + DeviceState *qdev; + VIOsPAPRDevice *dev = NULL; + + QLIST_FOREACH(qdev, &bus->bus.children, sibling) { + dev = (VIOsPAPRDevice *)qdev; + if (dev->reg == reg) + break; + } + + return dev; +} + +VIOsPAPRBus *spapr_vio_bus_init(void) +{ + VIOsPAPRBus *bus; + BusState *_bus; + DeviceState *dev; + + /* Create bridge device */ + dev = qdev_create(NULL, "spapr-vio-bridge"); + qdev_init_nofail(dev); + + /* Create bus on bridge device */ + + _bus = qbus_create(&spapr_vio_bus_info, dev, "spapr-vio"); + bus = DO_UPCAST(VIOsPAPRBus, bus, _bus); + + return bus; +} + +#ifdef CONFIG_FDT +static int vio_make_devnode(VIOsPAPRDevice *dev, + void *fdt) +{ + VIOsPAPRDeviceInfo *info = (VIOsPAPRDeviceInfo *)dev->qdev.info; + int vdevice_off, node_off; + int ret; + + vdevice_off = fdt_path_offset(fdt, "/vdevice"); + if (vdevice_off < 0) + return vdevice_off; + + node_off = fdt_add_subnode(fdt, vdevice_off, dev->qdev.id); + if (node_off < 0) + return node_off; + + ret = fdt_setprop_cell(fdt, node_off, "reg", dev->reg); + if (ret < 0) + return ret; + + if (info->dt_type) { + ret = fdt_setprop_string(fdt, node_off, "device_type", + info->dt_type); + if (ret < 0) + return ret; + } + + if (info->dt_compatible) { + ret = fdt_setprop_string(fdt, node_off, "compatible", + info->dt_compatible); + if (ret < 0) + return ret; + } + + if (info->devnode) { + ret = (info->devnode)(dev, fdt, node_off); + if (ret < 0) + return ret; + } + + return node_off; +} +#endif /* CONFIG_FDT */ + +static int spapr_vio_busdev_init(DeviceState *dev, DeviceInfo *info) +{ + VIOsPAPRDeviceInfo *_info = (VIOsPAPRDeviceInfo *)info; + VIOsPAPRDevice *_dev = (VIOsPAPRDevice *)dev; + char *id; + + if (asprintf(&id, "%s@%x", _info->dt_name, _dev->reg) < 0) + return -1; + + _dev->qdev.id = id; + + return _info->init(_dev); +} + +void spapr_vio_bus_register_withprop(VIOsPAPRDeviceInfo *info) +{ + info->qdev.init = spapr_vio_busdev_init; + info->qdev.bus_info = &spapr_vio_bus_info; + + assert(info->qdev.size >= sizeof(VIOsPAPRDevice)); + qdev_register(&info->qdev); +} + +static int spapr_vio_bridge_init(SysBusDevice *dev) +{ + /* nothing */ + return 0; +} + +static SysBusDeviceInfo spapr_vio_bridge_info = { + .init = spapr_vio_bridge_init, + .qdev.name = "spapr-vio-bridge", + .qdev.size = sizeof(SysBusDevice), + .qdev.no_user = 1, +}; + +static void spapr_vio_register_devices(void) +{ + sysbus_register_withprop(&spapr_vio_bridge_info); +} + +device_init(spapr_vio_register_devices) + +#ifdef CONFIG_FDT + +int spapr_populate_vdevice(VIOsPAPRBus *bus, void *fdt) +{ + DeviceState *qdev; + int ret = 0; + + QLIST_FOREACH(qdev, &bus->bus.children, sibling) { + VIOsPAPRDevice *dev = (VIOsPAPRDevice *)qdev; + + ret = vio_make_devnode(dev, fdt); + + if (ret < 0) + return ret; + } + + return 0; +} +#endif /* CONFIG_FDT */ diff --git a/hw/spapr_vio.h b/hw/spapr_vio.h new file mode 100644 index 0000000..fb5e301 --- /dev/null +++ b/hw/spapr_vio.h @@ -0,0 +1,49 @@ +#ifndef _HW_SPAPR_VIO_H +#define _HW_SPAPR_VIO_H +/* + * QEMU sPAPR VIO bus definitions + * + * Copyright (c) 2010 David Gibson, IBM Corporation <david@gibson.dropbear.id.au> + * Based on the s390 virtio bus definitions: + * Copyright (c) 2009 Alexander Graf <agraf@suse.de> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +typedef struct VIOsPAPRDevice { + DeviceState qdev; + uint32_t reg; +} VIOsPAPRDevice; + +typedef struct VIOsPAPRBus { + BusState bus; +} VIOsPAPRBus; + +typedef struct { + DeviceInfo qdev; + const char *dt_name, *dt_type, *dt_compatible; + int (*init)(VIOsPAPRDevice *dev); + int (*devnode)(VIOsPAPRDevice *dev, void *fdt, int node_off); +} VIOsPAPRDeviceInfo; + +extern VIOsPAPRBus *spapr_vio_bus_init(void); +extern VIOsPAPRDevice *spapr_vio_find_by_reg(VIOsPAPRBus *bus, uint32_t reg); +extern void spapr_vio_bus_register_withprop(VIOsPAPRDeviceInfo *info); +extern int spapr_populate_vdevice(VIOsPAPRBus *bus, void *fdt); + +void vty_putchars(VIOsPAPRDevice *sdev, uint8_t *buf, int len); +void spapr_vty_create(VIOsPAPRBus *bus, + uint32_t reg, CharDriverState *chardev); + +#endif /* _HW_SPAPR_VIO_H */ diff --git a/hw/spapr_vty.c b/hw/spapr_vty.c new file mode 100644 index 0000000..9a2dc0b --- /dev/null +++ b/hw/spapr_vty.c @@ -0,0 +1,132 @@ +#include "qdev.h" +#include "qemu-char.h" +#include "hw/spapr.h" +#include "hw/spapr_vio.h" + +#define VTERM_BUFSIZE 16 + +typedef struct VIOsPAPRVTYDevice { + VIOsPAPRDevice sdev; + CharDriverState *chardev; + uint32_t in, out; + uint8_t buf[VTERM_BUFSIZE]; +} VIOsPAPRVTYDevice; + +static int vty_can_receive(void *opaque) +{ + VIOsPAPRVTYDevice *dev = (VIOsPAPRVTYDevice *)opaque; + + return (dev->in - dev->out) < VTERM_BUFSIZE; +} + +static void vty_receive(void *opaque, const uint8_t *buf, int size) +{ + VIOsPAPRVTYDevice *dev = (VIOsPAPRVTYDevice *)opaque; + int i; + + for (i = 0; i < size; i++) { + assert((dev->in - dev->out) < VTERM_BUFSIZE); + dev->buf[dev->in++ % VTERM_BUFSIZE] = buf[i]; + } +} + +static int vty_getchars(VIOsPAPRDevice *sdev, uint8_t *buf, int max) +{ + VIOsPAPRVTYDevice *dev = (VIOsPAPRVTYDevice *)sdev; + int n = 0; + + while ((n < max) && (dev->out != dev->in)) + buf[n++] = dev->buf[dev->out++ % VTERM_BUFSIZE]; + + return n; +} + +void vty_putchars(VIOsPAPRDevice *sdev, uint8_t *buf, int len) +{ + VIOsPAPRVTYDevice *dev = (VIOsPAPRVTYDevice *)sdev; + + /* FIXME: should check the qemu_chr_write() return value */ + qemu_chr_write(dev->chardev, buf, len); +} + +static int spapr_vty_init(VIOsPAPRDevice *sdev) +{ + VIOsPAPRVTYDevice *dev = (VIOsPAPRVTYDevice *)sdev; + + qemu_chr_add_handlers(dev->chardev, vty_can_receive, + vty_receive, NULL, dev); + + return 0; +} + +target_ulong h_put_term_char(sPAPREnvironment *spapr, + target_ulong termno, target_ulong len, + target_ulong char0_7, target_ulong char8_15) +{ + VIOsPAPRDevice *sdev = spapr_vio_find_by_reg(spapr->vio_bus, termno); + uint8_t buf[16]; + + if (!sdev) + return H_PARAMETER; + + if (len > 16) + return H_PARAMETER; + + *((uint64_t *)buf) = cpu_to_be64(char0_7); + *((uint64_t *)buf + 1) = cpu_to_be64(char8_15); + + vty_putchars(sdev, buf, len); + + return 0; +} + +target_ulong h_get_term_char(sPAPREnvironment *spapr, + target_ulong termno, target_ulong *len, + target_ulong *char0_7, target_ulong *char8_15) +{ + VIOsPAPRDevice *sdev = spapr_vio_find_by_reg(spapr->vio_bus, termno); + uint8_t buf[16]; + + if (!sdev) + return H_PARAMETER; + + *len = vty_getchars(sdev, buf, sizeof(buf)); + if (*len < 16) + memset(buf + *len, 0, 16 - *len); + + *char0_7 = be64_to_cpu(*((uint64_t *)buf)); + *char8_15 = be64_to_cpu(*((uint64_t *)buf + 1)); + + return H_SUCCESS; +} + +void spapr_vty_create(VIOsPAPRBus *bus, + uint32_t reg, CharDriverState *chardev) +{ + DeviceState *dev; + + dev = qdev_create(&bus->bus, "spapr-vty"); + qdev_prop_set_uint32(dev, "reg", reg); + qdev_prop_set_chr(dev, "chardev", chardev); + qdev_init_nofail(dev); +} + +static VIOsPAPRDeviceInfo spapr_vty = { + .init = spapr_vty_init, + .dt_name = "vty", + .dt_type = "serial", + .dt_compatible = "hvterm1", + .qdev.name = "spapr-vty", + .qdev.size = sizeof(VIOsPAPRVTYDevice), + .qdev.props = (Property[]) { + DEFINE_PROP_UINT32("reg", VIOsPAPRDevice, reg, 0), + DEFINE_PROP_CHR("chardev", VIOsPAPRVTYDevice, chardev), + DEFINE_PROP_END_OF_LIST(), + }, +}; + +static void spapr_vty_register(void) +{ + spapr_vio_bus_register_withprop(&spapr_vty); +} +device_init(spapr_vty_register);
This extends the "pseries" (PAPR) machine to include a virtual IO bus supporting the PAPR defined hypercall based virtual IO mechanisms. So far only one VIO device is provided, the vty / vterm, providing a full console (polled only, for now). Signed-off-by: David Gibson <dwg@au1.ibm.com> --- Makefile.target | 3 +- hw/spapr.c | 31 +++++++++- hw/spapr.h | 10 +++ hw/spapr_hcall.c | 19 ++---- hw/spapr_vio.c | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ hw/spapr_vio.h | 49 ++++++++++++++ hw/spapr_vty.c | 132 +++++++++++++++++++++++++++++++++++++ 7 files changed, 419 insertions(+), 16 deletions(-) create mode 100644 hw/spapr_vio.c create mode 100644 hw/spapr_vio.h create mode 100644 hw/spapr_vty.c