diff mbox

[V9,3/8] Introduce HostPCIDevice to access a pci device on the host.

Message ID 1332354545-17044-4-git-send-email-anthony.perard@citrix.com
State New
Headers show

Commit Message

Anthony PERARD March 21, 2012, 6:29 p.m. UTC
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 Makefile.target      |    3 +
 hw/host-pci-device.c |  278 ++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/host-pci-device.h |   75 ++++++++++++++
 3 files changed, 356 insertions(+), 0 deletions(-)
 create mode 100644 hw/host-pci-device.c
 create mode 100644 hw/host-pci-device.h

Comments

Michael S. Tsirkin March 21, 2012, 6:32 p.m. UTC | #1
On Wed, Mar 21, 2012 at 06:29:00PM +0000, Anthony PERARD wrote:
> Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
> Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>

Host is really linux sysfs, right?
Better reflect this in naming.
Or maybe even xen-host-pci-device?

> ---
>  Makefile.target      |    3 +
>  hw/host-pci-device.c |  278 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/host-pci-device.h |   75 ++++++++++++++
>  3 files changed, 356 insertions(+), 0 deletions(-)
>  create mode 100644 hw/host-pci-device.c
>  create mode 100644 hw/host-pci-device.h
> 
> diff --git a/Makefile.target b/Makefile.target
> index 63cf769..0ccfd5b 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -232,6 +232,9 @@ obj-$(CONFIG_NO_XEN) += xen-stub.o
>  
>  obj-i386-$(CONFIG_XEN) += xen_platform.o
>  
> +# Xen PCI Passthrough
> +obj-i386-$(CONFIG_XEN_PCI_PASSTHROUGH) += host-pci-device.o
> +
>  # Inter-VM PCI shared memory
>  CONFIG_IVSHMEM =
>  ifeq ($(CONFIG_KVM), y)
> diff --git a/hw/host-pci-device.c b/hw/host-pci-device.c
> new file mode 100644
> index 0000000..3dacb30
> --- /dev/null
> +++ b/hw/host-pci-device.c
> @@ -0,0 +1,278 @@
> +/*
> + * Copyright (C) 2011       Citrix Ltd.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu-common.h"
> +#include "host-pci-device.h"
> +
> +#define PCI_MAX_EXT_CAP \
> +    ((PCIE_CONFIG_SPACE_SIZE - PCI_CONFIG_SPACE_SIZE) / (PCI_CAP_SIZEOF + 4))
> +
> +enum error_code {
> +    ERROR_SYNTAX = 1,
> +};
> +
> +static int path_to(const HostPCIDevice *d,
> +                   const char *name, char *buf, ssize_t size)
> +{
> +    return snprintf(buf, size, "/sys/bus/pci/devices/%04x:%02x:%02x.%x/%s",
> +                    d->domain, d->bus, d->dev, d->func, name);
> +}
> +
> +static int get_resource(HostPCIDevice *d)
> +{
> +    int i, rc = 0;
> +    FILE *f;
> +    char path[PATH_MAX];
> +    unsigned long long start, end, flags, size;
> +
> +    path_to(d, "resource", path, sizeof (path));
> +    f = fopen(path, "r");
> +    if (!f) {
> +        fprintf(stderr, "Error: Can't open %s: %s\n", path, strerror(errno));
> +        return -errno;
> +    }
> +
> +    for (i = 0; i < PCI_NUM_REGIONS; i++) {
> +        if (fscanf(f, "%llx %llx %llx", &start, &end, &flags) != 3) {
> +            fprintf(stderr, "Error: Syntax error in %s\n", path);
> +            rc = ERROR_SYNTAX;
> +            break;
> +        }
> +        if (start) {
> +            size = end - start + 1;
> +        } else {
> +            size = 0;
> +        }
> +
> +        if (i < PCI_ROM_SLOT) {
> +            d->io_regions[i].base_addr = start;
> +            d->io_regions[i].size = size;
> +            d->io_regions[i].flags = flags;
> +        } else {
> +            d->rom.base_addr = start;
> +            d->rom.size = size;
> +            d->rom.flags = flags;
> +        }
> +    }
> +
> +    fclose(f);
> +    return rc;
> +}
> +
> +static int get_hex_value(HostPCIDevice *d, const char *name,
> +                         unsigned long *pvalue)
> +{
> +    char path[PATH_MAX];
> +    FILE *f;
> +    unsigned long value;
> +
> +    path_to(d, name, path, sizeof (path));
> +    f = fopen(path, "r");
> +    if (!f) {
> +        fprintf(stderr, "Error: Can't open %s: %s\n", path, strerror(errno));
> +        return -errno;
> +    }
> +    if (fscanf(f, "%lx\n", &value) != 1) {
> +        fprintf(stderr, "Error: Syntax error in %s\n", path);
> +        fclose(f);
> +        return ERROR_SYNTAX;
> +    }
> +    fclose(f);
> +    *pvalue = value;
> +    return 0;
> +}
> +
> +static bool pci_dev_is_virtfn(HostPCIDevice *d)
> +{
> +    char path[PATH_MAX];
> +    struct stat buf;
> +
> +    path_to(d, "physfn", path, sizeof (path));
> +    return !stat(path, &buf);
> +}
> +
> +static int host_pci_config_fd(HostPCIDevice *d)
> +{
> +    char path[PATH_MAX];
> +
> +    if (d->config_fd < 0) {
> +        path_to(d, "config", path, sizeof (path));
> +        d->config_fd = open(path, O_RDWR);
> +        if (d->config_fd < 0) {
> +            fprintf(stderr, "HostPCIDevice: Can not open '%s': %s\n",
> +                    path, strerror(errno));
> +        }
> +    }
> +    return d->config_fd;
> +}
> +static int host_pci_config_read(HostPCIDevice *d, int pos, void *buf, int len)
> +{
> +    int fd = host_pci_config_fd(d);
> +    int res = 0;
> +
> +again:
> +    res = pread(fd, buf, len, pos);
> +    if (res != len) {
> +        if (res < 0 && (errno == EINTR || errno == EAGAIN)) {
> +            goto again;
> +        }
> +        fprintf(stderr, "%s: read failed: %s (fd: %i)\n",
> +                __func__, strerror(errno), fd);
> +        return -errno;
> +    }
> +    return 0;
> +}
> +static int host_pci_config_write(HostPCIDevice *d,
> +                                 int pos, const void *buf, int len)
> +{
> +    int fd = host_pci_config_fd(d);
> +    int res = 0;
> +
> +again:
> +    res = pwrite(fd, buf, len, pos);
> +    if (res != len) {
> +        if (res < 0 && (errno == EINTR || errno == EAGAIN)) {
> +            goto again;
> +        }
> +        fprintf(stderr, "%s: write failed: %s\n",
> +                __func__, strerror(errno));
> +        return -errno;
> +    }
> +    return 0;
> +}
> +
> +int host_pci_get_byte(HostPCIDevice *d, int pos, uint8_t *p)
> +{
> +    uint8_t buf;
> +    int rc = host_pci_config_read(d, pos, &buf, 1);
> +    if (rc == 0) {
> +        *p = buf;
> +    }
> +    return rc;
> +}
> +int host_pci_get_word(HostPCIDevice *d, int pos, uint16_t *p)
> +{
> +    uint16_t buf;
> +    int rc = host_pci_config_read(d, pos, &buf, 2);
> +    if (rc == 0) {
> +        *p = le16_to_cpu(buf);
> +    }
> +    return rc;
> +}
> +int host_pci_get_long(HostPCIDevice *d, int pos, uint32_t *p)
> +{
> +    uint32_t buf;
> +    int rc = host_pci_config_read(d, pos, &buf, 4);
> +    if (rc == 0) {
> +        *p = le32_to_cpu(buf);
> +    }
> +    return rc;
> +}
> +int host_pci_get_block(HostPCIDevice *d, int pos, uint8_t *buf, int len)
> +{
> +    return host_pci_config_read(d, pos, buf, len);
> +}
> +
> +int host_pci_set_byte(HostPCIDevice *d, int pos, uint8_t data)
> +{
> +    return host_pci_config_write(d, pos, &data, 1);
> +}
> +int host_pci_set_word(HostPCIDevice *d, int pos, uint16_t data)
> +{
> +    data = cpu_to_le16(data);
> +    return host_pci_config_write(d, pos, &data, 2);
> +}
> +int host_pci_set_long(HostPCIDevice *d, int pos, uint32_t data)
> +{
> +    data = cpu_to_le32(data);
> +    return host_pci_config_write(d, pos, &data, 4);
> +}
> +int host_pci_set_block(HostPCIDevice *d, int pos, uint8_t *buf, int len)
> +{
> +    return host_pci_config_write(d, pos, buf, len);
> +}
> +
> +uint32_t host_pci_find_ext_cap_offset(HostPCIDevice *d, uint32_t cap)
> +{
> +    uint32_t header = 0;
> +    int max_cap = PCI_MAX_EXT_CAP;
> +    int pos = PCI_CONFIG_SPACE_SIZE;
> +
> +    do {
> +        if (host_pci_get_long(d, pos, &header)) {
> +            break;
> +        }
> +        /*
> +         * If we have no capabilities, this is indicated by cap ID,
> +         * cap version and next pointer all being 0.
> +         */
> +        if (header == 0) {
> +            break;
> +        }
> +
> +        if (PCI_EXT_CAP_ID(header) == cap) {
> +            return pos;
> +        }
> +
> +        pos = PCI_EXT_CAP_NEXT(header);
> +        if (pos < PCI_CONFIG_SPACE_SIZE) {
> +            break;
> +        }
> +
> +        max_cap--;
> +    } while (max_cap > 0);
> +
> +    return 0;
> +}
> +
> +HostPCIDevice *host_pci_device_get(uint8_t bus, uint8_t dev, uint8_t func)
> +{
> +    HostPCIDevice *d = NULL;
> +    unsigned long v = 0;
> +
> +    d = g_new0(HostPCIDevice, 1);
> +
> +    d->config_fd = -1;
> +    d->domain = 0;
> +    d->bus = bus;
> +    d->dev = dev;
> +    d->func = func;
> +
> +    if (host_pci_config_fd(d) == -1) {
> +        goto error;
> +    }
> +    if (get_resource(d) != 0) {
> +        goto error;
> +    }
> +
> +    if (get_hex_value(d, "vendor", &v)) {
> +        goto error;
> +    }
> +    d->vendor_id = v;
> +    if (get_hex_value(d, "device", &v)) {
> +        goto error;
> +    }
> +    d->device_id = v;
> +    d->is_virtfn = pci_dev_is_virtfn(d);
> +
> +    return d;
> +error:
> +    if (d->config_fd >= 0) {
> +        close(d->config_fd);
> +    }
> +    g_free(d);
> +    return NULL;
> +}
> +
> +void host_pci_device_put(HostPCIDevice *d)
> +{
> +    if (d->config_fd >= 0) {
> +        close(d->config_fd);
> +    }
> +    g_free(d);
> +}
> diff --git a/hw/host-pci-device.h b/hw/host-pci-device.h
> new file mode 100644
> index 0000000..c8880eb
> --- /dev/null
> +++ b/hw/host-pci-device.h
> @@ -0,0 +1,75 @@
> +#ifndef HW_HOST_PCI_DEVICE
> +#  define HW_HOST_PCI_DEVICE
> +
> +#include "pci.h"
> +
> +/*
> + * from linux/ioport.h
> + * IO resources have these defined flags.
> + */
> +#define IORESOURCE_BITS         0x000000ff      /* Bus-specific bits */
> +
> +#define IORESOURCE_TYPE_BITS    0x00000f00      /* Resource type */
> +#define IORESOURCE_IO           0x00000100
> +#define IORESOURCE_MEM          0x00000200
> +#define IORESOURCE_IRQ          0x00000400
> +#define IORESOURCE_DMA          0x00000800
> +
> +#define IORESOURCE_PREFETCH     0x00001000      /* No side effects */
> +#define IORESOURCE_READONLY     0x00002000
> +#define IORESOURCE_CACHEABLE    0x00004000
> +#define IORESOURCE_RANGELENGTH  0x00008000
> +#define IORESOURCE_SHADOWABLE   0x00010000
> +
> +#define IORESOURCE_SIZEALIGN    0x00020000      /* size indicates alignment */
> +#define IORESOURCE_STARTALIGN   0x00040000      /* start field is alignment */
> +
> +#define IORESOURCE_MEM_64       0x00100000
> +
> +    /* Userland may not map this resource */
> +#define IORESOURCE_EXCLUSIVE    0x08000000
> +#define IORESOURCE_DISABLED     0x10000000
> +#define IORESOURCE_UNSET        0x20000000
> +#define IORESOURCE_AUTO         0x40000000
> +    /* Driver has marked this resource busy */
> +#define IORESOURCE_BUSY         0x80000000
> +
> +
> +typedef struct HostPCIIORegion {
> +    unsigned long flags;
> +    pcibus_t base_addr;
> +    pcibus_t size;
> +} HostPCIIORegion;
> +
> +typedef struct HostPCIDevice {
> +    uint16_t domain;
> +    uint8_t bus;
> +    uint8_t dev;
> +    uint8_t func;
> +
> +    uint16_t vendor_id;
> +    uint16_t device_id;
> +
> +    HostPCIIORegion io_regions[PCI_NUM_REGIONS - 1];
> +    HostPCIIORegion rom;
> +
> +    bool is_virtfn;
> +
> +    int config_fd;
> +} HostPCIDevice;
> +
> +HostPCIDevice *host_pci_device_get(uint8_t bus, uint8_t dev, uint8_t func);
> +void host_pci_device_put(HostPCIDevice *pci_dev);
> +
> +int host_pci_get_byte(HostPCIDevice *d, int pos, uint8_t *p);
> +int host_pci_get_word(HostPCIDevice *d, int pos, uint16_t *p);
> +int host_pci_get_long(HostPCIDevice *d, int pos, uint32_t *p);
> +int host_pci_get_block(HostPCIDevice *d, int pos, uint8_t *buf, int len);
> +int host_pci_set_byte(HostPCIDevice *d, int pos, uint8_t data);
> +int host_pci_set_word(HostPCIDevice *d, int pos, uint16_t data);
> +int host_pci_set_long(HostPCIDevice *d, int pos, uint32_t data);
> +int host_pci_set_block(HostPCIDevice *d, int pos, uint8_t *buf, int len);
> +
> +uint32_t host_pci_find_ext_cap_offset(HostPCIDevice *s, uint32_t cap);
> +
> +#endif /* !HW_HOST_PCI_DEVICE */
> -- 
> Anthony PERARD
Anthony PERARD March 21, 2012, 6:48 p.m. UTC | #2
On Wed, Mar 21, 2012 at 18:32, Michael S. Tsirkin <mst@redhat.com> wrote:
> Host is really linux sysfs, right?

Yes, it is.

> Better reflect this in naming.

If someone need to access to a device by an other ways, it's could be
implemented also in this file, no ?

> Or maybe even xen-host-pci-device?

These functions are not very Xen specific and just provide a layer to
access to a pci device.
Michael S. Tsirkin March 21, 2012, 8:30 p.m. UTC | #3
On Wed, Mar 21, 2012 at 06:29:00PM +0000, Anthony PERARD wrote:
> Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
> Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>

So this interface is really LinuxSysfsPCIDevice.
For example the assumption that you can just open
device by pci address is broken with vfio.
Domain number is also not something anyone
besides linux knows about.

If I were you I would just call it xen- ....
and if it comes in handy it can be later renamed.

> ---
>  Makefile.target      |    3 +
>  hw/host-pci-device.c |  278 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/host-pci-device.h |   75 ++++++++++++++
>  3 files changed, 356 insertions(+), 0 deletions(-)
>  create mode 100644 hw/host-pci-device.c
>  create mode 100644 hw/host-pci-device.h
> 
> diff --git a/Makefile.target b/Makefile.target
> index 63cf769..0ccfd5b 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -232,6 +232,9 @@ obj-$(CONFIG_NO_XEN) += xen-stub.o
>  
>  obj-i386-$(CONFIG_XEN) += xen_platform.o
>  
> +# Xen PCI Passthrough
> +obj-i386-$(CONFIG_XEN_PCI_PASSTHROUGH) += host-pci-device.o
> +
>  # Inter-VM PCI shared memory
>  CONFIG_IVSHMEM =
>  ifeq ($(CONFIG_KVM), y)
> diff --git a/hw/host-pci-device.c b/hw/host-pci-device.c
> new file mode 100644
> index 0000000..3dacb30
> --- /dev/null
> +++ b/hw/host-pci-device.c
> @@ -0,0 +1,278 @@
> +/*
> + * Copyright (C) 2011       Citrix Ltd.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu-common.h"
> +#include "host-pci-device.h"
> +
> +#define PCI_MAX_EXT_CAP \
> +    ((PCIE_CONFIG_SPACE_SIZE - PCI_CONFIG_SPACE_SIZE) / (PCI_CAP_SIZEOF + 4))

namespace pollution.
name all things HOST_PCI_....

in this case, open-coding will make things clearer.


> +
> +enum error_code {

seems unused. So why name the type?

> +    ERROR_SYNTAX = 1,

We return -1 on error, just do that and you won't need ERROR_SYNTAX.

> +};
> +
> +static int path_to(const HostPCIDevice *d,
> +                   const char *name, char *buf, ssize_t size)
> +{
> +    return snprintf(buf, size, "/sys/bus/pci/devices/%04x:%02x:%02x.%x/%s",
> +                    d->domain, d->bus, d->dev, d->func, name);
> +}

users ignore return value. Also, want to check no overflow
and assert?

> +
> +static int get_resource(HostPCIDevice *d)
> +{
> +    int i, rc = 0;
> +    FILE *f;
> +    char path[PATH_MAX];
> +    unsigned long long start, end, flags, size;
> +
> +    path_to(d, "resource", path, sizeof (path));

I think this might not fit, snprintf needs an extra byte for \0.

> +    f = fopen(path, "r");
> +    if (!f) {
> +        fprintf(stderr, "Error: Can't open %s: %s\n", path, strerror(errno));
> +        return -errno;
> +    }
> +
> +    for (i = 0; i < PCI_NUM_REGIONS; i++) {
> +        if (fscanf(f, "%llx %llx %llx", &start, &end, &flags) != 3) {

People mentioned that scanf is not a good way to parse input.
Applies here.

> +            fprintf(stderr, "Error: Syntax error in %s\n", path);
> +            rc = ERROR_SYNTAX;
> +            break;
> +        }
> +        if (start) {
> +            size = end - start + 1;
> +        } else {
> +            size = 0;
> +        }
> +
> +        if (i < PCI_ROM_SLOT) {
> +            d->io_regions[i].base_addr = start;
> +            d->io_regions[i].size = size;
> +            d->io_regions[i].flags = flags;
> +        } else {
> +            d->rom.base_addr = start;
> +            d->rom.size = size;
> +            d->rom.flags = flags;
> +        }
> +    }
> +
> +    fclose(f);
> +    return rc;
> +}
> +
> +static int get_hex_value(HostPCIDevice *d, const char *name,
> +                         unsigned long *pvalue)

why long?

> +{
> +    char path[PATH_MAX];
> +    FILE *f;
> +    unsigned long value;
> +
> +    path_to(d, name, path, sizeof (path));
> +    f = fopen(path, "r");
> +    if (!f) {
> +        fprintf(stderr, "Error: Can't open %s: %s\n", path, strerror(errno));
> +        return -errno;
> +    }
> +    if (fscanf(f, "%lx\n", &value) != 1) {
> +        fprintf(stderr, "Error: Syntax error in %s\n", path);
> +        fclose(f);
> +        return ERROR_SYNTAX;
> +    }
> +    fclose(f);
> +    *pvalue = value;
> +    return 0;
> +}
> +
> +static bool pci_dev_is_virtfn(HostPCIDevice *d)
> +{
> +    char path[PATH_MAX];
> +    struct stat buf;
> +
> +    path_to(d, "physfn", path, sizeof (path));
> +    return !stat(path, &buf);
> +}
> +

Don't start names with pci_.
It would also be better to avoid things like path_to IMO.

> +static int host_pci_config_fd(HostPCIDevice *d)

So this opens if needed, and returns.
Why not explicitly open on get?
then you won't need these hacks.

> +{
> +    char path[PATH_MAX];
> +
> +    if (d->config_fd < 0) {
> +        path_to(d, "config", path, sizeof (path));

sizeof path

> +        d->config_fd = open(path, O_RDWR);
> +        if (d->config_fd < 0) {
> +            fprintf(stderr, "HostPCIDevice: Can not open '%s': %s\n",
> +                    path, strerror(errno));

strerror is not thread safe

> +        }
> +    }
> +    return d->config_fd;
> +}
> +static int host_pci_config_read(HostPCIDevice *d, int pos, void *buf, int len)
> +{
> +    int fd = host_pci_config_fd(d);

You open file on each access?

> +    int res = 0;

why initialize here?

> +
> +again:
> +    res = pread(fd, buf, len, pos);
> +    if (res != len) {
> +        if (res < 0 && (errno == EINTR || errno == EAGAIN)) {
> +            goto again;

code loops with while or for.

> +        }
> +        fprintf(stderr, "%s: read failed: %s (fd: %i)\n",
> +                __func__, strerror(errno), fd);
> +        return -errno;
> +    }
> +    return 0;
> +}
> +static int host_pci_config_write(HostPCIDevice *d,
> +                                 int pos, const void *buf, int len)
> +{
> +    int fd = host_pci_config_fd(d);
> +    int res = 0;
> +
> +again:
> +    res = pwrite(fd, buf, len, pos);
> +    if (res != len) {
> +        if (res < 0 && (errno == EINTR || errno == EAGAIN)) {
> +            goto again;
> +        }
> +        fprintf(stderr, "%s: write failed: %s\n",
> +                __func__, strerror(errno));
> +        return -errno;
> +    }
> +    return 0;
> +}
> +

same comments as above. also,
Don't report errors with fprintf.

> +int host_pci_get_byte(HostPCIDevice *d, int pos, uint8_t *p)
> +{
> +    uint8_t buf;
> +    int rc = host_pci_config_read(d, pos, &buf, 1);
> +    if (rc == 0) {

!rc.

> +        *p = buf;

why not pass in p directly?

> +    }
> +    return rc;
> +}
> +int host_pci_get_word(HostPCIDevice *d, int pos, uint16_t *p)
> +{
> +    uint16_t buf;
> +    int rc = host_pci_config_read(d, pos, &buf, 2);
> +    if (rc == 0) {

!rc.

> +        *p = le16_to_cpu(buf);
> +    }
> +    return rc;
> +}

This looks wrong wrt endian-ness.

> +int host_pci_get_long(HostPCIDevice *d, int pos, uint32_t *p)
> +{
> +    uint32_t buf;
> +    int rc = host_pci_config_read(d, pos, &buf, 4);
> +    if (rc == 0) {
> +        *p = le32_to_cpu(buf);
> +    }
> +    return rc;
> +}


Add empty lines between {}

> +int host_pci_get_block(HostPCIDevice *d, int pos, uint8_t *buf, int len)
> +{
> +    return host_pci_config_read(d, pos, buf, len);
> +}

when would this be useful?

> +
> +int host_pci_set_byte(HostPCIDevice *d, int pos, uint8_t data)
> +{
> +    return host_pci_config_write(d, pos, &data, 1);
> +}
> +int host_pci_set_word(HostPCIDevice *d, int pos, uint16_t data)
> +{
> +    data = cpu_to_le16(data);
> +    return host_pci_config_write(d, pos, &data, 2);
> +}
> +int host_pci_set_long(HostPCIDevice *d, int pos, uint32_t data)
> +{
> +    data = cpu_to_le32(data);
> +    return host_pci_config_write(d, pos, &data, 4);
> +}
> +int host_pci_set_block(HostPCIDevice *d, int pos, uint8_t *buf, int len)
> +{
> +    return host_pci_config_write(d, pos, buf, len);
> +}
> +
> +uint32_t host_pci_find_ext_cap_offset(HostPCIDevice *d, uint32_t cap)

Why 32? Ext config offsets are < 12 bit.

> +{
> +    uint32_t header = 0;
> +    int max_cap = PCI_MAX_EXT_CAP;
> +    int pos = PCI_CONFIG_SPACE_SIZE;
> +
> +    do {
> +        if (host_pci_get_long(d, pos, &header)) {
> +            break;
> +        }
> +        /*
> +         * If we have no capabilities, this is indicated by cap ID,
> +         * cap version and next pointer all being 0.
> +         */
> +        if (header == 0) {
> +            break;
> +        }
> +
> +        if (PCI_EXT_CAP_ID(header) == cap) {
> +            return pos;
> +        }
> +
> +        pos = PCI_EXT_CAP_NEXT(header);
> +        if (pos < PCI_CONFIG_SPACE_SIZE) {
> +            break;
> +        }
> +
> +        max_cap--;
> +    } while (max_cap > 0);
> +
> +    return 0;
> +}
> +
> +HostPCIDevice *host_pci_device_get(uint8_t bus, uint8_t dev, uint8_t func)

Why skip domain in the interface?
Also, HostPCIDevice structure is public so there is little value
in allocating, just get it by pointer and init/cleanup.


> +{
> +    HostPCIDevice *d = NULL;
> +    unsigned long v = 0;
> +
> +    d = g_new0(HostPCIDevice, 1);
> +
> +    d->config_fd = -1;
> +    d->domain = 0;
> +    d->bus = bus;
> +    d->dev = dev;
> +    d->func = func;
> +
> +    if (host_pci_config_fd(d) == -1) {
> +        goto error;
> +    }
> +    if (get_resource(d) != 0) {

just get_resource(d).

> +        goto error;
> +    }
> +
> +    if (get_hex_value(d, "vendor", &v)) {
> +        goto error;
> +    }
> +    d->vendor_id = v;
> +    if (get_hex_value(d, "device", &v)) {
> +        goto error;
> +    }
> +    d->device_id = v;
> +    d->is_virtfn = pci_dev_is_virtfn(d);
> +
> +    return d;
> +error:
> +    if (d->config_fd >= 0) {
> +        close(d->config_fd);
> +    }
> +    g_free(d);
> +    return NULL;
> +}
> +
> +void host_pci_device_put(HostPCIDevice *d)
> +{
> +    if (d->config_fd >= 0) {
> +        close(d->config_fd);
> +    }
> +    g_free(d);
> +}
> diff --git a/hw/host-pci-device.h b/hw/host-pci-device.h
> new file mode 100644
> index 0000000..c8880eb
> --- /dev/null
> +++ b/hw/host-pci-device.h
> @@ -0,0 +1,75 @@
> +#ifndef HW_HOST_PCI_DEVICE
> +#  define HW_HOST_PCI_DEVICE

Don't put space after #.

Also HOST_PCI_DEVICE_H would be less likely to confuse.

> +
> +#include "pci.h"
> +
> +/*
> + * from linux/ioport.h
> + * IO resources have these defined flags.
> + */
> +#define IORESOURCE_BITS         0x000000ff      /* Bus-specific bits */
> +
> +#define IORESOURCE_TYPE_BITS    0x00000f00      /* Resource type */
> +#define IORESOURCE_IO           0x00000100
> +#define IORESOURCE_MEM          0x00000200
> +#define IORESOURCE_IRQ          0x00000400
> +#define IORESOURCE_DMA          0x00000800
> +
> +#define IORESOURCE_PREFETCH     0x00001000      /* No side effects */
> +#define IORESOURCE_READONLY     0x00002000
> +#define IORESOURCE_CACHEABLE    0x00004000
> +#define IORESOURCE_RANGELENGTH  0x00008000
> +#define IORESOURCE_SHADOWABLE   0x00010000
> +
> +#define IORESOURCE_SIZEALIGN    0x00020000      /* size indicates alignment */
> +#define IORESOURCE_STARTALIGN   0x00040000      /* start field is alignment */
> +
> +#define IORESOURCE_MEM_64       0x00100000
> +
> +    /* Userland may not map this resource */
> +#define IORESOURCE_EXCLUSIVE    0x08000000
> +#define IORESOURCE_DISABLED     0x10000000
> +#define IORESOURCE_UNSET        0x20000000
> +#define IORESOURCE_AUTO         0x40000000
> +    /* Driver has marked this resource busy */
> +#define IORESOURCE_BUSY         0x80000000
> +

Why do above make sense in an API?
Abstract it in some reasonable way, don't just expose
flags from sysfs as is.

> +

kill extra empty lines

> +typedef struct HostPCIIORegion {
> +    unsigned long flags;
> +    pcibus_t base_addr;
> +    pcibus_t size;
> +} HostPCIIORegion;
> +
> +typedef struct HostPCIDevice {
> +    uint16_t domain;
> +    uint8_t bus;
> +    uint8_t dev;
> +    uint8_t func;
> +
> +    uint16_t vendor_id;
> +    uint16_t device_id;
> +
> +    HostPCIIORegion io_regions[PCI_NUM_REGIONS - 1];
> +    HostPCIIORegion rom;
> +
> +    bool is_virtfn;
> +
> +    int config_fd;
> +} HostPCIDevice;
> +
> +HostPCIDevice *host_pci_device_get(uint8_t bus, uint8_t dev, uint8_t func);
> +void host_pci_device_put(HostPCIDevice *pci_dev);
> +
> +int host_pci_get_byte(HostPCIDevice *d, int pos, uint8_t *p);
> +int host_pci_get_word(HostPCIDevice *d, int pos, uint16_t *p);
> +int host_pci_get_long(HostPCIDevice *d, int pos, uint32_t *p);
> +int host_pci_get_block(HostPCIDevice *d, int pos, uint8_t *buf, int len);
> +int host_pci_set_byte(HostPCIDevice *d, int pos, uint8_t data);
> +int host_pci_set_word(HostPCIDevice *d, int pos, uint16_t data);
> +int host_pci_set_long(HostPCIDevice *d, int pos, uint32_t data);
> +int host_pci_set_block(HostPCIDevice *d, int pos, uint8_t *buf, int len);
> +
> +uint32_t host_pci_find_ext_cap_offset(HostPCIDevice *s, uint32_t cap);
> +
> +#endif /* !HW_HOST_PCI_DEVICE */
> -- 
> Anthony PERARD
Anthony PERARD March 23, 2012, 1:56 p.m. UTC | #4
On Wed, Mar 21, 2012 at 20:30, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Wed, Mar 21, 2012 at 06:29:00PM +0000, Anthony PERARD wrote:
>> Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
>> Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
>
> So this interface is really LinuxSysfsPCIDevice.
> For example the assumption that you can just open
> device by pci address is broken with vfio.
> Domain number is also not something anyone
> besides linux knows about.
>
> If I were you I would just call it xen- ....
> and if it comes in handy it can be later renamed.

Ok, I will rename that XenHostPCIDevice.

>> ---
>>  Makefile.target      |    3 +
>>  hw/host-pci-device.c |  278 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>  hw/host-pci-device.h |   75 ++++++++++++++
>>  3 files changed, 356 insertions(+), 0 deletions(-)
>>  create mode 100644 hw/host-pci-device.c
>>  create mode 100644 hw/host-pci-device.h
>>
>> diff --git a/Makefile.target b/Makefile.target
>> index 63cf769..0ccfd5b 100644
>> --- a/Makefile.target
>> +++ b/Makefile.target
>> @@ -232,6 +232,9 @@ obj-$(CONFIG_NO_XEN) += xen-stub.o
>>
>>  obj-i386-$(CONFIG_XEN) += xen_platform.o
>>
>> +# Xen PCI Passthrough
>> +obj-i386-$(CONFIG_XEN_PCI_PASSTHROUGH) += host-pci-device.o
>> +
>>  # Inter-VM PCI shared memory
>>  CONFIG_IVSHMEM =
>>  ifeq ($(CONFIG_KVM), y)
>> diff --git a/hw/host-pci-device.c b/hw/host-pci-device.c
>> new file mode 100644
>> index 0000000..3dacb30
>> --- /dev/null
>> +++ b/hw/host-pci-device.c
>> @@ -0,0 +1,278 @@
>> +/*
>> + * Copyright (C) 2011       Citrix Ltd.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
>> + * the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#include "qemu-common.h"
>> +#include "host-pci-device.h"
>> +
>> +#define PCI_MAX_EXT_CAP \
>> +    ((PCIE_CONFIG_SPACE_SIZE - PCI_CONFIG_SPACE_SIZE) / (PCI_CAP_SIZEOF + 4))
>
> namespace pollution.
> name all things HOST_PCI_....
>
> in this case, open-coding will make things clearer.
>
>
>> +
>> +enum error_code {
>
> seems unused. So why name the type?
>
>> +    ERROR_SYNTAX = 1,
>
> We return -1 on error, just do that and you won't need ERROR_SYNTAX.

Ok, I'll remove this.

>> +};
>> +
>> +static int path_to(const HostPCIDevice *d,
>> +                   const char *name, char *buf, ssize_t size)
>> +{
>> +    return snprintf(buf, size, "/sys/bus/pci/devices/%04x:%02x:%02x.%x/%s",
>> +                    d->domain, d->bus, d->dev, d->func, name);
>> +}
>
> users ignore return value. Also, want to check no overflow
> and assert?

I will check the return value in this function an then return 0 or -1.

>> +
>> +static int get_resource(HostPCIDevice *d)
>> +{
>> +    int i, rc = 0;
>> +    FILE *f;
>> +    char path[PATH_MAX];
>> +    unsigned long long start, end, flags, size;
>> +
>> +    path_to(d, "resource", path, sizeof (path));
>
> I think this might not fit, snprintf needs an extra byte for \0.

I just check snprintf write size byte including the \0, sw we should
just give the size of the buffer.

>> +    f = fopen(path, "r");
>> +    if (!f) {
>> +        fprintf(stderr, "Error: Can't open %s: %s\n", path, strerror(errno));
>> +        return -errno;
>> +    }
>> +
>> +    for (i = 0; i < PCI_NUM_REGIONS; i++) {
>> +        if (fscanf(f, "%llx %llx %llx", &start, &end, &flags) != 3) {
>
> People mentioned that scanf is not a good way to parse input.
> Applies here.

Ok, I'll do a manual parsing :(.

>> +            fprintf(stderr, "Error: Syntax error in %s\n", path);
>> +            rc = ERROR_SYNTAX;
>> +            break;
>> +        }
>> +        if (start) {
>> +            size = end - start + 1;
>> +        } else {
>> +            size = 0;
>> +        }
>> +
>> +        if (i < PCI_ROM_SLOT) {
>> +            d->io_regions[i].base_addr = start;
>> +            d->io_regions[i].size = size;
>> +            d->io_regions[i].flags = flags;
>> +        } else {
>> +            d->rom.base_addr = start;
>> +            d->rom.size = size;
>> +            d->rom.flags = flags;
>> +        }
>> +    }
>> +
>> +    fclose(f);
>> +    return rc;
>> +}
>> +
>> +static int get_hex_value(HostPCIDevice *d, const char *name,
>> +                         unsigned long *pvalue)
>
> why long?

Do be a bit generic I suppose, but I just use this function for
vendor_id and device_id, I probably just need an int.

>> +{
>> +    char path[PATH_MAX];
>> +    FILE *f;
>> +    unsigned long value;
>> +
>> +    path_to(d, name, path, sizeof (path));
>> +    f = fopen(path, "r");
>> +    if (!f) {
>> +        fprintf(stderr, "Error: Can't open %s: %s\n", path, strerror(errno));
>> +        return -errno;
>> +    }
>> +    if (fscanf(f, "%lx\n", &value) != 1) {
>> +        fprintf(stderr, "Error: Syntax error in %s\n", path);
>> +        fclose(f);
>> +        return ERROR_SYNTAX;
>> +    }
>> +    fclose(f);
>> +    *pvalue = value;
>> +    return 0;
>> +}
>> +
>> +static bool pci_dev_is_virtfn(HostPCIDevice *d)
>> +{
>> +    char path[PATH_MAX];
>> +    struct stat buf;
>> +
>> +    path_to(d, "physfn", path, sizeof (path));
>> +    return !stat(path, &buf);
>> +}
>> +
>
> Don't start names with pci_.
> It would also be better to avoid things like path_to IMO.

Do you mean avoiding the name or the purpose of the function path_to ?
For the name, I can probably rename it to sysfs_device_path()

>> +static int host_pci_config_fd(HostPCIDevice *d)
>
> So this opens if needed, and returns.
> Why not explicitly open on get?
> then you won't need these hacks.

Ok, I'll change that.

>> +{
>> +    char path[PATH_MAX];
>> +
>> +    if (d->config_fd < 0) {
>> +        path_to(d, "config", path, sizeof (path));
>
> sizeof path
>
>> +        d->config_fd = open(path, O_RDWR);
>> +        if (d->config_fd < 0) {
>> +            fprintf(stderr, "HostPCIDevice: Can not open '%s': %s\n",
>> +                    path, strerror(errno));
>
> strerror is not thread safe
>
>> +        }
>> +    }
>> +    return d->config_fd;
>> +}
>> +static int host_pci_config_read(HostPCIDevice *d, int pos, void *buf, int len)
>> +{
>> +    int fd = host_pci_config_fd(d);
>
> You open file on each access?
>
>> +    int res = 0;
>
> why initialize here?
>
>> +
>> +again:
>> +    res = pread(fd, buf, len, pos);
>> +    if (res != len) {
>> +        if (res < 0 && (errno == EINTR || errno == EAGAIN)) {
>> +            goto again;
>
> code loops with while or for.

ok.

>> +        }
>> +        fprintf(stderr, "%s: read failed: %s (fd: %i)\n",
>> +                __func__, strerror(errno), fd);
>> +        return -errno;
>> +    }
>> +    return 0;
>> +}
>> +static int host_pci_config_write(HostPCIDevice *d,
>> +                                 int pos, const void *buf, int len)
>> +{
>> +    int fd = host_pci_config_fd(d);
>> +    int res = 0;
>> +
>> +again:
>> +    res = pwrite(fd, buf, len, pos);
>> +    if (res != len) {
>> +        if (res < 0 && (errno == EINTR || errno == EAGAIN)) {
>> +            goto again;
>> +        }
>> +        fprintf(stderr, "%s: write failed: %s\n",
>> +                __func__, strerror(errno));
>> +        return -errno;
>> +    }
>> +    return 0;
>> +}
>> +
>
> same comments as above. also,
> Don't report errors with fprintf.
>
>> +int host_pci_get_byte(HostPCIDevice *d, int pos, uint8_t *p)
>> +{
>> +    uint8_t buf;
>> +    int rc = host_pci_config_read(d, pos, &buf, 1);
>> +    if (rc == 0) {
>
> !rc.
>
>> +        *p = buf;
>
> why not pass in p directly?
>
>> +    }
>> +    return rc;
>> +}
>> +int host_pci_get_word(HostPCIDevice *d, int pos, uint16_t *p)
>> +{
>> +    uint16_t buf;
>> +    int rc = host_pci_config_read(d, pos, &buf, 2);
>> +    if (rc == 0) {
>
> !rc.
>
>> +        *p = le16_to_cpu(buf);
>> +    }
>> +    return rc;
>> +}
>
> This looks wrong wrt endian-ness.

It's seams that PCI config space registers are little-endian, so,
get/read a word/dword from the pci config space should be converted
from little-endian to the cpu endian-ness.

>> +int host_pci_get_long(HostPCIDevice *d, int pos, uint32_t *p)
>> +{
>> +    uint32_t buf;
>> +    int rc = host_pci_config_read(d, pos, &buf, 4);
>> +    if (rc == 0) {
>> +        *p = le32_to_cpu(buf);
>> +    }
>> +    return rc;
>> +}
>
> Add empty lines between {}

It's look nicer when I fold the function to only see one line :), but,
I add this empty lines.

>> +int host_pci_get_block(HostPCIDevice *d, int pos, uint8_t *buf, int len)
>> +{
>> +    return host_pci_config_read(d, pos, buf, len);
>> +}
>
> when would this be useful?

It's used to initialize the "emulated" config space (of pci.h) and
every time a pci config read or write is issued by the guest.

>> +
>> +int host_pci_set_byte(HostPCIDevice *d, int pos, uint8_t data)
>> +{
>> +    return host_pci_config_write(d, pos, &data, 1);
>> +}
>> +int host_pci_set_word(HostPCIDevice *d, int pos, uint16_t data)
>> +{
>> +    data = cpu_to_le16(data);
>> +    return host_pci_config_write(d, pos, &data, 2);
>> +}
>> +int host_pci_set_long(HostPCIDevice *d, int pos, uint32_t data)
>> +{
>> +    data = cpu_to_le32(data);
>> +    return host_pci_config_write(d, pos, &data, 4);
>> +}
>> +int host_pci_set_block(HostPCIDevice *d, int pos, uint8_t *buf, int len)
>> +{
>> +    return host_pci_config_write(d, pos, buf, len);
>> +}
>> +
>> +uint32_t host_pci_find_ext_cap_offset(HostPCIDevice *d, uint32_t cap)
>
> Why 32? Ext config offsets are < 12 bit.

No apparent reason, the user of this function was just expecting a uint32.

>> +{
>> +    uint32_t header = 0;
>> +    int max_cap = PCI_MAX_EXT_CAP;
>> +    int pos = PCI_CONFIG_SPACE_SIZE;
>> +
>> +    do {
>> +        if (host_pci_get_long(d, pos, &header)) {
>> +            break;
>> +        }
>> +        /*
>> +         * If we have no capabilities, this is indicated by cap ID,
>> +         * cap version and next pointer all being 0.
>> +         */
>> +        if (header == 0) {
>> +            break;
>> +        }
>> +
>> +        if (PCI_EXT_CAP_ID(header) == cap) {
>> +            return pos;
>> +        }
>> +
>> +        pos = PCI_EXT_CAP_NEXT(header);
>> +        if (pos < PCI_CONFIG_SPACE_SIZE) {
>> +            break;
>> +        }
>> +
>> +        max_cap--;
>> +    } while (max_cap > 0);
>> +
>> +    return 0;
>> +}
>> +
>> +HostPCIDevice *host_pci_device_get(uint8_t bus, uint8_t dev, uint8_t func)
>
> Why skip domain in the interface?
> Also, HostPCIDevice structure is public so there is little value
> in allocating, just get it by pointer and init/cleanup.

You mean like pci_bus_new_inplace ? Ok, I'll do that.

>> +{
>> +    HostPCIDevice *d = NULL;
>> +    unsigned long v = 0;
>> +
>> +    d = g_new0(HostPCIDevice, 1);
>> +
>> +    d->config_fd = -1;
>> +    d->domain = 0;
>> +    d->bus = bus;
>> +    d->dev = dev;
>> +    d->func = func;
>> +
>> +    if (host_pci_config_fd(d) == -1) {
>> +        goto error;
>> +    }
>> +    if (get_resource(d) != 0) {
>
> just get_resource(d).
>
>> +        goto error;
>> +    }
>> +
>> +    if (get_hex_value(d, "vendor", &v)) {
>> +        goto error;
>> +    }
>> +    d->vendor_id = v;
>> +    if (get_hex_value(d, "device", &v)) {
>> +        goto error;
>> +    }
>> +    d->device_id = v;
>> +    d->is_virtfn = pci_dev_is_virtfn(d);
>> +
>> +    return d;
>> +error:
>> +    if (d->config_fd >= 0) {
>> +        close(d->config_fd);
>> +    }
>> +    g_free(d);
>> +    return NULL;
>> +}
>> +
>> +void host_pci_device_put(HostPCIDevice *d)
>> +{
>> +    if (d->config_fd >= 0) {
>> +        close(d->config_fd);
>> +    }
>> +    g_free(d);
>> +}
>> diff --git a/hw/host-pci-device.h b/hw/host-pci-device.h
>> new file mode 100644
>> index 0000000..c8880eb
>> --- /dev/null
>> +++ b/hw/host-pci-device.h
>> @@ -0,0 +1,75 @@
>> +#ifndef HW_HOST_PCI_DEVICE
>> +#  define HW_HOST_PCI_DEVICE
>
> Don't put space after #.
>
> Also HOST_PCI_DEVICE_H would be less likely to confuse.
>
>> +
>> +#include "pci.h"
>> +
>> +/*
>> + * from linux/ioport.h
>> + * IO resources have these defined flags.
>> + */
>> +#define IORESOURCE_BITS         0x000000ff      /* Bus-specific bits */
>> +
>> +#define IORESOURCE_TYPE_BITS    0x00000f00      /* Resource type */
>> +#define IORESOURCE_IO           0x00000100
>> +#define IORESOURCE_MEM          0x00000200
>> +#define IORESOURCE_IRQ          0x00000400
>> +#define IORESOURCE_DMA          0x00000800
>> +
>> +#define IORESOURCE_PREFETCH     0x00001000      /* No side effects */
>> +#define IORESOURCE_READONLY     0x00002000
>> +#define IORESOURCE_CACHEABLE    0x00004000
>> +#define IORESOURCE_RANGELENGTH  0x00008000
>> +#define IORESOURCE_SHADOWABLE   0x00010000
>> +
>> +#define IORESOURCE_SIZEALIGN    0x00020000      /* size indicates alignment */
>> +#define IORESOURCE_STARTALIGN   0x00040000      /* start field is alignment */
>> +
>> +#define IORESOURCE_MEM_64       0x00100000
>> +
>> +    /* Userland may not map this resource */
>> +#define IORESOURCE_EXCLUSIVE    0x08000000
>> +#define IORESOURCE_DISABLED     0x10000000
>> +#define IORESOURCE_UNSET        0x20000000
>> +#define IORESOURCE_AUTO         0x40000000
>> +    /* Driver has marked this resource busy */
>> +#define IORESOURCE_BUSY         0x80000000
>> +
>
> Why do above make sense in an API?
> Abstract it in some reasonable way, don't just expose
> flags from sysfs as is.

Ok.

>> +
>
> kill extra empty lines
>
>> +typedef struct HostPCIIORegion {
>> +    unsigned long flags;
>> +    pcibus_t base_addr;
>> +    pcibus_t size;
>> +} HostPCIIORegion;
>> +
>> +typedef struct HostPCIDevice {
>> +    uint16_t domain;
>> +    uint8_t bus;
>> +    uint8_t dev;
>> +    uint8_t func;
>> +
>> +    uint16_t vendor_id;
>> +    uint16_t device_id;
>> +
>> +    HostPCIIORegion io_regions[PCI_NUM_REGIONS - 1];
>> +    HostPCIIORegion rom;
>> +
>> +    bool is_virtfn;
>> +
>> +    int config_fd;
>> +} HostPCIDevice;
>> +
>> +HostPCIDevice *host_pci_device_get(uint8_t bus, uint8_t dev, uint8_t func);
>> +void host_pci_device_put(HostPCIDevice *pci_dev);
>> +
>> +int host_pci_get_byte(HostPCIDevice *d, int pos, uint8_t *p);
>> +int host_pci_get_word(HostPCIDevice *d, int pos, uint16_t *p);
>> +int host_pci_get_long(HostPCIDevice *d, int pos, uint32_t *p);
>> +int host_pci_get_block(HostPCIDevice *d, int pos, uint8_t *buf, int len);
>> +int host_pci_set_byte(HostPCIDevice *d, int pos, uint8_t data);
>> +int host_pci_set_word(HostPCIDevice *d, int pos, uint16_t data);
>> +int host_pci_set_long(HostPCIDevice *d, int pos, uint32_t data);
>> +int host_pci_set_block(HostPCIDevice *d, int pos, uint8_t *buf, int len);
>> +
>> +uint32_t host_pci_find_ext_cap_offset(HostPCIDevice *s, uint32_t cap);
>> +
>> +#endif /* !HW_HOST_PCI_DEVICE */
>> --
>> Anthony PERARD
>
diff mbox

Patch

diff --git a/Makefile.target b/Makefile.target
index 63cf769..0ccfd5b 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -232,6 +232,9 @@  obj-$(CONFIG_NO_XEN) += xen-stub.o
 
 obj-i386-$(CONFIG_XEN) += xen_platform.o
 
+# Xen PCI Passthrough
+obj-i386-$(CONFIG_XEN_PCI_PASSTHROUGH) += host-pci-device.o
+
 # Inter-VM PCI shared memory
 CONFIG_IVSHMEM =
 ifeq ($(CONFIG_KVM), y)
diff --git a/hw/host-pci-device.c b/hw/host-pci-device.c
new file mode 100644
index 0000000..3dacb30
--- /dev/null
+++ b/hw/host-pci-device.c
@@ -0,0 +1,278 @@ 
+/*
+ * Copyright (C) 2011       Citrix Ltd.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "host-pci-device.h"
+
+#define PCI_MAX_EXT_CAP \
+    ((PCIE_CONFIG_SPACE_SIZE - PCI_CONFIG_SPACE_SIZE) / (PCI_CAP_SIZEOF + 4))
+
+enum error_code {
+    ERROR_SYNTAX = 1,
+};
+
+static int path_to(const HostPCIDevice *d,
+                   const char *name, char *buf, ssize_t size)
+{
+    return snprintf(buf, size, "/sys/bus/pci/devices/%04x:%02x:%02x.%x/%s",
+                    d->domain, d->bus, d->dev, d->func, name);
+}
+
+static int get_resource(HostPCIDevice *d)
+{
+    int i, rc = 0;
+    FILE *f;
+    char path[PATH_MAX];
+    unsigned long long start, end, flags, size;
+
+    path_to(d, "resource", path, sizeof (path));
+    f = fopen(path, "r");
+    if (!f) {
+        fprintf(stderr, "Error: Can't open %s: %s\n", path, strerror(errno));
+        return -errno;
+    }
+
+    for (i = 0; i < PCI_NUM_REGIONS; i++) {
+        if (fscanf(f, "%llx %llx %llx", &start, &end, &flags) != 3) {
+            fprintf(stderr, "Error: Syntax error in %s\n", path);
+            rc = ERROR_SYNTAX;
+            break;
+        }
+        if (start) {
+            size = end - start + 1;
+        } else {
+            size = 0;
+        }
+
+        if (i < PCI_ROM_SLOT) {
+            d->io_regions[i].base_addr = start;
+            d->io_regions[i].size = size;
+            d->io_regions[i].flags = flags;
+        } else {
+            d->rom.base_addr = start;
+            d->rom.size = size;
+            d->rom.flags = flags;
+        }
+    }
+
+    fclose(f);
+    return rc;
+}
+
+static int get_hex_value(HostPCIDevice *d, const char *name,
+                         unsigned long *pvalue)
+{
+    char path[PATH_MAX];
+    FILE *f;
+    unsigned long value;
+
+    path_to(d, name, path, sizeof (path));
+    f = fopen(path, "r");
+    if (!f) {
+        fprintf(stderr, "Error: Can't open %s: %s\n", path, strerror(errno));
+        return -errno;
+    }
+    if (fscanf(f, "%lx\n", &value) != 1) {
+        fprintf(stderr, "Error: Syntax error in %s\n", path);
+        fclose(f);
+        return ERROR_SYNTAX;
+    }
+    fclose(f);
+    *pvalue = value;
+    return 0;
+}
+
+static bool pci_dev_is_virtfn(HostPCIDevice *d)
+{
+    char path[PATH_MAX];
+    struct stat buf;
+
+    path_to(d, "physfn", path, sizeof (path));
+    return !stat(path, &buf);
+}
+
+static int host_pci_config_fd(HostPCIDevice *d)
+{
+    char path[PATH_MAX];
+
+    if (d->config_fd < 0) {
+        path_to(d, "config", path, sizeof (path));
+        d->config_fd = open(path, O_RDWR);
+        if (d->config_fd < 0) {
+            fprintf(stderr, "HostPCIDevice: Can not open '%s': %s\n",
+                    path, strerror(errno));
+        }
+    }
+    return d->config_fd;
+}
+static int host_pci_config_read(HostPCIDevice *d, int pos, void *buf, int len)
+{
+    int fd = host_pci_config_fd(d);
+    int res = 0;
+
+again:
+    res = pread(fd, buf, len, pos);
+    if (res != len) {
+        if (res < 0 && (errno == EINTR || errno == EAGAIN)) {
+            goto again;
+        }
+        fprintf(stderr, "%s: read failed: %s (fd: %i)\n",
+                __func__, strerror(errno), fd);
+        return -errno;
+    }
+    return 0;
+}
+static int host_pci_config_write(HostPCIDevice *d,
+                                 int pos, const void *buf, int len)
+{
+    int fd = host_pci_config_fd(d);
+    int res = 0;
+
+again:
+    res = pwrite(fd, buf, len, pos);
+    if (res != len) {
+        if (res < 0 && (errno == EINTR || errno == EAGAIN)) {
+            goto again;
+        }
+        fprintf(stderr, "%s: write failed: %s\n",
+                __func__, strerror(errno));
+        return -errno;
+    }
+    return 0;
+}
+
+int host_pci_get_byte(HostPCIDevice *d, int pos, uint8_t *p)
+{
+    uint8_t buf;
+    int rc = host_pci_config_read(d, pos, &buf, 1);
+    if (rc == 0) {
+        *p = buf;
+    }
+    return rc;
+}
+int host_pci_get_word(HostPCIDevice *d, int pos, uint16_t *p)
+{
+    uint16_t buf;
+    int rc = host_pci_config_read(d, pos, &buf, 2);
+    if (rc == 0) {
+        *p = le16_to_cpu(buf);
+    }
+    return rc;
+}
+int host_pci_get_long(HostPCIDevice *d, int pos, uint32_t *p)
+{
+    uint32_t buf;
+    int rc = host_pci_config_read(d, pos, &buf, 4);
+    if (rc == 0) {
+        *p = le32_to_cpu(buf);
+    }
+    return rc;
+}
+int host_pci_get_block(HostPCIDevice *d, int pos, uint8_t *buf, int len)
+{
+    return host_pci_config_read(d, pos, buf, len);
+}
+
+int host_pci_set_byte(HostPCIDevice *d, int pos, uint8_t data)
+{
+    return host_pci_config_write(d, pos, &data, 1);
+}
+int host_pci_set_word(HostPCIDevice *d, int pos, uint16_t data)
+{
+    data = cpu_to_le16(data);
+    return host_pci_config_write(d, pos, &data, 2);
+}
+int host_pci_set_long(HostPCIDevice *d, int pos, uint32_t data)
+{
+    data = cpu_to_le32(data);
+    return host_pci_config_write(d, pos, &data, 4);
+}
+int host_pci_set_block(HostPCIDevice *d, int pos, uint8_t *buf, int len)
+{
+    return host_pci_config_write(d, pos, buf, len);
+}
+
+uint32_t host_pci_find_ext_cap_offset(HostPCIDevice *d, uint32_t cap)
+{
+    uint32_t header = 0;
+    int max_cap = PCI_MAX_EXT_CAP;
+    int pos = PCI_CONFIG_SPACE_SIZE;
+
+    do {
+        if (host_pci_get_long(d, pos, &header)) {
+            break;
+        }
+        /*
+         * If we have no capabilities, this is indicated by cap ID,
+         * cap version and next pointer all being 0.
+         */
+        if (header == 0) {
+            break;
+        }
+
+        if (PCI_EXT_CAP_ID(header) == cap) {
+            return pos;
+        }
+
+        pos = PCI_EXT_CAP_NEXT(header);
+        if (pos < PCI_CONFIG_SPACE_SIZE) {
+            break;
+        }
+
+        max_cap--;
+    } while (max_cap > 0);
+
+    return 0;
+}
+
+HostPCIDevice *host_pci_device_get(uint8_t bus, uint8_t dev, uint8_t func)
+{
+    HostPCIDevice *d = NULL;
+    unsigned long v = 0;
+
+    d = g_new0(HostPCIDevice, 1);
+
+    d->config_fd = -1;
+    d->domain = 0;
+    d->bus = bus;
+    d->dev = dev;
+    d->func = func;
+
+    if (host_pci_config_fd(d) == -1) {
+        goto error;
+    }
+    if (get_resource(d) != 0) {
+        goto error;
+    }
+
+    if (get_hex_value(d, "vendor", &v)) {
+        goto error;
+    }
+    d->vendor_id = v;
+    if (get_hex_value(d, "device", &v)) {
+        goto error;
+    }
+    d->device_id = v;
+    d->is_virtfn = pci_dev_is_virtfn(d);
+
+    return d;
+error:
+    if (d->config_fd >= 0) {
+        close(d->config_fd);
+    }
+    g_free(d);
+    return NULL;
+}
+
+void host_pci_device_put(HostPCIDevice *d)
+{
+    if (d->config_fd >= 0) {
+        close(d->config_fd);
+    }
+    g_free(d);
+}
diff --git a/hw/host-pci-device.h b/hw/host-pci-device.h
new file mode 100644
index 0000000..c8880eb
--- /dev/null
+++ b/hw/host-pci-device.h
@@ -0,0 +1,75 @@ 
+#ifndef HW_HOST_PCI_DEVICE
+#  define HW_HOST_PCI_DEVICE
+
+#include "pci.h"
+
+/*
+ * from linux/ioport.h
+ * IO resources have these defined flags.
+ */
+#define IORESOURCE_BITS         0x000000ff      /* Bus-specific bits */
+
+#define IORESOURCE_TYPE_BITS    0x00000f00      /* Resource type */
+#define IORESOURCE_IO           0x00000100
+#define IORESOURCE_MEM          0x00000200
+#define IORESOURCE_IRQ          0x00000400
+#define IORESOURCE_DMA          0x00000800
+
+#define IORESOURCE_PREFETCH     0x00001000      /* No side effects */
+#define IORESOURCE_READONLY     0x00002000
+#define IORESOURCE_CACHEABLE    0x00004000
+#define IORESOURCE_RANGELENGTH  0x00008000
+#define IORESOURCE_SHADOWABLE   0x00010000
+
+#define IORESOURCE_SIZEALIGN    0x00020000      /* size indicates alignment */
+#define IORESOURCE_STARTALIGN   0x00040000      /* start field is alignment */
+
+#define IORESOURCE_MEM_64       0x00100000
+
+    /* Userland may not map this resource */
+#define IORESOURCE_EXCLUSIVE    0x08000000
+#define IORESOURCE_DISABLED     0x10000000
+#define IORESOURCE_UNSET        0x20000000
+#define IORESOURCE_AUTO         0x40000000
+    /* Driver has marked this resource busy */
+#define IORESOURCE_BUSY         0x80000000
+
+
+typedef struct HostPCIIORegion {
+    unsigned long flags;
+    pcibus_t base_addr;
+    pcibus_t size;
+} HostPCIIORegion;
+
+typedef struct HostPCIDevice {
+    uint16_t domain;
+    uint8_t bus;
+    uint8_t dev;
+    uint8_t func;
+
+    uint16_t vendor_id;
+    uint16_t device_id;
+
+    HostPCIIORegion io_regions[PCI_NUM_REGIONS - 1];
+    HostPCIIORegion rom;
+
+    bool is_virtfn;
+
+    int config_fd;
+} HostPCIDevice;
+
+HostPCIDevice *host_pci_device_get(uint8_t bus, uint8_t dev, uint8_t func);
+void host_pci_device_put(HostPCIDevice *pci_dev);
+
+int host_pci_get_byte(HostPCIDevice *d, int pos, uint8_t *p);
+int host_pci_get_word(HostPCIDevice *d, int pos, uint16_t *p);
+int host_pci_get_long(HostPCIDevice *d, int pos, uint32_t *p);
+int host_pci_get_block(HostPCIDevice *d, int pos, uint8_t *buf, int len);
+int host_pci_set_byte(HostPCIDevice *d, int pos, uint8_t data);
+int host_pci_set_word(HostPCIDevice *d, int pos, uint16_t data);
+int host_pci_set_long(HostPCIDevice *d, int pos, uint32_t data);
+int host_pci_set_block(HostPCIDevice *d, int pos, uint8_t *buf, int len);
+
+uint32_t host_pci_find_ext_cap_offset(HostPCIDevice *s, uint32_t cap);
+
+#endif /* !HW_HOST_PCI_DEVICE */