diff mbox series

[for-4.2,v10,03/15] virtio-iommu: Add skeleton

Message ID 20190730172137.23114-4-eric.auger@redhat.com
State New
Headers show
Series VIRTIO-IOMMU device | expand

Commit Message

Eric Auger July 30, 2019, 5:21 p.m. UTC
This patchs adds the skeleton for the virtio-iommu device.

Signed-off-by: Eric Auger <eric.auger@redhat.com>

---

v9 -> v10:
- expose VIRTIO_IOMMU_F_MMIO feature
- s/domain_bits/domain_range struct
- change error codes
- enforce unmigratable
- Kconfig

v7 -> v8:
- expose VIRTIO_IOMMU_F_BYPASS and VIRTIO_F_VERSION_1
  features
- set_config dummy implementation + tracing
- add trace in get_features
- set the features on realize() and store the acked ones
- remove inclusion of linux/virtio_iommu.h

v6 -> v7:
- removed qapi-event.h include
- add primary_bus and associated property

v4 -> v5:
- use the new v0.5 terminology (domain, endpoint)
- add the event virtqueue

v3 -> v4:
- use page_size_mask instead of page_sizes
- added set_features()
- added some traces (reset, set_status, set_features)
- empty virtio_iommu_set_config() as the driver MUST NOT
  write to device configuration fields
- add get_config trace

v2 -> v3:
- rebase on 2.10-rc0, ie. use IOMMUMemoryRegion and remove
  iommu_ops.
- advertise VIRTIO_IOMMU_F_MAP_UNMAP feature
- page_sizes set to TARGET_PAGE_SIZE

Conflicts:
	hw/virtio/trace-events
---
 hw/virtio/Kconfig                |   5 +
 hw/virtio/Makefile.objs          |   1 +
 hw/virtio/trace-events           |   8 +
 hw/virtio/virtio-iommu.c         | 267 +++++++++++++++++++++++++++++++
 include/hw/virtio/virtio-iommu.h |  62 +++++++
 5 files changed, 343 insertions(+)
 create mode 100644 hw/virtio/virtio-iommu.c
 create mode 100644 include/hw/virtio/virtio-iommu.h

Comments

Peter Xu Aug. 15, 2019, 1:54 p.m. UTC | #1
On Tue, Jul 30, 2019 at 07:21:25PM +0200, Eric Auger wrote:
> +static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
> +{
> +    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
> +    struct virtio_iommu_req_head head;
> +    struct virtio_iommu_req_tail tail;

[1]

> +    VirtQueueElement *elem;
> +    unsigned int iov_cnt;
> +    struct iovec *iov;
> +    size_t sz;
> +
> +    for (;;) {
> +        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
> +        if (!elem) {
> +            return;
> +        }
> +
> +        if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
> +            iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
> +            virtio_error(vdev, "virtio-iommu bad head/tail size");
> +            virtqueue_detach_element(vq, elem, 0);
> +            g_free(elem);
> +            break;
> +        }
> +
> +        iov_cnt = elem->out_num;
> +        iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);

Could I ask why memdup is needed here?

> +        sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
> +        if (unlikely(sz != sizeof(head))) {
> +            tail.status = VIRTIO_IOMMU_S_DEVERR;

Do you need to zero the reserved bits to make sure it won't contain
garbage?  Same question to below uses of tail.

> +            goto out;
> +        }
> +        qemu_mutex_lock(&s->mutex);
> +        switch (head.type) {
> +        case VIRTIO_IOMMU_T_ATTACH:
> +            tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
> +            break;
> +        case VIRTIO_IOMMU_T_DETACH:
> +            tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
> +            break;
> +        case VIRTIO_IOMMU_T_MAP:
> +            tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
> +            break;
> +        case VIRTIO_IOMMU_T_UNMAP:
> +            tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
> +            break;
> +        default:
> +            tail.status = VIRTIO_IOMMU_S_UNSUPP;
> +        }
> +        qemu_mutex_unlock(&s->mutex);
> +
> +out:
> +        sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
> +                          &tail, sizeof(tail));
> +        assert(sz == sizeof(tail));
> +
> +        virtqueue_push(vq, elem, sizeof(tail));

s/tail/head/ (though they are the same size)?

> +        virtio_notify(vdev, vq);
> +        g_free(elem);
> +    }
> +}

[...]

> +static void virtio_iommu_set_features(VirtIODevice *vdev, uint64_t val)
> +{
> +    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
> +
> +    dev->acked_features = val;
> +    trace_virtio_iommu_set_features(dev->acked_features);
> +}
> +
> +static const VMStateDescription vmstate_virtio_iommu_device = {
> +    .name = "virtio-iommu-device",
> +    .unmigratable = 1,

Curious, is there explicit reason to not support migration from the
first version? :)

> +};
> +
> +static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
> +{
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
> +
> +    virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,
> +                sizeof(struct virtio_iommu_config));
> +
> +    s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
> +                             virtio_iommu_handle_command);
> +    s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);
> +
> +    s->config.page_size_mask = TARGET_PAGE_MASK;
> +    s->config.input_range.end = -1UL;
> +    s->config.domain_range.start = 0;

Zero input_range.start = 0?  After all domain_range.start is zeroed.

> +    s->config.domain_range.end = 32;
> +
> +    virtio_add_feature(&s->features, VIRTIO_RING_F_EVENT_IDX);
> +    virtio_add_feature(&s->features, VIRTIO_RING_F_INDIRECT_DESC);
> +    virtio_add_feature(&s->features, VIRTIO_F_VERSION_1);
> +    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_INPUT_RANGE);
> +    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_DOMAIN_RANGE);
> +    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP);
> +    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS);
> +    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO);
> +}

Regards,
Eric Auger Aug. 29, 2019, 12:18 p.m. UTC | #2
Hi Peter,

First of all, please forgive me for the delay.
On 8/15/19 3:54 PM, Peter Xu wrote:
> On Tue, Jul 30, 2019 at 07:21:25PM +0200, Eric Auger wrote:
>> +static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
>> +{
>> +    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
>> +    struct virtio_iommu_req_head head;
>> +    struct virtio_iommu_req_tail tail;
> 
> [1]
> 
>> +    VirtQueueElement *elem;
>> +    unsigned int iov_cnt;
>> +    struct iovec *iov;
>> +    size_t sz;
>> +
>> +    for (;;) {
>> +        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
>> +        if (!elem) {
>> +            return;
>> +        }
>> +
>> +        if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
>> +            iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
>> +            virtio_error(vdev, "virtio-iommu bad head/tail size");
>> +            virtqueue_detach_element(vq, elem, 0);
>> +            g_free(elem);
>> +            break;
>> +        }
>> +
>> +        iov_cnt = elem->out_num;
>> +        iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
> 
> Could I ask why memdup is needed here?
Indeed I don't think it is needed and besides iov is not freed!

I got inspired from hw/net/virtio-net.c. To be honest I don't get why
the g_memdup is needed there either. The out_sg gets duplicated and
commands work on the duplicated data and not in place.
> 
>> +        sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
>> +        if (unlikely(sz != sizeof(head))) {
>> +            tail.status = VIRTIO_IOMMU_S_DEVERR;
> 
> Do you need to zero the reserved bits to make sure it won't contain
> garbage?  Same question to below uses of tail.
yes. I initialized tail.
> 
>> +            goto out;
>> +        }
>> +        qemu_mutex_lock(&s->mutex);
>> +        switch (head.type) {
>> +        case VIRTIO_IOMMU_T_ATTACH:
>> +            tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
>> +            break;
>> +        case VIRTIO_IOMMU_T_DETACH:
>> +            tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
>> +            break;
>> +        case VIRTIO_IOMMU_T_MAP:
>> +            tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
>> +            break;
>> +        case VIRTIO_IOMMU_T_UNMAP:
>> +            tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
>> +            break;
>> +        default:
>> +            tail.status = VIRTIO_IOMMU_S_UNSUPP;
>> +        }
>> +        qemu_mutex_unlock(&s->mutex);
>> +
>> +out:
>> +        sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
>> +                          &tail, sizeof(tail));
>> +        assert(sz == sizeof(tail));
>> +
>> +        virtqueue_push(vq, elem, sizeof(tail));
> 
> s/tail/head/ (though they are the same size)?
That's unclear to me. Similarly when checking against virtio-net.c, the
element is pushed back to the used ring and len is set to the size of
the status with:

/*
 * Control virtqueue data structures
 *
 * The control virtqueue expects a header in the first sg entry
 * and an ack/status response in the last entry.  Data for the
 * command goes in between.
 */
> 
>> +        virtio_notify(vdev, vq);
>> +        g_free(elem);
>> +    }
>> +}
> 
> [...]
> 
>> +static void virtio_iommu_set_features(VirtIODevice *vdev, uint64_t val)
>> +{
>> +    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
>> +
>> +    dev->acked_features = val;
>> +    trace_virtio_iommu_set_features(dev->acked_features);
>> +}
>> +
>> +static const VMStateDescription vmstate_virtio_iommu_device = {
>> +    .name = "virtio-iommu-device",
>> +    .unmigratable = 1,
> 
> Curious, is there explicit reason to not support migration from the
> first version? :)
The state is made of red black trees, lists. For the former there is no
VMSTATE* ready. I am working on it but I think this should be handled
separately
> 
>> +};
>> +
>> +static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
>> +{
>> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>> +    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
>> +
>> +    virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,
>> +                sizeof(struct virtio_iommu_config));
>> +
>> +    s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
>> +                             virtio_iommu_handle_command);
>> +    s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);
>> +
>> +    s->config.page_size_mask = TARGET_PAGE_MASK;
>> +    s->config.input_range.end = -1UL;
>> +    s->config.domain_range.start = 0;
> 
> Zero input_range.start = 0?  After all domain_range.start is zeroed.
virtio_init does:
    if (vdev->config_len) {
        vdev->config = g_malloc0(config_size);

but I should be homogeneous and then remove s->config.domain_range.start
= 0;
> 
>> +    s->config.domain_range.end = 32;
>> +
>> +    virtio_add_feature(&s->features, VIRTIO_RING_F_EVENT_IDX);
>> +    virtio_add_feature(&s->features, VIRTIO_RING_F_INDIRECT_DESC);
>> +    virtio_add_feature(&s->features, VIRTIO_F_VERSION_1);
>> +    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_INPUT_RANGE);
>> +    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_DOMAIN_RANGE);
>> +    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP);
>> +    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS);
>> +    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO);
>> +}
> 
> Regards,
>
Peter Xu Aug. 30, 2019, 1:26 a.m. UTC | #3
On Thu, Aug 29, 2019 at 02:18:42PM +0200, Auger Eric wrote:
> Hi Peter,
> 
> First of all, please forgive me for the delay.
> On 8/15/19 3:54 PM, Peter Xu wrote:
> > On Tue, Jul 30, 2019 at 07:21:25PM +0200, Eric Auger wrote:
> >> +static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
> >> +{
> >> +    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
> >> +    struct virtio_iommu_req_head head;
> >> +    struct virtio_iommu_req_tail tail;
> > 
> > [1]
> > 
> >> +    VirtQueueElement *elem;
> >> +    unsigned int iov_cnt;
> >> +    struct iovec *iov;
> >> +    size_t sz;
> >> +
> >> +    for (;;) {
> >> +        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
> >> +        if (!elem) {
> >> +            return;
> >> +        }
> >> +
> >> +        if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
> >> +            iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
> >> +            virtio_error(vdev, "virtio-iommu bad head/tail size");
> >> +            virtqueue_detach_element(vq, elem, 0);
> >> +            g_free(elem);
> >> +            break;
> >> +        }
> >> +
> >> +        iov_cnt = elem->out_num;
> >> +        iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
> > 
> > Could I ask why memdup is needed here?
> Indeed I don't think it is needed and besides iov is not freed!
> 
> I got inspired from hw/net/virtio-net.c. To be honest I don't get why
> the g_memdup is needed there either. The out_sg gets duplicated and
> commands work on the duplicated data and not in place.

Oh true, I found that it's because of calling of iov_discard_front().
Please have a look at 771b6ed37e3.  Though it seems to me that
virtio-iommu does not truncate iovs so it should not be needed.

> > 
> >> +        sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
> >> +        if (unlikely(sz != sizeof(head))) {
> >> +            tail.status = VIRTIO_IOMMU_S_DEVERR;
> > 
> > Do you need to zero the reserved bits to make sure it won't contain
> > garbage?  Same question to below uses of tail.
> yes. I initialized tail.
> > 
> >> +            goto out;
> >> +        }
> >> +        qemu_mutex_lock(&s->mutex);
> >> +        switch (head.type) {
> >> +        case VIRTIO_IOMMU_T_ATTACH:
> >> +            tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
> >> +            break;
> >> +        case VIRTIO_IOMMU_T_DETACH:
> >> +            tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
> >> +            break;
> >> +        case VIRTIO_IOMMU_T_MAP:
> >> +            tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
> >> +            break;
> >> +        case VIRTIO_IOMMU_T_UNMAP:
> >> +            tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
> >> +            break;
> >> +        default:
> >> +            tail.status = VIRTIO_IOMMU_S_UNSUPP;
> >> +        }
> >> +        qemu_mutex_unlock(&s->mutex);
> >> +
> >> +out:
> >> +        sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
> >> +                          &tail, sizeof(tail));
> >> +        assert(sz == sizeof(tail));
> >> +
> >> +        virtqueue_push(vq, elem, sizeof(tail));
> > 
> > s/tail/head/ (though they are the same size)?
> That's unclear to me. Similarly when checking against virtio-net.c, the
> element is pushed back to the used ring and len is set to the size of
> the status with:
> 
> /*
>  * Control virtqueue data structures
>  *
>  * The control virtqueue expects a header in the first sg entry
>  * and an ack/status response in the last entry.  Data for the
>  * command goes in between.
>  */

I was referencing the balloon code when reading the patch, e.g.,
virtio_balloon_handle_output().  Though after I read more carefully I
see that other places are using it as you described.  Now I tend to
agree with you, because virtqueue_push() who calls
virtqueue_unmap_sg() used the len to unmap in_sg[] rather than
out_sg[].  So please ignore my previous comment.

(then I'm not sure whether the usage in the balloon code was correct
 now...)

> > 
> >> +        virtio_notify(vdev, vq);
> >> +        g_free(elem);
> >> +    }
> >> +}
> > 
> > [...]
> > 
> >> +static void virtio_iommu_set_features(VirtIODevice *vdev, uint64_t val)
> >> +{
> >> +    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
> >> +
> >> +    dev->acked_features = val;
> >> +    trace_virtio_iommu_set_features(dev->acked_features);
> >> +}
> >> +
> >> +static const VMStateDescription vmstate_virtio_iommu_device = {
> >> +    .name = "virtio-iommu-device",
> >> +    .unmigratable = 1,
> > 
> > Curious, is there explicit reason to not support migration from the
> > first version? :)
> The state is made of red black trees, lists. For the former there is no
> VMSTATE* ready. I am working on it but I think this should be handled
> separately

Fair enough.  Would you mind to add a similar comment above
unmigratable?

Thanks,
Eric Auger Aug. 30, 2019, 8:12 a.m. UTC | #4
Hi Peter,
On 8/30/19 3:26 AM, Peter Xu wrote:
> On Thu, Aug 29, 2019 at 02:18:42PM +0200, Auger Eric wrote:
>> Hi Peter,
>>
>> First of all, please forgive me for the delay.
>> On 8/15/19 3:54 PM, Peter Xu wrote:
>>> On Tue, Jul 30, 2019 at 07:21:25PM +0200, Eric Auger wrote:
>>>> +static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
>>>> +{
>>>> +    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
>>>> +    struct virtio_iommu_req_head head;
>>>> +    struct virtio_iommu_req_tail tail;
>>>
>>> [1]
>>>
>>>> +    VirtQueueElement *elem;
>>>> +    unsigned int iov_cnt;
>>>> +    struct iovec *iov;
>>>> +    size_t sz;
>>>> +
>>>> +    for (;;) {
>>>> +        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
>>>> +        if (!elem) {
>>>> +            return;
>>>> +        }
>>>> +
>>>> +        if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
>>>> +            iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
>>>> +            virtio_error(vdev, "virtio-iommu bad head/tail size");
>>>> +            virtqueue_detach_element(vq, elem, 0);
>>>> +            g_free(elem);
>>>> +            break;
>>>> +        }
>>>> +
>>>> +        iov_cnt = elem->out_num;
>>>> +        iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
>>>
>>> Could I ask why memdup is needed here?
>> Indeed I don't think it is needed and besides iov is not freed!
>>
>> I got inspired from hw/net/virtio-net.c. To be honest I don't get why
>> the g_memdup is needed there either. The out_sg gets duplicated and
>> commands work on the duplicated data and not in place.
> 
> Oh true, I found that it's because of calling of iov_discard_front().
> Please have a look at 771b6ed37e3.  Though it seems to me that
> virtio-iommu does not truncate iovs so it should not be needed.

thanks for the sha1. indeed virtio-iommu does not use iov_discard_front
so I shouldn't need it.
> 
>>>
>>>> +        sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
>>>> +        if (unlikely(sz != sizeof(head))) {
>>>> +            tail.status = VIRTIO_IOMMU_S_DEVERR;
>>>
>>> Do you need to zero the reserved bits to make sure it won't contain
>>> garbage?  Same question to below uses of tail.
>> yes. I initialized tail.
>>>
>>>> +            goto out;
>>>> +        }
>>>> +        qemu_mutex_lock(&s->mutex);
>>>> +        switch (head.type) {
>>>> +        case VIRTIO_IOMMU_T_ATTACH:
>>>> +            tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
>>>> +            break;
>>>> +        case VIRTIO_IOMMU_T_DETACH:
>>>> +            tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
>>>> +            break;
>>>> +        case VIRTIO_IOMMU_T_MAP:
>>>> +            tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
>>>> +            break;
>>>> +        case VIRTIO_IOMMU_T_UNMAP:
>>>> +            tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
>>>> +            break;
>>>> +        default:
>>>> +            tail.status = VIRTIO_IOMMU_S_UNSUPP;
>>>> +        }
>>>> +        qemu_mutex_unlock(&s->mutex);
>>>> +
>>>> +out:
>>>> +        sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
>>>> +                          &tail, sizeof(tail));
>>>> +        assert(sz == sizeof(tail));
>>>> +
>>>> +        virtqueue_push(vq, elem, sizeof(tail));
>>>
>>> s/tail/head/ (though they are the same size)?
>> That's unclear to me. Similarly when checking against virtio-net.c, the
>> element is pushed back to the used ring and len is set to the size of
>> the status with:
>>
>> /*
>>  * Control virtqueue data structures
>>  *
>>  * The control virtqueue expects a header in the first sg entry
>>  * and an ack/status response in the last entry.  Data for the
>>  * command goes in between.
>>  */
> 
> I was referencing the balloon code when reading the patch, e.g.,
> virtio_balloon_handle_output().  Though after I read more carefully I
> see that other places are using it as you described.  Now I tend to
> agree with you, because virtqueue_push() who calls
> virtqueue_unmap_sg() used the len to unmap in_sg[] rather than
> out_sg[].  So please ignore my previous comment.

OK
> 
> (then I'm not sure whether the usage in the balloon code was correct
>  now...)
> 
>>>
>>>> +        virtio_notify(vdev, vq);
>>>> +        g_free(elem);
>>>> +    }
>>>> +}
>>>
>>> [...]
>>>
>>>> +static void virtio_iommu_set_features(VirtIODevice *vdev, uint64_t val)
>>>> +{
>>>> +    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
>>>> +
>>>> +    dev->acked_features = val;
>>>> +    trace_virtio_iommu_set_features(dev->acked_features);
>>>> +}
>>>> +
>>>> +static const VMStateDescription vmstate_virtio_iommu_device = {
>>>> +    .name = "virtio-iommu-device",
>>>> +    .unmigratable = 1,
>>>
>>> Curious, is there explicit reason to not support migration from the
>>> first version? :)
>> The state is made of red black trees, lists. For the former there is no
>> VMSTATE* ready. I am working on it but I think this should be handled
>> separately
> 
> Fair enough.  Would you mind to add a similar comment above
> unmigratable?
sure

Thanks!

Eric
> 
> Thanks,
>
diff mbox series

Patch

diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig
index 3724ff8bac..a30107b439 100644
--- a/hw/virtio/Kconfig
+++ b/hw/virtio/Kconfig
@@ -6,6 +6,11 @@  config VIRTIO_RNG
     default y
     depends on VIRTIO
 
+config VIRTIO_IOMMU
+    bool
+    default y
+    depends on VIRTIO
+
 config VIRTIO_PCI
     bool
     default y if PCI_DEVICES
diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs
index 964ce78607..f42e4dd94f 100644
--- a/hw/virtio/Makefile.objs
+++ b/hw/virtio/Makefile.objs
@@ -14,6 +14,7 @@  obj-$(CONFIG_VIRTIO_CRYPTO) += virtio-crypto.o
 obj-$(call land,$(CONFIG_VIRTIO_CRYPTO),$(CONFIG_VIRTIO_PCI)) += virtio-crypto-pci.o
 obj-$(CONFIG_VIRTIO_PMEM) += virtio-pmem.o
 common-obj-$(call land,$(CONFIG_VIRTIO_PMEM),$(CONFIG_VIRTIO_PCI)) += virtio-pmem-pci.o
+obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
 obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock.o
 
 ifeq ($(CONFIG_VIRTIO_PCI),y)
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index e28ba48da6..f7dac39213 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -53,3 +53,11 @@  virtio_mmio_write_offset(uint64_t offset, uint64_t value) "virtio_mmio_write off
 virtio_mmio_guest_page(uint64_t size, int shift) "guest page size 0x%" PRIx64 " shift %d"
 virtio_mmio_queue_write(uint64_t value, int max_size) "mmio_queue write 0x%" PRIx64 " max %d"
 virtio_mmio_setting_irq(int level) "virtio_mmio setting IRQ %d"
+
+# hw/virtio/virtio-iommu.c
+virtio_iommu_device_reset(void) "reset!"
+virtio_iommu_get_features(uint64_t features) "device supports features=0x%"PRIx64
+virtio_iommu_set_features(uint64_t features) "features accepted by the driver =0x%"PRIx64
+virtio_iommu_device_status(uint8_t status) "driver status = %d"
+virtio_iommu_get_config(uint64_t page_size_mask, uint64_t start, uint64_t end, uint32_t domain_range, uint32_t probe_size) "page_size_mask=0x%"PRIx64" start=0x%"PRIx64" end=0x%"PRIx64" domain_range=%d probe_size=0x%x"
+virtio_iommu_set_config(uint64_t page_size_mask, uint64_t start, uint64_t end, uint32_t domain_range, uint32_t probe_size) "page_size_mask=0x%"PRIx64" start=0x%"PRIx64" end=0x%"PRIx64" domain_bits=%d probe_size=0x%x"
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
new file mode 100644
index 0000000000..f239954396
--- /dev/null
+++ b/hw/virtio/virtio-iommu.c
@@ -0,0 +1,267 @@ 
+/*
+ * virtio-iommu device
+ *
+ * Copyright (c) 2017 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/iov.h"
+#include "qemu-common.h"
+#include "hw/virtio/virtio.h"
+#include "sysemu/kvm.h"
+#include "trace.h"
+
+#include "standard-headers/linux/virtio_ids.h"
+
+#include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/virtio-access.h"
+#include "hw/virtio/virtio-iommu.h"
+
+/* Max size */
+#define VIOMMU_DEFAULT_QUEUE_SIZE 256
+
+static int virtio_iommu_handle_attach(VirtIOIOMMU *s,
+                                      struct iovec *iov,
+                                      unsigned int iov_cnt)
+{
+    return VIRTIO_IOMMU_S_UNSUPP;
+}
+static int virtio_iommu_handle_detach(VirtIOIOMMU *s,
+                                      struct iovec *iov,
+                                      unsigned int iov_cnt)
+{
+    return VIRTIO_IOMMU_S_UNSUPP;
+}
+static int virtio_iommu_handle_map(VirtIOIOMMU *s,
+                                   struct iovec *iov,
+                                   unsigned int iov_cnt)
+{
+    return VIRTIO_IOMMU_S_UNSUPP;
+}
+static int virtio_iommu_handle_unmap(VirtIOIOMMU *s,
+                                     struct iovec *iov,
+                                     unsigned int iov_cnt)
+{
+    return VIRTIO_IOMMU_S_UNSUPP;
+}
+
+static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
+    struct virtio_iommu_req_head head;
+    struct virtio_iommu_req_tail tail;
+    VirtQueueElement *elem;
+    unsigned int iov_cnt;
+    struct iovec *iov;
+    size_t sz;
+
+    for (;;) {
+        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
+        if (!elem) {
+            return;
+        }
+
+        if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
+            iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
+            virtio_error(vdev, "virtio-iommu bad head/tail size");
+            virtqueue_detach_element(vq, elem, 0);
+            g_free(elem);
+            break;
+        }
+
+        iov_cnt = elem->out_num;
+        iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
+        sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
+        if (unlikely(sz != sizeof(head))) {
+            tail.status = VIRTIO_IOMMU_S_DEVERR;
+            goto out;
+        }
+        qemu_mutex_lock(&s->mutex);
+        switch (head.type) {
+        case VIRTIO_IOMMU_T_ATTACH:
+            tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
+            break;
+        case VIRTIO_IOMMU_T_DETACH:
+            tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
+            break;
+        case VIRTIO_IOMMU_T_MAP:
+            tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
+            break;
+        case VIRTIO_IOMMU_T_UNMAP:
+            tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
+            break;
+        default:
+            tail.status = VIRTIO_IOMMU_S_UNSUPP;
+        }
+        qemu_mutex_unlock(&s->mutex);
+
+out:
+        sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
+                          &tail, sizeof(tail));
+        assert(sz == sizeof(tail));
+
+        virtqueue_push(vq, elem, sizeof(tail));
+        virtio_notify(vdev, vq);
+        g_free(elem);
+    }
+}
+
+static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data)
+{
+    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
+    struct virtio_iommu_config *config = &dev->config;
+
+    trace_virtio_iommu_get_config(config->page_size_mask,
+                                  config->input_range.start,
+                                  config->input_range.end,
+                                  config->domain_range.end,
+                                  config->probe_size);
+    memcpy(config_data, &dev->config, sizeof(struct virtio_iommu_config));
+}
+
+static void virtio_iommu_set_config(VirtIODevice *vdev,
+                                      const uint8_t *config_data)
+{
+    struct virtio_iommu_config config;
+
+    memcpy(&config, config_data, sizeof(struct virtio_iommu_config));
+    trace_virtio_iommu_set_config(config.page_size_mask,
+                                  config.input_range.start,
+                                  config.input_range.end,
+                                  config.domain_range.end,
+                                  config.probe_size);
+}
+
+static uint64_t virtio_iommu_get_features(VirtIODevice *vdev, uint64_t f,
+                                          Error **errp)
+{
+    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
+
+    f |= dev->features;
+    trace_virtio_iommu_get_features(f);
+    return f;
+}
+
+static void virtio_iommu_set_features(VirtIODevice *vdev, uint64_t val)
+{
+    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
+
+    dev->acked_features = val;
+    trace_virtio_iommu_set_features(dev->acked_features);
+}
+
+static const VMStateDescription vmstate_virtio_iommu_device = {
+    .name = "virtio-iommu-device",
+    .unmigratable = 1,
+};
+
+static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
+
+    virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,
+                sizeof(struct virtio_iommu_config));
+
+    s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
+                             virtio_iommu_handle_command);
+    s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);
+
+    s->config.page_size_mask = TARGET_PAGE_MASK;
+    s->config.input_range.end = -1UL;
+    s->config.domain_range.start = 0;
+    s->config.domain_range.end = 32;
+
+    virtio_add_feature(&s->features, VIRTIO_RING_F_EVENT_IDX);
+    virtio_add_feature(&s->features, VIRTIO_RING_F_INDIRECT_DESC);
+    virtio_add_feature(&s->features, VIRTIO_F_VERSION_1);
+    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_INPUT_RANGE);
+    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_DOMAIN_RANGE);
+    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP);
+    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS);
+    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO);
+}
+
+static void virtio_iommu_device_unrealize(DeviceState *dev, Error **errp)
+{
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+
+    virtio_cleanup(vdev);
+}
+
+static void virtio_iommu_device_reset(VirtIODevice *vdev)
+{
+    trace_virtio_iommu_device_reset();
+}
+
+static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status)
+{
+    trace_virtio_iommu_device_status(status);
+}
+
+static void virtio_iommu_instance_init(Object *obj)
+{
+}
+
+static const VMStateDescription vmstate_virtio_iommu = {
+    .name = "virtio-iommu",
+    .minimum_version_id = 1,
+    .version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_VIRTIO_DEVICE,
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static Property virtio_iommu_properties[] = {
+    DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus, "PCI", PCIBus *),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_iommu_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+
+    dc->props = virtio_iommu_properties;
+    dc->vmsd = &vmstate_virtio_iommu;
+
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+    vdc->realize = virtio_iommu_device_realize;
+    vdc->unrealize = virtio_iommu_device_unrealize;
+    vdc->reset = virtio_iommu_device_reset;
+    vdc->get_config = virtio_iommu_get_config;
+    vdc->set_config = virtio_iommu_set_config;
+    vdc->get_features = virtio_iommu_get_features;
+    vdc->set_features = virtio_iommu_set_features;
+    vdc->set_status = virtio_iommu_set_status;
+    vdc->vmsd = &vmstate_virtio_iommu_device;
+}
+
+static const TypeInfo virtio_iommu_info = {
+    .name = TYPE_VIRTIO_IOMMU,
+    .parent = TYPE_VIRTIO_DEVICE,
+    .instance_size = sizeof(VirtIOIOMMU),
+    .instance_init = virtio_iommu_instance_init,
+    .class_init = virtio_iommu_class_init,
+};
+
+static void virtio_register_types(void)
+{
+    type_register_static(&virtio_iommu_info);
+}
+
+type_init(virtio_register_types)
diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h
new file mode 100644
index 0000000000..4d47b6abeb
--- /dev/null
+++ b/include/hw/virtio/virtio-iommu.h
@@ -0,0 +1,62 @@ 
+/*
+ * virtio-iommu device
+ *
+ * Copyright (c) 2017 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef QEMU_VIRTIO_IOMMU_H
+#define QEMU_VIRTIO_IOMMU_H
+
+#include "standard-headers/linux/virtio_iommu.h"
+#include "hw/virtio/virtio.h"
+#include "hw/pci/pci.h"
+
+#define TYPE_VIRTIO_IOMMU "virtio-iommu-device"
+#define VIRTIO_IOMMU(obj) \
+        OBJECT_CHECK(VirtIOIOMMU, (obj), TYPE_VIRTIO_IOMMU)
+
+#define IOMMU_PCI_BUS_MAX      256
+#define IOMMU_PCI_DEVFN_MAX    256
+
+typedef struct IOMMUDevice {
+    void         *viommu;
+    PCIBus       *bus;
+    int           devfn;
+    IOMMUMemoryRegion  iommu_mr;
+    AddressSpace  as;
+} IOMMUDevice;
+
+typedef struct IOMMUPciBus {
+    PCIBus       *bus;
+    IOMMUDevice  *pbdev[0]; /* Parent array is sparse, so dynamically alloc */
+} IOMMUPciBus;
+
+typedef struct VirtIOIOMMU {
+    VirtIODevice parent_obj;
+    VirtQueue *req_vq;
+    VirtQueue *event_vq;
+    struct virtio_iommu_config config;
+    uint64_t features;
+    uint64_t acked_features;
+    GHashTable *as_by_busptr;
+    IOMMUPciBus *as_by_bus_num[IOMMU_PCI_BUS_MAX];
+    PCIBus *primary_bus;
+    GTree *domains;
+    QemuMutex mutex;
+    GTree *endpoints;
+} VirtIOIOMMU;
+
+#endif