diff mbox series

[for-5.0,v11,08/20] virtio-iommu: Implement translate

Message ID 20191122182943.4656-9-eric.auger@redhat.com
State New
Headers show
Series VIRTIO-IOMMU device | expand

Commit Message

Eric Auger Nov. 22, 2019, 6:29 p.m. UTC
This patch implements the translate callback

Signed-off-by: Eric Auger <eric.auger@redhat.com>

---

v10 -> v11:
- take into account the new value struct and use
  g_tree_lookup_extended
- switched to error_report_once

v6 -> v7:
- implemented bypass-mode

v5 -> v6:
- replace error_report by qemu_log_mask

v4 -> v5:
- check the device domain is not NULL
- s/printf/error_report
- set flags to IOMMU_NONE in case of all translation faults
---
 hw/virtio/trace-events   |  1 +
 hw/virtio/virtio-iommu.c | 63 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 63 insertions(+), 1 deletion(-)

Comments

Jean-Philippe Brucker Dec. 10, 2019, 4:43 p.m. UTC | #1
On Fri, Nov 22, 2019 at 07:29:31PM +0100, Eric Auger wrote:
> This patch implements the translate callback
> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>

Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Peter Xu Dec. 10, 2019, 7:33 p.m. UTC | #2
On Fri, Nov 22, 2019 at 07:29:31PM +0100, Eric Auger wrote:
> This patch implements the translate callback
> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> 
> ---
> 
> v10 -> v11:
> - take into account the new value struct and use
>   g_tree_lookup_extended
> - switched to error_report_once
> 
> v6 -> v7:
> - implemented bypass-mode
> 
> v5 -> v6:
> - replace error_report by qemu_log_mask
> 
> v4 -> v5:
> - check the device domain is not NULL
> - s/printf/error_report
> - set flags to IOMMU_NONE in case of all translation faults
> ---
>  hw/virtio/trace-events   |  1 +
>  hw/virtio/virtio-iommu.c | 63 +++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 63 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> index f25359cee2..de7cbb3c8f 100644
> --- a/hw/virtio/trace-events
> +++ b/hw/virtio/trace-events
> @@ -72,3 +72,4 @@ virtio_iommu_get_endpoint(uint32_t ep_id) "Alloc endpoint=%d"
>  virtio_iommu_put_endpoint(uint32_t ep_id) "Free endpoint=%d"
>  virtio_iommu_get_domain(uint32_t domain_id) "Alloc domain=%d"
>  virtio_iommu_put_domain(uint32_t domain_id) "Free domain=%d"
> +virtio_iommu_translate_out(uint64_t virt_addr, uint64_t phys_addr, uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"
> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
> index f0a56833a2..a83666557b 100644
> --- a/hw/virtio/virtio-iommu.c
> +++ b/hw/virtio/virtio-iommu.c
> @@ -412,19 +412,80 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
>                                              int iommu_idx)
>  {
>      IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
> +    viommu_interval interval, *mapping_key;
> +    viommu_mapping *mapping_value;
> +    VirtIOIOMMU *s = sdev->viommu;
> +    viommu_endpoint *ep;
> +    bool bypass_allowed;
>      uint32_t sid;
> +    bool found;
> +
> +    interval.low = addr;
> +    interval.high = addr + 1;
>  
>      IOMMUTLBEntry entry = {
>          .target_as = &address_space_memory,
>          .iova = addr,
>          .translated_addr = addr,
> -        .addr_mask = ~(hwaddr)0,
> +        .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1,
>          .perm = IOMMU_NONE,
>      };
>  
> +    bypass_allowed = virtio_has_feature(s->acked_features,
> +                                        VIRTIO_IOMMU_F_BYPASS);
> +

Would it be easier to check bypass_allowed here once and then drop the
latter [1] and [2] check?

>      sid = virtio_iommu_get_sid(sdev);
>  
>      trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
> +    qemu_mutex_lock(&s->mutex);
> +
> +    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
> +    if (!ep) {
> +        if (!bypass_allowed) {

[1]

> +            error_report_once("%s sid=%d is not known!!", __func__, sid);
> +        } else {
> +            entry.perm = flag;
> +        }
> +        goto unlock;
> +    }
> +
> +    if (!ep->domain) {
> +        if (!bypass_allowed) {

[2]

> +            qemu_log_mask(LOG_GUEST_ERROR,
> +                          "%s %02x:%02x.%01x not attached to any domain\n",
> +                          __func__, PCI_BUS_NUM(sid),
> +                          PCI_SLOT(sid), PCI_FUNC(sid));
> +        } else {
> +            entry.perm = flag;
> +        }
> +        goto unlock;
> +    }
> +
> +    found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval),
> +                                   (void **)&mapping_key,
> +                                   (void **)&mapping_value);
> +    if (!found) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "%s no mapping for 0x%"PRIx64" for sid=%d\n",
> +                      __func__, addr, sid);

I would still suggest that we use the same logging interface (either
error_report_once() or qemu_log_mask(), not use them randomly).

> +        goto unlock;
> +    }
> +
> +    if (((flag & IOMMU_RO) &&
> +            !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_READ)) ||
> +        ((flag & IOMMU_WO) &&
> +            !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_WRITE))) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "Permission error on 0x%"PRIx64"(%d): allowed=%d\n",
> +                      addr, flag, mapping_value->flags);

(Btw, IIUC this may not be a guest error. Say, what if the device is
 simply broken?)

> +        goto unlock;
> +    }
> +    entry.translated_addr = addr - mapping_key->low + mapping_value->phys_addr;
> +    entry.perm = flag;
> +    trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid);
> +
> +unlock:
> +    qemu_mutex_unlock(&s->mutex);
>      return entry;
>  }
>  
> -- 
> 2.20.1
>
Eric Auger Dec. 19, 2019, 10:30 a.m. UTC | #3
Hi Peter,
On 12/10/19 8:33 PM, Peter Xu wrote:
> On Fri, Nov 22, 2019 at 07:29:31PM +0100, Eric Auger wrote:
>> This patch implements the translate callback
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>
>> ---
>>
>> v10 -> v11:
>> - take into account the new value struct and use
>>   g_tree_lookup_extended
>> - switched to error_report_once
>>
>> v6 -> v7:
>> - implemented bypass-mode
>>
>> v5 -> v6:
>> - replace error_report by qemu_log_mask
>>
>> v4 -> v5:
>> - check the device domain is not NULL
>> - s/printf/error_report
>> - set flags to IOMMU_NONE in case of all translation faults
>> ---
>>  hw/virtio/trace-events   |  1 +
>>  hw/virtio/virtio-iommu.c | 63 +++++++++++++++++++++++++++++++++++++++-
>>  2 files changed, 63 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
>> index f25359cee2..de7cbb3c8f 100644
>> --- a/hw/virtio/trace-events
>> +++ b/hw/virtio/trace-events
>> @@ -72,3 +72,4 @@ virtio_iommu_get_endpoint(uint32_t ep_id) "Alloc endpoint=%d"
>>  virtio_iommu_put_endpoint(uint32_t ep_id) "Free endpoint=%d"
>>  virtio_iommu_get_domain(uint32_t domain_id) "Alloc domain=%d"
>>  virtio_iommu_put_domain(uint32_t domain_id) "Free domain=%d"
>> +virtio_iommu_translate_out(uint64_t virt_addr, uint64_t phys_addr, uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"
>> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
>> index f0a56833a2..a83666557b 100644
>> --- a/hw/virtio/virtio-iommu.c
>> +++ b/hw/virtio/virtio-iommu.c
>> @@ -412,19 +412,80 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
>>                                              int iommu_idx)
>>  {
>>      IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
>> +    viommu_interval interval, *mapping_key;
>> +    viommu_mapping *mapping_value;
>> +    VirtIOIOMMU *s = sdev->viommu;
>> +    viommu_endpoint *ep;
>> +    bool bypass_allowed;
>>      uint32_t sid;
>> +    bool found;
>> +
>> +    interval.low = addr;
>> +    interval.high = addr + 1;
>>  
>>      IOMMUTLBEntry entry = {
>>          .target_as = &address_space_memory,
>>          .iova = addr,
>>          .translated_addr = addr,
>> -        .addr_mask = ~(hwaddr)0,
>> +        .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1,
>>          .perm = IOMMU_NONE,
>>      };
>>  
>> +    bypass_allowed = virtio_has_feature(s->acked_features,
>> +                                        VIRTIO_IOMMU_F_BYPASS);
>> +
> 
> Would it be easier to check bypass_allowed here once and then drop the
> latter [1] and [2] check?
bypass_allowed does not mean you systematically bypass. You bypass if
the SID is unknown or if the device is not attached to any domain.
Otherwise you translate. But maybe I miss your point.
> 
>>      sid = virtio_iommu_get_sid(sdev);
>>  
>>      trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
>> +    qemu_mutex_lock(&s->mutex);
>> +
>> +    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
>> +    if (!ep) {
>> +        if (!bypass_allowed) {
> 
> [1]
> 
>> +            error_report_once("%s sid=%d is not known!!", __func__, sid);
>> +        } else {
>> +            entry.perm = flag;
>> +        }
>> +        goto unlock;
>> +    }
>> +
>> +    if (!ep->domain) {
>> +        if (!bypass_allowed) {
> 
> [2]
> 
>> +            qemu_log_mask(LOG_GUEST_ERROR,
>> +                          "%s %02x:%02x.%01x not attached to any domain\n",
>> +                          __func__, PCI_BUS_NUM(sid),
>> +                          PCI_SLOT(sid), PCI_FUNC(sid));
>> +        } else {
>> +            entry.perm = flag;
>> +        }
>> +        goto unlock;
>> +    }
>> +
>> +    found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval),
>> +                                   (void **)&mapping_key,
>> +                                   (void **)&mapping_value);
>> +    if (!found) {
>> +        qemu_log_mask(LOG_GUEST_ERROR,
>> +                      "%s no mapping for 0x%"PRIx64" for sid=%d\n",
>> +                      __func__, addr, sid);
> 
> I would still suggest that we use the same logging interface (either
> error_report_once() or qemu_log_mask(), not use them randomly).
OK I will switch to error_report_once() then
> 
>> +        goto unlock;
>> +    }
>> +
>> +    if (((flag & IOMMU_RO) &&
>> +            !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_READ)) ||
>> +        ((flag & IOMMU_WO) &&
>> +            !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_WRITE))) {
>> +        qemu_log_mask(LOG_GUEST_ERROR,
>> +                      "Permission error on 0x%"PRIx64"(%d): allowed=%d\n",
>> +                      addr, flag, mapping_value->flags);
> 
> (Btw, IIUC this may not be a guest error. Say, what if the device is
>  simply broken?)
> 
>> +        goto unlock;
>> +    }
>> +    entry.translated_addr = addr - mapping_key->low + mapping_value->phys_addr;
>> +    entry.perm = flag;
>> +    trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid);
>> +
>> +unlock:
>> +    qemu_mutex_unlock(&s->mutex);
>>      return entry;
>>  }
>>  
>> -- 
>> 2.20.1
>>
> 
Thanks

Eric
Peter Xu Dec. 19, 2019, 1:33 p.m. UTC | #4
On Thu, Dec 19, 2019 at 11:30:40AM +0100, Auger Eric wrote:
> Hi Peter,
> On 12/10/19 8:33 PM, Peter Xu wrote:
> > On Fri, Nov 22, 2019 at 07:29:31PM +0100, Eric Auger wrote:
> >> This patch implements the translate callback
> >>
> >> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> >>
> >> ---
> >>
> >> v10 -> v11:
> >> - take into account the new value struct and use
> >>   g_tree_lookup_extended
> >> - switched to error_report_once
> >>
> >> v6 -> v7:
> >> - implemented bypass-mode
> >>
> >> v5 -> v6:
> >> - replace error_report by qemu_log_mask
> >>
> >> v4 -> v5:
> >> - check the device domain is not NULL
> >> - s/printf/error_report
> >> - set flags to IOMMU_NONE in case of all translation faults
> >> ---
> >>  hw/virtio/trace-events   |  1 +
> >>  hw/virtio/virtio-iommu.c | 63 +++++++++++++++++++++++++++++++++++++++-
> >>  2 files changed, 63 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> >> index f25359cee2..de7cbb3c8f 100644
> >> --- a/hw/virtio/trace-events
> >> +++ b/hw/virtio/trace-events
> >> @@ -72,3 +72,4 @@ virtio_iommu_get_endpoint(uint32_t ep_id) "Alloc endpoint=%d"
> >>  virtio_iommu_put_endpoint(uint32_t ep_id) "Free endpoint=%d"
> >>  virtio_iommu_get_domain(uint32_t domain_id) "Alloc domain=%d"
> >>  virtio_iommu_put_domain(uint32_t domain_id) "Free domain=%d"
> >> +virtio_iommu_translate_out(uint64_t virt_addr, uint64_t phys_addr, uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"
> >> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
> >> index f0a56833a2..a83666557b 100644
> >> --- a/hw/virtio/virtio-iommu.c
> >> +++ b/hw/virtio/virtio-iommu.c
> >> @@ -412,19 +412,80 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
> >>                                              int iommu_idx)
> >>  {
> >>      IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
> >> +    viommu_interval interval, *mapping_key;
> >> +    viommu_mapping *mapping_value;
> >> +    VirtIOIOMMU *s = sdev->viommu;
> >> +    viommu_endpoint *ep;
> >> +    bool bypass_allowed;
> >>      uint32_t sid;
> >> +    bool found;
> >> +
> >> +    interval.low = addr;
> >> +    interval.high = addr + 1;
> >>  
> >>      IOMMUTLBEntry entry = {
> >>          .target_as = &address_space_memory,
> >>          .iova = addr,
> >>          .translated_addr = addr,
> >> -        .addr_mask = ~(hwaddr)0,
> >> +        .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1,
> >>          .perm = IOMMU_NONE,
> >>      };
> >>  
> >> +    bypass_allowed = virtio_has_feature(s->acked_features,
> >> +                                        VIRTIO_IOMMU_F_BYPASS);
> >> +
> > 
> > Would it be easier to check bypass_allowed here once and then drop the
> > latter [1] and [2] check?
> bypass_allowed does not mean you systematically bypass. You bypass if
> the SID is unknown or if the device is not attached to any domain.
> Otherwise you translate. But maybe I miss your point.

Ah ok, then could I ask how will this VIRTIO_IOMMU_F_BYPASS be used?
For example, I think VT-d defines passthrough in a totally different
way in that the PT mark will be stored in the per-device context
entries, then we can allow a specific device to be pass-through when
doing DMA.  That information is explicit (e.g., unknown SID will
always fail the DMA), and per-device.

Here do you mean that you just don't put a device into any domain to
show it wants to use PT?  Then I'm not sure how do you identify
whether this is a legal PT or a malicious device (e.g., an unknown
device that even does not have any driver bound to it, which will also
satisfy "unknown SID" and "not attached to any domain", iiuc).

Thanks,
Eric Auger Dec. 19, 2019, 2:38 p.m. UTC | #5
Hi Peter,

On 12/19/19 2:33 PM, Peter Xu wrote:
> On Thu, Dec 19, 2019 at 11:30:40AM +0100, Auger Eric wrote:
>> Hi Peter,
>> On 12/10/19 8:33 PM, Peter Xu wrote:
>>> On Fri, Nov 22, 2019 at 07:29:31PM +0100, Eric Auger wrote:
>>>> This patch implements the translate callback
>>>>
>>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>>>
>>>> ---
>>>>
>>>> v10 -> v11:
>>>> - take into account the new value struct and use
>>>>   g_tree_lookup_extended
>>>> - switched to error_report_once
>>>>
>>>> v6 -> v7:
>>>> - implemented bypass-mode
>>>>
>>>> v5 -> v6:
>>>> - replace error_report by qemu_log_mask
>>>>
>>>> v4 -> v5:
>>>> - check the device domain is not NULL
>>>> - s/printf/error_report
>>>> - set flags to IOMMU_NONE in case of all translation faults
>>>> ---
>>>>  hw/virtio/trace-events   |  1 +
>>>>  hw/virtio/virtio-iommu.c | 63 +++++++++++++++++++++++++++++++++++++++-
>>>>  2 files changed, 63 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
>>>> index f25359cee2..de7cbb3c8f 100644
>>>> --- a/hw/virtio/trace-events
>>>> +++ b/hw/virtio/trace-events
>>>> @@ -72,3 +72,4 @@ virtio_iommu_get_endpoint(uint32_t ep_id) "Alloc endpoint=%d"
>>>>  virtio_iommu_put_endpoint(uint32_t ep_id) "Free endpoint=%d"
>>>>  virtio_iommu_get_domain(uint32_t domain_id) "Alloc domain=%d"
>>>>  virtio_iommu_put_domain(uint32_t domain_id) "Free domain=%d"
>>>> +virtio_iommu_translate_out(uint64_t virt_addr, uint64_t phys_addr, uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"
>>>> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
>>>> index f0a56833a2..a83666557b 100644
>>>> --- a/hw/virtio/virtio-iommu.c
>>>> +++ b/hw/virtio/virtio-iommu.c
>>>> @@ -412,19 +412,80 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
>>>>                                              int iommu_idx)
>>>>  {
>>>>      IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
>>>> +    viommu_interval interval, *mapping_key;
>>>> +    viommu_mapping *mapping_value;
>>>> +    VirtIOIOMMU *s = sdev->viommu;
>>>> +    viommu_endpoint *ep;
>>>> +    bool bypass_allowed;
>>>>      uint32_t sid;
>>>> +    bool found;
>>>> +
>>>> +    interval.low = addr;
>>>> +    interval.high = addr + 1;
>>>>  
>>>>      IOMMUTLBEntry entry = {
>>>>          .target_as = &address_space_memory,
>>>>          .iova = addr,
>>>>          .translated_addr = addr,
>>>> -        .addr_mask = ~(hwaddr)0,
>>>> +        .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1,
>>>>          .perm = IOMMU_NONE,
>>>>      };
>>>>  
>>>> +    bypass_allowed = virtio_has_feature(s->acked_features,
>>>> +                                        VIRTIO_IOMMU_F_BYPASS);
>>>> +
>>>
>>> Would it be easier to check bypass_allowed here once and then drop the
>>> latter [1] and [2] check?
>> bypass_allowed does not mean you systematically bypass. You bypass if
>> the SID is unknown or if the device is not attached to any domain.
>> Otherwise you translate. But maybe I miss your point.
> 
> Ah ok, then could I ask how will this VIRTIO_IOMMU_F_BYPASS be used?
> For example, I think VT-d defines passthrough in a totally different
> way in that the PT mark will be stored in the per-device context
> entries, then we can allow a specific device to be pass-through when
> doing DMA.  That information is explicit (e.g., unknown SID will
> always fail the DMA), and per-device.
> 
> Here do you mean that you just don't put a device into any domain to
> show it wants to use PT?  Then I'm not sure how do you identify
> whether this is a legal PT or a malicious device (e.g., an unknown
> device that even does not have any driver bound to it, which will also
> satisfy "unknown SID" and "not attached to any domain", iiuc).

The virtio-iommu spec currently says:

"If the VIRTIO_IOMMU_F_BYPASS feature is negotiated, all accesses from
unattached endpoints are
allowed and translated by the IOMMU using the identity function. If the
feature is not negotiated, any
memory access from an unattached endpoint fails. Upon attaching an
endpoint in bypass mode to a new
domain, any memory access from the endpoint fails, since the domain does
not contain any mapping.
"

I guess this can serve the purpose of devices doing early accesses,
before the guest OS gets the hand and maps them?

Thanks

Eric
> 
> Thanks,
>
Peter Xu Dec. 19, 2019, 2:49 p.m. UTC | #6
On Thu, Dec 19, 2019 at 03:38:34PM +0100, Auger Eric wrote:
> Hi Peter,
> 
> On 12/19/19 2:33 PM, Peter Xu wrote:
> > On Thu, Dec 19, 2019 at 11:30:40AM +0100, Auger Eric wrote:
> >> Hi Peter,
> >> On 12/10/19 8:33 PM, Peter Xu wrote:
> >>> On Fri, Nov 22, 2019 at 07:29:31PM +0100, Eric Auger wrote:
> >>>> This patch implements the translate callback
> >>>>
> >>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> >>>>
> >>>> ---
> >>>>
> >>>> v10 -> v11:
> >>>> - take into account the new value struct and use
> >>>>   g_tree_lookup_extended
> >>>> - switched to error_report_once
> >>>>
> >>>> v6 -> v7:
> >>>> - implemented bypass-mode
> >>>>
> >>>> v5 -> v6:
> >>>> - replace error_report by qemu_log_mask
> >>>>
> >>>> v4 -> v5:
> >>>> - check the device domain is not NULL
> >>>> - s/printf/error_report
> >>>> - set flags to IOMMU_NONE in case of all translation faults
> >>>> ---
> >>>>  hw/virtio/trace-events   |  1 +
> >>>>  hw/virtio/virtio-iommu.c | 63 +++++++++++++++++++++++++++++++++++++++-
> >>>>  2 files changed, 63 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> >>>> index f25359cee2..de7cbb3c8f 100644
> >>>> --- a/hw/virtio/trace-events
> >>>> +++ b/hw/virtio/trace-events
> >>>> @@ -72,3 +72,4 @@ virtio_iommu_get_endpoint(uint32_t ep_id) "Alloc endpoint=%d"
> >>>>  virtio_iommu_put_endpoint(uint32_t ep_id) "Free endpoint=%d"
> >>>>  virtio_iommu_get_domain(uint32_t domain_id) "Alloc domain=%d"
> >>>>  virtio_iommu_put_domain(uint32_t domain_id) "Free domain=%d"
> >>>> +virtio_iommu_translate_out(uint64_t virt_addr, uint64_t phys_addr, uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"
> >>>> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
> >>>> index f0a56833a2..a83666557b 100644
> >>>> --- a/hw/virtio/virtio-iommu.c
> >>>> +++ b/hw/virtio/virtio-iommu.c
> >>>> @@ -412,19 +412,80 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
> >>>>                                              int iommu_idx)
> >>>>  {
> >>>>      IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
> >>>> +    viommu_interval interval, *mapping_key;
> >>>> +    viommu_mapping *mapping_value;
> >>>> +    VirtIOIOMMU *s = sdev->viommu;
> >>>> +    viommu_endpoint *ep;
> >>>> +    bool bypass_allowed;
> >>>>      uint32_t sid;
> >>>> +    bool found;
> >>>> +
> >>>> +    interval.low = addr;
> >>>> +    interval.high = addr + 1;
> >>>>  
> >>>>      IOMMUTLBEntry entry = {
> >>>>          .target_as = &address_space_memory,
> >>>>          .iova = addr,
> >>>>          .translated_addr = addr,
> >>>> -        .addr_mask = ~(hwaddr)0,
> >>>> +        .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1,
> >>>>          .perm = IOMMU_NONE,
> >>>>      };
> >>>>  
> >>>> +    bypass_allowed = virtio_has_feature(s->acked_features,
> >>>> +                                        VIRTIO_IOMMU_F_BYPASS);
> >>>> +
> >>>
> >>> Would it be easier to check bypass_allowed here once and then drop the
> >>> latter [1] and [2] check?
> >> bypass_allowed does not mean you systematically bypass. You bypass if
> >> the SID is unknown or if the device is not attached to any domain.
> >> Otherwise you translate. But maybe I miss your point.
> > 
> > Ah ok, then could I ask how will this VIRTIO_IOMMU_F_BYPASS be used?
> > For example, I think VT-d defines passthrough in a totally different
> > way in that the PT mark will be stored in the per-device context
> > entries, then we can allow a specific device to be pass-through when
> > doing DMA.  That information is explicit (e.g., unknown SID will
> > always fail the DMA), and per-device.
> > 
> > Here do you mean that you just don't put a device into any domain to
> > show it wants to use PT?  Then I'm not sure how do you identify
> > whether this is a legal PT or a malicious device (e.g., an unknown
> > device that even does not have any driver bound to it, which will also
> > satisfy "unknown SID" and "not attached to any domain", iiuc).
> 
> The virtio-iommu spec currently says:
> 
> "If the VIRTIO_IOMMU_F_BYPASS feature is negotiated, all accesses from
> unattached endpoints are
> allowed and translated by the IOMMU using the identity function. If the
> feature is not negotiated, any
> memory access from an unattached endpoint fails. Upon attaching an
> endpoint in bypass mode to a new
> domain, any memory access from the endpoint fails, since the domain does
> not contain any mapping.
> "
> 
> I guess this can serve the purpose of devices doing early accesses,
> before the guest OS gets the hand and maps them?

OK, so there's no global enablement knob for virtio-iommu? Hmm... Then:

  - This flag is a must for all virtio-iommu emulation, right?
    (otherwise I can't see how system bootstraps..)

  - Should this flag be gone right after OS starts (otherwise I think
    we still have the issue that any malicious device can be seen as
    in PT mode as default)?  How is that done?

Thanks,
Eric Auger Dec. 19, 2019, 3:09 p.m. UTC | #7
Hi Peter, jean,

On 12/19/19 3:49 PM, Peter Xu wrote:
> On Thu, Dec 19, 2019 at 03:38:34PM +0100, Auger Eric wrote:
>> Hi Peter,
>>
>> On 12/19/19 2:33 PM, Peter Xu wrote:
>>> On Thu, Dec 19, 2019 at 11:30:40AM +0100, Auger Eric wrote:
>>>> Hi Peter,
>>>> On 12/10/19 8:33 PM, Peter Xu wrote:
>>>>> On Fri, Nov 22, 2019 at 07:29:31PM +0100, Eric Auger wrote:
>>>>>> This patch implements the translate callback
>>>>>>
>>>>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>>>>>
>>>>>> ---
>>>>>>
>>>>>> v10 -> v11:
>>>>>> - take into account the new value struct and use
>>>>>>   g_tree_lookup_extended
>>>>>> - switched to error_report_once
>>>>>>
>>>>>> v6 -> v7:
>>>>>> - implemented bypass-mode
>>>>>>
>>>>>> v5 -> v6:
>>>>>> - replace error_report by qemu_log_mask
>>>>>>
>>>>>> v4 -> v5:
>>>>>> - check the device domain is not NULL
>>>>>> - s/printf/error_report
>>>>>> - set flags to IOMMU_NONE in case of all translation faults
>>>>>> ---
>>>>>>  hw/virtio/trace-events   |  1 +
>>>>>>  hw/virtio/virtio-iommu.c | 63 +++++++++++++++++++++++++++++++++++++++-
>>>>>>  2 files changed, 63 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
>>>>>> index f25359cee2..de7cbb3c8f 100644
>>>>>> --- a/hw/virtio/trace-events
>>>>>> +++ b/hw/virtio/trace-events
>>>>>> @@ -72,3 +72,4 @@ virtio_iommu_get_endpoint(uint32_t ep_id) "Alloc endpoint=%d"
>>>>>>  virtio_iommu_put_endpoint(uint32_t ep_id) "Free endpoint=%d"
>>>>>>  virtio_iommu_get_domain(uint32_t domain_id) "Alloc domain=%d"
>>>>>>  virtio_iommu_put_domain(uint32_t domain_id) "Free domain=%d"
>>>>>> +virtio_iommu_translate_out(uint64_t virt_addr, uint64_t phys_addr, uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"
>>>>>> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
>>>>>> index f0a56833a2..a83666557b 100644
>>>>>> --- a/hw/virtio/virtio-iommu.c
>>>>>> +++ b/hw/virtio/virtio-iommu.c
>>>>>> @@ -412,19 +412,80 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
>>>>>>                                              int iommu_idx)
>>>>>>  {
>>>>>>      IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
>>>>>> +    viommu_interval interval, *mapping_key;
>>>>>> +    viommu_mapping *mapping_value;
>>>>>> +    VirtIOIOMMU *s = sdev->viommu;
>>>>>> +    viommu_endpoint *ep;
>>>>>> +    bool bypass_allowed;
>>>>>>      uint32_t sid;
>>>>>> +    bool found;
>>>>>> +
>>>>>> +    interval.low = addr;
>>>>>> +    interval.high = addr + 1;
>>>>>>  
>>>>>>      IOMMUTLBEntry entry = {
>>>>>>          .target_as = &address_space_memory,
>>>>>>          .iova = addr,
>>>>>>          .translated_addr = addr,
>>>>>> -        .addr_mask = ~(hwaddr)0,
>>>>>> +        .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1,
>>>>>>          .perm = IOMMU_NONE,
>>>>>>      };
>>>>>>  
>>>>>> +    bypass_allowed = virtio_has_feature(s->acked_features,
>>>>>> +                                        VIRTIO_IOMMU_F_BYPASS);
>>>>>> +
>>>>>
>>>>> Would it be easier to check bypass_allowed here once and then drop the
>>>>> latter [1] and [2] check?
>>>> bypass_allowed does not mean you systematically bypass. You bypass if
>>>> the SID is unknown or if the device is not attached to any domain.
>>>> Otherwise you translate. But maybe I miss your point.
>>>
>>> Ah ok, then could I ask how will this VIRTIO_IOMMU_F_BYPASS be used?
>>> For example, I think VT-d defines passthrough in a totally different
>>> way in that the PT mark will be stored in the per-device context
>>> entries, then we can allow a specific device to be pass-through when
>>> doing DMA.  That information is explicit (e.g., unknown SID will
>>> always fail the DMA), and per-device.
>>>
>>> Here do you mean that you just don't put a device into any domain to
>>> show it wants to use PT?  Then I'm not sure how do you identify
>>> whether this is a legal PT or a malicious device (e.g., an unknown
>>> device that even does not have any driver bound to it, which will also
>>> satisfy "unknown SID" and "not attached to any domain", iiuc).
>>
>> The virtio-iommu spec currently says:
>>
>> "If the VIRTIO_IOMMU_F_BYPASS feature is negotiated, all accesses from
>> unattached endpoints are
>> allowed and translated by the IOMMU using the identity function. If the
>> feature is not negotiated, any
>> memory access from an unattached endpoint fails. Upon attaching an
>> endpoint in bypass mode to a new
>> domain, any memory access from the endpoint fails, since the domain does
>> not contain any mapping.
>> "
>>
>> I guess this can serve the purpose of devices doing early accesses,
>> before the guest OS gets the hand and maps them?
> 
> OK, so there's no global enablement knob for virtio-iommu? Hmm... Then:
well this is a global knob. If this is bot negotiated any unmapped
device can PT.

My assumption above must be wrong as this is a negotiated feature so
anyway the virtio-iommu driver should be involved.

I don't really remember the rationale of the feature bit tbh.

In "[virtio-dev] RE: [RFC] virtio-iommu version 0.4 " Jean discussed
that with Kevein. Sorry I cannot find the link.

" If the endpoint is not attached to any address space,
then the device MAY abort the transaction."

Kevin> From definition of BYPASS, it's orthogonal to whether there is an
address space attached, then should we still allow "May abort" behavior?

Jean> The behavior is left as an implementation choice, and I'm not sure
it's worth enforcing in the architecture. If the endpoint isn't attached
to any domain then (unless VIRTIO_IOMMU_F_BYPASS is negotiated), it
isn't necessarily able to do DMA at all. The virtio-iommu device may
setup DMA mastering lazily, in which case any DMA transaction would
abort, or have setup DMA already, in which case the endpoint can access
MEM_T_BYPASS regions.

Hopefully Jean will remember and comment on this.

Thanks

Eric

> 
>   - This flag is a must for all virtio-iommu emulation, right?
>     (otherwise I can't see how system bootstraps..)
> 
>   - Should this flag be gone right after OS starts (otherwise I think
>     we still have the issue that any malicious device can be seen as
>     in PT mode as default)?  How is that done?
> 
> Thanks,
>
Jean-Philippe Brucker Dec. 20, 2019, 4:26 p.m. UTC | #8
On Thu, Dec 19, 2019 at 04:09:47PM +0100, Auger Eric wrote:
> >>>>>> @@ -412,19 +412,80 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
> >>>>>>                                              int iommu_idx)
> >>>>>>  {
> >>>>>>      IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
> >>>>>> +    viommu_interval interval, *mapping_key;
> >>>>>> +    viommu_mapping *mapping_value;
> >>>>>> +    VirtIOIOMMU *s = sdev->viommu;
> >>>>>> +    viommu_endpoint *ep;
> >>>>>> +    bool bypass_allowed;
> >>>>>>      uint32_t sid;
> >>>>>> +    bool found;
> >>>>>> +
> >>>>>> +    interval.low = addr;
> >>>>>> +    interval.high = addr + 1;
> >>>>>>  
> >>>>>>      IOMMUTLBEntry entry = {
> >>>>>>          .target_as = &address_space_memory,
> >>>>>>          .iova = addr,
> >>>>>>          .translated_addr = addr,
> >>>>>> -        .addr_mask = ~(hwaddr)0,
> >>>>>> +        .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1,
> >>>>>>          .perm = IOMMU_NONE,
> >>>>>>      };
> >>>>>>  
> >>>>>> +    bypass_allowed = virtio_has_feature(s->acked_features,
> >>>>>> +                                        VIRTIO_IOMMU_F_BYPASS);
> >>>>>> +
> >>>>>
> >>>>> Would it be easier to check bypass_allowed here once and then drop the
> >>>>> latter [1] and [2] check?
> >>>> bypass_allowed does not mean you systematically bypass. You bypass if
> >>>> the SID is unknown or if the device is not attached to any domain.
> >>>> Otherwise you translate. But maybe I miss your point.
> >>>
> >>> Ah ok, then could I ask how will this VIRTIO_IOMMU_F_BYPASS be used?
> >>> For example, I think VT-d defines passthrough in a totally different
> >>> way in that the PT mark will be stored in the per-device context
> >>> entries, then we can allow a specific device to be pass-through when
> >>> doing DMA.  That information is explicit (e.g., unknown SID will
> >>> always fail the DMA), and per-device.
> >>>
> >>> Here do you mean that you just don't put a device into any domain to
> >>> show it wants to use PT?  Then I'm not sure how do you identify
> >>> whether this is a legal PT or a malicious device (e.g., an unknown
> >>> device that even does not have any driver bound to it, which will also
> >>> satisfy "unknown SID" and "not attached to any domain", iiuc).
> >>
> >> The virtio-iommu spec currently says:
> >>
> >> "If the VIRTIO_IOMMU_F_BYPASS feature is negotiated, all accesses from
> >> unattached endpoints are
> >> allowed and translated by the IOMMU using the identity function. If the
> >> feature is not negotiated, any
> >> memory access from an unattached endpoint fails. Upon attaching an
> >> endpoint in bypass mode to a new
> >> domain, any memory access from the endpoint fails, since the domain does
> >> not contain any mapping.
> >> "
> >>
> >> I guess this can serve the purpose of devices doing early accesses,
> >> before the guest OS gets the hand and maps them?
> > 
> > OK, so there's no global enablement knob for virtio-iommu? Hmm... Then:

There is at the virtio transport level: the driver sets status to
FEATURES_OK once it accepted the feature bits, and to DRIVER_OK once its
fully operational. The virtio-iommu spec says:

  If the driver does not accept the VIRTIO_IOMMU_F_BYPASS feature, the
  device SHOULD NOT let endpoints access the guest-physical address space.

So before features negotiation, there is no access. Afterwards it depends
if the VIRTIO_IOMMU_F_BYPASS has been accepted by the driver.

> well this is a global knob. If this is bot negotiated any unmapped
> device can PT.
> 
> My assumption above must be wrong as this is a negotiated feature so
> anyway the virtio-iommu driver should be involved.
> 
> I don't really remember the rationale of the feature bit tbh.

I don't remember writing down a rationale for this bit, it was in the very
first version (I think someone suggested it during the initial internal
discussion) and I didn't remove it afterwards because it seems useful:

Say a guest only wants to use the vIOMMU for userspace assignment and
wants all other endpoints to bypass translation, which is our primary
use-case. In other words booting Linux with iommu.passthrough=1. It can
either create an identity domain for each endpoint (one MAP request with
VA==PA) or it can set the VIRTIO_IOMMU_F_BYPASS bit. The device-side
implementation should be more efficient with the latter, since you don't
need to lookup the domain + address space for each access.

> In "[virtio-dev] RE: [RFC] virtio-iommu version 0.4 " Jean discussed
> that with Kevein. Sorry I cannot find the link.
> 
> " If the endpoint is not attached to any address space,
> then the device MAY abort the transaction."

Hmm, that was regarding a "bypass" reserved memory region, which isn't in
the current spec.

> Kevin> From definition of BYPASS, it's orthogonal to whether there is an
> address space attached, then should we still allow "May abort" behavior?
> 
> Jean> The behavior is left as an implementation choice, and I'm not sure
> it's worth enforcing in the architecture. If the endpoint isn't attached
> to any domain then (unless VIRTIO_IOMMU_F_BYPASS is negotiated), it
> isn't necessarily able to do DMA at all. The virtio-iommu device may
> setup DMA mastering lazily, in which case any DMA transaction would
> abort, or have setup DMA already, in which case the endpoint can access
> MEM_T_BYPASS regions.
> 
> Hopefully Jean will remember and comment on this.
> 
> Thanks
> 
> Eric
> 
> > 
> >   - This flag is a must for all virtio-iommu emulation, right?
> >     (otherwise I can't see how system bootstraps..)

What do you mean by system bootstrap?

One thing I've been wondering, and may be related, is how to handle a
bootloader that wants to read for example an initrd from a virtio-block
device that's behind the IOMMU. Either we allow the device to let any DMA
bypass the device until FEATURES_OK, which is a source of vulnerabilities
[1], or we have to implement some support for the virtio-iommu in the
BIOS. Again the F_BYPASS bit would help for this, since all the BIOS has
to do is set it on boot. However, F_BYPASS is optional, and more complex
support is needed for setting up identity mappings.

[1] See "IOMMU protection against I/O attacks: a vulnerability and a proof
of concept" by Morgan et al, where a malicious device bypassing the IOMMU
overwrites the IOMMU configuration as it is being created by the OS.
Arguably we're not too concerned about malicious devices at the moment,
but I'm not comfortable relaxing this.

> >   - Should this flag be gone right after OS starts (otherwise I think
> >     we still have the issue that any malicious device can be seen as
> >     in PT mode as default)?  How is that done?

Yes bypass mode assumes that devices and drivers aren't malicious, and the
IOMMU is only used for things like assigning devices to guest userspace,
or having large contiguous DMA buffers.

Thanks,
Jean
Peter Xu Dec. 20, 2019, 4:51 p.m. UTC | #9
On Fri, Dec 20, 2019 at 05:26:42PM +0100, Jean-Philippe Brucker wrote:
> There is at the virtio transport level: the driver sets status to
> FEATURES_OK once it accepted the feature bits, and to DRIVER_OK once its
> fully operational. The virtio-iommu spec says:
> 
>   If the driver does not accept the VIRTIO_IOMMU_F_BYPASS feature, the
>   device SHOULD NOT let endpoints access the guest-physical address space.
> 
> So before features negotiation, there is no access. Afterwards it depends
> if the VIRTIO_IOMMU_F_BYPASS has been accepted by the driver.

Before enabling virtio-iommu device, should we still let the devices
to access the whole system address space?  I believe that's at least
what Intel IOMMUs are doing.  From code-wise, its:

    if (likely(s->dmar_enabled)) {
        success = vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn,
                                         addr, flag & IOMMU_WO, &iotlb);
    } else {
        /* DMAR disabled, passthrough, use 4k-page*/
        iotlb.iova = addr & VTD_PAGE_MASK_4K;
        iotlb.translated_addr = addr & VTD_PAGE_MASK_4K;
        iotlb.addr_mask = ~VTD_PAGE_MASK_4K;
        iotlb.perm = IOMMU_RW;
        success = true;
    }

From hardware-wise, an IOMMU should be close to transparent if you
never enable it, imho.

Otherwise I'm confused on how a guest (with virtio-iommu) could boot
with a normal BIOS that does not contain a virtio-iommu driver.  For
example, what if the BIOS needs to read some block sectors (as you
mentioned)?

> > >   - This flag is a must for all virtio-iommu emulation, right?
> > >     (otherwise I can't see how system bootstraps..)
> 
> What do you mean by system bootstrap?

Sorry, I meant when the system boots before the OS.

> 
> One thing I've been wondering, and may be related, is how to handle a
> bootloader that wants to read for example an initrd from a virtio-block
> device that's behind the IOMMU.

My understanding is that virtio devices are special in that they can
use the VIRTIO_F_IOMMU_PLATFORM flag to bypass any vIOMMU (though, I
don't think that'll work when virtio hardwares comes to the
world.. because they can't really bypass the IOMMU hardware).

> Either we allow the device to let any DMA
> bypass the device until FEATURES_OK, which is a source of vulnerabilities
> [1], or we have to implement some support for the virtio-iommu in the
> BIOS. Again the F_BYPASS bit would help for this, since all the BIOS has
> to do is set it on boot. However, F_BYPASS is optional, and more complex
> support is needed for setting up identity mappings.
> 
> [1] See "IOMMU protection against I/O attacks: a vulnerability and a proof
> of concept" by Morgan et al, where a malicious device bypassing the IOMMU
> overwrites the IOMMU configuration as it is being created by the OS.
> Arguably we're not too concerned about malicious devices at the moment,
> but I'm not comfortable relaxing this.
> 
> > >   - Should this flag be gone right after OS starts (otherwise I think
> > >     we still have the issue that any malicious device can be seen as
> > >     in PT mode as default)?  How is that done?
> 
> Yes bypass mode assumes that devices and drivers aren't malicious, and the
> IOMMU is only used for things like assigning devices to guest userspace,
> or having large contiguous DMA buffers.

Yes I agree.  However again when the BYPASS flag was introduced, have
you thought of introducing that flag per-device?  IMHO that could be
better because you have a finer granularity on controlling all these,
so you'll be able to reject malicious devices but at the meantime
grant permission to trusted devices.

Thanks,
Jean-Philippe Brucker Jan. 6, 2020, 5:06 p.m. UTC | #10
On Fri, Dec 20, 2019 at 11:51:00AM -0500, Peter Xu wrote:
> On Fri, Dec 20, 2019 at 05:26:42PM +0100, Jean-Philippe Brucker wrote:
> > There is at the virtio transport level: the driver sets status to
> > FEATURES_OK once it accepted the feature bits, and to DRIVER_OK once its
> > fully operational. The virtio-iommu spec says:
> > 
> >   If the driver does not accept the VIRTIO_IOMMU_F_BYPASS feature, the
> >   device SHOULD NOT let endpoints access the guest-physical address space.
> > 
> > So before features negotiation, there is no access. Afterwards it depends
> > if the VIRTIO_IOMMU_F_BYPASS has been accepted by the driver.
> 
> Before enabling virtio-iommu device, should we still let the devices
> to access the whole system address space?  I believe that's at least
> what Intel IOMMUs are doing.  From code-wise, its:
> 
>     if (likely(s->dmar_enabled)) {
>         success = vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn,
>                                          addr, flag & IOMMU_WO, &iotlb);
>     } else {
>         /* DMAR disabled, passthrough, use 4k-page*/
>         iotlb.iova = addr & VTD_PAGE_MASK_4K;
>         iotlb.translated_addr = addr & VTD_PAGE_MASK_4K;
>         iotlb.addr_mask = ~VTD_PAGE_MASK_4K;
>         iotlb.perm = IOMMU_RW;
>         success = true;
>     }
> 
> From hardware-wise, an IOMMU should be close to transparent if you
> never enable it, imho.

For hardware that's not necessarily the best choice. As cited in my
previous reply it has been shown to introduce vulnerabilities since
malicious devices can DMA during boot, before the OS takes control of the
IOMMU. The Arm SMMU allows an implementation to adopt a deny policy by
default.

> Otherwise I'm confused on how a guest (with virtio-iommu) could boot
> with a normal BIOS that does not contain a virtio-iommu driver.  For
> example, what if the BIOS needs to read some block sectors (as you
> mentioned)?

Ideally we should aim at supporting the device in both the BIOS and the
OS. Failing that, there should at least be a way to instantiate a
virtio-iommu device that is blocking by default, that cannot be bypassed
unless the controlling software decides to allow it. Could the bypass
policy be a command-line option to the virtio-iommu device?

[...]
> > Yes bypass mode assumes that devices and drivers aren't malicious, and the
> > IOMMU is only used for things like assigning devices to guest userspace,
> > or having large contiguous DMA buffers.
> 
> Yes I agree.  However again when the BYPASS flag was introduced, have
> you thought of introducing that flag per-device?  IMHO that could be
> better because you have a finer granularity on controlling all these,
> so you'll be able to reject malicious devices but at the meantime
> grant permission to trusted devices.

At the moment that per-device behavior can be emulated by sending an
ATTACH request followed by identity MAP. It could be a little more
efficient to add a "bypass" flag to the ATTACH request and avoid setting
up the identity mapping manually, since the device then wouldn't need to
look up the mapping on translation, but I don't know how much it would
improve performance. The device could also cache the fact that the address
space is identity-mapped, for the same result. The domain lookup has to
happen in any case, so you can never get the full iommu-free performance
with these mechanisms.

Thanks,
Jean
Peter Xu Jan. 6, 2020, 5:58 p.m. UTC | #11
On Mon, Jan 06, 2020 at 06:06:34PM +0100, Jean-Philippe Brucker wrote:
> On Fri, Dec 20, 2019 at 11:51:00AM -0500, Peter Xu wrote:
> > On Fri, Dec 20, 2019 at 05:26:42PM +0100, Jean-Philippe Brucker wrote:
> > > There is at the virtio transport level: the driver sets status to
> > > FEATURES_OK once it accepted the feature bits, and to DRIVER_OK once its
> > > fully operational. The virtio-iommu spec says:
> > > 
> > >   If the driver does not accept the VIRTIO_IOMMU_F_BYPASS feature, the
> > >   device SHOULD NOT let endpoints access the guest-physical address space.
> > > 
> > > So before features negotiation, there is no access. Afterwards it depends
> > > if the VIRTIO_IOMMU_F_BYPASS has been accepted by the driver.
> > 
> > Before enabling virtio-iommu device, should we still let the devices
> > to access the whole system address space?  I believe that's at least
> > what Intel IOMMUs are doing.  From code-wise, its:
> > 
> >     if (likely(s->dmar_enabled)) {
> >         success = vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn,
> >                                          addr, flag & IOMMU_WO, &iotlb);
> >     } else {
> >         /* DMAR disabled, passthrough, use 4k-page*/
> >         iotlb.iova = addr & VTD_PAGE_MASK_4K;
> >         iotlb.translated_addr = addr & VTD_PAGE_MASK_4K;
> >         iotlb.addr_mask = ~VTD_PAGE_MASK_4K;
> >         iotlb.perm = IOMMU_RW;
> >         success = true;
> >     }
> > 
> > From hardware-wise, an IOMMU should be close to transparent if you
> > never enable it, imho.
> 
> For hardware that's not necessarily the best choice. As cited in my
> previous reply it has been shown to introduce vulnerabilities since
> malicious devices can DMA during boot, before the OS takes control of the
> IOMMU. The Arm SMMU allows an implementation to adopt a deny policy by
> default.

I see.  But then how to read a sector from the block to at least boot
an OS if we use a default-deny policy?  Does it still need a mapping
that is established somehow by someone before hand?

> 
> > Otherwise I'm confused on how a guest (with virtio-iommu) could boot
> > with a normal BIOS that does not contain a virtio-iommu driver.  For
> > example, what if the BIOS needs to read some block sectors (as you
> > mentioned)?
> 
> Ideally we should aim at supporting the device in both the BIOS and the
> OS. Failing that, there should at least be a way to instantiate a
> virtio-iommu device that is blocking by default, that cannot be bypassed
> unless the controlling software decides to allow it. Could the bypass
> policy be a command-line option to the virtio-iommu device?
> 
> [...]
> > > Yes bypass mode assumes that devices and drivers aren't malicious, and the
> > > IOMMU is only used for things like assigning devices to guest userspace,
> > > or having large contiguous DMA buffers.
> > 
> > Yes I agree.  However again when the BYPASS flag was introduced, have
> > you thought of introducing that flag per-device?  IMHO that could be
> > better because you have a finer granularity on controlling all these,
> > so you'll be able to reject malicious devices but at the meantime
> > grant permission to trusted devices.
> 
> At the moment that per-device behavior can be emulated by sending an
> ATTACH request followed by identity MAP. It could be a little more
> efficient to add a "bypass" flag to the ATTACH request and avoid setting
> up the identity mapping manually, since the device then wouldn't need to
> look up the mapping on translation, but I don't know how much it would
> improve performance. The device could also cache the fact that the address
> space is identity-mapped, for the same result. The domain lookup has to
> happen in any case, so you can never get the full iommu-free performance
> with these mechanisms.

IMHO it's really a matter of whether virtio-iommu wants to have a
device layer besides the domain layer for the initial versions (just
like VT-d has a device context, then it points to a domain, so it has
these two layers).  But I agree for the bypass feature it should work
(though trying to detect "a device is put into an identital domain is
bypassed" is still a bit tricky to me).  And after all virtio-iommu is
always extensible when needs come.

Thanks,
Jean-Philippe Brucker Jan. 7, 2020, 10:10 a.m. UTC | #12
On Mon, Jan 06, 2020 at 12:58:50PM -0500, Peter Xu wrote:
> On Mon, Jan 06, 2020 at 06:06:34PM +0100, Jean-Philippe Brucker wrote:
> > On Fri, Dec 20, 2019 at 11:51:00AM -0500, Peter Xu wrote:
> > > On Fri, Dec 20, 2019 at 05:26:42PM +0100, Jean-Philippe Brucker wrote:
> > > > There is at the virtio transport level: the driver sets status to
> > > > FEATURES_OK once it accepted the feature bits, and to DRIVER_OK once its
> > > > fully operational. The virtio-iommu spec says:
> > > > 
> > > >   If the driver does not accept the VIRTIO_IOMMU_F_BYPASS feature, the
> > > >   device SHOULD NOT let endpoints access the guest-physical address space.
> > > > 
> > > > So before features negotiation, there is no access. Afterwards it depends
> > > > if the VIRTIO_IOMMU_F_BYPASS has been accepted by the driver.
> > > 
> > > Before enabling virtio-iommu device, should we still let the devices
> > > to access the whole system address space?  I believe that's at least
> > > what Intel IOMMUs are doing.  From code-wise, its:
> > > 
> > >     if (likely(s->dmar_enabled)) {
> > >         success = vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn,
> > >                                          addr, flag & IOMMU_WO, &iotlb);
> > >     } else {
> > >         /* DMAR disabled, passthrough, use 4k-page*/
> > >         iotlb.iova = addr & VTD_PAGE_MASK_4K;
> > >         iotlb.translated_addr = addr & VTD_PAGE_MASK_4K;
> > >         iotlb.addr_mask = ~VTD_PAGE_MASK_4K;
> > >         iotlb.perm = IOMMU_RW;
> > >         success = true;
> > >     }
> > > 
> > > From hardware-wise, an IOMMU should be close to transparent if you
> > > never enable it, imho.
> > 
> > For hardware that's not necessarily the best choice. As cited in my
> > previous reply it has been shown to introduce vulnerabilities since
> > malicious devices can DMA during boot, before the OS takes control of the
> > IOMMU. The Arm SMMU allows an implementation to adopt a deny policy by
> > default.
> 
> I see.  But then how to read a sector from the block to at least boot
> an OS if we use a default-deny policy?  Does it still need a mapping
> that is established somehow by someone before hand?

Yes, it looks like EDK II uses IOMMU operations in order to access those
devices on platforms where the IOMMU isn't default-bypass (AMD SEV support
is provided by edk2, and a VT-d driver seems provided by edk2-platforms).
However for OVMF we could just set the bypass feature bit in virtio-iommu
device, which doesn't even requires setting up the virtqueue.

I'm missing a piece of the puzzle for Arm platforms though, because it
looks like Trusted Firmware-A sets up the default-deny policy on reset
even when it wasn't hardwired, but doesn't provide a service to create
SMMUv3 mappings for the bootloader.

Thanks,
Jean
Eric Auger Jan. 8, 2020, 4:55 p.m. UTC | #13
Hi Jean-Philippe, Peter,

On 1/7/20 11:10 AM, Jean-Philippe Brucker wrote:
> On Mon, Jan 06, 2020 at 12:58:50PM -0500, Peter Xu wrote:
>> On Mon, Jan 06, 2020 at 06:06:34PM +0100, Jean-Philippe Brucker wrote:
>>> On Fri, Dec 20, 2019 at 11:51:00AM -0500, Peter Xu wrote:
>>>> On Fri, Dec 20, 2019 at 05:26:42PM +0100, Jean-Philippe Brucker wrote:
>>>>> There is at the virtio transport level: the driver sets status to
>>>>> FEATURES_OK once it accepted the feature bits, and to DRIVER_OK once its
>>>>> fully operational. The virtio-iommu spec says:
>>>>>
>>>>>   If the driver does not accept the VIRTIO_IOMMU_F_BYPASS feature, the
>>>>>   device SHOULD NOT let endpoints access the guest-physical address space.
>>>>>
>>>>> So before features negotiation, there is no access. Afterwards it depends
>>>>> if the VIRTIO_IOMMU_F_BYPASS has been accepted by the driver.
>>>>
>>>> Before enabling virtio-iommu device, should we still let the devices
>>>> to access the whole system address space?  I believe that's at least
>>>> what Intel IOMMUs are doing.  From code-wise, its:
>>>>
>>>>     if (likely(s->dmar_enabled)) {
>>>>         success = vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn,
>>>>                                          addr, flag & IOMMU_WO, &iotlb);
>>>>     } else {
>>>>         /* DMAR disabled, passthrough, use 4k-page*/
>>>>         iotlb.iova = addr & VTD_PAGE_MASK_4K;
>>>>         iotlb.translated_addr = addr & VTD_PAGE_MASK_4K;
>>>>         iotlb.addr_mask = ~VTD_PAGE_MASK_4K;
>>>>         iotlb.perm = IOMMU_RW;
>>>>         success = true;
>>>>     }
>>>>
>>>> From hardware-wise, an IOMMU should be close to transparent if you
>>>> never enable it, imho.
>>>
>>> For hardware that's not necessarily the best choice. As cited in my
>>> previous reply it has been shown to introduce vulnerabilities since
>>> malicious devices can DMA during boot, before the OS takes control of the
>>> IOMMU. The Arm SMMU allows an implementation to adopt a deny policy by
>>> default.
>>
>> I see.  But then how to read a sector from the block to at least boot
>> an OS if we use a default-deny policy?  Does it still need a mapping
>> that is established somehow by someone before hand?
> 
> Yes, it looks like EDK II uses IOMMU operations in order to access those
> devices on platforms where the IOMMU isn't default-bypass (AMD SEV support
> is provided by edk2, and a VT-d driver seems provided by edk2-platforms).
> However for OVMF we could just set the bypass feature bit in virtio-iommu
> device, which doesn't even requires setting up the virtqueue.
> 
> I'm missing a piece of the puzzle for Arm platforms though, because it
> looks like Trusted Firmware-A sets up the default-deny policy on reset
> even when it wasn't hardwired, but doesn't provide a service to create
> SMMUv3 mappings for the bootloader.
> 
> Thanks,
> Jean
> 

I think we have a concrete example for the above discussion. The AHCI.
When running the virtio-iommu on x86 I get messages like:

virtio_iommu_translate sid=250 is not known!!
no buffer available in event queue to report event

and a bunch of "AHCI: Failed to start FIS receive engine: bad FIS
receive buffer address" messages (For each port)

This was reported in my cover letter (*). This happens very early in the
boot process, before the OS get the hand and before the virtio-iommu
driver creates any mapping. It does not prevent the guest from booting
though.

Currently the virtio-iommu device checks the VIRTIO_IOMMU_F_BYPASS. If I
overwrite it to true in the device, then, the guest boots without those
messages.

I share Peter's concern about having a different default policy than x86.

Thanks

Eric

Note the migration issue reported in the cover letter is fixed now and
was due to the migration priority unset.
Jean-Philippe Brucker Jan. 9, 2020, 8:47 a.m. UTC | #14
On Wed, Jan 08, 2020 at 05:55:52PM +0100, Auger Eric wrote:
> Hi Jean-Philippe, Peter,
> 
> On 1/7/20 11:10 AM, Jean-Philippe Brucker wrote:
> > On Mon, Jan 06, 2020 at 12:58:50PM -0500, Peter Xu wrote:
> >> On Mon, Jan 06, 2020 at 06:06:34PM +0100, Jean-Philippe Brucker wrote:
> >>> On Fri, Dec 20, 2019 at 11:51:00AM -0500, Peter Xu wrote:
> >>>> On Fri, Dec 20, 2019 at 05:26:42PM +0100, Jean-Philippe Brucker wrote:
> >>>>> There is at the virtio transport level: the driver sets status to
> >>>>> FEATURES_OK once it accepted the feature bits, and to DRIVER_OK once its
> >>>>> fully operational. The virtio-iommu spec says:
> >>>>>
> >>>>>   If the driver does not accept the VIRTIO_IOMMU_F_BYPASS feature, the
> >>>>>   device SHOULD NOT let endpoints access the guest-physical address space.
> >>>>>
> >>>>> So before features negotiation, there is no access. Afterwards it depends
> >>>>> if the VIRTIO_IOMMU_F_BYPASS has been accepted by the driver.
> >>>>
> >>>> Before enabling virtio-iommu device, should we still let the devices
> >>>> to access the whole system address space?  I believe that's at least
> >>>> what Intel IOMMUs are doing.  From code-wise, its:
> >>>>
> >>>>     if (likely(s->dmar_enabled)) {
> >>>>         success = vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn,
> >>>>                                          addr, flag & IOMMU_WO, &iotlb);
> >>>>     } else {
> >>>>         /* DMAR disabled, passthrough, use 4k-page*/
> >>>>         iotlb.iova = addr & VTD_PAGE_MASK_4K;
> >>>>         iotlb.translated_addr = addr & VTD_PAGE_MASK_4K;
> >>>>         iotlb.addr_mask = ~VTD_PAGE_MASK_4K;
> >>>>         iotlb.perm = IOMMU_RW;
> >>>>         success = true;
> >>>>     }
> >>>>
> >>>> From hardware-wise, an IOMMU should be close to transparent if you
> >>>> never enable it, imho.
> >>>
> >>> For hardware that's not necessarily the best choice. As cited in my
> >>> previous reply it has been shown to introduce vulnerabilities since
> >>> malicious devices can DMA during boot, before the OS takes control of the
> >>> IOMMU. The Arm SMMU allows an implementation to adopt a deny policy by
> >>> default.
> >>
> >> I see.  But then how to read a sector from the block to at least boot
> >> an OS if we use a default-deny policy?  Does it still need a mapping
> >> that is established somehow by someone before hand?
> > 
> > Yes, it looks like EDK II uses IOMMU operations in order to access those
> > devices on platforms where the IOMMU isn't default-bypass (AMD SEV support
> > is provided by edk2, and a VT-d driver seems provided by edk2-platforms).
> > However for OVMF we could just set the bypass feature bit in virtio-iommu
> > device, which doesn't even requires setting up the virtqueue.
> > 
> > I'm missing a piece of the puzzle for Arm platforms though, because it
> > looks like Trusted Firmware-A sets up the default-deny policy on reset
> > even when it wasn't hardwired, but doesn't provide a service to create
> > SMMUv3 mappings for the bootloader.
> > 
> > Thanks,
> > Jean
> > 
> 
> I think we have a concrete example for the above discussion. The AHCI.
> When running the virtio-iommu on x86 I get messages like:
> 
> virtio_iommu_translate sid=250 is not known!!
> no buffer available in event queue to report event
> 
> and a bunch of "AHCI: Failed to start FIS receive engine: bad FIS
> receive buffer address" messages (For each port)
> 
> This was reported in my cover letter (*). This happens very early in the
> boot process, before the OS get the hand and before the virtio-iommu
> driver creates any mapping. It does not prevent the guest from booting
> though.
> 
> Currently the virtio-iommu device checks the VIRTIO_IOMMU_F_BYPASS. If I
> overwrite it to true in the device, then, the guest boots without those
> messages.

Oh that's good, I was afraid it was an issue in Linux.

> I share Peter's concern about having a different default policy than x86.

Yes I'd say just align with whatever policy is already in place. Do you
think we could add a command-line option to let people disable
default-bypass, though?  That would be a convenient way to make the IOMMU
protection airtight for those who need it.

Thanks,
Jean
Eric Auger Jan. 9, 2020, 8:58 a.m. UTC | #15
Hi Jean,

On 1/9/20 9:47 AM, Jean-Philippe Brucker wrote:
> On Wed, Jan 08, 2020 at 05:55:52PM +0100, Auger Eric wrote:
>> Hi Jean-Philippe, Peter,
>>
>> On 1/7/20 11:10 AM, Jean-Philippe Brucker wrote:
>>> On Mon, Jan 06, 2020 at 12:58:50PM -0500, Peter Xu wrote:
>>>> On Mon, Jan 06, 2020 at 06:06:34PM +0100, Jean-Philippe Brucker wrote:
>>>>> On Fri, Dec 20, 2019 at 11:51:00AM -0500, Peter Xu wrote:
>>>>>> On Fri, Dec 20, 2019 at 05:26:42PM +0100, Jean-Philippe Brucker wrote:
>>>>>>> There is at the virtio transport level: the driver sets status to
>>>>>>> FEATURES_OK once it accepted the feature bits, and to DRIVER_OK once its
>>>>>>> fully operational. The virtio-iommu spec says:
>>>>>>>
>>>>>>>   If the driver does not accept the VIRTIO_IOMMU_F_BYPASS feature, the
>>>>>>>   device SHOULD NOT let endpoints access the guest-physical address space.
>>>>>>>
>>>>>>> So before features negotiation, there is no access. Afterwards it depends
>>>>>>> if the VIRTIO_IOMMU_F_BYPASS has been accepted by the driver.
>>>>>>
>>>>>> Before enabling virtio-iommu device, should we still let the devices
>>>>>> to access the whole system address space?  I believe that's at least
>>>>>> what Intel IOMMUs are doing.  From code-wise, its:
>>>>>>
>>>>>>     if (likely(s->dmar_enabled)) {
>>>>>>         success = vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn,
>>>>>>                                          addr, flag & IOMMU_WO, &iotlb);
>>>>>>     } else {
>>>>>>         /* DMAR disabled, passthrough, use 4k-page*/
>>>>>>         iotlb.iova = addr & VTD_PAGE_MASK_4K;
>>>>>>         iotlb.translated_addr = addr & VTD_PAGE_MASK_4K;
>>>>>>         iotlb.addr_mask = ~VTD_PAGE_MASK_4K;
>>>>>>         iotlb.perm = IOMMU_RW;
>>>>>>         success = true;
>>>>>>     }
>>>>>>
>>>>>> From hardware-wise, an IOMMU should be close to transparent if you
>>>>>> never enable it, imho.
>>>>>
>>>>> For hardware that's not necessarily the best choice. As cited in my
>>>>> previous reply it has been shown to introduce vulnerabilities since
>>>>> malicious devices can DMA during boot, before the OS takes control of the
>>>>> IOMMU. The Arm SMMU allows an implementation to adopt a deny policy by
>>>>> default.
>>>>
>>>> I see.  But then how to read a sector from the block to at least boot
>>>> an OS if we use a default-deny policy?  Does it still need a mapping
>>>> that is established somehow by someone before hand?
>>>
>>> Yes, it looks like EDK II uses IOMMU operations in order to access those
>>> devices on platforms where the IOMMU isn't default-bypass (AMD SEV support
>>> is provided by edk2, and a VT-d driver seems provided by edk2-platforms).
>>> However for OVMF we could just set the bypass feature bit in virtio-iommu
>>> device, which doesn't even requires setting up the virtqueue.
>>>
>>> I'm missing a piece of the puzzle for Arm platforms though, because it
>>> looks like Trusted Firmware-A sets up the default-deny policy on reset
>>> even when it wasn't hardwired, but doesn't provide a service to create
>>> SMMUv3 mappings for the bootloader.
>>>
>>> Thanks,
>>> Jean
>>>
>>
>> I think we have a concrete example for the above discussion. The AHCI.
>> When running the virtio-iommu on x86 I get messages like:
>>
>> virtio_iommu_translate sid=250 is not known!!
>> no buffer available in event queue to report event
>>
>> and a bunch of "AHCI: Failed to start FIS receive engine: bad FIS
>> receive buffer address" messages (For each port)
>>
>> This was reported in my cover letter (*). This happens very early in the
>> boot process, before the OS get the hand and before the virtio-iommu
>> driver creates any mapping. It does not prevent the guest from booting
>> though.
>>
>> Currently the virtio-iommu device checks the VIRTIO_IOMMU_F_BYPASS. If I
>> overwrite it to true in the device, then, the guest boots without those
>> messages.
> 
> Oh that's good, I was afraid it was an issue in Linux.
> 
>> I share Peter's concern about having a different default policy than x86.
> 
> Yes I'd say just align with whatever policy is already in place. Do you
> think we could add a command-line option to let people disable
> default-bypass, though?  That would be a convenient way to make the IOMMU
> protection airtight for those who need it.
Yes I could easily add a device option to disable the default bypass.

Shall we change the meaning of the F_BYPASS feature then? If exposed by
the device, the device does bypass by default, otherwise it doesn't.
This would be controlled by the device option.

The driver then could have means to overwrite this behavior once loaded?

Thanks

Eric
> 
> Thanks,
> Jean
>
Jean-Philippe Brucker Jan. 9, 2020, 10:40 a.m. UTC | #16
On Thu, Jan 09, 2020 at 09:58:49AM +0100, Auger Eric wrote:
> >> I share Peter's concern about having a different default policy than x86.
> > 
> > Yes I'd say just align with whatever policy is already in place. Do you
> > think we could add a command-line option to let people disable
> > default-bypass, though?  That would be a convenient way to make the IOMMU
> > protection airtight for those who need it.
> Yes I could easily add a device option to disable the default bypass.
> 
> Shall we change the meaning of the F_BYPASS feature then? If exposed by
> the device, the device does bypass by default, otherwise it doesn't.
> This would be controlled by the device option.

For a device that doesn't do bypass by default, the driver wouldn't have
the ability to enable bypass (feature bit not offered, not negotiable).

> The driver then could have means to overwrite this behavior once loaded?

Let's keep the bypass feature bit for this. If the bit is offered, the
driver chooses to enable or disable it. If the bit is not offered or not
negotiated, then the behavior is deny. If the bit is offered and
negotiated then the behavior is allow.

We can say that before features negotiation (latched at features register
write, I think, in practice?) the behavior is platform dependent. The
current wording about bypass intends to discourage unsafe choices but
makes a strong statement only about the device behavior after features
negotiation. 

We could add a second feature bit specifically for the boot bypass
behavior. It wouldn't be useful to the OS (the driver doesn't have a
choice) but could present a bit in config space that allows a firmware to
disable boot-bypass in a way that is sticky across reset. So when the OS
resets the device after taking it over, it doesn't accidentally enable
bypass. I wouldn't bother though. If a FW/bootloader is able to support
virtio-iommu, the user might as well instantiate the device with the
default-deny option.

Thanks,
Jean
Eric Auger Jan. 9, 2020, 11:01 a.m. UTC | #17
Hi,

On 1/9/20 11:40 AM, Jean-Philippe Brucker wrote:
> On Thu, Jan 09, 2020 at 09:58:49AM +0100, Auger Eric wrote:
>>>> I share Peter's concern about having a different default policy than x86.
>>>
>>> Yes I'd say just align with whatever policy is already in place. Do you
>>> think we could add a command-line option to let people disable
>>> default-bypass, though?  That would be a convenient way to make the IOMMU
>>> protection airtight for those who need it.
>> Yes I could easily add a device option to disable the default bypass.
>>
>> Shall we change the meaning of the F_BYPASS feature then? If exposed by
>> the device, the device does bypass by default, otherwise it doesn't.
>> This would be controlled by the device option.
> 
> For a device that doesn't do bypass by default, the driver wouldn't have
> the ability to enable bypass (feature bit not offered, not negotiable).
> 
>> The driver then could have means to overwrite this behavior once loaded?
> 
> Let's keep the bypass feature bit for this. If the bit is offered, the
> driver chooses to enable or disable it. If the bit is not offered or not
> negotiated, then the behavior is deny. If the bit is offered and
> negotiated then the behavior is allow.
> 
> We can say that before features negotiation (latched at features register
> write, I think, in practice?) the behavior is platform dependent. The
> current wording about bypass intends to discourage unsafe choices but
> makes a strong statement only about the device behavior after features
> negotiation. 
OK. May be worth adding in the spec later.

By the way what is the plan for the vote?

Thanks

Eric
> 
> We could add a second feature bit specifically for the boot bypass
> behavior. It wouldn't be useful to the OS (the driver doesn't have a
> choice) but could present a bit in config space that allows a firmware to
> disable boot-bypass in a way that is sticky across reset. So when the OS
> resets the device after taking it over, it doesn't accidentally enable
> bypass. I wouldn't bother though. If a FW/bootloader is able to support
> virtio-iommu, the user might as well instantiate the device with the
> default-deny option.
> 
> Thanks,
> Jean
> 
>
Jean-Philippe Brucker Jan. 9, 2020, 11:15 a.m. UTC | #18
On Thu, Jan 09, 2020 at 12:01:26PM +0100, Auger Eric wrote:
> Hi,
> 
> On 1/9/20 11:40 AM, Jean-Philippe Brucker wrote:
> > On Thu, Jan 09, 2020 at 09:58:49AM +0100, Auger Eric wrote:
> >>>> I share Peter's concern about having a different default policy than x86.
> >>>
> >>> Yes I'd say just align with whatever policy is already in place. Do you
> >>> think we could add a command-line option to let people disable
> >>> default-bypass, though?  That would be a convenient way to make the IOMMU
> >>> protection airtight for those who need it.
> >> Yes I could easily add a device option to disable the default bypass.
> >>
> >> Shall we change the meaning of the F_BYPASS feature then? If exposed by
> >> the device, the device does bypass by default, otherwise it doesn't.
> >> This would be controlled by the device option.
> > 
> > For a device that doesn't do bypass by default, the driver wouldn't have
> > the ability to enable bypass (feature bit not offered, not negotiable).
> > 
> >> The driver then could have means to overwrite this behavior once loaded?
> > 
> > Let's keep the bypass feature bit for this. If the bit is offered, the
> > driver chooses to enable or disable it. If the bit is not offered or not
> > negotiated, then the behavior is deny. If the bit is offered and
> > negotiated then the behavior is allow.
> > 
> > We can say that before features negotiation (latched at features register
> > write, I think, in practice?) the behavior is platform dependent. The
> > current wording about bypass intends to discourage unsafe choices but
> > makes a strong statement only about the device behavior after features
> > negotiation. 
> OK. May be worth adding in the spec later.
> 
> By the way what is the plan for the vote?

The ballot closed and the spec is accepted for virtio-v1.2-cs01, with the
condition that the stale statement about padding is removed
(https://lists.oasis-open.org/archives/virtio-dev/201911/msg00083.html)

Thanks,
Jean
Eric Auger Jan. 9, 2020, 11:32 a.m. UTC | #19
Hi Jean,

On 1/9/20 12:15 PM, Jean-Philippe Brucker wrote:
> On Thu, Jan 09, 2020 at 12:01:26PM +0100, Auger Eric wrote:
>> Hi,
>>
>> On 1/9/20 11:40 AM, Jean-Philippe Brucker wrote:
>>> On Thu, Jan 09, 2020 at 09:58:49AM +0100, Auger Eric wrote:
>>>>>> I share Peter's concern about having a different default policy than x86.
>>>>>
>>>>> Yes I'd say just align with whatever policy is already in place. Do you
>>>>> think we could add a command-line option to let people disable
>>>>> default-bypass, though?  That would be a convenient way to make the IOMMU
>>>>> protection airtight for those who need it.
>>>> Yes I could easily add a device option to disable the default bypass.
>>>>
>>>> Shall we change the meaning of the F_BYPASS feature then? If exposed by
>>>> the device, the device does bypass by default, otherwise it doesn't.
>>>> This would be controlled by the device option.
>>>
>>> For a device that doesn't do bypass by default, the driver wouldn't have
>>> the ability to enable bypass (feature bit not offered, not negotiable).
>>>
>>>> The driver then could have means to overwrite this behavior once loaded?
>>>
>>> Let's keep the bypass feature bit for this. If the bit is offered, the
>>> driver chooses to enable or disable it. If the bit is not offered or not
>>> negotiated, then the behavior is deny. If the bit is offered and
>>> negotiated then the behavior is allow.
>>>
>>> We can say that before features negotiation (latched at features register
>>> write, I think, in practice?) the behavior is platform dependent. The
>>> current wording about bypass intends to discourage unsafe choices but
>>> makes a strong statement only about the device behavior after features
>>> negotiation. 
>> OK. May be worth adding in the spec later.
>>
>> By the way what is the plan for the vote?
> 
> The ballot closed and the spec is accepted for virtio-v1.2-cs01, with the
> condition that the stale statement about padding is removed
> (https://lists.oasis-open.org/archives/virtio-dev/201911/msg00083.html)

Ah OK. Sorry I missed the outcome. Congratulations!

Eric
> 
> Thanks,
> Jean
>
diff mbox series

Patch

diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index f25359cee2..de7cbb3c8f 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -72,3 +72,4 @@  virtio_iommu_get_endpoint(uint32_t ep_id) "Alloc endpoint=%d"
 virtio_iommu_put_endpoint(uint32_t ep_id) "Free endpoint=%d"
 virtio_iommu_get_domain(uint32_t domain_id) "Alloc domain=%d"
 virtio_iommu_put_domain(uint32_t domain_id) "Free domain=%d"
+virtio_iommu_translate_out(uint64_t virt_addr, uint64_t phys_addr, uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index f0a56833a2..a83666557b 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -412,19 +412,80 @@  static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
                                             int iommu_idx)
 {
     IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
+    viommu_interval interval, *mapping_key;
+    viommu_mapping *mapping_value;
+    VirtIOIOMMU *s = sdev->viommu;
+    viommu_endpoint *ep;
+    bool bypass_allowed;
     uint32_t sid;
+    bool found;
+
+    interval.low = addr;
+    interval.high = addr + 1;
 
     IOMMUTLBEntry entry = {
         .target_as = &address_space_memory,
         .iova = addr,
         .translated_addr = addr,
-        .addr_mask = ~(hwaddr)0,
+        .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1,
         .perm = IOMMU_NONE,
     };
 
+    bypass_allowed = virtio_has_feature(s->acked_features,
+                                        VIRTIO_IOMMU_F_BYPASS);
+
     sid = virtio_iommu_get_sid(sdev);
 
     trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
+    qemu_mutex_lock(&s->mutex);
+
+    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
+    if (!ep) {
+        if (!bypass_allowed) {
+            error_report_once("%s sid=%d is not known!!", __func__, sid);
+        } else {
+            entry.perm = flag;
+        }
+        goto unlock;
+    }
+
+    if (!ep->domain) {
+        if (!bypass_allowed) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s %02x:%02x.%01x not attached to any domain\n",
+                          __func__, PCI_BUS_NUM(sid),
+                          PCI_SLOT(sid), PCI_FUNC(sid));
+        } else {
+            entry.perm = flag;
+        }
+        goto unlock;
+    }
+
+    found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval),
+                                   (void **)&mapping_key,
+                                   (void **)&mapping_value);
+    if (!found) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s no mapping for 0x%"PRIx64" for sid=%d\n",
+                      __func__, addr, sid);
+        goto unlock;
+    }
+
+    if (((flag & IOMMU_RO) &&
+            !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_READ)) ||
+        ((flag & IOMMU_WO) &&
+            !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_WRITE))) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "Permission error on 0x%"PRIx64"(%d): allowed=%d\n",
+                      addr, flag, mapping_value->flags);
+        goto unlock;
+    }
+    entry.translated_addr = addr - mapping_key->low + mapping_value->phys_addr;
+    entry.perm = flag;
+    trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid);
+
+unlock:
+    qemu_mutex_unlock(&s->mutex);
     return entry;
 }