diff mbox

[v4,RESEND,2/3] IOMMU: change iommu_op->translate's is_write to flags, add support to NO_FAIL flag mode

Message ID 1476719064-9242-3-git-send-email-bd.aviv@gmail.com
State New
Headers show

Commit Message

Aviv B.D. Oct. 17, 2016, 3:44 p.m. UTC
From: "Aviv Ben-David" <bd.aviv@gmail.com>

Supports translation trials without reporting error to guest on
translation failure.

Signed-off-by: Aviv Ben-David <bd.aviv@gmail.com>
---
 exec.c                |  3 ++-
 hw/i386/amd_iommu.c   |  4 +--
 hw/i386/intel_iommu.c | 70 +++++++++++++++++++++++++++++++++------------------
 include/exec/memory.h |  6 +++--
 memory.c              |  3 ++-
 5 files changed, 55 insertions(+), 31 deletions(-)

Comments

David Gibson Oct. 18, 2016, 3:57 a.m. UTC | #1
On Mon, Oct 17, 2016 at 06:44:23PM +0300, Aviv B.D wrote:
> From: "Aviv Ben-David" <bd.aviv@gmail.com>
> 
> Supports translation trials without reporting error to guest on
> translation failure.
> 
> Signed-off-by: Aviv Ben-David <bd.aviv@gmail.com>
> ---
>  exec.c                |  3 ++-
>  hw/i386/amd_iommu.c   |  4 +--
>  hw/i386/intel_iommu.c | 70 +++++++++++++++++++++++++++++++++------------------
>  include/exec/memory.h |  6 +++--
>  memory.c              |  3 ++-
>  5 files changed, 55 insertions(+), 31 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index 374c364..266fa01 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -432,7 +432,8 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
>              break;
>          }
>  
> -        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
> +        iotlb = mr->iommu_ops->translate(mr, addr,
> +                                         is_write ? IOMMU_WO : IOMMU_RO);
>          addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
>                  | (addr & iotlb.addr_mask));
>          *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
> diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
> index 47b79d9..1f0d76b 100644
> --- a/hw/i386/amd_iommu.c
> +++ b/hw/i386/amd_iommu.c
> @@ -988,7 +988,7 @@ static inline bool amdvi_is_interrupt_addr(hwaddr addr)
>  }
>  
>  static IOMMUTLBEntry amdvi_translate(MemoryRegion *iommu, hwaddr addr,
> -                                     bool is_write)
> +                                     IOMMUAccessFlags flags)

You've also updated the intel viommu implementation for the new
notifier flags semantics, but none of the others (AMD and sPAPR).
That needs to be done as well.

>  {
>      AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
>      AMDVIState *s = as->iommu_state;
> @@ -1017,7 +1017,7 @@ static IOMMUTLBEntry amdvi_translate(MemoryRegion *iommu, hwaddr addr,
>          return ret;
>      }
>  
> -    amdvi_do_translate(as, addr, is_write, &ret);
> +    amdvi_do_translate(as, addr, flags & IOMMU_WO, &ret);
>      trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn),
>              PCI_FUNC(as->devfn), addr, ret.translated_addr);
>      return ret;
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 69730cb..dcf45f0 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -364,7 +364,7 @@ static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index)
>  /* Must not update F field now, should be done later */
>  static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
>                              uint16_t source_id, hwaddr addr,
> -                            VTDFaultReason fault, bool is_write)
> +                            VTDFaultReason fault, IOMMUAccessFlags flags)
>  {
>      uint64_t hi = 0, lo;
>      hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
> @@ -373,7 +373,7 @@ static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
>  
>      lo = VTD_FRCD_FI(addr);
>      hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault);
> -    if (!is_write) {
> +    if (!(flags == IOMMU_WO || flags == IOMMU_RW)) {
>          hi |= VTD_FRCD_T;
>      }
>      vtd_set_quad_raw(s, frcd_reg_addr, lo);
> @@ -404,7 +404,7 @@ static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id)
>  /* Log and report an DMAR (address translation) fault to software */
>  static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
>                                    hwaddr addr, VTDFaultReason fault,
> -                                  bool is_write)
> +                                  IOMMUAccessFlags flags)
>  {
>      uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
>  
> @@ -415,7 +415,7 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
>          return;
>      }
>      VTD_DPRINTF(FLOG, "sid 0x%"PRIx16 ", fault %d, addr 0x%"PRIx64
> -                ", is_write %d", source_id, fault, addr, is_write);
> +                ", flags %d", source_id, fault, addr, flags);
>      if (fsts_reg & VTD_FSTS_PFO) {
>          VTD_DPRINTF(FLOG, "new fault is not recorded due to "
>                      "Primary Fault Overflow");
> @@ -433,7 +433,7 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
>          return;
>      }
>  
> -    vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write);
> +    vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, flags);
>  
>      if (fsts_reg & VTD_FSTS_PPF) {
>          VTD_DPRINTF(FLOG, "there are pending faults already, "
> @@ -629,7 +629,8 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
>  /* Given the @gpa, get relevant @slptep. @slpte_level will be the last level
>   * of the translation, can be used for deciding the size of large page.
>   */
> -static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
> +static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa,
> +                            IOMMUAccessFlags flags,
>                              uint64_t *slptep, uint32_t *slpte_level,
>                              bool *reads, bool *writes)
>  {
> @@ -649,7 +650,20 @@ static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
>      }
>  
>      /* FIXME: what is the Atomics request here? */
> -    access_right_check = is_write ? VTD_SL_W : VTD_SL_R;
> +    switch (flags) {
> +    case IOMMU_WO:
> +        access_right_check = VTD_SL_W;
> +        break;
> +    case IOMMU_RO:
> +        access_right_check = VTD_SL_R;
> +        break;
> +    case IOMMU_RW: /* passthrow */
> +    case IOMMU_NO_FAIL:
> +        access_right_check = VTD_SL_R | VTD_SL_W;
> +        break;

I'm very confused by the semantics of IOMMU_NO_FAIL.  At first I
thought it was an additional flag that would be ORed with the access
mode to trigger extra behaviour.  But here it seems that you'd use it
_instead_ of a normal access mode.  I don't really see how that makes
sense.

> +    default:
> +        assert(0);
> +    }
>  
>      while (true) {
>          offset = vtd_gpa_level_offset(gpa, level);
> @@ -671,8 +685,9 @@ static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
>          if (!(slpte & access_right_check)) {
>              VTD_DPRINTF(GENERAL, "error: lack of %s permission for "
>                          "gpa 0x%"PRIx64 " slpte 0x%"PRIx64,
> -                        (is_write ? "write" : "read"), gpa, slpte);
> -            return is_write ? -VTD_FR_WRITE : -VTD_FR_READ;
> +                        (flags == IOMMU_WO ? "write" : "read"), gpa, slpte);
> +            return (flags == IOMMU_RW || flags == IOMMU_WO) ?
> +                   -VTD_FR_WRITE : -VTD_FR_READ;
>          }
>          if (vtd_slpte_nonzero_rsvd(slpte, level)) {
>              VTD_DPRINTF(GENERAL, "error: non-zero reserved field in second "
> @@ -789,11 +804,12 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
>   *
>   * @bus_num: The bus number
>   * @devfn: The devfn, which is the  combined of device and function number
> - * @is_write: The access is a write operation
> + * @flags: The access is a write operation
>   * @entry: IOMMUTLBEntry that contain the addr to be translated and result
>   */
>  static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
> -                                   uint8_t devfn, hwaddr addr, bool is_write,
> +                                   uint8_t devfn, hwaddr addr,
> +                                   IOMMUAccessFlags flags,
>                                     IOMMUTLBEntry *entry)
>  {
>      IntelIOMMUState *s = vtd_as->iommu_state;
> @@ -811,7 +827,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
>  
>      /* Check if the request is in interrupt address range */
>      if (vtd_is_interrupt_addr(addr)) {
> -        if (is_write) {
> +        if (flags == IOMMU_WO || flags == IOMMU_RW) {
>              /* FIXME: since we don't know the length of the access here, we
>               * treat Non-DWORD length write requests without PASID as
>               * interrupt requests, too. Withoud interrupt remapping support,
> @@ -827,7 +843,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
>          } else {
>              VTD_DPRINTF(GENERAL, "error: read request from interrupt address "
>                          "gpa 0x%"PRIx64, addr);
> -            vtd_report_dmar_fault(s, source_id, addr, VTD_FR_READ, is_write);
> +            vtd_report_dmar_fault(s, source_id, addr, VTD_FR_READ, flags);
>              return;
>          }
>      }
> @@ -856,12 +872,14 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
>          is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
>          if (ret_fr) {
>              ret_fr = -ret_fr;
> -            if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
> -                VTD_DPRINTF(FLOG, "fault processing is disabled for DMA "
> +            if (flags != IOMMU_NO_FAIL) {
> +                if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
> +                    VTD_DPRINTF(FLOG, "fault processing is disabled for DMA "
>                              "requests through this context-entry "
>                              "(with FPD Set)");

Um.. the descriptions seem to imply that NO_FAIL prevents reporting
failures to the guest.  But here it seems to be the opposite - you
only transmit the fault if the flags are != IOMMU_NO_FAIL.

> -            } else {
> -                vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
> +                } else {
> +                    vtd_report_dmar_fault(s, source_id, addr, ret_fr, flags);
> +                }
>              }
>              return;
>          }
> @@ -874,15 +892,17 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
>          cc_entry->context_cache_gen = s->context_cache_gen;
>      }
>  
> -    ret_fr = vtd_gpa_to_slpte(&ce, addr, is_write, &slpte, &level,
> +    ret_fr = vtd_gpa_to_slpte(&ce, addr, flags, &slpte, &level,
>                                &reads, &writes);
>      if (ret_fr) {
>          ret_fr = -ret_fr;
> -        if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
> -            VTD_DPRINTF(FLOG, "fault processing is disabled for DMA requests "
> -                        "through this context-entry (with FPD Set)");
> -        } else {
> -            vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
> +        if (flags != IOMMU_NO_FAIL) {
> +            if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
> +                VTD_DPRINTF(FLOG, "fault processing is disabled for DMA "
> +                        "requests through this context-entry (with FPD Set)");
> +            } else {
> +                vtd_report_dmar_fault(s, source_id, addr, ret_fr, flags);
> +            }
>          }
>          return;
>      }
> @@ -1944,7 +1964,7 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
>  }
>  
>  static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
> -                                         bool is_write)
> +                                         IOMMUAccessFlags flags)
>  {
>      VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
>      IntelIOMMUState *s = vtd_as->iommu_state;
> @@ -1966,7 +1986,7 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
>      }
>  
>      vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn, addr,
> -                           is_write, &ret);
> +                           flags, &ret);
>      VTD_DPRINTF(MMU,
>                  "bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8
>                  " gpa 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus),
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 10d7eac..0d4acb9 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -57,6 +57,7 @@ typedef enum {
>      IOMMU_RO   = 1,
>      IOMMU_WO   = 2,
>      IOMMU_RW   = 3,
> +    IOMMU_NO_FAIL  = 4, /* may not be present, don't repport error to guest */
>  } IOMMUAccessFlags;
>  
>  struct IOMMUTLBEntry {
> @@ -168,10 +169,11 @@ struct MemoryRegionOps {
>  };
>  
>  typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps;
> -
>  struct MemoryRegionIOMMUOps {
>      /* Return a TLB entry that contains a given address. */
> -    IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool is_write);
> +    IOMMUTLBEntry (*translate)(MemoryRegion *iommu,
> +                               hwaddr addr,
> +                               IOMMUAccessFlags flags);
>      /* Returns minimum supported page size */
>      uint64_t (*get_min_page_size)(MemoryRegion *iommu);
>      /* Called when IOMMU Notifier flag changed */
> diff --git a/memory.c b/memory.c
> index 58f9269..dfbb9a0 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -1563,7 +1563,8 @@ void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n,
>      granularity = memory_region_iommu_get_min_page_size(mr);
>  
>      for (addr = 0; addr < memory_region_size(mr); addr += granularity) {
> -        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
> +        iotlb = mr->iommu_ops->translate(mr, addr,
> +                                         is_write ? IOMMU_WO : IOMMU_RO);

It probably makes more sense to pust the IOMMUAccessFlags parameter
into memory_region_iommu_replay().

>          if (iotlb.perm != IOMMU_NONE) {
>              n->notify(n, &iotlb);
>          }
Peter Xu Oct. 19, 2016, 8:35 a.m. UTC | #2
On Mon, Oct 17, 2016 at 06:44:23PM +0300, Aviv B.D wrote:

[...]

> @@ -364,7 +364,7 @@ static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index)
>  /* Must not update F field now, should be done later */
>  static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
>                              uint16_t source_id, hwaddr addr,
> -                            VTDFaultReason fault, bool is_write)
> +                            VTDFaultReason fault, IOMMUAccessFlags flags)

I think we don't need to change this is_write into flags. We can just
make sure we translate the flags into is_write when calling
vtd_record_frcd. After all, NO_FAIL makes no sense in this function.

>  {
>      uint64_t hi = 0, lo;
>      hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
> @@ -373,7 +373,7 @@ static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
>  
>      lo = VTD_FRCD_FI(addr);
>      hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault);
> -    if (!is_write) {
> +    if (!(flags == IOMMU_WO || flags == IOMMU_RW)) {
>          hi |= VTD_FRCD_T;
>      }
>      vtd_set_quad_raw(s, frcd_reg_addr, lo);
> @@ -404,7 +404,7 @@ static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id)
>  /* Log and report an DMAR (address translation) fault to software */
>  static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
>                                    hwaddr addr, VTDFaultReason fault,
> -                                  bool is_write)
> +                                  IOMMUAccessFlags flags)

Here as well. IMHO we can just keep the old is_write, and tune the
callers.

>  {
>      uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
>  
> @@ -415,7 +415,7 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
>          return;
>      }
>      VTD_DPRINTF(FLOG, "sid 0x%"PRIx16 ", fault %d, addr 0x%"PRIx64
> -                ", is_write %d", source_id, fault, addr, is_write);
> +                ", flags %d", source_id, fault, addr, flags);
>      if (fsts_reg & VTD_FSTS_PFO) {
>          VTD_DPRINTF(FLOG, "new fault is not recorded due to "
>                      "Primary Fault Overflow");
> @@ -433,7 +433,7 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
>          return;
>      }
>  
> -    vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write);
> +    vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, flags);

... so if you agree on my previous comments, here we can use:

       vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault,
                       flags & IOMMU_WO);

and keep the vtd_record_frcd() untouched.

[...]

> @@ -789,11 +804,12 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
>   *
>   * @bus_num: The bus number
>   * @devfn: The devfn, which is the  combined of device and function number
> - * @is_write: The access is a write operation
> + * @flags: The access is a write operation

Need to fix comment to suite the new flag.

>   * @entry: IOMMUTLBEntry that contain the addr to be translated and result
>   */
>  static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
> -                                   uint8_t devfn, hwaddr addr, bool is_write,
> +                                   uint8_t devfn, hwaddr addr,
> +                                   IOMMUAccessFlags flags,
>                                     IOMMUTLBEntry *entry)
>  {
>      IntelIOMMUState *s = vtd_as->iommu_state;
> @@ -811,7 +827,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
>  
>      /* Check if the request is in interrupt address range */
>      if (vtd_is_interrupt_addr(addr)) {
> -        if (is_write) {
> +        if (flags == IOMMU_WO || flags == IOMMU_RW) {

I suggest we directly use (flags & IOMMU_WO) in all similar places.

>              /* FIXME: since we don't know the length of the access here, we
>               * treat Non-DWORD length write requests without PASID as
>               * interrupt requests, too. Withoud interrupt remapping support,
> @@ -827,7 +843,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
>          } else {
>              VTD_DPRINTF(GENERAL, "error: read request from interrupt address "
>                          "gpa 0x%"PRIx64, addr);
> -            vtd_report_dmar_fault(s, source_id, addr, VTD_FR_READ, is_write);
> +            vtd_report_dmar_fault(s, source_id, addr, VTD_FR_READ, flags);

Again, we can avoid touching vtd_report_dmar_fault() interface, and
use the old is_write. Looks like NO_FAIL makes no sense for it as well.

Thanks,

-- peterx
Aviv B.D. Oct. 20, 2016, 6:54 p.m. UTC | #3
Hi,
You are right, and I will revert those functions you mentioned.

Thanks,
Aviv.



On Wed, Oct 19, 2016 at 11:35 AM, Peter Xu <peterx@redhat.com> wrote:

> On Mon, Oct 17, 2016 at 06:44:23PM +0300, Aviv B.D wrote:
>
> [...]
>
> > @@ -364,7 +364,7 @@ static void vtd_set_frcd_and_update_ppf(IntelIOMMUState
> *s, uint16_t index)
> >  /* Must not update F field now, should be done later */
> >  static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
> >                              uint16_t source_id, hwaddr addr,
> > -                            VTDFaultReason fault, bool is_write)
> > +                            VTDFaultReason fault, IOMMUAccessFlags
> flags)
>
> I think we don't need to change this is_write into flags. We can just
> make sure we translate the flags into is_write when calling
> vtd_record_frcd. After all, NO_FAIL makes no sense in this function.
>
> >  {
> >      uint64_t hi = 0, lo;
> >      hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) <<
> 4);
> > @@ -373,7 +373,7 @@ static void vtd_record_frcd(IntelIOMMUState *s,
> uint16_t index,
> >
> >      lo = VTD_FRCD_FI(addr);
> >      hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault);
> > -    if (!is_write) {
> > +    if (!(flags == IOMMU_WO || flags == IOMMU_RW)) {
> >          hi |= VTD_FRCD_T;
> >      }
> >      vtd_set_quad_raw(s, frcd_reg_addr, lo);
> > @@ -404,7 +404,7 @@ static bool vtd_try_collapse_fault(IntelIOMMUState
> *s, uint16_t source_id)
> >  /* Log and report an DMAR (address translation) fault to software */
> >  static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t
> source_id,
> >                                    hwaddr addr, VTDFaultReason fault,
> > -                                  bool is_write)
> > +                                  IOMMUAccessFlags flags)
>
> Here as well. IMHO we can just keep the old is_write, and tune the
> callers.
>
> >  {
> >      uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
> >
> > @@ -415,7 +415,7 @@ static void vtd_report_dmar_fault(IntelIOMMUState
> *s, uint16_t source_id,
> >          return;
> >      }
> >      VTD_DPRINTF(FLOG, "sid 0x%"PRIx16 ", fault %d, addr 0x%"PRIx64
> > -                ", is_write %d", source_id, fault, addr, is_write);
> > +                ", flags %d", source_id, fault, addr, flags);
> >      if (fsts_reg & VTD_FSTS_PFO) {
> >          VTD_DPRINTF(FLOG, "new fault is not recorded due to "
> >                      "Primary Fault Overflow");
> > @@ -433,7 +433,7 @@ static void vtd_report_dmar_fault(IntelIOMMUState
> *s, uint16_t source_id,
> >          return;
> >      }
> >
> > -    vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault,
> is_write);
> > +    vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, flags);
>
> ... so if you agree on my previous comments, here we can use:
>
>        vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault,
>                        flags & IOMMU_WO);
>
> and keep the vtd_record_frcd() untouched.
>
> [...]
>
> > @@ -789,11 +804,12 @@ static inline bool vtd_is_interrupt_addr(hwaddr
> addr)
> >   *
> >   * @bus_num: The bus number
> >   * @devfn: The devfn, which is the  combined of device and function
> number
> > - * @is_write: The access is a write operation
> > + * @flags: The access is a write operation
>
> Need to fix comment to suite the new flag.
>
> >   * @entry: IOMMUTLBEntry that contain the addr to be translated and
> result
> >   */
> >  static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus
> *bus,
> > -                                   uint8_t devfn, hwaddr addr, bool
> is_write,
> > +                                   uint8_t devfn, hwaddr addr,
> > +                                   IOMMUAccessFlags flags,
> >                                     IOMMUTLBEntry *entry)
> >  {
> >      IntelIOMMUState *s = vtd_as->iommu_state;
> > @@ -811,7 +827,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace
> *vtd_as, PCIBus *bus,
> >
> >      /* Check if the request is in interrupt address range */
> >      if (vtd_is_interrupt_addr(addr)) {
> > -        if (is_write) {
> > +        if (flags == IOMMU_WO || flags == IOMMU_RW) {
>
> I suggest we directly use (flags & IOMMU_WO) in all similar places.
>
> >              /* FIXME: since we don't know the length of the access
> here, we
> >               * treat Non-DWORD length write requests without PASID as
> >               * interrupt requests, too. Withoud interrupt remapping
> support,
> > @@ -827,7 +843,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace
> *vtd_as, PCIBus *bus,
> >          } else {
> >              VTD_DPRINTF(GENERAL, "error: read request from interrupt
> address "
> >                          "gpa 0x%"PRIx64, addr);
> > -            vtd_report_dmar_fault(s, source_id, addr, VTD_FR_READ,
> is_write);
> > +            vtd_report_dmar_fault(s, source_id, addr, VTD_FR_READ,
> flags);
>
> Again, we can avoid touching vtd_report_dmar_fault() interface, and
> use the old is_write. Looks like NO_FAIL makes no sense for it as well.
>
> Thanks,
>
> -- peterx
>
diff mbox

Patch

diff --git a/exec.c b/exec.c
index 374c364..266fa01 100644
--- a/exec.c
+++ b/exec.c
@@ -432,7 +432,8 @@  MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
             break;
         }
 
-        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
+        iotlb = mr->iommu_ops->translate(mr, addr,
+                                         is_write ? IOMMU_WO : IOMMU_RO);
         addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                 | (addr & iotlb.addr_mask));
         *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 47b79d9..1f0d76b 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -988,7 +988,7 @@  static inline bool amdvi_is_interrupt_addr(hwaddr addr)
 }
 
 static IOMMUTLBEntry amdvi_translate(MemoryRegion *iommu, hwaddr addr,
-                                     bool is_write)
+                                     IOMMUAccessFlags flags)
 {
     AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
     AMDVIState *s = as->iommu_state;
@@ -1017,7 +1017,7 @@  static IOMMUTLBEntry amdvi_translate(MemoryRegion *iommu, hwaddr addr,
         return ret;
     }
 
-    amdvi_do_translate(as, addr, is_write, &ret);
+    amdvi_do_translate(as, addr, flags & IOMMU_WO, &ret);
     trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn),
             PCI_FUNC(as->devfn), addr, ret.translated_addr);
     return ret;
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 69730cb..dcf45f0 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -364,7 +364,7 @@  static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index)
 /* Must not update F field now, should be done later */
 static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
                             uint16_t source_id, hwaddr addr,
-                            VTDFaultReason fault, bool is_write)
+                            VTDFaultReason fault, IOMMUAccessFlags flags)
 {
     uint64_t hi = 0, lo;
     hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
@@ -373,7 +373,7 @@  static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
 
     lo = VTD_FRCD_FI(addr);
     hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault);
-    if (!is_write) {
+    if (!(flags == IOMMU_WO || flags == IOMMU_RW)) {
         hi |= VTD_FRCD_T;
     }
     vtd_set_quad_raw(s, frcd_reg_addr, lo);
@@ -404,7 +404,7 @@  static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id)
 /* Log and report an DMAR (address translation) fault to software */
 static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
                                   hwaddr addr, VTDFaultReason fault,
-                                  bool is_write)
+                                  IOMMUAccessFlags flags)
 {
     uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
 
@@ -415,7 +415,7 @@  static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
         return;
     }
     VTD_DPRINTF(FLOG, "sid 0x%"PRIx16 ", fault %d, addr 0x%"PRIx64
-                ", is_write %d", source_id, fault, addr, is_write);
+                ", flags %d", source_id, fault, addr, flags);
     if (fsts_reg & VTD_FSTS_PFO) {
         VTD_DPRINTF(FLOG, "new fault is not recorded due to "
                     "Primary Fault Overflow");
@@ -433,7 +433,7 @@  static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
         return;
     }
 
-    vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write);
+    vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, flags);
 
     if (fsts_reg & VTD_FSTS_PPF) {
         VTD_DPRINTF(FLOG, "there are pending faults already, "
@@ -629,7 +629,8 @@  static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
 /* Given the @gpa, get relevant @slptep. @slpte_level will be the last level
  * of the translation, can be used for deciding the size of large page.
  */
-static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
+static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa,
+                            IOMMUAccessFlags flags,
                             uint64_t *slptep, uint32_t *slpte_level,
                             bool *reads, bool *writes)
 {
@@ -649,7 +650,20 @@  static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
     }
 
     /* FIXME: what is the Atomics request here? */
-    access_right_check = is_write ? VTD_SL_W : VTD_SL_R;
+    switch (flags) {
+    case IOMMU_WO:
+        access_right_check = VTD_SL_W;
+        break;
+    case IOMMU_RO:
+        access_right_check = VTD_SL_R;
+        break;
+    case IOMMU_RW: /* passthrow */
+    case IOMMU_NO_FAIL:
+        access_right_check = VTD_SL_R | VTD_SL_W;
+        break;
+    default:
+        assert(0);
+    }
 
     while (true) {
         offset = vtd_gpa_level_offset(gpa, level);
@@ -671,8 +685,9 @@  static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write,
         if (!(slpte & access_right_check)) {
             VTD_DPRINTF(GENERAL, "error: lack of %s permission for "
                         "gpa 0x%"PRIx64 " slpte 0x%"PRIx64,
-                        (is_write ? "write" : "read"), gpa, slpte);
-            return is_write ? -VTD_FR_WRITE : -VTD_FR_READ;
+                        (flags == IOMMU_WO ? "write" : "read"), gpa, slpte);
+            return (flags == IOMMU_RW || flags == IOMMU_WO) ?
+                   -VTD_FR_WRITE : -VTD_FR_READ;
         }
         if (vtd_slpte_nonzero_rsvd(slpte, level)) {
             VTD_DPRINTF(GENERAL, "error: non-zero reserved field in second "
@@ -789,11 +804,12 @@  static inline bool vtd_is_interrupt_addr(hwaddr addr)
  *
  * @bus_num: The bus number
  * @devfn: The devfn, which is the  combined of device and function number
- * @is_write: The access is a write operation
+ * @flags: The access is a write operation
  * @entry: IOMMUTLBEntry that contain the addr to be translated and result
  */
 static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
-                                   uint8_t devfn, hwaddr addr, bool is_write,
+                                   uint8_t devfn, hwaddr addr,
+                                   IOMMUAccessFlags flags,
                                    IOMMUTLBEntry *entry)
 {
     IntelIOMMUState *s = vtd_as->iommu_state;
@@ -811,7 +827,7 @@  static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
 
     /* Check if the request is in interrupt address range */
     if (vtd_is_interrupt_addr(addr)) {
-        if (is_write) {
+        if (flags == IOMMU_WO || flags == IOMMU_RW) {
             /* FIXME: since we don't know the length of the access here, we
              * treat Non-DWORD length write requests without PASID as
              * interrupt requests, too. Withoud interrupt remapping support,
@@ -827,7 +843,7 @@  static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
         } else {
             VTD_DPRINTF(GENERAL, "error: read request from interrupt address "
                         "gpa 0x%"PRIx64, addr);
-            vtd_report_dmar_fault(s, source_id, addr, VTD_FR_READ, is_write);
+            vtd_report_dmar_fault(s, source_id, addr, VTD_FR_READ, flags);
             return;
         }
     }
@@ -856,12 +872,14 @@  static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
         is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
         if (ret_fr) {
             ret_fr = -ret_fr;
-            if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
-                VTD_DPRINTF(FLOG, "fault processing is disabled for DMA "
+            if (flags != IOMMU_NO_FAIL) {
+                if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
+                    VTD_DPRINTF(FLOG, "fault processing is disabled for DMA "
                             "requests through this context-entry "
                             "(with FPD Set)");
-            } else {
-                vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
+                } else {
+                    vtd_report_dmar_fault(s, source_id, addr, ret_fr, flags);
+                }
             }
             return;
         }
@@ -874,15 +892,17 @@  static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
         cc_entry->context_cache_gen = s->context_cache_gen;
     }
 
-    ret_fr = vtd_gpa_to_slpte(&ce, addr, is_write, &slpte, &level,
+    ret_fr = vtd_gpa_to_slpte(&ce, addr, flags, &slpte, &level,
                               &reads, &writes);
     if (ret_fr) {
         ret_fr = -ret_fr;
-        if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
-            VTD_DPRINTF(FLOG, "fault processing is disabled for DMA requests "
-                        "through this context-entry (with FPD Set)");
-        } else {
-            vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
+        if (flags != IOMMU_NO_FAIL) {
+            if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
+                VTD_DPRINTF(FLOG, "fault processing is disabled for DMA "
+                        "requests through this context-entry (with FPD Set)");
+            } else {
+                vtd_report_dmar_fault(s, source_id, addr, ret_fr, flags);
+            }
         }
         return;
     }
@@ -1944,7 +1964,7 @@  static void vtd_mem_write(void *opaque, hwaddr addr,
 }
 
 static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
-                                         bool is_write)
+                                         IOMMUAccessFlags flags)
 {
     VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
     IntelIOMMUState *s = vtd_as->iommu_state;
@@ -1966,7 +1986,7 @@  static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
     }
 
     vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn, addr,
-                           is_write, &ret);
+                           flags, &ret);
     VTD_DPRINTF(MMU,
                 "bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8
                 " gpa 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus),
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 10d7eac..0d4acb9 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -57,6 +57,7 @@  typedef enum {
     IOMMU_RO   = 1,
     IOMMU_WO   = 2,
     IOMMU_RW   = 3,
+    IOMMU_NO_FAIL  = 4, /* may not be present, don't repport error to guest */
 } IOMMUAccessFlags;
 
 struct IOMMUTLBEntry {
@@ -168,10 +169,11 @@  struct MemoryRegionOps {
 };
 
 typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps;
-
 struct MemoryRegionIOMMUOps {
     /* Return a TLB entry that contains a given address. */
-    IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool is_write);
+    IOMMUTLBEntry (*translate)(MemoryRegion *iommu,
+                               hwaddr addr,
+                               IOMMUAccessFlags flags);
     /* Returns minimum supported page size */
     uint64_t (*get_min_page_size)(MemoryRegion *iommu);
     /* Called when IOMMU Notifier flag changed */
diff --git a/memory.c b/memory.c
index 58f9269..dfbb9a0 100644
--- a/memory.c
+++ b/memory.c
@@ -1563,7 +1563,8 @@  void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n,
     granularity = memory_region_iommu_get_min_page_size(mr);
 
     for (addr = 0; addr < memory_region_size(mr); addr += granularity) {
-        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
+        iotlb = mr->iommu_ops->translate(mr, addr,
+                                         is_write ? IOMMU_WO : IOMMU_RO);
         if (iotlb.perm != IOMMU_NONE) {
             n->notify(n, &iotlb);
         }