Message ID | cover.1716578450.git.tjeznach@rivosinc.com |
---|---|
Headers | show |
Series | Linux RISC-V IOMMU Support | expand |
Hi Tomasz, I reviewed iommu-bits.h against the spec. Most naming matches exactly, which is nice, but I've pointed out a few which don't. Thanks, drew On Fri, May 24, 2024 at 12:34:42PM GMT, Tomasz Jeznach wrote: ... > diff --git a/drivers/iommu/riscv/iommu-bits.h b/drivers/iommu/riscv/iommu-bits.h > new file mode 100644 > index 000000000000..48f795a0cd10 > --- /dev/null > +++ b/drivers/iommu/riscv/iommu-bits.h > @@ -0,0 +1,707 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > +/* > + * Copyright © 2022-2024 Rivos Inc. > + * Copyright © 2023 FORTH-ICS/CARV > + * Copyright © 2023 RISC-V IOMMU Task Group > + * > + * RISC-V IOMMU - Register Layout and Data Structures. > + * > + * Based on the 'RISC-V IOMMU Architecture Specification', Version 1.0 > + * Published at https://github.com/riscv-non-isa/riscv-iommu > + * > + */ > + > +#ifndef _RISCV_IOMMU_BITS_H_ > +#define _RISCV_IOMMU_BITS_H_ > + > +#include <linux/types.h> > +#include <linux/bitfield.h> > +#include <linux/bits.h> > + > +/* > + * Chapter 5: Memory Mapped register interface > + */ > + > +/* Common field positions */ > +#define RISCV_IOMMU_PPN_FIELD GENMASK_ULL(53, 10) > +#define RISCV_IOMMU_QUEUE_LOGSZ_FIELD GENMASK_ULL(4, 0) RISCV_IOMMU_QUEUE_LOG2SZ_FIELD > +#define RISCV_IOMMU_QUEUE_INDEX_FIELD GENMASK_ULL(31, 0) > +#define RISCV_IOMMU_QUEUE_ENABLE BIT(0) > +#define RISCV_IOMMU_QUEUE_INTR_ENABLE BIT(1) > +#define RISCV_IOMMU_QUEUE_MEM_FAULT BIT(8) > +#define RISCV_IOMMU_QUEUE_OVERFLOW BIT(9) > +#define RISCV_IOMMU_QUEUE_ACTIVE BIT(16) > +#define RISCV_IOMMU_QUEUE_BUSY BIT(17) > + > +#define RISCV_IOMMU_ATP_PPN_FIELD GENMASK_ULL(43, 0) > +#define RISCV_IOMMU_ATP_MODE_FIELD GENMASK_ULL(63, 60) > + > +/* 5.3 IOMMU Capabilities (64bits) */ > +#define RISCV_IOMMU_REG_CAP 0x0000 Maybe too verbose, but the name is "capabilities". So all the below 'CAP' instances would match the spec better if spelled out. 
> +#define RISCV_IOMMU_CAP_VERSION GENMASK_ULL(7, 0) > +#define RISCV_IOMMU_CAP_S_SV32 BIT_ULL(8) > +#define RISCV_IOMMU_CAP_S_SV39 BIT_ULL(9) > +#define RISCV_IOMMU_CAP_S_SV48 BIT_ULL(10) > +#define RISCV_IOMMU_CAP_S_SV57 BIT_ULL(11) > +#define RISCV_IOMMU_CAP_SVPBMT BIT_ULL(15) > +#define RISCV_IOMMU_CAP_G_SV32 BIT_ULL(16) > +#define RISCV_IOMMU_CAP_G_SV39 BIT_ULL(17) > +#define RISCV_IOMMU_CAP_G_SV48 BIT_ULL(18) > +#define RISCV_IOMMU_CAP_G_SV57 BIT_ULL(19) RISCV_IOMMU_CAPABILITIES_SV32X4 RISCV_IOMMU_CAPABILITIES_SV39X4 RISCV_IOMMU_CAPABILITIES_SV48X4 RISCV_IOMMU_CAPABILITIES_SV57X4 > +#define RISCV_IOMMU_CAP_AMO_MRIF BIT_ULL(21) > +#define RISCV_IOMMU_CAP_MSI_FLAT BIT_ULL(22) > +#define RISCV_IOMMU_CAP_MSI_MRIF BIT_ULL(23) > +#define RISCV_IOMMU_CAP_AMO_HWAD BIT_ULL(24) > +#define RISCV_IOMMU_CAP_ATS BIT_ULL(25) > +#define RISCV_IOMMU_CAP_T2GPA BIT_ULL(26) > +#define RISCV_IOMMU_CAP_END BIT_ULL(27) > +#define RISCV_IOMMU_CAP_IGS GENMASK_ULL(29, 28) > +#define RISCV_IOMMU_CAP_HPM BIT_ULL(30) > +#define RISCV_IOMMU_CAP_DBG BIT_ULL(31) > +#define RISCV_IOMMU_CAP_PAS GENMASK_ULL(37, 32) > +#define RISCV_IOMMU_CAP_PD8 BIT_ULL(38) > +#define RISCV_IOMMU_CAP_PD17 BIT_ULL(39) > +#define RISCV_IOMMU_CAP_PD20 BIT_ULL(40) > + > +#define RISCV_IOMMU_CAP_VERSION_VER_MASK 0xF0 RISCV_IOMMU_CAP_VERSION_MAJOR_MASK > +#define RISCV_IOMMU_CAP_VERSION_REV_MASK 0x0F RISCV_IOMMU_CAP_VERSION_MINOR_MASK > + > +/** > + * enum riscv_iommu_igs_settings - Interrupt Generation Support Settings > + * @RISCV_IOMMU_CAP_IGS_MSI: I/O MMU supports only MSI generation > + * @RISCV_IOMMU_CAP_IGS_WSI: I/O MMU supports only Wired-Signaled interrupt > + * @RISCV_IOMMU_CAP_IGS_BOTH: I/O MMU supports both MSI and WSI generation Can we always spell IOMMU 'IOMMU'? 
> + * @RISCV_IOMMU_CAP_IGS_RSRV: Reserved for standard use > + */ > +enum riscv_iommu_igs_settings { > + RISCV_IOMMU_CAP_IGS_MSI = 0, > + RISCV_IOMMU_CAP_IGS_WSI = 1, > + RISCV_IOMMU_CAP_IGS_BOTH = 2, > + RISCV_IOMMU_CAP_IGS_RSRV = 3 > +}; > + > +/* 5.4 Features control register (32bits) */ > +#define RISCV_IOMMU_REG_FCTL 0x0008 > +#define RISCV_IOMMU_FCTL_BE BIT(0) > +#define RISCV_IOMMU_FCTL_WSI BIT(1) > +#define RISCV_IOMMU_FCTL_GXL BIT(2) > + > +/* 5.5 Device-directory-table pointer (64bits) */ > +#define RISCV_IOMMU_REG_DDTP 0x0010 > +#define RISCV_IOMMU_DDTP_MODE GENMASK_ULL(3, 0) A bit unnecessary, but spec calls this 'iommu_mode', so RISCV_IOMMU_DDTP_IOMMU_MODE ? > +#define RISCV_IOMMU_DDTP_BUSY BIT_ULL(4) > +#define RISCV_IOMMU_DDTP_PPN RISCV_IOMMU_PPN_FIELD > + > +/** > + * enum riscv_iommu_ddtp_modes - I/O MMU translation modes > + * @RISCV_IOMMU_DDTP_MODE_OFF: No inbound transactions allowed > + * @RISCV_IOMMU_DDTP_MODE_BARE: Pass-through mode > + * @RISCV_IOMMU_DDTP_MODE_1LVL: One-level DDT > + * @RISCV_IOMMU_DDTP_MODE_2LVL: Two-level DDT > + * @RISCV_IOMMU_DDTP_MODE_3LVL: Three-level DDT > + * @RISCV_IOMMU_DDTP_MODE_MAX: Max value allowed by specification > + */ > +enum riscv_iommu_ddtp_modes { > + RISCV_IOMMU_DDTP_MODE_OFF = 0, > + RISCV_IOMMU_DDTP_MODE_BARE = 1, > + RISCV_IOMMU_DDTP_MODE_1LVL = 2, > + RISCV_IOMMU_DDTP_MODE_2LVL = 3, > + RISCV_IOMMU_DDTP_MODE_3LVL = 4, > + RISCV_IOMMU_DDTP_MODE_MAX = 4 > +}; > + > +/* 5.6 Command Queue Base (64bits) */ > +#define RISCV_IOMMU_REG_CQB 0x0018 > +#define RISCV_IOMMU_CQB_ENTRIES RISCV_IOMMU_QUEUE_LOGSZ_FIELD > +#define RISCV_IOMMU_CQB_PPN RISCV_IOMMU_PPN_FIELD > + > +/* 5.7 Command Queue head (32bits) */ > +#define RISCV_IOMMU_REG_CQH 0x0020 > +#define RISCV_IOMMU_CQH_INDEX RISCV_IOMMU_QUEUE_INDEX_FIELD > + > +/* 5.8 Command Queue tail (32bits) */ > +#define RISCV_IOMMU_REG_CQT 0x0024 > +#define RISCV_IOMMU_CQT_INDEX RISCV_IOMMU_QUEUE_INDEX_FIELD > + > +/* 5.9 Fault Queue Base (64bits) */ > +#define 
RISCV_IOMMU_REG_FQB 0x0028 > +#define RISCV_IOMMU_FQB_ENTRIES RISCV_IOMMU_QUEUE_LOGSZ_FIELD > +#define RISCV_IOMMU_FQB_PN RISCV_IOMMU_PPN_FIELD RISCV_IOMMU_FQB_PPN > + > +/* 5.10 Fault Queue Head (32bits) */ > +#define RISCV_IOMMU_REG_FQH 0x0030 > +#define RISCV_IOMMU_FQH_INDEX RISCV_IOMMU_QUEUE_INDEX_FIELD > + > +/* 5.11 Fault Queue tail (32bits) */ > +#define RISCV_IOMMU_REG_FQT 0x0034 > +#define RISCV_IOMMU_FQT_INDEX RISCV_IOMMU_QUEUE_INDEX_FIELD > + > +/* 5.12 Page Request Queue base (64bits) */ > +#define RISCV_IOMMU_REG_PQB 0x0038 > +#define RISCV_IOMMU_PQB_ENTRIES RISCV_IOMMU_QUEUE_LOGSZ_FIELD > +#define RISCV_IOMMU_PQB_PPN RISCV_IOMMU_PPN_FIELD > + > +/* 5.13 Page Request Queue head (32bits) */ > +#define RISCV_IOMMU_REG_PQH 0x0040 > +#define RISCV_IOMMU_PQH_INDEX RISCV_IOMMU_QUEUE_INDEX_FIELD > + > +/* 5.14 Page Request Queue tail (32bits) */ > +#define RISCV_IOMMU_REG_PQT 0x0044 > +#define RISCV_IOMMU_PQT_INDEX_MASK RISCV_IOMMU_QUEUE_INDEX_FIELD > + > +/* 5.15 Command Queue CSR (32bits) */ > +#define RISCV_IOMMU_REG_CQCSR 0x0048 > +#define RISCV_IOMMU_CQCSR_CQEN RISCV_IOMMU_QUEUE_ENABLE > +#define RISCV_IOMMU_CQCSR_CIE RISCV_IOMMU_QUEUE_INTR_ENABLE > +#define RISCV_IOMMU_CQCSR_CQMF RISCV_IOMMU_QUEUE_MEM_FAULT > +#define RISCV_IOMMU_CQCSR_CMD_TO BIT(9) > +#define RISCV_IOMMU_CQCSR_CMD_ILL BIT(10) > +#define RISCV_IOMMU_CQCSR_FENCE_W_IP BIT(11) > +#define RISCV_IOMMU_CQCSR_CQON RISCV_IOMMU_QUEUE_ACTIVE > +#define RISCV_IOMMU_CQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY > + > +/* 5.16 Fault Queue CSR (32bits) */ > +#define RISCV_IOMMU_REG_FQCSR 0x004C > +#define RISCV_IOMMU_FQCSR_FQEN RISCV_IOMMU_QUEUE_ENABLE > +#define RISCV_IOMMU_FQCSR_FIE RISCV_IOMMU_QUEUE_INTR_ENABLE > +#define RISCV_IOMMU_FQCSR_FQMF RISCV_IOMMU_QUEUE_MEM_FAULT > +#define RISCV_IOMMU_FQCSR_FQOF RISCV_IOMMU_QUEUE_OVERFLOW > +#define RISCV_IOMMU_FQCSR_FQON RISCV_IOMMU_QUEUE_ACTIVE > +#define RISCV_IOMMU_FQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY > + > +/* 5.17 Page Request Queue CSR (32bits) */ > +#define 
RISCV_IOMMU_REG_PQCSR 0x0050 > +#define RISCV_IOMMU_PQCSR_PQEN RISCV_IOMMU_QUEUE_ENABLE > +#define RISCV_IOMMU_PQCSR_PIE RISCV_IOMMU_QUEUE_INTR_ENABLE > +#define RISCV_IOMMU_PQCSR_PQMF RISCV_IOMMU_QUEUE_MEM_FAULT > +#define RISCV_IOMMU_PQCSR_PQOF RISCV_IOMMU_QUEUE_OVERFLOW > +#define RISCV_IOMMU_PQCSR_PQON RISCV_IOMMU_QUEUE_ACTIVE > +#define RISCV_IOMMU_PQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY > + > +/* 5.18 Interrupt Pending Status (32bits) */ > +#define RISCV_IOMMU_REG_IPSR 0x0054 > + > +#define RISCV_IOMMU_INTR_CQ 0 > +#define RISCV_IOMMU_INTR_FQ 1 > +#define RISCV_IOMMU_INTR_PM 2 > +#define RISCV_IOMMU_INTR_PQ 3 > +#define RISCV_IOMMU_INTR_COUNT 4 > + > +#define RISCV_IOMMU_IPSR_CIP BIT(RISCV_IOMMU_INTR_CQ) > +#define RISCV_IOMMU_IPSR_FIP BIT(RISCV_IOMMU_INTR_FQ) > +#define RISCV_IOMMU_IPSR_PMIP BIT(RISCV_IOMMU_INTR_PM) > +#define RISCV_IOMMU_IPSR_PIP BIT(RISCV_IOMMU_INTR_PQ) > + > +/* 5.19 Performance monitoring counter overflow status (32bits) */ > +#define RISCV_IOMMU_REG_IOCOUNTOVF 0x0058 > +#define RISCV_IOMMU_IOCOUNTOVF_CY BIT(0) > +#define RISCV_IOMMU_IOCOUNTOVF_HPM GENMASK_ULL(31, 1) RISCV_IOMMU_REG_IOCNTOVF* for all above. It looks like this got renamed and the header of the spec subsection still has the old name, but otherwise the new name is referenced. > + > +/* 5.20 Performance monitoring counter inhibits (32bits) */ > +#define RISCV_IOMMU_REG_IOCOUNTINH 0x005C > +#define RISCV_IOMMU_IOCOUNTINH_CY BIT(0) > +#define RISCV_IOMMU_IOCOUNTINH_HPM GENMASK(31, 1) RISCV_IOMMU_IOCNTINH* for all above. Same header with old name spec issue. 
> + > +/* 5.21 Performance monitoring cycles counter (64bits) */ > +#define RISCV_IOMMU_REG_IOHPMCYCLES 0x0060 > +#define RISCV_IOMMU_IOHPMCYCLES_COUNTER GENMASK_ULL(62, 0) > +#define RISCV_IOMMU_IOHPMCYCLES_OVF BIT_ULL(63) RISCV_IOMMU_IOHPMCYCLES_OF > + > +/* 5.22 Performance monitoring event counters (31 * 64bits) */ > +#define RISCV_IOMMU_REG_IOHPMCTR_BASE 0x0068 > +#define RISCV_IOMMU_REG_IOHPMCTR(_n) (RISCV_IOMMU_REG_IOHPMCTR_BASE + ((_n) * 0x8)) > + > +/* 5.23 Performance monitoring event selectors (31 * 64bits) */ > +#define RISCV_IOMMU_REG_IOHPMEVT_BASE 0x0160 > +#define RISCV_IOMMU_REG_IOHPMEVT(_n) (RISCV_IOMMU_REG_IOHPMEVT_BASE + ((_n) * 0x8)) > +#define RISCV_IOMMU_IOHPMEVT_CNT 31 What is RISCV_IOMMU_IOHPMEVT_CNT ? > +#define RISCV_IOMMU_IOHPMEVT_EVENT_ID GENMASK_ULL(14, 0) RISCV_IOMMU_IOHPMEVT_EVENTID (EVENT_ID reads nicer though...) > +#define RISCV_IOMMU_IOHPMEVT_DMASK BIT_ULL(15) > +#define RISCV_IOMMU_IOHPMEVT_PID_PSCID GENMASK_ULL(35, 16) > +#define RISCV_IOMMU_IOHPMEVT_DID_GSCID GENMASK_ULL(59, 36) > +#define RISCV_IOMMU_IOHPMEVT_PV_PSCV BIT_ULL(60) > +#define RISCV_IOMMU_IOHPMEVT_DV_GSCV BIT_ULL(61) > +#define RISCV_IOMMU_IOHPMEVT_IDT BIT_ULL(62) > +#define RISCV_IOMMU_IOHPMEVT_OF BIT_ULL(63) > + > +/** > + * enum riscv_iommu_hpmevent_id - Performance-monitoring event identifier > + * > + * @RISCV_IOMMU_HPMEVENT_INVALID: Invalid event, do not count > + * @RISCV_IOMMU_HPMEVENT_URQ: Untranslated requests > + * @RISCV_IOMMU_HPMEVENT_TRQ: Translated requests > + * @RISCV_IOMMU_HPMEVENT_ATS_RQ: ATS translation requests > + * @RISCV_IOMMU_HPMEVENT_TLB_MISS: TLB misses > + * @RISCV_IOMMU_HPMEVENT_DD_WALK: Device directory walks > + * @RISCV_IOMMU_HPMEVENT_PD_WALK: Process directory walks > + * @RISCV_IOMMU_HPMEVENT_S_VS_WALKS: S/VS-Stage page table walks > + * @RISCV_IOMMU_HPMEVENT_G_WALKS: G-Stage page table walks Let's copy+paste these descriptions from the spec to allow them to be copy+pasted from here into a search in the spec. 
(Most already mostly match, except for the G-stage vs. second-stage type stuff.) > + * @RISCV_IOMMU_HPMEVENT_MAX: Value to denote maximum Event IDs > + */ > +enum riscv_iommu_hpmevent_id { > + RISCV_IOMMU_HPMEVENT_INVALID = 0, > + RISCV_IOMMU_HPMEVENT_URQ = 1, > + RISCV_IOMMU_HPMEVENT_TRQ = 2, > + RISCV_IOMMU_HPMEVENT_ATS_RQ = 3, > + RISCV_IOMMU_HPMEVENT_TLB_MISS = 4, > + RISCV_IOMMU_HPMEVENT_DD_WALK = 5, > + RISCV_IOMMU_HPMEVENT_PD_WALK = 6, > + RISCV_IOMMU_HPMEVENT_S_VS_WALKS = 7, > + RISCV_IOMMU_HPMEVENT_G_WALKS = 8, > + RISCV_IOMMU_HPMEVENT_MAX = 9 > +}; > + > +/* 5.24 Translation request IOVA (64bits) */ > +#define RISCV_IOMMU_REG_TR_REQ_IOVA 0x0258 > +#define RISCV_IOMMU_TR_REQ_IOVA_VPN GENMASK_ULL(63, 12) > + > +/* 5.25 Translation request control (64bits) */ > +#define RISCV_IOMMU_REG_TR_REQ_CTL 0x0260 > +#define RISCV_IOMMU_TR_REQ_CTL_GO_BUSY BIT_ULL(0) > +#define RISCV_IOMMU_TR_REQ_CTL_PRIV BIT_ULL(1) > +#define RISCV_IOMMU_TR_REQ_CTL_EXE BIT_ULL(2) > +#define RISCV_IOMMU_TR_REQ_CTL_NW BIT_ULL(3) > +#define RISCV_IOMMU_TR_REQ_CTL_PID GENMASK_ULL(31, 12) > +#define RISCV_IOMMU_TR_REQ_CTL_PV BIT_ULL(32) > +#define RISCV_IOMMU_TR_REQ_CTL_DID GENMASK_ULL(63, 40) > + > +/* 5.26 Translation request response (64bits) */ > +#define RISCV_IOMMU_REG_TR_RESPONSE 0x0268 > +#define RISCV_IOMMU_TR_RESPONSE_FAULT BIT_ULL(0) > +#define RISCV_IOMMU_TR_RESPONSE_PBMT GENMASK_ULL(8, 7) > +#define RISCV_IOMMU_TR_RESPONSE_SZ BIT_ULL(9) RISCV_IOMMU_TR_RESPONSE_S (although SZ is easier to understand...) 
> +#define RISCV_IOMMU_TR_RESPONSE_PPN RISCV_IOMMU_PPN_FIELD > + > +/* 5.27 Interrupt cause to vector (64bits) */ > +#define RISCV_IOMMU_REG_ICVEC 0x02F8 > +#define RISCV_IOMMU_ICVEC_CIV GENMASK_ULL(3, 0) > +#define RISCV_IOMMU_ICVEC_FIV GENMASK_ULL(7, 4) > +#define RISCV_IOMMU_ICVEC_PMIV GENMASK_ULL(11, 8) > +#define RISCV_IOMMU_ICVEC_PIV GENMASK_ULL(15, 12) > + > +/* 5.28 MSI Configuration table (32 * 64bits) */ > +#define RISCV_IOMMU_REG_MSI_CONFIG 0x0300 > +#define RISCV_IOMMU_REG_MSI_ADDR(_n) (RISCV_IOMMU_REG_MSI_CONFIG + ((_n) * 0x10)) > +#define RISCV_IOMMU_MSI_ADDR GENMASK_ULL(55, 2) > +#define RISCV_IOMMU_REG_MSI_DATA(_n) (RISCV_IOMMU_REG_MSI_CONFIG + ((_n) * 0x10) + 0x08) > +#define RISCV_IOMMU_MSI_DATA GENMASK_ULL(31, 0) > +#define RISCV_IOMMU_REG_MSI_VEC_CTL(_n) (RISCV_IOMMU_REG_MSI_CONFIG + ((_n) * 0x10) + 0x0C) > +#define RISCV_IOMMU_MSI_VEC_CTL_M BIT_ULL(0) RISCV_IOMMU_REG_MSI_CFG_TBL for all above > + > +#define RISCV_IOMMU_REG_SIZE 0x1000 > + > +/* > + * Chapter 2: Data structures > + */ > + > +/* > + * Device Directory Table macros for non-leaf nodes > + */ > +#define RISCV_IOMMU_DDTE_VALID BIT_ULL(0) RISCV_IOMMU_DDTE_V > +#define RISCV_IOMMU_DDTE_PPN RISCV_IOMMU_PPN_FIELD > + > +/** > + * struct riscv_iommu_dc - Device Context > + * @tc: Translation Control > + * @iohgatp: I/O Hypervisor guest address translation and protection > + * (Second stage context) > + * @ta: Translation Attributes > + * @fsc: First stage context > + * @msiptp: MSI page table pointer > + * @msi_addr_mask: MSI address mask > + * @msi_addr_pattern: MSI address pattern > + * @_reserved: Reserved for future use, padding > + * > + * This structure is used for leaf nodes on the Device Directory Table, > + * in case RISCV_IOMMU_CAP_MSI_FLAT is not set, the bottom 4 fields are > + * not present and are skipped with pointer arithmetic to avoid > + * casting, check out riscv_iommu_get_dc(). 
> + * See section 2.1 for more details > + */ > +struct riscv_iommu_dc { > + u64 tc; > + u64 iohgatp; > + u64 ta; > + u64 fsc; > + u64 msiptp; > + u64 msi_addr_mask; > + u64 msi_addr_pattern; > + u64 _reserved; > +}; > + > +/* Translation control fields */ > +#define RISCV_IOMMU_DC_TC_V BIT_ULL(0) > +#define RISCV_IOMMU_DC_TC_EN_ATS BIT_ULL(1) > +#define RISCV_IOMMU_DC_TC_EN_PRI BIT_ULL(2) > +#define RISCV_IOMMU_DC_TC_T2GPA BIT_ULL(3) > +#define RISCV_IOMMU_DC_TC_DTF BIT_ULL(4) > +#define RISCV_IOMMU_DC_TC_PDTV BIT_ULL(5) > +#define RISCV_IOMMU_DC_TC_PRPR BIT_ULL(6) > +#define RISCV_IOMMU_DC_TC_GADE BIT_ULL(7) > +#define RISCV_IOMMU_DC_TC_SADE BIT_ULL(8) > +#define RISCV_IOMMU_DC_TC_DPE BIT_ULL(9) > +#define RISCV_IOMMU_DC_TC_SBE BIT_ULL(10) > +#define RISCV_IOMMU_DC_TC_SXL BIT_ULL(11) > + > +/* Second-stage (aka G-stage) context fields */ > +#define RISCV_IOMMU_DC_IOHGATP_PPN RISCV_IOMMU_ATP_PPN_FIELD > +#define RISCV_IOMMU_DC_IOHGATP_GSCID GENMASK_ULL(59, 44) > +#define RISCV_IOMMU_DC_IOHGATP_MODE RISCV_IOMMU_ATP_MODE_FIELD > + > +/** > + * enum riscv_iommu_dc_iohgatp_modes - Guest address translation/protection modes > + * @RISCV_IOMMU_DC_IOHGATP_MODE_BARE: No translation/protection > + * @RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4: Sv32x4 (2-bit extension of Sv32), when fctl.GXL == 1 > + * @RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4: Sv39x4 (2-bit extension of Sv39), when fctl.GXL == 0 > + * @RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4: Sv48x4 (2-bit extension of Sv48), when fctl.GXL == 0 > + * @RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4: Sv57x4 (2-bit extension of Sv57), when fctl.GXL == 0 > + */ > +enum riscv_iommu_dc_iohgatp_modes { > + RISCV_IOMMU_DC_IOHGATP_MODE_BARE = 0, > + RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4 = 8, > + RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4 = 8, > + RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4 = 9, > + RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4 = 10 > +}; > + > +/* Translation attributes fields */ > +#define RISCV_IOMMU_DC_TA_PSCID GENMASK_ULL(31, 12) > + > +/* First-stage context fields */ > 
+#define RISCV_IOMMU_DC_FSC_PPN RISCV_IOMMU_ATP_PPN_FIELD > +#define RISCV_IOMMU_DC_FSC_MODE RISCV_IOMMU_ATP_MODE_FIELD > + > +/** > + * enum riscv_iommu_dc_fsc_atp_modes - First stage address translation/protection modes > + * @RISCV_IOMMU_DC_FSC_MODE_BARE: No translation/protection > + * @RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32: Sv32, when dc.tc.SXL == 1 > + * @RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: Sv39, when dc.tc.SXL == 0 > + * @RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: Sv48, when dc.tc.SXL == 0 > + * @RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: Sv57, when dc.tc.SXL == 0 > + * @RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8: 1lvl PDT, 8bit process ids > + * @RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17: 2lvl PDT, 17bit process ids > + * @RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20: 3lvl PDT, 20bit process ids > + * > + * FSC holds IOSATP when RISCV_IOMMU_DC_TC_PDTV is 0 and PDTP otherwise. > + * IOSATP controls the first stage address translation (same as the satp register on > + * the RISC-V MMU), and PDTP holds the process directory table, used to select a > + * first stage page table based on a process id (for devices that support multiple > + * process ids). 
> + */ > +enum riscv_iommu_dc_fsc_atp_modes { > + RISCV_IOMMU_DC_FSC_MODE_BARE = 0, > + RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 = 8, > + RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 = 8, > + RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48 = 9, > + RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57 = 10, > + RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8 = 1, > + RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17 = 2, > + RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20 = 3 > +}; > + > +/* MSI page table pointer */ > +#define RISCV_IOMMU_DC_MSIPTP_PPN RISCV_IOMMU_ATP_PPN_FIELD > +#define RISCV_IOMMU_DC_MSIPTP_MODE RISCV_IOMMU_ATP_MODE_FIELD > +#define RISCV_IOMMU_DC_MSIPTP_MODE_OFF 0 > +#define RISCV_IOMMU_DC_MSIPTP_MODE_FLAT 1 > + > +/* MSI address mask */ > +#define RISCV_IOMMU_DC_MSI_ADDR_MASK GENMASK_ULL(51, 0) > + > +/* MSI address pattern */ > +#define RISCV_IOMMU_DC_MSI_PATTERN GENMASK_ULL(51, 0) > + > +/** > + * struct riscv_iommu_pc - Process Context > + * @ta: Translation Attributes > + * @fsc: First stage context > + * > + * This structure is used for leaf nodes on the Process Directory Table > + * See section 2.3 for more details > + */ > +struct riscv_iommu_pc { > + u64 ta; > + u64 fsc; > +}; > + > +/* Translation attributes fields */ > +#define RISCV_IOMMU_PC_TA_V BIT_ULL(0) > +#define RISCV_IOMMU_PC_TA_ENS BIT_ULL(1) > +#define RISCV_IOMMU_PC_TA_SUM BIT_ULL(2) > +#define RISCV_IOMMU_PC_TA_PSCID GENMASK_ULL(31, 12) > + > +/* First stage context fields */ > +#define RISCV_IOMMU_PC_FSC_PPN RISCV_IOMMU_ATP_PPN_FIELD > +#define RISCV_IOMMU_PC_FSC_MODE RISCV_IOMMU_ATP_MODE_FIELD > + > +/* > + * Chapter 3: In-memory queue interface > + */ > + > +/** > + * struct riscv_iommu_command - Generic I/O MMU command structure > + * @dword0: Includes the opcode and the function identifier > + * @dword1: Opcode specific data > + * > + * The commands are interpreted as two 64bit fields, where the first > + * 7bits of the first field are the opcode which also defines the > + * command's format, followed by a 3bit field that specifies the > + * function 
invoked by that command, and the rest is opcode-specific. > + * This is a generic struct which will be populated differently > + * according to each command. For more infos on the commands and > + * the command queue check section 3.1. > + */ > +struct riscv_iommu_command { > + u64 dword0; > + u64 dword1; > +}; > + > +/* Fields on dword0, common for all commands */ > +#define RISCV_IOMMU_CMD_OPCODE GENMASK_ULL(6, 0) > +#define RISCV_IOMMU_CMD_FUNC GENMASK_ULL(9, 7) > + > +/* 3.1.1 I/O MMU Page-table cache invalidation */ > +/* Fields on dword0 */ > +#define RISCV_IOMMU_CMD_IOTINVAL_OPCODE 1 > +#define RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA 0 > +#define RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA 1 > +#define RISCV_IOMMU_CMD_IOTINVAL_AV BIT_ULL(10) > +#define RISCV_IOMMU_CMD_IOTINVAL_PSCID GENMASK_ULL(31, 12) > +#define RISCV_IOMMU_CMD_IOTINVAL_PSCV BIT_ULL(32) > +#define RISCV_IOMMU_CMD_IOTINVAL_GV BIT_ULL(33) > +#define RISCV_IOMMU_CMD_IOTINVAL_GSCID GENMASK_ULL(59, 44) > +/* dword1[61:10] is the 4K-aligned page address */ > +#define RISCV_IOMMU_CMD_IOTINVAL_ADDR GENMASK_ULL(61, 10) > + > +/* 3.1.2 I/O MMU Command Queue Fences */ > +/* Fields on dword0 */ > +#define RISCV_IOMMU_CMD_IOFENCE_OPCODE 2 > +#define RISCV_IOMMU_CMD_IOFENCE_FUNC_C 0 > +#define RISCV_IOMMU_CMD_IOFENCE_AV BIT_ULL(10) > +#define RISCV_IOMMU_CMD_IOFENCE_WSI BIT_ULL(11) > +#define RISCV_IOMMU_CMD_IOFENCE_PR BIT_ULL(12) > +#define RISCV_IOMMU_CMD_IOFENCE_PW BIT_ULL(13) > +#define RISCV_IOMMU_CMD_IOFENCE_DATA GENMASK_ULL(63, 32) > +/* dword1 is the address, word-size aligned and shifted to the right by two bits. 
*/ > + > +/* 3.1.3 I/O MMU Directory cache invalidation */ > +/* Fields on dword0 */ > +#define RISCV_IOMMU_CMD_IODIR_OPCODE 3 > +#define RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT 0 > +#define RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT 1 > +#define RISCV_IOMMU_CMD_IODIR_PID GENMASK_ULL(31, 12) > +#define RISCV_IOMMU_CMD_IODIR_DV BIT_ULL(33) > +#define RISCV_IOMMU_CMD_IODIR_DID GENMASK_ULL(63, 40) RISCV_IOMMU_CMD_IOTDIR_* for all above > +/* dword1 is reserved for standard use */ > + > +/* 3.1.4 I/O MMU PCIe ATS */ > +/* Fields on dword0 */ > +#define RISCV_IOMMU_CMD_ATS_OPCODE 4 > +#define RISCV_IOMMU_CMD_ATS_FUNC_INVAL 0 > +#define RISCV_IOMMU_CMD_ATS_FUNC_PRGR 1 > +#define RISCV_IOMMU_CMD_ATS_PID GENMASK_ULL(31, 12) > +#define RISCV_IOMMU_CMD_ATS_PV BIT_ULL(32) > +#define RISCV_IOMMU_CMD_ATS_DSV BIT_ULL(33) > +#define RISCV_IOMMU_CMD_ATS_RID GENMASK_ULL(55, 40) > +#define RISCV_IOMMU_CMD_ATS_DSEG GENMASK_ULL(63, 56) > +/* dword1 is the ATS payload, two different payload types for INVAL and PRGR */ > + > +/* ATS.INVAL payload*/ > +#define RISCV_IOMMU_CMD_ATS_INVAL_G BIT_ULL(0) > +/* Bits 1 - 10 are zeroed */ > +#define RISCV_IOMMU_CMD_ATS_INVAL_S BIT_ULL(11) > +#define RISCV_IOMMU_CMD_ATS_INVAL_UADDR GENMASK_ULL(63, 12) > + > +/* ATS.PRGR payload */ > +/* Bits 0 - 31 are zeroed */ > +#define RISCV_IOMMU_CMD_ATS_PRGR_PRG_INDEX GENMASK_ULL(40, 32) > +/* Bits 41 - 43 are zeroed */ > +#define RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE GENMASK_ULL(47, 44) > +#define RISCV_IOMMU_CMD_ATS_PRGR_DST_ID GENMASK_ULL(63, 48) > + > +/** > + * struct riscv_iommu_fq_record - Fault/Event Queue Record > + * @hdr: Header, includes fault/event cause, PID/DID, transaction type etc > + * @_reserved: Low 32bits for custom use, high 32bits for standard use > + * @iotval: Transaction-type/cause specific format > + * @iotval2: Cause specific format > + * > + * The fault/event queue reports events and failures raised when > + * processing transactions. 
Each record is a 32byte structure where > + * the first dword has a fixed format for providing generic infos > + * regarding the fault/event, and two more dwords are there for > + * fault/event-specific information. For more details see section > + * 3.2. > + */ > +struct riscv_iommu_fq_record { > + u64 hdr; > + u64 _reserved; > + u64 iotval; > + u64 iotval2; > +}; > + > +/* Fields on header */ > +#define RISCV_IOMMU_FQ_HDR_CAUSE GENMASK_ULL(11, 0) > +#define RISCV_IOMMU_FQ_HDR_PID GENMASK_ULL(31, 12) > +#define RISCV_IOMMU_FQ_HDR_PV BIT_ULL(32) > +#define RISCV_IOMMU_FQ_HDR_PRIV BIT_ULL(33) > +#define RISCV_IOMMU_FQ_HDR_TTYPE GENMASK_ULL(39, 34) RISCV_IOMMU_FQ_HDR_TTYP > +#define RISCV_IOMMU_FQ_HDR_DID GENMASK_ULL(63, 40) > + > +/** > + * enum riscv_iommu_fq_causes - Fault/event cause values > + * @RISCV_IOMMU_FQ_CAUSE_INST_FAULT: Instruction access fault > + * @RISCV_IOMMU_FQ_CAUSE_RD_ADDR_MISALIGNED: Read address misaligned > + * @RISCV_IOMMU_FQ_CAUSE_RD_FAULT: Read load fault > + * @RISCV_IOMMU_FQ_CAUSE_WR_ADDR_MISALIGNED: Write/AMO address misaligned > + * @RISCV_IOMMU_FQ_CAUSE_WR_FAULT: Write/AMO access fault > + * @RISCV_IOMMU_FQ_CAUSE_INST_FAULT_S: Instruction page fault > + * @RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S: Read page fault > + * @RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S: Write/AMO page fault > + * @RISCV_IOMMU_FQ_CAUSE_INST_FAULT_VS: Instruction guest page fault > + * @RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS: Read guest page fault > + * @RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS: Write/AMO guest page fault > + * @RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED: All inbound transactions disallowed > + * @RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT: DDT entry load access fault > + * @RISCV_IOMMU_FQ_CAUSE_DDT_INVALID: DDT entry invalid > + * @RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED: DDT entry misconfigured > + * @RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED: Transaction type disallowed > + * @RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT: MSI PTE load access fault > + * @RISCV_IOMMU_FQ_CAUSE_MSI_INVALID: MSI PTE invalid > + 
* @RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED: MSI PTE misconfigured > + * @RISCV_IOMMU_FQ_CAUSE_MRIF_FAULT: MRIF access fault > + * @RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT: PDT entry load access fault > + * @RISCV_IOMMU_FQ_CAUSE_PDT_INVALID: PDT entry invalid > + * @RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED: PDT entry misconfigured > + * @RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED: DDT data corruption > + * @RISCV_IOMMU_FQ_CAUSE_PDT_CORRUPTED: PDT data corruption > + * @RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED: MSI page table data corruption > + * @RISCV_IOMMU_FQ_CAUSE_MRIF_CORRUIPTED: MRIF data corruption > + * @RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR: Internal data path error > + * @RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT: IOMMU MSI write access fault > + * @RISCV_IOMMU_FQ_CAUSE_PT_CORRUPTED: First/second stage page table data corruption > + * > + * Values are on table 11 of the spec, encodings 275 - 2047 are reserved for standard > + * use, and 2048 - 4095 for custom use. > + */ > +enum riscv_iommu_fq_causes { > + RISCV_IOMMU_FQ_CAUSE_INST_FAULT = 1, > + RISCV_IOMMU_FQ_CAUSE_RD_ADDR_MISALIGNED = 4, > + RISCV_IOMMU_FQ_CAUSE_RD_FAULT = 5, > + RISCV_IOMMU_FQ_CAUSE_WR_ADDR_MISALIGNED = 6, > + RISCV_IOMMU_FQ_CAUSE_WR_FAULT = 7, > + RISCV_IOMMU_FQ_CAUSE_INST_FAULT_S = 12, > + RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S = 13, > + RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S = 15, > + RISCV_IOMMU_FQ_CAUSE_INST_FAULT_VS = 20, > + RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS = 21, > + RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS = 23, > + RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED = 256, > + RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT = 257, > + RISCV_IOMMU_FQ_CAUSE_DDT_INVALID = 258, > + RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED = 259, > + RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED = 260, > + RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT = 261, > + RISCV_IOMMU_FQ_CAUSE_MSI_INVALID = 262, > + RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED = 263, > + RISCV_IOMMU_FQ_CAUSE_MRIF_FAULT = 264, > + RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT = 265, > + RISCV_IOMMU_FQ_CAUSE_PDT_INVALID = 266, > + 
RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED = 267, > + RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED = 268, > + RISCV_IOMMU_FQ_CAUSE_PDT_CORRUPTED = 269, > + RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED = 270, > + RISCV_IOMMU_FQ_CAUSE_MRIF_CORRUIPTED = 271, > + RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR = 272, > + RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT = 273, > + RISCV_IOMMU_FQ_CAUSE_PT_CORRUPTED = 274 > +}; > + > +/** > + * enum riscv_iommu_fq_ttypes: Fault/event transaction types > + * @RISCV_IOMMU_FQ_TTYPE_NONE: None. Fault not caused by an inbound transaction. > + * @RISCV_IOMMU_FQ_TTYPE_UADDR_INST_FETCH: Instruction fetch from untranslated address > + * @RISCV_IOMMU_FQ_TTYPE_UADDR_RD: Read from untranslated address > + * @RISCV_IOMMU_FQ_TTYPE_UADDR_WR: Write/AMO to untranslated address > + * @RISCV_IOMMU_FQ_TTYPE_TADDR_INST_FETCH: Instruction fetch from translated address > + * @RISCV_IOMMU_FQ_TTYPE_TADDR_RD: Read from translated address > + * @RISCV_IOMMU_FQ_TTYPE_TADDR_WR: Write/AMO to translated address > + * @RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ: PCIe ATS translation request > + * @RISCV_IOMMU_FW_TTYPE_PCIE_MSG_REQ: PCIe message request > + * > + * Values are on table 12 of the spec, type 4 and 10 - 31 are reserved for standard use > + * and 32 - 63 for custom use. > + */ > +enum riscv_iommu_fq_ttypes { > + RISCV_IOMMU_FQ_TTYPE_NONE = 0, > + RISCV_IOMMU_FQ_TTYPE_UADDR_INST_FETCH = 1, > + RISCV_IOMMU_FQ_TTYPE_UADDR_RD = 2, > + RISCV_IOMMU_FQ_TTYPE_UADDR_WR = 3, > + RISCV_IOMMU_FQ_TTYPE_TADDR_INST_FETCH = 5, > + RISCV_IOMMU_FQ_TTYPE_TADDR_RD = 6, > + RISCV_IOMMU_FQ_TTYPE_TADDR_WR = 7, > + RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ = 8, > + RISCV_IOMMU_FW_TTYPE_PCIE_MSG_REQ = 9, > +}; RISCV_IOMMU_FQ_TTYP_* for all above > + > +/** > + * struct riscv_iommu_pq_record - PCIe Page Request record > + * @hdr: Header, includes PID, DID etc > + * @payload: Holds the page address, request group and permission bits > + * > + * For more infos on the PCIe Page Request queue see chapter 3.3. 
> + */ > +struct riscv_iommu_pq_record { > + u64 hdr; > + u64 payload; > +}; > + > +/* Header fields */ > +#define RISCV_IOMMU_PREQ_HDR_PID GENMASK_ULL(31, 12) > +#define RISCV_IOMMU_PREQ_HDR_PV BIT_ULL(32) > +#define RISCV_IOMMU_PREQ_HDR_PRIV BIT_ULL(33) > +#define RISCV_IOMMU_PREQ_HDR_EXEC BIT_ULL(34) > +#define RISCV_IOMMU_PREQ_HDR_DID GENMASK_ULL(63, 40) > + > +/* Payload fields */ > +#define RISCV_IOMMU_PREQ_PAYLOAD_R BIT_ULL(0) > +#define RISCV_IOMMU_PREQ_PAYLOAD_W BIT_ULL(1) > +#define RISCV_IOMMU_PREQ_PAYLOAD_L BIT_ULL(2) > +#define RISCV_IOMMU_PREQ_PAYLOAD_M GENMASK_ULL(2, 0) /* Mask of RWL for convenience */ Maybe RISCV_IOMMU_PREQ_PAYLOAD_RWL_MASK ? Just 'M' doesn't convey anything so it makes one want to look it up in the spec. > +#define RISCV_IOMMU_PREQ_PRG_INDEX GENMASK_ULL(11, 3) > +#define RISCV_IOMMU_PREQ_UADDR GENMASK_ULL(63, 12) RISCV_IOMMU_PQ_HDR_* for all above > + > +/** > + * struct riscv_iommu_msi_pte - MSI Page Table Entry > + * @pte: MSI PTE > + * @mrif_info: Memory-resident interrupt file info > + * > + * The MSI Page Table is used for virtualizing MSIs, so that when > + * a device sends an MSI to a guest, the IOMMU can reroute it > + * by translating the MSI address, either to a guest interrupt file > + * or a memory resident interrupt file (MRIF). Note that this page table > + * is an array of MSI PTEs, not a multi-level pt, each entry > + * is a leaf entry. For more infos check out the AIA spec, chapter 9.5. > + * > + * Also in basic mode the mrif_info field is ignored by the IOMMU and can > + * be used by software, any other reserved fields on pte must be zeroed-out > + * by software. 
> + */ > +struct riscv_iommu_msi_pte { > + u64 pte; > + u64 mrif_info; > +}; > + > +/* Fields on pte */ > +#define RISCV_IOMMU_MSI_PTE_V BIT_ULL(0) > +#define RISCV_IOMMU_MSI_PTE_M GENMASK_ULL(2, 1) > +#define RISCV_IOMMU_MSI_PTE_MRIF_ADDR GENMASK_ULL(53, 7) /* When M == 1 (MRIF mode) */ > +#define RISCV_IOMMU_MSI_PTE_PPN RISCV_IOMMU_PPN_FIELD /* When M == 3 (basic mode) */ > +#define RISCV_IOMMU_MSI_PTE_C BIT_ULL(63) RISCV_IOMMU_MSIPTE_* for all above > + > +/* Fields on mrif_info */ > +#define RISCV_IOMMU_MSI_MRIF_NID GENMASK_ULL(9, 0) > +#define RISCV_IOMMU_MSI_MRIF_NPPN RISCV_IOMMU_PPN_FIELD > +#define RISCV_IOMMU_MSI_MRIF_NID_MSB BIT_ULL(60) > + > +#endif /* _RISCV_IOMMU_BITS_H_ */
On Wed, May 29, 2024 at 8:15 AM Andrew Jones <ajones@ventanamicro.com> wrote: > > Hi Tomasz, > > I reviewed iommu-bits.h against the spec. Most naming matches exactly, which > is nice, but I've pointed out a few which don't. > > Thanks, > drew > Thanks for looking into this rather boring file. General ACK to all suggestions. Thanks, - Tomasz > > On Fri, May 24, 2024 at 12:34:42PM GMT, Tomasz Jeznach wrote: > ... > > diff --git a/drivers/iommu/riscv/iommu-bits.h b/drivers/iommu/riscv/iommu-bits.h > > new file mode 100644 > > index 000000000000..48f795a0cd10 > > --- /dev/null > > +++ b/drivers/iommu/riscv/iommu-bits.h > > @@ -0,0 +1,707 @@ > > +/* SPDX-License-Identifier: GPL-2.0-only */ > > +/* > > + * Copyright © 2022-2024 Rivos Inc. > > + * Copyright © 2023 FORTH-ICS/CARV > > + * Copyright © 2023 RISC-V IOMMU Task Group > > + * > > + * RISC-V IOMMU - Register Layout and Data Structures. > > + * > > + * Based on the 'RISC-V IOMMU Architecture Specification', Version 1.0 > > + * Published at https://github.com/riscv-non-isa/riscv-iommu > > + * > > + */ > > + > > +#ifndef _RISCV_IOMMU_BITS_H_ > > +#define _RISCV_IOMMU_BITS_H_ > > + > > +#include <linux/types.h> > > +#include <linux/bitfield.h> > > +#include <linux/bits.h> > > + > > +/* > > + * Chapter 5: Memory Mapped register interface > > + */ > > + > > +/* Common field positions */ > > +#define RISCV_IOMMU_PPN_FIELD GENMASK_ULL(53, 10) > > +#define RISCV_IOMMU_QUEUE_LOGSZ_FIELD GENMASK_ULL(4, 0) > > RISCV_IOMMU_QUEUE_LOG2SZ_FIELD > > > +#define RISCV_IOMMU_QUEUE_INDEX_FIELD GENMASK_ULL(31, 0) > > +#define RISCV_IOMMU_QUEUE_ENABLE BIT(0) > > +#define RISCV_IOMMU_QUEUE_INTR_ENABLE BIT(1) > > +#define RISCV_IOMMU_QUEUE_MEM_FAULT BIT(8) > > +#define RISCV_IOMMU_QUEUE_OVERFLOW BIT(9) > > +#define RISCV_IOMMU_QUEUE_ACTIVE BIT(16) > > +#define RISCV_IOMMU_QUEUE_BUSY BIT(17) > > + > > +#define RISCV_IOMMU_ATP_PPN_FIELD GENMASK_ULL(43, 0) > > +#define RISCV_IOMMU_ATP_MODE_FIELD GENMASK_ULL(63, 60) > > + > > +/* 5.3 IOMMU 
Capabilities (64bits) */ > > +#define RISCV_IOMMU_REG_CAP 0x0000 > > Maybe too verbose, but the name is "capabilities". So all the below 'CAP' > instances would match the spec better if spelled out. > > > +#define RISCV_IOMMU_CAP_VERSION GENMASK_ULL(7, 0) > > +#define RISCV_IOMMU_CAP_S_SV32 BIT_ULL(8) > > +#define RISCV_IOMMU_CAP_S_SV39 BIT_ULL(9) > > +#define RISCV_IOMMU_CAP_S_SV48 BIT_ULL(10) > > +#define RISCV_IOMMU_CAP_S_SV57 BIT_ULL(11) > > +#define RISCV_IOMMU_CAP_SVPBMT BIT_ULL(15) > > +#define RISCV_IOMMU_CAP_G_SV32 BIT_ULL(16) > > +#define RISCV_IOMMU_CAP_G_SV39 BIT_ULL(17) > > +#define RISCV_IOMMU_CAP_G_SV48 BIT_ULL(18) > > +#define RISCV_IOMMU_CAP_G_SV57 BIT_ULL(19) > > RISCV_IOMMU_CAPABILITIES_SV32X4 > RISCV_IOMMU_CAPABILITIES_SV39X4 > RISCV_IOMMU_CAPABILITIES_SV48X4 > RISCV_IOMMU_CAPABILITIES_SV57X4 > > > +#define RISCV_IOMMU_CAP_AMO_MRIF BIT_ULL(21) > > +#define RISCV_IOMMU_CAP_MSI_FLAT BIT_ULL(22) > > +#define RISCV_IOMMU_CAP_MSI_MRIF BIT_ULL(23) > > +#define RISCV_IOMMU_CAP_AMO_HWAD BIT_ULL(24) > > +#define RISCV_IOMMU_CAP_ATS BIT_ULL(25) > > +#define RISCV_IOMMU_CAP_T2GPA BIT_ULL(26) > > +#define RISCV_IOMMU_CAP_END BIT_ULL(27) > > +#define RISCV_IOMMU_CAP_IGS GENMASK_ULL(29, 28) > > +#define RISCV_IOMMU_CAP_HPM BIT_ULL(30) > > +#define RISCV_IOMMU_CAP_DBG BIT_ULL(31) > > +#define RISCV_IOMMU_CAP_PAS GENMASK_ULL(37, 32) > > +#define RISCV_IOMMU_CAP_PD8 BIT_ULL(38) > > +#define RISCV_IOMMU_CAP_PD17 BIT_ULL(39) > > +#define RISCV_IOMMU_CAP_PD20 BIT_ULL(40) > > + > > +#define RISCV_IOMMU_CAP_VERSION_VER_MASK 0xF0 > > RISCV_IOMMU_CAP_VERSION_MAJOR_MASK > > > +#define RISCV_IOMMU_CAP_VERSION_REV_MASK 0x0F > > RISCV_IOMMU_CAP_VERSION_MINOR_MASK > > > + > > +/** > > + * enum riscv_iommu_igs_settings - Interrupt Generation Support Settings > > + * @RISCV_IOMMU_CAP_IGS_MSI: I/O MMU supports only MSI generation > > + * @RISCV_IOMMU_CAP_IGS_WSI: I/O MMU supports only Wired-Signaled interrupt > > + * @RISCV_IOMMU_CAP_IGS_BOTH: I/O MMU supports both MSI and WSI 
generation > > Can we always spell IOMMU 'IOMMU'? > > > + * @RISCV_IOMMU_CAP_IGS_RSRV: Reserved for standard use > > + */ > > +enum riscv_iommu_igs_settings { > > + RISCV_IOMMU_CAP_IGS_MSI = 0, > > + RISCV_IOMMU_CAP_IGS_WSI = 1, > > + RISCV_IOMMU_CAP_IGS_BOTH = 2, > > + RISCV_IOMMU_CAP_IGS_RSRV = 3 > > +}; > > + > > +/* 5.4 Features control register (32bits) */ > > +#define RISCV_IOMMU_REG_FCTL 0x0008 > > +#define RISCV_IOMMU_FCTL_BE BIT(0) > > +#define RISCV_IOMMU_FCTL_WSI BIT(1) > > +#define RISCV_IOMMU_FCTL_GXL BIT(2) > > + > > +/* 5.5 Device-directory-table pointer (64bits) */ > > +#define RISCV_IOMMU_REG_DDTP 0x0010 > > +#define RISCV_IOMMU_DDTP_MODE GENMASK_ULL(3, 0) > > A bit unnecessary, but spec calls this 'iommu_mode', so > > RISCV_IOMMU_DDTP_IOMMU_MODE ? > > > +#define RISCV_IOMMU_DDTP_BUSY BIT_ULL(4) > > +#define RISCV_IOMMU_DDTP_PPN RISCV_IOMMU_PPN_FIELD > > + > > +/** > > + * enum riscv_iommu_ddtp_modes - I/O MMU translation modes > > + * @RISCV_IOMMU_DDTP_MODE_OFF: No inbound transactions allowed > > + * @RISCV_IOMMU_DDTP_MODE_BARE: Pass-through mode > > + * @RISCV_IOMMU_DDTP_MODE_1LVL: One-level DDT > > + * @RISCV_IOMMU_DDTP_MODE_2LVL: Two-level DDT > > + * @RISCV_IOMMU_DDTP_MODE_3LVL: Three-level DDT > > + * @RISCV_IOMMU_DDTP_MODE_MAX: Max value allowed by specification > > + */ > > +enum riscv_iommu_ddtp_modes { > > + RISCV_IOMMU_DDTP_MODE_OFF = 0, > > + RISCV_IOMMU_DDTP_MODE_BARE = 1, > > + RISCV_IOMMU_DDTP_MODE_1LVL = 2, > > + RISCV_IOMMU_DDTP_MODE_2LVL = 3, > > + RISCV_IOMMU_DDTP_MODE_3LVL = 4, > > + RISCV_IOMMU_DDTP_MODE_MAX = 4 > > +}; > > + > > +/* 5.6 Command Queue Base (64bits) */ > > +#define RISCV_IOMMU_REG_CQB 0x0018 > > +#define RISCV_IOMMU_CQB_ENTRIES RISCV_IOMMU_QUEUE_LOGSZ_FIELD > > +#define RISCV_IOMMU_CQB_PPN RISCV_IOMMU_PPN_FIELD > > + > > +/* 5.7 Command Queue head (32bits) */ > > +#define RISCV_IOMMU_REG_CQH 0x0020 > > +#define RISCV_IOMMU_CQH_INDEX RISCV_IOMMU_QUEUE_INDEX_FIELD > > + > > +/* 5.8 Command Queue tail (32bits) */ 
> > +#define RISCV_IOMMU_REG_CQT 0x0024 > > +#define RISCV_IOMMU_CQT_INDEX RISCV_IOMMU_QUEUE_INDEX_FIELD > > + > > +/* 5.9 Fault Queue Base (64bits) */ > > +#define RISCV_IOMMU_REG_FQB 0x0028 > > +#define RISCV_IOMMU_FQB_ENTRIES RISCV_IOMMU_QUEUE_LOGSZ_FIELD > > +#define RISCV_IOMMU_FQB_PN RISCV_IOMMU_PPN_FIELD > > RISCV_IOMMU_FQB_PPN > > > + > > +/* 5.10 Fault Queue Head (32bits) */ > > +#define RISCV_IOMMU_REG_FQH 0x0030 > > +#define RISCV_IOMMU_FQH_INDEX RISCV_IOMMU_QUEUE_INDEX_FIELD > > + > > +/* 5.11 Fault Queue tail (32bits) */ > > +#define RISCV_IOMMU_REG_FQT 0x0034 > > +#define RISCV_IOMMU_FQT_INDEX RISCV_IOMMU_QUEUE_INDEX_FIELD > > + > > +/* 5.12 Page Request Queue base (64bits) */ > > +#define RISCV_IOMMU_REG_PQB 0x0038 > > +#define RISCV_IOMMU_PQB_ENTRIES RISCV_IOMMU_QUEUE_LOGSZ_FIELD > > +#define RISCV_IOMMU_PQB_PPN RISCV_IOMMU_PPN_FIELD > > + > > +/* 5.13 Page Request Queue head (32bits) */ > > +#define RISCV_IOMMU_REG_PQH 0x0040 > > +#define RISCV_IOMMU_PQH_INDEX RISCV_IOMMU_QUEUE_INDEX_FIELD > > + > > +/* 5.14 Page Request Queue tail (32bits) */ > > +#define RISCV_IOMMU_REG_PQT 0x0044 > > +#define RISCV_IOMMU_PQT_INDEX_MASK RISCV_IOMMU_QUEUE_INDEX_FIELD > > + > > +/* 5.15 Command Queue CSR (32bits) */ > > +#define RISCV_IOMMU_REG_CQCSR 0x0048 > > +#define RISCV_IOMMU_CQCSR_CQEN RISCV_IOMMU_QUEUE_ENABLE > > +#define RISCV_IOMMU_CQCSR_CIE RISCV_IOMMU_QUEUE_INTR_ENABLE > > +#define RISCV_IOMMU_CQCSR_CQMF RISCV_IOMMU_QUEUE_MEM_FAULT > > +#define RISCV_IOMMU_CQCSR_CMD_TO BIT(9) > > +#define RISCV_IOMMU_CQCSR_CMD_ILL BIT(10) > > +#define RISCV_IOMMU_CQCSR_FENCE_W_IP BIT(11) > > +#define RISCV_IOMMU_CQCSR_CQON RISCV_IOMMU_QUEUE_ACTIVE > > +#define RISCV_IOMMU_CQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY > > + > > +/* 5.16 Fault Queue CSR (32bits) */ > > +#define RISCV_IOMMU_REG_FQCSR 0x004C > > +#define RISCV_IOMMU_FQCSR_FQEN RISCV_IOMMU_QUEUE_ENABLE > > +#define RISCV_IOMMU_FQCSR_FIE RISCV_IOMMU_QUEUE_INTR_ENABLE > > +#define RISCV_IOMMU_FQCSR_FQMF 
RISCV_IOMMU_QUEUE_MEM_FAULT > > +#define RISCV_IOMMU_FQCSR_FQOF RISCV_IOMMU_QUEUE_OVERFLOW > > +#define RISCV_IOMMU_FQCSR_FQON RISCV_IOMMU_QUEUE_ACTIVE > > +#define RISCV_IOMMU_FQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY > > + > > +/* 5.17 Page Request Queue CSR (32bits) */ > > +#define RISCV_IOMMU_REG_PQCSR 0x0050 > > +#define RISCV_IOMMU_PQCSR_PQEN RISCV_IOMMU_QUEUE_ENABLE > > +#define RISCV_IOMMU_PQCSR_PIE RISCV_IOMMU_QUEUE_INTR_ENABLE > > +#define RISCV_IOMMU_PQCSR_PQMF RISCV_IOMMU_QUEUE_MEM_FAULT > > +#define RISCV_IOMMU_PQCSR_PQOF RISCV_IOMMU_QUEUE_OVERFLOW > > +#define RISCV_IOMMU_PQCSR_PQON RISCV_IOMMU_QUEUE_ACTIVE > > +#define RISCV_IOMMU_PQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY > > + > > +/* 5.18 Interrupt Pending Status (32bits) */ > > +#define RISCV_IOMMU_REG_IPSR 0x0054 > > + > > +#define RISCV_IOMMU_INTR_CQ 0 > > +#define RISCV_IOMMU_INTR_FQ 1 > > +#define RISCV_IOMMU_INTR_PM 2 > > +#define RISCV_IOMMU_INTR_PQ 3 > > +#define RISCV_IOMMU_INTR_COUNT 4 > > + > > +#define RISCV_IOMMU_IPSR_CIP BIT(RISCV_IOMMU_INTR_CQ) > > +#define RISCV_IOMMU_IPSR_FIP BIT(RISCV_IOMMU_INTR_FQ) > > +#define RISCV_IOMMU_IPSR_PMIP BIT(RISCV_IOMMU_INTR_PM) > > +#define RISCV_IOMMU_IPSR_PIP BIT(RISCV_IOMMU_INTR_PQ) > > + > > +/* 5.19 Performance monitoring counter overflow status (32bits) */ > > +#define RISCV_IOMMU_REG_IOCOUNTOVF 0x0058 > > +#define RISCV_IOMMU_IOCOUNTOVF_CY BIT(0) > > +#define RISCV_IOMMU_IOCOUNTOVF_HPM GENMASK_ULL(31, 1) > > RISCV_IOMMU_REG_IOCNTOVF* for all above. It looks like this > got renamed and the header of the spec subsection still has > the old name, but otherwise the new name is referenced. > > > + > > +/* 5.20 Performance monitoring counter inhibits (32bits) */ > > +#define RISCV_IOMMU_REG_IOCOUNTINH 0x005C > > +#define RISCV_IOMMU_IOCOUNTINH_CY BIT(0) > > +#define RISCV_IOMMU_IOCOUNTINH_HPM GENMASK(31, 1) > > RISCV_IOMMU_IOCNTINH* for all above. Same header with old > name spec issue. 
> > > + > > +/* 5.21 Performance monitoring cycles counter (64bits) */ > > +#define RISCV_IOMMU_REG_IOHPMCYCLES 0x0060 > > +#define RISCV_IOMMU_IOHPMCYCLES_COUNTER GENMASK_ULL(62, 0) > > +#define RISCV_IOMMU_IOHPMCYCLES_OVF BIT_ULL(63) > > RISCV_IOMMU_IOHPMCYCLES_OF > > > + > > +/* 5.22 Performance monitoring event counters (31 * 64bits) */ > > +#define RISCV_IOMMU_REG_IOHPMCTR_BASE 0x0068 > > +#define RISCV_IOMMU_REG_IOHPMCTR(_n) (RISCV_IOMMU_REG_IOHPMCTR_BASE + ((_n) * 0x8)) > > + > > +/* 5.23 Performance monitoring event selectors (31 * 64bits) */ > > +#define RISCV_IOMMU_REG_IOHPMEVT_BASE 0x0160 > > +#define RISCV_IOMMU_REG_IOHPMEVT(_n) (RISCV_IOMMU_REG_IOHPMEVT_BASE + ((_n) * 0x8)) > > +#define RISCV_IOMMU_IOHPMEVT_CNT 31 > > What is RISCV_IOMMU_IOHPMEVT_CNT ? > > > +#define RISCV_IOMMU_IOHPMEVT_EVENT_ID GENMASK_ULL(14, 0) > > RISCV_IOMMU_IOHPMEVT_EVENTID (EVENT_ID reads nicer though...) > > > +#define RISCV_IOMMU_IOHPMEVT_DMASK BIT_ULL(15) > > +#define RISCV_IOMMU_IOHPMEVT_PID_PSCID GENMASK_ULL(35, 16) > > +#define RISCV_IOMMU_IOHPMEVT_DID_GSCID GENMASK_ULL(59, 36) > > +#define RISCV_IOMMU_IOHPMEVT_PV_PSCV BIT_ULL(60) > > +#define RISCV_IOMMU_IOHPMEVT_DV_GSCV BIT_ULL(61) > > +#define RISCV_IOMMU_IOHPMEVT_IDT BIT_ULL(62) > > +#define RISCV_IOMMU_IOHPMEVT_OF BIT_ULL(63) > > + > > +/** > > + * enum riscv_iommu_hpmevent_id - Performance-monitoring event identifier > > + * > > + * @RISCV_IOMMU_HPMEVENT_INVALID: Invalid event, do not count > > + * @RISCV_IOMMU_HPMEVENT_URQ: Untranslated requests > > + * @RISCV_IOMMU_HPMEVENT_TRQ: Translated requests > > + * @RISCV_IOMMU_HPMEVENT_ATS_RQ: ATS translation requests > > + * @RISCV_IOMMU_HPMEVENT_TLB_MISS: TLB misses > > + * @RISCV_IOMMU_HPMEVENT_DD_WALK: Device directory walks > > + * @RISCV_IOMMU_HPMEVENT_PD_WALK: Process directory walks > > + * @RISCV_IOMMU_HPMEVENT_S_VS_WALKS: S/VS-Stage page table walks > > + * @RISCV_IOMMU_HPMEVENT_G_WALKS: G-Stage page table walks > > Let's copy+paste these descriptions from the 
spec to allow them to be > copy+pasted from here into a search in the spec. (Most already mostly > match, except for the G-stage vs. second-stage type stuff.) > > > + * @RISCV_IOMMU_HPMEVENT_MAX: Value to denote maximum Event IDs > > + */ > > +enum riscv_iommu_hpmevent_id { > > + RISCV_IOMMU_HPMEVENT_INVALID = 0, > > + RISCV_IOMMU_HPMEVENT_URQ = 1, > > + RISCV_IOMMU_HPMEVENT_TRQ = 2, > > + RISCV_IOMMU_HPMEVENT_ATS_RQ = 3, > > + RISCV_IOMMU_HPMEVENT_TLB_MISS = 4, > > + RISCV_IOMMU_HPMEVENT_DD_WALK = 5, > > + RISCV_IOMMU_HPMEVENT_PD_WALK = 6, > > + RISCV_IOMMU_HPMEVENT_S_VS_WALKS = 7, > > + RISCV_IOMMU_HPMEVENT_G_WALKS = 8, > > + RISCV_IOMMU_HPMEVENT_MAX = 9 > > +}; > > + > > +/* 5.24 Translation request IOVA (64bits) */ > > +#define RISCV_IOMMU_REG_TR_REQ_IOVA 0x0258 > > +#define RISCV_IOMMU_TR_REQ_IOVA_VPN GENMASK_ULL(63, 12) > > + > > +/* 5.25 Translation request control (64bits) */ > > +#define RISCV_IOMMU_REG_TR_REQ_CTL 0x0260 > > +#define RISCV_IOMMU_TR_REQ_CTL_GO_BUSY BIT_ULL(0) > > +#define RISCV_IOMMU_TR_REQ_CTL_PRIV BIT_ULL(1) > > +#define RISCV_IOMMU_TR_REQ_CTL_EXE BIT_ULL(2) > > +#define RISCV_IOMMU_TR_REQ_CTL_NW BIT_ULL(3) > > +#define RISCV_IOMMU_TR_REQ_CTL_PID GENMASK_ULL(31, 12) > > +#define RISCV_IOMMU_TR_REQ_CTL_PV BIT_ULL(32) > > +#define RISCV_IOMMU_TR_REQ_CTL_DID GENMASK_ULL(63, 40) > > + > > +/* 5.26 Translation request response (64bits) */ > > +#define RISCV_IOMMU_REG_TR_RESPONSE 0x0268 > > +#define RISCV_IOMMU_TR_RESPONSE_FAULT BIT_ULL(0) > > +#define RISCV_IOMMU_TR_RESPONSE_PBMT GENMASK_ULL(8, 7) > > +#define RISCV_IOMMU_TR_RESPONSE_SZ BIT_ULL(9) > > RISCV_IOMMU_TR_RESPONSE_S (although SZ is easier to understand...) 
> > > +#define RISCV_IOMMU_TR_RESPONSE_PPN RISCV_IOMMU_PPN_FIELD > > + > > +/* 5.27 Interrupt cause to vector (64bits) */ > > +#define RISCV_IOMMU_REG_ICVEC 0x02F8 > > +#define RISCV_IOMMU_ICVEC_CIV GENMASK_ULL(3, 0) > > +#define RISCV_IOMMU_ICVEC_FIV GENMASK_ULL(7, 4) > > +#define RISCV_IOMMU_ICVEC_PMIV GENMASK_ULL(11, 8) > > +#define RISCV_IOMMU_ICVEC_PIV GENMASK_ULL(15, 12) > > + > > +/* 5.28 MSI Configuration table (32 * 64bits) */ > > +#define RISCV_IOMMU_REG_MSI_CONFIG 0x0300 > > +#define RISCV_IOMMU_REG_MSI_ADDR(_n) (RISCV_IOMMU_REG_MSI_CONFIG + ((_n) * 0x10)) > > +#define RISCV_IOMMU_MSI_ADDR GENMASK_ULL(55, 2) > > +#define RISCV_IOMMU_REG_MSI_DATA(_n) (RISCV_IOMMU_REG_MSI_CONFIG + ((_n) * 0x10) + 0x08) > > +#define RISCV_IOMMU_MSI_DATA GENMASK_ULL(31, 0) > > +#define RISCV_IOMMU_REG_MSI_VEC_CTL(_n) (RISCV_IOMMU_REG_MSI_CONFIG + ((_n) * 0x10) + 0x0C) > > +#define RISCV_IOMMU_MSI_VEC_CTL_M BIT_ULL(0) > > RISCV_IOMMU_REG_MSI_CFG_TBL for all above > > > + > > +#define RISCV_IOMMU_REG_SIZE 0x1000 > > + > > +/* > > + * Chapter 2: Data structures > > + */ > > + > > +/* > > + * Device Directory Table macros for non-leaf nodes > > + */ > > +#define RISCV_IOMMU_DDTE_VALID BIT_ULL(0) > > RISCV_IOMMU_DDTE_V > > > +#define RISCV_IOMMU_DDTE_PPN RISCV_IOMMU_PPN_FIELD > > + > > +/** > > + * struct riscv_iommu_dc - Device Context > > + * @tc: Translation Control > > + * @iohgatp: I/O Hypervisor guest address translation and protection > > + * (Second stage context) > > + * @ta: Translation Attributes > > + * @fsc: First stage context > > + * @msiptp: MSI page table pointer > > + * @msi_addr_mask: MSI address mask > > + * @msi_addr_pattern: MSI address pattern > > + * @_reserved: Reserved for future use, padding > > + * > > + * This structure is used for leaf nodes on the Device Directory Table, > > + * in case RISCV_IOMMU_CAP_MSI_FLAT is not set, the bottom 4 fields are > > + * not present and are skipped with pointer arithmetic to avoid > > + * casting, check out 
riscv_iommu_get_dc(). > > + * See section 2.1 for more details > > + */ > > +struct riscv_iommu_dc { > > + u64 tc; > > + u64 iohgatp; > > + u64 ta; > > + u64 fsc; > > + u64 msiptp; > > + u64 msi_addr_mask; > > + u64 msi_addr_pattern; > > + u64 _reserved; > > +}; > > + > > +/* Translation control fields */ > > +#define RISCV_IOMMU_DC_TC_V BIT_ULL(0) > > +#define RISCV_IOMMU_DC_TC_EN_ATS BIT_ULL(1) > > +#define RISCV_IOMMU_DC_TC_EN_PRI BIT_ULL(2) > > +#define RISCV_IOMMU_DC_TC_T2GPA BIT_ULL(3) > > +#define RISCV_IOMMU_DC_TC_DTF BIT_ULL(4) > > +#define RISCV_IOMMU_DC_TC_PDTV BIT_ULL(5) > > +#define RISCV_IOMMU_DC_TC_PRPR BIT_ULL(6) > > +#define RISCV_IOMMU_DC_TC_GADE BIT_ULL(7) > > +#define RISCV_IOMMU_DC_TC_SADE BIT_ULL(8) > > +#define RISCV_IOMMU_DC_TC_DPE BIT_ULL(9) > > +#define RISCV_IOMMU_DC_TC_SBE BIT_ULL(10) > > +#define RISCV_IOMMU_DC_TC_SXL BIT_ULL(11) > > + > > +/* Second-stage (aka G-stage) context fields */ > > +#define RISCV_IOMMU_DC_IOHGATP_PPN RISCV_IOMMU_ATP_PPN_FIELD > > +#define RISCV_IOMMU_DC_IOHGATP_GSCID GENMASK_ULL(59, 44) > > +#define RISCV_IOMMU_DC_IOHGATP_MODE RISCV_IOMMU_ATP_MODE_FIELD > > + > > +/** > > + * enum riscv_iommu_dc_iohgatp_modes - Guest address translation/protection modes > > + * @RISCV_IOMMU_DC_IOHGATP_MODE_BARE: No translation/protection > > + * @RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4: Sv32x4 (2-bit extension of Sv32), when fctl.GXL == 1 > > + * @RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4: Sv39x4 (2-bit extension of Sv39), when fctl.GXL == 0 > > + * @RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4: Sv48x4 (2-bit extension of Sv48), when fctl.GXL == 0 > > + * @RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4: Sv57x4 (2-bit extension of Sv57), when fctl.GXL == 0 > > + */ > > +enum riscv_iommu_dc_iohgatp_modes { > > + RISCV_IOMMU_DC_IOHGATP_MODE_BARE = 0, > > + RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4 = 8, > > + RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4 = 8, > > + RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4 = 9, > > + RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4 = 10 > > +}; > > + > > +/* 
Translation attributes fields */ > > +#define RISCV_IOMMU_DC_TA_PSCID GENMASK_ULL(31, 12) > > + > > +/* First-stage context fields */ > > +#define RISCV_IOMMU_DC_FSC_PPN RISCV_IOMMU_ATP_PPN_FIELD > > +#define RISCV_IOMMU_DC_FSC_MODE RISCV_IOMMU_ATP_MODE_FIELD > > + > > +/** > > + * enum riscv_iommu_dc_fsc_atp_modes - First stage address translation/protection modes > > + * @RISCV_IOMMU_DC_FSC_MODE_BARE: No translation/protection > > + * @RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32: Sv32, when dc.tc.SXL == 1 > > + * @RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: Sv39, when dc.tc.SXL == 0 > > + * @RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: Sv48, when dc.tc.SXL == 0 > > + * @RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: Sv57, when dc.tc.SXL == 0 > > + * @RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8: 1lvl PDT, 8bit process ids > > + * @RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17: 2lvl PDT, 17bit process ids > > + * @RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20: 3lvl PDT, 20bit process ids > > + * > > + * FSC holds IOSATP when RISCV_IOMMU_DC_TC_PDTV is 0 and PDTP otherwise. > > + * IOSATP controls the first stage address translation (same as the satp register on > > + * the RISC-V MMU), and PDTP holds the process directory table, used to select a > > + * first stage page table based on a process id (for devices that support multiple > > + * process ids). 
> > + */ > > +enum riscv_iommu_dc_fsc_atp_modes { > > + RISCV_IOMMU_DC_FSC_MODE_BARE = 0, > > + RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 = 8, > > + RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 = 8, > > + RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48 = 9, > > + RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57 = 10, > > + RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8 = 1, > > + RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17 = 2, > > + RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20 = 3 > > +}; > > + > > +/* MSI page table pointer */ > > +#define RISCV_IOMMU_DC_MSIPTP_PPN RISCV_IOMMU_ATP_PPN_FIELD > > +#define RISCV_IOMMU_DC_MSIPTP_MODE RISCV_IOMMU_ATP_MODE_FIELD > > +#define RISCV_IOMMU_DC_MSIPTP_MODE_OFF 0 > > +#define RISCV_IOMMU_DC_MSIPTP_MODE_FLAT 1 > > + > > +/* MSI address mask */ > > +#define RISCV_IOMMU_DC_MSI_ADDR_MASK GENMASK_ULL(51, 0) > > + > > +/* MSI address pattern */ > > +#define RISCV_IOMMU_DC_MSI_PATTERN GENMASK_ULL(51, 0) > > + > > +/** > > + * struct riscv_iommu_pc - Process Context > > + * @ta: Translation Attributes > > + * @fsc: First stage context > > + * > > + * This structure is used for leaf nodes on the Process Directory Table > > + * See section 2.3 for more details > > + */ > > +struct riscv_iommu_pc { > > + u64 ta; > > + u64 fsc; > > +}; > > + > > +/* Translation attributes fields */ > > +#define RISCV_IOMMU_PC_TA_V BIT_ULL(0) > > +#define RISCV_IOMMU_PC_TA_ENS BIT_ULL(1) > > +#define RISCV_IOMMU_PC_TA_SUM BIT_ULL(2) > > +#define RISCV_IOMMU_PC_TA_PSCID GENMASK_ULL(31, 12) > > + > > +/* First stage context fields */ > > +#define RISCV_IOMMU_PC_FSC_PPN RISCV_IOMMU_ATP_PPN_FIELD > > +#define RISCV_IOMMU_PC_FSC_MODE RISCV_IOMMU_ATP_MODE_FIELD > > + > > +/* > > + * Chapter 3: In-memory queue interface > > + */ > > + > > +/** > > + * struct riscv_iommu_command - Generic I/O MMU command structure > > + * @dword0: Includes the opcode and the function identifier > > + * @dword1: Opcode specific data > > + * > > + * The commands are interpreted as two 64bit fields, where the first > > + * 7bits of the first field are the 
opcode which also defines the > > + * command's format, followed by a 3bit field that specifies the > > + * function invoked by that command, and the rest is opcode-specific. > > + * This is a generic struct which will be populated differently > > + * according to each command. For more infos on the commands and > > + * the command queue check section 3.1. > > + */ > > +struct riscv_iommu_command { > > + u64 dword0; > > + u64 dword1; > > +}; > > + > > +/* Fields on dword0, common for all commands */ > > +#define RISCV_IOMMU_CMD_OPCODE GENMASK_ULL(6, 0) > > +#define RISCV_IOMMU_CMD_FUNC GENMASK_ULL(9, 7) > > + > > +/* 3.1.1 I/O MMU Page-table cache invalidation */ > > +/* Fields on dword0 */ > > +#define RISCV_IOMMU_CMD_IOTINVAL_OPCODE 1 > > +#define RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA 0 > > +#define RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA 1 > > +#define RISCV_IOMMU_CMD_IOTINVAL_AV BIT_ULL(10) > > +#define RISCV_IOMMU_CMD_IOTINVAL_PSCID GENMASK_ULL(31, 12) > > +#define RISCV_IOMMU_CMD_IOTINVAL_PSCV BIT_ULL(32) > > +#define RISCV_IOMMU_CMD_IOTINVAL_GV BIT_ULL(33) > > +#define RISCV_IOMMU_CMD_IOTINVAL_GSCID GENMASK_ULL(59, 44) > > +/* dword1[61:10] is the 4K-aligned page address */ > > +#define RISCV_IOMMU_CMD_IOTINVAL_ADDR GENMASK_ULL(61, 10) > > + > > +/* 3.1.2 I/O MMU Command Queue Fences */ > > +/* Fields on dword0 */ > > +#define RISCV_IOMMU_CMD_IOFENCE_OPCODE 2 > > +#define RISCV_IOMMU_CMD_IOFENCE_FUNC_C 0 > > +#define RISCV_IOMMU_CMD_IOFENCE_AV BIT_ULL(10) > > +#define RISCV_IOMMU_CMD_IOFENCE_WSI BIT_ULL(11) > > +#define RISCV_IOMMU_CMD_IOFENCE_PR BIT_ULL(12) > > +#define RISCV_IOMMU_CMD_IOFENCE_PW BIT_ULL(13) > > +#define RISCV_IOMMU_CMD_IOFENCE_DATA GENMASK_ULL(63, 32) > > +/* dword1 is the address, word-size aligned and shifted to the right by two bits. 
*/ > > + > > +/* 3.1.3 I/O MMU Directory cache invalidation */ > > +/* Fields on dword0 */ > > +#define RISCV_IOMMU_CMD_IODIR_OPCODE 3 > > +#define RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT 0 > > +#define RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT 1 > > +#define RISCV_IOMMU_CMD_IODIR_PID GENMASK_ULL(31, 12) > > +#define RISCV_IOMMU_CMD_IODIR_DV BIT_ULL(33) > > +#define RISCV_IOMMU_CMD_IODIR_DID GENMASK_ULL(63, 40) > > RISCV_IOMMU_CMD_IOTDIR_* for all above > > > +/* dword1 is reserved for standard use */ > > + > > +/* 3.1.4 I/O MMU PCIe ATS */ > > +/* Fields on dword0 */ > > +#define RISCV_IOMMU_CMD_ATS_OPCODE 4 > > +#define RISCV_IOMMU_CMD_ATS_FUNC_INVAL 0 > > +#define RISCV_IOMMU_CMD_ATS_FUNC_PRGR 1 > > +#define RISCV_IOMMU_CMD_ATS_PID GENMASK_ULL(31, 12) > > +#define RISCV_IOMMU_CMD_ATS_PV BIT_ULL(32) > > +#define RISCV_IOMMU_CMD_ATS_DSV BIT_ULL(33) > > +#define RISCV_IOMMU_CMD_ATS_RID GENMASK_ULL(55, 40) > > +#define RISCV_IOMMU_CMD_ATS_DSEG GENMASK_ULL(63, 56) > > +/* dword1 is the ATS payload, two different payload types for INVAL and PRGR */ > > + > > +/* ATS.INVAL payload*/ > > +#define RISCV_IOMMU_CMD_ATS_INVAL_G BIT_ULL(0) > > +/* Bits 1 - 10 are zeroed */ > > +#define RISCV_IOMMU_CMD_ATS_INVAL_S BIT_ULL(11) > > +#define RISCV_IOMMU_CMD_ATS_INVAL_UADDR GENMASK_ULL(63, 12) > > + > > +/* ATS.PRGR payload */ > > +/* Bits 0 - 31 are zeroed */ > > +#define RISCV_IOMMU_CMD_ATS_PRGR_PRG_INDEX GENMASK_ULL(40, 32) > > +/* Bits 41 - 43 are zeroed */ > > +#define RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE GENMASK_ULL(47, 44) > > +#define RISCV_IOMMU_CMD_ATS_PRGR_DST_ID GENMASK_ULL(63, 48) > > + > > +/** > > + * struct riscv_iommu_fq_record - Fault/Event Queue Record > > + * @hdr: Header, includes fault/event cause, PID/DID, transaction type etc > > + * @_reserved: Low 32bits for custom use, high 32bits for standard use > > + * @iotval: Transaction-type/cause specific format > > + * @iotval2: Cause specific format > > + * > > + * The fault/event queue reports events and failures raised 
when > > + * processing transactions. Each record is a 32byte structure where > > + * the first dword has a fixed format for providing generic infos > > + * regarding the fault/event, and two more dwords are there for > > + * fault/event-specific information. For more details see section > > + * 3.2. > > + */ > > +struct riscv_iommu_fq_record { > > + u64 hdr; > > + u64 _reserved; > > + u64 iotval; > > + u64 iotval2; > > +}; > > + > > +/* Fields on header */ > > +#define RISCV_IOMMU_FQ_HDR_CAUSE GENMASK_ULL(11, 0) > > +#define RISCV_IOMMU_FQ_HDR_PID GENMASK_ULL(31, 12) > > +#define RISCV_IOMMU_FQ_HDR_PV BIT_ULL(32) > > +#define RISCV_IOMMU_FQ_HDR_PRIV BIT_ULL(33) > > +#define RISCV_IOMMU_FQ_HDR_TTYPE GENMASK_ULL(39, 34) > > RISCV_IOMMU_FQ_HDR_TTYP > > > +#define RISCV_IOMMU_FQ_HDR_DID GENMASK_ULL(63, 40) > > + > > +/** > > + * enum riscv_iommu_fq_causes - Fault/event cause values > > + * @RISCV_IOMMU_FQ_CAUSE_INST_FAULT: Instruction access fault > > + * @RISCV_IOMMU_FQ_CAUSE_RD_ADDR_MISALIGNED: Read address misaligned > > + * @RISCV_IOMMU_FQ_CAUSE_RD_FAULT: Read load fault > > + * @RISCV_IOMMU_FQ_CAUSE_WR_ADDR_MISALIGNED: Write/AMO address misaligned > > + * @RISCV_IOMMU_FQ_CAUSE_WR_FAULT: Write/AMO access fault > > + * @RISCV_IOMMU_FQ_CAUSE_INST_FAULT_S: Instruction page fault > > + * @RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S: Read page fault > > + * @RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S: Write/AMO page fault > > + * @RISCV_IOMMU_FQ_CAUSE_INST_FAULT_VS: Instruction guest page fault > > + * @RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS: Read guest page fault > > + * @RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS: Write/AMO guest page fault > > + * @RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED: All inbound transactions disallowed > > + * @RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT: DDT entry load access fault > > + * @RISCV_IOMMU_FQ_CAUSE_DDT_INVALID: DDT entry invalid > > + * @RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED: DDT entry misconfigured > > + * @RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED: Transaction type disallowed > > + * 
@RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT: MSI PTE load access fault > > + * @RISCV_IOMMU_FQ_CAUSE_MSI_INVALID: MSI PTE invalid > > + * @RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED: MSI PTE misconfigured > > + * @RISCV_IOMMU_FQ_CAUSE_MRIF_FAULT: MRIF access fault > > + * @RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT: PDT entry load access fault > > + * @RISCV_IOMMU_FQ_CAUSE_PDT_INVALID: PDT entry invalid > > + * @RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED: PDT entry misconfigured > > + * @RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED: DDT data corruption > > + * @RISCV_IOMMU_FQ_CAUSE_PDT_CORRUPTED: PDT data corruption > > + * @RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED: MSI page table data corruption > > + * @RISCV_IOMMU_FQ_CAUSE_MRIF_CORRUIPTED: MRIF data corruption > > + * @RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR: Internal data path error > > + * @RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT: IOMMU MSI write access fault > > + * @RISCV_IOMMU_FQ_CAUSE_PT_CORRUPTED: First/second stage page table data corruption > > + * > > + * Values are on table 11 of the spec, encodings 275 - 2047 are reserved for standard > > + * use, and 2048 - 4095 for custom use. 
> > + */ > > +enum riscv_iommu_fq_causes { > > + RISCV_IOMMU_FQ_CAUSE_INST_FAULT = 1, > > + RISCV_IOMMU_FQ_CAUSE_RD_ADDR_MISALIGNED = 4, > > + RISCV_IOMMU_FQ_CAUSE_RD_FAULT = 5, > > + RISCV_IOMMU_FQ_CAUSE_WR_ADDR_MISALIGNED = 6, > > + RISCV_IOMMU_FQ_CAUSE_WR_FAULT = 7, > > + RISCV_IOMMU_FQ_CAUSE_INST_FAULT_S = 12, > > + RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S = 13, > > + RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S = 15, > > + RISCV_IOMMU_FQ_CAUSE_INST_FAULT_VS = 20, > > + RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS = 21, > > + RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS = 23, > > + RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED = 256, > > + RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT = 257, > > + RISCV_IOMMU_FQ_CAUSE_DDT_INVALID = 258, > > + RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED = 259, > > + RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED = 260, > > + RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT = 261, > > + RISCV_IOMMU_FQ_CAUSE_MSI_INVALID = 262, > > + RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED = 263, > > + RISCV_IOMMU_FQ_CAUSE_MRIF_FAULT = 264, > > + RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT = 265, > > + RISCV_IOMMU_FQ_CAUSE_PDT_INVALID = 266, > > + RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED = 267, > > + RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED = 268, > > + RISCV_IOMMU_FQ_CAUSE_PDT_CORRUPTED = 269, > > + RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED = 270, > > + RISCV_IOMMU_FQ_CAUSE_MRIF_CORRUIPTED = 271, > > + RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR = 272, > > + RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT = 273, > > + RISCV_IOMMU_FQ_CAUSE_PT_CORRUPTED = 274 > > +}; > > + > > +/** > > + * enum riscv_iommu_fq_ttypes: Fault/event transaction types > > + * @RISCV_IOMMU_FQ_TTYPE_NONE: None. Fault not caused by an inbound transaction. 
> > + * @RISCV_IOMMU_FQ_TTYPE_UADDR_INST_FETCH: Instruction fetch from untranslated address > > + * @RISCV_IOMMU_FQ_TTYPE_UADDR_RD: Read from untranslated address > > + * @RISCV_IOMMU_FQ_TTYPE_UADDR_WR: Write/AMO to untranslated address > > + * @RISCV_IOMMU_FQ_TTYPE_TADDR_INST_FETCH: Instruction fetch from translated address > > + * @RISCV_IOMMU_FQ_TTYPE_TADDR_RD: Read from translated address > > + * @RISCV_IOMMU_FQ_TTYPE_TADDR_WR: Write/AMO to translated address > > + * @RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ: PCIe ATS translation request > > + * @RISCV_IOMMU_FW_TTYPE_PCIE_MSG_REQ: PCIe message request > > + * > > + * Values are on table 12 of the spec, type 4 and 10 - 31 are reserved for standard use > > + * and 32 - 63 for custom use. > > + */ > > +enum riscv_iommu_fq_ttypes { > > + RISCV_IOMMU_FQ_TTYPE_NONE = 0, > > + RISCV_IOMMU_FQ_TTYPE_UADDR_INST_FETCH = 1, > > + RISCV_IOMMU_FQ_TTYPE_UADDR_RD = 2, > > + RISCV_IOMMU_FQ_TTYPE_UADDR_WR = 3, > > + RISCV_IOMMU_FQ_TTYPE_TADDR_INST_FETCH = 5, > > + RISCV_IOMMU_FQ_TTYPE_TADDR_RD = 6, > > + RISCV_IOMMU_FQ_TTYPE_TADDR_WR = 7, > > + RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ = 8, > > + RISCV_IOMMU_FW_TTYPE_PCIE_MSG_REQ = 9, > > +}; > > RISCV_IOMMU_FW_TTYP_* for all above > I guess RISCV_IOMMU_FQ_TTYP_* to match _FQ_ acronym. > > + > > +/** > > + * struct riscv_iommu_pq_record - PCIe Page Request record > > + * @hdr: Header, includes PID, DID etc > > + * @payload: Holds the page address, request group and permission bits > > + * > > + * For more infos on the PCIe Page Request queue see chapter 3.3.
> > + */ > > +struct riscv_iommu_pq_record { > > + u64 hdr; > > + u64 payload; > > +}; > > + > > +/* Header fields */ > > +#define RISCV_IOMMU_PREQ_HDR_PID GENMASK_ULL(31, 12) > > +#define RISCV_IOMMU_PREQ_HDR_PV BIT_ULL(32) > > +#define RISCV_IOMMU_PREQ_HDR_PRIV BIT_ULL(33) > > +#define RISCV_IOMMU_PREQ_HDR_EXEC BIT_ULL(34) > > +#define RISCV_IOMMU_PREQ_HDR_DID GENMASK_ULL(63, 40) > > + > > +/* Payload fields */ > > +#define RISCV_IOMMU_PREQ_PAYLOAD_R BIT_ULL(0) > > +#define RISCV_IOMMU_PREQ_PAYLOAD_W BIT_ULL(1) > > +#define RISCV_IOMMU_PREQ_PAYLOAD_L BIT_ULL(2) > > +#define RISCV_IOMMU_PREQ_PAYLOAD_M GENMASK_ULL(2, 0) /* Mask of RWL for convenience */ > > Maybe RISCV_IOMMU_PREQ_PAYLOAD_RWL_MASK ? Just 'M' doesn't convey anything > so it makes one want to look it up in the spec. > > > +#define RISCV_IOMMU_PREQ_PRG_INDEX GENMASK_ULL(11, 3) > > +#define RISCV_IOMMU_PREQ_UADDR GENMASK_ULL(63, 12) > > RISCV_IOMMU_PQ_HDR_* for all above > > > + > > +/** > > + * struct riscv_iommu_msi_pte - MSI Page Table Entry > > + * @pte: MSI PTE > > + * @mrif_info: Memory-resident interrupt file info > > + * > > + * The MSI Page Table is used for virtualizing MSIs, so that when > > + * a device sends an MSI to a guest, the IOMMU can reroute it > > + * by translating the MSI address, either to a guest interrupt file > > + * or a memory resident interrupt file (MRIF). Note that this page table > > + * is an array of MSI PTEs, not a multi-level pt, each entry > > + * is a leaf entry. For more infos check out the AIA spec, chapter 9.5. > > + * > > + * Also in basic mode the mrif_info field is ignored by the IOMMU and can > > + * be used by software, any other reserved fields on pte must be zeroed-out > > + * by software. 
> > + */ > > +struct riscv_iommu_msi_pte { > > + u64 pte; > > + u64 mrif_info; > > +}; > > + > > +/* Fields on pte */ > > +#define RISCV_IOMMU_MSI_PTE_V BIT_ULL(0) > > +#define RISCV_IOMMU_MSI_PTE_M GENMASK_ULL(2, 1) > > +#define RISCV_IOMMU_MSI_PTE_MRIF_ADDR GENMASK_ULL(53, 7) /* When M == 1 (MRIF mode) */ > > +#define RISCV_IOMMU_MSI_PTE_PPN RISCV_IOMMU_PPN_FIELD /* When M == 3 (basic mode) */ > > +#define RISCV_IOMMU_MSI_PTE_C BIT_ULL(63) > > RISCV_IOMMU_MSIPTE_* for all above > > > + > > +/* Fields on mrif_info */ > > +#define RISCV_IOMMU_MSI_MRIF_NID GENMASK_ULL(9, 0) > > +#define RISCV_IOMMU_MSI_MRIF_NPPN RISCV_IOMMU_PPN_FIELD > > +#define RISCV_IOMMU_MSI_MRIF_NID_MSB BIT_ULL(60) > > + > > +#endif /* _RISCV_IOMMU_BITS_H_ */
On Wed, May 29, 2024 at 10:59:58AM GMT, Tomasz Jeznach wrote: > On Wed, May 29, 2024 at 8:15 AM Andrew Jones <ajones@ventanamicro.com> wrote: > > > > Hi Tomasz, > > > > I reviewed iommu-bits.h against the spec. Most naming matches exactly, which > > is nice, but I've pointed out a few which don't. > > > > Thanks, > > drew > > > > Thanks for looking into this rather boring file. No problem. I also meant to point out that I checked all bits/offsets as well. They all looked good to me. ... > > > +enum riscv_iommu_fq_ttypes { > > > + RISCV_IOMMU_FQ_TTYPE_NONE = 0, > > > + RISCV_IOMMU_FQ_TTYPE_UADDR_INST_FETCH = 1, > > > + RISCV_IOMMU_FQ_TTYPE_UADDR_RD = 2, > > > + RISCV_IOMMU_FQ_TTYPE_UADDR_WR = 3, > > > + RISCV_IOMMU_FQ_TTYPE_TADDR_INST_FETCH = 5, > > > + RISCV_IOMMU_FQ_TTYPE_TADDR_RD = 6, > > > + RISCV_IOMMU_FQ_TTYPE_TADDR_WR = 7, > > > + RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ = 8, > > > + RISCV_IOMMU_FW_TTYPE_PCIE_MSG_REQ = 9, > > > +}; > > > > RISCV_IOMMU_FW_TTYP_* for all above > > > > I guess RISCV_IOMMU_FQ_TTYP_* to match _FQ_ acronym. Oh yeah. I guess my eyes had glazed over at this point because I didn't notice the 'FW' vs. 'FQ'. So, yeah, we want RISCV_IOMMU_FQ_TTYP_* for all above, including RISCV_IOMMU_FQ_TTYP_PCIE_MSG_REQ. Thanks, drew
On Sat, May 25, 2024 at 3:35 AM Tomasz Jeznach <tjeznach@rivosinc.com> wrote: > > Introduce device context allocation and device directory tree > management including capabilities discovery sequence, as described > in Chapter 2.1 of the RISC-V IOMMU Architecture Specification. > > Device directory mode will be auto detected using DDTP WARL property, > using highest mode supported by the driver and hardware. If none > supported can be configured, driver will fall back to global pass-through. > > First level DDTP page can be located in I/O (detected using DDTP WARL) > and system memory. > > Only simple identity and blocking protection domains are supported by > this implementation. > > Co-developed-by: Nick Kossifidis <mick@ics.forth.gr> > Signed-off-by: Nick Kossifidis <mick@ics.forth.gr> > Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com> > Reviewed-by: Zong Li <zong.li@sifive.com> > Signed-off-by: Tomasz Jeznach <tjeznach@rivosinc.com> > --- > drivers/iommu/riscv/iommu.c | 397 +++++++++++++++++++++++++++++++++++- > drivers/iommu/riscv/iommu.h | 5 + > 2 files changed, 392 insertions(+), 10 deletions(-) > > diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c > index b8e0e4b62585..9ca130505c96 100644 > --- a/drivers/iommu/riscv/iommu.c > +++ b/drivers/iommu/riscv/iommu.c > @@ -16,15 +16,168 @@ > #include <linux/crash_dump.h> > #include <linux/init.h> > #include <linux/iommu.h> > +#include <linux/iopoll.h> > #include <linux/kernel.h> > #include <linux/pci.h> > > +#include "../iommu-pages.h" > #include "iommu-bits.h" > #include "iommu.h" > > /* Timeouts in [us] */ > #define RISCV_IOMMU_DDTP_TIMEOUT 50000 > > +/* RISC-V IOMMU PPN <> PHYS address conversions, PHYS <=> PPN[53:10] */ > +#define phys_to_ppn(pa) (((pa) >> 2) & (((1ULL << 44) - 1) << 10)) > +#define ppn_to_phys(pn) (((pn) << 2) & (((1ULL << 44) - 1) << 12)) > + > +#define dev_to_iommu(dev) \ > + iommu_get_iommu_dev(dev, struct riscv_iommu_device, iommu) > + > +/* Device resource-managed 
allocations */ > +struct riscv_iommu_devres { > + void *addr; > + int order; > +}; > + > +static void riscv_iommu_devres_pages_release(struct device *dev, void *res) > +{ > + struct riscv_iommu_devres *devres = res; > + > + iommu_free_pages(devres->addr, devres->order); > +} > + > +static int riscv_iommu_devres_pages_match(struct device *dev, void *res, void *p) > +{ > + struct riscv_iommu_devres *devres = res; > + struct riscv_iommu_devres *target = p; > + > + return devres->addr == target->addr; > +} > + > +static void *riscv_iommu_get_pages(struct riscv_iommu_device *iommu, int order) > +{ > + struct riscv_iommu_devres *devres; > + void *addr; > + > + addr = iommu_alloc_pages_node(dev_to_node(iommu->dev), > + GFP_KERNEL_ACCOUNT, order); > + if (unlikely(!addr)) > + return NULL; > + > + devres = devres_alloc(riscv_iommu_devres_pages_release, > + sizeof(struct riscv_iommu_devres), GFP_KERNEL); > + > + if (unlikely(!devres)) { > + iommu_free_pages(addr, order); > + return NULL; > + } > + > + devres->addr = addr; > + devres->order = order; > + > + devres_add(iommu->dev, devres); > + > + return addr; > +} > + > +static void riscv_iommu_free_pages(struct riscv_iommu_device *iommu, void *addr) > +{ > + struct riscv_iommu_devres devres = { .addr = addr }; > + > + devres_release(iommu->dev, riscv_iommu_devres_pages_release, > + riscv_iommu_devres_pages_match, &devres); > +} > + > +/* Lookup and initialize device context info structure. 
*/ > +static struct riscv_iommu_dc *riscv_iommu_get_dc(struct riscv_iommu_device *iommu, > + unsigned int devid) > +{ > + const bool base_format = !(iommu->caps & RISCV_IOMMU_CAP_MSI_FLAT); > + unsigned int depth; > + unsigned long ddt, old, new; > + void *ptr; > + u8 ddi_bits[3] = { 0 }; > + u64 *ddtp = NULL; > + > + /* Make sure the mode is valid */ > + if (iommu->ddt_mode < RISCV_IOMMU_DDTP_MODE_1LVL || > + iommu->ddt_mode > RISCV_IOMMU_DDTP_MODE_3LVL) > + return NULL; > + > + /* > + * Device id partitioning for base format: > + * DDI[0]: bits 0 - 6 (1st level) (7 bits) > + * DDI[1]: bits 7 - 15 (2nd level) (9 bits) > + * DDI[2]: bits 16 - 23 (3rd level) (8 bits) > + * > + * For extended format: > + * DDI[0]: bits 0 - 5 (1st level) (6 bits) > + * DDI[1]: bits 6 - 14 (2nd level) (9 bits) > + * DDI[2]: bits 15 - 23 (3rd level) (9 bits) > + */ > + if (base_format) { > + ddi_bits[0] = 7; > + ddi_bits[1] = 7 + 9; > + ddi_bits[2] = 7 + 9 + 8; > + } else { > + ddi_bits[0] = 6; > + ddi_bits[1] = 6 + 9; > + ddi_bits[2] = 6 + 9 + 9; > + } > + > + /* Make sure device id is within range */ > + depth = iommu->ddt_mode - RISCV_IOMMU_DDTP_MODE_1LVL; > + if (devid >= (1 << ddi_bits[depth])) > + return NULL; > + > + /* Get to the level of the non-leaf node that holds the device context */ > + for (ddtp = iommu->ddt_root; depth-- > 0;) { > + const int split = ddi_bits[depth]; > + /* > + * Each non-leaf node is 64bits wide and on each level > + * nodes are indexed by DDI[depth]. > + */ > + ddtp += (devid >> split) & 0x1FF; > + > + /* > + * Check if this node has been populated and if not > + * allocate a new level and populate it. 
> + */ > + do { > + ddt = READ_ONCE(*(unsigned long *)ddtp); > + if (ddt & RISCV_IOMMU_DDTE_VALID) { > + ddtp = __va(ppn_to_phys(ddt)); > + break; > + } > + > + ptr = riscv_iommu_get_pages(iommu, 0); > + if (!ptr) > + return NULL; > + > + new = phys_to_ppn(__pa(ptr)) | RISCV_IOMMU_DDTE_VALID; > + old = cmpxchg_relaxed((unsigned long *)ddtp, ddt, new); > + > + if (old == ddt) { > + ddtp = (u64 *)ptr; > + break; > + } > + > + /* Race setting DDT detected, re-read and retry. */ > + riscv_iommu_free_pages(iommu, ptr); > + } while (1); > + } > + > + /* > + * Grab the node that matches DDI[depth], note that when using base > + * format the device context is 4 * 64bits, and the extended format > + * is 8 * 64bits, hence the (3 - base_format) below. > + */ > + ddtp += (devid & ((64 << base_format) - 1)) << (3 - base_format); > + > + return (struct riscv_iommu_dc *)ddtp; > +} > + > /* > * This is best effort IOMMU translation shutdown flow. > * Disable IOMMU without waiting for hardware response. > @@ -37,10 +190,201 @@ static void riscv_iommu_disable(struct riscv_iommu_device *iommu) > riscv_iommu_writel(iommu, RISCV_IOMMU_REG_PQCSR, 0); > } > > +#define riscv_iommu_read_ddtp(iommu) ({ \ > + u64 ddtp; \ > + riscv_iommu_readq_timeout((iommu), RISCV_IOMMU_REG_DDTP, ddtp, \ > + !(ddtp & RISCV_IOMMU_DDTP_BUSY), 10, \ > + RISCV_IOMMU_DDTP_TIMEOUT); \ > + ddtp; }) > + > +static int riscv_iommu_iodir_alloc(struct riscv_iommu_device *iommu) > +{ > + u64 ddtp; > + unsigned int mode; > + > + ddtp = riscv_iommu_read_ddtp(iommu); > + if (ddtp & RISCV_IOMMU_DDTP_BUSY) > + return -EBUSY; > + > + /* > + * It is optional for the hardware to report a fixed address for device > + * directory root page when DDT.MODE is OFF or BARE. 
> + */ > + mode = FIELD_GET(RISCV_IOMMU_DDTP_MODE, ddtp); > + if (mode == RISCV_IOMMU_DDTP_MODE_BARE || > + mode == RISCV_IOMMU_DDTP_MODE_OFF) { > + /* Use WARL to discover hardware fixed DDT PPN */ > + riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_DDTP, > + FIELD_PREP(RISCV_IOMMU_DDTP_MODE, mode)); > + ddtp = riscv_iommu_read_ddtp(iommu); > + if (ddtp & RISCV_IOMMU_DDTP_BUSY) > + return -EBUSY; > + > + iommu->ddt_phys = ppn_to_phys(ddtp); > + if (iommu->ddt_phys) > + iommu->ddt_root = devm_ioremap(iommu->dev, > + iommu->ddt_phys, PAGE_SIZE); > + if (iommu->ddt_root) > + memset(iommu->ddt_root, 0, PAGE_SIZE); > + } > + > + if (!iommu->ddt_root) { > + iommu->ddt_root = riscv_iommu_get_pages(iommu, 0); > + iommu->ddt_phys = __pa(iommu->ddt_root); > + } > + > + if (!iommu->ddt_root) > + return -ENOMEM; > + > + return 0; > +} > + > +/* > + * Discover supported DDT modes starting from requested value, > + * configure DDTP register with accepted mode and root DDT address. > + * Accepted iommu->ddt_mode is updated on success. > + */ > +static int riscv_iommu_iodir_set_mode(struct riscv_iommu_device *iommu, > + unsigned int ddtp_mode) > +{ > + struct device *dev = iommu->dev; > + u64 ddtp, rq_ddtp; > + unsigned int mode, rq_mode = ddtp_mode; > + > + ddtp = riscv_iommu_read_ddtp(iommu); > + if (ddtp & RISCV_IOMMU_DDTP_BUSY) > + return -EBUSY; > + > + /* Disallow state transition from xLVL to xLVL. 
*/ > + mode = FIELD_GET(RISCV_IOMMU_DDTP_MODE, ddtp); > + if (mode != RISCV_IOMMU_DDTP_MODE_BARE && > + mode != RISCV_IOMMU_DDTP_MODE_OFF && > + rq_mode != RISCV_IOMMU_DDTP_MODE_BARE && > + rq_mode != RISCV_IOMMU_DDTP_MODE_OFF) > + return -EINVAL; > + > + do { > + rq_ddtp = FIELD_PREP(RISCV_IOMMU_DDTP_MODE, rq_mode); > + if (rq_mode > RISCV_IOMMU_DDTP_MODE_BARE) > + rq_ddtp |= phys_to_ppn(iommu->ddt_phys); > + > + riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_DDTP, rq_ddtp); > + ddtp = riscv_iommu_read_ddtp(iommu); > + if (ddtp & RISCV_IOMMU_DDTP_BUSY) { > + dev_err(dev, "timeout when setting ddtp (ddt mode: %u, read: %llx)\n", > + rq_mode, ddtp); > + return -EBUSY; > + } > + > + /* Verify IOMMU hardware accepts new DDTP config. */ > + mode = FIELD_GET(RISCV_IOMMU_DDTP_MODE, ddtp); > + > + if (rq_mode == mode) > + break; > + > + /* Hardware mandatory DDTP mode has not been accepted. */ > + if (rq_mode < RISCV_IOMMU_DDTP_MODE_1LVL && rq_ddtp != ddtp) { > + dev_err(dev, "DDTP update failed hw: %llx vs %llx\n", > + ddtp, rq_ddtp); > + return -EINVAL; > + } > + > + /* > + * Mode field is WARL, an IOMMU may support a subset of > + * directory table levels in which case if we tried to set > + * an unsupported number of levels we'll readback either > + * a valid xLVL or off/bare. If we got off/bare, try again > + * with a smaller xLVL. > + */ > + if (mode < RISCV_IOMMU_DDTP_MODE_1LVL && > + rq_mode > RISCV_IOMMU_DDTP_MODE_1LVL) { > + dev_dbg(dev, "DDTP hw mode %u vs %u\n", mode, rq_mode); > + rq_mode--; > + continue; > + } > + > + /* > + * We tried all supported modes and IOMMU hardware failed to > + * accept new settings, something went very wrong since off/bare > + * and at least one xLVL must be supported. 
> + */ > + dev_err(dev, "DDTP hw mode %u, failed to set %u\n", > + mode, ddtp_mode); > + return -EINVAL; > + } while (1); > + > + iommu->ddt_mode = mode; > + if (mode != ddtp_mode) > + dev_dbg(dev, "DDTP hw mode %u, requested %u\n", mode, ddtp_mode); > + > + return 0; > +} > + > +#define RISCV_IOMMU_FSC_BARE 0 > + > +/* > + * Update IODIR for the device. > + * > + * During the execution of riscv_iommu_probe_device(), IODIR entries are > + * allocated for the device's identifiers. Device context invalidation > + * becomes necessary only if one of the updated entries was previously > + * marked as valid, given that invalid device context entries are not > + * cached by the IOMMU hardware. > + * In this implementation, updating a valid device context while the > + * device is not quiesced might be disruptive, potentially causing > + * interim translation faults. > + */ > +static void riscv_iommu_iodir_update(struct riscv_iommu_device *iommu, > + struct device *dev, u64 fsc, u64 ta) > +{ > + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); > + struct riscv_iommu_dc *dc; > + u64 tc; > + int i; > + > + /* Device context invalidation ignored for now. */ > + > + /* > + * For device context with DC_TC_PDTV = 0, translation attributes valid bit > + * is stored as DC_TC_V bit (both sharing the same location at BIT(0)). > + */ > + for (i = 0; i < fwspec->num_ids; i++) { > + dc = riscv_iommu_get_dc(iommu, fwspec->ids[i]); > + tc = READ_ONCE(dc->tc); > + tc |= ta & RISCV_IOMMU_DC_TC_V; > + > + WRITE_ONCE(dc->fsc, fsc); > + WRITE_ONCE(dc->ta, ta & RISCV_IOMMU_PC_TA_PSCID); > + /* Update device context, write TC.V as the last step. */ > + dma_wmb(); > + WRITE_ONCE(dc->tc, tc); > + } Does it make sense to invalidate the DDTE after we update the DDTE in memory? This behavior will affect the nested IOMMU mechanism. The VMM has to catch the event of a DDTE update from the guest and then eventually go into the host IOMMU driver to configure the IOMMU hardware. 
One way to achieve this is by catching the page fault of the in-memory DDT page table, but it will be difficult to modify the attribute of only a portion of the memory. Another way is through the page fault of the MMIO region. A good candidate for the MMIO register might be the tail pointer of the command queue because it makes sense to invalidate the DDTE after updating a DDTE. I checked the SMMU and DMAR implementations; they also invalidate the cache after updating the table's entry. I was wondering if there is any chance to modify the logic here, or if there are any ideas for this situation? Thanks. > +} > + > +static int riscv_iommu_attach_blocking_domain(struct iommu_domain *iommu_domain, > + struct device *dev) > +{ > + struct riscv_iommu_device *iommu = dev_to_iommu(dev); > + > + riscv_iommu_iodir_update(iommu, dev, RISCV_IOMMU_FSC_BARE, 0); > + > + return 0; > +} > + > +static struct iommu_domain riscv_iommu_blocking_domain = { > + .type = IOMMU_DOMAIN_BLOCKED, > + .ops = &(const struct iommu_domain_ops) { > + .attach_dev = riscv_iommu_attach_blocking_domain, > + } > +}; > + > static int riscv_iommu_attach_identity_domain(struct iommu_domain *iommu_domain, > struct device *dev) > { > - /* Global pass-through already enabled, do nothing for now. 
*/ > + struct riscv_iommu_device *iommu = dev_to_iommu(dev); > + > + riscv_iommu_iodir_update(iommu, dev, RISCV_IOMMU_FSC_BARE, RISCV_IOMMU_PC_TA_V); > + > return 0; > } > > @@ -72,6 +416,9 @@ static struct iommu_device *riscv_iommu_probe_device(struct device *dev) > { > struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); > struct riscv_iommu_device *iommu; > + struct riscv_iommu_dc *dc; > + u64 tc; > + int i; > > if (!fwspec || !fwspec->iommu_fwnode->dev || !fwspec->num_ids) > return ERR_PTR(-ENODEV); > @@ -80,12 +427,37 @@ static struct iommu_device *riscv_iommu_probe_device(struct device *dev) > if (!iommu) > return ERR_PTR(-ENODEV); > > + /* > + * IOMMU hardware operating in fail-over BARE mode will provide > + * identity translation for all connected devices anyway... > + */ > + if (iommu->ddt_mode <= RISCV_IOMMU_DDTP_MODE_BARE) > + return ERR_PTR(-ENODEV); > + > + /* > + * Allocate and pre-configure device context entries in > + * the device directory. Do not mark the context valid yet. 
> + */ > + tc = 0; > + if (iommu->caps & RISCV_IOMMU_CAP_AMO_HWAD) > + tc |= RISCV_IOMMU_DC_TC_SADE; > + for (i = 0; i < fwspec->num_ids; i++) { > + dc = riscv_iommu_get_dc(iommu, fwspec->ids[i]); > + if (!dc) > + return ERR_PTR(-ENODEV); > + if (READ_ONCE(dc->tc) & RISCV_IOMMU_DC_TC_V) > + dev_warn(dev, "already attached to IOMMU device directory\n"); > + WRITE_ONCE(dc->tc, tc); > + } > + > return &iommu->iommu; > } > > static const struct iommu_ops riscv_iommu_ops = { > .of_xlate = riscv_iommu_of_xlate, > .identity_domain = &riscv_iommu_identity_domain, > + .blocked_domain = &riscv_iommu_blocking_domain, > + .release_domain = &riscv_iommu_blocking_domain, > .def_domain_type = riscv_iommu_device_domain_type, > .device_group = riscv_iommu_device_group, > .probe_device = riscv_iommu_probe_device, > @@ -128,6 +500,7 @@ void riscv_iommu_remove(struct riscv_iommu_device *iommu) > { > iommu_device_unregister(&iommu->iommu); > iommu_device_sysfs_remove(&iommu->iommu); > + riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_MODE_OFF); > } > > int riscv_iommu_init(struct riscv_iommu_device *iommu) > @@ -138,18 +511,20 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu) > if (rc) > return dev_err_probe(iommu->dev, rc, "unexpected device state\n"); > > - /* > - * Placeholder for a complete IOMMU device initialization. For now, > - * only bare minimum: enable global identity mapping mode and register sysfs. 
> - */ > - riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_DDTP, > - FIELD_PREP(RISCV_IOMMU_DDTP_MODE, RISCV_IOMMU_DDTP_MODE_BARE)); > + rc = riscv_iommu_iodir_alloc(iommu); > + if (rc) > + return rc; > + > + rc = riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_MODE_MAX); > + if (rc) > + return rc; > > rc = iommu_device_sysfs_add(&iommu->iommu, NULL, NULL, "riscv-iommu@%s", > dev_name(iommu->dev)); > - if (rc) > - return dev_err_probe(iommu->dev, rc, > - "cannot register sysfs interface\n"); > + if (rc) { > + dev_err_probe(iommu->dev, rc, "cannot register sysfs interface\n"); > + goto err_iodir_off; > + } > > rc = iommu_device_register(&iommu->iommu, &riscv_iommu_ops, iommu->dev); > if (rc) { > @@ -161,5 +536,7 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu) > > err_remove_sysfs: > iommu_device_sysfs_remove(&iommu->iommu); > +err_iodir_off: > + riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_MODE_OFF); > return rc; > } > diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h > index 700e33dc2446..f1696926582c 100644 > --- a/drivers/iommu/riscv/iommu.h > +++ b/drivers/iommu/riscv/iommu.h > @@ -34,6 +34,11 @@ struct riscv_iommu_device { > /* available interrupt numbers, MSI or WSI */ > unsigned int irqs[RISCV_IOMMU_INTR_COUNT]; > unsigned int irqs_count; > + > + /* device directory */ > + unsigned int ddt_mode; > + dma_addr_t ddt_phys; > + u64 *ddt_root; > }; > > int riscv_iommu_init(struct riscv_iommu_device *iommu); > -- > 2.34.1 >
... > > +/* 5.19 Performance monitoring counter overflow status (32bits) */ > > +#define RISCV_IOMMU_REG_IOCOUNTOVF 0x0058 > > +#define RISCV_IOMMU_IOCOUNTOVF_CY BIT(0) > > +#define RISCV_IOMMU_IOCOUNTOVF_HPM GENMASK_ULL(31, 1) > > RISCV_IOMMU_REG_IOCNTOVF* for all above. It looks like this > got renamed and the header of the spec subsection still has > the old name, but otherwise the new name is referenced. > > > + > > +/* 5.20 Performance monitoring counter inhibits (32bits) */ > > +#define RISCV_IOMMU_REG_IOCOUNTINH 0x005C > > +#define RISCV_IOMMU_IOCOUNTINH_CY BIT(0) > > +#define RISCV_IOMMU_IOCOUNTINH_HPM GENMASK(31, 1) > > RISCV_IOMMU_IOCNTINH* for all above. Same header with old > name spec issue. > Scratch these suggestions. Based on IOMMU spec PR#243 ("Clarification updates to IOMMU v1.0.0") it looks like the iocnt* names are getting fixed up to be iocount* names. Thanks, drew
On Fri, May 24, 2024 at 12:34:42PM GMT, Tomasz Jeznach wrote: ... > +static struct platform_driver riscv_iommu_platform_driver = { > + .probe = riscv_iommu_platform_probe, > + .remove_new = riscv_iommu_platform_remove, Hi Tomasz, I think we should also support .shutdown (just turn the IOMMU off?), otherwise the IOMMU driver reports EBUSY and fails to initialize when rebooting. Same comment for the PCI driver. Thanks, drew
On Wed, May 29, 2024 at 8:15 AM Andrew Jones <ajones@ventanamicro.com> wrote: > > Hi Tomasz, > > I reviewed iommu-bits.h to the spec. Most naming matches exactly, which > is nice, but I've pointed out a few which don't. > > Thanks, > drew > > > On Fri, May 24, 2024 at 12:34:42PM GMT, Tomasz Jeznach wrote: > ... > > diff --git a/drivers/iommu/riscv/iommu-bits.h b/drivers/iommu/riscv/iommu-bits.h > > new file mode 100644 > > index 000000000000..48f795a0cd10 > > --- /dev/null > > +++ b/drivers/iommu/riscv/iommu-bits.h > > @@ -0,0 +1,707 @@ > > +/* SPDX-License-Identifier: GPL-2.0-only */ > > +/* > > + * Copyright © 2022-2024 Rivos Inc. > > + * Copyright © 2023 FORTH-ICS/CARV > > + * Copyright © 2023 RISC-V IOMMU Task Group > > + * > > + * RISC-V IOMMU - Register Layout and Data Structures. > > + * > > + * Based on the 'RISC-V IOMMU Architecture Specification', Version 1.0 > > + * Published at https://github.com/riscv-non-isa/riscv-iommu > > + * > > + */ > > + > > +#ifndef _RISCV_IOMMU_BITS_H_ > > +#define _RISCV_IOMMU_BITS_H_ > > + > > +#include <linux/types.h> > > +#include <linux/bitfield.h> > > +#include <linux/bits.h> > > + > > +/* > > + * Chapter 5: Memory Mapped register interface > > + */ > > + > > +/* Common field positions */ > > +#define RISCV_IOMMU_PPN_FIELD GENMASK_ULL(53, 10) > > +#define RISCV_IOMMU_QUEUE_LOGSZ_FIELD GENMASK_ULL(4, 0) > > RISCV_IOMMU_QUEUE_LOG2SZ_FIELD > > > +#define RISCV_IOMMU_QUEUE_INDEX_FIELD GENMASK_ULL(31, 0) > > +#define RISCV_IOMMU_QUEUE_ENABLE BIT(0) > > +#define RISCV_IOMMU_QUEUE_INTR_ENABLE BIT(1) > > +#define RISCV_IOMMU_QUEUE_MEM_FAULT BIT(8) > > +#define RISCV_IOMMU_QUEUE_OVERFLOW BIT(9) > > +#define RISCV_IOMMU_QUEUE_ACTIVE BIT(16) > > +#define RISCV_IOMMU_QUEUE_BUSY BIT(17) > > + > > +#define RISCV_IOMMU_ATP_PPN_FIELD GENMASK_ULL(43, 0) > > +#define RISCV_IOMMU_ATP_MODE_FIELD GENMASK_ULL(63, 60) > > + > > +/* 5.3 IOMMU Capabilities (64bits) */ > > +#define RISCV_IOMMU_REG_CAP 0x0000 > > Maybe too verbose, but the name is 
"capabilities". So all the below 'CAP' > instances would match the spec better if spelled out. > > > +#define RISCV_IOMMU_CAP_VERSION GENMASK_ULL(7, 0) > > +#define RISCV_IOMMU_CAP_S_SV32 BIT_ULL(8) > > +#define RISCV_IOMMU_CAP_S_SV39 BIT_ULL(9) > > +#define RISCV_IOMMU_CAP_S_SV48 BIT_ULL(10) > > +#define RISCV_IOMMU_CAP_S_SV57 BIT_ULL(11) > > +#define RISCV_IOMMU_CAP_SVPBMT BIT_ULL(15) > > +#define RISCV_IOMMU_CAP_G_SV32 BIT_ULL(16) > > +#define RISCV_IOMMU_CAP_G_SV39 BIT_ULL(17) > > +#define RISCV_IOMMU_CAP_G_SV48 BIT_ULL(18) > > +#define RISCV_IOMMU_CAP_G_SV57 BIT_ULL(19) > > RISCV_IOMMU_CAPABILITIES_SV32X4 > RISCV_IOMMU_CAPABILITIES_SV39X4 > RISCV_IOMMU_CAPABILITIES_SV48X4 > RISCV_IOMMU_CAPABILITIES_SV57X4 > > > +#define RISCV_IOMMU_CAP_AMO_MRIF BIT_ULL(21) > > +#define RISCV_IOMMU_CAP_MSI_FLAT BIT_ULL(22) > > +#define RISCV_IOMMU_CAP_MSI_MRIF BIT_ULL(23) > > +#define RISCV_IOMMU_CAP_AMO_HWAD BIT_ULL(24) > > +#define RISCV_IOMMU_CAP_ATS BIT_ULL(25) > > +#define RISCV_IOMMU_CAP_T2GPA BIT_ULL(26) > > +#define RISCV_IOMMU_CAP_END BIT_ULL(27) > > +#define RISCV_IOMMU_CAP_IGS GENMASK_ULL(29, 28) > > +#define RISCV_IOMMU_CAP_HPM BIT_ULL(30) > > +#define RISCV_IOMMU_CAP_DBG BIT_ULL(31) > > +#define RISCV_IOMMU_CAP_PAS GENMASK_ULL(37, 32) > > +#define RISCV_IOMMU_CAP_PD8 BIT_ULL(38) > > +#define RISCV_IOMMU_CAP_PD17 BIT_ULL(39) > > +#define RISCV_IOMMU_CAP_PD20 BIT_ULL(40) > > + > > +#define RISCV_IOMMU_CAP_VERSION_VER_MASK 0xF0 > > RISCV_IOMMU_CAP_VERSION_MAJOR_MASK > > > +#define RISCV_IOMMU_CAP_VERSION_REV_MASK 0x0F > > RISCV_IOMMU_CAP_VERSION_MINOR_MASK > > > + > > +/** > > + * enum riscv_iommu_igs_settings - Interrupt Generation Support Settings > > + * @RISCV_IOMMU_CAP_IGS_MSI: I/O MMU supports only MSI generation > > + * @RISCV_IOMMU_CAP_IGS_WSI: I/O MMU supports only Wired-Signaled interrupt > > + * @RISCV_IOMMU_CAP_IGS_BOTH: I/O MMU supports both MSI and WSI generation > > Can we always spell IOMMU 'IOMMU'? 
> > > + * @RISCV_IOMMU_CAP_IGS_RSRV: Reserved for standard use > > + */ > > +enum riscv_iommu_igs_settings { > > + RISCV_IOMMU_CAP_IGS_MSI = 0, > > + RISCV_IOMMU_CAP_IGS_WSI = 1, > > + RISCV_IOMMU_CAP_IGS_BOTH = 2, > > + RISCV_IOMMU_CAP_IGS_RSRV = 3 > > +}; > > + > > +/* 5.4 Features control register (32bits) */ > > +#define RISCV_IOMMU_REG_FCTL 0x0008 > > +#define RISCV_IOMMU_FCTL_BE BIT(0) > > +#define RISCV_IOMMU_FCTL_WSI BIT(1) > > +#define RISCV_IOMMU_FCTL_GXL BIT(2) > > + > > +/* 5.5 Device-directory-table pointer (64bits) */ > > +#define RISCV_IOMMU_REG_DDTP 0x0010 > > +#define RISCV_IOMMU_DDTP_MODE GENMASK_ULL(3, 0) > > A bit unnecessary, but spec calls this 'iommu_mode', so > > RISCV_IOMMU_DDTP_IOMMU_MODE ? > > > +#define RISCV_IOMMU_DDTP_BUSY BIT_ULL(4) > > +#define RISCV_IOMMU_DDTP_PPN RISCV_IOMMU_PPN_FIELD > > + > > +/** > > + * enum riscv_iommu_ddtp_modes - I/O MMU translation modes > > + * @RISCV_IOMMU_DDTP_MODE_OFF: No inbound transactions allowed > > + * @RISCV_IOMMU_DDTP_MODE_BARE: Pass-through mode > > + * @RISCV_IOMMU_DDTP_MODE_1LVL: One-level DDT > > + * @RISCV_IOMMU_DDTP_MODE_2LVL: Two-level DDT > > + * @RISCV_IOMMU_DDTP_MODE_3LVL: Three-level DDT > > + * @RISCV_IOMMU_DDTP_MODE_MAX: Max value allowed by specification > > + */ > > +enum riscv_iommu_ddtp_modes { > > + RISCV_IOMMU_DDTP_MODE_OFF = 0, > > + RISCV_IOMMU_DDTP_MODE_BARE = 1, > > + RISCV_IOMMU_DDTP_MODE_1LVL = 2, > > + RISCV_IOMMU_DDTP_MODE_2LVL = 3, > > + RISCV_IOMMU_DDTP_MODE_3LVL = 4, > > + RISCV_IOMMU_DDTP_MODE_MAX = 4 > > +}; > > + > > +/* 5.6 Command Queue Base (64bits) */ > > +#define RISCV_IOMMU_REG_CQB 0x0018 > > +#define RISCV_IOMMU_CQB_ENTRIES RISCV_IOMMU_QUEUE_LOGSZ_FIELD > > +#define RISCV_IOMMU_CQB_PPN RISCV_IOMMU_PPN_FIELD > > + > > +/* 5.7 Command Queue head (32bits) */ > > +#define RISCV_IOMMU_REG_CQH 0x0020 > > +#define RISCV_IOMMU_CQH_INDEX RISCV_IOMMU_QUEUE_INDEX_FIELD > > + > > +/* 5.8 Command Queue tail (32bits) */ > > +#define RISCV_IOMMU_REG_CQT 0x0024 > > 
+#define RISCV_IOMMU_CQT_INDEX RISCV_IOMMU_QUEUE_INDEX_FIELD > > + > > +/* 5.9 Fault Queue Base (64bits) */ > > +#define RISCV_IOMMU_REG_FQB 0x0028 > > +#define RISCV_IOMMU_FQB_ENTRIES RISCV_IOMMU_QUEUE_LOGSZ_FIELD > > +#define RISCV_IOMMU_FQB_PN RISCV_IOMMU_PPN_FIELD > > RISCV_IOMMU_FQB_PPN > > > + > > +/* 5.10 Fault Queue Head (32bits) */ > > +#define RISCV_IOMMU_REG_FQH 0x0030 > > +#define RISCV_IOMMU_FQH_INDEX RISCV_IOMMU_QUEUE_INDEX_FIELD > > + > > +/* 5.11 Fault Queue tail (32bits) */ > > +#define RISCV_IOMMU_REG_FQT 0x0034 > > +#define RISCV_IOMMU_FQT_INDEX RISCV_IOMMU_QUEUE_INDEX_FIELD > > + > > +/* 5.12 Page Request Queue base (64bits) */ > > +#define RISCV_IOMMU_REG_PQB 0x0038 > > +#define RISCV_IOMMU_PQB_ENTRIES RISCV_IOMMU_QUEUE_LOGSZ_FIELD > > +#define RISCV_IOMMU_PQB_PPN RISCV_IOMMU_PPN_FIELD > > + > > +/* 5.13 Page Request Queue head (32bits) */ > > +#define RISCV_IOMMU_REG_PQH 0x0040 > > +#define RISCV_IOMMU_PQH_INDEX RISCV_IOMMU_QUEUE_INDEX_FIELD > > + > > +/* 5.14 Page Request Queue tail (32bits) */ > > +#define RISCV_IOMMU_REG_PQT 0x0044 > > +#define RISCV_IOMMU_PQT_INDEX_MASK RISCV_IOMMU_QUEUE_INDEX_FIELD > > + > > +/* 5.15 Command Queue CSR (32bits) */ > > +#define RISCV_IOMMU_REG_CQCSR 0x0048 > > +#define RISCV_IOMMU_CQCSR_CQEN RISCV_IOMMU_QUEUE_ENABLE > > +#define RISCV_IOMMU_CQCSR_CIE RISCV_IOMMU_QUEUE_INTR_ENABLE > > +#define RISCV_IOMMU_CQCSR_CQMF RISCV_IOMMU_QUEUE_MEM_FAULT > > +#define RISCV_IOMMU_CQCSR_CMD_TO BIT(9) > > +#define RISCV_IOMMU_CQCSR_CMD_ILL BIT(10) > > +#define RISCV_IOMMU_CQCSR_FENCE_W_IP BIT(11) > > +#define RISCV_IOMMU_CQCSR_CQON RISCV_IOMMU_QUEUE_ACTIVE > > +#define RISCV_IOMMU_CQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY > > + > > +/* 5.16 Fault Queue CSR (32bits) */ > > +#define RISCV_IOMMU_REG_FQCSR 0x004C > > +#define RISCV_IOMMU_FQCSR_FQEN RISCV_IOMMU_QUEUE_ENABLE > > +#define RISCV_IOMMU_FQCSR_FIE RISCV_IOMMU_QUEUE_INTR_ENABLE > > +#define RISCV_IOMMU_FQCSR_FQMF RISCV_IOMMU_QUEUE_MEM_FAULT > > +#define 
RISCV_IOMMU_FQCSR_FQOF RISCV_IOMMU_QUEUE_OVERFLOW > > +#define RISCV_IOMMU_FQCSR_FQON RISCV_IOMMU_QUEUE_ACTIVE > > +#define RISCV_IOMMU_FQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY > > + > > +/* 5.17 Page Request Queue CSR (32bits) */ > > +#define RISCV_IOMMU_REG_PQCSR 0x0050 > > +#define RISCV_IOMMU_PQCSR_PQEN RISCV_IOMMU_QUEUE_ENABLE > > +#define RISCV_IOMMU_PQCSR_PIE RISCV_IOMMU_QUEUE_INTR_ENABLE > > +#define RISCV_IOMMU_PQCSR_PQMF RISCV_IOMMU_QUEUE_MEM_FAULT > > +#define RISCV_IOMMU_PQCSR_PQOF RISCV_IOMMU_QUEUE_OVERFLOW > > +#define RISCV_IOMMU_PQCSR_PQON RISCV_IOMMU_QUEUE_ACTIVE > > +#define RISCV_IOMMU_PQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY > > + > > +/* 5.18 Interrupt Pending Status (32bits) */ > > +#define RISCV_IOMMU_REG_IPSR 0x0054 > > + > > +#define RISCV_IOMMU_INTR_CQ 0 > > +#define RISCV_IOMMU_INTR_FQ 1 > > +#define RISCV_IOMMU_INTR_PM 2 > > +#define RISCV_IOMMU_INTR_PQ 3 > > +#define RISCV_IOMMU_INTR_COUNT 4 > > + > > +#define RISCV_IOMMU_IPSR_CIP BIT(RISCV_IOMMU_INTR_CQ) > > +#define RISCV_IOMMU_IPSR_FIP BIT(RISCV_IOMMU_INTR_FQ) > > +#define RISCV_IOMMU_IPSR_PMIP BIT(RISCV_IOMMU_INTR_PM) > > +#define RISCV_IOMMU_IPSR_PIP BIT(RISCV_IOMMU_INTR_PQ) > > + > > +/* 5.19 Performance monitoring counter overflow status (32bits) */ > > +#define RISCV_IOMMU_REG_IOCOUNTOVF 0x0058 > > +#define RISCV_IOMMU_IOCOUNTOVF_CY BIT(0) > > +#define RISCV_IOMMU_IOCOUNTOVF_HPM GENMASK_ULL(31, 1) > > RISCV_IOMMU_REG_IOCNTOVF* for all above. It looks like this > got renamed and the header of the spec subsection still has > the old name, but otherwise the new name is referenced. > > > + > > +/* 5.20 Performance monitoring counter inhibits (32bits) */ > > +#define RISCV_IOMMU_REG_IOCOUNTINH 0x005C > > +#define RISCV_IOMMU_IOCOUNTINH_CY BIT(0) > > +#define RISCV_IOMMU_IOCOUNTINH_HPM GENMASK(31, 1) > > RISCV_IOMMU_IOCNTINH* for all above. Same header with old > name spec issue. 
> > > + > > +/* 5.21 Performance monitoring cycles counter (64bits) */ > > +#define RISCV_IOMMU_REG_IOHPMCYCLES 0x0060 > > +#define RISCV_IOMMU_IOHPMCYCLES_COUNTER GENMASK_ULL(62, 0) > > +#define RISCV_IOMMU_IOHPMCYCLES_OVF BIT_ULL(63) > > RISCV_IOMMU_IOHPMCYCLES_OF > > > + > > +/* 5.22 Performance monitoring event counters (31 * 64bits) */ > > +#define RISCV_IOMMU_REG_IOHPMCTR_BASE 0x0068 > > +#define RISCV_IOMMU_REG_IOHPMCTR(_n) (RISCV_IOMMU_REG_IOHPMCTR_BASE + ((_n) * 0x8)) > > + > > +/* 5.23 Performance monitoring event selectors (31 * 64bits) */ > > +#define RISCV_IOMMU_REG_IOHPMEVT_BASE 0x0160 > > +#define RISCV_IOMMU_REG_IOHPMEVT(_n) (RISCV_IOMMU_REG_IOHPMEVT_BASE + ((_n) * 0x8)) > > +#define RISCV_IOMMU_IOHPMEVT_CNT 31 > > What is RISCV_IOMMU_IOHPMEVT_CNT ? > > > +#define RISCV_IOMMU_IOHPMEVT_EVENT_ID GENMASK_ULL(14, 0) > > RISCV_IOMMU_IOHPMEVT_EVENTID (EVENT_ID reads nicer though...) > > > +#define RISCV_IOMMU_IOHPMEVT_DMASK BIT_ULL(15) > > +#define RISCV_IOMMU_IOHPMEVT_PID_PSCID GENMASK_ULL(35, 16) > > +#define RISCV_IOMMU_IOHPMEVT_DID_GSCID GENMASK_ULL(59, 36) > > +#define RISCV_IOMMU_IOHPMEVT_PV_PSCV BIT_ULL(60) > > +#define RISCV_IOMMU_IOHPMEVT_DV_GSCV BIT_ULL(61) > > +#define RISCV_IOMMU_IOHPMEVT_IDT BIT_ULL(62) > > +#define RISCV_IOMMU_IOHPMEVT_OF BIT_ULL(63) > > + > > +/** > > + * enum riscv_iommu_hpmevent_id - Performance-monitoring event identifier > > + * > > + * @RISCV_IOMMU_HPMEVENT_INVALID: Invalid event, do not count > > + * @RISCV_IOMMU_HPMEVENT_URQ: Untranslated requests > > + * @RISCV_IOMMU_HPMEVENT_TRQ: Translated requests > > + * @RISCV_IOMMU_HPMEVENT_ATS_RQ: ATS translation requests > > + * @RISCV_IOMMU_HPMEVENT_TLB_MISS: TLB misses > > + * @RISCV_IOMMU_HPMEVENT_DD_WALK: Device directory walks > > + * @RISCV_IOMMU_HPMEVENT_PD_WALK: Process directory walks > > + * @RISCV_IOMMU_HPMEVENT_S_VS_WALKS: S/VS-Stage page table walks > > + * @RISCV_IOMMU_HPMEVENT_G_WALKS: G-Stage page table walks > > Let's copy+paste these descriptions from the 
spec to allow them to be > copy+pasted from here into a search in the spec. (Most already mostly > match, except for the G-stage vs. second-stage type stuff.) > > > + * @RISCV_IOMMU_HPMEVENT_MAX: Value to denote maximum Event IDs > > + */ > > +enum riscv_iommu_hpmevent_id { > > + RISCV_IOMMU_HPMEVENT_INVALID = 0, > > + RISCV_IOMMU_HPMEVENT_URQ = 1, > > + RISCV_IOMMU_HPMEVENT_TRQ = 2, > > + RISCV_IOMMU_HPMEVENT_ATS_RQ = 3, > > + RISCV_IOMMU_HPMEVENT_TLB_MISS = 4, > > + RISCV_IOMMU_HPMEVENT_DD_WALK = 5, > > + RISCV_IOMMU_HPMEVENT_PD_WALK = 6, > > + RISCV_IOMMU_HPMEVENT_S_VS_WALKS = 7, > > + RISCV_IOMMU_HPMEVENT_G_WALKS = 8, > > + RISCV_IOMMU_HPMEVENT_MAX = 9 > > +}; > > + > > +/* 5.24 Translation request IOVA (64bits) */ > > +#define RISCV_IOMMU_REG_TR_REQ_IOVA 0x0258 > > +#define RISCV_IOMMU_TR_REQ_IOVA_VPN GENMASK_ULL(63, 12) > > + > > +/* 5.25 Translation request control (64bits) */ > > +#define RISCV_IOMMU_REG_TR_REQ_CTL 0x0260 > > +#define RISCV_IOMMU_TR_REQ_CTL_GO_BUSY BIT_ULL(0) > > +#define RISCV_IOMMU_TR_REQ_CTL_PRIV BIT_ULL(1) > > +#define RISCV_IOMMU_TR_REQ_CTL_EXE BIT_ULL(2) > > +#define RISCV_IOMMU_TR_REQ_CTL_NW BIT_ULL(3) > > +#define RISCV_IOMMU_TR_REQ_CTL_PID GENMASK_ULL(31, 12) > > +#define RISCV_IOMMU_TR_REQ_CTL_PV BIT_ULL(32) > > +#define RISCV_IOMMU_TR_REQ_CTL_DID GENMASK_ULL(63, 40) > > + > > +/* 5.26 Translation request response (64bits) */ > > +#define RISCV_IOMMU_REG_TR_RESPONSE 0x0268 > > +#define RISCV_IOMMU_TR_RESPONSE_FAULT BIT_ULL(0) > > +#define RISCV_IOMMU_TR_RESPONSE_PBMT GENMASK_ULL(8, 7) > > +#define RISCV_IOMMU_TR_RESPONSE_SZ BIT_ULL(9) > > RISCV_IOMMU_TR_RESPONSE_S (although SZ is easier to understand...) 
> > > +#define RISCV_IOMMU_TR_RESPONSE_PPN RISCV_IOMMU_PPN_FIELD > > + > > +/* 5.27 Interrupt cause to vector (64bits) */ > > +#define RISCV_IOMMU_REG_ICVEC 0x02F8 > > +#define RISCV_IOMMU_ICVEC_CIV GENMASK_ULL(3, 0) > > +#define RISCV_IOMMU_ICVEC_FIV GENMASK_ULL(7, 4) > > +#define RISCV_IOMMU_ICVEC_PMIV GENMASK_ULL(11, 8) > > +#define RISCV_IOMMU_ICVEC_PIV GENMASK_ULL(15, 12) > > + > > +/* 5.28 MSI Configuration table (32 * 64bits) */ > > +#define RISCV_IOMMU_REG_MSI_CONFIG 0x0300 > > +#define RISCV_IOMMU_REG_MSI_ADDR(_n) (RISCV_IOMMU_REG_MSI_CONFIG + ((_n) * 0x10)) > > +#define RISCV_IOMMU_MSI_ADDR GENMASK_ULL(55, 2) > > +#define RISCV_IOMMU_REG_MSI_DATA(_n) (RISCV_IOMMU_REG_MSI_CONFIG + ((_n) * 0x10) + 0x08) > > +#define RISCV_IOMMU_MSI_DATA GENMASK_ULL(31, 0) > > +#define RISCV_IOMMU_REG_MSI_VEC_CTL(_n) (RISCV_IOMMU_REG_MSI_CONFIG + ((_n) * 0x10) + 0x0C) > > +#define RISCV_IOMMU_MSI_VEC_CTL_M BIT_ULL(0) > > RISCV_IOMMU_REG_MSI_CFG_TBL for all above > > > + > > +#define RISCV_IOMMU_REG_SIZE 0x1000 > > + > > +/* > > + * Chapter 2: Data structures > > + */ > > + > > +/* > > + * Device Directory Table macros for non-leaf nodes > > + */ > > +#define RISCV_IOMMU_DDTE_VALID BIT_ULL(0) > > RISCV_IOMMU_DDTE_V > > > +#define RISCV_IOMMU_DDTE_PPN RISCV_IOMMU_PPN_FIELD > > + > > +/** > > + * struct riscv_iommu_dc - Device Context > > + * @tc: Translation Control > > + * @iohgatp: I/O Hypervisor guest address translation and protection > > + * (Second stage context) > > + * @ta: Translation Attributes > > + * @fsc: First stage context > > + * @msiptp: MSI page table pointer > > + * @msi_addr_mask: MSI address mask > > + * @msi_addr_pattern: MSI address pattern > > + * @_reserved: Reserved for future use, padding > > + * > > + * This structure is used for leaf nodes on the Device Directory Table, > > + * in case RISCV_IOMMU_CAP_MSI_FLAT is not set, the bottom 4 fields are > > + * not present and are skipped with pointer arithmetic to avoid > > + * casting, check out 
riscv_iommu_get_dc(). > > + * See section 2.1 for more details > > + */ > > +struct riscv_iommu_dc { > > + u64 tc; > > + u64 iohgatp; > > + u64 ta; > > + u64 fsc; > > + u64 msiptp; > > + u64 msi_addr_mask; > > + u64 msi_addr_pattern; > > + u64 _reserved; > > +}; > > + > > +/* Translation control fields */ > > +#define RISCV_IOMMU_DC_TC_V BIT_ULL(0) > > +#define RISCV_IOMMU_DC_TC_EN_ATS BIT_ULL(1) > > +#define RISCV_IOMMU_DC_TC_EN_PRI BIT_ULL(2) > > +#define RISCV_IOMMU_DC_TC_T2GPA BIT_ULL(3) > > +#define RISCV_IOMMU_DC_TC_DTF BIT_ULL(4) > > +#define RISCV_IOMMU_DC_TC_PDTV BIT_ULL(5) > > +#define RISCV_IOMMU_DC_TC_PRPR BIT_ULL(6) > > +#define RISCV_IOMMU_DC_TC_GADE BIT_ULL(7) > > +#define RISCV_IOMMU_DC_TC_SADE BIT_ULL(8) > > +#define RISCV_IOMMU_DC_TC_DPE BIT_ULL(9) > > +#define RISCV_IOMMU_DC_TC_SBE BIT_ULL(10) > > +#define RISCV_IOMMU_DC_TC_SXL BIT_ULL(11) > > + > > +/* Second-stage (aka G-stage) context fields */ > > +#define RISCV_IOMMU_DC_IOHGATP_PPN RISCV_IOMMU_ATP_PPN_FIELD > > +#define RISCV_IOMMU_DC_IOHGATP_GSCID GENMASK_ULL(59, 44) > > +#define RISCV_IOMMU_DC_IOHGATP_MODE RISCV_IOMMU_ATP_MODE_FIELD > > + > > +/** > > + * enum riscv_iommu_dc_iohgatp_modes - Guest address translation/protection modes > > + * @RISCV_IOMMU_DC_IOHGATP_MODE_BARE: No translation/protection > > + * @RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4: Sv32x4 (2-bit extension of Sv32), when fctl.GXL == 1 > > + * @RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4: Sv39x4 (2-bit extension of Sv39), when fctl.GXL == 0 > > + * @RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4: Sv48x4 (2-bit extension of Sv48), when fctl.GXL == 0 > > + * @RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4: Sv57x4 (2-bit extension of Sv57), when fctl.GXL == 0 > > + */ > > +enum riscv_iommu_dc_iohgatp_modes { > > + RISCV_IOMMU_DC_IOHGATP_MODE_BARE = 0, > > + RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4 = 8, > > + RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4 = 8, > > + RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4 = 9, > > + RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4 = 10 > > +}; > > + > > +/* 
Translation attributes fields */ > > +#define RISCV_IOMMU_DC_TA_PSCID GENMASK_ULL(31, 12) > > + > > +/* First-stage context fields */ > > +#define RISCV_IOMMU_DC_FSC_PPN RISCV_IOMMU_ATP_PPN_FIELD > > +#define RISCV_IOMMU_DC_FSC_MODE RISCV_IOMMU_ATP_MODE_FIELD > > + > > +/** > > + * enum riscv_iommu_dc_fsc_atp_modes - First stage address translation/protection modes > > + * @RISCV_IOMMU_DC_FSC_MODE_BARE: No translation/protection > > + * @RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32: Sv32, when dc.tc.SXL == 1 > > + * @RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: Sv39, when dc.tc.SXL == 0 > > + * @RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: Sv48, when dc.tc.SXL == 0 > > + * @RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: Sv57, when dc.tc.SXL == 0 > > + * @RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8: 1lvl PDT, 8bit process ids > > + * @RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17: 2lvl PDT, 17bit process ids > > + * @RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20: 3lvl PDT, 20bit process ids > > + * > > + * FSC holds IOSATP when RISCV_IOMMU_DC_TC_PDTV is 0 and PDTP otherwise. > > + * IOSATP controls the first stage address translation (same as the satp register on > > + * the RISC-V MMU), and PDTP holds the process directory table, used to select a > > + * first stage page table based on a process id (for devices that support multiple > > + * process ids). 
> > + */ > > +enum riscv_iommu_dc_fsc_atp_modes { > > + RISCV_IOMMU_DC_FSC_MODE_BARE = 0, > > + RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 = 8, > > + RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 = 8, > > + RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48 = 9, > > + RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57 = 10, > > + RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8 = 1, > > + RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17 = 2, > > + RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20 = 3 > > +}; > > + > > +/* MSI page table pointer */ > > +#define RISCV_IOMMU_DC_MSIPTP_PPN RISCV_IOMMU_ATP_PPN_FIELD > > +#define RISCV_IOMMU_DC_MSIPTP_MODE RISCV_IOMMU_ATP_MODE_FIELD > > +#define RISCV_IOMMU_DC_MSIPTP_MODE_OFF 0 > > +#define RISCV_IOMMU_DC_MSIPTP_MODE_FLAT 1 > > + > > +/* MSI address mask */ > > +#define RISCV_IOMMU_DC_MSI_ADDR_MASK GENMASK_ULL(51, 0) > > + > > +/* MSI address pattern */ > > +#define RISCV_IOMMU_DC_MSI_PATTERN GENMASK_ULL(51, 0) > > + > > +/** > > + * struct riscv_iommu_pc - Process Context > > + * @ta: Translation Attributes > > + * @fsc: First stage context > > + * > > + * This structure is used for leaf nodes on the Process Directory Table > > + * See section 2.3 for more details > > + */ > > +struct riscv_iommu_pc { > > + u64 ta; > > + u64 fsc; > > +}; > > + > > +/* Translation attributes fields */ > > +#define RISCV_IOMMU_PC_TA_V BIT_ULL(0) > > +#define RISCV_IOMMU_PC_TA_ENS BIT_ULL(1) > > +#define RISCV_IOMMU_PC_TA_SUM BIT_ULL(2) > > +#define RISCV_IOMMU_PC_TA_PSCID GENMASK_ULL(31, 12) > > + > > +/* First stage context fields */ > > +#define RISCV_IOMMU_PC_FSC_PPN RISCV_IOMMU_ATP_PPN_FIELD > > +#define RISCV_IOMMU_PC_FSC_MODE RISCV_IOMMU_ATP_MODE_FIELD > > + > > +/* > > + * Chapter 3: In-memory queue interface > > + */ > > + > > +/** > > + * struct riscv_iommu_command - Generic I/O MMU command structure > > + * @dword0: Includes the opcode and the function identifier > > + * @dword1: Opcode specific data > > + * > > + * The commands are interpreted as two 64bit fields, where the first > > + * 7bits of the first field are the 
opcode which also defines the > > + * command's format, followed by a 3bit field that specifies the > > + * function invoked by that command, and the rest is opcode-specific. > > + * This is a generic struct which will be populated differently > > + * according to each command. For more infos on the commands and > > + * the command queue check section 3.1. > > + */ > > +struct riscv_iommu_command { > > + u64 dword0; > > + u64 dword1; > > +}; > > + > > +/* Fields on dword0, common for all commands */ > > +#define RISCV_IOMMU_CMD_OPCODE GENMASK_ULL(6, 0) > > +#define RISCV_IOMMU_CMD_FUNC GENMASK_ULL(9, 7) > > + > > +/* 3.1.1 I/O MMU Page-table cache invalidation */ > > +/* Fields on dword0 */ > > +#define RISCV_IOMMU_CMD_IOTINVAL_OPCODE 1 > > +#define RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA 0 > > +#define RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA 1 > > +#define RISCV_IOMMU_CMD_IOTINVAL_AV BIT_ULL(10) > > +#define RISCV_IOMMU_CMD_IOTINVAL_PSCID GENMASK_ULL(31, 12) > > +#define RISCV_IOMMU_CMD_IOTINVAL_PSCV BIT_ULL(32) > > +#define RISCV_IOMMU_CMD_IOTINVAL_GV BIT_ULL(33) > > +#define RISCV_IOMMU_CMD_IOTINVAL_GSCID GENMASK_ULL(59, 44) > > +/* dword1[61:10] is the 4K-aligned page address */ > > +#define RISCV_IOMMU_CMD_IOTINVAL_ADDR GENMASK_ULL(61, 10) > > + > > +/* 3.1.2 I/O MMU Command Queue Fences */ > > +/* Fields on dword0 */ > > +#define RISCV_IOMMU_CMD_IOFENCE_OPCODE 2 > > +#define RISCV_IOMMU_CMD_IOFENCE_FUNC_C 0 > > +#define RISCV_IOMMU_CMD_IOFENCE_AV BIT_ULL(10) > > +#define RISCV_IOMMU_CMD_IOFENCE_WSI BIT_ULL(11) > > +#define RISCV_IOMMU_CMD_IOFENCE_PR BIT_ULL(12) > > +#define RISCV_IOMMU_CMD_IOFENCE_PW BIT_ULL(13) > > +#define RISCV_IOMMU_CMD_IOFENCE_DATA GENMASK_ULL(63, 32) > > +/* dword1 is the address, word-size aligned and shifted to the right by two bits. 
*/ > > + > > +/* 3.1.3 I/O MMU Directory cache invalidation */ > > +/* Fields on dword0 */ > > +#define RISCV_IOMMU_CMD_IODIR_OPCODE 3 > > +#define RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT 0 > > +#define RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT 1 > > +#define RISCV_IOMMU_CMD_IODIR_PID GENMASK_ULL(31, 12) > > +#define RISCV_IOMMU_CMD_IODIR_DV BIT_ULL(33) > > +#define RISCV_IOMMU_CMD_IODIR_DID GENMASK_ULL(63, 40) > > RISCV_IOMMU_CMD_IOTDIR_* for all above > I've checked latest RISC-V IOMMU Arch Specification and it looks there it is a bit inconsistent in IODIR naming. The acronym IOTDIR is used only once, while all other references to directory cache invalidation command use IODIR. I'll keep _CMD_IODIR_ here. Best, - Tomasz > > +/* dword1 is reserved for standard use */ > > + > > +/* 3.1.4 I/O MMU PCIe ATS */ > > +/* Fields on dword0 */ > > +#define RISCV_IOMMU_CMD_ATS_OPCODE 4 > > +#define RISCV_IOMMU_CMD_ATS_FUNC_INVAL 0 > > +#define RISCV_IOMMU_CMD_ATS_FUNC_PRGR 1 > > +#define RISCV_IOMMU_CMD_ATS_PID GENMASK_ULL(31, 12) > > +#define RISCV_IOMMU_CMD_ATS_PV BIT_ULL(32) > > +#define RISCV_IOMMU_CMD_ATS_DSV BIT_ULL(33) > > +#define RISCV_IOMMU_CMD_ATS_RID GENMASK_ULL(55, 40) > > +#define RISCV_IOMMU_CMD_ATS_DSEG GENMASK_ULL(63, 56) > > +/* dword1 is the ATS payload, two different payload types for INVAL and PRGR */ > > + > > +/* ATS.INVAL payload*/ > > +#define RISCV_IOMMU_CMD_ATS_INVAL_G BIT_ULL(0) > > +/* Bits 1 - 10 are zeroed */ > > +#define RISCV_IOMMU_CMD_ATS_INVAL_S BIT_ULL(11) > > +#define RISCV_IOMMU_CMD_ATS_INVAL_UADDR GENMASK_ULL(63, 12) > > + > > +/* ATS.PRGR payload */ > > +/* Bits 0 - 31 are zeroed */ > > +#define RISCV_IOMMU_CMD_ATS_PRGR_PRG_INDEX GENMASK_ULL(40, 32) > > +/* Bits 41 - 43 are zeroed */ > > +#define RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE GENMASK_ULL(47, 44) > > +#define RISCV_IOMMU_CMD_ATS_PRGR_DST_ID GENMASK_ULL(63, 48) > > + > > +/** > > + * struct riscv_iommu_fq_record - Fault/Event Queue Record > > + * @hdr: Header, includes fault/event cause, 
PID/DID, transaction type etc > > + * @_reserved: Low 32bits for custom use, high 32bits for standard use > > + * @iotval: Transaction-type/cause specific format > > + * @iotval2: Cause specific format > > + * > > + * The fault/event queue reports events and failures raised when > > + * processing transactions. Each record is a 32byte structure where > > + * the first dword has a fixed format for providing generic infos > > + * regarding the fault/event, and two more dwords are there for > > + * fault/event-specific information. For more details see section > > + * 3.2. > > + */ > > +struct riscv_iommu_fq_record { > > + u64 hdr; > > + u64 _reserved; > > + u64 iotval; > > + u64 iotval2; > > +}; > > + > > +/* Fields on header */ > > +#define RISCV_IOMMU_FQ_HDR_CAUSE GENMASK_ULL(11, 0) > > +#define RISCV_IOMMU_FQ_HDR_PID GENMASK_ULL(31, 12) > > +#define RISCV_IOMMU_FQ_HDR_PV BIT_ULL(32) > > +#define RISCV_IOMMU_FQ_HDR_PRIV BIT_ULL(33) > > +#define RISCV_IOMMU_FQ_HDR_TTYPE GENMASK_ULL(39, 34) > > RISCV_IOMMU_FQ_HDR_TTYP > > > +#define RISCV_IOMMU_FQ_HDR_DID GENMASK_ULL(63, 40) > > + > > +/** > > + * enum riscv_iommu_fq_causes - Fault/event cause values > > + * @RISCV_IOMMU_FQ_CAUSE_INST_FAULT: Instruction access fault > > + * @RISCV_IOMMU_FQ_CAUSE_RD_ADDR_MISALIGNED: Read address misaligned > > + * @RISCV_IOMMU_FQ_CAUSE_RD_FAULT: Read load fault > > + * @RISCV_IOMMU_FQ_CAUSE_WR_ADDR_MISALIGNED: Write/AMO address misaligned > > + * @RISCV_IOMMU_FQ_CAUSE_WR_FAULT: Write/AMO access fault > > + * @RISCV_IOMMU_FQ_CAUSE_INST_FAULT_S: Instruction page fault > > + * @RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S: Read page fault > > + * @RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S: Write/AMO page fault > > + * @RISCV_IOMMU_FQ_CAUSE_INST_FAULT_VS: Instruction guest page fault > > + * @RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS: Read guest page fault > > + * @RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS: Write/AMO guest page fault > > + * @RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED: All inbound transactions disallowed > > + * 
@RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT: DDT entry load access fault > > + * @RISCV_IOMMU_FQ_CAUSE_DDT_INVALID: DDT entry invalid > > + * @RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED: DDT entry misconfigured > > + * @RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED: Transaction type disallowed > > + * @RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT: MSI PTE load access fault > > + * @RISCV_IOMMU_FQ_CAUSE_MSI_INVALID: MSI PTE invalid > > + * @RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED: MSI PTE misconfigured > > + * @RISCV_IOMMU_FQ_CAUSE_MRIF_FAULT: MRIF access fault > > + * @RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT: PDT entry load access fault > > + * @RISCV_IOMMU_FQ_CAUSE_PDT_INVALID: PDT entry invalid > > + * @RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED: PDT entry misconfigured > > + * @RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED: DDT data corruption > > + * @RISCV_IOMMU_FQ_CAUSE_PDT_CORRUPTED: PDT data corruption > > + * @RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED: MSI page table data corruption > > + * @RISCV_IOMMU_FQ_CAUSE_MRIF_CORRUIPTED: MRIF data corruption > > + * @RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR: Internal data path error > > + * @RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT: IOMMU MSI write access fault > > + * @RISCV_IOMMU_FQ_CAUSE_PT_CORRUPTED: First/second stage page table data corruption > > + * > > + * Values are on table 11 of the spec, encodings 275 - 2047 are reserved for standard > > + * use, and 2048 - 4095 for custom use. 
> > + */ > > +enum riscv_iommu_fq_causes { > > + RISCV_IOMMU_FQ_CAUSE_INST_FAULT = 1, > > + RISCV_IOMMU_FQ_CAUSE_RD_ADDR_MISALIGNED = 4, > > + RISCV_IOMMU_FQ_CAUSE_RD_FAULT = 5, > > + RISCV_IOMMU_FQ_CAUSE_WR_ADDR_MISALIGNED = 6, > > + RISCV_IOMMU_FQ_CAUSE_WR_FAULT = 7, > > + RISCV_IOMMU_FQ_CAUSE_INST_FAULT_S = 12, > > + RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S = 13, > > + RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S = 15, > > + RISCV_IOMMU_FQ_CAUSE_INST_FAULT_VS = 20, > > + RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS = 21, > > + RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS = 23, > > + RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED = 256, > > + RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT = 257, > > + RISCV_IOMMU_FQ_CAUSE_DDT_INVALID = 258, > > + RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED = 259, > > + RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED = 260, > > + RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT = 261, > > + RISCV_IOMMU_FQ_CAUSE_MSI_INVALID = 262, > > + RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED = 263, > > + RISCV_IOMMU_FQ_CAUSE_MRIF_FAULT = 264, > > + RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT = 265, > > + RISCV_IOMMU_FQ_CAUSE_PDT_INVALID = 266, > > + RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED = 267, > > + RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED = 268, > > + RISCV_IOMMU_FQ_CAUSE_PDT_CORRUPTED = 269, > > + RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED = 270, > > + RISCV_IOMMU_FQ_CAUSE_MRIF_CORRUIPTED = 271, > > + RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR = 272, > > + RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT = 273, > > + RISCV_IOMMU_FQ_CAUSE_PT_CORRUPTED = 274 > > +}; > > + > > +/** > > + * enum riscv_iommu_fq_ttypes: Fault/event transaction types > > + * @RISCV_IOMMU_FQ_TTYPE_NONE: None. Fault not caused by an inbound transaction. 
> > + * @RISCV_IOMMU_FQ_TTYPE_UADDR_INST_FETCH: Instruction fetch from untranslated address > > + * @RISCV_IOMMU_FQ_TTYPE_UADDR_RD: Read from untranslated address > > + * @RISCV_IOMMU_FQ_TTYPE_UADDR_WR: Write/AMO to untranslated address > > + * @RISCV_IOMMU_FQ_TTYPE_TADDR_INST_FETCH: Instruction fetch from translated address > > + * @RISCV_IOMMU_FQ_TTYPE_TADDR_RD: Read from translated address > > + * @RISCV_IOMMU_FQ_TTYPE_TADDR_WR: Write/AMO to translated address > > + * @RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ: PCIe ATS translation request > > + * @RISCV_IOMMU_FW_TTYPE_PCIE_MSG_REQ: PCIe message request > > + * > > + * Values are on table 12 of the spec, type 4 and 10 - 31 are reserved for standard use > > + * and 31 - 63 for custom use. > > + */ > > +enum riscv_iommu_fq_ttypes { > > + RISCV_IOMMU_FQ_TTYPE_NONE = 0, > > + RISCV_IOMMU_FQ_TTYPE_UADDR_INST_FETCH = 1, > > + RISCV_IOMMU_FQ_TTYPE_UADDR_RD = 2, > > + RISCV_IOMMU_FQ_TTYPE_UADDR_WR = 3, > > + RISCV_IOMMU_FQ_TTYPE_TADDR_INST_FETCH = 5, > > + RISCV_IOMMU_FQ_TTYPE_TADDR_RD = 6, > > + RISCV_IOMMU_FQ_TTYPE_TADDR_WR = 7, > > + RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ = 8, > > + RISCV_IOMMU_FW_TTYPE_PCIE_MSG_REQ = 9, > > +}; > > RISCV_IOMMU_FW_TTYP_* for all above > > > + > > +/** > > + * struct riscv_iommu_pq_record - PCIe Page Request record > > + * @hdr: Header, includes PID, DID etc > > + * @payload: Holds the page address, request group and permission bits > > + * > > + * For more infos on the PCIe Page Request queue see chapter 3.3. 
> > + */ > > +struct riscv_iommu_pq_record { > > + u64 hdr; > > + u64 payload; > > +}; > > + > > +/* Header fields */ > > +#define RISCV_IOMMU_PREQ_HDR_PID GENMASK_ULL(31, 12) > > +#define RISCV_IOMMU_PREQ_HDR_PV BIT_ULL(32) > > +#define RISCV_IOMMU_PREQ_HDR_PRIV BIT_ULL(33) > > +#define RISCV_IOMMU_PREQ_HDR_EXEC BIT_ULL(34) > > +#define RISCV_IOMMU_PREQ_HDR_DID GENMASK_ULL(63, 40) > > + > > +/* Payload fields */ > > +#define RISCV_IOMMU_PREQ_PAYLOAD_R BIT_ULL(0) > > +#define RISCV_IOMMU_PREQ_PAYLOAD_W BIT_ULL(1) > > +#define RISCV_IOMMU_PREQ_PAYLOAD_L BIT_ULL(2) > > +#define RISCV_IOMMU_PREQ_PAYLOAD_M GENMASK_ULL(2, 0) /* Mask of RWL for convenience */ > > Maybe RISCV_IOMMU_PREQ_PAYLOAD_RWL_MASK ? Just 'M' doesn't convey anything > so it makes one want to look it up in the spec. > > > +#define RISCV_IOMMU_PREQ_PRG_INDEX GENMASK_ULL(11, 3) > > +#define RISCV_IOMMU_PREQ_UADDR GENMASK_ULL(63, 12) > > RISCV_IOMMU_PQ_HDR_* for all above > > > + > > +/** > > + * struct riscv_iommu_msi_pte - MSI Page Table Entry > > + * @pte: MSI PTE > > + * @mrif_info: Memory-resident interrupt file info > > + * > > + * The MSI Page Table is used for virtualizing MSIs, so that when > > + * a device sends an MSI to a guest, the IOMMU can reroute it > > + * by translating the MSI address, either to a guest interrupt file > > + * or a memory resident interrupt file (MRIF). Note that this page table > > + * is an array of MSI PTEs, not a multi-level pt, each entry > > + * is a leaf entry. For more infos check out the AIA spec, chapter 9.5. > > + * > > + * Also in basic mode the mrif_info field is ignored by the IOMMU and can > > + * be used by software, any other reserved fields on pte must be zeroed-out > > + * by software. 
> > + */ > > +struct riscv_iommu_msi_pte { > > + u64 pte; > > + u64 mrif_info; > > +}; > > + > > +/* Fields on pte */ > > +#define RISCV_IOMMU_MSI_PTE_V BIT_ULL(0) > > +#define RISCV_IOMMU_MSI_PTE_M GENMASK_ULL(2, 1) > > +#define RISCV_IOMMU_MSI_PTE_MRIF_ADDR GENMASK_ULL(53, 7) /* When M == 1 (MRIF mode) */ > > +#define RISCV_IOMMU_MSI_PTE_PPN RISCV_IOMMU_PPN_FIELD /* When M == 3 (basic mode) */ > > +#define RISCV_IOMMU_MSI_PTE_C BIT_ULL(63) > > RISCV_IOMMU_MSIPTE_* for all above > > > + > > +/* Fields on mrif_info */ > > +#define RISCV_IOMMU_MSI_MRIF_NID GENMASK_ULL(9, 0) > > +#define RISCV_IOMMU_MSI_MRIF_NPPN RISCV_IOMMU_PPN_FIELD > > +#define RISCV_IOMMU_MSI_MRIF_NID_MSB BIT_ULL(60) > > + > > +#endif /* _RISCV_IOMMU_BITS_H_ */
On Mon, Jun 3, 2024 at 5:59 AM Andrew Jones <ajones@ventanamicro.com> wrote: > > On Fri, May 24, 2024 at 12:34:42PM GMT, Tomasz Jeznach wrote: > ... > > +static struct platform_driver riscv_iommu_platform_driver = { > > + .probe = riscv_iommu_platform_probe, > > + .remove_new = riscv_iommu_platform_remove, > > Hi Tomasz, > > I think we should also support .shutdown (just turn the IOMMU off?), > otherwise the IOMMU driver reports EBUSY and fails to initialize when > rebooting. > > Same comment for the PCI driver. > I've been testing patches with added shutdown handlers, also looking at reboot notifier hooks. In both cases, devices actively running DMA traffic at system shutdown (e.g. storage) might be affected by a too-early IOMMU disable call. This topic was already brought up in the discussion [1] about kexec reboot flow. In this series I'd prefer to keep the IOMMU active during shutdown, and prepare the RISC-V IOMMU shutdown sequence implementation as a separate series, as it will involve more RISC-V reboot rework to guarantee ordering of the shutdown callbacks. In a normal reboot cycle, firmware/hardware reset should bring the IOMMU to a known out-of-reset state anyway. [1] https://lore.kernel.org/linux-iommu/059ae516-aed4-4836-a2ca-aff150ff428d@arm.com/ Best, - Tomasz > Thanks, > drew
On Wed, Jun 05, 2024 at 11:58:28AM GMT, Tomasz Jeznach wrote: > On Wed, May 29, 2024 at 8:15 AM Andrew Jones <ajones@ventanamicro.com> wrote: ... > > > +/* 3.1.3 I/O MMU Directory cache invalidation */ > > > +/* Fields on dword0 */ > > > +#define RISCV_IOMMU_CMD_IODIR_OPCODE 3 > > > +#define RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT 0 > > > +#define RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT 1 > > > +#define RISCV_IOMMU_CMD_IODIR_PID GENMASK_ULL(31, 12) > > > +#define RISCV_IOMMU_CMD_IODIR_DV BIT_ULL(33) > > > +#define RISCV_IOMMU_CMD_IODIR_DID GENMASK_ULL(63, 40) > > > > RISCV_IOMMU_CMD_IOTDIR_* for all above > > > > I've checked latest RISC-V IOMMU Arch Specification and it looks there > it is a bit inconsistent in IODIR naming. The acronym IOTDIR is used > only once, while all other references to directory cache invalidation > command use IODIR. I'll keep _CMD_IODIR_ here. > Indeed. I've made a comment on the spec clarifications PR to suggest the s/IOTDIR/IODIR/ change. Thanks, drew
On Fri, May 31, 2024 at 02:25:15PM +0800, Zong Li wrote: > > +static void riscv_iommu_iodir_update(struct riscv_iommu_device *iommu, > > + struct device *dev, u64 fsc, u64 ta) > > +{ > > + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); > > + struct riscv_iommu_dc *dc; > > + u64 tc; > > + int i; > > + > > + /* Device context invalidation ignored for now. */ > > + > > + /* > > + * For device context with DC_TC_PDTV = 0, translation attributes valid bit > > + * is stored as DC_TC_V bit (both sharing the same location at BIT(0)). > > + */ > > + for (i = 0; i < fwspec->num_ids; i++) { > > + dc = riscv_iommu_get_dc(iommu, fwspec->ids[i]); > > + tc = READ_ONCE(dc->tc); > > + tc |= ta & RISCV_IOMMU_DC_TC_V; > > + > > + WRITE_ONCE(dc->fsc, fsc); > > + WRITE_ONCE(dc->ta, ta & RISCV_IOMMU_PC_TA_PSCID); > > + /* Update device context, write TC.V as the last step. */ > > + dma_wmb(); > > + WRITE_ONCE(dc->tc, tc); > > + } > > Does it make sense to invalidate the DDTE after we update the DDTE in > memory? This behavior will affect the nested IOMMU mechanism. The VMM > has to catch the event of a DDTE update from the guest and then > eventually go into the host IOMMU driver to configure the IOMMU > hardware. Right, this is why I asked about negative caching. The VMMs are a prime example of negative caching, in something like the SMMU implementation the VMM will cache the V=0 STE until they see an invalidation. Driving the VMM shadowing/caching entirely off of the standard invalidation mechanism is so much better than any other option. IMHO you should have the RISCV spec revised to allow negative caching in any invalidated data structure to permit the typical VMM design driven off of shadowing triggered by invalidation commands. Once the spec permits negative caching then the software would have to invalidate after going V=0 -> V=1. Jason
On Mon, Jun 10, 2024 at 10:49 AM Jason Gunthorpe <jgg@ziepe.ca> wrote: > > On Fri, May 31, 2024 at 02:25:15PM +0800, Zong Li wrote: > > > > +static void riscv_iommu_iodir_update(struct riscv_iommu_device *iommu, > > > + struct device *dev, u64 fsc, u64 ta) > > > +{ > > > + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); > > > + struct riscv_iommu_dc *dc; > > > + u64 tc; > > > + int i; > > > + > > > + /* Device context invalidation ignored for now. */ > > > + > > > + /* > > > + * For device context with DC_TC_PDTV = 0, translation attributes valid bit > > > + * is stored as DC_TC_V bit (both sharing the same location at BIT(0)). > > > + */ > > > + for (i = 0; i < fwspec->num_ids; i++) { > > > + dc = riscv_iommu_get_dc(iommu, fwspec->ids[i]); > > > + tc = READ_ONCE(dc->tc); > > > + tc |= ta & RISCV_IOMMU_DC_TC_V; > > > + > > > + WRITE_ONCE(dc->fsc, fsc); > > > + WRITE_ONCE(dc->ta, ta & RISCV_IOMMU_PC_TA_PSCID); > > > + /* Update device context, write TC.V as the last step. */ > > > + dma_wmb(); > > > + WRITE_ONCE(dc->tc, tc); > > > + } > > > > Does it make sense to invalidate the DDTE after we update the DDTE in > > memory? This behavior will affect the nested IOMMU mechanism. The VMM > > has to catch the event of a DDTE update from the guest and then > > eventually go into the host IOMMU driver to configure the IOMMU > > hardware. > > Right, this is why I asked about negative caching. > > The VMMs are a prime example of negative caching, in something like > the SMMU implementation the VMM will cache the V=0 STE until they see > an invalidation. > > Driving the VMM shadowing/caching entirely off of the standard > invalidation mechanism is so much better than any other option. > > IMHO you should have the RISCV spec revised to allow negative caching > in any invalidated data structure to permit the typical VMM design > driven off of shadowing triggered by invalidation commands. 
> > Once the spec permits negative caching then the software would have to > invalidate after going V=0 -> V=1. > > Jason Allowing negative caching by the spec (e.g. for VMM use cases) and documenting required invalidation sequences would definitely help here. I'm hesitant to add IODIR.INVAL that is not required by the spec [1], but this is something that can be controlled by a capabilities/feature bit once added to the specification or based on VID:DID of the emulated RISC-V IOMMU. Another option to consider for VMM is to utilize the WARL property of DDTP, and provide a fixed location of the single-level DDTP, pointing to an MMIO region, where DDTE updates will result in a VMM exit / fault handler. This will likely not be as efficient as IODIR.INVAL issued for any DDTE updates. [1] https://github.com/riscv-non-isa/riscv-iommu/blob/main/src/iommu_data_structures.adoc#caching-in-memory-data-structures - Tomasz
On Mon, Jun 10, 2024 at 11:48:23AM -0700, Tomasz Jeznach wrote: > > Right, this is why I asked about negative caching. > > > > The VMMs are a prime example of negative caching, in something like > > the SMMU implementation the VMM will cache the V=0 STE until they see > > an invalidation. > > > > Driving the VMM shadowing/caching entirely off of the standard > > invalidation mechanism is so much better than any other option. > > > > IMHO you should have the RISCV spec revised to allow negative caching > > in any invalidated data structure to permit the typical VMM design > > driven off of shadowing triggered by invalidation commands. > > > > Once the spec permits negative caching then the software would have to > > invalidate after going V=0 -> V=1. > > > > Jason > > Allowing negative cacheing by the spec (e.g. for VMM use cases) and > documenting required invalidation sequences would definitely help > here. Yes, you probably should really do that. > I'm hesitating adding IODIR.INVAL that is not required by the > spec [1], If you expect to rapidly revise the spec then you should add it right now so that all SW implementations that exist are conforming. Otherwise you'll have compatibility problems when you come to implement nesting. Obviously the VMM can't rely on a negative caching technique unless the spec says it can. > but this is something that can be controlled by a > capabilities/feature bit once added to the specification or based on > VID:DID of the emulated Risc-V IOMMU. I'm not sure it really can. Once you start shipping SW people will run it in a VM and the VMM will have to forever work without negative caching. My strong advice is to not expect the VMM to trap random pages in guest memory, that is a huge mess to implement and will delay your VMM side.
> Another option to consider for VMM is to utilize the WARL property of > DDTP, and provide fixed location of the single level DDTP, pointing to > MMIO region, where DDTE updates will result in vmm exit / fault > handler. This will likely not be as efficient as IODIR.INVAL issued > for any DDTE updates. I don't know what all those things mean, but if you mean to have the VMM supply faulting MMIO space that the VM is forced to put the DDTE table into, then that would be better. It is still quite abnormal from the VMM side.. My strong advice is to fix this. It is trivial to add the negative caching language to the spec and will cause insignificant extra work in this driver. The gains on at least the ease of VMM implementation and architectural similarity to other arches are well worth the effort. Otherwise I fear it will be a struggle to get nesting support completed :( Jason
On Mon, Jun 10, 2024 at 3:20 PM Jason Gunthorpe <jgg@ziepe.ca> wrote: > > On Mon, Jun 10, 2024 at 11:48:23AM -0700, Tomasz Jeznach wrote: > > > Right, this is why I asked about negative caching. > > > > > > The VMMs are a prime example of negative caching, in something like > > > the SMMU implementation the VMM will cache the V=0 STE until they see > > > an invalidation. > > > > > > Driving the VMM shadowing/caching entirely off of the standard > > > invalidation mechanism is so much better than any other option. > > > > > > IMHO you should have the RISCV spec revised to allow negative caching > > > in any invalidated data structure to permit the typical VMM design > > > driven off of shadowing triggered by invalidation commands. > > > > > > Once the spec permits negative caching then the software would have to > > > invalidate after going V=0 -> V=1. > > > > > > Jason > > > > Allowing negative cacheing by the spec (e.g. for VMM use cases) and > > documenting required invalidation sequences would definitely help > > here. > > Yes, you probably should really do that. > > > I'm hesitating adding IODIR.INVAL that is not required by the > > spec [1], > > If you expect to rapidly revise the spec then you should add it right > now so that all SW implementations that exist are > conforming. Otherwise you'll have compatability problems when you come > to implement nesting. > > Obviously the VMM can't rely on a negative caching technique unless > the spec says it can. > > > but this is something that can be controlled by a > > capabilities/feature bit once added to the specification or based on > > VID:DID of the emulated Risc-V IOMMU. > > I'm not sure it really can. Once you start shipping SW people will run > it in a VM and the VMM will have to forever work without negative > caching. > > My strong advice is to not expect the VMM trap random pages in guest > memory, that is a huge mess to implement and will delay your VMM side. 
> > > Another option to consider for VMM is to utilize the WARL property of > > DDTP, and provide fixed location of the single level DDTP, pointing to > > MMIO region, where DDTE updates will result in vmm exit / fault > > handler. This will likely not be as efficient as IODIR.INVAL issued > > for any DDTE updates. > > I don't know what all those things mean, but if you mean to have the > VMM supply faulting MMIO space that the VM is forced to put the DDTE > table into, then that would be better. It is still quite abnormal from > the VMM side.. > > My strong advice is to fix this. It is trivial to add the negative > caching language to the spec and will cause insignificant extra work > in this driver. > > The gains on at least the ease of VMM implementation and architectural > similarity to other arches are well worth the effort. Otherwise I fear > it will be a struggle to get nesting support completed :( > > Jason > Thanks for the comments, it definitely makes sense. We will revise the wording in the RISC-V IOMMU spec to note that certain implementations may require invalidation commands to be issued when making DDT entries valid, and device tree / ACPI may be used to report such a requirement. For now, I'll change the implementation to assume negative caching for DDTE and will follow up with device tree / driver updates to make the invalidation optional once the revised specification is available. Best, - Tomasz > -- > You received this message because you are subscribed to the Google Groups "linux" group. > To unsubscribe from this group and stop receiving emails from it, send an email to linux+unsubscribe@rivosinc.com. > To view this discussion on the web visit https://groups.google.com/a/rivosinc.com/d/msgid/linux/20240610222051.GO791043%40ziepe.ca. > For more options, visit https://groups.google.com/a/rivosinc.com/d/optout.
On Mon, Jun 10, 2024 at 07:00:34PM -0700, Tomasz Jeznach wrote: > For now, I'll change the implementation to assume negative caching for > DDTE and will follow up with device tree / driver updates to make the > invalidation optional when revised specifications will be available. Is there a reason to make it optional? It seems like it doesn't have any performance downside to just always invalidate, attachment is not a critical path operation. I could see making something like negative PTE invalidation optional as that is more performance path.. Jason