npu2: Allow ATSD for LPAR other than 0

Message ID 20181205045222.40053-1-aik@ozlabs.ru
State Accepted
Headers show
Series
  • npu2: Allow ATSD for LPAR other than 0
Related show

Checks

Context Check Description
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot success Test snowpatch/job/snowpatch-skiboot on branch master
snowpatch_ozlabs/apply_patch success master/apply_patch Successfully applied

Commit Message

Alexey Kardashevskiy Dec. 5, 2018, 4:52 a.m.
Each XTS MMIO ATSD# register is accompanied by another register -
XTS MMIO ATSD0 LPARID# - which controls LPID filtering for ATSD
transactions.

When a host system passes a GPU through to a guest, we need to enable
some ATSD for an LPAR. At the moment the host assigns one ATSD to
a NVLink bridge and this maps it to an LPAR when GPU is assigned to
the LPAR. The link number is used for an ATSD index.

ATSD6&7 stay mapped to the host (LPAR=0) all the time which seems to be
acceptable price for the simplicity.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 include/npu2-regs.h |  2 ++
 hw/npu2.c           | 22 ++++++++++++++++++----
 2 files changed, 20 insertions(+), 4 deletions(-)

Comments

Alistair Popple Dec. 11, 2018, 2:53 a.m. | #1
On Wednesday, 5 December 2018 3:52:22 PM AEDT Alexey Kardashevskiy wrote:
> Each XTS MMIO ATSD# register is accompanied by another register -
> XTS MMIO ATSD0 LPARID# - which controls LPID filtering for ATSD
> transactions.
> 
> When a host system passes a GPU through to a guest, we need to enable
> some ATSD for an LPAR. At the moment the host assigns one ATSD to
> a NVLink bridge and this maps it to an LPAR when GPU is assigned to
> the LPAR. The link number is used for an ATSD index.
> 
> ATSD6&7 stay mapped to the host (LPAR=0) all the time which seems to be
> acceptable price for the simplicity.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
>  include/npu2-regs.h |  2 ++
>  hw/npu2.c           | 22 ++++++++++++++++++----
>  2 files changed, 20 insertions(+), 4 deletions(-)
> 
> diff --git a/include/npu2-regs.h b/include/npu2-regs.h
> index 8273b2b..ae5e225 100644
> --- a/include/npu2-regs.h
> +++ b/include/npu2-regs.h
> @@ -547,6 +547,8 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>  #define NPU2_XTS_MMIO_ATSD5_LPARID		NPU2_REG_OFFSET(NPU2_STACK_MISC,
> NPU2_BLOCK_XTS, 0x128) #define
> NPU2_XTS_MMIO_ATSD6_LPARID		NPU2_REG_OFFSET(NPU2_STACK_MISC,
> NPU2_BLOCK_XTS, 0x130) #define
> NPU2_XTS_MMIO_ATSD7_LPARID		NPU2_REG_OFFSET(NPU2_STACK_MISC,
> NPU2_BLOCK_XTS, 0x138) +#define   NPU2_XTS_MMIO_ATSD_MSR_HV		PPC_BIT(51)
> +#define   NPU2_XTS_MMIO_ATSD_LPARID		PPC_BITMASK(52,63)
>  #define NPU2_XTS_BDF_MAP			NPU2_REG_OFFSET(NPU2_STACK_MISC, 
NPU2_BLOCK_XTS,
> 0x4000) #define   NPU2_XTS_BDF_MAP_VALID		PPC_BIT(0)
>  #define   NPU2_XTS_BDF_MAP_UNFILT		PPC_BIT(1)
> diff --git a/hw/npu2.c b/hw/npu2.c
> index 41563b4..767306f 100644
> --- a/hw/npu2.c
> +++ b/hw/npu2.c
> @@ -2255,9 +2255,14 @@ static int opal_npu_map_lpar(uint64_t phb_id,
> uint64_t bdf, uint64_t lparid, struct phb *phb = pci_get_phb(phb_id);
>  	struct npu2 *p;
>  	struct npu2_dev *ndev = NULL;
> -	uint64_t xts_bdf_lpar, rc = OPAL_SUCCESS;
> +	uint64_t xts_bdf_lpar, atsd_lpar, rc = OPAL_SUCCESS;
>  	int i;
>  	int id;
> +	static uint64_t atsd_lpar_regs[] = {
> +		NPU2_XTS_MMIO_ATSD0_LPARID, NPU2_XTS_MMIO_ATSD1_LPARID,
> +		NPU2_XTS_MMIO_ATSD2_LPARID, NPU2_XTS_MMIO_ATSD3_LPARID,
> +		NPU2_XTS_MMIO_ATSD4_LPARID, NPU2_XTS_MMIO_ATSD5_LPARID,
> +		NPU2_XTS_MMIO_ATSD6_LPARID, NPU2_XTS_MMIO_ATSD7_LPARID };
> 
>  	if (!phb || phb->phb_type != phb_type_npu_v2)
>  		return OPAL_PARAMETER;
> @@ -2297,11 +2302,20 @@ static int opal_npu_map_lpar(uint64_t phb_id,
> uint64_t bdf, uint64_t lparid, xts_bdf_lpar =
> SETFIELD(NPU2_XTS_BDF_MAP_LPARID, xts_bdf_lpar, lparid); xts_bdf_lpar =
> SETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, xts_bdf_lpar, id);
> 
> -	/* Need to find an NVLink to send the ATSDs for this device over */
> +	/*
> +	 * Need to find an NVLink to send the ATSDs for this device over.
> +	 * Also, the host allocates an ATSD per NVLink, enable filtering now.
> +	 */
> +	atsd_lpar = SETFIELD(NPU2_XTS_MMIO_ATSD_LPARID, 0, lparid);
> +	if (!lparid)
> +		atsd_lpar = SETFIELD(NPU2_XTS_MMIO_ATSD_MSR_HV, atsd_lpar, 1);
> +
>  	for (i = 0; i < p->total_devices; i++) {
>  		if (p->devices[i].nvlink.gpu_bdfn == bdf) {
> -			ndev = &p->devices[i];
> -			break;
> +			if (!ndev)
> +				ndev = &p->devices[i];
> +			if (i < ARRAY_SIZE(atsd_lpar_regs))
> +				npu2_write(p, atsd_lpar_regs[i], atsd_lpar);

I'm not sure I really like this as ATSD resources should be allocated and 
passed through to guests by the hypervisor independently of the NVLink devices 
themselves, and a single ATSD register can serve more than one device.

This also creates an implicit assumption that the order of NVLink devices in 
p->devices[] matches ATSD register numbers in some way that the hypervisor 
assumes, which seems like it would be especially brittle as devices[] is 
essentially unordered.

How does the hypervisor know which of the 8 ATSD registers has been assigned 
to the LPARID when calling opal_npu_map_lpar? It seems like you might need a 
seperate OPAL call for this along the lines of npu2_map_mmio_atsd(atsd_index, 
lparid).

- Alistair

>  		}
>  	}
Alexey Kardashevskiy Dec. 11, 2018, 5:45 a.m. | #2
On 11/12/2018 13:53, Alistair Popple wrote:
> On Wednesday, 5 December 2018 3:52:22 PM AEDT Alexey Kardashevskiy wrote:
>> Each XTS MMIO ATSD# register is accompanied by another register -
>> XTS MMIO ATSD0 LPARID# - which controls LPID filtering for ATSD
>> transactions.
>>
>> When a host system passes a GPU through to a guest, we need to enable
>> some ATSD for an LPAR. At the moment the host assigns one ATSD to
>> a NVLink bridge and this maps it to an LPAR when GPU is assigned to
>> the LPAR. The link number is used for an ATSD index.
>>
>> ATSD6&7 stay mapped to the host (LPAR=0) all the time which seems to be
>> acceptable price for the simplicity.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> ---
>>  include/npu2-regs.h |  2 ++
>>  hw/npu2.c           | 22 ++++++++++++++++++----
>>  2 files changed, 20 insertions(+), 4 deletions(-)
>>
>> diff --git a/include/npu2-regs.h b/include/npu2-regs.h
>> index 8273b2b..ae5e225 100644
>> --- a/include/npu2-regs.h
>> +++ b/include/npu2-regs.h
>> @@ -547,6 +547,8 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>>  #define NPU2_XTS_MMIO_ATSD5_LPARID		NPU2_REG_OFFSET(NPU2_STACK_MISC,
>> NPU2_BLOCK_XTS, 0x128) #define
>> NPU2_XTS_MMIO_ATSD6_LPARID		NPU2_REG_OFFSET(NPU2_STACK_MISC,
>> NPU2_BLOCK_XTS, 0x130) #define
>> NPU2_XTS_MMIO_ATSD7_LPARID		NPU2_REG_OFFSET(NPU2_STACK_MISC,
>> NPU2_BLOCK_XTS, 0x138) +#define   NPU2_XTS_MMIO_ATSD_MSR_HV		PPC_BIT(51)
>> +#define   NPU2_XTS_MMIO_ATSD_LPARID		PPC_BITMASK(52,63)
>>  #define NPU2_XTS_BDF_MAP			NPU2_REG_OFFSET(NPU2_STACK_MISC, 
> NPU2_BLOCK_XTS,
>> 0x4000) #define   NPU2_XTS_BDF_MAP_VALID		PPC_BIT(0)
>>  #define   NPU2_XTS_BDF_MAP_UNFILT		PPC_BIT(1)
>> diff --git a/hw/npu2.c b/hw/npu2.c
>> index 41563b4..767306f 100644
>> --- a/hw/npu2.c
>> +++ b/hw/npu2.c
>> @@ -2255,9 +2255,14 @@ static int opal_npu_map_lpar(uint64_t phb_id,
>> uint64_t bdf, uint64_t lparid, struct phb *phb = pci_get_phb(phb_id);
>>  	struct npu2 *p;
>>  	struct npu2_dev *ndev = NULL;
>> -	uint64_t xts_bdf_lpar, rc = OPAL_SUCCESS;
>> +	uint64_t xts_bdf_lpar, atsd_lpar, rc = OPAL_SUCCESS;
>>  	int i;
>>  	int id;
>> +	static uint64_t atsd_lpar_regs[] = {
>> +		NPU2_XTS_MMIO_ATSD0_LPARID, NPU2_XTS_MMIO_ATSD1_LPARID,
>> +		NPU2_XTS_MMIO_ATSD2_LPARID, NPU2_XTS_MMIO_ATSD3_LPARID,
>> +		NPU2_XTS_MMIO_ATSD4_LPARID, NPU2_XTS_MMIO_ATSD5_LPARID,
>> +		NPU2_XTS_MMIO_ATSD6_LPARID, NPU2_XTS_MMIO_ATSD7_LPARID };
>>
>>  	if (!phb || phb->phb_type != phb_type_npu_v2)
>>  		return OPAL_PARAMETER;
>> @@ -2297,11 +2302,20 @@ static int opal_npu_map_lpar(uint64_t phb_id,
>> uint64_t bdf, uint64_t lparid, xts_bdf_lpar =
>> SETFIELD(NPU2_XTS_BDF_MAP_LPARID, xts_bdf_lpar, lparid); xts_bdf_lpar =
>> SETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, xts_bdf_lpar, id);
>>
>> -	/* Need to find an NVLink to send the ATSDs for this device over */
>> +	/*
>> +	 * Need to find an NVLink to send the ATSDs for this device over.
>> +	 * Also, the host allocates an ATSD per NVLink, enable filtering now.
>> +	 */
>> +	atsd_lpar = SETFIELD(NPU2_XTS_MMIO_ATSD_LPARID, 0, lparid);
>> +	if (!lparid)
>> +		atsd_lpar = SETFIELD(NPU2_XTS_MMIO_ATSD_MSR_HV, atsd_lpar, 1);
>> +
>>  	for (i = 0; i < p->total_devices; i++) {
>>  		if (p->devices[i].nvlink.gpu_bdfn == bdf) {
>> -			ndev = &p->devices[i];
>> -			break;
>> +			if (!ndev)
>> +				ndev = &p->devices[i];
>> +			if (i < ARRAY_SIZE(atsd_lpar_regs))
>> +				npu2_write(p, atsd_lpar_regs[i], atsd_lpar);
> 
> I'm not sure I really like this as ATSD resources should be allocated and 
> passed through to guests by the hypervisor independently of the NVLink devices 
> themselves, and a single ATSD register can serve more than one device.

Sure it can serve more, and all passed ATSDs are assembled in one vPHB
property and the guest then does not make distinction between them.  I
just pass maximum 6 of 8 ATSDs but until recently it was just a single
ATSD per PHB for all devices and yet nobody complained.

The problem here is that I need to mmap() ATSD to QEMU, then map them to
KVM and then present this somehow via the device tree.

Now, I can do mmap() on a fd such as a VFIO device fd or a IOMMU
container fd.  If I choose the dev fd, VFIO in QEMU can mmap it, store
the pointer in the QEMU PCI device, and SPAPR vPHB can put atsd property
in vPHB. If I choose the container fd, then VFIO in QEMU can mmap it but
SPAPR vPHB has no concept of IOMMU/VFIO groups/containers and I'd rather
leave like this.


> This also creates an implicit assumption that the order of NVLink devices in 
> p->devices[] matches ATSD register numbers in some way that the hypervisor 
> assumes, which seems like it would be especially brittle as devices[] is 
> essentially unordered.

The order does not matter here, all that matters is that the order does
not change - any ATSD works with any NVLink bridge on that NPU PHB.


> How does the hypervisor know which of the 8 ATSD registers has been assigned 
> to the LPARID when calling opal_npu_map_lpar?


One ATSD register is exposed to the userspace via mmap() on a
"corresponding" NVLink bridge VFIO device fd, register index == link
index. An NVLink device belongs to an IOMMU group. A KVM pointer (i.e.
LPID) is set to the IOMMU group from an ioctl() called by QEMU.


> It seems like you might need a 
> seperate OPAL call for this along the lines of npu2_map_mmio_atsd(atsd_index, 
> lparid).


I do not see what it buys us. I need an allocator on the host or skiboot
anyway, either static or dynamic, dynamic seems excessive as the
devices[] order or the number of ATSDs do not change.



> 
> - Alistair
> 
>>  		}
>>  	}
> 
>
Stewart Smith Dec. 11, 2018, 6:29 a.m. | #3
Alexey Kardashevskiy <aik@ozlabs.ru> writes:
> Each XTS MMIO ATSD# register is accompanied by another register -
> XTS MMIO ATSD0 LPARID# - which controls LPID filtering for ATSD
> transactions.
>
> When a host system passes a GPU through to a guest, we need to enable
> some ATSD for an LPAR. At the moment the host assigns one ATSD to
> a NVLink bridge and this maps it to an LPAR when GPU is assigned to
> the LPAR. The link number is used for an ATSD index.
>
> ATSD6&7 stay mapped to the host (LPAR=0) all the time which seems to be
> acceptable price for the simplicity.
>
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>

Thanks, merged to master as of d8b161f4b361f70a7bb43be47d4a32b8f937287a
Stewart Smith Dec. 11, 2018, 7:07 a.m. | #4
Alexey Kardashevskiy <aik@ozlabs.ru> writes:
> On 11/12/2018 13:53, Alistair Popple wrote:
>> On Wednesday, 5 December 2018 3:52:22 PM AEDT Alexey Kardashevskiy wrote:
>>> Each XTS MMIO ATSD# register is accompanied by another register -
>>> XTS MMIO ATSD0 LPARID# - which controls LPID filtering for ATSD
>>> transactions.
>>>
>>> When a host system passes a GPU through to a guest, we need to enable
>>> some ATSD for an LPAR. At the moment the host assigns one ATSD to
>>> a NVLink bridge and this maps it to an LPAR when GPU is assigned to
>>> the LPAR. The link number is used for an ATSD index.
>>>
>>> ATSD6&7 stay mapped to the host (LPAR=0) all the time which seems to be
>>> acceptable price for the simplicity.
>>>
>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>> ---
>>>  include/npu2-regs.h |  2 ++
>>>  hw/npu2.c           | 22 ++++++++++++++++++----
>>>  2 files changed, 20 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/include/npu2-regs.h b/include/npu2-regs.h
>>> index 8273b2b..ae5e225 100644
>>> --- a/include/npu2-regs.h
>>> +++ b/include/npu2-regs.h
>>> @@ -547,6 +547,8 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>>>  #define NPU2_XTS_MMIO_ATSD5_LPARID		NPU2_REG_OFFSET(NPU2_STACK_MISC,
>>> NPU2_BLOCK_XTS, 0x128) #define
>>> NPU2_XTS_MMIO_ATSD6_LPARID		NPU2_REG_OFFSET(NPU2_STACK_MISC,
>>> NPU2_BLOCK_XTS, 0x130) #define
>>> NPU2_XTS_MMIO_ATSD7_LPARID		NPU2_REG_OFFSET(NPU2_STACK_MISC,
>>> NPU2_BLOCK_XTS, 0x138) +#define   NPU2_XTS_MMIO_ATSD_MSR_HV		PPC_BIT(51)
>>> +#define   NPU2_XTS_MMIO_ATSD_LPARID		PPC_BITMASK(52,63)
>>>  #define NPU2_XTS_BDF_MAP			NPU2_REG_OFFSET(NPU2_STACK_MISC, 
>> NPU2_BLOCK_XTS,
>>> 0x4000) #define   NPU2_XTS_BDF_MAP_VALID		PPC_BIT(0)
>>>  #define   NPU2_XTS_BDF_MAP_UNFILT		PPC_BIT(1)
>>> diff --git a/hw/npu2.c b/hw/npu2.c
>>> index 41563b4..767306f 100644
>>> --- a/hw/npu2.c
>>> +++ b/hw/npu2.c
>>> @@ -2255,9 +2255,14 @@ static int opal_npu_map_lpar(uint64_t phb_id,
>>> uint64_t bdf, uint64_t lparid, struct phb *phb = pci_get_phb(phb_id);
>>>  	struct npu2 *p;
>>>  	struct npu2_dev *ndev = NULL;
>>> -	uint64_t xts_bdf_lpar, rc = OPAL_SUCCESS;
>>> +	uint64_t xts_bdf_lpar, atsd_lpar, rc = OPAL_SUCCESS;
>>>  	int i;
>>>  	int id;
>>> +	static uint64_t atsd_lpar_regs[] = {
>>> +		NPU2_XTS_MMIO_ATSD0_LPARID, NPU2_XTS_MMIO_ATSD1_LPARID,
>>> +		NPU2_XTS_MMIO_ATSD2_LPARID, NPU2_XTS_MMIO_ATSD3_LPARID,
>>> +		NPU2_XTS_MMIO_ATSD4_LPARID, NPU2_XTS_MMIO_ATSD5_LPARID,
>>> +		NPU2_XTS_MMIO_ATSD6_LPARID, NPU2_XTS_MMIO_ATSD7_LPARID };
>>>
>>>  	if (!phb || phb->phb_type != phb_type_npu_v2)
>>>  		return OPAL_PARAMETER;
>>> @@ -2297,11 +2302,20 @@ static int opal_npu_map_lpar(uint64_t phb_id,
>>> uint64_t bdf, uint64_t lparid, xts_bdf_lpar =
>>> SETFIELD(NPU2_XTS_BDF_MAP_LPARID, xts_bdf_lpar, lparid); xts_bdf_lpar =
>>> SETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, xts_bdf_lpar, id);
>>>
>>> -	/* Need to find an NVLink to send the ATSDs for this device over */
>>> +	/*
>>> +	 * Need to find an NVLink to send the ATSDs for this device over.
>>> +	 * Also, the host allocates an ATSD per NVLink, enable filtering now.
>>> +	 */
>>> +	atsd_lpar = SETFIELD(NPU2_XTS_MMIO_ATSD_LPARID, 0, lparid);
>>> +	if (!lparid)
>>> +		atsd_lpar = SETFIELD(NPU2_XTS_MMIO_ATSD_MSR_HV, atsd_lpar, 1);
>>> +
>>>  	for (i = 0; i < p->total_devices; i++) {
>>>  		if (p->devices[i].nvlink.gpu_bdfn == bdf) {
>>> -			ndev = &p->devices[i];
>>> -			break;
>>> +			if (!ndev)
>>> +				ndev = &p->devices[i];
>>> +			if (i < ARRAY_SIZE(atsd_lpar_regs))
>>> +				npu2_write(p, atsd_lpar_regs[i], atsd_lpar);
>> 
>> I'm not sure I really like this as ATSD resources should be allocated and 
>> passed through to guests by the hypervisor independently of the NVLink devices 
>> themselves, and a single ATSD register can serve more than one device.
>
> Sure it can serve more, and all passed ATSDs are assembled in one vPHB
> property and the guest then does not make distinction between them.  I
> just pass maximum 6 of 8 ATSDs but until recently it was just a single
> ATSD per PHB for all devices and yet nobody complained.
>
> The problem here is that I need to mmap() ATSD to QEMU, then map them to
> KVM and then present this somehow via the device tree.
>
> Now, I can do mmap() on a fd such as a VFIO device fd or a IOMMU
> container fd.  If I choose the dev fd, VFIO in QEMU can mmap it, store
> the pointer in the QEMU PCI device, and SPAPR vPHB can put atsd property
> in vPHB. If I choose the container fd, then VFIO in QEMU can mmap it but
> SPAPR vPHB has no concept of IOMMU/VFIO groups/containers and I'd rather
> leave like this.
>
>
>> This also creates an implicit assumption that the order of NVLink devices in 
>> p->devices[] matches ATSD register numbers in some way that the hypervisor 
>> assumes, which seems like it would be especially brittle as devices[] is 
>> essentially unordered.
>
> The order does not matter here, all that matters is that the order does
> not change - any ATSD works with any NVLink bridge on that NPU PHB.
>
>
>> How does the hypervisor know which of the 8 ATSD registers has been assigned 
>> to the LPARID when calling opal_npu_map_lpar?
>
>
> One ATSD register is exposed to the userspace via mmap() on a
> "corresponding" NVLink bridge VFIO device fd, register index == link
> index. An NVLink device belongs to an IOMMU group. A KVM pointer (i.e.
> LPID) is set to the IOMMU group from an ioctl() called by QEMU.
>
>
>> It seems like you might need a 
>> seperate OPAL call for this along the lines of npu2_map_mmio_atsd(atsd_index, 
>> lparid).
>
>
> I do not see what it buys us. I need an allocator on the host or skiboot
> anyway, either static or dynamic, dynamic seems excessive as the
> devices[] order or the number of ATSDs do not change.

So, of course I spot discussion after having hit the merge
button[1]. Maybe I should care/look closer at all the NPU2 things?
(it'd help if there was a sane way to test things that I could plug into
op-test)

Any recommendation what I should do from here, is keeping the patch okay?

[1] not an actual button.

Patch

diff --git a/include/npu2-regs.h b/include/npu2-regs.h
index 8273b2b..ae5e225 100644
--- a/include/npu2-regs.h
+++ b/include/npu2-regs.h
@@ -547,6 +547,8 @@  void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
 #define NPU2_XTS_MMIO_ATSD5_LPARID		NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x128)
 #define NPU2_XTS_MMIO_ATSD6_LPARID		NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x130)
 #define NPU2_XTS_MMIO_ATSD7_LPARID		NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x138)
+#define   NPU2_XTS_MMIO_ATSD_MSR_HV		PPC_BIT(51)
+#define   NPU2_XTS_MMIO_ATSD_LPARID		PPC_BITMASK(52,63)
 #define NPU2_XTS_BDF_MAP			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x4000)
 #define   NPU2_XTS_BDF_MAP_VALID		PPC_BIT(0)
 #define   NPU2_XTS_BDF_MAP_UNFILT		PPC_BIT(1)
diff --git a/hw/npu2.c b/hw/npu2.c
index 41563b4..767306f 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -2255,9 +2255,14 @@  static int opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
 	struct phb *phb = pci_get_phb(phb_id);
 	struct npu2 *p;
 	struct npu2_dev *ndev = NULL;
-	uint64_t xts_bdf_lpar, rc = OPAL_SUCCESS;
+	uint64_t xts_bdf_lpar, atsd_lpar, rc = OPAL_SUCCESS;
 	int i;
 	int id;
+	static uint64_t atsd_lpar_regs[] = {
+		NPU2_XTS_MMIO_ATSD0_LPARID, NPU2_XTS_MMIO_ATSD1_LPARID,
+		NPU2_XTS_MMIO_ATSD2_LPARID, NPU2_XTS_MMIO_ATSD3_LPARID,
+		NPU2_XTS_MMIO_ATSD4_LPARID, NPU2_XTS_MMIO_ATSD5_LPARID,
+		NPU2_XTS_MMIO_ATSD6_LPARID, NPU2_XTS_MMIO_ATSD7_LPARID };
 
 	if (!phb || phb->phb_type != phb_type_npu_v2)
 		return OPAL_PARAMETER;
@@ -2297,11 +2302,20 @@  static int opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
 	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_LPARID, xts_bdf_lpar, lparid);
 	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, xts_bdf_lpar, id);
 
-	/* Need to find an NVLink to send the ATSDs for this device over */
+	/*
+	 * Need to find an NVLink to send the ATSDs for this device over.
+	 * Also, the host allocates an ATSD per NVLink, enable filtering now.
+	 */
+	atsd_lpar = SETFIELD(NPU2_XTS_MMIO_ATSD_LPARID, 0, lparid);
+	if (!lparid)
+		atsd_lpar = SETFIELD(NPU2_XTS_MMIO_ATSD_MSR_HV, atsd_lpar, 1);
+
 	for (i = 0; i < p->total_devices; i++) {
 		if (p->devices[i].nvlink.gpu_bdfn == bdf) {
-			ndev = &p->devices[i];
-			break;
+			if (!ndev)
+				ndev = &p->devices[i];
+			if (i < ARRAY_SIZE(atsd_lpar_regs))
+				npu2_write(p, atsd_lpar_regs[i], atsd_lpar);
 		}
 	}