diff mbox

[RFC,Part2,V1,14/14] iommu/vt-d: update IOMMU state when memory hotplug happens

Message ID 1389085234-22296-15-git-send-email-jiang.liu@linux.intel.com
State Not Applicable
Headers show

Commit Message

Jiang Liu Jan. 7, 2014, 9 a.m. UTC
If static identity domain is created, IOMMU driver needs to update
si_domain page table when memory hotplug event happens. Otherwise
PCI device DMA operations can't access the hot-added memory regions.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
---
 drivers/iommu/intel-iommu.c |   52 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

Comments

Kai Huang Jan. 8, 2014, 5:07 a.m. UTC | #1
On Tue, Jan 7, 2014 at 5:00 PM, Jiang Liu <jiang.liu@linux.intel.com> wrote:
> If static identity domain is created, IOMMU driver needs to update
> si_domain page table when memory hotplug event happens. Otherwise
> PCI device DMA operations can't access the hot-added memory regions.
>
> Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
> ---
>  drivers/iommu/intel-iommu.c |   52 ++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 51 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 83e3ed4..35a987d 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -33,6 +33,7 @@
>  #include <linux/dmar.h>
>  #include <linux/dma-mapping.h>
>  #include <linux/mempool.h>
> +#include <linux/memory.h>
>  #include <linux/timer.h>
>  #include <linux/iova.h>
>  #include <linux/iommu.h>
> @@ -3689,6 +3690,54 @@ static struct notifier_block device_nb = {
>         .notifier_call = device_notifier,
>  };
>
> +static int intel_iommu_memory_notifier(struct notifier_block *nb,
> +                                      unsigned long val, void *v)
> +{
> +       struct memory_notify *mhp = v;
> +       unsigned long long start, end;
> +       struct iova *iova;
> +
> +       switch (val) {
> +       case MEM_GOING_ONLINE:
> +               start = mhp->start_pfn << PAGE_SHIFT;
> +               end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
> +               if (iommu_domain_identity_map(si_domain, start, end)) {
> +                       pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
> +                               start, end);
> +                       return NOTIFY_BAD;
> +               }

Better to use iommu_prepare_identity_map? For si_domain, if
hw_pass_through is used, there's no page table.

> +               break;
> +       case MEM_OFFLINE:
> +       case MEM_CANCEL_ONLINE:
> +               /* TODO: enhance RB-tree and IOVA code to support of splitting iova */
> +               iova = find_iova(&si_domain->iovad, mhp->start_pfn);
> +               if (iova) {
> +                       unsigned long start_pfn, last_pfn;
> +                       struct dmar_drhd_unit *drhd;
> +                       struct intel_iommu *iommu;
> +
> +                       start_pfn = mm_to_dma_pfn(iova->pfn_lo);
> +                       last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
> +                       dma_pte_clear_range(si_domain, start_pfn, last_pfn);
> +                       dma_pte_free_pagetable(si_domain, start_pfn, last_pfn);
> +                       rcu_read_lock();
> +                       for_each_active_iommu(iommu, drhd)
> +                               iommu_flush_iotlb_psi(iommu, si_domain->id,
> +                                       start_pfn, last_pfn - start_pfn + 1, 0);
> +                       rcu_read_unlock();
> +                       __free_iova(&si_domain->iovad, iova);
> +               }

The same as above. It looks like we need to consider hw_pass_through for the si_domain.

-Kai

> +               break;
> +       }
> +
> +       return NOTIFY_OK;
> +}
> +
> +static struct notifier_block intel_iommu_memory_nb = {
> +       .notifier_call = intel_iommu_memory_notifier,
> +       .priority = 0
> +};
> +
>  int __init intel_iommu_init(void)
>  {
>         int ret = -ENODEV;
> @@ -3761,8 +3810,9 @@ int __init intel_iommu_init(void)
>         init_iommu_pm_ops();
>
>         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
> -
>         bus_register_notifier(&pci_bus_type, &device_nb);
> +       if (si_domain)
> +               register_memory_notifier(&intel_iommu_memory_nb);
>
>         intel_iommu_enabled = 1;
>
> --
> 1.7.10.4
>
> _______________________________________________
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiang Liu Jan. 8, 2014, 6:01 a.m. UTC | #2
On 2014/1/8 13:07, Kai Huang wrote:
> On Tue, Jan 7, 2014 at 5:00 PM, Jiang Liu <jiang.liu@linux.intel.com> wrote:
>> If static identity domain is created, IOMMU driver needs to update
>> si_domain page table when memory hotplug event happens. Otherwise
>> PCI device DMA operations can't access the hot-added memory regions.
>>
>> Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
>> ---
>>  drivers/iommu/intel-iommu.c |   52 ++++++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 51 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
>> index 83e3ed4..35a987d 100644
>> --- a/drivers/iommu/intel-iommu.c
>> +++ b/drivers/iommu/intel-iommu.c
>> @@ -33,6 +33,7 @@
>>  #include <linux/dmar.h>
>>  #include <linux/dma-mapping.h>
>>  #include <linux/mempool.h>
>> +#include <linux/memory.h>
>>  #include <linux/timer.h>
>>  #include <linux/iova.h>
>>  #include <linux/iommu.h>
>> @@ -3689,6 +3690,54 @@ static struct notifier_block device_nb = {
>>         .notifier_call = device_notifier,
>>  };
>>
>> +static int intel_iommu_memory_notifier(struct notifier_block *nb,
>> +                                      unsigned long val, void *v)
>> +{
>> +       struct memory_notify *mhp = v;
>> +       unsigned long long start, end;
>> +       struct iova *iova;
>> +
>> +       switch (val) {
>> +       case MEM_GOING_ONLINE:
>> +               start = mhp->start_pfn << PAGE_SHIFT;
>> +               end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
>> +               if (iommu_domain_identity_map(si_domain, start, end)) {
>> +                       pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
>> +                               start, end);
>> +                       return NOTIFY_BAD;
>> +               }
> 
> Better to use iommu_prepare_identity_map? For si_domain, if
> hw_pass_through is used, there's no page table.
Hi Kai,
	Good catch!
It seems that function iommu_prepare_identity_map() is designed to handle
RMRRs. So how about avoiding registering the memory hotplug notifier
if hw_pass_through is true?

Thanks!
Gerry

> 
>> +               break;
>> +       case MEM_OFFLINE:
>> +       case MEM_CANCEL_ONLINE:
>> +               /* TODO: enhance RB-tree and IOVA code to support of splitting iova */
>> +               iova = find_iova(&si_domain->iovad, mhp->start_pfn);
>> +               if (iova) {
>> +                       unsigned long start_pfn, last_pfn;
>> +                       struct dmar_drhd_unit *drhd;
>> +                       struct intel_iommu *iommu;
>> +
>> +                       start_pfn = mm_to_dma_pfn(iova->pfn_lo);
>> +                       last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
>> +                       dma_pte_clear_range(si_domain, start_pfn, last_pfn);
>> +                       dma_pte_free_pagetable(si_domain, start_pfn, last_pfn);
>> +                       rcu_read_lock();
>> +                       for_each_active_iommu(iommu, drhd)
>> +                               iommu_flush_iotlb_psi(iommu, si_domain->id,
>> +                                       start_pfn, last_pfn - start_pfn + 1, 0);
>> +                       rcu_read_unlock();
>> +                       __free_iova(&si_domain->iovad, iova);
>> +               }
> 
> The same as above. Looks we need to consider hw_pass_through for the si_domain.
> 
> -Kai
> 
>> +               break;
>> +       }
>> +
>> +       return NOTIFY_OK;
>> +}
>> +
>> +static struct notifier_block intel_iommu_memory_nb = {
>> +       .notifier_call = intel_iommu_memory_notifier,
>> +       .priority = 0
>> +};
>> +
>>  int __init intel_iommu_init(void)
>>  {
>>         int ret = -ENODEV;
>> @@ -3761,8 +3810,9 @@ int __init intel_iommu_init(void)
>>         init_iommu_pm_ops();
>>
>>         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
>> -
>>         bus_register_notifier(&pci_bus_type, &device_nb);
>> +       if (si_domain)
>> +               register_memory_notifier(&intel_iommu_memory_nb);
>>
>>         intel_iommu_enabled = 1;
>>
>> --
>> 1.7.10.4
>>
>> _______________________________________________
>> iommu mailing list
>> iommu@lists.linux-foundation.org
>> https://lists.linuxfoundation.org/mailman/listinfo/iommu
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kai Huang Jan. 8, 2014, 6:14 a.m. UTC | #3
On Wed, Jan 8, 2014 at 2:01 PM, Jiang Liu <jiang.liu@linux.intel.com> wrote:
>
>
> On 2014/1/8 13:07, Kai Huang wrote:
>> On Tue, Jan 7, 2014 at 5:00 PM, Jiang Liu <jiang.liu@linux.intel.com> wrote:
>>> If static identity domain is created, IOMMU driver needs to update
>>> si_domain page table when memory hotplug event happens. Otherwise
>>> PCI device DMA operations can't access the hot-added memory regions.
>>>
>>> Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
>>> ---
>>>  drivers/iommu/intel-iommu.c |   52 ++++++++++++++++++++++++++++++++++++++++++-
>>>  1 file changed, 51 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
>>> index 83e3ed4..35a987d 100644
>>> --- a/drivers/iommu/intel-iommu.c
>>> +++ b/drivers/iommu/intel-iommu.c
>>> @@ -33,6 +33,7 @@
>>>  #include <linux/dmar.h>
>>>  #include <linux/dma-mapping.h>
>>>  #include <linux/mempool.h>
>>> +#include <linux/memory.h>
>>>  #include <linux/timer.h>
>>>  #include <linux/iova.h>
>>>  #include <linux/iommu.h>
>>> @@ -3689,6 +3690,54 @@ static struct notifier_block device_nb = {
>>>         .notifier_call = device_notifier,
>>>  };
>>>
>>> +static int intel_iommu_memory_notifier(struct notifier_block *nb,
>>> +                                      unsigned long val, void *v)
>>> +{
>>> +       struct memory_notify *mhp = v;
>>> +       unsigned long long start, end;
>>> +       struct iova *iova;
>>> +
>>> +       switch (val) {
>>> +       case MEM_GOING_ONLINE:
>>> +               start = mhp->start_pfn << PAGE_SHIFT;
>>> +               end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
>>> +               if (iommu_domain_identity_map(si_domain, start, end)) {
>>> +                       pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
>>> +                               start, end);
>>> +                       return NOTIFY_BAD;
>>> +               }
>>
>> Better to use iommu_prepare_identity_map? For si_domain, if
>> hw_pass_through is used, there's no page table.
> Hi Kai,
>         Good catch!
> Seems function iommu_prepare_identity_map() is designed to handle
> RMRRs. So how about avoiding of registering memory hotplug notifier
> if hw_pass_through is true?

I think that's also fine :)

Btw, I have a question related to memory hotplug but not to the
Intel IOMMU specifically. For devices that use DMA remapping, suppose
a device is already using the memory that we are trying to remove;
in this case, it looks like we need to change the existing iova <-> pa
mappings for the pa that is in the memory range about to be removed,
and reset the mapping to a different pa (the iova remains the same). Does
the existing code have this covered? Is there a generic IOMMU-layer memory
hotplug notifier to handle memory removal?

-Kai
>
> Thanks!
> Gerry
>
>>
>>> +               break;
>>> +       case MEM_OFFLINE:
>>> +       case MEM_CANCEL_ONLINE:
>>> +               /* TODO: enhance RB-tree and IOVA code to support of splitting iova */
>>> +               iova = find_iova(&si_domain->iovad, mhp->start_pfn);
>>> +               if (iova) {
>>> +                       unsigned long start_pfn, last_pfn;
>>> +                       struct dmar_drhd_unit *drhd;
>>> +                       struct intel_iommu *iommu;
>>> +
>>> +                       start_pfn = mm_to_dma_pfn(iova->pfn_lo);
>>> +                       last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
>>> +                       dma_pte_clear_range(si_domain, start_pfn, last_pfn);
>>> +                       dma_pte_free_pagetable(si_domain, start_pfn, last_pfn);
>>> +                       rcu_read_lock();
>>> +                       for_each_active_iommu(iommu, drhd)
>>> +                               iommu_flush_iotlb_psi(iommu, si_domain->id,
>>> +                                       start_pfn, last_pfn - start_pfn + 1, 0);
>>> +                       rcu_read_unlock();
>>> +                       __free_iova(&si_domain->iovad, iova);
>>> +               }
>>
>> The same as above. Looks we need to consider hw_pass_through for the si_domain.
>>
>> -Kai
>>
>>> +               break;
>>> +       }
>>> +
>>> +       return NOTIFY_OK;
>>> +}
>>> +
>>> +static struct notifier_block intel_iommu_memory_nb = {
>>> +       .notifier_call = intel_iommu_memory_notifier,
>>> +       .priority = 0
>>> +};
>>> +
>>>  int __init intel_iommu_init(void)
>>>  {
>>>         int ret = -ENODEV;
>>> @@ -3761,8 +3810,9 @@ int __init intel_iommu_init(void)
>>>         init_iommu_pm_ops();
>>>
>>>         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
>>> -
>>>         bus_register_notifier(&pci_bus_type, &device_nb);
>>> +       if (si_domain)
>>> +               register_memory_notifier(&intel_iommu_memory_nb);
>>>
>>>         intel_iommu_enabled = 1;
>>>
>>> --
>>> 1.7.10.4
>>>
>>> _______________________________________________
>>> iommu mailing list
>>> iommu@lists.linux-foundation.org
>>> https://lists.linuxfoundation.org/mailman/listinfo/iommu
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiang Liu Jan. 8, 2014, 6:21 a.m. UTC | #4
On 2014/1/8 14:14, Kai Huang wrote:
> On Wed, Jan 8, 2014 at 2:01 PM, Jiang Liu <jiang.liu@linux.intel.com> wrote:
>>
>>
>> On 2014/1/8 13:07, Kai Huang wrote:
>>> On Tue, Jan 7, 2014 at 5:00 PM, Jiang Liu <jiang.liu@linux.intel.com> wrote:
>>>> If static identity domain is created, IOMMU driver needs to update
>>>> si_domain page table when memory hotplug event happens. Otherwise
>>>> PCI device DMA operations can't access the hot-added memory regions.
>>>>
>>>> Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
>>>> ---
>>>>  drivers/iommu/intel-iommu.c |   52 ++++++++++++++++++++++++++++++++++++++++++-
>>>>  1 file changed, 51 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
>>>> index 83e3ed4..35a987d 100644
>>>> --- a/drivers/iommu/intel-iommu.c
>>>> +++ b/drivers/iommu/intel-iommu.c
>>>> @@ -33,6 +33,7 @@
>>>>  #include <linux/dmar.h>
>>>>  #include <linux/dma-mapping.h>
>>>>  #include <linux/mempool.h>
>>>> +#include <linux/memory.h>
>>>>  #include <linux/timer.h>
>>>>  #include <linux/iova.h>
>>>>  #include <linux/iommu.h>
>>>> @@ -3689,6 +3690,54 @@ static struct notifier_block device_nb = {
>>>>         .notifier_call = device_notifier,
>>>>  };
>>>>
>>>> +static int intel_iommu_memory_notifier(struct notifier_block *nb,
>>>> +                                      unsigned long val, void *v)
>>>> +{
>>>> +       struct memory_notify *mhp = v;
>>>> +       unsigned long long start, end;
>>>> +       struct iova *iova;
>>>> +
>>>> +       switch (val) {
>>>> +       case MEM_GOING_ONLINE:
>>>> +               start = mhp->start_pfn << PAGE_SHIFT;
>>>> +               end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
>>>> +               if (iommu_domain_identity_map(si_domain, start, end)) {
>>>> +                       pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
>>>> +                               start, end);
>>>> +                       return NOTIFY_BAD;
>>>> +               }
>>>
>>> Better to use iommu_prepare_identity_map? For si_domain, if
>>> hw_pass_through is used, there's no page table.
>> Hi Kai,
>>         Good catch!
>> Seems function iommu_prepare_identity_map() is designed to handle
>> RMRRs. So how about avoiding of registering memory hotplug notifier
>> if hw_pass_through is true?
> 
> I think that's also fine :)
> 
> Btw, I have a related question to memory hotplug but not related to
> intel IOMMU specifically. For the devices use DMA remapping, suppose
> the device is already using the memory that we are trying to remove,
> is this case, looks we need to change the existing iova <-> pa
> mappings for the pa that is in the memory range about to be removed,
> and reset the mapping to different pa (iova remains the same). Does
> existing code have this covered? Is there a generic IOMMU layer memory
> hotplug notifier to handle memory removal?
That's a big issue about how to reclaim memory that is in use. The current
rule is that memory used by DMA won't be removed until it is released.

> 
> -Kai
>>
>> Thanks!
>> Gerry
>>
>>>
>>>> +               break;
>>>> +       case MEM_OFFLINE:
>>>> +       case MEM_CANCEL_ONLINE:
>>>> +               /* TODO: enhance RB-tree and IOVA code to support of splitting iova */
>>>> +               iova = find_iova(&si_domain->iovad, mhp->start_pfn);
>>>> +               if (iova) {
>>>> +                       unsigned long start_pfn, last_pfn;
>>>> +                       struct dmar_drhd_unit *drhd;
>>>> +                       struct intel_iommu *iommu;
>>>> +
>>>> +                       start_pfn = mm_to_dma_pfn(iova->pfn_lo);
>>>> +                       last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
>>>> +                       dma_pte_clear_range(si_domain, start_pfn, last_pfn);
>>>> +                       dma_pte_free_pagetable(si_domain, start_pfn, last_pfn);
>>>> +                       rcu_read_lock();
>>>> +                       for_each_active_iommu(iommu, drhd)
>>>> +                               iommu_flush_iotlb_psi(iommu, si_domain->id,
>>>> +                                       start_pfn, last_pfn - start_pfn + 1, 0);
>>>> +                       rcu_read_unlock();
>>>> +                       __free_iova(&si_domain->iovad, iova);
>>>> +               }
>>>
>>> The same as above. Looks we need to consider hw_pass_through for the si_domain.
>>>
>>> -Kai
>>>
>>>> +               break;
>>>> +       }
>>>> +
>>>> +       return NOTIFY_OK;
>>>> +}
>>>> +
>>>> +static struct notifier_block intel_iommu_memory_nb = {
>>>> +       .notifier_call = intel_iommu_memory_notifier,
>>>> +       .priority = 0
>>>> +};
>>>> +
>>>>  int __init intel_iommu_init(void)
>>>>  {
>>>>         int ret = -ENODEV;
>>>> @@ -3761,8 +3810,9 @@ int __init intel_iommu_init(void)
>>>>         init_iommu_pm_ops();
>>>>
>>>>         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
>>>> -
>>>>         bus_register_notifier(&pci_bus_type, &device_nb);
>>>> +       if (si_domain)
>>>> +               register_memory_notifier(&intel_iommu_memory_nb);
>>>>
>>>>         intel_iommu_enabled = 1;
>>>>
>>>> --
>>>> 1.7.10.4
>>>>
>>>> _______________________________________________
>>>> iommu mailing list
>>>> iommu@lists.linux-foundation.org
>>>> https://lists.linuxfoundation.org/mailman/listinfo/iommu
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kai Huang Jan. 8, 2014, 6:27 a.m. UTC | #5
On Wed, Jan 8, 2014 at 2:21 PM, Jiang Liu <jiang.liu@linux.intel.com> wrote:
>
>
> On 2014/1/8 14:14, Kai Huang wrote:
>> On Wed, Jan 8, 2014 at 2:01 PM, Jiang Liu <jiang.liu@linux.intel.com> wrote:
>>>
>>>
>>> On 2014/1/8 13:07, Kai Huang wrote:
>>>> On Tue, Jan 7, 2014 at 5:00 PM, Jiang Liu <jiang.liu@linux.intel.com> wrote:
>>>>> If static identity domain is created, IOMMU driver needs to update
>>>>> si_domain page table when memory hotplug event happens. Otherwise
>>>>> PCI device DMA operations can't access the hot-added memory regions.
>>>>>
>>>>> Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
>>>>> ---
>>>>>  drivers/iommu/intel-iommu.c |   52 ++++++++++++++++++++++++++++++++++++++++++-
>>>>>  1 file changed, 51 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
>>>>> index 83e3ed4..35a987d 100644
>>>>> --- a/drivers/iommu/intel-iommu.c
>>>>> +++ b/drivers/iommu/intel-iommu.c
>>>>> @@ -33,6 +33,7 @@
>>>>>  #include <linux/dmar.h>
>>>>>  #include <linux/dma-mapping.h>
>>>>>  #include <linux/mempool.h>
>>>>> +#include <linux/memory.h>
>>>>>  #include <linux/timer.h>
>>>>>  #include <linux/iova.h>
>>>>>  #include <linux/iommu.h>
>>>>> @@ -3689,6 +3690,54 @@ static struct notifier_block device_nb = {
>>>>>         .notifier_call = device_notifier,
>>>>>  };
>>>>>
>>>>> +static int intel_iommu_memory_notifier(struct notifier_block *nb,
>>>>> +                                      unsigned long val, void *v)
>>>>> +{
>>>>> +       struct memory_notify *mhp = v;
>>>>> +       unsigned long long start, end;
>>>>> +       struct iova *iova;
>>>>> +
>>>>> +       switch (val) {
>>>>> +       case MEM_GOING_ONLINE:
>>>>> +               start = mhp->start_pfn << PAGE_SHIFT;
>>>>> +               end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
>>>>> +               if (iommu_domain_identity_map(si_domain, start, end)) {
>>>>> +                       pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
>>>>> +                               start, end);
>>>>> +                       return NOTIFY_BAD;
>>>>> +               }
>>>>
>>>> Better to use iommu_prepare_identity_map? For si_domain, if
>>>> hw_pass_through is used, there's no page table.
>>> Hi Kai,
>>>         Good catch!
>>> Seems function iommu_prepare_identity_map() is designed to handle
>>> RMRRs. So how about avoiding of registering memory hotplug notifier
>>> if hw_pass_through is true?
>>
>> I think that's also fine :)
>>
>> Btw, I have a related question to memory hotplug but not related to
>> intel IOMMU specifically. For the devices use DMA remapping, suppose
>> the device is already using the memory that we are trying to remove,
>> is this case, looks we need to change the existing iova <-> pa
>> mappings for the pa that is in the memory range about to be removed,
>> and reset the mapping to different pa (iova remains the same). Does
>> existing code have this covered? Is there a generic IOMMU layer memory
>> hotplug notifier to handle memory removal?
> That's a big issue about how to reclaim memory in use. Current rule
> is that memory used by DMA won't be removed until released.
>

Understood. Thanks.

-Kai
>>
>> -Kai
>>>
>>> Thanks!
>>> Gerry
>>>
>>>>
>>>>> +               break;
>>>>> +       case MEM_OFFLINE:
>>>>> +       case MEM_CANCEL_ONLINE:
>>>>> +               /* TODO: enhance RB-tree and IOVA code to support of splitting iova */
>>>>> +               iova = find_iova(&si_domain->iovad, mhp->start_pfn);
>>>>> +               if (iova) {
>>>>> +                       unsigned long start_pfn, last_pfn;
>>>>> +                       struct dmar_drhd_unit *drhd;
>>>>> +                       struct intel_iommu *iommu;
>>>>> +
>>>>> +                       start_pfn = mm_to_dma_pfn(iova->pfn_lo);
>>>>> +                       last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
>>>>> +                       dma_pte_clear_range(si_domain, start_pfn, last_pfn);
>>>>> +                       dma_pte_free_pagetable(si_domain, start_pfn, last_pfn);
>>>>> +                       rcu_read_lock();
>>>>> +                       for_each_active_iommu(iommu, drhd)
>>>>> +                               iommu_flush_iotlb_psi(iommu, si_domain->id,
>>>>> +                                       start_pfn, last_pfn - start_pfn + 1, 0);
>>>>> +                       rcu_read_unlock();
>>>>> +                       __free_iova(&si_domain->iovad, iova);
>>>>> +               }
>>>>
>>>> The same as above. Looks we need to consider hw_pass_through for the si_domain.
>>>>
>>>> -Kai
>>>>
>>>>> +               break;
>>>>> +       }
>>>>> +
>>>>> +       return NOTIFY_OK;
>>>>> +}
>>>>> +
>>>>> +static struct notifier_block intel_iommu_memory_nb = {
>>>>> +       .notifier_call = intel_iommu_memory_notifier,
>>>>> +       .priority = 0
>>>>> +};
>>>>> +
>>>>>  int __init intel_iommu_init(void)
>>>>>  {
>>>>>         int ret = -ENODEV;
>>>>> @@ -3761,8 +3810,9 @@ int __init intel_iommu_init(void)
>>>>>         init_iommu_pm_ops();
>>>>>
>>>>>         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
>>>>> -
>>>>>         bus_register_notifier(&pci_bus_type, &device_nb);
>>>>> +       if (si_domain)
>>>>> +               register_memory_notifier(&intel_iommu_memory_nb);
>>>>>
>>>>>         intel_iommu_enabled = 1;
>>>>>
>>>>> --
>>>>> 1.7.10.4
>>>>>
>>>>> _______________________________________________
>>>>> iommu mailing list
>>>>> iommu@lists.linux-foundation.org
>>>>> https://lists.linuxfoundation.org/mailman/listinfo/iommu
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 83e3ed4..35a987d 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -33,6 +33,7 @@ 
 #include <linux/dmar.h>
 #include <linux/dma-mapping.h>
 #include <linux/mempool.h>
+#include <linux/memory.h>
 #include <linux/timer.h>
 #include <linux/iova.h>
 #include <linux/iommu.h>
@@ -3689,6 +3690,54 @@  static struct notifier_block device_nb = {
 	.notifier_call = device_notifier,
 };
 
+static int intel_iommu_memory_notifier(struct notifier_block *nb,
+				       unsigned long val, void *v)
+{
+	struct memory_notify *mhp = v;
+	unsigned long long start, end;
+	struct iova *iova;
+
+	switch (val) {
+	case MEM_GOING_ONLINE:
+		start = mhp->start_pfn << PAGE_SHIFT;
+		end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
+		if (iommu_domain_identity_map(si_domain, start, end)) {
+			pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
+				start, end);
+			return NOTIFY_BAD;
+		}
+		break;
+	case MEM_OFFLINE:
+	case MEM_CANCEL_ONLINE:
+		/* TODO: enhance RB-tree and IOVA code to support of splitting iova */
+		iova = find_iova(&si_domain->iovad, mhp->start_pfn);
+		if (iova) {
+			unsigned long start_pfn, last_pfn;
+			struct dmar_drhd_unit *drhd;
+			struct intel_iommu *iommu;
+
+			start_pfn = mm_to_dma_pfn(iova->pfn_lo);
+			last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
+			dma_pte_clear_range(si_domain, start_pfn, last_pfn);
+			dma_pte_free_pagetable(si_domain, start_pfn, last_pfn);
+			rcu_read_lock();
+			for_each_active_iommu(iommu, drhd)
+				iommu_flush_iotlb_psi(iommu, si_domain->id,
+					start_pfn, last_pfn - start_pfn + 1, 0);
+			rcu_read_unlock();
+			__free_iova(&si_domain->iovad, iova);
+		}
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block intel_iommu_memory_nb = {
+	.notifier_call = intel_iommu_memory_notifier,
+	.priority = 0
+};
+
 int __init intel_iommu_init(void)
 {
 	int ret = -ENODEV;
@@ -3761,8 +3810,9 @@  int __init intel_iommu_init(void)
 	init_iommu_pm_ops();
 
 	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
-
 	bus_register_notifier(&pci_bus_type, &device_nb);
+	if (si_domain)
+		register_memory_notifier(&intel_iommu_memory_nb);
 
 	intel_iommu_enabled = 1;