diff mbox

[1/1] IOMMU: Save pci device id instead of pci_dev* pointer for DMAR devices

Message ID 1383639898-48776-2-git-send-email-wangyijing@huawei.com
State Not Applicable
Headers show

Commit Message

Yijing Wang Nov. 5, 2013, 8:24 a.m. UTC
Currently, the DMAR driver saves the target PCI device pointers for drhd/rmrr/atsr
in a (pci_dev *) array. This is not safe, because PCI devices may be
hot-added or removed while the system is running, and they will then have a new
pci_dev * pointer. So if there are two or more IOMMUs in the system, these devices
will find the wrong drhd during DMA mapping, and DMAR faults will occur.
This patch saves the PCI device ID instead of the (pci_dev *) to fix this issue.
Because the DMAR table only provides the PCI device IDs under a specific IOMMU,
there is no reason to bind the IOMMU to the (pci_dev *). In addition, this patch
uses a list to manage the device IDs for each IOMMU, so the list helpers can
easily be used to manage them.

After removing and rescanning a PCI device:
[  611.857095] dmar: DRHD: handling fault status reg 2
[  611.857109] dmar: DMAR:[DMA Read] Request device [86:00.3] fault addr ffff7000
[  611.857109] DMAR:[fault reason 02] Present bit in context entry is clear
[  611.857524] dmar: DRHD: handling fault status reg 102
[  611.857534] dmar: DMAR:[DMA Read] Request device [86:00.3] fault addr ffff6000
[  611.857534] DMAR:[fault reason 02] Present bit in context entry is clear
[  611.857936] dmar: DRHD: handling fault status reg 202
[  611.857947] dmar: DMAR:[DMA Read] Request device [86:00.3] fault addr ffff5000
[  611.857947] DMAR:[fault reason 02] Present bit in context entry is clear
[  611.858351] dmar: DRHD: handling fault status reg 302
[  611.858362] dmar: DMAR:[DMA Read] Request device [86:00.3] fault addr ffff4000
[  611.858362] DMAR:[fault reason 02] Present bit in context entry is clear
[  611.860819] IPv6: ADDRCONF(NETDEV_UP): eth3: link is not ready
[  611.860983] dmar: DRHD: handling fault status reg 402
[  611.860995] dmar: INTR-REMAP: Request device [[86:00.3] fault index a4
[  611.860995] INTR-REMAP:[fault reason 34] Present field in the IRTE entry is clear

Signed-off-by: Yijing Wang <wangyijing@huawei.com>
---
 drivers/iommu/dmar.c        |   93 +++++++++++++-------------
 drivers/iommu/intel-iommu.c |  155 ++++++++++++++++++++++++++++---------------
 include/linux/dmar.h        |   20 ++++--
 3 files changed, 159 insertions(+), 109 deletions(-)

Comments

Bjorn Helgaas Nov. 7, 2013, 6:07 p.m. UTC | #1
On Tue, Nov 05, 2013 at 04:24:58PM +0800, Yijing Wang wrote:
> Currently, DMAR driver save target pci devices pointers for drhd/rmrr/atsr
> in (pci_dev *) array. This is not safe, because pci devices maybe
> hot added or removed during system running. They will have new pci_dev *
> pointer. So if there have two IOMMUs or more in system, these devices
> will find a wrong drhd during DMA mapping. And DMAR faults will occur.
> This patch save pci device id insted of (pci_dev *) to fix this issue,
> Because DMAR table just provide pci device id under a specific IOMMU,
> so there is no reason to bind IOMMU with the (pci_dev *). Other, here
> use list to manage devices' id for IOMMU, we can easily use list helper
> to manage device id.
> 
> after remove and rescan a pci device
> [  611.857095] dmar: DRHD: handling fault status reg 2
> [  611.857109] dmar: DMAR:[DMA Read] Request device [86:00.3] fault addr ffff7000
> [  611.857109] DMAR:[fault reason 02] Present bit in context entry is clear
> [  611.857524] dmar: DRHD: handling fault status reg 102
> [  611.857534] dmar: DMAR:[DMA Read] Request device [86:00.3] fault addr ffff6000
> [  611.857534] DMAR:[fault reason 02] Present bit in context entry is clear
> [  611.857936] dmar: DRHD: handling fault status reg 202
> [  611.857947] dmar: DMAR:[DMA Read] Request device [86:00.3] fault addr ffff5000
> [  611.857947] DMAR:[fault reason 02] Present bit in context entry is clear
> [  611.858351] dmar: DRHD: handling fault status reg 302
> [  611.858362] dmar: DMAR:[DMA Read] Request device [86:00.3] fault addr ffff4000
> [  611.858362] DMAR:[fault reason 02] Present bit in context entry is clear
> [  611.860819] IPv6: ADDRCONF(NETDEV_UP): eth3: link is not ready
> [  611.860983] dmar: DRHD: handling fault status reg 402
> [  611.860995] dmar: INTR-REMAP: Request device [[86:00.3] fault index a4
> [  611.860995] INTR-REMAP:[fault reason 34] Present field in the IRTE entry is clear
> 
> Signed-off-by: Yijing Wang <wangyijing@huawei.com>
> ---
>  drivers/iommu/dmar.c        |   93 +++++++++++++-------------
>  drivers/iommu/intel-iommu.c |  155 ++++++++++++++++++++++++++++---------------
>  include/linux/dmar.h        |   20 ++++--
>  3 files changed, 159 insertions(+), 109 deletions(-)
> 
> diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
> index 785675a..9aa65a3 100644
> --- a/drivers/iommu/dmar.c
> +++ b/drivers/iommu/dmar.c
> @@ -65,12 +65,13 @@ static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
>  }
>  
>  static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
> -					   struct pci_dev **dev, u16 segment)
> +					    u16 segment, struct list_head *head)
>  {
>  	struct pci_bus *bus;
>  	struct pci_dev *pdev = NULL;
>  	struct acpi_dmar_pci_path *path;
>  	int count;
> +	struct dmar_device *dmar_dev;
>  
>  	bus = pci_find_bus(segment, scope->bus);
>  	path = (struct acpi_dmar_pci_path *)(scope + 1);
> @@ -100,7 +101,6 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
>  	if (!pdev) {
>  		pr_warn("Device scope device [%04x:%02x:%02x.%02x] not found\n",
>  			segment, scope->bus, path->dev, path->fn);
> -		*dev = NULL;
>  		return 0;
>  	}
>  	if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \
> @@ -111,54 +111,39 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
>  			pci_name(pdev));
>  		return -EINVAL;
>  	}
> -	*dev = pdev;
> +
> +	dmar_dev = kzalloc(sizeof(struct dmar_device), GFP_KERNEL);
> +	if (!dmar_dev) {
> +		pci_dev_put(pdev);
> +		return -ENOMEM;
> +	}
> +
> +	dmar_dev->segment = segment;
> +	dmar_dev->bus = pdev->bus->number;
> +	dmar_dev->devfn = pdev->devfn;
> +	list_add_tail(&dmar_dev->list, head);
> +
> +	pci_dev_put(pdev);
>  	return 0;
>  }
>  
> -int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
> -				struct pci_dev ***devices, u16 segment)
> +int __init dmar_parse_dev_scope(void *start, void *end, u16 segment, 
> +	struct list_head *head)
>  {
>  	struct acpi_dmar_device_scope *scope;
> -	void * tmp = start;
> -	int index;
>  	int ret;
>  
> -	*cnt = 0;
> -	while (start < end) {
> -		scope = start;
> -		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
> -		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
> -			(*cnt)++;
> -		else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
> -			scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
> -			pr_warn("Unsupported device scope\n");
> -		}
> -		start += scope->length;
> -	}
> -	if (*cnt == 0)
> -		return 0;
> -
> -	*devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
> -	if (!*devices)
> -		return -ENOMEM;
> -
> -	start = tmp;
> -	index = 0;
>  	while (start < end) {
>  		scope = start;
>  		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
>  		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
> -			ret = dmar_parse_one_dev_scope(scope,
> -				&(*devices)[index], segment);
> -			if (ret) {
> -				kfree(*devices);
> +			ret = dmar_parse_one_dev_scope(scope, segment, head);
> +			if (ret)
>  				return ret;
> -			}
> -			index ++;
>  		}
>  		start += scope->length;
>  	}
> -
> +    
>  	return 0;
>  }
>  
> @@ -183,6 +168,7 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
>  	dmaru->reg_base_addr = drhd->address;
>  	dmaru->segment = drhd->segment;
>  	dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
> +	INIT_LIST_HEAD(&dmaru->head);
>  
>  	ret = alloc_iommu(dmaru);
>  	if (ret) {
> @@ -193,6 +179,19 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
>  	return 0;
>  }
>  
> +static void drhd_free(struct dmar_drhd_unit *dmaru)
> +{
> +	struct dmar_device *dev, *tmp;
> +
> +	list_for_each_entry_safe(dev, tmp, &dmaru->head,
> +	    list) {
> +		list_del(&dev->list);
> +		kfree(dev);
> +	}
> +
> +	kfree(dmaru);
> +}
> +
>  static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
>  {
>  	struct acpi_dmar_hardware_unit *drhd;
> @@ -205,11 +204,10 @@ static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
>  
>  	ret = dmar_parse_dev_scope((void *)(drhd + 1),
>  				((void *)drhd) + drhd->header.length,
> -				&dmaru->devices_cnt, &dmaru->devices,
> -				drhd->segment);
> +				drhd->segment, &dmaru->head);
>  	if (ret) {
>  		list_del(&dmaru->list);
> -		kfree(dmaru);
> +		drhd_free(dmaru);
>  	}
>  	return ret;
>  }
> @@ -378,16 +376,18 @@ parse_dmar_table(void)
>  	return ret;
>  }
>  
> -static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
> -			  struct pci_dev *dev)
> +static int dmar_pci_device_match(struct pci_dev *dev, 
> +	struct list_head *head)
>  {
> -	int index;
> +	struct dmar_device *dmar_dev;
>  
>  	while (dev) {
> -		for (index = 0; index < cnt; index++)
> -			if (dev == devices[index])
> -				return 1;
> -
> +		list_for_each_entry(dmar_dev, head, list)
> +		    if (dmar_dev->segment == pci_domain_nr(dev->bus)
> +			    && dmar_dev->bus == dev->bus->number
> +			    && dmar_dev->devfn == dev->devfn)
> +			return 1;
> +		
>  		/* Check our parent */
>  		dev = dev->bus->self;

You didn't change this, but it looks like this may have the same problem
we've been talking about here:

http://lkml.kernel.org/r/20131105232903.3790.8738.stgit@bhelgaas-glaptop.roam.corp.google.com

Namely, if "dev" is a VF on a virtual bus, "dev->bus->self == NULL", so
we won't search for any of the bridges leading to the VF.  I proposed a
pci_upstream_bridge() interface that could be used like this:

	/* Check our parent */
	dev = pci_upstream_bridge(dev);

>  	}
> @@ -412,8 +412,7 @@ dmar_find_matched_drhd_unit(struct pci_dev *dev)
>  		    drhd->segment == pci_domain_nr(dev->bus))
>  			return dmaru;
>  
> -		if (dmar_pci_device_match(dmaru->devices,
> -					  dmaru->devices_cnt, dev))
> +		if (dmar_pci_device_match(dev, &dmaru->head))
>  			return dmaru;
>  	}
>  
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 15e9b57..b33fe0e 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -650,7 +650,8 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)
>  static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
>  {
>  	struct dmar_drhd_unit *drhd = NULL;
> -	int i;
> +	struct dmar_device *dmar_dev;
> +	struct pci_dev *pdev;
>  
>  	for_each_drhd_unit(drhd) {
>  		if (drhd->ignored)
> @@ -658,16 +659,22 @@ static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
>  		if (segment != drhd->segment)
>  			continue;
>  
> -		for (i = 0; i < drhd->devices_cnt; i++) {
> -			if (drhd->devices[i] &&
> -			    drhd->devices[i]->bus->number == bus &&
> -			    drhd->devices[i]->devfn == devfn)
> -				return drhd->iommu;
> -			if (drhd->devices[i] &&
> -			    drhd->devices[i]->subordinate &&
> -			    drhd->devices[i]->subordinate->number <= bus &&
> -			    drhd->devices[i]->subordinate->busn_res.end >= bus)
> -				return drhd->iommu;
> +		list_for_each_entry(dmar_dev, &drhd->head, list) {
> +		    if (dmar_dev->bus == bus && 
> +			    dmar_dev->devfn == devfn)
> +			return drhd->iommu;
> +
> +		    pdev = pci_get_domain_bus_and_slot(dmar_dev->segment, 
> +			    dmar_dev->bus, dmar_dev->devfn);
> +		    if (pdev->subordinate && 
> +			    pdev->subordinate->number <= bus &&
> +			    pdev->subordinate->busn_res.end >= bus) {
> +			pci_dev_put(pdev);
> +			return drhd->iommu;

I don't know the details of how device_to_iommu() is used, but this
style (acquire ref to pci_dev, match it to some other object, drop
pci_dev ref, return object) makes me nervous.  How do we know the
caller isn't depending on pci_dev to remain attached to the object?
What happens if the pci_dev disappears when we do the pci_dev_put()
here?

> +		    }
> +
> +		    if (pdev)
> +			pci_dev_put(pdev);
>  		}
>  
>  		if (drhd->include_all)
> @@ -2331,18 +2338,20 @@ static int domain_add_dev_info(struct dmar_domain *domain,
>  static bool device_has_rmrr(struct pci_dev *dev)
>  {
>  	struct dmar_rmrr_unit *rmrr;
> -	int i;
> +	struct dmar_device *dmar_dev;
>  
>  	for_each_rmrr_units(rmrr) {
> -		for (i = 0; i < rmrr->devices_cnt; i++) {
> -			/*
> -			 * Return TRUE if this RMRR contains the device that
> -			 * is passed in.
> -			 */
> -			if (rmrr->devices[i] == dev)
> -				return true;
> -		}
> +	    list_for_each_entry(dmar_dev, &rmrr->head, list)
> +		/*
> +		 * Return TRUE if this RMRR contains the device that
> +		 * is passed in.
> +		 */
> +	if (dmar_dev->segment == pci_domain_nr(dev->bus) && 
> +			dmar_dev->bus == dev->bus->number && 
> +			dmar_dev->devfn == dev->devfn)
> +		return true;
>  	}
> +	
>  	return false;
>  }
>  
> @@ -2451,7 +2460,7 @@ static int __init init_dmars(void)
>  	struct dmar_rmrr_unit *rmrr;
>  	struct pci_dev *pdev;
>  	struct intel_iommu *iommu;
> -	int i, ret;
> +	int ret;
>  
>  	/*
>  	 * for each drhd
> @@ -2605,8 +2614,10 @@ static int __init init_dmars(void)
>  	 */
>  	printk(KERN_INFO "IOMMU: Setting RMRR:\n");
>  	for_each_rmrr_units(rmrr) {
> -		for (i = 0; i < rmrr->devices_cnt; i++) {
> -			pdev = rmrr->devices[i];
> +		struct dmar_device *dmar_dev;
> +	    list_for_each_entry(dmar_dev, &rmrr->head, list) {
> +			pdev = pci_get_domain_bus_and_slot(dmar_dev->segment, 
> +					dmar_dev->bus, dmar_dev->devfn);
>  			/*
>  			 * some BIOS lists non-exist devices in DMAR
>  			 * table.
> @@ -2615,9 +2626,11 @@ static int __init init_dmars(void)
>  				continue;
>  			ret = iommu_prepare_rmrr_dev(rmrr, pdev);
>  			if (ret)
> -				printk(KERN_ERR
> -				       "IOMMU: mapping reserved region failed\n");
> -		}
> +				printk(KERN_ERR 
> +					"IOMMU: mapping reserved region failed\n");
> +		
> +			pci_dev_put(pdev);
> +	    }
>  	}
>  
>  	iommu_prepare_isa();
> @@ -3301,30 +3314,30 @@ DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quir
>  static void __init init_no_remapping_devices(void)
>  {
>  	struct dmar_drhd_unit *drhd;
> +	struct dmar_device *dmar_dev;
> +	struct pci_dev *pdev = NULL;
>  
> -	for_each_drhd_unit(drhd) {
> -		if (!drhd->include_all) {
> -			int i;
> -			for (i = 0; i < drhd->devices_cnt; i++)
> -				if (drhd->devices[i] != NULL)
> -					break;
> +	for_each_drhd_unit(drhd) 
> +		if (!drhd->include_all) 
>  			/* ignore DMAR unit if no pci devices exist */
> -			if (i == drhd->devices_cnt)
> +			if (list_empty(&drhd->head))
>  				drhd->ignored = 1;
> -		}
> -	}
> -
> +	
>  	for_each_drhd_unit(drhd) {
> -		int i;
>  		if (drhd->ignored || drhd->include_all)
>  			continue;
>  
> -		for (i = 0; i < drhd->devices_cnt; i++)
> -			if (drhd->devices[i] &&
> -			    !IS_GFX_DEVICE(drhd->devices[i]))
> +		list_for_each_entry(dmar_dev, &drhd->head, list) {
> +			pdev = pci_get_domain_bus_and_slot(dmar_dev->segment,
> +				dmar_dev->bus, dmar_dev->devfn);
> +			if (!IS_GFX_DEVICE(pdev)) {
> +				pci_dev_put(pdev);
>  				break;
> +			}
> +			pci_dev_put(pdev);
> +		}
>  
> -		if (i < drhd->devices_cnt)
> +		if (!IS_GFX_DEVICE(pdev))

I think this is clearly wrong.  You acquire a pdev reference, drop the
reference, then look at pdev again after dropping the reference.  But
as soon as you do the pci_dev_put(), you have to assume pdev is no
longer valid.

>  			continue;
>  
>  		/* This IOMMU has *only* gfx devices. Either bypass it or
> @@ -3333,10 +3346,15 @@ static void __init init_no_remapping_devices(void)
>  			intel_iommu_gfx_mapped = 1;
>  		} else {
>  			drhd->ignored = 1;
> -			for (i = 0; i < drhd->devices_cnt; i++) {
> -				if (!drhd->devices[i])
> +			list_for_each_entry(dmar_dev, &drhd->head, list) {
> +				pdev = pci_get_domain_bus_and_slot(
> +						dmar_dev->segment, 
> +						dmar_dev->bus, 
> +						dmar_dev->devfn);
> +				if (!pdev)
>  					continue;
> -				drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
> +				pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
> +				pci_dev_put(pdev);
>  			}
>  		}
>  	}
> @@ -3501,11 +3519,25 @@ int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
>  	rmrr = (struct acpi_dmar_reserved_memory *)header;
>  	rmrru->base_address = rmrr->base_address;
>  	rmrru->end_address = rmrr->end_address;
> +	INIT_LIST_HEAD(&rmrru->head);
>  
>  	dmar_register_rmrr_unit(rmrru);
>  	return 0;
>  }
>  
> +static void rmrr_free(struct dmar_rmrr_unit *rmrru) 
> +{
> +	struct dmar_device *dev, *tmp;
> +
> +	list_for_each_entry_safe(dev, tmp, &rmrru->head,
> +	    list) {
> +		list_del(&dev->list);
> +		kfree(dev);
> +	}
> +    
> +	kfree(rmrru);
> +}
> +
>  static int __init
>  rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
>  {
> @@ -3515,11 +3547,11 @@ rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
>  	rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
>  	ret = dmar_parse_dev_scope((void *)(rmrr + 1),
>  		((void *)rmrr) + rmrr->header.length,
> -		&rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
> +		rmrr->segment, &rmrru->head);
>  
> -	if (ret || (rmrru->devices_cnt == 0)) {
> +	if (ret || list_empty(&rmrru->head)) {
>  		list_del(&rmrru->list);
> -		kfree(rmrru);
> +		rmrr_free(rmrru);
>  	}
>  	return ret;
>  }
> @@ -3538,12 +3570,25 @@ int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
>  
>  	atsru->hdr = hdr;
>  	atsru->include_all = atsr->flags & 0x1;
> +	INIT_LIST_HEAD(&atsru->head);
>  
>  	list_add(&atsru->list, &dmar_atsr_units);
>  
>  	return 0;
>  }
>  
> +static void atsr_free(struct dmar_atsr_unit *atsr) 
> +{
> +	struct dmar_device *dev, *tmp;
> +    
> +	list_for_each_entry_safe(dev, tmp, &atsr->head, list) {
> +		list_del(&dev->list);
> +		kfree(dev);
> +	}
> +
> +	kfree(atsr);
> +}
> +
>  static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
>  {
>  	int rc;
> @@ -3555,11 +3600,10 @@ static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
>  	atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
>  	rc = dmar_parse_dev_scope((void *)(atsr + 1),
>  				(void *)atsr + atsr->header.length,
> -				&atsru->devices_cnt, &atsru->devices,
> -				atsr->segment);
> -	if (rc || !atsru->devices_cnt) {
> +				atsr->segment, &atsru->head);
> +	if (rc || list_empty(&atsru->head)) {
>  		list_del(&atsru->list);
> -		kfree(atsru);
> +		atsr_free(atsru);
>  	}
>  
>  	return rc;
> @@ -3567,7 +3611,6 @@ static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
>  
>  int dmar_find_matched_atsr_unit(struct pci_dev *dev)
>  {
> -	int i;
>  	struct pci_bus *bus;
>  	struct acpi_dmar_atsr *atsr;
>  	struct dmar_atsr_unit *atsru;
> @@ -3584,6 +3627,7 @@ int dmar_find_matched_atsr_unit(struct pci_dev *dev)
>  
>  found:
>  	for (bus = dev->bus; bus; bus = bus->parent) {
> +		struct dmar_device *dmar_dev;
>  		struct pci_dev *bridge = bus->self;
>  
>  		if (!bridge || !pci_is_pcie(bridge) ||
> @@ -3591,8 +3635,11 @@ found:
>  			return 0;
>  
>  		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) {
> -			for (i = 0; i < atsru->devices_cnt; i++)
> -				if (atsru->devices[i] == bridge)
> +			list_for_each_entry(dmar_dev, &atsru->head, list)
> +				if (dmar_dev->segment == 
> +					pci_domain_nr(bridge->bus) && 
> +					dmar_dev->bus == bridge->bus->number &&
> +					dmar_dev->devfn == bridge->devfn)
>  					return 1;
>  			break;
>  		}
> diff --git a/include/linux/dmar.h b/include/linux/dmar.h
> index b029d1a..5317cb0 100644
> --- a/include/linux/dmar.h
> +++ b/include/linux/dmar.h
> @@ -32,6 +32,13 @@ struct acpi_dmar_header;
>  #define DMAR_INTR_REMAP		0x1
>  #define DMAR_X2APIC_OPT_OUT	0x2
>  
> +struct dmar_device {
> +	struct list_head list;
> +	u8 segment;

I think this should be u16.  I didn't chase down how you're using it,
but Table 8.3 in the Intel VT-d spec shows Segment Number in a DRHD
structure as 16 bits.

> +	u8 bus;
> +	u8 devfn;
> +};
> +
>  struct intel_iommu;
>  #ifdef CONFIG_DMAR_TABLE
>  extern struct acpi_table_header *dmar_tbl;
> @@ -39,8 +46,7 @@ struct dmar_drhd_unit {
>  	struct list_head list;		/* list of drhd units	*/
>  	struct  acpi_dmar_header *hdr;	/* ACPI header		*/
>  	u64	reg_base_addr;		/* register base address*/
> -	struct	pci_dev **devices; 	/* target device array	*/
> -	int	devices_cnt;		/* target device count	*/
> +	struct list_head head;	/* target devices' list */

s/devices'/device/ (also below).  This is not a contraction or a
possessive construct, so no apostrophe is needed.

>  	u16	segment;		/* PCI domain		*/
>  	u8	ignored:1; 		/* ignore drhd		*/
>  	u8	include_all:1;
> @@ -139,8 +145,7 @@ struct dmar_rmrr_unit {
>  	struct acpi_dmar_header *hdr;	/* ACPI header		*/
>  	u64	base_address;		/* reserved base address*/
>  	u64	end_address;		/* reserved end address */
> -	struct pci_dev **devices;	/* target devices */
> -	int	devices_cnt;		/* target device count */
> +	struct list_head head;	/* target devices' list */
>  };
>  
>  #define for_each_rmrr_units(rmrr) \
> @@ -149,16 +154,15 @@ struct dmar_rmrr_unit {
>  struct dmar_atsr_unit {
>  	struct list_head list;		/* list of ATSR units */
>  	struct acpi_dmar_header *hdr;	/* ACPI header */
> -	struct pci_dev **devices;	/* target devices */
> -	int devices_cnt;		/* target device count */
>  	u8 include_all:1;		/* include all ports */
> +	struct list_head head;	/* target devices' list */
>  };
>  
>  int dmar_parse_rmrr_atsr_dev(void);
>  extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header);
>  extern int dmar_parse_one_atsr(struct acpi_dmar_header *header);
> -extern int dmar_parse_dev_scope(void *start, void *end, int *cnt,
> -				struct pci_dev ***devices, u16 segment);
> +extern int dmar_parse_dev_scope(void *start, void *end, u16 segment, 
> +				struct list_head *head);
>  extern int intel_iommu_init(void);
>  #else /* !CONFIG_INTEL_IOMMU: */
>  static inline int intel_iommu_init(void) { return -ENODEV; }
> -- 
> 1.7.1
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yijing Wang Nov. 8, 2013, 3:40 a.m. UTC | #2
Hi Bjorn,
   Thanks very much for your review and comments!

>> +		list_for_each_entry(dmar_dev, head, list)
>> +		    if (dmar_dev->segment == pci_domain_nr(dev->bus)
>> +			    && dmar_dev->bus == dev->bus->number
>> +			    && dmar_dev->devfn == dev->devfn)
>> +			return 1;
>> +		
>>  		/* Check our parent */
>>  		dev = dev->bus->self;
> 
> You didn't change this, but it looks like this may have the same problem
> we've been talking about here:
> 
> http://lkml.kernel.org/r/20131105232903.3790.8738.stgit@bhelgaas-glaptop.roam.corp.google.com
> 
> Namely, if "dev" is a VF on a virtual bus, "dev->bus->self == NULL", so
> we won't search for any of the bridges leading to the VF.  I proposed a
> pci_upstream_bridge() interface that could be used like this:
> 
> 	/* Check our parent */
> 	dev = pci_upstream_bridge(dev);
>

It looks good to me. Because pci_upstream_bridge() is still in your next branch, I think
I can split this change into a separate patch after 3.13-rc1.


>>  static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
>>  {
>>  	struct dmar_drhd_unit *drhd = NULL;
>> -	int i;
>> +	struct dmar_device *dmar_dev;
>> +	struct pci_dev *pdev;
>>  
>>  	for_each_drhd_unit(drhd) {
>>  		if (drhd->ignored)
>> @@ -658,16 +659,22 @@ static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
>>  		if (segment != drhd->segment)
>>  			continue;
>>  
>> -		for (i = 0; i < drhd->devices_cnt; i++) {
>> -			if (drhd->devices[i] &&
>> -			    drhd->devices[i]->bus->number == bus &&
>> -			    drhd->devices[i]->devfn == devfn)
>> -				return drhd->iommu;
>> -			if (drhd->devices[i] &&
>> -			    drhd->devices[i]->subordinate &&
>> -			    drhd->devices[i]->subordinate->number <= bus &&
>> -			    drhd->devices[i]->subordinate->busn_res.end >= bus)
>> -				return drhd->iommu;
>> +		list_for_each_entry(dmar_dev, &drhd->head, list) {
>> +		    if (dmar_dev->bus == bus && 
>> +			    dmar_dev->devfn == devfn)
>> +			return drhd->iommu;
>> +
>> +		    pdev = pci_get_domain_bus_and_slot(dmar_dev->segment, 
>> +			    dmar_dev->bus, dmar_dev->devfn);
>> +		    if (pdev->subordinate && 
>> +			    pdev->subordinate->number <= bus &&
>> +			    pdev->subordinate->busn_res.end >= bus) {
>> +			pci_dev_put(pdev);
>> +			return drhd->iommu;
> 
> I don't know the details of how device_to_iommu() is used, but this
> style (acquire ref to pci_dev, match it to some other object, drop
> pci_dev ref, return object) makes me nervous.  How do we know the
> caller isn't depending on pci_dev to remain attached to the object?
> What happens if the pci_dev disappears when we do the pci_dev_put()
> here?

Hmmm, this is the thing I am most worried about. If the (pci_dev *) pointer
in the drhd->devices array is used only as an identifier, then replacing the
(pci_dev *) pointer with the PCI device ID (segment:bus:devfn) is safe.
Otherwise, this is the wrong way to fix this issue. I don't know the IOMMU driver
well yet, so any comments on this issue from the IOMMU folks are welcome.

If this is not safe, what about saving both the PCI device ID and the (pci_dev *) pointer
in the drhd? Then we can drop the pci_dev reference and set the pci_dev * to NULL when the
device is removed (via the bus notifier), and update the (pci_dev *) pointer when the device is added.

like this:
struct dmar_device {
    struct list_head list;
    u16 segment;
    u8 bus;
    u8 devfn;
    struct pci_dev *dev;
};

>>  	for_each_drhd_unit(drhd) {
>> -		int i;
>>  		if (drhd->ignored || drhd->include_all)
>>  			continue;
>>  
>> -		for (i = 0; i < drhd->devices_cnt; i++)
>> -			if (drhd->devices[i] &&
>> -			    !IS_GFX_DEVICE(drhd->devices[i]))
>> +		list_for_each_entry(dmar_dev, &drhd->head, list) {
>> +			pdev = pci_get_domain_bus_and_slot(dmar_dev->segment,
>> +				dmar_dev->bus, dmar_dev->devfn);
>> +			if (!IS_GFX_DEVICE(pdev)) {
>> +				pci_dev_put(pdev);
>>  				break;
>> +			}
>> +			pci_dev_put(pdev);
>> +		}
>>  
>> -		if (i < drhd->devices_cnt)
>> +		if (!IS_GFX_DEVICE(pdev))
> 
> I think this is clearly wrong.  You acquire a pdev reference, drop the
> reference, then look at pdev again after dropping the reference.  But
> as soon as you do the pci_dev_put(), you have to assume pdev is no
> longer valid.
>

You are right; pci_dev_put() should be moved after the if (!IS_GFX_DEVICE(pdev)) check.



>>  
>> +struct dmar_device {
>> +	struct list_head list;
>> +	u8 segment;
> 
> I think this should be u16.  I didn't chase down how you're using it,
> but Table 8.3 in the Intel VT-d spec shows Segment Number in a DRHD
> structure as 16 bits.

Yes, it's my mistake, thanks!

> 
>> +	u8 bus;
>> +	u8 devfn;
>> +};
>> +
>>  struct intel_iommu;
>>  #ifdef CONFIG_DMAR_TABLE
>>  extern struct acpi_table_header *dmar_tbl;
>> @@ -39,8 +46,7 @@ struct dmar_drhd_unit {
>>  	struct list_head list;		/* list of drhd units	*/
>>  	struct  acpi_dmar_header *hdr;	/* ACPI header		*/
>>  	u64	reg_base_addr;		/* register base address*/
>> -	struct	pci_dev **devices; 	/* target device array	*/
>> -	int	devices_cnt;		/* target device count	*/
>> +	struct list_head head;	/* target devices' list */
> 
> s/devices'/device/ (also below).  This is not a contraction or a
> possessive construct, so no apostrophe is needed.
> 
>>  	u16	segment;		/* PCI domain		*/
>>  	u8	ignored:1; 		/* ignore drhd		*/
>>  	u8	include_all:1;
>> @@ -139,8 +145,7 @@ struct dmar_rmrr_unit {
>>  	struct acpi_dmar_header *hdr;	/* ACPI header		*/
>>  	u64	base_address;		/* reserved base address*/
>>  	u64	end_address;		/* reserved end address */
>> -	struct pci_dev **devices;	/* target devices */
>> -	int	devices_cnt;		/* target device count */
>> +	struct list_head head;	/* target devices' list */
>>  };
>>  
>>  #define for_each_rmrr_units(rmrr) \
>> @@ -149,16 +154,15 @@ struct dmar_rmrr_unit {
>>  struct dmar_atsr_unit {
>>  	struct list_head list;		/* list of ATSR units */
>>  	struct acpi_dmar_header *hdr;	/* ACPI header */
>> -	struct pci_dev **devices;	/* target devices */
>> -	int devices_cnt;		/* target device count */
>>  	u8 include_all:1;		/* include all ports */
>> +	struct list_head head;	/* target devices' list */
>>  };
>>  
>>  int dmar_parse_rmrr_atsr_dev(void);
>>  extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header);
>>  extern int dmar_parse_one_atsr(struct acpi_dmar_header *header);
>> -extern int dmar_parse_dev_scope(void *start, void *end, int *cnt,
>> -				struct pci_dev ***devices, u16 segment);
>> +extern int dmar_parse_dev_scope(void *start, void *end, u16 segment, 
>> +				struct list_head *head);
>>  extern int intel_iommu_init(void);
>>  #else /* !CONFIG_INTEL_IOMMU: */
>>  static inline int intel_iommu_init(void) { return -ENODEV; }
>> -- 
>> 1.7.1
>>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 
> .
>
Bjorn Helgaas Nov. 8, 2013, 3:46 p.m. UTC | #3
On Thu, Nov 7, 2013 at 8:40 PM, Yijing Wang <wangyijing@huawei.com> wrote:
> HI Bjorn,
>    Thanks for your review and comments very much!
>
>>> +            list_for_each_entry(dmar_dev, head, list)
>>> +                if (dmar_dev->segment == pci_domain_nr(dev->bus)
>>> +                        && dmar_dev->bus == dev->bus->number
>>> +                        && dmar_dev->devfn == dev->devfn)
>>> +                    return 1;
>>> +
>>>              /* Check our parent */
>>>              dev = dev->bus->self;
>>
>> You didn't change this, but it looks like this may have the same problem
>> we've been talking about here:
>>
>> http://lkml.kernel.org/r/20131105232903.3790.8738.stgit@bhelgaas-glaptop.roam.corp.google.com
>>
>> Namely, if "dev" is a VF on a virtual bus, "dev->bus->self == NULL", so
>> we won't search for any of the bridges leading to the VF.  I proposed a
>> pci_upstream_bridge() interface that could be used like this:
>>
>>       /* Check our parent */
>>       dev = pci_upstream_bridge(dev);
>>
>
> It looks good to me, because pci_upstream_bridge() is still in your next branch, I think maybe
> I can split this changes in a separate patch after 3.13-rc1.

Yep, that would be a fix for a separate issue and should be a separate patch.

>>>  static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
>>>  {
>>>      struct dmar_drhd_unit *drhd = NULL;
>>> -    int i;
>>> +    struct dmar_device *dmar_dev;
>>> +    struct pci_dev *pdev;
>>>
>>>      for_each_drhd_unit(drhd) {
>>>              if (drhd->ignored)
>>> @@ -658,16 +659,22 @@ static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
>>>              if (segment != drhd->segment)
>>>                      continue;
>>>
>>> -            for (i = 0; i < drhd->devices_cnt; i++) {
>>> -                    if (drhd->devices[i] &&
>>> -                        drhd->devices[i]->bus->number == bus &&
>>> -                        drhd->devices[i]->devfn == devfn)
>>> -                            return drhd->iommu;
>>> -                    if (drhd->devices[i] &&
>>> -                        drhd->devices[i]->subordinate &&
>>> -                        drhd->devices[i]->subordinate->number <= bus &&
>>> -                        drhd->devices[i]->subordinate->busn_res.end >= bus)
>>> -                            return drhd->iommu;
>>> +            list_for_each_entry(dmar_dev, &drhd->head, list) {
>>> +                if (dmar_dev->bus == bus &&
>>> +                        dmar_dev->devfn == devfn)
>>> +                    return drhd->iommu;
>>> +
>>> +                pdev = pci_get_domain_bus_and_slot(dmar_dev->segment,
>>> +                        dmar_dev->bus, dmar_dev->devfn);
>>> +                if (pdev->subordinate &&
>>> +                        pdev->subordinate->number <= bus &&
>>> +                        pdev->subordinate->busn_res.end >= bus) {
>>> +                    pci_dev_put(pdev);
>>> +                    return drhd->iommu;
>>
>> I don't know the details of how device_to_iommu() is used, but this
>> style (acquire ref to pci_dev, match it to some other object, drop
>> pci_dev ref, return object) makes me nervous.  How do we know the
>> caller isn't depending on pci_dev to remain attached to the object?
>> What happens if the pci_dev disappears when we do the pci_dev_put()
>> here?
>
> Hmmm, this is the thing I am most worried about. If we only use the
> (pci_dev *) pointer in the drhd->devices array as an identifier, then replacing the
> (pci_dev *) pointer with the pci device id segment:bus:devfn is safe.
> Otherwise, this is the wrong way to fix this issue. I don't know the IOMMU driver well yet,
> so any comments from the IOMMU guys on this issue are welcome.
>
> If this is not safe, what about saving both the pci device id and the (pci_dev *) pointer
> in drhd? Then we can put the pci_dev ref and set pci_dev * = NULL when the device is removed (via the bus notifier), and
> update the (pci_dev *) pointer when the device is added.

I don't know the IOMMU drivers well either, but it seems like they
rely on notifications of device addition and removal (see
iommu_bus_notifier()).  It doesn't seem right for them to also use the
generic PCI interfaces like pci_get_domain_bus_and_slot() because the
IOMMU driver should already know what devices exist and their
lifetimes.  It seems like confusion to mix the two.  But I don't have
a concrete suggestion.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yijing Wang Nov. 11, 2013, 12:55 a.m. UTC | #4
>> Hmmm, this is the thing I am most worried about. If we only use the
>> (pci_dev *) pointer in the drhd->devices array as an identifier, then replacing the
>> (pci_dev *) pointer with the pci device id segment:bus:devfn is safe.
>> Otherwise, this is the wrong way to fix this issue. I don't know the IOMMU driver well yet,
>> so any comments from the IOMMU guys on this issue are welcome.
>>
>> If this is not safe, what about saving both the pci device id and the (pci_dev *) pointer
>> in drhd? Then we can put the pci_dev ref and set pci_dev * = NULL when the device is removed (via the bus notifier), and
>> update the (pci_dev *) pointer when the device is added.
> 
> I don't know the IOMMU drivers well either, but it seems like they
> rely on notifications of device addition and removal (see
> iommu_bus_notifier()).  It doesn't seem right for them to also use the
> generic PCI interfaces like pci_get_domain_bus_and_slot() because the
> IOMMU driver should already know what devices exist and their
> lifetimes.  It seems like confusion to mix the two.  But I don't have
> a concrete suggestion.

Maybe you are right. I will try to rework the patch and resend it soon.

Thanks!
Yijing.


> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 
> .
>
David Woodhouse Nov. 20, 2013, 3:59 p.m. UTC | #5
On Fri, 2013-11-08 at 08:46 -0700, Bjorn Helgaas wrote:
> 
> I don't know the IOMMU drivers well either, but it seems like they
> rely on notifications of device addition and removal (see
> iommu_bus_notifier()).  It doesn't seem right for them to also use the
> generic PCI interfaces like pci_get_domain_bus_and_slot() because the
> IOMMU driver should already know what devices exist and their
> lifetimes.  It seems like confusion to mix the two.  But I don't have
> a concrete suggestion.

The generic IOMMU code has a notifier, and calls through to an
->add_device() method in the specific IOMMU driver's iommu_ops.

The Intel IOMMU driver predates that, and its scheme for mapping devices
to the correct DMAR unit is different. It happens entirely within the
get_domain_for_dev() function, which happens when we're first asked to
set up a mapping for a given device (when we don't already have the
answer stashed in dev->archdata).

I think we should add an ->add_device() method to the Intel IOMMU
driver, and make it do much of what's in get_domain_for_dev() right now
— finding the "proxy" device (the upstream PCIe bridge or whatever), and
then looking through the ACPI DMAR table to find which DMAR unit that's
attached to. Then we stash that information (dmar, devfn) in
dev->archdata, and get_domain_for_dev() still has *some* work to do,
actually allocating a logical domain on the IOMMU in question, but not
as much. And refcount the damn domain instead of playing the horrid
tricks we currently do to hang it off the upstream proxy device *too*.

My main concern here is that the DMAR table contains the PCI bus numbers
at boot time. Doing the lookup later will only work if we don't renumber
busses. Or if we have a way to look things up based on the *original*
bus number.

The Intel IOMMU also has a bus notifier of its own which it only uses to
know when a driver is *detached*, so it can tear down the logical domain
for the corresponding device. Would be nice to have the generic IOMMU
notifier call a callback for us then too, perhaps.
Yijing Wang Nov. 21, 2013, 6:21 a.m. UTC | #6
On 2013/11/20 23:59, David Woodhouse wrote:
> On Fri, 2013-11-08 at 08:46 -0700, Bjorn Helgaas wrote:
>>
>> I don't know the IOMMU drivers well either, but it seems like they
>> rely on notifications of device addition and removal (see
>> iommu_bus_notifier()).  It doesn't seem right for them to also use the
>> generic PCI interfaces like pci_get_domain_bus_and_slot() because the
>> IOMMU driver should already know what devices exist and their
>> lifetimes.  It seems like confusion to mix the two.  But I don't have
>> a concrete suggestion.
> 

Hi David,
   Thanks for your review and comment!

> The generic IOMMU code has a notifier, and calls through to an
> ->add_device() method in the specific IOMMU driver's iommu_ops.
> 
> The Intel IOMMU driver predates that, and its scheme for mapping devices
> to the correct DMAR unit is different. It happens entirely within the
> get_domain_for_dev() function, which happens when we're first asked to
> set up a mapping for a given device (when we don't already have the
> answer stashed in dev->archdata).
> 
> I think we should add an ->add_device() method to the Intel IOMMU
> driver, and make it do much of what's in get_domain_for_dev() right now
> — finding the "proxy" device (the upstream PCIe bridge or whatever), and
> then looking through the ACPI DMAR table to find which DMAR unit that's
> attached to. Then we stash that information (dmar, devfn) in
> dev->archdata, and get_domain_for_dev() still has *some* work to do,
> actually allocating a logical domain on the IOMMU in question, but not
> as much. And refcount the damn domain instead of playing the horrid
> tricks we currently do to hang it off the upstream proxy device *too*.

The Intel IOMMU driver already has an ->add_device() method (.add_device = intel_iommu_add_device);
it is used to update the iommu group info. Since the Intel IOMMU driver also has
its own notifier, that may be a good place to do this work.
Currently, the dmar driver parses the DMAR table and finds the pci device ids under a specific
DRHD, but only saves each device's pci_dev * pointer in the devices array. So if the pci device
is removed, this info becomes stale. In the previous version of the patch, I replaced the
pci_dev * pointer array with pci device ids completely. This may introduce some safety issues, because
a pci device may be destroyed while we are processing its dma mapping, etc.

So I have reworked the patch to save the pci device id as well as the pci_dev * pointer, like:

struct dmar_device {
   u16 segment;
   u8 bus;
   u8 devfn;  ----------->these three are used only when a pci device is added or removed; we use them to update the pci_dev * pointer in the intel iommu driver notifier.
   struct list_head list;   -->add to DRHD device list.
   struct pci_dev *pdev;   --->use to hold the pci device
}

What do you think about this?

In this new patch, we won't change the Intel iommu driver much; we just enhance the Intel iommu driver
notifier so that the DRHD device list always stays valid and never holds stale info.

I will send out this new patch soon.

> 
> My main concern here is that the DMAR table contains the PCI bus numbers
> at boot time. Doing the lookup later will only work if we don't renumber
> busses. Or if we have a way to look things up based on the *original*
> bus number.

If we don't remove the pci device, the buses it occupies won't change, I think.
And because the new patch still uses the pci_dev * pointer to find the matching DRHD,
this is not a regression.
Since the DMAR table itself uses the pci device id to identify the supported devices,
I have not found anything to use instead of the device id.

The AMD IOMMU driver seems to use the pci device id to identify drhd too, although
I have only glanced at it quickly, so I may not be correct.

> 
> The Intel IOMMU also has a bus notifier of its own which it only uses to
> know when a driver is *detached*, so it can tear down the logical domain
> for the corresponding device. Would be nice to have the generic IOMMU
> notifier call a callback for us then too, perhaps.

Updating the device info in the Intel IOMMU driver is a good point.


Thanks!
Yijing.

> 
>
diff mbox

Patch

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 785675a..9aa65a3 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -65,12 +65,13 @@  static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
 }
 
 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
-					   struct pci_dev **dev, u16 segment)
+					    u16 segment, struct list_head *head)
 {
 	struct pci_bus *bus;
 	struct pci_dev *pdev = NULL;
 	struct acpi_dmar_pci_path *path;
 	int count;
+	struct dmar_device *dmar_dev;
 
 	bus = pci_find_bus(segment, scope->bus);
 	path = (struct acpi_dmar_pci_path *)(scope + 1);
@@ -100,7 +101,6 @@  static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
 	if (!pdev) {
 		pr_warn("Device scope device [%04x:%02x:%02x.%02x] not found\n",
 			segment, scope->bus, path->dev, path->fn);
-		*dev = NULL;
 		return 0;
 	}
 	if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \
@@ -111,54 +111,39 @@  static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
 			pci_name(pdev));
 		return -EINVAL;
 	}
-	*dev = pdev;
+
+	dmar_dev = kzalloc(sizeof(struct dmar_device), GFP_KERNEL);
+	if (!dmar_dev) {
+		pci_dev_put(pdev);
+		return -ENOMEM;
+	}
+
+	dmar_dev->segment = segment;
+	dmar_dev->bus = pdev->bus->number;
+	dmar_dev->devfn = pdev->devfn;
+	list_add_tail(&dmar_dev->list, head);
+
+	pci_dev_put(pdev);
 	return 0;
 }
 
-int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
-				struct pci_dev ***devices, u16 segment)
+int __init dmar_parse_dev_scope(void *start, void *end, u16 segment, 
+	struct list_head *head)
 {
 	struct acpi_dmar_device_scope *scope;
-	void * tmp = start;
-	int index;
 	int ret;
 
-	*cnt = 0;
-	while (start < end) {
-		scope = start;
-		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
-		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
-			(*cnt)++;
-		else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
-			scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
-			pr_warn("Unsupported device scope\n");
-		}
-		start += scope->length;
-	}
-	if (*cnt == 0)
-		return 0;
-
-	*devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
-	if (!*devices)
-		return -ENOMEM;
-
-	start = tmp;
-	index = 0;
 	while (start < end) {
 		scope = start;
 		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
 		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
-			ret = dmar_parse_one_dev_scope(scope,
-				&(*devices)[index], segment);
-			if (ret) {
-				kfree(*devices);
+			ret = dmar_parse_one_dev_scope(scope, segment, head);
+			if (ret)
 				return ret;
-			}
-			index ++;
 		}
 		start += scope->length;
 	}
-
+    
 	return 0;
 }
 
@@ -183,6 +168,7 @@  dmar_parse_one_drhd(struct acpi_dmar_header *header)
 	dmaru->reg_base_addr = drhd->address;
 	dmaru->segment = drhd->segment;
 	dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
+	INIT_LIST_HEAD(&dmaru->head);
 
 	ret = alloc_iommu(dmaru);
 	if (ret) {
@@ -193,6 +179,19 @@  dmar_parse_one_drhd(struct acpi_dmar_header *header)
 	return 0;
 }
 
+static void drhd_free(struct dmar_drhd_unit *dmaru)
+{
+	struct dmar_device *dev, *tmp;
+
+	list_for_each_entry_safe(dev, tmp, &dmaru->head,
+	    list) {
+		list_del(&dev->list);
+		kfree(dev);
+	}
+
+	kfree(dmaru);
+}
+
 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
 {
 	struct acpi_dmar_hardware_unit *drhd;
@@ -205,11 +204,10 @@  static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
 
 	ret = dmar_parse_dev_scope((void *)(drhd + 1),
 				((void *)drhd) + drhd->header.length,
-				&dmaru->devices_cnt, &dmaru->devices,
-				drhd->segment);
+				drhd->segment, &dmaru->head);
 	if (ret) {
 		list_del(&dmaru->list);
-		kfree(dmaru);
+		drhd_free(dmaru);
 	}
 	return ret;
 }
@@ -378,16 +376,18 @@  parse_dmar_table(void)
 	return ret;
 }
 
-static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
-			  struct pci_dev *dev)
+static int dmar_pci_device_match(struct pci_dev *dev, 
+	struct list_head *head)
 {
-	int index;
+	struct dmar_device *dmar_dev;
 
 	while (dev) {
-		for (index = 0; index < cnt; index++)
-			if (dev == devices[index])
-				return 1;
-
+		list_for_each_entry(dmar_dev, head, list)
+		    if (dmar_dev->segment == pci_domain_nr(dev->bus)
+			    && dmar_dev->bus == dev->bus->number
+			    && dmar_dev->devfn == dev->devfn)
+			return 1;
+		
 		/* Check our parent */
 		dev = dev->bus->self;
 	}
@@ -412,8 +412,7 @@  dmar_find_matched_drhd_unit(struct pci_dev *dev)
 		    drhd->segment == pci_domain_nr(dev->bus))
 			return dmaru;
 
-		if (dmar_pci_device_match(dmaru->devices,
-					  dmaru->devices_cnt, dev))
+		if (dmar_pci_device_match(dev, &dmaru->head))
 			return dmaru;
 	}
 
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 15e9b57..b33fe0e 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -650,7 +650,8 @@  static void domain_update_iommu_cap(struct dmar_domain *domain)
 static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
 {
 	struct dmar_drhd_unit *drhd = NULL;
-	int i;
+	struct dmar_device *dmar_dev;
+	struct pci_dev *pdev;
 
 	for_each_drhd_unit(drhd) {
 		if (drhd->ignored)
@@ -658,16 +659,22 @@  static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
 		if (segment != drhd->segment)
 			continue;
 
-		for (i = 0; i < drhd->devices_cnt; i++) {
-			if (drhd->devices[i] &&
-			    drhd->devices[i]->bus->number == bus &&
-			    drhd->devices[i]->devfn == devfn)
-				return drhd->iommu;
-			if (drhd->devices[i] &&
-			    drhd->devices[i]->subordinate &&
-			    drhd->devices[i]->subordinate->number <= bus &&
-			    drhd->devices[i]->subordinate->busn_res.end >= bus)
-				return drhd->iommu;
+		list_for_each_entry(dmar_dev, &drhd->head, list) {
+		    if (dmar_dev->bus == bus && 
+			    dmar_dev->devfn == devfn)
+			return drhd->iommu;
+
+		    pdev = pci_get_domain_bus_and_slot(dmar_dev->segment, 
+			    dmar_dev->bus, dmar_dev->devfn);
+		    if (pdev->subordinate && 
+			    pdev->subordinate->number <= bus &&
+			    pdev->subordinate->busn_res.end >= bus) {
+			pci_dev_put(pdev);
+			return drhd->iommu;
+		    }
+
+		    if (pdev)
+			pci_dev_put(pdev);
 		}
 
 		if (drhd->include_all)
@@ -2331,18 +2338,20 @@  static int domain_add_dev_info(struct dmar_domain *domain,
 static bool device_has_rmrr(struct pci_dev *dev)
 {
 	struct dmar_rmrr_unit *rmrr;
-	int i;
+	struct dmar_device *dmar_dev;
 
 	for_each_rmrr_units(rmrr) {
-		for (i = 0; i < rmrr->devices_cnt; i++) {
-			/*
-			 * Return TRUE if this RMRR contains the device that
-			 * is passed in.
-			 */
-			if (rmrr->devices[i] == dev)
-				return true;
-		}
+	    list_for_each_entry(dmar_dev, &rmrr->head, list)
+		/*
+		 * Return TRUE if this RMRR contains the device that
+		 * is passed in.
+		 */
+	if (dmar_dev->segment == pci_domain_nr(dev->bus) && 
+			dmar_dev->bus == dev->bus->number && 
+			dmar_dev->devfn == dev->devfn)
+		return true;
 	}
+	
 	return false;
 }
 
@@ -2451,7 +2460,7 @@  static int __init init_dmars(void)
 	struct dmar_rmrr_unit *rmrr;
 	struct pci_dev *pdev;
 	struct intel_iommu *iommu;
-	int i, ret;
+	int ret;
 
 	/*
 	 * for each drhd
@@ -2605,8 +2614,10 @@  static int __init init_dmars(void)
 	 */
 	printk(KERN_INFO "IOMMU: Setting RMRR:\n");
 	for_each_rmrr_units(rmrr) {
-		for (i = 0; i < rmrr->devices_cnt; i++) {
-			pdev = rmrr->devices[i];
+		struct dmar_device *dmar_dev;
+	    list_for_each_entry(dmar_dev, &rmrr->head, list) {
+			pdev = pci_get_domain_bus_and_slot(dmar_dev->segment, 
+					dmar_dev->bus, dmar_dev->devfn);
 			/*
 			 * some BIOS lists non-exist devices in DMAR
 			 * table.
@@ -2615,9 +2626,11 @@  static int __init init_dmars(void)
 				continue;
 			ret = iommu_prepare_rmrr_dev(rmrr, pdev);
 			if (ret)
-				printk(KERN_ERR
-				       "IOMMU: mapping reserved region failed\n");
-		}
+				printk(KERN_ERR 
+					"IOMMU: mapping reserved region failed\n");
+		
+			pci_dev_put(pdev);
+	    }
 	}
 
 	iommu_prepare_isa();
@@ -3301,30 +3314,30 @@  DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quir
 static void __init init_no_remapping_devices(void)
 {
 	struct dmar_drhd_unit *drhd;
+	struct dmar_device *dmar_dev;
+	struct pci_dev *pdev = NULL;
 
-	for_each_drhd_unit(drhd) {
-		if (!drhd->include_all) {
-			int i;
-			for (i = 0; i < drhd->devices_cnt; i++)
-				if (drhd->devices[i] != NULL)
-					break;
+	for_each_drhd_unit(drhd) 
+		if (!drhd->include_all) 
 			/* ignore DMAR unit if no pci devices exist */
-			if (i == drhd->devices_cnt)
+			if (list_empty(&drhd->head))
 				drhd->ignored = 1;
-		}
-	}
-
+	
 	for_each_drhd_unit(drhd) {
-		int i;
 		if (drhd->ignored || drhd->include_all)
 			continue;
 
-		for (i = 0; i < drhd->devices_cnt; i++)
-			if (drhd->devices[i] &&
-			    !IS_GFX_DEVICE(drhd->devices[i]))
+		list_for_each_entry(dmar_dev, &drhd->head, list) {
+			pdev = pci_get_domain_bus_and_slot(dmar_dev->segment,
+				dmar_dev->bus, dmar_dev->devfn);
+			if (!IS_GFX_DEVICE(pdev)) {
+				pci_dev_put(pdev);
 				break;
+			}
+			pci_dev_put(pdev);
+		}
 
-		if (i < drhd->devices_cnt)
+		if (!IS_GFX_DEVICE(pdev))
 			continue;
 
 		/* This IOMMU has *only* gfx devices. Either bypass it or
@@ -3333,10 +3346,15 @@  static void __init init_no_remapping_devices(void)
 			intel_iommu_gfx_mapped = 1;
 		} else {
 			drhd->ignored = 1;
-			for (i = 0; i < drhd->devices_cnt; i++) {
-				if (!drhd->devices[i])
+			list_for_each_entry(dmar_dev, &drhd->head, list) {
+				pdev = pci_get_domain_bus_and_slot(
+						dmar_dev->segment, 
+						dmar_dev->bus, 
+						dmar_dev->devfn);
+				if (!pdev)
 					continue;
-				drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
+				pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
+				pci_dev_put(pdev);
 			}
 		}
 	}
@@ -3501,11 +3519,25 @@  int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
 	rmrr = (struct acpi_dmar_reserved_memory *)header;
 	rmrru->base_address = rmrr->base_address;
 	rmrru->end_address = rmrr->end_address;
+	INIT_LIST_HEAD(&rmrru->head);
 
 	dmar_register_rmrr_unit(rmrru);
 	return 0;
 }
 
+static void rmrr_free(struct dmar_rmrr_unit *rmrru) 
+{
+	struct dmar_device *dev, *tmp;
+
+	list_for_each_entry_safe(dev, tmp, &rmrru->head,
+	    list) {
+		list_del(&dev->list);
+		kfree(dev);
+	}
+    
+	kfree(rmrru);
+}
+
 static int __init
 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
 {
@@ -3515,11 +3547,11 @@  rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
 	rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
 	ret = dmar_parse_dev_scope((void *)(rmrr + 1),
 		((void *)rmrr) + rmrr->header.length,
-		&rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
+		rmrr->segment, &rmrru->head);
 
-	if (ret || (rmrru->devices_cnt == 0)) {
+	if (ret || list_empty(&rmrru->head)) {
 		list_del(&rmrru->list);
-		kfree(rmrru);
+		rmrr_free(rmrru);
 	}
 	return ret;
 }
@@ -3538,12 +3570,25 @@  int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
 
 	atsru->hdr = hdr;
 	atsru->include_all = atsr->flags & 0x1;
+	INIT_LIST_HEAD(&atsru->head);
 
 	list_add(&atsru->list, &dmar_atsr_units);
 
 	return 0;
 }
 
+static void atsr_free(struct dmar_atsr_unit *atsr) 
+{
+	struct dmar_device *dev, *tmp;
+    
+	list_for_each_entry_safe(dev, tmp, &atsr->head, list) {
+		list_del(&dev->list);
+		kfree(dev);
+	}
+
+	kfree(atsr);
+}
+
 static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
 {
 	int rc;
@@ -3555,11 +3600,10 @@  static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
 	atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
 	rc = dmar_parse_dev_scope((void *)(atsr + 1),
 				(void *)atsr + atsr->header.length,
-				&atsru->devices_cnt, &atsru->devices,
-				atsr->segment);
-	if (rc || !atsru->devices_cnt) {
+				atsr->segment, &atsru->head);
+	if (rc || list_empty(&atsru->head)) {
 		list_del(&atsru->list);
-		kfree(atsru);
+		atsr_free(atsru);
 	}
 
 	return rc;
@@ -3567,7 +3611,6 @@  static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
 
 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
 {
-	int i;
 	struct pci_bus *bus;
 	struct acpi_dmar_atsr *atsr;
 	struct dmar_atsr_unit *atsru;
@@ -3584,6 +3627,7 @@  int dmar_find_matched_atsr_unit(struct pci_dev *dev)
 
 found:
 	for (bus = dev->bus; bus; bus = bus->parent) {
+		struct dmar_device *dmar_dev;
 		struct pci_dev *bridge = bus->self;
 
 		if (!bridge || !pci_is_pcie(bridge) ||
@@ -3591,8 +3635,11 @@  found:
 			return 0;
 
 		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) {
-			for (i = 0; i < atsru->devices_cnt; i++)
-				if (atsru->devices[i] == bridge)
+			list_for_each_entry(dmar_dev, &atsru->head, list)
+				if (dmar_dev->segment == 
+					pci_domain_nr(bridge->bus) && 
+					dmar_dev->bus == bridge->bus->number &&
+					dmar_dev->devfn == bridge->devfn)
 					return 1;
 			break;
 		}
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index b029d1a..5317cb0 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -32,6 +32,13 @@  struct acpi_dmar_header;
 #define DMAR_INTR_REMAP		0x1
 #define DMAR_X2APIC_OPT_OUT	0x2
 
+struct dmar_device {
+	struct list_head list;
+	u8 segment;
+	u8 bus;
+	u8 devfn;
+};
+
 struct intel_iommu;
 #ifdef CONFIG_DMAR_TABLE
 extern struct acpi_table_header *dmar_tbl;
@@ -39,8 +46,7 @@  struct dmar_drhd_unit {
 	struct list_head list;		/* list of drhd units	*/
 	struct  acpi_dmar_header *hdr;	/* ACPI header		*/
 	u64	reg_base_addr;		/* register base address*/
-	struct	pci_dev **devices; 	/* target device array	*/
-	int	devices_cnt;		/* target device count	*/
+	struct list_head head;	/* target devices' list */
 	u16	segment;		/* PCI domain		*/
 	u8	ignored:1; 		/* ignore drhd		*/
 	u8	include_all:1;
@@ -139,8 +145,7 @@  struct dmar_rmrr_unit {
 	struct acpi_dmar_header *hdr;	/* ACPI header		*/
 	u64	base_address;		/* reserved base address*/
 	u64	end_address;		/* reserved end address */
-	struct pci_dev **devices;	/* target devices */
-	int	devices_cnt;		/* target device count */
+	struct list_head head;	/* target devices' list */
 };
 
 #define for_each_rmrr_units(rmrr) \
@@ -149,16 +154,15 @@  struct dmar_rmrr_unit {
 struct dmar_atsr_unit {
 	struct list_head list;		/* list of ATSR units */
 	struct acpi_dmar_header *hdr;	/* ACPI header */
-	struct pci_dev **devices;	/* target devices */
-	int devices_cnt;		/* target device count */
 	u8 include_all:1;		/* include all ports */
+	struct list_head head;	/* target devices' list */
 };
 
 int dmar_parse_rmrr_atsr_dev(void);
 extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header);
 extern int dmar_parse_one_atsr(struct acpi_dmar_header *header);
-extern int dmar_parse_dev_scope(void *start, void *end, int *cnt,
-				struct pci_dev ***devices, u16 segment);
+extern int dmar_parse_dev_scope(void *start, void *end, u16 segment, 
+				struct list_head *head);
 extern int intel_iommu_init(void);
 #else /* !CONFIG_INTEL_IOMMU: */
 static inline int intel_iommu_init(void) { return -ENODEV; }