diff mbox series

ACK/Cmnt: [PATCH 0/6][Focal] 5.4 kernel: when iommu is on crashdump fails

Message ID 01788a32-0545-00f1-8645-249238506acf@canonical.com
State New
Headers show
Series ACK/Cmnt: [PATCH 0/6][Focal] 5.4 kernel: when iommu is on crashdump fails | expand

Commit Message

Stefan Bader April 8, 2021, 9:14 a.m. UTC
On 06.04.21 19:12, Tim Gardner wrote:
> [SRU Justification]
> 
> BugLink: https://bugs.launchpad.net/bugs/1922738
> 
> When iommu is enabled crashdump fails to be collected because crash-kernel crashes.
> 
> See extended SRU justification in the bug report.

When all the detail is in the bug report, then just putting in random info which 
does not even make sense in some cases is rather unhelpful. The info that should 
be here is anything on a technical level which somehow is helping the 
reviewer(s). Whereas the SRU justification is targeted towards the SRU team 
which is less technical. So Ioanna, I think your regression analysis is great 
for kernel devs but could be slightly overwhelming for the SRU team. They also 
did update their documentation last year because "regression potential" was 
misunderstood a lot. What they would like to see is a hint on "how" things go 
wrong.

What I found with this set is that things become much clearer when looking at 
the complete delta of all 6 patches. That would also be something that is really 
helpful for a review. So ideally that would be part of the cover email. I have 
attached the diff to this email.

Looking at that it appears the main change is to move the delayed attachment 
from the function which looks a domain up to two other places which actually 
want a mapping. Overall this seems to move it to a later stage. With that in mind the 
regression potential is whenever there is a deferred attachment (cannot say 
which case this is since I did not dig much deeper here) and possibly then stack 
traces which show the two new calling functions somewhere.

For the patchset:

Acked-by: Stefan Bader <stefan.bader@canonical.com>
> 
> [Test Plan]
> Enable crashdump, cause a fault.
> 
> [Where problems could occur]
> Released in stable updates:
> linux-5.5.y
> 
> [Other Info]
> None
> 
>
diff mbox series

Patch

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 953d86ca6d2b..ebb874fe6dbb 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -738,6 +738,11 @@  static int iommu_dummy(struct device *dev)
 	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
 }
 
+static bool attach_deferred(struct device *dev)
+{
+	return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
+}
+
 /**
  * is_downstream_to_pci_bridge - test if a device belongs to the PCI
  *				 sub-hierarchy of a candidate PCI-PCI bridge
@@ -2427,31 +2432,31 @@  static void domain_remove_dev_info(struct dmar_domain *domain)
 	spin_unlock_irqrestore(&device_domain_lock, flags);
 }
 
-/*
- * find_domain
- * Note: we use struct device->archdata.iommu stores the info
- */
 static struct dmar_domain *find_domain(struct device *dev)
 {
 	struct device_domain_info *info;
 
-	if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO)) {
-		struct iommu_domain *domain;
-
-		dev->archdata.iommu = NULL;
-		domain = iommu_get_domain_for_dev(dev);
-		if (domain)
-			intel_iommu_attach_device(domain, dev);
-	}
+	if (unlikely(attach_deferred(dev) || iommu_dummy(dev)))
+		return NULL;
 
 	/* No lock here, assumes no domain exit in normal case */
 	info = dev->archdata.iommu;
-
 	if (likely(info))
 		return info->domain;
+
 	return NULL;
 }
 
+static void do_deferred_attach(struct device *dev)
+{
+	struct iommu_domain *domain;
+
+	dev->archdata.iommu = NULL;
+	domain = iommu_get_domain_for_dev(dev);
+	if (domain)
+		intel_iommu_attach_device(domain, dev);
+}
+
 static inline struct device_domain_info *
 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
 {
@@ -2796,7 +2801,7 @@  static int identity_mapping(struct device *dev)
 	struct device_domain_info *info;
 
 	info = dev->archdata.iommu;
-	if (info && info != DUMMY_DEVICE_DOMAIN_INFO && info != DEFER_DEVICE_DOMAIN_INFO)
+	if (info)
 		return (info->domain == si_domain);
 
 	return 0;
@@ -3467,6 +3472,9 @@  static bool iommu_need_mapping(struct device *dev)
 	if (iommu_dummy(dev))
 		return false;
 
+	if (unlikely(attach_deferred(dev)))
+		do_deferred_attach(dev);
+
 	ret = identity_mapping(dev);
 	if (ret) {
 		u64 dma_mask = *dev->dma_mask;
@@ -3830,7 +3838,11 @@  bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size,
 	int prot = 0;
 	int ret;
 
+	if (unlikely(attach_deferred(dev)))
+		do_deferred_attach(dev);
+
 	domain = find_domain(dev);
+
 	if (WARN_ON(dir == DMA_NONE || !domain))
 		return DMA_MAPPING_ERROR;
 
@@ -5966,7 +5978,7 @@  intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
 static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
 					   struct device *dev)
 {
-	return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
+	return attach_deferred(dev);
 }
 
 /*