diff mbox series

[kernel,v2] irq: Add reference counting to IRQ mappings

Message ID 20201029110141.94304-1-aik@ozlabs.ru
State Not Applicable
Headers show
Series [kernel,v2] irq: Add reference counting to IRQ mappings | expand

Checks

Context Check Description
snowpatch_ozlabs/needsstable success Patch has no Fixes tags
snowpatch_ozlabs/checkpatch success
snowpatch_ozlabs/build-pmac32 success Build succeeded
snowpatch_ozlabs/build-ppc64e success Build succeeded
snowpatch_ozlabs/build-ppc64be success Build succeeded
snowpatch_ozlabs/build-ppc64le success Build succeeded
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (8cb17737940b156329cb5210669b9c9b23f4dd56)

Commit Message

Alexey Kardashevskiy Oct. 29, 2020, 11:01 a.m. UTC
PCI devices share 4 legacy INTx interrupts from the same PCI host bridge.
Device drivers map/unmap hardware interrupts via irq_create_mapping()/
irq_dispose_mapping(). The problem with that these interrupts are
shared and when performing hot unplug, we need to unmap the interrupt
only when the last device is released.

This reuses already existing irq_desc::kobj for this purpose.
The refcounter is naturally 1 when the descriptor is allocated already;
this adds kobject_get() in places where already existing mapped virq
is returned.

This reorganizes irq_dispose_mapping() to release the kobj and let
the release callback do the cleanup.

Quick grep shows no sign of irq reference counting in drivers. Drivers
typically request mapping when probing and dispose it when removing;
platforms tend to dispose only if setup failed and the rest seems
calling one dispose per one mapping. Except (at least) PPC/pseries
which needs https://lkml.org/lkml/2020/10/27/259

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---

What is the easiest way to get irq-hierarchical hardware?
I have a bunch of powerpc boxes (no good) but also a raspberry pi,
a bunch of 32/64bit orange pi's, an "armada" arm box,
thinkpads - is any of this good for the task?


---
Changes:
v2:
* added more get/put, including irq_domain_associate/irq_domain_disassociate
---
 kernel/irq/irqdesc.c   | 36 ++++++++++++++++++++-----------
 kernel/irq/irqdomain.c | 49 +++++++++++++++++++++++++++++-------------
 2 files changed, 58 insertions(+), 27 deletions(-)

Comments

Alexey Kardashevskiy Nov. 6, 2020, 3:06 a.m. UTC | #1
Hi,

This one seems to be broken in the domain associating part so please 
ignore it, I'll post v3 soon. Thanks,


On 29/10/2020 22:01, Alexey Kardashevskiy wrote:
> PCI devices share 4 legacy INTx interrupts from the same PCI host bridge.
> Device drivers map/unmap hardware interrupts via irq_create_mapping()/
> irq_dispose_mapping(). The problem with that these interrupts are
> shared and when performing hot unplug, we need to unmap the interrupt
> only when the last device is released.
> 
> This reuses already existing irq_desc::kobj for this purpose.
> The refcounter is naturally 1 when the descriptor is allocated already;
> this adds kobject_get() in places where already existing mapped virq
> is returned.
> 
> This reorganizes irq_dispose_mapping() to release the kobj and let
> the release callback do the cleanup.
> 
> Quick grep shows no sign of irq reference counting in drivers. Drivers
> typically request mapping when probing and dispose it when removing;
> platforms tend to dispose only if setup failed and the rest seems
> calling one dispose per one mapping. Except (at least) PPC/pseries
> which needs https://lkml.org/lkml/2020/10/27/259
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
> 
> What is the easiest way to get irq-hierarchical hardware?
> I have a bunch of powerpc boxes (no good) but also a raspberry pi,
> a bunch of 32/64bit orange pi's, an "armada" arm box,
> thinkpads - is any of this good for the task?
> 
> 
> ---
> Changes:
> v2:
> * added more get/put, including irq_domain_associate/irq_domain_disassociate
> ---
>   kernel/irq/irqdesc.c   | 36 ++++++++++++++++++++-----------
>   kernel/irq/irqdomain.c | 49 +++++++++++++++++++++++++++++-------------
>   2 files changed, 58 insertions(+), 27 deletions(-)
> 
> diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
> index 1a7723604399..bc8f62157ffa 100644
> --- a/kernel/irq/irqdesc.c
> +++ b/kernel/irq/irqdesc.c
> @@ -419,20 +419,40 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
>   	return NULL;
>   }
>   
> +static void delayed_free_desc(struct rcu_head *rhp);
>   static void irq_kobj_release(struct kobject *kobj)
>   {
>   	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
> +#ifdef CONFIG_IRQ_DOMAIN
> +	struct irq_domain *domain;
> +	unsigned int virq = desc->irq_data.irq;
>   
> -	free_masks(desc);
> -	free_percpu(desc->kstat_irqs);
> -	kfree(desc);
> +	domain = desc->irq_data.domain;
> +	if (domain) {
> +		if (irq_domain_is_hierarchy(domain)) {
> +			irq_domain_free_irqs(virq, 1);
> +		} else {
> +			irq_domain_disassociate(domain, virq);
> +			irq_free_desc(virq);
> +		}
> +	}
> +#endif
> +	/*
> +	 * We free the descriptor, masks and stat fields via RCU. That
> +	 * allows demultiplex interrupts to do rcu based management of
> +	 * the child interrupts.
> +	 * This also allows us to use rcu in kstat_irqs_usr().
> +	 */
> +	call_rcu(&desc->rcu, delayed_free_desc);
>   }
>   
>   static void delayed_free_desc(struct rcu_head *rhp)
>   {
>   	struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu);
>   
> -	kobject_put(&desc->kobj);
> +	free_masks(desc);
> +	free_percpu(desc->kstat_irqs);
> +	kfree(desc);
>   }
>   
>   static void free_desc(unsigned int irq)
> @@ -453,14 +473,6 @@ static void free_desc(unsigned int irq)
>   	 */
>   	irq_sysfs_del(desc);
>   	delete_irq_desc(irq);
> -
> -	/*
> -	 * We free the descriptor, masks and stat fields via RCU. That
> -	 * allows demultiplex interrupts to do rcu based management of
> -	 * the child interrupts.
> -	 * This also allows us to use rcu in kstat_irqs_usr().
> -	 */
> -	call_rcu(&desc->rcu, delayed_free_desc);
>   }
>   
>   static int alloc_descs(unsigned int start, unsigned int cnt, int node,
> diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
> index cf8b374b892d..5fb060e077e3 100644
> --- a/kernel/irq/irqdomain.c
> +++ b/kernel/irq/irqdomain.c
> @@ -487,6 +487,7 @@ static void irq_domain_set_mapping(struct irq_domain *domain,
>   
>   void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
>   {
> +	struct irq_desc *desc = irq_to_desc(irq);
>   	struct irq_data *irq_data = irq_get_irq_data(irq);
>   	irq_hw_number_t hwirq;
>   
> @@ -514,11 +515,14 @@ void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
>   
>   	/* Clear reverse map for this hwirq */
>   	irq_domain_clear_mapping(domain, hwirq);
> +
> +	kobject_put(&desc->kobj);
>   }
>   
>   int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
>   			 irq_hw_number_t hwirq)
>   {
> +	struct irq_desc *desc = irq_to_desc(virq);
>   	struct irq_data *irq_data = irq_get_irq_data(virq);
>   	int ret;
>   
> @@ -530,6 +534,8 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
>   	if (WARN(irq_data->domain, "error: virq%i is already associated", virq))
>   		return -EINVAL;
>   
> +	kobject_get(&desc->kobj);
> +
>   	mutex_lock(&irq_domain_mutex);
>   	irq_data->hwirq = hwirq;
>   	irq_data->domain = domain;
> @@ -548,6 +554,7 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
>   			irq_data->domain = NULL;
>   			irq_data->hwirq = 0;
>   			mutex_unlock(&irq_domain_mutex);
> +			kobject_put(&desc->kobj);
>   			return ret;
>   		}
>   
> @@ -638,6 +645,7 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
>   {
>   	struct device_node *of_node;
>   	int virq;
> +	struct irq_desc *desc;
>   
>   	pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
>   
> @@ -655,7 +663,9 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
>   	/* Check if mapping already exists */
>   	virq = irq_find_mapping(domain, hwirq);
>   	if (virq) {
> +		desc = irq_to_desc(virq);
>   		pr_debug("-> existing mapping on virq %d\n", virq);
> +		kobject_get(&desc->kobj);
>   		return virq;
>   	}
>   
> @@ -674,6 +684,7 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
>   	pr_debug("irq %lu on domain %s mapped to virtual irq %u\n",
>   		hwirq, of_node_full_name(of_node), virq);
>   
> +	desc = irq_to_desc(virq);
>   	return virq;
>   }
>   EXPORT_SYMBOL_GPL(irq_create_mapping);
> @@ -751,6 +762,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
>   	irq_hw_number_t hwirq;
>   	unsigned int type = IRQ_TYPE_NONE;
>   	int virq;
> +	struct irq_desc *desc;
>   
>   	if (fwspec->fwnode) {
>   		domain = irq_find_matching_fwspec(fwspec, DOMAIN_BUS_WIRED);
> @@ -787,8 +799,15 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
>   		 * current trigger type then we are done so return the
>   		 * interrupt number.
>   		 */
> -		if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq))
> +		if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq)) {
> +			desc = irq_to_desc(virq);
> +			kobject_get(&desc->kobj);
> +
> +			pr_err("___K___ (%u) %s %u: virq %d counter %d\n",
> +				smp_processor_id(),
> +			       __func__, __LINE__, virq, kref_read(&desc->kobj.kref));
>   			return virq;
> +		}
>   
>   		/*
>   		 * If the trigger type has not been set yet, then set
> @@ -800,6 +819,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
>   				return 0;
>   
>   			irqd_set_trigger_type(irq_data, type);
> +			desc = irq_to_desc(virq);
> +			kobject_get(&desc->kobj);
>   			return virq;
>   		}
>   
> @@ -852,22 +873,12 @@ EXPORT_SYMBOL_GPL(irq_create_of_mapping);
>    */
>   void irq_dispose_mapping(unsigned int virq)
>   {
> -	struct irq_data *irq_data = irq_get_irq_data(virq);
> -	struct irq_domain *domain;
> +	struct irq_desc *desc = irq_to_desc(virq);
>   
> -	if (!virq || !irq_data)
> +	if (!virq || !desc)
>   		return;
>   
> -	domain = irq_data->domain;
> -	if (WARN_ON(domain == NULL))
> -		return;
> -
> -	if (irq_domain_is_hierarchy(domain)) {
> -		irq_domain_free_irqs(virq, 1);
> -	} else {
> -		irq_domain_disassociate(domain, virq);
> -		irq_free_desc(virq);
> -	}
> +	kobject_put(&desc->kobj);
>   }
>   EXPORT_SYMBOL_GPL(irq_dispose_mapping);
>   
> @@ -1413,6 +1424,7 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
>   			    bool realloc, const struct irq_affinity_desc *affinity)
>   {
>   	int i, ret, virq;
> +	bool get_ref = false;
>   
>   	if (domain == NULL) {
>   		domain = irq_default_domain;
> @@ -1422,6 +1434,7 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
>   
>   	if (realloc && irq_base >= 0) {
>   		virq = irq_base;
> +		get_ref = true;
>   	} else {
>   		virq = irq_domain_alloc_descs(irq_base, nr_irqs, 0, node,
>   					      affinity);
> @@ -1453,8 +1466,14 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
>   		}
>   	}
>   	
> -	for (i = 0; i < nr_irqs; i++)
> +	for (i = 0; i < nr_irqs; i++) {
>   		irq_domain_insert_irq(virq + i);
> +		if (get_ref) {
> +			struct irq_desc *desc = irq_to_desc(virq + i);
> +
> +			kobject_get(&desc->kobj);
> +		}
> +	}
>   	mutex_unlock(&irq_domain_mutex);
>   
>   	return virq;
>
Thomas Gleixner Nov. 8, 2020, 11:07 p.m. UTC | #2
On Fri, Nov 06 2020 at 14:06, Alexey Kardashevskiy wrote:
> Hi,
>
> This one seems to be broken in the domain associating part so please 
> ignore it, I'll post v3 soon. Thanks,

When you do that please use a proper subject line:

 [ PATCH vN ] $subsystem: Shortlog

and to find the subsystem string just run git log kernel/irq/irqdomain.c
for hints.

Thanks,

        tglx
diff mbox series

Patch

diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 1a7723604399..bc8f62157ffa 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -419,20 +419,40 @@  static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
 	return NULL;
 }
 
+static void delayed_free_desc(struct rcu_head *rhp);
 static void irq_kobj_release(struct kobject *kobj)
 {
 	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+#ifdef CONFIG_IRQ_DOMAIN
+	struct irq_domain *domain;
+	unsigned int virq = desc->irq_data.irq;
 
-	free_masks(desc);
-	free_percpu(desc->kstat_irqs);
-	kfree(desc);
+	domain = desc->irq_data.domain;
+	if (domain) {
+		if (irq_domain_is_hierarchy(domain)) {
+			irq_domain_free_irqs(virq, 1);
+		} else {
+			irq_domain_disassociate(domain, virq);
+			irq_free_desc(virq);
+		}
+	}
+#endif
+	/*
+	 * We free the descriptor, masks and stat fields via RCU. That
+	 * allows demultiplex interrupts to do rcu based management of
+	 * the child interrupts.
+	 * This also allows us to use rcu in kstat_irqs_usr().
+	 */
+	call_rcu(&desc->rcu, delayed_free_desc);
 }
 
 static void delayed_free_desc(struct rcu_head *rhp)
 {
 	struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu);
 
-	kobject_put(&desc->kobj);
+	free_masks(desc);
+	free_percpu(desc->kstat_irqs);
+	kfree(desc);
 }
 
 static void free_desc(unsigned int irq)
@@ -453,14 +473,6 @@  static void free_desc(unsigned int irq)
 	 */
 	irq_sysfs_del(desc);
 	delete_irq_desc(irq);
-
-	/*
-	 * We free the descriptor, masks and stat fields via RCU. That
-	 * allows demultiplex interrupts to do rcu based management of
-	 * the child interrupts.
-	 * This also allows us to use rcu in kstat_irqs_usr().
-	 */
-	call_rcu(&desc->rcu, delayed_free_desc);
 }
 
 static int alloc_descs(unsigned int start, unsigned int cnt, int node,
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index cf8b374b892d..5fb060e077e3 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -487,6 +487,7 @@  static void irq_domain_set_mapping(struct irq_domain *domain,
 
 void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_data *irq_data = irq_get_irq_data(irq);
 	irq_hw_number_t hwirq;
 
@@ -514,11 +515,14 @@  void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
 
 	/* Clear reverse map for this hwirq */
 	irq_domain_clear_mapping(domain, hwirq);
+
+	kobject_put(&desc->kobj);
 }
 
 int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
 			 irq_hw_number_t hwirq)
 {
+	struct irq_desc *desc = irq_to_desc(virq);
 	struct irq_data *irq_data = irq_get_irq_data(virq);
 	int ret;
 
@@ -530,6 +534,8 @@  int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
 	if (WARN(irq_data->domain, "error: virq%i is already associated", virq))
 		return -EINVAL;
 
+	kobject_get(&desc->kobj);
+
 	mutex_lock(&irq_domain_mutex);
 	irq_data->hwirq = hwirq;
 	irq_data->domain = domain;
@@ -548,6 +554,7 @@  int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
 			irq_data->domain = NULL;
 			irq_data->hwirq = 0;
 			mutex_unlock(&irq_domain_mutex);
+			kobject_put(&desc->kobj);
 			return ret;
 		}
 
@@ -638,6 +645,7 @@  unsigned int irq_create_mapping(struct irq_domain *domain,
 {
 	struct device_node *of_node;
 	int virq;
+	struct irq_desc *desc;
 
 	pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
 
@@ -655,7 +663,9 @@  unsigned int irq_create_mapping(struct irq_domain *domain,
 	/* Check if mapping already exists */
 	virq = irq_find_mapping(domain, hwirq);
 	if (virq) {
+		desc = irq_to_desc(virq);
 		pr_debug("-> existing mapping on virq %d\n", virq);
+		kobject_get(&desc->kobj);
 		return virq;
 	}
 
@@ -674,6 +684,7 @@  unsigned int irq_create_mapping(struct irq_domain *domain,
 	pr_debug("irq %lu on domain %s mapped to virtual irq %u\n",
 		hwirq, of_node_full_name(of_node), virq);
 
+	desc = irq_to_desc(virq);
 	return virq;
 }
 EXPORT_SYMBOL_GPL(irq_create_mapping);
@@ -751,6 +762,7 @@  unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
 	irq_hw_number_t hwirq;
 	unsigned int type = IRQ_TYPE_NONE;
 	int virq;
+	struct irq_desc *desc;
 
 	if (fwspec->fwnode) {
 		domain = irq_find_matching_fwspec(fwspec, DOMAIN_BUS_WIRED);
@@ -787,8 +799,15 @@  unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
 		 * current trigger type then we are done so return the
 		 * interrupt number.
 		 */
-		if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq))
+		if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq)) {
+			desc = irq_to_desc(virq);
+			kobject_get(&desc->kobj);
+
+			pr_err("___K___ (%u) %s %u: virq %d counter %d\n",
+				smp_processor_id(),
+			       __func__, __LINE__, virq, kref_read(&desc->kobj.kref));
 			return virq;
+		}
 
 		/*
 		 * If the trigger type has not been set yet, then set
@@ -800,6 +819,8 @@  unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
 				return 0;
 
 			irqd_set_trigger_type(irq_data, type);
+			desc = irq_to_desc(virq);
+			kobject_get(&desc->kobj);
 			return virq;
 		}
 
@@ -852,22 +873,12 @@  EXPORT_SYMBOL_GPL(irq_create_of_mapping);
  */
 void irq_dispose_mapping(unsigned int virq)
 {
-	struct irq_data *irq_data = irq_get_irq_data(virq);
-	struct irq_domain *domain;
+	struct irq_desc *desc = irq_to_desc(virq);
 
-	if (!virq || !irq_data)
+	if (!virq || !desc)
 		return;
 
-	domain = irq_data->domain;
-	if (WARN_ON(domain == NULL))
-		return;
-
-	if (irq_domain_is_hierarchy(domain)) {
-		irq_domain_free_irqs(virq, 1);
-	} else {
-		irq_domain_disassociate(domain, virq);
-		irq_free_desc(virq);
-	}
+	kobject_put(&desc->kobj);
 }
 EXPORT_SYMBOL_GPL(irq_dispose_mapping);
 
@@ -1413,6 +1424,7 @@  int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
 			    bool realloc, const struct irq_affinity_desc *affinity)
 {
 	int i, ret, virq;
+	bool get_ref = false;
 
 	if (domain == NULL) {
 		domain = irq_default_domain;
@@ -1422,6 +1434,7 @@  int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
 
 	if (realloc && irq_base >= 0) {
 		virq = irq_base;
+		get_ref = true;
 	} else {
 		virq = irq_domain_alloc_descs(irq_base, nr_irqs, 0, node,
 					      affinity);
@@ -1453,8 +1466,14 @@  int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
 		}
 	}
 	
-	for (i = 0; i < nr_irqs; i++)
+	for (i = 0; i < nr_irqs; i++) {
 		irq_domain_insert_irq(virq + i);
+		if (get_ref) {
+			struct irq_desc *desc = irq_to_desc(virq + i);
+
+			kobject_get(&desc->kobj);
+		}
+	}
 	mutex_unlock(&irq_domain_mutex);
 
 	return virq;