diff mbox series

gpu: host1x: Syncpoint interrupt sharding

Message ID 20230901114008.672433-1-cyndis@kapsi.fi
State Accepted
Headers show
Series gpu: host1x: Syncpoint interrupt sharding | expand

Commit Message

Mikko Perttunen Sept. 1, 2023, 11:40 a.m. UTC
From: Mikko Perttunen <mperttunen@nvidia.com>

Support sharded syncpoint interrupts on Tegra234+. This feature
allows specifying one of eight interrupt lines for each syncpoint
to lower processing latency of syncpoint threshold
interrupts.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
---
 drivers/gpu/host1x/dev.c        | 28 +++++++++++++++++---
 drivers/gpu/host1x/dev.h        |  3 ++-
 drivers/gpu/host1x/hw/intr_hw.c | 46 ++++++++++++++++++++++++---------
 3 files changed, 60 insertions(+), 17 deletions(-)

Comments

Thierry Reding Oct. 11, 2023, 8:44 p.m. UTC | #1
On Fri, Sep 01, 2023 at 02:40:07PM +0300, Mikko Perttunen wrote:
> From: Mikko Perttunen <mperttunen@nvidia.com>
> 
> Support sharded syncpoint interrupts on Tegra234+. This feature
> allows specifying one of eight interrupt lines for each syncpoint
> to lower processing latency of syncpoint threshold
> interrupts.
> 
> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
> ---
>  drivers/gpu/host1x/dev.c        | 28 +++++++++++++++++---
>  drivers/gpu/host1x/dev.h        |  3 ++-
>  drivers/gpu/host1x/hw/intr_hw.c | 46 ++++++++++++++++++++++++---------
>  3 files changed, 60 insertions(+), 17 deletions(-)

Applied, thanks.

Thierry
diff mbox series

Patch

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 7c6699aed7d2..b22821c81394 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -488,7 +488,7 @@  static int host1x_get_resets(struct host1x *host)
 static int host1x_probe(struct platform_device *pdev)
 {
 	struct host1x *host;
-	int err;
+	int err, i;
 
 	host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
 	if (!host)
@@ -516,9 +516,29 @@  static int host1x_probe(struct platform_device *pdev)
 			return PTR_ERR(host->regs);
 	}
 
-	host->syncpt_irq = platform_get_irq(pdev, 0);
-	if (host->syncpt_irq < 0)
-		return host->syncpt_irq;
+	for (i = 0; i < ARRAY_SIZE(host->syncpt_irqs); i++) {
+		char irq_name[] = "syncptX";
+		sprintf(irq_name, "syncpt%d", i);
+
+		err = platform_get_irq_byname_optional(pdev, irq_name);
+		if (err == -ENXIO)
+			break;
+		if (err < 0)
+			return err;
+
+		host->syncpt_irqs[i] = err;
+	}
+
+	host->num_syncpt_irqs = i;
+
+	/* Device tree without irq names */
+	if (i == 0) {
+		host->syncpt_irqs[0] = platform_get_irq(pdev, 0);
+		if (host->syncpt_irqs[0] < 0)
+			return host->syncpt_irqs[0];
+
+		host->num_syncpt_irqs = 1;
+	}
 
 	mutex_init(&host->devices_lock);
 	INIT_LIST_HEAD(&host->devices);
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 75de50fe03d0..c8e302de7625 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -124,7 +124,8 @@  struct host1x {
 	void __iomem *regs;
 	void __iomem *hv_regs; /* hypervisor region */
 	void __iomem *common_regs;
-	int syncpt_irq;
+	int syncpt_irqs[8];
+	int num_syncpt_irqs;
 	struct host1x_syncpt *syncpt;
 	struct host1x_syncpt_base *bases;
 	struct device *dev;
diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c
index b915ef7d0348..9880e0c47235 100644
--- a/drivers/gpu/host1x/hw/intr_hw.c
+++ b/drivers/gpu/host1x/hw/intr_hw.c
@@ -13,13 +13,20 @@ 
 #include "../intr.h"
 #include "../dev.h"
 
+struct host1x_intr_irq_data {
+	struct host1x *host;
+	u32 offset;
+};
+
 static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id)
 {
-	struct host1x *host = dev_id;
+	struct host1x_intr_irq_data *irq_data = dev_id;
+	struct host1x *host = irq_data->host;
 	unsigned long reg;
 	unsigned int i, id;
 
-	for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); i++) {
+	for (i = irq_data->offset; i < DIV_ROUND_UP(host->info->nb_pts, 32);
+	     i += host->num_syncpt_irqs) {
 		reg = host1x_sync_readl(host,
 			HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
 
@@ -67,26 +74,41 @@  static void intr_hw_init(struct host1x *host, u32 cpm)
 
 	/*
 	 * Program threshold interrupt destination among 8 lines per VM,
-	 * per syncpoint. For now, just direct all to the first interrupt
-	 * line.
+	 * per syncpoint. For each group of 32 syncpoints (corresponding to one
+	 * interrupt status register), direct to one interrupt line, going
+	 * around in a round robin fashion.
 	 */
-	for (id = 0; id < host->info->nb_pts; id++)
-		host1x_sync_writel(host, 0, HOST1X_SYNC_SYNCPT_INTR_DEST(id));
+	for (id = 0; id < host->info->nb_pts; id++) {
+		u32 reg_offset = id / 32;
+		u32 irq_index = reg_offset % host->num_syncpt_irqs;
+
+		host1x_sync_writel(host, irq_index, HOST1X_SYNC_SYNCPT_INTR_DEST(id));
+	}
 #endif
 }
 
 static int
 host1x_intr_init_host_sync(struct host1x *host, u32 cpm)
 {
-	int err;
+	int err, i;
+	struct host1x_intr_irq_data *irq_data;
+
+	irq_data = devm_kcalloc(host->dev, host->num_syncpt_irqs, sizeof(irq_data[0]), GFP_KERNEL);
+	if (!irq_data)
+		return -ENOMEM;
 
 	host1x_hw_intr_disable_all_syncpt_intrs(host);
 
-	err = devm_request_irq(host->dev, host->syncpt_irq,
-			       syncpt_thresh_isr, IRQF_SHARED,
-			       "host1x_syncpt", host);
-	if (err < 0)
-		return err;
+	for (i = 0; i < host->num_syncpt_irqs; i++) {
+		irq_data[i].host = host;
+		irq_data[i].offset = i;
+
+		err = devm_request_irq(host->dev, host->syncpt_irqs[i],
+				       syncpt_thresh_isr, IRQF_SHARED,
+				       "host1x_syncpt", &irq_data[i]);
+		if (err < 0)
+			return err;
+	}
 
 	intr_hw_init(host, cpm);