diff mbox

[PATCHv2,net-next,10/16] net: mvpp2: handle register mapping and access for PPv2.2

Message ID 1482943592-12556-11-git-send-email-thomas.petazzoni@free-electrons.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Thomas Petazzoni Dec. 28, 2016, 4:46 p.m. UTC
This commit adjusts the mvpp2 driver register mapping and access logic
to support PPv2.2, to handle a number of differences.

Due to how the registers are laid out in memory, the Device Tree binding
for the "reg" property is different:

 - On PPv2.1, we had a first area for the common registers, and then one
   area per port.

 - On PPv2.2, we have a first area for the common registers, and a
   second area for all the per-port registers.

In addition, on PPv2.2, the area for the common registers is split into
so-called "address spaces" of 64 KB each. They allow accessing the same
registers, but from different CPUs. Hence the introduction of cpu_base[]
in 'struct mvpp2', and the modification of the mvpp2_write() and
mvpp2_read() register accessors. For PPv2.1, the compatibility is
preserved by using an "address space" size of 0.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 drivers/net/ethernet/marvell/mvpp2.c | 78 +++++++++++++++++++++++++++++-------
 1 file changed, 64 insertions(+), 14 deletions(-)

Comments

Russell King (Oracle) Jan. 6, 2017, 2:46 p.m. UTC | #1
On Wed, Dec 28, 2016 at 05:46:26PM +0100, Thomas Petazzoni wrote:
> This commit adjusts the mvpp2 driver register mapping and access logic
> to support PPv2.2, to handle a number of differences.
> 
> Due to how the registers are laid out in memory, the Device Tree binding
> for the "reg" property is different:
> 
>  - On PPv2.1, we had a first area for the common registers, and then one
>    area per port.
> 
>  - On PPv2.2, we have a first area for the common registers, and a
>    second area for all the per-port registers.
> 
> In addition, on PPv2.2, the area for the common registers is split into
> so-called "address spaces" of 64 KB each. They allow accessing the same
> registers, but from different CPUs. Hence the introduction of cpu_base[]
> in 'struct mvpp2', and the modification of the mvpp2_write() and
> mvpp2_read() register accessors. For PPv2.1, the compatibility is
> preserved by using an "address space" size of 0.

I'm not entirely sure this is the best solution - every register access
will be wrapped with a preempt_disable() and preempt_enable().  At
every site, when preempt is enabled, we will end up with code to:

- get the thread info
- increment the preempt count
- access the register
- decrement the preempt count
- test resulting preempt count and branch to __preempt_schedule()

If tracing is enabled, it gets much worse, because the increment and
decrement happen out of line, and are even more expensive.

If a function is going to make several register accesses, it's going
to be much more efficient to do:

	void __iomem *base = priv->cpu_base[get_cpu()];

	...

	put_cpu();

which means we don't end up with multiple instances of the preempt code for
consecutive accesses.

I think this is an example where having driver-private accessors for
readl()/writel() is far from a good idea.
Thomas Petazzoni March 2, 2017, 8:45 a.m. UTC | #2
Hello,

On Fri, 6 Jan 2017 14:46:48 +0000, Russell King - ARM Linux wrote:

> I'm not entirely sure this is the best solution - every register access
> will be wrapped with a preempt_disable() and preempt_enable().  At
> every site, when preempt is enabled, we will end up with code to:
> 
> - get the thread info
> - increment the preempt count
> - access the register
> - decrement the preempt count
> - test resulting preempt count and branch to __preempt_schedule()
> 
> If tracing is enabled, it gets much worse, because the increment and
> decrement happen out of line, and are even more expensive.
> 
> If a function is going to make several register accesses, it's going
> to be much more efficient to do:
> 
> 	void __iomem *base = priv->cpu_base[get_cpu()];
> 
> 	...
> 
> 	put_cpu();
> 
> which means we don't end up with multiple instances of the preempt code for
> consecutive accesses.

In the next version, these accessors have been reworked. After getting
more details about the HW, it turned out that disabling preemption is
not at all necessary. We just have some global registers (can be
accessed through any per-CPU window) and some per-CPU registers (must
be accessed from a specific per-CPU window), with the trick that
certain sequences of register read/write must occur from the same
per-CPU window.

So we'll have mvpp2_{read,write} that read/write through the register
window of CPU0, used for all non-per-CPU register accesses. And we'll
have mvpp2_percpu_{read,write}, taking an additional "cpu" argument,
used for per-CPU register accesses. Thanks to "cpu" being passed as
argument, the caller can ensure the same cpu window is used to do
several reads/writes in a row.

Best regards,

Thomas
diff mbox

Patch

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 22f7970..389cc62 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -304,6 +304,9 @@ 
 #define      MVPP2_GMAC_TX_FIFO_MIN_TH_MASK(v)	(((v) << 6) & \
 					MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK)
 
+#define MVPP22_PORT_BASE			0x30e00
+#define MVPP22_PORT_OFFSET			0x1000
+
 #define MVPP2_CAUSE_TXQ_SENT_DESC_ALL_MASK	0xff
 
 /* Descriptor ring Macros */
@@ -631,6 +634,11 @@  enum mvpp2_prs_l3_cast {
  */
 #define MVPP2_BM_SHORT_PKT_SIZE		MVPP2_RX_MAX_PKT_SIZE(512)
 
+#define MVPP21_ADDR_SPACE_SZ		0
+#define MVPP22_ADDR_SPACE_SZ		SZ_64K
+
+#define MVPP2_MAX_CPUS			4
+
 enum mvpp2_bm_type {
 	MVPP2_BM_FREE,
 	MVPP2_BM_SWF_LONG,
@@ -644,6 +652,13 @@  struct mvpp2 {
 	/* Shared registers' base addresses */
 	void __iomem *base;
 	void __iomem *lms_base;
+	void __iomem *iface_base;
+
+	/* On PPv2.2, each CPU can access the base register through a
+	 * separate address space, each 64 KB apart from each
+	 * other.
+	 */
+	void __iomem *cpu_base[MVPP2_MAX_CPUS];
 
 	/* Common clocks */
 	struct clk *pp_clk;
@@ -1021,12 +1036,21 @@  static int txq_number = MVPP2_MAX_TXQ;
 
 static void mvpp2_write(struct mvpp2 *priv, u32 offset, u32 data)
 {
-	writel(data, priv->base + offset);
+	int cpu = get_cpu();
+
+	writel(data, priv->cpu_base[cpu] + offset);
+	put_cpu();
 }
 
 static u32 mvpp2_read(struct mvpp2 *priv, u32 offset)
 {
-	return readl(priv->base + offset);
+	int cpu = get_cpu();
+	u32 val;
+
+	val = readl(priv->cpu_base[cpu] + offset);
+	put_cpu();
+
+	return val;
 }
 
 static dma_addr_t mvpp2_txdesc_phys_addr_get(struct mvpp2_port *port,
@@ -6386,7 +6410,6 @@  static int mvpp2_port_probe(struct platform_device *pdev,
 	u32 id;
 	int features;
 	int phy_mode;
-	int priv_common_regs_num = 2;
 	int err, i, cpu;
 
 	dev = alloc_etherdev_mqs(sizeof(struct mvpp2_port), txq_number,
@@ -6436,12 +6459,24 @@  static int mvpp2_port_probe(struct platform_device *pdev,
 	port->phy_node = phy_node;
 	port->phy_interface = phy_mode;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM,
-				    priv_common_regs_num + id);
-	port->base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(port->base)) {
-		err = PTR_ERR(port->base);
-		goto err_free_irq;
+	if (priv->hw_version == MVPP21) {
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 2 + id);
+		port->base = devm_ioremap_resource(&pdev->dev, res);
+		if (IS_ERR(port->base)) {
+			err = PTR_ERR(port->base);
+			goto err_free_irq;
+		}
+	} else {
+		u32 gop_id;
+
+		if (of_property_read_u32(port_node, "gop-port-id", &gop_id)) {
+			err = -EINVAL;
+			dev_err(&pdev->dev, "missing gop-port-id value\n");
+			goto err_free_irq;
+		}
+
+		port->base = priv->iface_base + MVPP22_PORT_BASE +
+			gop_id * MVPP22_PORT_OFFSET;
 	}
 
 	/* Alloc per-cpu stats */
@@ -6674,7 +6709,7 @@  static int mvpp2_probe(struct platform_device *pdev)
 	struct device_node *port_node;
 	struct mvpp2 *priv;
 	struct resource *res;
-	int port_count, first_rxq;
+	int port_count, first_rxq, cpu;
 	int err;
 
 	priv = devm_kzalloc(&pdev->dev, sizeof(struct mvpp2), GFP_KERNEL);
@@ -6689,10 +6724,25 @@  static int mvpp2_probe(struct platform_device *pdev)
 	if (IS_ERR(priv->base))
 		return PTR_ERR(priv->base);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	priv->lms_base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(priv->lms_base))
-		return PTR_ERR(priv->lms_base);
+	if (priv->hw_version == MVPP21) {
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+		priv->lms_base = devm_ioremap_resource(&pdev->dev, res);
+		if (IS_ERR(priv->lms_base))
+			return PTR_ERR(priv->lms_base);
+	} else {
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+		priv->iface_base = devm_ioremap_resource(&pdev->dev, res);
+		if (IS_ERR(priv->iface_base))
+			return PTR_ERR(priv->iface_base);
+	}
+
+	for_each_present_cpu(cpu) {
+		u32 addr_space_sz;
+
+		addr_space_sz = (priv->hw_version == MVPP21 ?
+				 MVPP21_ADDR_SPACE_SZ : MVPP22_ADDR_SPACE_SZ);
+		priv->cpu_base[cpu] = priv->base + cpu * addr_space_sz;
+	}
 
 	priv->pp_clk = devm_clk_get(&pdev->dev, "pp_clk");
 	if (IS_ERR(priv->pp_clk))