diff mbox series

[V1,3/4] memory: tegra: add mc-err support for T186

Message ID 1641926750-27544-4-git-send-email-amhetre@nvidia.com
State Changes Requested
Headers show
Series memory: tegra: Update mc interrupts | expand

Commit Message

Ashish Mhetre Jan. 11, 2022, 6:45 p.m. UTC
Add all mc-errors supported by T186.
Implement mc interrupt handling routine for T186.

Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
---
 drivers/memory/tegra/mc.h       |  17 +++++++
 drivers/memory/tegra/tegra186.c | 100 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+)

Comments

Krzysztof Kozlowski Jan. 11, 2022, 8:56 p.m. UTC | #1
Cc in the future also Dmitry, because he was involved in these drivers
quite a lot.

On 11/01/2022 19:45, Ashish Mhetre wrote:
> Add all mc-errors supported by T186.
> Implement mc interrupt handling routine for T186.

Here and in other patches - please use Tegra186 (and similar) to be
consistent with existing upstream naming.

> 
> Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
> ---
>  drivers/memory/tegra/mc.h       |  17 +++++++
>  drivers/memory/tegra/tegra186.c | 100 ++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 117 insertions(+)
> 
> diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h
> index 2d4f495..7817492 100644
> --- a/drivers/memory/tegra/mc.h
> +++ b/drivers/memory/tegra/mc.h
> @@ -44,6 +44,15 @@
>  #define MC_TIMING_CONTROL_DBG				0xf8
>  #define MC_TIMING_CONTROL				0xfc
>  
> +#define MC_ERR_VPR_STATUS				0x654
> +#define MC_ERR_VPR_ADR					0x658
> +#define MC_ERR_SEC_STATUS				0x67c
> +#define MC_ERR_SEC_ADR					0x680
> +#define MC_ERR_MTS_STATUS				0x9b0
> +#define MC_ERR_MTS_ADR					0x9b4
> +#define MC_ERR_GENERALIZED_CARVEOUT_STATUS		0xc00
> +#define MC_ERR_GENERALIZED_CARVEOUT_ADR			0xc04
> +
>  #define MC_INT_DECERR_ROUTE_SANITY			BIT(20)
>  #define MC_INT_WCAM_ERR					BIT(19)
>  #define MC_INT_SCRUB_ECC_WR_ACK				BIT(18)
> @@ -159,6 +168,14 @@ extern const struct tegra_mc_ops tegra186_mc_ops;
>  extern const char * const tegra_mc_status_names[32];
>  extern const char * const tegra_mc_error_names[8];
>  
> +struct tegra_mc_error {
> +	u32 int_bit;

Where is it used (read)?

> +	const char *msg;
> +	u32 status_reg;
> +	u32 addr_reg;

Please describe all these fields with kerneldoc.

> +	u32 addr_reg_hi;

Looks unused.

> +};
> +
>  /*
>   * These IDs are for internal use of Tegra ICC drivers. The ID numbers are
>   * chosen such that they don't conflict with the device-tree ICC node IDs.
> diff --git a/drivers/memory/tegra/tegra186.c b/drivers/memory/tegra/tegra186.c
> index 6766cc4..4f3ae71 100644
> --- a/drivers/memory/tegra/tegra186.c
> +++ b/drivers/memory/tegra/tegra186.c
> @@ -146,8 +146,107 @@ static void tegra186_mc_clear_interrupt(struct tegra_mc *mc)
>  	mc_writel(mc, MC_INTSTATUS_CLEAR, MC_INTSTATUS);
>  }
>  
> +static const struct tegra_mc_error int_mc_errors[] = {
> +	{
> +		.int_bit = MC_INT_DECERR_EMEM,
> +		.msg = "EMEM address decode error",
> +		.status_reg = MC_ERR_STATUS,
> +		.addr_reg = MC_ERR_ADR,
> +	},
> +	{
> +		.int_bit = MC_INT_SECURITY_VIOLATION,
> +		.msg = "non secure access to secure region",
> +		.status_reg = MC_ERR_STATUS,
> +		.addr_reg = MC_ERR_ADR,
> +	},
> +	{
> +		.int_bit = MC_INT_DECERR_VPR,
> +		.msg = "MC request violates VPR requirements",
> +		.status_reg = MC_ERR_VPR_STATUS,
> +		.addr_reg = MC_ERR_VPR_ADR,
> +	},
> +	{
> +		.int_bit = MC_INT_SECERR_SEC,
> +		.msg = "MC request violated SEC carveout requirements",
> +		.status_reg = MC_ERR_SEC_STATUS,
> +		.addr_reg = MC_ERR_SEC_ADR,
> +	},
> +	{
> +		.int_bit = MC_INT_DECERR_MTS,
> +		.msg = "MTS carveout access violation",
> +		.status_reg = MC_ERR_MTS_STATUS,
> +		.addr_reg = MC_ERR_MTS_ADR,
> +	},
> +	{
> +		.int_bit = MC_INT_DECERR_GENERALIZED_CARVEOUT,
> +		.msg = "GSC access violation",
> +		.status_reg = MC_ERR_GENERALIZED_CARVEOUT_STATUS,
> +		.addr_reg = MC_ERR_GENERALIZED_CARVEOUT_ADR,
> +	},
> +};
> +
> +static irqreturn_t tegra186_mc_handle_irq(int irq, void *data)
> +{
> +	struct tegra_mc *mc = data;
> +	unsigned long status;
> +	unsigned int bit;
> +
> +	status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask;
> +	if (!status)
> +		return IRQ_NONE;
> +
> +	for_each_set_bit(bit, &status, 32) {

Don't you need bitops.h for this?

> +		const char *error = int_mc_errors[bit].msg ?: "unknown";

int_mc_errors has size of 6, but it's index (the bit variable) can be 20
or even 32. Are you sure indices are used properly or maybe
int_mc_errors missed initializer per-index?

> +		const char *client = "unknown";
> +		const char *direction, *secure;
> +		phys_addr_t addr = 0;
> +		unsigned int i;
> +		u8 id;
> +		u32 value;

Keep order in reversed xmas tree plus name it in a meaningful way. You
read here several registers, so which one is value? Probably you meant
status?

> +
> +		value = mc_readl(mc, int_mc_errors[bit].status_reg);
> +
> +#ifdef CONFIG_PHYS_ADDR_T_64BIT
> +		if (mc->soc->num_address_bits > 32) {
> +			addr = ((value >> MC_ERR_STATUS_ADR_HI_SHIFT) &
> +				MC_ERR_STATUS_ADR_HI_MASK);
> +			addr <<= 32;
> +		}
> +#endif
> +		addr |= mc_readl(mc, int_mc_errors[bit].addr_reg);
> +
> +		if (value & MC_ERR_STATUS_RW)
> +			direction = "write";
> +		else
> +			direction = "read";
> +
> +		if (value & MC_ERR_STATUS_SECURITY)
> +			secure = "secure ";
> +		else
> +			secure = "";
> +
> +		id = value & mc->soc->client_id_mask;
> +
> +		for (i = 0; i < mc->soc->num_clients; i++) {
> +			if (mc->soc->clients[i].id == id) {
> +				client = mc->soc->clients[i].name;
> +				break;
> +			}
> +		}
> +
> +		dev_err_ratelimited(mc->dev, "%s: %s%s @%pa: %s\n",
> +				    client, secure, direction, &addr, error);
> +	}
> +
> +	/* clear interrupts */
> +	mc_writel(mc, status, MC_INTSTATUS);> +
> +	return IRQ_HANDLED;

I don't think you are actually handling these errors as stated in commit
message. You only log them. Please adjust the commit subject and message
to mention the actual purpose/action of error handling.

> +}
> +
>  const struct tegra_mc_interrupt_ops tegra186_mc_interrupt_ops = {
>  	.clear_interrupt = tegra186_mc_clear_interrupt,
> +	.handle_irq = tegra186_mc_handle_irq,
>  };
>  
>  const struct tegra_mc_ops tegra186_mc_ops = {
> @@ -886,6 +985,7 @@ const struct tegra_mc_soc tegra186_mc_soc = {
>  	.num_clients = ARRAY_SIZE(tegra186_mc_clients),
>  	.clients = tegra186_mc_clients,
>  	.num_address_bits = 40,
> +	.client_id_mask = 0xff,
>  	.intmask = MC_INT_WCAM_ERR | MC_INT_SCRUB_ECC_WR_ACK |
>  		   MC_INT_DECERR_GENERALIZED_CARVEOUT | MC_INT_DECERR_MTS |
>  		   MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
> 



Best regards,
Krzysztof
Dmitry Osipenko Jan. 12, 2022, 11:01 a.m. UTC | #2
11.01.2022 21:45, Ashish Mhetre пишет:
> Add all mc-errors supported by T186.
> Implement mc interrupt handling routine for T186.
> 
> Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
> ---
>  drivers/memory/tegra/mc.h       |  17 +++++++
>  drivers/memory/tegra/tegra186.c | 100 ++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 117 insertions(+)
> 
> diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h
> index 2d4f495..7817492 100644
> --- a/drivers/memory/tegra/mc.h
> +++ b/drivers/memory/tegra/mc.h
> @@ -44,6 +44,15 @@
>  #define MC_TIMING_CONTROL_DBG				0xf8
>  #define MC_TIMING_CONTROL				0xfc
>  

this empty line is unnecessary

> +#define MC_ERR_VPR_STATUS				0x654
> +#define MC_ERR_VPR_ADR					0x658
> +#define MC_ERR_SEC_STATUS				0x67c
> +#define MC_ERR_SEC_ADR					0x680
> +#define MC_ERR_MTS_STATUS				0x9b0
> +#define MC_ERR_MTS_ADR					0x9b4
> +#define MC_ERR_GENERALIZED_CARVEOUT_STATUS		0xc00
> +#define MC_ERR_GENERALIZED_CARVEOUT_ADR			0xc04
> +
>  #define MC_INT_DECERR_ROUTE_SANITY			BIT(20)
>  #define MC_INT_WCAM_ERR					BIT(19)
>  #define MC_INT_SCRUB_ECC_WR_ACK				BIT(18)
> @@ -159,6 +168,14 @@ extern const struct tegra_mc_ops tegra186_mc_ops;
>  extern const char * const tegra_mc_status_names[32];
>  extern const char * const tegra_mc_error_names[8];
>  
> +struct tegra_mc_error {
> +	u32 int_bit;
> +	const char *msg;
> +	u32 status_reg;
> +	u32 addr_reg;
> +	u32 addr_reg_hi;
> +};
> +
>  /*
>   * These IDs are for internal use of Tegra ICC drivers. The ID numbers are
>   * chosen such that they don't conflict with the device-tree ICC node IDs.
> diff --git a/drivers/memory/tegra/tegra186.c b/drivers/memory/tegra/tegra186.c
> index 6766cc4..4f3ae71 100644
> --- a/drivers/memory/tegra/tegra186.c
> +++ b/drivers/memory/tegra/tegra186.c
> @@ -146,8 +146,107 @@ static void tegra186_mc_clear_interrupt(struct tegra_mc *mc)
>  	mc_writel(mc, MC_INTSTATUS_CLEAR, MC_INTSTATUS);
>  }
>  
> +static const struct tegra_mc_error int_mc_errors[] = {
> +	{
> +		.int_bit = MC_INT_DECERR_EMEM,
> +		.msg = "EMEM address decode error",
> +		.status_reg = MC_ERR_STATUS,
> +		.addr_reg = MC_ERR_ADR,
> +	},
> +	{
> +		.int_bit = MC_INT_SECURITY_VIOLATION,
> +		.msg = "non secure access to secure region",
> +		.status_reg = MC_ERR_STATUS,
> +		.addr_reg = MC_ERR_ADR,
> +	},
> +	{
> +		.int_bit = MC_INT_DECERR_VPR,
> +		.msg = "MC request violates VPR requirements",
> +		.status_reg = MC_ERR_VPR_STATUS,
> +		.addr_reg = MC_ERR_VPR_ADR,
> +	},

I see that these VPR registers present on all SoCs starting with T124.
It doesn't look like you need the separate IRQ handlers at all, instead
please extend the common T30 handler. For example, you may add a
switch-case statements to handle those T124+ specific bits differently.

static irqreturn_t tegra30_mc_handle_irq(int irq, void *data)
{
...
	switch (bit) {
	case MC_INT_DECERR_VPR:
		status_reg = MC_ERR_VPR_STATUS;
		addr_reg   = MC_ERR_VPR_ADR;
		break;
	...
	default:
		status_reg = MC_ERR_STATUS;
		addr_reg   = MC_ERR_ADR;
	}

	value = mc_readl(mc, status_reg);
	...
	
	value = mc_readl(mc, addr_reg);
Dmitry Osipenko Jan. 12, 2022, 11:02 a.m. UTC | #3
11.01.2022 21:45, Ashish Mhetre пишет:
> +static const struct tegra_mc_error int_mc_errors[] = {
> +	{
> +		.int_bit = MC_INT_DECERR_EMEM,
> +		.msg = "EMEM address decode error",
> +		.status_reg = MC_ERR_STATUS,
> +		.addr_reg = MC_ERR_ADR,
> +	},
> +	{
> +		.int_bit = MC_INT_SECURITY_VIOLATION,
> +		.msg = "non secure access to secure region",
> +		.status_reg = MC_ERR_STATUS,
> +		.addr_reg = MC_ERR_ADR,
> +	},
> +	{
> +		.int_bit = MC_INT_DECERR_VPR,
> +		.msg = "MC request violates VPR requirements",
> +		.status_reg = MC_ERR_VPR_STATUS,
> +		.addr_reg = MC_ERR_VPR_ADR,
> +	},
> +	{
> +		.int_bit = MC_INT_SECERR_SEC,
> +		.msg = "MC request violated SEC carveout requirements",
> +		.status_reg = MC_ERR_SEC_STATUS,
> +		.addr_reg = MC_ERR_SEC_ADR,
> +	},
> +	{
> +		.int_bit = MC_INT_DECERR_MTS,
> +		.msg = "MTS carveout access violation",
> +		.status_reg = MC_ERR_MTS_STATUS,
> +		.addr_reg = MC_ERR_MTS_ADR,
> +	},
> +	{
> +		.int_bit = MC_INT_DECERR_GENERALIZED_CARVEOUT,
> +		.msg = "GSC access violation",
> +		.status_reg = MC_ERR_GENERALIZED_CARVEOUT_STATUS,
> +		.addr_reg = MC_ERR_GENERALIZED_CARVEOUT_ADR,
> +	},
> +};
> +
> +static irqreturn_t tegra186_mc_handle_irq(int irq, void *data)
> +{
> +	struct tegra_mc *mc = data;
> +	unsigned long status;
> +	unsigned int bit;
> +
> +	status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask;
> +	if (!status)
> +		return IRQ_NONE;
> +
> +	for_each_set_bit(bit, &status, 32) {
> +		const char *error = int_mc_errors[bit].msg ?: "unknown";

int_mc_errors[bit] isn't what you need and .int_bit is unused, which
suggests that all this code doesn't work and was untested. Please don't
send untested patches.
Dmitry Osipenko Jan. 12, 2022, 11:22 a.m. UTC | #4
11.01.2022 21:45, Ashish Mhetre пишет:
>  #define MC_INT_DECERR_ROUTE_SANITY			BIT(20)
>  #define MC_INT_WCAM_ERR					BIT(19)
>  #define MC_INT_SCRUB_ECC_WR_ACK				BIT(18)

I don't see where these errors are handled in the code. Is documentation
that explains these bits publicly available?
Dmitry Osipenko Jan. 12, 2022, 11:24 a.m. UTC | #5
12.01.2022 14:22, Dmitry Osipenko пишет:
> 11.01.2022 21:45, Ashish Mhetre пишет:
>>  #define MC_INT_DECERR_ROUTE_SANITY			BIT(20)
>>  #define MC_INT_WCAM_ERR					BIT(19)
>>  #define MC_INT_SCRUB_ECC_WR_ACK				BIT(18)
> 
> I don't see where these errors are handled in the code. Is documentation
> that explains these bits publicly available?
> 

MC_INT_SCRUB_ECC_WR_ACK shouldn't be a error.
Ashish Mhetre Jan. 19, 2022, 8:33 a.m. UTC | #6
On 1/12/2022 2:26 AM, Krzysztof Kozlowski wrote:
> External email: Use caution opening links or attachments
> 
> 
> Cc in the future also Dmitry, because he was involved in these drivers
> quite a lot.
> 
I'll make sure of that in future patches.

> On 11/01/2022 19:45, Ashish Mhetre wrote:
>> Add all mc-errors supported by T186.
>> Implement mc interrupt handling routine for T186.
> 
> Here and in other patches - please use Tegra186 (and similar) to be
> consistent with existing upstream naming.
> 
Okay, I'll update the naming.

>>
>> Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
>> ---
>>   drivers/memory/tegra/mc.h       |  17 +++++++
>>   drivers/memory/tegra/tegra186.c | 100 ++++++++++++++++++++++++++++++++++++++++
>>   2 files changed, 117 insertions(+)
>>
>> diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h
>> index 2d4f495..7817492 100644
>> --- a/drivers/memory/tegra/mc.h
>> +++ b/drivers/memory/tegra/mc.h
>> @@ -44,6 +44,15 @@
>>   #define MC_TIMING_CONTROL_DBG                                0xf8
>>   #define MC_TIMING_CONTROL                            0xfc
>>
>> +#define MC_ERR_VPR_STATUS                            0x654
>> +#define MC_ERR_VPR_ADR                                       0x658
>> +#define MC_ERR_SEC_STATUS                            0x67c
>> +#define MC_ERR_SEC_ADR                                       0x680
>> +#define MC_ERR_MTS_STATUS                            0x9b0
>> +#define MC_ERR_MTS_ADR                                       0x9b4
>> +#define MC_ERR_GENERALIZED_CARVEOUT_STATUS           0xc00
>> +#define MC_ERR_GENERALIZED_CARVEOUT_ADR                      0xc04
>> +
>>   #define MC_INT_DECERR_ROUTE_SANITY                   BIT(20)
>>   #define MC_INT_WCAM_ERR                                      BIT(19)
>>   #define MC_INT_SCRUB_ECC_WR_ACK                              BIT(18)
>> @@ -159,6 +168,14 @@ extern const struct tegra_mc_ops tegra186_mc_ops;
>>   extern const char * const tegra_mc_status_names[32];
>>   extern const char * const tegra_mc_error_names[8];
>>
>> +struct tegra_mc_error {
>> +     u32 int_bit;
> 
> Where is it used (read)?
> 
Oops, int_bit should be getting used in handle_irq for checking type of
interrupt. I'll update it in next version.

>> +     const char *msg;
>> +     u32 status_reg;
>> +     u32 addr_reg;
> 
> Please describe all these fields with kerneldoc.
> 
>> +     u32 addr_reg_hi;
> 
> Looks unused.
> 
It's getting used in patch 4. I'll update it in next version.

>> +};
>> +
>>   /*
>>    * These IDs are for internal use of Tegra ICC drivers. The ID numbers are
>>    * chosen such that they don't conflict with the device-tree ICC node IDs.
>> diff --git a/drivers/memory/tegra/tegra186.c b/drivers/memory/tegra/tegra186.c
>> index 6766cc4..4f3ae71 100644
>> --- a/drivers/memory/tegra/tegra186.c
>> +++ b/drivers/memory/tegra/tegra186.c
>> @@ -146,8 +146,107 @@ static void tegra186_mc_clear_interrupt(struct tegra_mc *mc)
>>        mc_writel(mc, MC_INTSTATUS_CLEAR, MC_INTSTATUS);
>>   }
>>
>> +static const struct tegra_mc_error int_mc_errors[] = {
>> +     {
>> +             .int_bit = MC_INT_DECERR_EMEM,
>> +             .msg = "EMEM address decode error",
>> +             .status_reg = MC_ERR_STATUS,
>> +             .addr_reg = MC_ERR_ADR,
>> +     },
>> +     {
>> +             .int_bit = MC_INT_SECURITY_VIOLATION,
>> +             .msg = "non secure access to secure region",
>> +             .status_reg = MC_ERR_STATUS,
>> +             .addr_reg = MC_ERR_ADR,
>> +     },
>> +     {
>> +             .int_bit = MC_INT_DECERR_VPR,
>> +             .msg = "MC request violates VPR requirements",
>> +             .status_reg = MC_ERR_VPR_STATUS,
>> +             .addr_reg = MC_ERR_VPR_ADR,
>> +     },
>> +     {
>> +             .int_bit = MC_INT_SECERR_SEC,
>> +             .msg = "MC request violated SEC carveout requirements",
>> +             .status_reg = MC_ERR_SEC_STATUS,
>> +             .addr_reg = MC_ERR_SEC_ADR,
>> +     },
>> +     {
>> +             .int_bit = MC_INT_DECERR_MTS,
>> +             .msg = "MTS carveout access violation",
>> +             .status_reg = MC_ERR_MTS_STATUS,
>> +             .addr_reg = MC_ERR_MTS_ADR,
>> +     },
>> +     {
>> +             .int_bit = MC_INT_DECERR_GENERALIZED_CARVEOUT,
>> +             .msg = "GSC access violation",
>> +             .status_reg = MC_ERR_GENERALIZED_CARVEOUT_STATUS,
>> +             .addr_reg = MC_ERR_GENERALIZED_CARVEOUT_ADR,
>> +     },
>> +};
>> +
>> +static irqreturn_t tegra186_mc_handle_irq(int irq, void *data)
>> +{
>> +     struct tegra_mc *mc = data;
>> +     unsigned long status;
>> +     unsigned int bit;
>> +
>> +     status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask;
>> +     if (!status)
>> +             return IRQ_NONE;
>> +
>> +     for_each_set_bit(bit, &status, 32) {
> 
> Don't you need bitops.h for this?
> 
It didn't throw any errors when I built it. bitops.h is mostly getting
included from one of the existing includes.
Probably of_device.h -> of.h -> bitops.h

>> +             const char *error = int_mc_errors[bit].msg ?: "unknown";
> 
> int_mc_errors has size of 6, but it's index (the bit variable) can be 20
> or even 32. Are you sure indices are used properly or maybe
> int_mc_errors missed initializer per-index?
> 
That's true, I have not implemented it correctly. I should have looped
over int_mc_errors[] and should have checked .int_bit with bit and used
index of that element.

>> +             const char *client = "unknown";
>> +             const char *direction, *secure;
>> +             phys_addr_t addr = 0;
>> +             unsigned int i;
>> +             u8 id;
>> +             u32 value;
> 
> Keep order in reversed xmas tree plus name it in a meaningful way. You
> read here several registers, so which one is value? Probably you meant
> status?
> 
Yes, I meant status. But tegra30_mc_handle_irq() is implemented in
similar way. So I went with same names. I'll update the names and order.

>> +
>> +             value = mc_readl(mc, int_mc_errors[bit].status_reg);
>> +
>> +#ifdef CONFIG_PHYS_ADDR_T_64BIT
>> +             if (mc->soc->num_address_bits > 32) {
>> +                     addr = ((value >> MC_ERR_STATUS_ADR_HI_SHIFT) &
>> +                             MC_ERR_STATUS_ADR_HI_MASK);
>> +                     addr <<= 32;
>> +             }
>> +#endif
>> +             addr |= mc_readl(mc, int_mc_errors[bit].addr_reg);
>> +
>> +             if (value & MC_ERR_STATUS_RW)
>> +                     direction = "write";
>> +             else
>> +                     direction = "read";
>> +
>> +             if (value & MC_ERR_STATUS_SECURITY)
>> +                     secure = "secure ";
>> +             else
>> +                     secure = "";
>> +
>> +             id = value & mc->soc->client_id_mask;
>> +
>> +             for (i = 0; i < mc->soc->num_clients; i++) {
>> +                     if (mc->soc->clients[i].id == id) {
>> +                             client = mc->soc->clients[i].name;
>> +                             break;
>> +                     }
>> +             }
>> +
>> +             dev_err_ratelimited(mc->dev, "%s: %s%s @%pa: %s\n",
>> +                                 client, secure, direction, &addr, error);
>> +     }
>> +
>> +     /* clear interrupts */
>> +     mc_writel(mc, status, MC_INTSTATUS);> +
>> +     return IRQ_HANDLED;
> 
> I don't think you are actually handling these errors as stated in commit
> message. You only log them. Please adjust the commit subject and message
> to mention the actual purpose/action of error handling.
> 
Yes, I'll update the commit message.

>> +}
>> +
>>   const struct tegra_mc_interrupt_ops tegra186_mc_interrupt_ops = {
>>        .clear_interrupt = tegra186_mc_clear_interrupt,
>> +     .handle_irq = tegra186_mc_handle_irq,
>>   };
>>
>>   const struct tegra_mc_ops tegra186_mc_ops = {
>> @@ -886,6 +985,7 @@ const struct tegra_mc_soc tegra186_mc_soc = {
>>        .num_clients = ARRAY_SIZE(tegra186_mc_clients),
>>        .clients = tegra186_mc_clients,
>>        .num_address_bits = 40,
>> +     .client_id_mask = 0xff,
>>        .intmask = MC_INT_WCAM_ERR | MC_INT_SCRUB_ECC_WR_ACK |
>>                   MC_INT_DECERR_GENERALIZED_CARVEOUT | MC_INT_DECERR_MTS |
>>                   MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |
>>
> 
> 
> 
> Best regards,
> Krzysztof
Ashish Mhetre Jan. 19, 2022, 8:53 a.m. UTC | #7
On 1/12/2022 4:31 PM, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
> 
> 
> 11.01.2022 21:45, Ashish Mhetre пишет:
>> Add all mc-errors supported by T186.
>> Implement mc interrupt handling routine for T186.
>>
>> Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
>> ---
>>   drivers/memory/tegra/mc.h       |  17 +++++++
>>   drivers/memory/tegra/tegra186.c | 100 ++++++++++++++++++++++++++++++++++++++++
>>   2 files changed, 117 insertions(+)
>>
>> diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h
>> index 2d4f495..7817492 100644
>> --- a/drivers/memory/tegra/mc.h
>> +++ b/drivers/memory/tegra/mc.h
>> @@ -44,6 +44,15 @@
>>   #define MC_TIMING_CONTROL_DBG                                0xf8
>>   #define MC_TIMING_CONTROL                            0xfc
>>
> 
> this empty line is unnecessary
> 
I'll fix this in next version.

>> +#define MC_ERR_VPR_STATUS                            0x654
>> +#define MC_ERR_VPR_ADR                                       0x658
>> +#define MC_ERR_SEC_STATUS                            0x67c
>> +#define MC_ERR_SEC_ADR                                       0x680
>> +#define MC_ERR_MTS_STATUS                            0x9b0
>> +#define MC_ERR_MTS_ADR                                       0x9b4
>> +#define MC_ERR_GENERALIZED_CARVEOUT_STATUS           0xc00
>> +#define MC_ERR_GENERALIZED_CARVEOUT_ADR                      0xc04
>> +
>>   #define MC_INT_DECERR_ROUTE_SANITY                   BIT(20)
>>   #define MC_INT_WCAM_ERR                                      BIT(19)
>>   #define MC_INT_SCRUB_ECC_WR_ACK                              BIT(18)
>> @@ -159,6 +168,14 @@ extern const struct tegra_mc_ops tegra186_mc_ops;
>>   extern const char * const tegra_mc_status_names[32];
>>   extern const char * const tegra_mc_error_names[8];
>>
>> +struct tegra_mc_error {
>> +     u32 int_bit;
>> +     const char *msg;
>> +     u32 status_reg;
>> +     u32 addr_reg;
>> +     u32 addr_reg_hi;
>> +};
>> +
>>   /*
>>    * These IDs are for internal use of Tegra ICC drivers. The ID numbers are
>>    * chosen such that they don't conflict with the device-tree ICC node IDs.
>> diff --git a/drivers/memory/tegra/tegra186.c b/drivers/memory/tegra/tegra186.c
>> index 6766cc4..4f3ae71 100644
>> --- a/drivers/memory/tegra/tegra186.c
>> +++ b/drivers/memory/tegra/tegra186.c
>> @@ -146,8 +146,107 @@ static void tegra186_mc_clear_interrupt(struct tegra_mc *mc)
>>        mc_writel(mc, MC_INTSTATUS_CLEAR, MC_INTSTATUS);
>>   }
>>
>> +static const struct tegra_mc_error int_mc_errors[] = {
>> +     {
>> +             .int_bit = MC_INT_DECERR_EMEM,
>> +             .msg = "EMEM address decode error",
>> +             .status_reg = MC_ERR_STATUS,
>> +             .addr_reg = MC_ERR_ADR,
>> +     },
>> +     {
>> +             .int_bit = MC_INT_SECURITY_VIOLATION,
>> +             .msg = "non secure access to secure region",
>> +             .status_reg = MC_ERR_STATUS,
>> +             .addr_reg = MC_ERR_ADR,
>> +     },
>> +     {
>> +             .int_bit = MC_INT_DECERR_VPR,
>> +             .msg = "MC request violates VPR requirements",
>> +             .status_reg = MC_ERR_VPR_STATUS,
>> +             .addr_reg = MC_ERR_VPR_ADR,
>> +     },
> 
> I see that these VPR registers present on all SoCs starting with T124.
> It doesn't look like you need the separate IRQ handlers at all, instead
> please extend the common T30 handler. For example, you may add a
> switch-case statements to handle those T124+ specific bits differently.
> 
> static irqreturn_t tegra30_mc_handle_irq(int irq, void *data)
> {
> ...
>          switch (bit) {
>          case MC_INT_DECERR_VPR:
>                  status_reg = MC_ERR_VPR_STATUS;
>                  addr_reg   = MC_ERR_VPR_ADR;
>                  break;
>          ...
>          default:
>                  status_reg = MC_ERR_STATUS;
>                  addr_reg   = MC_ERR_ADR;
>          }
> 
>          value = mc_readl(mc, status_reg);
>          ...
> 
>          value = mc_readl(mc, addr_reg);
Okay. I'll use same handler as Tegra30 with additional Tegra186 onward
bits.
Also, shall I change name of tegra30_mc_handle_irq() to
tegra_mc_handle_irq() as we are using it across all Tegra SOCs ?
Ashish Mhetre Jan. 19, 2022, 8:58 a.m. UTC | #8
On 1/12/2022 4:32 PM, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
> 
> 
> 11.01.2022 21:45, Ashish Mhetre пишет:
>> +static const struct tegra_mc_error int_mc_errors[] = {
>> +     {
>> +             .int_bit = MC_INT_DECERR_EMEM,
>> +             .msg = "EMEM address decode error",
>> +             .status_reg = MC_ERR_STATUS,
>> +             .addr_reg = MC_ERR_ADR,
>> +     },
>> +     {
>> +             .int_bit = MC_INT_SECURITY_VIOLATION,
>> +             .msg = "non secure access to secure region",
>> +             .status_reg = MC_ERR_STATUS,
>> +             .addr_reg = MC_ERR_ADR,
>> +     },
>> +     {
>> +             .int_bit = MC_INT_DECERR_VPR,
>> +             .msg = "MC request violates VPR requirements",
>> +             .status_reg = MC_ERR_VPR_STATUS,
>> +             .addr_reg = MC_ERR_VPR_ADR,
>> +     },
>> +     {
>> +             .int_bit = MC_INT_SECERR_SEC,
>> +             .msg = "MC request violated SEC carveout requirements",
>> +             .status_reg = MC_ERR_SEC_STATUS,
>> +             .addr_reg = MC_ERR_SEC_ADR,
>> +     },
>> +     {
>> +             .int_bit = MC_INT_DECERR_MTS,
>> +             .msg = "MTS carveout access violation",
>> +             .status_reg = MC_ERR_MTS_STATUS,
>> +             .addr_reg = MC_ERR_MTS_ADR,
>> +     },
>> +     {
>> +             .int_bit = MC_INT_DECERR_GENERALIZED_CARVEOUT,
>> +             .msg = "GSC access violation",
>> +             .status_reg = MC_ERR_GENERALIZED_CARVEOUT_STATUS,
>> +             .addr_reg = MC_ERR_GENERALIZED_CARVEOUT_ADR,
>> +     },
>> +};
>> +
>> +static irqreturn_t tegra186_mc_handle_irq(int irq, void *data)
>> +{
>> +     struct tegra_mc *mc = data;
>> +     unsigned long status;
>> +     unsigned int bit;
>> +
>> +     status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask;
>> +     if (!status)
>> +             return IRQ_NONE;
>> +
>> +     for_each_set_bit(bit, &status, 32) {
>> +             const char *error = int_mc_errors[bit].msg ?: "unknown";
> 
> int_mc_errors[bit] isn't what you need and .int_bit is unused, which
> suggests that all this code doesn't work and was untested. Please don't
> send untested patches.
Yes, my bad. I will update this in next patch.
Actually I made sure that the patches build without errors and also made
sure that they does not break anything. As for reproducing each of these
memory controller errors, I agree that I haven't done that.
Ashish Mhetre Jan. 19, 2022, 9:09 a.m. UTC | #9
On 1/12/2022 4:54 PM, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
> 
> 
> 12.01.2022 14:22, Dmitry Osipenko пишет:
>> 11.01.2022 21:45, Ashish Mhetre пишет:
>>>   #define MC_INT_DECERR_ROUTE_SANITY                  BIT(20)
>>>   #define MC_INT_WCAM_ERR                                     BIT(19)
>>>   #define MC_INT_SCRUB_ECC_WR_ACK                             BIT(18)
>>
>> I don't see where these errors are handled in the code. Is documentation
>> that explains these bits publicly available?
>>
MC_INT_DECERR_ROUTE_SANITY is supposed to be part a of Tegra194
interrupts. I have missed adding it and will update in next version.
MC_INT_WCAM_ERR is a MC channel error and is not applicable in upstream.
I'll remove it.
These bits are defined in same documentation where other errors are
defined.

> 
> MC_INT_SCRUB_ECC_WR_ACK shouldn't be a error.
Okay, I 'll remove it.
Dmitry Osipenko Jan. 19, 2022, 1:41 p.m. UTC | #10
19.01.2022 11:53, Ashish Mhetre пишет:
> 
> 
> On 1/12/2022 4:31 PM, Dmitry Osipenko wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> 11.01.2022 21:45, Ashish Mhetre пишет:
>>> Add all mc-errors supported by T186.
>>> Implement mc interrupt handling routine for T186.
>>>
>>> Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
>>> ---
>>>   drivers/memory/tegra/mc.h       |  17 +++++++
>>>   drivers/memory/tegra/tegra186.c | 100
>>> ++++++++++++++++++++++++++++++++++++++++
>>>   2 files changed, 117 insertions(+)
>>>
>>> diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h
>>> index 2d4f495..7817492 100644
>>> --- a/drivers/memory/tegra/mc.h
>>> +++ b/drivers/memory/tegra/mc.h
>>> @@ -44,6 +44,15 @@
>>>   #define MC_TIMING_CONTROL_DBG                                0xf8
>>>   #define MC_TIMING_CONTROL                            0xfc
>>>
>>
>> this empty line is unnecessary
>>
> I'll fix this in next version.
> 
>>> +#define MC_ERR_VPR_STATUS                            0x654
>>> +#define MC_ERR_VPR_ADR                                       0x658
>>> +#define MC_ERR_SEC_STATUS                            0x67c
>>> +#define MC_ERR_SEC_ADR                                       0x680
>>> +#define MC_ERR_MTS_STATUS                            0x9b0
>>> +#define MC_ERR_MTS_ADR                                       0x9b4
>>> +#define MC_ERR_GENERALIZED_CARVEOUT_STATUS           0xc00
>>> +#define MC_ERR_GENERALIZED_CARVEOUT_ADR                      0xc04
>>> +
>>>   #define MC_INT_DECERR_ROUTE_SANITY                   BIT(20)
>>>   #define MC_INT_WCAM_ERR                                      BIT(19)
>>>   #define MC_INT_SCRUB_ECC_WR_ACK                              BIT(18)
>>> @@ -159,6 +168,14 @@ extern const struct tegra_mc_ops tegra186_mc_ops;
>>>   extern const char * const tegra_mc_status_names[32];
>>>   extern const char * const tegra_mc_error_names[8];
>>>
>>> +struct tegra_mc_error {
>>> +     u32 int_bit;
>>> +     const char *msg;
>>> +     u32 status_reg;
>>> +     u32 addr_reg;
>>> +     u32 addr_reg_hi;
>>> +};
>>> +
>>>   /*
>>>    * These IDs are for internal use of Tegra ICC drivers. The ID
>>> numbers are
>>>    * chosen such that they don't conflict with the device-tree ICC
>>> node IDs.
>>> diff --git a/drivers/memory/tegra/tegra186.c
>>> b/drivers/memory/tegra/tegra186.c
>>> index 6766cc4..4f3ae71 100644
>>> --- a/drivers/memory/tegra/tegra186.c
>>> +++ b/drivers/memory/tegra/tegra186.c
>>> @@ -146,8 +146,107 @@ static void tegra186_mc_clear_interrupt(struct
>>> tegra_mc *mc)
>>>        mc_writel(mc, MC_INTSTATUS_CLEAR, MC_INTSTATUS);
>>>   }
>>>
>>> +static const struct tegra_mc_error int_mc_errors[] = {
>>> +     {
>>> +             .int_bit = MC_INT_DECERR_EMEM,
>>> +             .msg = "EMEM address decode error",
>>> +             .status_reg = MC_ERR_STATUS,
>>> +             .addr_reg = MC_ERR_ADR,
>>> +     },
>>> +     {
>>> +             .int_bit = MC_INT_SECURITY_VIOLATION,
>>> +             .msg = "non secure access to secure region",
>>> +             .status_reg = MC_ERR_STATUS,
>>> +             .addr_reg = MC_ERR_ADR,
>>> +     },
>>> +     {
>>> +             .int_bit = MC_INT_DECERR_VPR,
>>> +             .msg = "MC request violates VPR requirements",
>>> +             .status_reg = MC_ERR_VPR_STATUS,
>>> +             .addr_reg = MC_ERR_VPR_ADR,
>>> +     },
>>
>> I see that these VPR registers present on all SoCs starting with T124.
>> It doesn't look like you need the separate IRQ handlers at all, instead
>> please extend the common T30 handler. For example, you may add a
>> switch-case statements to handle those T124+ specific bits differently.
>>
>> static irqreturn_t tegra30_mc_handle_irq(int irq, void *data)
>> {
>> ...
>>          switch (bit) {
>>          case MC_INT_DECERR_VPR:
>>                  status_reg = MC_ERR_VPR_STATUS;
>>                  addr_reg   = MC_ERR_VPR_ADR;
>>                  break;
>>          ...
>>          default:
>>                  status_reg = MC_ERR_STATUS;
>>                  addr_reg   = MC_ERR_ADR;
>>          }
>>
>>          value = mc_readl(mc, status_reg);
>>          ...
>>
>>          value = mc_readl(mc, addr_reg);
> Okay. I'll use same handler as Tegra30 with additional Tegra186 onward
> bits.
> Also, shall I change name of tegra30_mc_handle_irq() to
> tegra_mc_handle_irq() as we are using it across all Tegra SOCs ?

T20 won't use it, no need to change the name.
Dmitry Osipenko Jan. 19, 2022, 1:42 p.m. UTC | #11
19.01.2022 12:09, Ashish Mhetre пишет:
> 
> 
> On 1/12/2022 4:54 PM, Dmitry Osipenko wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> 12.01.2022 14:22, Dmitry Osipenko пишет:
>>> 11.01.2022 21:45, Ashish Mhetre пишет:
>>>>   #define MC_INT_DECERR_ROUTE_SANITY                  BIT(20)
>>>>   #define MC_INT_WCAM_ERR                                     BIT(19)
>>>>   #define MC_INT_SCRUB_ECC_WR_ACK                             BIT(18)
>>>
>>> I don't see where these errors are handled in the code. Is documentation
>>> that explains these bits publicly available?
>>>
> MC_INT_DECERR_ROUTE_SANITY is supposed to be part a of Tegra194
> interrupts. I have missed adding it and will update in next version.
> MC_INT_WCAM_ERR is a MC channel error and is not applicable in upstream.
> I'll remove it.
> These bits are defined in same documentation where other errors are
> defined.

Is this documentation publicly available? I only have access to the
public TRM that can be downloaded from the NVIDIA website and there are
no register sets there for newer SoCs, AFAICS. If TRM was updated and
now contains registers, then I'll try to get the latest version.
diff mbox series

Patch

diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h
index 2d4f495..7817492 100644
--- a/drivers/memory/tegra/mc.h
+++ b/drivers/memory/tegra/mc.h
@@ -44,6 +44,15 @@ 
 #define MC_TIMING_CONTROL_DBG				0xf8
 #define MC_TIMING_CONTROL				0xfc
 
+#define MC_ERR_VPR_STATUS				0x654
+#define MC_ERR_VPR_ADR					0x658
+#define MC_ERR_SEC_STATUS				0x67c
+#define MC_ERR_SEC_ADR					0x680
+#define MC_ERR_MTS_STATUS				0x9b0
+#define MC_ERR_MTS_ADR					0x9b4
+#define MC_ERR_GENERALIZED_CARVEOUT_STATUS		0xc00
+#define MC_ERR_GENERALIZED_CARVEOUT_ADR			0xc04
+
 #define MC_INT_DECERR_ROUTE_SANITY			BIT(20)
 #define MC_INT_WCAM_ERR					BIT(19)
 #define MC_INT_SCRUB_ECC_WR_ACK				BIT(18)
@@ -159,6 +168,14 @@  extern const struct tegra_mc_ops tegra186_mc_ops;
 extern const char * const tegra_mc_status_names[32];
 extern const char * const tegra_mc_error_names[8];
 
+struct tegra_mc_error {
+	u32 int_bit;
+	const char *msg;
+	u32 status_reg;
+	u32 addr_reg;
+	u32 addr_reg_hi;
+};
+
 /*
  * These IDs are for internal use of Tegra ICC drivers. The ID numbers are
  * chosen such that they don't conflict with the device-tree ICC node IDs.
diff --git a/drivers/memory/tegra/tegra186.c b/drivers/memory/tegra/tegra186.c
index 6766cc4..4f3ae71 100644
--- a/drivers/memory/tegra/tegra186.c
+++ b/drivers/memory/tegra/tegra186.c
@@ -146,8 +146,107 @@  static void tegra186_mc_clear_interrupt(struct tegra_mc *mc)
 	mc_writel(mc, MC_INTSTATUS_CLEAR, MC_INTSTATUS);
 }
 
+static const struct tegra_mc_error int_mc_errors[] = {
+	{
+		.int_bit = MC_INT_DECERR_EMEM,
+		.msg = "EMEM address decode error",
+		.status_reg = MC_ERR_STATUS,
+		.addr_reg = MC_ERR_ADR,
+	},
+	{
+		.int_bit = MC_INT_SECURITY_VIOLATION,
+		.msg = "non secure access to secure region",
+		.status_reg = MC_ERR_STATUS,
+		.addr_reg = MC_ERR_ADR,
+	},
+	{
+		.int_bit = MC_INT_DECERR_VPR,
+		.msg = "MC request violates VPR requirements",
+		.status_reg = MC_ERR_VPR_STATUS,
+		.addr_reg = MC_ERR_VPR_ADR,
+	},
+	{
+		.int_bit = MC_INT_SECERR_SEC,
+		.msg = "MC request violated SEC carveout requirements",
+		.status_reg = MC_ERR_SEC_STATUS,
+		.addr_reg = MC_ERR_SEC_ADR,
+	},
+	{
+		.int_bit = MC_INT_DECERR_MTS,
+		.msg = "MTS carveout access violation",
+		.status_reg = MC_ERR_MTS_STATUS,
+		.addr_reg = MC_ERR_MTS_ADR,
+	},
+	{
+		.int_bit = MC_INT_DECERR_GENERALIZED_CARVEOUT,
+		.msg = "GSC access violation",
+		.status_reg = MC_ERR_GENERALIZED_CARVEOUT_STATUS,
+		.addr_reg = MC_ERR_GENERALIZED_CARVEOUT_ADR,
+	},
+};
+
+static irqreturn_t tegra186_mc_handle_irq(int irq, void *data)
+{
+	struct tegra_mc *mc = data;
+	unsigned long status;
+	unsigned int bit;
+
+	status = mc_readl(mc, MC_INTSTATUS) & mc->soc->intmask;
+	if (!status)
+		return IRQ_NONE;
+
+	for_each_set_bit(bit, &status, 32) {
+		const char *error = int_mc_errors[bit].msg ?: "unknown";
+		const char *client = "unknown";
+		const char *direction, *secure;
+		phys_addr_t addr = 0;
+		unsigned int i;
+		u8 id;
+		u32 value;
+
+		value = mc_readl(mc, int_mc_errors[bit].status_reg);
+
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+		if (mc->soc->num_address_bits > 32) {
+			addr = ((value >> MC_ERR_STATUS_ADR_HI_SHIFT) &
+				MC_ERR_STATUS_ADR_HI_MASK);
+			addr <<= 32;
+		}
+#endif
+		addr |= mc_readl(mc, int_mc_errors[bit].addr_reg);
+
+		if (value & MC_ERR_STATUS_RW)
+			direction = "write";
+		else
+			direction = "read";
+
+		if (value & MC_ERR_STATUS_SECURITY)
+			secure = "secure ";
+		else
+			secure = "";
+
+		id = value & mc->soc->client_id_mask;
+
+		for (i = 0; i < mc->soc->num_clients; i++) {
+			if (mc->soc->clients[i].id == id) {
+				client = mc->soc->clients[i].name;
+				break;
+			}
+		}
+
+		dev_err_ratelimited(mc->dev, "%s: %s%s @%pa: %s\n",
+				    client, secure, direction, &addr, error);
+	}
+
+	/* clear interrupts */
+	mc_writel(mc, status, MC_INTSTATUS);
+
+	return IRQ_HANDLED;
+}
+
 const struct tegra_mc_interrupt_ops tegra186_mc_interrupt_ops = {
 	.clear_interrupt = tegra186_mc_clear_interrupt,
+	.handle_irq = tegra186_mc_handle_irq,
 };
 
 const struct tegra_mc_ops tegra186_mc_ops = {
@@ -886,6 +985,7 @@  const struct tegra_mc_soc tegra186_mc_soc = {
 	.num_clients = ARRAY_SIZE(tegra186_mc_clients),
 	.clients = tegra186_mc_clients,
 	.num_address_bits = 40,
+	.client_id_mask = 0xff,
 	.intmask = MC_INT_WCAM_ERR | MC_INT_SCRUB_ECC_WR_ACK |
 		   MC_INT_DECERR_GENERALIZED_CARVEOUT | MC_INT_DECERR_MTS |
 		   MC_INT_SECERR_SEC | MC_INT_DECERR_VPR |