diff mbox

[v2,2/2] mmc: sdhci-tegra: Specify valid DMA mask

Message ID 1456806764-16467-3-git-send-email-acourbot@nvidia.com
State Superseded, archived
Delegated to: Alexandre Courbot
Headers show

Commit Message

Alexandre Courbot March 1, 2016, 4:32 a.m. UTC
On T210, the sdhci controller can address more than 32 bits of address
space. Failing to express this fact results in the use of bounce
buffers and affects performance.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
---
 drivers/mmc/host/sdhci-tegra.c | 8 ++++++++
 1 file changed, 8 insertions(+)

Comments

Arnd Bergmann March 1, 2016, 9:34 p.m. UTC | #1
On Tuesday 01 March 2016 13:32:44 Alexandre Courbot wrote:
> On T210, the sdhci controller can address more than 32 bits of address
> space. Failing to express this fact results in the use of bounce
> buffers and affects performance.
> 
> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>

I don't get this one. Why don't you just set the (SDHCI_USE_SDMA | SDHCI_USE_ADMA)
flags that are checked in the first patch?

> @@ -289,6 +291,7 @@ static const struct sdhci_tegra_soc_data soc_data_tegra20 = {
>  	.pdata = &sdhci_tegra20_pdata,
>  	.nvquirks = NVQUIRK_FORCE_SDHCI_SPEC_200 |
>  		    NVQUIRK_ENABLE_BLOCK_GAP_DET,
> +	.dma_mask = DMA_BIT_MASK(32),
>  };

Can you describe what the specific bug is in these controllers? Do you mean they
support SDHCI_USE_SDMA or SDHCI_USE_ADMA in theory but you still have to prevent
them from using high addresses?

> @@ -353,6 +358,7 @@ static const struct sdhci_pltfm_data sdhci_tegra210_pdata = {
>  
>  static const struct sdhci_tegra_soc_data soc_data_tegra210 = {
>  	.pdata = &sdhci_tegra210_pdata,
> +	.dma_mask = DMA_BIT_MASK(34),
>  };
>  
>  static const struct of_device_id sdhci_tegra_dt_match[] = {

This one still completely weirds me out. What kind of odd limitation does
the controller have in Tegra 210?

Are there actually any machines with more than 16GB?

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexandre Courbot March 2, 2016, 10:36 a.m. UTC | #2
On Wed, Mar 2, 2016 at 6:34 AM, Arnd Bergmann <arnd@arndb.de> wrote:
> On Tuesday 01 March 2016 13:32:44 Alexandre Courbot wrote:
>> On T210, the sdhci controller can address more than 32 bits of address
>> space. Failing to express this fact results in the use of bounce
>> buffers and affects performance.
>>
>> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
>
> I don't get this one. Why don't you just set the (SDHCI_USE_SDMA | SDHCI_USE_ADMA)
> flags that are checked in the first patch?

The test is against (!(host->flags & (SDHCI_USE_SDMA |
SDHCI_USE_ADMA))), (see the '!') so it will be true (and the DMA mask
will be set) if both flags are *not* set (why we set the mask to 64
bits here in that case, I don't know).

T210 is capable of SDMA, so we cannot use this condition for that
purpose (maybe you missed the '!', in which case I understand why you
were surprised).

>
>> @@ -289,6 +291,7 @@ static const struct sdhci_tegra_soc_data soc_data_tegra20 = {
>>       .pdata = &sdhci_tegra20_pdata,
>>       .nvquirks = NVQUIRK_FORCE_SDHCI_SPEC_200 |
>>                   NVQUIRK_ENABLE_BLOCK_GAP_DET,
>> +     .dma_mask = DMA_BIT_MASK(32),
>>  };
>
> Can you describe what the specific bug is in these controllers? Do you mean they
> support SDHCI_USE_SDMA or SDHCI_USE_ADMA in theory but you still have to prevent
> them from using high addresses?

Ok, I think you probably missed the '!' then. :)

>
>> @@ -353,6 +358,7 @@ static const struct sdhci_pltfm_data sdhci_tegra210_pdata = {
>>
>>  static const struct sdhci_tegra_soc_data soc_data_tegra210 = {
>>       .pdata = &sdhci_tegra210_pdata,
>> +     .dma_mask = DMA_BIT_MASK(34),
>>  };
>>
>>  static const struct of_device_id sdhci_tegra_dt_match[] = {
>
> This one still completely weirds me out. What kind of odd limitation does
> the controller have in Tegra 210?
>
> Are there actually any machines with more than 16GB?

It is not a limitation of the controller - I am just limiting the mask
to the range of physical memory we can ever access on T210. My intent
here is to overcome the default 32-bit mask, not to limit the range,
so I could have set a 64-bit mask if not for my OCD. :P

But actually looking at how the various flags are interpreted in
sdhci_add_host(), I see the following:

    /*
     * It is assumed that a 64-bit capable device has set a 64-bit DMA mask
     * and *must* do 64-bit DMA.  A driver has the opportunity to change
     * that during the first call to ->enable_dma().  Similarly
     * SDHCI_QUIRK2_BROKEN_64_BIT_DMA must be left to the drivers to
     * implement.
     */
    if (caps[0] & SDHCI_CAN_64BIT)
        host->flags |= SDHCI_USE_64_BIT_DMA;

Since this relies on what the hardware declares being capable of and
is set on T210, I am very tempted to set a 64-bit dma_mask here and
call it a day, but the above comment seems to suggest that this should
have been done before.

If you think this is cool though, I will just do that and in
conjunction with patch 1 this will do the job nicely.
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arnd Bergmann March 2, 2016, 11:25 a.m. UTC | #3
On Wednesday 02 March 2016 19:36:23 Alexandre Courbot wrote:
> On Wed, Mar 2, 2016 at 6:34 AM, Arnd Bergmann <arnd@arndb.de> wrote:
> > On Tuesday 01 March 2016 13:32:44 Alexandre Courbot wrote:
> >> On T210, the sdhci controller can address more than 32 bits of address
> >> space. Failing to express this fact results in the use of bounce
> >> buffers and affects performance.
> >>
> >> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
> >
> > I don't get this one. Why don't you just set the (SDHCI_USE_SDMA | SDHCI_USE_ADMA)
> > flags that are checked in the first patch?
> 
> The test is against (!(host->flags & (SDHCI_USE_SDMA |
> SDHCI_USE_ADMA))), (see the '!') so it will be true (and the DMA mask
> will be set) if both flags are *not* set (why we set the mask to 64
> bits here in that case, I don't know).
> 
> T210 is capable of SDMA, so we cannot use this condition for that
> purpose (maybe you missed the '!', in which case I understand why you
> were surprised).

Ok, I see now that this code was just setting a fake mask in case
of PIO mode, which doesn't apply here. That in turn means that
your first patch is wrong though:

For a device that is not DMA capable (neither SDMA nor ADMA
claimed to be supported), we should not call dma_set_mask_and_coherent()
because that is likely to fail as well. It's an ugly hack to
just override the mask in that case, and I'd say it requires
a comment explaining what is going on.

> >> @@ -289,6 +291,7 @@ static const struct sdhci_tegra_soc_data soc_data_tegra20 = {
> >>       .pdata = &sdhci_tegra20_pdata,
> >>       .nvquirks = NVQUIRK_FORCE_SDHCI_SPEC_200 |
> >>                   NVQUIRK_ENABLE_BLOCK_GAP_DET,
> >> +     .dma_mask = DMA_BIT_MASK(32),
> >>  };
> >
> > Can you describe what the specific bug is in these controllers? Do you mean they
> > support SDHCI_USE_SDMA or SDHCI_USE_ADMA in theory but you still have to prevent
> > them from using high addresses?
> 
> Ok, I think you probably missed the '!' then. :)

I missed the larger context of that check too, but I think I've got it now.

> >> @@ -353,6 +358,7 @@ static const struct sdhci_pltfm_data sdhci_tegra210_pdata = {
> >>
> >>  static const struct sdhci_tegra_soc_data soc_data_tegra210 = {
> >>       .pdata = &sdhci_tegra210_pdata,
> >> +     .dma_mask = DMA_BIT_MASK(34),
> >>  };
> >>
> >>  static const struct of_device_id sdhci_tegra_dt_match[] = {
> >
> > This one still completely weirds me out. What kind of odd limitation does
> > the controller have in Tegra 210?
> >
> > Are there actually any machines with more than 16GB?
> 
> It is not a limitation of the controller - I am just limiting the mask
> to the range of physical memory we can ever access on T210. My intent
> here is to overcome the default 32-bit mask, not to limit the range,
> so I could have set a 64-bit mask if not for my OCD. :P
> 
> But actually looking at how the various flags are interpreted in
> sdhci_add_host(), I see the following:
> 
>     /*
>      * It is assumed that a 64-bit capable device has set a 64-bit DMA mask
>      * and *must* do 64-bit DMA.  A driver has the opportunity to change
>      * that during the first call to ->enable_dma().  Similarly
>      * SDHCI_QUIRK2_BROKEN_64_BIT_DMA must be left to the drivers to
>      * implement.
>      */
>     if (caps[0] & SDHCI_CAN_64BIT)
>         host->flags |= SDHCI_USE_64_BIT_DMA;
> 
> Since this relies on what the hardware declares being capable of and
> is set on T210, I am very tempted to set a 64-bit dma_mask here and
> call it a day, but the above comment seems to suggest that this should
> have been done before.

Right, that sounds good, that also makes it independent of the specific
Tegra SoC, right?

> If you think this is cool though, I will just do that and in
> conjunction with patch 1 this will do the job nicely.

as mentioned above, I now have doubts about patch 1, why do you still
need this now?

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexandre Courbot March 4, 2016, 6:08 a.m. UTC | #4
On Wed, Mar 2, 2016 at 8:25 PM, Arnd Bergmann <arnd@arndb.de> wrote:
> On Wednesday 02 March 2016 19:36:23 Alexandre Courbot wrote:
>> On Wed, Mar 2, 2016 at 6:34 AM, Arnd Bergmann <arnd@arndb.de> wrote:
>> > On Tuesday 01 March 2016 13:32:44 Alexandre Courbot wrote:
>> >> On T210, the sdhci controller can address more than 32 bits of address
>> >> space. Failing to express this fact results in the use of bounce
>> >> buffers and affects performance.
>> >>
>> >> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
>> >
>> > I don't get this one. Why don't you just set the (SDHCI_USE_SDMA | SDHCI_USE_ADMA)
>> > flags that are checked in the first patch?
>>
>> The test is against (!(host->flags & (SDHCI_USE_SDMA |
>> SDHCI_USE_ADMA))), (see the '!') so it will be true (and the DMA mask
>> will be set) if both flags are *not* set (why we set the mask to 64
>> bits here in that case, I don't know).
>>
>> T210 is capable of SDMA, so we cannot use this condition for that
>> purpose (maybe you missed the '!', in which case I understand why you
>> were surprised).
>
> Ok, I see now that this code was just setting a fake mask in case
> of PIO mode, which doesn't apply here. That in turn means that
> your first patch is wrong though:
>
> For a device that is not DMA capable (neither SDMA nor ADMA
> claimed to be supported), we should not call dma_set_mask_and_coherent()
> because that is likely to fail as well. It's an ugly hack to
> just override the mask in that case, and I'd say it requires
> a comment explaining what is going on.

Yeah, I'm not too sure what is the point of setting the fake mask to
be honest, but you are definitely right that it is a contradiction to
call a DMA function on a device that is not DMA-capable.

>
>> >> @@ -289,6 +291,7 @@ static const struct sdhci_tegra_soc_data soc_data_tegra20 = {
>> >>       .pdata = &sdhci_tegra20_pdata,
>> >>       .nvquirks = NVQUIRK_FORCE_SDHCI_SPEC_200 |
>> >>                   NVQUIRK_ENABLE_BLOCK_GAP_DET,
>> >> +     .dma_mask = DMA_BIT_MASK(32),
>> >>  };
>> >
>> > Can you describe what the specific bug is in these controllers? Do you mean they
>> > support SDHCI_USE_SDMA or SDHCI_USE_ADMA in theory but you still have to prevent
>> > them from using high addresses?
>>
>> Ok, I think you probably missed the '!' then. :)
>
> I missed the larger context of that check too, but I think I've got it now.
>
>> >> @@ -353,6 +358,7 @@ static const struct sdhci_pltfm_data sdhci_tegra210_pdata = {
>> >>
>> >>  static const struct sdhci_tegra_soc_data soc_data_tegra210 = {
>> >>       .pdata = &sdhci_tegra210_pdata,
>> >> +     .dma_mask = DMA_BIT_MASK(34),
>> >>  };
>> >>
>> >>  static const struct of_device_id sdhci_tegra_dt_match[] = {
>> >
>> > This one still completely weirds me out. What kind of odd limitation does
>> > the controller have in Tegra 210?
>> >
>> > Are there actually any machines with more than 16GB?
>>
>> It is not a limitation of the controller - I am just limiting the mask
>> to the range of physical memory we can ever access on T210. My intent
>> here is to overcome the default 32-bit mask, not to limit the range,
>> so I could have set a 64-bit mask if not for my OCD. :P
>>
>> But actually looking at how the various flags are interpreted in
>> sdhci_add_host(), I see the following:
>>
>>     /*
>>      * It is assumed that a 64-bit capable device has set a 64-bit DMA mask
>>      * and *must* do 64-bit DMA.  A driver has the opportunity to change
>>      * that during the first call to ->enable_dma().  Similarly
>>      * SDHCI_QUIRK2_BROKEN_64_BIT_DMA must be left to the drivers to
>>      * implement.
>>      */
>>     if (caps[0] & SDHCI_CAN_64BIT)
>>         host->flags |= SDHCI_USE_64_BIT_DMA;
>>
>> Since this relies on what the hardware declares being capable of and
>> is set on T210, I am very tempted to set a 64-bit dma_mask here and
>> call it a day, but the above comment seems to suggest that this should
>> have been done before.
>
> Right, that sounds good, that also makes it independent of the specific
> Tegra SoC, right?

Yes - it would just be a 2-liner that should set things properly for
all 64-bit capable controllers.

>> If you think this is cool though, I will just do that and in
>> conjunction with patch 1 this will do the job nicely.
>
> as mentioned above, I now have doubts about patch 1, why do you still
> need this now?

We would not need patch 1 anymore.

Let me send this and see if it looks better.

Thanks,
Alex.
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexandre Courbot March 4, 2016, 6:43 a.m. UTC | #5
On Fri, Mar 4, 2016 at 3:08 PM, Alexandre Courbot <gnurou@gmail.com> wrote:
> On Wed, Mar 2, 2016 at 8:25 PM, Arnd Bergmann <arnd@arndb.de> wrote:
>> On Wednesday 02 March 2016 19:36:23 Alexandre Courbot wrote:
>>> On Wed, Mar 2, 2016 at 6:34 AM, Arnd Bergmann <arnd@arndb.de> wrote:
>>> > On Tuesday 01 March 2016 13:32:44 Alexandre Courbot wrote:
>>> >> On T210, the sdhci controller can address more than 32 bits of address
>>> >> space. Failing to express this fact results in the use of bounce
>>> >> buffers and affects performance.
>>> >>
>>> >> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
>>> >
>>> > I don't get this one. Why don't you just set the (SDHCI_USE_SDMA | SDHCI_USE_ADMA)
>>> > flags that are checked in the first patch?
>>>
>>> The test is against (!(host->flags & (SDHCI_USE_SDMA |
>>> SDHCI_USE_ADMA))), (see the '!') so it will be true (and the DMA mask
>>> will be set) if both flags are *not* set (why we set the mask to 64
>>> bits here in that case, I don't know).
>>>
>>> T210 is capable of SDMA, so we cannot use this condition for that
>>> purpose (maybe you missed the '!', in which case I understand why you
>>> were surprised).
>>
>> Ok, I see now that this code was just setting a fake mask in case
>> of PIO mode, which doesn't apply here. That in turn means that
>> your first patch is wrong though:
>>
>> For a device that is not DMA capable (neither SDMA nor ADMA
>> claimed to be supported), we should not call dma_set_mask_and_coherent()
>> because that is likely to fail as well. It's an ugly hack to
>> just override the mask in that case, and I'd say it requires
>> a comment explaining what is going on.
>
> Yeah, I'm not too sure what is the point of setting the fake mask to
> be honest, but you are definitely right that it is a contradiction to
> call a DMA function on a device that is not DMA-capable.

Ah, I finally got it - we are just setting it to the *address* of
host->dma_mask so the device's DMA mask does not end up being a NULL
pointer.

That actually changes things a bit. DMA-capable devices are clearly
expected to set the mask themselves, but the only one to do it is
host/mtk-sd.c. And dma_set_mask() is only called in dw_mmc and
sdhci-acpi's enable_dma callback.

This means most DMA-capable devices (including Tegra, but not only)
are simply left with no DMA setup at all.

Probably we can detect when the host did not do any DMA setup in the
probe function and attempt some sane defaults depending on what the
hardware says it is capable of?
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arnd Bergmann March 4, 2016, 8:38 a.m. UTC | #6
On Friday 04 March 2016 15:43:56 Alexandre Courbot wrote:
> >
> > Yeah, I'm not too sure what is the point of setting the fake mask to
> > be honest, but you are definitely right that it is a contradiction to
> > call a DMA function on a device that is not DMA-capable.
> 
> Ah, I finally got it - we are just setting it to the *address* of
> host->dma_mask so the device's DMA mask does not end up being a NULL
> pointer.
> 
> That actually changes things a bit. DMA-capable devices are clearly
> expected to set the mask themselves, but the only one to do it is
> host/mtk-sd.c. And dma_set_mask() is only called in dw_mmc and
> sdhci-acpi's enable_dma callback.
> 
> This means most DMA-capable devices (including Tegra, but not only)
> are simply left with no DMA setup at all.
> 
> Probably we can detect when the host did not do any DMA setup in the
> probe function and attempt some sane defaults depending on what the
> hardware says it is capable of?

When the host leaves an empty DMA mask, the intended meaning is
that the device is not on a DMA capable bus, so if we run into
that case, we should instead fix the creation of the device
rather than the driver that looks at the data.

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index 83c4bf7bc16c..66808ac64db5 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -25,6 +25,7 @@ 
 #include <linux/mmc/mmc.h>
 #include <linux/mmc/slot-gpio.h>
 #include <linux/gpio/consumer.h>
+#include <linux/dma-mapping.h>
 
 #include "sdhci-pltfm.h"
 
@@ -52,6 +53,7 @@ 
 struct sdhci_tegra_soc_data {
 	const struct sdhci_pltfm_data *pdata;
 	u32 nvquirks;
+	u64 dma_mask;
 };
 
 struct sdhci_tegra {
@@ -289,6 +291,7 @@  static const struct sdhci_tegra_soc_data soc_data_tegra20 = {
 	.pdata = &sdhci_tegra20_pdata,
 	.nvquirks = NVQUIRK_FORCE_SDHCI_SPEC_200 |
 		    NVQUIRK_ENABLE_BLOCK_GAP_DET,
+	.dma_mask = DMA_BIT_MASK(32),
 };
 
 static const struct sdhci_pltfm_data sdhci_tegra30_pdata = {
@@ -307,6 +310,7 @@  static const struct sdhci_tegra_soc_data soc_data_tegra30 = {
 	.nvquirks = NVQUIRK_ENABLE_SDHCI_SPEC_300 |
 		    NVQUIRK_ENABLE_SDR50 |
 		    NVQUIRK_ENABLE_SDR104,
+	.dma_mask = DMA_BIT_MASK(32),
 };
 
 static const struct sdhci_ops tegra114_sdhci_ops = {
@@ -338,6 +342,7 @@  static const struct sdhci_tegra_soc_data soc_data_tegra114 = {
 	.nvquirks = NVQUIRK_ENABLE_SDR50 |
 		    NVQUIRK_ENABLE_DDR50 |
 		    NVQUIRK_ENABLE_SDR104,
+	.dma_mask = DMA_BIT_MASK(32),
 };
 
 static const struct sdhci_pltfm_data sdhci_tegra210_pdata = {
@@ -353,6 +358,7 @@  static const struct sdhci_pltfm_data sdhci_tegra210_pdata = {
 
 static const struct sdhci_tegra_soc_data soc_data_tegra210 = {
 	.pdata = &sdhci_tegra210_pdata,
+	.dma_mask = DMA_BIT_MASK(34),
 };
 
 static const struct of_device_id sdhci_tegra_dt_match[] = {
@@ -385,6 +391,8 @@  static int sdhci_tegra_probe(struct platform_device *pdev)
 		return PTR_ERR(host);
 	pltfm_host = sdhci_priv(host);
 
+	host->dma_mask = soc_data->dma_mask;
+
 	tegra_host = devm_kzalloc(&pdev->dev, sizeof(*tegra_host), GFP_KERNEL);
 	if (!tegra_host) {
 		dev_err(mmc_dev(host->mmc), "failed to allocate tegra_host\n");