diff mbox

[07/11] fsmc/nand: Provide contiguous buffers to dma

Message ID 2b88c853b3691338fae037f569917fc300cd6032.1349778821.git.vipin.kumar@st.com
State New, archived
Headers show

Commit Message

Vipin Kumar Oct. 9, 2012, 10:44 a.m. UTC
read_buf/write_buf callbacks should be able to accept a user space memory
address (virtually contiguous memory) as buffer pointer.

This patch allocates a logically contiguous memory area which is used for DMA
xfers during read and write accesses.

Signed-off-by: Vipin Kumar <vipin.kumar@st.com>
---
 drivers/mtd/nand/fsmc_nand.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

Comments

Viresh Kumar Oct. 9, 2012, 5:41 p.m. UTC | #1
On Tue, Oct 9, 2012 at 4:14 PM, Vipin Kumar <vipin.kumar@st.com> wrote:
> diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
> @@ -675,7 +676,8 @@ static void fsmc_read_buf_dma(struct mtd_info *mtd, uint8_t *buf, int len)
>         struct fsmc_nand_data *host;
>
>         host = container_of(mtd, struct fsmc_nand_data, mtd);
> -       dma_xfer(host, buf, len, DMA_FROM_DEVICE);
> +       dma_xfer(host, host->dma_buf, len, DMA_FROM_DEVICE);
> +       memcpy(buf, (const void *)host->dma_buf, len);

Ahh.. Too much overhead. Can't you do something better here?
- DMA can be done to user buffers too, Pratyush has done some work in
past on this.
- There must be some other way of sharing kernel buffer to user space.

--
viresh
Linus Walleij Oct. 10, 2012, 5:07 p.m. UTC | #2
On Tue, Oct 9, 2012 at 12:44 PM, Vipin Kumar <vipin.kumar@st.com> wrote:

> read_buf/write_buf callbacks should be able to accept a user space memory
> address (virtually contiguous memory) as buffer pointer.
>
> This patch allocates a logically contiguous memory area which is use for dma

You mean PHYSICALLY contigous, don't you?

> xfers during read and write accesses.
>
> Signed-off-by: Vipin Kumar <vipin.kumar@st.com>

If you really want a physically contigous buffer you need to use
CMA, but I don't think that is the real problem here...

We're already using userspace buffers in e.g. the MMCI driver
(drivers/mmc/host/mmci.c).

The real problem is likely the DMA driver. The stuff that gets
fed into dma.device_prep_dma_memcpy() needs to be
converted to a scatterlist and then set up in the LLI list
for the controller.

IIRC SPEAr is using drivers/dma/dw_dmac.c so
check this driver's dwc_prep_dma_memcpy().
It does seem like it is checking whether src or
dest is scattered in this for() loop:

for (offset = 0; offset < len; offset += xfer_count << src_width) {}

dma_sync_single_for_device() is translating the virtual
address to physical for every chunk BTW.

So instead of doing this copying, debug the problem, and
see if there is a bug in that for()-loop or similar, if it needs
to be rewritten or so.

Yours,
Linus Walleij
Viresh Kumar Oct. 11, 2012, 3:16 a.m. UTC | #3
On Wed, Oct 10, 2012 at 10:37 PM, Linus Walleij
<linus.walleij@linaro.org> wrote:
> The real problem is likely the DMA driver. The stuf that get
> fed into dma.device_prep_dma_memcpy() needs to be
> converted to a scatterlist and then set up in the LLI list
> for the controller.
>
> IIRC SPEAr is using drivers/dma/dw_dmac.c so

Both dw_dmac and pl080 for different SPEAr SoC's.

> check this driver's dwc_prep_dma_memcpy().
> It does seem like it is checking whether src or
> dest is scattered in this for() loop:
>
> for (offset = 0; offset < len; offset += xfer_count << src_width) {}
>
> dma_sync_single_for_device() is translating the virtual
> address to physical for every chunk BTW.
>
> So instead of doing this copying, debug the problem, and
> see if there is a bug in that for()-loop or similar, if it needs
> to be rewritten or so.

But what's the problem you saw vipin, for which you generated this patch?
Vipin Kumar Oct. 11, 2012, 4:07 a.m. UTC | #4
On 10/11/2012 8:46 AM, viresh kumar wrote:
> On Wed, Oct 10, 2012 at 10:37 PM, Linus Walleij
> <linus.walleij@linaro.org>  wrote:
>> The real problem is likely the DMA driver. The stuf that get
>> fed into dma.device_prep_dma_memcpy() needs to be
>> converted to a scatterlist and then set up in the LLI list
>> for the controller.
>>
>> IIRC SPEAr is using drivers/dma/dw_dmac.c so
>
> Both dw_dmac and pl080 for different SPEAr SoC's.
>
>> check this driver's dwc_prep_dma_memcpy().
>> It does seem like it is checking whether src or
>> dest is scattered in this for() loop:
>>
>> for (offset = 0; offset<  len; offset += xfer_count<<  src_width) {}
>>
>> dma_sync_single_for_device() is translating the virtual
>> address to physical for every chunk BTW.
>>
>> So instead of doing this copying, debug the problem, and
>> see if there is a bug in that for()-loop or similar, if it needs
>> to be rewritten or so.
>
> But what's the problem you saw vipin, for which you generated this patch?
> .
>

The nand tests were failing and that was because of a user pointer being 
passed to them but it was a long time back and this patch just carried 
on in the local repo. It is only now that I am sending it

It seems the dma drivers may also have been updated, so it needs another
test cycle. I will do the needful and reply back on this mail

Vipin
Vipin Kumar Oct. 11, 2012, 4:08 a.m. UTC | #5
On 10/10/2012 10:37 PM, Linus Walleij wrote:
> On Tue, Oct 9, 2012 at 12:44 PM, Vipin Kumar<vipin.kumar@st.com>  wrote:
>
>> read_buf/write_buf callbacks should be able to accept a user space memory
>> address (virtually contiguous memory) as buffer pointer.
>>
>> This patch allocates a logically contiguous memory area which is use for dma
>
> You mean PHYSICALLY contigous, don't you?
>

Yes Sorry for that :)

>> xfers during read and write accesses.
>>
>> Signed-off-by: Vipin Kumar<vipin.kumar@st.com>
>
> If you really want a physically contigous buffer you need to use
> CMA, but I don't think that is the real problem here...
>
> We're already using userspace buffers in e.g. the MMCI driver
> (drivers/mmc/host/mmci.c).
>
> The real problem is likely the DMA driver. The stuf that get
> fed into dma.device_prep_dma_memcpy() needs to be
> converted to a scatterlist and then set up in the LLI list
> for the controller.
>
> IIRC SPEAr is using drivers/dma/dw_dmac.c so
> check this driver's dwc_prep_dma_memcpy().
> It does seem like it is checking whether src or
> dest is scattered in this for() loop:
>
> for (offset = 0; offset<  len; offset += xfer_count<<  src_width) {}
>
> dma_sync_single_for_device() is translating the virtual
> address to physical for every chunk BTW.
>
> So instead of doing this copying, debug the problem, and
> see if there is a bug in that for()-loop or similar, if it needs
> to be rewritten or so.
>

I would debug again and reply to this mail soon

> Yours,
> Linus Walleij
> .
>
Viresh Kumar Oct. 11, 2012, 4:15 a.m. UTC | #6
On Wed, Oct 10, 2012 at 10:37 PM, Linus Walleij
<linus.walleij@linaro.org> wrote:
> On Tue, Oct 9, 2012 at 12:44 PM, Vipin Kumar <vipin.kumar@st.com> wrote:

> The real problem is likely the DMA driver. The stuf that get
> fed into dma.device_prep_dma_memcpy() needs to be
> converted to a scatterlist and then set up in the LLI list
> for the controller.
>
> IIRC SPEAr is using drivers/dma/dw_dmac.c so
> check this driver's dwc_prep_dma_memcpy().
> It does seem like it is checking whether src or
> dest is scattered in this for() loop:
>
> for (offset = 0; offset < len; offset += xfer_count << src_width) {}
>
> dma_sync_single_for_device() is translating the virtual
> address to physical for every chunk BTW.

I pray that i am wrong here, otherwise i would be thrown out from
the maintainers list for this driver :)

dma_sync_single_for_device() is not doing anything on the buffer, but
on the LLI item. Actually it is flushing LLI struct so that DMA h/w can get
the correct values.

dwc_prep_dma_memcpy() doesn't expect a virtual address, look at type
of src & dest bufs: dma_addr_t. It is responsibility of user drivers to pass
physically contiguous address to it.

--
viresh
Linus Walleij Oct. 11, 2012, 4:06 p.m. UTC | #7
On Thu, Oct 11, 2012 at 6:15 AM, viresh kumar <viresh.kumar@linaro.org> wrote:
> On Wed, Oct 10, 2012 at 10:37 PM, Linus Walleij
> <linus.walleij@linaro.org> wrote:

>> dma_sync_single_for_device() is translating the virtual
>> address to physical for every chunk BTW.
>
> I pray that i am wrong here, otherwise i would be thrown out from
> the maintainers list for this driver :)
>
> dma_sync_single_for_device() is not doing anything on the buffer, but
> on the LLI item. Actually it is flushing LLI struct so that DMA h/w can get
> the correct values.

Sorry no, I'm the one who's wrong...

So the DMA engine memcpy() is not mapping virt->phys
but expects physical addresses to be provided.

So dma_map_single() needs to be called on the stuff
passed in to dev->device_prep_dma_memcpy().

And currently there is indeed a dma_map_single() in
dma_xfer() in fsmc_nand.c which should work just fine.

dma_map_single() will only work if the buffer is
physically contiguous.

And the block layer of the subsystem should take care
of only handing the driver buffers that are contiguous
I think? Not that I'm an expert here ... more some
guesswork :-/

Artem will know I hope!

Yours,
Linus Walleij
Viresh Kumar Oct. 11, 2012, 5:07 p.m. UTC | #8
On Thu, Oct 11, 2012 at 9:36 PM, Linus Walleij <linus.walleij@linaro.org> wrote:
> On Thu, Oct 11, 2012 at 6:15 AM, viresh kumar <viresh.kumar@linaro.org> wrote:
>> I pray that i am wrong here, otherwise i would be thrown out from

s/am/am not/ :(

>> the maintainers list for this driver :)
>>
>> dma_sync_single_for_device() is not doing anything on the buffer, but
>> on the LLI item. Actually it is flushing LLI struct so that DMA h/w can get
>> the correct values.
>
> Sorry no, I'm the one who's wrong...

Glad to hear that. :)
It happens very few times in one's lifetime, that a beginner like me is correct
and an expert like you is not. Just kidding :)

> And the block layer of the subsystem should take care
> of only handing the driver buffers that are contiguous

But why should that be a constraint on the block layer? It is working in virtual
space and is concerned about that only.

--
viresh
Linus Walleij Oct. 11, 2012, 9:51 p.m. UTC | #9
On Thu, Oct 11, 2012 at 7:07 PM, viresh kumar <viresh.kumar@linaro.org> wrote:
> On Thu, Oct 11, 2012 at 9:36 PM, Linus Walleij <linus.walleij@linaro.org> wrote:
>>
>> And the block layer of the subsystem should take care
>> of only handing the driver buffers that are contiguous
>
> But why should that be a constraint on block layer? It is working in virtual
> space and is concerned about that onlly.

Not the block layer, the block layer of the subsystem.

It has to pick out the chunks that get transferred down to the drivers.

For example drivers/mmc/card/block.c will convert transfers into
sglists and pass down to the MMC drivers. mmc_queue_map_sg()
calls blk_rq_map_sg() to do the job. It's even documented in
Documentation/block/biodoc.txt

I don't know how that works in MTD...

Yours,
Linus Walleij
Vipin Kumar Oct. 12, 2012, 3:55 a.m. UTC | #10
On 10/11/2012 9:36 PM, Linus Walleij wrote:
> On Thu, Oct 11, 2012 at 6:15 AM, viresh kumar<viresh.kumar@linaro.org>  wrote:
>> On Wed, Oct 10, 2012 at 10:37 PM, Linus Walleij
>> <linus.walleij@linaro.org>  wrote:
>
>>> dma_sync_single_for_device() is translating the virtual
>>> address to physical for every chunk BTW.
>>
>> I pray that i am wrong here, otherwise i would be thrown out from
>> the maintainers list for this driver :)
>>
>> dma_sync_single_for_device() is not doing anything on the buffer, but
>> on the LLI item. Actually it is flushing LLI struct so that DMA h/w can get
>> the correct values.
>
> Sorry no, I'm the one who's wrong...
>
> So the DMA engine memcpy() is not mapping virt->phys
> but expects physical addresses to be provided.
>
> So dma_map_single() needs to be called on the stuff
> passed in to dev->device_prep_dma_memcpy().
>
> And currently there is indeed a dma_map_single() in
> dma_xfer() in fsmc_nand.c which should work just fine.
>
> dma_map_single() will only work if the buffer is
> physically contiguous.
>
> And the block layer of the subsystem should take care
> of only handing the driver buffers that are contiguous
> I think? Not that I'm an expert here ... more some
> guesswork :-/

The buffers provided to the driver are actually user buffers. The reason 
I say that is because the generic nand test modules eg 
drivers/mtd/nand/mtd_stresstest.c calls mtd->_read with a user buffer as 
an argument

This same buffer directly trickles down to the driver

Artem, should we clearly cast this buffer as a user pointer instead of 
just a 'uint8_t *'.

Regards
Vipin
Artem Bityutskiy Oct. 15, 2012, 1:18 p.m. UTC | #11
On Fri, 2012-10-12 at 09:25 +0530, Vipin Kumar wrote:
> The buffers provided to the driver are actually user buffers. The reason 
> I say that is because the generic nand test modules eg 
> drivers/mtd/nand/mtd_stresstest.c calls mtd->_read with a user buffer as 
> an argument

I am not sure what "user" buffers means, but they are vmalloc()'ed
buffers, not kmalloc()'ed, so they are not physically contiguous.

> This same buffer directly trickles down to the driver
> 
> Artem, should we clearly cast this buffer as a user pointer instead of 
> just a 'uint8_t *'.

They are not "_user", they are really kernel buffers. Or what do you
mean, which exactly type do you suggest?

This stuff is something many people are bringing up for many years
already. Drivers that do DMA do not cope with vmalloc()ed memory well,
and we grew a number of hacks in several drives. I mean, hacks like the
one you are introducing to your driver.

I'd solve the problem by changing the in-kernel mtd users to use
physically-contiguous memory instead. The following are the users I can
think of:

UBI, UBIFS, JFFS2, mtdtests and probably mtdswap.

They use vmalloc() when they need to read/write entire eraseblock, which
is usually 128KiB or 256KiB, and kmalloc() that much may fail if the
memory is fragmented. 

In many cases, it is easy to read/write in smaller chunk, but several
times. E.g., mtdtests could be changed.

In some cases, it is not easy, though.

First thing which comes to mind is that in modern kernels memory
fragmentation is not that big issue as it used to be. So may be
kmalloc() the memory is not that bad nowadays? We have page migration,
memory compaction, etc?

I'd really prefer to just switch to kmalloc() everywhere instead of
adding hacks like this to the drivers. Then if this is a problem for
someone, he can fix it by either switching to smaller buffers (possible
in many places), or by improving memory fragmentation issues on his
system, or by just using CMA.

We can even have an mtd-wide funcion which will try kmalloc(), and if
that fails, fall-back to CMA.


Then we can add a guard check to all mtd function which accept a buffer
and WARN() if it not physically contiguous.
Brian Norris Oct. 15, 2012, 4:27 p.m. UTC | #12
On Mon, Oct 15, 2012 at 6:18 AM, Artem Bityutskiy <dedekind1@gmail.com> wrote:
> First thing which comes to mind is that in modern kernels memory
> fragmentation is not that big issue as it used to be. So may be
> kmalloc() the memory is not that bad nowadays? We have page migration,
> memory compaction, etc?
>
> I'd really prefer to just switch to kmalloc() everywhere instead of
> adding hacks like this to the drivers. Then if this is a problem for
> someone, he can fix it by either switching to smaller buffers (possible
> in many places), or by improving memory fragmentation issues on his
> system, or by just using CMA.

I think I can suggest that this is already a problem on real systems.
In bringing up a board on v3.3 kernel, I experienced a kernel memory
allocation error when trying to memdup_user() in eraseblock-size
regions. I believe I would experience more of these if all
eraseblock-sized buffers were kmalloc()'d.

See the commit description for the following commit in mtd-utils.git:

    commit 71c76e74661492b4f68f670514866cfc85f47089
    libmtd: fix mtd_write() issues for large data-only writes

> We can even have an mtd-wide funcion which will try kmalloc(), and if
> that fails, fall-back to CMA.

I would prefer not building a solution that hopes kmalloc() can get a
large contiguous buffer (remember, eraseblock sizes come as large as
2MB these days). A real solution like CMA or scatter-gather seems like
a better idea.

Brian
Linus Walleij Oct. 15, 2012, 7:51 p.m. UTC | #13
On Mon, Oct 15, 2012 at 6:27 PM, Brian Norris
<computersforpeace@gmail.com> wrote:

> I would prefer not building a solution that hopes kmalloc() can get a
> large contiguous buffer (remember, eraseblock sizes come as large as
> 2MB these days). A real solution like CMA or scatter-gather seems like
> a better idea.

+1 on that, and as I think I illustrated the MMC subsystem is using
the block layer helpers to form scatter-gather lists for its requests.
I don't see why the MTD subsystem need to be very different?

Yours,
Linus Walleij
Artem Bityutskiy Oct. 16, 2012, 7:11 a.m. UTC | #14
On Mon, 2012-10-15 at 09:27 -0700, Brian Norris wrote:
> > I'd really prefer to just switch to kmalloc() everywhere instead of
> > adding hacks like this to the drivers. Then if this is a problem for
> > someone, he can fix it by either switching to smaller buffers (possible
> > in many places), or by improving memory fragmentation issues on his
> > system, or by just using CMA.
> 
> I think I can suggest that this is already a problem on real systems.
> In bringing up a board on v3.3 kernel, I experienced a kernel memory
> allocation error when trying to memdup_user() in eraseblock-size
> regions. I believe I would experience more of these if all
> eraseblock-sized buffers were kmalloc()'d.
> 
> See the commit description for the following commit in mtd-utils.git:
> 
>     commit 71c76e74661492b4f68f670514866cfc85f47089
>     libmtd: fix mtd_write() issues for large data-only writes

Well, this is another work-around. The better way to fix this would be
to change kernel's 'mtdchar_write_ioctl()' to be iterative and avoid
calling 'memdup_user(len)' for arbitrary 'len' passed from user-space.

> I would prefer not building a solution that hopes kmalloc() can get a
> large contiguous buffer (remember, eraseblock sizes come as large as
> 2MB these days). A real solution like CMA or scatter-gather seems like
> a better idea.

Me too. But this does not happen. People bring this up for years. Of
course it is easier to hack drivers, and very understandable. So what
I would like to do is somehow force people to fix this issue.

So I thought about something like introducing an mtd_alloc() which
would:
   a. Try doing CMA allocation: dma_alloc_from_contiguous()
   b. If it fails, use kmalloc().

This function would probably need a cookie which it returns and which
the mtd_free() function would use to call either kfree() or
dma_release_from_contiguous()...

If both fail - bad luck. Go look at the code and switch to S-G lists
instead of allocating contiguous regions. As I said, in many places it
is easy to do, and there are few places when you would need to spend a
bit more time.

How does this sound to you?
Artem Bityutskiy Oct. 16, 2012, 7:14 a.m. UTC | #15
On Mon, 2012-10-15 at 21:51 +0200, Linus Walleij wrote:
> On Mon, Oct 15, 2012 at 6:27 PM, Brian Norris
> <computersforpeace@gmail.com> wrote:
> 
> > I would prefer not building a solution that hopes kmalloc() can get a
> > large contiguous buffer (remember, eraseblock sizes come as large as
> > 2MB these days). A real solution like CMA or scatter-gather seems like
> > a better idea.
> 
> +1 on that, and as I think I illustrated the MMC subsystem is using
> the block layer helpers to form scatter-gather lists for it's requests.
> I don't see why the MTD subsystem need to be very different?

This is good idea, the problem is that it is a bit more work than
hacking the driver, so most people prefer the latter. By removing all
kmalloc()'s I basically wish to force people to go this way.

What are the SG function names you refer to?
Linus Walleij Oct. 16, 2012, 10:05 a.m. UTC | #16
On Tue, Oct 16, 2012 at 9:14 AM, Artem Bityutskiy <dedekind1@gmail.com> wrote:
> On Mon, 2012-10-15 at 21:51 +0200, Linus Walleij wrote:
>> On Mon, Oct 15, 2012 at 6:27 PM, Brian Norris
>> <computersforpeace@gmail.com> wrote:
>>
>> > I would prefer not building a solution that hopes kmalloc() can get a
>> > large contiguous buffer (remember, eraseblock sizes come as large as
>> > 2MB these days). A real solution like CMA or scatter-gather seems like
>> > a better idea.
>>
>> +1 on that, and as I think I illustrated the MMC subsystem is using
>> the block layer helpers to form scatter-gather lists for it's requests.
>> I don't see why the MTD subsystem need to be very different?
>
> This is good idea, the problem is that it is a bit more work than
> hacking the driver, so most people prefer the latter. By removing all
> kmalloc()'s I basically wish to force people to go this way.
>
> What are the SC functions names you refer?

I'm mainly thinking of blk_rq_map_sg()
from block/blk-merge.c

Yours,
Linus Walleij
Brian Norris Oct. 21, 2012, 7:21 a.m. UTC | #17
On Tue, Oct 16, 2012 at 12:11 AM, Artem Bityutskiy <dedekind1@gmail.com> wrote:
> On Mon, 2012-10-15 at 09:27 -0700, Brian Norris wrote:
>> See the commit description for the following commit in mtd-utils.git:
>>
>>     commit 71c76e74661492b4f68f670514866cfc85f47089
>>     libmtd: fix mtd_write() issues for large data-only writes
>
> Well, this is another work-around.

Yeah, I know it was a workaround. It's another issue of balancing time
and the "best" solution. Of course, the solutions actually don't seem
that difficult now, so I probably should have just fixed the ioctl()
in the first place... But I was simply pointing this out as evidence
of a real memory-fragmentation issue.

> The better way to fix this would be
> to change kernel's 'mtdchar_write_ioctl()' to be iterative and avoid
> calling 'memdup_user(len)' for arbitrary 'len' passed from user-space.

That's one way, but it would require specifying some arbitrary
iteration length, right? Like some multiple of writesize?

Instead, how about modelling it more closely after mtdchar_write(),
using mtd_kmalloc_up_to(), then iteratively call copy_from_user()?

> So I thought about something like introducing an mtd_alloc() which
> would:
>    a. Try doing CMA allocation: dma_alloc_from_contiguous()
>    b. If it fails, use kmalloc().
...
> If both fail - bad luck. Go look at the code and switch to S-G lists
> instead of allocating contiguous regions. As I said, in many places it
> is easy to do, and there are few places when you would need to spend a
> bit more time.
>
> How does this sound to you?

It sounds OK, but that depends on the success rate of
dma_alloc_from_contiguous(). I'm not familiar with it.

Brian
Brian Norris Oct. 21, 2012, 7:38 a.m. UTC | #18
On Tue, Oct 16, 2012 at 3:05 AM, Linus Walleij <linus.walleij@linaro.org> wrote:
> On Tue, Oct 16, 2012 at 9:14 AM, Artem Bityutskiy <dedekind1@gmail.com> wrote:
>> On Mon, 2012-10-15 at 21:51 +0200, Linus Walleij wrote:
>>> On Mon, Oct 15, 2012 at 6:27 PM, Brian Norris
>>> <computersforpeace@gmail.com> wrote:
>>>
>>> > I would prefer not building a solution that hopes kmalloc() can get a
>>> > large contiguous buffer (remember, eraseblock sizes come as large as
>>> > 2MB these days). A real solution like CMA or scatter-gather seems like
>>> > a better idea.
>>>
>>> +1 on that, and as I think I illustrated the MMC subsystem is using
>>> the block layer helpers to form scatter-gather lists for it's requests.
>>> I don't see why the MTD subsystem need to be very different?
>>
>> This is good idea, the problem is that it is a bit more work than
>> hacking the driver, so most people prefer the latter. By removing all
>> kmalloc()'s I basically wish to force people to go this way.

Did you mean vmalloc()'s?

>> What are the SC functions names you refer?
>
> I'm mainly thinking of blk_rq_map_sg()
> from block/blk-merge.c

I am also interested in the potential for the S-G approach. I'll see
what kind of work it takes to implement soon, hopefully.

As a side note, my hardware can, in fact, perform scatter-gather DMA.
If we were to support both S-G hardware like mine in addition to
standard, page-at-time access (nand_chip->write_page, for example),
could we introduce a DMA interface to nand_chip? Then this interface
would just be set to NULL for those that can't handle it.

Brian
Artem Bityutskiy Oct. 21, 2012, 11 a.m. UTC | #19
On Sun, 2012-10-21 at 00:38 -0700, Brian Norris wrote:
> >> This is good idea, the problem is that it is a bit more work than
> >> hacking the driver, so most people prefer the latter. By removing all
> >> kmalloc()'s I basically wish to force people to go this way.
> 
> Did you mean vmalloc()'s?

Yes.

> 
> >> What are the SC functions names you refer?
> >
> > I'm mainly thinking of blk_rq_map_sg()
> > from block/blk-merge.c

Yeah, may be re-working or extending MTD api with SG lists would be the
way to go, but this is a lot more work...
Artem Bityutskiy Oct. 21, 2012, 12:02 p.m. UTC | #20
On Sun, 2012-10-21 at 14:00 +0300, Artem Bityutskiy wrote:
> > > I'm mainly thinking of blk_rq_map_sg()
> > > from block/blk-merge.c
> 
> Yeah, may be re-working or extending MTD api with SG lists would be the
> way to go, but this is a lot more work...

Sorry, it would be correct to say that re-working MTD API _users_ to
incorporate the SG API would be a lot more work.
diff mbox

Patch

diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index 4b29a64..8de6dcf 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -323,6 +323,7 @@  struct fsmc_nand_data {
 	struct dma_chan		*read_dma_chan;
 	struct dma_chan		*write_dma_chan;
 	struct completion	dma_access_complete;
+	void			*dma_buf;
 
 	/* Recieved from plat data */
 	struct fsmc_rbpin	*rbpin;
@@ -675,7 +676,8 @@  static void fsmc_read_buf_dma(struct mtd_info *mtd, uint8_t *buf, int len)
 	struct fsmc_nand_data *host;
 
 	host = container_of(mtd, struct fsmc_nand_data, mtd);
-	dma_xfer(host, buf, len, DMA_FROM_DEVICE);
+	dma_xfer(host, host->dma_buf, len, DMA_FROM_DEVICE);
+	memcpy(buf, (const void *)host->dma_buf, len);
 }
 
 /*
@@ -690,7 +692,8 @@  static void fsmc_write_buf_dma(struct mtd_info *mtd, const uint8_t *buf,
 	struct fsmc_nand_data *host;
 
 	host = container_of(mtd, struct fsmc_nand_data, mtd);
-	dma_xfer(host, (void *)buf, len, DMA_TO_DEVICE);
+	memcpy(host->dma_buf, buf, len);
+	dma_xfer(host, host->dma_buf, len, DMA_TO_DEVICE);
 }
 
 /*
@@ -1133,6 +1136,13 @@  static int __init fsmc_nand_probe(struct platform_device *pdev)
 			dev_err(&pdev->dev, "Unable to get write dma channel\n");
 			goto err_req_write_chnl;
 		}
+
+		host->dma_buf = kmalloc(NAND_MAX_PAGESIZE + NAND_MAX_OOBSIZE,
+				GFP_KERNEL);
+		if (!host->dma_buf) {
+			dev_err(&pdev->dev, "failed to allocate dma buffer\n");
+			goto err_req_dma_buf;
+		}
 		nand->read_buf = fsmc_read_buf_dma;
 		nand->write_buf = fsmc_write_buf_dma;
 		break;
@@ -1246,6 +1256,9 @@  static int __init fsmc_nand_probe(struct platform_device *pdev)
 err_probe:
 err_scan_ident:
 	if (host->mode == USE_DMA_ACCESS)
+		kfree(host->dma_buf);
+err_req_dma_buf:
+	if (host->mode == USE_DMA_ACCESS)
 		dma_release_channel(host->write_dma_chan);
 err_req_write_chnl:
 	if (host->mode == USE_DMA_ACCESS)