diff mbox

mtd: OneNAND: samsung: Write DMA support

Message ID 20110608101804.GA5527@july
State New, archived
Headers show

Commit Message

Kyungmin Park June 8, 2011, 10:18 a.m. UTC
From: Kyungmin Park <kyungmin.park@samsung.com>

Implement the write DMA feature. It can reduce the CPU usage when write.

Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
---

Comments

Kyungmin Park June 29, 2011, 7:22 a.m. UTC | #1
Hi Artem,

There's missing patch, can you add l2-mtd tree?

Thank you,
Kyungmin Park

On Wed, Jun 8, 2011 at 7:18 PM, Kyungmin Park <kmpark@infradead.org> wrote:
> From: Kyungmin Park <kyungmin.park@samsung.com>
>
> Implement the write DMA feature. It can reduce the CPU usage when write.
>
> Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
> ---
> diff --git a/drivers/mtd/onenand/samsung.c b/drivers/mtd/onenand/samsung.c
> index 3306b5b..f24cb6f 100644
> --- a/drivers/mtd/onenand/samsung.c
> +++ b/drivers/mtd/onenand/samsung.c
> @@ -712,6 +712,74 @@ normal:
>        return 0;
>  }
>
> +static int s5pc110_write_bufferram(struct mtd_info *mtd, int area,
> +               const unsigned char *buffer, int offset, size_t count)
> +{
> +       struct onenand_chip *this = mtd->priv;
> +       struct device *dev = &onenand->pdev->dev;
> +       void __iomem *p;
> +       void *buf = (void *) buffer;
> +       dma_addr_t dma_src, dma_dst;
> +       int err, ofs, page_dma = 0;
> +
> +       p = this->base + area;
> +       if (ONENAND_CURRENT_BUFFERRAM(this)) {
> +               if (area == ONENAND_DATARAM)
> +                       p += this->writesize;
> +               else
> +                       p += mtd->oobsize;
> +       }
> +
> +       if (count != mtd->writesize || offset & 3 || (size_t) buf & 3)
> +               goto normal;
> +
> +       /* Handle vmalloc address */
> +       if (buf >= high_memory) {
> +               struct page *page;
> +
> +               if (((size_t) buf & PAGE_MASK) !=
> +                   ((size_t) (buf + count - 1) & PAGE_MASK))
> +                       goto normal;
> +
> +               page = vmalloc_to_page(buf);
> +               if (unlikely(!page))
> +                       goto normal;
> +
> +               /* Page offset */
> +               ofs = ((size_t) buf & ~PAGE_MASK);
> +               page_dma = 1;
> +
> +               /* DMA routine */
> +               dma_src = dma_map_page(dev, page, ofs, count, DMA_TO_DEVICE);
> +               dma_dst = onenand->phys_base + (p - this->base);
> +       } else {
> +               /* DMA routine */
> +               dma_src = dma_map_single(dev, buf, count, DMA_TO_DEVICE);
> +               dma_dst = onenand->phys_base + (p - this->base);
> +       }
> +
> +       if (dma_mapping_error(dev, dma_src)) {
> +               dev_err(dev, "Couldn't map a %d byte buffer for DMA\n", count);
> +               goto normal;
> +       }
> +
> +       err = s5pc110_dma_ops((void *) dma_dst, (void *) dma_src,
> +                       count, S5PC110_DMA_DIR_WRITE);
> +
> +       if (page_dma)
> +               dma_unmap_page(dev, dma_src, count, DMA_TO_DEVICE);
> +       else
> +               dma_unmap_single(dev, dma_src, count, DMA_TO_DEVICE);
> +
> +       if (!err)
> +               return 0;
> +
> +normal:
> +       memcpy(p + offset, buffer, count);
> +
> +       return 0;
> +}
> +
>  static int s5pc110_chip_probe(struct mtd_info *mtd)
>  {
>        /* Now just return 0 */
> @@ -844,6 +912,7 @@ static void s3c_onenand_setup(struct mtd_info *mtd)
>        } else if (onenand->type == TYPE_S5PC110) {
>                /* Use generic onenand functions */
>                this->read_bufferram = s5pc110_read_bufferram;
> +               this->write_bufferram = s5pc110_write_bufferram;
>                this->chip_probe = s5pc110_chip_probe;
>                return;
>        } else {
>
> ______________________________________________________
> Linux MTD discussion mailing list
> http://lists.infradead.org/mailman/listinfo/linux-mtd/
>
Artem Bityutskiy June 30, 2011, 7:21 a.m. UTC | #2
On Wed, 2011-06-29 at 16:22 +0900, Kyungmin Park wrote:
> Hi Artem,
> 
> There's missing patch, can you add l2-mtd tree?
> 
Sorry, I missed this. I have few minor comments which will come shortly,
and also I'd ask to re-send this patch with the arm mailing list in CC -
you use DMA API which I am not very good at, and I'd like others to
validate this.

Thanks!
Artem Bityutskiy June 30, 2011, 7:55 a.m. UTC | #3
On Wed, 2011-06-08 at 19:18 +0900, Kyungmin Park wrote:
> From: Kyungmin Park <kyungmin.park@samsung.com>
> 
> Implement the write DMA feature. It can reduce the CPU usage when write.
> 
> Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
> ---
> diff --git a/drivers/mtd/onenand/samsung.c b/drivers/mtd/onenand/samsung.c
> index 3306b5b..f24cb6f 100644
> --- a/drivers/mtd/onenand/samsung.c
> +++ b/drivers/mtd/onenand/samsung.c
> @@ -712,6 +712,74 @@ normal:
>  	return 0;
>  }
>  
> +static int s5pc110_write_bufferram(struct mtd_info *mtd, int area,
> +		const unsigned char *buffer, int offset, size_t count)
> +{
> +	struct onenand_chip *this = mtd->priv;
> +	struct device *dev = &onenand->pdev->dev;
> +	void __iomem *p;
> +	void *buf = (void *) buffer;

No, this is bad - if buffer is marked as const then it should be
constant, or remove the const modifier in the function declaration. And
when you remove the const modifier you can kill the cast, because
assigning to a void pointer does not require it. 

> +	dma_addr_t dma_src, dma_dst;
> +	int err, ofs, page_dma = 0;
> +
> +	p = this->base + area;
> +	if (ONENAND_CURRENT_BUFFERRAM(this)) {
> +		if (area == ONENAND_DATARAM)
> +			p += this->writesize;
> +		else
> +			p += mtd->oobsize;
> +	}
> +
> +	if (count != mtd->writesize || offset & 3 || (size_t) buf & 3)
> +		goto normal;
> +
> +	/* Handle vmalloc address */
> +	if (buf >= high_memory) {
> +		struct page *page;

OK, Russell will yell at this, but we do DMA vmalloc'ed addresses for
many years in the OneNAND driver and it works, and there are products
out there with this code (e.g., Nokia N900) and it works. But I'd like
to understand better why exactly this is a no-no, and why this works in
practice - were we lucky and how exactly ....

> +
> +		if (((size_t) buf & PAGE_MASK) !=
> +		    ((size_t) (buf + count - 1) & PAGE_MASK))

Something is fishy with these size_t casts, could you revisit this piece
of code - and turn it to something simpler, if possible?

> +			goto normal;
> +
> +		page = vmalloc_to_page(buf);
> +		if (unlikely(!page))
> +			goto normal;
> +
> +		/* Page offset */
> +		ofs = ((size_t) buf & ~PAGE_MASK);
> +		page_dma = 1;
> +
> +		/* DMA routine */
> +		dma_src = dma_map_page(dev, page, ofs, count, DMA_TO_DEVICE);
> +		dma_dst = onenand->phys_base + (p - this->base);
> +	} else {
> +		/* DMA routine */
> +		dma_src = dma_map_single(dev, buf, count, DMA_TO_DEVICE);
> +		dma_dst = onenand->phys_base + (p - this->base);
> +	}
> +
> +	if (dma_mapping_error(dev, dma_src)) {
> +		dev_err(dev, "Couldn't map a %d byte buffer for DMA\n", count);
> +		goto normal;
> +	}
> +
> +	err = s5pc110_dma_ops((void *) dma_dst, (void *) dma_src,
> +			count, S5PC110_DMA_DIR_WRITE);

Why casting to void? Please, revise all casts you do.

> +
> +	if (page_dma)
> +		dma_unmap_page(dev, dma_src, count, DMA_TO_DEVICE);
> +	else
> +		dma_unmap_single(dev, dma_src, count, DMA_TO_DEVICE);
> +
> +	if (!err)
> +		return 0;
> +
> +normal:
> +	memcpy(p + offset, buffer, count);

p is iomem, and it cannot be involved in memcpy as Russell pointed
recently in another e-mail, see Documentation/bus-virt-phys-mapping.txt

Please, re-send this with the arm mailing list in CC.
Russell King - ARM Linux June 30, 2011, 9:19 a.m. UTC | #4
On Thu, Jun 30, 2011 at 10:55:25AM +0300, Artem Bityutskiy wrote:
> > +	/* Handle vmalloc address */
> > +	if (buf >= high_memory) {
> > +		struct page *page;
> 
> OK, Russell will yell at this, but we do DMA vmalloc'ed addresses for

I most certainly will, because its broken.

> > +
> > +		if (((size_t) buf & PAGE_MASK) !=
> > +		    ((size_t) (buf + count - 1) & PAGE_MASK))
> 
> Something is fishy with these size_t casts, could you revisit this piece
> of code - and turn it to something simpler, if possible?
> 
> > +			goto normal;
> > +
> > +		page = vmalloc_to_page(buf);
> > +		if (unlikely(!page))
> > +			goto normal;
> > +
> > +		/* Page offset */
> > +		ofs = ((size_t) buf & ~PAGE_MASK);
> > +		page_dma = 1;
> > +
> > +		/* DMA routine */
> > +		dma_src = dma_map_page(dev, page, ofs, count, DMA_TO_DEVICE);

Which is that this code is trying to work around the restriction in the
DMA API that dma_map_single() can only take virtual addresses in the
kernel direct mapped region.

The key thing here is that with CPUs which lookup by virtual address,
like ARMs, you _must_ handle the cache aliases associated with the mapping
which you are accessing the memory via.

This means if you are accessing a DMA buffer at address X, and address X
is cacheable, address X needs cache maintainence performed on it.  Address
Y, which may correspond with the same memory as X through a different
mapping is no good.  It has to be X.

However, there is NO API for mapping DMA buffers from vmalloc space.
What we do now have are a pair of functions which must be used _correctly_
(iow, one before and one after) to ensure that virtual cached architectures
can access the data correctly - and its documented at the bottom of
cachetlb.txt.

As this is something which keeps coming up in connection with MTD, it
may be a good idea if MTD gave driver authors a helping hand with DMA
setup/teardown so that the chances of driver authors getting this right
is greater...
Artem Bityutskiy June 30, 2011, 10:45 a.m. UTC | #5
Hi,

On Thu, 2011-06-30 at 10:19 +0100, Russell King - ARM Linux wrote:
> Which is that this code is trying to work around the restriction in the
> DMA API that dma_map_single() can only take virtual addresses in the
> kernel direct mapped region.

OK.

> The key thing here is that with CPUs which lookup by virtual address,
> like ARMs, you _must_ handle the cache aliases associated with the mapping
> which you are accessing the memory via.
> 
> This means if you are accessing a DMA buffer at address X, and address X
> is cacheable, address X needs cache maintainence performed on it.  Address
> Y, which may correspond with the same memory as X through a different
> mapping is no good.  It has to be X.

Jut to make sure I understand the issue (I am not good at all in this
stuff, this is why I CCed the arm ML):

* if I vmalloc a buffer, I and up with physical memory which has 2
mappings - X which is the direct kernel mapping and Y, which is the
vmalloc mapping.
* The DMA API takes care only about direct mapping in terms of CPU cache
flushing and invalidating. The vmalloc mapping is not taken care of.

And this may cause memory corruptions.

Correct?

> However, there is NO API for mapping DMA buffers from vmalloc space.
> What we do now have are a pair of functions which must be used _correctly_
> (iow, one before and one after) to ensure that virtual cached architectures
> can access the data correctly - and its documented at the bottom of
> cachetlb.txt.

OK, so you do not say it is fundamentally impossible to DMA vmalloc'ed
memory, it is just that no one bothered to invent an API for this.

> As this is something which keeps coming up in connection with MTD, it
> may be a good idea if MTD gave driver authors a helping hand with DMA
> setup/teardown so that the chances of driver authors getting this right
> is greater...

Yes, or the MTD community may push back all hacks and force someone to
invent the API...

Am I right that if we add a helper function which flushes and
invalidates CPU caches for the vmalloc mapping, and then calls the DMA
API function which will take care of the direct mapping, we should be
fine?

Thanks!

P.S.: we really need to document this issue somewhere - either on the
MTD web site or in Documentation/dma-vmalloced-memory.txt.
Russell King - ARM Linux June 30, 2011, 11:23 a.m. UTC | #6
On Thu, Jun 30, 2011 at 01:45:31PM +0300, Artem Bityutskiy wrote:
> Hi,
> 
> On Thu, 2011-06-30 at 10:19 +0100, Russell King - ARM Linux wrote:
> > Which is that this code is trying to work around the restriction in the
> > DMA API that dma_map_single() can only take virtual addresses in the
> > kernel direct mapped region.
> 
> OK.
> 
> > The key thing here is that with CPUs which lookup by virtual address,
> > like ARMs, you _must_ handle the cache aliases associated with the mapping
> > which you are accessing the memory via.
> > 
> > This means if you are accessing a DMA buffer at address X, and address X
> > is cacheable, address X needs cache maintainence performed on it.  Address
> > Y, which may correspond with the same memory as X through a different
> > mapping is no good.  It has to be X.
> 
> Jut to make sure I understand the issue (I am not good at all in this
> stuff, this is why I CCed the arm ML):
> 
> * if I vmalloc a buffer, I and up with physical memory which has 2
> mappings - X which is the direct kernel mapping and Y, which is the
> vmalloc mapping.
> * The DMA API takes care only about direct mapping in terms of CPU cache
> flushing and invalidating. The vmalloc mapping is not taken care of.
> 
> And this may cause memory corruptions.
> 
> Correct?

Yes.

> > However, there is NO API for mapping DMA buffers from vmalloc space.
> > What we do now have are a pair of functions which must be used _correctly_
> > (iow, one before and one after) to ensure that virtual cached architectures
> > can access the data correctly - and its documented at the bottom of
> > cachetlb.txt.
> 
> OK, so you do not say it is fundamentally impossible to DMA vmalloc'ed
> memory, it is just that no one bothered to invent an API for this.

It is not impossible, and XFS has had exactly the same issues - but
remember that XFS has to work with the block layer with drivers which
have no idea that the memory they're writing/reading may be also mapped
into vmalloc space.  The two functions at the bottom of cachetlb.txt were
invented to allow XFS to do this.

> > As this is something which keeps coming up in connection with MTD, it
> > may be a good idea if MTD gave driver authors a helping hand with DMA
> > setup/teardown so that the chances of driver authors getting this right
> > is greater...
> 
> Yes, or the MTD community may push back all hacks and force someone to
> invent the API...

It's probably going to be better to have a MTD block API for DMA which
provides a scatterlist to the MTD block driver, and which knows whether
the memory is from vmalloc.  When it is, it takes care of the flushing
via those two functions I referred to in cachetlb.txt.

This keeps the magic out of the individual MTD block drivers, and stops
them replicating the same mistakes time and time again - and also means
that you won't have to teach everyone about this.

It doesn't make sense for the DMA API to deal with vmalloc - MTD is
one of the very few (the only?) subsystems which does this.
Artem Bityutskiy June 30, 2011, 1:38 p.m. UTC | #7
On Thu, 2011-06-30 at 12:23 +0100, Russell King - ARM Linux wrote:
> It's probably going to be better to have a MTD block API for DMA which
> provides a scatterlist to the MTD block driver, and which knows whether
> the memory is from vmalloc.  When it is, it takes care of the flushing
> via those two functions I referred to in cachetlb.txt.
> 
> This keeps the magic out of the individual MTD block drivers, and stops
> them replicating the same mistakes time and time again - and also means
> that you won't have to teach everyone about this.

Yeah, something like this. The MTD API is not very suitable for this and
would need some rework to make sure we achieve 2 goals:

1. No need to change current MTD API users
2. Keep the vmalloced memory handling in one place and not replicate it
   in each driver.

Currently only OneNAND drivers do these hacks, so I guess Kyungmin will
suffer most...

> It doesn't make sense for the DMA API to deal with vmalloc - MTD is
> one of the very few (the only?) subsystems which does this.

OK, thank you!
diff mbox

Patch

diff --git a/drivers/mtd/onenand/samsung.c b/drivers/mtd/onenand/samsung.c
index 3306b5b..f24cb6f 100644
--- a/drivers/mtd/onenand/samsung.c
+++ b/drivers/mtd/onenand/samsung.c
@@ -712,6 +712,74 @@  normal:
 	return 0;
 }
 
+static int s5pc110_write_bufferram(struct mtd_info *mtd, int area,
+		const unsigned char *buffer, int offset, size_t count)
+{
+	struct onenand_chip *this = mtd->priv;
+	struct device *dev = &onenand->pdev->dev;
+	void __iomem *p;
+	void *buf = (void *) buffer;
+	dma_addr_t dma_src, dma_dst;
+	int err, ofs, page_dma = 0;
+
+	p = this->base + area;
+	if (ONENAND_CURRENT_BUFFERRAM(this)) {
+		if (area == ONENAND_DATARAM)
+			p += this->writesize;
+		else
+			p += mtd->oobsize;
+	}
+
+	if (count != mtd->writesize || offset & 3 || (size_t) buf & 3)
+		goto normal;
+
+	/* Handle vmalloc address */
+	if (buf >= high_memory) {
+		struct page *page;
+
+		if (((size_t) buf & PAGE_MASK) !=
+		    ((size_t) (buf + count - 1) & PAGE_MASK))
+			goto normal;
+
+		page = vmalloc_to_page(buf);
+		if (unlikely(!page))
+			goto normal;
+
+		/* Page offset */
+		ofs = ((size_t) buf & ~PAGE_MASK);
+		page_dma = 1;
+
+		/* DMA routine */
+		dma_src = dma_map_page(dev, page, ofs, count, DMA_TO_DEVICE);
+		dma_dst = onenand->phys_base + (p - this->base);
+	} else {
+		/* DMA routine */
+		dma_src = dma_map_single(dev, buf, count, DMA_TO_DEVICE);
+		dma_dst = onenand->phys_base + (p - this->base);
+	}
+
+	if (dma_mapping_error(dev, dma_src)) {
+		dev_err(dev, "Couldn't map a %d byte buffer for DMA\n", count);
+		goto normal;
+	}
+
+	err = s5pc110_dma_ops((void *) dma_dst, (void *) dma_src,
+			count, S5PC110_DMA_DIR_WRITE);
+
+	if (page_dma)
+		dma_unmap_page(dev, dma_src, count, DMA_TO_DEVICE);
+	else
+		dma_unmap_single(dev, dma_src, count, DMA_TO_DEVICE);
+
+	if (!err)
+		return 0;
+
+normal:
+	memcpy(p + offset, buffer, count);
+
+	return 0;
+}
+
 static int s5pc110_chip_probe(struct mtd_info *mtd)
 {
 	/* Now just return 0 */
@@ -844,6 +912,7 @@  static void s3c_onenand_setup(struct mtd_info *mtd)
 	} else if (onenand->type == TYPE_S5PC110) {
 		/* Use generic onenand functions */
 		this->read_bufferram = s5pc110_read_bufferram;
+		this->write_bufferram = s5pc110_write_bufferram;
 		this->chip_probe = s5pc110_chip_probe;
 		return;
 	} else {