Message ID | 20110608101804.GA5527@july |
---|---|
State | New, archived |
Headers | show |
Hi Artem, There's missing patch, can you add l2-mtd tree? Thank you, Kyungmin Park On Wed, Jun 8, 2011 at 7:18 PM, Kyungmin Park <kmpark@infradead.org> wrote: > From: Kyungmin Park <kyungmin.park@samsung.com> > > Implement the write DMA feature. It can reduce the CPU usage when write. > > Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com> > --- > diff --git a/drivers/mtd/onenand/samsung.c b/drivers/mtd/onenand/samsung.c > index 3306b5b..f24cb6f 100644 > --- a/drivers/mtd/onenand/samsung.c > +++ b/drivers/mtd/onenand/samsung.c > @@ -712,6 +712,74 @@ normal: > return 0; > } > > +static int s5pc110_write_bufferram(struct mtd_info *mtd, int area, > + const unsigned char *buffer, int offset, size_t count) > +{ > + struct onenand_chip *this = mtd->priv; > + struct device *dev = &onenand->pdev->dev; > + void __iomem *p; > + void *buf = (void *) buffer; > + dma_addr_t dma_src, dma_dst; > + int err, ofs, page_dma = 0; > + > + p = this->base + area; > + if (ONENAND_CURRENT_BUFFERRAM(this)) { > + if (area == ONENAND_DATARAM) > + p += this->writesize; > + else > + p += mtd->oobsize; > + } > + > + if (count != mtd->writesize || offset & 3 || (size_t) buf & 3) > + goto normal; > + > + /* Handle vmalloc address */ > + if (buf >= high_memory) { > + struct page *page; > + > + if (((size_t) buf & PAGE_MASK) != > + ((size_t) (buf + count - 1) & PAGE_MASK)) > + goto normal; > + > + page = vmalloc_to_page(buf); > + if (unlikely(!page)) > + goto normal; > + > + /* Page offset */ > + ofs = ((size_t) buf & ~PAGE_MASK); > + page_dma = 1; > + > + /* DMA routine */ > + dma_src = dma_map_page(dev, page, ofs, count, DMA_TO_DEVICE); > + dma_dst = onenand->phys_base + (p - this->base); > + } else { > + /* DMA routine */ > + dma_src = dma_map_single(dev, buf, count, DMA_TO_DEVICE); > + dma_dst = onenand->phys_base + (p - this->base); > + } > + > + if (dma_mapping_error(dev, dma_src)) { > + dev_err(dev, "Couldn't map a %d byte buffer for DMA\n", count); > + goto normal; > + } > + > + err = 
s5pc110_dma_ops((void *) dma_dst, (void *) dma_src, > + count, S5PC110_DMA_DIR_WRITE); > + > + if (page_dma) > + dma_unmap_page(dev, dma_src, count, DMA_TO_DEVICE); > + else > + dma_unmap_single(dev, dma_src, count, DMA_TO_DEVICE); > + > + if (!err) > + return 0; > + > +normal: > + memcpy(p + offset, buffer, count); > + > + return 0; > +} > + > static int s5pc110_chip_probe(struct mtd_info *mtd) > { > /* Now just return 0 */ > @@ -844,6 +912,7 @@ static void s3c_onenand_setup(struct mtd_info *mtd) > } else if (onenand->type == TYPE_S5PC110) { > /* Use generic onenand functions */ > this->read_bufferram = s5pc110_read_bufferram; > + this->write_bufferram = s5pc110_write_bufferram; > this->chip_probe = s5pc110_chip_probe; > return; > } else { > > ______________________________________________________ > Linux MTD discussion mailing list > http://lists.infradead.org/mailman/listinfo/linux-mtd/ >
On Wed, 2011-06-29 at 16:22 +0900, Kyungmin Park wrote: > Hi Artem, > > There's missing patch, can you add l2-mtd tree? > Sorry, I missed this. I have a few minor comments which will come shortly, and also I'd ask to re-send this patch with the arm mailing list in CC - you use DMA API which I am not very good at, and I'd like others to validate this. Thanks!
On Wed, 2011-06-08 at 19:18 +0900, Kyungmin Park wrote: > From: Kyungmin Park <kyungmin.park@samsung.com> > > Implement the write DMA feature. It can reduce the CPU usage when write. > > Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com> > --- > diff --git a/drivers/mtd/onenand/samsung.c b/drivers/mtd/onenand/samsung.c > index 3306b5b..f24cb6f 100644 > --- a/drivers/mtd/onenand/samsung.c > +++ b/drivers/mtd/onenand/samsung.c > @@ -712,6 +712,74 @@ normal: > return 0; > } > > +static int s5pc110_write_bufferram(struct mtd_info *mtd, int area, > + const unsigned char *buffer, int offset, size_t count) > +{ > + struct onenand_chip *this = mtd->priv; > + struct device *dev = &onenand->pdev->dev; > + void __iomem *p; > + void *buf = (void *) buffer; No, this is bad - if buffer is marked as const then it should be constant, or remove the const modifier in the function declaration. And when you remove the const modifier you can kill the cast, because assigning to a void pointer does not require it. > + dma_addr_t dma_src, dma_dst; > + int err, ofs, page_dma = 0; > + > + p = this->base + area; > + if (ONENAND_CURRENT_BUFFERRAM(this)) { > + if (area == ONENAND_DATARAM) > + p += this->writesize; > + else > + p += mtd->oobsize; > + } > + > + if (count != mtd->writesize || offset & 3 || (size_t) buf & 3) > + goto normal; > + > + /* Handle vmalloc address */ > + if (buf >= high_memory) { > + struct page *page; OK, Russell will yell at this, but we do DMA vmalloc'ed addresses for many years in the OneNAND driver and it works, and there are products out there with this code (e.g., Nokia N900) and it works. But I'd like to understand better why exactly this is a no-no, and why this works in practice - were we lucky and how exactly .... > + > + if (((size_t) buf & PAGE_MASK) != > + ((size_t) (buf + count - 1) & PAGE_MASK)) Something is fishy with these size_t casts, could you revisit this piece of code - and turn it to something simpler, if possible? 
> + goto normal; > + > + page = vmalloc_to_page(buf); > + if (unlikely(!page)) > + goto normal; > + > + /* Page offset */ > + ofs = ((size_t) buf & ~PAGE_MASK); > + page_dma = 1; > + > + /* DMA routine */ > + dma_src = dma_map_page(dev, page, ofs, count, DMA_TO_DEVICE); > + dma_dst = onenand->phys_base + (p - this->base); > + } else { > + /* DMA routine */ > + dma_src = dma_map_single(dev, buf, count, DMA_TO_DEVICE); > + dma_dst = onenand->phys_base + (p - this->base); > + } > + > + if (dma_mapping_error(dev, dma_src)) { > + dev_err(dev, "Couldn't map a %d byte buffer for DMA\n", count); > + goto normal; > + } > + > + err = s5pc110_dma_ops((void *) dma_dst, (void *) dma_src, > + count, S5PC110_DMA_DIR_WRITE); Why casting to void? Please, revise all casts you do. > + > + if (page_dma) > + dma_unmap_page(dev, dma_src, count, DMA_TO_DEVICE); > + else > + dma_unmap_single(dev, dma_src, count, DMA_TO_DEVICE); > + > + if (!err) > + return 0; > + > +normal: > + memcpy(p + offset, buffer, count); p is iomem, and it cannot be involved in memcpy as Russell pointed recently in another e-mail, see Documentation/bus-virt-phys-mapping.txt Please, re-send this with the arm mailing list in CC.
On Thu, Jun 30, 2011 at 10:55:25AM +0300, Artem Bityutskiy wrote: > > + /* Handle vmalloc address */ > > + if (buf >= high_memory) { > > + struct page *page; > > OK, Russell will yell at this, but we do DMA vmalloc'ed addresses for I most certainly will, because it's broken. > > + > > + if (((size_t) buf & PAGE_MASK) != > > + ((size_t) (buf + count - 1) & PAGE_MASK)) > > Something is fishy with these size_t casts, could you revisit this piece > of code - and turn it to something simpler, if possible? > > > + goto normal; > > + > > + page = vmalloc_to_page(buf); > > + if (unlikely(!page)) > > + goto normal; > > + > > + /* Page offset */ > > + ofs = ((size_t) buf & ~PAGE_MASK); > > + page_dma = 1; > > + > > + /* DMA routine */ > > + dma_src = dma_map_page(dev, page, ofs, count, DMA_TO_DEVICE); Which is that this code is trying to work around the restriction in the DMA API that dma_map_single() can only take virtual addresses in the kernel direct mapped region. The key thing here is that with CPUs which lookup by virtual address, like ARMs, you _must_ handle the cache aliases associated with the mapping which you are accessing the memory via. This means if you are accessing a DMA buffer at address X, and address X is cacheable, address X needs cache maintenance performed on it. Address Y, which may correspond with the same memory as X through a different mapping is no good. It has to be X. However, there is NO API for mapping DMA buffers from vmalloc space. What we do now have are a pair of functions which must be used _correctly_ (iow, one before and one after) to ensure that virtual cached architectures can access the data correctly - and it's documented at the bottom of cachetlb.txt. As this is something which keeps coming up in connection with MTD, it may be a good idea if MTD gave driver authors a helping hand with DMA setup/teardown so that the chances of driver authors getting this right is greater...
Hi, On Thu, 2011-06-30 at 10:19 +0100, Russell King - ARM Linux wrote: > Which is that this code is trying to work around the restriction in the > DMA API that dma_map_single() can only take virtual addresses in the > kernel direct mapped region. OK. > The key thing here is that with CPUs which lookup by virtual address, > like ARMs, you _must_ handle the cache aliases associated with the mapping > which you are accessing the memory via. > > This means if you are accessing a DMA buffer at address X, and address X > is cacheable, address X needs cache maintainence performed on it. Address > Y, which may correspond with the same memory as X through a different > mapping is no good. It has to be X. Just to make sure I understand the issue (I am not good at all in this stuff, this is why I CCed the arm ML): * if I vmalloc a buffer, I end up with physical memory which has 2 mappings - X which is the direct kernel mapping and Y, which is the vmalloc mapping. * The DMA API takes care only about direct mapping in terms of CPU cache flushing and invalidating. The vmalloc mapping is not taken care of. And this may cause memory corruptions. Correct? > However, there is NO API for mapping DMA buffers from vmalloc space. > What we do now have are a pair of functions which must be used _correctly_ > (iow, one before and one after) to ensure that virtual cached architectures > can access the data correctly - and its documented at the bottom of > cachetlb.txt. OK, so you do not say it is fundamentally impossible to DMA vmalloc'ed memory, it is just that no one bothered to invent an API for this. > As this is something which keeps coming up in connection with MTD, it > may be a good idea if MTD gave driver authors a helping hand with DMA > setup/teardown so that the chances of driver authors getting this right > is greater... Yes, or the MTD community may push back all hacks and force someone to invent the API... 
Am I right that if we add a helper function which flushes and invalidates CPU caches for the vmalloc mapping, and then calls the DMA API function which will take care of the direct mapping, we should be fine? Thanks! P.S.: we really need to document this issue somewhere - either on the MTD web site or in Documentation/dma-vmalloced-memory.txt.
On Thu, Jun 30, 2011 at 01:45:31PM +0300, Artem Bityutskiy wrote: > Hi, > > On Thu, 2011-06-30 at 10:19 +0100, Russell King - ARM Linux wrote: > > Which is that this code is trying to work around the restriction in the > > DMA API that dma_map_single() can only take virtual addresses in the > > kernel direct mapped region. > > OK. > > > The key thing here is that with CPUs which lookup by virtual address, > > like ARMs, you _must_ handle the cache aliases associated with the mapping > > which you are accessing the memory via. > > > > This means if you are accessing a DMA buffer at address X, and address X > > is cacheable, address X needs cache maintainence performed on it. Address > > Y, which may correspond with the same memory as X through a different > > mapping is no good. It has to be X. > > Jut to make sure I understand the issue (I am not good at all in this > stuff, this is why I CCed the arm ML): > > * if I vmalloc a buffer, I and up with physical memory which has 2 > mappings - X which is the direct kernel mapping and Y, which is the > vmalloc mapping. > * The DMA API takes care only about direct mapping in terms of CPU cache > flushing and invalidating. The vmalloc mapping is not taken care of. > > And this may cause memory corruptions. > > Correct? Yes. > > However, there is NO API for mapping DMA buffers from vmalloc space. > > What we do now have are a pair of functions which must be used _correctly_ > > (iow, one before and one after) to ensure that virtual cached architectures > > can access the data correctly - and its documented at the bottom of > > cachetlb.txt. > > OK, so you do not say it is fundamentally impossible to DMA vmalloc'ed > memory, it is just that no one bothered to invent an API for this. It is not impossible, and XFS has had exactly the same issues - but remember that XFS has to work with the block layer with drivers which have no idea that the memory they're writing/reading may be also mapped into vmalloc space. 
The two functions at the bottom of cachetlb.txt were invented to allow XFS to do this. > > As this is something which keeps coming up in connection with MTD, it > > may be a good idea if MTD gave driver authors a helping hand with DMA > > setup/teardown so that the chances of driver authors getting this right > > is greater... > > Yes, or the MTD community may push back all hacks and force someone to > invent the API... It's probably going to be better to have a MTD block API for DMA which provides a scatterlist to the MTD block driver, and which knows whether the memory is from vmalloc. When it is, it takes care of the flushing via those two functions I referred to in cachetlb.txt. This keeps the magic out of the individual MTD block drivers, and stops them replicating the same mistakes time and time again - and also means that you won't have to teach everyone about this. It doesn't make sense for the DMA API to deal with vmalloc - MTD is one of the very few (the only?) subsystems which does this.
On Thu, 2011-06-30 at 12:23 +0100, Russell King - ARM Linux wrote: > It's probably going to be better to have a MTD block API for DMA which > provides a scatterlist to the MTD block driver, and which knows whether > the memory is from vmalloc. When it is, it takes care of the flushing > via those two functions I referred to in cachetlb.txt. > > This keeps the magic out of the individual MTD block drivers, and stops > them replicating the same mistakes time and time again - and also means > that you won't have to teach everyone about this. Yeah, something like this. The MTD API is not very suitable for this and would need some rework to make sure we achieve 2 goals: 1. No need to change current MTD API users 2. Keep the vmalloced memory handling in one place and not replicate it in each driver. Currently only OneNAND drivers do these hacks, so I guess Kyungmin will suffer most... > It doesn't make sense for the DMA API to deal with vmalloc - MTD is > one of the very few (the only?) subsystems which does this. OK, thank you!
diff --git a/drivers/mtd/onenand/samsung.c b/drivers/mtd/onenand/samsung.c index 3306b5b..f24cb6f 100644 --- a/drivers/mtd/onenand/samsung.c +++ b/drivers/mtd/onenand/samsung.c @@ -712,6 +712,74 @@ normal: return 0; } +static int s5pc110_write_bufferram(struct mtd_info *mtd, int area, + const unsigned char *buffer, int offset, size_t count) +{ + struct onenand_chip *this = mtd->priv; + struct device *dev = &onenand->pdev->dev; + void __iomem *p; + void *buf = (void *) buffer; + dma_addr_t dma_src, dma_dst; + int err, ofs, page_dma = 0; + + p = this->base + area; + if (ONENAND_CURRENT_BUFFERRAM(this)) { + if (area == ONENAND_DATARAM) + p += this->writesize; + else + p += mtd->oobsize; + } + + if (count != mtd->writesize || offset & 3 || (size_t) buf & 3) + goto normal; + + /* Handle vmalloc address */ + if (buf >= high_memory) { + struct page *page; + + if (((size_t) buf & PAGE_MASK) != + ((size_t) (buf + count - 1) & PAGE_MASK)) + goto normal; + + page = vmalloc_to_page(buf); + if (unlikely(!page)) + goto normal; + + /* Page offset */ + ofs = ((size_t) buf & ~PAGE_MASK); + page_dma = 1; + + /* DMA routine */ + dma_src = dma_map_page(dev, page, ofs, count, DMA_TO_DEVICE); + dma_dst = onenand->phys_base + (p - this->base); + } else { + /* DMA routine */ + dma_src = dma_map_single(dev, buf, count, DMA_TO_DEVICE); + dma_dst = onenand->phys_base + (p - this->base); + } + + if (dma_mapping_error(dev, dma_src)) { + dev_err(dev, "Couldn't map a %d byte buffer for DMA\n", count); + goto normal; + } + + err = s5pc110_dma_ops((void *) dma_dst, (void *) dma_src, + count, S5PC110_DMA_DIR_WRITE); + + if (page_dma) + dma_unmap_page(dev, dma_src, count, DMA_TO_DEVICE); + else + dma_unmap_single(dev, dma_src, count, DMA_TO_DEVICE); + + if (!err) + return 0; + +normal: + memcpy(p + offset, buffer, count); + + return 0; +} + static int s5pc110_chip_probe(struct mtd_info *mtd) { /* Now just return 0 */ @@ -844,6 +912,7 @@ static void s3c_onenand_setup(struct mtd_info *mtd) } else if 
(onenand->type == TYPE_S5PC110) { /* Use generic onenand functions */ this->read_bufferram = s5pc110_read_bufferram; + this->write_bufferram = s5pc110_write_bufferram; this->chip_probe = s5pc110_chip_probe; return; } else {