Patchwork [1/2] block: add BH_Meta flag

login
register
mail settings
Submitter Saugata Das
Date May 11, 2012, 3:02 p.m.
Message ID <1336748577-9258-1-git-send-email-saugata.das@stericsson.com>
Download mbox | patch
Permalink /patch/158533/
State Superseded
Headers show

Comments

Saugata Das - May 11, 2012, 3:02 p.m.
From: Saugata Das <saugata.das@linaro.org>

Today, storage devices like eMMC have special features such as data tagging
(introduced in MMC version 4.5) to improve the performance of some
specific writes. In the MMC stack, data tagging is used for all writes that
have the REQ_META flag set. This patch adds the capability to set the
REQ_META flag during metadata writes.

Signed-off-by: Saugata Das <saugata.das@linaro.org>
---
 fs/buffer.c                 |   10 ++++++++--
 include/linux/buffer_head.h |    2 ++
 2 files changed, 10 insertions(+), 2 deletions(-)
Namhyung Kim - May 14, 2012, 1:42 a.m.
Hi,

On Fri, 11 May 2012 20:32:56 +0530, Saugata Das wrote:
> From: Saugata Das <saugata.das@linaro.org>
>
> Today, storage devices like eMMC has special features like data tagging
> (introduced in MMC-4.5 version) in order to improve performance of some
> specific writes. On MMC stack, data tagging is used for all writes which
> has REQ_META flag set. This patch adds the capability to add REQ_META flag
> during meta data write.
>

AFAIK, REQ_META is only for marking a bio/request so that it can be
recognized by blktrace or similar tools. You can use REQ_PRIO for this
purpose, but it applies only if your I/O scheduler is CFQ.

But I'm not familiar with how the MMC stack works, so I might be missing something.

Thanks,
Namhyung


> Signed-off-by: Saugata Das <saugata.das@linaro.org>
> ---
>  fs/buffer.c                 |   10 ++++++++--
>  include/linux/buffer_head.h |    2 ++
>  2 files changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 36d6665..688b38b 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -1685,7 +1685,10 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
>  	do {
>  		struct buffer_head *next = bh->b_this_page;
>  		if (buffer_async_write(bh)) {
> -			submit_bh(write_op, bh);
> +			if (buffer_meta(bh))
> +				submit_bh(write_op | REQ_META, bh);
> +			else
> +				submit_bh(write_op, bh);
>  			nr_underway++;
>  		}
>  		bh = next;
> @@ -1739,7 +1742,10 @@ recover:
>  		struct buffer_head *next = bh->b_this_page;
>  		if (buffer_async_write(bh)) {
>  			clear_buffer_dirty(bh);
> -			submit_bh(write_op, bh);
> +			if (buffer_meta(bh))
> +				submit_bh(write_op | REQ_META, bh);
> +			else
> +				submit_bh(write_op, bh);
>  			nr_underway++;
>  		}
>  		bh = next;
> diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
> index ef26043..0776564 100644
> --- a/include/linux/buffer_head.h
> +++ b/include/linux/buffer_head.h
> @@ -34,6 +34,7 @@ enum bh_state_bits {
>  	BH_Write_EIO,	/* I/O error on write */
>  	BH_Unwritten,	/* Buffer is allocated on disk but not written */
>  	BH_Quiet,	/* Buffer Error Prinks to be quiet */
> +	BH_Meta,	/* Is meta */
>  
>  	BH_PrivateStart,/* not a state bit, but the first bit available
>  			 * for private allocation by other entities
> @@ -126,6 +127,7 @@ BUFFER_FNS(Delay, delay)
>  BUFFER_FNS(Boundary, boundary)
>  BUFFER_FNS(Write_EIO, write_io_error)
>  BUFFER_FNS(Unwritten, unwritten)
> +BUFFER_FNS(Meta, meta)
>  
>  #define bh_offset(bh)		((unsigned long)(bh)->b_data & ~PAGE_MASK)
>  #define touch_buffer(bh)	mark_page_accessed(bh->b_page)
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Saugata Das - May 14, 2012, 4:54 a.m.
On 14 May 2012 07:12, Namhyung Kim <namhyung.kim@lge.com> wrote:
> Hi,
>
> On Fri, 11 May 2012 20:32:56 +0530, Saugata Das wrote:
>> From: Saugata Das <saugata.das@linaro.org>
>>
>> Today, storage devices like eMMC has special features like data tagging
>> (introduced in MMC-4.5 version) in order to improve performance of some
>> specific writes. On MMC stack, data tagging is used for all writes which
>> has REQ_META flag set. This patch adds the capability to add REQ_META flag
>> during meta data write.
>>
>
> AFAIK, the REQ_META is only for marking a bio/req to be recognized when
> using blktrace or something. You can use REQ_PRIO for the purpose but it
> applies only if your ioscheduler is CFQ.
>
> But I'm not aware how the MMC stack works, so I might be missing something.
>

Today on ext4, REQ_META and REQ_PRIO are used only for read
operations. For metadata writes, no special flag (REQ_META or
REQ_PRIO) is set.

On eMMC, we depend on the REQ_META flag to implement the "reliable write"
and "data tag" features, which provide additional reliability
and performance for file system metadata writes.


> Thanks,
> Namhyung
>
>
>> Signed-off-by: Saugata Das <saugata.das@linaro.org>
>> ---
>>  fs/buffer.c                 |   10 ++++++++--
>>  include/linux/buffer_head.h |    2 ++
>>  2 files changed, 10 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/buffer.c b/fs/buffer.c
>> index 36d6665..688b38b 100644
>> --- a/fs/buffer.c
>> +++ b/fs/buffer.c
>> @@ -1685,7 +1685,10 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
>>       do {
>>               struct buffer_head *next = bh->b_this_page;
>>               if (buffer_async_write(bh)) {
>> -                     submit_bh(write_op, bh);
>> +                     if (buffer_meta(bh))
>> +                             submit_bh(write_op | REQ_META, bh);
>> +                     else
>> +                             submit_bh(write_op, bh);
>>                       nr_underway++;
>>               }
>>               bh = next;
>> @@ -1739,7 +1742,10 @@ recover:
>>               struct buffer_head *next = bh->b_this_page;
>>               if (buffer_async_write(bh)) {
>>                       clear_buffer_dirty(bh);
>> -                     submit_bh(write_op, bh);
>> +                     if (buffer_meta(bh))
>> +                             submit_bh(write_op | REQ_META, bh);
>> +                     else
>> +                             submit_bh(write_op, bh);
>>                       nr_underway++;
>>               }
>>               bh = next;
>> diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
>> index ef26043..0776564 100644
>> --- a/include/linux/buffer_head.h
>> +++ b/include/linux/buffer_head.h
>> @@ -34,6 +34,7 @@ enum bh_state_bits {
>>       BH_Write_EIO,   /* I/O error on write */
>>       BH_Unwritten,   /* Buffer is allocated on disk but not written */
>>       BH_Quiet,       /* Buffer Error Prinks to be quiet */
>> +     BH_Meta,        /* Is meta */
>>
>>       BH_PrivateStart,/* not a state bit, but the first bit available
>>                        * for private allocation by other entities
>> @@ -126,6 +127,7 @@ BUFFER_FNS(Delay, delay)
>>  BUFFER_FNS(Boundary, boundary)
>>  BUFFER_FNS(Write_EIO, write_io_error)
>>  BUFFER_FNS(Unwritten, unwritten)
>> +BUFFER_FNS(Meta, meta)
>>
>>  #define bh_offset(bh)                ((unsigned long)(bh)->b_data & ~PAGE_MASK)
>>  #define touch_buffer(bh)     mark_page_accessed(bh->b_page)
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Saugata Das - May 16, 2012, 11:57 a.m.
Hi Ted, Christoph

Could you please comment on the following patch set:
[PATCH 1/2] block: add BH_Meta flag
[PATCH 2/2] ext4: annotate all meta data requests

If there are no remarks, could you please merge them for the next version?


Thanks,
Saugata


On 11 May 2012 20:32, Saugata Das <saugata.das@stericsson.com> wrote:
> From: Saugata Das <saugata.das@linaro.org>
>
> Today, storage devices like eMMC has special features like data tagging
> (introduced in MMC-4.5 version) in order to improve performance of some
> specific writes. On MMC stack, data tagging is used for all writes which
> has REQ_META flag set. This patch adds the capability to add REQ_META flag
> during meta data write.
>
> Signed-off-by: Saugata Das <saugata.das@linaro.org>
> ---
>  fs/buffer.c                 |   10 ++++++++--
>  include/linux/buffer_head.h |    2 ++
>  2 files changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 36d6665..688b38b 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -1685,7 +1685,10 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
>        do {
>                struct buffer_head *next = bh->b_this_page;
>                if (buffer_async_write(bh)) {
> -                       submit_bh(write_op, bh);
> +                       if (buffer_meta(bh))
> +                               submit_bh(write_op | REQ_META, bh);
> +                       else
> +                               submit_bh(write_op, bh);
>                        nr_underway++;
>                }
>                bh = next;
> @@ -1739,7 +1742,10 @@ recover:
>                struct buffer_head *next = bh->b_this_page;
>                if (buffer_async_write(bh)) {
>                        clear_buffer_dirty(bh);
> -                       submit_bh(write_op, bh);
> +                       if (buffer_meta(bh))
> +                               submit_bh(write_op | REQ_META, bh);
> +                       else
> +                               submit_bh(write_op, bh);
>                        nr_underway++;
>                }
>                bh = next;
> diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
> index ef26043..0776564 100644
> --- a/include/linux/buffer_head.h
> +++ b/include/linux/buffer_head.h
> @@ -34,6 +34,7 @@ enum bh_state_bits {
>        BH_Write_EIO,   /* I/O error on write */
>        BH_Unwritten,   /* Buffer is allocated on disk but not written */
>        BH_Quiet,       /* Buffer Error Prinks to be quiet */
> +       BH_Meta,        /* Is meta */
>
>        BH_PrivateStart,/* not a state bit, but the first bit available
>                         * for private allocation by other entities
> @@ -126,6 +127,7 @@ BUFFER_FNS(Delay, delay)
>  BUFFER_FNS(Boundary, boundary)
>  BUFFER_FNS(Write_EIO, write_io_error)
>  BUFFER_FNS(Unwritten, unwritten)
> +BUFFER_FNS(Meta, meta)
>
>  #define bh_offset(bh)          ((unsigned long)(bh)->b_data & ~PAGE_MASK)
>  #define touch_buffer(bh)       mark_page_accessed(bh->b_page)
> --
> 1.7.4.3
>
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Boaz Harrosh - May 16, 2012, 12:32 p.m.
On 05/11/2012 06:02 PM, Saugata Das wrote:

> From: Saugata Das <saugata.das@linaro.org>
> 
> Today, storage devices like eMMC has special features like data tagging
> (introduced in MMC-4.5 version) in order to improve performance of some
> specific writes. On MMC stack, data tagging is used for all writes which
> has REQ_META flag set. This patch adds the capability to add REQ_META flag
> during meta data write.
> 
> Signed-off-by: Saugata Das <saugata.das@linaro.org>
> ---
>  fs/buffer.c                 |   10 ++++++++--
>  include/linux/buffer_head.h |    2 ++
>  2 files changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 36d6665..688b38b 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -1685,7 +1685,10 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
>  	do {
>  		struct buffer_head *next = bh->b_this_page;
>  		if (buffer_async_write(bh)) {
> -			submit_bh(write_op, bh);
> +			if (buffer_meta(bh))
> +				submit_bh(write_op | REQ_META, bh);
> +			else
> +				submit_bh(write_op, bh);


It's not nice to duplicate a call site just for a difference in one parameter;
it's better to change the parameter and call the function in one
place. (A call site can get big in the generated assembly.)

You can do:
+				submit_bh(write_op | (buffer_meta(bh) << __REQ_META), bh);

And also avoid a conditional inside a loop.


>  			nr_underway++;
>  		}
>  		bh = next;
> @@ -1739,7 +1742,10 @@ recover:
>  		struct buffer_head *next = bh->b_this_page;
>  		if (buffer_async_write(bh)) {
>  			clear_buffer_dirty(bh);
> -			submit_bh(write_op, bh);
> +			if (buffer_meta(bh))
> +				submit_bh(write_op | REQ_META, bh);
> +			else
> +				submit_bh(write_op, bh);


Here too

Boaz

>  			nr_underway++;
>  		}
>  		bh = next;
> diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
> index ef26043..0776564 100644
> --- a/include/linux/buffer_head.h
> +++ b/include/linux/buffer_head.h
> @@ -34,6 +34,7 @@ enum bh_state_bits {
>  	BH_Write_EIO,	/* I/O error on write */
>  	BH_Unwritten,	/* Buffer is allocated on disk but not written */
>  	BH_Quiet,	/* Buffer Error Prinks to be quiet */
> +	BH_Meta,	/* Is meta */
>  
>  	BH_PrivateStart,/* not a state bit, but the first bit available
>  			 * for private allocation by other entities
> @@ -126,6 +127,7 @@ BUFFER_FNS(Delay, delay)
>  BUFFER_FNS(Boundary, boundary)
>  BUFFER_FNS(Write_EIO, write_io_error)
>  BUFFER_FNS(Unwritten, unwritten)
> +BUFFER_FNS(Meta, meta)
>  
>  #define bh_offset(bh)		((unsigned long)(bh)->b_data & ~PAGE_MASK)
>  #define touch_buffer(bh)	mark_page_accessed(bh->b_page)


--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Saugata Das - May 16, 2012, 2:06 p.m.
On 16 May 2012 18:02, Boaz Harrosh <bharrosh@panasas.com> wrote:
> On 05/11/2012 06:02 PM, Saugata Das wrote:
>
>> From: Saugata Das <saugata.das@linaro.org>
>>
>> Today, storage devices like eMMC has special features like data tagging
>> (introduced in MMC-4.5 version) in order to improve performance of some
>> specific writes. On MMC stack, data tagging is used for all writes which
>> has REQ_META flag set. This patch adds the capability to add REQ_META flag
>> during meta data write.
>>
>> Signed-off-by: Saugata Das <saugata.das@linaro.org>
>> ---
>>  fs/buffer.c                 |   10 ++++++++--
>>  include/linux/buffer_head.h |    2 ++
>>  2 files changed, 10 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/buffer.c b/fs/buffer.c
>> index 36d6665..688b38b 100644
>> --- a/fs/buffer.c
>> +++ b/fs/buffer.c
>> @@ -1685,7 +1685,10 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
>>       do {
>>               struct buffer_head *next = bh->b_this_page;
>>               if (buffer_async_write(bh)) {
>> -                     submit_bh(write_op, bh);
>> +                     if (buffer_meta(bh))
>> +                             submit_bh(write_op | REQ_META, bh);
>> +                     else
>> +                             submit_bh(write_op, bh);
>
>
> Its not nice to duplicate a call site for the parameter difference
> it's better to change the parameter and call the function in one
> place. (an assembler call site can get big)
>
> You can do:
> +                               submit_bh(write_op | (buffer_meta(bh) << __REQ_META), bh);
>
> And also avoid a conditional inside a loop.
>

Thanks for your comments. I will take care of this in the next version.


>
>>                       nr_underway++;
>>               }
>>               bh = next;
>> @@ -1739,7 +1742,10 @@ recover:
>>               struct buffer_head *next = bh->b_this_page;
>>               if (buffer_async_write(bh)) {
>>                       clear_buffer_dirty(bh);
>> -                     submit_bh(write_op, bh);
>> +                     if (buffer_meta(bh))
>> +                             submit_bh(write_op | REQ_META, bh);
>> +                     else
>> +                             submit_bh(write_op, bh);
>
>
> Here too
>
> Boaz
>
>>                       nr_underway++;
>>               }
>>               bh = next;
>> diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
>> index ef26043..0776564 100644
>> --- a/include/linux/buffer_head.h
>> +++ b/include/linux/buffer_head.h
>> @@ -34,6 +34,7 @@ enum bh_state_bits {
>>       BH_Write_EIO,   /* I/O error on write */
>>       BH_Unwritten,   /* Buffer is allocated on disk but not written */
>>       BH_Quiet,       /* Buffer Error Prinks to be quiet */
>> +     BH_Meta,        /* Is meta */
>>
>>       BH_PrivateStart,/* not a state bit, but the first bit available
>>                        * for private allocation by other entities
>> @@ -126,6 +127,7 @@ BUFFER_FNS(Delay, delay)
>>  BUFFER_FNS(Boundary, boundary)
>>  BUFFER_FNS(Write_EIO, write_io_error)
>>  BUFFER_FNS(Unwritten, unwritten)
>> +BUFFER_FNS(Meta, meta)
>>
>>  #define bh_offset(bh)                ((unsigned long)(bh)->b_data & ~PAGE_MASK)
>>  #define touch_buffer(bh)     mark_page_accessed(bh->b_page)
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/fs/buffer.c b/fs/buffer.c
index 36d6665..688b38b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1685,7 +1685,10 @@  static int __block_write_full_page(struct inode *inode, struct page *page,
 	do {
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
-			submit_bh(write_op, bh);
+			if (buffer_meta(bh))
+				submit_bh(write_op | REQ_META, bh);
+			else
+				submit_bh(write_op, bh);
 			nr_underway++;
 		}
 		bh = next;
@@ -1739,7 +1742,10 @@  recover:
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
 			clear_buffer_dirty(bh);
-			submit_bh(write_op, bh);
+			if (buffer_meta(bh))
+				submit_bh(write_op | REQ_META, bh);
+			else
+				submit_bh(write_op, bh);
 			nr_underway++;
 		}
 		bh = next;
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index ef26043..0776564 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -34,6 +34,7 @@  enum bh_state_bits {
 	BH_Write_EIO,	/* I/O error on write */
 	BH_Unwritten,	/* Buffer is allocated on disk but not written */
 	BH_Quiet,	/* Buffer Error Prinks to be quiet */
+	BH_Meta,	/* Is meta */
 
 	BH_PrivateStart,/* not a state bit, but the first bit available
 			 * for private allocation by other entities
@@ -126,6 +127,7 @@  BUFFER_FNS(Delay, delay)
 BUFFER_FNS(Boundary, boundary)
 BUFFER_FNS(Write_EIO, write_io_error)
 BUFFER_FNS(Unwritten, unwritten)
+BUFFER_FNS(Meta, meta)
 
 #define bh_offset(bh)		((unsigned long)(bh)->b_data & ~PAGE_MASK)
 #define touch_buffer(bh)	mark_page_accessed(bh->b_page)