[2/4] Controlling block allocation of a file with respect to grading information

Message ID CAExFE6nB9XTOQ1hiL44pfzmFveA7rzja1-60-k-LtdvWU_Z9jA@mail.gmail.com
State New
Headers show
Series
  • RFC : Support for data gradation of a single file.
Related show

Commit Message

Sayan Ghosh April 6, 2018, 11:41 a.m.
Grades are read from the extended attribute while preallocating the
blocks of a single graded file. We assume binary grading of the file
blocks: high-graded blocks are placed in the persistent memory region
of the LVM, while lower-graded ones are placed in the HDD portion of
the LVM. To do this we alter the block allocation method in the
functions ext4_ext_map_blocks() and ext4_alloc_file_blocks().
Leveraging the existing goal-block allocation to obtain goals in
different tiers according to the grades has not yet been done.
Consider an LVM that is segmented as follows:

--- Segments ---
 Logical extents 0 to 1219:
    Type       linear
    Physical volume    /dev/sda11
    Physical extents    0 to 1219
 Logical extents 1220 to 1474:
    Type       linear
    Physical volume    /dev/pmem0
    Physical extents    0 to 254

We hard-code LOW_GRADE_STARTING_BLOCK as 0*1024 and
HIGH_GRADE_STARTING_BLOCK as 1220*1024, the initial logical block
numbers of the respective tiers. FIX_ME comments have been provided in
suitable positions.

The patch is on top of Linux Kernel 4.7.2.

Signed-off-by: Sayan Ghosh <sgdgp.2014@gmail.com>
---
 fs/ext4/ext4.h    |   1 +
 fs/ext4/extents.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 114 insertions(+), 3 deletions(-)

 void read_grade_xattr(struct inode *inode,struct grade_struct *grade_array)
@@ -92,6 +98,43 @@ unsigned long long read_count_xattr(struct inode *inode)
     return total;
 }

+/*
+ * find_grade() - look up the grade of logical block @val.
+ * @grade_array holds @total high-grade ranges and is assumed sorted by
+ * block_num without overlaps (the binary search depends on it); NULL or
+ * @total == 0 means "no ranges". If @req_len is non-NULL it receives the
+ * number of blocks from @val onwards that share @val's grade, or 0 when no
+ * graded range lies at or after @val. Returns 1 for high grade, else 0.
+ */
+int find_grade(struct grade_struct *grade_array, unsigned long long total,
+               ext4_fsblk_t val, unsigned long long *req_len)
+{
+    unsigned long long lo = 0, hi = total, mid, run = 0;
+    int grade = 0;
+
+    /* Binary search for the first range whose end lies beyond val. */
+    while (grade_array && lo < hi) {
+        mid = lo + (hi - lo) / 2;
+        if (grade_array[mid].block_num + grade_array[mid].len <= val)
+            lo = mid + 1;
+        else
+            hi = mid;
+    }
+    if (grade_array && lo < total) {
+        if (grade_array[lo].block_num <= val) {
+            /* Inside a range: report only what is left of it. */
+            grade = 1;
+            run = grade_array[lo].block_num + grade_array[lo].len - val;
+        } else {
+            /* In a gap: the run ends where the next range starts. */
+            run = grade_array[lo].block_num - val;
+        }
+    }
+    if (req_len)
+        *req_len = run;
+    return grade;
+}
+
 static __le32 ext4_extent_block_csum(struct inode *inode,
                      struct ext4_extent_header *eh)
 {
@@ -4326,6 +4369,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct
inode *inode,
     struct ext4_extent newex, *ex, *ex2;
     struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
     ext4_fsblk_t newblock = 0;
+
     int free_on_err = 0, err = 0, depth, ret;
     unsigned int allocated = 0, offset = 0;
     unsigned int allocated_clusters = 0;
@@ -4333,6 +4377,14 @@ int ext4_ext_map_blocks(handle_t *handle,
struct inode *inode,
     ext4_lblk_t cluster_offset;
     bool map_from_cluster = false;

+    struct grade_struct *grade_array = NULL;
+    unsigned long long total;
+    if (is_file_graded(inode)){
+        total = read_count_xattr(inode);
+        grade_array = (struct grade_struct
*)kmalloc(total*sizeof(struct grade_struct), GFP_USER);
+        read_grade_xattr(inode,grade_array);
+    }
+
     ext_debug("blocks %u/%u requested for inode %lu\n",
           map->m_lblk, map->m_len, inode->i_ino);
     trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
@@ -4494,8 +4546,36 @@ int ext4_ext_map_blocks(handle_t *handle,
struct inode *inode,

     /* allocate new block */
     ar.inode = inode;
-    ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
+    if(!is_file_graded(inode)){
+        ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
+    }
+
+    /*
+     * ** FIX ME **
+     * Now accessing different goals for different tiers is hard coded.
+     * Please suggest a method to maintain multiple goal states in
different tiers,
+     * each corresponding to the respective grades for proper
goal-block placement.
+     *
+     * ** TODO 1 **
+     * Instead of hard-coding LOW_GRADE_STARTING_BLOCK and
HIGH_GRADE_STARTING_BLOCK
+     * set their values automatically from the LVM (see the description).
+     *
+     * ** TODO 2 **
+     * It is assumed that higher grade storage area will not overflow.
+     * We need to take care of the case when high grade storage
device gets full
+     * and data has to be stored in the lower tier.
+     */
+    else{
+        unsigned long long temp;
+        if(find_grade(grade_array,total,map->m_lblk,temp) == 0){
+            ar.goal = LOW_GRADE_STARTING_BLOCK;
+        }
+        if(find_grade(grade_array,total,map->m_lblk,temp) == 1){
+            ar.goal = HIGH_GRADE_STARTING_BLOCK;
+        }
+    }
     ar.logical = map->m_lblk;
+
     /*
      * We calculate the offset from the beginning of the cluster
      * for the logical block number, since when we allocate a
@@ -4519,7 +4599,12 @@ int ext4_ext_map_blocks(handle_t *handle,
struct inode *inode,
         ar.flags |= EXT4_MB_DELALLOC_RESERVED;
     if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
         ar.flags |= EXT4_MB_USE_RESERVED;
+    if(is_file_graded(inode)){
+        ar.flags |= EXT4_MB_HINT_NOPREALLOC;
+    }
     newblock = ext4_mb_new_blocks(handle, &ar, &err);
+
+go_out:
     if (!newblock)
         goto out2;
     ext_debug("allocate new block: goal %llu, found %llu/%u\n",
@@ -4706,6 +4791,8 @@ static int ext4_alloc_file_blocks(struct file
*file, ext4_lblk_t offset,
 {
     struct inode *inode = file_inode(file);
     handle_t *handle;
+
+    int grade_val = 0;
     int ret = 0;
     int ret2 = 0;
     int retries = 0;
@@ -4713,9 +4800,17 @@ static int ext4_alloc_file_blocks(struct file
*file, ext4_lblk_t offset,
     struct ext4_map_blocks map;
     unsigned int credits;
     loff_t epos;
-
     map.m_lblk = offset;
     map.m_len = len;
+
+    struct grade_struct *grade_array = NULL;
+    unsigned long long total;
+    if (is_file_graded(inode)){
+        total = read_count_xattr(inode);
+        grade_array = (struct grade_struct
*)kmalloc(total*sizeof(struct grade_struct), GFP_USER);
+        read_grade_xattr(inode,grade_array);
+    }
+
     /*
      * Don't normalize the request if it can fit in one extent so
      * that it doesn't get unnecessarily split into multiple
@@ -4735,10 +4830,23 @@ static int ext4_alloc_file_blocks(struct file
*file, ext4_lblk_t offset,
         depth = ext_depth(inode);
     else
         depth = -1;
-
 retry:
     while (ret >= 0 && len) {
         /*
+         * Finding length of blocks which have same grade
+         * and they are preallocated together.
+         */
+        if (is_file_graded(inode)){
+            map.m_len = 1;
+            unsigned long long req_len;
+            grade_val = find_grade(grade_array,total,map.m_lblk,&req_len);
+            if (req_len == 0)
+                map.m_len = len;
+            else
+                map.m_len = req_len;
+        }
+
+        /*
          * Recalculate credits when extent tree depth changes.
          */
         if (depth >= 0 && depth != ext_depth(inode)) {
@@ -4753,6 +4861,7 @@ retry:
             break;
         }
         ret = ext4_map_blocks(handle, inode, &map, flags);
+
         if (ret <= 0) {
             ext4_debug("inode #%lu: block %u: len %u: "
                    "ext4_ext_map_blocks returned %d",
@@ -4762,6 +4871,7 @@ retry:
             ret2 = ext4_journal_stop(handle);
             break;
         }
+
         map.m_lblk += ret;
         map.m_len = len = len - ret;
         epos = (loff_t)map.m_lblk << inode->i_blkbits;
‌

Comments

Randy Dunlap April 6, 2018, 5:14 p.m. | #1
On 04/06/2018 04:41 AM, Sayan Ghosh wrote:

> The patch is on top of Linux Kernel 4.7.2.
> 
> Signed-off-by: Sayan Ghosh <sgdgp.2014@gmail.com>
> ---
>  fs/ext4/ext4.h    |   1 +
>  fs/ext4/extents.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 114 insertions(+), 3 deletions(-)
> 

> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index de9194f..aaff3a3 100755
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -58,6 +58,12 @@
>  #define EXT4_EXT_DATA_VALID2    0x10 /* second half contains valid data */
> 
>  /*
> + * Starting block numbers for low and high grades
> + */
> +#define LOW_GRADE_STARTING_BLOCK 0
> +#define HIGH_GRADE_STARTING_BLOCK 1249280
> +
> +/*
>   * read_grade_xattr() is used to read the grade array from the
> extended attribute.
>   */
>  void read_grade_xattr(struct inode *inode,struct grade_struct *grade_array)
> @@ -92,6 +98,43 @@ unsigned long long read_count_xattr(struct inode *inode)
>      return total;
>  }
> 
> +/*
> + * find_grade() is to find the grade of a logical block.
> + * This also returns the length of graded or ungraded portion
> + * starting from that logical block number (gets stored in the variable
> + * req_len). The return value is 1 for high grade and 0 otherwise.
> + */

Lots of style issues:

> +int find_grade(struct grade_struct* grade_array, unsigned long long
> total, ext4_fsblk_t val, unsigned long long *req_len)

line too long.

> +{
> +    if (val >= (grade_array[total -1].block_num + grade_array[total -1].len) ){
> +        if (req_len != NULL)
> +            (*req_len) = 0;
> +        return 0;
> +    }

use tabs, not spaces. and indentation should be 8, not 4. (many places)

> +    unsigned long long beg, end, mid;

Don't declare variables after code.

> +    beg = 0;
> +    end = total-1;
> +    while (beg <= end){
> +        mid = (beg + end)/2;
> +        if ((val >= grade_array[mid].block_num) && (val <=
> (grade_array[mid].block_num + grade_array[mid].len - 1)) ){

line too long.

> +            if (req_len != NULL)
> +                (*req_len) = grade_array[mid].len;
> +            return 1;
> +        }
> +        if(beg == end)

space after "if"

> +            break;
> +        if (grade_array[mid].block_num > val){
> +            end = (mid > 0) ? (mid - 1) : 0;
> +        }
> +        else{
> +            beg = mid + 1;
> +        }
> +    }
> +    if (req_len != NULL)
> +        (*req_len) = grade_array[mid].block_num - val;
> +    return 0;
> +}
> +
>  static __le32 ext4_extent_block_csum(struct inode *inode,
>                       struct ext4_extent_header *eh)
>  {

Patch

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b9ec0ca..c7d2eed 100755
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3201,6 +3201,7 @@  struct ext4_extent;
 extern unsigned long long read_count_xattr(struct inode *inode);
 extern void read_grade_xattr(struct inode *inode,struct grade_struct
*grade_array);
 extern int is_file_graded(struct inode *inode);
+extern int find_grade(struct grade_struct* grade_array, unsigned long
long total, ext4_fsblk_t val, unsigned long long *req_len);

 /*
  * Maximum number of logical blocks in a file; ext4_extent's ee_block is
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index de9194f..aaff3a3 100755
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -58,6 +58,12 @@ 
 #define EXT4_EXT_DATA_VALID2    0x10 /* second half contains valid data */

 /*
+ * Starting block numbers for low and high grades
+ */
+#define LOW_GRADE_STARTING_BLOCK 0
+#define HIGH_GRADE_STARTING_BLOCK 1249280
+
+/*
  * read_grade_xattr() is used to read the grade array from the
extended attribute.
  */