diff mbox

[v5,2/2] sheepdog: support user-defined redundancy option

Message ID 1383318613-490-3-git-send-email-namei.unix@gmail.com
State New
Headers show

Commit Message

Liu Yuan Nov. 1, 2013, 3:10 p.m. UTC
Sheepdog support two kinds of redundancy, full replication and erasure coding.

# create a fully replicated vdi with x copies
 -o redundancy=x (1 <= x <= SD_MAX_COPIES)

# create a erasure coded vdi with x data strips and y parity strips
 -o redundancy=x:y (x must be one of {2,4,8,16} and 1 <= y < SD_EC_MAX_STRIP)

E.g, to convert a vdi into sheepdog vdi 'test' with 8:3 erasure coding scheme

$ qemu-img convert -o redundancy=8:3 linux-0.2.img sheepdog:test

Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Liu Yuan <namei.unix@gmail.com>
---
 block/sheepdog.c          |   90 ++++++++++++++++++++++++++++++++++++++++++++-
 include/block/block_int.h |    1 +
 2 files changed, 90 insertions(+), 1 deletion(-)

Comments

Stefan Hajnoczi Nov. 5, 2013, 2:37 p.m. UTC | #1
On Fri, Nov 01, 2013 at 11:10:13PM +0800, Liu Yuan wrote:
> diff --git a/block/sheepdog.c b/block/sheepdog.c
> index 66b3ea8..a267d31 100644
> --- a/block/sheepdog.c
> +++ b/block/sheepdog.c
> @@ -91,6 +91,14 @@
>  #define SD_NR_VDIS   (1U << 24)
>  #define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22)
>  #define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS)
> +/*
> + * For erasure coding, we use at most SD_EC_MAX_STRIP for data strips and
> + * (SD_EC_MAX_STRIP - 1) for parity strips
> + *
> + * SD_MAX_COPIES is sum of number of data trips and parity strips.

s/data trips/data strips/

> +static bool is_numeric(const char *s)
> +{
> +    const char *p = s;
> +
> +    if (*p) {
> +        char c;
> +
> +        while ((c = *p++))
> +            if (!isdigit(c)) {
> +                return false;
> +            }
> +        return true;
> +    }
> +    return false;
> +}
> +
> +/*
> + * Sheepdog support two kinds of redundancy, full replication and erasure
> + * coding.
> + *
> + * # create a fully replicated vdi with x copies
> + * -o redundancy=x (1 <= x <= SD_MAX_COPIES)
> + *
> + * # create a erasure coded vdi with x data strips and y parity strips
> + * -o redundancy=x:y (x must be one of {2,4,8,16} and 1 <= y < SD_EC_MAX_STRIP)
> + */
> +static int parse_redundancy(BDRVSheepdogState *s, const char *opt)
> +{
> +    struct SheepdogInode *inode = &s->inode;
> +    const char *n1, *n2;
> +    uint8_t copy, parity;
> +    char p[10];
> +
> +    pstrcpy(p, sizeof(p), opt);
> +    n1 = strtok(p, ":");
> +    n2 = strtok(NULL, ":");
> +
> +    if (!n1 || !is_numeric(n1) || (n2 && !is_numeric(n2))) {
> +        return -EINVAL;
> +    }
> +
> +    copy = strtol(n1, NULL, 10);
> +    if (copy > SD_MAX_COPIES) {
> +        return -EINVAL;
> +    }
> +    if (!n2) {
> +        inode->copy_policy = 0;
> +        inode->nr_copies = copy;
> +        return 0;
> +    }
> +
> +    if (copy != 2 && copy != 4 && copy != 8 && copy != 16) {
> +        return -EINVAL;
> +    }
> +
> +    parity = strtol(n2, NULL, 10);
> +    if (parity >= SD_EC_MAX_STRIP || parity == 0) {
> +        return -EINVAL;
> +    }
> +
> +    /*
> +     * 4 bits for parity and 4 bits for data.
> +     * We have to compress upper data bits because it can't represent 16
> +     */
> +    inode->copy_policy = ((copy / 2) << 4) + parity;
> +    inode->nr_copies = copy + parity;
> +
> +    return 0;
> +}

The string manipulation can be simplified using sscanf(3) and
is_numeric() can be dropped:

static int parse_redundancy(BDRVSheepdogState *s, const char *opt)
{
    struct SheepdogInode *inode = &s->inode;
    uint8_t copy, parity;
    int n;

    n = sscanf(opt, "%hhu:%hhu", &copy, &parity);
    if (n != 1 && n != 2) {
        return -EINVAL;
    }
    if (copy > SD_MAX_COPIES) {
        return -EINVAL;
    }
    if (n == 1) {
        inode->copy_policy = 0;
        inode->nr_copies = copy;
        return 0;
    }
    if (copy != 2 && copy != 4 && copy != 8 && copy != 16) {
        return -EINVAL;
    }
    if (parity >= SD_EC_MAX_STRIP || parity == 0) {
        return -EINVAL;
    }
    /*
     * 4 bits for parity and 4 bits for data.
     * We have to compress upper data bits because it can't represent 16
     */
    inode->copy_policy = ((copy / 2) << 4) + parity;
    inode->nr_copies = copy + parity;
    return 0;
}
Eric Blake Nov. 5, 2013, 3:46 p.m. UTC | #2
On 11/05/2013 07:37 AM, Stefan Hajnoczi wrote:

>> +
>> +    copy = strtol(n1, NULL, 10);
>> +    if (copy > SD_MAX_COPIES) {
>> +        return -EINVAL;
>> +    }

> 
> The string manipulation can be simplified using sscanf(3) and
> is_numeric() can be dropped:
> 
> static int parse_redundancy(BDRVSheepdogState *s, const char *opt)
> {
>     struct SheepdogInode *inode = &s->inode;
>     uint8_t copy, parity;
>     int n;
> 
>     n = sscanf(opt, "%hhu:%hhu", &copy, &parity);

Personally, I detest the use of sscanf() to parse integers out of
strings, because POSIX says that behavior is undefined if overflow
occurs.  For internal strings, you can get away with it.  But for
untrusted input that did not originate in your process, a user can mess
you up by passing a string that parses larger than the integer you are
trying to store into, where the behavior is unspecified whether it wraps
around module 256, parses additional digits, or any other odd behavior.
 By the time you've added code to sanitize untrusted input, it's just as
fast to use strtol() anyways.
Stefan Hajnoczi Nov. 6, 2013, 9:34 a.m. UTC | #3
On Tue, Nov 05, 2013 at 08:46:07AM -0700, Eric Blake wrote:
> On 11/05/2013 07:37 AM, Stefan Hajnoczi wrote:
> 
> >> +
> >> +    copy = strtol(n1, NULL, 10);
> >> +    if (copy > SD_MAX_COPIES) {
> >> +        return -EINVAL;
> >> +    }
> 
> > 
> > The string manipulation can be simplified using sscanf(3) and
> > is_numeric() can be dropped:
> > 
> > static int parse_redundancy(BDRVSheepdogState *s, const char *opt)
> > {
> >     struct SheepdogInode *inode = &s->inode;
> >     uint8_t copy, parity;
> >     int n;
> > 
> >     n = sscanf(opt, "%hhu:%hhu", &copy, &parity);
> 
> Personally, I detest the use of sscanf() to parse integers out of
> strings, because POSIX says that behavior is undefined if overflow
> occurs.  For internal strings, you can get away with it.  But for
> untrusted input that did not originate in your process, a user can mess
> you up by passing a string that parses larger than the integer you are
> trying to store into, where the behavior is unspecified whether it wraps
> around module 256, parses additional digits, or any other odd behavior.
>  By the time you've added code to sanitize untrusted input, it's just as
> fast to use strtol() anyways.

Hmm...I didn't know that overflow was undefined behavior in POSIX :(.

In that case forget sscanf(3) can look at the strtol(3) result for
errors.  There's still no need for a custom is_numeric() function.

Stefan
Liu Yuan Nov. 7, 2013, 2:58 p.m. UTC | #4
On Wed, Nov 06, 2013 at 10:34:24AM +0100, Stefan Hajnoczi wrote:
> On Tue, Nov 05, 2013 at 08:46:07AM -0700, Eric Blake wrote:
> > On 11/05/2013 07:37 AM, Stefan Hajnoczi wrote:
> > 
> > >> +
> > >> +    copy = strtol(n1, NULL, 10);
> > >> +    if (copy > SD_MAX_COPIES) {
> > >> +        return -EINVAL;
> > >> +    }
> > 
> > > 
> > > The string manipulation can be simplified using sscanf(3) and
> > > is_numeric() can be dropped:
> > > 
> > > static int parse_redundancy(BDRVSheepdogState *s, const char *opt)
> > > {
> > >     struct SheepdogInode *inode = &s->inode;
> > >     uint8_t copy, parity;
> > >     int n;
> > > 
> > >     n = sscanf(opt, "%hhu:%hhu", &copy, &parity);
> > 
> > Personally, I detest the use of sscanf() to parse integers out of
> > strings, because POSIX says that behavior is undefined if overflow
> > occurs.  For internal strings, you can get away with it.  But for
> > untrusted input that did not originate in your process, a user can mess
> > you up by passing a string that parses larger than the integer you are
> > trying to store into, where the behavior is unspecified whether it wraps
> > around module 256, parses additional digits, or any other odd behavior.
> >  By the time you've added code to sanitize untrusted input, it's just as
> > fast to use strtol() anyways.
> 
> Hmm...I didn't know that overflow was undefined behavior in POSIX :(.
> 
> In that case forget sscanf(3) can look at the strtol(3) result for
> errors.  There's still no need for a custom is_numeric() function.

Thanks for your comments, I'll remove is_numeric() for v6

Thanks
Yuan
diff mbox

Patch

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 66b3ea8..a267d31 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -91,6 +91,14 @@ 
 #define SD_NR_VDIS   (1U << 24)
 #define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22)
 #define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS)
+/*
+ * For erasure coding, we use at most SD_EC_MAX_STRIP for data strips and
+ * (SD_EC_MAX_STRIP - 1) for parity strips
+ *
+ * SD_MAX_COPIES is sum of number of data trips and parity strips.
+ */
+#define SD_EC_MAX_STRIP 16
+#define SD_MAX_COPIES (SD_EC_MAX_STRIP * 2 - 1)
 
 #define SD_INODE_SIZE (sizeof(SheepdogInode))
 #define CURRENT_VDI_ID 0
@@ -1495,6 +1503,7 @@  static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot)
     hdr.data_length = wlen;
     hdr.vdi_size = s->inode.vdi_size;
     hdr.copy_policy = s->inode.copy_policy;
+    hdr.copies = s->inode.nr_copies;
 
     ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
 
@@ -1562,6 +1571,76 @@  out:
     return ret;
 }
 
+static bool is_numeric(const char *s)
+{
+    const char *p = s;
+
+    if (*p) {
+        char c;
+
+        while ((c = *p++))
+            if (!isdigit(c)) {
+                return false;
+            }
+        return true;
+    }
+    return false;
+}
+
+/*
+ * Sheepdog support two kinds of redundancy, full replication and erasure
+ * coding.
+ *
+ * # create a fully replicated vdi with x copies
+ * -o redundancy=x (1 <= x <= SD_MAX_COPIES)
+ *
+ * # create a erasure coded vdi with x data strips and y parity strips
+ * -o redundancy=x:y (x must be one of {2,4,8,16} and 1 <= y < SD_EC_MAX_STRIP)
+ */
+static int parse_redundancy(BDRVSheepdogState *s, const char *opt)
+{
+    struct SheepdogInode *inode = &s->inode;
+    const char *n1, *n2;
+    uint8_t copy, parity;
+    char p[10];
+
+    pstrcpy(p, sizeof(p), opt);
+    n1 = strtok(p, ":");
+    n2 = strtok(NULL, ":");
+
+    if (!n1 || !is_numeric(n1) || (n2 && !is_numeric(n2))) {
+        return -EINVAL;
+    }
+
+    copy = strtol(n1, NULL, 10);
+    if (copy > SD_MAX_COPIES) {
+        return -EINVAL;
+    }
+    if (!n2) {
+        inode->copy_policy = 0;
+        inode->nr_copies = copy;
+        return 0;
+    }
+
+    if (copy != 2 && copy != 4 && copy != 8 && copy != 16) {
+        return -EINVAL;
+    }
+
+    parity = strtol(n2, NULL, 10);
+    if (parity >= SD_EC_MAX_STRIP || parity == 0) {
+        return -EINVAL;
+    }
+
+    /*
+     * 4 bits for parity and 4 bits for data.
+     * We have to compress upper data bits because it can't represent 16
+     */
+    inode->copy_policy = ((copy / 2) << 4) + parity;
+    inode->nr_copies = copy + parity;
+
+    return 0;
+}
+
 static int sd_create(const char *filename, QEMUOptionParameter *options,
                      Error **errp)
 {
@@ -1602,6 +1681,11 @@  static int sd_create(const char *filename, QEMUOptionParameter *options,
                 ret = -EINVAL;
                 goto out;
             }
+        } else if (!strcmp(options->name, BLOCK_OPT_REDUNDANCY)) {
+            ret = parse_redundancy(s, options->value.s);
+            if (ret < 0) {
+                goto out;
+            }
         }
         options++;
     }
@@ -1644,7 +1728,6 @@  static int sd_create(const char *filename, QEMUOptionParameter *options,
         bdrv_unref(bs);
     }
 
-    /* TODO: allow users to specify copy number */
     ret = do_sd_create(s, &vid, 0);
     if (!prealloc || ret) {
         goto out;
@@ -2416,6 +2499,11 @@  static QEMUOptionParameter sd_create_options[] = {
         .type = OPT_STRING,
         .help = "Preallocation mode (allowed values: off, full)"
     },
+    {
+        .name = BLOCK_OPT_REDUNDANCY,
+        .type = OPT_STRING,
+        .help = "Redundancy of the image"
+    },
     { NULL }
 };
 
diff --git a/include/block/block_int.h b/include/block/block_int.h
index a48731d..b90862f 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -53,6 +53,7 @@ 
 #define BLOCK_OPT_COMPAT_LEVEL      "compat"
 #define BLOCK_OPT_LAZY_REFCOUNTS    "lazy_refcounts"
 #define BLOCK_OPT_ADAPTER_TYPE      "adapter_type"
+#define BLOCK_OPT_REDUNDANCY        "redundancy"
 
 typedef struct BdrvTrackedRequest {
     BlockDriverState *bs;