diff mbox

[1/3] curl: Fix parsing of readahead option from filename

Message ID 1398867658-2069-2-git-send-email-mbooth@redhat.com
State New
Headers show

Commit Message

Matthew Booth April 30, 2014, 2:20 p.m. UTC
curl_parse_filename wasn't removing the option string from the url,
resulting in a 404.

This change is a essentially a rewrite of that function as I also need
to extend it to handle more options. The rewrite is also much easier
to read.

Signed-off-by: Matthew Booth <mbooth@redhat.com>
---
 block/curl.c | 81 ++++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 52 insertions(+), 29 deletions(-)

Comments

Kevin Wolf April 30, 2014, 3:16 p.m. UTC | #1
Am 30.04.2014 um 16:20 hat Matthew Booth geschrieben:
> curl_parse_filename wasn't removing the option string from the url,
> resulting in a 404.
> 
> This change is a essentially a rewrite of that function as I also need
> to extend it to handle more options. The rewrite is also much easier
> to read.
> 
> Signed-off-by: Matthew Booth <mbooth@redhat.com>
> ---
>  block/curl.c | 81 ++++++++++++++++++++++++++++++++++++++----------------------
>  1 file changed, 52 insertions(+), 29 deletions(-)
> 
> diff --git a/block/curl.c b/block/curl.c
> index d2f1084..4de6856 100644
> --- a/block/curl.c
> +++ b/block/curl.c
> @@ -46,12 +46,15 @@
>  #define CURL_NUM_STATES 8
>  #define CURL_NUM_ACB    8
>  #define SECTOR_SIZE     512
> -#define READ_AHEAD_SIZE (256 * 1024)
> +#define READ_AHEAD_DEFAULT (256 * 1024)
>  
>  #define FIND_RET_NONE   0
>  #define FIND_RET_OK     1
>  #define FIND_RET_WAIT   2
>  
> +#define CURL_BLOCK_OPT_URL       "url"
> +#define CURL_BLOCK_OPT_READAHEAD "readahead"
> +
>  struct BDRVCURLState;
>  
>  typedef struct CURLAIOCB {
> @@ -396,41 +399,60 @@ static void curl_clean_state(CURLState *s)
>  static void curl_parse_filename(const char *filename, QDict *options,
>                                  Error **errp)
>  {
> -
> -    #define RA_OPTSTR ":readahead="
>      char *file;
> -    char *ra;
> -    const char *ra_val;
> -    int parse_state = 0;
> +    char *end;
>  
>      file = g_strdup(filename);
> +    end = file + strlen(file) - 1;
> +
> +    /* Don't fail if we've been passed an empty string.
> +     * Only examine strings with a trailing ':'
> +     */
> +    if (end >= file && *end == ':') {
> +        /* We exit this loop when we don't find a recognised option immediately
> +         * preceding the trailing ':' of the form ':<option>=<value>'
> +         *
> +         * If filename has a trailing ':' but no preceding options, we don't
> +         * remove the trailing ':'.
> +         */
> +        for (;;) {
> +            /* Look for the preceding colon */
> +            char *colon = memrchr(file, ':', end - file);
> +            if (NULL == colon) {
> +                break;
> +            }
>  
> -    /* Parse a trailing ":readahead=#:" param, if present. */
> -    ra = file + strlen(file) - 1;
> -    while (ra >= file) {
> -        if (parse_state == 0) {
> -            if (*ra == ':') {
> -                parse_state++;
> -            } else {
> +            char *opt_start = colon + 1;
> +
> +            /* Look for an equals sign */
> +            char *equals = memchr(opt_start, '=', end - opt_start);
> +            if (NULL == equals) {
>                  break;
>              }
> -        } else if (parse_state == 1) {
> -            if (*ra > '9' || *ra < '0') {
> -                char *opt_start = ra - strlen(RA_OPTSTR) + 1;
> -                if (opt_start > file &&
> -                    strncmp(opt_start, RA_OPTSTR, strlen(RA_OPTSTR)) == 0) {
> -                    ra_val = ra + 1;
> -                    ra -= strlen(RA_OPTSTR) - 1;
> -                    *ra = '\0';
> -                    qdict_put(options, "readahead", qstring_from_str(ra_val));
> -                }
> +
> +            size_t opt_len = equals - opt_start;
> +            char *value = equals + 1;
> +            size_t value_len = end - value;
> +
> +            if (opt_len == strlen(CURL_BLOCK_OPT_READAHEAD) &&
> +                memcmp(opt_start, CURL_BLOCK_OPT_READAHEAD, opt_len) == 0) {
> +                /* This is redundant after the first iteration */
> +                *end = '\0';
> +                qdict_put(options, CURL_BLOCK_OPT_READAHEAD,
> +                          qstring_from_str(value));
> +            } else {
> +                /* Unknown option */
>                  break;

So if we get an unknown option, we simply abort parsing the filename.
This means that we'll try to open a URL that still contains an option
and will probably fail with a rather confusing error message.

Shouldn't we set a clear error message about the unknown option here
with error_setg() and immediately return?

Rest looks okay.

Kevin
Matthew Booth May 1, 2014, 8:56 a.m. UTC | #2
On 30/04/14 16:16, Kevin Wolf wrote:
> Am 30.04.2014 um 16:20 hat Matthew Booth geschrieben:
>> curl_parse_filename wasn't removing the option string from the url,
>> resulting in a 404.
>>
>> This change is a essentially a rewrite of that function as I also need
>> to extend it to handle more options. The rewrite is also much easier
>> to read.
>>
>> Signed-off-by: Matthew Booth <mbooth@redhat.com>
>> ---
>>  block/curl.c | 81 ++++++++++++++++++++++++++++++++++++++----------------------
>>  1 file changed, 52 insertions(+), 29 deletions(-)
>>
>> diff --git a/block/curl.c b/block/curl.c
>> index d2f1084..4de6856 100644
>> --- a/block/curl.c
>> +++ b/block/curl.c
>> @@ -46,12 +46,15 @@
>>  #define CURL_NUM_STATES 8
>>  #define CURL_NUM_ACB    8
>>  #define SECTOR_SIZE     512
>> -#define READ_AHEAD_SIZE (256 * 1024)
>> +#define READ_AHEAD_DEFAULT (256 * 1024)
>>  
>>  #define FIND_RET_NONE   0
>>  #define FIND_RET_OK     1
>>  #define FIND_RET_WAIT   2
>>  
>> +#define CURL_BLOCK_OPT_URL       "url"
>> +#define CURL_BLOCK_OPT_READAHEAD "readahead"
>> +
>>  struct BDRVCURLState;
>>  
>>  typedef struct CURLAIOCB {
>> @@ -396,41 +399,60 @@ static void curl_clean_state(CURLState *s)
>>  static void curl_parse_filename(const char *filename, QDict *options,
>>                                  Error **errp)
>>  {
>> -
>> -    #define RA_OPTSTR ":readahead="
>>      char *file;
>> -    char *ra;
>> -    const char *ra_val;
>> -    int parse_state = 0;
>> +    char *end;
>>  
>>      file = g_strdup(filename);
>> +    end = file + strlen(file) - 1;
>> +
>> +    /* Don't fail if we've been passed an empty string.
>> +     * Only examine strings with a trailing ':'
>> +     */
>> +    if (end >= file && *end == ':') {
>> +        /* We exit this loop when we don't find a recognised option immediately
>> +         * preceding the trailing ':' of the form ':<option>=<value>'
>> +         *
>> +         * If filename has a trailing ':' but no preceding options, we don't
>> +         * remove the trailing ':'.
>> +         */
>> +        for (;;) {
>> +            /* Look for the preceding colon */
>> +            char *colon = memrchr(file, ':', end - file);
>> +            if (NULL == colon) {
>> +                break;
>> +            }
>>  
>> -    /* Parse a trailing ":readahead=#:" param, if present. */
>> -    ra = file + strlen(file) - 1;
>> -    while (ra >= file) {
>> -        if (parse_state == 0) {
>> -            if (*ra == ':') {
>> -                parse_state++;
>> -            } else {
>> +            char *opt_start = colon + 1;
>> +
>> +            /* Look for an equals sign */
>> +            char *equals = memchr(opt_start, '=', end - opt_start);
>> +            if (NULL == equals) {
>>                  break;
>>              }
>> -        } else if (parse_state == 1) {
>> -            if (*ra > '9' || *ra < '0') {
>> -                char *opt_start = ra - strlen(RA_OPTSTR) + 1;
>> -                if (opt_start > file &&
>> -                    strncmp(opt_start, RA_OPTSTR, strlen(RA_OPTSTR)) == 0) {
>> -                    ra_val = ra + 1;
>> -                    ra -= strlen(RA_OPTSTR) - 1;
>> -                    *ra = '\0';
>> -                    qdict_put(options, "readahead", qstring_from_str(ra_val));
>> -                }
>> +
>> +            size_t opt_len = equals - opt_start;
>> +            char *value = equals + 1;
>> +            size_t value_len = end - value;
>> +
>> +            if (opt_len == strlen(CURL_BLOCK_OPT_READAHEAD) &&
>> +                memcmp(opt_start, CURL_BLOCK_OPT_READAHEAD, opt_len) == 0) {
>> +                /* This is redundant after the first iteration */
>> +                *end = '\0';
>> +                qdict_put(options, CURL_BLOCK_OPT_READAHEAD,
>> +                          qstring_from_str(value));
>> +            } else {
>> +                /* Unknown option */
>>                  break;
> 
> So if we get an unknown option, we simply abort parsing the filename.
> This means that we'll try to open a URL that still contains an option
> and will probably fail with a rather confusing error message.
> 
> Shouldn't we set a clear error message about the unknown option here
> with error_setg() and immediately return?

TBH I was just trying to stay compatible with the behaviour which most
recently worked. Hence the weirdness with the trailing ':', for example.

I did consider whether to do ignore these options or not. I decided to
ignore them because the option syntax isn't safe: the string
':readahead=1k:' could be found at the end of a valid URL. Ignoring
unknown options reduces the chances of a false positive. For example, in:

http://example.com/path?query=foo:

How do you avoid throwing an error about the unknown option
'//example.com/path?query'? I could probably chuck in a couple of
heuristics to avoid the obvious false positives, but it would be even
messier. The best option is not to do this at all, and use the separated
options command line syntax. In fact, I might add a note to the docs
advising this.

Alternatively I could completely change the syntax. However, the only
'safe' syntax I can think of would involve ugly custom escaping, which
would probably catch out more people than unsafe option parsing. I'm
open to suggestions.

On a related note, do you know if it's possible to specify a backing
file with separated options? i.e.:

qemu-img create -f qcow2 -o
backingfile.url='http://example.com/path',backingfile.readahead='64k'
/tmp/foo.qcow2

I suspect that qcow2 only stores a string, so probably not, but I
thought I'd ask.

Matt
Eric Blake May 1, 2014, 12:04 p.m. UTC | #3
On 05/01/2014 02:56 AM, Matthew Booth wrote:
> On 30/04/14 16:16, Kevin Wolf wrote:
>> Am 30.04.2014 um 16:20 hat Matthew Booth geschrieben:
>>> curl_parse_filename wasn't removing the option string from the url,
>>> resulting in a 404.
>>>


> 
> Alternatively I could completely change the syntax. However, the only
> 'safe' syntax I can think of would involve ugly custom escaping, which
> would probably catch out more people than unsafe option parsing. I'm
> open to suggestions.

The name is a URI, so it should already be possible to use URI escaping
(percent-hex-hex) for any character that must be interpreted as part of
the filename rather than as a bogus query.  Therefore, you should be
strict and require that the user passes in a valid URI with all options
known and with the filename spelled correctly using URI escapes, rather
than loose and parsing what would otherwise be garbage as a possible
weird filename.

> 
> On a related note, do you know if it's possible to specify a backing
> file with separated options? i.e.:
> 
> qemu-img create -f qcow2 -o
> backingfile.url='http://example.com/path',backingfile.readahead='64k'
> /tmp/foo.qcow2

It should be possible; in fact, if the backing file is supported in
BlockdevOptions for the QMP blockdev-add command it already is.  If it
is not supported in BlockdevOptions, then we need structured options
added to make it possible.  But you can use any of the backing stores
that are already structured as your example.

> 
> I suspect that qcow2 only stores a string, so probably not, but I
> thought I'd ask.

There's a proposal for a new json: protocol, which allows storing ANY
arbitrary BlockdevOptions as a flat string in the qcow2 metadata (within
the limits of the header format which forces you to 1024 bytes or less).
Kevin Wolf May 5, 2014, 9:27 a.m. UTC | #4
Am 01.05.2014 um 10:56 hat Matthew Booth geschrieben:
> On 30/04/14 16:16, Kevin Wolf wrote:
> > Am 30.04.2014 um 16:20 hat Matthew Booth geschrieben:
> >> curl_parse_filename wasn't removing the option string from the url,
> >> resulting in a 404.
> >>
> >> This change is a essentially a rewrite of that function as I also need
> >> to extend it to handle more options. The rewrite is also much easier
> >> to read.
> >>
> >> Signed-off-by: Matthew Booth <mbooth@redhat.com>
> >> ---
> >>  block/curl.c | 81 ++++++++++++++++++++++++++++++++++++++----------------------
> >>  1 file changed, 52 insertions(+), 29 deletions(-)
> >>
> >> diff --git a/block/curl.c b/block/curl.c
> >> index d2f1084..4de6856 100644
> >> --- a/block/curl.c
> >> +++ b/block/curl.c
> >> @@ -46,12 +46,15 @@
> >>  #define CURL_NUM_STATES 8
> >>  #define CURL_NUM_ACB    8
> >>  #define SECTOR_SIZE     512
> >> -#define READ_AHEAD_SIZE (256 * 1024)
> >> +#define READ_AHEAD_DEFAULT (256 * 1024)
> >>  
> >>  #define FIND_RET_NONE   0
> >>  #define FIND_RET_OK     1
> >>  #define FIND_RET_WAIT   2
> >>  
> >> +#define CURL_BLOCK_OPT_URL       "url"
> >> +#define CURL_BLOCK_OPT_READAHEAD "readahead"
> >> +
> >>  struct BDRVCURLState;
> >>  
> >>  typedef struct CURLAIOCB {
> >> @@ -396,41 +399,60 @@ static void curl_clean_state(CURLState *s)
> >>  static void curl_parse_filename(const char *filename, QDict *options,
> >>                                  Error **errp)
> >>  {
> >> -
> >> -    #define RA_OPTSTR ":readahead="
> >>      char *file;
> >> -    char *ra;
> >> -    const char *ra_val;
> >> -    int parse_state = 0;
> >> +    char *end;
> >>  
> >>      file = g_strdup(filename);
> >> +    end = file + strlen(file) - 1;
> >> +
> >> +    /* Don't fail if we've been passed an empty string.
> >> +     * Only examine strings with a trailing ':'
> >> +     */
> >> +    if (end >= file && *end == ':') {
> >> +        /* We exit this loop when we don't find a recognised option immediately
> >> +         * preceding the trailing ':' of the form ':<option>=<value>'
> >> +         *
> >> +         * If filename has a trailing ':' but no preceding options, we don't
> >> +         * remove the trailing ':'.
> >> +         */
> >> +        for (;;) {
> >> +            /* Look for the preceding colon */
> >> +            char *colon = memrchr(file, ':', end - file);
> >> +            if (NULL == colon) {
> >> +                break;
> >> +            }
> >>  
> >> -    /* Parse a trailing ":readahead=#:" param, if present. */
> >> -    ra = file + strlen(file) - 1;
> >> -    while (ra >= file) {
> >> -        if (parse_state == 0) {
> >> -            if (*ra == ':') {
> >> -                parse_state++;
> >> -            } else {
> >> +            char *opt_start = colon + 1;
> >> +
> >> +            /* Look for an equals sign */
> >> +            char *equals = memchr(opt_start, '=', end - opt_start);
> >> +            if (NULL == equals) {
> >>                  break;
> >>              }
> >> -        } else if (parse_state == 1) {
> >> -            if (*ra > '9' || *ra < '0') {
> >> -                char *opt_start = ra - strlen(RA_OPTSTR) + 1;
> >> -                if (opt_start > file &&
> >> -                    strncmp(opt_start, RA_OPTSTR, strlen(RA_OPTSTR)) == 0) {
> >> -                    ra_val = ra + 1;
> >> -                    ra -= strlen(RA_OPTSTR) - 1;
> >> -                    *ra = '\0';
> >> -                    qdict_put(options, "readahead", qstring_from_str(ra_val));
> >> -                }
> >> +
> >> +            size_t opt_len = equals - opt_start;
> >> +            char *value = equals + 1;
> >> +            size_t value_len = end - value;
> >> +
> >> +            if (opt_len == strlen(CURL_BLOCK_OPT_READAHEAD) &&
> >> +                memcmp(opt_start, CURL_BLOCK_OPT_READAHEAD, opt_len) == 0) {
> >> +                /* This is redundant after the first iteration */
> >> +                *end = '\0';
> >> +                qdict_put(options, CURL_BLOCK_OPT_READAHEAD,
> >> +                          qstring_from_str(value));
> >> +            } else {
> >> +                /* Unknown option */
> >>                  break;
> > 
> > So if we get an unknown option, we simply abort parsing the filename.
> > This means that we'll try to open a URL that still contains an option
> > and will probably fail with a rather confusing error message.
> > 
> > Shouldn't we set a clear error message about the unknown option here
> > with error_setg() and immediately return?
> 
> TBH I was just trying to stay compatible with the behaviour which most
> recently worked. Hence the weirdness with the trailing ':', for example.
> 
> I did consider whether to do ignore these options or not. I decided to
> ignore them because the option syntax isn't safe: the string
> ':readahead=1k:' could be found at the end of a valid URL. Ignoring
> unknown options reduces the chances of a false positive. For example, in:
> 
> http://example.com/path?query=foo:
> 
> How do you avoid throwing an error about the unknown option
> '//example.com/path?query'?

You don't? :-)

It's the same thing for local file names. If you want a safe way for
specifying files that have things like colons in their name, you
shouldn't use the filename string, but specific options that are taken
literally (like file.filename=foo:bar.qcow2), otherwise you get an error
about unknown protocols.

And Eric made the very good point that colons should be percent-encoded
anyway in a URL, so I really wouldn't worry too much about this. There
are enough ways for the user to achieve what they want, we don't have
to provide everything in the shortcut string.

>I could probably chuck in a couple of
> heuristics to avoid the obvious false positives, but it would be even
> messier. The best option is not to do this at all, and use the separated
> options command line syntax. In fact, I might add a note to the docs
> advising this.
> 
> Alternatively I could completely change the syntax. However, the only
> 'safe' syntax I can think of would involve ugly custom escaping, which
> would probably catch out more people than unsafe option parsing. I'm
> open to suggestions.

Let's not go there. We already have a safe syntax, and it's the
separated options.

> On a related note, do you know if it's possible to specify a backing
> file with separated options? i.e.:
> 
> qemu-img create -f qcow2 -o
> backingfile.url='http://example.com/path',backingfile.readahead='64k'
> /tmp/foo.qcow2
> 
> I suspect that qcow2 only stores a string, so probably not, but I
> thought I'd ask.

Like Eric said, the json: protocol is how we want to implement this.

If you're okay with specifying the option at runtime, you can already do
that with an option like this:

    -drive file=/tmp/foo.qcow2,backing.file.readahead=64k

Kevin
diff mbox

Patch

diff --git a/block/curl.c b/block/curl.c
index d2f1084..4de6856 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -46,12 +46,15 @@ 
 #define CURL_NUM_STATES 8
 #define CURL_NUM_ACB    8
 #define SECTOR_SIZE     512
-#define READ_AHEAD_SIZE (256 * 1024)
+#define READ_AHEAD_DEFAULT (256 * 1024)
 
 #define FIND_RET_NONE   0
 #define FIND_RET_OK     1
 #define FIND_RET_WAIT   2
 
+#define CURL_BLOCK_OPT_URL       "url"
+#define CURL_BLOCK_OPT_READAHEAD "readahead"
+
 struct BDRVCURLState;
 
 typedef struct CURLAIOCB {
@@ -396,41 +399,60 @@  static void curl_clean_state(CURLState *s)
 static void curl_parse_filename(const char *filename, QDict *options,
                                 Error **errp)
 {
-
-    #define RA_OPTSTR ":readahead="
     char *file;
-    char *ra;
-    const char *ra_val;
-    int parse_state = 0;
+    char *end;
 
     file = g_strdup(filename);
+    end = file + strlen(file) - 1;
+
+    /* Don't fail if we've been passed an empty string.
+     * Only examine strings with a trailing ':'
+     */
+    if (end >= file && *end == ':') {
+        /* We exit this loop when we don't find a recognised option immediately
+         * preceding the trailing ':' of the form ':<option>=<value>'
+         *
+         * If filename has a trailing ':' but no preceding options, we don't
+         * remove the trailing ':'.
+         */
+        for (;;) {
+            /* Look for the preceding colon */
+            char *colon = memrchr(file, ':', end - file);
+            if (NULL == colon) {
+                break;
+            }
 
-    /* Parse a trailing ":readahead=#:" param, if present. */
-    ra = file + strlen(file) - 1;
-    while (ra >= file) {
-        if (parse_state == 0) {
-            if (*ra == ':') {
-                parse_state++;
-            } else {
+            char *opt_start = colon + 1;
+
+            /* Look for an equals sign */
+            char *equals = memchr(opt_start, '=', end - opt_start);
+            if (NULL == equals) {
                 break;
             }
-        } else if (parse_state == 1) {
-            if (*ra > '9' || *ra < '0') {
-                char *opt_start = ra - strlen(RA_OPTSTR) + 1;
-                if (opt_start > file &&
-                    strncmp(opt_start, RA_OPTSTR, strlen(RA_OPTSTR)) == 0) {
-                    ra_val = ra + 1;
-                    ra -= strlen(RA_OPTSTR) - 1;
-                    *ra = '\0';
-                    qdict_put(options, "readahead", qstring_from_str(ra_val));
-                }
+
+            size_t opt_len = equals - opt_start;
+            char *value = equals + 1;
+            size_t value_len = end - value;
+
+            if (opt_len == strlen(CURL_BLOCK_OPT_READAHEAD) &&
+                memcmp(opt_start, CURL_BLOCK_OPT_READAHEAD, opt_len) == 0) {
+                /* This is redundant after the first iteration */
+                *end = '\0';
+                qdict_put(options, CURL_BLOCK_OPT_READAHEAD,
+                          qstring_from_str(value));
+            } else {
+                /* Unknown option */
                 break;
             }
+
+            end = colon;
+
+            /* We've found at least 1 option. Remove the trailing ':' */
+            *end = '\0';
         }
-        ra--;
     }
 
-    qdict_put(options, "url", qstring_from_str(file));
+    qdict_put(options, CURL_BLOCK_OPT_URL, qstring_from_str(file));
 
     g_free(file);
 }
@@ -440,12 +462,12 @@  static QemuOptsList runtime_opts = {
     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
     .desc = {
         {
-            .name = "url",
+            .name = CURL_BLOCK_OPT_URL,
             .type = QEMU_OPT_STRING,
             .help = "URL to open",
         },
         {
-            .name = "readahead",
+            .name = CURL_BLOCK_OPT_READAHEAD,
             .type = QEMU_OPT_SIZE,
             .help = "Readahead size",
         },
@@ -477,14 +499,15 @@  static int curl_open(BlockDriverState *bs, QDict *options, int flags,
         goto out_noclean;
     }
 
-    s->readahead_size = qemu_opt_get_size(opts, "readahead", READ_AHEAD_SIZE);
+    s->readahead_size = qemu_opt_get_size(opts, CURL_BLOCK_OPT_READAHEAD,
+                                          READ_AHEAD_DEFAULT);
     if ((s->readahead_size & 0x1ff) != 0) {
         error_setg(errp, "HTTP_READAHEAD_SIZE %zd is not a multiple of 512",
                    s->readahead_size);
         goto out_noclean;
     }
 
-    file = qemu_opt_get(opts, "url");
+    file = qemu_opt_get(opts, CURL_BLOCK_OPT_URL);
     if (file == NULL) {
         error_setg(errp, "curl block driver requires an 'url' option");
         goto out_noclean;