Message ID | 878ve6epif.fsf@blackfin.pond.sub.org |
---|---|
State | New |
Headers | show |
On Thu, 26 Jul 2012 13:22:00 +0200 Markus Armbruster <armbru@redhat.com> wrote: > Peter Maydell <peter.maydell@linaro.org> writes: > > > On 25 July 2012 20:18, Luiz Capitulino <lcapitulino@redhat.com> wrote: > >> Peter Maydell <peter.maydell@linaro.org> wrote: > >>> On 25 July 2012 17:54, Luiz Capitulino <lcapitulino@redhat.com> wrote: > >>> > --- a/scripts/qapi.py > >>> > +++ b/scripts/qapi.py > >>> > @@ -21,7 +21,9 @@ def tokenize(data): > >>> > elif data[0] == "'": > >>> > data = data[1:] > >>> > string = '' > >>> > - while data[0] != "'": > >>> > + while True: > >>> > + if data[0] == "'" and string[len(string)-1] != "\\": > >>> > + break > >>> > string += data[0] > >>> > data = data[1:] > >>> > data = data[1:] > >>> > >>> Won't this cause us to look at string[-1] if > >>> the input data has two ' characters in a row? > >> > >> Non escaped? If you meant '' that's a zero length string and should work, but > >> if you meant 'foo '' bar' that's illegal, because ' characters > >> should be escaped. > > > > I meant the zero length string case. yes. We come in with data = "''", > > strip the first ' and set string to empty. Then in the first time > > in the while loop data[0] is "'" but len(string) is 0 and so we'll > > do string[-1] which I think will throw an exception. > > > > ...and yep, quick test of a nobbbled qapi-schema.json confirms: > > $ python /home/pm215/src/qemu/qemu/scripts/qapi-types.py -h -o "." < > > /home/pm215/src/qemu/qemu/qapi-schema.json > > Traceback (most recent call last): > > File "/home/pm215/src/qemu/qemu/scripts/qapi-types.py", line 260, in <module> > > exprs = parse_schema(sys.stdin) > > File "/home/pm215/src/qemu/qemu/scripts/qapi.py", line 78, in parse_schema > > expr_eval = evaluate(expr) > > File "/home/pm215/src/qemu/qemu/scripts/qapi.py", line 64, in evaluate > > return parse(map(lambda x: x, tokenize(string)))[0] > > File "/home/pm215/src/qemu/qemu/scripts/qapi.py", line 25, in tokenize > > if data[0] == "'" and string[len(string)-1] != "\\": > > IndexError: string index out of range > > > > Try this (very lightly tested but seems to work): > > (feel free to do something nicer than raising an exception on > > the syntax error, and sorry I'm feeling too lazy to make this > > an actual patch email) > > > > Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Peter, I've replaced my original 07/11 patch with your patch below. > > > > --- a/scripts/qapi.py > > +++ b/scripts/qapi.py > > @@ -21,10 +21,16 @@ def tokenize(data): > > elif data[0] == "'": > > data = data[1:] > > string = '' > > - while data[0] != "'": > > - string += data[0] > > - data = data[1:] > > - data = data[1:] > > + while True: > > + pos = data.find("'") > > + if pos == -1: > > + raise Exception("Mismatched quotes") > > + string += data[0:pos] > > + data = data[pos+1:] > > + if len(string) == 0 or string[-1] != "\\": > > + # found a ' and it wasn't escaped > > + break > > + string = string[0:-1] + "'" > > yield string > > > > def parse(tokens): > > > > (if anybody wants to be able to use '\\' to escape escapes then > > this approach is a bit stuffed, of course.) > > For what it's worth, the orthodox way to lexically analyze strings is a > finite automaton. Utterly untested sketch: Feel free to send a patch if you're strong about this. > > diff --git a/scripts/qapi.py b/scripts/qapi.py > index 8082af3..a745e92 100644 > --- a/scripts/qapi.py > +++ b/scripts/qapi.py > @@ -21,8 +21,17 @@ def tokenize(data): > elif data[0] == "'": > data = data[1:] > string = '' > - while data[0] != "'": > - string += data[0] > + esc = False > + while True: > + if esc: > + string += data[0] > + esc = False > + elif data[0] == "\\": > + esc = True > + elif data[0] == "'": > + break > + else > + string += data[0] > data = data[1:] > data = data[1:] > yield string > > Doesn't handle missing close quote gracefully; you may want to add that. > > >> PS: Peter, I get claustrophobic when reading emails from you :) > > > > I can add more blank lines if that helps? :-) > > > > -- PMM >
Luiz Capitulino <lcapitulino@redhat.com> writes: > On Thu, 26 Jul 2012 13:22:00 +0200 > Markus Armbruster <armbru@redhat.com> wrote: > >> Peter Maydell <peter.maydell@linaro.org> writes: >> >> > On 25 July 2012 20:18, Luiz Capitulino <lcapitulino@redhat.com> wrote: >> >> Peter Maydell <peter.maydell@linaro.org> wrote: >> >>> On 25 July 2012 17:54, Luiz Capitulino <lcapitulino@redhat.com> wrote: >> >>> > --- a/scripts/qapi.py >> >>> > +++ b/scripts/qapi.py >> >>> > @@ -21,7 +21,9 @@ def tokenize(data): >> >>> > elif data[0] == "'": >> >>> > data = data[1:] >> >>> > string = '' >> >>> > - while data[0] != "'": >> >>> > + while True: >> >>> > + if data[0] == "'" and string[len(string)-1] != "\\": >> >>> > + break >> >>> > string += data[0] >> >>> > data = data[1:] >> >>> > data = data[1:] >> >>> >> >>> Won't this cause us to look at string[-1] if >> >>> the input data has two ' characters in a row? >> >> >> >> Non escaped? If you meant '' that's a zero length string and >> >> should work, but >> >> if you meant 'foo '' bar' that's illegal, because ' characters >> >> should be escaped. >> > >> > I meant the zero length string case. yes. We come in with data = "''", >> > strip the first ' and set string to empty. Then in the first time >> > in the while loop data[0] is "'" but len(string) is 0 and so we'll >> > do string[-1] which I think will throw an exception. >> > >> > ...and yep, quick test of a nobbbled qapi-schema.json confirms: >> > $ python /home/pm215/src/qemu/qemu/scripts/qapi-types.py -h -o "." < >> > /home/pm215/src/qemu/qemu/qapi-schema.json >> > Traceback (most recent call last): >> > File "/home/pm215/src/qemu/qemu/scripts/qapi-types.py", line 260, in <module> >> > exprs = parse_schema(sys.stdin) >> > File "/home/pm215/src/qemu/qemu/scripts/qapi.py", line 78, in parse_schema >> > expr_eval = evaluate(expr) >> > File "/home/pm215/src/qemu/qemu/scripts/qapi.py", line 64, in evaluate >> > return parse(map(lambda x: x, tokenize(string)))[0] >> > File "/home/pm215/src/qemu/qemu/scripts/qapi.py", line 25, in tokenize >> > if data[0] == "'" and string[len(string)-1] != "\\": >> > IndexError: string index out of range >> > >> > Try this (very lightly tested but seems to work): >> > (feel free to do something nicer than raising an exception on >> > the syntax error, and sorry I'm feeling too lazy to make this >> > an actual patch email) >> > >> > Signed-off-by: Peter Maydell <peter.maydell@linaro.org> > > Peter, I've replaced my original 07/11 patch with your patch below. > >> > >> > --- a/scripts/qapi.py >> > +++ b/scripts/qapi.py >> > @@ -21,10 +21,16 @@ def tokenize(data): >> > elif data[0] == "'": >> > data = data[1:] >> > string = '' >> > - while data[0] != "'": >> > - string += data[0] >> > - data = data[1:] >> > - data = data[1:] >> > + while True: >> > + pos = data.find("'") >> > + if pos == -1: >> > + raise Exception("Mismatched quotes") >> > + string += data[0:pos] >> > + data = data[pos+1:] >> > + if len(string) == 0 or string[-1] != "\\": >> > + # found a ' and it wasn't escaped >> > + break >> > + string = string[0:-1] + "'" >> > yield string >> > >> > def parse(tokens): >> > >> > (if anybody wants to be able to use '\\' to escape escapes then >> > this approach is a bit stuffed, of course.) An escape mechanism that can't be escaped sucks :) >> For what it's worth, the orthodox way to lexically analyze strings is a >> finite automaton. Utterly untested sketch: > > Feel free to send a patch if you're strong about this. I'll leave that to the poor guy who first needs to escape escapes. [...]
diff --git a/scripts/qapi.py b/scripts/qapi.py index 8082af3..a745e92 100644 --- a/scripts/qapi.py +++ b/scripts/qapi.py @@ -21,8 +21,17 @@ def tokenize(data): elif data[0] == "'": data = data[1:] string = '' - while data[0] != "'": - string += data[0] + esc = False + while True: + if esc: + string += data[0] + esc = False + elif data[0] == "\\": + esc = True + elif data[0] == "'": + break + else + string += data[0] data = data[1:] data = data[1:] yield string