diff mbox series

[v2,52/60] json: Eliminate lexer state IN_WHITESPACE, pseudo-token JSON_SKIP

Message ID 20180817150559.16243-53-armbru@redhat.com
State New
Headers show
Series json: Fixes, error reporting improvements, cleanups | expand

Commit Message

Markus Armbruster Aug. 17, 2018, 3:05 p.m. UTC
Bonus: static json_lexer[] loses its unused elements.  It shrinks from
8KiB to 4.75KiB for me.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
---
 include/qapi/qmp/json-lexer.h |  1 -
 qobject/json-lexer.c          | 26 +++++++++-----------------
 2 files changed, 9 insertions(+), 18 deletions(-)

Comments

Eric Blake Aug. 17, 2018, 4:07 p.m. UTC | #1
On 08/17/2018 10:05 AM, Markus Armbruster wrote:
> Bonus: static json_lexer[] loses its unused elements.  It shrinks from
> 8KiB to 4.75KiB for me.
> 
> Signed-off-by: Markus Armbruster <armbru@redhat.com>
> Reviewed-by: Eric Blake <eblake@redhat.com>
> ---

>           ['a' ... 'z'] = IN_KEYWORD,
> -        [' '] = IN_WHITESPACE,
> -        ['\t'] = IN_WHITESPACE,
> -        ['\r'] = IN_WHITESPACE,
> -        ['\n'] = IN_WHITESPACE,
> +        [' '] = IN_START,
> +        ['\t'] = IN_START,
> +        ['\r'] = IN_START,
> +        ['\n'] = IN_START,
>       },
>       [IN_START_INTERPOL]['%'] = IN_INTERPOL,
> +    [IN_START_INTERPOL][' '] = IN_START_INTERPOL,
> +    [IN_START_INTERPOL]['\t'] = IN_START_INTERPOL,
> +    [IN_START_INTERPOL]['\r'] = IN_START_INTERPOL,
> +    [IN_START_INTERPOL]['\n'] = IN_START_INTERPOL,

Hmm, if we did this:

[IN_START_INTERPOL] {
   ['%'] = IN_INTERPOL,
   ['\t'] = IN_START_INTERPOL,
...
}

for similarity with all our other constructs, will gcc remember that 
we've already initialized other members not listed in the clause before, 
or will it mistakenly re-0-initialize the array members not mentioned?
Markus Armbruster Aug. 20, 2018, 11:51 a.m. UTC | #2
Eric Blake <eblake@redhat.com> writes:

> On 08/17/2018 10:05 AM, Markus Armbruster wrote:
>> Bonus: static json_lexer[] loses its unused elements.  It shrinks from
>> 8KiB to 4.75KiB for me.
>>
>> Signed-off-by: Markus Armbruster <armbru@redhat.com>
>> Reviewed-by: Eric Blake <eblake@redhat.com>
>> ---
>
>>           ['a' ... 'z'] = IN_KEYWORD,
>> -        [' '] = IN_WHITESPACE,
>> -        ['\t'] = IN_WHITESPACE,
>> -        ['\r'] = IN_WHITESPACE,
>> -        ['\n'] = IN_WHITESPACE,
>> +        [' '] = IN_START,
>> +        ['\t'] = IN_START,
>> +        ['\r'] = IN_START,
>> +        ['\n'] = IN_START,
>>       },
>>       [IN_START_INTERPOL]['%'] = IN_INTERPOL,
>> +    [IN_START_INTERPOL][' '] = IN_START_INTERPOL,
>> +    [IN_START_INTERPOL]['\t'] = IN_START_INTERPOL,
>> +    [IN_START_INTERPOL]['\r'] = IN_START_INTERPOL,
>> +    [IN_START_INTERPOL]['\n'] = IN_START_INTERPOL,
>
> Hmm, if we did this:
>
> [IN_START_INTERPOL] {
>   ['%'] = IN_INTERPOL,
>   ['\t'] = IN_START_INTERPOL,
> ...
> }
>
> for similarity with all our other constructs, will gcc remember that
> we've already initialized other members not listed in the clause
> before, or will it mistakenly re-0-initialize the array members not
> mentioned?

Fails make check.

(gdb) p json_lexer[IN_START_INTERPOL]
$1 = "\000\000\000\000\000\000\000\000\000\016\016\000\000\016", '\000' <repeats 18 times>, "\016\000\000\000\000\035", '\000' <repeats 217 times>
(gdb) p json_lexer[IN_START]
$2 = "\000\000\000\000\000\000\000\000\000\r\r\000\000\r", '\000' <repeats 18 times>, "\r\000\020\000\000\000\000\022\000\000\000\000\006\033\000\000\023\032\032\032\032\032\032\032\032\032\005", '\000' <repeats 32 times>, "\003\000\004\000\000\000", '\034' <repeats 26 times>, "\001\000\002", '\000' <repeats 129 times>
Eric Blake Aug. 20, 2018, 6:38 p.m. UTC | #3
On 08/20/2018 06:51 AM, Markus Armbruster wrote:

>> Hmm, if we did this:
>>
>> [IN_START_INTERPOL] {
>>    ['%'] = IN_INTERPOL,
>>    ['\t'] = IN_START_INTERPOL,
>> ...
>> }
>>
>> for similarity with all our other constructs, will gcc remember that
>> we've already initialized other members not listed in the clause
>> before, or will it mistakenly re-0-initialize the array members not
>> mentioned?
> 
> Fails make check.
> 
> (gdb) p json_lexer[IN_START_INTERPOL]
> $1 = "\000\000\000\000\000\000\000\000\000\016\016\000\000\016", '\000' <repeats 18 times>, "\016\000\000\000\000\035", '\000' <repeats 217 times>

And thus answered my question - abbreviating causes gcc to 
re-zero-initialize any unmentioned members, so your override has to be 
one member at a time, as originally written.
diff mbox series

Patch

diff --git a/include/qapi/qmp/json-lexer.h b/include/qapi/qmp/json-lexer.h
index f3524de07a..1a2dbbb717 100644
--- a/include/qapi/qmp/json-lexer.h
+++ b/include/qapi/qmp/json-lexer.h
@@ -27,7 +27,6 @@  typedef enum {
     JSON_KEYWORD,
     JSON_STRING,
     JSON_INTERPOL,
-    JSON_SKIP,
     JSON_END_OF_INPUT           /* must be last */
 } JSONTokenType;
 
diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c
index 49e075a51e..431a1ede61 100644
--- a/qobject/json-lexer.c
+++ b/qobject/json-lexer.c
@@ -119,7 +119,6 @@  enum {
     IN_SIGN,
     IN_KEYWORD,
     IN_INTERPOL,
-    IN_WHITESPACE,
 };
 
 QEMU_BUILD_BUG_ON(JSON_ERROR != 0); /* json_lexer[] relies on this */
@@ -214,15 +213,6 @@  static const uint8_t json_lexer[][256] =  {
         ['a' ... 'z'] = IN_KEYWORD,
     },
 
-    /* whitespace */
-    [IN_WHITESPACE] = {
-        TERMINAL(JSON_SKIP),
-        [' '] = IN_WHITESPACE,
-        ['\t'] = IN_WHITESPACE,
-        ['\r'] = IN_WHITESPACE,
-        ['\n'] = IN_WHITESPACE,
-    },
-
     /* interpolation */
     [IN_INTERPOL] = {
         TERMINAL(JSON_INTERPOL),
@@ -249,12 +239,16 @@  static const uint8_t json_lexer[][256] =  {
         [','] = JSON_COMMA,
         [':'] = JSON_COLON,
         ['a' ... 'z'] = IN_KEYWORD,
-        [' '] = IN_WHITESPACE,
-        ['\t'] = IN_WHITESPACE,
-        ['\r'] = IN_WHITESPACE,
-        ['\n'] = IN_WHITESPACE,
+        [' '] = IN_START,
+        ['\t'] = IN_START,
+        ['\r'] = IN_START,
+        ['\n'] = IN_START,
     },
     [IN_START_INTERPOL]['%'] = IN_INTERPOL,
+    [IN_START_INTERPOL][' '] = IN_START_INTERPOL,
+    [IN_START_INTERPOL]['\t'] = IN_START_INTERPOL,
+    [IN_START_INTERPOL]['\r'] = IN_START_INTERPOL,
+    [IN_START_INTERPOL]['\n'] = IN_START_INTERPOL,
 };
 
 void json_lexer_init(JSONLexer *lexer, bool enable_interpolation)
@@ -279,7 +273,7 @@  static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
         assert(lexer->state <= ARRAY_SIZE(json_lexer));
         new_state = json_lexer[lexer->state][(uint8_t)ch];
         char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state);
-        if (char_consumed && !flush) {
+        if (char_consumed && new_state != lexer->start_state && !flush) {
             g_string_append_c(lexer->token, ch);
         }
 
@@ -297,8 +291,6 @@  static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
         case JSON_STRING:
             json_message_process_token(lexer, lexer->token, new_state,
                                        lexer->x, lexer->y);
-            /* fall through */
-        case JSON_SKIP:
             g_string_truncate(lexer->token, 0);
             new_state = lexer->start_state;
             break;