diff mbox

[10/11] json-streamer: limit the maximum recursion depth and maximum token count

Message ID 1299877249-13433-11-git-send-email-aliguori@us.ibm.com
State New
Headers show

Commit Message

Anthony Liguori March 11, 2011, 9 p.m. UTC
The JSON parse we use is a recursive decent parser which means that recursion
is used to backtrack.  To avoid stack overflows from malformed input, we need
to limit recursion depth.

Fortunately, we know the maximum recursion depth in the message parsing phase
so we can very easily check for unreasonably deep recursion.  If we find that
emit the current token stream and let the parser handle it.  The idea here is
that hopefully the stream will recover or at least error out gracefully.

We also limit the maximum token count.  We don't limit the number of tokens, but
rather the total memory used for tokens at any given point in time.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>

Comments

Michael Roth March 11, 2011, 11:16 p.m. UTC | #1
On 03/11/2011 03:00 PM, Anthony Liguori wrote:
> The JSON parse we use is a recursive decent parser which means that recursion
> is used to backtrack.  To avoid stack overflows from malformed input, we need
> to limit recursion depth.
>
> Fortunately, we know the maximum recursion depth in the message parsing phase
> so we can very easily check for unreasonably deep recursion.  If we find that
> emit the current token stream and let the parser handle it.  The idea here is
> that hopefully the stream will recover or at least error out gracefully.
>
> We also limit the maximum token count.  We don't limit the number of tokens, but
> rather the total memory used for tokens at any given point in time.
>
> Signed-off-by: Anthony Liguori<aliguori@us.ibm.com>
>
> diff --git a/json-streamer.c b/json-streamer.c
> index f7e7a68..c7f43b0 100644
> --- a/json-streamer.c
> +++ b/json-streamer.c
> @@ -18,6 +18,9 @@
>   #include "json-lexer.h"
>   #include "json-streamer.h"
>
> +#define MAX_TOKEN_SIZE (64ULL<<  20)
> +#define MAX_NESTING (1ULL<<  10)
> +
>   static void json_message_process_token(JSONLexer *lexer, QString *token, JSONTokenType type, int x, int y)
>   {
>       JSONMessageParser *parser = container_of(lexer, JSONMessageParser, lexer);
> @@ -49,6 +52,8 @@ static void json_message_process_token(JSONLexer *lexer, QString *token, JSONTok
>       qdict_put(dict, "x", qint_from_int(x));
>       qdict_put(dict, "y", qint_from_int(y));
>
> +    parser->token_size += token->length;
> +
>       qlist_append(parser->tokens, dict);
>
>       if (parser->brace_count == 0&&
> @@ -56,6 +61,19 @@ static void json_message_process_token(JSONLexer *lexer, QString *token, JSONTok
>           parser->emit(parser, parser->tokens);
>           QDECREF(parser->tokens);
>           parser->tokens = qlist_new();
> +        parser->token_size = 0;
> +    } else if (parser->token_size>  MAX_TOKEN_SIZE ||
> +               parser->bracket_count>  MAX_NESTING ||
> +               parser->brace_count>  MAX_NESTING) {
> +        /* Security consideration, we limit total memory allocated per object
> +         * and the maximum recursion depth that a message can force.
> +         */
> +        parser->brace_count = 0;
> +        parser->bracket_count = 0;
> +        parser->emit(parser, parser->tokens);

Should we even bother passing this to the parser? That's a lot stuff to 
churn on that we know isn't going to result in anything useful. If there 
was a proper object earlier in the stream it would've been emitted when 
brace/bracket count reached 0.

I think it might be nicer to do parser->emit(parser, NULL), and fix up 
json_parser_parse_err() to check for this and simply return NULL back to 
qmp_dispatch_err() or whoever is calling.

I think brace_count < 0 || bracket_count < 0 should get similar treatment.

> +        QDECREF(parser->tokens);
> +        parser->tokens = qlist_new();
> +        parser->token_size = 0;
>       }
>   }
>
> @@ -66,6 +84,7 @@ void json_message_parser_init(JSONMessageParser *parser,
>       parser->brace_count = 0;
>       parser->bracket_count = 0;
>       parser->tokens = qlist_new();
> +    parser->token_size = 0;
>
>       json_lexer_init(&parser->lexer, json_message_process_token);
>   }
> diff --git a/json-streamer.h b/json-streamer.h
> index 09f3bd7..f09bc4d 100644
> --- a/json-streamer.h
> +++ b/json-streamer.h
> @@ -24,6 +24,7 @@ typedef struct JSONMessageParser
>       int brace_count;
>       int bracket_count;
>       QList *tokens;
> +    uint64_t token_size;
>   } JSONMessageParser;
>
>   void json_message_parser_init(JSONMessageParser *parser,
Anthony Liguori March 12, 2011, 3:03 p.m. UTC | #2
On 03/11/2011 05:16 PM, Michael Roth wrote:
>>           parser->emit(parser, parser->tokens);
>>           QDECREF(parser->tokens);
>>           parser->tokens = qlist_new();
>> +        parser->token_size = 0;
>> +    } else if (parser->token_size>  MAX_TOKEN_SIZE ||
>> +               parser->bracket_count>  MAX_NESTING ||
>> +               parser->brace_count>  MAX_NESTING) {
>> +        /* Security consideration, we limit total memory allocated 
>> per object
>> +         * and the maximum recursion depth that a message can force.
>> +         */
>> +        parser->brace_count = 0;
>> +        parser->bracket_count = 0;
>> +        parser->emit(parser, parser->tokens);
> @@ -56,6 +61,19 @@ static void json_message_process_token(JSONLexer 
> *lexer, QString *token, JSONTok
>
> Should we even bother passing this to the parser? That's a lot stuff 
> to churn on that we know isn't going to result in anything useful. If 
> there was a proper object earlier in the stream it would've been 
> emitted when brace/bracket count reached 0.
>
> I think it might be nicer to do parser->emit(parser, NULL), and fix up 
> json_parser_parse_err() to check for this and simply return NULL back 
> to qmp_dispatch_err() or whoever is calling.
>
> I think brace_count < 0 || bracket_count < 0 should get similar 
> treatment.

I think the main advantage of doing it this way is that we can test the 
maximum stack usage by just doing a simple:

(while true; do echo "{'key': "; done) | socat stdio 
unix:/path/to/qmp.sock > /dev/null

However, if we don't pass the token list to the parser, we need to know 
exactly what the maximum is and only generate that depth in order to 
test stack usage.

The parser needs to be robust against bad input so from a test 
perspective, I like the idea of passing something to the parser even if 
we know it's bad.

Regards,

Anthony Liguori
diff mbox

Patch

diff --git a/json-streamer.c b/json-streamer.c
index f7e7a68..c7f43b0 100644
--- a/json-streamer.c
+++ b/json-streamer.c
@@ -18,6 +18,9 @@ 
 #include "json-lexer.h"
 #include "json-streamer.h"
 
+#define MAX_TOKEN_SIZE (64ULL << 20)
+#define MAX_NESTING (1ULL << 10)
+
 static void json_message_process_token(JSONLexer *lexer, QString *token, JSONTokenType type, int x, int y)
 {
     JSONMessageParser *parser = container_of(lexer, JSONMessageParser, lexer);
@@ -49,6 +52,8 @@  static void json_message_process_token(JSONLexer *lexer, QString *token, JSONTok
     qdict_put(dict, "x", qint_from_int(x));
     qdict_put(dict, "y", qint_from_int(y));
 
+    parser->token_size += token->length;
+
     qlist_append(parser->tokens, dict);
 
     if (parser->brace_count == 0 &&
@@ -56,6 +61,19 @@  static void json_message_process_token(JSONLexer *lexer, QString *token, JSONTok
         parser->emit(parser, parser->tokens);
         QDECREF(parser->tokens);
         parser->tokens = qlist_new();
+        parser->token_size = 0;
+    } else if (parser->token_size > MAX_TOKEN_SIZE ||
+               parser->bracket_count > MAX_NESTING ||
+               parser->brace_count > MAX_NESTING) {
+        /* Security consideration, we limit total memory allocated per object
+         * and the maximum recursion depth that a message can force.
+         */
+        parser->brace_count = 0;
+        parser->bracket_count = 0;
+        parser->emit(parser, parser->tokens);
+        QDECREF(parser->tokens);
+        parser->tokens = qlist_new();
+        parser->token_size = 0;
     }
 }
 
@@ -66,6 +84,7 @@  void json_message_parser_init(JSONMessageParser *parser,
     parser->brace_count = 0;
     parser->bracket_count = 0;
     parser->tokens = qlist_new();
+    parser->token_size = 0;
 
     json_lexer_init(&parser->lexer, json_message_process_token);
 }
diff --git a/json-streamer.h b/json-streamer.h
index 09f3bd7..f09bc4d 100644
--- a/json-streamer.h
+++ b/json-streamer.h
@@ -24,6 +24,7 @@  typedef struct JSONMessageParser
     int brace_count;
     int bracket_count;
     QList *tokens;
+    uint64_t token_size;
 } JSONMessageParser;
 
 void json_message_parser_init(JSONMessageParser *parser,