[LEDE-DEV] jshn: add functionality to read big JSON

Message ID 20180107170809.31401-2-dontmind@freeshell.org
State Changes Requested
Delegated to: John Crispin
Headers show
Series
  • [LEDE-DEV] jshn: add functionality to read big JSON
Related show

Commit Message

Christian Beier Jan. 7, 2018, 5:08 p.m.
The existing read functionality feeds the complete JSON to jshn as a
cmdline argument, leading to `-ash: jshn: Argument list too long`
errors for JSONs bigger than ca. 100KB.

This commit adds the ability to read the JSON directly from a file if
wanted, removing this shell-imposed size limit.

Tested on x86-64 and ar71xx. An mmap()-based solution was also evaluated,
but found to make no performance difference on either platform.

Signed-off-by: Christian Beier <dontmind@freeshell.org>
---
 jshn.c     | 30 ++++++++++++++++++++++++++++--
 sh/jshn.sh |  4 ++++
 2 files changed, 32 insertions(+), 2 deletions(-)

Comments

John Crispin Jan. 17, 2018, 9:11 a.m. | #1
On 07/01/18 18:08, Christian Beier wrote:
> The existing read functionality feeds the complete JSON to jshn as a
> cmdline argument, leading to `-ash: jshn: Argument list too long`
> errors for JSONs bigger than ca. 100KB.
>
> This commit adds the ability to read the JSON directly from a file if
> wanted, removing this shell-imposed size limit.
>
> Tested on x86-64 and ar71xx. An mmap()-based solution was also evaluated,
> but found to make no performance difference on either platform.
>
> Signed-off-by: Christian Beier <dontmind@freeshell.org>

Hi Christian,

comment inline ...

> ---
>   jshn.c     | 30 ++++++++++++++++++++++++++++--
>   sh/jshn.sh |  4 ++++
>   2 files changed, 32 insertions(+), 2 deletions(-)
>
> diff --git a/jshn.c b/jshn.c
> index 3188af5..eb72fb7 100644
> --- a/jshn.c
> +++ b/jshn.c
> @@ -25,6 +25,8 @@
>   #include <stdbool.h>
>   #include <ctype.h>
>   #include <getopt.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
>   #include "list.h"
>   
>   #include "avl.h"
> @@ -305,7 +307,7 @@ out:
>   
>   static int usage(const char *progname)
>   {
> -	fprintf(stderr, "Usage: %s [-n] [-i] -r <message>|-w\n", progname);
> +	fprintf(stderr, "Usage: %s [-n] [-i] -r <message>|-R <file>|-w\n", progname);
>   	return 2;
>   }
>   
> @@ -338,6 +340,10 @@ int main(int argc, char **argv)
>   	struct env_var *vars;
>   	int i;
>   	int ch;
> +	int fd;
> +	struct stat sb;
> +	char *fbuf;
> +	int ret;
>   
>   	avl_init(&env_vars, avl_strcmp_var, false, NULL);
>   	for (i = 0; environ[i]; i++);
> @@ -359,7 +365,7 @@ int main(int argc, char **argv)
>   		avl_insert(&env_vars, &vars[i].avl);
>   	}
>   
> -	while ((ch = getopt(argc, argv, "p:nir:w")) != -1) {
> +	while ((ch = getopt(argc, argv, "p:nir:R:w")) != -1) {
>   		switch(ch) {
>   		case 'p':
>   			var_prefix = optarg;
> @@ -367,6 +373,26 @@ int main(int argc, char **argv)
>   			break;
>   		case 'r':
>   			return jshn_parse(optarg);
> +		case 'R':
> +			if ((fd = open(optarg, O_RDONLY)) == -1) {
> +				fprintf(stderr, "Error opening %s\n", optarg);
> +				return 3;
> +			}
> +			if (fstat(fd, &sb) == -1) {
> +				fprintf(stderr, "Error getting size of %s\n", optarg);
> +				close(fd);
> +				return 3;
> +			}
> +			if (!(fbuf = malloc(sb.st_size)) || read(fd, fbuf, sb.st_size) != sb.st_size) {
> +				fprintf(stderr, "Error reading %s\n", optarg);
> +				free(fbuf);

this will blow up if the malloc fails. please spli the if clause up into 
2 blocks

     John

> +				close(fd);
> +				return 3;
> +			}
> +			ret = jshn_parse(fbuf);
> +			free(fbuf);
> +			close(fd);
> +			return ret;
>   		case 'w':
>   			return jshn_format(no_newline, indent);
>   		case 'n':
> diff --git a/sh/jshn.sh b/sh/jshn.sh
> index 1090814..66baccb 100644
> --- a/sh/jshn.sh
> +++ b/sh/jshn.sh
> @@ -180,6 +180,10 @@ json_load() {
>   	eval "`jshn -r "$1"`"
>   }
>   
> +json_load_file() {
> +	eval "`jshn -R "$1"`"
> +}
> +
>   json_dump() {
>   	jshn "$@" ${JSON_PREFIX:+-p "$JSON_PREFIX"} -w
>   }
Arjen de Korte Jan. 17, 2018, 9:44 a.m. | #2
Citeren John Crispin <john@phrozen.org>:

> On 07/01/18 18:08, Christian Beier wrote:
>> The existing read functionality feeds the complete JSON to jshn as a
>> cmdline argument, leading to `-ash: jshn: Argument list too long`
>> errors for JSONs bigger than ca. 100KB.
>>
>> This commit adds the ability to read the JSON directly from a file if
>> wanted, removing this shell-imposed size limit.
>>
>> Tested on x86-64 and ar71xx. An mmap()-based solution was also evaluated,
>> but found to make no performance difference on either platform.
>>
>> Signed-off-by: Christian Beier <dontmind@freeshell.org>
>
> Hi Christian,
>
> comment inline ...
>
>> ---
>>  jshn.c     | 30 ++++++++++++++++++++++++++++--
>>  sh/jshn.sh |  4 ++++
>>  2 files changed, 32 insertions(+), 2 deletions(-)
>>
>> diff --git a/jshn.c b/jshn.c
>> index 3188af5..eb72fb7 100644
>> --- a/jshn.c
>> +++ b/jshn.c
>> @@ -25,6 +25,8 @@
>>  #include <stdbool.h>
>>  #include <ctype.h>
>>  #include <getopt.h>
>> +#include <sys/stat.h>
>> +#include <fcntl.h>
>>  #include "list.h"
>>    #include "avl.h"
>> @@ -305,7 +307,7 @@ out:
>>    static int usage(const char *progname)
>>  {
>> -	fprintf(stderr, "Usage: %s [-n] [-i] -r <message>|-w\n", progname);
>> +	fprintf(stderr, "Usage: %s [-n] [-i] -r <message>|-R  
>> <file>|-w\n", progname);
>>  	return 2;
>>  }
>>  @@ -338,6 +340,10 @@ int main(int argc, char **argv)
>>  	struct env_var *vars;
>>  	int i;
>>  	int ch;
>> +	int fd;
>> +	struct stat sb;
>> +	char *fbuf;
>> +	int ret;
>>    	avl_init(&env_vars, avl_strcmp_var, false, NULL);
>>  	for (i = 0; environ[i]; i++);
>> @@ -359,7 +365,7 @@ int main(int argc, char **argv)
>>  		avl_insert(&env_vars, &vars[i].avl);
>>  	}
>>  -	while ((ch = getopt(argc, argv, "p:nir:w")) != -1) {
>> +	while ((ch = getopt(argc, argv, "p:nir:R:w")) != -1) {
>>  		switch(ch) {
>>  		case 'p':
>>  			var_prefix = optarg;
>> @@ -367,6 +373,26 @@ int main(int argc, char **argv)
>>  			break;
>>  		case 'r':
>>  			return jshn_parse(optarg);
>> +		case 'R':
>> +			if ((fd = open(optarg, O_RDONLY)) == -1) {
>> +				fprintf(stderr, "Error opening %s\n", optarg);
>> +				return 3;
>> +			}
>> +			if (fstat(fd, &sb) == -1) {
>> +				fprintf(stderr, "Error getting size of %s\n", optarg);
>> +				close(fd);
>> +				return 3;
>> +			}
>> +			if (!(fbuf = malloc(sb.st_size)) || read(fd, fbuf, sb.st_size)  
>> != sb.st_size) {
>> +				fprintf(stderr, "Error reading %s\n", optarg);
>> +				free(fbuf);
>
> this will blow up if the malloc fails.

How would it? If the malloc fails and returns a NULL pointer, the read  
will not be performed. Free'ing a NULL pointer is allowed, so although  
assigning a value in an if() statement is considered a no-no by some  
(including me), there is no reason for it to blow up.

> please spli the if clause up into 2 blocks

Agreed (but for a different reason). A memory allocation error is  
different from failure to read from a file and error handlers should  
not treat them the same.

>     John
>
>> +				close(fd);
>> +				return 3;
>> +			}
>> +			ret = jshn_parse(fbuf);
>> +			free(fbuf);
>> +			close(fd);
>> +			return ret;
>>  		case 'w':
>>  			return jshn_format(no_newline, indent);
>>  		case 'n':
>> diff --git a/sh/jshn.sh b/sh/jshn.sh
>> index 1090814..66baccb 100644
>> --- a/sh/jshn.sh
>> +++ b/sh/jshn.sh
>> @@ -180,6 +180,10 @@ json_load() {
>>  	eval "`jshn -r "$1"`"
>>  }
>>  +json_load_file() {
>> +	eval "`jshn -R "$1"`"
>> +}
>> +
>>  json_dump() {
>>  	jshn "$@" ${JSON_PREFIX:+-p "$JSON_PREFIX"} -w
>>  }
>
>
> _______________________________________________
> Lede-dev mailing list
> Lede-dev@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/lede-dev
John Crispin Jan. 17, 2018, 9:55 a.m. | #3
On 17/01/18 10:44, Arjen de Korte wrote:
> Citeren John Crispin <john@phrozen.org>:
>
>> On 07/01/18 18:08, Christian Beier wrote:
>>> The existing read functionality feeds the complete JSON to jshn as a
>>> cmdline argument, leading to `-ash: jshn: Argument list too long`
>>> errors for JSONs bigger than ca. 100KB.
>>>
>>> This commit adds the ability to read the JSON directly from a file if
>>> wanted, removing this shell-imposed size limit.
>>>
>>> Tested on x86-64 and ar71xx. An mmap()-based solution was also 
>>> evaluated,
>>> but found to make no performance difference on either platform.
>>>
>>> Signed-off-by: Christian Beier <dontmind@freeshell.org>
>>
>> Hi Christian,
>>
>> comment inline ...
>>
>>> ---
>>>  jshn.c     | 30 ++++++++++++++++++++++++++++--
>>>  sh/jshn.sh |  4 ++++
>>>  2 files changed, 32 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/jshn.c b/jshn.c
>>> index 3188af5..eb72fb7 100644
>>> --- a/jshn.c
>>> +++ b/jshn.c
>>> @@ -25,6 +25,8 @@
>>>  #include <stdbool.h>
>>>  #include <ctype.h>
>>>  #include <getopt.h>
>>> +#include <sys/stat.h>
>>> +#include <fcntl.h>
>>>  #include "list.h"
>>>    #include "avl.h"
>>> @@ -305,7 +307,7 @@ out:
>>>    static int usage(const char *progname)
>>>  {
>>> -    fprintf(stderr, "Usage: %s [-n] [-i] -r <message>|-w\n", 
>>> progname);
>>> +    fprintf(stderr, "Usage: %s [-n] [-i] -r <message>|-R 
>>> <file>|-w\n", progname);
>>>      return 2;
>>>  }
>>>  @@ -338,6 +340,10 @@ int main(int argc, char **argv)
>>>      struct env_var *vars;
>>>      int i;
>>>      int ch;
>>> +    int fd;
>>> +    struct stat sb;
>>> +    char *fbuf;
>>> +    int ret;
>>>        avl_init(&env_vars, avl_strcmp_var, false, NULL);
>>>      for (i = 0; environ[i]; i++);
>>> @@ -359,7 +365,7 @@ int main(int argc, char **argv)
>>>          avl_insert(&env_vars, &vars[i].avl);
>>>      }
>>>  -    while ((ch = getopt(argc, argv, "p:nir:w")) != -1) {
>>> +    while ((ch = getopt(argc, argv, "p:nir:R:w")) != -1) {
>>>          switch(ch) {
>>>          case 'p':
>>>              var_prefix = optarg;
>>> @@ -367,6 +373,26 @@ int main(int argc, char **argv)
>>>              break;
>>>          case 'r':
>>>              return jshn_parse(optarg);
>>> +        case 'R':
>>> +            if ((fd = open(optarg, O_RDONLY)) == -1) {
>>> +                fprintf(stderr, "Error opening %s\n", optarg);
>>> +                return 3;
>>> +            }
>>> +            if (fstat(fd, &sb) == -1) {
>>> +                fprintf(stderr, "Error getting size of %s\n", optarg);
>>> +                close(fd);
>>> +                return 3;
>>> +            }
>>> +            if (!(fbuf = malloc(sb.st_size)) || read(fd, fbuf, 
>>> sb.st_size) != sb.st_size) {
>>> +                fprintf(stderr, "Error reading %s\n", optarg);
>>> +                free(fbuf);
>>
>> this will blow up if the malloc fails.
>
> How would it? If the malloc fails and returns a NULL pointer, the read 
> will not be performed. Free'ing a NULL pointer is allowed, so although 
> assigning a value in an if() statement is considered a no-no by some 
> (including me), there is no reason for it to blow up.

ok, point taken ....

>
>> please spli the if clause up into 2 blocks
>
> Agreed (but for a different reason). A memory allocation error is 
> different from failure to read from a file and error handlers should 
> not treat them the same.
>
>>     John
>>
>>> +                close(fd);
>>> +                return 3;
>>> +            }
>>> +            ret = jshn_parse(fbuf);
>>> +            free(fbuf);
>>> +            close(fd);
>>> +            return ret;
>>>          case 'w':
>>>              return jshn_format(no_newline, indent);
>>>          case 'n':
>>> diff --git a/sh/jshn.sh b/sh/jshn.sh
>>> index 1090814..66baccb 100644
>>> --- a/sh/jshn.sh
>>> +++ b/sh/jshn.sh
>>> @@ -180,6 +180,10 @@ json_load() {
>>>      eval "`jshn -r "$1"`"
>>>  }
>>>  +json_load_file() {
>>> +    eval "`jshn -R "$1"`"
>>> +}
>>> +
>>>  json_dump() {
>>>      jshn "$@" ${JSON_PREFIX:+-p "$JSON_PREFIX"} -w
>>>  }
>>
>>
>> _______________________________________________
>> Lede-dev mailing list
>> Lede-dev@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/lede-dev
>
>
>
>
> _______________________________________________
> Lede-dev mailing list
> Lede-dev@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/lede-dev
Christian Beier Jan. 18, 2018, 8:21 p.m. | #4
Hi John,

Here's v3 of the patch with separate error handlers for the
different fail cases.

Cheers,

  Christian

Patch

diff --git a/jshn.c b/jshn.c
index 3188af5..eb72fb7 100644
--- a/jshn.c
+++ b/jshn.c
@@ -25,6 +25,8 @@ 
 #include <stdbool.h>
 #include <ctype.h>
 #include <getopt.h>
+#include <sys/stat.h>
+#include <fcntl.h>
 #include "list.h"
 
 #include "avl.h"
@@ -305,7 +307,7 @@  out:
 
 static int usage(const char *progname)
 {
-	fprintf(stderr, "Usage: %s [-n] [-i] -r <message>|-w\n", progname);
+	fprintf(stderr, "Usage: %s [-n] [-i] -r <message>|-R <file>|-w\n", progname);
 	return 2;
 }
 
@@ -338,6 +340,10 @@  int main(int argc, char **argv)
 	struct env_var *vars;
 	int i;
 	int ch;
+	int fd;
+	struct stat sb;
+	char *fbuf;
+	int ret;
 
 	avl_init(&env_vars, avl_strcmp_var, false, NULL);
 	for (i = 0; environ[i]; i++);
@@ -359,7 +365,7 @@  int main(int argc, char **argv)
 		avl_insert(&env_vars, &vars[i].avl);
 	}
 
-	while ((ch = getopt(argc, argv, "p:nir:w")) != -1) {
+	while ((ch = getopt(argc, argv, "p:nir:R:w")) != -1) {
 		switch(ch) {
 		case 'p':
 			var_prefix = optarg;
@@ -367,6 +373,26 @@  int main(int argc, char **argv)
 			break;
 		case 'r':
 			return jshn_parse(optarg);
+		case 'R':
+			if ((fd = open(optarg, O_RDONLY)) == -1) {
+				fprintf(stderr, "Error opening %s\n", optarg);
+				return 3;
+			}
+			if (fstat(fd, &sb) == -1) {
+				fprintf(stderr, "Error getting size of %s\n", optarg);
+				close(fd);
+				return 3;
+			}
+			if (!(fbuf = malloc(sb.st_size)) || read(fd, fbuf, sb.st_size) != sb.st_size) {
+				fprintf(stderr, "Error reading %s\n", optarg);
+				free(fbuf);
+				close(fd);
+				return 3;
+			}
+			ret = jshn_parse(fbuf);
+			free(fbuf);
+			close(fd);
+			return ret;
 		case 'w':
 			return jshn_format(no_newline, indent);
 		case 'n':
diff --git a/sh/jshn.sh b/sh/jshn.sh
index 1090814..66baccb 100644
--- a/sh/jshn.sh
+++ b/sh/jshn.sh
@@ -180,6 +180,10 @@  json_load() {
 	eval "`jshn -r "$1"`"
 }
 
+json_load_file() {
+	eval "`jshn -R "$1"`"
+}
+
 json_dump() {
 	jshn "$@" ${JSON_PREFIX:+-p "$JSON_PREFIX"} -w 
 }