diff mbox series

[v2,1/7] docparse: Implement #define and #include

Message ID 20211101145342.7166-2-chrubis@suse.cz
State Changes Requested
Headers show
Series docparse improvements | expand

Commit Message

Cyril Hrubis Nov. 1, 2021, 2:53 p.m. UTC
We ignore most of the include statements and attempt to parse only
header files that reside in the same directory as the test source code.
This is because we are not interested in any system or library headers;
we are only looking for constants used in the tst_test structure, and
those are always either directly in the test source or in a header in
the same directory.

The macro support is very simple as well, it's a single pass as we are
not interested in intricate macros. We just need values for constants
that are used in the tst_test structure initializations.

+ Also add -v verbose mode that prints included files and defined macros

Signed-off-by: Cyril Hrubis <chrubis@suse.cz>
---
 docparse/docparse.c | 234 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 224 insertions(+), 10 deletions(-)

Comments

Richard Palethorpe Nov. 2, 2021, 10:05 a.m. UTC | #1
Cyril Hrubis <chrubis@suse.cz> writes:

> We ignore most of the include statements and we attempt to parse only
> header files that reside in the same directory as the test source code,
> that is since we are not interested in any system or library headers as
> we are only looking for constants used in the tst_test structure that
> are always either directly in the test source or in header in the same
> directory.
>
> The macro support is very simple as well, it's a single pass as we are
> not interested in intricate macros. We just need values for constants
> that are used in the tst_test structure initializations.
>
> + Also add -v verbose mode that prints included files and defined macros
>
> Signed-off-by: Cyril Hrubis <chrubis@suse.cz>

I don't see any issues that are likely to cause trouble
immediately. However please check the comments below to ensure they are
out-of-scope.

Reviewed-by: rpalethorpe@suse.com

> ---
>  docparse/docparse.c | 234 ++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 224 insertions(+), 10 deletions(-)
>
> diff --git a/docparse/docparse.c b/docparse/docparse.c
> index 8cd0d0eef..64f9d08d9 100644
> --- a/docparse/docparse.c
> +++ b/docparse/docparse.c
> @@ -1,9 +1,12 @@
>  // SPDX-License-Identifier: GPL-2.0-or-later
>  /*
> - * Copyright (c) 2019 Cyril Hrubis <chrubis@suse.cz>
> + * Copyright (c) 2019-2021 Cyril Hrubis <chrubis@suse.cz>
>   * Copyright (c) 2020 Petr Vorel <pvorel@suse.cz>
>   */
>  
> +#define _GNU_SOURCE
> +
> +#include <search.h>
>  #include <stdio.h>
>  #include <string.h>
>  #include <libgen.h>
> @@ -12,6 +15,9 @@
>  
>  #include "data_storage.h"
>  
> +static int verbose;
> +static char *includepath;
> +
>  #define WARN(str) fprintf(stderr, "WARNING: " str "\n")
>  
>  static void oneline_comment(FILE *f)
> @@ -126,7 +132,7 @@ static void maybe_comment(FILE *f, struct data_node *doc)
>  	}
>  }
>  
> -const char *next_token(FILE *f, struct data_node *doc)
> +static char *next_token(FILE *f, struct data_node *doc)
>  {
>  	size_t i = 0;
>  	static char buf[4096];
> @@ -159,6 +165,7 @@ const char *next_token(FILE *f, struct data_node *doc)
>  		case ',':
>  		case '[':
>  		case ']':
> +		case '#':
>  			if (i) {
>  				ungetc(c, f);
>  				goto exit;
> @@ -197,6 +204,46 @@ exit:
>  	return buf;
>  }
>  
> +static FILE *open_include(const char *includepath, FILE *f)
> +{
> +	char buf[256];
> +	char *path;
> +	FILE *inc;
> +
> +	if (!fscanf(f, "%s\n", buf))
> +		return NULL;
> +
> +	if (buf[0] != '"')
> +		return NULL;
> +
> +	char *filename = buf + 1;
> +
> +	if (!buf[0])
> +		return NULL;
> +
> +	filename[strlen(filename)-1] = 0;
> +
> +	if (asprintf(&path, "%s/%s", includepath, filename) < 0)
> +		return NULL;
> +
> +	inc = fopen(path, "r");
> +
> +	if (inc && verbose)
> +		fprintf(stderr, "INCLUDE %s\n", path);
> +
> +	free(path);
> +
> +	return inc;
> +}
> +
> +static void close_include(FILE *inc)
> +{
> +	if (verbose)
> +		fprintf(stderr, "INCLUDE END\n");
> +
> +	fclose(inc);
> +}
> +
>  static int parse_array(FILE *f, struct data_node *node)
>  {
>  	const char *token;
> @@ -234,9 +281,28 @@ static int parse_array(FILE *f, struct data_node *node)
>  	return 0;
>  }
>  
> +static void try_apply_macro(char **res)
> +{
> +	ENTRY macro = {
> +		.key = *res,
> +	};
> +
> +	ENTRY *ret;
> +
> +	ret = hsearch(macro, FIND);
> +
> +	if (!ret)
> +		return;
> +
> +	if (verbose)
> +		fprintf(stderr, "APPLYING MACRO %s=%s\n", ret->key, (char*)ret->data);
> +
> +	*res = ret->data;
> +}
> +
>  static int parse_test_struct(FILE *f, struct data_node *doc, struct data_node *node)
>  {
> -	const char *token;
> +	char *token;
>  	char *id = NULL;
>  	int state = 0;
>  	struct data_node *ret;
> @@ -280,6 +346,7 @@ static int parse_test_struct(FILE *f, struct data_node *doc, struct data_node *n
>  			ret = data_node_array();
>  			parse_array(f, ret);
>  		} else {
> +			try_apply_macro(&token);
>  			ret = data_node_string(token);
>  		}
>  
> @@ -302,6 +369,114 @@ static const char *tokens[] = {
>  	"{",
>  };
>  
> +static void macro_get_string(FILE *f, char *buf, char *buf_end)
> +{
> +	int c;
> +
> +	for (;;) {
> +		c = fgetc(f);
> +
> +		switch (c) {
> +		case '"':

Luckily there are no instances of '#define MACRO "...\"...\"..."' in LTP
AFAICT. Also there don't appear to be any '#define MACRO "..." \\n' that
we would care about.

> +		case EOF:
> +			*buf = 0;
> +			return;
> +		default:
> +			if (buf < buf_end)
> +				*(buf++) = c;
> +		}
> +	}
> +}
> +
> +static void macro_get_val(FILE *f, char *buf, size_t buf_len)
> +{
> +	int c, prev = 0;
> +	char *buf_end = buf + buf_len - 1;
> +
> +	c = fgetc(f);
> +	if (c == '"') {

I guess this could be whitespace unless scanf slurps any trailing
whitespace?

Again no actual instances of this AFAICT.

> +		macro_get_string(f, buf, buf_end);
> +		return;
> +	}
> +
> +	for (;;) {
> +		switch (c) {
> +		case '\n':
> +			if (prev == '\\') {
> +				buf--;
> +			} else {
> +				*buf = 0;
> +				return;
> +			}
> +		break;
> +		case EOF:
> +			*buf = 0;
> +			return;
> +		case ' ':
> +		case '\t':
> +		break;
> +		default:
> +			if (buf < buf_end)
> +				*(buf++) = c;
> +		}
> +
> +		prev = c;
> +		c = fgetc(f);
> +	}
> +}
> +
> +static void parse_macro(FILE *f)
> +{
> +	char name[128];
> +	char val[256];
> +
> +	if (!fscanf(f, "%s[^\n]", name))
> +		return;
> +
> +	if (fgetc(f) == '\n')
> +		return;
> +
> +	macro_get_val(f, val, sizeof(val));
> +
> +	ENTRY e = {
> +		.key = strdup(name),
> +		.data = strdup(val),
> +	};
> +
> +	if (verbose)
> +		fprintf(stderr, " MACRO %s=%s\n", e.key, (char*)e.data);
> +
> +	hsearch(e, ENTER);
> +}
> +
> +static void parse_include_macros(FILE *f)
> +{
> +	FILE *inc;
> +	const char *token;
> +	int hash = 0;
> +
> +	inc = open_include(includepath, f);
> +	if (!inc)
> +		return;
> +
> +	while ((token = next_token(inc, NULL))) {
> +		if (token[0] == '#') {
> +			hash = 1;
> +			continue;
> +		}
> +
> +		if (!hash)
> +			continue;
> +
> +		if (!strcmp(token, "define"))
> +			parse_macro(inc);
> +
> +		hash = 0;
> +	}
> +
> +	close_include(inc);
> +}
> +
>  static struct data_node *parse_file(const char *fname)
>  {
>  	int state = 0, found = 0;
> @@ -314,14 +489,28 @@ static struct data_node *parse_file(const char *fname)
>  
>  	FILE *f = fopen(fname, "r");
>  
> +	includepath = dirname(strdup(fname));
> +
>  	struct data_node *res = data_node_hash();
>  	struct data_node *doc = data_node_array();
>  
>  	while ((token = next_token(f, doc))) {
> -		if (state < 6 && !strcmp(tokens[state], token))
> +		if (state < 6 && !strcmp(tokens[state], token)) {
>  			state++;
> -		else
> +		} else {
> +			if (token[0] == '#') {
> +				token = next_token(f, doc);
> +				if (token) {
> +					if (!strcmp(token, "define"))
> +						parse_macro(f);
> +
> +					if (!strcmp(token, "include"))
> +						parse_include_macros(f);
> +				}
> +			}
> +
>  			state = 0;
> +		}
>  
>  		if (state < 6)
>  			continue;
> @@ -386,17 +575,42 @@ const char *strip_name(char *path)
>  	return name;
>  }
>  
> +static void print_help(const char *prgname)
> +{
> +	printf("usage: %s [-vh] input.c\n\n", prgname);
> +	printf("-v sets verbose mode\n");
> +	printf("-h prints this help\n\n");
> +	exit(0);
> +}
> +
>  int main(int argc, char *argv[])
>  {
>  	unsigned int i, j;
>  	struct data_node *res;
> +	int opt;
> +
> +	while ((opt = getopt(argc, argv, "hv")) != -1) {
> +		switch (opt) {
> +		case 'h':
> +			print_help(argv[0]);
> +		break;
> +		case 'v':
> +			verbose = 1;
> +		break;
> +		}
> +	}
> +
> +	if (optind >= argc) {
> +		fprintf(stderr, "No input filename.c\n");
> +		return 1;
> +	}
>  
> -	if (argc != 2) {
> -		fprintf(stderr, "Usage: docparse filename.c\n");
> +	if (!hcreate(128)) {
> +		fprintf(stderr, "Failed to initialize hash table\n");
>  		return 1;
>  	}
>  
> -	res = parse_file(argv[1]);
> +	res = parse_file(argv[optind]);
>  	if (!res)
>  		return 0;
>  
> @@ -425,8 +639,8 @@ int main(int argc, char *argv[])
>  		}
>  	}
>  
> -	data_node_hash_add(res, "fname", data_node_string(argv[1]));
> -	printf("  \"%s\": ", strip_name(argv[1]));
> +	data_node_hash_add(res, "fname", data_node_string(argv[optind]));
> +	printf("  \"%s\": ", strip_name(argv[optind]));
>  	data_to_json(res, stdout, 2);
>  	data_node_free(res);
>  
> -- 
> 2.32.0
Cyril Hrubis Nov. 2, 2021, 11:21 a.m. UTC | #2
Hi!
> > +static void macro_get_string(FILE *f, char *buf, char *buf_end)
> > +{
> > +	int c;
> > +
> > +	for (;;) {
> > +		c = fgetc(f);
> > +
> > +		switch (c) {
> > +		case '"':
> 
> Luckily there are no instances of '#define MACRO "...\"...\"..."' in LTP
> AFAICT. Also there don't appear to be any '#define MACRO "..." \\n' that
> we would care about.

Well, I can fix that and add a test to be sure.

> > +		case EOF:
> > +			*buf = 0;
> > +			return;
> > +		default:
> > +			if (buf < buf_end)
> > +				*(buf++) = c;
> > +		}
> > +	}
> > +}
> > +
> > +static void macro_get_val(FILE *f, char *buf, size_t buf_len)
> > +{
> > +	int c, prev = 0;
> > +	char *buf_end = buf + buf_len - 1;
> > +
> > +	c = fgetc(f);
> > +	if (c == '"') {
> 
> I guess this could be whitespace unless scanf slurps any trailing
> whitespace?

The scanf does not slurp any trailing whitespace, so this should be
fixed by:

	while (isspace(c = fgetc(f)));

With that we get slightly better output, so I will add that before
applying.
Petr Vorel Nov. 2, 2021, 2:54 p.m. UTC | #3
Hi Cyril, Richie,

> Hi!
> > > +static void macro_get_string(FILE *f, char *buf, char *buf_end)
> > > +{
> > > +	int c;
> > > +
> > > +	for (;;) {
> > > +		c = fgetc(f);
> > > +
> > > +		switch (c) {
> > > +		case '"':

> > Luckily there are no instances of '#define MACRO "...\"...\"..."' in LTP
> > AFAICT. Also there don't appear to be any '#define MACRO "..." \\n' that
> > we would care about.
Good catch!

Unless you send v3 feel free to add
Reviewed-by: Petr Vorel <pvorel@suse.cz>

> Well, I can fix that and add a test to be sure.
Thanks!

...
> > > +static void macro_get_val(FILE *f, char *buf, size_t buf_len)
> > > +{
> > > +	int c, prev = 0;
> > > +	char *buf_end = buf + buf_len - 1;
> > > +
> > > +	c = fgetc(f);
> > > +	if (c == '"') {

> > I guess this could be whitespace unless scanf slurps any trailing
> > whitespace?

> The scanf does not slurp any trailing whitespace, so this should be
> fixed by:

> 	while (isspace(c = fgetc(f)));

> With that we get slightly better output, so I will add that before
> applying.
+1

Kind regards,
Petr
Cyril Hrubis Nov. 2, 2021, 3:10 p.m. UTC | #4
Hi!
> Unless you send v3 feel free to add

These two fixes are pretty minor changes, so I think that these can be
fixed before applying.
Petr Vorel Nov. 2, 2021, 3:38 p.m. UTC | #5
> Hi!
> > Unless you send v3 feel free to add

> These two fixes are pretty minor changes, so I think that these can be
> fixed before applying.

Sure.

Kind regards,
Petr
Richard Palethorpe Nov. 3, 2021, 9:08 a.m. UTC | #6
Hello,

Cyril Hrubis <chrubis@suse.cz> writes:

> Hi!
>> Unless you send v3 feel free to add
>
> These two fixes are pretty minor changes, so I think that these can be
> fixed before applying.

Oh and to confirm, all patches should be marked with

git interpret-trailers --trailer 'Reviewed-by: Richard Palethorpe <rpalethorpe@suse.com>'
diff mbox series

Patch

diff --git a/docparse/docparse.c b/docparse/docparse.c
index 8cd0d0eef..64f9d08d9 100644
--- a/docparse/docparse.c
+++ b/docparse/docparse.c
@@ -1,9 +1,12 @@ 
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * Copyright (c) 2019 Cyril Hrubis <chrubis@suse.cz>
+ * Copyright (c) 2019-2021 Cyril Hrubis <chrubis@suse.cz>
  * Copyright (c) 2020 Petr Vorel <pvorel@suse.cz>
  */
 
+#define _GNU_SOURCE
+
+#include <search.h>
 #include <stdio.h>
 #include <string.h>
 #include <libgen.h>
@@ -12,6 +15,9 @@ 
 
 #include "data_storage.h"
 
+static int verbose;
+static char *includepath;
+
 #define WARN(str) fprintf(stderr, "WARNING: " str "\n")
 
 static void oneline_comment(FILE *f)
@@ -126,7 +132,7 @@  static void maybe_comment(FILE *f, struct data_node *doc)
 	}
 }
 
-const char *next_token(FILE *f, struct data_node *doc)
+static char *next_token(FILE *f, struct data_node *doc)
 {
 	size_t i = 0;
 	static char buf[4096];
@@ -159,6 +165,7 @@  const char *next_token(FILE *f, struct data_node *doc)
 		case ',':
 		case '[':
 		case ']':
+		case '#':
 			if (i) {
 				ungetc(c, f);
 				goto exit;
@@ -197,6 +204,46 @@  exit:
 	return buf;
 }
 
+static FILE *open_include(const char *includepath, FILE *f)
+{
+	char buf[256];
+	char *path;
+	FILE *inc;
+
+	if (!fscanf(f, "%s\n", buf))
+		return NULL;
+
+	if (buf[0] != '"')
+		return NULL;
+
+	char *filename = buf + 1;
+
+	if (!buf[0])
+		return NULL;
+
+	filename[strlen(filename)-1] = 0;
+
+	if (asprintf(&path, "%s/%s", includepath, filename) < 0)
+		return NULL;
+
+	inc = fopen(path, "r");
+
+	if (inc && verbose)
+		fprintf(stderr, "INCLUDE %s\n", path);
+
+	free(path);
+
+	return inc;
+}
+
+static void close_include(FILE *inc)
+{
+	if (verbose)
+		fprintf(stderr, "INCLUDE END\n");
+
+	fclose(inc);
+}
+
 static int parse_array(FILE *f, struct data_node *node)
 {
 	const char *token;
@@ -234,9 +281,28 @@  static int parse_array(FILE *f, struct data_node *node)
 	return 0;
 }
 
+static void try_apply_macro(char **res)
+{
+	ENTRY macro = {
+		.key = *res,
+	};
+
+	ENTRY *ret;
+
+	ret = hsearch(macro, FIND);
+
+	if (!ret)
+		return;
+
+	if (verbose)
+		fprintf(stderr, "APPLYING MACRO %s=%s\n", ret->key, (char*)ret->data);
+
+	*res = ret->data;
+}
+
 static int parse_test_struct(FILE *f, struct data_node *doc, struct data_node *node)
 {
-	const char *token;
+	char *token;
 	char *id = NULL;
 	int state = 0;
 	struct data_node *ret;
@@ -280,6 +346,7 @@  static int parse_test_struct(FILE *f, struct data_node *doc, struct data_node *n
 			ret = data_node_array();
 			parse_array(f, ret);
 		} else {
+			try_apply_macro(&token);
 			ret = data_node_string(token);
 		}
 
@@ -302,6 +369,114 @@  static const char *tokens[] = {
 	"{",
 };
 
+static void macro_get_string(FILE *f, char *buf, char *buf_end)
+{
+	int c;
+
+	for (;;) {
+		c = fgetc(f);
+
+		switch (c) {
+		case '"':
+		case EOF:
+			*buf = 0;
+			return;
+		default:
+			if (buf < buf_end)
+				*(buf++) = c;
+		}
+	}
+}
+
+static void macro_get_val(FILE *f, char *buf, size_t buf_len)
+{
+	int c, prev = 0;
+	char *buf_end = buf + buf_len - 1;
+
+	c = fgetc(f);
+	if (c == '"') {
+		macro_get_string(f, buf, buf_end);
+		return;
+	}
+
+	for (;;) {
+		switch (c) {
+		case '\n':
+			if (prev == '\\') {
+				buf--;
+			} else {
+				*buf = 0;
+				return;
+			}
+		break;
+		case EOF:
+			*buf = 0;
+			return;
+		case ' ':
+		case '\t':
+		break;
+		default:
+			if (buf < buf_end)
+				*(buf++) = c;
+		}
+
+		prev = c;
+		c = fgetc(f);
+	}
+}
+
+static void parse_macro(FILE *f)
+{
+	char name[128];
+	char val[256];
+
+	if (!fscanf(f, "%s[^\n]", name))
+		return;
+
+	if (fgetc(f) == '\n')
+		return;
+
+	macro_get_val(f, val, sizeof(val));
+
+	ENTRY e = {
+		.key = strdup(name),
+		.data = strdup(val),
+	};
+
+	if (verbose)
+		fprintf(stderr, " MACRO %s=%s\n", e.key, (char*)e.data);
+
+	hsearch(e, ENTER);
+}
+
+static void parse_include_macros(FILE *f)
+{
+	FILE *inc;
+	const char *token;
+	int hash = 0;
+
+	inc = open_include(includepath, f);
+	if (!inc)
+		return;
+
+	while ((token = next_token(inc, NULL))) {
+		if (token[0] == '#') {
+			hash = 1;
+			continue;
+		}
+
+		if (!hash)
+			continue;
+
+		if (!strcmp(token, "define"))
+			parse_macro(inc);
+
+		hash = 0;
+	}
+
+	close_include(inc);
+}
+
 static struct data_node *parse_file(const char *fname)
 {
 	int state = 0, found = 0;
@@ -314,14 +489,28 @@  static struct data_node *parse_file(const char *fname)
 
 	FILE *f = fopen(fname, "r");
 
+	includepath = dirname(strdup(fname));
+
 	struct data_node *res = data_node_hash();
 	struct data_node *doc = data_node_array();
 
 	while ((token = next_token(f, doc))) {
-		if (state < 6 && !strcmp(tokens[state], token))
+		if (state < 6 && !strcmp(tokens[state], token)) {
 			state++;
-		else
+		} else {
+			if (token[0] == '#') {
+				token = next_token(f, doc);
+				if (token) {
+					if (!strcmp(token, "define"))
+						parse_macro(f);
+
+					if (!strcmp(token, "include"))
+						parse_include_macros(f);
+				}
+			}
+
 			state = 0;
+		}
 
 		if (state < 6)
 			continue;
@@ -386,17 +575,42 @@  const char *strip_name(char *path)
 	return name;
 }
 
+static void print_help(const char *prgname)
+{
+	printf("usage: %s [-vh] input.c\n\n", prgname);
+	printf("-v sets verbose mode\n");
+	printf("-h prints this help\n\n");
+	exit(0);
+}
+
 int main(int argc, char *argv[])
 {
 	unsigned int i, j;
 	struct data_node *res;
+	int opt;
+
+	while ((opt = getopt(argc, argv, "hv")) != -1) {
+		switch (opt) {
+		case 'h':
+			print_help(argv[0]);
+		break;
+		case 'v':
+			verbose = 1;
+		break;
+		}
+	}
+
+	if (optind >= argc) {
+		fprintf(stderr, "No input filename.c\n");
+		return 1;
+	}
 
-	if (argc != 2) {
-		fprintf(stderr, "Usage: docparse filename.c\n");
+	if (!hcreate(128)) {
+		fprintf(stderr, "Failed to initialize hash table\n");
 		return 1;
 	}
 
-	res = parse_file(argv[1]);
+	res = parse_file(argv[optind]);
 	if (!res)
 		return 0;
 
@@ -425,8 +639,8 @@  int main(int argc, char *argv[])
 		}
 	}
 
-	data_node_hash_add(res, "fname", data_node_string(argv[1]));
-	printf("  \"%s\": ", strip_name(argv[1]));
+	data_node_hash_add(res, "fname", data_node_string(argv[optind]));
+	printf("  \"%s\": ", strip_name(argv[optind]));
 	data_to_json(res, stdout, 2);
 	data_node_free(res);