Patchwork [U-Boot,2/2] env: add regex support for environment variables

login
register
mail settings
Submitter Wolfgang Denk
Date Nov. 6, 2011, 7:55 p.m.
Message ID <1320609326-3259-2-git-send-email-wd@denx.de>
Download mbox | patch
Permalink /patch/123964/
State Deferred
Delegated to: Tom Rini
Headers show

Comments

Wolfgang Denk - Nov. 6, 2011, 7:55 p.m.
Syntax:  env regex [-g] [-s subst] regex name [...]

The code is based on SLRE (http://slre.sourceforge.net/)
which provides a tiny subset of Perl regular expressions.

Without options, this will implement regex pattern matching on
environment variables.  Variables with matching values will be printed
as with "env print", so this basically performs a "grep" on the given
list of variables.

With "-s subst", the matching pattern gets replaced with the string
given in "subst".  Back references '\0' ... '\9' are allowed, where
'\0' stands for the whole matched string, and '\1', '\2', ... are
replaced with the first, second, ... sub-pattern.

"-g" allows for global replacement.

Examples:
	=> setenv foo abcdefghijklmnop
	=> env reg 'A' '[bdgmo]' foo
	=> env reg -s 'A' '[bdgmo]' foo
	foo=aAcdefghijklmnop
	=> env reg -g -s 'B' '[bdgmo]' foo
	foo=aAcBefBhijklBnBp
	=> env reg -g -s '\\2--\\1' '(Be).*(kl)' foo
	foo=aAckl--BeBnBp

[Note: the double backslashes are needed by U-Boot's "shell" so one
backslash gets passed to the actual command.]

Signed-off-by: Wolfgang Denk <wd@denx.de>
---
 common/cmd_nvedit.c |  288 +++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/Makefile        |    1 +
 2 files changed, 289 insertions(+), 0 deletions(-)
Detlev Zundel - Nov. 7, 2011, 11:07 a.m.
Hi Wolfgang,

this really is an interesting addition!

> Syntax:  env regex [-g] [-s subst] regex name [...]
>
> The code is based on SLRE (http://slre.sourceforge.net/)
> which provides a tiny subset of Perl regular expressions.
>
> Without options, this will implement regex pattern matching on
> environment variables.  Variables with matching values will be printd
> as with "env print", so this basicly performs a "grep" on the given
> list of variables.

Ok, this usage looks fine.

> With "-s subst", the matching pattern gets replaced with the string
> given in "subst".  Back references '\0' ... '\9' are allowed, where
> '\0' stands for the whole matched string, and '\1', '\2', ... are
> replaced with the first, second, ... sub-pattern.
>
> "-g" allows for global replacement.

But IMHO this usage doesn't really belong in the "env" command.  It
is rather a further operation of the setexpr command:

  set environment variable as the result of eval expression",
  name value1 <op> value2\n"
      - set environment variable 'name' to the result of the evaluated\n"
        express specified by <op>.  <op> can be &, |, ^, +, -, *, /, %"


We could add the <op>erations for regsubst and regsubstg.  For actual
names for the operations, I'm somewhat unsure.  Maybe "function like",
i.e. "regsubst(string, pattern, replacement)" and "regsubstg(string,
pattern, replacement)"?

> Examples:
> 	=> setenv foo abcdefghijklmnop
> 	=> env reg 'A' '[bdgmo]' foo
> 	=> env reg -s 'A' '[bdgmo]' foo
> 	foo=aAcdefghijklmnop
> 	=> env reg -g -s 'B' '[bdgmo]' foo
> 	foo=aAcBefBhijklBnBp
> 	=> env reg -g -s '\\2--\\1' '(Be).*(kl)' foo
> 	foo=aAckl--BeBnBp

So I'd vote for

=> setenv result regsubst($foo, '[bdgmo]', 'A')

What do you think?

Cheers
  Detlev

Patch

diff --git a/common/cmd_nvedit.c b/common/cmd_nvedit.c
index 2dd4eba..4307d27 100644
--- a/common/cmd_nvedit.c
+++ b/common/cmd_nvedit.c
@@ -164,6 +164,288 @@  int do_env_print (cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[])
 	return rcode;
 }
 
+#ifdef CONFIG_CMD_REGEX
+
+/*
+ * memstr - locate a byte sequence inside a memory area
+ * @s1: start of the area to search, @l1: number of bytes in it
+ * @s2: sequence to look for, @l2: number of bytes in it
+ *
+ * Works like strstr() on explicit lengths, so neither argument needs a
+ * terminating NUL.  Returns the first match, s1 if l2 is 0, else NULL.
+ */
+static char *memstr(const char *s1, int l1, const char *s2, int l2)
+{
+	const char *pos;
+
+	if (l2 == 0)
+		return (char *) s1;
+
+	for (pos = s1; l1 >= l2; ++pos, --l1) {
+		if (memcmp(pos, s2, l2) == 0)
+			return (char *) pos;
+	}
+	return NULL;
+}
+
+/*
+ * substitute - replace the first occurrence of one byte sequence by another
+ *
+ * Looks for old[0..olen) in the first *slen bytes of string and overwrites
+ * it in place with new[0..nlen), shifting what follows; ssize is the
+ * buffer capacity.  On success *slen is adjusted and the position just
+ * behind the inserted text is returned; NULL means "old" was not found
+ * or the result would be longer than ssize.
+ */
+static char *substitute(char *string, int *slen, int ssize,
+			const char *old, int olen,
+			const char *new, int nlen)
+{
+	int delta = nlen - olen;	/* change of the string length */
+	char *hit = memstr(string, *slen, old, olen);
+
+	if (!hit)
+		return NULL;
+
+	debug("## Match at pos %ld: match len %d, subst len %d\n",
+		(long)(hit - string), olen, nlen);
+
+	/* refuse results that do not fit into the buffer */
+	if (*slen + delta > ssize) {
+		printf("## error: substitution buffer overflow\n");
+		return NULL;
+	}
+
+	if (delta != 0) {
+		/* shift everything up to the end of the buffer */
+		int span = (delta > 0) ? nlen : olen;
+		int tail = ssize - (hit + span - string);
+
+		debug("## tail len %d\n", tail);
+		memmove(hit + nlen, hit + olen, tail);
+	}
+
+	memcpy(hit, new, nlen);
+	*slen += delta;
+
+	return hit + nlen;
+}
+
+#include "slre.h"
+
+#define	SLRE_BUFSZ	16384	/* size of env_regex() value work buffer */
+#define	SLRE_PATSZ	4096	/* size of expanded substitution buffer */
+
+/*
+ * env_regex - apply a regular expression to one environment variable
+ * @name:   variable to work on (NULL is accepted and ignored)
+ * @regexp: SLRE pattern that is matched against the variable's value
+ * @subst:  replacement, may contain \0 ... \9; NULL means "print only"
+ * @global: if non-zero replace all matches, else only the first one
+ *
+ * Returns 1 on error (bad regex, undefined variable, no match, buffer
+ * overflow), otherwise 0 or, after a substitution, what setenv() returns.
+ */
+static int env_regex(const char *name, const char *regexp, const char *subst,
+			int global)
+{
+	ENTRY e, *ep;
+	struct slre slre;
+	struct cap caps[32];
+	char data[SLRE_BUFSZ];
+	char *datap = data;
+	int res, len, slen, loop;
+
+	if (name == NULL)
+		return 0;
+
+	if (slre_compile(&slre, regexp) == 0) {
+		printf("Error compiling regex: %s\n", slre.err_str);
+		return 1;
+	}
+
+	e.key = name;
+	e.data = NULL;
+	hsearch_r(e, FIND, &ep, &env_htab);
+	if (ep == NULL) {
+		printf("## Error: \"%s\" not defined\n", name);
+		return 1;
+	}
+
+	debug("REGEX on %s=%s\n", ep->key, ep->data);
+	debug("REGEX=\"%s\", SUBST=\"%s\", GLOBAL=%d\n",
+		regexp, subst ? subst : "<NULL>", global);
+
+	len = strlen(ep->data);
+	if (len + 1 > SLRE_BUFSZ) {
+		printf("## error: substitution buffer overflow\n");
+		return 1;
+	}
+	strcpy(data, ep->data);
+	slen = subst ? strlen(subst) : 0;
+
+	for (loop = 0; ; loop++) {
+		char nbuf[SLRE_PATSZ];
+		char *hit, *np;
+		int i, off, rest, olen, nlen;
+
+		(void) memset(caps, 0, sizeof(caps));
+
+		/* search only the part behind the previous replacement */
+		rest = len - (datap - data);
+		res = slre_match(&slre, datap, rest, caps);
+		debug("Result: %d\n", res);
+
+		for (i = 0; i < (int)(sizeof(caps) / sizeof(caps[0])); i++) {
+			if (caps[i].len > 0) {
+				debug("Substring %d: [%.*s]\n", i,
+					caps[i].len, caps[i].ptr);
+			}
+		}
+
+		if (res == 0) {
+			if (loop == 0) {
+				printf("%s: No match\n", ep->key);
+				return 1;
+			}
+			break;
+		}
+
+		debug("## MATCH ## %s\n", data);
+
+		if (subst == NULL) {
+			printf("%s=%s\n", ep->key, ep->data);
+			return 0;
+		}
+
+		if (slen + 1 >= SLRE_PATSZ) {
+			printf("## error: pattern buffer overflow\n");
+			return 1;
+		}
+
+		/* every match starts again from the unexpanded replacement */
+		strcpy(nbuf, subst);
+		nlen = slen;
+		debug("## SUBST(1) ## %s\n", nbuf);
+
+		/*
+		 * Handle back references \0 ... \9, where \0 is the whole
+		 * matched pattern (similar to &).  They are substituted
+		 * sequentially, one by one, so a captured string which
+		 * itself contains \N may get substituted, too.  We accept
+		 * this restriction for the sake of simplicity.
+		 */
+		for (i = 0; i < 10; ++i) {
+			char backref[2] = { '\\', '0' };
+
+			if (caps[i].len == 0)
+				break;
+			backref[1] += i;
+
+			debug("## BACKREF %d: replace \"%.*s\" by \"%.*s\" in \"%s\"\n",
+				i, 2, backref,
+				caps[i].len, caps[i].ptr, nbuf);
+
+			for (np = nbuf;;) {
+				/* length and room as seen from np */
+				int done = np - nbuf;
+				int left = nlen - done;
+
+				if (memstr(np, left, backref, 2) == NULL)
+					break;
+
+				np = substitute(np, &left,
+					SLRE_PATSZ - 1 - done, backref, 2,
+					caps[i].ptr, caps[i].len);
+				if (np == NULL)
+					return 1;
+
+				nlen = done + left;
+				nbuf[nlen] = '\0';
+			}
+		}
+		debug("## SUBST(2) ## %s\n", nbuf);
+
+		/*
+		 * Replace the match itself (assumes caps[0].ptr points into
+		 * data[]); length and room are relative to it, minus the NUL.
+		 */
+		off = caps[0].ptr - data;
+		olen = caps[0].len;
+		hit = data + off;
+		rest = len - off;
+		datap = substitute(hit, &rest, SLRE_BUFSZ - 1 - off,
+				hit, olen, nbuf, nlen);
+		if (datap == NULL)
+			return 1;
+		len = off + rest;
+		data[len] = '\0';
+
+		debug("## REMAINDER: %s\n", datap);
+		debug("## RESULT: %s\n", data);
+
+		if (!global)
+			break;
+
+		/* do not find an empty match at the same place again */
+		if (olen == 0) {
+			if (datap >= data + len)
+				break;
+			datap++;
+		}
+	}
+	debug("## FINAL (now setenv()) :  %s\n", data);
+
+	printf("%s=%s\n", ep->key, data);
+
+	return setenv(ep->key, data);
+}
+
+/*
+ * "env regex [-g] [-s subst] [-e] regex name [...]": run env_regex() on
+ * every named variable; "-e" ends the options so the regex may start with
+ * '-'.  Returns 1 if any variable failed, else 0; cmd_usage() on bad usage.
+ */
+static int do_env_regex(cmd_tbl_t *cmdtp, int flag, int argc,
+			char *const argv[])
+{
+	int i, rcode = 0, global = 0;
+	const char *subst = NULL;
+
+	while (--argc > 0 && **++argv == '-') {
+		char *arg = *argv;
+		while (*++arg) {
+			switch (*arg) {
+			case 'e':	/* allow regex starting with '-' */
+				--argc;	/* the "-e" word itself is consumed */
+				++argv;
+				goto DONE;
+			case 'g':
+				global = 1;
+				break;
+			case 's':
+				if (--argc <= 0)
+					return cmd_usage(cmdtp);
+				subst = *++argv;
+				goto NXTARG;
+			default:
+				return cmd_usage(cmdtp);
+			}
+		}
+NXTARG:		;
+	}
+DONE:
+	if (argc < 2)
+		return cmd_usage(cmdtp);
+
+	/* argv[0] is the regex, the remaining arguments are variable names */
+	for (i = 1; i < argc; ++i) {
+		if (env_regex(argv[i], argv[0], subst, global) != 0)
+			rcode = 1;
+	}
+	return rcode;
+}
+#endif
+
 #ifdef CONFIG_CMD_GREPENV
 static int do_env_grep (cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[])
 {
@@ -1022,6 +1304,9 @@  static cmd_tbl_t cmd_env_sub[] = {
 	U_BOOT_CMD_MKENT(import, 5, 0, do_env_import, "", ""),
 #endif
 	U_BOOT_CMD_MKENT(print, CONFIG_SYS_MAXARGS, 1, do_env_print, "", ""),
+#if defined(CONFIG_CMD_REGEX)
+	U_BOOT_CMD_MKENT(regex, CONFIG_SYS_MAXARGS, 1, do_env_regex, "", ""),
+#endif
 #if defined(CONFIG_CMD_RUN)
 	U_BOOT_CMD_MKENT(run, CONFIG_SYS_MAXARGS, 1, do_run, "", ""),
 #endif
@@ -1076,6 +1361,9 @@  U_BOOT_CMD(
 #endif
 	"env import [-d] [-t | -b | -c] addr [size] - import environment\n"
 	"env print [name ...] - print environment\n"
+#ifdef CONFIG_CMD_REGEX
+	"env regex [-g] [-s subst] regex name [...] - search and substitute regular expression\n"
+#endif
 #if defined(CONFIG_CMD_RUN)
 	"env run var [...] - run commands in an environment variable\n"
 #endif
diff --git a/lib/Makefile b/lib/Makefile
index 54708c2..346409c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -48,6 +48,7 @@  COBJS-y += net_utils.o
 COBJS-y += qsort.o
 COBJS-$(CONFIG_SHA1) += sha1.o
 COBJS-$(CONFIG_SHA256) += sha256.o
+COBJS-$(CONFIG_CMD_REGEX) += slre.o
 COBJS-y	+= strmhz.o
 COBJS-$(CONFIG_RBTREE)	+= rbtree.o
 endif