Patchwork lib: fwts_klog: optimize regex scanning

login
register
mail settings
Submitter Colin King
Date July 23, 2012, 7:55 p.m.
Message ID <1343073341-26154-1-git-send-email-colin.king@canonical.com>
Download mbox | patch
Permalink /patch/172735/
State Accepted
Headers show

Comments

Colin King - July 23, 2012, 7:55 p.m.
From: Colin Ian King <colin.king@canonical.com>

pcre allows one to optimize regex matching and without it i386
systems seem to randomly break (which isn't expected) on some
regex pattern matches.  So, use the pcre study and also add some
safe-guarding in the code to ensure we only compare regexes if
the compiled form is not NULL.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
 src/lib/include/fwts_klog.h |    1 +
 src/lib/src/fwts_klog.c     |   26 ++++++++++++++++++++++----
 2 files changed, 23 insertions(+), 4 deletions(-)
Keng-Yu Lin - July 25, 2012, 7:51 a.m.
On Tue, Jul 24, 2012 at 3:55 AM, Colin King <colin.king@canonical.com> wrote:
> From: Colin Ian King <colin.king@canonical.com>
>
> pcre allows one to optimize regex matching and without it i386
> systems seem to randomly break (which isn't expected) on some
> regex pattern matches.  So, use the pcre study and also add some
> safe-guarding in the code to ensure we only compare regexes if
> the compiled form is not NULL.
>
> Signed-off-by: Colin Ian King <colin.king@canonical.com>
> ---
>  src/lib/include/fwts_klog.h |    1 +
>  src/lib/src/fwts_klog.c     |   26 ++++++++++++++++++++++----
>  2 files changed, 23 insertions(+), 4 deletions(-)
>
> diff --git a/src/lib/include/fwts_klog.h b/src/lib/include/fwts_klog.h
> index 3bbed5f..a47cf7a 100644
> --- a/src/lib/include/fwts_klog.h
> +++ b/src/lib/include/fwts_klog.h
> @@ -44,6 +44,7 @@ typedef struct {
>          const char *pattern;
>         const char *advice;
>         pcre *re;
> +       pcre_extra *extra;
>  } fwts_klog_pattern;
>
>  typedef void (*fwts_klog_progress_func)(fwts_framework *fw, int percent);
> diff --git a/src/lib/src/fwts_klog.c b/src/lib/src/fwts_klog.c
> index 93dbb06..ea713e8 100644
> --- a/src/lib/src/fwts_klog.c
> +++ b/src/lib/src/fwts_klog.c
> @@ -230,7 +230,8 @@ void fwts_klog_scan_patterns(fwts_framework *fw,
>                 int matched = 0;
>                 switch (pattern->compare_mode) {
>                 case FWTS_COMPARE_REGEX:
> -                       matched = (pcre_exec(pattern->re, NULL, line, strlen(line), 0, 0, vector, 1) == 0);
> +                       if (pattern->re)
> +                               matched = (pcre_exec(pattern->re, pattern->extra, line, strlen(line), 0, 0, vector, 1) == 0);
>                         break;
>                 case FWTS_COMPARE_STRING:
>                 default:
> @@ -354,8 +355,16 @@ static int fwts_klog_check(fwts_framework *fw,
>                 if ((patterns[i].advice = fwts_json_str(fw, table, i, obj, "advice")) == NULL)
>                         goto fail;
>
> -               if ((patterns[i].re = pcre_compile(patterns[i].pattern, 0, &error, &erroffset, NULL)) == NULL)
> +               if ((patterns[i].re = pcre_compile(patterns[i].pattern, 0, &error, &erroffset, NULL)) == NULL) {
>                         fwts_log_error(fw, "Regex %s failed to compile: %s.", patterns[i].pattern, error);
> +                       patterns[i].re = NULL;
> +               } else {
> +                       patterns[i].extra = pcre_study(patterns[i].re, 0, &error);
> +                       if (error != NULL) {
> +                               fwts_log_error(fw, "Regex %s failed to optimize: %s.", patterns[i].pattern, error);
> +                               patterns[i].re = NULL;
> +                       }
> +               }
>         }
>         /* We've now collected up the scan patterns, lets scan the log for errors */
>         ret = fwts_klog_scan(fw, klog, fwts_klog_scan_patterns, progress, patterns, errors);
> @@ -364,6 +373,8 @@ fail:
>         for (i=0; i<n; i++)
>                 if (patterns[i].re)
>                         pcre_free(patterns[i].re);
> +               if (patterns[i].extra)
> +                       pcre_free(patterns[i].extra);
>         free(patterns);
>  fail_put:
>         json_object_put(klog_objs);
> @@ -398,15 +409,22 @@ static void fwts_klog_regex_find_callback(fwts_framework *fw, char *line, int re
>         const char *error;
>         int erroffset;
>         pcre *re;
> +       pcre_extra *extra;
>         int rc;
>         int vector[1];
>
>         re = pcre_compile(pattern, 0, &error, &erroffset, NULL);
>         if (re != NULL) {
> -               rc = pcre_exec(re, NULL, line, strlen(line), 0, 0, vector, 1);
> +               extra = pcre_study(re, 0, &error);
> +               if (error)
> +                       return;
> +
> +               rc = pcre_exec(re, extra, line, strlen(line), 0, 0, vector, 1);
> +               if (extra)
> +                       free(extra);
> +               pcre_free(re);
>                 if (rc == 0)
>                         (*match)++;
> -               pcre_free(re);
>         }
>  }
>
> --
> 1.7.10.4
>
Acked-by: Keng-Yu Lin <kengyu@canonical.com>
(LP: #1028031)
Alex Hung - July 27, 2012, 7:01 a.m.
On 07/24/2012 03:55 AM, Colin King wrote:
> From: Colin Ian King <colin.king@canonical.com>
>
> pcre allows one to optimize regex matching and without it i386
> systems seem to randomly break (which isn't expected) on some
> regex pattern matches.  So, use the pcre study and also add some
> safe-guarding in the code to ensure we only compare regexes if
> the compiled form is not NULL.
>
> Signed-off-by: Colin Ian King <colin.king@canonical.com>
> ---
>   src/lib/include/fwts_klog.h |    1 +
>   src/lib/src/fwts_klog.c     |   26 ++++++++++++++++++++++----
>   2 files changed, 23 insertions(+), 4 deletions(-)
>
> diff --git a/src/lib/include/fwts_klog.h b/src/lib/include/fwts_klog.h
> index 3bbed5f..a47cf7a 100644
> --- a/src/lib/include/fwts_klog.h
> +++ b/src/lib/include/fwts_klog.h
> @@ -44,6 +44,7 @@ typedef struct {
>           const char *pattern;
>   	const char *advice;
>   	pcre *re;
> +	pcre_extra *extra;
>   } fwts_klog_pattern;
>
>   typedef void (*fwts_klog_progress_func)(fwts_framework *fw, int percent);
> diff --git a/src/lib/src/fwts_klog.c b/src/lib/src/fwts_klog.c
> index 93dbb06..ea713e8 100644
> --- a/src/lib/src/fwts_klog.c
> +++ b/src/lib/src/fwts_klog.c
> @@ -230,7 +230,8 @@ void fwts_klog_scan_patterns(fwts_framework *fw,
>   		int matched = 0;
>   		switch (pattern->compare_mode) {
>   		case FWTS_COMPARE_REGEX:
> -			matched = (pcre_exec(pattern->re, NULL, line, strlen(line), 0, 0, vector, 1) == 0);
> +			if (pattern->re)
> +				matched = (pcre_exec(pattern->re, pattern->extra, line, strlen(line), 0, 0, vector, 1) == 0);
>   			break;
>   		case FWTS_COMPARE_STRING:
>   		default:
> @@ -354,8 +355,16 @@ static int fwts_klog_check(fwts_framework *fw,
>   		if ((patterns[i].advice = fwts_json_str(fw, table, i, obj, "advice")) == NULL)
>   			goto fail;
>
> -		if ((patterns[i].re = pcre_compile(patterns[i].pattern, 0, &error, &erroffset, NULL)) == NULL)
> +		if ((patterns[i].re = pcre_compile(patterns[i].pattern, 0, &error, &erroffset, NULL)) == NULL) {
>   			fwts_log_error(fw, "Regex %s failed to compile: %s.", patterns[i].pattern, error);
> +			patterns[i].re = NULL;
> +		} else {
> +			patterns[i].extra = pcre_study(patterns[i].re, 0, &error);
> +			if (error != NULL) {
> +				fwts_log_error(fw, "Regex %s failed to optimize: %s.", patterns[i].pattern, error);
> +				patterns[i].re = NULL;
> +			}
> +		}
>   	}
>   	/* We've now collected up the scan patterns, lets scan the log for errors */
>   	ret = fwts_klog_scan(fw, klog, fwts_klog_scan_patterns, progress, patterns, errors);
> @@ -364,6 +373,8 @@ fail:
>   	for (i=0; i<n; i++)
>   		if (patterns[i].re)
>   			pcre_free(patterns[i].re);
> +		if (patterns[i].extra)
> +			pcre_free(patterns[i].extra);
>   	free(patterns);
>   fail_put:
>   	json_object_put(klog_objs);
> @@ -398,15 +409,22 @@ static void fwts_klog_regex_find_callback(fwts_framework *fw, char *line, int re
>   	const char *error;
>   	int erroffset;
>   	pcre *re;
> +	pcre_extra *extra;
>   	int rc;
>   	int vector[1];
>
>   	re = pcre_compile(pattern, 0, &error, &erroffset, NULL);
>   	if (re != NULL) {
> -		rc = pcre_exec(re, NULL, line, strlen(line), 0, 0, vector, 1);
> +		extra = pcre_study(re, 0, &error);
> +		if (error)
> +			return;
> +
> +		rc = pcre_exec(re, extra, line, strlen(line), 0, 0, vector, 1);
> +		if (extra)
> +			free(extra);
> +		pcre_free(re);
>   		if (rc == 0)
>   			(*match)++;
> -		pcre_free(re);
>   	}
>   }
>
>
Acked-by: Alex Hung <alex.hung@canonical.com>

Patch

diff --git a/src/lib/include/fwts_klog.h b/src/lib/include/fwts_klog.h
index 3bbed5f..a47cf7a 100644
--- a/src/lib/include/fwts_klog.h
+++ b/src/lib/include/fwts_klog.h
@@ -44,6 +44,7 @@  typedef struct {
         const char *pattern;
 	const char *advice;
 	pcre *re;
+	pcre_extra *extra;
 } fwts_klog_pattern;
 
 typedef void (*fwts_klog_progress_func)(fwts_framework *fw, int percent);
diff --git a/src/lib/src/fwts_klog.c b/src/lib/src/fwts_klog.c
index 93dbb06..ea713e8 100644
--- a/src/lib/src/fwts_klog.c
+++ b/src/lib/src/fwts_klog.c
@@ -230,7 +230,8 @@  void fwts_klog_scan_patterns(fwts_framework *fw,
 		int matched = 0;
 		switch (pattern->compare_mode) {
 		case FWTS_COMPARE_REGEX:
-			matched = (pcre_exec(pattern->re, NULL, line, strlen(line), 0, 0, vector, 1) == 0);
+			if (pattern->re)
+				matched = (pcre_exec(pattern->re, pattern->extra, line, strlen(line), 0, 0, vector, 1) == 0);
 			break;
 		case FWTS_COMPARE_STRING:
 		default:
@@ -354,8 +355,16 @@  static int fwts_klog_check(fwts_framework *fw,
 		if ((patterns[i].advice = fwts_json_str(fw, table, i, obj, "advice")) == NULL)
 			goto fail;
 
-		if ((patterns[i].re = pcre_compile(patterns[i].pattern, 0, &error, &erroffset, NULL)) == NULL)
+		if ((patterns[i].re = pcre_compile(patterns[i].pattern, 0, &error, &erroffset, NULL)) == NULL) {
 			fwts_log_error(fw, "Regex %s failed to compile: %s.", patterns[i].pattern, error);
+			patterns[i].re = NULL;
+		} else {
+			patterns[i].extra = pcre_study(patterns[i].re, 0, &error);
+			if (error != NULL) {
+				fwts_log_error(fw, "Regex %s failed to optimize: %s.", patterns[i].pattern, error);
+				patterns[i].re = NULL;
+			}
+		}
 	}
 	/* We've now collected up the scan patterns, lets scan the log for errors */
 	ret = fwts_klog_scan(fw, klog, fwts_klog_scan_patterns, progress, patterns, errors);
@@ -364,6 +373,8 @@  fail:
 	for (i=0; i<n; i++)
 		if (patterns[i].re)
 			pcre_free(patterns[i].re);
+		if (patterns[i].extra)
+			pcre_free(patterns[i].extra);
 	free(patterns);
 fail_put:
 	json_object_put(klog_objs);
@@ -398,15 +409,22 @@  static void fwts_klog_regex_find_callback(fwts_framework *fw, char *line, int re
 	const char *error;
 	int erroffset;
 	pcre *re;
+	pcre_extra *extra;
 	int rc;
 	int vector[1];
 
 	re = pcre_compile(pattern, 0, &error, &erroffset, NULL);
 	if (re != NULL) {
-		rc = pcre_exec(re, NULL, line, strlen(line), 0, 0, vector, 1);
+		extra = pcre_study(re, 0, &error);
+		if (error)
+			return;
+
+		rc = pcre_exec(re, extra, line, strlen(line), 0, 0, vector, 1);
+		if (extra)
+			free(extra);
+		pcre_free(re);
 		if (rc == 0)
 			(*match)++;
-		pcre_free(re);
 	}
 }