diff mbox

lib: fwts_klog: optimize regex scanning

Message ID 1343073341-26154-1-git-send-email-colin.king@canonical.com
State Accepted
Headers show

Commit Message

Colin Ian King July 23, 2012, 7:55 p.m. UTC
From: Colin Ian King <colin.king@canonical.com>

pcre allows one to optimize regex matching and without it i386
systems seem to randomly break (which isn't expected) on some
regex pattern matches.  So, use the pcre study and also add some
safe-guarding in the code to ensure we only compare regexes if
the compiled form is not NULL.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
 src/lib/include/fwts_klog.h |    1 +
 src/lib/src/fwts_klog.c     |   26 ++++++++++++++++++++++----
 2 files changed, 23 insertions(+), 4 deletions(-)

Comments

Keng-Yu Lin July 25, 2012, 7:51 a.m. UTC | #1
On Tue, Jul 24, 2012 at 3:55 AM, Colin King <colin.king@canonical.com> wrote:
> From: Colin Ian King <colin.king@canonical.com>
>
> pcre allows one to optimize regex matching and without it i386
> systems seem to randomly break (which isn't expected) on some
> regex pattern matches.  So, use the pcre study and also add some
> safe-guarding in the code to ensure we only compare regexes if
> the compiled form is not NULL.
>
> Signed-off-by: Colin Ian King <colin.king@canonical.com>
> ---
>  src/lib/include/fwts_klog.h |    1 +
>  src/lib/src/fwts_klog.c     |   26 ++++++++++++++++++++++----
>  2 files changed, 23 insertions(+), 4 deletions(-)
>
> diff --git a/src/lib/include/fwts_klog.h b/src/lib/include/fwts_klog.h
> index 3bbed5f..a47cf7a 100644
> --- a/src/lib/include/fwts_klog.h
> +++ b/src/lib/include/fwts_klog.h
> @@ -44,6 +44,7 @@ typedef struct {
>          const char *pattern;
>         const char *advice;
>         pcre *re;
> +       pcre_extra *extra;
>  } fwts_klog_pattern;
>
>  typedef void (*fwts_klog_progress_func)(fwts_framework *fw, int percent);
> diff --git a/src/lib/src/fwts_klog.c b/src/lib/src/fwts_klog.c
> index 93dbb06..ea713e8 100644
> --- a/src/lib/src/fwts_klog.c
> +++ b/src/lib/src/fwts_klog.c
> @@ -230,7 +230,8 @@ void fwts_klog_scan_patterns(fwts_framework *fw,
>                 int matched = 0;
>                 switch (pattern->compare_mode) {
>                 case FWTS_COMPARE_REGEX:
> -                       matched = (pcre_exec(pattern->re, NULL, line, strlen(line), 0, 0, vector, 1) == 0);
> +                       if (pattern->re)
> +                               matched = (pcre_exec(pattern->re, pattern->extra, line, strlen(line), 0, 0, vector, 1) == 0);
>                         break;
>                 case FWTS_COMPARE_STRING:
>                 default:
> @@ -354,8 +355,16 @@ static int fwts_klog_check(fwts_framework *fw,
>                 if ((patterns[i].advice = fwts_json_str(fw, table, i, obj, "advice")) == NULL)
>                         goto fail;
>
> -               if ((patterns[i].re = pcre_compile(patterns[i].pattern, 0, &error, &erroffset, NULL)) == NULL)
> +               if ((patterns[i].re = pcre_compile(patterns[i].pattern, 0, &error, &erroffset, NULL)) == NULL) {
>                         fwts_log_error(fw, "Regex %s failed to compile: %s.", patterns[i].pattern, error);
> +                       patterns[i].re = NULL;
> +               } else {
> +                       patterns[i].extra = pcre_study(patterns[i].re, 0, &error);
> +                       if (error != NULL) {
> +                               fwts_log_error(fw, "Regex %s failed to optimize: %s.", patterns[i].pattern, error);
> +                               patterns[i].re = NULL;
> +                       }
> +               }
>         }
>         /* We've now collected up the scan patterns, lets scan the log for errors */
>         ret = fwts_klog_scan(fw, klog, fwts_klog_scan_patterns, progress, patterns, errors);
> @@ -364,6 +373,8 @@ fail:
>         for (i=0; i<n; i++)
>                 if (patterns[i].re)
>                         pcre_free(patterns[i].re);
> +               if (patterns[i].extra)
> +                       pcre_free(patterns[i].extra);
>         free(patterns);
>  fail_put:
>         json_object_put(klog_objs);
> @@ -398,15 +409,22 @@ static void fwts_klog_regex_find_callback(fwts_framework *fw, char *line, int re
>         const char *error;
>         int erroffset;
>         pcre *re;
> +       pcre_extra *extra;
>         int rc;
>         int vector[1];
>
>         re = pcre_compile(pattern, 0, &error, &erroffset, NULL);
>         if (re != NULL) {
> -               rc = pcre_exec(re, NULL, line, strlen(line), 0, 0, vector, 1);
> +               extra = pcre_study(re, 0, &error);
> +               if (error)
> +                       return;
> +
> +               rc = pcre_exec(re, extra, line, strlen(line), 0, 0, vector, 1);
> +               if (extra)
> +                       free(extra);
> +               pcre_free(re);
>                 if (rc == 0)
>                         (*match)++;
> -               pcre_free(re);
>         }
>  }
>
> --
> 1.7.10.4
>
Acked-by: Keng-Yu Lin <kengyu@canonical.com>
(LP: #1028031)
Alex Hung July 27, 2012, 7:01 a.m. UTC | #2
On 07/24/2012 03:55 AM, Colin King wrote:
> From: Colin Ian King <colin.king@canonical.com>
>
> pcre allows one to optimize regex matching and without it i386
> systems seem to randomly break (which isn't expected) on some
> regex pattern matches.  So, use the pcre study and also add some
> safe-guarding in the code to ensure we only compare regexes if
> the compiled form is not NULL.
>
> Signed-off-by: Colin Ian King <colin.king@canonical.com>
> ---
>   src/lib/include/fwts_klog.h |    1 +
>   src/lib/src/fwts_klog.c     |   26 ++++++++++++++++++++++----
>   2 files changed, 23 insertions(+), 4 deletions(-)
>
> diff --git a/src/lib/include/fwts_klog.h b/src/lib/include/fwts_klog.h
> index 3bbed5f..a47cf7a 100644
> --- a/src/lib/include/fwts_klog.h
> +++ b/src/lib/include/fwts_klog.h
> @@ -44,6 +44,7 @@ typedef struct {
>           const char *pattern;
>   	const char *advice;
>   	pcre *re;
> +	pcre_extra *extra;
>   } fwts_klog_pattern;
>
>   typedef void (*fwts_klog_progress_func)(fwts_framework *fw, int percent);
> diff --git a/src/lib/src/fwts_klog.c b/src/lib/src/fwts_klog.c
> index 93dbb06..ea713e8 100644
> --- a/src/lib/src/fwts_klog.c
> +++ b/src/lib/src/fwts_klog.c
> @@ -230,7 +230,8 @@ void fwts_klog_scan_patterns(fwts_framework *fw,
>   		int matched = 0;
>   		switch (pattern->compare_mode) {
>   		case FWTS_COMPARE_REGEX:
> -			matched = (pcre_exec(pattern->re, NULL, line, strlen(line), 0, 0, vector, 1) == 0);
> +			if (pattern->re)
> +				matched = (pcre_exec(pattern->re, pattern->extra, line, strlen(line), 0, 0, vector, 1) == 0);
>   			break;
>   		case FWTS_COMPARE_STRING:
>   		default:
> @@ -354,8 +355,16 @@ static int fwts_klog_check(fwts_framework *fw,
>   		if ((patterns[i].advice = fwts_json_str(fw, table, i, obj, "advice")) == NULL)
>   			goto fail;
>
> -		if ((patterns[i].re = pcre_compile(patterns[i].pattern, 0, &error, &erroffset, NULL)) == NULL)
> +		if ((patterns[i].re = pcre_compile(patterns[i].pattern, 0, &error, &erroffset, NULL)) == NULL) {
>   			fwts_log_error(fw, "Regex %s failed to compile: %s.", patterns[i].pattern, error);
> +			patterns[i].re = NULL;
> +		} else {
> +			patterns[i].extra = pcre_study(patterns[i].re, 0, &error);
> +			if (error != NULL) {
> +				fwts_log_error(fw, "Regex %s failed to optimize: %s.", patterns[i].pattern, error);
> +				patterns[i].re = NULL;
> +			}
> +		}
>   	}
>   	/* We've now collected up the scan patterns, lets scan the log for errors */
>   	ret = fwts_klog_scan(fw, klog, fwts_klog_scan_patterns, progress, patterns, errors);
> @@ -364,6 +373,8 @@ fail:
>   	for (i=0; i<n; i++)
>   		if (patterns[i].re)
>   			pcre_free(patterns[i].re);
> +		if (patterns[i].extra)
> +			pcre_free(patterns[i].extra);
>   	free(patterns);
>   fail_put:
>   	json_object_put(klog_objs);
> @@ -398,15 +409,22 @@ static void fwts_klog_regex_find_callback(fwts_framework *fw, char *line, int re
>   	const char *error;
>   	int erroffset;
>   	pcre *re;
> +	pcre_extra *extra;
>   	int rc;
>   	int vector[1];
>
>   	re = pcre_compile(pattern, 0, &error, &erroffset, NULL);
>   	if (re != NULL) {
> -		rc = pcre_exec(re, NULL, line, strlen(line), 0, 0, vector, 1);
> +		extra = pcre_study(re, 0, &error);
> +		if (error)
> +			return;
> +
> +		rc = pcre_exec(re, extra, line, strlen(line), 0, 0, vector, 1);
> +		if (extra)
> +			free(extra);
> +		pcre_free(re);
>   		if (rc == 0)
>   			(*match)++;
> -		pcre_free(re);
>   	}
>   }
>
>
Acked-by: Alex Hung <alex.hung@canonical.com>
diff mbox

Patch

diff --git a/src/lib/include/fwts_klog.h b/src/lib/include/fwts_klog.h
index 3bbed5f..a47cf7a 100644
--- a/src/lib/include/fwts_klog.h
+++ b/src/lib/include/fwts_klog.h
@@ -44,6 +44,7 @@  typedef struct {
         const char *pattern;
 	const char *advice;
 	pcre *re;
+	pcre_extra *extra;
 } fwts_klog_pattern;
 
 typedef void (*fwts_klog_progress_func)(fwts_framework *fw, int percent);
diff --git a/src/lib/src/fwts_klog.c b/src/lib/src/fwts_klog.c
index 93dbb06..ea713e8 100644
--- a/src/lib/src/fwts_klog.c
+++ b/src/lib/src/fwts_klog.c
@@ -230,7 +230,8 @@  void fwts_klog_scan_patterns(fwts_framework *fw,
 		int matched = 0;
 		switch (pattern->compare_mode) {
 		case FWTS_COMPARE_REGEX:
-			matched = (pcre_exec(pattern->re, NULL, line, strlen(line), 0, 0, vector, 1) == 0);
+			if (pattern->re)
+				matched = (pcre_exec(pattern->re, pattern->extra, line, strlen(line), 0, 0, vector, 1) == 0);
 			break;
 		case FWTS_COMPARE_STRING:
 		default:
@@ -354,8 +355,16 @@  static int fwts_klog_check(fwts_framework *fw,
 		if ((patterns[i].advice = fwts_json_str(fw, table, i, obj, "advice")) == NULL)
 			goto fail;
 
-		if ((patterns[i].re = pcre_compile(patterns[i].pattern, 0, &error, &erroffset, NULL)) == NULL)
+		if ((patterns[i].re = pcre_compile(patterns[i].pattern, 0, &error, &erroffset, NULL)) == NULL) {
 			fwts_log_error(fw, "Regex %s failed to compile: %s.", patterns[i].pattern, error);
+			patterns[i].re = NULL;
+		} else {
+			patterns[i].extra = pcre_study(patterns[i].re, 0, &error);
+			if (error != NULL) {
+				fwts_log_error(fw, "Regex %s failed to optimize: %s.", patterns[i].pattern, error);
+				patterns[i].re = NULL;
+			}
+		}
 	}
 	/* We've now collected up the scan patterns, lets scan the log for errors */
 	ret = fwts_klog_scan(fw, klog, fwts_klog_scan_patterns, progress, patterns, errors);
@@ -364,6 +373,8 @@  fail:
 	for (i=0; i<n; i++)
 		if (patterns[i].re)
 			pcre_free(patterns[i].re);
+		if (patterns[i].extra)
+			pcre_free(patterns[i].extra);
 	free(patterns);
 fail_put:
 	json_object_put(klog_objs);
@@ -398,15 +409,22 @@  static void fwts_klog_regex_find_callback(fwts_framework *fw, char *line, int re
 	const char *error;
 	int erroffset;
 	pcre *re;
+	pcre_extra *extra;
 	int rc;
 	int vector[1];
 
 	re = pcre_compile(pattern, 0, &error, &erroffset, NULL);
 	if (re != NULL) {
-		rc = pcre_exec(re, NULL, line, strlen(line), 0, 0, vector, 1);
+		extra = pcre_study(re, 0, &error);
+		if (error)
+			return;
+
+		rc = pcre_exec(re, extra, line, strlen(line), 0, 0, vector, 1);
+		if (extra)
+			free(extra);
+		pcre_free(re);
 		if (rc == 0)
 			(*match)++;
-		pcre_free(re);
 	}
 }