diff mbox

[1/6] utilities: kernelscan: Tool to scan kernel for error messages

Message ID 1351875517-19128-2-git-send-email-colin.king@canonical.com
State Accepted
Headers show

Commit Message

Colin Ian King Nov. 2, 2012, 4:58 p.m. UTC
From: Colin Ian King <colin.king@canonical.com>

This is the initial first cut of the kernelscan utility that
helps us to track new kernel messages so we can add them to
the fwts klog json database.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
 Makefile.am                |    2 +-
 configure.ac               |    1 +
 src/utilities/Makefile.am  |    6 +
 src/utilities/kernelscan.c |  976 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 984 insertions(+), 1 deletion(-)
 create mode 100644 src/utilities/Makefile.am
 create mode 100644 src/utilities/kernelscan.c

Comments

Keng-Yu Lin Nov. 7, 2012, 2:10 a.m. UTC | #1
On Sat, Nov 3, 2012 at 12:58 AM, Colin King <colin.king@canonical.com> wrote:
> From: Colin Ian King <colin.king@canonical.com>
>
> This is the initial first cut of the kernelscan utility that
> helps us to track new kernel messages so we can add them to
> the fwts klog json database.
>
> Signed-off-by: Colin Ian King <colin.king@canonical.com>
> ---
>  Makefile.am                |    2 +-
>  configure.ac               |    1 +
>  src/utilities/Makefile.am  |    6 +
>  src/utilities/kernelscan.c |  976 ++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 984 insertions(+), 1 deletion(-)
>  create mode 100644 src/utilities/Makefile.am
>  create mode 100644 src/utilities/kernelscan.c
>
> diff --git a/Makefile.am b/Makefile.am
> index 09f5bec..057c47c 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -1,3 +1,3 @@
> -SUBDIRS = src data
> +SUBDIRS = src data src/utilities
>
>  ACLOCAL_AMFLAGS = -I m4
> diff --git a/configure.ac b/configure.ac
> index 48481aa..77d44bf 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -81,6 +81,7 @@
>            src/acpica/Makefile
>             src/lib/Makefile
>             src/lib/src/Makefile
> +          src/utilities/Makefile
>             data/Makefile
>            ])
>            AC_OUTPUT
> diff --git a/src/utilities/Makefile.am b/src/utilities/Makefile.am
> new file mode 100644
> index 0000000..427c44f
> --- /dev/null
> +++ b/src/utilities/Makefile.am
> @@ -0,0 +1,6 @@
> +AM_CPPFLAGS = -Wall -Werror -Wextra
> +
> +bin_PROGRAMS = kernelscan
> +kernelscan_SOURCES = kernelscan.c
> +kernelscan_LDFLAGS = -ljson -lpcre
> +
> diff --git a/src/utilities/kernelscan.c b/src/utilities/kernelscan.c
> new file mode 100644
> index 0000000..7569079
> --- /dev/null
> +++ b/src/utilities/kernelscan.c
> @@ -0,0 +1,976 @@
> +/*
> + * Copyright (C) 2012 Canonical
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
> + *
> + */
> +
> +#include <stdio.h>
> +#include <stdbool.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <ctype.h>
> +#include <unistd.h>
> +
> +#include <pcre.h>
> +#include <json/json.h>
> +
> +#define PARSER_OK              0
> +#define PARSER_COMMENT_FOUND   1
> +
> +#define __JSON_ERR_PTR__ ((json_object*) -1)
> +/*
> + *  Older versions of json-c may return an error in an
> + *  object as a ((json_object*)-1), whereas newer
> + *  versions return NULL, so check for both. Sigh.
> + *
> + *  The argument is parenthesised so any pointer expression
> + *  can be passed; note that it is evaluated twice.
> + */
> +#define JSON_ERROR(ptr) \
> +       ( ((ptr) == NULL) || ((json_object*)(ptr) == __JSON_ERR_PTR__) )
> +
> +typedef enum {
> +        COMPARE_REGEX = 'r',   /* pattern is matched as a regular expression */
> +        COMPARE_STRING = 's',  /* pattern is matched as a plain sub-string */
> +        COMPARE_UNKNOWN = 'u', /* compare_mode was neither "regex" nor "string" */
> +} compare_mode;
> +
> +typedef struct {
> +        char *pattern;         /* pattern that we compare to kernel messages */
> +       compare_mode cm;        /* 'r' regex or 's' string comparison */
> +        pcre *re;              /* regex compiled from pattern, NULL if it failed */
> +        pcre_extra *extra;     /* result of pcre_study() on re */
> +} klog_pattern;
> +
> +/*
> + *  Subset of tokens that we need to intelligently parse the kernel C source.
> + *  Tokens that the lexer does not classify keep the type TOKEN_UNKNOWN.
> + */
> +typedef enum {
> +       TOKEN_UNKNOWN,          /* No idea what token it is */
> +       TOKEN_NUMBER,           /* Integer */
> +       TOKEN_LITERAL_STRING,   /* "string" */
> +       TOKEN_LITERAL_CHAR,     /* 'x' */
> +       TOKEN_IDENTIFIER,       /* identifier */
> +       TOKEN_PAREN_OPENED,     /* ( */
> +       TOKEN_PAREN_CLOSED,     /* ) */
> +       TOKEN_CPP,              /* # C pre-processor */
> +       TOKEN_WHITE_SPACE,      /* ' ', '\t', '\r', '\n' white space */
> +       TOKEN_LESS_THAN,        /* < */
> +       TOKEN_GREATER_THAN,     /* > */
> +       TOKEN_COMMA,            /* , */
> +       TOKEN_ARROW,            /* -> */
> +       TOKEN_TERMINAL,         /* ; */
> +} token_type;
> +
> +/*
> + *  A token gathered by the lexer.  The text lives in a heap
> + *  buffer allocated by token_new() and grown by token_append();
> + *  ptr marks where the next character will be appended.
> + */
> +typedef struct {
> +       char *token;            /* The gathered string for this token */
> +       size_t len;             /* Length of the token buffer in bytes */
> +       char *ptr;              /* Current end of the token during the lexical analysis */
> +       token_type type;        /* The type of token we think it is */
> +} token;
> +
> +/*
> + *  Quick and dirty way to push input stream back, like ungetc().
> + *  Each pushed back character is a node in a singly linked list;
> + *  unget_next() adds at the head and get_next() removes from the
> + *  head, so the most recently pushed character is read first.
> + */
> +typedef struct get_stack {
> +       int ch;                 /* Char pushed back */
> +       struct get_stack *next; /* Next one in list */
> +} get_stack;
> +
> +/*
> + *  Parser context, initialised by parser_new()
> + */
> +typedef struct {
> +       FILE *fp;               /* The input stream we are reading */
> +       bool skip_white_space;  /* If true get_token() discards white space */
> +       get_stack *get_chars;   /* Ungot chars get pushed onto this */
> +} parser;
> +
> +/*
> + *  FWTS klog patterns, loaded from a json file
> + */
> +static klog_pattern *patterns;
> +
> +static int get_token(parser *p, token *t);
> +
> +/*
> + *  Set up a parser context that reads from fp and starts
> + *  with nothing pushed back.  skip_white_space selects
> + *  whether get_token() discards white space or hands it
> + *  back as TOKEN_WHITE_SPACE tokens.
> + */
> +static void parser_new(parser *p, FILE *fp, bool skip_white_space)
> +{
> +       p->fp = fp;
> +       p->skip_white_space = skip_white_space;
> +       p->get_chars = NULL;
> +}
> +
> +/*
> + *  Fetch the next input character: the most recently
> + *  pushed back one if there is any, otherwise the next
> + *  one from the stream (which may be EOF).
> + */
> +static int get_next(parser *p)
> +{
> +       get_stack *top = p->get_chars;
> +       int ch;
> +
> +       /* Nothing was pushed back, read the stream */
> +       if (top == NULL)
> +               return fgetc(p->fp);
> +
> +       /* Pop the head of the push-back list */
> +       ch = top->ch;
> +       p->get_chars = top->next;
> +       free(top);
> +
> +       return ch;
> +}
> +
> +/*
> + *  Push a character back onto the input stream.  The
> + *  characters are kept on a simple stack, so the last one
> + *  pushed is the first one get_next() returns.  Running
> + *  out of memory is fatal.
> + */
> +static void unget_next(parser *p, int ch)
> +{
> +       get_stack *item = calloc(sizeof(get_stack), 1);
> +
> +       if (item == NULL) {
> +               fprintf(stderr, "unget_next: Out of memory!\n");
> +               exit(EXIT_FAILURE);
> +       }
> +
> +       item->next = p->get_chars;
> +       item->ch = ch;
> +       p->get_chars = item;
> +}
> +
> +/*
> + *  Initialise a token with a zeroed 1K buffer.  Starting
> + *  with plenty of slop means token_append() rarely has to
> + *  grow the buffer while lexing.  Running out of memory
> + *  is fatal.
> + */
> +static void token_new(token *t)
> +{
> +       t->token = calloc(1024, 1);
> +       if (t->token == NULL) {
> +               fprintf(stderr, "token_new: Out of memory!\n");
> +               exit(EXIT_FAILURE);
> +       }
> +       t->type = TOKEN_UNKNOWN;
> +       t->ptr = t->token;
> +       t->len = 1024;
> +}
> +
> +/*
> + *  Reset the token to an empty string of unknown type so
> + *  the same buffer can gather the next token.
> + */
> +static void token_clear(token *t)
> +{
> +       t->type = TOKEN_UNKNOWN;
> +       t->ptr = t->token;
> +       t->token[0] = '\0';
> +}
> +
> +/*
> + *  Free the token buffer and set the token pointer to
> + *  NULL.  The ptr and len fields are left as they were,
> + *  so the token must go through token_new() before it is
> + *  used again.
> + */
> +static void token_free(token *t)
> +{
> +       free(t->token);
> +       t->token = NULL;
> +}
> +
> +/*
> + *  Append a single character to the token and keep it
> + *  NUL terminated.  When the buffer is full it is grown
> + *  by another 1K.  realloc() may move the buffer, so the
> + *  write position is re-based onto the new buffer rather
> + *  than left pointing into the old one.  Running out of
> + *  memory is fatal.
> + */
> +static void token_append(token *t, int ch)
> +{
> +       if (t->ptr >= t->token + t->len - 1) {
> +               /* No more space, add 1K more space */
> +               size_t used = (size_t)(t->ptr - t->token);
> +               char *grown = realloc(t->token, t->len + 1024);
> +
> +               if (grown == NULL) {
> +                       fprintf(stderr, "token_append: Out of memory!\n");
> +                       exit(EXIT_FAILURE);
> +               }
> +               t->token = grown;
> +               t->len += 1024;
> +               t->ptr = grown + used;
> +       }
> +
> +       *(t->ptr) = ch;
> +       t->ptr++;
> +       *(t->ptr) = 0;
> +}
> +
> +/*
> + *  Map the compare_mode string of a klog pattern onto
> + *  its enum value: "regex" or "string", anything else
> + *  is COMPARE_UNKNOWN.
> + */
> +static compare_mode klog_compare_mode_str_to_val(const char *str)
> +{
> +       if (!strcmp(str, "regex"))
> +               return COMPARE_REGEX;
> +
> +       if (!strcmp(str, "string"))
> +               return COMPARE_STRING;
> +
> +       return COMPARE_UNKNOWN;
> +}
> +
> +/*
> + *  Load the FWTS klog messages of the named table from
> + *  /usr/share/fwts/klog.json.
> + *
> + *  Returns a heap allocated array of klog_pattern that is
> + *  terminated by an entry with a NULL pattern; release it
> + *  with klog_free().  Failing to load or walk the json data,
> + *  or running out of memory, is fatal.
> + *
> + *  Only patterns whose compare_mode is "regex" are compiled,
> + *  plain string patterns are matched with strstr() and need
> + *  no regex.  A regex that cannot be compiled or studied is
> + *  reported on stderr and left with a NULL re.
> + */
> +static klog_pattern *klog_load(const char *table)
> +{
> +       int n;
> +       int i;
> +       json_object *klog_objs;
> +       json_object *klog_table;
> +       klog_pattern *loaded;
> +
> +       klog_objs = json_object_from_file("/usr/share/fwts/klog.json");
> +       if (JSON_ERROR(klog_objs)) {
> +               fprintf(stderr, "Cannot load klog data\n");
> +               exit(EXIT_FAILURE);
> +       }
> +
> +       klog_table = json_object_object_get(klog_objs, table);
> +       if (JSON_ERROR(klog_table)) {
> +               fprintf(stderr, "Cannot fetch klog table object from %s.\n", table);
> +               exit(EXIT_FAILURE);
> +       }
> +
> +       n = json_object_array_length(klog_table);
> +
> +       /* Last entry is null to indicate end, so alloc n+1 items */
> +       if ((loaded = calloc(n+1, sizeof(klog_pattern))) == NULL) {
> +               fprintf(stderr, "Cannot allocate pattern table.\n");
> +               exit(EXIT_FAILURE);
> +       }
> +
> +       /* Now fetch json objects and compile regex */
> +       for (i = 0; i < n; i++) {
> +               const char *error = NULL;
> +               char *str;
> +               int erroffset;
> +               json_object *obj;
> +
> +               obj = json_object_array_get_idx(klog_table, i);
> +               if (JSON_ERROR(obj)) {
> +                       fprintf(stderr, "Cannot fetch %d item from table %s.\n", i, table);
> +                       exit(EXIT_FAILURE);
> +               }
> +
> +               str = (char*)json_object_get_string(json_object_object_get(obj, "compare_mode"));
> +               if (JSON_ERROR(str)) {
> +                       fprintf(stderr, "Cannot fetch compare_mode  object, item %d from table %s.\n", i, table);
> +                       exit(EXIT_FAILURE);
> +               }
> +               loaded[i].cm = klog_compare_mode_str_to_val(str);
> +
> +               str = (char*)json_object_get_string(json_object_object_get(obj, "pattern"));
> +               if (JSON_ERROR(str)) {
> +                       fprintf(stderr, "Cannot fetch pattern object, item %d from table %s.\n", i, table);
> +                       exit(EXIT_FAILURE);
> +               }
> +               loaded[i].pattern = strdup(str);
> +               if (loaded[i].pattern == NULL) {
> +                       fprintf(stderr, "Failed to strdup regex pattern %d from table %s.\n", i, table);
> +                       exit(EXIT_FAILURE);
> +               }
> +
> +               /* Plain strings are never run through pcre, so don't compile them */
> +               if (loaded[i].cm != COMPARE_REGEX)
> +                       continue;
> +
> +               loaded[i].re = pcre_compile(loaded[i].pattern, 0, &error, &erroffset, NULL);
> +               if (loaded[i].re == NULL) {
> +                       fprintf(stderr, "Regex %s failed to compile: %s.\n", loaded[i].pattern, error);
> +                       continue;
> +               }
> +
> +               loaded[i].extra = pcre_study(loaded[i].re, 0, &error);
> +               if (error != NULL) {
> +                       fprintf(stderr, "Regex %s failed to optimize: %s.\n", loaded[i].pattern, error);
> +                       /* Drop the compiled regex rather than leak it */
> +                       pcre_free(loaded[i].re);
> +                       loaded[i].re = NULL;
> +               }
> +       }
> +
> +       /* Discard the json table now we've parsed it into patterns */
> +       json_object_put(klog_objs);
> +
> +       return loaded;
> +}
> +
> +/*
> + *  Check str against each entry of the klog pattern table
> + *  (which ends at the first NULL pattern).  String patterns
> + *  match if they occur anywhere in str, regex patterns
> + *  match if pcre_exec() returns 0.  Returns true on the
> + *  first match, false if nothing matches.
> + */
> +static bool klog_find(char *str, klog_pattern *patterns)
> +{
> +       klog_pattern *kp;
> +
> +       for (kp = patterns; kp->pattern != NULL; kp++) {
> +               int vector[1];
> +
> +               switch (kp->cm) {
> +               case COMPARE_STRING:
> +                       if (strstr(str, kp->pattern) != NULL)
> +                               return true;
> +                       break;
> +               case COMPARE_REGEX:
> +                       if (pcre_exec(kp->re, kp->extra, str, strlen(str), 0, 0, vector, 1) == 0)
> +                               return true;
> +                       break;
> +               default:
> +                       break;
> +               }
> +       }
> +
> +       return false;
> +}
> +
> +/*
> + *  Release every entry of a klog pattern table up to the
> + *  terminating NULL pattern, then the table itself.
> + */
> +static void klog_free(klog_pattern *patterns)
> +{
> +       klog_pattern *kp = patterns;
> +
> +       while (kp->pattern != NULL) {
> +               pcre_free(kp->re);
> +               pcre_free(kp->extra);
> +               free(kp->pattern);
> +               kp++;
> +       }
> +       free(patterns);
> +}
> +
> +/*
> + *  Parse C comments and just throw them away.  Called
> + *  after a '/' has been read.
> + *
> + *  Returns PARSER_COMMENT_FOUND if a comment was consumed,
> + *  PARSER_OK if the '/' did not start a comment (the char
> + *  after it is pushed back), or EOF if the input ended.
> + *
> + *  The newline ending a line comment is pushed back so
> + *  that a lexer that does not skip white space still sees
> + *  the end of the line.  The end of a block comment is
> + *  tracked with a flag so that a run of stars directly in
> + *  front of the closing slash is still recognised.
> + */
> +static int skip_comments(parser *p)
> +{
> +       int ch;
> +       int nextch;
> +
> +       nextch = get_next(p);
> +       if (nextch == EOF)
> +               return EOF;
> +
> +       if (nextch == '/') {
> +               do {
> +                       ch = get_next(p);
> +                       if (ch == EOF)
> +                               return EOF;
> +               }
> +               while (ch != '\n');
> +
> +               /* The newline is not part of the comment */
> +               unget_next(p, ch);
> +
> +               return PARSER_COMMENT_FOUND;
> +       }
> +
> +       if (nextch == '*') {
> +               bool star = false;      /* was the previous char a '*' ? */
> +
> +               for (;;) {
> +                       ch = get_next(p);
> +                       if (ch == EOF)
> +                               return EOF;
> +
> +                       if (star && ch == '/')
> +                               return PARSER_COMMENT_FOUND;
> +
> +                       star = (ch == '*');
> +               }
> +       }
> +
> +       /* Not a comment, push back */
> +       unget_next(p, nextch);
> +
> +       return PARSER_OK;
> +}
> +
> +/*
> + *  Parse an integer.  This is fairly minimal as the
> + *  kernel doesn't have floats or doubles, so we
> + *  can just parse decimal, octal or hex values.
> + *
> + *  ch is the first digit, which has already been read.
> + *  The digits are gathered into t, which is marked as
> + *  TOKEN_NUMBER; the first char that is not part of the
> + *  number is pushed back.  Always returns PARSER_OK.
> + */
> +static int parse_number(parser *p, token *t, int ch)
> +{
> +       int nextch1, nextch2;
> +       bool ishex = false;
> +       bool isoct = false;
> +
> +       t->type = TOKEN_NUMBER;
> +
> +       /*
> +        *  Crude way to detect the kind of integer
> +        */
> +       if (ch == '0') {
> +               token_append(t, ch);
> +
> +               nextch1 = get_next(p);
> +               if (nextch1 == EOF) {
> +                       /* A lone 0 at the end of the input */
> +                       return PARSER_OK;
> +               }
> +
> +               if (nextch1 >= '0' && nextch1 <= '7') {
> +                       /* Must be an octal value */
> +                       ch = nextch1;
> +                       isoct = true;
> +               } else if (nextch1 == 'x' || nextch1 == 'X') {
> +                       /* Is it hexadecimal? */
> +                       nextch2 = get_next(p);
> +                       if (nextch2 == EOF) {
> +                               unget_next(p, nextch1);
> +                               return PARSER_OK;
> +                       }
> +
> +                       if (isxdigit(nextch2)) {
> +                               /* Hexadecimal */
> +                               token_append(t, nextch1);
> +                               ch = nextch2;
> +                               ishex = true;
> +                       } else {
> +                               /* Nope */
> +                               unget_next(p, nextch2);
> +                               unget_next(p, nextch1);
> +                               return PARSER_OK;
> +                       }
> +               } else {
> +                       unget_next(p, nextch1);
> +                       return PARSER_OK;
> +               }
> +       }
> +
> +       /*
> +        * OK, we now know what type of integer we
> +        * are processing, so just gather up the digits
> +        */
> +       token_append(t, ch);
> +
> +       for (;;) {
> +               bool digit;
> +
> +               ch = get_next(p);
> +
> +               if (ch == EOF)
> +                       digit = false;
> +               else if (ishex)
> +                       digit = (isxdigit(ch) != 0);
> +               else if (isoct)
> +                       digit = (ch >= '0' && ch <= '7');
> +               else
> +                       digit = (isdigit(ch) != 0);
> +
> +               if (!digit) {
> +                       /* End of the number (or of the input), hand it back */
> +                       unget_next(p, ch);
> +                       return PARSER_OK;
> +               }
> +               token_append(t, ch);
> +       }
> +}
> +
> +/*
> + *  Gather an identifier whose first char ch has already
> + *  been read.  Letters, digits and '_' are appended; the
> + *  first other char is pushed back.  Always returns
> + *  PARSER_OK.
> + */
> +static int parse_identifier(parser *p, token *t, int ch)
> +{
> +       t->type = TOKEN_IDENTIFIER;
> +       token_append(t, ch);
> +
> +       while ((ch = get_next(p)) != EOF) {
> +               if (!isalnum(ch) && ch != '_') {
> +                       /* Not part of the identifier */
> +                       unget_next(p, ch);
> +                       break;
> +               }
> +               token_append(t, ch);
> +       }
> +
> +       return PARSER_OK;
> +}
> +
> +/*
> + *  Parse literal strings and chars.  literal is the
> + *  opening quote (already read) and type is the token
> + *  type to give the result.  The quotes are kept in the
> + *  token.  Always returns PARSER_OK, even when the input
> + *  ends before the closing quote.
> + *
> + *  A backslash escapes exactly one following char, so an
> + *  escaped backslash does not escape the char after it
> + *  and a literal that ends in an escaped backslash
> + *  terminates properly.
> + */
> +static int parse_literal(parser *p, token *t, int literal, token_type type)
> +{
> +       bool escaped = false;
> +       int ch;
> +
> +       t->type = type;
> +
> +       token_append(t, literal);
> +
> +       for (;;) {
> +               ch = get_next(p);
> +               if (ch == EOF) {
> +                       return PARSER_OK;
> +               }
> +
> +               token_append(t, ch);
> +
> +               if (escaped) {
> +                       /* This char was escaped, whatever it is */
> +                       escaped = false;
> +                       continue;
> +               }
> +
> +               if (ch == '\\') {
> +                       escaped = true;
> +               } else if (ch == literal) {
> +                       return PARSER_OK;
> +               }
> +       }
> +}
> +
> +/*
> + *  Gather an operator that may be doubled up, such as
> + *  + and ++.  op has already been read; a following
> + *  char that is not op is pushed back.  Always returns
> + *  PARSER_OK.
> + */
> +static int parse_op(parser *p, token *t, int op)
> +{
> +       int ch;
> +
> +       token_append(t, op);
> +
> +       ch = get_next(p);
> +       if (ch == EOF)
> +               return PARSER_OK;
> +
> +       if (ch != op) {
> +               /* Single form of the operator */
> +               unget_next(p, ch);
> +               return PARSER_OK;
> +       }
> +
> +       token_append(t, op);
> +       return PARSER_OK;
> +}
> +
> +/*
> + *  Gather -, -- or ->.  op has already been read.  Only
> + *  the -> form gets a token type (TOKEN_ARROW); a
> + *  following char that fits neither form is pushed back.
> + *  Always returns PARSER_OK.
> + */
> +static int parse_minus(parser *p, token *t, int op)
> +{
> +       int ch;
> +
> +       token_append(t, op);
> +
> +       ch = get_next(p);
> +       if (ch == EOF)
> +               return PARSER_OK;
> +
> +       if (ch == op) {
> +               token_append(t, ch);
> +       } else if (ch == '>') {
> +               token_append(t, ch);
> +               t->type = TOKEN_ARROW;
> +       } else {
> +               unget_next(p, ch);
> +       }
> +
> +       return PARSER_OK;
> +}
> +
> +/*
> + *  Gather a token from input stream.
> + *
> + *  The token text is appended to t (which the caller is
> + *  expected to have cleared) and t->type is set for the
> + *  kinds of token listed in token_type; everything else
> + *  keeps the type it had.  Comments are dropped, and so
> + *  is white space when the parser is in skip_white_space
> + *  mode.  Returns PARSER_OK when a token was gathered or
> + *  EOF at the end of the input.
> + *
> + *  Identifiers may start with '_', and any char that has
> + *  no special handling is returned as a one char token
> + *  rather than being dropped, so that the -E phase can
> + *  reproduce its input faithfully.
> + */
> +static int get_token(parser *p, token *t)
> +{
> +       int ch;
> +       int ret;
> +
> +       for (;;) {
> +               ch = get_next(p);
> +
> +               switch (ch) {
> +               case EOF:
> +                       return EOF;
> +
> +               /* Skip comments */
> +               case '/':
> +                       ret = skip_comments(p);
> +                       if (ret == EOF)
> +                               return EOF;
> +                       if (ret == PARSER_COMMENT_FOUND)
> +                               continue;
> +                       token_append(t, ch);
> +                       return PARSER_OK;
> +               case '#':
> +                       token_append(t, ch);
> +                       t->type = TOKEN_CPP;
> +                       return PARSER_OK;
> +               case ' ':
> +               case '\t':
> +               case '\r':
> +               case '\n':
> +               case '\\':
> +                       if (p->skip_white_space)
> +                               continue;
> +                       else {
> +                               token_append(t, ch);
> +                               t->type = TOKEN_WHITE_SPACE;
> +                               return PARSER_OK;
> +                       }
> +               case '(':
> +                       token_append(t, ch);
> +                       t->type = TOKEN_PAREN_OPENED;
> +                       return PARSER_OK;
> +               case ')':
> +                       token_append(t, ch);
> +                       t->type = TOKEN_PAREN_CLOSED;
> +                       return PARSER_OK;
> +               case '<':
> +                       token_append(t, ch);
> +                       t->type = TOKEN_LESS_THAN;
> +                       return PARSER_OK;
> +               case '>':
> +                       token_append(t, ch);
> +                       t->type = TOKEN_GREATER_THAN;
> +                       return PARSER_OK;
> +               case ',':
> +                       token_append(t, ch);
> +                       t->type = TOKEN_COMMA;
> +                       return PARSER_OK;
> +               case ';':
> +                       token_append(t, ch);
> +                       t->type = TOKEN_TERMINAL;
> +                       return PARSER_OK;
> +               case '0'...'9':
> +                       return parse_number(p, t, ch);
> +               case '_':
> +               case 'a'...'z':
> +               case 'A'...'Z':
> +                       return parse_identifier(p, t, ch);
> +               case '"':
> +                       return parse_literal(p, t, ch, TOKEN_LITERAL_STRING);
> +               case '\'':
> +                       return parse_literal(p, t, ch, TOKEN_LITERAL_CHAR);
> +               case '+':
> +               case '=':
> +               case '|':
> +               case '&':
> +                       return parse_op(p, t, ch);
> +               case '-':
> +                       return parse_minus(p, t, ch);
> +               default:
> +                       /* {, }, [, ], :, ~, ?, *, %, !, ., ^ and anything else */
> +                       token_append(t, ch);
> +                       return PARSER_OK;
> +               }
> +       }
> +}
> +
> +/*
> + *  Literals such as "foo" and 'f' sometimes
> + *  need the quotes stripping off.
> + *
> + *  The first and last chars of the token are removed in
> + *  place.  The text is shifted down with memmove() since
> + *  source and destination overlap, and the end of token
> + *  pointer is moved to match.  A token that is too short
> + *  to hold two quotes just becomes the empty string.
> + */
> +static void literal_strip_quotes(token *t)
> +{
> +       size_t len = strlen(t->token);
> +
> +       if (len < 2) {
> +               t->token[0] = '\0';
> +               t->ptr = t->token;
> +               return;
> +       }
> +
> +       memmove(t->token, t->token + 1, len - 2);
> +       t->token[len - 2] = '\0';
> +       t->ptr = t->token + len - 2;
> +}
> +
> +/*
> + *  Append the string new to the heap string old and
> + *  return the result.  old may be NULL, in which case
> + *  the result is a fresh copy of new.  old must not be
> + *  used afterwards as the buffer may have moved.
> + *  Running out of memory is fatal.
> + */
> +static char *strdupcat(char *old, char *new)
> +{
> +       size_t addlen = strlen(new);
> +       size_t oldlen = (old != NULL) ? strlen(old) : 0;
> +       char *joined;
> +
> +       /* realloc() of a NULL pointer behaves like malloc() */
> +       joined = realloc(old, oldlen + addlen + 1);
> +       if (joined == NULL) {
> +               fprintf(stderr, "strdupcat(): Out of memory.\n");
> +               exit(EXIT_FAILURE);
> +       }
> +
> +       /* Copy new and its terminating NUL onto the end */
> +       memcpy(joined + oldlen, new, addlen + 1);
> +
> +       return joined;
> +}
> +
> +/*
> + *  Parse a kernel message, like printk() or dev_err().
> + *
> + *  On entry t holds the "printk" or "dev_err" identifier.
> + *  Tokens are gathered up to the terminating ';' and glued
> + *  into one line; adjacent string literals are joined and
> + *  checked against the klog patterns.  dev_err calls and
> + *  printk calls whose first argument is KERN_ERR are then
> + *  printed, prefixed with "OK : " if any of their strings
> + *  matched a klog pattern or "ADD: " if none did.
> + *
> + *  Returns PARSER_OK, or EOF if the input ended before the
> + *  ';'.  Either way the gathered strings are freed and t
> + *  is left cleared, ready to gather the next token.
> + */
> +static int parse_kernel_message(parser *p, token *t)
> +{
> +       int ret;
> +       bool got_string = false;
> +       bool emit = false;
> +       bool found = false;
> +       token_type prev_token_type = TOKEN_UNKNOWN;
> +       char *str = NULL;
> +       char *line = NULL;
> +       bool printk;
> +
> +       printk = (strcmp(t->token, "printk") == 0);
> +
> +       if (strcmp(t->token, "dev_err") == 0) {
> +               emit = true;
> +               line = strdupcat(line, "dev_err");
> +       }
> +       token_clear(t);
> +
> +       for (;;) {
> +               ret = get_token(p, t);
> +               if (ret == EOF)
> +                       break;
> +
> +               /*
> +                *  Hit ; so lets push out what we've parsed
> +                */
> +               if (t->type == TOKEN_TERMINAL) {
> +                       /* Finish off a string that runs right up to the ; */
> +                       if (got_string)
> +                               line = strdupcat(line, "\"");
> +                       if (str)
> +                               found |= klog_find(str, patterns);
> +
> +                       if (emit) {
> +                               if (found) {
> +                                       printf("OK : %s\n", line);
> +                               } else {
> +                                       printf("ADD: %s\n", line);
> +                               }
> +                       }
> +                       ret = PARSER_OK;
> +                       break;
> +               }
> +
> +               /*
> +                *  We are only interested in KERN_ERR
> +                *  printk messages
> +                */
> +               if (printk &&
> +                   (t->type == TOKEN_IDENTIFIER) &&
> +                   (prev_token_type == TOKEN_PAREN_OPENED) &&
> +                   (strcmp(t->token, "KERN_ERR") == 0)) {
> +                       /* Restart the line, it only holds the ( so far */
> +                       free(line);
> +                       line = strdupcat(NULL, "printk( ");
> +                       emit = true;
> +               }
> +
> +               if (t->type == TOKEN_LITERAL_STRING) {
> +                       literal_strip_quotes(t);
> +                       str = strdupcat(str, t->token);
> +
> +                       if (!got_string)
> +                               line = strdupcat(line, "\"");
> +
> +                       got_string = true;
> +               } else {
> +                       if (got_string)
> +                               line = strdupcat(line, "\"");
> +
> +                       got_string = false;
> +
> +                       if (str) {
> +                               found |= klog_find(str, patterns);
> +                               free(str);
> +                               str = NULL;
> +                       }
> +               }
> +
> +               line = strdupcat(line, t->token);
> +               if (t->type == TOKEN_COMMA)
> +                       line = strdupcat(line, " ");
> +
> +               prev_token_type = t->type;
> +
> +               token_clear(t);
> +       }
> +
> +       /* Don't leave the ; behind for the next token to be glued onto */
> +       token_clear(t);
> +       free(str);
> +       free(line);
> +
> +       return ret;
> +}
> +
> +/*
> + *  Scan the tokens of fp, with white space skipped, and
> + *  hand every printk or dev_err identifier on to
> + *  parse_kernel_message().  All other tokens are thrown
> + *  away.
> + */
> +static void parse_kernel_messages(FILE *fp)
> +{
> +       parser p;
> +       token t;
> +
> +       parser_new(&p, fp, true);
> +       token_new(&t);
> +
> +       while (get_token(&p, &t) != EOF) {
> +               bool is_message = (strcmp(t.token, "printk") == 0) ||
> +                                 (strcmp(t.token, "dev_err") == 0);
> +
> +               if (!is_message) {
> +                       token_clear(&t);
> +                       continue;
> +               }
> +               parse_kernel_message(&p, &t);
> +       }
> +
> +       token_free(&t);
> +}
> +
> +/*
> + *  This is evil.  We parse the input stream
> + *  and throw away all #includes so we don't get
> + *  gcc -E breaking on include files that we haven't
> + *  got.  We don't really care at this level about
> + *  macros being expanded as we want to see tokens
> + *  such as KERN_ERR later on.
> + *
> + *  Called once the "include" of a #include has been
> + *  read.  Returns PARSER_OK, or EOF if the input ends.
> + */
> +static int parse_cpp_include(parser *p, token *t)
> +{
> +       /*
> +        *  Skip white space up to the first real token,
> +        *  which is the whole of "foo.h" for that form
> +        */
> +       for (;;) {
> +               token_clear(t);
> +               if (get_token(p, t) == EOF)
> +                       return EOF;
> +               /* End of line, we're done! */
> +               if (!strcmp(t->token, "\n"))
> +                       return PARSER_OK;
> +               if (t->type != TOKEN_WHITE_SPACE)
> +                       break;
> +       }
> +
> +       /*
> +        *  For #include <something/foo.h> we are now at
> +        *  the '<' token, so gobble up to the closing '>'
> +        */
> +       if (t->type == TOKEN_LESS_THAN) {
> +               while (t->type != TOKEN_GREATER_THAN) {
> +                       if (get_token(p, t) == EOF)
> +                               return EOF;
> +               }
> +       }
> +
> +       token_clear(t);
> +
> +       return PARSER_OK;
> +}
> +
> +/*
> + *  CPP phase, find and remove #includes.
> + *
> + *  The tokens of fp, white space included, are copied to
> + *  stdout.  For a '#' the white space up to the directive
> + *  name is dropped; #include directives are then thrown
> + *  away by parse_cpp_include() and any other directive is
> + *  printed as '#' followed by its name.
> + *
> + *  Always returns EOF, as it only stops at the end of the
> + *  input.  The token buffer is freed on every way out.
> + */
> +static int parse_cpp_includes(FILE *fp)
> +{
> +       token t;
> +       parser p;
> +
> +       parser_new(&p, fp, false);
> +       p.fp = fp;
> +       p.skip_white_space = false;
> +
> +       token_new(&t);
> +
> +       while ((get_token(&p, &t)) != EOF) {
> +               if (t.type == TOKEN_CPP) {
> +                       for (;;) {
> +                               token_clear(&t);
> +                               if (get_token(&p, &t) == EOF)
> +                                       goto done;
> +                               if (strcmp(t.token, "\n") == 0)
> +                                       break;
> +                               if (t.type == TOKEN_WHITE_SPACE) {
> +                                       continue;
> +                               }
> +                               if (strcmp(t.token, "include") == 0) {
> +                                       if (parse_cpp_include(&p, &t) == EOF)
> +                                               goto done;
> +                                       break;
> +                               }
> +                               printf("#%s", t.token);
> +                               break;
> +                       }
> +               } else {
> +                       printf("%s", t.token);
> +               }
> +               token_clear(&t);
> +       }
> +
> +done:
> +       token_free(&t);
> +
> +       return EOF;
> +}
> +
> +/*
> + *  Scan kernel source for printk KERN_ERR and dev_err
> + *  calls.
> + *
> + *  Usage:
> + *     cat drivers/pnp/pnpacpi/rsparser.c | kernelscan -E | gcc  -E - | kernelscan -P
> + *
> + *  This prints out any kernel printk KERN_ERR calls
> + *  or dev_err calls and checks to see if the error can be matched by
> + *  any of the fwts klog messages.  It has some intelligence, it glues
> + *  literal strings together such as "this is" "a message" into
> + *  "this is a message" before it makes the klog comparison.
> + */
> +int main(int argc, char **argv)
> +{
> +       if (argc < 2) {
> +               fprintf(stderr, "%s: [-E] [-P]\n", argv[0]);
> +               exit(EXIT_FAILURE);
> +       }
> +
> +       /*
> +        *  GCC -E preprocess phase
> +        */
> +       if (strcmp(argv[1], "-E") == 0) {
> +               parse_cpp_includes(stdin);
> +               exit(EXIT_SUCCESS);
> +       }
> +
> +       /*
> +        *  Parse kernel printk and dev_err phase
> +        */
> +       if (strcmp(argv[1], "-P") == 0) {
> +               patterns = klog_load("firmware_error_warning_patterns");
> +               parse_kernel_messages(stdin);
> +               klog_free(patterns);
> +       }
> +
> +       exit(EXIT_SUCCESS);
> +}
> --
> 1.7.10.4
>
Acked-by: Keng-Yu Lin <kengyu@canonical.com>
Alex Hung Nov. 21, 2012, 5:49 a.m. UTC | #2
On 11/03/2012 12:58 AM, Colin King wrote:
> From: Colin Ian King <colin.king@canonical.com>
>
> This is the initial first cut of the kernelscan utility that
> helps us to track new kernel messages so we can add them to
> the fwts klog json database.
>
> Signed-off-by: Colin Ian King <colin.king@canonical.com>
> ---
>   Makefile.am                |    2 +-
>   configure.ac               |    1 +
>   src/utilities/Makefile.am  |    6 +
>   src/utilities/kernelscan.c |  976 ++++++++++++++++++++++++++++++++++++++++++++
>   4 files changed, 984 insertions(+), 1 deletion(-)
>   create mode 100644 src/utilities/Makefile.am
>   create mode 100644 src/utilities/kernelscan.c
>
> diff --git a/Makefile.am b/Makefile.am
> index 09f5bec..057c47c 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -1,3 +1,3 @@
> -SUBDIRS = src data
> +SUBDIRS = src data src/utilities
>
>   ACLOCAL_AMFLAGS = -I m4
> diff --git a/configure.ac b/configure.ac
> index 48481aa..77d44bf 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -81,6 +81,7 @@
>   	   src/acpica/Makefile
>              src/lib/Makefile
>              src/lib/src/Makefile
> +	   src/utilities/Makefile
>              data/Makefile
>             ])
>             AC_OUTPUT
> diff --git a/src/utilities/Makefile.am b/src/utilities/Makefile.am
> new file mode 100644
> index 0000000..427c44f
> --- /dev/null
> +++ b/src/utilities/Makefile.am
> @@ -0,0 +1,6 @@
> +AM_CPPFLAGS = -Wall -Werror -Wextra
> +
> +bin_PROGRAMS = kernelscan
> +kernelscan_SOURCES = kernelscan.c
> +kernelscan_LDFLAGS = -ljson -lpcre
> +
> diff --git a/src/utilities/kernelscan.c b/src/utilities/kernelscan.c
> new file mode 100644
> index 0000000..7569079
> --- /dev/null
> +++ b/src/utilities/kernelscan.c
> @@ -0,0 +1,976 @@
> +/*
> + * Copyright (C) 2012 Canonical
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
> + *
> + */
> +
> +#include <stdio.h>
> +#include <stdbool.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <ctype.h>
> +#include <unistd.h>
> +
> +#include <pcre.h>
> +#include <json/json.h>
> +
> +#define PARSER_OK		0
> +#define PARSER_COMMENT_FOUND	1
> +
> +#define __JSON_ERR_PTR__ ((json_object*) -1)
> +/*
> + *  Older versions of json-c may return an error in an
> + *  object as a ((json_object*)-1), whereas newer
> + *  versions return NULL, so check for these. Sigh.
> + */
> +#define JSON_ERROR(ptr) \
> +	( (ptr == NULL) || ((json_object*)ptr == __JSON_ERR_PTR__) )
> +
> +typedef enum {
> +        COMPARE_REGEX = 'r',
> +        COMPARE_STRING = 's',
> +        COMPARE_UNKNOWN = 'u',
> +} compare_mode;
> +
> +typedef struct {
> +        char *pattern;		/* pattern that we compare to kernel messages */
> +	compare_mode cm;	/* 'r' regex or 's' string comparison */
> +        pcre *re;		/* regex from pattern */
> +        pcre_extra *extra;
> +} klog_pattern;
> +
> +/*
> + *  Subset of tokens that we need to intelligently parse the kernel C source
> + */
> +typedef enum {
> +	TOKEN_UNKNOWN,		/* No idea what token it is */
> +	TOKEN_NUMBER,		/* Integer */
> +	TOKEN_LITERAL_STRING,	/* "string" */
> +	TOKEN_LITERAL_CHAR,	/* 'x' */
> +	TOKEN_IDENTIFIER,	/* identifier */
> +	TOKEN_PAREN_OPENED,	/* ( */
> +	TOKEN_PAREN_CLOSED,	/* ) */
> +	TOKEN_CPP,		/* # C pre-processor */
> +	TOKEN_WHITE_SPACE,	/* ' ', '\t', '\r', '\n' white space */
> +	TOKEN_LESS_THAN,	/* < */
> +	TOKEN_GREATER_THAN,	/* > */
> +	TOKEN_COMMA,		/* , */
> +	TOKEN_ARROW,		/* -> */
> +	TOKEN_TERMINAL,		/* ; */
> +} token_type;
> +
> +/*
> + *  A token
> + */
> +typedef struct {
> +	char *token;		/* The gathered string for this token */
> +	size_t len;		/* Length of the token buffer */
> +	char *ptr;		/* Current end of the token during the lexical analysis */
> +	token_type type;	/* The type of token we think it is */
> +} token;
> +
> +/*
> + *  Quick and dirty way to push input stream back, like ungetc()
> + */
> +typedef struct get_stack {
> +	int ch;			/* Char pushed back */
> +	struct get_stack *next;	/* Next one in list */
> +} get_stack;
> +
> +/*
> + *  Parser context
> + */
> +typedef struct {
> +	FILE *fp;		/* The file descriptor we are reading */
> +	bool skip_white_space;	/* Magic skip white space flag */
> +	get_stack *get_chars;	/* Ungot chars get pushed onto this */
> +} parser;
> +
> +/*
> + *  FWTS klog patterns, loaded from a json file
> + */
> +static klog_pattern *patterns;
> +
> +static int get_token(parser *p, token *t);
> +
> +/*
> + *  Initialise the parser
> + */
> +static void parser_new(parser *p, FILE *fp, bool skip_white_space)
> +{
> +	p->get_chars = NULL;
> +	p->fp = fp;
> +	p->skip_white_space = skip_white_space;
> +}
> +
> +/*
> + *  Get next character from input stream
> + */
> +static int get_next(parser *p)
> +{
> +	int ch;
> +
> +	/*
> +	 * If we have chars pushed using unget_next
> +	 * then pop them off the list first
> +	 */
> +	if (p->get_chars) {
> +		get_stack *tmp = p->get_chars;
> +		ch = tmp->ch;
> +
> +		p->get_chars = tmp->next;
> +		free(tmp);
> +
> +		return ch;
> +	}
> +	return fgetc(p->fp);
> +}
> +
> +/*
> + *  Push character back onto the input
> + *  stream (in this case, it is a simple LIFO stack)
> + */
> +static void unget_next(parser *p, int ch)
> +{
> +	get_stack *new;
> +
> +	if ((new = calloc(sizeof(get_stack), 1)) == NULL) {
> +		fprintf(stderr, "unget_next: Out of memory!\n");
> +		exit(EXIT_FAILURE);
> +	}
> +
> +	new->ch = ch;
> +	new->next = p->get_chars;
> +	p->get_chars = new;
> +}
> +
> +/*
> + *  Create a new token, give it plenty of slop so
> + *  we don't need to keep on reallocating the token
> + *  buffer as we append more characters to it during
> + *  the lexing phase.
> + */
> +static void token_new(token *t)
> +{
> +	if ((t->token = calloc(1024, 1)) == NULL) {
> +		fprintf(stderr, "token_new: Out of memory!\n");
> +		exit(EXIT_FAILURE);
> +	}
> +	t->len = 1024;
> +	t->ptr = t->token;
> +	t->type = TOKEN_UNKNOWN;
> +}
> +
> +/*
> + *  Clear the token ready for re-use
> + */
> +static void token_clear(token *t)
> +{
> +	t->ptr = t->token;
> +	t->type = TOKEN_UNKNOWN;
> +	*(t->ptr) = '\0';
> +}
> +
> +/*
> + *  Free the token
> + */
> +static void token_free(token *t)
> +{
> +	free(t->token);
> +	t->token = NULL;
> +}
> +
> +/*
> + *  Append a single character to the token,
> + *  we may run out of space, so this occasionally
> + *  adds an extra 1K of token space for long tokens
> + */
> +static void token_append(token *t, int ch)
> +{
> +	if (t->ptr < t->token + t->len - 1) {
> +		/* Enough space, just add char */
> +		*(t->ptr) = ch;
> +		t->ptr++;
> +		*(t->ptr) = 0;
> +	} else {
> +		/* No more space, add 1K more space */
> +		t->len += 1024;
> +		if ((t->token = realloc(t->token, t->len)) == NULL) {
> +			fprintf(stderr, "token_append: Out of memory!\n");
> +			exit(EXIT_FAILURE);
> +		}
> +		*(t->ptr) = ch;
> +		t->ptr++;
> +		*(t->ptr) = 0;
> +	}
> +}
> +
> +/*
> + *  Figure out if a klog pattern is a regex or a plain text string
> + */
> +static compare_mode klog_compare_mode_str_to_val(const char *str)
> +{
> +	if (strcmp(str, "regex") == 0)
> +		return COMPARE_REGEX;
> +	else if (strcmp(str, "string") == 0)
> +		return COMPARE_STRING;
> +	else
> +		return COMPARE_UNKNOWN;
> +}
> +
> +/*
> + *  Load FWTS klog messages from the json table
> + */
> +static klog_pattern *klog_load(const char *table)
> +{
> +	int n;
> +	int i;
> +	json_object *klog_objs;
> +	json_object *klog_table;
> +	klog_pattern *patterns;
> +
> +	klog_objs = json_object_from_file("/usr/share/fwts/klog.json");
> +	if (JSON_ERROR(klog_objs)) {
> +		fprintf(stderr, "Cannot load klog data\n");
> +		exit(EXIT_FAILURE);
> +	}
> +
> +	klog_table = json_object_object_get(klog_objs, table);
> +	if (JSON_ERROR(klog_table)) {
> +		fprintf(stderr, "Cannot fetch klog table object from %s.\n", table);
> +		exit(EXIT_FAILURE);
> +	}
> +
> +	n = json_object_array_length(klog_table);
> +
> +	/* Last entry is null to indicate end, so alloc n+1 items */
> +	if ((patterns = calloc(n+1, sizeof(klog_pattern))) == NULL) {
> +		fprintf(stderr, "Cannot allocate pattern table.\n");
> +		exit(EXIT_FAILURE);
> +	}
> +
> +	/* Now fetch json objects and compile regex */
> +	for (i = 0; i < n; i++) {
> +		const char *error;
> +		char *str;
> +		int erroffset;
> +		json_object *obj;
> +
> +		obj = json_object_array_get_idx(klog_table, i);
> +		if (JSON_ERROR(obj)) {
> +			fprintf(stderr, "Cannot fetch %d item from table %s.\n", i, table);
> +			exit(EXIT_FAILURE);
> +		}
> +
> +		str = (char*)json_object_get_string(json_object_object_get(obj, "compare_mode"));
> +		if (JSON_ERROR(str)) {
> +			fprintf(stderr, "Cannot fetch compare_mode  object, item %d from table %s.\n", i, table);
> +			exit(EXIT_FAILURE);
> +		}
> +		patterns[i].cm = klog_compare_mode_str_to_val(str);
> +
> +		str = (char*)json_object_get_string(json_object_object_get(obj, "pattern"));
> +		if (JSON_ERROR(str)) {
> +			fprintf(stderr, "Cannot fetch pattern object, item %d from table %s.\n", i, table);
> +			exit(EXIT_FAILURE);
> +		}
> +		patterns[i].pattern = strdup(str);
> +		if (patterns[i].pattern == NULL) {
> +			fprintf(stderr, "Failed to strdup regex pattern %d from table %s.\n", i, table);
> +			exit(EXIT_FAILURE);
> +		}
> +
> +		if ((patterns[i].re = pcre_compile(patterns[i].pattern, 0, &error, &erroffset, NULL)) == NULL) {
> +			fprintf(stderr, "Regex %s failed to compile: %s.\n", patterns[i].pattern, error);
> +			patterns[i].re = NULL;
> +		} else {
> +			patterns[i].extra = pcre_study(patterns[i].re, 0, &error);
> +			if (error != NULL) {
> +				fprintf(stderr, "Regex %s failed to optimize: %s.\n", patterns[i].pattern, error);
> +				patterns[i].re = NULL;
> +			}
> +		}
> +	}
> +
> +	/* Discard the json table now we've parsed it into patterns */
> +	json_object_put(klog_objs);
> +
> +	return patterns;
> +}
> +
> +/*
> + *  Does str match any of the patterns in the klog pattern table
> + */
> +static bool klog_find(char *str, klog_pattern *patterns)
> +{
> +	int i;
> +
> +	for (i = 0; patterns[i].pattern; i++) {
> +		if (patterns[i].cm == COMPARE_STRING) {
> +			if (strstr(str, patterns[i].pattern)) {
> +				return true;
> +			}
> +		}
> +		if (patterns[i].cm == COMPARE_REGEX) {
> +			int vector[1];
> +			if (pcre_exec(patterns[i].re, patterns[i].extra, str, strlen(str), 0, 0, vector, 1) == 0) {
> +				return true;
> +			}
> +		}
> +	}
> +
> +	return false;
> +}
> +
> +/*
> + *  Free the klog patterns
> + */
> +static void klog_free(klog_pattern *patterns)
> +{
> +	int i;
> +
> +	for (i = 0; patterns[i].pattern; i++) {
> +		pcre_free(patterns[i].re);
> +		pcre_free(patterns[i].extra);
> +		free(patterns[i].pattern);
> +	}
> +	free(patterns);
> +}
> +
> +/*
> + *  Parse C comments and just throw them away
> + */
> +static int skip_comments(parser *p)
> +{
> +	int ch;
> +	int nextch;
> +
> +	nextch = get_next(p);
> +	if (nextch == EOF)
> +		return EOF;
> +
> +	if (nextch == '/') {
> +		do {
> +			ch = get_next(p);
> +			if (ch == EOF)
> +				return EOF;
> +		}
> +		while (ch != '\n');
> +
> +		return PARSER_COMMENT_FOUND;
> +	}
> +
> +	if (nextch == '*') {
> +		for (;;) {
> +			ch = get_next(p);
> +			if (ch == EOF)
> +				return EOF;
> +
> +			if (ch == '*') {
> +				ch = get_next(p);
> +				if (ch == EOF)
> +					return EOF;
> +
> +				if (ch == '/')
> +					return PARSER_COMMENT_FOUND;
> +			}
> +		}
> +	}
> +
> +	/* Not a comment, push back */
> +	unget_next(p, nextch);
> +
> +	return PARSER_OK;
> +}
> +
> +/*
> + *  Parse an integer.  This is fairly minimal as the
> + *  kernel doesn't have floats or doubles, so we
> + *  can just parse decimal, octal or hex values.
> + */
> +static int parse_number(parser *p, token *t, int ch)
> +{
> +	int nextch1, nextch2;
> +	bool ishex = false;
> +	bool isoct = false;
> +
> +	/*
> +	 *  Crude way to detect the kind of integer
> +	 */
> +	if (ch == '0') {
> +		token_append(t, ch);
> +
> +		nextch1 = get_next(p);
> +		if (nextch1 == EOF) {
> +			token_append(t, ch);
> +			return PARSER_OK;
> +		}
> +
> +		if (nextch1 >= '0' && nextch1 <= '8') {
> +			/* Must be an octal value */
> +			ch = nextch1;
> +			isoct = true;
> +		} else if (nextch1 == 'x' || nextch1 == 'X') {
> +			/* Is it hexadecimal? */
> +			nextch2 = get_next(p);
> +			if (nextch2 == EOF) {
> +				unget_next(p, nextch1);
> +				return PARSER_OK;
> +			}
> +
> +			if (isxdigit(nextch2)) {
> +				/* Hexadecimal */
> +				token_append(t, nextch1);
> +				ch = nextch2;
> +				ishex = true;
> +			} else {
> +				/* Nope */
> +				unget_next(p, nextch2);
> +				unget_next(p, nextch1);
> +				return PARSER_OK;
> +			}
> +		} else {
> +			unget_next(p, nextch1);
> +			return PARSER_OK;
> +		}
> +	}
> +
> +	/*
> +	 * OK, we now know what type of integer we
> +	 * are processing, so just gather up the digits
> +	 */
> +	token_append(t, ch);
> +
> +	for (;;) {
> +		ch = get_next(p);
> +
> +		if (ch == EOF) {
> +			unget_next(p, ch);
> +			return PARSER_OK;
> +		}
> +
> +		if (ishex) {
> +			if (isxdigit(ch)) {
> +				token_append(t, ch);
> +			} else {
> +				unget_next(p, ch);
> +				return PARSER_OK;
> +			}
> +		} else if (isoct) {
> +			if (ch >= '0' && ch <= '8') {
> +				token_append(t, ch);
> +			} else {
> +				unget_next(p, ch);
> +				return PARSER_OK;
> +			}
> +		} else {
> +			if (isdigit(ch)) {
> +				token_append(t, ch);
> +			} else {
> +				unget_next(p, ch);
> +				return PARSER_OK;
> +			}
> +		}
> +	}
> +}
> +
> +/*
> + *  Parse identifiers
> + */
> +static int parse_identifier(parser *p, token *t, int ch)
> +{
> +	token_append(t, ch);
> +
> +	t->type = TOKEN_IDENTIFIER;
> +
> +	for (;;) {
> +		ch = get_next(p);
> +		if (ch == EOF) {
> +			break;
> +		}
> +		if (isalnum(ch) || ch == '_') {
> +			token_append(t, ch);
> +		} else {
> +			unget_next(p, ch);
> +			break;
> +		}
> +	}
> +
> +	return PARSER_OK;
> +}
> +
> +/*
> + *  Parse literal strings
> + */
> +static int parse_literal(parser *p, token *t, int literal, token_type type)
> +{
> +	bool escaped = false;
> +	int ch;
> +
> +	t->type = type;
> +
> +	token_append(t, literal);
> +
> +	for (;;) {
> +		ch = get_next(p);
> +		if (ch == EOF) {
> +			return PARSER_OK;
> +		}
> +
> +		if (ch == '\\') {
> +			escaped = true;
> +			token_append(t, ch);
> +			continue;
> +		}
> +
> +		if (!escaped && ch == literal) {
> +			token_append(t, ch);
> +			return PARSER_OK;
> +		}
> +		escaped = false;
> +
> +		token_append(t, ch);
> +	}
> +
> +	return PARSER_OK;
> +}
> +
> +/*
> + *  Parse operators such as +, - which can
> + *  be + or ++ forms.
> + */
> +static int parse_op(parser *p, token *t, int op)
> +{
> +	int ch;
> +
> +	token_append(t, op);
> +
> +	ch = get_next(p);
> +	if (ch == EOF) {
> +		return PARSER_OK;
> +	}
> +
> +	if (ch == op) {
> +		token_append(t, op);
> +		return PARSER_OK;
> +	}
> +
> +	unget_next(p, ch);
> +	return PARSER_OK;
> +}
> +
> +/*
> + *  Parse -, --, ->
> + */
> +static int parse_minus(parser *p, token *t, int op)
> +{
> +	int ch;
> +
> +	token_append(t, op);
> +
> +	ch = get_next(p);
> +	if (ch == EOF) {
> +		return PARSER_OK;
> +	}
> +
> +	if (ch == op) {
> +		token_append(t, ch);
> +		return PARSER_OK;
> +	}
> +
> +	if (ch == '>') {
> +		token_append(t, ch);
> +		t->type = TOKEN_ARROW;
> +		return PARSER_OK;
> +	}
> +
> +	unget_next(p, ch);
> +	return PARSER_OK;
> +}
> +
> +/*
> + *  Gather a token from input stream
> + */
> +static int get_token(parser *p, token *t)
> +{
> +	int ch;
> +	int ret;
> +
> +	for (;;) {
> +		ch = get_next(p);
> +
> +		switch (ch) {
> +		case EOF:
> +			return EOF;
> +
> +		/* Skip comments */
> +		case '/':
> +			ret = skip_comments(p);
> +			if (ret == EOF)
> +				return EOF;
> +			if (ret == PARSER_COMMENT_FOUND)
> +				continue;
> +			token_append(t, ch);
> +			return PARSER_OK;
> +		case '#':
> +			token_append(t, ch);
> +			t->type = TOKEN_CPP;
> +			return PARSER_OK;
> +		case ' ':
> +		case '\t':
> +		case '\r':
> +		case '\n':
> +		case '\\':
> +			if (p->skip_white_space)
> +				continue;
> +			else {
> +				token_append(t, ch);
> +				t->type = TOKEN_WHITE_SPACE;
> +				return PARSER_OK;
> +			}
> +		case '(':
> +			token_append(t, ch);
> +			t->type = TOKEN_PAREN_OPENED;
> +			return PARSER_OK;
> +		case ')':
> +			token_append(t, ch);
> +			t->type = TOKEN_PAREN_CLOSED;
> +			return PARSER_OK;
> +		case '<':
> +			token_append(t, ch);
> +			t->type = TOKEN_LESS_THAN;
> +			return PARSER_OK;
> +		case '>':
> +			token_append(t, ch);
> +			t->type = TOKEN_GREATER_THAN;
> +			return PARSER_OK;
> +		case ',':
> +			token_append(t, ch);
> +			t->type = TOKEN_COMMA;
> +			return PARSER_OK;
> +		case ';':
> +			token_append(t, ch);
> +			t->type = TOKEN_TERMINAL;
> +			return PARSER_OK;
> +		case '{':
> +		case '}':
> +		case ':':
> +		case '~':
> +		case '?':
> +		case '*':
> +		case '%':
> +		case '!':
> +		case '.':
> +			token_append(t, ch);
> +			return PARSER_OK;
> +		case '0'...'9':
> +			return parse_number(p, t, ch);
> +			break;
> +		case 'a'...'z':
> +		case 'A'...'Z':
> +			return parse_identifier(p, t, ch);
> +			break;
> +		case '"':
> +			return parse_literal(p, t, ch, TOKEN_LITERAL_STRING);
> +		case '\'':
> +			return parse_literal(p, t, ch, TOKEN_LITERAL_CHAR);
> +		case '+':
> +		case '=':
> +		case '|':
> +		case '&':
> +			return parse_op(p, t, ch);
> +		case '-':
> +			return parse_minus(p, t, ch);
> +		}
> +	}
> +
> +	return PARSER_OK;
> +}
> +
> +/*
> + *  Literals such as "foo" and 'f' sometimes
> + *  need the quotes stripping off.
> + */
> +static void literal_strip_quotes(token *t)
> +{
> +	size_t len = strlen(t->token);
> +
> +	t->token[len-1] = 0;
> +
> +	strcpy(t->token, t->token + 1);
> +}
> +
> +/*
> + *  Concatenate new string onto old. The old
> + *  string can be NULL or an existing string
> + *  on the heap.  This returns the newly
> + *  concatenated string.
> + */
> +static char *strdupcat(char *old, char *new)
> +{
> +	size_t len = strlen(new);
> +	char *tmp;
> +
> +	if (old == NULL) {
> +		tmp = malloc(len + 1);
> +		if (tmp == NULL) {
> +			fprintf(stderr, "strdupcat(): Out of memory.\n");
> +			exit(EXIT_FAILURE);
> +		}
> +		strcpy(tmp, new);
> +	} else {
> +		size_t oldlen = strlen(old);
> +		tmp = realloc(old, oldlen + len + 1);
> +		if (tmp == NULL) {
> +			fprintf(stderr, "strdupcat(): Out of memory.\n");
> +			exit(EXIT_FAILURE);
> +		}
> +		strcat(tmp, new);
> +	}
> +
> +	return tmp;
> +}
> +
> +/*
> + *  Parse a kernel message, like printk() or dev_err()
> + */
> +static int parse_kernel_message(parser *p, token *t)
> +{
> +	int ret;
> +	bool got_string = false;
> +	bool emit = false;
> +	bool found = false;
> +	token_type prev_token_type = TOKEN_UNKNOWN;
> +	char *str = NULL;
> +	char *line = NULL;
> +	bool printk;
> +
> +	printk = (strcmp(t->token, "printk") == 0);
> +
> +	if (strcmp(t->token, "dev_err") == 0) {
> +		emit = true;
> +		line = strdupcat(line, "dev_err");
> +	}
> +	token_clear(t);
> +
> +	for (;;) {
> +		ret = get_token(p, t);
> +		if (ret == EOF) {
> +			return EOF;
> +		}
> +
> +		/*
> +		 *  Hit ; so lets push out what we've parsed
> +		 */
> +		if (t->type == TOKEN_TERMINAL) {
> +			if (emit) {
> +				if (found) {
> +					printf("OK : %s\n", line);
> +				} else {
> +					printf("ADD: %s\n", line);
> +				}
> +				free(line);
> +			}
> +			return PARSER_OK;
> +		}
> +
> +		/*
> +		 *  We are only interested in KERN_ERR
> +		 *  printk messages
> +		 */
> +		if (printk &&
> +		    (t->type == TOKEN_IDENTIFIER) &&
> +		    (prev_token_type == TOKEN_PAREN_OPENED) &&
> +		    (strcmp(t->token, "KERN_ERR") == 0)) {
> +			line = strdupcat(line, "printk( ");
> +		}
> +
> +		if (t->type == TOKEN_LITERAL_STRING) {
> +			literal_strip_quotes(t);
> +			str = strdupcat(str, t->token);
> +
> +			if (!got_string)
> +				line = strdupcat(line, "\"");
> +
> +			got_string = true;
> +		} else {
> +			if (got_string)
> +				line = strdupcat(line, "\"");
> +
> +			got_string = false;
> +
> +			if (str) {
> +				found |= klog_find(str, patterns);
> +				free(str);
> +				str = NULL;
> +			}
> +		}
> +
> +		line = strdupcat(line, t->token);
> +		if (t->type == TOKEN_COMMA)
> +			line = strdupcat(line, " ");
> +
> +		prev_token_type = t->type;
> +
> +		token_clear(t);
> +	}
> +}
> +
> +/*
> + *  Parse input looking for printk or dev_err calls
> + */
> +static void parse_kernel_messages(FILE *fp)
> +{
> +	token t;
> +	parser p;
> +
> +	parser_new(&p, fp, true);
> +	p.fp = fp;
> +	p.skip_white_space = true;
> +
> +	token_new(&t);
> +
> +	while ((get_token(&p, &t)) != EOF) {
> +		if ((strcmp(t.token, "printk") == 0) ||
> +		    (strcmp(t.token, "dev_err") == 0)) {
> +			parse_kernel_message(&p, &t);
> +		} else
> +			token_clear(&t);
> +	}
> +
> +	token_free(&t);
> +}
> +
> +/*
> + *  This is evil.  We parse the input stream
> + *  and throw away all #includes so we don't get
> + *  gcc -E breaking on include files that we haven't
> + *  got.  We don't really care at this level about
> + *  macros being expanded as we want to see tokens
> + *  such as KERN_ERR later on.
> + */
> +static int parse_cpp_include(parser *p, token *t)
> +{
> +	/*
> +	 *  Gloop up #include "foo.h"
> +	 */
> +	do {
> +		token_clear(t);
> +		if (get_token(p, t) == EOF)
> +			return EOF;
> +		/* End of line, we're done! */
> +		if (strcmp(t->token, "\n") == 0)
> +			return PARSER_OK;
> +	} while (t->type == TOKEN_WHITE_SPACE);
> +
> +
> +	/*
> +	 *  Ah, we gobbled up white spaces and
> +	 *  now we should be at a '<' token
> +	 *  Parse #include <something/foo.h>
> +	 */
> +	if (t->type == TOKEN_LESS_THAN) {
> +		do {
> +			if (get_token(p, t) == EOF)
> +				return EOF;
> +		} while (t->type != TOKEN_GREATER_THAN);
> +	}
> +
> +	token_clear(t);
> +
> +	return PARSER_OK;
> +}
> +
> +/*
> + *  CPP phase, find and remove #includes
> + */
> +static int parse_cpp_includes(FILE *fp)
> +{
> +	token t;
> +	parser p;
> +
> +	parser_new(&p, fp, false);
> +	p.fp = fp;
> +	p.skip_white_space = false;
> +
> +	token_new(&t);
> +
> +	while ((get_token(&p, &t)) != EOF) {
> +		if (t.type == TOKEN_CPP) {
> +			for (;;) {
> +				token_clear(&t);
> +				if (get_token(&p, &t) == EOF)
> +					return EOF;
> +				if (strcmp(t.token, "\n") == 0)
> +					break;
> +				if (t.type == TOKEN_WHITE_SPACE) {
> +					continue;
> +				}
> +				if (strcmp(t.token, "include") == 0) {
> +					if (parse_cpp_include(&p, &t) == EOF)
> +						return EOF;
> +					break;
> +				}
> +				printf("#%s", t.token);
> +				break;
> +			}
> +		} else {
> +			printf("%s", t.token);
> +		}
> +		token_clear(&t);
> +	}
> +	return EOF;
> +}
> +
> +/*
> + *  Scan kernel source for printk KERN_ERR and dev_err
> + *  calls.
> + *
> + *  Usage:
> + *  	cat drivers/pnp/pnpacpi/rsparser.c | kernelscan -E | gcc  -E - | kernelscan -P
> + *
> + *  This prints out any kernel printk KERN_ERR calls
> + *  or dev_err calls and checks to see if the error can be matched by
> + *  any of the fwts klog messages.  It has some intelligence, it glues
> + *  literal strings together such as "this is" "a message" into
> + *  "this is a message" before it makes the klog comparison.
> + */
> +int main(int argc, char **argv)
> +{
> +	if (argc < 2) {
> +		fprintf(stderr, "%s: [-E] [-P]\n", argv[0]);
> +		exit(EXIT_FAILURE);
> +	}
> +
> +	/*
> +	 *  GCC -E preprocess phase
> +	 */
> +	if (strcmp(argv[1], "-E") == 0) {
> +		parse_cpp_includes(stdin);
> +		exit(EXIT_SUCCESS);
> +	}
> +
> +	/*
> +	 *  Parse kernel printk and dev_err phase
> +	 */
> +	if (strcmp(argv[1], "-P") == 0) {
> +		patterns = klog_load("firmware_error_warning_patterns");
> +		parse_kernel_messages(stdin);
> +		klog_free(patterns);
> +	}
> +
> +	exit(EXIT_SUCCESS);
> +}
>
Acked-by: Alex Hung <alex.hung@canonical.com>
diff mbox

Patch

diff --git a/Makefile.am b/Makefile.am
index 09f5bec..057c47c 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,3 +1,3 @@ 
-SUBDIRS = src data
+SUBDIRS = src data src/utilities
 
 ACLOCAL_AMFLAGS = -I m4
diff --git a/configure.ac b/configure.ac
index 48481aa..77d44bf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -81,6 +81,7 @@ 
 	   src/acpica/Makefile
            src/lib/Makefile
            src/lib/src/Makefile
+	   src/utilities/Makefile
            data/Makefile
           ])
           AC_OUTPUT
diff --git a/src/utilities/Makefile.am b/src/utilities/Makefile.am
new file mode 100644
index 0000000..427c44f
--- /dev/null
+++ b/src/utilities/Makefile.am
@@ -0,0 +1,6 @@ 
+AM_CPPFLAGS = -Wall -Werror -Wextra
+
+bin_PROGRAMS = kernelscan
+kernelscan_SOURCES = kernelscan.c
+kernelscan_LDFLAGS = -ljson -lpcre
+
diff --git a/src/utilities/kernelscan.c b/src/utilities/kernelscan.c
new file mode 100644
index 0000000..7569079
--- /dev/null
+++ b/src/utilities/kernelscan.c
@@ -0,0 +1,976 @@ 
+/*
+ * Copyright (C) 2012 Canonical
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <unistd.h>
+
+#include <pcre.h>
+#include <json/json.h>
+
+/* Return codes used by the lexer helpers (EOF is used for end of input) */
+#define PARSER_OK		0
+#define PARSER_COMMENT_FOUND	1
+
+#define __JSON_ERR_PTR__ ((json_object *) -1)
+/*
+ *  Older versions of json-c may return an error in an
+ *  object as a ((json_object*)-1), whereas newer
+ *  versions return NULL, so check for both. Sigh.
+ *
+ *  Note: ptr is evaluated twice, so only pass expressions
+ *  without side effects.
+ */
+#define JSON_ERROR(ptr) \
+	(((ptr) == NULL) || ((json_object *)(ptr) == __JSON_ERR_PTR__))
+
+/*
+ *  How a klog pattern is compared against a kernel message.
+ *  The enumerators are given character code values.
+ */
+typedef enum {
+        COMPARE_REGEX = 'r',	/* pattern is a regular expression */
+        COMPARE_STRING = 's',	/* pattern is a plain sub-string */
+        COMPARE_UNKNOWN = 'u',	/* mode string was neither "regex" nor "string" */
+} compare_mode;
+
+/*
+ *  One klog pattern.  klog_load() returns an array of these
+ *  that is terminated by an entry with a NULL pattern.
+ */
+typedef struct {
+        char *pattern;		/* pattern that we compare to kernel messages */
+	compare_mode cm;	/* 'r' regex or 's' string comparison */
+        pcre *re;		/* regex compiled from pattern, NULL if that failed */
+        pcre_extra *extra;	/* result of pcre_study() on re, may be NULL */
+} klog_pattern;
+
+/*
+ *  Subset of tokens that we need to intelligently parse the kernel C source.
+ *  Anything the lexer does not classify is left as TOKEN_UNKNOWN.
+ */
+typedef enum {
+	TOKEN_UNKNOWN,		/* No idea what token it is */
+	TOKEN_NUMBER,		/* Integer */
+	TOKEN_LITERAL_STRING,	/* "string" */
+	TOKEN_LITERAL_CHAR,	/* 'x' */
+	TOKEN_IDENTIFIER,	/* identifier */
+	TOKEN_PAREN_OPENED,	/* ( */
+	TOKEN_PAREN_CLOSED,	/* ) */
+	TOKEN_CPP,		/* # C pre-processor */
+	TOKEN_WHITE_SPACE,	/* ' ', '\t', '\r', '\n' white space (and '\\') */
+	TOKEN_LESS_THAN,	/* < */
+	TOKEN_GREATER_THAN,	/* > */
+	TOKEN_COMMA,		/* , */
+	TOKEN_ARROW,		/* -> */
+	TOKEN_TERMINAL,		/* ; */
+} token_type;
+
+/*
+ *  A token: a growable, NUL terminated text buffer plus the
+ *  type the lexer assigned to it.  Set up with token_new(),
+ *  reset with token_clear() and released with token_free().
+ */
+typedef struct {
+	char *token;		/* The gathered string for this token */
+	size_t len;		/* Length of the token buffer */
+	char *ptr;		/* Current end of the token during the lexical analysis */
+	token_type type;	/* The type of token we think it is */
+} token;
+
+/*
+ *  Quick and dirty way to push input stream back, like ungetc().
+ *  Each pushed back character is a node in a singly linked list;
+ *  unget_next() adds at the head and get_next() removes from the head.
+ */
+typedef struct get_stack {
+	int ch;			/* Char pushed back */
+	struct get_stack *next;	/* Next one in list */
+} get_stack;
+
+/*
+ *  Parser context, initialised by parser_new()
+ */
+typedef struct {
+	FILE *fp;		/* The stdio stream we are reading */
+	bool skip_white_space;	/* If true get_token() discards white space instead of returning it */
+	get_stack *get_chars;	/* Ungot chars get pushed onto this */
+} parser;
+
+/*
+ *  FWTS klog patterns, loaded from a json file.  Set by main()
+ *  from klog_load() and consulted by parse_kernel_message().
+ */
+static klog_pattern *patterns;
+
+/* Forward declaration of the lexer entry point, defined further down */
+static int get_token(parser *p, token *t);
+
+/*
+ *  Set up a parser context to read from fp with nothing
+ *  pushed back.  skip_white_space selects whether get_token()
+ *  discards white space or hands it back as tokens.
+ */
+static void parser_new(parser *p, FILE *fp, bool skip_white_space)
+{
+	*p = (parser) {
+		.fp = fp,
+		.skip_white_space = skip_white_space,
+		.get_chars = NULL,
+	};
+}
+
+/*
+ *  Fetch the next input character.  Characters that were
+ *  pushed back with unget_next() are returned first (most
+ *  recently pushed first), otherwise we read the stream.
+ */
+static int get_next(parser *p)
+{
+	get_stack *top = p->get_chars;
+	int ch;
+
+	/* Nothing pushed back, so read from the stream */
+	if (top == NULL)
+		return fgetc(p->fp);
+
+	/* Pop the head of the pushed back list */
+	ch = top->ch;
+	p->get_chars = top->next;
+	free(top);
+
+	return ch;
+}
+
+/*
+ *  Push a character back onto the input stream (in this
+ *  case, onto the head of a simple LIFO list), so that it
+ *  is the next one returned by get_next().  Exits if we
+ *  run out of memory.
+ */
+static void unget_next(parser *p, int ch)
+{
+	get_stack *node = calloc(1, sizeof(*node));
+
+	if (node == NULL) {
+		fprintf(stderr, "unget_next: Out of memory!\n");
+		exit(EXIT_FAILURE);
+	}
+
+	node->next = p->get_chars;
+	node->ch = ch;
+	p->get_chars = node;
+}
+
+/*
+ *  Initialise a token with a zeroed 1K buffer.  The slop
+ *  means we rarely have to grow the buffer while characters
+ *  are appended during lexing.  Exits if we run out of memory.
+ */
+static void token_new(token *t)
+{
+	const size_t initial_len = 1024;
+	char *buf = calloc(initial_len, 1);
+
+	if (buf == NULL) {
+		fprintf(stderr, "token_new: Out of memory!\n");
+		exit(EXIT_FAILURE);
+	}
+
+	t->token = buf;
+	t->ptr = buf;
+	t->len = initial_len;
+	t->type = TOKEN_UNKNOWN;
+}
+
+/*
+ *  Reset the token to an empty string of unknown type,
+ *  keeping its buffer so it can be re-used
+ */
+static void token_clear(token *t)
+{
+	t->type = TOKEN_UNKNOWN;
+	t->ptr = t->token;
+	t->token[0] = '\0';
+}
+
+/*
+ *  Free the token buffer.  All the buffer related fields
+ *  are reset so that no stale pointer into the freed
+ *  buffer is left behind.
+ */
+static void token_free(token *t)
+{
+	free(t->token);
+	t->token = NULL;
+	t->ptr = NULL;
+	t->len = 0;
+}
+
+/*
+ *  Append a single character to the token, keeping it
+ *  NUL terminated.  We may run out of space, so this
+ *  occasionally adds an extra 1K of token space for long
+ *  tokens.  Exits if we run out of memory.
+ */
+static void token_append(token *t, int ch)
+{
+	if (t->ptr >= t->token + t->len - 1) {
+		/* No room for ch and the terminating NUL, add 1K more space */
+		const size_t used = (size_t)(t->ptr - t->token);
+		const size_t new_len = t->len + 1024;
+		char *tmp = realloc(t->token, new_len);
+
+		if (tmp == NULL) {
+			fprintf(stderr, "token_append: Out of memory!\n");
+			exit(EXIT_FAILURE);
+		}
+		/*
+		 *  realloc may have moved the buffer, so the
+		 *  write pointer must be re-based onto the new one
+		 */
+		t->token = tmp;
+		t->len = new_len;
+		t->ptr = tmp + used;
+	}
+
+	*(t->ptr) = (char)ch;
+	t->ptr++;
+	*(t->ptr) = 0;
+}
+
+/*
+ *  Map the klog "compare_mode" string onto a compare_mode:
+ *  "regex" and "string" are recognised, anything else is
+ *  COMPARE_UNKNOWN.
+ */
+static compare_mode klog_compare_mode_str_to_val(const char *str)
+{
+	if (!strcmp(str, "regex"))
+		return COMPARE_REGEX;
+
+	if (!strcmp(str, "string"))
+		return COMPARE_STRING;
+
+	return COMPARE_UNKNOWN;
+}
+
+/*
+ *  Load the named table of FWTS klog messages from the
+ *  json file /usr/share/fwts/klog.json.
+ *
+ *  Returns a heap allocated array of patterns terminated
+ *  by an entry with a NULL pattern; release it with
+ *  klog_free().  Any failure to load or parse the table is
+ *  fatal.  A pattern that fails to compile or to be studied
+ *  as a regex is reported and left with re == NULL.
+ */
+static klog_pattern *klog_load(const char *table)
+{
+	int n;
+	int i;
+	json_object *klog_objs;
+	json_object *klog_table;
+	klog_pattern *tbl;
+
+	klog_objs = json_object_from_file("/usr/share/fwts/klog.json");
+	if (JSON_ERROR(klog_objs)) {
+		fprintf(stderr, "Cannot load klog data\n");
+		exit(EXIT_FAILURE);
+	}
+
+	klog_table = json_object_object_get(klog_objs, table);
+	if (JSON_ERROR(klog_table)) {
+		fprintf(stderr, "Cannot fetch klog table object from %s.\n", table);
+		exit(EXIT_FAILURE);
+	}
+
+	n = json_object_array_length(klog_table);
+
+	/* Last entry is null to indicate end, so alloc n+1 items */
+	if ((tbl = calloc(n+1, sizeof(klog_pattern))) == NULL) {
+		fprintf(stderr, "Cannot allocate pattern table.\n");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Now fetch json objects and compile regex */
+	for (i = 0; i < n; i++) {
+		const char *error;
+		const char *str;
+		int erroffset;
+		json_object *obj;
+
+		obj = json_object_array_get_idx(klog_table, i);
+		if (JSON_ERROR(obj)) {
+			fprintf(stderr, "Cannot fetch %d item from table %s.\n", i, table);
+			exit(EXIT_FAILURE);
+		}
+
+		str = json_object_get_string(json_object_object_get(obj, "compare_mode"));
+		if (JSON_ERROR(str)) {
+			fprintf(stderr, "Cannot fetch compare_mode  object, item %d from table %s.\n", i, table);
+			exit(EXIT_FAILURE);
+		}
+		tbl[i].cm = klog_compare_mode_str_to_val(str);
+
+		str = json_object_get_string(json_object_object_get(obj, "pattern"));
+		if (JSON_ERROR(str)) {
+			fprintf(stderr, "Cannot fetch pattern object, item %d from table %s.\n", i, table);
+			exit(EXIT_FAILURE);
+		}
+		/* The json string is owned by the json object, so take a copy */
+		tbl[i].pattern = strdup(str);
+		if (tbl[i].pattern == NULL) {
+			fprintf(stderr, "Failed to strdup regex pattern %d from table %s.\n", i, table);
+			exit(EXIT_FAILURE);
+		}
+
+		tbl[i].re = pcre_compile(tbl[i].pattern, 0, &error, &erroffset, NULL);
+		if (tbl[i].re == NULL) {
+			fprintf(stderr, "Regex %s failed to compile: %s.\n", tbl[i].pattern, error);
+			continue;
+		}
+
+		tbl[i].extra = pcre_study(tbl[i].re, 0, &error);
+		if (error != NULL) {
+			fprintf(stderr, "Regex %s failed to optimize: %s.\n", tbl[i].pattern, error);
+			/* Disable this regex, but don't leak the compiled form */
+			pcre_free(tbl[i].re);
+			tbl[i].re = NULL;
+		}
+	}
+
+	/* Discard the json table now we've parsed it into patterns */
+	json_object_put(klog_objs);
+
+	return tbl;
+}
+
+/*
+ *  Does str match any of the patterns in the klog pattern table?
+ *
+ *  String patterns match if they occur anywhere in str, regex
+ *  patterns match if pcre_exec() finds a match.  Regex patterns
+ *  that failed to compile (re == NULL) never match.
+ */
+static bool klog_find(char *str, klog_pattern *patterns)
+{
+	const int len = (int)strlen(str);
+	int i;
+
+	for (i = 0; patterns[i].pattern; i++) {
+		if (patterns[i].cm == COMPARE_STRING) {
+			if (strstr(str, patterns[i].pattern))
+				return true;
+		} else if (patterns[i].cm == COMPARE_REGEX) {
+			int vector[3];
+
+			if (patterns[i].re == NULL)
+				continue;
+			/*
+			 *  A negative return is no-match or an error;
+			 *  0 means it matched but the vector was too
+			 *  small for all captures, > 0 is a plain match.
+			 */
+			if (pcre_exec(patterns[i].re, patterns[i].extra, str, len, 0, 0, vector, 3) >= 0)
+				return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ *  Release a pattern table from klog_load(): the regex
+ *  data and pattern string of each entry up to the NULL
+ *  pattern terminator, then the table itself
+ */
+static void klog_free(klog_pattern *patterns)
+{
+	klog_pattern *kp;
+
+	for (kp = patterns; kp->pattern != NULL; kp++) {
+		pcre_free(kp->re);
+		pcre_free(kp->extra);
+		free(kp->pattern);
+	}
+
+	free(patterns);
+}
+
+/*
+ *  Parse C comments and just throw them away.  Called
+ *  after a '/' has been read; looks at the next character
+ *  to see if this is a // or a C style comment.
+ *
+ *  Returns PARSER_COMMENT_FOUND if a comment was skipped,
+ *  PARSER_OK if the '/' did not start a comment (the
+ *  character after it is pushed back) or EOF if the input
+ *  ran out.
+ */
+static int skip_comments(parser *p)
+{
+	int ch;
+	int nextch;
+
+	nextch = get_next(p);
+	if (nextch == EOF)
+		return EOF;
+
+	if (nextch == '/') {
+		/* C++ style comment, skip to end of line */
+		do {
+			ch = get_next(p);
+			if (ch == EOF)
+				return EOF;
+		}
+		while (ch != '\n');
+
+		return PARSER_COMMENT_FOUND;
+	}
+
+	if (nextch == '*') {
+		for (;;) {
+			ch = get_next(p);
+			if (ch == EOF)
+				return EOF;
+
+			if (ch != '*')
+				continue;
+
+			ch = get_next(p);
+			if (ch == EOF)
+				return EOF;
+
+			if (ch == '/')
+				return PARSER_COMMENT_FOUND;
+
+			/*
+			 *  Not the end of the comment.  Push the char
+			 *  back as it may itself be the '*' of the
+			 *  terminator, e.g. a comment ending in "**" "/"
+			 */
+			unget_next(p, ch);
+		}
+	}
+
+	/* Not a comment, push back */
+	unget_next(p, nextch);
+
+	return PARSER_OK;
+}
+
+/*
+ *  Parse an integer.  This is fairly minimal as the
+ *  kernel doesn't have floats or doubles, so we
+ *  can just parse decimal, octal or hex values.
+ *
+ *  ch is the first digit, which has already been read.
+ *  The digits are gathered into t, which is marked as a
+ *  TOKEN_NUMBER, and the first character that is not part
+ *  of the number is pushed back.  Always returns PARSER_OK.
+ */
+static int parse_number(parser *p, token *t, int ch)
+{
+	int nextch1, nextch2;
+	bool ishex = false;
+	bool isoct = false;
+
+	t->type = TOKEN_NUMBER;
+
+	/*
+	 *  Crude way to detect the kind of integer
+	 */
+	if (ch == '0') {
+		token_append(t, ch);
+
+		nextch1 = get_next(p);
+		if (nextch1 == EOF) {
+			/* A lone 0 at the end of the input */
+			return PARSER_OK;
+		}
+
+		if (nextch1 >= '0' && nextch1 <= '7') {
+			/* Must be an octal value */
+			ch = nextch1;
+			isoct = true;
+		} else if (nextch1 == 'x' || nextch1 == 'X') {
+			/* Is it hexadecimal? */
+			nextch2 = get_next(p);
+			if (nextch2 == EOF) {
+				unget_next(p, nextch1);
+				return PARSER_OK;
+			}
+
+			if (isxdigit(nextch2)) {
+				/* Hexadecimal */
+				token_append(t, nextch1);
+				ch = nextch2;
+				ishex = true;
+			} else {
+				/* Nope, push back in reverse order of reading */
+				unget_next(p, nextch2);
+				unget_next(p, nextch1);
+				return PARSER_OK;
+			}
+		} else {
+			/* Just a zero followed by something else */
+			unget_next(p, nextch1);
+			return PARSER_OK;
+		}
+	}
+
+	/*
+	 * OK, we now know what type of integer we
+	 * are processing, so just gather up the digits
+	 */
+	token_append(t, ch);
+
+	for (;;) {
+		bool is_digit;
+
+		ch = get_next(p);
+
+		if (ch == EOF) {
+			unget_next(p, ch);
+			return PARSER_OK;
+		}
+
+		if (ishex)
+			is_digit = isxdigit(ch) != 0;
+		else if (isoct)
+			is_digit = (ch >= '0' && ch <= '7');
+		else
+			is_digit = isdigit(ch) != 0;
+
+		if (!is_digit) {
+			unget_next(p, ch);
+			return PARSER_OK;
+		}
+		token_append(t, ch);
+	}
+}
+
+/*
+ *  Parse an identifier.  ch is its first character, which
+ *  has already been read; following alphanumerics and
+ *  underscores are gathered into t and the character that
+ *  ends the identifier is pushed back.
+ */
+static int parse_identifier(parser *p, token *t, int ch)
+{
+	t->type = TOKEN_IDENTIFIER;
+	token_append(t, ch);
+
+	while ((ch = get_next(p)) != EOF) {
+		if (!isalnum(ch) && ch != '_') {
+			unget_next(p, ch);
+			break;
+		}
+		token_append(t, ch);
+	}
+
+	return PARSER_OK;
+}
+
+/*
+ *  Parse literal strings and character literals.
+ *
+ *  literal is the opening quote character, which has
+ *  already been read, and type is the token type to give
+ *  the result.  Everything up to and including the matching
+ *  unescaped closing quote is gathered into t, quotes and
+ *  escapes included.  A literal cut short by the end of the
+ *  input is returned as is.  Always returns PARSER_OK.
+ */
+static int parse_literal(parser *p, token *t, int literal, token_type type)
+{
+	bool escaped = false;
+	int ch;
+
+	t->type = type;
+
+	token_append(t, literal);
+
+	for (;;) {
+		ch = get_next(p);
+		if (ch == EOF)
+			return PARSER_OK;
+
+		token_append(t, ch);
+
+		if (escaped) {
+			/*
+			 *  This char is consumed by the preceding
+			 *  backslash, be it a quote or another backslash
+			 */
+			escaped = false;
+			continue;
+		}
+
+		if (ch == '\\')
+			escaped = true;
+		else if (ch == literal)
+			return PARSER_OK;
+	}
+}
+
+/*
+ *  Parse an operator that has a single and a doubled
+ *  form, such as + and ++.  op has already been read;
+ *  if the next character is the same it joins the token,
+ *  otherwise it is pushed back.
+ */
+static int parse_op(parser *p, token *t, int op)
+{
+	int next;
+
+	token_append(t, op);
+
+	next = get_next(p);
+	if (next == op)
+		token_append(t, op);
+	else if (next != EOF)
+		unget_next(p, next);
+
+	return PARSER_OK;
+}
+
+/*
+ *  Parse -, -- and ->.  op has already been read; only
+ *  the -> form is given a token type (TOKEN_ARROW).
+ */
+static int parse_minus(parser *p, token *t, int op)
+{
+	int next;
+
+	token_append(t, op);
+
+	next = get_next(p);
+	if (next == EOF)
+		return PARSER_OK;
+
+	if (next == op) {
+		/* Doubled up operator */
+		token_append(t, next);
+	} else if (next == '>') {
+		token_append(t, next);
+		t->type = TOKEN_ARROW;
+	} else {
+		/* Plain single operator, next belongs to another token */
+		unget_next(p, next);
+	}
+
+	return PARSER_OK;
+}
+
+/*
+ *  Gather a token from input stream.
+ *
+ *  The text of the token is appended to t (callers clear
+ *  t between tokens) and t->type is set for the token
+ *  types we care about.  Comments are always skipped and
+ *  white space is skipped if the parser was set up to do
+ *  so.  Any character that is not otherwise recognised is
+ *  returned as a single character token rather than being
+ *  dropped, so that the -E phase reproduces its input.
+ *
+ *  Returns PARSER_OK, or EOF at the end of the input.
+ */
+static int get_token(parser *p, token *t)
+{
+	int ch;
+	int ret;
+
+	for (;;) {
+		ch = get_next(p);
+
+		switch (ch) {
+		case EOF:
+			return EOF;
+
+		/* Skip comments */
+		case '/':
+			ret = skip_comments(p);
+			if (ret == EOF)
+				return EOF;
+			if (ret == PARSER_COMMENT_FOUND)
+				continue;
+			/* Just a plain '/' */
+			token_append(t, ch);
+			return PARSER_OK;
+		case '#':
+			token_append(t, ch);
+			t->type = TOKEN_CPP;
+			return PARSER_OK;
+		case ' ':
+		case '\t':
+		case '\r':
+		case '\n':
+		case '\\':
+			if (p->skip_white_space)
+				continue;
+			token_append(t, ch);
+			t->type = TOKEN_WHITE_SPACE;
+			return PARSER_OK;
+		case '(':
+			token_append(t, ch);
+			t->type = TOKEN_PAREN_OPENED;
+			return PARSER_OK;
+		case ')':
+			token_append(t, ch);
+			t->type = TOKEN_PAREN_CLOSED;
+			return PARSER_OK;
+		case '<':
+			token_append(t, ch);
+			t->type = TOKEN_LESS_THAN;
+			return PARSER_OK;
+		case '>':
+			token_append(t, ch);
+			t->type = TOKEN_GREATER_THAN;
+			return PARSER_OK;
+		case ',':
+			token_append(t, ch);
+			t->type = TOKEN_COMMA;
+			return PARSER_OK;
+		case ';':
+			token_append(t, ch);
+			t->type = TOKEN_TERMINAL;
+			return PARSER_OK;
+		case '"':
+			return parse_literal(p, t, ch, TOKEN_LITERAL_STRING);
+		case '\'':
+			return parse_literal(p, t, ch, TOKEN_LITERAL_CHAR);
+		case '+':
+		case '=':
+		case '|':
+		case '&':
+			return parse_op(p, t, ch);
+		case '-':
+			return parse_minus(p, t, ch);
+		default:
+			if (isdigit(ch))
+				return parse_number(p, t, ch);
+			/* Identifiers may start with an underscore too */
+			if (isalpha(ch) || ch == '_')
+				return parse_identifier(p, t, ch);
+			/*
+			 *  Everything else, e.g. { } [ ] : ~ ? * % ! . ^
+			 *  is a single char token of unknown type
+			 */
+			token_append(t, ch);
+			return PARSER_OK;
+		}
+	}
+}
+
+/*
+ *  Literals such as "foo" and 'f' sometimes
+ *  need the quotes stripping off.  This is done in
+ *  place: the opening quote is always removed, the
+ *  closing quote is only removed if it is there (a
+ *  literal cut short by the end of input has none).
+ */
+static void literal_strip_quotes(token *t)
+{
+	size_t len = strlen(t->token);
+
+	if (len == 0)
+		return;
+
+	/* Drop the closing quote if it matches the opening one */
+	if (len > 1 && t->token[len - 1] == t->token[0])
+		len--;
+
+	/*
+	 *  Shuffle the text down over the opening quote; source
+	 *  and destination overlap so this has to be a memmove
+	 */
+	memmove(t->token, t->token + 1, len - 1);
+	t->token[len - 1] = '\0';
+
+	/* Keep the append position in step with the shorter string */
+	t->ptr = t->token + len - 1;
+}
+
+/*
+ *  Append a copy of new to old and return the result.
+ *  old is either NULL, in which case this acts as a
+ *  string duplicate, or a string on the heap, which is
+ *  reallocated and so must no longer be used by the
+ *  caller.  Exits if we run out of memory.
+ */
+static char *strdupcat(char *old, char *new)
+{
+	const size_t newlen = strlen(new);
+	const size_t oldlen = (old != NULL) ? strlen(old) : 0;
+	char *joined;
+
+	/* realloc of a NULL pointer behaves just like malloc */
+	joined = realloc(old, oldlen + newlen + 1);
+	if (joined == NULL) {
+		fprintf(stderr, "strdupcat(): Out of memory.\n");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Copy new, and its terminating NUL, onto the end */
+	memcpy(joined + oldlen, new, newlen + 1);
+
+	return joined;
+}
+
+/*
+ *  Parse a kernel message, like printk() or dev_err().
+ *
+ *  On entry t holds the "printk" or "dev_err" identifier.
+ *  Tokens are gathered up to the terminating ';', with
+ *  adjacent literal strings glued together and looked up
+ *  in the klog patterns.  dev_err calls and printk calls
+ *  whose first argument is KERN_ERR are then printed,
+ *  prefixed with "OK : " if any of their strings matched a
+ *  klog pattern and with "ADD: " if none did.
+ *
+ *  Returns PARSER_OK with t cleared, or EOF if the input
+ *  ran out before the ';' was found.
+ */
+static int parse_kernel_message(parser *p, token *t)
+{
+	bool got_string = false;
+	bool emit = false;
+	bool found = false;
+	token_type prev_token_type = TOKEN_UNKNOWN;
+	char *str = NULL;
+	char *line = NULL;
+	const bool printk = (strcmp(t->token, "printk") == 0);
+
+	if (strcmp(t->token, "dev_err") == 0) {
+		emit = true;
+		line = strdupcat(line, "dev_err");
+	}
+	token_clear(t);
+
+	for (;;) {
+		if (get_token(p, t) == EOF) {
+			/* Truncated statement, nothing to report */
+			free(str);
+			free(line);
+			return EOF;
+		}
+
+		/*
+		 *  Hit ; so lets push out what we've parsed
+		 */
+		if (t->type == TOKEN_TERMINAL) {
+			if (str) {
+				/* Strings not yet checked against the klog */
+				found |= klog_find(str, patterns);
+				free(str);
+			}
+			if (emit) {
+				if (found) {
+					printf("OK : %s\n", line);
+				} else {
+					printf("ADD: %s\n", line);
+				}
+			}
+			free(line);
+			/* Don't leave the ; to be glued onto the next token */
+			token_clear(t);
+			return PARSER_OK;
+		}
+
+		/*
+		 *  We are only interested in KERN_ERR
+		 *  printk messages
+		 */
+		if (printk &&
+		    (t->type == TOKEN_IDENTIFIER) &&
+		    (prev_token_type == TOKEN_PAREN_OPENED) &&
+		    (strcmp(t->token, "KERN_ERR") == 0)) {
+			line = strdupcat(line, "printk( ");
+			emit = true;
+		}
+
+		if (t->type == TOKEN_LITERAL_STRING) {
+			/* Glue adjacent literal strings into one */
+			literal_strip_quotes(t);
+			str = strdupcat(str, t->token);
+
+			if (!got_string)
+				line = strdupcat(line, "\"");
+
+			got_string = true;
+		} else {
+			if (got_string)
+				line = strdupcat(line, "\"");
+
+			got_string = false;
+
+			/* End of a run of strings, check it against the klog */
+			if (str) {
+				found |= klog_find(str, patterns);
+				free(str);
+				str = NULL;
+			}
+		}
+
+		line = strdupcat(line, t->token);
+		if (t->type == TOKEN_COMMA)
+			line = strdupcat(line, " ");
+
+		prev_token_type = t->type;
+
+		token_clear(t);
+	}
+}
+
+/*
+ *  Parse input looking for printk or dev_err calls and
+ *  hand each one found to parse_kernel_message().  White
+ *  space is skipped by the lexer in this phase.
+ */
+static void parse_kernel_messages(FILE *fp)
+{
+	token t;
+	parser p;
+
+	parser_new(&p, fp, true);
+	token_new(&t);
+
+	while ((get_token(&p, &t)) != EOF) {
+		if ((strcmp(t.token, "printk") == 0) ||
+		    (strcmp(t.token, "dev_err") == 0)) {
+			if (parse_kernel_message(&p, &t) == EOF)
+				break;
+		}
+		/*
+		 *  Always start the next token afresh, otherwise
+		 *  what is left in t gets glued onto the front of
+		 *  it and a directly following printk is missed
+		 */
+		token_clear(&t);
+	}
+
+	token_free(&t);
+}
+
+/*
+ *  This is evil.  We parse the input stream
+ *  and throw away all #includes so we don't get
+ *  gcc -E breaking on include files that we haven't
+ *  got.  We don't really care at this level about
+ *  macros being expanded as we want to see tokens
+ *  such as KERN_ERR later on.
+ *
+ *  Called once "include" has been read; swallows the
+ *  file name that follows it.  Returns PARSER_OK with t
+ *  cleared, or EOF if the input ran out.
+ */
+static int parse_cpp_include(parser *p, token *t)
+{
+	/*
+	 *  Step over white space up to the first token of
+	 *  the file name.  For #include "foo.h" that token
+	 *  is the whole quoted name, so it is swallowed here.
+	 */
+	for (;;) {
+		token_clear(t);
+		if (get_token(p, t) == EOF)
+			return EOF;
+		/* End of line, we're done! */
+		if (strcmp(t->token, "\n") == 0)
+			return PARSER_OK;
+		if (t->type != TOKEN_WHITE_SPACE)
+			break;
+	}
+
+	/*
+	 *  For #include <something/foo.h> we are now at the
+	 *  '<' so swallow tokens up to the closing '>'
+	 */
+	if (t->type == TOKEN_LESS_THAN) {
+		while (t->type != TOKEN_GREATER_THAN) {
+			if (get_token(p, t) == EOF)
+				return EOF;
+		}
+	}
+
+	token_clear(t);
+
+	return PARSER_OK;
+}
+
+/*
+ *  CPP phase, find and remove #includes.
+ *
+ *  Copies the input to stdout token by token (comments
+ *  are dropped by the lexer), leaving out any #include
+ *  directives.  Other directives are passed on with any
+ *  white space between the '#' and the directive name
+ *  removed.  Always returns EOF once the input runs out.
+ */
+static int parse_cpp_includes(FILE *fp)
+{
+	token t;
+	parser p;
+	bool eof = false;
+
+	parser_new(&p, fp, false);
+	token_new(&t);
+
+	while (!eof && (get_token(&p, &t)) != EOF) {
+		if (t.type != TOKEN_CPP) {
+			/* Not a directive, pass it through untouched */
+			printf("%s", t.token);
+			token_clear(&t);
+			continue;
+		}
+
+		/* Got a '#', find the directive name that follows it */
+		for (;;) {
+			token_clear(&t);
+			if (get_token(&p, &t) == EOF) {
+				eof = true;
+				break;
+			}
+			if (strcmp(t.token, "\n") == 0)
+				break;
+			if (t.type == TOKEN_WHITE_SPACE)
+				continue;
+			if (strcmp(t.token, "include") == 0) {
+				if (parse_cpp_include(&p, &t) == EOF)
+					eof = true;
+				break;
+			}
+			printf("#%s", t.token);
+			break;
+		}
+		token_clear(&t);
+	}
+
+	/* Single exit so that the token buffer is always released */
+	token_free(&t);
+
+	return EOF;
+}
+
+/*
+ *  Scan kernel source for printk KERN_ERR and dev_err
+ *  calls.
+ *
+ *  Usage:
+ *  	cat drivers/pnp/pnpacpi/rsparser.c | kernelscan -E | gcc  -E - | kernelscan -P
+ *
+ *  This prints out any kernel printk KERN_ERR calls
+ *  or dev_err calls and checks to see if the error can be matched by
+ *  any of the fwts klog messages.  It has some intelligence, it glues
+ *  literal strings together such as "this is" "a message" into
+ *  "this is a message" before it makes the klog comparison.
+ *
+ *  Exits with EXIT_FAILURE, after printing the usage, if the
+ *  first argument is missing or is neither -E nor -P.
+ */
+int main(int argc, char **argv)
+{
+	if (argc < 2) {
+		fprintf(stderr, "%s: [-E] [-P]\n", argv[0]);
+		exit(EXIT_FAILURE);
+	}
+
+	/*
+	 *  GCC -E preprocess phase
+	 */
+	if (strcmp(argv[1], "-E") == 0) {
+		parse_cpp_includes(stdin);
+		exit(EXIT_SUCCESS);
+	}
+
+	/*
+	 *  Parse kernel printk and dev_err phase
+	 */
+	if (strcmp(argv[1], "-P") == 0) {
+		patterns = klog_load("firmware_error_warning_patterns");
+		parse_kernel_messages(stdin);
+		klog_free(patterns);
+		exit(EXIT_SUCCESS);
+	}
+
+	/* Unknown option: don't pretend that we did something useful */
+	fprintf(stderr, "%s: [-E] [-P]\n", argv[0]);
+	exit(EXIT_FAILURE);
+}