From patchwork Tue Aug 14 16:27:23 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Michael Roth X-Patchwork-Id: 177398 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id D0C9E2C008F for ; Wed, 15 Aug 2012 03:50:01 +1000 (EST) Received: from localhost ([::1]:38814 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1T1Jzx-0002c4-HP for incoming@patchwork.ozlabs.org; Tue, 14 Aug 2012 12:29:41 -0400 Received: from eggs.gnu.org ([208.118.235.92]:44014) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1T1Jz4-00017Q-J3 for qemu-devel@nongnu.org; Tue, 14 Aug 2012 12:28:50 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1T1Jz2-0006UZ-7X for qemu-devel@nongnu.org; Tue, 14 Aug 2012 12:28:46 -0400 Received: from mail-qa0-f45.google.com ([209.85.216.45]:42979) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1T1Jz2-0006UT-2F for qemu-devel@nongnu.org; Tue, 14 Aug 2012 12:28:44 -0400 Received: by qadc10 with SMTP id c10so3072511qad.4 for ; Tue, 14 Aug 2012 09:28:43 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=sender:from:to:cc:subject:date:message-id:x-mailer:in-reply-to :references; bh=HVnIBttwZTLNDoWZ+vW9arKjTQxo+ryovJl9+4URdZs=; b=TC3J3JCFRS73ouKg88TE469uXQM6oN38FWQAQhmK+C0rabjagVw1lYcfm6Zl8KSPLj wgztHKboG10MriCmPKasxHaT3m5bhV53fpHJIfE4QHMrlCAAz8Q+JULL2uqZ9nWcStjv 6wcJuP8up1afpkYfdns1BKzLIEJMLGihYlqh4Q7hC7XFJBdoDaO1L6yn8H4zQRyAN4AC qrGHpPPmArMShYnLodb9817q5yokDqBoqIcZU1+hXkxcAmfRx6acMKF6rQVGQjFvwFDg Ju7e6fqsqjIDmepT1GPihwJ50Idav5Utf8tYAP5/s8pXtCVlEHbSeFf1nvezJlnZ0HN4 nicw== Received: by 10.50.160.168 with SMTP id 
xl8mr12616261igb.25.1344961722854; Tue, 14 Aug 2012 09:28:42 -0700 (PDT) Received: from loki.morrigu.org (cpe-72-179-62-111.austin.res.rr.com. [72.179.62.111]) by mx.google.com with ESMTPS id ul4sm11903203igb.15.2012.08.14.09.28.41 (version=TLSv1/SSLv3 cipher=OTHER); Tue, 14 Aug 2012 09:28:42 -0700 (PDT) From: Michael Roth To: qemu-devel@nongnu.org Date: Tue, 14 Aug 2012 11:27:23 -0500 Message-Id: <1344961646-21194-18-git-send-email-mdroth@linux.vnet.ibm.com> X-Mailer: git-send-email 1.7.9.5 In-Reply-To: <1344961646-21194-1-git-send-email-mdroth@linux.vnet.ibm.com> References: <1344961646-21194-1-git-send-email-mdroth@linux.vnet.ibm.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 209.85.216.45 Cc: blauwirbel@gmail.com, aliguori@us.ibm.com Subject: [Qemu-devel] [PATCH 17/20] qidl: parser, initial import from qc.git X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Signed-off-by: Michael Roth --- scripts/qidl_parser.py | 512 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 512 insertions(+) create mode 100644 scripts/qidl_parser.py diff --git a/scripts/qidl_parser.py b/scripts/qidl_parser.py new file mode 100644 index 0000000..831b3f5 --- /dev/null +++ b/scripts/qidl_parser.py @@ -0,0 +1,512 @@ +# +# QEMU IDL Parser +# +# Copyright IBM, Corp. 2012 +# +# Authors: +# Anthony Liguori +# Michael Roth +# +# This work is licensed under the terms of the GNU GPLv2. +# See the COPYING file in the top-level directory. 
#
# The lexer code is based off of:
#   http://www.lysator.liu.se/c/ANSI-C-grammar-l.html

import json
import sys

# C keywords that the lexer reports as their own token type.
_KEYWORDS = frozenset([
    'auto', 'break', 'case', 'const', 'continue',
    'default', 'do', 'else', 'enum', 'extern',
    'for', 'goto', 'if', 'register', 'return', 'signed',
    'sizeof',
    'static', 'struct', 'typedef', 'union', 'unsigned',
    'volatile', 'while',
])

# Characters that start an operator (or a comment, via '/').
_OPERATOR_CHARS = '.><+-*/%&^|!;{},:=()[]~?'

# Digraph spellings and the bracket/brace they are reported as.
_DIGRAPHS = {'<:': '[', ':>': ']', '<%': '{', '%>': '}'}

# Two-character operators recognised by the lexer.
_COMPOUND_OPERATORS = frozenset([
    '+=', '-=', '*=', '/=', '%=', '&=', '^=',
    '|=', '>>', '<<', '++', '--', '->', '&&',
    '||', '<=', '>=', '==', '!=',
])

_WHITESPACE = ' \t\v\n\f'

# Token types dropped by skip() before parsing.
_SKIPPED_TOKENS = ('whitespace', 'comment', 'directive')

# QIDL markers that only set a boolean flag on the field description.
_MARKER_FLAGS = {
    'derived': 'is_derived',
    'immutable': 'is_immutable',
    'elsewhere': 'is_elsewhere',
    'broken': 'is_broken',
    'optional': 'is_optional',
}


class Input(object):
    """Character source over an in-memory string.

    pop() hands out one character at a time.  Once the text is exhausted
    it returns '' and sets the ``eof`` attribute to True.
    """

    def __init__(self, text):
        self.fp = text
        self.buf = text
        self.eof = False

    def pop(self):
        """Remove and return the next character, or '' at end of input."""
        if not self.buf:
            self.eof = True
            return ''
        ch = self.buf[0]
        self.buf = self.buf[1:]
        return ch


def in_range(ch, start, end):
    """Return True if ch compares between start and end (inclusive)."""
    return start <= ch <= end


# D                       [0-9]
# L                       [a-zA-Z_]
# H                       [a-fA-F0-9]
# E                       [Ee][+-]?{D}+
# FS                      (f|F|l|L)
# IS                      (u|U|l|L)*

def is_D(ch):
    """Return True if ch is a decimal digit."""
    return in_range(ch, '0', '9')


def is_L(ch):
    """Return True if ch is an ASCII letter or underscore."""
    return in_range(ch, 'a', 'z') or in_range(ch, 'A', 'Z') or ch == '_'


def is_H(ch):
    """Return True if ch is a hexadecimal digit."""
    return in_range(ch, 'a', 'f') or in_range(ch, 'A', 'F') or is_D(ch)


def is_FS(ch):
    """Return True if ch is a float suffix character (f, F, l, L)."""
    # '' is a substring of every string, so the end-of-input marker has
    # to be rejected explicitly.
    return len(ch) == 1 and ch in 'fFlL'


def is_IS(ch):
    """Return True if ch is an integer suffix character (u, U, l, L)."""
    # See is_FS: without the length check '' would match and the suffix
    # loop in lexer() would never terminate at end of input.
    return len(ch) == 1 and ch in 'uUlL'


def lexer(fp):
    """Generate (token_type, value) pairs from an Input-like object.

    fp must provide pop() returning one character ('' when exhausted) and
    an ``eof`` attribute that becomes true once pop() has returned ''.

    Token types produced: each keyword as its own type, 'symbol',
    'literal', 'operator', 'comment', 'whitespace', 'directive' and
    'unknown'.  Digraphs (<: :> <% %>) are reported as the operator they
    stand for.
    """
    ch = fp.pop()

    while not fp.eof:
        token = ''

        if is_L(ch):
            # identifier or keyword
            token += ch
            ch = fp.pop()
            while is_L(ch) or is_D(ch):
                token += ch
                ch = fp.pop()
            if token in _KEYWORDS:
                yield (token, token)
            else:
                yield ('symbol', token)
        elif ch == "'":
            # character literal: opening quote, one (possibly escaped)
            # character, then whatever follows as the closing quote
            token += ch
            ch = fp.pop()
            if ch == '\\':
                token += ch
                token += fp.pop()
            else:
                token += ch
            token += fp.pop()
            ch = fp.pop()
            yield ('literal', token)
        elif ch == '"':
            # string literal; a backslash always takes the next character
            # with it so an escaped quote does not end the string
            token += ch
            ch = fp.pop()
            while ch not in ['', '"']:
                token += ch
                if ch == '\\':
                    token += fp.pop()
                ch = fp.pop()
            token += ch
            yield ('literal', token)
            ch = fp.pop()
        elif ch in _OPERATOR_CHARS:
            token += ch
            ch = fp.pop()
            tmp_token = token + ch
            if tmp_token in _DIGRAPHS:
                yield ('operator', _DIGRAPHS[tmp_token])
                ch = fp.pop()
            elif tmp_token == '//':
                # line comment; the terminating newline is left in ch
                token = tmp_token
                ch = fp.pop()
                while ch != '\n' and ch != '':
                    token += ch
                    ch = fp.pop()
                yield ('comment', token)
            elif tmp_token == '/*':
                token = tmp_token
                ch = fp.pop()
                while True:
                    while ch != '*' and ch != '':
                        token += ch
                        ch = fp.pop()
                    if ch == '':
                        # unterminated comment: stop at end of input
                        # instead of spinning on '' forever
                        break
                    token += ch
                    ch = fp.pop()
                    if ch == '/':
                        token += ch
                        break
                yield ('comment', token)
                ch = fp.pop()
            elif tmp_token in _COMPOUND_OPERATORS:
                yield ('operator', tmp_token)
                ch = fp.pop()
            else:
                # single-character operator; ch already holds the
                # lookahead character
                yield ('operator', token)
        elif ch == '0':
            # hex literal (with optional integer suffix), octal-looking
            # digit run, or a bare zero
            token += ch
            ch = fp.pop()
            # "ch and": '' is "in" every string, so guard end of input
            if ch and ch in 'xX':
                token += ch
                ch = fp.pop()
                while is_H(ch):
                    token += ch
                    ch = fp.pop()
                while is_IS(ch):
                    token += ch
                    ch = fp.pop()
            elif is_D(ch):
                token += ch
                ch = fp.pop()
                while is_D(ch):
                    token += ch
                    ch = fp.pop()
            yield ('literal', token)
        elif is_D(ch):
            token += ch
            ch = fp.pop()
            while is_D(ch):
                token += ch
                ch = fp.pop()
            yield ('literal', token)
        elif ch in _WHITESPACE:
            token += ch
            ch = fp.pop()
            while ch and ch in _WHITESPACE:
                token += ch
                ch = fp.pop()
            yield ('whitespace', token)
        elif ch == '#':
            # preprocessor directive: everything up to the newline
            token += ch
            ch = fp.pop()
            while ch and ch != '\n':
                token += ch
                ch = fp.pop()
            yield ('directive', token)
        else:
            yield ('unknown', ch)
            ch = fp.pop()


class LookAhead(object):
    """Random-access view over an iterable, filled lazily.

    Items pulled from the underlying iterator are kept in ``la`` so any
    index already reached can be read again.
    """

    def __init__(self, container):
        self.i = iter(container)
        self.la = []
        self.full = False

    def at(self, i):
        """Return item i, reading from the iterator as far as needed.

        Raises StopIteration if the iterator ends before index i.
        """
        # Loop rather than fetch a single item so that asking for an
        # index more than one past the buffered items works too.
        while i >= len(self.la):
            if self.full:
                raise StopIteration()
            try:
                self.la.append(next(self.i))
            except StopIteration:
                self.full = True
                raise StopIteration()

        return self.la[i]

    def eof(self):
        """Return True if no item exists beyond those already buffered."""
        try:
            self.at(len(self.la))
        except StopIteration:
            return True

        return False


def skip(c):
    """Yield the tokens of c, dropping whitespace, comments, directives."""
    for token, value in c:
        if token in _SKIPPED_TOKENS:
            continue
        yield (token, value)


def expect(la, index, first, second=None):
    """Require the token at index to have type first (and value second).

    Returns (index + 1, token_value).  Raises Exception on a mismatch.
    """
    tok = la.at(index)
    if tok[0] != first:
        raise Exception("expected '%s', got %s %s" % (first, tok[0], tok[1]))
    if second is not None and tok[1] != second:
        raise Exception("expected '%s', got %s" % (second, tok[1]))
    return index + 1, tok[1]


def choice(la, index, first, second=None):
    """Return True if the token at index has type first (and value second)."""
    tok = la.at(index)
    if tok[0] != first:
        return False
    if second is not None and tok[1] != second:
        return False
    return True


def process_marker(ret, params):
    """Record one QIDL(...) marker in the field description ret.

    params[0] names the marker; remaining entries are its arguments.
    Unrecognised markers leave ret unchanged.  Returns ret.
    """
    marker_type = params[0]
    flag = _MARKER_FLAGS.get(marker_type)
    if flag is not None:
        ret[flag] = True
    elif marker_type == 'size_is':
        ret['is_array'] = True
        ret['array_size'] = params[1]
    elif marker_type == 'property':
        ret['is_property'] = True
        # a field may carry several property markers; collect them all
        ret.setdefault('property_fields', []).append(params[1:])

    return ret


def parse_markers(la, index, ret):
    """Parse zero or more consecutive QIDL(...) markers starting at index.

    Each marker's comma-separated arguments are rebuilt by concatenating
    token values and handed to process_marker().

    Returns (number_of_tokens_consumed, ret).
    """
    pos = index

    while choice(la, pos, 'symbol', 'QIDL'):
        params = []
        pos += 1

        pos, _ = expect(la, pos, 'operator', '(')
        open_parens = 1
        param = ""
        while open_parens:
            if choice(la, pos, 'operator', ','):
                params.append(param)
                param = ""
                pos += 1
                continue

            if choice(la, pos, 'operator', '('):
                open_parens += 1
            elif choice(la, pos, 'operator', ')'):
                open_parens -= 1

            # the closing paren of the marker itself is not part of the
            # last argument
            if open_parens > 0:
                param += la.at(pos)[1]
            pos += 1
        if param != "":
            params.append(param)

        ret = process_marker(ret, params)

    return (pos - index), ret


def parse_type(la, index):
    """Parse the type part of a declaration starting at index.

    Returns (number_of_tokens_consumed, description) where description
    has a 'type' string plus optional 'is_pointer', 'is_nested_decl' and
    marker-derived keys.
    """
    pos = index
    ret = {}

    typename = ''
    # optional qualifiers/tags, accepted in this fixed order
    for keyword in ('const', 'struct', 'unsigned', 'union', 'enum'):
        if choice(la, pos, keyword, keyword):
            typename += keyword + ' '
            pos += 1

    # we don't currently handle embedded struct declarations, skip them for now
    if choice(la, pos, 'operator', '{'):
        open_braces = 1
        while open_braces:
            pos += 1
            if choice(la, pos, 'operator', '{'):
                open_braces += 1
            elif choice(la, pos, 'operator', '}'):
                open_braces -= 1
        pos += 1
        # NOTE(review): appending an empty string adds nothing to the
        # type name; possibly a placeholder was lost -- confirm.
        typename += ""
        ret['is_nested_decl'] = True
    else:
        if choice(la, pos, 'operator', '*'):
            pos += 1
            ret['is_pointer'] = True
        else:
            pos, rest = expect(la, pos, 'symbol')
            typename += rest

    ret['type'] = typename

    off, ret = parse_markers(la, pos, ret)
    pos += off

    if choice(la, pos, 'operator', '*'):
        pos += 1
        ret['is_pointer'] = True

    return (pos - index), ret


def parse_var_decl(la, index, repeating_type=None):
    """Parse one declarator (with its type unless repeating_type is given).

    repeating_type is the type string to reuse for the second and later
    declarators of a comma-separated declaration.

    Returns (number_of_tokens_consumed, description); description gains
    'variable' and, where applicable, 'is_function', 'is_array',
    'array_size', 'array_capacity' and marker-derived keys.
    """
    pos = index

    if repeating_type is None:
        off, ret = parse_type(la, pos)
        pos += off
    else:
        ret = {'type': repeating_type}

    if choice(la, pos, 'operator', '('):
        # function pointer: ( * name ) ( params )
        ret['is_function'] = True
        pos += 1
        pos, _ = expect(la, pos, 'operator', '*')
        pos, variable = expect(la, pos, 'symbol')
        pos, _ = expect(la, pos, 'operator', ')')

        # skip the param list since we don't use it currently
        pos, _ = expect(la, pos, 'operator', '(')
        open_parens = 1
        while open_parens:
            if choice(la, pos, 'operator', '('):
                open_parens += 1
            elif choice(la, pos, 'operator', ')'):
                open_parens -= 1
            pos += 1
    else:
        pos, variable = expect(la, pos, 'symbol')
    ret['variable'] = variable

    if choice(la, pos, 'operator', '['):
        pos += 1
        expression = ""
        while not choice(la, pos, 'operator', ']'):
            expression += la.at(pos)[1]
            pos += 1
        pos, _ = expect(la, pos, 'operator', ']')

        # if a marker already declared the array size, the bracketed
        # expression is recorded as the capacity instead
        if 'is_array' not in ret:
            ret['is_array'] = True
            ret['array_size'] = expression
        else:
            ret['array_capacity'] = expression

    off, ret = parse_markers(la, pos, ret)
    pos += off

    return (pos - index), ret


def parse_struct(la, index):
    """Parse ``struct [name] { fields }`` starting at index.

    Returns (number_of_tokens_consumed, {'struct': name, 'fields': [...]})
    where name is None for an anonymous struct.
    """
    pos = index

    pos, _ = expect(la, pos, 'struct', 'struct')

    name = None
    if choice(la, pos, 'symbol'):
        name = la.at(pos)[1]
        pos += 1

    pos, _ = expect(la, pos, 'operator', '{')

    nodes = []

    while not choice(la, pos, 'operator', '}'):
        offset, node = parse_var_decl(la, pos)
        pos += offset
        nodes.append(node)
        # further declarators sharing the same type: "int a, b, c;"
        while choice(la, pos, 'operator', ','):
            pos += 1
            offset, node = parse_var_decl(la, pos, node['type'])
            pos += offset
            nodes.append(node)

        pos, _ = expect(la, pos, 'operator', ';')

    # step over the closing '}'
    pos += 1

    return (pos - index), {'struct': name, 'fields': nodes}


def parse_typedef(la, index):
    """Parse ``typedef struct ... name`` starting at index.

    Returns (number_of_tokens_consumed, {'typedef': name, 'type': struct}).
    """
    pos = index

    pos, _ = expect(la, pos, 'typedef', 'typedef')

    offset, node = parse_struct(la, pos)
    pos += offset

    pos, typename = expect(la, pos, 'symbol')

    return (pos - index), {'typedef': typename, 'type': node}


def parse(la, index=0):
    """Parse one typedef or struct declaration terminated by ';'.

    Returns (number_of_tokens_consumed, node).  Raises Exception if the
    declaration starts with anything other than 'typedef' or 'struct'.
    """
    pos = index

    if choice(la, pos, 'typedef'):
        offset, node = parse_typedef(la, pos)
    elif choice(la, pos, 'struct'):
        offset, node = parse_struct(la, pos)
    else:
        raise Exception("unsupported QIDL declaration")

    pos, _ = expect(la, pos + offset, 'operator', ';')

    return (pos - index), node


def process_declaration_params(params, declaration=None):
    """Store the QIDL_START parameters in declaration and return it.

    params[0] is the declaration id; the presence of "state" and
    "properties" among params sets 'do_state' and 'do_properties'.
    A new dict is created when declaration is not supplied.
    """
    if declaration is None:
        declaration = {}
    declaration['id'] = params[0]
    declaration['do_state'] = "state" in params
    declaration['do_properties'] = "properties" in params
    return declaration


def get_declaration_params(line):
    """Return the stripped, comma-separated arguments of a QIDL_*(...) line."""
    # Take the text between the first '(' and the last ')' so the result
    # does not depend on how the line is terminated.
    rest = line.partition("(")[2]
    inner, close, _ = rest.rpartition(")")
    if not close:
        inner = rest
    return [param.strip() for param in inner.split(",")]


def get_declarations(f):
    """Generate one dict per QIDL_START(...)/QIDL_END(...) section of f.

    f must provide readline().  Each dict holds 'id', 'do_state',
    'do_properties' and 'text' (the lines between the two markers).
    Raises Exception if a QIDL_END id differs from the open QIDL_START id.
    """
    in_declaration = False
    declaration = {}
    # A generator must finish by returning; raising StopIteration inside
    # one is an error under PEP 479.
    for line in iter(f.readline, ''):
        if line.startswith("QIDL_START("):
            params = get_declaration_params(line)
            # fresh dict per declaration so earlier results are not
            # overwritten by later ones
            declaration = process_declaration_params(params, {})
            declaration['text'] = ""
            in_declaration = True
        elif line.startswith("QIDL_END("):
            params = get_declaration_params(line)
            if declaration['id'] != params[0]:
                raise Exception("unterminated QIDL declaration: %s" % declaration['id'])
            in_declaration = False
            yield declaration
        elif in_declaration:
            declaration['text'] += line


def parse_file(f):
    """Parse every QIDL declaration in f and return the list of nodes.

    Each node is the result of parse() extended with the declaration's
    'id', 'do_state' and 'do_properties'.
    """
    nodes = []
    for declaration in get_declarations(f):
        la = LookAhead(skip(lexer(Input(declaration['text']))))
        _, node = parse(la)
        node['id'] = declaration['id']
        node['do_state'] = declaration['do_state']
        node['do_properties'] = declaration['do_properties']
        nodes.append(node)
    return nodes


def main():
    """Read declarations from stdin and print the parsed nodes as JSON."""
    nodes = parse_file(sys.stdin)
    print(json.dumps(nodes, sort_keys=True, indent=2))


if __name__ == '__main__':
    main()