Message ID | ca351e133230d46d36fa0bc84a7c740341fc6d17.1662385087.git.fweimer@redhat.com |
---|---|
State | New |
Headers | show |
Series | Parse <elf.h> in the glibcelf Python module | expand |
On 2022-09-05 09:44, Florian Weimer via Libc-alpha wrote: > --- > scripts/glibcpp.py | 317 +++++++++++++++++++++++++++++++++++++++++ > support/Makefile | 10 +- > support/tst-glibcpp.py | 217 ++++++++++++++++++++++++++++ > 3 files changed, 542 insertions(+), 2 deletions(-) > create mode 100644 support/tst-glibcpp.py OK except for a minor nit at the end; Copyright year for the new test file should be just 2022. Thanks, Sid > > diff --git a/scripts/glibcpp.py b/scripts/glibcpp.py > index b44c6a4392..455459a609 100644 > --- a/scripts/glibcpp.py > +++ b/scripts/glibcpp.py > @@ -33,7 +33,9 @@ Accepts non-ASCII characters only within comments and strings. > """ > > import collections > +import operator > import re > +import sys > > # Caution: The order of the outermost alternation matters. > # STRING must be before BAD_STRING, CHARCONST before BAD_CHARCONST, > @@ -210,3 +212,318 @@ def tokenize_c(file_contents, reporter): > > yield tok > pos = mo.end() > + > +class MacroDefinition(collections.namedtuple('MacroDefinition', > + 'name_token args body error')): > + """A preprocessor macro definition. > + > + name_token is the Token_ for the name. > + > + args is None for a macro that is not function-like. Otherwise, it > + is a tuple that contains the macro argument name tokens. > + > + body is a tuple that contains the tokens that constitue the body > + of the macro definition (excluding whitespace). > + > + error is None if no error was detected, or otherwise a problem > + description associated with this macro definition. > + > + """ > + > + @property > + def function(self): > + """Return true if the macro is function-like.""" > + return self.args is not None > + > + @property > + def name(self): > + """Return the name of the macro being defined.""" > + return self.name_token.text > + > + @property > + def line(self): > + """Return the line number of the macro defintion.""" > + return self.name_token.line > + > + @property > + def args_lowered(self): > + """Return the macro argument list as a list of strings""" > + if self.function: > + return [token.text for token in self.args] > + else: > + return None > + > + @property > + def body_lowered(self): > + """Return the macro body as a list of strings.""" > + return [token.text for token in self.body] OK. > + > +def macro_definitions(tokens): > + """A generator for C macro definitions among tokens. > + > + The generator yields MacroDefinition objects. > + > + tokens must be iterable, yielding Token_ objects. > + > + """ > + > + macro_name = None > + macro_start = False # Set to false after macro name and one otken. > + macro_args = None # Set to a list during the macro argument sequence. > + in_macro_args = False # True while processing macro identifier-list. > + error = None > + body = [] > + > + for token in tokens: > + if token.context == 'define' and macro_name is None \ > + and token.kind == 'IDENT': > + # Starting up macro processing. > + if macro_start: > + # First identifier is the macro name. > + macro_name = token > + else: > + # Next token is the name. > + macro_start = True > + continue > + > + if macro_name is None: > + # Drop tokens not in macro definitions. > + continue > + > + if token.context != 'define': > + # End of the macro definition. > + if in_macro_args and error is None: > + error = 'macro definition ends in macro argument list' > + yield MacroDefinition(macro_name, macro_args, tuple(body), error) > + # No longer in a macro definition. > + macro_name = None > + macro_start = False > + macro_args = None > + in_macro_args = False > + error = None > + body.clear() > + continue > + > + if macro_start: > + # First token after the macro name. > + macro_start = False > + if token.kind == 'PUNCTUATOR' and token.text == '(': > + macro_args = [] > + in_macro_args = True > + continue > + > + if in_macro_args: > + if token.kind == 'IDENT' \ > + or (token.kind == 'PUNCTUATOR' and token.text == '...'): > + # Macro argument or ... placeholder. > + macro_args.append(token) > + if token.kind == 'PUNCTUATOR': > + if token.text == ')': > + macro_args = tuple(macro_args) > + in_macro_args = False > + elif token.text == ',': > + pass # Skip. Not a full syntax check. > + elif error is None: > + error = 'invalid punctuator in macro argument list: ' \ > + + repr(token.text) > + elif error is None: > + error = 'invalid {} token in macro argument list'.format( > + token.kind) > + continue > + > + if token.kind not in ('WHITESPACE', 'BLOCK_COMMENT'): > + body.append(token) > + > + # Emit the macro in case the last line does not end with a newline. > + if macro_name is not None: > + if in_macro_args and error is None: > + error = 'macro definition ends in macro argument list' > + yield MacroDefinition(macro_name, macro_args, tuple(body), error) OK. > + > +# Used to split UL etc. suffixes from numbers such as 123UL. > +RE_SPLIT_INTEGER_SUFFIX = re.compile(r'([^ullULL]+)([ullULL]*)') This will match 15LLU as well, but I suppose we could assume valid C input for this parser. > + > +BINARY_OPERATORS = { > + '+': operator.add, > + '<<': operator.lshift, > +} We only need these for now. OK. > + > +# Use the general-purpose dict type if it is order-preserving. > +if (sys.version_info[0], sys.version_info[1]) <= (3, 6): > + OrderedDict = collections.OrderedDict > +else: > + OrderedDict = dict > + > +def macro_eval(macro_defs, reporter): > + """Compute macro values > + > + macro_defs is the output from macro_definitions. reporter is an > + object that accepts reporter.error(line_number, message) and > + reporter.note(line_number, message) calls to report errors > + and error context invocations. > + > + The returned dict contains the values of macros which are not > + function-like, pairing their names with their computed values. > + > + The current implementation is incomplete. It is deliberately not > + entirely faithful to C, even in the implemented parts. It checks > + that macro replacements follow certain syntactic rules even if > + they are never evaluated. > + > + """ > + > + # Unevaluated macro definitions by name. > + definitions = OrderedDict() > + for md in macro_defs: > + if md.name in definitions: > + reporter.error(md.line, 'macro {} redefined'.format(md.name)) > + reporter.note(definitions[md.name].line, > + 'location of previous definition') > + else: > + definitions[md.name] = md > + > + # String to value mappings for fully evaluated macros. > + evaluated = OrderedDict() > + > + # String to macro definitions during evaluation. Nice error > + # reporting relies on determinstic iteration order. > + stack = OrderedDict() > + > + def eval_token(current, token): > + """Evaluate one macro token. > + > + Integers and strings are returned as such (the latter still > + quoted). Identifiers are expanded. > + > + None indicates an empty expansion or an error. > + > + """ > + > + if token.kind == 'PP_NUMBER': > + value = None > + m = RE_SPLIT_INTEGER_SUFFIX.match(token.text) > + if m: > + try: > + value = int(m.group(1), 0) > + except ValueError: > + pass > + if value is None: > + reporter.error(token.line, > + 'invalid number {!r} in definition of {}'.format( > + token.text, current.name)) > + return value > + > + if token.kind == 'STRING': > + return token.text > + > + if token.kind == 'CHARCONST' and len(token.text) == 3: > + return ord(token.text[1]) > + > + if token.kind == 'IDENT': > + name = token.text > + result = eval1(current, name) > + if name not in evaluated: > + evaluated[name] = result > + return result > + > + reporter.error(token.line, > + 'unrecognized {!r} in definition of {}'.format( > + token.text, current.name)) > + return None > + > + > + def eval1(current, name): > + """Evaluate one name. > + > + The name is looked up and the macro definition evaluated > + recursively if necessary. The current argument is the macro > + definition being evaluated. > + > + None as a return value indicates an error. > + > + """ > + > + # Fast path if the value has already been evaluated. > + if name in evaluated: > + return evaluated[name] > + > + try: > + md = definitions[name] > + except KeyError: > + reporter.error(current.line, > + 'reference to undefined identifier {} in definition of {}' > + .format(name, current.name)) > + return None > + > + if md.name in stack: > + # Recursive macro definition. > + md = stack[name] > + reporter.error(md.line, > + 'macro definition {} refers to itself'.format(md.name)) > + for md1 in reversed(list(stack.values())): > + if md1 is md: > + break > + reporter.note(md1.line, > + 'evaluated from {}'.format(md1.name)) > + return None > + > + stack[md.name] = md > + if md.function: > + reporter.error(current.line, > + 'attempt to evaluate function-like macro {}'.format(name)) > + reporter.note(md.line, 'definition of {}'.format(md.name)) > + return None > + > + try: > + body = md.body > + if len(body) == 0: > + # Empty expansion. > + return None > + > + # Remove surrounding (). > + if body[0].text == '(' and body[-1].text == ')': > + body = body[1:-1] > + had_parens = True > + else: > + had_parens = False > + > + if len(body) == 1: > + return eval_token(md, body[0]) > + > + # Minimal expression evaluator for binary operators. > + op = body[1].text > + if len(body) == 3 and op in BINARY_OPERATORS: > + if not had_parens: > + reporter.error(body[1].line, > + 'missing parentheses around {} expression'.format(op)) > + reporter.note(md.line, > + 'in definition of macro {}'.format(md.name)) > + > + left = eval_token(md, body[0]) > + right = eval_token(md, body[2]) > + > + if type(left) != type(1): > + reporter.error(left.line, > + 'left operand of {} is not an integer'.format(op)) > + reporter.note(md.line, > + 'in definition of macro {}'.format(md.name)) > + if type(right) != type(1): > + reporter.error(left.line, > + 'right operand of {} is not an integer'.format(op)) > + reporter.note(md.line, > + 'in definition of macro {}'.format(md.name)) > + return BINARY_OPERATORS[op](left, right) > + > + reporter.error(md.line, > + 'uninterpretable macro token sequence: {}'.format( > + ' '.join(md.body_lowered))) > + return None > + finally: > + del stack[md.name] > + > + # Start of main body of macro_eval. > + for md in definitions.values(): > + name = md.name > + if name not in evaluated and not md.function: > + evaluated[name] = eval1(md, name) > + return evaluated > diff --git a/support/Makefile b/support/Makefile > index 9b50eac117..551d02941f 100644 > --- a/support/Makefile > +++ b/support/Makefile > @@ -274,12 +274,12 @@ $(objpfx)test-run-command : $(libsupport) $(common-objpfx)elf/static-stubs.o > tests = \ > README-testing \ > tst-support-namespace \ > + tst-support-open-dev-null-range \ > + tst-support-process_state \ > tst-support_blob_repeat \ > tst-support_capture_subprocess \ > tst-support_descriptors \ > tst-support_format_dns_packet \ > - tst-support-open-dev-null-range \ > - tst-support-process_state \ > tst-support_quote_blob \ > tst-support_quote_blob_wide \ > tst-support_quote_string \ > @@ -304,6 +304,12 @@ $(objpfx)tst-support_record_failure-2.out: tst-support_record_failure-2.sh \ > $(evaluate-test) > endif > > +tests-special += $(objpfx)tst-glibcpp.out > + > +$(objpfx)tst-glibcpp.out: tst-glibcpp.py $(..)scripts/glibcpp.py > + PYTHONPATH=$(..)scripts $(PYTHON) tst-glibcpp.py > $@ 2>&1; \ > + $(evaluate-test) > + > $(objpfx)tst-support_format_dns_packet: $(common-objpfx)resolv/libresolv.so > > tst-support_capture_subprocess-ARGS = -- $(host-test-program-cmd) > diff --git a/support/tst-glibcpp.py b/support/tst-glibcpp.py > new file mode 100644 > index 0000000000..b7a7a44184 > --- /dev/null > +++ b/support/tst-glibcpp.py > @@ -0,0 +1,217 @@ > +#! /usr/bin/python3 > +# Tests for scripts/glibcpp.py > +# Copyright (C) 2019-2022 Free Software Foundation, Inc. Only 2022? > +# This file is part of the GNU C Library. > +# > +# The GNU C Library is free software; you can redistribute it and/or > +# modify it under the terms of the GNU Lesser General Public > +# License as published by the Free Software Foundation; either > +# version 2.1 of the License, or (at your option) any later version. > +# > +# The GNU C Library is distributed in the hope that it will be useful, > +# but WITHOUT ANY WARRANTY; without even the implied warranty of > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +# Lesser General Public License for more details. > +# > +# You should have received a copy of the GNU Lesser General Public > +# License along with the GNU C Library; if not, see > +# <https://www.gnu.org/licenses/>. > + > +import inspect > +import sys > + > +import glibcpp > + > +# Error counter. > +errors = 0 > + > +class TokenizerErrors: > + """Used as the error reporter during tokenization.""" > + > + def __init__(self): > + self.errors = [] > + > + def error(self, token, message): > + self.errors.append((token, message)) > + > +def check_macro_definitions(source, expected): > + reporter = TokenizerErrors() > + tokens = glibcpp.tokenize_c(source, reporter) > + > + actual = [] > + for md in glibcpp.macro_definitions(tokens): > + if md.function: > + md_name = '{}({})'.format(md.name, ','.join(md.args_lowered)) > + else: > + md_name = md.name > + actual.append((md_name, md.body_lowered)) > + > + if actual != expected or reporter.errors: > + global errors > + errors += 1 > + # Obtain python source line information. > + frame = inspect.stack(2)[1] > + print('{}:{}: error: macro definition mismatch, actual definitions:' > + .format(frame[1], frame[2])) > + for md in actual: > + print('note: {} {!r}'.format(md[0], md[1])) > + > + if reporter.errors: > + for err in reporter.errors: > + print('note: tokenizer error: {}: {}'.format( > + err[0].line, err[1])) > + > +def check_macro_eval(source, expected, expected_errors=''): > + reporter = TokenizerErrors() > + tokens = list(glibcpp.tokenize_c(source, reporter)) > + > + if reporter.errors: > + # Obtain python source line information. > + frame = inspect.stack(2)[1] > + for err in reporter.errors: > + print('{}:{}: tokenizer error: {}: {}'.format( > + frame[1], frame[2], err[0].line, err[1])) > + return > + > + class EvalReporter: > + """Used as the error reporter during evaluation.""" > + > + def __init__(self): > + self.lines = [] > + > + def error(self, line, message): > + self.lines.append('{}: error: {}\n'.format(line, message)) > + > + def note(self, line, message): > + self.lines.append('{}: note: {}\n'.format(line, message)) > + > + reporter = EvalReporter() > + actual = glibcpp.macro_eval(glibcpp.macro_definitions(tokens), reporter) > + actual_errors = ''.join(reporter.lines) > + if actual != expected or actual_errors != expected_errors: > + global errors > + errors += 1 > + # Obtain python source line information. > + frame = inspect.stack(2)[1] > + print('{}:{}: error: macro evaluation mismatch, actual results:' > + .format(frame[1], frame[2])) > + for k, v in actual.items(): > + print(' {}: {!r}'.format(k, v)) > + for msg in reporter.lines: > + sys.stdout.write(' | ' + msg) > + > +# Individual test cases follow. > + > +check_macro_definitions('', []) > +check_macro_definitions('int main()\n{\n{\n', []) > +check_macro_definitions(""" > +#define A 1 > +#define B 2 /* ignored */ > +#define C 3 // also ignored > +#define D \ > + 4 > +#define STRING "string" > +#define FUNCLIKE(a, b) (a + b) > +#define FUNCLIKE2(a, b) (a + \ > + b) > +""", [('A', ['1']), > + ('B', ['2']), > + ('C', ['3']), > + ('D', ['4']), > + ('STRING', ['"string"']), > + ('FUNCLIKE(a,b)', list('(a+b)')), > + ('FUNCLIKE2(a,b)', list('(a+b)')), > + ]) > +check_macro_definitions('#define MACRO', [('MACRO', [])]) > +check_macro_definitions('#define MACRO\n', [('MACRO', [])]) > +check_macro_definitions('#define MACRO()', [('MACRO()', [])]) > +check_macro_definitions('#define MACRO()\n', [('MACRO()', [])]) > + > +check_macro_eval('#define A 1', {'A': 1}) > +check_macro_eval('#define A (1)', {'A': 1}) > +check_macro_eval('#define A (1 + 1)', {'A': 2}) > +check_macro_eval('#define A (1U << 31)', {'A': 1 << 31}) > +check_macro_eval('''\ > +#define A (B + 1) > +#define B 10 > +#define F(x) ignored > +#define C "not ignored" > +''', { > + 'A': 11, > + 'B': 10, > + 'C': '"not ignored"', > +}) > + > +# Checking for evaluation errors. > +check_macro_eval('''\ > +#define A 1 > +#define A 2 > +''', { > + 'A': 1, > +}, '''\ > +2: error: macro A redefined > +1: note: location of previous definition > +''') > + > +check_macro_eval('''\ > +#define A A > +#define B 1 > +''', { > + 'A': None, > + 'B': 1, > +}, '''\ > +1: error: macro definition A refers to itself > +''') > + > +check_macro_eval('''\ > +#define A B > +#define B A > +''', { > + 'A': None, > + 'B': None, > +}, '''\ > +1: error: macro definition A refers to itself > +2: note: evaluated from B > +''') > + > +check_macro_eval('''\ > +#define A B > +#define B C > +#define C A > +''', { > + 'A': None, > + 'B': None, > + 'C': None, > +}, '''\ > +1: error: macro definition A refers to itself > +3: note: evaluated from C > +2: note: evaluated from B > +''') > + > +check_macro_eval('''\ > +#define A 1 + > +''', { > + 'A': None, > +}, '''\ > +1: error: uninterpretable macro token sequence: 1 + > +''') > + > +check_macro_eval('''\ > +#define A 3*5 > +''', { > + 'A': None, > +}, '''\ > +1: error: uninterpretable macro token sequence: 3 * 5 > +''') > + > +check_macro_eval('''\ > +#define A 3 + 5 > +''', { > + 'A': 8, > +}, '''\ > +1: error: missing parentheses around + expression > +1: note: in definition of macro A > +''') > + > +if errors: > + sys.exit(1) OK.
* Siddhesh Poyarekar: > On 2022-09-05 09:44, Florian Weimer via Libc-alpha wrote: >> --- >> scripts/glibcpp.py | 317 +++++++++++++++++++++++++++++++++++++++++ >> support/Makefile | 10 +- >> support/tst-glibcpp.py | 217 ++++++++++++++++++++++++++++ >> 3 files changed, 542 insertions(+), 2 deletions(-) >> create mode 100644 support/tst-glibcpp.py > > OK except for a minor nit at the end; Copyright year for the new test > file should be just 2022. Fair enough, I've fixed it locally. Thanks, Florian
diff --git a/scripts/glibcpp.py b/scripts/glibcpp.py index b44c6a4392..455459a609 100644 --- a/scripts/glibcpp.py +++ b/scripts/glibcpp.py @@ -33,7 +33,9 @@ Accepts non-ASCII characters only within comments and strings. """ import collections +import operator import re +import sys # Caution: The order of the outermost alternation matters. # STRING must be before BAD_STRING, CHARCONST before BAD_CHARCONST, @@ -210,3 +212,318 @@ def tokenize_c(file_contents, reporter): yield tok pos = mo.end() + +class MacroDefinition(collections.namedtuple('MacroDefinition', + 'name_token args body error')): + """A preprocessor macro definition. + + name_token is the Token_ for the name. + + args is None for a macro that is not function-like. Otherwise, it + is a tuple that contains the macro argument name tokens. + + body is a tuple that contains the tokens that constitue the body + of the macro definition (excluding whitespace). + + error is None if no error was detected, or otherwise a problem + description associated with this macro definition. + + """ + + @property + def function(self): + """Return true if the macro is function-like.""" + return self.args is not None + + @property + def name(self): + """Return the name of the macro being defined.""" + return self.name_token.text + + @property + def line(self): + """Return the line number of the macro defintion.""" + return self.name_token.line + + @property + def args_lowered(self): + """Return the macro argument list as a list of strings""" + if self.function: + return [token.text for token in self.args] + else: + return None + + @property + def body_lowered(self): + """Return the macro body as a list of strings.""" + return [token.text for token in self.body] + +def macro_definitions(tokens): + """A generator for C macro definitions among tokens. + + The generator yields MacroDefinition objects. + + tokens must be iterable, yielding Token_ objects. + + """ + + macro_name = None + macro_start = False # Set to false after macro name and one otken. + macro_args = None # Set to a list during the macro argument sequence. + in_macro_args = False # True while processing macro identifier-list. + error = None + body = [] + + for token in tokens: + if token.context == 'define' and macro_name is None \ + and token.kind == 'IDENT': + # Starting up macro processing. + if macro_start: + # First identifier is the macro name. + macro_name = token + else: + # Next token is the name. + macro_start = True + continue + + if macro_name is None: + # Drop tokens not in macro definitions. + continue + + if token.context != 'define': + # End of the macro definition. + if in_macro_args and error is None: + error = 'macro definition ends in macro argument list' + yield MacroDefinition(macro_name, macro_args, tuple(body), error) + # No longer in a macro definition. + macro_name = None + macro_start = False + macro_args = None + in_macro_args = False + error = None + body.clear() + continue + + if macro_start: + # First token after the macro name. + macro_start = False + if token.kind == 'PUNCTUATOR' and token.text == '(': + macro_args = [] + in_macro_args = True + continue + + if in_macro_args: + if token.kind == 'IDENT' \ + or (token.kind == 'PUNCTUATOR' and token.text == '...'): + # Macro argument or ... placeholder. + macro_args.append(token) + if token.kind == 'PUNCTUATOR': + if token.text == ')': + macro_args = tuple(macro_args) + in_macro_args = False + elif token.text == ',': + pass # Skip. Not a full syntax check. + elif error is None: + error = 'invalid punctuator in macro argument list: ' \ + + repr(token.text) + elif error is None: + error = 'invalid {} token in macro argument list'.format( + token.kind) + continue + + if token.kind not in ('WHITESPACE', 'BLOCK_COMMENT'): + body.append(token) + + # Emit the macro in case the last line does not end with a newline. + if macro_name is not None: + if in_macro_args and error is None: + error = 'macro definition ends in macro argument list' + yield MacroDefinition(macro_name, macro_args, tuple(body), error) + +# Used to split UL etc. suffixes from numbers such as 123UL. +RE_SPLIT_INTEGER_SUFFIX = re.compile(r'([^ullULL]+)([ullULL]*)') + +BINARY_OPERATORS = { + '+': operator.add, + '<<': operator.lshift, +} + +# Use the general-purpose dict type if it is order-preserving. +if (sys.version_info[0], sys.version_info[1]) <= (3, 6): + OrderedDict = collections.OrderedDict +else: + OrderedDict = dict + +def macro_eval(macro_defs, reporter): + """Compute macro values + + macro_defs is the output from macro_definitions. reporter is an + object that accepts reporter.error(line_number, message) and + reporter.note(line_number, message) calls to report errors + and error context invocations. + + The returned dict contains the values of macros which are not + function-like, pairing their names with their computed values. + + The current implementation is incomplete. It is deliberately not + entirely faithful to C, even in the implemented parts. It checks + that macro replacements follow certain syntactic rules even if + they are never evaluated. + + """ + + # Unevaluated macro definitions by name. + definitions = OrderedDict() + for md in macro_defs: + if md.name in definitions: + reporter.error(md.line, 'macro {} redefined'.format(md.name)) + reporter.note(definitions[md.name].line, + 'location of previous definition') + else: + definitions[md.name] = md + + # String to value mappings for fully evaluated macros. + evaluated = OrderedDict() + + # String to macro definitions during evaluation. Nice error + # reporting relies on determinstic iteration order. + stack = OrderedDict() + + def eval_token(current, token): + """Evaluate one macro token. + + Integers and strings are returned as such (the latter still + quoted). Identifiers are expanded. + + None indicates an empty expansion or an error. + + """ + + if token.kind == 'PP_NUMBER': + value = None + m = RE_SPLIT_INTEGER_SUFFIX.match(token.text) + if m: + try: + value = int(m.group(1), 0) + except ValueError: + pass + if value is None: + reporter.error(token.line, + 'invalid number {!r} in definition of {}'.format( + token.text, current.name)) + return value + + if token.kind == 'STRING': + return token.text + + if token.kind == 'CHARCONST' and len(token.text) == 3: + return ord(token.text[1]) + + if token.kind == 'IDENT': + name = token.text + result = eval1(current, name) + if name not in evaluated: + evaluated[name] = result + return result + + reporter.error(token.line, + 'unrecognized {!r} in definition of {}'.format( + token.text, current.name)) + return None + + + def eval1(current, name): + """Evaluate one name. + + The name is looked up and the macro definition evaluated + recursively if necessary. The current argument is the macro + definition being evaluated. + + None as a return value indicates an error. + + """ + + # Fast path if the value has already been evaluated. + if name in evaluated: + return evaluated[name] + + try: + md = definitions[name] + except KeyError: + reporter.error(current.line, + 'reference to undefined identifier {} in definition of {}' + .format(name, current.name)) + return None + + if md.name in stack: + # Recursive macro definition. + md = stack[name] + reporter.error(md.line, + 'macro definition {} refers to itself'.format(md.name)) + for md1 in reversed(list(stack.values())): + if md1 is md: + break + reporter.note(md1.line, + 'evaluated from {}'.format(md1.name)) + return None + + stack[md.name] = md + if md.function: + reporter.error(current.line, + 'attempt to evaluate function-like macro {}'.format(name)) + reporter.note(md.line, 'definition of {}'.format(md.name)) + return None + + try: + body = md.body + if len(body) == 0: + # Empty expansion. + return None + + # Remove surrounding (). + if body[0].text == '(' and body[-1].text == ')': + body = body[1:-1] + had_parens = True + else: + had_parens = False + + if len(body) == 1: + return eval_token(md, body[0]) + + # Minimal expression evaluator for binary operators. + op = body[1].text + if len(body) == 3 and op in BINARY_OPERATORS: + if not had_parens: + reporter.error(body[1].line, + 'missing parentheses around {} expression'.format(op)) + reporter.note(md.line, + 'in definition of macro {}'.format(md.name)) + + left = eval_token(md, body[0]) + right = eval_token(md, body[2]) + + if type(left) != type(1): + reporter.error(left.line, + 'left operand of {} is not an integer'.format(op)) + reporter.note(md.line, + 'in definition of macro {}'.format(md.name)) + if type(right) != type(1): + reporter.error(left.line, + 'right operand of {} is not an integer'.format(op)) + reporter.note(md.line, + 'in definition of macro {}'.format(md.name)) + return BINARY_OPERATORS[op](left, right) + + reporter.error(md.line, + 'uninterpretable macro token sequence: {}'.format( + ' '.join(md.body_lowered))) + return None + finally: + del stack[md.name] + + # Start of main body of macro_eval. + for md in definitions.values(): + name = md.name + if name not in evaluated and not md.function: + evaluated[name] = eval1(md, name) + return evaluated diff --git a/support/Makefile b/support/Makefile index 9b50eac117..551d02941f 100644 --- a/support/Makefile +++ b/support/Makefile @@ -274,12 +274,12 @@ $(objpfx)test-run-command : $(libsupport) $(common-objpfx)elf/static-stubs.o tests = \ README-testing \ tst-support-namespace \ + tst-support-open-dev-null-range \ + tst-support-process_state \ tst-support_blob_repeat \ tst-support_capture_subprocess \ tst-support_descriptors \ tst-support_format_dns_packet \ - tst-support-open-dev-null-range \ - tst-support-process_state \ tst-support_quote_blob \ tst-support_quote_blob_wide \ tst-support_quote_string \ @@ -304,6 +304,12 @@ $(objpfx)tst-support_record_failure-2.out: tst-support_record_failure-2.sh \ $(evaluate-test) endif +tests-special += $(objpfx)tst-glibcpp.out + +$(objpfx)tst-glibcpp.out: tst-glibcpp.py $(..)scripts/glibcpp.py + PYTHONPATH=$(..)scripts $(PYTHON) tst-glibcpp.py > $@ 2>&1; \ + $(evaluate-test) + $(objpfx)tst-support_format_dns_packet: $(common-objpfx)resolv/libresolv.so tst-support_capture_subprocess-ARGS = -- $(host-test-program-cmd) diff --git a/support/tst-glibcpp.py b/support/tst-glibcpp.py new file mode 100644 index 0000000000..b7a7a44184 --- /dev/null +++ b/support/tst-glibcpp.py @@ -0,0 +1,217 @@ +#! /usr/bin/python3 +# Tests for scripts/glibcpp.py +# Copyright (C) 2019-2022 Free Software Foundation, Inc. +# This file is part of the GNU C Library. +# +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <https://www.gnu.org/licenses/>. + +import inspect +import sys + +import glibcpp + +# Error counter. +errors = 0 + +class TokenizerErrors: + """Used as the error reporter during tokenization.""" + + def __init__(self): + self.errors = [] + + def error(self, token, message): + self.errors.append((token, message)) + +def check_macro_definitions(source, expected): + reporter = TokenizerErrors() + tokens = glibcpp.tokenize_c(source, reporter) + + actual = [] + for md in glibcpp.macro_definitions(tokens): + if md.function: + md_name = '{}({})'.format(md.name, ','.join(md.args_lowered)) + else: + md_name = md.name + actual.append((md_name, md.body_lowered)) + + if actual != expected or reporter.errors: + global errors + errors += 1 + # Obtain python source line information. + frame = inspect.stack(2)[1] + print('{}:{}: error: macro definition mismatch, actual definitions:' + .format(frame[1], frame[2])) + for md in actual: + print('note: {} {!r}'.format(md[0], md[1])) + + if reporter.errors: + for err in reporter.errors: + print('note: tokenizer error: {}: {}'.format( + err[0].line, err[1])) + +def check_macro_eval(source, expected, expected_errors=''): + reporter = TokenizerErrors() + tokens = list(glibcpp.tokenize_c(source, reporter)) + + if reporter.errors: + # Obtain python source line information. + frame = inspect.stack(2)[1] + for err in reporter.errors: + print('{}:{}: tokenizer error: {}: {}'.format( + frame[1], frame[2], err[0].line, err[1])) + return + + class EvalReporter: + """Used as the error reporter during evaluation.""" + + def __init__(self): + self.lines = [] + + def error(self, line, message): + self.lines.append('{}: error: {}\n'.format(line, message)) + + def note(self, line, message): + self.lines.append('{}: note: {}\n'.format(line, message)) + + reporter = EvalReporter() + actual = glibcpp.macro_eval(glibcpp.macro_definitions(tokens), reporter) + actual_errors = ''.join(reporter.lines) + if actual != expected or actual_errors != expected_errors: + global errors + errors += 1 + # Obtain python source line information. + frame = inspect.stack(2)[1] + print('{}:{}: error: macro evaluation mismatch, actual results:' + .format(frame[1], frame[2])) + for k, v in actual.items(): + print(' {}: {!r}'.format(k, v)) + for msg in reporter.lines: + sys.stdout.write(' | ' + msg) + +# Individual test cases follow. + +check_macro_definitions('', []) +check_macro_definitions('int main()\n{\n{\n', []) +check_macro_definitions(""" +#define A 1 +#define B 2 /* ignored */ +#define C 3 // also ignored +#define D \ + 4 +#define STRING "string" +#define FUNCLIKE(a, b) (a + b) +#define FUNCLIKE2(a, b) (a + \ + b) +""", [('A', ['1']), + ('B', ['2']), + ('C', ['3']), + ('D', ['4']), + ('STRING', ['"string"']), + ('FUNCLIKE(a,b)', list('(a+b)')), + ('FUNCLIKE2(a,b)', list('(a+b)')), + ]) +check_macro_definitions('#define MACRO', [('MACRO', [])]) +check_macro_definitions('#define MACRO\n', [('MACRO', [])]) +check_macro_definitions('#define MACRO()', [('MACRO()', [])]) +check_macro_definitions('#define MACRO()\n', [('MACRO()', [])]) + +check_macro_eval('#define A 1', {'A': 1}) +check_macro_eval('#define A (1)', {'A': 1}) +check_macro_eval('#define A (1 + 1)', {'A': 2}) +check_macro_eval('#define A (1U << 31)', {'A': 1 << 31}) +check_macro_eval('''\ +#define A (B + 1) +#define B 10 +#define F(x) ignored +#define C "not ignored" +''', { + 'A': 11, + 'B': 10, + 'C': '"not ignored"', +}) + +# Checking for evaluation errors. +check_macro_eval('''\ +#define A 1 +#define A 2 +''', { + 'A': 1, +}, '''\ +2: error: macro A redefined +1: note: location of previous definition +''') + +check_macro_eval('''\ +#define A A +#define B 1 +''', { + 'A': None, + 'B': 1, +}, '''\ +1: error: macro definition A refers to itself +''') + +check_macro_eval('''\ +#define A B +#define B A +''', { + 'A': None, + 'B': None, +}, '''\ +1: error: macro definition A refers to itself +2: note: evaluated from B +''') + +check_macro_eval('''\ +#define A B +#define B C +#define C A +''', { + 'A': None, + 'B': None, + 'C': None, +}, '''\ +1: error: macro definition A refers to itself +3: note: evaluated from C +2: note: evaluated from B +''') + +check_macro_eval('''\ +#define A 1 + +''', { + 'A': None, +}, '''\ +1: error: uninterpretable macro token sequence: 1 + +''') + +check_macro_eval('''\ +#define A 3*5 +''', { + 'A': None, +}, '''\ +1: error: uninterpretable macro token sequence: 3 * 5 +''') + +check_macro_eval('''\ +#define A 3 + 5 +''', { + 'A': 8, +}, '''\ +1: error: missing parentheses around + expression +1: note: in definition of macro A +''') + +if errors: + sys.exit(1)