Message ID | 20210422030720.3685766-13-jsnow@redhat.com |
---|---|
State | New |
Headers | show |
Series | qapi: static typing conversion, pt5a | expand |
John Snow <jsnow@redhat.com> writes: > Annotations do not change runtime behavior. > This commit *only* adds annotations. > > (Annotations for QAPIDoc are in a later commit.) > > Signed-off-by: John Snow <jsnow@redhat.com> > --- > scripts/qapi/parser.py | 61 ++++++++++++++++++++++++++++-------------- > 1 file changed, 41 insertions(+), 20 deletions(-) > > diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py > index d02a134aae9..f2b57d5642a 100644 > --- a/scripts/qapi/parser.py > +++ b/scripts/qapi/parser.py > @@ -17,16 +17,29 @@ > from collections import OrderedDict > import os > import re > -from typing import List > +from typing import ( > + Dict, > + List, > + Optional, > + Set, > + Union, > +) > > from .common import match_nofail > from .error import QAPISemError, QAPISourceError > from .source import QAPISourceInfo > > > +#: Represents a parsed JSON object; semantically: one QAPI schema expression. > +Expression = Dict[str, object] I believe you use this for what qapi-code-gen.txt calls a top-level expression. TopLevelExpression is rather long, but it's used just once, and once more in RFC PATCH 13. What do you think? > + > +# Return value alias for get_expr(). > +_ExprValue = Union[List[object], Dict[str, object], str, bool] This is essentially a node in our pidgin-JSON parser's abstract syntax tree. Tree roots use the Dict branch of this Union. See also my review of PATCH 06. > + > + > class QAPIParseError(QAPISourceError): > """Error class for all QAPI schema parsing errors.""" > - def __init__(self, parser, msg): > + def __init__(self, parser: 'QAPISchemaParser', msg: str): Forward reference needs quotes. Can't be helped. > col = 1 > for ch in parser.src[parser.line_pos:parser.pos]: > if ch == '\t': > @@ -38,7 +51,10 @@ def __init__(self, parser, msg): > > class QAPISchemaParser: > > - def __init__(self, fname, previously_included=None, incl_info=None): > + def __init__(self, > + fname: str, > + previously_included: Optional[Set[str]] = None, This needs to be Optional[] because using the empty set as default parameter value would be a dangerous trap. Python's choice to evaluate the default parameter value just once has always been iffy. Stirring static typing into the language makes it iffier. Can't be helped. > + incl_info: Optional[QAPISourceInfo] = None): > self._fname = fname > self._included = previously_included or set() > self._included.add(os.path.abspath(self._fname)) > @@ -46,20 +62,20 @@ def __init__(self, fname, previously_included=None, incl_info=None): > > # Lexer state (see `accept` for details): > self.info = QAPISourceInfo(self._fname, incl_info) > - self.tok = None > + self.tok: Optional[str] = None Would self.tok: str work? > self.pos = 0 > self.cursor = 0 > - self.val = None > + self.val: Optional[Union[bool, str]] = None > self.line_pos = 0 > > # Parser output: > - self.exprs = [] > - self.docs = [] > + self.exprs: List[Expression] = [] > + self.docs: List[QAPIDoc] = [] > > # Showtime! > self._parse() > > - def _parse(self): > + def _parse(self) -> None: > cur_doc = None > > with open(self._fname, 'r', encoding='utf-8') as fp: > @@ -122,7 +138,7 @@ def _parse(self): > self.reject_expr_doc(cur_doc) > > @staticmethod > - def reject_expr_doc(doc): > + def reject_expr_doc(doc: Optional['QAPIDoc']) -> None: > if doc and doc.symbol: > raise QAPISemError( > doc.info, > @@ -130,10 +146,14 @@ def reject_expr_doc(doc): > % doc.symbol) > > @staticmethod > - def _include(include, info, incl_fname, previously_included): > + def _include(include: str, > + info: QAPISourceInfo, > + incl_fname: str, > + previously_included: Set[str] > + ) -> Optional['QAPISchemaParser']: > incl_abs_fname = os.path.abspath(incl_fname) > # catch inclusion cycle > - inf = info > + inf: Optional[QAPISourceInfo] = info > while inf: > if incl_abs_fname == os.path.abspath(inf.fname): > raise QAPISemError(info, "inclusion loop for %s" % include) > @@ -152,9 +172,9 @@ def _include(include, info, incl_fname, previously_included): > ) from err > > @staticmethod > - def _pragma(name, value, info): > + def _pragma(name: str, value: object, info: QAPISourceInfo) -> None: value: object isn't wrong, but why not _ExprValue? > > - def _check(name, value) -> List[str]: > + def _check(name: str, value: object) -> List[str]: > if (not isinstance(value, list) or > any([not isinstance(elt, str) for elt in value])): > raise QAPISemError( > @@ -176,7 +196,7 @@ def _check(name, value) -> List[str]: > else: > raise QAPISemError(info, "unknown pragma '%s'" % name) > > - def accept(self, skip_comment=True): > + def accept(self, skip_comment: bool = True) -> None: > while True: > self.tok = self.src[self.cursor] > self.pos = self.cursor > @@ -240,8 +260,8 @@ def accept(self, skip_comment=True): > self.src[self.cursor-1:]) > raise QAPIParseError(self, "stray '%s'" % match.group(0)) > > - def get_members(self): > - expr = OrderedDict() > + def get_members(self) -> 'OrderedDict[str, object]': > + expr: 'OrderedDict[str, object]' = OrderedDict() > if self.tok == '}': > self.accept() > return expr > @@ -267,8 +287,8 @@ def get_members(self): > if self.tok != "'": > raise QAPIParseError(self, "expected string") > > - def get_values(self): > - expr = [] > + def get_values(self) -> List[object]: > + expr: List[object] = [] > if self.tok == ']': > self.accept() > return expr > @@ -284,8 +304,9 @@ def get_values(self): > raise QAPIParseError(self, "expected ',' or ']'") > self.accept() > > - def get_expr(self, nested): > + def get_expr(self, nested: bool = False) -> _ExprValue: > # TODO: Teach mypy that nested=False means the retval is a Dict. > + expr: _ExprValue > if self.tok != '{' and not nested: > raise QAPIParseError(self, "expected '{'") > if self.tok == '{': > @@ -303,7 +324,7 @@ def get_expr(self, nested): > self, "expected '{', '[', string, or boolean") > return expr > > - def get_doc(self, info): > + def get_doc(self, info: QAPISourceInfo) -> List['QAPIDoc']: > if self.val != '##': > raise QAPIParseError( > self, "junk after '##' at start of documentation comment")
On 4/25/21 8:34 AM, Markus Armbruster wrote: > John Snow <jsnow@redhat.com> writes: > >> Annotations do not change runtime behavior. >> This commit *only* adds annotations. >> >> (Annotations for QAPIDoc are in a later commit.) >> >> Signed-off-by: John Snow <jsnow@redhat.com> >> --- >> scripts/qapi/parser.py | 61 ++++++++++++++++++++++++++++-------------- >> 1 file changed, 41 insertions(+), 20 deletions(-) >> >> diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py >> index d02a134aae9..f2b57d5642a 100644 >> --- a/scripts/qapi/parser.py >> +++ b/scripts/qapi/parser.py >> @@ -17,16 +17,29 @@ >> from collections import OrderedDict >> import os >> import re >> -from typing import List >> +from typing import ( >> + Dict, >> + List, >> + Optional, >> + Set, >> + Union, >> +) >> >> from .common import match_nofail >> from .error import QAPISemError, QAPISourceError >> from .source import QAPISourceInfo >> >> >> +#: Represents a parsed JSON object; semantically: one QAPI schema expression. >> +Expression = Dict[str, object] > > I believe you use this for what qapi-code-gen.txt calls a top-level > expression. TopLevelExpression is rather long, but it's used just once, > and once more in RFC PATCH 13. What do you think? > Yeah, I left a comment on gitlab about this -- Sorry for splitting the stream, I didn't expect you to reply on-list without at least clicking the link ;) You're right, this is TOP LEVEL EXPR. I actually do mean to start using it in expr.py as well too, in what will become (I think) pt5c: non-immediately-necessary parser cleanups. I can use TopLevelExpression as the type name if you'd like, but if you have a suggestion for something shorter I am open to suggestions if "Expression" is way too overloaded/confusing. >> + >> +# Return value alias for get_expr(). >> +_ExprValue = Union[List[object], Dict[str, object], str, bool] > > This is essentially a node in our pidgin-JSON parser's abstract syntax > tree. Tree roots use the Dict branch of this Union. > > See also my review of PATCH 06. > OK, I skimmed that one for now but I'll get back to it. >> + >> + >> class QAPIParseError(QAPISourceError): >> """Error class for all QAPI schema parsing errors.""" >> - def __init__(self, parser, msg): >> + def __init__(self, parser: 'QAPISchemaParser', msg: str): > > Forward reference needs quotes. Can't be helped. > >> col = 1 >> for ch in parser.src[parser.line_pos:parser.pos]: >> if ch == '\t': >> @@ -38,7 +51,10 @@ def __init__(self, parser, msg): >> >> class QAPISchemaParser: >> >> - def __init__(self, fname, previously_included=None, incl_info=None): >> + def __init__(self, >> + fname: str, >> + previously_included: Optional[Set[str]] = None, > > This needs to be Optional[] because using the empty set as default > parameter value would be a dangerous trap. Python's choice to evaluate > the default parameter value just once has always been iffy. Stirring > static typing into the language makes it iffier. Can't be helped. > We could force it to accept a tuple and convert it into a set internally. It's just that we seem to use it for sets now. Or ... in pt5c I float the idea of just passing the parent parser in, and I reach up and grab the previously-included stuff directly. >> + incl_info: Optional[QAPISourceInfo] = None): >> self._fname = fname >> self._included = previously_included or set() >> self._included.add(os.path.abspath(self._fname)) >> @@ -46,20 +62,20 @@ def __init__(self, fname, previously_included=None, incl_info=None): >> >> # Lexer state (see `accept` for details): >> self.info = QAPISourceInfo(self._fname, incl_info) >> - self.tok = None >> + self.tok: Optional[str] = None > > Would > > self.tok: str > > work? > Not without modifications, because the Token being None is used to represent EOF. >> self.pos = 0 >> self.cursor = 0 >> - self.val = None >> + self.val: Optional[Union[bool, str]] = None >> self.line_pos = 0 >> >> # Parser output: >> - self.exprs = [] >> - self.docs = [] >> + self.exprs: List[Expression] = [] >> + self.docs: List[QAPIDoc] = [] >> >> # Showtime! >> self._parse() >> >> - def _parse(self): >> + def _parse(self) -> None: >> cur_doc = None >> >> with open(self._fname, 'r', encoding='utf-8') as fp: >> @@ -122,7 +138,7 @@ def _parse(self): >> self.reject_expr_doc(cur_doc) >> >> @staticmethod >> - def reject_expr_doc(doc): >> + def reject_expr_doc(doc: Optional['QAPIDoc']) -> None: >> if doc and doc.symbol: >> raise QAPISemError( >> doc.info, >> @@ -130,10 +146,14 @@ def reject_expr_doc(doc): >> % doc.symbol) >> >> @staticmethod >> - def _include(include, info, incl_fname, previously_included): >> + def _include(include: str, >> + info: QAPISourceInfo, >> + incl_fname: str, >> + previously_included: Set[str] >> + ) -> Optional['QAPISchemaParser']: >> incl_abs_fname = os.path.abspath(incl_fname) >> # catch inclusion cycle >> - inf = info >> + inf: Optional[QAPISourceInfo] = info >> while inf: >> if incl_abs_fname == os.path.abspath(inf.fname): >> raise QAPISemError(info, "inclusion loop for %s" % include) >> @@ -152,9 +172,9 @@ def _include(include, info, incl_fname, previously_included): >> ) from err >> >> @staticmethod >> - def _pragma(name, value, info): >> + def _pragma(name: str, value: object, info: QAPISourceInfo) -> None: > > value: object isn't wrong, but why not _ExprValue? > I forget; admit this one slipped through from an earlier revision. Right now: because _ExprValue is too broad. It really wants Dict[str, object] but the typing on get_expr() is challenging. I'll revisit this with better excuses after I digest your patch 6 review. >> >> - def _check(name, value) -> List[str]: >> + def _check(name: str, value: object) -> List[str]: >> if (not isinstance(value, list) or >> any([not isinstance(elt, str) for elt in value])): >> raise QAPISemError( >> @@ -176,7 +196,7 @@ def _check(name, value) -> List[str]: >> else: >> raise QAPISemError(info, "unknown pragma '%s'" % name) >> >> - def accept(self, skip_comment=True): >> + def accept(self, skip_comment: bool = True) -> None: >> while True: >> self.tok = self.src[self.cursor] >> self.pos = self.cursor >> @@ -240,8 +260,8 @@ def accept(self, skip_comment=True): >> self.src[self.cursor-1:]) >> raise QAPIParseError(self, "stray '%s'" % match.group(0)) >> >> - def get_members(self): >> - expr = OrderedDict() >> + def get_members(self) -> 'OrderedDict[str, object]': >> + expr: 'OrderedDict[str, object]' = OrderedDict() >> if self.tok == '}': >> self.accept() >> return expr >> @@ -267,8 +287,8 @@ def get_members(self): >> if self.tok != "'": >> raise QAPIParseError(self, "expected string") >> >> - def get_values(self): >> - expr = [] >> + def get_values(self) -> List[object]: >> + expr: List[object] = [] >> if self.tok == ']': >> self.accept() >> return expr >> @@ -284,8 +304,9 @@ def get_values(self): >> raise QAPIParseError(self, "expected ',' or ']'") >> self.accept() >> >> - def get_expr(self, nested): >> + def get_expr(self, nested: bool = False) -> _ExprValue: >> # TODO: Teach mypy that nested=False means the retval is a Dict. >> + expr: _ExprValue >> if self.tok != '{' and not nested: >> raise QAPIParseError(self, "expected '{'") >> if self.tok == '{': >> @@ -303,7 +324,7 @@ def get_expr(self, nested): >> self, "expected '{', '[', string, or boolean") >> return expr >> >> - def get_doc(self, info): >> + def get_doc(self, info: QAPISourceInfo) -> List['QAPIDoc']: >> if self.val != '##': >> raise QAPIParseError( >> self, "junk after '##' at start of documentation comment") Thanks! --js
On 4/25/21 8:34 AM, Markus Armbruster wrote: > value: object isn't wrong, but why not _ExprValue? > Updated excuse: because all the way back outside in _parse, we know that: 1. expr is a dict (because of get_expr(False)) 2. expr['pragma'] is also a dict, because we explicitly check it there. 3. We iterate over the keys; all we know so far is that the values are ... something. 4. _pragma()'s job is to validate the type(s) anyway. More or less, the _ExprValue type union isn't remembered here -- even though it was once upon a time something returned by get_expr, it happened in a nested call that is now opaque to mypy in this context. So, it's some combination of "That's all we know about it" and "It happens to be exactly sufficient for this function to operate." --js
John Snow <jsnow@redhat.com> writes: > On 4/25/21 8:34 AM, Markus Armbruster wrote: >> John Snow <jsnow@redhat.com> writes: >> >>> Annotations do not change runtime behavior. >>> This commit *only* adds annotations. >>> >>> (Annotations for QAPIDoc are in a later commit.) >>> >>> Signed-off-by: John Snow <jsnow@redhat.com> >>> --- >>> scripts/qapi/parser.py | 61 ++++++++++++++++++++++++++++-------------- >>> 1 file changed, 41 insertions(+), 20 deletions(-) >>> >>> diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py >>> index d02a134aae9..f2b57d5642a 100644 >>> --- a/scripts/qapi/parser.py >>> +++ b/scripts/qapi/parser.py >>> @@ -17,16 +17,29 @@ >>> from collections import OrderedDict >>> import os >>> import re >>> -from typing import List >>> +from typing import ( >>> + Dict, >>> + List, >>> + Optional, >>> + Set, >>> + Union, >>> +) >>> >>> from .common import match_nofail >>> from .error import QAPISemError, QAPISourceError >>> from .source import QAPISourceInfo >>> >>> >>> +#: Represents a parsed JSON object; semantically: one QAPI schema expression. >>> +Expression = Dict[str, object] >> >> I believe you use this for what qapi-code-gen.txt calls a top-level >> expression. TopLevelExpression is rather long, but it's used just once, >> and once more in RFC PATCH 13. What do you think? >> > > Yeah, I left a comment on gitlab about this -- Sorry for splitting the > stream, I didn't expect you to reply on-list without at least clicking > the link ;) I should have; sorry about that. I need to avoid distractions to stay productive, and web browsers are basically gatling guns firing armor-piercing distraction rounds at me. > You're right, this is TOP LEVEL EXPR. I actually do mean to start using > it in expr.py as well too, in what will become (I think) pt5c: > non-immediately-necessary parser cleanups. > > I can use TopLevelExpression as the type name if you'd like, but if you > have a suggestion for something shorter I am open to suggestions if > "Expression" is way too overloaded/confusing. TopLevelExpr? Matches qapi-code-gen.txt's grammar: SCHEMA = TOP-LEVEL-EXPR... TOP-LEVEL-EXPR = DIRECTIVE | DEFINITION DIRECTIVE = INCLUDE | PRAGMA DEFINITION = ENUM | STRUCT | UNION | ALTERNATE | COMMAND | EVENT >>> + >>> +# Return value alias for get_expr(). >>> +_ExprValue = Union[List[object], Dict[str, object], str, bool] >> >> This is essentially a node in our pidgin-JSON parser's abstract syntax >> tree. Tree roots use the Dict branch of this Union. >> >> See also my review of PATCH 06. >> > > OK, I skimmed that one for now but I'll get back to it. > >>> + >>> + >>> class QAPIParseError(QAPISourceError): >>> """Error class for all QAPI schema parsing errors.""" >>> - def __init__(self, parser, msg): >>> + def __init__(self, parser: 'QAPISchemaParser', msg: str): >> >> Forward reference needs quotes. Can't be helped. >> >>> col = 1 >>> for ch in parser.src[parser.line_pos:parser.pos]: >>> if ch == '\t': >>> @@ -38,7 +51,10 @@ def __init__(self, parser, msg): >>> >>> class QAPISchemaParser: >>> >>> - def __init__(self, fname, previously_included=None, incl_info=None): >>> + def __init__(self, >>> + fname: str, >>> + previously_included: Optional[Set[str]] = None, >> >> This needs to be Optional[] because using the empty set as default >> parameter value would be a dangerous trap. Python's choice to evaluate >> the default parameter value just once has always been iffy. Stirring >> static typing into the language makes it iffier. Can't be helped. >> > > We could force it to accept a tuple and convert it into a set > internally. It's just that we seem to use it for sets now. Another candidate: frozenset. > Or ... in pt5c I float the idea of just passing the parent parser in, > and I reach up and grab the previously-included stuff directly. > >>> + incl_info: Optional[QAPISourceInfo] = None): >>> self._fname = fname >>> self._included = previously_included or set() >>> self._included.add(os.path.abspath(self._fname)) >>> @@ -46,20 +62,20 @@ def __init__(self, fname, previously_included=None, incl_info=None): >>> >>> # Lexer state (see `accept` for details): >>> self.info = QAPISourceInfo(self._fname, incl_info) >>> - self.tok = None >>> + self.tok: Optional[str] = None >> >> Would >> >> self.tok: str >> >> work? >> > > Not without modifications, because the Token being None is used to > represent EOF. True. I missed that, and thought we'd need None just as an initial value here. >>> self.pos = 0 >>> self.cursor = 0 >>> - self.val = None >>> + self.val: Optional[Union[bool, str]] = None >>> self.line_pos = 0 [...]
John Snow <jsnow@redhat.com> writes: > On 4/25/21 8:34 AM, Markus Armbruster wrote: >> value: object isn't wrong, but why not _ExprValue? >> > > Updated excuse: > > because all the way back outside in _parse, we know that: > > 1. expr is a dict (because of get_expr(False)) > 2. expr['pragma'] is also a dict, because we explicitly check it there. Yes: pragma = expr['pragma'] --> if not isinstance(pragma, dict): --> raise QAPISemError( --> info, "value of 'pragma' must be an object") for name, value in pragma.items(): self._pragma(name, value, info) > 3. We iterate over the keys; all we know so far is that the values are > ... something. Actually, *we* know more about the values. get_expr() returns a tree whose inner nodes are dict or list, and whose leaves are str or bool. Therefore, the values are dict, list, str, or bool. It's *mypy* that doesn't know, because it lacks recursive types. I know that you're prbably using "we" in the sense of "the toolchain". I'm annoying you with the difference between "the toolchain" and "we (you, me, and other humans) because I'm concerned about us humans dumbing ourselves down to mypy's level of understanding. To be honest, I'm less and less sure typing these trees without the necessary typing tools is worth the bother. The notational overhead it more oppressive than elsewhere, and yet the typing remains weak. The result fails to satisfy, and that's a constant source of discussions (between us as well as just in my head) on how to best mitigate. > 4. _pragma()'s job is to validate the type(s) anyway. _pragma() can safely assume @value is dict, list, str, or bool. It just happens not to rely on this assumption. > More or less, the _ExprValue type union isn't remembered here -- even > though it was once upon a time something returned by get_expr, it > happened in a nested call that is now opaque to mypy in this context. Understand. > So, it's some combination of "That's all we know about it" and "It > happens to be exactly sufficient for this function to operate." I can accept "it's all mypy can figure out by itself, and it's good enough to get the static checking we want".
On 4/21/21 11:07 PM, John Snow wrote:
> + self.exprs: List[Expression] = []
I did indeed intend to use Expression to mean TopLevelExpr ... However,
in this case, that's not what actually gets stored here.
I tricked myself!
This stores the dict that associates 'expr', 'doc' and 'info'.
Fixing it to be the generic Dict[str, object] removes the last usage of
TopLevelExpr from parser.py ... for now.
(pt5c, optional parser cleanups, uses a stronger type for parser's
return type and sees the reintroduction of that type.)
--js
On 4/27/21 4:43 AM, Markus Armbruster wrote: > John Snow <jsnow@redhat.com> writes: > >> On 4/25/21 8:34 AM, Markus Armbruster wrote: >>> value: object isn't wrong, but why not _ExprValue? >>> >> >> Updated excuse: >> >> because all the way back outside in _parse, we know that: >> >> 1. expr is a dict (because of get_expr(False)) >> 2. expr['pragma'] is also a dict, because we explicitly check it there. > > Yes: > > pragma = expr['pragma'] > --> if not isinstance(pragma, dict): > --> raise QAPISemError( > --> info, "value of 'pragma' must be an object") > for name, value in pragma.items(): > self._pragma(name, value, info) > >> 3. We iterate over the keys; all we know so far is that the values are >> ... something. > > Actually, *we* know more about the values. get_expr() returns a tree > whose inner nodes are dict or list, and whose leaves are str or bool. > Therefore, the values are dict, list, str, or bool. > > It's *mypy* that doesn't know, because it lacks recursive types. > > I know that you're prbably using "we" in the sense of "the toolchain". > I'm annoying you with the difference between "the toolchain" and "we > (you, me, and other humans) because I'm concerned about us humans > dumbing ourselves down to mypy's level of understanding. > Put in a gentler way: The risk is that type annotations that assume less because they *must* assume less will potentially miscommunicate the reality of the interface to future developers. I agree, that is a genuine risk. but ... > To be honest, I'm less and less sure typing these trees without the > necessary typing tools is worth the bother. The notational overhead it > more oppressive than elsewhere, and yet the typing remains weak. The > result fails to satisfy, and that's a constant source of discussions > (between us as well as just in my head) on how to best mitigate. > ... What's the alternative? I still think strict typing has strong benefits -- it's found a few bugs, albeit small. It offers good refactoring assurance and can help communicate the expected types in an interface *very* quickly. Whenever I type something as Dict[str, object] that is my genuine attempt at just cutting my losses and saying "It gets ... something. Figure it out, like you did before Python 3.6." I could use 'Any', but that really just effectively shuts the checker off. You could pass <Lasagna> to the interface and mypy won't flinch. Dict[str, object] at least enforces: - It must be a dict - 100% of its keys must be strings - You cannot safely do anything with its values until you interrogate them at runtime ...And I think that's perfectly accurate. I tried too hard to accurately type introspect.py, and I am avoiding repeating that mistake. >> 4. _pragma()'s job is to validate the type(s) anyway. > > _pragma() can safely assume @value is dict, list, str, or bool. It just > happens not to rely on this assumption. > Correct. Though, there's not too many operations that dict/list/str/bool all share, so you're going to be interrogating these types at runtime anyway. Really, just about everything they share as an interface is probably perfectly summed up by the python object type. So ... I dunno. I share your frustrations at the lack of expressiveness in recursive types, and it has been a major bummer while working on ... a recursive expression parser. * abandons series * /s >> More or less, the _ExprValue type union isn't remembered here -- even >> though it was once upon a time something returned by get_expr, it >> happened in a nested call that is now opaque to mypy in this context. > > Understand. > >> So, it's some combination of "That's all we know about it" and "It >> happens to be exactly sufficient for this function to operate." > > I can accept "it's all mypy can figure out by itself, and it's good > enough to get the static checking we want". > Yep. I think the typing of this particular interface is as good as it can be for the moment, so I recommend leaving it as Dict[str, object]. --js
diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py index d02a134aae9..f2b57d5642a 100644 --- a/scripts/qapi/parser.py +++ b/scripts/qapi/parser.py @@ -17,16 +17,29 @@ from collections import OrderedDict import os import re -from typing import List +from typing import ( + Dict, + List, + Optional, + Set, + Union, +) from .common import match_nofail from .error import QAPISemError, QAPISourceError from .source import QAPISourceInfo +#: Represents a parsed JSON object; semantically: one QAPI schema expression. +Expression = Dict[str, object] + +# Return value alias for get_expr(). +_ExprValue = Union[List[object], Dict[str, object], str, bool] + + class QAPIParseError(QAPISourceError): """Error class for all QAPI schema parsing errors.""" - def __init__(self, parser, msg): + def __init__(self, parser: 'QAPISchemaParser', msg: str): col = 1 for ch in parser.src[parser.line_pos:parser.pos]: if ch == '\t': @@ -38,7 +51,10 @@ def __init__(self, parser, msg): class QAPISchemaParser: - def __init__(self, fname, previously_included=None, incl_info=None): + def __init__(self, + fname: str, + previously_included: Optional[Set[str]] = None, + incl_info: Optional[QAPISourceInfo] = None): self._fname = fname self._included = previously_included or set() self._included.add(os.path.abspath(self._fname)) @@ -46,20 +62,20 @@ def __init__(self, fname, previously_included=None, incl_info=None): # Lexer state (see `accept` for details): self.info = QAPISourceInfo(self._fname, incl_info) - self.tok = None + self.tok: Optional[str] = None self.pos = 0 self.cursor = 0 - self.val = None + self.val: Optional[Union[bool, str]] = None self.line_pos = 0 # Parser output: - self.exprs = [] - self.docs = [] + self.exprs: List[Expression] = [] + self.docs: List[QAPIDoc] = [] # Showtime! self._parse() - def _parse(self): + def _parse(self) -> None: cur_doc = None with open(self._fname, 'r', encoding='utf-8') as fp: @@ -122,7 +138,7 @@ def _parse(self): self.reject_expr_doc(cur_doc) @staticmethod - def reject_expr_doc(doc): + def reject_expr_doc(doc: Optional['QAPIDoc']) -> None: if doc and doc.symbol: raise QAPISemError( doc.info, @@ -130,10 +146,14 @@ def reject_expr_doc(doc): % doc.symbol) @staticmethod - def _include(include, info, incl_fname, previously_included): + def _include(include: str, + info: QAPISourceInfo, + incl_fname: str, + previously_included: Set[str] + ) -> Optional['QAPISchemaParser']: incl_abs_fname = os.path.abspath(incl_fname) # catch inclusion cycle - inf = info + inf: Optional[QAPISourceInfo] = info while inf: if incl_abs_fname == os.path.abspath(inf.fname): raise QAPISemError(info, "inclusion loop for %s" % include) @@ -152,9 +172,9 @@ def _include(include, info, incl_fname, previously_included): ) from err @staticmethod - def _pragma(name, value, info): + def _pragma(name: str, value: object, info: QAPISourceInfo) -> None: - def _check(name, value) -> List[str]: + def _check(name: str, value: object) -> List[str]: if (not isinstance(value, list) or any([not isinstance(elt, str) for elt in value])): raise QAPISemError( @@ -176,7 +196,7 @@ def _check(name, value) -> List[str]: else: raise QAPISemError(info, "unknown pragma '%s'" % name) - def accept(self, skip_comment=True): + def accept(self, skip_comment: bool = True) -> None: while True: self.tok = self.src[self.cursor] self.pos = self.cursor @@ -240,8 +260,8 @@ def accept(self, skip_comment=True): self.src[self.cursor-1:]) raise QAPIParseError(self, "stray '%s'" % match.group(0)) - def get_members(self): - expr = OrderedDict() + def get_members(self) -> 'OrderedDict[str, object]': + expr: 'OrderedDict[str, object]' = OrderedDict() if self.tok == '}': self.accept() return expr @@ -267,8 +287,8 @@ def get_members(self): if self.tok != "'": raise QAPIParseError(self, "expected string") - def get_values(self): - expr = [] + def get_values(self) -> List[object]: + expr: List[object] = [] if self.tok == ']': self.accept() return expr @@ -284,8 +304,9 @@ def get_values(self): raise QAPIParseError(self, "expected ',' or ']'") self.accept() - def get_expr(self, nested): + def get_expr(self, nested: bool = False) -> _ExprValue: # TODO: Teach mypy that nested=False means the retval is a Dict. + expr: _ExprValue if self.tok != '{' and not nested: raise QAPIParseError(self, "expected '{'") if self.tok == '{': @@ -303,7 +324,7 @@ def get_expr(self, nested): self, "expected '{', '[', string, or boolean") return expr - def get_doc(self, info): + def get_doc(self, info: QAPISourceInfo) -> List['QAPIDoc']: if self.val != '##': raise QAPIParseError( self, "junk after '##' at start of documentation comment")
Annotations do not change runtime behavior. This commit *only* adds annotations. (Annotations for QAPIDoc are in a later commit.) Signed-off-by: John Snow <jsnow@redhat.com> --- scripts/qapi/parser.py | 61 ++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 20 deletions(-)