From 73f1c86b97031942b47164a65ea536386fdfa618 Mon Sep 17 00:00:00 2001 From: ostr00000 <ostr00000@gmail.com> Date: Fri, 12 Jan 2024 01:37:27 +0100 Subject: [PATCH 1/5] implement parsing free conversion operators --- cxxheaderparser/parser.py | 100 ++++++++++++++++++------------ cxxheaderparser/types.py | 17 +++++ tests/test_operators.py | 127 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 204 insertions(+), 40 deletions(-) diff --git a/cxxheaderparser/parser.py b/cxxheaderparser/parser.py index 5bef044..796ae21 100644 --- a/cxxheaderparser/parser.py +++ b/cxxheaderparser/parser.py @@ -6,7 +6,7 @@ from . import lexer from .errors import CxxParseError -from .lexer import LexToken, Location, PhonyEnding +from .lexer import LexToken, Location, PhonyEnding, PlyLexer from .options import ParserOptions from .parserstate import ( ClassBlockState, @@ -39,6 +39,7 @@ NameSpecifier, NamespaceAlias, NamespaceDecl, + Operator, PQNameSegment, Parameter, PQName, @@ -697,7 +698,7 @@ def _parse_template_specialization(self) -> TemplateSpecialization: try: parsed_type, mods = self._parse_type(None) - if parsed_type is None: + if not isinstance(parsed_type, Type): raise self._parse_error(None) mods.validate(var_ok=False, meth_ok=False, msg="") @@ -1022,7 +1023,7 @@ def _parse_using_typealias( """ parsed_type, mods = self._parse_type(None) - if parsed_type is None: + if not isinstance(parsed_type, Type): raise self._parse_error(None) mods.validate(var_ok=False, meth_ok=False, msg="parsing typealias") @@ -1770,7 +1771,7 @@ def _parse_parameter( else: # required typename + decorators parsed_type, mods = self._parse_type(tok) - if parsed_type is None: + if not isinstance(parsed_type, Type): raise self._parse_error(None) mods.validate(var_ok=False, meth_ok=False, msg="parsing parameter") @@ -1883,7 +1884,7 @@ def _parse_trailing_return_type( ) parsed_type, mods = self._parse_type(None) - if parsed_type is None: + if not isinstance(parsed_type, Type): raise self._parse_error(None) mods.validate(var_ok=False, meth_ok=False, msg="parsing trailing return type") @@ -2301,7 +2302,7 @@ def _parse_type( self, tok: typing.Optional[LexToken], operator_ok: bool = False, - ) -> typing.Tuple[typing.Optional[Type], ParsedTypeModifiers]: + ) -> typing.Tuple[typing.Union[Type, Operator], ParsedTypeModifiers]: """ This parses a typename and stops parsing when it hits something that it doesn't understand. The caller uses the results to figure @@ -2310,7 +2311,7 @@ def _parse_type( This only parses the base type, does not parse pointers, references, or additional const/volatile qualifiers - The returned type will only be None if operator_ok is True and an + The returned type will only be `Operator` if operator_ok is True and an operator is encountered. 
""" @@ -2331,8 +2332,6 @@ def _parse_type( tok = get_token() pqname: typing.Optional[PQName] = None - pqname_optional = False - _pqname_start_tokens = self._pqname_start_tokens _attribute_start = self._attribute_start_tokens @@ -2343,13 +2342,23 @@ def _parse_type( if pqname is not None: # found second set of names, done here break + if operator_ok and tok_type == "operator": # special case: conversion operators such as operator bool - pqname_optional = True - break - pqname, _ = self._parse_pqname( - tok, compound_ok=True, fn_ok=False, fund_ok=True + mods = ParsedTypeModifiers(vars, both, meths) + po = self._parse_member_operator() + return po, mods + + pqname, op = self._parse_pqname( + tok, compound_ok=True, fn_ok=True, fund_ok=True ) + + if op is not None: + # special case: conversion operator, but also a free operator + mods = ParsedTypeModifiers(vars, both, meths) + po = self._parse_free_operator(pqname, op, mods, const, volatile) + return po, mods + elif tok_type in self._parse_type_ptr_ref_paren: if pqname is None: raise self._parse_error(tok) @@ -2374,13 +2383,8 @@ def _parse_type( tok = get_token() - if pqname is None: - if not pqname_optional: - raise self._parse_error(tok) - parsed_type = None - else: - # Construct a type from the parsed name - parsed_type = Type(pqname, const, volatile) + # Construct a type from the parsed name + parsed_type = Type(pqname, const, volatile) self.lex.return_token(tok) @@ -2388,6 +2392,31 @@ def _parse_type( mods = ParsedTypeModifiers(vars, both, meths) return parsed_type, mods + def _parse_member_operator(self) -> Operator: + """This function parses operator from class body.""" + ctype, cmods = self._parse_type(None) + if not isinstance(ctype, Type): + raise self._parse_error(None) + pqname = PQName([NameSpecifier("operator")]) + return Operator(pqname, "conversion", ctype, cmods) + + def _parse_free_operator( + self, + pqname: PQName, + op: str, + mods: ParsedTypeModifiers, + const: bool, + volatile: bool, + ) -> Operator: + """This function parses operator implemented outside class body.""" + last_seg = pqname.segments[-1] + assert last_seg.name.startswith("operator") + last_seg.name = "operator" + + type_name = PQName([NameSpecifier(p) for p in op.split(PlyLexer.t_DBL_COLON)]) + t = Type(type_name, const, volatile) + return Operator(pqname, "conversion", t, mods) + def _parse_decl( self, parsed_type: Type, @@ -2538,6 +2567,7 @@ def _parse_decl( def _parse_operator_conversion( self, + operator: Operator, mods: ParsedTypeModifiers, location: Location, doxygen: typing.Optional[str], @@ -2545,34 +2575,24 @@ def _parse_operator_conversion( is_typedef: bool, is_friend: bool, ) -> None: - tok = self._next_token_must_be("operator") - if is_typedef: - raise self._parse_error(tok, "operator not permitted in typedef") - - # next piece must be the conversion type - ctype, cmods = self._parse_type(None) - if ctype is None: - raise self._parse_error(None) + raise self._parse_error(None, "operator not permitted in typedef") - cmods.validate(var_ok=False, meth_ok=False, msg="parsing conversion operator") + operator.cmods.validate( + var_ok=False, meth_ok=False, msg="parsing conversion operator" + ) # Check for any cv decorations for the type - rtype = self._parse_cv_ptr(ctype) + rtype = self._parse_cv_ptr(operator.ctype) # then this must be a method self._next_token_must_be("(") - # make our own pqname/op here - segments: typing.List[PQNameSegment] = [NameSpecifier("operator")] - pqname = PQName(segments) - op = "conversion" - if self._parse_function( mods, 
rtype, - pqname, - op, + operator.pqname, + operator.operator_name, template, doxygen, location, @@ -2612,7 +2632,7 @@ def _parse_declarations( # Check to see if this might be a class/enum declaration if ( - parsed_type is not None + isinstance(parsed_type, Type) and parsed_type.typename.classkey and self._maybe_parse_class_enum_decl( parsed_type, mods, doxygen, template, is_typedef, is_friend, location @@ -2635,10 +2655,10 @@ def _parse_declarations( mods.validate(var_ok=var_ok, meth_ok=meth_ok, msg=msg) - if parsed_type is None: + if isinstance(parsed_type, Operator): # this means an operator was encountered, deal with the special case self._parse_operator_conversion( - mods, location, doxygen, template, is_typedef, is_friend + parsed_type, mods, location, doxygen, template, is_typedef, is_friend ) return diff --git a/cxxheaderparser/types.py b/cxxheaderparser/types.py index 1aa0b99..12c2712 100644 --- a/cxxheaderparser/types.py +++ b/cxxheaderparser/types.py @@ -3,6 +3,9 @@ from .tokfmt import tokfmt, Token +if typing.TYPE_CHECKING: + from .parserstate import ParsedTypeModifiers + @dataclass class Value: @@ -298,6 +301,20 @@ def format_decl(self, name: str): return f"{c}{v}{self.typename.format()} {name}" +@dataclass +class Operator: + """An internal structure for parsing operator.""" + + pqname: PQName + """Possibly qualified name for operator.""" + operator_name: str + """Conversion operator have always `conversion` str in this attribute.""" + ctype: Type + """Return type for this operator.""" + cmods: "ParsedTypeModifiers" + """Return type modifiers for this operator.""" + + @dataclass class Array: """ diff --git a/tests/test_operators.py b/tests/test_operators.py index 2c0a82b..e5d4e25 100644 --- a/tests/test_operators.py +++ b/tests/test_operators.py @@ -746,3 +746,130 @@ def test_free_operator() -> None: ] ) ) + + +def test_free_conversion_operator() -> None: + content = """ + Foo::operator Type1() { return SomeMethod(); } + const Foo::operator Type2() const { return SomeMethod(); } + volatile Foo::operator Type3() const { return SomeMethod(); } + + Foo::operator Foo::Type4() { return SomeMethod(); } + const Foo::operator Foo::Type5() const { return SomeMethod(); } + volatile Foo::operator Foo::Type6() const { return SomeMethod(); } + """ + data = parse_string(content, cleandoc=True) + + assert data == ParsedData( + namespace=NamespaceScope( + method_impls=[ + Method( + return_type=Type( + typename=PQName(segments=[NameSpecifier(name="Type1")]) + ), + name=PQName( + segments=[ + NameSpecifier(name="Foo"), + NameSpecifier(name="operator"), + ] + ), + parameters=[], + has_body=True, + operator="conversion", + ), + Method( + return_type=Type( + typename=PQName(segments=[NameSpecifier(name="Type2")]), + const=True, + ), + name=PQName( + segments=[ + NameSpecifier(name="Foo"), + NameSpecifier(name="operator"), + ] + ), + parameters=[], + has_body=True, + operator="conversion", + const=True, + ), + Method( + return_type=Type( + typename=PQName(segments=[NameSpecifier(name="Type3")]), + volatile=True, + ), + name=PQName( + segments=[ + NameSpecifier(name="Foo"), + NameSpecifier(name="operator"), + ] + ), + parameters=[], + has_body=True, + operator="conversion", + const=True, + ), + Method( + return_type=Type( + typename=PQName( + segments=[ + NameSpecifier(name="Foo"), + NameSpecifier(name="Type4"), + ] + ) + ), + name=PQName( + segments=[ + NameSpecifier(name="Foo"), + NameSpecifier(name="operator"), + ] + ), + parameters=[], + has_body=True, + operator="conversion", + ), + Method( + 
return_type=Type( + typename=PQName( + segments=[ + NameSpecifier(name="Foo"), + NameSpecifier(name="Type5"), + ] + ), + const=True, + ), + name=PQName( + segments=[ + NameSpecifier(name="Foo"), + NameSpecifier(name="operator"), + ] + ), + parameters=[], + has_body=True, + operator="conversion", + const=True, + ), + Method( + return_type=Type( + typename=PQName( + segments=[ + NameSpecifier(name="Foo"), + NameSpecifier(name="Type6"), + ] + ), + volatile=True, + ), + name=PQName( + segments=[ + NameSpecifier(name="Foo"), + NameSpecifier(name="operator"), + ] + ), + parameters=[], + has_body=True, + operator="conversion", + const=True, + ), + ] + ) + ) From b1dec5eaf329878e4a91001ff69756e48c14ed50 Mon Sep 17 00:00:00 2001 From: ostr00000 <ostr00000@gmail.com> Date: Fri, 12 Jan 2024 03:25:21 +0100 Subject: [PATCH 2/5] fix mypy errors --- cxxheaderparser/parser.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cxxheaderparser/parser.py b/cxxheaderparser/parser.py index 796ae21..f1cdef6 100644 --- a/cxxheaderparser/parser.py +++ b/cxxheaderparser/parser.py @@ -1759,7 +1759,7 @@ def _parse_parameter( param_name = None default = None param_pack = False - parsed_type: typing.Optional[Type] + parsed_type: typing.Union[Type, Operator] at_type: typing.Optional[Type] = None if not tok: @@ -2383,6 +2383,9 @@ def _parse_type( tok = get_token() + if pqname is None: + raise self._parse_error(tok) + # Construct a type from the parsed name parsed_type = Type(pqname, const, volatile) @@ -2410,6 +2413,7 @@ def _parse_free_operator( ) -> Operator: """This function parses operator implemented outside class body.""" last_seg = pqname.segments[-1] + assert isinstance(last_seg, NameSpecifier) assert last_seg.name.startswith("operator") last_seg.name = "operator" From 1eba2383932181de52a8c8e276221caad14066c4 Mon Sep 17 00:00:00 2001 From: ostr00000 <ostr00000@gmail.com> Date: Fri, 12 Jan 2024 03:26:45 +0100 Subject: [PATCH 3/5] change attribute annotations to comment syntax (to be consistent with project conventions) --- cxxheaderparser/types.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cxxheaderparser/types.py b/cxxheaderparser/types.py index 12c2712..9a9f031 100644 --- a/cxxheaderparser/types.py +++ b/cxxheaderparser/types.py @@ -305,14 +305,17 @@ def format_decl(self, name: str): class Operator: """An internal structure for parsing operator.""" + #: Possibly qualified name for operator. pqname: PQName - """Possibly qualified name for operator.""" + + #: Conversion operator have always `conversion` str in this attribute. operator_name: str - """Conversion operator have always `conversion` str in this attribute.""" + + #: Return type for this operator. ctype: Type - """Return type for this operator.""" + + #: Return type modifiers for this operator. 
cmods: "ParsedTypeModifiers" - """Return type modifiers for this operator.""" @dataclass From 96960816d10e4c1d158f2c664a06efd88fc4b673 Mon Sep 17 00:00:00 2001 From: ostr00000 <ostr00000@gmail.com> Date: Fri, 12 Jan 2024 22:44:00 +0100 Subject: [PATCH 4/5] do not process operator name in `_parse_pqname_name` --- cxxheaderparser/parser.py | 54 ++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/cxxheaderparser/parser.py b/cxxheaderparser/parser.py index f1cdef6..5e1d189 100644 --- a/cxxheaderparser/parser.py +++ b/cxxheaderparser/parser.py @@ -1572,25 +1572,22 @@ def _parse_pqname_name_operator(self) -> LexTokenList: def _parse_pqname_name( self, tok_value: str - ) -> typing.Tuple[NameSpecifier, typing.Optional[str]]: - name = "" - specialization = None - op = None - + ) -> typing.Tuple[NameSpecifier, LexTokenList]: # parse out operators as that's generally useful if tok_value == "operator": op_parts = self._parse_pqname_name_operator() - op = "".join(o.value for o in op_parts) - name = f"operator{op}" - + name = "operator" + "".join(o.value for o in op_parts) else: + op_parts = [] name = tok_value if self.lex.token_if("<"): # template specialization specialization = self._parse_template_specialization() + else: + specialization = None - return NameSpecifier(name, specialization), op + return NameSpecifier(name, specialization), op_parts def _parse_pqname( self, @@ -1599,7 +1596,7 @@ def _parse_pqname( fn_ok: bool = False, compound_ok: bool = False, fund_ok: bool = False, - ) -> typing.Tuple[PQName, typing.Optional[str]]: + ) -> typing.Tuple[PQName, LexTokenList]: """ Parses a possibly qualified function name or a type name, returns when unexpected item encountered (but does not consume it) @@ -1607,6 +1604,11 @@ def _parse_pqname( :param fn_ok: Operator functions ok :param compound_ok: Compound types ok :param fund_ok: Fundamental types ok + :return: 2-element tuple where the first element is a possibly qualified + function name, and the second element is list of Lex tokens that + are used as operator name. + If this list is empty, then no operator is detected. 
+ qualified_id: ["::"] nested_name_specifier ["template"] unqualified_id | "::" IDENTIFIER @@ -1641,7 +1643,7 @@ def _parse_pqname( classkey = None segments: typing.List[PQNameSegment] = [] - op = None + op_parts: LexTokenList = [] has_typename = False if tok is None: @@ -1651,7 +1653,7 @@ def _parse_pqname( raise self._parse_error(tok) if tok.type == "auto": - return PQName([AutoSpecifier()]), None + return PQName([AutoSpecifier()]), [] _fundamentals = self._fundamentals @@ -1676,7 +1678,7 @@ def _parse_pqname( # Handle unnamed class/enum/struct self.anon_id += 1 segments.append(AnonymousName(self.anon_id)) - return PQName(segments, classkey), None + return PQName(segments, classkey), [] elif tok.type == "typename": has_typename = True tok = self.lex.token() @@ -1712,9 +1714,9 @@ def _parse_pqname( tok = self._next_token_must_be("NAME") tok_value = tok.value - name, op = self._parse_pqname_name(tok_value) + name, op_parts = self._parse_pqname_name(tok_value) segments.append(name) - if op: + if op_parts: if not fn_ok: # encountered unexpected operator raise self._parse_error(tok, "NAME") @@ -1733,10 +1735,10 @@ def _parse_pqname( self.debug_print( "parse_pqname: %s op=%s", pqname, - op, + op_parts, ) - return pqname, op + return pqname, op_parts # # Function parsing @@ -2349,14 +2351,16 @@ def _parse_type( po = self._parse_member_operator() return po, mods - pqname, op = self._parse_pqname( + pqname, op_parts = self._parse_pqname( tok, compound_ok=True, fn_ok=True, fund_ok=True ) - if op is not None: + if op_parts: # special case: conversion operator, but also a free operator mods = ParsedTypeModifiers(vars, both, meths) - po = self._parse_free_operator(pqname, op, mods, const, volatile) + po = self._parse_free_operator( + pqname, op_parts, mods, const, volatile + ) return po, mods elif tok_type in self._parse_type_ptr_ref_paren: @@ -2406,7 +2410,7 @@ def _parse_member_operator(self) -> Operator: def _parse_free_operator( self, pqname: PQName, - op: str, + op_parts: LexTokenList, mods: ParsedTypeModifiers, const: bool, volatile: bool, @@ -2417,7 +2421,9 @@ def _parse_free_operator( assert last_seg.name.startswith("operator") last_seg.name = "operator" - type_name = PQName([NameSpecifier(p) for p in op.split(PlyLexer.t_DBL_COLON)]) + type_name = PQName( + [NameSpecifier(op.value) for op in op_parts if op.type != "DBL_COLON"] + ) t = Type(type_name, const, volatile) return Operator(pqname, "conversion", t, mods) @@ -2510,7 +2516,9 @@ def _parse_decl( tok = self.lex.token_if_in_set(self._pqname_start_tokens) if tok: - pqname, op = self._parse_pqname(tok, fn_ok=True) + pqname, op_parts = self._parse_pqname(tok, fn_ok=True) + if op_parts: + op = "".join(o.value for o in op_parts) # TODO: "type fn(x);" is ambiguous here. Because this is a header # parser, we assume it's a function, not a variable declaration From 79f9c947042d91e8d35f9ea9437b16091120aee3 Mon Sep 17 00:00:00 2001 From: ostr00000 <ostr00000@gmail.com> Date: Fri, 12 Jan 2024 22:56:26 +0100 Subject: [PATCH 5/5] remove `Operator` class from `types` --- cxxheaderparser/parser.py | 23 +++++++++++++++++++---- cxxheaderparser/types.py | 20 -------------------- 2 files changed, 19 insertions(+), 24 deletions(-) diff --git a/cxxheaderparser/parser.py b/cxxheaderparser/parser.py index 5e1d189..e0b7a52 100644 --- a/cxxheaderparser/parser.py +++ b/cxxheaderparser/parser.py @@ -3,10 +3,11 @@ import inspect import re import typing +from dataclasses import dataclass from . 
import lexer from .errors import CxxParseError -from .lexer import LexToken, Location, PhonyEnding, PlyLexer +from .lexer import LexToken, Location, PhonyEnding from .options import ParserOptions from .parserstate import ( ClassBlockState, @@ -39,7 +40,6 @@ NameSpecifier, NamespaceAlias, NamespaceDecl, - Operator, PQNameSegment, Parameter, PQName, @@ -64,11 +64,26 @@ from .visitor import CxxVisitor, null_visitor LexTokenList = typing.List[LexToken] -T = typing.TypeVar("T") - PT = typing.TypeVar("PT", Parameter, TemplateNonTypeParam) +@dataclass +class Operator: + """An internal structure for parsing operator.""" + + #: Possibly qualified name for operator. + pqname: PQName + + #: Conversion operator have always `conversion` str in this attribute. + operator_name: str + + #: Return type for this operator. + ctype: Type + + #: Return type modifiers for this operator. + cmods: ParsedTypeModifiers + + class CxxParser: """ Single-use parser object diff --git a/cxxheaderparser/types.py b/cxxheaderparser/types.py index 9a9f031..1aa0b99 100644 --- a/cxxheaderparser/types.py +++ b/cxxheaderparser/types.py @@ -3,9 +3,6 @@ from .tokfmt import tokfmt, Token -if typing.TYPE_CHECKING: - from .parserstate import ParsedTypeModifiers - @dataclass class Value: @@ -301,23 +298,6 @@ def format_decl(self, name: str): return f"{c}{v}{self.typename.format()} {name}" -@dataclass -class Operator: - """An internal structure for parsing operator.""" - - #: Possibly qualified name for operator. - pqname: PQName - - #: Conversion operator have always `conversion` str in this attribute. - operator_name: str - - #: Return type for this operator. - ctype: Type - - #: Return type modifiers for this operator. - cmods: "ParsedTypeModifiers" - - @dataclass class Array: """
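
Taken together, the five patches let cxxheaderparser parse conversion operators that are defined outside a class body, such as `Foo::operator Type1() { ... }` (the new `test_free_conversion_operator` case in PATCH 1/5). Below is a minimal usage sketch, assuming the whole series is applied; the attribute accesses simply mirror the expectations already encoded in tests/test_operators.py and are not a separately documented interface.

from cxxheaderparser.simple import parse_string

content = """
Foo::operator Type1() { return SomeMethod(); }
"""

# cleandoc=True strips common leading whitespace, as in the existing tests.
data = parse_string(content, cleandoc=True)

# The free conversion operator is reported as a method implementation in the
# global namespace scope and tagged with operator="conversion".
impl = data.namespace.method_impls[0]
assert impl.operator == "conversion"
assert impl.has_body
assert [s.name for s in impl.name.segments] == ["Foo", "operator"]
assert impl.return_type.typename.segments[0].name == "Type1"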