From 1de738013890245b10db12770daf1c3398e8030d Mon Sep 17 00:00:00 2001 From: Marko Durkovic Date: Thu, 28 Jul 2022 19:14:39 +0200 Subject: [PATCH] Fix comment parsing in message definitions --- src/rosbags/typesys/idl.py | 16 ++++------- src/rosbags/typesys/msg.py | 12 ++------ src/rosbags/typesys/peg.py | 57 ++++++++++++++++++++++++++------------ tests/test_parse.py | 9 +++++- 4 files changed, 57 insertions(+), 37 deletions(-) diff --git a/src/rosbags/typesys/idl.py b/src/rosbags/typesys/idl.py index 84522269..cd8d3b47 100644 --- a/src/rosbags/typesys/idl.py +++ b/src/rosbags/typesys/idl.py @@ -11,6 +11,7 @@ Grammar, parse tree visitor and conversion functions for message definitions in from __future__ import annotations +import re from typing import TYPE_CHECKING from .base import Nodetype, parse_message_definition @@ -31,17 +32,12 @@ specification = definition+ definition - = comment - / macro + = macro / include / module_dcl ';' / const_dcl ';' / type_dcl ';' -comment - = r'/\*.*?\*/' - / r'[/][/][^\n]*' - macro = ifndef / define @@ -254,7 +250,10 @@ string_literal class VisitorIDL(Visitor): # pylint: disable=too-many-public-methods """IDL file visitor.""" - RULES = parse_grammar(GRAMMAR_IDL) + RULES = parse_grammar( + GRAMMAR_IDL, + re.compile(r'(\s|/[*]([^*]|[*](?!/))*[*]/|//[^\n]*$)+', re.M | re.S), + ) def __init__(self) -> None: """Initialize.""" @@ -299,9 +298,6 @@ class VisitorIDL(Visitor): # pylint: disable=too-many-public-methods return {k: (consts[k], v) for k, v in structs.items()} # yapf: enable - def visit_comment(self, _: str) -> None: - """Process comment, suppress output.""" - def visit_macro(self, _: Union[LiteralMatch, tuple[LiteralMatch, str]]) -> None: """Process macro, suppress output.""" diff --git a/src/rosbags/typesys/msg.py b/src/rosbags/typesys/msg.py index 0ba942b4..61245dec 100644 --- a/src/rosbags/typesys/msg.py +++ b/src/rosbags/typesys/msg.py @@ -12,6 +12,7 @@ Rosbag1 connection information. from __future__ import annotations +import re from hashlib import md5 from pathlib import PurePosixPath as Path from typing import TYPE_CHECKING @@ -43,13 +44,9 @@ msgsep = r'================================================================================' definition - = comment - / const_dcl + = const_dcl / field_dcl -comment - = r'#[^\n]*' - const_dcl = 'string' identifier '=' r'(?!={79}\n)[^\n]+' / type_spec identifier '=' float_literal @@ -205,7 +202,7 @@ def denormalize_msgtype(typename: str) -> str: class VisitorMSG(Visitor): """MSG file visitor.""" - RULES = parse_grammar(GRAMMAR_MSG) + RULES = parse_grammar(GRAMMAR_MSG, re.compile(r'(\s|#[^\n]*$)+', re.M | re.S)) BASETYPES = { 'bool', @@ -222,9 +219,6 @@ class VisitorMSG(Visitor): 'string', } - def visit_comment(self, _: str) -> None: - """Process comment, suppress output.""" - def visit_const_dcl( self, children: tuple[StringNode, StringNode, LiteralMatch, ConstValue], diff --git a/src/rosbags/typesys/peg.py b/src/rosbags/typesys/peg.py index bb2b9dba..1c296ea9 100644 --- a/src/rosbags/typesys/peg.py +++ b/src/rosbags/typesys/peg.py @@ -24,12 +24,12 @@ class Rule: """Rule base class.""" LIT = 'LITERAL' - WS = re.compile(r'\s+', re.M | re.S) def __init__( self, value: Union[str, Pattern[str], Rule, list[Rule]], rules: dict[str, Rule], + whitespace: Pattern[str], name: Optional[str] = None, ): """Initialize. @@ -37,16 +37,18 @@ class Rule: Args: value: Value of this rule. rules: Grammar containing all rules. + whitespace: Whitespace pattern. name: Name of this rule. """ self.value = value self.rules = rules self.name = name + self.whitespace = whitespace def skip_ws(self, text: str, pos: int) -> int: """Skip whitespace.""" - match = self.WS.match(text, pos) + match = self.whitespace.match(text, pos) return match.span()[1] if match else pos def make_node(self, data: T) -> Union[T, dict[str, Union[str, T]]]: @@ -61,16 +63,23 @@ class Rule: class RuleLiteral(Rule): """Rule to match string literal.""" - def __init__(self, value: str, rules: dict[str, Rule], name: Optional[str] = None): + def __init__( + self, + value: str, + rules: dict[str, Rule], + whitespace: Pattern[str], + name: Optional[str] = None, + ): """Initialize. Args: value: Value of this rule. rules: Grammar containing all rules. + whitespace: Whitespace pattern. name: Name of this rule. """ - super().__init__(value, rules, name) + super().__init__(value, rules, whitespace, name) self.value = value[1:-1].replace('\\\'', '\'') def parse(self, text: str, pos: int) -> tuple[int, Any]: @@ -89,16 +98,23 @@ class RuleRegex(Rule): value: Pattern[str] - def __init__(self, value: str, rules: dict[str, Rule], name: Optional[str] = None): + def __init__( + self, + value: str, + rules: dict[str, Rule], + whitespace: Pattern[str], + name: Optional[str] = None, + ): """Initialize. Args: value: Value of this rule. rules: Grammar containing all rules. + whitespace: Whitespace pattern. name: Name of this rule. """ - super().__init__(value, rules, name) + super().__init__(value, rules, whitespace, name) self.value = re.compile(value[2:-1], re.M | re.S) def parse(self, text: str, pos: int) -> tuple[int, Any]: @@ -234,7 +250,11 @@ def split_token(tok: str) -> list[str]: return list(filter(None, re.split(r'(^\()|(\)(?=[*+?]?$))|([*+?]$)', tok))) -def collapse_tokens(toks: list[Optional[Rule]], rules: dict[str, Rule]) -> Rule: +def collapse_tokens( + toks: list[Optional[Rule]], + rules: dict[str, Rule], + whitespace: Pattern[str], +) -> Rule: """Collapse linear list of tokens to oneof of sequences.""" value: list[Rule] = [] seq: list[Rule] = [] @@ -242,13 +262,16 @@ def collapse_tokens(toks: list[Optional[Rule]], rules: dict[str, Rule]) -> Rule: if tok: seq.append(tok) else: - value.append(RuleSequence(seq, rules) if len(seq) > 1 else seq[0]) + value.append(RuleSequence(seq, rules, whitespace) if len(seq) > 1 else seq[0]) seq = [] - value.append(RuleSequence(seq, rules) if len(seq) > 1 else seq[0]) - return RuleOneof(value, rules) if len(value) > 1 else value[0] + value.append(RuleSequence(seq, rules, whitespace) if len(seq) > 1 else seq[0]) + return RuleOneof(value, rules, whitespace) if len(value) > 1 else value[0] -def parse_grammar(grammar: str) -> dict[str, Rule]: +def parse_grammar( + grammar: str, + whitespace: Pattern[str] = re.compile(r'\s+', re.M | re.S), +) -> dict[str, Rule]: """Parse grammar into rule dictionary.""" rules: dict[str, Rule] = {} for token in grammar.split('\n\n'): @@ -268,24 +291,24 @@ def parse_grammar(grammar: str) -> dict[str, Rule]: '*': RuleZeroPlus, '+': RuleOnePlus, '?': RuleZeroOne, - }[tok](stack[-1], rules) + }[tok](stack[-1], rules, whitespace) elif tok == '/': stack.append(None) elif tok == '(': parens.append(len(stack)) elif tok == ')': index = parens.pop() - rule = collapse_tokens(stack[index:], rules) + rule = collapse_tokens(stack[index:], rules, whitespace) stack = stack[:index] stack.append(rule) elif len(tok) > 2 and tok[:2] == 'r\'': - stack.append(RuleRegex(tok, rules)) + stack.append(RuleRegex(tok, rules, whitespace)) elif tok[0] == '\'': - stack.append(RuleLiteral(tok, rules)) + stack.append(RuleLiteral(tok, rules, whitespace)) else: - stack.append(RuleToken(tok, rules)) + stack.append(RuleToken(tok, rules, whitespace)) - res = collapse_tokens(stack, rules) + res = collapse_tokens(stack, rules, whitespace) res.name = name rules[name] = res return rules diff --git a/tests/test_parse.py b/tests/test_parse.py index 662aecfd..129f16d2 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -118,6 +118,12 @@ module test_msgs { // comment in module typedef std_msgs::msg::Bool Bool; + /**/ /***/ /* block comment */ + + /* + * block comment + */ + module msg { // comment in submodule typedef Bool Balias; @@ -131,10 +137,11 @@ module test_msgs { @comment(type="text", text="ignore") struct Foo { + // comment in struct std_msgs::msg::Header header; Balias bool; Bar sibling; - double x; + double/* comment in member declaration */x; sequence seq1; sequence seq2; d4 array;