Fix comment parsing in message definitions

2022-07-28 19:14:39 +02:00 · 2022-07-28 19:14:39 +02:00 · 1de7380138
commit 1de7380138
parent 17f4d54449
4 changed files with 57 additions and 37 deletions
--- a/src/rosbags/typesys/idl.py
+++ b/src/rosbags/typesys/idl.py
@@ -11,6 +11,7 @@ Grammar, parse tree visitor and conversion functions for message definitions in

 from __future__ import annotations

+import re
 from typing import TYPE_CHECKING

 from .base import Nodetype, parse_message_definition
@@ -31,17 +32,12 @@ specification
  = definition+

 definition
-  = comment
-  / macro
+  = macro
  / include
  / module_dcl ';'
  / const_dcl ';'
  / type_dcl ';'

-comment
-  = r'/\*.*?\*/'
-  / r'[/][/][^\n]*'
-
 macro
  = ifndef
  / define
@@ -254,7 +250,10 @@ string_literal
 class VisitorIDL(Visitor):  # pylint: disable=too-many-public-methods
    """IDL file visitor."""

-    RULES = parse_grammar(GRAMMAR_IDL)
+    RULES = parse_grammar(
+        GRAMMAR_IDL,
+        re.compile(r'(\s|/[*]([^*]|[*](?!/))*[*]/|//[^\n]*$)+', re.M | re.S),
+    )

    def __init__(self) -> None:
        """Initialize."""
@@ -299,9 +298,6 @@ class VisitorIDL(Visitor):  # pylint: disable=too-many-public-methods
        return {k: (consts[k], v) for k, v in structs.items()}
    # yapf: enable

-    def visit_comment(self, _: str) -> None:
-        """Process comment, suppress output."""
-
    def visit_macro(self, _: Union[LiteralMatch, tuple[LiteralMatch, str]]) -> None:
        """Process macro, suppress output."""

--- a/src/rosbags/typesys/msg.py
+++ b/src/rosbags/typesys/msg.py
@@ -12,6 +12,7 @@ Rosbag1 connection information.

 from __future__ import annotations

+import re
 from hashlib import md5
 from pathlib import PurePosixPath as Path
 from typing import TYPE_CHECKING
@@ -43,13 +44,9 @@ msgsep
  = r'================================================================================'

 definition
-  = comment
-  / const_dcl
+  = const_dcl
  / field_dcl

-comment
-  = r'#[^\n]*'
-
 const_dcl
  = 'string' identifier '=' r'(?!={79}\n)[^\n]+'
  / type_spec identifier '=' float_literal
@@ -205,7 +202,7 @@ def denormalize_msgtype(typename: str) -> str:
 class VisitorMSG(Visitor):
    """MSG file visitor."""

-    RULES = parse_grammar(GRAMMAR_MSG)
+    RULES = parse_grammar(GRAMMAR_MSG, re.compile(r'(\s|#[^\n]*$)+', re.M | re.S))

    BASETYPES = {
        'bool',
@@ -222,9 +219,6 @@ class VisitorMSG(Visitor):
        'string',
    }

-    def visit_comment(self, _: str) -> None:
-        """Process comment, suppress output."""
-
    def visit_const_dcl(
        self,
        children: tuple[StringNode, StringNode, LiteralMatch, ConstValue],
--- a/src/rosbags/typesys/peg.py
+++ b/src/rosbags/typesys/peg.py
@@ -24,12 +24,12 @@ class Rule:
    """Rule base class."""

    LIT = 'LITERAL'
-    WS = re.compile(r'\s+', re.M | re.S)

    def __init__(
        self,
        value: Union[str, Pattern[str], Rule, list[Rule]],
        rules: dict[str, Rule],
+        whitespace: Pattern[str],
        name: Optional[str] = None,
    ):
        """Initialize.
@@ -37,16 +37,18 @@ class Rule:
        Args:
            value: Value of this rule.
            rules: Grammar containing all rules.
+            whitespace: Whitespace pattern.
            name: Name of this rule.

        """
        self.value = value
        self.rules = rules
        self.name = name
+        self.whitespace = whitespace

    def skip_ws(self, text: str, pos: int) -> int:
        """Skip whitespace."""
-        match = self.WS.match(text, pos)
+        match = self.whitespace.match(text, pos)
        return match.span()[1] if match else pos

    def make_node(self, data: T) -> Union[T, dict[str, Union[str, T]]]:
@@ -61,16 +63,23 @@ class Rule:
 class RuleLiteral(Rule):
    """Rule to match string literal."""

-    def __init__(self, value: str, rules: dict[str, Rule], name: Optional[str] = None):
+    def __init__(
+        self,
+        value: str,
+        rules: dict[str, Rule],
+        whitespace: Pattern[str],
+        name: Optional[str] = None,
+    ):
        """Initialize.

        Args:
            value: Value of this rule.
            rules: Grammar containing all rules.
+            whitespace: Whitespace pattern.
            name: Name of this rule.

        """
-        super().__init__(value, rules, name)
+        super().__init__(value, rules, whitespace, name)
        self.value = value[1:-1].replace('\\\'', '\'')

    def parse(self, text: str, pos: int) -> tuple[int, Any]:
@@ -89,16 +98,23 @@ class RuleRegex(Rule):

    value: Pattern[str]

-    def __init__(self, value: str, rules: dict[str, Rule], name: Optional[str] = None):
+    def __init__(
+        self,
+        value: str,
+        rules: dict[str, Rule],
+        whitespace: Pattern[str],
+        name: Optional[str] = None,
+    ):
        """Initialize.

        Args:
            value: Value of this rule.
            rules: Grammar containing all rules.
+            whitespace: Whitespace pattern.
            name: Name of this rule.

        """
-        super().__init__(value, rules, name)
+        super().__init__(value, rules, whitespace, name)
        self.value = re.compile(value[2:-1], re.M | re.S)

    def parse(self, text: str, pos: int) -> tuple[int, Any]:
@@ -234,7 +250,11 @@ def split_token(tok: str) -> list[str]:
    return list(filter(None, re.split(r'(^\()|(\)(?=[*+?]?$))|([*+?]$)', tok)))


-def collapse_tokens(toks: list[Optional[Rule]], rules: dict[str, Rule]) -> Rule:
+def collapse_tokens(
+    toks: list[Optional[Rule]],
+    rules: dict[str, Rule],
+    whitespace: Pattern[str],
+) -> Rule:
    """Collapse linear list of tokens to oneof of sequences."""
    value: list[Rule] = []
    seq: list[Rule] = []
@@ -242,13 +262,16 @@ def collapse_tokens(toks: list[Optional[Rule]], rules: dict[str, Rule]) -> Rule:
        if tok:
            seq.append(tok)
        else:
-            value.append(RuleSequence(seq, rules) if len(seq) > 1 else seq[0])
+            value.append(RuleSequence(seq, rules, whitespace) if len(seq) > 1 else seq[0])
            seq = []
-    value.append(RuleSequence(seq, rules) if len(seq) > 1 else seq[0])
-    return RuleOneof(value, rules) if len(value) > 1 else value[0]
+    value.append(RuleSequence(seq, rules, whitespace) if len(seq) > 1 else seq[0])
+    return RuleOneof(value, rules, whitespace) if len(value) > 1 else value[0]


-def parse_grammar(grammar: str) -> dict[str, Rule]:
+def parse_grammar(
+    grammar: str,
+    whitespace: Pattern[str] = re.compile(r'\s+', re.M | re.S),
+) -> dict[str, Rule]:
    """Parse grammar into rule dictionary."""
    rules: dict[str, Rule] = {}
    for token in grammar.split('\n\n'):
@@ -268,24 +291,24 @@ def parse_grammar(grammar: str) -> dict[str, Rule]:
                    '*': RuleZeroPlus,
                    '+': RuleOnePlus,
                    '?': RuleZeroOne,
-                }[tok](stack[-1], rules)
+                }[tok](stack[-1], rules, whitespace)
            elif tok == '/':
                stack.append(None)
            elif tok == '(':
                parens.append(len(stack))
            elif tok == ')':
                index = parens.pop()
-                rule = collapse_tokens(stack[index:], rules)
+                rule = collapse_tokens(stack[index:], rules, whitespace)
                stack = stack[:index]
                stack.append(rule)
            elif len(tok) > 2 and tok[:2] == 'r\'':
-                stack.append(RuleRegex(tok, rules))
+                stack.append(RuleRegex(tok, rules, whitespace))
            elif tok[0] == '\'':
-                stack.append(RuleLiteral(tok, rules))
+                stack.append(RuleLiteral(tok, rules, whitespace))
            else:
-                stack.append(RuleToken(tok, rules))
+                stack.append(RuleToken(tok, rules, whitespace))

-        res = collapse_tokens(stack, rules)
+        res = collapse_tokens(stack, rules, whitespace)
        res.name = name
        rules[name] = res
    return rules
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -118,6 +118,12 @@ module test_msgs {
  // comment in module
  typedef std_msgs::msg::Bool Bool;

+  /**/ /***/ /* block comment */
+
+  /*
+   * block comment
+   */
+
  module msg {
    // comment in submodule
    typedef Bool Balias;
@@ -131,10 +137,11 @@ module test_msgs {

    @comment(type="text", text="ignore")
    struct Foo {
+        // comment in struct
        std_msgs::msg::Header header;
        Balias bool;
        Bar sibling;
-        double x;
+        double/* comment in member declaration */x;
        sequence<double> seq1;
        sequence<double, 4> seq2;
        d4 array;