Fix comment parsing in message definitions

This commit is contained in:
Marko Durkovic 2022-07-28 19:14:39 +02:00
parent 17f4d54449
commit 1de7380138
4 changed files with 57 additions and 37 deletions

View File

@ -11,6 +11,7 @@ Grammar, parse tree visitor and conversion functions for message definitions in
from __future__ import annotations from __future__ import annotations
import re
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from .base import Nodetype, parse_message_definition from .base import Nodetype, parse_message_definition
@ -31,17 +32,12 @@ specification
= definition+ = definition+
definition definition
= comment = macro
/ macro
/ include / include
/ module_dcl ';' / module_dcl ';'
/ const_dcl ';' / const_dcl ';'
/ type_dcl ';' / type_dcl ';'
comment
= r'/\*.*?\*/'
/ r'[/][/][^\n]*'
macro macro
= ifndef = ifndef
/ define / define
@ -254,7 +250,10 @@ string_literal
class VisitorIDL(Visitor): # pylint: disable=too-many-public-methods class VisitorIDL(Visitor): # pylint: disable=too-many-public-methods
"""IDL file visitor.""" """IDL file visitor."""
RULES = parse_grammar(GRAMMAR_IDL) RULES = parse_grammar(
GRAMMAR_IDL,
re.compile(r'(\s|/[*]([^*]|[*](?!/))*[*]/|//[^\n]*$)+', re.M | re.S),
)
def __init__(self) -> None: def __init__(self) -> None:
"""Initialize.""" """Initialize."""
@ -299,9 +298,6 @@ class VisitorIDL(Visitor): # pylint: disable=too-many-public-methods
return {k: (consts[k], v) for k, v in structs.items()} return {k: (consts[k], v) for k, v in structs.items()}
# yapf: enable # yapf: enable
def visit_comment(self, _: str) -> None:
"""Process comment, suppress output."""
def visit_macro(self, _: Union[LiteralMatch, tuple[LiteralMatch, str]]) -> None: def visit_macro(self, _: Union[LiteralMatch, tuple[LiteralMatch, str]]) -> None:
"""Process macro, suppress output.""" """Process macro, suppress output."""

View File

@ -12,6 +12,7 @@ Rosbag1 connection information.
from __future__ import annotations from __future__ import annotations
import re
from hashlib import md5 from hashlib import md5
from pathlib import PurePosixPath as Path from pathlib import PurePosixPath as Path
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
@ -43,13 +44,9 @@ msgsep
= r'================================================================================' = r'================================================================================'
definition definition
= comment = const_dcl
/ const_dcl
/ field_dcl / field_dcl
comment
= r'#[^\n]*'
const_dcl const_dcl
= 'string' identifier '=' r'(?!={79}\n)[^\n]+' = 'string' identifier '=' r'(?!={79}\n)[^\n]+'
/ type_spec identifier '=' float_literal / type_spec identifier '=' float_literal
@ -205,7 +202,7 @@ def denormalize_msgtype(typename: str) -> str:
class VisitorMSG(Visitor): class VisitorMSG(Visitor):
"""MSG file visitor.""" """MSG file visitor."""
RULES = parse_grammar(GRAMMAR_MSG) RULES = parse_grammar(GRAMMAR_MSG, re.compile(r'(\s|#[^\n]*$)+', re.M | re.S))
BASETYPES = { BASETYPES = {
'bool', 'bool',
@ -222,9 +219,6 @@ class VisitorMSG(Visitor):
'string', 'string',
} }
def visit_comment(self, _: str) -> None:
"""Process comment, suppress output."""
def visit_const_dcl( def visit_const_dcl(
self, self,
children: tuple[StringNode, StringNode, LiteralMatch, ConstValue], children: tuple[StringNode, StringNode, LiteralMatch, ConstValue],

View File

@ -24,12 +24,12 @@ class Rule:
"""Rule base class.""" """Rule base class."""
LIT = 'LITERAL' LIT = 'LITERAL'
WS = re.compile(r'\s+', re.M | re.S)
def __init__( def __init__(
self, self,
value: Union[str, Pattern[str], Rule, list[Rule]], value: Union[str, Pattern[str], Rule, list[Rule]],
rules: dict[str, Rule], rules: dict[str, Rule],
whitespace: Pattern[str],
name: Optional[str] = None, name: Optional[str] = None,
): ):
"""Initialize. """Initialize.
@ -37,16 +37,18 @@ class Rule:
Args: Args:
value: Value of this rule. value: Value of this rule.
rules: Grammar containing all rules. rules: Grammar containing all rules.
whitespace: Whitespace pattern.
name: Name of this rule. name: Name of this rule.
""" """
self.value = value self.value = value
self.rules = rules self.rules = rules
self.name = name self.name = name
self.whitespace = whitespace
def skip_ws(self, text: str, pos: int) -> int: def skip_ws(self, text: str, pos: int) -> int:
"""Skip whitespace.""" """Skip whitespace."""
match = self.WS.match(text, pos) match = self.whitespace.match(text, pos)
return match.span()[1] if match else pos return match.span()[1] if match else pos
def make_node(self, data: T) -> Union[T, dict[str, Union[str, T]]]: def make_node(self, data: T) -> Union[T, dict[str, Union[str, T]]]:
@ -61,16 +63,23 @@ class Rule:
class RuleLiteral(Rule): class RuleLiteral(Rule):
"""Rule to match string literal.""" """Rule to match string literal."""
def __init__(self, value: str, rules: dict[str, Rule], name: Optional[str] = None): def __init__(
self,
value: str,
rules: dict[str, Rule],
whitespace: Pattern[str],
name: Optional[str] = None,
):
"""Initialize. """Initialize.
Args: Args:
value: Value of this rule. value: Value of this rule.
rules: Grammar containing all rules. rules: Grammar containing all rules.
whitespace: Whitespace pattern.
name: Name of this rule. name: Name of this rule.
""" """
super().__init__(value, rules, name) super().__init__(value, rules, whitespace, name)
self.value = value[1:-1].replace('\\\'', '\'') self.value = value[1:-1].replace('\\\'', '\'')
def parse(self, text: str, pos: int) -> tuple[int, Any]: def parse(self, text: str, pos: int) -> tuple[int, Any]:
@ -89,16 +98,23 @@ class RuleRegex(Rule):
value: Pattern[str] value: Pattern[str]
def __init__(self, value: str, rules: dict[str, Rule], name: Optional[str] = None): def __init__(
self,
value: str,
rules: dict[str, Rule],
whitespace: Pattern[str],
name: Optional[str] = None,
):
"""Initialize. """Initialize.
Args: Args:
value: Value of this rule. value: Value of this rule.
rules: Grammar containing all rules. rules: Grammar containing all rules.
whitespace: Whitespace pattern.
name: Name of this rule. name: Name of this rule.
""" """
super().__init__(value, rules, name) super().__init__(value, rules, whitespace, name)
self.value = re.compile(value[2:-1], re.M | re.S) self.value = re.compile(value[2:-1], re.M | re.S)
def parse(self, text: str, pos: int) -> tuple[int, Any]: def parse(self, text: str, pos: int) -> tuple[int, Any]:
@ -234,7 +250,11 @@ def split_token(tok: str) -> list[str]:
return list(filter(None, re.split(r'(^\()|(\)(?=[*+?]?$))|([*+?]$)', tok))) return list(filter(None, re.split(r'(^\()|(\)(?=[*+?]?$))|([*+?]$)', tok)))
def collapse_tokens(toks: list[Optional[Rule]], rules: dict[str, Rule]) -> Rule: def collapse_tokens(
toks: list[Optional[Rule]],
rules: dict[str, Rule],
whitespace: Pattern[str],
) -> Rule:
"""Collapse linear list of tokens to oneof of sequences.""" """Collapse linear list of tokens to oneof of sequences."""
value: list[Rule] = [] value: list[Rule] = []
seq: list[Rule] = [] seq: list[Rule] = []
@ -242,13 +262,16 @@ def collapse_tokens(toks: list[Optional[Rule]], rules: dict[str, Rule]) -> Rule:
if tok: if tok:
seq.append(tok) seq.append(tok)
else: else:
value.append(RuleSequence(seq, rules) if len(seq) > 1 else seq[0]) value.append(RuleSequence(seq, rules, whitespace) if len(seq) > 1 else seq[0])
seq = [] seq = []
value.append(RuleSequence(seq, rules) if len(seq) > 1 else seq[0]) value.append(RuleSequence(seq, rules, whitespace) if len(seq) > 1 else seq[0])
return RuleOneof(value, rules) if len(value) > 1 else value[0] return RuleOneof(value, rules, whitespace) if len(value) > 1 else value[0]
def parse_grammar(grammar: str) -> dict[str, Rule]: def parse_grammar(
grammar: str,
whitespace: Pattern[str] = re.compile(r'\s+', re.M | re.S),
) -> dict[str, Rule]:
"""Parse grammar into rule dictionary.""" """Parse grammar into rule dictionary."""
rules: dict[str, Rule] = {} rules: dict[str, Rule] = {}
for token in grammar.split('\n\n'): for token in grammar.split('\n\n'):
@ -268,24 +291,24 @@ def parse_grammar(grammar: str) -> dict[str, Rule]:
'*': RuleZeroPlus, '*': RuleZeroPlus,
'+': RuleOnePlus, '+': RuleOnePlus,
'?': RuleZeroOne, '?': RuleZeroOne,
}[tok](stack[-1], rules) }[tok](stack[-1], rules, whitespace)
elif tok == '/': elif tok == '/':
stack.append(None) stack.append(None)
elif tok == '(': elif tok == '(':
parens.append(len(stack)) parens.append(len(stack))
elif tok == ')': elif tok == ')':
index = parens.pop() index = parens.pop()
rule = collapse_tokens(stack[index:], rules) rule = collapse_tokens(stack[index:], rules, whitespace)
stack = stack[:index] stack = stack[:index]
stack.append(rule) stack.append(rule)
elif len(tok) > 2 and tok[:2] == 'r\'': elif len(tok) > 2 and tok[:2] == 'r\'':
stack.append(RuleRegex(tok, rules)) stack.append(RuleRegex(tok, rules, whitespace))
elif tok[0] == '\'': elif tok[0] == '\'':
stack.append(RuleLiteral(tok, rules)) stack.append(RuleLiteral(tok, rules, whitespace))
else: else:
stack.append(RuleToken(tok, rules)) stack.append(RuleToken(tok, rules, whitespace))
res = collapse_tokens(stack, rules) res = collapse_tokens(stack, rules, whitespace)
res.name = name res.name = name
rules[name] = res rules[name] = res
return rules return rules

View File

@ -118,6 +118,12 @@ module test_msgs {
// comment in module // comment in module
typedef std_msgs::msg::Bool Bool; typedef std_msgs::msg::Bool Bool;
/**/ /***/ /* block comment */
/*
* block comment
*/
module msg { module msg {
// comment in submodule // comment in submodule
typedef Bool Balias; typedef Bool Balias;
@ -131,10 +137,11 @@ module test_msgs {
@comment(type="text", text="ignore") @comment(type="text", text="ignore")
struct Foo { struct Foo {
// comment in struct
std_msgs::msg::Header header; std_msgs::msg::Header header;
Balias bool; Balias bool;
Bar sibling; Bar sibling;
double x; double/* comment in member declaration */x;
sequence<double> seq1; sequence<double> seq1;
sequence<double, 4> seq2; sequence<double, 4> seq2;
d4 array; d4 array;