Fix comment parsing in message definitions

This commit is contained in:
Marko Durkovic 2022-07-28 19:14:39 +02:00
parent 17f4d54449
commit 1de7380138
4 changed files with 57 additions and 37 deletions

View File

@ -11,6 +11,7 @@ Grammar, parse tree visitor and conversion functions for message definitions in
from __future__ import annotations
import re
from typing import TYPE_CHECKING
from .base import Nodetype, parse_message_definition
@ -31,17 +32,12 @@ specification
= definition+
definition
= comment
/ macro
= macro
/ include
/ module_dcl ';'
/ const_dcl ';'
/ type_dcl ';'
comment
= r'/\*.*?\*/'
/ r'[/][/][^\n]*'
macro
= ifndef
/ define
@ -254,7 +250,10 @@ string_literal
class VisitorIDL(Visitor): # pylint: disable=too-many-public-methods
"""IDL file visitor."""
RULES = parse_grammar(GRAMMAR_IDL)
RULES = parse_grammar(
GRAMMAR_IDL,
re.compile(r'(\s|/[*]([^*]|[*](?!/))*[*]/|//[^\n]*$)+', re.M | re.S),
)
def __init__(self) -> None:
"""Initialize."""
@ -299,9 +298,6 @@ class VisitorIDL(Visitor): # pylint: disable=too-many-public-methods
return {k: (consts[k], v) for k, v in structs.items()}
# yapf: enable
def visit_comment(self, _: str) -> None:
"""Process comment, suppress output."""
def visit_macro(self, _: Union[LiteralMatch, tuple[LiteralMatch, str]]) -> None:
"""Process macro, suppress output."""

View File

@ -12,6 +12,7 @@ Rosbag1 connection information.
from __future__ import annotations
import re
from hashlib import md5
from pathlib import PurePosixPath as Path
from typing import TYPE_CHECKING
@ -43,13 +44,9 @@ msgsep
= r'================================================================================'
definition
= comment
/ const_dcl
= const_dcl
/ field_dcl
comment
= r'#[^\n]*'
const_dcl
= 'string' identifier '=' r'(?!={79}\n)[^\n]+'
/ type_spec identifier '=' float_literal
@ -205,7 +202,7 @@ def denormalize_msgtype(typename: str) -> str:
class VisitorMSG(Visitor):
"""MSG file visitor."""
RULES = parse_grammar(GRAMMAR_MSG)
RULES = parse_grammar(GRAMMAR_MSG, re.compile(r'(\s|#[^\n]*$)+', re.M | re.S))
BASETYPES = {
'bool',
@ -222,9 +219,6 @@ class VisitorMSG(Visitor):
'string',
}
def visit_comment(self, _: str) -> None:
"""Process comment, suppress output."""
def visit_const_dcl(
self,
children: tuple[StringNode, StringNode, LiteralMatch, ConstValue],

View File

@ -24,12 +24,12 @@ class Rule:
"""Rule base class."""
LIT = 'LITERAL'
WS = re.compile(r'\s+', re.M | re.S)
def __init__(
self,
value: Union[str, Pattern[str], Rule, list[Rule]],
rules: dict[str, Rule],
whitespace: Pattern[str],
name: Optional[str] = None,
):
"""Initialize.
@ -37,16 +37,18 @@ class Rule:
Args:
value: Value of this rule.
rules: Grammar containing all rules.
whitespace: Whitespace pattern.
name: Name of this rule.
"""
self.value = value
self.rules = rules
self.name = name
self.whitespace = whitespace
def skip_ws(self, text: str, pos: int) -> int:
"""Skip whitespace."""
match = self.WS.match(text, pos)
match = self.whitespace.match(text, pos)
return match.span()[1] if match else pos
def make_node(self, data: T) -> Union[T, dict[str, Union[str, T]]]:
@ -61,16 +63,23 @@ class Rule:
class RuleLiteral(Rule):
"""Rule to match string literal."""
def __init__(self, value: str, rules: dict[str, Rule], name: Optional[str] = None):
def __init__(
self,
value: str,
rules: dict[str, Rule],
whitespace: Pattern[str],
name: Optional[str] = None,
):
"""Initialize.
Args:
value: Value of this rule.
rules: Grammar containing all rules.
whitespace: Whitespace pattern.
name: Name of this rule.
"""
super().__init__(value, rules, name)
super().__init__(value, rules, whitespace, name)
self.value = value[1:-1].replace('\\\'', '\'')
def parse(self, text: str, pos: int) -> tuple[int, Any]:
@ -89,16 +98,23 @@ class RuleRegex(Rule):
value: Pattern[str]
def __init__(self, value: str, rules: dict[str, Rule], name: Optional[str] = None):
def __init__(
self,
value: str,
rules: dict[str, Rule],
whitespace: Pattern[str],
name: Optional[str] = None,
):
"""Initialize.
Args:
value: Value of this rule.
rules: Grammar containing all rules.
whitespace: Whitespace pattern.
name: Name of this rule.
"""
super().__init__(value, rules, name)
super().__init__(value, rules, whitespace, name)
self.value = re.compile(value[2:-1], re.M | re.S)
def parse(self, text: str, pos: int) -> tuple[int, Any]:
@ -234,7 +250,11 @@ def split_token(tok: str) -> list[str]:
return list(filter(None, re.split(r'(^\()|(\)(?=[*+?]?$))|([*+?]$)', tok)))
def collapse_tokens(toks: list[Optional[Rule]], rules: dict[str, Rule]) -> Rule:
def collapse_tokens(
toks: list[Optional[Rule]],
rules: dict[str, Rule],
whitespace: Pattern[str],
) -> Rule:
"""Collapse linear list of tokens to oneof of sequences."""
value: list[Rule] = []
seq: list[Rule] = []
@ -242,13 +262,16 @@ def collapse_tokens(toks: list[Optional[Rule]], rules: dict[str, Rule]) -> Rule:
if tok:
seq.append(tok)
else:
value.append(RuleSequence(seq, rules) if len(seq) > 1 else seq[0])
value.append(RuleSequence(seq, rules, whitespace) if len(seq) > 1 else seq[0])
seq = []
value.append(RuleSequence(seq, rules) if len(seq) > 1 else seq[0])
return RuleOneof(value, rules) if len(value) > 1 else value[0]
value.append(RuleSequence(seq, rules, whitespace) if len(seq) > 1 else seq[0])
return RuleOneof(value, rules, whitespace) if len(value) > 1 else value[0]
def parse_grammar(grammar: str) -> dict[str, Rule]:
def parse_grammar(
grammar: str,
whitespace: Pattern[str] = re.compile(r'\s+', re.M | re.S),
) -> dict[str, Rule]:
"""Parse grammar into rule dictionary."""
rules: dict[str, Rule] = {}
for token in grammar.split('\n\n'):
@ -268,24 +291,24 @@ def parse_grammar(grammar: str) -> dict[str, Rule]:
'*': RuleZeroPlus,
'+': RuleOnePlus,
'?': RuleZeroOne,
}[tok](stack[-1], rules)
}[tok](stack[-1], rules, whitespace)
elif tok == '/':
stack.append(None)
elif tok == '(':
parens.append(len(stack))
elif tok == ')':
index = parens.pop()
rule = collapse_tokens(stack[index:], rules)
rule = collapse_tokens(stack[index:], rules, whitespace)
stack = stack[:index]
stack.append(rule)
elif len(tok) > 2 and tok[:2] == 'r\'':
stack.append(RuleRegex(tok, rules))
stack.append(RuleRegex(tok, rules, whitespace))
elif tok[0] == '\'':
stack.append(RuleLiteral(tok, rules))
stack.append(RuleLiteral(tok, rules, whitespace))
else:
stack.append(RuleToken(tok, rules))
stack.append(RuleToken(tok, rules, whitespace))
res = collapse_tokens(stack, rules)
res = collapse_tokens(stack, rules, whitespace)
res.name = name
rules[name] = res
return rules

View File

@ -118,6 +118,12 @@ module test_msgs {
// comment in module
typedef std_msgs::msg::Bool Bool;
/**/ /***/ /* block comment */
/*
* block comment
*/
module msg {
// comment in submodule
typedef Bool Balias;
@ -131,10 +137,11 @@ module test_msgs {
@comment(type="text", text="ignore")
struct Foo {
// comment in struct
std_msgs::msg::Header header;
Balias bool;
Bar sibling;
double x;
double/* comment in member declaration */x;
sequence<double> seq1;
sequence<double, 4> seq2;
d4 array;