QAPISchemaParser is a conventional recursive descent parser. Except
QAPISchemaParser.get_doc() delegates most of the doc comment parsing
work to a state machine in QAPIDoc. The state machine doesn't get
tokens like a recursive descent parser, it is fed tokens.
I find this state machine rather opaque and hard to maintain.
Replace it by a conventional parser, all in QAPISchemaParser. Less
code, and (at least in my opinion) easier to understand.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
---
scripts/qapi/parser.py | 478 ++++++++++++++++++-----------------------
1 file changed, 210 insertions(+), 268 deletions(-)
diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py
index 48cc9a6367..73ff150430 100644
--- a/scripts/qapi/parser.py
+++ b/scripts/qapi/parser.py
@@ -134,8 +134,8 @@ def _parse(self) -> None:
info = self.info
if self.tok == '#':
self.reject_expr_doc(cur_doc)
- for cur_doc in self.get_doc(info):
- self.docs.append(cur_doc)
+ cur_doc = self.get_doc()
+ self.docs.append(cur_doc)
continue
expr = self.get_expr()
@@ -414,39 +414,171 @@ def get_expr(self) -> _ExprValue:
self, "expected '{', '[', string, or boolean")
return expr
- def get_doc(self, info: QAPISourceInfo) -> List['QAPIDoc']:
+ def get_doc_line(self) -> Optional[str]:
+ if self.tok != '#':
+ raise QAPIParseError(
+ self, "documentation comment must end with '##'")
+ assert isinstance(self.val, str)
+ if self.val.startswith('##'):
+ # End of doc comment
+ if self.val != '##':
+ raise QAPIParseError(
+ self, "junk after '##' at end of documentation comment")
+ return None
+ if self.val == '#':
+ return ''
+ if self.val[1] != ' ':
+ raise QAPIParseError(self, "missing space after #")
+ return self.val[2:].rstrip()
+
+ @staticmethod
+ def _match_at_name_colon(string: str) -> Optional[Match[str]]:
+ return re.match(r'@([^:]*): *', string)
+
+ def get_doc_indented(self, doc: 'QAPIDoc') -> Optional[str]:
+ self.accept(False)
+ line = self.get_doc_line()
+ while line == '':
+ doc.append_line(line)
+ self.accept(False)
+ line = self.get_doc_line()
+ if line is None:
+ return line
+ indent = must_match(r'\s*', line).end()
+ if not indent:
+ return line
+ doc.append_line(line[indent:])
+ prev_line_blank = False
+ while True:
+ self.accept(False)
+ line = self.get_doc_line()
+ if line is None:
+ return line
+ if self._match_at_name_colon(line):
+ return line
+ cur_indent = must_match(r'\s*', line).end()
+ if line != '' and cur_indent < indent:
+ if prev_line_blank:
+ return line
+ raise QAPIParseError(
+ self,
+ "unexpected de-indent (expected at least %d spaces)" %
+ indent)
+ doc.append_line(line[indent:])
+ prev_line_blank = True
+
+ def get_doc_paragraph(self, doc: 'QAPIDoc') -> Optional[str]:
+ while True:
+ self.accept(False)
+ line = self.get_doc_line()
+ if line is None:
+ return line
+ if line == '':
+ return line
+ doc.append_line(line)
+
+ def get_doc(self) -> 'QAPIDoc':
if self.val != '##':
raise QAPIParseError(
self, "junk after '##' at start of documentation comment")
-
- docs = []
- cur_doc = QAPIDoc(self, info)
+ info = self.info
self.accept(False)
- while self.tok == '#':
- assert isinstance(self.val, str)
- if self.val.startswith('##'):
- # End of doc comment
- if self.val != '##':
- raise QAPIParseError(
- self,
- "junk after '##' at end of documentation comment")
- cur_doc.end_comment()
- docs.append(cur_doc)
- self.accept()
- return docs
- if self.val.startswith('# ='):
- if cur_doc.symbol:
+ line = self.get_doc_line()
+ if line is not None and line.startswith('@'):
+ # Definition documentation
+ if not line.endswith(':'):
+ raise QAPIParseError(self, "line should end with ':'")
+ # Invalid names are not checked here, but the name
+ # provided *must* match the following definition,
+ # which *is* validated in expr.py.
+ symbol = line[1:-1]
+ if not symbol:
+ raise QAPIParseError(self, "name required after '@'")
+ doc = QAPIDoc(self, info, symbol)
+ self.accept(False)
+ line = self.get_doc_line()
+ no_more_args = False
+
+ while line is not None:
+ # Blank lines
+ while line == '':
+ self.accept(False)
+ line = self.get_doc_line()
+ if line is None:
+ break
+ # Non-blank line, first of a section
+ if line == 'Features:' and not doc.features:
+ self.accept(False)
+ line = self.get_doc_line()
+ while line == '':
+ self.accept(False)
+ line = self.get_doc_line()
+ while (line is not None
+ and (match := self._match_at_name_colon(line))):
+ doc.new_feature(match.group(1))
+ text = line[match.end():]
+ if text:
+ doc.append_line(text)
+ line = self.get_doc_indented(doc)
+ no_more_args = True
+ elif match := self._match_at_name_colon(line):
+ # description
+ if no_more_args:
+ raise QAPIParseError(
+ self,
+ "description of '@%s:' follows a section"
+ % match.group(1))
+ while (line is not None
+ and (match := self._match_at_name_colon(line))):
+ doc.new_argument(match.group(1))
+ text = line[match.end():]
+ if text:
+ doc.append_line(text)
+ line = self.get_doc_indented(doc)
+ no_more_args = True
+ elif match := re.match(
+ r'(Returns|Since|Notes?|Examples?|TODO): *',
+ line):
+ # tagged section
+ doc.new_tagged_section(match.group(1))
+ text = line[match.end():]
+ if text:
+ doc.append_line(text)
+ line = self.get_doc_indented(doc)
+ no_more_args = True
+ elif line.startswith('='):
raise QAPIParseError(
self,
"unexpected '=' markup in definition documentation")
- if cur_doc.body.text:
+ else:
+ # tag-less paragraph
+ doc.ensure_untagged_section()
+ doc.append_line(line)
+ line = self.get_doc_paragraph(doc)
+ else:
+ # Free-form documentation
+ doc = QAPIDoc(self, info)
+ doc.ensure_untagged_section()
+ first = True
+ while line is not None:
+ if match := self._match_at_name_colon(line):
raise QAPIParseError(
self,
- "'=' heading must come first in a comment block")
- cur_doc.append(self.val)
- self.accept(False)
+ "'@%s:' not allowed in free-form documentation"
+ % match.group(1))
+ if line.startswith('='):
+ if not first:
+ raise QAPIParseError(
+ self,
+ "'=' heading must come first in a comment block")
+ doc.append_line(line)
+ self.accept(False)
+ line = self.get_doc_line()
+ first = False
- raise QAPIParseError(self, "documentation comment must end with '##'")
+ self.accept(False)
+ doc.end()
+ return doc
class QAPIDoc:
@@ -469,7 +601,6 @@ class QAPIDoc:
"""
class Section:
- # pylint: disable=too-few-public-methods
def __init__(self, parser: QAPISchemaParser,
tag: Optional[str] = None):
# section source info, i.e. where it begins
@@ -480,29 +611,8 @@ def __init__(self, parser: QAPISchemaParser,
self.tag = tag
# section text without tag
self.text = ''
- # indentation to strip (None means indeterminate)
- self._indent = None if self.tag else 0
-
- def append(self, line: str) -> None:
- line = line.rstrip()
-
- if line:
- indent = must_match(r'\s*', line).end()
- if self._indent is None:
- # indeterminate indentation
- if self.text != '':
- # non-blank, non-first line determines indentation
- if indent == 0:
- raise QAPIParseError(
- self._parser, "line needs to be indented")
- self._indent = indent
- elif indent < self._indent:
- raise QAPIParseError(
- self._parser,
- "unexpected de-indent (expected at least %d spaces)" %
- self._indent)
- line = line[self._indent:]
+ def append_line(self, line: str) -> None:
self.text += line + '\n'
class ArgSection(Section):
@@ -514,243 +624,75 @@ def __init__(self, parser: QAPISchemaParser,
def connect(self, member: 'QAPISchemaMember') -> None:
self.member = member
- class NullSection(Section):
- """
- Immutable dummy section for use at the end of a doc block.
- """
- # pylint: disable=too-few-public-methods
- def append(self, line: str) -> None:
- assert False, "Text appended after end_comment() called."
-
- def __init__(self, parser: QAPISchemaParser, info: QAPISourceInfo):
+ def __init__(self, parser: QAPISchemaParser, info: QAPISourceInfo,
+ symbol: Optional[str] = None):
# self._parser is used to report errors with QAPIParseError. The
# resulting error position depends on the state of the parser.
# It happens to be the beginning of the comment. More or less
# servicable, but action at a distance.
self._parser = parser
+ # info points to the doc comment block's first line
self.info = info
- self.symbol: Optional[str] = None
- self.body = QAPIDoc.Section(parser)
- # dicts mapping parameter/feature names to their ArgSection
- self.args: Dict[str, QAPIDoc.ArgSection] = OrderedDict()
- self.features: Dict[str, QAPIDoc.ArgSection] = OrderedDict()
+ # definition doc's symbol, None for free-form doc
+ self.symbol: Optional[str] = symbol
+ # the sections in textual order
+ self.all_sections: List[QAPIDoc.Section] = [QAPIDoc.Section(parser)]
+ # the body section
+ self.body: Optional[QAPIDoc.Section] = self.all_sections[0]
+ # dicts mapping parameter/feature names to their description
+ self.args: Dict[str, QAPIDoc.ArgSection] = {}
+ self.features: Dict[str, QAPIDoc.ArgSection] = {}
+ # sections other than .body, .args, .features
self.sections: List[QAPIDoc.Section] = []
- # the current section
- self._section = self.body
- self._append_line = self._append_body_line
- self._first_line_in_paragraph = False
- def has_section(self, tag: str) -> bool:
- """Return True if we have a section with this tag."""
- for i in self.sections:
- if i.tag == tag:
- return True
- return False
+ def end(self) -> None:
+ for section in self.all_sections:
+ section.text = section.text.strip('\n')
+ if section.tag is not None and section.text == '':
+ raise QAPISemError(
+ section.info, "text required after '%s:'" % section.tag)
- def append(self, line: str) -> None:
- """
- Parse a comment line and add it to the documentation.
-
- The way that the line is dealt with depends on which part of
- the documentation we're parsing right now:
- * The body section: ._append_line is ._append_body_line
- * An argument section: ._append_line is ._append_args_line
- * A features section: ._append_line is ._append_features_line
- * An additional section: ._append_line is ._append_various_line
- """
- line = line[1:]
- if not line:
- self._append_freeform(line)
- self._first_line_in_paragraph = True
+ def ensure_untagged_section(self) -> None:
+ if self.all_sections and not self.all_sections[-1].tag:
+ # extend current section
+ self.all_sections[-1].text += '\n'
return
+ # start new section
+ section = self.Section(self._parser)
+ self.sections.append(section)
+ self.all_sections.append(section)
- if line[0] != ' ':
- raise QAPIParseError(self._parser, "missing space after #")
- line = line[1:]
- self._append_line(line)
- self._first_line_in_paragraph = False
+ def new_tagged_section(self, tag: str) -> None:
+ if tag in ('Returns', 'Since'):
+ for section in self.all_sections:
+ if isinstance(section, self.ArgSection):
+ continue
+ if section.tag == tag:
+ raise QAPIParseError(
+ self._parser, "duplicated '%s' section" % tag)
+ section = self.Section(self._parser, tag)
+ self.sections.append(section)
+ self.all_sections.append(section)
- def end_comment(self) -> None:
- self._switch_section(QAPIDoc.NullSection(self._parser))
-
- @staticmethod
- def _match_at_name_colon(string: str) -> Optional[Match[str]]:
- return re.match(r'@([^:]*): *', string)
-
- def _match_section_tag(self, string: str) -> Optional[Match[str]]:
- if not self._first_line_in_paragraph:
- return None
- return re.match(r'(Returns|Since|Notes?|Examples?|TODO): *',
- string)
-
- def _append_body_line(self, line: str) -> None:
- """
- Process a line of documentation text in the body section.
-
- If this a symbol line and it is the section's first line, this
- is a definition documentation block for that symbol.
-
- If it's a definition documentation block, another symbol line
- begins the argument section for the argument named by it, and
- a section tag begins an additional section. Start that
- section and append the line to it.
-
- Else, append the line to the current section.
- """
- # FIXME not nice: things like '# @foo:' and '# @foo: ' aren't
- # recognized, and get silently treated as ordinary text
- if not self.symbol and not self.body.text and line.startswith('@'):
- if not line.endswith(':'):
- raise QAPIParseError(self._parser, "line should end with ':'")
- self.symbol = line[1:-1]
- # Invalid names are not checked here, but the name provided MUST
- # match the following definition, which *is* validated in expr.py.
- if not self.symbol:
- raise QAPIParseError(
- self._parser, "name required after '@'")
- elif self.symbol:
- # This is a definition documentation block
- if self._match_at_name_colon(line):
- self._append_line = self._append_args_line
- self._append_args_line(line)
- elif line == 'Features:':
- self._append_line = self._append_features_line
- elif self._match_section_tag(line):
- self._append_line = self._append_various_line
- self._append_various_line(line)
- else:
- self._append_freeform(line)
- else:
- # This is a free-form documentation block
- self._append_freeform(line)
-
- def _append_args_line(self, line: str) -> None:
- """
- Process a line of documentation text in an argument section.
-
- A symbol line begins the next argument section, a section tag
- section or a non-indented line after a blank line begins an
- additional section. Start that section and append the line to
- it.
-
- Else, append the line to the current section.
-
- """
- match = self._match_at_name_colon(line)
- if match:
- line = line[match.end():]
- self._start_args_section(match.group(1))
- elif self._match_section_tag(line):
- self._append_line = self._append_various_line
- self._append_various_line(line)
- return
- elif (self._section.text.endswith('\n\n')
- and line and not line[0].isspace()):
- if line == 'Features:':
- self._append_line = self._append_features_line
- else:
- self._start_section()
- self._append_line = self._append_various_line
- self._append_various_line(line)
- return
-
- self._append_freeform(line)
-
- def _append_features_line(self, line: str) -> None:
- match = self._match_at_name_colon(line)
- if match:
- line = line[match.end():]
- self._start_features_section(match.group(1))
- elif self._match_section_tag(line):
- self._append_line = self._append_various_line
- self._append_various_line(line)
- return
- elif (self._section.text.endswith('\n\n')
- and line and not line[0].isspace()):
- self._start_section()
- self._append_line = self._append_various_line
- self._append_various_line(line)
- return
-
- self._append_freeform(line)
-
- def _append_various_line(self, line: str) -> None:
- """
- Process a line of documentation text in an additional section.
-
- A symbol line is an error.
-
- A section tag begins an additional section. Start that
- section and append the line to it.
-
- Else, append the line to the current section.
- """
- match = self._match_at_name_colon(line)
- if match:
- raise QAPIParseError(self._parser,
- "description of '@%s:' follows a section"
- % match.group(1))
- match = self._match_section_tag(line)
- if match:
- line = line[match.end():]
- self._start_section(match.group(1))
-
- self._append_freeform(line)
-
- def _start_symbol_section(
- self,
- symbols_dict: Dict[str, 'QAPIDoc.ArgSection'],
- name: str) -> None:
- # FIXME invalid names other than the empty string aren't flagged
+ def _new_description(self, name: str,
+ desc: Dict[str, ArgSection]) -> None:
if not name:
raise QAPIParseError(self._parser, "invalid parameter name")
- if name in symbols_dict:
+ if name in desc:
raise QAPIParseError(self._parser,
"'%s' parameter name duplicated" % name)
- assert not self.sections
- new_section = QAPIDoc.ArgSection(self._parser, '@' + name)
- self._switch_section(new_section)
- symbols_dict[name] = new_section
+ section = self.ArgSection(self._parser, '@' + name)
+ self.all_sections.append(section)
+ desc[name] = section
- def _start_args_section(self, name: str) -> None:
- self._start_symbol_section(self.args, name)
+ def new_argument(self, name: str) -> None:
+ self._new_description(name, self.args)
- def _start_features_section(self, name: str) -> None:
- self._start_symbol_section(self.features, name)
+ def new_feature(self, name: str) -> None:
+ self._new_description(name, self.features)
- def _start_section(self, tag: Optional[str] = None) -> None:
- if not tag and not self._section.tag:
- # extend current section
- return
- if tag in ('Returns', 'Since') and self.has_section(tag):
- raise QAPIParseError(self._parser,
- "duplicated '%s' section" % tag)
- new_section = QAPIDoc.Section(self._parser, tag)
- self._switch_section(new_section)
- self.sections.append(new_section)
-
- def _switch_section(self, new_section: 'QAPIDoc.Section') -> None:
- text = self._section.text = self._section.text.strip('\n')
-
- # Only the 'body' section is allowed to have an empty body.
- # All other sections, including anonymous ones, must have text.
- if self._section != self.body and not text:
- # We do not create anonymous sections unless there is
- # something to put in them; this is a parser bug.
- assert self._section.tag
- raise QAPISemError(
- self._section.info,
- "text required after '%s:'" % self._section.tag)
-
- self._section = new_section
-
- def _append_freeform(self, line: str) -> None:
- match = re.match(r'(@\S+:)', line)
- if match:
- raise QAPIParseError(self._parser,
- "'%s' not allowed in free-form documentation"
- % match.group(1))
- self._section.append(line)
+ def append_line(self, line: str) -> None:
+ self.all_sections[-1].append_line(line)
def connect_member(self, member: 'QAPISchemaMember') -> None:
if member.name not in self.args:
@@ -758,8 +700,8 @@ def connect_member(self, member: 'QAPISchemaMember') -> None:
raise QAPISemError(member.info,
"%s '%s' lacks documentation"
% (member.role, member.name))
- self.args[member.name] = QAPIDoc.ArgSection(self._parser,
- '@' + member.name)
+ self.args[member.name] = QAPIDoc.ArgSection(
+ self._parser, '@' + member.name)
self.args[member.name].connect(member)
def connect_feature(self, feature: 'QAPISchemaFeature') -> None:
--
2.43.0
On Fri, Feb 16, 2024 at 03:58:38PM +0100, Markus Armbruster wrote: > QAPISchemaParser is a conventional recursive descent parser. Except > QAPISchemaParser.get_doc() delegates most of the doc comment parsing > work to a state machine in QAPIDoc. The state machine doesn't get > tokens like a recursive descent parser, it is fed tokens. > > I find this state machine rather opaque and hard to maintain. > > Replace it by a conventional parser, all in QAPISchemaParser. Less > code, and (at least in my opinion) easier to understand. > > Signed-off-by: Markus Armbruster <armbru@redhat.com> > --- > scripts/qapi/parser.py | 478 ++++++++++++++++++----------------------- > 1 file changed, 210 insertions(+), 268 deletions(-) Reviewing parsing code typically gives me a headache, and reviewing diffs of parsing code is even worse. Thus instead of R-b I'll give a Tested-by: Daniel P. Berrangé <berrange@redhat.com> on the basis that we've got great test coverage and I think that's the real killer requirement for parsing code. The tests still pass with this commit, so functionally it is working as expected. With regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
Daniel P. Berrangé <berrange@redhat.com> writes: > On Fri, Feb 16, 2024 at 03:58:38PM +0100, Markus Armbruster wrote: >> QAPISchemaParser is a conventional recursive descent parser. Except >> QAPISchemaParser.get_doc() delegates most of the doc comment parsing >> work to a state machine in QAPIDoc. The state machine doesn't get >> tokens like a recursive descent parser, it is fed tokens. >> >> I find this state machine rather opaque and hard to maintain. >> >> Replace it by a conventional parser, all in QAPISchemaParser. Less >> code, and (at least in my opinion) easier to understand. >> >> Signed-off-by: Markus Armbruster <armbru@redhat.com> >> --- >> scripts/qapi/parser.py | 478 ++++++++++++++++++----------------------- >> 1 file changed, 210 insertions(+), 268 deletions(-) > > Reviewing parsing code typically gives me a headache, and reviewing > diffs of parsing code is even worse. Thus instead of R-b I'll give > a > > Tested-by: Daniel P. Berrangé <berrange@redhat.com> > > on the basis that we've got great test coverage and I think that's > the real killer requirement for parsing code. The tests still pass > with this commit, so functionally it is working as expected. Makes sense. Figuring out what language the old parser parses gave me a headache or three, too. Thanks!
© 2016 - 2024 Red Hat, Inc.