From: Vladimír Vondruš Date: Wed, 13 Dec 2017 18:10:52 +0000 (+0100) Subject: doxygen: rework paragraph patching yet again to avoid duplicate code. X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~cjwatson/git?a=commitdiff_plain;h=559bca4190d7586b3b865e253789ccb5593ea972;p=blog.git doxygen: rework paragraph patching yet again to avoid duplicate code. The test results are almost the same, there's just one less newline between adjacent block elements extracted out of paragraphs, which is completely fine. --- diff --git a/doxygen/dox2html5.py b/doxygen/dox2html5.py index 26d39d3a..38d4af5d 100755 --- a/doxygen/dox2html5.py +++ b/doxygen/dox2html5.py @@ -141,7 +141,7 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. out.params = {} out.return_value = None - # DOXYGEN PATCHING 1/5 + # DOXYGEN PATCHING 1/4 # # In the optimistic case, when parsing the element, the parsed # content is treated as single reasonable paragraph and the caller is told @@ -167,7 +167,11 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. i: ET.Element for i in element: - # DOXYGEN PATCHING 2/5 + # State used later + code_block = None + formula_block = None + + # DOXYGEN PATCHING 2/4 # # Upon encountering a block element nested in , we need to act. # If there was any content before, we close the paragraph. If there @@ -180,16 +184,15 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. # Those elements are: # - # -
- # - and + # - (if not describing return type) and # - # - , , # - , - # - block - # - (complex block/inline autodetection involved, so - # the check is deferred to later in the loop) + # - (if block) + # - (if block) # - # Note that and are - # extracted out of the text flow, so these are removed from this check. + # and are extracted out of + # the text flow, so these are removed from this check. # # In addition, there's special handling to achieve things like this: #
    @@ -201,16 +204,62 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. # I.e., not wrapping "A paragraph" in a

    , but only if it's # immediately followed by another and it's the first paragraph in a # list item. We check that using the immediate_parent variable. - if (i.tag in ['heading', 'blockquote', 'xrefsect', 'variablelist', 'verbatim', 'itemizedlist', 'orderedlist', 'image', 'table'] or (i.tag == 'simplesect' and i.attrib['kind'] != 'return') or (i.tag == 'formula' and i.text.startswith('\[ ') and i.text.endswith(' \]'))) and element.tag == 'para' and out.write_paragraph_close_tag: - out.is_reasonable_paragraph = False - out.parsed = out.parsed.rstrip() - if not out.parsed: - out.write_paragraph_start_tag = False - elif immediate_parent and immediate_parent.tag == 'listitem' and i.tag in ['itemizedlist', 'orderedlist']: - out.write_paragraph_start_tag = False - else: - out.parsed += '

    ' - out.write_paragraph_close_tag = False + if element.tag == 'para': + end_previous_paragraph = False + + # Straightforward elements + if i.tag in ['heading', 'blockquote', 'xrefsect', 'variablelist', 'verbatim', 'itemizedlist', 'orderedlist', 'image', 'table']: + end_previous_paragraph = True + + # describing return type is cut out of text flow, so + # it doesn't contribute + elif i.tag == 'simplesect' and i.attrib['kind'] != 'return': + end_previous_paragraph = True + + # can be both, depending on what's inside + elif i.tag == 'formula': + if i.text.startswith('\[ ') and i.text.endswith(' \]'): + end_previous_paragraph = True + formula_block = True + else: + assert i.text.startswith('$ ') and i.text.endswith(' $') + formula_block = False + + # is autodetected to be either block or inline + elif i.tag == 'programlisting': + # If it seems to be a standalone code paragraph, don't wrap it + # in

    <p> and use <pre>:
    +                # - is either alone in the paragraph, with no text or other
    +                #   elements around
    +                # - or is a code snippet (filename instead of just .ext).
    +                #   Doxygen unfortunately doesn't put @snippet in its own
    +                #   paragraph even if it's separated by blank lines. It does so
    +                #   for @include and related, though.
    +                if ((not element.text or not element.text.strip()) and (not i.tail or not i.tail.strip()) and len([listing for listing in element]) == 1) or ('filename' in i.attrib and not i.attrib['filename'].startswith('.')):
    +                    end_previous_paragraph = True
    +                    code_block = True
    +
    +                # Looks like inline code, but has multiple code lines, so it's
    +                # suspicious. Use code block, but warn.
    +                elif len([codeline for codeline in i]) > 1:
    +                    end_previous_paragraph = True
    +                    code_block = True
    +                    logging.warning("Inline code has multiple lines, fallback to a code block")
    +
    +                # Otherwise wrap it in <p> and use <code> + else: + code_block = False + + if end_previous_paragraph: + out.is_reasonable_paragraph = False + out.parsed = out.parsed.rstrip() + if not out.parsed: + out.write_paragraph_start_tag = False + elif immediate_parent and immediate_parent.tag == 'listitem' and i.tag in ['itemizedlist', 'orderedlist']: + out.write_paragraph_start_tag = False + elif out.write_paragraph_close_tag: + out.parsed += '

    ' + out.write_paragraph_close_tag = False # Block elements if i.tag in ['sect1', 'sect2', 'sect3']: @@ -247,6 +296,7 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. out.parsed += '<{0}>{2}'.format(tag, id, title) elif i.tag == 'heading': + assert element.tag == 'para' # is inside a paragraph :/ has_block_elements = True if i.attrib['level'] == '1': @@ -266,7 +316,7 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. assert element.tag != 'para' # should be top-level block element paragraph_count += 1 - # DOXYGEN PATCHING 3/5 + # DOXYGEN PATCHING 3/4 # # Parse contents of the paragraph, don't trim whitespace around # nested elements but trim it at the begin and end of the paragraph @@ -278,7 +328,7 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. # and assume they are not scattered all over the place (ugh). # # There's also the patching of nested lists that results in the - # immediate_parent variable in the section 2/5 -- we pass the + # immediate_parent variable in the section 2/4 -- we pass the # parent only if this is the first paragraph inside it. parsed = parse_desc_internal(state, i, element if paragraph_count == 1 and not has_block_elements else None, False) parsed.parsed = parsed.parsed.strip() @@ -302,10 +352,12 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. assert not parsed.section elif i.tag == 'blockquote': + assert element.tag == 'para' # is inside a paragraph :/ has_block_elements = True out.parsed += '
    {}
    '.format(parse_desc(state, i)) elif i.tag in ['itemizedlist', 'orderedlist']: + assert element.tag == 'para' # is inside a paragraph :/ has_block_elements = True tag = 'ul' if i.tag == 'itemizedlist' else 'ol' out.parsed += '<{}>'.format(tag) @@ -315,6 +367,7 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. out.parsed += ''.format(tag) elif i.tag == 'table': + assert element.tag == 'para' # is inside a paragraph :/ has_block_elements = True out.parsed += '
' inside_tbody = False @@ -340,6 +393,8 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. out.parsed += '
' elif i.tag == 'simplesect': + assert element.tag == 'para' # is inside a paragraph :/ + # Return value is separated from the text flow if i.attrib['kind'] == 'return': assert not out.return_value @@ -359,6 +414,7 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. out.parsed += '' elif i.tag == 'xrefsect': + assert element.tag == 'para' # is inside a paragraph :/ has_block_elements = True id = i.attrib['id'] @@ -374,6 +430,7 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. color, file, match.group(2), i.find('xreftitle').text, parse_desc(state, i.find('xrefdescription'))) elif i.tag == 'parameterlist': + assert element.tag == 'para' # is inside a paragraph :/ has_block_elements = True out.param_kind = i.attrib['kind'] @@ -394,6 +451,7 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. out.templates[name.text] = description elif i.tag == 'variablelist': + assert element.tag == 'para' # is inside a paragraph :/ has_block_elements = True out.parsed += '
' @@ -407,54 +465,23 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. out.parsed += '
' elif i.tag == 'verbatim': + assert element.tag == 'para' # is inside a paragraph :/ has_block_elements = True out.parsed += '
{}
'.format(html.escape(i.text)) - elif i.tag == 'linebreak': - # Strip all whitespace before the linebreak, as it is of no use - out.parsed = out.parsed.rstrip() + '
' - elif i.tag == 'programlisting': - # If it seems to be a standalone code paragraph, don't wrap it in - #

<p> and use <pre>:
-            # - is either alone in the paragraph, with no text or other
-            #   elements around
-            # - or is a code snippet (filename instead of just .ext). Doxygen
-            #   unfortunately doesn't put @snippet in its own paragraph even
-            #   if it's separated by blank lines. It does so for @include and
-            #   related, though.
-            if element.tag == 'para' and (((not element.text or not element.text.strip()) and (not i.tail or not i.tail.strip()) and len([listing for listing in element]) == 1) or ('filename' in i.attrib and not i.attrib['filename'].startswith('.'))):
-                code_block = True
-
-            # Looks like inline code, but has multiple code lines, so it's
-            # suspicious. Use code block, but warn.
-            elif len([codeline for codeline in i]) > 1:
-                code_block = True
-                logging.warning("Inline code has multiple lines, fallback to a code block")
-
-            # Otherwise wrap it in 

and use - else: - code_block = False + assert element.tag == 'para' # is inside a paragraph :/ - # Doxygen doesn't add a space before if it's - # inline, add it manually in case there should be a space - # before it. However, it does add a space after it always. + # We should have decided about block/inline above + assert code_block is not None + + # Doxygen doesn't add a space before if it's + # inline, add it manually in case there should be a space before + # it. However, it does add a space after it always. + if not code_block: if out.parsed and not out.parsed[-1].isspace() and not out.parsed[-1] in '([{': out.parsed += ' ' - # DOXYGEN PATCHING 4/5 - # - # Specialization of similar paragraph cleanup code above. - if code_block: - out.is_reasonable_paragraph = False - has_block_elements = True - out.parsed = out.parsed.rstrip() - if not out.parsed: - out.write_paragraph_start_tag = False - else: - out.parsed += '

' - out.write_paragraph_close_tag = False - # Hammer unhighlighted code out of the block # TODO: preserve links code = '' @@ -538,9 +565,10 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. out.parsed += '<{0} class="{1}">{2}'.format('pre' if code_block else 'code', class_, highlighted) elif i.tag == 'image': + assert element.tag == 'para' # is inside a paragraph :/ has_block_elements = True - name = i.attrib['name'] + name = i.attrib['name'] if i.attrib['type'] == 'html': path = os.path.join(state.basedir, state.doxyfile['OUTPUT_DIRECTORY'], state.doxyfile['XML_OUTPUT'], name) if os.path.exists(path): @@ -556,8 +584,15 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. # Either block or inline because DOXYGEN!!! WHAT!!! elif i.tag == 'formula': - # Inline formula - if i.text.startswith('$ ') and i.text.endswith(' $'): + assert element.tag == 'para' # is inside a paragraph :/ + + # We should have decided about block/inline above + assert formula_block is not None + if formula_block: + has_block_elements = True + rendered = latex2svg.latex2svg('$${}$$'.format(i.text[3:-3]), params=m.math.latex2svg_params) + out.parsed += '
{}
'.format(m.math._patch(i.text, rendered, '')) + else: rendered = latex2svg.latex2svg('${}$'.format(i.text[2:-2]), params=m.math.latex2svg_params) # CSS classes and styling for proper vertical alignment. Depth is relative @@ -566,14 +601,11 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. attribs = ' class="m-math" style="vertical-align: -{:.1f}pt;"'.format(rendered['depth']*12*1.25) out.parsed += m.math._patch(i.text, rendered, attribs) - # Block formula - else: - assert i.text.startswith('\[ ') and i.text.endswith(' \]') - has_block_elements = True - rendered = latex2svg.latex2svg('$${}$$'.format(i.text[3:-3]), params=m.math.latex2svg_params) - out.parsed += '
{}
'.format(m.math._patch(i.text, rendered, '')) - # Inline elements + elif i.tag == 'linebreak': + # Strip all whitespace before the linebreak, as it is of no use + out.parsed = out.parsed.rstrip() + '
' + elif i.tag == 'anchor': out.parsed += ''.format(extract_id(i)) @@ -600,7 +632,7 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. else: # pragma: no cover logging.warning("Ignoring <{}> in desc".format(i.tag)) - # DOXYGEN PATCHING 5/5 + # DOXYGEN PATCHING 4/4 # # Besides putting notes and blockquotes and shit inside paragraphs, # Doxygen also doesn't attempt to open a new for the ACTUAL NEW diff --git a/doxygen/test/contents_blocks/index.html b/doxygen/test/contents_blocks/index.html index 25539a45..32530ac0 100644 --- a/doxygen/test/contents_blocks/index.html +++ b/doxygen/test/contents_blocks/index.html @@ -37,9 +37,7 @@

My Project

-

First paragraph containing some content.

-

Paragraph following the sections.

-

A blockquote

Text right after that blockquote should be a new paragraph.

  • A simple
  • List
    1. With one line
    2. for each
  • item, so paragraphs are removed
  • A simple
  • List
    1. With the sublist delimited
    2. by blank lines
  • should behave the same as above
  • A new list

    of multiple

    paragraphs.

  • Another item

    • A sub list

      Another paragraph

A paragraph after that list.

Table headerAnotherThird
CellAnother cell3rd
Next rowThis
is a tablereallyyes.
+

First paragraph containing some content.

Paragraph following the sections.

A blockquote

Text right after that blockquote should be a new paragraph.

  • A simple
  • List
    1. With one line
    2. for each
  • item, so paragraphs are removed
  • A simple
  • List
    1. With the sublist delimited
    2. by blank lines
  • should behave the same as above
  • A new list

    of multiple

    paragraphs.

  • Another item

    • A sub list

      Another paragraph

A paragraph after that list.

Table headerAnotherThird
CellAnother cell3rd
Next rowThis
is a tablereallyyes.