From: Cris Luengo Date: Fri, 26 Apr 2019 04:02:46 +0000 (-0600) Subject: Correctly handling <sup>, <sub>, and all HTML entities accepted by Doxygen. X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~cjwatson/git?a=commitdiff_plain;h=539771683a63eecd314f02e043f3e72d26375595;p=blog.git Correctly handling <sup>, <sub>, and all HTML entities accepted by Doxygen. --- diff --git a/documentation/doxygen.py b/documentation/doxygen.py index 2ac97836..e8d98f82 100755 --- a/documentation/doxygen.py +++ b/documentation/doxygen.py @@ -1485,10 +1485,12 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. content = parse_inline_desc(state, i).strip() if content: out.parsed += '{}'.format(content) - elif i.tag in ['emphasis', 'bold', 'small']: + elif i.tag in ['emphasis', 'bold', 'small', 'superscript', 'subscript']: mapping = {'emphasis': 'em', 'bold': 'strong', - 'small': 'small'} + 'small': 'small', + 'superscript': 'sup', + 'subscript': 'sub'} content = parse_inline_desc(state, i).strip() if content: out.parsed += '<{0}{1}>{2}'.format( @@ -1516,15 +1518,262 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. 
else: out.parsed += '{}'.format(content) - # WHAT THE HELL WHY IS THIS NOT AN XML ENTITY - elif i.tag in ['mdash', 'ndash', 'laquo', 'raquo']: - out.parsed += '&{};'.format(i.tag) - elif i.tag == 'nonbreakablespace': - out.parsed += ' ' - - # Something new :O - else: # pragma: no cover - logging.warning("{}: ignoring <{}> in desc".format(state.current, i.tag)) + else: + # Most of these are the same as HTML entities, but not all + mapping = {'nonbreakablespace': 'nbsp', + 'iexcl': 'iexcl', + 'cent': 'cent', + 'pound': 'pound', + 'curren': 'curren', + 'yen': 'yen', + 'brvbar': 'brvbar', + 'sect': 'sect', + 'umlaut': 'uml', + 'copy': 'copy', + 'ordf': 'ordf', + 'laquo': 'laquo', + 'not': 'not', + 'shy': 'shy', + 'registered': 'reg', + 'macr': 'macr', + 'deg': 'deg', + 'plusmn': 'plusmn', + 'sup2': 'sup2', + 'sup3': 'sup3', + 'acute': 'acute', + 'micro': 'micro', + 'para': 'para', + 'middot': 'middot', + 'cedil': 'cedil', + 'sup1': 'sup1', + 'ordm': 'ordm', + 'raquo': 'raquo', + 'frac14': 'frac14', + 'frac12': 'frac12', + 'frac34': 'frac34', + 'iquest': 'iquest', + 'Agrave': 'Agrave', + 'Aacute': 'Aacute', + 'Acirc': 'Acirc', + 'Atilde': 'Atilde', + 'Aumlaut': 'Auml', + 'Aring': 'Aring', + 'AElig': 'AElig', + 'Ccedil': 'Ccedil', + 'Egrave': 'Egrave', + 'Eacute': 'Eacute', + 'Ecirc': 'Ecirc', + 'Eumlaut': 'Euml', + 'Igrave': 'Igrave', + 'Iacute': 'Iacute', + 'Icirc': 'Icirc', + 'Iumlaut': 'Iuml', + 'ETH': 'ETH', + 'Ntilde': 'Ntilde', + 'Ograve': 'Ograve', + 'Oacute': 'Oacute', + 'Ocirc': 'Ocirc', + 'Otilde': 'Otilde', + 'Oumlaut': 'Ouml', + 'times': 'times', + 'Oslash': 'Oslash', + 'Ugrave': 'Ugrave', + 'Uacute': 'Uacute', + 'Ucirc': 'Ucirc', + 'Uumlaut': 'Uuml', + 'Yacute': 'Yacute', + 'THORN': 'THORN', + 'szlig': 'szlig', + 'agrave': 'agrave', + 'aacute': 'aacute', + 'acirc': 'acirc', + 'atilde': 'atilde', + 'aumlaut': 'auml', + 'aring': 'aring', + 'aelig': 'aelig', + 'ccedil': 'ccedil', + 'egrave': 'egrave', + 'eacute': 'eacute', + 'ecirc': 'ecirc', + 'eumlaut': 
'euml', + 'igrave': 'igrave', + 'iacute': 'iacute', + 'icirc': 'icirc', + 'iumlaut': 'iuml', + 'eth': 'eth', + 'ntilde': 'ntilde', + 'ograve': 'ograve', + 'oacute': 'oacute', + 'ocirc': 'ocirc', + 'otilde': 'otilde', + 'oumlaut': 'ouml', + 'divide': 'divide', + 'oslash': 'oslash', + 'ugrave': 'ugrave', + 'uacute': 'uacute', + 'ucirc': 'ucirc', + 'uumlaut': 'uuml', + 'yacute': 'yacute', + 'thorn': 'thorn', + 'yumlaut': 'yuml', + 'fnof': 'fnof', + 'Alpha': 'Alpha', + 'Beta': 'Beta', + 'Gamma': 'Gamma', + 'Delta': 'Delta', + 'Epsilon': 'Epsilon', + 'Zeta': 'Zeta', + 'Eta': 'Eta', + 'Theta': 'Theta', + 'Iota': 'Iota', + 'Kappa': 'Kappa', + 'Lambda': 'Lambda', + 'Mu': 'Mu', + 'Nu': 'Nu', + 'Xi': 'Xi', + 'Omicron': 'Omicron', + 'Pi': 'Pi', + 'Rho': 'Rho', + 'Sigma': 'Sigma', + 'Tau': 'Tau', + 'Upsilon': 'Upsilon', + 'Phi': 'Phi', + 'Chi': 'Chi', + 'Psi': 'Psi', + 'Omega': 'Omega', + 'alpha': 'alpha', + 'beta': 'beta', + 'gamma': 'gamma', + 'delta': 'delta', + 'epsilon': 'epsilon', + 'zeta': 'zeta', + 'eta': 'eta', + 'theta': 'theta', + 'iota': 'iota', + 'kappa': 'kappa', + 'lambda': 'lambda', + 'mu': 'mu', + 'nu': 'nu', + 'xi': 'xi', + 'omicron': 'omicron', + 'pi': 'pi', + 'rho': 'rho', + 'sigmaf': 'sigmaf', + 'sigma': 'sigma', + 'tau': 'tau', + 'upsilon': 'upsilon', + 'phi': 'phi', + 'chi': 'chi', + 'psi': 'psi', + 'omega': 'omega', + 'thetasym': 'thetasym', + 'upsih': 'upsih', + 'piv': 'piv', + 'bull': 'bull', + 'hellip': 'hellip', + 'prime': 'prime', + 'Prime': 'Prime', + 'oline': 'oline', + 'frasl': 'frasl', + 'weierp': 'weierp', + 'imaginary': 'image', + 'real': 'real', + 'trademark': 'trade', + 'alefsym': 'alefsym', + 'larr': 'larr', + 'uarr': 'uarr', + 'rarr': 'rarr', + 'darr': 'darr', + 'harr': 'harr', + 'crarr': 'crarr', + 'lArr': 'lArr', + 'uArr': 'uArr', + 'rArr': 'rArr', + 'dArr': 'dArr', + 'hArr': 'hArr', + 'forall': 'forall', + 'part': 'part', + 'exist': 'exist', + 'empty': 'empty', + 'nabla': 'nabla', + 'isin': 'isin', + 'notin': 'notin', + 'ni': 'ni', + 
'prod': 'prod', + 'sum': 'sum', + 'minus': 'minus', + 'lowast': 'lowast', + 'radic': 'radic', + 'prop': 'prop', + 'infin': 'infin', + 'ang': 'ang', + 'and': 'and', + 'or': 'or', + 'cap': 'cap', + 'cup': 'cup', + 'int': 'int', + 'there4': 'there4', + 'sim': 'sim', + 'cong': 'cong', + 'asymp': 'asymp', + 'ne': 'ne', + 'equiv': 'equiv', + 'le': 'le', + 'ge': 'ge', + 'sub': 'sub', + 'sup': 'sup', + 'nsub': 'nsub', + 'sube': 'sube', + 'supe': 'supe', + 'oplus': 'oplus', + 'otimes': 'otimes', + 'perp': 'perp', + 'sdot': 'sdot', + 'lceil': 'lceil', + 'rceil': 'rceil', + 'lfloor': 'lfloor', + 'rfloor': 'rfloor', + 'lang': 'lang', + 'rang': 'rang', + 'loz': 'loz', + 'spades': 'spades', + 'clubs': 'clubs', + 'hearts': 'hearts', + 'diams': 'diams', + 'OElig': 'OElig', + 'oelig': 'oelig', + 'Scaron': 'Scaron', + 'scaron': 'scaron', + 'Yumlaut': 'Yuml', + 'circ': 'circ', + 'tilde': 'tilde', + 'ensp': 'ensp', + 'emsp': 'emsp', + 'thinsp': 'thinsp', + 'zwnj': 'zwnj', + 'zwj': 'zwj', + 'lrm': 'lrm', + 'rlm': 'rlm', + 'ndash': 'ndash', + 'mdash': 'mdash', + 'lsquo': 'lsquo', + 'rsquo': 'rsquo', + 'sbquo': 'sbquo', + 'ldquo': 'ldquo', + 'rdquo': 'rdquo', + 'bdquo': 'bdquo', + 'dagger': 'dagger', + 'Dagger': 'Dagger', + 'permil': 'permil', + 'lsaquo': 'lsaquo', + 'rsaquo': 'rsaquo', + 'euro': 'euro', + 'tm': 'trade'} + try: + entity = mapping[i.tag] + except: + logging.warning("{}: ignoring <{}> in desc".format(state.current, i.tag)) + out.parsed += '&{};'.format(entity) # Now we can reset previous_section to None, nobody needs it anymore. # Of course we're resetting it only in case nothing else (such as the diff --git a/documentation/test_doxygen/contents_typography/index.html b/documentation/test_doxygen/contents_typography/index.html index 89ff6804..7f158c21 100644 --- a/documentation/test_doxygen/contents_typography/index.html +++ b/documentation/test_doxygen/contents_typography/index.html @@ -25,7 +25,7 @@

A blockquote.

Preformatted text.
 

Paragraph
with
explicit
line
breaks.

Page section

Differently
   preformatted
-text.
  • Unordered
  • list
  • of
    • nested
    • items
  • and back
  1. Ordered
  2. list
  3. of
    1. nested
    2. items
  4. and back

This is a typewriter text, emphasis and bold. Emphasis with typewriter and bold nested. http://google.com and URL. Small text. En-dash – and em-dash —. Reference to a Page section. Named reference with special characters in title: » Warnings «. Reference with escaped characters in title: <anchor>.

Empty elements:


Above is a horizontal line.

+text.
  • Unordered
  • list
  • of
    • nested
    • items
  • and back
  1. Ordered
  2. list
  3. of
    1. nested
    2. items
  4. and back

This is a typewriter text, emphasis and bold. Emphasis with typewriter and bold nested. http://google.com and URL. Small text. En-dash – and em-dash —. Reference to a Page section. Named reference with special characters in title: » Warnings «. Reference with escaped characters in title: <anchor>.

2nd is L ∀ ∇ π ℜ ℑ This costs no $, €, £, ¥ or ¤.

Empty elements:


Above is a horizontal line.

diff --git a/documentation/test_doxygen/contents_typography/input.dox b/documentation/test_doxygen/contents_typography/input.dox index bf3ec484..c42607c3 100644 --- a/documentation/test_doxygen/contents_typography/input.dox +++ b/documentation/test_doxygen/contents_typography/input.dox @@ -35,6 +35,9 @@ em-dash ---. Reference to a @ref section. Named reference with special characters in title: @ref section "» Warnings «". Reference with escaped characters in title: @ref an-anchor "". +2nd is L ∀ ∇ π ℜ ℑ +This costs no $, €, £, ¥ or ¤. + Empty elements:
 
 
 - - -