From: Vladimír Vondruš Date: Tue, 17 Sep 2024 21:00:52 +0000 (+0200) Subject: documentation: verify HTML escaping for both the doxygen and python theme. X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~cjwatson/git?a=commitdiff_plain;h=d6f0ca1af2b36da33a0998d1095a13a5b6c99b1c;p=blog.git documentation: verify HTML escaping for both the doxygen and python theme. It's a mess and needs to be cleaned up, especially given that I now need to have non-HTML output from the Python generator, so it's good to have it actually regression checked. This actually revealed one superfluous unescape in the Python theme (which was likely a leftover from when that code was copypasted from the Doxygen theme), and a Doxygen bug where it doesn't unescape image alt text. Then m.css doesn't correctly escape the alt text either, which makes the bug cancel out and everything is alright. Oh and there's also a bug where if special HTML chars are used in filenames, it breaks the Doxygen XML output. I should probably make a PR for those two. --- diff --git a/documentation/doxygen.py b/documentation/doxygen.py index 8502d5e9..3bb13b86 100755 --- a/documentation/doxygen.py +++ b/documentation/doxygen.py @@ -1130,6 +1130,9 @@ def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET. # The alt text can apparently be specified only with the HTML # tag, not with @image. It's also present only since # 1.9.1(?). + # TODO Doxygen seems to be double-escaping this, which + # ultimately means we cannot escape this ourselves as it'd be + # wrong. See test_contents.HtmlEscape for a repro case. 
alt = i.attrib.get('alt', 'Image') caption = i.text @@ -2640,25 +2643,26 @@ def postprocess_state(state: State): # Fill breadcrumb with leaf names and URLs include = [] for i in reversed(path_reverse): - include += [state.compounds[i].leaf_name] + # TODO the escaping / unescaping is a mess, fix that + include += [html.unescape(state.compounds[i].leaf_name)] state.includes['/'.join(include)] = compound.id # Resolve navbar links that are just an ID - def resolve_link(html, title, url, id): - if not html and not title and not url: + def resolve_link(html_, title, url, id): + if not html_ and not title and not url: assert id in state.compounds, "Navbar references {} which wasn't found".format(id) found = state.compounds[id] title, url = found.name, found.url - return html, title, url, id + return html_, title, url, id for var in 'LINKS_NAVBAR1', 'LINKS_NAVBAR2': links = [] - for html, title, url, id, sub in state.config[var]: - html, title, url, id = resolve_link(html, title, url, id) + for html_, title, url, id, sub in state.config[var]: + html_, title, url, id = resolve_link(html_, title, url, id) sublinks = [] for i in sub: sublinks += [resolve_link(*i)] - links += [(html, title, url, id, sublinks)] + links += [(html_, title, url, id, sublinks)] state.config[var] = links def build_search_data(state: State, merge_subtrees=True, add_lookahead_barriers=True, merge_prefixes=True) -> bytearray: diff --git a/documentation/python.py b/documentation/python.py index 6c8274c0..1df447ea 100755 --- a/documentation/python.py +++ b/documentation/python.py @@ -2497,16 +2497,16 @@ def build_search_data(state: State, merge_subtrees=True, add_lookahead_barriers= # chars which should fit the full function name in the list in most # cases, yet be still long enough to be able to distinguish # particular overloads. 
- # TODO: the suffix_length has to be calculated on UTF-8 and I - # am (un)escaping a lot back and forth here -- needs to be - # cleaned up + # TODO: the suffix_length has to be calculated on UTF-8 params = ', '.join(result.params) + assert is_html_safe(params) # this is not C++, so no <>& if len(params) > 49: params = params[:48] + '…' name_with_args += '(' + params + ')' suffix_length += len(params.encode('utf-8')) + 2 complete_name = joiner.join(result.prefix + [name_with_args]) + # TODO needs escaping once page names are exposed to search assert is_html_safe(complete_name) # this is not C++, so no <>& index = map.add(complete_name, result.url, suffix_length=suffix_length, flags=result.flags) @@ -2525,7 +2525,9 @@ def build_search_data(state: State, merge_subtrees=True, add_lookahead_barriers= if name: lookahead_barriers += [len(name)] name += joiner - name += html.unescape(j) + # TODO needs escaping once page names are exposed to search + assert is_html_safe(j) # this is not C++, so no <>& + name += j trie.insert(name.lower(), index, lookahead_barriers=lookahead_barriers if add_lookahead_barriers else []) # Add functions the second time with () appended, referencing diff --git a/documentation/test_doxygen/__init__.py b/documentation/test_doxygen/__init__.py index a238d9b5..36865fd0 100644 --- a/documentation/test_doxygen/__init__.py +++ b/documentation/test_doxygen/__init__.py @@ -82,8 +82,10 @@ class BaseTestCase(unittest.TestCase): if os.path.exists(os.path.join(self.path, 'html')): shutil.rmtree(os.path.join(self.path, 'html')) def run_doxygen(self, templates=default_templates, wildcard=default_wildcard, index_pages=default_index_pages, config={}): - state = State({**copy.deepcopy(default_config), **config}) + state = State(copy.deepcopy(default_config)) parse_doxyfile(state, os.path.join(self.path, self.doxyfile)) + # Make the supplied config values overwrite what's in the Doxyfile + state.config = {**state.config, **config} run(state, templates=templates, 
wildcard=wildcard, index_pages=index_pages, sort_globbed_files=True) def actual_expected_contents(self, actual, expected = None): diff --git a/documentation/test_doxygen/contents_html_escape/Doxyfile b/documentation/test_doxygen/contents_html_escape/Doxyfile new file mode 100644 index 00000000..a511c1f6 --- /dev/null +++ b/documentation/test_doxygen/contents_html_escape/Doxyfile @@ -0,0 +1,19 @@ +INPUT = page.dox Fi&le.h +IMAGE_PATH = . +QUIET = YES +GENERATE_HTML = NO +GENERATE_LATEX = NO +GENERATE_XML = YES +XML_PROGRAMLISTING = NO +CASE_SENSE_NAMES = YES + +# Needed to make the @struct recognized by m.css, otherwise it's +# ignored as the directory structure gets lost with empty STRIP_FROM_INC_PATH +STRIP_FROM_INC_PATH = . + +##! M_PAGE_FINE_PRINT = +##! M_THEME_COLOR = +##! M_FAVICON = +##! M_LINKS_NAVBAR1 = files +##! M_LINKS_NAVBAR2 = pages annotated +##! M_SEARCH_DISABLED = YES diff --git a/documentation/test_doxygen/contents_html_escape/Fi&le.h b/documentation/test_doxygen/contents_html_escape/Fi&le.h new file mode 100644 index 00000000..4f674599 --- /dev/null +++ b/documentation/test_doxygen/contents_html_escape/Fi&le.h @@ -0,0 +1,29 @@ +/** @file + * @brief The file path should be escaped, in the file list also + */ + +/** +@brief The class include name as well as derived class reference should be escaped +*/ +template struct Class { + /** @brief Function */ + void suffixShouldBeEscaped(const Type::ShouldBeEscaped& = "default value <&> should be escaped") &&; + + /** @brief Enum */ + enum Enum { + Value = Initializer::ShouldBeEscaped + }; +}; + +template struct Sub; + +template struct Sub: Class { + /** @brief The outer class name should be escaped, in the class list as well */ + struct Nested {}; +}; + +/** @struct Sub Fi&le.h FakeFi&le.h + * @brief The class name as well as base class reference should be escaped, in the class list as well; the faked include should be escaped here too */ + +/** @brief Function specialization */ +template<> void 
functionShouldHaveSpecializedNameEscaped(); diff --git a/documentation/test_doxygen/contents_html_escape/Fi_6le_8h.html b/documentation/test_doxygen/contents_html_escape/Fi_6le_8h.html new file mode 100644 index 00000000..2a961c83 --- /dev/null +++ b/documentation/test_doxygen/contents_html_escape/Fi_6le_8h.html @@ -0,0 +1,87 @@ + + + + + Fi&le.h file | My Project + + + + + +

+ Fi&le.h file +

The file path should be escaped, in the file list also.

+ +

Classes

+ template<class T> + struct Class +: The class include name as well as derived class reference should be escaped.
+ template<class T> + struct Sub<char, T> +: The class name as well as base class reference should be escaped, in the class list as well; the faked include should be escaped here too.
+ struct Sub<char, T>::Nested +: The outer class name should be escaped, in the class list as well.

Functions

+ template<> + void functionShouldHaveSpecializedNameEscaped<char&>() +: Function specialization.

+ + diff --git a/documentation/test_doxygen/contents_html_escape/annotated.html b/documentation/test_doxygen/contents_html_escape/annotated.html new file mode 100644 index 00000000..a82c6f7b --- /dev/null +++ b/documentation/test_doxygen/contents_html_escape/annotated.html @@ -0,0 +1,68 @@ + + + + + My Project + + + + + +

Classes

struct Class The class include name as well as derived class reference should be escaped.
+ struct Sub<char, T> The class name as well as base class reference should be escaped, in the class list as well; the faked include should be escaped here too. +
- struct Nested The outer class name should be escaped, in the class list as well.
+

+ +

+ + diff --git a/documentation/test_doxygen/contents_html_escape/files.html b/documentation/test_doxygen/contents_html_escape/files.html new file mode 100644 index 00000000..8d66fad8 --- /dev/null +++ b/documentation/test_doxygen/contents_html_escape/files.html @@ -0,0 +1,62 @@ + + + + + My Project + + + + + +

Files

file Fi&le.h The file path should be escaped, in the file list also.

+ +

+ + diff --git a/documentation/test_doxygen/contents_html_escape/page-1820.html b/documentation/test_doxygen/contents_html_escape/page-1820.html new file mode 100644 index 00000000..e4b4bdc0 --- /dev/null +++ b/documentation/test_doxygen/contents_html_escape/page-1820.html @@ -0,0 +1,47 @@ + + + + + Page <&> title <&> should be escaped, in the page list also | My Project + + + + + +

+ Page <&> title <&> should be escaped, in the page list also +

Section <&> name should be escaped

The <&> first text before any markup should be escaped. Yes The <&> last text after any markup should be escaped.

Section level 2 <&> should be escaped

Section level 3 <&> should be escaped

Section level 4 <&> should be escaped

5 Section level 5 <&> should be escaped

Stuff in a code <&> block <&> should be escaped
+

Text right after a note <&> should be escaped

Text in a blockquote <&> should be escaped

Yes

+ + diff --git a/documentation/test_doxygen/contents_html_escape/page.dox b/documentation/test_doxygen/contents_html_escape/page.dox new file mode 100644 index 00000000..113cd8b7 --- /dev/null +++ b/documentation/test_doxygen/contents_html_escape/page.dox @@ -0,0 +1,35 @@ +/** +@page page Page <&> title <&> should be escaped, in the page list also + +@section section Section <&> name should be escaped + +The <&> first text before any markup should be escaped. Yes +The <&> last text after any markup should be escaped. + +## Section level 2 <&> should be escaped + +### Section level 3 <&> should be escaped + +#### Section level 4 <&> should be escaped + +####5 Section level 5 <&> should be escaped + + Stuff in a code <&> block <&> should be escaped + +@par Paragraph <&> title should be escaped + Yes + +@note Yes + +Text right after a note <&> should be escaped + +> Text in a blockquote <&> should be escaped + +$Id: some strange RCS <&> content should be escaped $ + +@image html tiny.png "Image <&> title should be escaped" + + Image <&> alt should be escaped, but only because Doxygen forgets to unescape when parsing and so m.css doesn't escape again

Image <&> alt should be escaped, but only because Doxygen forgets to unescape when parsing and so m.css doesn't escape again

+ +Yes +*/ diff --git a/documentation/test_doxygen/contents_html_escape/page.html b/documentation/test_doxygen/contents_html_escape/page.html new file mode 100644 index 00000000..8e3cfa22 --- /dev/null +++ b/documentation/test_doxygen/contents_html_escape/page.html @@ -0,0 +1,47 @@ + + + + + Page <&> title <&> should be escaped, in the page list also | My Project + + + + + +

+ Page <&> title <&> should be escaped, in the page list also +

Section <&> name should be escaped

The <&> first text before any markup should be escaped. Yes The <&> last text after any markup should be escaped.

Section level 2 <&> should be escaped

Section level 3 <&> should be escaped

Section level 4 <&> should be escaped

5 Section level 5 <&> should be escaped

Stuff in a code <&> block <&> should be escaped
+

Text right after a note <&> should be escaped

Text in a blockquote <&> should be escaped

Yes

+ + diff --git a/documentation/test_doxygen/contents_html_escape/pages.html b/documentation/test_doxygen/contents_html_escape/pages.html new file mode 100644 index 00000000..c328f238 --- /dev/null +++ b/documentation/test_doxygen/contents_html_escape/pages.html @@ -0,0 +1,62 @@ + + + + + My Project + + + + + +

Pages

Page <&> title <&> should be escaped, in the page list also

+ +

+ + diff --git a/documentation/test_doxygen/contents_html_escape/structClass.html b/documentation/test_doxygen/contents_html_escape/structClass.html new file mode 100644 index 00000000..c492ef9a --- /dev/null +++ b/documentation/test_doxygen/contents_html_escape/structClass.html @@ -0,0 +1,89 @@ + + + + + Class struct | My Project + + + + + +

+
#include <Fi&le.h>
+
template<class T>
+ Class struct +

The class include name as well as derived class reference should be escaped.

+ +

Derived classes

+ template<class T> + struct Sub<char, T> +: The class name as well as base class reference should be escaped, in the class list as well; the faked include should be escaped here too.

Public types

+ enum Enum { Value = Initializer<char&>::ShouldBeEscaped } +: Enum.

Public functions

+ void suffixShouldBeEscaped(const Type<char>::ShouldBeEscaped& = "default value <&> should be escaped") && +: Function.

+ + diff --git a/documentation/test_doxygen/contents_html_escape/structSub_3_01char_00_01T_01_4.html b/documentation/test_doxygen/contents_html_escape/structSub_3_01char_00_01T_01_4.html new file mode 100644 index 00000000..849c147c --- /dev/null +++ b/documentation/test_doxygen/contents_html_escape/structSub_3_01char_00_01T_01_4.html @@ -0,0 +1,79 @@ + + + + + Sub<char, T> struct | My Project + + + + + +

+
#include <FakeFi&le.h>
+
template<class T>
+ Sub<char, T> struct +

The class name as well as base class reference should be escaped, in the class list as well; the faked include should be escaped here too.

+ +

Base classes

+ template<class T> + struct Class<T> +: The class include name as well as derived class reference should be escaped.

Public types

+ struct Nested +: The outer class name should be escaped, in the class list as well.

+ + diff --git a/documentation/test_doxygen/contents_html_escape/structSub_3_01char_00_01T_01_4_1_1Nested.html b/documentation/test_doxygen/contents_html_escape/structSub_3_01char_00_01T_01_4_1_1Nested.html new file mode 100644 index 00000000..a95dec43 --- /dev/null +++ b/documentation/test_doxygen/contents_html_escape/structSub_3_01char_00_01T_01_4_1_1Nested.html @@ -0,0 +1,47 @@ + + + + + Sub<char, T>::Nested struct | My Project + + + + + +

+ Sub<char, T>::Nested struct +
#include <Fi&le.h>
+

The outer class name should be escaped, in the class list as well.

+ + diff --git a/documentation/test_doxygen/contents_html_escape/tiny.png b/documentation/test_doxygen/contents_html_escape/tiny.png new file mode 120000 index 00000000..79abd3ca --- /dev/null +++ b/documentation/test_doxygen/contents_html_escape/tiny.png @@ -0,0 +1 @@ +../../../plugins/m/test/images/tiny.png \ No newline at end of file diff --git a/documentation/test_doxygen/contents_typography/indexpage_1817.xml b/documentation/test_doxygen/contents_typography/indexpage_1817.xml new file mode 100644 index 00000000..e4487105 --- /dev/null +++ b/documentation/test_doxygen/contents_typography/indexpage_1817.xml @@ -0,0 +1,52 @@ + + + + index + My Project + + + +

A blockquote. +

Preformatted text. + +Paragraph + with + explicit + line + breaks. + +Page section +Differently + preformatted +text. + +Unordered +list +of +nested +items + + +and back + + +Ordered +list +of +nested +items + + +and back + + + This is a typewriter text, emphasis, bold. Emphasis with typewriter and bold nested. http://google.com and URL. Small text. En-dash and em-dash . Reference to a Page section. Named reference with special characters in title: Warnings . Reference with escaped characters intitle: <anchor>. +2nd is L This costs no $, , , or . +Empty elements: + + +Above is a horizontal line. + + + + diff --git a/documentation/test_doxygen/test_contents.py b/documentation/test_doxygen/test_contents.py index 06dc2f1f..1c067d43 100644 --- a/documentation/test_doxygen/test_contents.py +++ b/documentation/test_doxygen/test_contents.py @@ -33,6 +33,9 @@ import unittest from hashlib import sha1 +from doxygen import EntryType +from _search import pretty_print, searchdata_filename + from . import BaseTestCase, IntegrationTestCase, doxygen_version, parse_version def dot_version(): @@ -625,3 +628,112 @@ class Blockquote(IntegrationTestCase): self.assertEqual(*self.actual_expected_contents('index.html')) else: self.assertEqual(*self.actual_expected_contents('index.html', 'index-pygments29.html')) + +class HtmlEscape(IntegrationTestCase): + def setUp(self): + IntegrationTestCase.setUp(self) + + # Doxygen does *almost* a good job of escaping everything, except the + # bit in . Patch that up. + for i in [ + 'structClass.xml', + 'structSub_3_01char_00_01T_01_4.xml', + 'structSub_3_01char_00_01T_01_4_1_1Nested.xml', + # These two are broken only in 1.8.16 and older, the second one + # isn't actually used for anything but still produces an error log + # if not patched. 
+ 'Fi_6le_8h.xml', + 'structSub.xml' + ]: + with open(os.path.join(self.path, 'xml', i), 'r+') as f: + contents = f.read() + f.seek(0) + f.truncate(0) + f.write(contents.replace('Fi&le.h', 'Fi&le.h')) + + def test(self): + self.run_doxygen(wildcard='*.xml') + + # Page title escaping, content escaping + self.assertEqual(*self.actual_expected_contents('pages.html')) + + # Versions before 1.9.1(?) don't have the alt attribute preserved for + # + if parse_version(doxygen_version()) >= parse_version("1.9.1"): + self.assertEqual(*self.actual_expected_contents('page.html')) + else: + self.assertEqual(*self.actual_expected_contents('page.html', 'page-1820.html')) + + # Filename escaping + self.assertEqual(*self.actual_expected_contents('files.html')) + self.assertEqual(*self.actual_expected_contents('Fi_6le_8h.html')) + + # Class name escaping; include, symbol and value escaping + self.assertEqual(*self.actual_expected_contents('annotated.html')) + self.assertEqual(*self.actual_expected_contents('structClass.html')) + self.assertEqual(*self.actual_expected_contents('structSub_3_01char_00_01T_01_4.html')) + self.assertEqual(*self.actual_expected_contents('structSub_3_01char_00_01T_01_4_1_1Nested.html')) + + def test_search(self): + # Re-run everything with search enabled, the search data shouldn't be + # escaped. Not done as part of above as it'd unnecessarily inflate the + # size of compared files with the search icon and popup. 
+ self.run_doxygen(index_pages=[], wildcard='*.xml', config={ + 'SEARCH_DISABLED': False, + 'SEARCH_DOWNLOAD_BINARY': True + }) + + with open(os.path.join(self.path, 'html', searchdata_filename.format(search_filename_prefix='searchdata')), 'rb') as f: + serialized = f.read() + search_data_pretty = pretty_print(serialized, entryTypeClass=EntryType)[0] + # print(search_data_pretty) + self.assertEqual(search_data_pretty, """ +8 symbols +fi&le.h [2] +|| :$ +|| :functionshouldhavespecializednameescaped [0] +|| ($ +|| ) [1] +|unctionshouldhavespecializednameescaped [0] +|| ($ +|| ) [1] +page <&> title <&> should be escaped, in the page list also [3] +class [7] +| :$ +| :enum [4] +| suffixshouldbeescaped [5] +| | ($ +| | ) [6] +enum [4] +suffixshouldbeescaped [5] +| | ($ +| | ) [6] +| b [8] +| | :$ +| | :nested [9] +nested [9] +0: ::functionShouldHaveSpecializedNameEscaped() [prefix=2[:14], suffix_length=2, type=FUNC] -> #a3b5d61927252197070e8d998f643a2b2 +1: [prefix=0[:48], type=FUNC] -> +2: Fi&le.h [type=FILE] -> Fi_6le_8h.html +3: Page <&> title <&> should be escaped, in the page list also [type=PAGE] -> page.html +4: ::Enum [prefix=7[:16], type=ENUM] -> #abc566500394204b1aff6106bb4559e18 +5: ::suffixShouldBeEscaped(const Type::ShouldBeEscaped&) && [prefix=7[:16], suffix_length=39, type=FUNC] -> #a0dbbef222ebfc3607c4ad7283ec260c3 +6: [prefix=5[:50], suffix_length=37, type=FUNC] -> +7: Class [type=STRUCT] -> structClass.html +8: Sub [type=STRUCT] -> structSub_3_01char_00_01T_01_4.html +9: ::Nested [prefix=8[:30], type=STRUCT] -> _1_1Nested.html +(EntryType.PAGE, CssClass.SUCCESS, 'page'), +(EntryType.NAMESPACE, CssClass.PRIMARY, 'namespace'), +(EntryType.GROUP, CssClass.SUCCESS, 'group'), +(EntryType.CLASS, CssClass.PRIMARY, 'class'), +(EntryType.STRUCT, CssClass.PRIMARY, 'struct'), +(EntryType.UNION, CssClass.PRIMARY, 'union'), +(EntryType.TYPEDEF, CssClass.PRIMARY, 'typedef'), +(EntryType.DIR, CssClass.WARNING, 'dir'), +(EntryType.FILE, CssClass.WARNING, 'file'), 
+(EntryType.FUNC, CssClass.INFO, 'func'), +(EntryType.DEFINE, CssClass.INFO, 'define'), +(EntryType.ENUM, CssClass.PRIMARY, 'enum'), +(EntryType.ENUM_VALUE, CssClass.DEFAULT, 'enum val'), +(EntryType.VAR, CssClass.DEFAULT, 'var') +""".strip()) diff --git a/documentation/test_python/CMakeLists.txt b/documentation/test_python/CMakeLists.txt index 2c068008..bb7457ab 100644 --- a/documentation/test_python/CMakeLists.txt +++ b/documentation/test_python/CMakeLists.txt @@ -33,6 +33,12 @@ foreach(target pybind_signatures pybind_enums pybind_external_overload_docs pybi set_target_properties(${target} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/${target}) endforeach() +# Need a special location for this one +pybind11_add_module(pybind_content_html_escape content_html_escape/content_html_escape/pybind.cpp) +set_target_properties(pybind_content_html_escape PROPERTIES + OUTPUT_NAME pybind + LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/content_html_escape/content_html_escape) + # Need a special location for this one pybind11_add_module(pybind_inspect_create_intersphinx inspect_create_intersphinx/inspect_create_intersphinx/pybind.cpp) set_target_properties(pybind_inspect_create_intersphinx PROPERTIES diff --git a/documentation/test_python/content_html_escape/content_html_escape.Class.html b/documentation/test_python/content_html_escape/content_html_escape.Class.html new file mode 100644 index 00000000..4d7e0b77 --- /dev/null +++ b/documentation/test_python/content_html_escape/content_html_escape.Class.html @@ -0,0 +1,114 @@ + + + + + content_html_escape.Class | My Python Project + + + + + +

+ content_html_escape.Class class +

+ +

Enums

+ class ClassEnum(enum.Enum): VALUE_THAT_SHOULD_BE_ESCAPED = '<&>' +

Class methods

+ def classmethod(default_string_that_should_be_escaped = '<&>') +

Static methods

+ def staticmethod(default_string_that_should_be_escaped = '<&>') +

Methods

+ def method(self, + default_string_that_should_be_escaped = '<&>') +

Special methods

+ def __dunder_method__(self, + default_string_that_should_be_escaped = '<&>') +

Data

+ DATA_THAT_SHOULD_BE_ESCAPED = '<&>' +

+ + diff --git a/documentation/test_python/content_html_escape/content_html_escape.html b/documentation/test_python/content_html_escape/content_html_escape.html new file mode 100644 index 00000000..b85681f5 --- /dev/null +++ b/documentation/test_python/content_html_escape/content_html_escape.html @@ -0,0 +1,101 @@ + + + + + content_html_escape | My Python Project + + + + + +

+ content_html_escape module +

Summary that should be <&> escaped <&>

+ +

Details that *aren't* rST-processed and thus should only be <&> escaped <&>.

Modules

module pybind: pybind11 html escaping

Classes

class Class

Enums

+ class Enum(enum.Enum): VALUE_THAT_SHOULD_BE_ESCAPED = '<&>' +

Functions

+ def function(default_string_that_should_be_escaped = '<&>', + default_function_that_should_be_escaped = <function <lambda>>) +

Data

+ DATA_THAT_SHOULD_BE_ESCAPED = '<&>' +

+ + diff --git a/documentation/test_python/content_html_escape/content_html_escape.pybind.html b/documentation/test_python/content_html_escape/content_html_escape.pybind.html new file mode 100644 index 00000000..12f29771 --- /dev/null +++ b/documentation/test_python/content_html_escape/content_html_escape.pybind.html @@ -0,0 +1,63 @@ + + + + + content_html_escape.pybind | My Python Project + + + + + +

+ content_html_escape.pybind module +

pybind11 html escaping

+ +

Functions

+ def default_value_should_be_escaped(string: str = '<&>') -> int +

+ + diff --git a/documentation/test_python/content_html_escape/content_html_escape/__init__.py b/documentation/test_python/content_html_escape/content_html_escape/__init__.py new file mode 100644 index 00000000..dba885eb --- /dev/null +++ b/documentation/test_python/content_html_escape/content_html_escape/__init__.py @@ -0,0 +1,39 @@ +""" +Summary that should be <&> escaped <&> + +Details that *aren't* rST-processed and thus should only be <&> escaped <&>. +""" + +import enum + +from . import pybind + +# TODO page names can be escaped! + +class Class: + class ClassEnum(enum.Enum): + VALUE_THAT_SHOULD_BE_ESCAPED = "<&>" + + @staticmethod + def staticmethod(default_string_that_should_be_escaped = "<&>"): + pass + + @classmethod + def classmethod(cls, default_string_that_should_be_escaped = "<&>"): + pass + + def __dunder_method__(self, default_string_that_should_be_escaped = "<&>"): + pass + + def method(self, default_string_that_should_be_escaped = "<&>"): + pass + + DATA_THAT_SHOULD_BE_ESCAPED = "<&>" + +class Enum(enum.Enum): + VALUE_THAT_SHOULD_BE_ESCAPED = "<&>" + +def function(default_string_that_should_be_escaped = "<&>", default_function_that_should_be_escaped = lambda a: a): + pass + +DATA_THAT_SHOULD_BE_ESCAPED = "<&>" diff --git a/documentation/test_python/content_html_escape/content_html_escape/pybind.cpp b/documentation/test_python/content_html_escape/content_html_escape/pybind.cpp new file mode 100644 index 00000000..9c6df58d --- /dev/null +++ b/documentation/test_python/content_html_escape/content_html_escape/pybind.cpp @@ -0,0 +1,11 @@ +#include + +namespace py = pybind11; + +int defaultValueShouldBeEscaped(const char*) { return 0; } + +PYBIND11_MODULE(pybind, m) { + m.doc() = "pybind11 html escaping"; + + m.def("default_value_should_be_escaped", defaultValueShouldBeEscaped, py::arg("string") = "<&>"); +} diff --git a/documentation/test_python/content_html_escape/page.html b/documentation/test_python/content_html_escape/page.html new file mode 100644 
index 00000000..d3c61966 --- /dev/null +++ b/documentation/test_python/content_html_escape/page.html @@ -0,0 +1,43 @@ + + + + + Page title that should be <&> escaped <&>, and also in the page tree | My Python Project + + + + + +

+ Page title that should be <&> escaped <&>, and also in the page tree +

Page content that <&> is rST-processed <&> and thus gets escaped by docutils.

+ + diff --git a/documentation/test_python/content_html_escape/page.rst b/documentation/test_python/content_html_escape/page.rst new file mode 100644 index 00000000..22a87341 --- /dev/null +++ b/documentation/test_python/content_html_escape/page.rst @@ -0,0 +1,4 @@ +Page title that should be <&> escaped <&>, and also in the page tree +#################################################################### + +Page content that <&> *is* rST-processed <&> and thus gets escaped by docutils. diff --git a/documentation/test_python/content_html_escape/pages.html b/documentation/test_python/content_html_escape/pages.html new file mode 100644 index 00000000..e52b6823 --- /dev/null +++ b/documentation/test_python/content_html_escape/pages.html @@ -0,0 +1,59 @@ + + + + + My Python Project + + + + + +

Pages

Page title that should be <&> escaped <&>, and also in the page tree

+ +

+ + diff --git a/documentation/test_python/test_content.py b/documentation/test_python/test_content.py index 0076e762..cdc2bccc 100644 --- a/documentation/test_python/test_content.py +++ b/documentation/test_python/test_content.py @@ -24,9 +24,13 @@ # import os +import unittest from . import BaseInspectTestCase +from _search import pretty_print, searchdata_filename +from python import EntryType + class Content(BaseInspectTestCase): def test(self): self.run_python({ @@ -52,3 +56,42 @@ class ParseDocstrings(BaseInspectTestCase): }) self.assertEqual(*self.actual_expected_contents('content_parse_docstrings.html')) self.assertEqual(*self.actual_expected_contents('content_parse_docstrings.Class.html')) + +class HtmlEscape(BaseInspectTestCase): + def test(self): + self.run_python({ + 'INPUT_PAGES': ['page.rst'], + 'PYBIND11_COMPATIBILITY': True, + 'LINKS_NAVBAR1': [ + ('Pages', 'pages', []), + ('Modules', 'modules', [])], + }) + + # Page title escaping + self.assertEqual(*self.actual_expected_contents('page.html')) + self.assertEqual(*self.actual_expected_contents('pages.html')) + + # Value escaping + self.assertEqual(*self.actual_expected_contents('content_html_escape.html')) + self.assertEqual(*self.actual_expected_contents('content_html_escape.Class.html')) + self.assertEqual(*self.actual_expected_contents('content_html_escape.pybind.html')) + + @unittest.skip("Page names are currently not exposed to search and there's nothing else that would require escaping, nothing to test") + def test_search(self): + # Re-run everything with search enabled, the search data shouldn't be + # escaped. Not done as part of above as it'd unnecessarily inflate the + # size of compared files with the search icon and popup. 
+ self.run_python({ + 'INPUT_PAGES': ['page.rst'], + 'PYBIND11_COMPATIBILITY': True, + 'SEARCH_DISABLED': False, + 'SEARCH_DOWNLOAD_BINARY': True + }) + + with open(os.path.join(self.path, 'output', searchdata_filename.format(search_filename_prefix='searchdata')), 'rb') as f: + serialized = f.read() + search_data_pretty = pretty_print(serialized, entryTypeClass=EntryType)[0] + # print(search_data_pretty) + self.assertEqual(search_data_pretty, """ +TODO +""".strip())

+ Fi&le.h file +

Classes

Files

+ Page <&> title <&> should be escaped, in the page list also +

+ Page <&> title <&> should be escaped, in the page list also +

Pages

+ #include <Fi&le.h> + template<class T> + Class struct +

+ #include <FakeFi&le.h> + template<class T> + Sub<char, T> struct +

+ Sub<char, T>::Nested struct + #include <Fi&le.h> +

+ content_html_escape.Class class +

+ content_html_escape module +

+ content_html_escape.pybind module +

+ Page title that should be <&> escaped <&>, and also in the page tree +

Pages

+
#include <Fi&le.h>
+
template<class T>
+ Class struct +

+
#include <FakeFi&le.h>
+
template<class T>
+ Sub<char, T> struct +

+ Sub<char, T>::Nested struct +
#include <Fi&le.h>
+