chiark / gitweb /
documentation/doxygen: robustly skip unexpected XML files.
authorVladimír Vondruš <mosra@centrum.cz>
Sun, 9 Jan 2022 18:01:59 +0000 (19:01 +0100)
committerVladimír Vondruš <mosra@centrum.cz>
Sun, 9 Jan 2022 21:50:15 +0000 (22:50 +0100)
This skips the Doxyfile.xml from 1.8.19 (without a warning, to avoid
polluting the output for everyone running newer Doxygen), and
additionally also all other files that don't have the expected root or
immediately following tags (with a warning, because that's likely
something to act on).

These checks used to be inconsistent between extract_metadata() and
parse_xml() (resulting extract_metadata() to continuing further and
dying on some other exception, sigh) and also was an assert instead of a
graceful handling.

A new test case is added to verify that all expected messages are
printed, all unexpected files are skipped but also the expected files
are not skipped. The warnings/errors are also printed just once, not
once during metadata extraction and once during actual page generation.

Co-authored-by: crf8472 <crf8472@web.de>
Co-authored-by: SRGDamia1 <sdamiano@stroudcenter.org>
12 files changed:
documentation/doxygen.py
documentation/test_doxygen/contents_parse_error/Doxyfile [deleted file]
documentation/test_doxygen/contents_parse_error/broken.xml [deleted file]
documentation/test_doxygen/ignored_xmls/Doxyfile [new file with mode: 0644]
documentation/test_doxygen/ignored_xmls/Doxyfile.xml [new file with mode: 0644]
documentation/test_doxygen/ignored_xmls/Foxydile.xml [new file with mode: 0644]
documentation/test_doxygen/ignored_xmls/broken.xml [new file with mode: 0644]
documentation/test_doxygen/ignored_xmls/index.xml [new file with mode: 0644]
documentation/test_doxygen/ignored_xmls/pages.html [new file with mode: 0644]
documentation/test_doxygen/ignored_xmls/thingsgotcrazy.xml [new file with mode: 0644]
documentation/test_doxygen/test_contents.py
documentation/test_doxygen/test_ignored_xmls.py [new file with mode: 0644]

index 845c9776ea828dab3e6165485c9177b3daca969a..e1483d287fe7156151a84aa679f6dcf694a221f4 100755 (executable)
@@ -2176,18 +2176,26 @@ def is_a_stupid_empty_markdown_page(compounddef: ET.Element):
     return compounddef.find('compoundname').text.startswith('md_') and compounddef.find('compoundname').text.endswith(compounddef.find('title').text) and not compounddef.find('briefdescription') and not compounddef.find('detaileddescription')
 
 def extract_metadata(state: State, xml):
-    logging.debug("Extracting metadata from {}".format(os.path.basename(xml)))
+    basename = os.path.basename(xml)
+
+    # These early returns should be kept consistent with parse_xml()
+    if basename == 'Doxyfile.xml':
+        logging.debug("Ignoring {}".format(basename))
+        return
+
+    logging.debug("Extracting metadata from {}".format(basename))
 
     try:
         tree = ET.parse(xml)
     except ET.ParseError as e:
-        logging.error("{}: XML parse error, skipping: {}".format(os.path.basename(xml), e))
+        logging.error("{}: XML parse error, skipping whole file: {}".format(basename, e))
         return
 
     root = tree.getroot()
 
-    # We need just list of all example files in correct order, nothing else
-    if os.path.basename(xml) == 'index.xml':
+    # From index.xml we need just list of all example files in correct order,
+    # nothing else
+    if basename == 'index.xml':
         for i in root:
             if i.attrib['kind'] == 'example':
                 compound = Empty()
@@ -2197,10 +2205,18 @@ def extract_metadata(state: State, xml):
                 state.examples += [compound]
         return
 
-    compounddef: ET.Element = root.find('compounddef')
+    # From other files we expect <doxygen><compounddef>
+    if root.tag != 'doxygen':
+        logging.warning("{}: root element expected to be <doxygen> but is <{}>, skipping whole file".format(basename, root.tag))
+        return
+    compounddef: ET.Element = root[0]
+    if compounddef.tag != 'compounddef':
+        logging.warning("{}: first child element expected to be <compounddef> but is <{}>, skipping whole file".format(basename, compounddef.tag))
+        return
+    assert len([i for i in root]) == 1
 
     if compounddef.attrib['kind'] not in ['namespace', 'group', 'class', 'struct', 'union', 'dir', 'file', 'page']:
-        logging.debug("No useful info in {}, skipping".format(os.path.basename(xml)))
+        logging.debug("No useful info in {}, skipping".format(basename))
         return
 
     # In order to show also undocumented members, go through all empty
@@ -2452,19 +2468,24 @@ def parse_xml(state: State, xml: str):
 
     state.current = os.path.basename(xml)
 
+    # All these early returns were logged in extract_metadata() already, no
+    # need to print the same warnings/erorrs twice. Keep the two consistent.
+    if state.current == 'Doxyfile.html':
+        return
+
     logging.debug("Parsing {}".format(state.current))
 
     try:
         tree = ET.parse(xml)
     except ET.ParseError as e:
-        logging.error("{}: XML parse error, skipping: {}".format(state.current, e))
         return
-
     root = tree.getroot()
-    assert root.tag == 'doxygen'
-
+    if root.tag != 'doxygen':
+        return
     compounddef: ET.Element = root[0]
-    assert compounddef.tag == 'compounddef'
+    if compounddef.tag != 'compounddef':
+        return
+
     assert len([i for i in root]) == 1
 
     # Ignoring private structs/classes and unnamed namespaces
diff --git a/documentation/test_doxygen/contents_parse_error/Doxyfile b/documentation/test_doxygen/contents_parse_error/Doxyfile
deleted file mode 100644 (file)
index 8ec4cc4..0000000
+++ /dev/null
@@ -1 +0,0 @@
-XML_OUTPUT              =
diff --git a/documentation/test_doxygen/contents_parse_error/broken.xml b/documentation/test_doxygen/contents_parse_error/broken.xml
deleted file mode 100644 (file)
index 7d69da7..0000000
+++ /dev/null
@@ -1 +0,0 @@
-&<><><><>
diff --git a/documentation/test_doxygen/ignored_xmls/Doxyfile b/documentation/test_doxygen/ignored_xmls/Doxyfile
new file mode 100644 (file)
index 0000000..da7ab9e
--- /dev/null
@@ -0,0 +1,7 @@
+XML_OUTPUT              =
+
+##! M_THEME_COLOR       =
+##! M_FAVICON           =
+##! M_LINKS_NAVBAR1     =
+##! M_LINKS_NAVBAR2     =
+##! M_SEARCH_DISABLED   = YES
diff --git a/documentation/test_doxygen/ignored_xmls/Doxyfile.xml b/documentation/test_doxygen/ignored_xmls/Doxyfile.xml
new file mode 100644 (file)
index 0000000..b44092d
--- /dev/null
@@ -0,0 +1 @@
+<> This XML is broken but isn't even opened <>
diff --git a/documentation/test_doxygen/ignored_xmls/Foxydile.xml b/documentation/test_doxygen/ignored_xmls/Foxydile.xml
new file mode 100644 (file)
index 0000000..a681f32
--- /dev/null
@@ -0,0 +1,5 @@
+<!-- this file should get opened but skipped because it doesn't contain a
+     <doxygen> element -->
+<doxyfile>
+  <option/>
+</doxyfile>
diff --git a/documentation/test_doxygen/ignored_xmls/broken.xml b/documentation/test_doxygen/ignored_xmls/broken.xml
new file mode 100644 (file)
index 0000000..8747667
--- /dev/null
@@ -0,0 +1 @@
+<> This XML is broken and should get skipped <>
diff --git a/documentation/test_doxygen/ignored_xmls/index.xml b/documentation/test_doxygen/ignored_xmls/index.xml
new file mode 100644 (file)
index 0000000..98fb054
--- /dev/null
@@ -0,0 +1,2 @@
+<doxygenindex version="1.0.666">
+</doxygenindex>
diff --git a/documentation/test_doxygen/ignored_xmls/pages.html b/documentation/test_doxygen/ignored_xmls/pages.html
new file mode 100644 (file)
index 0000000..b75eb47
--- /dev/null
@@ -0,0 +1,55 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8" />
+  <title>My Project</title>
+  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Source+Sans+Pro:400,400i,600,600i%7CSource+Code+Pro:400,400i,600" />
+  <link rel="stylesheet" href="m-dark+documentation.compiled.css" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+</head>
+<body>
+<header><nav id="navigation">
+  <div class="m-container">
+    <div class="m-row">
+      <a href="index.html" id="m-navbar-brand" class="m-col-t-8 m-col-m-none m-left-m">My Project</a>
+    </div>
+  </div>
+</nav></header>
+<main><article>
+  <div class="m-container m-container-inflatable">
+    <div class="m-row">
+      <div class="m-col-l-10 m-push-l-1">
+        <h1>Pages</h1>
+        <ul class="m-doc">
+        </ul>
+        <script>
+        function toggle(e) {
+            e.parentElement.className = e.parentElement.className == 'm-doc-collapsible' ?
+                'm-doc-expansible' : 'm-doc-collapsible';
+            return false;
+        }
+        /* Collapse all nodes marked as such. Doing it via JS instead of
+           directly in markup so disabling it doesn't harm usability. The list
+           is somehow regenerated on every iteration and shrinks as I change
+           the classes. It's not documented anywhere and I'm not sure if this
+           is the same across browsers, so I am going backwards in that list to
+           be sure. */
+        var collapsed = document.getElementsByClassName("collapsed");
+        for(var i = collapsed.length - 1; i >= 0; --i)
+            collapsed[i].className = 'm-doc-expansible';
+        </script>
+      </div>
+    </div>
+  </div>
+</article></main>
+<footer><nav>
+  <div class="m-container">
+    <div class="m-row">
+      <div class="m-col-l-10 m-push-l-1">
+        <p>My Project. Created with <a href="https://doxygen.org/">Doxygen</a> 1.0.666 and <a href="https://mcss.mosra.cz/">m.css</a>.</p>
+      </div>
+    </div>
+  </div>
+</nav></footer>
+</body>
+</html>
diff --git a/documentation/test_doxygen/ignored_xmls/thingsgotcrazy.xml b/documentation/test_doxygen/ignored_xmls/thingsgotcrazy.xml
new file mode 100644 (file)
index 0000000..bbb19ab
--- /dev/null
@@ -0,0 +1,7 @@
+<!-- this file should get opened but skipped because it doesn't contain a
+     <compounddef> as the first thing in the <doxygen> element. Might be a bit
+     too tight, but better safe than sorry AGAIN. -->
+<doxygen>
+  <crazyisay/>
+  <compounddef/>
+</doxygen>
index cb5e31e77776c013dbb8b9c651495af8554804bc..277216da5630fe8e49088de7f6a616738ade876f 100644 (file)
@@ -272,13 +272,6 @@ class Custom(IntegrationTestCase):
 
         self.assertEqual(*self.actual_expected_contents('dot.html', file))
 
-class ParseError(BaseTestCase):
-    def test(self):
-        self.run_doxygen(wildcard='broken.xml')
-
-        # The index file should be generated, no abort
-        self.assertTrue(os.path.exists(os.path.join(self.path, 'html', 'index.html')))
-
 class AutobriefCppComments(IntegrationTestCase):
     def test(self):
         self.run_doxygen(wildcard='File_8h.xml')
diff --git a/documentation/test_doxygen/test_ignored_xmls.py b/documentation/test_doxygen/test_ignored_xmls.py
new file mode 100644 (file)
index 0000000..3244f1c
--- /dev/null
@@ -0,0 +1,56 @@
+#
+#   This file is part of m.css.
+#
+#   Copyright © 2017, 2018, 2019, 2020, 2021, 2022
+#             Vladimír Vondruš <mosra@centrum.cz>
+#
+#   Permission is hereby granted, free of charge, to any person obtaining a
+#   copy of this software and associated documentation files (the "Software"),
+#   to deal in the Software without restriction, including without limitation
+#   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+#   and/or sell copies of the Software, and to permit persons to whom the
+#   Software is furnished to do so, subject to the following conditions:
+#
+#   The above copyright notice and this permission notice shall be included
+#   in all copies or substantial portions of the Software.
+#
+#   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+#   THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+#   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+#   DEALINGS IN THE SOFTWARE.
+#
+
+import os
+
+from . import BaseTestCase
+
+class IgnoredXmls(BaseTestCase):
+    def test(self):
+        with self.assertLogs() as cm:
+            self.run_doxygen(wildcard='*.xml')
+
+        self.assertEqual(cm.output, [
+            # The Doxyfile.xml should be completely ignored, producing just a
+            # debug message and not even being opened, as that would spam the
+            # output otherwise
+
+            # This is like Doxyfile.xml, but with a different name, ensure it
+            # gets properly skipped if the root element mismatches
+            "WARNING:root:Foxydile.xml: root element expected to be <doxygen> but is <doxyfile>, skipping whole file",
+
+            # A file that has a XML parse error should get skipped
+            "ERROR:root:broken.xml: XML parse error, skipping whole file: not well-formed (invalid token): line 1, column 1",
+
+            # The index.xml should be parsed and not produce any warning
+
+            # A file that has <doxygen> but something weird inside, skipped
+            # also
+            "WARNING:root:thingsgotcrazy.xml: first child element expected to be <compounddef> but is <crazyisay>, skipping whole file"
+        ])
+
+        # Some index page should be generated, with version 1.0.666 extracted
+        # from index.xml
+        self.assertEqual(*self.actual_expected_contents('pages.html'))