From 45911a188292a62b49ca4b226f590e9ca8cb75fa Mon Sep 17 00:00:00 2001
From: =?utf8?q?Vladim=C3=ADr=20Vondru=C5=A1?= <mosra@centrum.cz>
Date: Sun, 9 Jan 2022 19:01:59 +0100
Subject: [PATCH] documentation/doxygen: robustly skip unexpected XML files.

This skips the Doxyfile.xml from 1.8.19 (without a warning, to avoid
polluting the output for everyone running newer Doxygen), and
additionally also all other files that don't have the expected root or
immediately following tags (with a warning, because that's likely
something to act on).

These checks used to be inconsistent between extract_metadata() and
parse_xml() (resulting extract_metadata() to continuing further and
dying on some other exception, sigh) and also was an assert instead of a
graceful handling.

A new test case is added to verify that all expected messages are
printed, all unexpected files are skipped but also the expected files
are not skipped. The warnings/errors are also printed just once, not
once during metadata extraction and once during actual page generation.

Co-authored-by: crf8472 <crf8472@web.de>
Co-authored-by: SRGDamia1 <sdamiano@stroudcenter.org>
---
 documentation/doxygen.py                      | 43 ++++++++++----
 .../contents_parse_error/Doxyfile             |  1 -
 .../contents_parse_error/broken.xml           |  1 -
 .../test_doxygen/ignored_xmls/Doxyfile        |  7 +++
 .../test_doxygen/ignored_xmls/Doxyfile.xml    |  1 +
 .../test_doxygen/ignored_xmls/Foxydile.xml    |  5 ++
 .../test_doxygen/ignored_xmls/broken.xml      |  1 +
 .../test_doxygen/ignored_xmls/index.xml       |  2 +
 .../test_doxygen/ignored_xmls/pages.html      | 55 ++++++++++++++++++
 .../ignored_xmls/thingsgotcrazy.xml           |  7 +++
 documentation/test_doxygen/test_contents.py   |  7 ---
 .../test_doxygen/test_ignored_xmls.py         | 56 +++++++++++++++++++
 12 files changed, 166 insertions(+), 20 deletions(-)
 delete mode 100644 documentation/test_doxygen/contents_parse_error/Doxyfile
 delete mode 100644 documentation/test_doxygen/contents_parse_error/broken.xml
 create mode 100644 documentation/test_doxygen/ignored_xmls/Doxyfile
 create mode 100644 documentation/test_doxygen/ignored_xmls/Doxyfile.xml
 create mode 100644 documentation/test_doxygen/ignored_xmls/Foxydile.xml
 create mode 100644 documentation/test_doxygen/ignored_xmls/broken.xml
 create mode 100644 documentation/test_doxygen/ignored_xmls/index.xml
 create mode 100644 documentation/test_doxygen/ignored_xmls/pages.html
 create mode 100644 documentation/test_doxygen/ignored_xmls/thingsgotcrazy.xml
 create mode 100644 documentation/test_doxygen/test_ignored_xmls.py

diff --git a/documentation/doxygen.py b/documentation/doxygen.py
index 845c9776..e1483d28 100755
--- a/documentation/doxygen.py
+++ b/documentation/doxygen.py
@@ -2176,18 +2176,26 @@ def is_a_stupid_empty_markdown_page(compounddef: ET.Element):
     return compounddef.find('compoundname').text.startswith('md_') and compounddef.find('compoundname').text.endswith(compounddef.find('title').text) and not compounddef.find('briefdescription') and not compounddef.find('detaileddescription')
 
 def extract_metadata(state: State, xml):
-    logging.debug("Extracting metadata from {}".format(os.path.basename(xml)))
+    basename = os.path.basename(xml)
+
+    # These early returns should be kept consistent with parse_xml()
+    if basename == 'Doxyfile.xml':
+        logging.debug("Ignoring {}".format(basename))
+        return
+
+    logging.debug("Extracting metadata from {}".format(basename))
 
     try:
         tree = ET.parse(xml)
     except ET.ParseError as e:
-        logging.error("{}: XML parse error, skipping: {}".format(os.path.basename(xml), e))
+        logging.error("{}: XML parse error, skipping whole file: {}".format(basename, e))
         return
 
     root = tree.getroot()
 
-    # We need just list of all example files in correct order, nothing else
-    if os.path.basename(xml) == 'index.xml':
+    # From index.xml we need just list of all example files in correct order,
+    # nothing else
+    if basename == 'index.xml':
         for i in root:
             if i.attrib['kind'] == 'example':
                 compound = Empty()
@@ -2197,10 +2205,18 @@ def extract_metadata(state: State, xml):
                 state.examples += [compound]
         return
 
-    compounddef: ET.Element = root.find('compounddef')
+    # From other files we expect <doxygen><compounddef>
+    if root.tag != 'doxygen':
+        logging.warning("{}: root element expected to be <doxygen> but is <{}>, skipping whole file".format(basename, root.tag))
+        return
+    compounddef: ET.Element = root[0]
+    if compounddef.tag != 'compounddef':
+        logging.warning("{}: first child element expected to be <compounddef> but is <{}>, skipping whole file".format(basename, compounddef.tag))
+        return
+    assert len([i for i in root]) == 1
 
     if compounddef.attrib['kind'] not in ['namespace', 'group', 'class', 'struct', 'union', 'dir', 'file', 'page']:
-        logging.debug("No useful info in {}, skipping".format(os.path.basename(xml)))
+        logging.debug("No useful info in {}, skipping".format(basename))
         return
 
     # In order to show also undocumented members, go through all empty
@@ -2452,19 +2468,24 @@ def parse_xml(state: State, xml: str):
 
     state.current = os.path.basename(xml)
 
+    # All these early returns were logged in extract_metadata() already, no
+    # need to print the same warnings/erorrs twice. Keep the two consistent.
+    if state.current == 'Doxyfile.html':
+        return
+
     logging.debug("Parsing {}".format(state.current))
 
     try:
         tree = ET.parse(xml)
     except ET.ParseError as e:
-        logging.error("{}: XML parse error, skipping: {}".format(state.current, e))
         return
-
     root = tree.getroot()
-    assert root.tag == 'doxygen'
-
+    if root.tag != 'doxygen':
+        return
     compounddef: ET.Element = root[0]
-    assert compounddef.tag == 'compounddef'
+    if compounddef.tag != 'compounddef':
+        return
+
     assert len([i for i in root]) == 1
 
     # Ignoring private structs/classes and unnamed namespaces
diff --git a/documentation/test_doxygen/contents_parse_error/Doxyfile b/documentation/test_doxygen/contents_parse_error/Doxyfile
deleted file mode 100644
index 8ec4cc49..00000000
--- a/documentation/test_doxygen/contents_parse_error/Doxyfile
+++ /dev/null
@@ -1 +0,0 @@
-XML_OUTPUT              =
diff --git a/documentation/test_doxygen/contents_parse_error/broken.xml b/documentation/test_doxygen/contents_parse_error/broken.xml
deleted file mode 100644
index 7d69da77..00000000
--- a/documentation/test_doxygen/contents_parse_error/broken.xml
+++ /dev/null
@@ -1 +0,0 @@
-&<><><><>
diff --git a/documentation/test_doxygen/ignored_xmls/Doxyfile b/documentation/test_doxygen/ignored_xmls/Doxyfile
new file mode 100644
index 00000000..da7ab9e8
--- /dev/null
+++ b/documentation/test_doxygen/ignored_xmls/Doxyfile
@@ -0,0 +1,7 @@
+XML_OUTPUT              =
+
+##! M_THEME_COLOR       =
+##! M_FAVICON           =
+##! M_LINKS_NAVBAR1     =
+##! M_LINKS_NAVBAR2     =
+##! M_SEARCH_DISABLED   = YES
diff --git a/documentation/test_doxygen/ignored_xmls/Doxyfile.xml b/documentation/test_doxygen/ignored_xmls/Doxyfile.xml
new file mode 100644
index 00000000..b44092d8
--- /dev/null
+++ b/documentation/test_doxygen/ignored_xmls/Doxyfile.xml
@@ -0,0 +1 @@
+<> This XML is broken but isn't even opened <>
diff --git a/documentation/test_doxygen/ignored_xmls/Foxydile.xml b/documentation/test_doxygen/ignored_xmls/Foxydile.xml
new file mode 100644
index 00000000..a681f325
--- /dev/null
+++ b/documentation/test_doxygen/ignored_xmls/Foxydile.xml
@@ -0,0 +1,5 @@
+<!-- this file should get opened but skipped because it doesn't contain a
+     <doxygen> element -->
+<doxyfile>
+  <option/>
+</doxyfile>
diff --git a/documentation/test_doxygen/ignored_xmls/broken.xml b/documentation/test_doxygen/ignored_xmls/broken.xml
new file mode 100644
index 00000000..87476671
--- /dev/null
+++ b/documentation/test_doxygen/ignored_xmls/broken.xml
@@ -0,0 +1 @@
+<> This XML is broken and should get skipped <>
diff --git a/documentation/test_doxygen/ignored_xmls/index.xml b/documentation/test_doxygen/ignored_xmls/index.xml
new file mode 100644
index 00000000..98fb054a
--- /dev/null
+++ b/documentation/test_doxygen/ignored_xmls/index.xml
@@ -0,0 +1,2 @@
+<doxygenindex version="1.0.666">
+</doxygenindex>
diff --git a/documentation/test_doxygen/ignored_xmls/pages.html b/documentation/test_doxygen/ignored_xmls/pages.html
new file mode 100644
index 00000000..b75eb47b
--- /dev/null
+++ b/documentation/test_doxygen/ignored_xmls/pages.html
@@ -0,0 +1,55 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8" />
+  <title>My Project</title>
+  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Source+Sans+Pro:400,400i,600,600i%7CSource+Code+Pro:400,400i,600" />
+  <link rel="stylesheet" href="m-dark+documentation.compiled.css" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+</head>
+<body>
+<header><nav id="navigation">
+  <div class="m-container">
+    <div class="m-row">
+      <a href="index.html" id="m-navbar-brand" class="m-col-t-8 m-col-m-none m-left-m">My Project</a>
+    </div>
+  </div>
+</nav></header>
+<main><article>
+  <div class="m-container m-container-inflatable">
+    <div class="m-row">
+      <div class="m-col-l-10 m-push-l-1">
+        <h1>Pages</h1>
+        <ul class="m-doc">
+        </ul>
+        <script>
+        function toggle(e) {
+            e.parentElement.className = e.parentElement.className == 'm-doc-collapsible' ?
+                'm-doc-expansible' : 'm-doc-collapsible';
+            return false;
+        }
+        /* Collapse all nodes marked as such. Doing it via JS instead of
+           directly in markup so disabling it doesn't harm usability. The list
+           is somehow regenerated on every iteration and shrinks as I change
+           the classes. It's not documented anywhere and I'm not sure if this
+           is the same across browsers, so I am going backwards in that list to
+           be sure. */
+        var collapsed = document.getElementsByClassName("collapsed");
+        for(var i = collapsed.length - 1; i >= 0; --i)
+            collapsed[i].className = 'm-doc-expansible';
+        </script>
+      </div>
+    </div>
+  </div>
+</article></main>
+<footer><nav>
+  <div class="m-container">
+    <div class="m-row">
+      <div class="m-col-l-10 m-push-l-1">
+        <p>My Project. Created with <a href="https://doxygen.org/">Doxygen</a> 1.0.666 and <a href="https://mcss.mosra.cz/">m.css</a>.</p>
+      </div>
+    </div>
+  </div>
+</nav></footer>
+</body>
+</html>
diff --git a/documentation/test_doxygen/ignored_xmls/thingsgotcrazy.xml b/documentation/test_doxygen/ignored_xmls/thingsgotcrazy.xml
new file mode 100644
index 00000000..bbb19abd
--- /dev/null
+++ b/documentation/test_doxygen/ignored_xmls/thingsgotcrazy.xml
@@ -0,0 +1,7 @@
+<!-- this file should get opened but skipped because it doesn't contain a
+     <compounddef> as the first thing in the <doxygen> element. Might be a bit
+     too tight, but better safe than sorry AGAIN. -->
+<doxygen>
+  <crazyisay/>
+  <compounddef/>
+</doxygen>
diff --git a/documentation/test_doxygen/test_contents.py b/documentation/test_doxygen/test_contents.py
index cb5e31e7..277216da 100644
--- a/documentation/test_doxygen/test_contents.py
+++ b/documentation/test_doxygen/test_contents.py
@@ -272,13 +272,6 @@ class Custom(IntegrationTestCase):
 
         self.assertEqual(*self.actual_expected_contents('dot.html', file))
 
-class ParseError(BaseTestCase):
-    def test(self):
-        self.run_doxygen(wildcard='broken.xml')
-
-        # The index file should be generated, no abort
-        self.assertTrue(os.path.exists(os.path.join(self.path, 'html', 'index.html')))
-
 class AutobriefCppComments(IntegrationTestCase):
     def test(self):
         self.run_doxygen(wildcard='File_8h.xml')
diff --git a/documentation/test_doxygen/test_ignored_xmls.py b/documentation/test_doxygen/test_ignored_xmls.py
new file mode 100644
index 00000000..3244f1cb
--- /dev/null
+++ b/documentation/test_doxygen/test_ignored_xmls.py
@@ -0,0 +1,56 @@
+#
+#   This file is part of m.css.
+#
+#   Copyright Â© 2017, 2018, 2019, 2020, 2021, 2022
+#             VladimÃ­r VondruÅ¡ <mosra@centrum.cz>
+#
+#   Permission is hereby granted, free of charge, to any person obtaining a
+#   copy of this software and associated documentation files (the "Software"),
+#   to deal in the Software without restriction, including without limitation
+#   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+#   and/or sell copies of the Software, and to permit persons to whom the
+#   Software is furnished to do so, subject to the following conditions:
+#
+#   The above copyright notice and this permission notice shall be included
+#   in all copies or substantial portions of the Software.
+#
+#   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+#   THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+#   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+#   DEALINGS IN THE SOFTWARE.
+#
+
+import os
+
+from . import BaseTestCase
+
+class IgnoredXmls(BaseTestCase):
+    def test(self):
+        with self.assertLogs() as cm:
+            self.run_doxygen(wildcard='*.xml')
+
+        self.assertEqual(cm.output, [
+            # The Doxyfile.xml should be completely ignored, producing just a
+            # debug message and not even being opened, as that would spam the
+            # output otherwise
+
+            # This is like Doxyfile.xml, but with a different name, ensure it
+            # gets properly skipped if the root element mismatches
+            "WARNING:root:Foxydile.xml: root element expected to be <doxygen> but is <doxyfile>, skipping whole file",
+
+            # A file that has a XML parse error should get skipped
+            "ERROR:root:broken.xml: XML parse error, skipping whole file: not well-formed (invalid token): line 1, column 1",
+
+            # The index.xml should be parsed and not produce any warning
+
+            # A file that has <doxygen> but something weird inside, skipped
+            # also
+            "WARNING:root:thingsgotcrazy.xml: first child element expected to be <compounddef> but is <crazyisay>, skipping whole file"
+        ])
+
+        # Some index page should be generated, with version 1.0.666 extracted
+        # from index.xml
+        self.assertEqual(*self.actual_expected_contents('pages.html'))
-- 
2.30.2