From: Vladimír Vondruš <mosra@centrum.cz>
Date: Wed, 21 Aug 2024 10:23:57 +0000 (+0200)
Subject: m.htmlsanity: traverse() is deprecated since docutils 0.18.
X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~cjwatson/git?a=commitdiff_plain;h=6c0dfc9ad6a6b7bff7761b8d305be7d4ff86d26d;p=blog.git

m.htmlsanity: traverse() is deprecated since docutils 0.18.

It looks like traverse() was changed to return an iterator to make it
faster. Unfortunately, Sphinx relied on it returning a list, so the change
was reverted and traverse() was deprecated in favor of findall().

Use the new name everywhere and, on older docutils versions, add findall()
as an alias for traverse().
---

diff --git a/documentation/python.py b/documentation/python.py
index 8b056c5e..5933db3a 100755
--- a/documentation/python.py
+++ b/documentation/python.py
@@ -2188,7 +2188,7 @@ class ExtractImages(Transform):
 
     def apply(self):
         ExtractImages._external_data = set()
-        for image in self.document.traverse(docutils.nodes.image):
+        for image in self.document.findall(docutils.nodes.image):
             # Skip absolute URLs
             if urllib.parse.urlparse(image['uri']).netloc: continue
 
@@ -2368,7 +2368,7 @@ def render_page(state: State, path, input_filename, env):
 
     # Extract metadata from the page
     metadata = {}
-    for docinfo in pub.document.traverse(docutils.nodes.docinfo):
+    for docinfo in pub.document.findall(docutils.nodes.docinfo):
         for element in docinfo.children:
             if element.tagname == 'field':
                 name_elem, body_elem = element.children
diff --git a/plugins/m/htmlsanity.py b/plugins/m/htmlsanity.py
index 2f68d84b..435c1a8d 100644
--- a/plugins/m/htmlsanity.py
+++ b/plugins/m/htmlsanity.py
@@ -69,6 +69,14 @@ docutils_settings = {
     'embed_stylesheet': False
 }
 
+# findall() is new in docutils 0.18. It replaces traverse(), which was briefly
+# changed to return an iterator instead of a list; that broke Sphinx, so
+# traverse() went back to returning a list and was deprecated in favor of
+# findall(), which returns an iterator. To retain compatibility with earlier
+# versions, add an alias.
+if not hasattr(docutils.nodes.Node, 'findall'):
+    setattr(docutils.nodes.Node, 'findall', docutils.nodes.Node.traverse)
+
 words_re = re.compile(r'\w+', re.UNICODE|re.X)
 
 def extract_document_language(document):
@@ -76,7 +84,7 @@ def extract_document_language(document):
     language = document.settings.language_code
 
     # Then try to find the :lang: metadata option
-    for field in document.traverse(nodes.field):
+    for field in document.findall(nodes.field):
         assert isinstance(field[0], nodes.field_name)
         assert isinstance(field[1], nodes.field_body)
         # field_body -> paragraph -> text
@@ -137,7 +145,7 @@ class SmartQuotes(docutils.transforms.universal.SmartQuotes):
 
         # "Educate" quotes in normal text. Handle each block of text
         # (TextElement node) as a unit to keep context around inline nodes:
-        for node in self.document.traverse(nodes.TextElement):
+        for node in self.document.findall(nodes.TextElement):
             # skip preformatted text blocks and special elements:
             if isinstance(node, (nodes.FixedTextElement, nodes.Special)):
                 continue
@@ -148,7 +156,7 @@ class SmartQuotes(docutils.transforms.universal.SmartQuotes):
             # list of text nodes in the "text block":
             # Patched here to exclude more stuff.
             txtnodes = []
-            for txtnode in node.traverse(nodes.Text):
+            for txtnode in node.findall(nodes.Text):
                 if not can_apply_typography(txtnode): continue
                 # Don't convert -- in option strings
                 if isinstance(txtnode.parent, nodes.option_string): continue
@@ -204,12 +212,12 @@ class Pyphen(Transform):
         pyphen_for_lang = {}
 
         # Go through all text words and hyphenate them
-        for node in self.document.traverse(nodes.TextElement):
+        for node in self.document.findall(nodes.TextElement):
             # Skip preformatted text blocks and special elements
             if isinstance(node, (nodes.FixedTextElement, nodes.Special)):
                 continue
 
-            for txtnode in node.traverse(nodes.Text):
+            for txtnode in node.findall(nodes.Text):
                 if not can_apply_typography(txtnode): continue
 
                 # Don't hyphenate document title. Not part of