From: Vladimír Vondruš Date: Wed, 21 Aug 2024 10:23:57 +0000 (+0200) Subject: m.htmlsanity: traverse() is deprecated since docutils 0.18. X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~cjwatson/git?a=commitdiff_plain;h=6c0dfc9ad6a6b7bff7761b8d305be7d4ff86d26d;p=blog.git m.htmlsanity: traverse() is deprecated since docutils 0.18. It looks like traverse() was changed to return an iterator to be faster. Unfortunately, Sphinx relied on it returning a list, so the change was reverted and traverse() was deprecated in favor of findall(). This commit uses the new name and, on older docutils versions, adds findall() as an alias for traverse(). --- diff --git a/documentation/python.py b/documentation/python.py index 8b056c5e..5933db3a 100755 --- a/documentation/python.py +++ b/documentation/python.py @@ -2188,7 +2188,7 @@ class ExtractImages(Transform): def apply(self): ExtractImages._external_data = set() - for image in self.document.traverse(docutils.nodes.image): + for image in self.document.findall(docutils.nodes.image): # Skip absolute URLs if urllib.parse.urlparse(image['uri']).netloc: continue @@ -2368,7 +2368,7 @@ def render_page(state: State, path, input_filename, env): # Extract metadata from the page metadata = {} - for docinfo in pub.document.traverse(docutils.nodes.docinfo): + for docinfo in pub.document.findall(docutils.nodes.docinfo): for element in docinfo.children: if element.tagname == 'field': name_elem, body_elem = element.children diff --git a/plugins/m/htmlsanity.py b/plugins/m/htmlsanity.py index 2f68d84b..435c1a8d 100644 --- a/plugins/m/htmlsanity.py +++ b/plugins/m/htmlsanity.py @@ -69,6 +69,14 @@ docutils_settings = { 'embed_stylesheet': False } +# findall() is new in docutils 0.18, replacing traverse() because that one was +# attempted to be changed to return an iterator instead of a list, which broke +# Sphinx, so it instead got deprecated in favor of findall() which is the same +# as traverse() returning an iterator was. To retain compatibility with earlier +# versions, add an alias. 
+if not hasattr(docutils.nodes.Node, 'findall'): + setattr(docutils.nodes.Node, 'findall', docutils.nodes.Node.traverse) + words_re = re.compile(r'\w+', re.UNICODE|re.X) def extract_document_language(document): @@ -76,7 +84,7 @@ def extract_document_language(document): language = document.settings.language_code # Then try to find the :lang: metadata option - for field in document.traverse(nodes.field): + for field in document.findall(nodes.field): assert isinstance(field[0], nodes.field_name) assert isinstance(field[1], nodes.field_body) # field_body -> paragraph -> text @@ -137,7 +145,7 @@ class SmartQuotes(docutils.transforms.universal.SmartQuotes): # "Educate" quotes in normal text. Handle each block of text # (TextElement node) as a unit to keep context around inline nodes: - for node in self.document.traverse(nodes.TextElement): + for node in self.document.findall(nodes.TextElement): # skip preformatted text blocks and special elements: if isinstance(node, (nodes.FixedTextElement, nodes.Special)): continue @@ -148,7 +156,7 @@ class SmartQuotes(docutils.transforms.universal.SmartQuotes): # list of text nodes in the "text block": # Patched here to exclude more stuff. txtnodes = [] - for txtnode in node.traverse(nodes.Text): + for txtnode in node.findall(nodes.Text): if not can_apply_typography(txtnode): continue # Don't convert -- in option strings if isinstance(txtnode.parent, nodes.option_string): continue @@ -204,12 +212,12 @@ class Pyphen(Transform): pyphen_for_lang = {} # Go through all text words and hyphenate them - for node in self.document.traverse(nodes.TextElement): + for node in self.document.findall(nodes.TextElement): # Skip preformatted text blocks and special elements if isinstance(node, (nodes.FixedTextElement, nodes.Special)): continue - for txtnode in node.traverse(nodes.Text): + for txtnode in node.findall(nodes.Text): if not can_apply_typography(txtnode): continue # Don't hyphenate document title. Not part of