From 51b193523c0694ab3806fe377ffd172466d63399 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Vladim=C3=ADr=20Vondru=C5=A1?= <mosra@centrum.cz>
Date: Sun, 7 Jul 2019 12:49:59 +0200
Subject: [PATCH] documentation/python: properly extract images with complex
 paths.

The paths are patched to contain just a filename so things like
../../image.png work and don't lead to unreferenceable images. This is
similar to what Doxygen does even though I don't really like that -- it
could lead to conflicting files if there are files with the same
basename.
---
 doc/documentation/python.rst                  |  4 +-
 documentation/python.py                       | 52 +++++++++++++++----
 .../test_python/page_plugins/index.html       |  4 ++
 .../test_python/page_plugins/index.rst        |  5 ++
 .../test_python/page_plugins/subdir/tiny.png  |  1 +
 5 files changed, 53 insertions(+), 13 deletions(-)
 create mode 120000 documentation/test_python/page_plugins/subdir/tiny.png
diff --git a/doc/documentation/python.rst b/doc/documentation/python.rst
index bb9a5885..777370b9 100644
--- a/doc/documentation/python.rst
+++ b/doc/documentation/python.rst
@@ -581,8 +581,8 @@ expected to be formatted as :abbr:`reST <reStructuredText>` and exposed as
 HTML, otherwise as a plain text.
 
 All referenced images are expected to have either an absolute URL or be
-relative to :py:`INPUT`, the ones with relative paths are then copied to
-:py:`OUTPUT`.
+relative to :py:`INPUT`, the ones with relative paths are then copied directly
+to :py:`OUTPUT` with the leading dirs stripped from the path.
 
 `Plugins`_
 ==========
diff --git a/documentation/python.py b/documentation/python.py
index 905d57e3..3cab6900 100755
--- a/documentation/python.py
+++ b/documentation/python.py
@@ -44,6 +44,7 @@ from importlib.machinery import SourceFileLoader
 from typing import Tuple, Dict, Set, Any, List
 from urllib.parse import urljoin
 from distutils.version import LooseVersion
+from docutils.transforms import Transform
 
 import jinja2
 
@@ -949,9 +950,40 @@ def render_class(state: State, path, class_, env):
 
     return index_entry
 
-def publish_rst(state: State, source, translator_class=m.htmlsanity.SaneHtmlTranslator):
+# Collects paths of locally referenced images and rewrites each image URI to
+class ExtractImages(Transform):
+    # just its basename. Max Docutils priority is 990, so 991 makes sure this
+    # transform is applied last
+    default_priority = 991
+
+    # There is no simple way to have stateful transforms (the publisher always
+    # gets just the class, not the instance) so the result lives on the class:
+    # reset by every apply(), read back right after publishing. TODO: pending nodes?
+    _external_data = set()
+
+    def __init__(self, document, startnode):
+        Transform.__init__(self, document, startnode=startnode)
+
+    def apply(self):
+        ExtractImages._external_data = set()
+        for image in self.document.traverse(docutils.nodes.image):
+            # Skip absolute URLs
+            if urllib.parse.urlparse(image['uri']).netloc: continue
+
+            # TODO: is there a non-private access to current document source
+            # path? Without a string source path the URI is recorded unchanged
+            ExtractImages._external_data.add(os.path.join(os.path.dirname(self.document.settings._source), image['uri']) if isinstance(self.document.settings._source, str) else image['uri'])
+
+            # Patch the URL to be just the filename
+            image['uri'] = os.path.basename(image['uri'])
+
+class DocumentationWriter(m.htmlsanity.SaneHtmlWriter):
+    def get_transforms(self):
+        return m.htmlsanity.SaneHtmlWriter.get_transforms(self) + [ExtractImages]
+
+def publish_rst(state: State, source, *, source_path=None, translator_class=m.htmlsanity.SaneHtmlTranslator):
     pub = docutils.core.Publisher(
-        writer=m.htmlsanity.SaneHtmlWriter(),
+        writer=DocumentationWriter(),
         source_class=docutils.io.StringInput,
         destination_class=docutils.io.StringOutput)
     pub.set_components('standalone', 'restructuredtext', 'html')
@@ -960,20 +992,18 @@ def publish_rst(state: State, source, translator_class=m.htmlsanity.SaneHtmlTran
     # Docutils uses a deprecated U mode for opening files, so instead of
     # monkey-patching docutils.io.FileInput to not do that (like Pelican does),
     # I just read the thing myself.
-    # TODO *somehow* need to supply the filename to it for better error
-    # reporting, this is too awful
-    pub.set_source(source=source)
+    # TODO for external docs it *somehow* needs to supply the filename and line
+    # range to it for better error reporting, this is too awful
+    pub.set_source(source=source, source_path=source_path)
     pub.publish()
 
     # External images to pull later
-    # TODO: some actual path handling
-    for image in pub.document.traverse(docutils.nodes.image):
-        state.external_data.add(image['uri'])
+    state.external_data = state.external_data.union(ExtractImages._external_data)
 
     return pub
 
 def render_rst(state: State, source):
-    return publish_rst(state, source).writer.parts.get('body').rstrip()
+    return publish_rst(state, source, source_path=None).writer.parts.get('body').rstrip()
 
 class _SaneInlineHtmlTranslator(m.htmlsanity.SaneHtmlTranslator):
     # Unconditionally force compact paragraphs. This means the inline HTML
@@ -982,7 +1012,7 @@ class _SaneInlineHtmlTranslator(m.htmlsanity.SaneHtmlTranslator):
         return True
 
 def render_inline_rst(state: State, source):
-    return publish_rst(state, source, _SaneInlineHtmlTranslator).writer.parts.get('body').rstrip()
+    return publish_rst(state, source, translator_class=_SaneInlineHtmlTranslator).writer.parts.get('body').rstrip()
 
 def render_doc(state: State, filename):
     logging.debug("parsing docs from %s", filename)
@@ -1001,7 +1031,7 @@ def render_page(state: State, path, filename, env):
     for hook in state.hooks_pre_page: hook()
 
     # Render the file
-    with open(filename, 'r') as f: pub = publish_rst(state, f.read())
+    with open(filename, 'r') as f: pub = publish_rst(state, f.read(), source_path=filename)
 
     # Extract metadata from the page
     metadata = {}
diff --git a/documentation/test_python/page_plugins/index.html b/documentation/test_python/page_plugins/index.html
index 0992172c..8368a6ef 100644
--- a/documentation/test_python/page_plugins/index.html
+++ b/documentation/test_python/page_plugins/index.html
@@ -47,6 +47,10 @@ Yup!</aside>
 <span class="go">14.0</span></pre>
 <p>Images!</p>
 <img class="m-image" src="tiny.png" style="width: 60px" />
+<figure class="m-figure">
+<img src="tiny.png" style="width: 200px" />
+<figcaption>Image in a subdir</figcaption>
+</figure>
 <p class="m-transition">~~~ Custom plugins! ~~~</p>
 <p>And now something totally different:</p>
 <style>
diff --git a/documentation/test_python/page_plugins/index.rst b/documentation/test_python/page_plugins/index.rst
index c71270fe..c9a6d7f1 100644
--- a/documentation/test_python/page_plugins/index.rst
+++ b/documentation/test_python/page_plugins/index.rst
@@ -35,6 +35,11 @@ Images!
 .. image:: tiny.png
     :scale: 2000%
 
+.. figure:: subdir/tiny.png
+    :width: 200px
+
+    Image in a subdir
+
 .. fancy-line:: Custom plugins!
 
 And now something totally different:
diff --git a/documentation/test_python/page_plugins/subdir/tiny.png b/documentation/test_python/page_plugins/subdir/tiny.png
new file mode 120000
index 00000000..364efa70
--- /dev/null
+++ b/documentation/test_python/page_plugins/subdir/tiny.png
@@ -0,0 +1 @@
+../../../../plugins/m/test/images/tiny.png
\ No newline at end of file
-- 
2.30.2