chiark / gitweb /
documentation/python: properly extract images with complex paths.
authorVladimír Vondruš <mosra@centrum.cz>
Sun, 7 Jul 2019 10:49:59 +0000 (12:49 +0200)
committerVladimír Vondruš <mosra@centrum.cz>
Sun, 7 Jul 2019 12:10:41 +0000 (14:10 +0200)
The paths are patched to contains just a filename so things like
../../image.png work and don't lead to unreferenceable images. This is
similar to what Doxygen does even though I don't really like that -- it
could lead to conflicting files if there are files with the same
basename.

doc/documentation/python.rst
documentation/python.py
documentation/test_python/page_plugins/index.html
documentation/test_python/page_plugins/index.rst
documentation/test_python/page_plugins/subdir/tiny.png [new symlink]

index bb9a588549ed0bdc894641c806fb2cf951d3f5f0..777370b97666e43faca40a4bf40b5db2e590aefc 100644 (file)
@@ -581,8 +581,8 @@ expected to be formatted as :abbr:`reST <reStructuredText>` and exposed as
 HTML, otherwise as a plain text.
 
 All referenced images are expected to have either an absolute URL or be
-relative to :py:`INPUT`, the ones with relative paths are then copied to
-:py:`OUTPUT`.
+relative to :py:`INPUT`, the ones with relative paths are then copied directly
+to :py:`OUTPUT` with the leading dirs stripped from the path.
 
 `Plugins`_
 ==========
index 905d57e3afbb3bb19631779fadcb96d65aa95788..3cab6900737878b0e72811e4224725881e6f0001 100755 (executable)
@@ -44,6 +44,7 @@ from importlib.machinery import SourceFileLoader
 from typing import Tuple, Dict, Set, Any, List
 from urllib.parse import urljoin
 from distutils.version import LooseVersion
+from docutils.transforms import Transform
 
 import jinja2
 
@@ -949,9 +950,40 @@ def render_class(state: State, path, class_, env):
 
     return index_entry
 
-def publish_rst(state: State, source, translator_class=m.htmlsanity.SaneHtmlTranslator):
+# Extracts image paths and transforms them to just the filenames
+class ExtractImages(Transform):
+    # Max Docutils priority is 990, be sure that this is applied at the very
+    # last
+    default_priority = 991
+
+    # There is no simple way to have stateful transforms (the publisher always
+    # gets just the class, not the instance) so we have to use this
+    # TODO: maybe the pending nodes could solve this?
+    external_data = set()
+
+    def __init__(self, document, startnode):
+        Transform.__init__(self, document, startnode=startnode)
+
+    def apply(self):
+        ExtractImages._external_data = set()
+        for image in self.document.traverse(docutils.nodes.image):
+            # Skip absolute URLs
+            if urllib.parse.urlparse(image['uri']).netloc: continue
+
+            # TODO: is there a non-private access to current document source
+            # path?
+            ExtractImages._external_data.add(os.path.join(os.path.dirname(self.document.settings._source), image['uri']) if isinstance(self.document.settings._source, str) else image['uri'])
+
+            # Patch the URL to be just the filename
+            image['uri'] = os.path.basename(image['uri'])
+
+class DocumentationWriter(m.htmlsanity.SaneHtmlWriter):
+    def get_transforms(self):
+        return m.htmlsanity.SaneHtmlWriter.get_transforms(self) + [ExtractImages]
+
+def publish_rst(state: State, source, *, source_path=None, translator_class=m.htmlsanity.SaneHtmlTranslator):
     pub = docutils.core.Publisher(
-        writer=m.htmlsanity.SaneHtmlWriter(),
+        writer=DocumentationWriter(),
         source_class=docutils.io.StringInput,
         destination_class=docutils.io.StringOutput)
     pub.set_components('standalone', 'restructuredtext', 'html')
@@ -960,20 +992,18 @@ def publish_rst(state: State, source, translator_class=m.htmlsanity.SaneHtmlTran
     # Docutils uses a deprecated U mode for opening files, so instead of
     # monkey-patching docutils.io.FileInput to not do that (like Pelican does),
     # I just read the thing myself.
-    # TODO *somehow* need to supply the filename to it for better error
-    # reporting, this is too awful
-    pub.set_source(source=source)
+    # TODO for external docs it *somehow* needs to supply the filename and line
+    # range to it for better error reporting, this is too awful
+    pub.set_source(source=source, source_path=source_path)
     pub.publish()
 
     # External images to pull later
-    # TODO: some actual path handling
-    for image in pub.document.traverse(docutils.nodes.image):
-        state.external_data.add(image['uri'])
+    state.external_data = state.external_data.union(ExtractImages._external_data)
 
     return pub
 
 def render_rst(state: State, source):
-    return publish_rst(state, source).writer.parts.get('body').rstrip()
+    return publish_rst(state, source, source_path=None).writer.parts.get('body').rstrip()
 
 class _SaneInlineHtmlTranslator(m.htmlsanity.SaneHtmlTranslator):
     # Unconditionally force compact paragraphs. This means the inline HTML
@@ -982,7 +1012,7 @@ class _SaneInlineHtmlTranslator(m.htmlsanity.SaneHtmlTranslator):
         return True
 
 def render_inline_rst(state: State, source):
-    return publish_rst(state, source, _SaneInlineHtmlTranslator).writer.parts.get('body').rstrip()
+    return publish_rst(state, source, translator_class=_SaneInlineHtmlTranslator).writer.parts.get('body').rstrip()
 
 def render_doc(state: State, filename):
     logging.debug("parsing docs from %s", filename)
@@ -1001,7 +1031,7 @@ def render_page(state: State, path, filename, env):
     for hook in state.hooks_pre_page: hook()
 
     # Render the file
-    with open(filename, 'r') as f: pub = publish_rst(state, f.read())
+    with open(filename, 'r') as f: pub = publish_rst(state, f.read(), source_path=filename)
 
     # Extract metadata from the page
     metadata = {}
index 0992172c3de2a18b71ee3d41c1b1c66e3ff459ca..8368a6efaee5497b0b02500fefed62553fa1dca2 100644 (file)
@@ -47,6 +47,10 @@ Yup!</aside>
 <span class="go">14.0</span></pre>
 <p>Images!</p>
 <img class="m-image" src="tiny.png" style="width: 60px" />
+<figure class="m-figure">
+<img src="tiny.png" style="width: 200px" />
+<figcaption>Image in a subdir</figcaption>
+</figure>
 <p class="m-transition">~~~ Custom plugins! ~~~</p>
 <p>And now something totally different:</p>
 <style>
index c71270fe65fc247235d6c1c4135c004f81a543b5..c9a6d7f14a099c661d64ae86a2df454bc439f34e 100644 (file)
@@ -35,6 +35,11 @@ Images!
 .. image:: tiny.png
     :scale: 2000%
 
+.. figure:: subdir/tiny.png
+    :width: 200px
+
+    Image in a subdir
+
 .. fancy-line:: Custom plugins!
 
 And now something totally different:
diff --git a/documentation/test_python/page_plugins/subdir/tiny.png b/documentation/test_python/page_plugins/subdir/tiny.png
new file mode 120000 (symlink)
index 0000000..364efa7
--- /dev/null
@@ -0,0 +1 @@
+../../../../plugins/m/test/images/tiny.png
\ No newline at end of file