chiark / gitweb /
documentation/python, m.sphinx: hook for parsing docstrings.
authorVladimír Vondruš <mosra@centrum.cz>
Wed, 28 Aug 2019 14:01:24 +0000 (16:01 +0200)
committerVladimír Vondruš <mosra@centrum.cz>
Fri, 30 Aug 2019 14:47:58 +0000 (16:47 +0200)
Centuries of work and research later, this gem is done. Yay!

doc/documentation/python.rst
doc/plugins/sphinx.rst
documentation/python.py
documentation/test_python/content_parse_docstrings/content_parse_docstrings.Class.html [new file with mode: 0644]
documentation/test_python/content_parse_docstrings/content_parse_docstrings.html [new file with mode: 0644]
documentation/test_python/content_parse_docstrings/content_parse_docstrings.py [new file with mode: 0644]
documentation/test_python/page_plugins/plugins/fancyline.py
documentation/test_python/test_content.py
documentation/test_python/test_page.py
plugins/m/sphinx.py

index f135fad959e7d5cd72e215ec637fa19bb1ab5a40..86a4bd4d4407e162f4d05d07838d59bcfdded2a8 100644 (file)
@@ -769,6 +769,7 @@ Keyword argument            Content
 :py:`property_doc_contents` Property documentation contents
 :py:`data_doc_contents`     Data documentation contents
 :py:`hooks_post_crawl`      Hooks to call after the initial name crawl
+:py:`hooks_docstring`       Hooks to call when parsing a docstring
 :py:`hooks_pre_page`        Hooks to call before each page gets rendered
 :py:`hooks_post_run`        Hooks to call at the very end of the script run
 =========================== ===================================================
@@ -790,9 +791,10 @@ important to avoid fully overwriting it:
     docs['summary'] = "A pretty class"
     docs['details'] = "This class is *pretty*."
 
-The :py:`hooks_post_crawl`, :py:`hooks_pre_page` and :py:`hooks_post_run`
-variables are lists of functions. Plugins that need to do something at specific
-points of the execution are supposed to add functions to the list.
+The :py:`hooks_post_crawl`, :py:`hooks_docstring`, :py:`hooks_pre_page` and
+:py:`hooks_post_run` variables are lists of functions. Plugins that need to do
+something at specific points of the execution are supposed to add functions to
+the list.
 
 The :py:`hooks_post_crawl` is called once gathering of all names is done. It
 gets passed the following arguments:
@@ -838,6 +840,36 @@ Keyword argument    Content
     added by the plugin *need* to have :py:`object` set to :py:`None` so the
     script as well as other plugins can correctly distinguish them.
 
+Hooks listed in :py:`hooks_docstring` are called when docstrings are parsed.
+The first gets the raw docstring only processed by :py:`inspect.cleandoc()` and
+each following gets the output of the previous. When a hook returns an empty
+string, hooks later in the list are not called. The string returned by the
+last hook, if any, is processed the same way as if no hooks were present --- it
+gets partitioned into summary and content and those put to the output as-is,
+each paragraph wrapped in :html:`<p>` tags. The hooks are free to do anything
+with the docstring --- extracting metadata from it and returning it as-is,
+transpiling it from one markup language to another, or fully consuming it,
+populating the ``*_doc_contents`` variables mentioned above and returning
+nothing back. Each hook gets passed the following arguments:
+
+.. class:: m-table
+
+=================== ===========================================================
+Keyword argument    Content
+=================== ===========================================================
+:py:`type`          Name type. Same as the enum passed to
+                    `custom URL formatters`_.
+:py:`path`          Path of the module / class / function / enum / enum value /
+                    data containing the docstring. A list of names,
+                    :py:`'.'.join(path)` is equivalent to the fully qualified
+                    name.
+:py:`signature`     Signature of a function, for distinguishing between
+                    particular overloads. In a form of
+                    ``(param1: type1, param2: type2)``.
+:py:`doc`           Docstring content. Always non-empty --- once a hook returns
+                    nothing back, no further hooks are called.
+=================== ===========================================================
+
 The :py:`hooks_pre_page` is called before each page of output gets rendered.
 Can be used for example for resetting some internal counter for page-wide
 unique element IDs. It gets passed the following arguments:
index 63c9384b18379faaf6ccf8d7691b543f58937803..5a81819e40dac43449071b87f19f7c0995025668 100644 (file)
@@ -77,6 +77,7 @@ symbols to a file to be linked from elsewhere, see
     PLUGINS += ['m.sphinx']
     M_SPHINX_INVENTORIES = [...]
     M_SPHINX_INVENTORY_OUTPUT = 'objects.inv'
+    M_SPHINX_PARSE_DOCSTRINGS = False
 
 `Links to external Sphinx documentation`_
 =========================================
@@ -281,11 +282,13 @@ the markup. Example:
     .. py:data:: mymodule.ALMOST_PI
         :summary: :math:`\pi`, but *less precise*.
 
-Compared to docstrings, the :py:`:summary:` is interpreted as
+By default, unlike docstrings, the :py:`:summary:` is interpreted as
 :abbr:`reST <reStructuredText>`, which means you can keep the docstring
 formatting simpler (for display inside IDEs or via the builtin :py:`help()`),
 while supplying an alternative and more complex-formatted summary for the
-actual rendered docs.
+actual rendered docs. It's however possible to enable
+:abbr:`reST <reStructuredText>` parsing for docstrings as well --- see
+`Using parsed docstrings`_ below.
 
 .. note-warning::
 
@@ -332,3 +335,32 @@ Example:
         :summary: Dot product
 
         .. this documentation will be used for all other overloads
+
+`Using parsed docstrings`_
+--------------------------
+
+By default, docstrings are `treated by the Python doc generator as plain text <{filename}/documentation/python.rst#docstrings>`_
+and only externally-supplied docs are parsed. This is done because, for example
+in Python standard library, embedded docstrings are often very terse without
+any markup and full docs are external. If you want the docstrings to be parsed,
+enable the :py:`M_SPHINX_PARSE_DOCSTRINGS` option. Compared to the directives
+above, there's only one difference --- instead of a :rst:`:summary:` option,
+the first paragraph is taken as a summary, the second paragraph as the option
+list (if it contains option fields) and the rest as documentation content.
+Continuing with the :rst:`.. py:function::` example above, embedded in a
+docstring it would look like this instead:
+
+.. code:: py
+
+    def add(self, key, value, *, overwrite_existing=False):
+        """Add a key/value pair to the container
+
+        :param key:                 Key to add
+        :param value:               Corresponding value
+        :param overwrite_existing:  Overwrite existing value if already present
+            in the container
+        :return:                    The inserted tuple or the existing
+            key/value pair in case ``overwrite_existing`` is not set
+
+        The operation has a :math:`\mathcal{O}(\log{}n)` complexity.
+        """
index ef40ad5eb899bda293109e491ed4ce23ab3f7fd3..0244abb38840adcdcce84f5f4b3c024eedef8bf7 100755 (executable)
@@ -184,6 +184,7 @@ class State:
         self.external_data: Set[str] = set()
 
         self.hooks_post_crawl: List = []
+        self.hooks_docstring: List = []
         self.hooks_pre_page: List = []
         self.hooks_post_run: List = []
 
@@ -790,7 +791,7 @@ def format_value(state: State, referrer_path: List[str], value: str) -> Optional
     else:
         return None
 
-def extract_docs(state: State, external_docs, path: List[str], doc: str, *, signature=None, summary_only=False) -> Tuple[str, str]:
+def extract_docs(state: State, external_docs, type: EntryType, path: List[str], doc: str, *, signature=None, summary_only=False) -> Tuple[str, str]:
     path_str = '.'.join(path)
     # If function signature is supplied, try that first
     if signature and path_str + signature in external_docs:
@@ -813,22 +814,72 @@ def extract_docs(state: State, external_docs, path: List[str], doc: str, *, sign
     # later.
     if external_doc_entry.get('summary') is None or external_doc_entry.get('content') is None:
         # some modules (xml.etree) have None as a docstring :(
-        summary, _, content = inspect.cleandoc(doc or '').partition('\n\n')
-
-        # Turn both into a raw HTML block so it doesn't get further processed
-        # by reST. For the content, wrap each paragraph in <p> so it looks
-        # acceptable in the output.
-        if summary:
-            summary = html.escape(summary)
-            summary = ".. raw:: html\n\n    " + summary.replace('\n', '\n    ')
-        if content:
-            content = '\n'.join(['<p>{}</p>'.format(p) for p in html.escape(content).split('\n\n')])
-            content = ".. raw:: html\n\n    " + content.replace('\n', '\n    ')
-
-        if external_doc_entry.get('summary') is None:
-            external_doc_entry['summary'] = summary
-        if external_doc_entry.get('content') is None:
-            external_doc_entry['content'] = content
+        doc = inspect.cleandoc(doc or '').strip()
+
+        if doc:
+            # Do the same as in render_doc() to support directives with
+            # multi-word field names and duplicate fields, restore the original
+            # implementations again after.
+            prev_extract_options = docutils.utils.extract_options
+            prev_assemble_option_dict = docutils.utils.assemble_option_dict
+            docutils.utils.extract_options = _docutils_extract_options
+            docutils.utils.assemble_option_dict = _docutils_assemble_option_dict
+
+            # Go through all registered docstring hooks and let them process
+            # this one after another; stopping once there's nothing left. If
+            # nothing left, the populated entries should be non-None.
+            for hook in state.hooks_docstring:
+                doc = hook(
+                    type=type,
+                    path=path,
+                    signature=signature,
+                    doc=doc)
+
+                # The hook could have replaced the entry with a new dict
+                # instance, fetch it again to avoid looking at stale data below
+                external_doc_entry = external_docs[path_signature_str]
+
+                if not doc:
+                    # Assuming the doc were non-empty on input, if those are
+                    # empty on output, the hook should be filling both summary
+                    # and content to non-None values (so, in the worst case,
+                    # an empty string)
+                    assert external_doc_entry['summary'] is not None
+                    assert external_doc_entry['content'] is not None
+                    break
+
+            # If there's still something left after the hooks (or there are no
+            # hooks), process it as a plain unformatted text.
+            else:
+                summary, _, content = doc.partition('\n\n')
+
+                # Turn both into a raw HTML block so it doesn't get further
+                # processed by reST. For the content, wrap each paragraph in
+                # <p> so it looks acceptable in the output.
+                if summary:
+                    summary = html.escape(summary)
+                    summary = ".. raw:: html\n\n    " + summary.replace('\n', '\n    ')
+                if content:
+                    content = '\n'.join(['<p>{}</p>'.format(p) for p in html.escape(content).split('\n\n')])
+                    content = ".. raw:: html\n\n    " + content.replace('\n', '\n    ')
+
+                if external_doc_entry.get('summary') is None:
+                    external_doc_entry['summary'] = summary
+                if external_doc_entry.get('content') is None:
+                    external_doc_entry['content'] = content
+
+            # Restore original implementations again
+            docutils.utils.extract_options = prev_extract_options
+            docutils.utils.assemble_option_dict = prev_assemble_option_dict
+
+        # We ain't got nothing. If there isn't anything supplied externally,
+        # set summary / content to an empty string so this branch isn't entered
+        # again.
+        else:
+            if external_doc_entry.get('summary') is None:
+                external_doc_entry['summary'] = ''
+            if external_doc_entry.get('content') is None:
+                external_doc_entry['content'] = ''
 
     # Render. This can't be done just once and then cached because e.g. math
     # rendering needs to ensure each SVG formula has unique IDs on each page.
@@ -974,7 +1025,7 @@ def extract_module_doc(state: State, entry: Empty):
     out = Empty()
     out.url = entry.url
     out.name = entry.path[-1]
-    out.summary = extract_docs(state, state.class_docs, entry.path, entry.object.__doc__, summary_only=True)
+    out.summary = extract_docs(state, state.class_docs, entry.type, entry.path, entry.object.__doc__, summary_only=True)
     return out
 
 def extract_class_doc(state: State, entry: Empty):
@@ -983,7 +1034,7 @@ def extract_class_doc(state: State, entry: Empty):
     out = Empty()
     out.url = entry.url
     out.name = entry.path[-1]
-    out.summary = extract_docs(state, state.class_docs, entry.path, entry.object.__doc__, summary_only=True)
+    out.summary = extract_docs(state, state.class_docs, entry.type, entry.path, entry.object.__doc__, summary_only=True)
     return out
 
 def extract_enum_doc(state: State, entry: Empty):
@@ -1000,7 +1051,7 @@ def extract_enum_doc(state: State, entry: Empty):
             docstring = ''
         else:
             docstring = entry.object.__doc__
-        out.summary, out.content = extract_docs(state, state.enum_docs, entry.path, docstring)
+        out.summary, out.content = extract_docs(state, state.enum_docs, entry.type, entry.path, docstring)
         out.has_details = bool(out.content)
 
         out.base = extract_type(entry.object.__base__)
@@ -1020,7 +1071,7 @@ def extract_enum_doc(state: State, entry: Empty):
                 docstring = i.__doc__
 
             # TODO: external summary for enum values
-            value.summary = extract_docs(state, {}, [], docstring, summary_only=True)
+            value.summary = extract_docs(state, {}, EntryType.ENUM_VALUE, [], docstring, summary_only=True)
 
             if value.summary:
                 out.has_details = True
@@ -1035,7 +1086,7 @@ def extract_enum_doc(state: State, entry: Empty):
         # that yet and it adds clutter to the output (especially if the values
         # aren't documented), so cut that away
         # TODO: implement this
-        out.summary, out.content = extract_docs(state, state.enum_docs, entry.path, entry.object.__doc__.partition('\n\n')[0])
+        out.summary, out.content = extract_docs(state, state.enum_docs, entry.type, entry.path, entry.object.__doc__.partition('\n\n')[0])
         out.has_details = bool(out.content)
         out.base = None
 
@@ -1187,7 +1238,7 @@ def extract_function_doc(state: State, parent, entry: Empty) -> List[Any]:
 
             # Get summary and details. Passing the signature as well, so
             # different overloads can (but don't need to) have different docs.
-            out.summary, out.content = extract_docs(state, state.function_docs, entry.path, summary, signature='({})'.format(', '.join(signature)))
+            out.summary, out.content = extract_docs(state, state.function_docs, entry.type, entry.path, summary, signature='({})'.format(', '.join(signature)))
             if out.content: out.has_details = True
 
             overloads += [out]
@@ -1199,7 +1250,7 @@ def extract_function_doc(state: State, parent, entry: Empty) -> List[Any]:
         out.id = state.config['ID_FORMATTER'](EntryType.FUNCTION, entry.path[-1:])
         out.params = []
         out.has_complex_params = False
-        out.summary, out.content = extract_docs(state, state.function_docs, entry.path, entry.object.__doc__)
+        out.summary, out.content = extract_docs(state, state.function_docs, entry.type, entry.path, entry.object.__doc__)
         out.has_details = bool(out.content)
 
         # Decide if classmethod or staticmethod in case this is a method
@@ -1323,7 +1374,7 @@ def extract_property_doc(state: State, parent, entry: Empty):
         out.is_settable = True
         out.is_deletable = True
         # Unfortunately we can't get any docstring for these
-        out.summary, out.content = extract_docs(state, state.property_docs, entry.path, '')
+        out.summary, out.content = extract_docs(state, state.property_docs, entry.type, entry.path, '')
         out.has_details = bool(out.content)
 
         # First try to get fully dereferenced type hints (with strings
@@ -1351,7 +1402,7 @@ def extract_property_doc(state: State, parent, entry: Empty):
         out.is_settable = False
         out.is_deletable = False
         # Unfortunately we can't get any docstring for these
-        out.summary, out.content = extract_docs(state, state.property_docs, entry.path, '')
+        out.summary, out.content = extract_docs(state, state.property_docs, entry.type, entry.path, '')
         out.has_details = bool(out.content)
         out.type = None
         return out
@@ -1362,7 +1413,7 @@ def extract_property_doc(state: State, parent, entry: Empty):
     else:
         assert entry.object.fset
         docstring = entry.object.fset.__doc__
-    out.summary, out.content = extract_docs(state, state.property_docs, entry.path, docstring)
+    out.summary, out.content = extract_docs(state, state.property_docs, entry.type, entry.path, docstring)
     out.is_settable = entry.object.fset is not None
     out.is_deletable = entry.object.fdel is not None
     out.has_details = bool(out.content)
@@ -1436,7 +1487,7 @@ def extract_data_doc(state: State, parent, entry: Empty):
     out.name = entry.path[-1]
     out.id = state.config['ID_FORMATTER'](EntryType.DATA, entry.path[-1:])
     # Welp. https://stackoverflow.com/questions/8820276/docstring-for-variable
-    out.summary, out.content = extract_docs(state, state.data_docs, entry.path, '')
+    out.summary, out.content = extract_docs(state, state.data_docs, entry.type, entry.path, '')
     out.has_details = bool(out.content)
 
     # First try to get fully dereferenced type hints (with strings converted to
@@ -1493,7 +1544,7 @@ def render_module(state: State, path, module, env):
         hook(path=path)
 
     page = Empty()
-    page.summary, page.content = extract_docs(state, state.module_docs, path, module.__doc__)
+    page.summary, page.content = extract_docs(state, state.module_docs, EntryType.MODULE, path, module.__doc__)
     page.filename = filename
     page.url = url
     page.breadcrumb = breadcrumb
@@ -1569,7 +1620,7 @@ def render_class(state: State, path, class_, env):
         hook(path=path)
 
     page = Empty()
-    page.summary, page.content = extract_docs(state, state.class_docs, path, class_.__doc__)
+    page.summary, page.content = extract_docs(state, state.class_docs, EntryType.CLASS, path, class_.__doc__)
     page.filename = filename
     page.url = url
     page.breadcrumb = breadcrumb
@@ -1976,6 +2027,7 @@ def run(basedir, config, *, templates=default_templates, search_add_lookahead_ba
             property_doc_contents=state.property_docs,
             data_doc_contents=state.data_docs,
             hooks_post_crawl=state.hooks_post_crawl,
+            hooks_docstring=state.hooks_docstring,
             hooks_pre_page=state.hooks_pre_page,
             hooks_post_run=state.hooks_post_run)
 
diff --git a/documentation/test_python/content_parse_docstrings/content_parse_docstrings.Class.html b/documentation/test_python/content_parse_docstrings/content_parse_docstrings.Class.html
new file mode 100644 (file)
index 0000000..005f94c
--- /dev/null
@@ -0,0 +1,63 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8" />
+  <title>content_parse_docstrings.Class | My Python Project</title>
+  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Source+Sans+Pro:400,400i,600,600i%7CSource+Code+Pro:400,400i,600" />
+  <link rel="stylesheet" href="m-dark+documentation.compiled.css" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+</head>
+<body>
+<header><nav id="navigation">
+  <div class="m-container">
+    <div class="m-row">
+      <a href="index.html" id="m-navbar-brand" class="m-col-t-8 m-col-m-none m-left-m">My Python Project</a>
+    </div>
+  </div>
+</nav></header>
+<main><article>
+  <div class="m-container m-container-inflatable">
+    <div class="m-row">
+      <div class="m-col-l-10 m-push-l-1">
+        <h1>
+          <span class="m-breadcrumb"><a href="content_parse_docstrings.html">content_parse_docstrings</a>.<wbr/></span>Class <span class="m-thin">class</span>
+        </h1>
+        <p>This class has a <em>serious</em> docstring.
+With a multi-line summary.</p>
+        <div class="m-block m-default">
+          <h3>Contents</h3>
+          <ul>
+            <li>
+              Reference
+              <ul>
+                <li><a href="#properties">Properties</a></li>
+              </ul>
+            </li>
+          </ul>
+        </div>
+<p>And class <strong>details</strong> as well.</p>
+        <section id="properties">
+          <h2><a href="#properties">Properties</a></h2>
+          <dl class="m-doc">
+            <dt>
+              <a href="#a_property" class="m-doc">a_property</a>: float <span class="m-label m-flat m-warning">get</span>
+            </dt>
+            <dd>This property has a <em>serious</em> docstring.</dd>
+          </dl>
+        </section>
+        <section>
+          <h2>Property documentation</h2>
+          <section class="m-doc-details" id="a_property"><div>
+            <h3>
+              content_parse_docstrings.<wbr />Class.<wbr /><a href="#a_property" class="m-doc-self">a_property</a>: float <span class="m-label m-flat m-warning">get</span>
+            </h3>
+            <p>This property has a <em>serious</em> docstring.</p>
+<p>And property <strong>details</strong> as well.</p>
+          </div></section>
+        </section>
+      </div>
+    </div>
+  </div>
+</article></main>
+</body>
+</html>
diff --git a/documentation/test_python/content_parse_docstrings/content_parse_docstrings.html b/documentation/test_python/content_parse_docstrings/content_parse_docstrings.html
new file mode 100644 (file)
index 0000000..edfdf55
--- /dev/null
@@ -0,0 +1,135 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8" />
+  <title>content_parse_docstrings | My Python Project</title>
+  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Source+Sans+Pro:400,400i,600,600i%7CSource+Code+Pro:400,400i,600" />
+  <link rel="stylesheet" href="m-dark+documentation.compiled.css" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+</head>
+<body>
+<header><nav id="navigation">
+  <div class="m-container">
+    <div class="m-row">
+      <a href="index.html" id="m-navbar-brand" class="m-col-t-8 m-col-m-none m-left-m">My Python Project</a>
+    </div>
+  </div>
+</nav></header>
+<main><article>
+  <div class="m-container m-container-inflatable">
+    <div class="m-row">
+      <div class="m-col-l-10 m-push-l-1">
+        <h1>
+          content_parse_docstrings <span class="m-thin">module</span>
+        </h1>
+        <p>This module has a <em>serious</em> docstring.</p>
+        <div class="m-block m-default">
+          <h3>Contents</h3>
+          <ul>
+            <li>
+              Reference
+              <ul>
+                <li><a href="#classes">Classes</a></li>
+                <li><a href="#enums">Enums</a></li>
+                <li><a href="#functions">Functions</a></li>
+              </ul>
+            </li>
+          </ul>
+        </div>
+<p>And module <strong>details</strong> as well.</p>
+        <section id="classes">
+          <h2><a href="#classes">Classes</a></h2>
+          <dl class="m-doc">
+            <dt>class <a href="content_parse_docstrings.Class.html" class="m-doc">Class</a></dt>
+            <dd>This class has a <em>serious</em> docstring.
+With a multi-line summary.</dd>
+          </dl>
+        </section>
+        <section id="enums">
+          <h2><a href="#enums">Enums</a></h2>
+          <dl class="m-doc">
+            <dt>
+              <span class="m-doc-wrap-bumper">class <a href="#Enum" class="m-doc">Enum</a>(enum.Enum): </span><span class="m-doc-wrap"><a href="#Enum-VALUE" class="m-doc">VALUE</a> = 3</span>
+            </dt>
+            <dd>This enum has a <em>serious</em> docstring.</dd>
+          </dl>
+        </section>
+        <section id="functions">
+          <h2><a href="#functions">Functions</a></h2>
+          <dl class="m-doc">
+            <dt id="empty_docstring">
+              <span class="m-doc-wrap-bumper">def <a href="#empty_docstring" class="m-doc-self">empty_docstring</a>(</span><span class="m-doc-wrap">)</span>
+            </dt>
+            <dd></dd>
+            <dt>
+              <span class="m-doc-wrap-bumper">def <a href="#function" class="m-doc">function</a>(</span><span class="m-doc-wrap">a: str,
+              b: int) -&gt; float</span>
+            </dt>
+            <dd>This function has a <em>serious</em> docstring.</dd>
+            <dt id="summary_only">
+              <span class="m-doc-wrap-bumper">def <a href="#summary_only" class="m-doc-self">summary_only</a>(</span><span class="m-doc-wrap">)</span>
+            </dt>
+            <dd>This is just a summary.</dd>
+          </dl>
+        </section>
+        <section>
+          <h2>Enum documentation</h2>
+          <section class="m-doc-details" id="Enum"><div>
+            <h3>
+              class content_parse_docstrings.<wbr /><a href="#Enum" class="m-doc-self">Enum</a>(enum.Enum)
+            </h3>
+            <p>This enum has a <em>serious</em> docstring.</p>
+            <table class="m-table m-fullwidth m-flat m-doc">
+              <thead><tr><th style="width: 1%">Enumerators</th><th></th></tr></thead>
+              <tbody>
+                <tr>
+                  <td><a href="#Enum-VALUE" id="Enum-VALUE" class="m-doc-self">VALUE</a></td>
+                  <td>
+                  <p>Tho enum value docs are unfortunately *not* processed.</p>
+                  </td>
+                </tr>
+              </tbody>
+            </table>
+<p>And property <strong>details</strong> as well.</p>
+          </div></section>
+        </section>
+        <section>
+          <h2>Function documentation</h2>
+          <section class="m-doc-details" id="function"><div>
+            <h3>
+              <span class="m-doc-wrap-bumper">def content_parse_docstrings.<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#function" class="m-doc-self">function</a>(</span><span class="m-doc-wrap">a: str,
+              b: int) -&gt; float</span></span>
+            </h3>
+            <p>This function has a <em>serious</em> docstring.</p>
+            <table class="m-table m-fullwidth m-flat">
+              <thead>
+                <tr><th colspan="2">Parameters</th></tr>
+              </thead>
+              <tbody>
+                <tr>
+                  <td style="width: 1%">a</td>
+                  <td>And parameter docs.
+On multiple lines.</td>
+                </tr>
+                <tr>
+                  <td>b</td>
+                  <td><em>Wow.</em></td>
+                </tr>
+              </tbody>
+              <tfoot>
+                <tr>
+                  <th>Returns</th>
+                  <td>This too.</td>
+                </tr>
+              </tfoot>
+            </table>
+<p>And details.
+<strong>Amazing</strong>.</p>
+          </div></section>
+        </section>
+      </div>
+    </div>
+  </div>
+</article></main>
+</body>
+</html>
diff --git a/documentation/test_python/content_parse_docstrings/content_parse_docstrings.py b/documentation/test_python/content_parse_docstrings/content_parse_docstrings.py
new file mode 100644 (file)
index 0000000..24600dc
--- /dev/null
@@ -0,0 +1,42 @@
+"""This module has a *serious* docstring.
+
+And module **details** as well."""
+
+import enum
+
+class Class:
+    """This class has a *serious* docstring.
+    With a multi-line summary.
+
+    And class **details** as well."""
+
+    @property
+    def a_property(self) -> float:
+        """This property has a *serious* docstring.
+
+        And property **details** as well."""
+
+class Enum(enum.Enum):
+    """This enum has a *serious* docstring.
+
+    And property **details** as well."""
+
+    VALUE = 3
+
+Enum.VALUE.__doc__ = "Tho enum value docs are unfortunately *not* processed."
+
+def function(a: str, b: int) -> float:
+    """This function has a *serious* docstring.
+
+    :param a: And parameter docs.
+        On multiple lines.
+    :param b: *Wow.*
+    :return: This too.
+
+    And details.
+    **Amazing**."""
+
+def empty_docstring(): pass
+
+def summary_only():
+    """This is just a summary."""
index 507c316b22b788268ae6e732436f198add2c46d1..3e73a1c558da42b5b06ef2c4e1db6e57bfdf75e2 100644 (file)
@@ -40,6 +40,7 @@ class FancyLine(rst.Directive):
         return [node]
 
 post_crawl_call_count = 0
+docstring_call_count = 0
 pre_page_call_count = 0
 post_run_call_count = 0
 
@@ -47,6 +48,9 @@ def _post_crawl(**kwargs):
     global post_crawl_call_count
     post_crawl_call_count = post_crawl_call_count + 1
 
+def _docstring(**kwargs):
+    # Test hook that counts how many times a docstring got passed through the
+    # docstring hooks. Rebinding a module-level counter from inside a function
+    # needs an explicit global declaration, otherwise the assignment raises
+    # UnboundLocalError on the very first call.
+    global docstring_call_count
+    docstring_call_count = docstring_call_count + 1
+
+    # Docstring hooks are chained and a falsy return value means the docstring
+    # was fully consumed. This hook only counts, so hand the docstring back
+    # untouched for the following hooks and the default processing.
+    return kwargs.get('doc')
+
 def _pre_page(**kwargs):
     global pre_page_call_count
     pre_page_call_count = pre_page_call_count + 1
@@ -55,8 +59,9 @@ def _post_run(**kwargs):
     global post_run_call_count
     post_run_call_count = post_run_call_count + 1
 
-def register_mcss(hooks_post_crawl, hooks_pre_page, hooks_post_run, **kwargs):
+def register_mcss(hooks_post_crawl, hooks_docstring, hooks_pre_page, hooks_post_run, **kwargs):
     hooks_post_crawl += [_post_crawl]
+    hooks_docstring += [_docstring]
     hooks_pre_page += [_pre_page]
     hooks_post_run += [_post_run]
 
index 5df26cf63177e2e018f7e41f34b7bd282f153d7b..e073ebfff9536cb70b7f264f373cad284c53ba45 100644 (file)
@@ -37,3 +37,12 @@ class Content(BaseInspectTestCase):
         self.assertEqual(*self.actual_expected_contents('content.docstring_summary.html'))
         self.assertEqual(*self.actual_expected_contents('content.Class.html'))
         self.assertEqual(*self.actual_expected_contents('content.ClassWithSummary.html'))
+
+class ParseDocstrings(BaseInspectTestCase):
+    def test(self):
+        # Enable the m.sphinx plugin together with its docstring parsing option
+        config = {
+            'PLUGINS': ['m.sphinx'],
+            'M_SPHINX_PARSE_DOCSTRINGS': True
+        }
+        self.run_python(config)
+
+        # Check the module page first, then the class page
+        for filename in ['content_parse_docstrings.html', 'content_parse_docstrings.Class.html']:
+            self.assertEqual(*self.actual_expected_contents(filename))
index aab3794fafbc2665cd6aefd34de6c1bcd99bcd5a..773c584173f1bc8f2f38a97fef633e0a0cb25225 100644 (file)
@@ -89,5 +89,9 @@ class Plugins(BaseTestCase):
 
         import fancyline
         self.assertEqual(fancyline.post_crawl_call_count, 1)
+
+        # No code, thus no docstrings processed
+        self.assertEqual(fancyline.docstring_call_count, 0)
+
         self.assertEqual(fancyline.pre_page_call_count, 4)
         self.assertEqual(fancyline.post_run_call_count, 1)
index 1d6eee56b9c4c8aedfcc6664b4552bce2ffad959..0942d38b5cbf8e16567ada39663d1a9bbc3baca0 100755 (executable)
@@ -28,17 +28,22 @@ import argparse
 import logging
 import os
 import re
+import sys
 from types import SimpleNamespace as Empty
-from typing import Dict
+from typing import Dict, List, Optional
 from urllib.parse import urljoin
 import zlib
 
+import docutils
 from docutils import nodes, utils
 from docutils.parsers import rst
 from docutils.parsers.rst import directives
 from docutils.parsers.rst.roles import set_classes
 from docutils.parsers.rst.states import Inliner
 
+sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
+import m.htmlsanity
+
 referer_path = []
 module_doc_output = None
 class_doc_output = None
@@ -299,6 +304,71 @@ def ref(name, rawtext, text, lineno, inliner: Inliner, options={}, content=[]):
         node = nodes.literal(rawtext, target, **_options)
     return [node], []
 
+def consume_docstring(type, path: List[str], signature: Optional[str], doc: str) -> str:
+    # Docstring hook, registered in register_mcss() only when the
+    # M_SPHINX_PARSE_DOCSTRINGS setting is enabled. Wraps the docstring in a
+    # ``.. py:<kind>::`` directive for the given entry and feeds the result
+    # to docutils.
+    #
+    # type          Entry type, only its `.name` is inspected ('MODULE',
+    #               'CLASS', 'ENUM', 'FUNCTION', 'OVERLOADED_FUNCTION',
+    #               'PROPERTY')
+    # path          Name components of the entry, joined with dots
+    # signature     Appended verbatim after the joined path, if not empty
+    # doc           The docstring text to process
+    #
+    # For a type that's not listed above (and isn't 'DATA') the docstring is
+    # returned unchanged. For the handled types there's no return statement,
+    # so the caller gets None.
+    # NOTE(review): the return annotation says `str`, which doesn't match the
+    # implicit None on the handled path -- confirm what the hook caller
+    # expects.
+
+    # Create the directive header based on type
+    if type.name == 'MODULE':
+        source = '.. py:module:: '
+        doc_output = module_doc_output
+    elif type.name == 'CLASS':
+        source = '.. py:class:: '
+        doc_output = class_doc_output
+    elif type.name == 'ENUM': # TODO: enum values?
+        source = '.. py:enum:: '
+        doc_output = enum_doc_output
+    elif type.name in ['FUNCTION', 'OVERLOADED_FUNCTION']:
+        source = '.. py:function:: '
+        doc_output = function_doc_output
+    elif type.name == 'PROPERTY':
+        source = '.. py:property:: '
+        doc_output = property_doc_output
+    else:
+        # Data don't have docstrings, you silly
+        assert type.name != 'DATA'
+        # Ignore unknown types, pass the docs through
+        return doc
+
+    # Add path and signature to the header
+    # The same string is used below as the key into doc_output
+    path_signature_str = '.'.join(path) + (signature if signature else '')
+    source += path_signature_str + '\n'
+
+    # Assuming first paragraph is summary, turn it into a :summary: directive
+    # option with successive lines indented
+    # If there's no blank line in the docstring, partition() puts everything
+    # into the summary and leaves `doc` empty
+    summary, _, doc = doc.partition('\n\n')
+    source += '    :summary: {}\n'.format(summary.replace('\n', '\n        '))
+
+    # The next paragraph could be option list. If that's so, indent those as
+    # well, append
+    if doc.startswith(':'):
+        options, _, doc = doc.partition('\n\n')
+        source += '    {}\n\n'.format(options.replace('\n', '\n    '))
+    else:
+        # No options, only the blank line separating options from content
+        source += '\n'
+
+    # The rest (if any) is content. Indent as well.
+    source += '    {}\n'.format(doc.replace('\n', '\n    '))
+
+    # Unleash docutils on this piece. It will call into the proper directive
+    # and do the thing. Ignore the output as there shouldn't be anything left.
+    # NOTE(review): only `import docutils` is visible at the top of the file,
+    # so docutils.core and docutils.io resolve here only if some other import
+    # already loaded these submodules -- confirm, or import them explicitly.
+    pub = docutils.core.Publisher(
+        writer=m.htmlsanity.SaneHtmlWriter(),
+        source_class=docutils.io.StringInput,
+        destination_class=docutils.io.StringOutput)
+    pub.set_components('standalone', 'restructuredtext', 'html')
+    pub.writer.translator_class = m.htmlsanity.SaneHtmlTranslator
+    pub.process_programmatic_settings(None, m.htmlsanity.docutils_settings, None)
+    # Docutils uses a deprecated U mode for opening files, so instead of
+    # monkey-patching docutils.io.FileInput to not do that (like Pelican does),
+    # I just read the thing myself.
+    # NOTE(review): no file is read here, the source is the string assembled
+    # above -- the comment above looks like a leftover from file-based code.
+    # TODO it *somehow* needs to supply the original docstring filename and
+    # line range to it for better error reporting, this is too awful
+    pub.set_source(source=source)
+    pub.publish()
+
+    # Because there's no fallback to a docstring, mark everything as non-None
+    # The lookup expects an entry under path_signature_str to exist after
+    # publishing, otherwise it raises a KeyError (assuming doc_output is a
+    # plain dict). NOTE(review): presumably the py:* directive invoked by
+    # docutils creates that entry -- confirm.
+    doc_output = doc_output[path_signature_str]
+    if doc_output.get('summary') is None: doc_output['summary'] = ''
+    if doc_output.get('content') is None: doc_output['content'] = ''
+
 def remember_referer_path(path):
     global referer_path
     referer_path = path
@@ -392,7 +462,7 @@ def merge_inventories(name_map, **kwargs):
                     f.write(compressor.compress('{} {} 2 {} {}\n'.format(path, type_, url, title).encode('utf-8')))
             f.write(compressor.flush())
 
-def register_mcss(mcss_settings, module_doc_contents, class_doc_contents, enum_doc_contents, function_doc_contents, property_doc_contents, data_doc_contents, hooks_post_crawl, hooks_pre_page, **kwargs):
+def register_mcss(mcss_settings, module_doc_contents, class_doc_contents, enum_doc_contents, function_doc_contents, property_doc_contents, data_doc_contents, hooks_post_crawl, hooks_docstring, hooks_pre_page, **kwargs):
     global module_doc_output, class_doc_output, enum_doc_output, function_doc_output, property_doc_output, data_doc_output, inventory_filename
     module_doc_output = module_doc_contents
     class_doc_output = class_doc_contents
@@ -414,6 +484,8 @@ def register_mcss(mcss_settings, module_doc_contents, class_doc_contents, enum_d
 
     rst.roles.register_local_role('ref', ref)
 
+    if mcss_settings.get('M_SPHINX_PARSE_DOCSTRINGS', False):
+        hooks_docstring += [consume_docstring]
     hooks_pre_page += [remember_referer_path]
     hooks_post_crawl += [merge_inventories]