chiark / gitweb /
documentation/python: properly handle recursively imported modules.
authorVladimír Vondruš <mosra@centrum.cz>
Thu, 11 Jul 2019 23:03:42 +0000 (01:03 +0200)
committerVladimír Vondruš <mosra@centrum.cz>
Sun, 14 Jul 2019 17:11:08 +0000 (19:11 +0200)
Basically converting the crawl to be breadth-first instead of
depth-first and maintaining a set of crawled modules to ensure they're
never visited again.

This could happen with classes as well, but it's rarer -- one would need
to add it to __all__ twice (or once and then it gets discovered
implicitly) etc. For now I'm simply asserting, will revisit in case it
proves to be an actual problem.

documentation/python.py
documentation/test_python/inspect_recursive/inspect_recursive.a.html [new file with mode: 0644]
documentation/test_python/inspect_recursive/inspect_recursive.first.html [new file with mode: 0644]
documentation/test_python/inspect_recursive/inspect_recursive.html [new file with mode: 0644]
documentation/test_python/inspect_recursive/inspect_recursive/__init__.py [new file with mode: 0644]
documentation/test_python/inspect_recursive/inspect_recursive/first.py [new file with mode: 0644]
documentation/test_python/inspect_recursive/inspect_recursive/second.py [new file with mode: 0644]
documentation/test_python/test_inspect.py

index 996e2691f1633144c00a5c764d984cee6ff032b6..2b79f402b1d218c96064e13186f6935d3e5b0e4e 100755 (executable)
@@ -161,6 +161,8 @@ class State:
 
         self.name_map: Dict[str, Empty] = {}
 
+        self.crawled: Set[object] = set()
+
 def map_name_prefix(state: State, type: str) -> str:
     for prefix, replace in state.module_mapping.items():
         if type == prefix or type.startswith(prefix + '.'):
@@ -240,6 +242,14 @@ _filtered_builtin_properties = set([
 ])
 
 def crawl_class(state: State, path: List[str], class_):
+    assert inspect.isclass(class_)
+
+    # TODO: if this fires, it means there's a class duplicated in more than one
+    # __all__ (or it gets picked up implicitly and then in __all__) -- how to
+    # handle gracefully?
+    assert id(class_) not in state.crawled
+    state.crawled.add(id(class_))
+
     class_entry = Empty()
     class_entry.type = EntryType.CLASS
     class_entry.object = class_
@@ -288,13 +298,29 @@ def crawl_class(state: State, path: List[str], class_):
     # Add itself to the name map
     state.name_map['.'.join(path)] = class_entry
 
-def crawl_module(state: State, path: List[str], module):
+def crawl_module(state: State, path: List[str], module) -> List[Tuple[List[str], object]]:
+    assert inspect.ismodule(module)
+
+    # Assume this module is not crawled yet -- the parent crawl shouldn't even
+    # put it to members if it's crawled already. Otherwise add itself to
+    # the list of crawled objects to avoid going through it again.
+    assert id(module) not in state.crawled
+    state.crawled.add(id(module))
+
+    # This module isn't a duplicate, thus we can now safely add itself to
+    # parent's members (if there's a parent)
+    if len(path) > 1: state.name_map['.'.join(path[:-1])].members += [path[-1]]
+
     module_entry = Empty()
     module_entry.type = EntryType.MODULE
     module_entry.object = module
     module_entry.path = path
     module_entry.members = []
 
+    # This gets returned to ensure the modules get processed in a breadth-first
+    # order
+    submodules_to_crawl: List[Tuple[List[str], object]] = []
+
     # This is actually complicated -- if the module defines __all__, use that.
     # The __all__ is meant to expose the public API, so we don't filter out
     # underscored things.
@@ -340,7 +366,14 @@ def crawl_module(state: State, path: List[str], module):
                 logging.warning("unknown symbol %s in %s", name, '.'.join(path))
                 continue
             elif type == EntryType.MODULE:
-                crawl_module(state, subpath, object)
+                # TODO: this might fire if a module is in __all__ after it was
+                # picked up implicitly before -- how to handle gracefully?
+                assert id(object) not in state.crawled
+                submodules_to_crawl += [(subpath, object)]
+                # Not adding to module_entry.members, done by the nested
+                # crawl_module() itself if it is sure that it isn't a
+                # duplicated module
+                continue
             elif type == EntryType.CLASS:
                 crawl_class(state, subpath, object)
             else:
@@ -405,7 +438,11 @@ def crawl_module(state: State, path: List[str], module):
                 # Ignore unknown object types (with __all__ we warn instead)
                 continue
             elif type == EntryType.MODULE:
-                crawl_module(state, subpath, object)
+                submodules_to_crawl += [(subpath, object)]
+                # Not adding to module_entry.members, done by the nested
+                # crawl_module() itself if it is sure that it isn't a
+                # duplicated module
+                continue
             elif type == EntryType.CLASS:
                 crawl_class(state, subpath, object)
             else:
@@ -421,6 +458,8 @@ def crawl_module(state: State, path: List[str], module):
     # Add itself to the name map
     state.name_map['.'.join(path)] = module_entry
 
+    return submodules_to_crawl
+
 _pybind_name_rx = re.compile('[a-zA-Z0-9_]*')
 _pybind_arg_name_rx = re.compile('[*a-zA-Z0-9_]+')
 _pybind_type_rx = re.compile('[a-zA-Z0-9_.]+')
@@ -1187,17 +1226,22 @@ def run(basedir, config, templates):
     for hook in state.hooks_pre_page: hook()
 
     # Crawl all input modules to gather the name tree, put their names into a
-    # list for the index
+    # list for the index. The crawl is done breadth-first, so the function
+    # returns a list of submodules to be crawled next.
     class_index = []
+    modules_to_crawl = []
     for module in config['INPUT_MODULES']:
         if isinstance(module, str):
             module_name = module
             module = importlib.import_module(module)
         else:
             module_name = module.__name__
-
-        crawl_module(state, [module_name], module)
+        modules_to_crawl += [([module_name], module)]
         class_index += [module_name]
+    while modules_to_crawl:
+        path, object = modules_to_crawl.pop(0)
+        if id(object) in state.crawled: continue
+        modules_to_crawl += crawl_module(state, path, object)
 
     # Add special pages to the name map. The pages are done after so they can
     # override these.
diff --git a/documentation/test_python/inspect_recursive/inspect_recursive.a.html b/documentation/test_python/inspect_recursive/inspect_recursive.a.html
new file mode 100644 (file)
index 0000000..69333f4
--- /dev/null
@@ -0,0 +1,31 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8" />
+  <title>inspect_recursive.a | My Python Project</title>
+  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Source+Sans+Pro:400,400i,600,600i%7CSource+Code+Pro:400,400i,600" />
+  <link rel="stylesheet" href="m-dark+documentation.compiled.css" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+</head>
+<body>
+<header><nav id="navigation">
+  <div class="m-container">
+    <div class="m-row">
+      <a href="index.html" id="m-navbar-brand" class="m-col-t-8 m-col-m-none m-left-m">My Python Project</a>
+    </div>
+  </div>
+</nav></header>
+<main><article>
+  <div class="m-container m-container-inflatable">
+    <div class="m-row">
+      <div class="m-col-l-10 m-push-l-1">
+        <h1>
+          <span class="m-breadcrumb"><a href="inspect_recursive.html">inspect_recursive</a>.<wbr/></span>a <span class="m-thin">module</span>
+        </h1>
+        <p>Second module, imported as inspect_recursive.a, with no contents</p>
+      </div>
+    </div>
+  </div>
+</article></main>
+</body>
+</html>
diff --git a/documentation/test_python/inspect_recursive/inspect_recursive.first.html b/documentation/test_python/inspect_recursive/inspect_recursive.first.html
new file mode 100644 (file)
index 0000000..c53e926
--- /dev/null
@@ -0,0 +1,31 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8" />
+  <title>inspect_recursive.first | My Python Project</title>
+  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Source+Sans+Pro:400,400i,600,600i%7CSource+Code+Pro:400,400i,600" />
+  <link rel="stylesheet" href="m-dark+documentation.compiled.css" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+</head>
+<body>
+<header><nav id="navigation">
+  <div class="m-container">
+    <div class="m-row">
+      <a href="index.html" id="m-navbar-brand" class="m-col-t-8 m-col-m-none m-left-m">My Python Project</a>
+    </div>
+  </div>
+</nav></header>
+<main><article>
+  <div class="m-container m-container-inflatable">
+    <div class="m-row">
+      <div class="m-col-l-10 m-push-l-1">
+        <h1>
+          <span class="m-breadcrumb"><a href="inspect_recursive.html">inspect_recursive</a>.<wbr/></span>first <span class="m-thin">module</span>
+        </h1>
+        <p>First module, imported as inspect_recursive.first, with no contents</p>
+      </div>
+    </div>
+  </div>
+</article></main>
+</body>
+</html>
diff --git a/documentation/test_python/inspect_recursive/inspect_recursive.html b/documentation/test_python/inspect_recursive/inspect_recursive.html
new file mode 100644 (file)
index 0000000..a8668b6
--- /dev/null
@@ -0,0 +1,73 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8" />
+  <title>inspect_recursive | My Python Project</title>
+  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Source+Sans+Pro:400,400i,600,600i%7CSource+Code+Pro:400,400i,600" />
+  <link rel="stylesheet" href="m-dark+documentation.compiled.css" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+</head>
+<body>
+<header><nav id="navigation">
+  <div class="m-container">
+    <div class="m-row">
+      <a href="index.html" id="m-navbar-brand" class="m-col-t-8 m-col-m-none m-left-m">My Python Project</a>
+    </div>
+  </div>
+</nav></header>
+<main><article>
+  <div class="m-container m-container-inflatable">
+    <div class="m-row">
+      <div class="m-col-l-10 m-push-l-1">
+        <h1>
+          inspect_recursive <span class="m-thin">module</span>
+        </h1>
+        <p>Recursive imports</p>
+        <div class="m-block m-default">
+          <h3>Contents</h3>
+          <ul>
+            <li>
+              Reference
+              <ul>
+                <li><a href="#packages">Modules</a></li>
+                <li><a href="#classes">Classes</a></li>
+                <li><a href="#functions">Functions</a></li>
+              </ul>
+            </li>
+          </ul>
+        </div>
+        <section id="namespaces">
+          <h2><a href="#namespaces">Modules</a></h2>
+          <dl class="m-doc">
+            <dt>module <a href="inspect_recursive.a.html" class="m-doc">a</a></dt>
+            <dd>Second module, imported as inspect_recursive.a, with no contents</dd>
+            <dt>module <a href="inspect_recursive.first.html" class="m-doc">first</a></dt>
+            <dd>First module, imported as inspect_recursive.first, with no contents</dd>
+          </dl>
+        </section>
+        <section id="classes">
+          <h2><a href="#classes">Classes</a></h2>
+          <dl class="m-doc">
+            <dt>class <a href="inspect_recursive.Foo.html" class="m-doc">Foo</a></dt>
+            <dd>A class</dd>
+          </dl>
+        </section>
+        <section id="functions">
+          <h2><a href="#functions">Functions</a></h2>
+          <dl class="m-doc">
+            <dt>
+              <span class="m-doc-wrap-bumper">def <a href="#bar" class="m-doc-self" id="bar">bar</a>(</span><span class="m-doc-wrap">) -&gt; <a href="inspect_recursive.Foo.html" class="m-doc">Foo</a></span>
+            </dt>
+            <dd>Function that also returns Foo</dd>
+            <dt>
+              <span class="m-doc-wrap-bumper">def <a href="#foo" class="m-doc-self" id="foo">foo</a>(</span><span class="m-doc-wrap">) -&gt; <a href="inspect_recursive.Foo.html" class="m-doc">Foo</a></span>
+            </dt>
+            <dd>Function that returns Foo</dd>
+          </dl>
+        </section>
+      </div>
+    </div>
+  </div>
+</article></main>
+</body>
+</html>
diff --git a/documentation/test_python/inspect_recursive/inspect_recursive/__init__.py b/documentation/test_python/inspect_recursive/inspect_recursive/__init__.py
new file mode 100644 (file)
index 0000000..a512bbb
--- /dev/null
@@ -0,0 +1,18 @@
+"""Recursive imports"""
+
+import inspect_recursive # Importing self, should get ignored
+
+class Foo:
+    """A class"""
+
+from . import first
+
+# Importing a module twice, only one of them should be picked
+from . import second as b
+from . import second as a
+
+def foo() -> b.Bar:
+    """Function that returns Foo"""
+
+def bar() -> a.Bar:
+    """Function that also returns Foo"""
diff --git a/documentation/test_python/inspect_recursive/inspect_recursive/first.py b/documentation/test_python/inspect_recursive/inspect_recursive/first.py
new file mode 100644 (file)
index 0000000..15ad459
--- /dev/null
@@ -0,0 +1,5 @@
+"""First module, imported as inspect_recursive.first, with no contents"""
+
+import inspect_recursive
+
+from inspect_recursive import Foo as Bar
diff --git a/documentation/test_python/inspect_recursive/inspect_recursive/second.py b/documentation/test_python/inspect_recursive/inspect_recursive/second.py
new file mode 100644 (file)
index 0000000..2b96dc1
--- /dev/null
@@ -0,0 +1,6 @@
+"""Second module, imported as inspect_recursive.a, with no contents"""
+
+import inspect_recursive.first as a
+import inspect_recursive.second as b
+
+from inspect_recursive import Foo as Bar
index 6920b9edbbe8c01b6138e4c63ae97b401c2a4a2d..e7ee4612015bb93e3a043be5e31165a64d0f7d69 100644 (file)
@@ -136,3 +136,13 @@ class NameMapping(BaseInspectTestCase):
         self.assertEqual(*self.actual_expected_contents('inspect_name_mapping.html'))
         self.assertEqual(*self.actual_expected_contents('inspect_name_mapping.Class.html'))
         self.assertEqual(*self.actual_expected_contents('inspect_name_mapping.submodule.html'))
+
+class Recursive(BaseInspectTestCase):
+    def __init__(self, *args, **kwargs):
+        super().__init__(__file__, 'recursive', *args, **kwargs)
+
+    def test(self):
+        self.run_python()
+        self.assertEqual(*self.actual_expected_contents('inspect_recursive.html'))
+        self.assertEqual(*self.actual_expected_contents('inspect_recursive.first.html'))
+        self.assertEqual(*self.actual_expected_contents('inspect_recursive.a.html'))