From d8215902602756ac88a1c65254eb405fabe4c343 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Sat, 20 Apr 2019 23:41:31 +0200 Subject: [PATCH] documentation/python: parse pybind11 function signatures. --- doc/documentation/python.rst | 14 + documentation/python.py | 299 +++++++++++++++--- documentation/test_python/CMakeLists.txt | 2 +- .../pybind_signatures.MyClass.html | 84 +++++ .../pybind_signatures/pybind_signatures.cpp | 49 +++ .../pybind_signatures/pybind_signatures.html | 85 +++++ documentation/test_python/test_pybind.py | 134 ++++++++ 7 files changed, 620 insertions(+), 47 deletions(-) create mode 100644 documentation/test_python/pybind_signatures/pybind_signatures.MyClass.html create mode 100644 documentation/test_python/pybind_signatures/pybind_signatures.cpp create mode 100644 documentation/test_python/pybind_signatures/pybind_signatures.html diff --git a/doc/documentation/python.rst b/doc/documentation/python.rst index 67b154e4..a1349778 100644 --- a/doc/documentation/python.rst +++ b/doc/documentation/python.rst @@ -528,6 +528,20 @@ has to do a few pybind11-specific workarounds to generate expected output. This behavior is not enabled by default as it *might* have unwanted consequences in pure Python code, enable it using the :py:`PYBIND11_COMPATIBILITY` option. +`Function signatures`_ +---------------------- + +For reasons explained in :gh:`pybind/pybind11#990`, pybind11 is not able to +provide function signatures through introspection and thus the script falls +back to parsing argument names, type annotations and default values from the +docstring instead. By default, unless :cpp:`py::arg()` is used, function +arguments are positional-only (shown as :py:`arg0`, :py:`arg1`, ...) and marked +as such in the output. + +The signature parsing can't handle all cases and, especially when templated C++ +type names leak through, it may fail to extract the argument names. 
If that +happens, the function signature shows just an ellipsis (``…``). + `Enums`_ -------- diff --git a/documentation/python.py b/documentation/python.py index 1e62a227..03b20f35 100755 --- a/documentation/python.py +++ b/documentation/python.py @@ -34,6 +34,7 @@ import inspect import logging import mimetypes import os +import re import sys import shutil @@ -159,6 +160,128 @@ def is_enum(state: State, object) -> bool: def make_url(path: List[str]) -> str: return '.'.join(path) + '.html' +_pybind_name_rx = re.compile('[a-zA-Z0-9_]+') +_pybind_arg_name_rx = re.compile('[*a-zA-Z0-9_]+') +_pybind_type_rx = re.compile('[a-zA-Z0-9_.]+') +_pybind_default_value_rx = re.compile('[^,)]+') + +def parse_pybind_type(signature: str) -> str: + type = _pybind_type_rx.match(signature).group(0) + signature = signature[len(type):] + if signature and signature[0] == '[': + type += '[' + signature = signature[1:] + while signature[0] != ']': + inner_type = parse_pybind_type(signature) + type += inner_type + signature = signature[len(inner_type):] + + if signature[0] == ']': break + assert signature.startswith(', ') + signature = signature[2:] + type += ', ' + + assert signature[0] == ']' + type += ']' + + return type + +def parse_pybind_signature(signature: str) -> Tuple[str, str, List[Tuple[str, str, str]], str]: + original_signature = signature # For error reporting + name = _pybind_name_rx.match(signature).group(0) + signature = signature[len(name):] + args = [] + assert signature[0] == '(' + signature = signature[1:] + + # Arguments + while signature[0] != ')': + # Name + arg_name = _pybind_arg_name_rx.match(signature).group(0) + assert arg_name + signature = signature[len(arg_name):] + + # Type (optional) + if signature.startswith(': '): + signature = signature[2:] + arg_type = parse_pybind_type(signature) + signature = signature[len(arg_type):] + else: + arg_type = None + + # Default (optional) -- for now take everything until the next comma + # TODO: ugh, do properly + if 
signature.startswith('='): + signature = signature[1:] + default = _pybind_default_value_rx.match(signature).group(0) + signature = signature[len(default):] + else: + default = None + + args += [(arg_name, arg_type, default)] + + if signature[0] == ')': break + + # Failed to parse, return an ellipsis and docs + if not signature.startswith(', '): + end = original_signature.find('\n') + logging.warning("cannot parse pybind11 function signature %s", original_signature[:end]) + if end != -1 and len(original_signature) > end + 1 and original_signature[end + 1] == '\n': + brief = extract_brief(original_signature[end + 1:]) + else: + brief = '' + return (name, brief, [('…', None, None)], None) + + signature = signature[2:] + + assert signature[0] == ')' + signature = signature[1:] + + # Return type (optional) + if signature.startswith(' -> '): + signature = signature[4:] + return_type = parse_pybind_type(signature) + signature = signature[len(return_type):] + else: + return_type = None + + assert not signature or signature[0] == '\n' + if len(signature) > 1 and signature[1] == '\n': + brief = extract_brief(signature[2:]) + else: + brief = '' + + return (name, brief, args, return_type) + +def parse_pybind_docstring(name: str, doc: str) -> List[Tuple[str, str, List[Tuple[str, str, str]], str]]: + # Multiple overloads, parse each separately + overload_header = "{}(*args, **kwargs)\nOverloaded function.\n\n".format(name); + if doc.startswith(overload_header): + doc = doc[len(overload_header):] + overloads = [] + id = 1 + while True: + assert doc.startswith('{}. {}('.format(id, name)) + id = id + 1 + next = doc.find('{}. {}('.format(id, name)) + + # Parse the signature and docs from known slice + overloads += [parse_pybind_signature(doc[3:next])] + assert overloads[-1][0] == name + if next == -1: break + + # Continue to the next signature. 
Sorry, didn't bother to check how + # docstrings for more than 9 overloads look yet, that's why the + # assert + assert id < 10 + doc = doc[next:] + + return overloads + + # Normal function, parse and return the first signature + else: + return [parse_pybind_signature(doc)] + def extract_brief(doc: str) -> str: if not doc: return '' # some modules (xml.etree) have that :( doc = inspect.cleandoc(doc) @@ -264,43 +387,130 @@ def extract_enum_doc(state: State, path: List[str], enum_): return out -def extract_function_doc(path: List[str], function): +def extract_function_doc(state: State, parent, path: List[str], function) -> List[Any]: assert inspect.isfunction(function) or inspect.ismethod(function) or inspect.isroutine(function) - out = Empty() - out.name = path[-1] - out.brief = extract_brief(function.__doc__) - out.params = [] - out.has_complex_params = False - out.has_details = False + # Extract the signature from the docstring for pybind11, since it can't + # expose it to the metadata: https://github.com/pybind/pybind11/issues/990 + # What's not solvable with metadata, however, are function overloads --- + # one function in Python may equal more than one function on the C++ side. + # To make the docs usable, list all overloads separately. 
+ if state.config['PYBIND11_COMPATIBILITY'] and function.__doc__.startswith(path[-1]): + funcs = parse_pybind_docstring(path[-1], function.__doc__) + overloads = [] + for name, brief, args, type in funcs: + out = Empty() + out.name = path[-1] + out.params = [] + out.has_complex_params = False + out.has_details = False + out.brief = brief + + # Don't show None return type for void functions + out.type = None if type == 'None' else type + + # There's no other way to check staticmethods than to check for + # self being the name of first parameter :( No support for + # classmethods, as C++11 doesn't have that + out.is_classmethod = False + if inspect.isclass(parent) and args and args[0][0] == 'self': + out.is_staticmethod = False + else: + out.is_staticmethod = True + + # Guesstimate whether the arguments are positional-only or + # positional-or-keyword. It's either all or none. This is a brown + # magic, sorry. + + # For instance methods positional-only argument names are either + # self (for the first argument) or arg(I-1) (for second + # argument and further). Also, the `self` argument is + # positional-or-keyword only if there are positional-or-keyword + # arguments after it, otherwise it's positional-only. + if inspect.isclass(parent) and not out.is_staticmethod: + assert args and args[0][0] == 'self' + + positional_only = True + for i, arg in enumerate(args[1:]): + name, type, default = arg + if name != 'arg{}'.format(i): + positional_only = False + break + + # For static methods or free functions positional-only arguments + # are argI. 
+ else: + positional_only = True + for i, arg in enumerate(args): + name, type, default = arg + if name != 'arg{}'.format(i): + positional_only = False + break - try: - signature = inspect.signature(function) - out.type = extract_annotation(signature.return_annotation) - for i in signature.parameters.values(): + for i, arg in enumerate(args): + name, type, default = arg + param = Empty() + param.name = name + # Don't include redundant type for the self argument + if name == 'self': param.type = None + else: param.type = type + param.default = default + if type or default: out.has_complex_params = True + + # *args / **kwargs are shown in the signature only for + # overloaded functions and we are expanding those + assert name not in ['*args', '**kwargs'] + + param.kind = 'POSITIONAL_ONLY' if positional_only else 'POSITIONAL_OR_KEYWORD' + + out.params += [param] + + overloads += [out] + + return overloads + + # Sane introspection path for non-pybind11 code + else: + out = Empty() + out.name = path[-1] + out.params = [] + out.has_complex_params = False + out.has_details = False + out.brief = extract_brief(function.__doc__) + + # Decide if classmethod or staticmethod in case this is a method + if inspect.isclass(parent): + out.is_classmethod = inspect.ismethod(function) + out.is_staticmethod = out.name in parent.__dict__ and isinstance(parent.__dict__[out.name], staticmethod) + + try: + signature = inspect.signature(function) + out.type = extract_annotation(signature.return_annotation) + for i in signature.parameters.values(): + param = Empty() + param.name = i.name + param.type = extract_annotation(i.annotation) + if param.type: + out.has_complex_params = True + if i.default is inspect.Signature.empty: + param.default = None + else: + param.default = repr(i.default) + out.has_complex_params = True + param.kind = str(i.kind) + out.params += [param] + + # In CPython, some builtin functions (such as math.log) do not provide + # metadata about their arguments. 
Source: + # https://docs.python.org/3/library/inspect.html#inspect.signature + except ValueError: param = Empty() - param.name = i.name - param.type = extract_annotation(i.annotation) - if param.type: - out.has_complex_params = True - if i.default is inspect.Signature.empty: - param.default = None - else: - param.default = repr(i.default) - out.has_complex_params = True - param.kind = str(i.kind) - out.params += [param] - - # In CPython, some builtin functions (such as math.log) do not provide - # metadata about their arguments. Source: - # https://docs.python.org/3/library/inspect.html#inspect.signature - except ValueError: - param = Empty() - param.name = '...' - param.name_type = param.name - out.params = [param] + param.name = '...' + param.name_type = param.name + out.params = [param] + out.type = None - return out + return [out] def extract_property_doc(path: List[str], property): assert inspect.isdatadescriptor(property) @@ -393,7 +603,7 @@ def render_module(state: State, path, module, env): page.enums += [enum_] if enum_.has_details: page.has_enum_details = True elif inspect.isfunction(object) or inspect.isbuiltin(object): - page.functions += [extract_function_doc(subpath, object)] + page.functions += extract_function_doc(state, module, subpath, object) # Assume everything else is data. 
The builtin help help() (from # pydoc) does the same: # https://github.com/python/cpython/blob/d29b3dd9227cfc4a23f77e99d62e20e063272de1/Lib/pydoc.py#L113 @@ -443,7 +653,7 @@ def render_module(state: State, path, module, env): subpath = path + [name] if not object.__doc__: logging.warning("%s() is undocumented", '.'.join(subpath)) - page.functions += [extract_function_doc(subpath, object)] + page.functions += extract_function_doc(state, module, subpath, object) # Get data # TODO: unify this query @@ -571,18 +781,15 @@ def render_class(state: State, path, class_, env): subpath = path + [name] if not object.__doc__: logging.warning("%s() is undocumented", '.'.join(subpath)) - function = extract_function_doc(subpath, object) - function.is_classmethod = inspect.ismethod(object) - function.is_staticmethod = name in class_.__dict__ and isinstance(class_.__dict__[name], staticmethod) - - if name.startswith('__'): - page.dunder_methods += [function] - elif function.is_classmethod: - page.classmethods += [function] - elif function.is_staticmethod: - page.staticmethods += [function] - else: - page.methods += [function] + for function in extract_function_doc(state, class_, subpath, object): + if name.startswith('__'): + page.dunder_methods += [function] + elif function.is_classmethod: + page.classmethods += [function] + elif function.is_staticmethod: + page.staticmethods += [function] + else: + page.methods += [function] # Get properties for name, object in inspect.getmembers(class_, inspect.isdatadescriptor): diff --git a/documentation/test_python/CMakeLists.txt b/documentation/test_python/CMakeLists.txt index c37d827d..e38cccaa 100644 --- a/documentation/test_python/CMakeLists.txt +++ b/documentation/test_python/CMakeLists.txt @@ -27,7 +27,7 @@ project(McssDocumentationPybindTests) find_package(pybind11 CONFIG REQUIRED) -foreach(target enums submodules) +foreach(target signatures enums submodules) pybind11_add_module(pybind_${target} pybind_${target}/pybind_${target}.cpp) 
set_target_properties(pybind_${target} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/pybind_${target}) endforeach() diff --git a/documentation/test_python/pybind_signatures/pybind_signatures.MyClass.html b/documentation/test_python/pybind_signatures/pybind_signatures.MyClass.html new file mode 100644 index 00000000..93360731 --- /dev/null +++ b/documentation/test_python/pybind_signatures/pybind_signatures.MyClass.html @@ -0,0 +1,84 @@ + + + + + pybind_signatures.MyClass | My Python Project + + + + + +
+
+
+
+
+

+ pybind_signatures.MyClass class +

+

My fun class!

+
+

Contents

+ +
+
+

Static methods

+
+
+ def static_function(arg0: int, + arg1: float, /) -> pybind_signatures.MyClass +
+
Static method with positional-only args
+
+
+
+

Methods

+
+
+ def another(self, /) -> int +
+
Instance method with no args, 'self' is thus position-only
+
+ def instance_function(self, + arg0: int, + arg1: str, /) -> Tuple[float, int] +
+
Instance method with positional-only args
+
+ def instance_function_kwargs(self, + hey: int, + what: str = 'eh?') -> Tuple[float, int] +
+
Instance method with position or keyword args
+
+
+
+

Special methods

+
+
+ def __init__(self, /) +
+
Constructor
+
+
+
+
+
+
+ + diff --git a/documentation/test_python/pybind_signatures/pybind_signatures.cpp b/documentation/test_python/pybind_signatures/pybind_signatures.cpp new file mode 100644 index 00000000..c84b49ba --- /dev/null +++ b/documentation/test_python/pybind_signatures/pybind_signatures.cpp @@ -0,0 +1,49 @@ +#include +#include /* needed for std::vector! */ + +namespace py = pybind11; + +int scale(int a, float argument) { + return int(a*argument); +} + +void voidFunction(int) {} + +std::tuple takingAListReturningATuple(const std::vector&) { + return {}; +} + +template struct Crazy {}; + +void crazySignature(const Crazy<3, int>&) {} + +std::string overloaded(int) { return {}; } +bool overloaded(float) { return {}; } + +struct MyClass { + static MyClass staticFunction(int, float) { return {}; } + + std::pair instanceFunction(int, const std::string&) { return {0.5f, 42}; } + + int another() { return 42; } +}; + +PYBIND11_MODULE(pybind_signatures, m) { + m.doc() = "pybind11 function signature extraction"; + + m + .def("scale", &scale, "Scale an integer") + .def("scale_kwargs", &scale, "Scale an integer, kwargs", py::arg("a"), py::arg("argument")) + .def("void_function", &voidFunction, "Returns nothing") + .def("taking_a_list_returning_a_tuple", &takingAListReturningATuple, "Takes a list, returns a tuple") + .def("crazy_signature", &crazySignature, "Function that failed to get parsed") + .def("overloaded", static_cast(&overloaded), "Overloaded for ints") + .def("overloaded", static_cast(&overloaded), "Overloaded for floats"); + + py::class_(m, "MyClass", "My fun class!") + .def_static("static_function", &MyClass::staticFunction, "Static method with positional-only args") + .def(py::init(), "Constructor") + .def("instance_function", &MyClass::instanceFunction, "Instance method with positional-only args") + .def("instance_function_kwargs", &MyClass::instanceFunction, "Instance method with position or keyword args", py::arg("hey"), py::arg("what") = "eh?") + .def("another", 
&MyClass::another, "Instance method with no args, 'self' is thus position-only"); +} diff --git a/documentation/test_python/pybind_signatures/pybind_signatures.html b/documentation/test_python/pybind_signatures/pybind_signatures.html new file mode 100644 index 00000000..82ed966d --- /dev/null +++ b/documentation/test_python/pybind_signatures/pybind_signatures.html @@ -0,0 +1,85 @@ + + + + + pybind_signatures | My Python Project + + + + + +
+
+
+
+
+

+ pybind_signatures module +

+

pybind11 function signature extraction

+
+

Contents

+ +
+
+

Classes

+
+
class MyClass
+
My fun class!
+
+
+
+

Functions

+
+
+ def crazy_signature(…) +
+
Function that failed to get parsed
+
+ def overloaded(arg0: int, /) -> str +
+
Overloaded for ints
+
+ def overloaded(arg0: float, /) -> bool +
+
Overloaded for floats
+
+ def scale(arg0: int, + arg1: float, /) -> int +
+
Scale an integer
+
+ def scale_kwargs(a: int, + argument: float) -> int +
+
Scale an integer, kwargs
+
+ def taking_a_list_returning_a_tuple(arg0: List[float], /) -> Tuple[int, int, int] +
+
Takes a list, returns a tuple
+
+ def void_function(arg0: int, /) +
+
Returns nothing
+
+
+
+
+
+
+ + diff --git a/documentation/test_python/test_pybind.py b/documentation/test_python/test_pybind.py index 01caab8c..58914e57 100644 --- a/documentation/test_python/test_pybind.py +++ b/documentation/test_python/test_pybind.py @@ -22,8 +22,142 @@ # DEALINGS IN THE SOFTWARE. # +import sys +import unittest + +from python import parse_pybind_signature + from . import BaseTestCase +class Signature(unittest.TestCase): + def test(self): + self.assertEqual(parse_pybind_signature( + 'foo(a: int, a2: module.Thing) -> module.Thing3'), + ('foo', '', [ + ('a', 'int', None), + ('a2', 'module.Thing', None), + ], 'module.Thing3')) + + def test_newline(self): + self.assertEqual(parse_pybind_signature( + 'foo(a: int, a2: module.Thing) -> module.Thing3\n'), + ('foo', '', [ + ('a', 'int', None), + ('a2', 'module.Thing', None), + ], 'module.Thing3')) + + def test_docs(self): + self.assertEqual(parse_pybind_signature( + 'foo(a: int, a2: module.Thing) -> module.Thing3\n\nDocs here!!'), + ('foo', 'Docs here!!', [ + ('a', 'int', None), + ('a2', 'module.Thing', None), + ], 'module.Thing3')) + + def test_no_args(self): + self.assertEqual(parse_pybind_signature( + 'thingy() -> str'), + ('thingy', '', [], 'str')) + + def test_no_return(self): + self.assertEqual(parse_pybind_signature( + '__init__(self: module.Thing)'), + ('__init__', '', [ + ('self', 'module.Thing', None), + ], None)) + + def test_no_arg_types(self): + self.assertEqual(parse_pybind_signature( + 'thingy(self, the_other_thing)'), + ('thingy', '', [ + ('self', None, None), + ('the_other_thing', None, None), + ], None)) + + def test_square_brackets(self): + self.assertEqual(parse_pybind_signature( + 'foo(a: Tuple[int, str], no_really: str) -> List[str]'), + ('foo', '', [ + ('a', 'Tuple[int, str]', None), + ('no_really', 'str', None), + ], 'List[str]')) + + def test_nested_square_brackets(self): + self.assertEqual(parse_pybind_signature( + 'foo(a: Tuple[int, List[Tuple[int, int]]], another: float) -> Union[str, Any]'), + ('foo', 
'', [ + ('a', 'Tuple[int, List[Tuple[int, int]]]', None), + ('another', 'float', None), + ], 'Union[str, Any]')) + + def test_kwargs(self): + self.assertEqual(parse_pybind_signature( + 'foo(*args, **kwargs)'), + ('foo', '', [ + ('*args', None, None), + ('**kwargs', None, None), + ], None)) + + def test_default_values(self): + self.assertEqual(parse_pybind_signature( + 'foo(a: float=1.0, b: str=\'hello\')'), + ('foo', '', [ + ('a', 'float', '1.0'), + ('b', 'str', '\'hello\''), + ], None)) + + def test_crazy_stuff(self): + self.assertEqual(parse_pybind_signature( + 'foo(a: int, b: Math::Vector<4, UnsignedInt>)'), + ('foo', '', [('…', None, None)], None)) + + def test_crazy_stuff_docs(self): + self.assertEqual(parse_pybind_signature( + 'foo(a: int, b: Math::Vector<4, UnsignedInt>)\n\nThis is text!!'), + ('foo', 'This is text!!', [('…', None, None)], None)) + +class Signatures(BaseTestCase): + def __init__(self, *args, **kwargs): + super().__init__(__file__, 'signatures', *args, **kwargs) + + sys.path.append(self.path) + import pybind_signatures + + def test_positional_args(self): + import pybind_signatures + + # Verify that the assumptions are correct -- not using py::arg() makes + # the parameters positional-only, while py::arg() makes them + # positional-or-keyword + self.assertEqual(pybind_signatures.scale(14, 0.3), 4) + with self.assertRaises(TypeError): + pybind_signatures.scale(arg0=1, arg1=3.0) + self.assertEqual(pybind_signatures.scale_kwargs(14, 0.3), 4) + self.assertEqual(pybind_signatures.scale_kwargs(a=14, argument=0.3), 4) + + # Verify the same for classes + a = pybind_signatures.MyClass() + self.assertEqual(pybind_signatures.MyClass.instance_function(a, 3, 'bla'), (0.5, 42)) + with self.assertRaises(TypeError): + pybind_signatures.MyClass.instance_function(self=a, arg0=3, arg1='bla') + self.assertEqual(pybind_signatures.MyClass.instance_function_kwargs(a, 3, 'bla'), (0.5, 42)) + self.assertEqual(pybind_signatures.MyClass.instance_function_kwargs(self=a, 
hey=3, what='bla'), (0.5, 42)) + + # In particular, the 'self' parameter is positional-only if there are + # no arguments to use py::arg() for + self.assertEqual(pybind_signatures.MyClass.another(a), 42) + with self.assertRaises(TypeError): + pybind_signatures.MyClass.another(self=a) + + def test(self): + import pybind_signatures + self.run_python({ + 'INPUT_MODULES': ['pybind_signatures'], + 'PYBIND11_COMPATIBILITY': True + }) + self.assertEqual(*self.actual_expected_contents('pybind_signatures.html')) + self.assertEqual(*self.actual_expected_contents('pybind_signatures.MyClass.html')) + class Enums(BaseTestCase): def __init__(self, *args, **kwargs): super().__init__(__file__, 'enums', *args, **kwargs) -- 2.30.2