Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pre-process parsed docstrings to add hints on how to resolve names #723

Draft
wants to merge 14 commits into
base: master
Choose a base branch
from
3 changes: 3 additions & 0 deletions pydoctor/astbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ def visit_Module(self, node: ast.Module) -> None:

def depart_Module(self, node: ast.Module) -> None:
self.builder.pop(self.module)
epydoc2stan.transform_parsed_names(self.module)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be done in post-processing


def visit_ClassDef(self, node: ast.ClassDef) -> None:
# Ignore classes within functions.
Expand Down Expand Up @@ -1072,6 +1073,8 @@ def _annotation_for_elements(sequence: Iterable[object]) -> Optional[ast.expr]:
class ASTBuilder:
"""
Keeps tracks of the state of the AST build, creates documentable and adds objects to the system.

One ASTBuilder instance is only suitable to build one Module.
"""
ModuleVistor = ModuleVistor

Expand Down
87 changes: 86 additions & 1 deletion pydoctor/epydoc2stan.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

from collections import defaultdict
import enum
import inspect
from itertools import chain
from typing import (
TYPE_CHECKING, Any, Callable, ClassVar, DefaultDict, Dict, Generator,
Iterator, List, Mapping, Optional, Sequence, Tuple, Union,
Expand All @@ -12,8 +14,11 @@
import re

import attr
from docutils.transforms import Transform
from docutils import nodes

from pydoctor import model, linker, node2stan
from pydoctor.node2stan import parse_reference
from pydoctor.astutils import is_none_literal
from pydoctor.epydoc.markup import Field as EpydocField, ParseError, get_parser_by_name, processtypes
from twisted.web.template import Tag, tags
Expand Down Expand Up @@ -884,7 +889,10 @@ def get_parsed_type(obj: model.Documentable) -> Optional[ParsedDocstring]:
# Only Attribute instances have the 'annotation' attribute.
annotation: Optional[ast.expr] = getattr(obj, 'annotation', None)
if annotation is not None:
return colorize_inline_pyval(annotation)
parsed_type = colorize_inline_pyval(annotation)
# cache parsed_type attribute
obj.parsed_type = parsed_type
return parsed_type

return None

Expand Down Expand Up @@ -1143,3 +1151,80 @@ def populate_constructors_extra_info(cls:model.Class) -> None:
extra_epytext += '`%s <%s>`' % (short_text, c.fullName())

cls.extra_info.append(parse_docstring(cls, extra_epytext, cls, 'restructuredtext', section='constructor extra'))

class _ReferenceTransform(Transform):

def __init__(self, document:nodes.document, ctx:'model.Documentable'):
super().__init__(document)
self.ctx = ctx

def apply(self):
ctx = self.ctx
module = self.ctx.module
for node in self.document.findall(nodes.title_reference):
_, target = parse_reference(node)
if target == node.attributes.get('refuri', target):
name, *rest = target.split('.')
# Only apply transformation to non-ambigous names,
# because we don't know if we're dealing with an annotation
# or an interpreted, so we must go with the conservative approach.
if ((module.isNameDefined(name) and
not ctx.isNameDefined(name, only_locals=True))
or (ctx.isNameDefined(name, only_locals=True) and
not module.isNameDefined(name))):

node.attributes['refuri'] = '.'.join(chain(
ctx._localNameToFullName(name).split('.'), rest))

def _apply_reference_transform(doc:ParsedDocstring, ctx:'model.Documentable') -> None:
"""
Runs L{_ReferenceTransform} on the underlying docutils document.
No-op if L{to_node} raises L{NotImplementedError}.
"""
try:
document = doc.to_node()
except NotImplementedError:
return
else:
_ReferenceTransform(document, ctx).apply()

def transform_parsed_names(node:'model.Module') -> None:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should not be re-run on nodes that have been reparented. Meaning we must keep track of reparented nodes for every modules.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This issue will be fixed if we move the reparenting in post process and call transform_parsed_names() after.

"""
Walk this module's content and apply in-place transformations to the
L{ParsedDocstring} instances that olds L{obj_reference} or L{title_reference} nodes.

Fixing "Lookup of name in annotation fails on reparented object #295".
The fix is not 100% complete at the moment: attribute values and decorators
are not handled.
Comment on lines +1296 to +1298
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Fixing "Lookup of name in annotation fails on reparented object #295".
The fix is not 100% complete at the moment: attribute values and decorators
are not handled.
Fixing "Lookup of name in annotation fails on reparented object #295".

"""
from pydoctor import model, astbuilder
# resolve names early when possible
for ob in model.walk(node):
# resolve names in parsed_docstring, do not forget field bodies
if ob.parsed_docstring:
_apply_reference_transform(ob.parsed_docstring, ob)
for f in ob.parsed_docstring.fields:
_apply_reference_transform(f.body(), ob)
Comment on lines +1304 to +1307
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we're not using ensure_parsed_docstring, we might miss some docs

if isinstance(ob, model.Function):
if ob.signature:
for p in ob.signature.parameters.values():
ann = p.annotation if p.annotation is not inspect.Parameter.empty else None
if isinstance(ann, astbuilder._ValueFormatter):
_apply_reference_transform(ann._colorized, ob)
default = p.default if p.default is not inspect.Parameter.empty else None
if isinstance(default, astbuilder._ValueFormatter):
_apply_reference_transform(default._colorized, ob)
# TODO: resolve function's annotations, they are currently presented twice
# we can only change signature, annotations in param table must be handled by
# introducing attribute parsed_annotations
elif isinstance(ob, model.Attribute):
# resolve attribute annotation with parsed_type attribute
parsed_type = get_parsed_type(ob)
if parsed_type:
_apply_reference_transform(parsed_type, ob)
# TODO: resolve parsed_value
# TODO: resolve parsed_decorators
elif isinstance(ob, model.Class):
# TODO: resolve parsed_class_signature
# TODO: resolve parsed_decorators
pass
22 changes: 18 additions & 4 deletions pydoctor/linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,13 @@ def look_for_intersphinx(self, name: str) -> Optional[str]:
def link_to(self, identifier: str, label: "Flattenable") -> Tag:
fullID = self.obj.expandName(identifier)

target = self.obj.system.objForFullName(fullID)
if target is not None:
return taglink(target, self.page_url, label)
try:
target = self.obj.system.find_object(fullID)
except LookupError:
pass
else:
if target is not None:
return taglink(target, self.page_url, label)

url = self.look_for_intersphinx(fullID)
if url is not None:
Expand Down Expand Up @@ -184,8 +188,18 @@ def _resolve_identifier_xref(self,
if target is not None:
return target

# Check if the fullID exists in an intersphinx inventory.
fullID = self.obj.expandName(identifier)

# Try fetching the name with it's outdated fullname
try:
target = self.obj.system.find_object(fullID)
except LookupError:
pass
else:
if target is not None:
return target

# Check if the fullID exists in an intersphinx inventory.
target_url = self.look_for_intersphinx(fullID)
if not target_url:
# FIXME: https://github.com/twisted/pydoctor/issues/125
Expand Down
23 changes: 18 additions & 5 deletions pydoctor/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,19 @@ class DocumentableKind(Enum):
PROPERTY = 150
VARIABLE = 100

def walk(node:'Documentable') -> Iterator['Documentable']:
"""
Recursively yield all descendant nodes in the tree starting at *node*
(including *node* itself), in no specified order. This is useful if you
only want to modify nodes in place and don't care about the context.
"""
from collections import deque
todo = deque([node])
while todo:
node = todo.popleft()
todo.extend(node.contents.values())
yield node

class Documentable:
"""An object that can be documented.

Expand Down Expand Up @@ -271,7 +284,7 @@ def _handle_reparenting_post(self) -> None:
def _localNameToFullName(self, name: str) -> str:
raise NotImplementedError(self._localNameToFullName)

def isNameDefined(self, name:str) -> bool:
def isNameDefined(self, name:str, only_locals:bool=False) -> bool:
"""
Is the given name defined in the globals/locals of self-context?
Only the first name of a dotted name is checked.
Expand Down Expand Up @@ -411,13 +424,13 @@ def setup(self) -> None:
super().setup()
self._localNameToFullName_map: Dict[str, str] = {}

def isNameDefined(self, name: str) -> bool:
def isNameDefined(self, name: str, only_locals:bool=False) -> bool:
name = name.split('.')[0]
if name in self.contents:
return True
if name in self._localNameToFullName_map:
return True
if not isinstance(self, Module):
if not isinstance(self, Module) and not only_locals:
return self.module.isNameDefined(name)
else:
return False
Expand Down Expand Up @@ -811,8 +824,8 @@ def docsources(self) -> Iterator[Documentable]:
def _localNameToFullName(self, name: str) -> str:
return self.parent._localNameToFullName(name)

def isNameDefined(self, name: str) -> bool:
return self.parent.isNameDefined(name)
def isNameDefined(self, name: str, only_locals:bool=False) -> bool:
return self.parent.isNameDefined(name, only_locals=only_locals)

class Function(Inheritable):
kind = DocumentableKind.FUNCTION
Expand Down
39 changes: 24 additions & 15 deletions pydoctor/node2stan.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""
import re
import optparse
from typing import Any, Callable, ClassVar, Iterable, List, Optional, Union, TYPE_CHECKING
from typing import Any, Callable, ClassVar, Iterable, List, Optional, Sequence, Tuple, Union, TYPE_CHECKING
from docutils.writers import html4css1
from docutils import nodes, frontend, __version_info__ as docutils_version_info

Expand Down Expand Up @@ -52,6 +52,25 @@ def gettext(node: Union[nodes.Node, List[nodes.Node]]) -> List[str]:
filtered.extend(gettext(child))
return filtered

def parse_reference(node:nodes.title_reference) -> Tuple[Union[str, Sequence[nodes.Node]], str]:
"""
Split a reference into (label, target).
"""
label: "Flattenable"
if 'refuri' in node.attributes:
# Epytext parsed or manually constructed nodes.
label, target = node.children, node.attributes['refuri']
else:
# RST parsed.
m = _TARGET_RE.match(node.astext())
if m:
label, target = m.groups()
else:
label = target = node.astext()
# Support linking to functions and methods with () at the end
if target.endswith('()'):
target = target[:len(target)-2]
return label, target

_TARGET_RE = re.compile(r'^(.*?)\s*<(?:URI:|URL:)?([^<>]+)>$')
_VALID_IDENTIFIER_RE = re.compile('[^0-9a-zA-Z_]')
Expand Down Expand Up @@ -105,22 +124,12 @@ def visit_obj_reference(self, node: nodes.Node) -> None:
self._handle_reference(node, link_func=self._linker.link_to)

def _handle_reference(self, node: nodes.Node, link_func: Callable[[str, "Flattenable"], "Flattenable"]) -> None:
node_label, target = parse_reference(node)
label: "Flattenable"
if 'refuri' in node.attributes:
# Epytext parsed or manually constructed nodes.
label, target = node2stan(node.children, self._linker), node.attributes['refuri']
if not isinstance(node_label, str):
label = node2stan(node_label, self._linker)
else:
# RST parsed.
m = _TARGET_RE.match(node.astext())
if m:
label, target = m.groups()
else:
label = target = node.astext()

# Support linking to functions and methods with () at the end
if target.endswith('()'):
target = target[:len(target)-2]

label = node_label
self.body.append(flatten(link_func(target, label)))
raise nodes.SkipNode()

Expand Down
78 changes: 78 additions & 0 deletions pydoctor/test/test_epydoc2stan.py
Original file line number Diff line number Diff line change
Expand Up @@ -2106,3 +2106,81 @@ def func():
assert docstring2html(mod.contents['func'], docformat='plaintext') == expected
captured = capsys.readouterr().out
assert captured == ''

def test_parsed_names_partially_resolved_early() -> None:
"""
Test for issue #295

Annotations are first locally resolved when we reach the end of the module,
then again when we actually resolve the name when generating the stan for the annotation.
"""
typing = '''\
List = ClassVar = TypeVar = object()
'''

base = '''\
import ast
class Vis(ast.NodeVisitor):
...
'''
src = '''\
from typing import List
import typing as t

from .base import Vis

class Cls(Vis, t.Generic['_T']):
"""
L{Cls}
"""
clsvar:List[str]
clsvar2:t.ClassVar[List[str]]

def __init__(self, a:'_T'):
self._a:'_T' = a

C = Cls
_T = t.TypeVar('_T')
unknow: i|None|list
ann:Cls
'''

top = '''\
# the order matters here
from .src import C, Cls, Vis
__all__ = ['Cls', 'C', 'Vis']
'''

system = model.System()
builder = system.systemBuilder(system)
builder.addModuleString(top, 'top', is_package=True)
builder.addModuleString(base, 'base', 'top')
builder.addModuleString(src, 'src', 'top')
builder.addModuleString(typing, 'typing')
builder.buildModules()

Cls = system.allobjects['top.Cls']
clsvar = Cls.contents['clsvar']
clsvar2 = Cls.contents['clsvar2']
a = Cls.contents['_a']
assert clsvar.expandName('typing.List')=='typing.List'
assert '<obj_reference refuri="typing.List">' in clsvar.parsed_type.to_node().pformat()
assert 'href="typing.html#List"' in flatten(clsvar.parsed_type.to_stan(clsvar.docstring_linker))
assert 'href="typing.html#ClassVar"' in flatten(clsvar2.parsed_type.to_stan(clsvar2.docstring_linker))
assert 'href="top.src.html#_T"' in flatten(a.parsed_type.to_stan(clsvar.docstring_linker))

# the reparenting/alias issue
ann = system.allobjects['top.src.ann']
assert 'href="top.Cls.html"' in flatten(ann.parsed_type.to_stan(ann.docstring_linker))
assert 'href="top.Cls.html"' in flatten(Cls.parsed_docstring.to_stan(Cls.docstring_linker))

unknow = system.allobjects['top.src.unknow']
assert flatten_text(unknow.parsed_type.to_stan(unknow.docstring_linker)) == 'i|None|list'



# TODO: test the __init__ signature and the class bases

# TODO: Fix two new twisted warnings:
# twisted/internet/_sslverify.py:330: Cannot find link target for "twisted.internet.ssl.DN", resolved from "twisted.internet._sslverify.DistinguishedName"
# twisted/internet/_sslverify.py:347: Cannot find link target for "twisted.internet.ssl.DN", resolved from "twisted.internet._sslverify.DistinguishedName"