Skip to content

Commit

Permalink
Switch from pyrsistent to (my new) rpds bindings
Browse files Browse the repository at this point in the history
This represents a big speedup, as unfortunately even with the
C extension, pyrsistent was showing up quite high on profiling
output.

I need to benchmark a bit more on PyPy -- pyrsistent has a
pure-Python implementation which was super fast on PyPy, and I'm not
sure whether rpds will beat that. We may bring pyrsistent back into
the mix for PyPy, but that's TBD.
  • Loading branch information
Julian committed Mar 4, 2023
1 parent 6a2d5b9 commit 598b82a
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 75 deletions.
1 change: 0 additions & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ def setup(app):
"https://jsonschema-specifications.readthedocs.io/en/latest/",
None,
),
"pyrsistent": ("https://pyrsistent.readthedocs.io/en/latest/", None),
"python": ("https://docs.python.org/", None),
"setuptools": ("https://setuptools.pypa.io/en/latest/", None),
}
Expand Down
8 changes: 3 additions & 5 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ alabaster==0.7.13
# via sphinx
attrs==22.2.0
# via referencing
babel==2.11.0
babel==2.12.1
# via sphinx
beautifulsoup4==4.11.2
# via furo
Expand Down Expand Up @@ -49,14 +49,12 @@ pygments==2.14.0
# sphinx
pygments-github-lexers==0.0.5
# via -r docs/requirements.in
pyrsistent==0.19.3
# via referencing
pytz==2022.7.1
# via babel
file:.#egg=referencing
# via -r docs/requirements.in
requests==2.28.2
# via sphinx
rpds-py==0.4.1
# via referencing
snowballstemmer==2.2.0
# via sphinx
soupsieve==2.4
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ classifiers = [
dynamic = ["version"]
dependencies = [
"attrs>=22.2.0",
"pyrsistent>=0.19.3",
"rpds-py>=0.4.1",
]

[project.urls]
Expand Down
90 changes: 28 additions & 62 deletions referencing/_core.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,18 @@
from __future__ import annotations

from collections.abc import Iterable, Iterator, Sequence
from typing import (
Any,
Callable,
ClassVar,
Generic,
Protocol,
Tuple,
TypeVar,
cast,
)
from typing import Any, Callable, ClassVar, Generic, Protocol, TypeVar
from urllib.parse import unquote, urldefrag, urljoin

from attrs import evolve, field
from pyrsistent import PMap as PMapType, plist, pmap, pset
from pyrsistent.typing import PList, PMap, PSet
from rpds import HashTrieMap, HashTrieSet, List

from referencing import exceptions
from referencing._attrs import frozen
from referencing.typing import URI, Anchor as AnchorType, D, Mapping, Retrieve

EMPTY_RESOURCES: PMap[URI, Resource[Any]] = pmap({}, pre_size=64)
EMPTY_ANCHORS = cast(PMap[Tuple[URI, str], AnchorType[Any]], EMPTY_RESOURCES)
EMPTY_UNCRAWLED: PSet[URI] = pset(pre_size=128)
EMPTY_PREVIOUS_RESOLVERS: PList[URI] = plist()
EMPTY_UNCRAWLED: HashTrieSet[URI] = HashTrieSet()
EMPTY_PREVIOUS_RESOLVERS: List[URI] = List()


class _MaybeInSubresource(Protocol[D]):
Expand Down Expand Up @@ -212,14 +200,6 @@ def _fail_to_retrieve(uri: URI):
raise exceptions.NoSuchResource(ref=uri)


def _to_pmap(
value: dict[URI, Resource[D]]
| PMap[URI, Resource[D]]
| list[tuple[URI, Resource[D]]],
):
return value if isinstance(value, PMapType) else pmap(value)


@frozen
class Registry(Mapping[URI, Resource[D]]):
r"""
Expand All @@ -245,14 +225,12 @@ class Registry(Mapping[URI, Resource[D]]):
even according to the retrieval logic.
"""

_resources: PMap[URI, Resource[D]] = field(
default=EMPTY_RESOURCES,
converter=_to_pmap,
_resources: HashTrieMap[URI, Resource[D]] = field( # type: ignore[reportGeneralTypeIssues] # noqa: E501
default=HashTrieMap(),
converter=HashTrieMap.convert,
)
_anchors: PMap[tuple[URI, str], AnchorType[D]] = field(
default=EMPTY_ANCHORS,
)
_uncrawled: PSet[URI] = field(default=EMPTY_UNCRAWLED)
_anchors: HashTrieMap[tuple[URI, str], AnchorType[D]] = HashTrieMap() # type: ignore[reportGeneralTypeIssues] # noqa: E501
_uncrawled: HashTrieSet[URI] = EMPTY_UNCRAWLED
_retrieve: Retrieve[D] = field(default=_fail_to_retrieve)

def __getitem__(self, uri: URI) -> Resource[D]:
Expand Down Expand Up @@ -301,19 +279,15 @@ def __rmatmul__(self, new: Resource[D] | Iterable[Resource[D]]):
if isinstance(new, Resource):
new = (new,)

resources = self._resources.evolver()
uncrawled = self._uncrawled.evolver()
resources = self._resources
uncrawled = self._uncrawled
for resource in new:
id = resource.id()
if id is None:
raise exceptions.NoInternalID(resource=resource)
uncrawled.add(id)
resources.set(id, resource)
return evolve(
self,
resources=resources.persistent(),
uncrawled=uncrawled.persistent(),
)
uncrawled = uncrawled.insert(id)
resources = resources.insert(id, resource)
return evolve(self, resources=resources, uncrawled=uncrawled)

def __repr__(self) -> str:
size = len(self)
Expand Down Expand Up @@ -365,7 +339,7 @@ def remove(self, uri: URI):
self,
resources=self._resources.remove(uri),
uncrawled=self._uncrawled.discard(uri),
anchors=pmap(
anchors=HashTrieMap(
(k, v) for k, v in self._anchors.items() if k[0] != uri
),
)
Expand Down Expand Up @@ -394,23 +368,23 @@ def crawl(self) -> Registry[D]:
"""
Immediately crawl all added resources, discovering subresources.
"""
resources = self._resources.evolver()
anchors = self._anchors.evolver()
resources = self._resources
anchors = self._anchors
uncrawled = [(uri, resources[uri]) for uri in self._uncrawled]
while uncrawled:
uri, resource = uncrawled.pop()

id = resource.id()
if id is not None:
uri = urljoin(uri, id)
resources[uri] = resource
resources = resources.insert(uri, resource)
for each in resource.anchors():
anchors.set((uri, each.name), each)
anchors = anchors.insert((uri, each.name), each)
uncrawled.extend((uri, each) for each in resource.subresources())
return evolve(
self,
resources=resources.persistent(),
anchors=anchors.persistent(),
resources=resources,
anchors=anchors,
uncrawled=EMPTY_UNCRAWLED,
)

Expand All @@ -427,16 +401,12 @@ def with_resources(
r"""
Add the given `Resource`\ s to the registry, without crawling them.
"""
resources = self._resources.evolver()
uncrawled = self._uncrawled.evolver()
resources = self._resources
uncrawled = self._uncrawled
for uri, resource in pairs:
uncrawled.add(uri)
resources[uri] = resource
return evolve(
self,
resources=resources.persistent(),
uncrawled=uncrawled.persistent(),
)
uncrawled = uncrawled.insert(uri)
resources = resources.insert(uri, resource)
return evolve(self, resources=resources, uncrawled=uncrawled)

def with_contents(
self,
Expand Down Expand Up @@ -540,11 +510,7 @@ class Resolver(Generic[D]):

_base_uri: str = field(alias="base_uri")
_registry: Registry[D] = field(alias="registry")
_previous: PList[URI] = field(
default=EMPTY_PREVIOUS_RESOLVERS,
repr=False,
alias="previous",
)
_previous: List[URI] = field(default=List(), repr=False, alias="previous")

def lookup(self, ref: URI) -> Resolved[D]:
"""
Expand Down Expand Up @@ -614,7 +580,7 @@ def _evolve(self, base_uri: str, **kwargs: Any):
"""
previous = self._previous
if self._base_uri and (not previous or base_uri != self._base_uri):
previous = previous.cons(self._base_uri)
previous = previous.push_front(self._base_uri)
return evolve(self, base_uri=base_uri, previous=previous, **kwargs)


Expand Down
6 changes: 3 additions & 3 deletions referencing/tests/test_core.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pyrsistent import pmap
from rpds import HashTrieMap
import pytest

from referencing import Anchor, Registry, Resource, Specification, exceptions
Expand Down Expand Up @@ -188,7 +188,7 @@ def test_init(self):

def test_dict_conversion(self):
"""
Passing a `dict` to `Registry` gets converted to a `pmap`.
Passing a `dict` to `Registry` gets converted to a `HashTrieMap`.
So continuing to use the registry works.
"""
Expand Down Expand Up @@ -244,7 +244,7 @@ def test_combine(self):
("http://example.com/baz", three),
("http://example.com/foo/quux", four),
],
anchors=pmap(
anchors=HashTrieMap(
{
("http://example.com/foo/quux", "foo"): Anchor(
name="foo",
Expand Down
6 changes: 3 additions & 3 deletions test-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ packaging==23.0
# via pytest
pluggy==1.0.0
# via pytest
pyrsistent==0.19.3
# via referencing
pytest==7.2.1
pytest==7.2.2
# via
# -r test-requirements.in
# pytest-subtests
pytest-subtests==0.10.0
# via -r test-requirements.in
file:.#egg=referencing
# via -r test-requirements.in
rpds-py==0.4.1
# via referencing

0 comments on commit 598b82a

Please sign in to comment.