Skip to content

Commit

Permalink
Merge pull request #460 from seperman/dev
Browse files Browse the repository at this point in the history
8.0.0
  • Loading branch information
seperman authored Aug 27, 2024
2 parents be22027 + 8a7a004 commit a62abc1
Show file tree
Hide file tree
Showing 42 changed files with 818 additions and 340 deletions.
40 changes: 26 additions & 14 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,28 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.8, 3.9, "3.10", "3.11", "3.12"]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
architecture: ["x64"]
include:
- python-version: "3.10"
numpy-version: "2.0.dev"
steps:
- uses: actions/checkout@v2
- name: Setup Python ${{ matrix.python-version }} on ${{ matrix.architecture }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
architecture: ${{ matrix.architecture }}
- name: Cache pip 3.8
if: matrix.python-version == 3.8
uses: actions/cache@v2
with:
# This path is specific to Ubuntu
path: ~/.cache/pip
# Look to see if there is a cache hit for the corresponding requirements file
key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}-${{ hashFiles('requirements-dev3.8.txt') }}
restore-keys: |
${{ runner.os }}-pip-
${{ runner.os }}-
- name: Cache pip
if: matrix.python-version != 3.8
uses: actions/cache@v2
with:
# This path is specific to Ubuntu
Expand All @@ -40,28 +49,31 @@ jobs:
# workaround for 3.12, SEE: https://github.com/pypa/setuptools/issues/3661#issuecomment-1813845177
pip install --upgrade setuptools
- name: Install dependencies
if: matrix.python-version != 3.8
run: pip install -r requirements-dev.txt
- name: Install Numpy Dev
if: ${{ matrix.numpy-version }}
run: pip install -I --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple "numpy>=0.0.dev0"
- name: Install dependencies
if: matrix.python-version == 3.8
run: pip install -r requirements-dev3.8.txt
- name: Lint with flake8
if: matrix.python-version == 3.11
if: matrix.python-version == 3.12
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 deepdiff --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 deepdiff --count --exit-zero --max-complexity=26 --max-line-lengt=250 --statistics
- name: Test with pytest and get the coverage
if: matrix.python-version == 3.11
if: matrix.python-version == 3.12
run: |
pytest --cov-report=xml --cov=deepdiff tests/ --runslow
pytest --benchmark-disable --cov-report=xml --cov=deepdiff tests/ --runslow
- name: Test with pytest and no coverage report
if: matrix.python-version != 3.11
if: matrix.python-version != 3.12
run: |
pytest
pytest --benchmark-disable
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
if: matrix.python-version == 3.11
uses: codecov/codecov-action@v4
if: matrix.python-version == 3.12
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
with:
file: ./coverage.xml
token: ${{ secrets.CODECOV_TOKEN }}
Expand Down
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@ authors:
given-names: "Sep"
orcid: "https://orcid.org/0009-0009-5828-4345"
title: "DeepDiff"
version: 7.0.1
version: 8.0.0
date-released: 2024
url: "https://github.com/seperman/deepdiff"
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# DeepDiff v 7.0.1
# DeepDiff v 8.0.0

![Downloads](https://img.shields.io/pypi/dm/deepdiff.svg?style=flat)
![Python Versions](https://img.shields.io/pypi/pyversions/deepdiff.svg?style=flat)
Expand All @@ -17,7 +17,7 @@

Tested on Python 3.8+ and PyPy3.

- **[Documentation](https://zepworks.com/deepdiff/7.0.1/)**
- **[Documentation](https://zepworks.com/deepdiff/8.0.0/)**

## What is new?

Expand Down
2 changes: 1 addition & 1 deletion deepdiff/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""This module offers the DeepDiff, DeepSearch, grep, Delta and DeepHash classes."""
# flake8: noqa
__version__ = '7.0.1'
__version__ = '8.0.0'
import logging

if __name__ == '__main__':
Expand Down
5 changes: 2 additions & 3 deletions deepdiff/anyset.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from ordered_set import OrderedSet
from deepdiff.deephash import DeepHash
from deepdiff.helper import dict_
from deepdiff.helper import dict_, SetOrdered


class AnySet:
Expand All @@ -11,7 +10,7 @@ class AnySet:
However, once the AnySet object is deleted, all those traces will be gone too.
"""
def __init__(self, items=None):
self._set = OrderedSet()
self._set = SetOrdered()
self._hashes = dict_()
self._hash_to_objects = dict_()
if items:
Expand Down
9 changes: 4 additions & 5 deletions deepdiff/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from ordered_set import OrderedSet
from deepdiff.helper import strings, numbers
from deepdiff.helper import strings, numbers, SetOrdered


DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES = 12
Expand Down Expand Up @@ -31,18 +30,18 @@ def get_ignore_types_in_groups(self, ignore_type_in_groups,

result = []
for item_group in ignore_type_in_groups:
new_item_group = OrderedSet()
new_item_group = SetOrdered()
for item in item_group:
item = type(item) if item is None or not isinstance(item, type) else item
new_item_group.add(item)
result.append(new_item_group)
ignore_type_in_groups = result

if ignore_string_type_changes and self.strings not in ignore_type_in_groups:
ignore_type_in_groups.append(OrderedSet(self.strings))
ignore_type_in_groups.append(SetOrdered(self.strings))

if ignore_numeric_type_changes and self.numbers not in ignore_type_in_groups:
ignore_type_in_groups.append(OrderedSet(self.numbers))
ignore_type_in_groups.append(SetOrdered(self.numbers))

if not ignore_type_subclasses:
# is_instance method needs tuples. When we look for subclasses, we need them to be tuples
Expand Down
1 change: 1 addition & 0 deletions deepdiff/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def cli():
@click.option('--log-frequency-in-sec', required=False, default=0, type=int, show_default=True)
@click.option('--max-passes', required=False, default=10000000, type=int, show_default=True)
@click.option('--max_diffs', required=False, default=None, type=int, show_default=True)
@click.option('--threshold-to-diff-deeper', required=False, default=0.33, type=float, show_default=False)
@click.option('--number-format-notation', required=False, type=click.Choice(['f', 'e'], case_sensitive=True), show_default=True, default="f")
@click.option('--progress-logger', required=False, type=click.Choice(['info', 'error'], case_sensitive=True), show_default=True, default="info")
@click.option('--report-repetition', is_flag=True, show_default=True)
Expand Down
43 changes: 36 additions & 7 deletions deepdiff/deephash.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,17 @@
number_to_string, datetime_normalize, KEY_TO_VAL_STR, short_repr,
get_truncate_datetime, dict_, add_root_to_paths)
from deepdiff.base import Base

try:
import pandas
except ImportError:
pandas = False

try:
import polars
except ImportError:
polars = False

logger = logging.getLogger(__name__)

UNPROCESSED_KEY = object()
Expand Down Expand Up @@ -139,6 +150,7 @@ def __init__(self,
ignore_numeric_type_changes=False,
ignore_type_subclasses=False,
ignore_string_case=False,
use_enum_value=False,
exclude_obj_callback=None,
number_to_string_func=None,
ignore_private_variables=True,
Expand All @@ -154,7 +166,7 @@ def __init__(self,
"exclude_paths, include_paths, exclude_regex_paths, hasher, ignore_repetition, "
"number_format_notation, apply_hash, ignore_type_in_groups, ignore_string_type_changes, "
"ignore_numeric_type_changes, ignore_type_subclasses, ignore_string_case "
"number_to_string_func, ignore_private_variables, parent "
"number_to_string_func, ignore_private_variables, parent, use_enum_value "
"encodings, ignore_encoding_errors") % ', '.join(kwargs.keys()))
if isinstance(hashes, MutableMapping):
self.hashes = hashes
Expand All @@ -170,6 +182,7 @@ def __init__(self,
self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths)
self.hasher = default_hasher if hasher is None else hasher
self.hashes[UNPROCESSED_KEY] = []
self.use_enum_value = use_enum_value

self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes)
self.truncate_datetime = get_truncate_datetime(truncate_datetime)
Expand Down Expand Up @@ -206,10 +219,10 @@ def __init__(self,
sha1hex = sha1hex

def __getitem__(self, obj, extract_index=0):
return self._getitem(self.hashes, obj, extract_index=extract_index)
return self._getitem(self.hashes, obj, extract_index=extract_index, use_enum_value=self.use_enum_value)

@staticmethod
def _getitem(hashes, obj, extract_index=0):
def _getitem(hashes, obj, extract_index=0, use_enum_value=False):
"""
extract_index is zero for hash and 1 for count and None to get them both.
To keep it backward compatible, we only get the hash by default so it is set to zero by default.
Expand All @@ -220,6 +233,8 @@ def _getitem(hashes, obj, extract_index=0):
key = BoolObj.TRUE
elif obj is False:
key = BoolObj.FALSE
elif use_enum_value and isinstance(obj, Enum):
key = obj.value

result_n_count = (None, 0)

Expand Down Expand Up @@ -256,14 +271,14 @@ def get(self, key, default=None, extract_index=0):
return self.get_key(self.hashes, key, default=default, extract_index=extract_index)

@staticmethod
def get_key(hashes, key, default=None, extract_index=0):
def get_key(hashes, key, default=None, extract_index=0, use_enum_value=False):
"""
get_key method for the hashes dictionary.
It can extract the hash for a given key that is already calculated when extract_index=0
or the count of items that went to building the object when extract_index=1.
"""
try:
result = DeepHash._getitem(hashes, key, extract_index=extract_index)
result = DeepHash._getitem(hashes, key, extract_index=extract_index, use_enum_value=use_enum_value)
except KeyError:
result = default
return result
Expand Down Expand Up @@ -444,7 +459,6 @@ def _prep_path(self, obj):
type_ = obj.__class__.__name__
return KEY_TO_VAL_STR.format(type_, obj)


def _prep_number(self, obj):
type_ = "number" if self.ignore_numeric_type_changes else obj.__class__.__name__
if self.significant_digits is not None:
Expand Down Expand Up @@ -475,12 +489,14 @@ def _prep_tuple(self, obj, parent, parents_ids):
return result, counts

def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET):
"""The main diff method"""
"""The main hash method"""
counts = 1

if isinstance(obj, bool):
obj = self._prep_bool(obj)
result = None
elif self.use_enum_value and isinstance(obj, Enum):
obj = obj.value
else:
result = not_hashed
try:
Expand Down Expand Up @@ -523,6 +539,19 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET):
elif isinstance(obj, tuple):
result, counts = self._prep_tuple(obj=obj, parent=parent, parents_ids=parents_ids)

elif (pandas and isinstance(obj, pandas.DataFrame)):
def gen():
yield ('dtype', obj.dtypes)
yield ('index', obj.index)
yield from obj.items() # which contains (column name, series tuples)
result, counts = self._prep_iterable(obj=gen(), parent=parent, parents_ids=parents_ids)
elif (polars and isinstance(obj, polars.DataFrame)):
def gen():
yield from obj.columns
yield from list(obj.schema.items())
yield from obj.rows()
result, counts = self._prep_iterable(obj=gen(), parent=parent, parents_ids=parents_ids)

elif isinstance(obj, Iterable):
result, counts = self._prep_iterable(obj=obj, parent=parent, parents_ids=parents_ids)

Expand Down
4 changes: 2 additions & 2 deletions deepdiff/delta.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from functools import partial, cmp_to_key
from collections.abc import Mapping
from copy import deepcopy
from ordered_set import OrderedSet
from deepdiff import DeepDiff
from deepdiff.serialization import pickle_load, pickle_dump
from deepdiff.helper import (
Expand All @@ -14,6 +13,7 @@
Opcode, FlatDeltaRow, UnkownValueCode, FlatDataAction,
OPCODE_TAG_TO_FLAT_DATA_ACTION,
FLAT_DATA_ACTION_TO_OPCODE_TAG,
SetOrdered,
)
from deepdiff.path import (
_path_to_elements, _get_nested_obj, _get_nested_obj_and_force,
Expand Down Expand Up @@ -744,7 +744,7 @@ def _do_ignore_order(self):
"""
fixed_indexes = self.diff.get('iterable_items_added_at_indexes', dict_())
remove_indexes = self.diff.get('iterable_items_removed_at_indexes', dict_())
paths = OrderedSet(fixed_indexes.keys()) | OrderedSet(remove_indexes.keys())
paths = SetOrdered(fixed_indexes.keys()) | SetOrdered(remove_indexes.keys())
for path in paths:
# In the case of ignore_order reports, we are pointing to the container object.
# Thus we add a [0] to the elements so we can get the required objects and discard what we don't need.
Expand Down
Loading

0 comments on commit a62abc1

Please sign in to comment.