Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tests on more real datasets for some datasets #707

Open
wants to merge 10 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions datumaro/util/dataset_mangling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Copyright (C) 2022 Intel Corporation
#
# SPDX-License-Identifier: MIT

import numpy as np

from datumaro.components.annotation import AnnotationType, Mask
from datumaro.components.dataset import Dataset
from datumaro.components.media import Image
from datumaro.plugins.sampler.random_sampler import RandomSampler


def dataset_mangling(dataset, count=-1, image_size=(3, 1, 3)):
    """Build a mangled copy of ``dataset`` with dummy media and randomized annotations.

    Every item is renumbered with a sequential string id (starting at "1"),
    its media is replaced by an all-ones image of ``image_size``, and its
    annotations are rebuilt:

    * label annotations get their class index shifted by a random offset,
      wrapped to the number of label categories;
    * bbox annotations get each corner coordinate resampled uniformly
      around its original value;
    * if the item had at least one mask, all of its masks are replaced by
      masks derived from one random class map (one ``Mask`` per class
      present in that map);
    * annotations of any other type are dropped.

    The global NumPy random state is used, so results are only reproducible
    if the caller seeds ``np.random`` beforehand.

    Args:
        dataset: source extractor/dataset to mangle.
        count: when positive, the dataset is first passed through
            ``RandomSampler(dataset, count)``; otherwise all items are used.
        image_size: shape of the dummy image array; its first two entries
            are used as the shape of generated masks.

    Returns:
        The new ``Dataset`` holding the mangled items.
    """
    if count > 0:
        dataset = RandomSampler(dataset, count)
    dataset = Dataset.from_extractors(dataset)

    # Looked up once; individual category tables are still accessed lazily so
    # that datasets lacking label or mask categories keep working.
    categories = dataset.categories()

    next_id = 1  # not named `id`, to avoid shadowing the builtin
    for subset in dataset.subsets().values():
        for item in subset:
            item.id = str(next_id)
            next_id += 1

            item.media = Image(data=np.ones(image_size))

            annotations = []

            labels = [anno for anno in item.annotations if anno.type == AnnotationType.label]
            if labels:
                label_count = len(categories[AnnotationType.label])
                for label in labels:
                    label.label = (label.label + np.random.randint(0, 10)) % label_count

            annotations += labels

            bboxes = [anno for anno in item.annotations if anno.type == AnnotationType.bbox]
            for bbox in bboxes:
                # points are read as [x0, y0, x1, y1]; each is resampled within
                # +/-50% of its value, clamped at 0 from below.
                for idx in range(4):
                    value = bbox.points[idx]
                    bbox.points[idx] = np.random.uniform(
                        max(value - value / 2, 0), value + value / 2
                    )

                # Independent resampling can invert the corners; restore
                # x0 <= x1 and y0 <= y1 so the box keeps a non-negative size.
                if bbox.points[0] > bbox.points[2]:
                    bbox.points[0], bbox.points[2] = bbox.points[2], bbox.points[0]
                if bbox.points[1] > bbox.points[3]:
                    bbox.points[1], bbox.points[3] = bbox.points[3], bbox.points[1]

            annotations += bboxes

            has_masks = any(anno.type == AnnotationType.mask for anno in item.annotations)
            if has_masks:
                mask_size = image_size[:2]

                mask = np.random.randint(
                    0, len(categories[AnnotationType.mask]), size=mask_size
                )

                for segm_id in np.unique(mask):
                    # int() turns the numpy scalar into a plain Python int label.
                    annotations.append(
                        Mask(image=lazy_extract_mask(mask, segm_id), label=int(segm_id))
                    )

            item.annotations = annotations

    return dataset


def lazy_extract_mask(mask, c):
    """Return a zero-argument callable that evaluates ``mask == c`` on demand.

    The comparison is deferred until the returned callable is invoked;
    ``mask`` and ``c`` are captured as arguments of this function, so each
    call site gets its own binding.
    """

    def _extract():
        return mask == c

    return _extract
65 changes: 56 additions & 9 deletions datumaro/util/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,20 +92,23 @@ def __enter__(self) -> str:
return path


def compare_categories(test, expected, actual):
test.assertEqual(sorted(expected, key=lambda t: t.value), sorted(actual, key=lambda t: t.value))
def compare_categories(test, expected, actual, externally_comparison=False):
if not externally_comparison:
test.assertEqual(
sorted(expected, key=lambda t: t.value), sorted(actual, key=lambda t: t.value)
)

if AnnotationType.label in expected:
if AnnotationType.label in expected and AnnotationType.label in actual:
test.assertEqual(
expected[AnnotationType.label].items,
actual[AnnotationType.label].items,
)
if AnnotationType.mask in expected:
if AnnotationType.mask in expected and AnnotationType.mask in actual:
test.assertEqual(
expected[AnnotationType.mask].colormap,
actual[AnnotationType.mask].colormap,
)
if AnnotationType.points in expected:
if AnnotationType.points in expected and AnnotationType.points in actual:
test.assertEqual(
expected[AnnotationType.points].items,
actual[AnnotationType.points].items,
Expand Down Expand Up @@ -137,15 +140,49 @@ def _compare_annotations(expected, actual, ignored_attrs=None):
return r


def _compare_annotations_externally(expected, actual, ignored_attrs=None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you reuse the regular comparison function?

a_attr = expected.attributes
b_attr = actual.attributes

if ignored_attrs != IGNORE_ALL:
expected.attributes = filter_dict(a_attr, exclude_keys=ignored_attrs)
actual.attributes = filter_dict(b_attr, exclude_keys=ignored_attrs)
elif ignored_attrs:
expected.attributes = {}
actual.attributes = {}

a_id = expected.id
b_id = actual.id
a_group = expected.group
b_group = actual.group

expected.id = 0
actual.id = 0
expected.group = 0
actual.group = 0

r = expected == actual

expected.attributes = a_attr
actual.attributes = b_attr
expected.id = a_id
actual.id = b_id
expected.group = a_group
actual.group = b_group

return r


def compare_datasets(
test,
expected: IDataset,
actual: IDataset,
ignored_attrs: Union[None, Literal["*"], Collection[str]] = None,
require_media: bool = False,
require_images: bool = False,
externally_comparison=False,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not clear what external means here.

):
compare_categories(test, expected.categories(), actual.categories())
compare_categories(test, expected.categories(), actual.categories(), externally_comparison)

test.assertTrue(issubclass(actual.media_type(), expected.media_type()))

Expand Down Expand Up @@ -187,9 +224,19 @@ def compare_datasets(
ann_b_matches = [x for x in item_b.annotations if x.type == ann_a.type]
test.assertFalse(len(ann_b_matches) == 0, "ann id: %s" % ann_a.id)

ann_b = find(
ann_b_matches, lambda x: _compare_annotations(x, ann_a, ignored_attrs=ignored_attrs)
)
if externally_comparison:
ann_b = find(
ann_b_matches,
lambda x: _compare_annotations_externally(
x, ann_a, ignored_attrs=ignored_attrs
),
)
else:
ann_b = find(
ann_b_matches,
lambda x: _compare_annotations(x, ann_a, ignored_attrs=ignored_attrs),
)
Comment on lines +227 to +238
Copy link
Contributor

@zhiltsov-max zhiltsov-max Apr 4, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cmp_anns = ...
...
cmp_anns(a, b, ...)

I suggest using the strategy pattern here.


if ann_b is None:
test.fail("ann %s, candidates %s" % (ann_a, ann_b_matches))
item_b.annotations.remove(ann_b) # avoid repeats
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
9 changes: 9 additions & 0 deletions tests/assets/widerface_dataset/mangling_dataset/labels.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
face
Demonstration
Family_Group
Picnic
Baseball
Car_Racing
Group
Interview
Award_Ceremony
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
5--Car_Racing/1.jpg
3--Picnic/2.jpg
1--Demonstration/3.jpg
4--Baseball/4.jpg
7--Interview/5.jpg
8--Award_Ceremony/6.jpg
0--face/7.jpg
4--Baseball/8.jpg
0--face/9.jpg
Loading