-
Notifications
You must be signed in to change notification settings - Fork 1.8k
/
Copy pathgenerate_dataset_json.py
103 lines (86 loc) · 3.95 KB
/
generate_dataset_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
from typing import Tuple
from batchgenerators.utilities.file_and_folder_operations import save_json, join
def generate_dataset_json(output_folder: str,
                          channel_names: dict,
                          labels: dict,
                          num_training_cases: int,
                          file_ending: str,
                          regions_class_order: Tuple[int, ...] = None,
                          dataset_name: str = None, reference: str = None, release: str = None, license: str = None,
                          description: str = None,
                          overwrite_image_reader_writer: str = None, **kwargs):
    """
    Generates a dataset.json file in the output folder.

    The dicts passed as channel_names and labels are NOT modified; normalized copies are written to the file.

    Args:
        output_folder: folder in which 'dataset.json' is written.
        channel_names: must map the channel index to the name of the channel, example:
            {
                0: 'T1',
                1: 'CT'
            }
            Keys that are not strings are converted with str() before writing.
            Note that the channel names may influence the normalization scheme!! Learn more in the documentation.
        labels: this will tell nnU-Net what labels to expect. Important: This will also determine whether you use
            region-based training or not.
            Example regular labels:
            {
                'background': 0,
                'left atrium': 1,
                'some other label': 2
            }
            Example region-based training:
            {
                'background': 0,
                'whole tumor': (1, 2, 3),
                'tumor core': (2, 3),
                'enhancing tumor': 3
            }
            Scalar values are converted with int(); tuple/list values are converted element-wise with int().
            Remember that nnU-Net expects consecutive values for labels! nnU-Net also expects 0 to be background!
        num_training_cases: is used to double check all cases are there! Stored under the key 'numTraining'.
        file_ending: needed for finding the files correctly. IMPORTANT! File endings must match between images and
            segmentations!
        regions_class_order: required as soon as any label is a tuple/list with more than one entry (i.e. a
            region). Written to the file whenever it is not None.
        dataset_name, reference, release, license, description: self-explanatory and not used by nnU-Net. Just for
            completeness and as a reminder that these would be great! Each is only written when it is not None.
        overwrite_image_reader_writer: If you need a special IO class for your dataset you can derive it from
            BaseReaderWriter, place it into nnunet.imageio and reference it here by name.
        kwargs: whatever you put here will be placed in the dataset.json as well. Applied last, so a kwarg with the
            same name as one of the keys above replaces that entry.

    Raises:
        AssertionError: if labels defines at least one region but regions_class_order is None.
        ValueError, TypeError: propagated from int() if a label value cannot be converted to an integer.
    """
    has_regions = any(isinstance(value, (tuple, list)) and len(value) > 1 for value in labels.values())
    if has_regions and regions_class_order is None:
        # Raised explicitly (instead of via an assert statement) so the check is not stripped under `python -O`.
        # The exception type and message are the same as before.
        raise AssertionError("You have defined regions but regions_class_order is not set. You need that.")

    # channel names need strings as keys. Build a new dict so the caller's dict keeps its original keys.
    channel_names_out = {key if isinstance(key, str) else str(key): name for key, name in channel_names.items()}

    # labels need ints as values (regions become tuples of ints). Again a new dict, the caller's stays untouched.
    labels_out = {}
    for label_name, value in labels.items():
        if isinstance(value, (tuple, list)):
            labels_out[label_name] = tuple(int(i) for i in value)
        else:
            labels_out[label_name] = int(value)

    dataset_json = {
        'channel_names': channel_names_out,  # previously this was called 'modality'. This is channel_names now.
        'labels': labels_out,
        'numTraining': num_training_cases,
        'file_ending': file_ending,
    }

    # Optional entries, in the order in which they appear in the file. Only values that were actually provided
    # are written. Note the key for the license is spelled 'licence'; it is kept as is so the emitted file does
    # not change.
    optional_entries = (
        ('name', dataset_name),
        ('reference', reference),
        ('release', release),
        ('licence', license),
        ('description', description),
        ('overwrite_image_reader_writer', overwrite_image_reader_writer),
        ('regions_class_order', regions_class_order),
    )
    for key, value in optional_entries:
        if value is not None:
            dataset_json[key] = value

    # user supplied extras go last and may therefore override any of the entries above
    dataset_json.update(kwargs)

    save_json(dataset_json, join(output_folder, 'dataset.json'), sort_keys=False)