Release of v2.24.0
ZwwWayne authored Apr 26, 2022
2 parents c72bc70 + 7d1c097 commit 1376e77
Showing 144 changed files with 3,651 additions and 376 deletions.
123 changes: 85 additions & 38 deletions .dev_scripts/gather_models.py
@@ -48,44 +48,85 @@ def process_checkpoint(in_file, out_file):
     return final_file


-def get_final_epoch(config):
+def is_by_epoch(config):
     cfg = mmcv.Config.fromfile('./configs/' + config)
-    return cfg.runner.max_epochs
+    return cfg.runner.type == 'EpochBasedRunner'


-def get_best_epoch(exp_dir):
-    best_epoch_full_path = list(
+def get_final_epoch_or_iter(config):
+    cfg = mmcv.Config.fromfile('./configs/' + config)
+    if cfg.runner.type == 'EpochBasedRunner':
+        return cfg.runner.max_epochs
+    else:
+        return cfg.runner.max_iters
+
+
+def get_best_epoch_or_iter(exp_dir):
+    best_epoch_iter_full_path = list(
         sorted(glob.glob(osp.join(exp_dir, 'best_*.pth'))))[-1]
-    best_epoch_model_path = best_epoch_full_path.split('/')[-1]
-    best_epoch = best_epoch_model_path.split('_')[-1].split('.')[0]
-    return best_epoch_model_path, int(best_epoch)
+    best_epoch_or_iter_model_path = best_epoch_iter_full_path.split('/')[-1]
+    best_epoch_or_iter = best_epoch_or_iter_model_path.\
+        split('_')[-1].split('.')[0]
+    return best_epoch_or_iter_model_path, int(best_epoch_or_iter)


-def get_real_epoch(config):
+def get_real_epoch_or_iter(config):
     cfg = mmcv.Config.fromfile('./configs/' + config)
-    epoch = cfg.runner.max_epochs
-    if cfg.data.train.type == 'RepeatDataset':
-        epoch *= cfg.data.train.times
-    return epoch
+    if cfg.runner.type == 'EpochBasedRunner':
+        epoch = cfg.runner.max_epochs
+        if cfg.data.train.type == 'RepeatDataset':
+            epoch *= cfg.data.train.times
+        return epoch
+    else:
+        return cfg.runner.max_iters


-def get_final_results(log_json_path, epoch, results_lut):
+def get_final_results(log_json_path,
+                      epoch_or_iter,
+                      results_lut,
+                      by_epoch=True):
     result_dict = dict()
+    last_val_line = None
+    last_train_line = None
+    last_val_line_idx = -1
+    last_train_line_idx = -1
     with open(log_json_path, 'r') as f:
-        for line in f.readlines():
+        for i, line in enumerate(f.readlines()):
             log_line = json.loads(line)
             if 'mode' not in log_line.keys():
                 continue

-            if log_line['mode'] == 'train' and log_line['epoch'] == epoch:
-                result_dict['memory'] = log_line['memory']
-
-            if log_line['mode'] == 'val' and log_line['epoch'] == epoch:
-                result_dict.update({
-                    key: log_line[key]
-                    for key in results_lut if key in log_line
-                })
-                return result_dict
+            if by_epoch:
+                if (log_line['mode'] == 'train'
+                        and log_line['epoch'] == epoch_or_iter):
+                    result_dict['memory'] = log_line['memory']
+
+                if (log_line['mode'] == 'val'
+                        and log_line['epoch'] == epoch_or_iter):
+                    result_dict.update({
+                        key: log_line[key]
+                        for key in results_lut if key in log_line
+                    })
+                    return result_dict
+            else:
+                if log_line['mode'] == 'train':
+                    last_train_line_idx = i
+                    last_train_line = log_line
+
+                if log_line and log_line['mode'] == 'val':
+                    last_val_line_idx = i
+                    last_val_line = log_line
+
+    # bug: max_iters = 768, last_train_line['iter'] = 750
+    assert last_val_line_idx == last_train_line_idx + 1, \
+        'Log file is incomplete'
+    result_dict['memory'] = last_train_line['memory']
+    result_dict.update({
+        key: last_val_line[key]
+        for key in results_lut if key in last_val_line
+    })
+
+    return result_dict


 def get_dataset_name(config):
@@ -116,10 +157,12 @@ def convert_model_info_to_pwc(model_infos):

         # get metadata
         memory = round(model['results']['memory'] / 1024, 1)
-        epochs = get_real_epoch(model['config'])
         meta_data = OrderedDict()
         meta_data['Training Memory (GB)'] = memory
-        meta_data['Epochs'] = epochs
+        if 'epochs' in model:
+            meta_data['Epochs'] = get_real_epoch_or_iter(model['config'])
+        else:
+            meta_data['Iterations'] = get_real_epoch_or_iter(model['config'])
         pwc_model_info['Metadata'] = meta_data

         # get dataset name
@@ -200,12 +243,14 @@ def main():
     model_infos = []
     for used_config in used_configs:
         exp_dir = osp.join(models_root, used_config)
+        by_epoch = is_by_epoch(used_config)
         # check whether the exps is finished
         if args.best is True:
-            final_model, final_epoch = get_best_epoch(exp_dir)
+            final_model, final_epoch_or_iter = get_best_epoch_or_iter(exp_dir)
         else:
-            final_epoch = get_final_epoch(used_config)
-            final_model = 'epoch_{}.pth'.format(final_epoch)
+            final_epoch_or_iter = get_final_epoch_or_iter(used_config)
+            final_model = '{}_{}.pth'.format('epoch' if by_epoch else 'iter',
+                                             final_epoch_or_iter)

         model_path = osp.join(exp_dir, final_model)
         # skip if the model is still training
@@ -225,21 +270,23 @@
         for i, key in enumerate(results_lut):
             if 'mAP' not in key and 'PQ' not in key:
                 results_lut[i] = key + 'm_AP'
-        model_performance = get_final_results(log_json_path, final_epoch,
-                                              results_lut)
+        model_performance = get_final_results(log_json_path,
+                                              final_epoch_or_iter, results_lut,
+                                              by_epoch)

         if model_performance is None:
             continue

         model_time = osp.split(log_txt_path)[-1].split('.')[0]
-        model_infos.append(
-            dict(
-                config=used_config,
-                results=model_performance,
-                epochs=final_epoch,
-                model_time=model_time,
-                final_model=final_model,
-                log_json_path=osp.split(log_json_path)[-1]))
+        model_info = dict(
+            config=used_config,
+            results=model_performance,
+            model_time=model_time,
+            final_model=final_model,
+            log_json_path=osp.split(log_json_path)[-1])
+        model_info['epochs' if by_epoch else 'iterations'] =\
+            final_epoch_or_iter
+        model_infos.append(model_info)

     # publish model for each checkpoint
     publish_model_infos = []
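For context on the new `by_epoch=False` branch: each line of the parsed log.json is a standalone JSON record, and for iteration-based runs the script keeps the last train/val records instead of matching a fixed epoch number. A minimal sketch of the record shapes involved (keys taken from the diff above, values invented for illustration):

```python
import json

# Hypothetical log.json records of the shape get_final_results() consumes.
# Only the keys ('mode', 'epoch', 'iter', 'memory', metric names) are taken
# from the diff above; the values are made up for illustration.
log_lines = [
    '{"mode": "train", "epoch": 12, "iter": 7330, "memory": 4523}',
    '{"mode": "val", "epoch": 12, "iter": 7330, "bbox_mAP": 0.382}',
]
for i, line in enumerate(log_lines):
    record = json.loads(line)
    if record['mode'] == 'train':
        print(i, 'train memory:', record['memory'])
    else:
        print(i, 'val metrics:',
              {k: v for k, v in record.items() if k.endswith('mAP')})
```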
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -40,7 +40,7 @@ repos:
       - id: docformatter
         args: ["--in-place", "--wrap-descriptions", "79"]
   - repo: https://github.com/open-mmlab/pre-commit-hooks
-    rev: master  # Use the ref you want to point at
+    rev: v0.2.0  # Use the ref you want to point at
     hooks:
       - id: check-algo-readme
       - id: check-copyright
8 changes: 4 additions & 4 deletions README.md
@@ -74,11 +74,11 @@ This project is released under the [Apache 2.0 license](LICENSE).

 ## Changelog

-**2.23.0** was released in 28/3/2022:
+**2.24.0** was released in 26/4/2022:

-- Support [Mask2Former](configs/mask2former) and [EfficientNet](configs/efficientnet)
-- Support setting data root through environment variable `MMDET_DATASETS`, users don't have to modify the corresponding path in config files anymore.
-- Find a good recipe for fine-tuning high precision ResNet backbone pre-trained by Torchvision.
+- Support [Simple Copy Paste](configs/simple_copy_paste)
+- Support automatically scaling LR according to GPU number and samples per GPU
+- Support Class Aware Sampler that improves performance on OpenImages Dataset

 Please refer to [changelog.md](docs/en/changelog.md) for details and release history.
8 changes: 4 additions & 4 deletions README_zh-CN.md
@@ -73,11 +73,11 @@ MMDetection is an open-source object detection toolbox based on PyTorch. It is [Ope

 ## Changelog

-The latest **2.23.0** version was released on 28/3/2022:
+The latest **2.24.0** version was released on 26/4/2022:

-- Support [Mask2Former](configs/mask2former) and [EfficientNet](configs/efficientnet)
-- Support setting the data root through the environment variable `MMDET_DATASETS`, so the corresponding paths in config files no longer need to be modified.
-- Find a good recipe for fine-tuning the high-precision ResNet backbone pre-trained by Torchvision.
+- Support [Simple Copy Paste](configs/simple_copy_paste)
+- Support automatically scaling the learning rate according to the total batch size during training
+- Support a Class Aware Sampler that improves performance on the OpenImages dataset

 Please refer to the [changelog](docs/en/changelog.md) for more details and release history.
6 changes: 6 additions & 0 deletions configs/_base_/default_runtime.py
@@ -19,3 +19,9 @@
 opencv_num_threads = 0
 # set multi-process start method as `fork` to speed up the training
 mp_start_method = 'fork'
+
+# Default setting for scaling LR automatically
+#   - `enable` means enable scaling LR automatically
+#       or not by default.
+#   - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
+auto_scale_lr = dict(enable=False, base_batch_size=16)
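For context, the automatic LR scaling announced in this release follows the linear scaling rule: the configured LR is multiplied by the ratio of the actual total batch size to `base_batch_size`. A minimal sketch of that arithmetic (an illustration of the idea, not MMDetection's actual hook implementation):

```python
# Minimal sketch of the linear scaling rule that `auto_scale_lr` enables;
# illustration only, not MMDetection's actual implementation.
def scale_lr(base_lr, num_gpus, samples_per_gpu, base_batch_size=16):
    """Scale base_lr by the ratio of actual to base total batch size."""
    total_batch_size = num_gpus * samples_per_gpu
    return base_lr * total_batch_size / base_batch_size

# A config tuned for 8 GPUs x 2 samples per GPU (lr=0.02), run on 16 GPUs:
print(scale_lr(0.02, num_gpus=16, samples_per_gpu=2))  # -> 0.04
```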
7 changes: 5 additions & 2 deletions configs/_base_/models/faster_rcnn_r50_caffe_c4.py
@@ -42,7 +42,10 @@
             dilation=1,
             style='caffe',
             norm_cfg=norm_cfg,
-            norm_eval=True),
+            norm_eval=True,
+            init_cfg=dict(
+                type='Pretrained',
+                checkpoint='open-mmlab://detectron2/resnet50_caffe')),
         bbox_roi_extractor=dict(
             type='SingleRoIExtractor',
             roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
@@ -78,7 +81,7 @@
                 pos_fraction=0.5,
                 neg_pos_ub=-1,
                 add_gt_as_proposals=False),
-            allowed_border=0,
+            allowed_border=-1,
             pos_weight=-1,
             debug=False),
         rpn_proposal=dict(
5 changes: 5 additions & 0 deletions configs/centernet/centernet_resnet18_dcnv2_140e_coco.py
@@ -120,3 +120,8 @@
     warmup_ratio=1.0 / 1000,
     step=[18, 24])  # the real step is [18*5, 24*5]
 runner = dict(max_epochs=28)  # the real epoch is 28*5=140
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (16 samples per GPU)
+auto_scale_lr = dict(base_batch_size=128)
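For context, the `28*5=140` note exists because this config wraps its dataset in a `RepeatDataset`, which is exactly the case `get_real_epoch_or_iter()` in .dev_scripts/gather_models.py corrects for. A tiny sketch of that arithmetic (the repeat count is assumed from the `[18*5, 24*5]` comment above):

```python
# Sketch of how gather_models.py's get_real_epoch_or_iter() recovers the
# "real" epoch count for a RepeatDataset config such as this one.
max_epochs = 28    # runner.max_epochs in this config
repeat_times = 5   # data.train.times, assumed from the "28*5=140" comment
real_epochs = max_epochs * repeat_times
print(real_epochs)  # -> 140, matching the comment above
```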
5 changes: 5 additions & 0 deletions
@@ -103,3 +103,8 @@
     warmup_ratio=1.0 / 3,
     step=[190])
 runner = dict(type='EpochBasedRunner', max_epochs=210)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (16 GPUs) x (6 samples per GPU)
+auto_scale_lr = dict(base_batch_size=96)
5 changes: 5 additions & 0 deletions configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py
@@ -37,3 +37,8 @@
 log_config = dict(interval=100)
 # For better, more stable performance initialize from COCO
 load_from = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'  # noqa
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (1 samples per GPU)
+auto_scale_lr = dict(base_batch_size=8)
5 changes: 5 additions & 0 deletions configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py
@@ -44,3 +44,8 @@
 log_config = dict(interval=100)
 # For better, more stable performance initialize from COCO
 load_from = 'https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth'  # noqa
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (1 samples per GPU)
+auto_scale_lr = dict(base_batch_size=8)
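For context, this commit touches two different weight-initialization idioms: the cityscapes configs load a complete COCO-trained detector through `load_from`, while the faster_rcnn_r50_caffe_c4.py change above attaches a `Pretrained` init_cfg to a single module. A side-by-side sketch (the config fragments reuse values from the diffs above):

```python
# Two initialization idioms seen in this commit (illustrative fragments):

# 1) `load_from` (cityscapes configs above): load a full detector
#    checkpoint before fine-tuning starts.
load_from = 'https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth'  # noqa

# 2) `init_cfg` (faster_rcnn_r50_caffe_c4.py above): load pretrained
#    weights for one module only, here a caffe-style ResNet-50.
model = dict(
    backbone=dict(
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnet50_caffe')))
```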
91 changes: 91 additions & 0 deletions configs/common/ssj_270k_coco_instance.py
@@ -0,0 +1,91 @@
+_base_ = '../_base_/default_runtime.py'
+# dataset settings
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+image_size = (1024, 1024)
+
+file_client_args = dict(backend='disk')
+
+# Standard Scale Jittering (SSJ) resizes and crops an image
+# with a resize range of 0.8 to 1.25 of the original image size.
+train_pipeline = [
+    dict(type='LoadImageFromFile', file_client_args=file_client_args),
+    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+    dict(
+        type='Resize',
+        img_scale=image_size,
+        ratio_range=(0.8, 1.25),
+        multiscale_mode='range',
+        keep_ratio=True),
+    dict(
+        type='RandomCrop',
+        crop_type='absolute_range',
+        crop_size=image_size,
+        recompute_bbox=True,
+        allow_negative_crop=True),
+    dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2)),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size=image_size),  # padding to image_size leads 0.5+ mAP
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile', file_client_args=file_client_args),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1333, 800),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+
+data = dict(
+    samples_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_train2017.json',
+        img_prefix=data_root + 'train2017/',
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        pipeline=test_pipeline))
+
+evaluation = dict(interval=6000, metric=['bbox', 'segm'])
+
+# optimizer assumes batch_size = (32 GPUs) x (2 samples per GPU)
+optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.00004)
+optimizer_config = dict(grad_clip=None)
+
+# lr steps at [0.9, 0.95, 0.975] of the maximum iterations
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=1000,
+    warmup_ratio=0.001,
+    step=[243000, 256500, 263250])
+checkpoint_config = dict(interval=6000)
+# The model is trained by 270k iterations with batch_size 64,
+# which is roughly equivalent to 144 epochs.
+runner = dict(type='IterBasedRunner', max_iters=270000)
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (32 GPUs) x (2 samples per GPU)
+auto_scale_lr = dict(base_batch_size=64)
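For context, the SSJ comment above describes the Resize + RandomCrop pair: a scale factor is drawn from [0.8, 1.25], the image is resized, and a fixed 1024x1024 crop is taken. A minimal standalone sketch of the scale sampling (illustration only, not MMDetection's Resize implementation):

```python
import random

# Illustration of the Standard Scale Jittering (SSJ) sampling performed by
# the Resize step above; the real Resize transform also rescales boxes and
# masks and handles keep_ratio bookkeeping.
def ssj_target_size(base_size=(1024, 1024), ratio_range=(0.8, 1.25)):
    """Sample a jittered target size around base_size."""
    ratio = random.uniform(*ratio_range)
    w, h = base_size
    return int(w * ratio), int(h * ratio)

print(ssj_target_size())  # e.g. (921, 921); RandomCrop then takes 1024x1024
```

The "roughly equivalent to 144 epochs" comment is consistent: 270,000 iterations x 64 images per batch is about 17.3M images, or roughly 146 passes over the ~118k images of COCO train2017.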