From 242222ed8402d9ddc3dd45d6ef5c8430f7953d1c Mon Sep 17 00:00:00 2001 From: Joshua Zhang Date: Sat, 9 Sep 2017 00:09:17 -0700 Subject: [PATCH 01/12] fix concat link --- symbol/symbol_darknet19_yolo.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/symbol/symbol_darknet19_yolo.py b/symbol/symbol_darknet19_yolo.py index 5878632..1f5804e 100644 --- a/symbol/symbol_darknet19_yolo.py +++ b/symbol/symbol_darknet19_yolo.py @@ -9,7 +9,7 @@ def get_symbol(num_classes=20, nms_thresh=0.5, force_nms=False, **kwargs): bone = get_darknet19(num_classes=num_classes, **kwargs) - conv5_5 = bone.get_internals()["conv5_5_output"] + conv5_1 = bone.get_internals()["conv5_1_output"] conv6_5 = bone.get_internals()["conv6_5_output"] # anchors anchors = [ @@ -21,14 +21,16 @@ def get_symbol(num_classes=20, nms_thresh=0.5, force_nms=False, **kwargs): num_anchor = len(anchors) // 2 # extra layers + conv5_6 = conv_act_layer(conv5_1, 'conv5_6', 1024, kernel=(3, 3), pad=(1, 1), + act_type='leaky') conv7_1 = conv_act_layer(conv6_5, 'conv7_1', 1024, kernel=(3, 3), pad=(1, 1), act_type='leaky') conv7_2 = conv_act_layer(conv7_1, 'conv7_2', 1024, kernel=(3, 3), pad=(1, 1), act_type='leaky') - # re-organze conv5_5 and concat conv7_2 - conv5_6 = mx.sym.stack_neighbor(data=conv5_5, kernel=(2, 2), name='stack_downsample') - concat = mx.sym.Concat(*[conv5_6, conv7_2], dim=1) + # re-organze conv5_6 and concat conv7_2 + conv5_7 = mx.sym.stack_neighbor(data=conv5_6, kernel=(2, 2), name='stack_downsample') + concat = mx.sym.Concat(*[conv5_7, conv7_2], dim=1) # concat = conv7_2 conv8_1 = conv_act_layer(concat, 'conv8_1', 1024, kernel=(3, 3), pad=(1, 1), act_type='leaky') From e0c0a0f3d0158cb4b3de5fe0b4044b3c05cfddb9 Mon Sep 17 00:00:00 2001 From: Joshua Zhang Date: Mon, 18 Sep 2017 11:08:05 -0700 Subject: [PATCH 02/12] fix activation --- symbol/symbol_darknet19_yolo.py | 6 +++--- train.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/symbol/symbol_darknet19_yolo.py b/symbol/symbol_darknet19_yolo.py index 1f5804e..d921bdb 100644 --- a/symbol/symbol_darknet19_yolo.py +++ b/symbol/symbol_darknet19_yolo.py @@ -9,8 +9,8 @@ def get_symbol(num_classes=20, nms_thresh=0.5, force_nms=False, **kwargs): bone = get_darknet19(num_classes=num_classes, **kwargs) - conv5_1 = bone.get_internals()["conv5_1_output"] - conv6_5 = bone.get_internals()["conv6_5_output"] + conv5_5 = bone.get_internals()["leaky_conv5_5_output"] + conv6_5 = bone.get_internals()["leaky_conv6_5_output"] # anchors anchors = [ 1.3221, 1.73145, @@ -21,7 +21,7 @@ def get_symbol(num_classes=20, nms_thresh=0.5, force_nms=False, **kwargs): num_anchor = len(anchors) // 2 # extra layers - conv5_6 = conv_act_layer(conv5_1, 'conv5_6', 1024, kernel=(3, 3), pad=(1, 1), + conv5_6 = conv_act_layer(conv5_5, 'conv5_6', 1024, kernel=(3, 3), pad=(1, 1), act_type='leaky') conv7_1 = conv_act_layer(conv6_5, 'conv7_1', 1024, kernel=(3, 3), pad=(1, 1), act_type='leaky') diff --git a/train.py b/train.py index b7d3cf4..3c31589 100644 --- a/train.py +++ b/train.py @@ -49,7 +49,7 @@ def parse_args(): default=512, help='maximum random data shape') parser.add_argument('--label-width', dest='label_width', type=int, default=350, help='force padding label width to sync across train and validation') - parser.add_argument('--lr', dest='learning_rate', type=float, default=0.0001, + parser.add_argument('--lr', dest='learning_rate', type=float, default=0.001, help='learning rate') parser.add_argument('--momentum', dest='momentum', type=float, default=0.9, help='momentum') @@ -61,9 +61,9 @@ def parse_args(): help='green mean value') parser.add_argument('--mean-b', dest='mean_b', type=float, default=103.939, help='blue mean value') - parser.add_argument('--lr-steps', dest='lr_refactor_step', type=str, default='150, 200', + parser.add_argument('--lr-steps', dest='lr_refactor_step', type=str, default='90, 180', help='refactor learning rate at specified epochs') - parser.add_argument('--lr-factor', dest='lr_refactor_ratio', type=str, default=1, + parser.add_argument('--lr-factor', dest='lr_refactor_ratio', type=str, default=.1, help='ratio to refactor learning rate') parser.add_argument('--freeze', dest='freeze_pattern', type=str, default="^(conv1_|conv2_).*", help='freeze layer pattern') From 8436202735fdca0d105b8509722934401b1340ba Mon Sep 17 00:00:00 2001 From: Joshua Zhang Date: Mon, 18 Sep 2017 11:13:37 -0700 Subject: [PATCH 03/12] update hyper params --- train.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/train.py b/train.py index 3c31589..396b118 100644 --- a/train.py +++ b/train.py @@ -44,9 +44,9 @@ def parse_args(): parser.add_argument('--random-shape-epoch', dest='random_shape_epoch', type=int, default=10, help='random shape epoch') parser.add_argument('--min-random-shape', dest='min_random_shape', type=int, - default=352, help='minimum random data shape') + default=320, help='minimum random data shape') parser.add_argument('--max-random-shape', dest='max_random_shape', type=int, - default=512, help='maximum random data shape') + default=608, help='maximum random data shape') parser.add_argument('--label-width', dest='label_width', type=int, default=350, help='force padding label width to sync across train and validation') parser.add_argument('--lr', dest='learning_rate', type=float, default=0.001, @@ -65,7 +65,7 @@ def parse_args(): help='refactor learning rate at specified epochs') parser.add_argument('--lr-factor', dest='lr_refactor_ratio', type=str, default=.1, help='ratio to refactor learning rate') - parser.add_argument('--freeze', dest='freeze_pattern', type=str, default="^(conv1_|conv2_).*", + parser.add_argument('--freeze', dest='freeze_pattern', type=str, default="", help='freeze layer pattern') parser.add_argument('--log', dest='log_file', type=str, default="train.log", help='save training log to file') From 61368b76e8ba17dc7a186fd3194090b80d5203d2 Mon Sep 17 00:00:00 2001 From: Joshua Zhang Date: Mon, 18 Sep 2017 11:34:06 -0700 Subject: [PATCH 04/12] rescale grad --- train/train_net.py | Bin 11584 -> 11597 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/train/train_net.py b/train/train_net.py index ade2825b4ca16f6cacb40043251e020f3336a0a0..b7a246b721614602059a8461fd1eb4db16c81ddb 100644 GIT binary patch delta 25 gcmX>Qbv9~4oEDG1LQ-N$az=b{W>xCuSgprQ0ESNr5C8xG delta 12 TcmX>bbs%a(oYv-Stw&4%D6Ivi From 83f831f5eefbc6d03e74af7c8e9baada243bbcea Mon Sep 17 00:00:00 2001 From: Joshua Zhang Date: Wed, 20 Sep 2017 21:48:24 -0700 Subject: [PATCH 05/12] replace stack_neighbor with reshape + transpose --- demo.py | 2 +- symbol/symbol_darknet19_yolo.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/demo.py b/demo.py index 75b0b18..f510c75 100644 --- a/demo.py +++ b/demo.py @@ -36,7 +36,7 @@ def get_detector(net, prefix, epoch, data_shape, mean_pixels, ctx, """ sys.path.append(os.path.join(os.getcwd(), 'symbol')) if net is not None: - prefix = prefix + "_" + net.strip('_yolo') + '_' + str(416) + prefix = prefix + "_" + net.strip('_yolo') + '_' + str(data_shape) net = importlib.import_module("symbol_" + net) \ .get_symbol(len(CLASSES), nms_thresh, force_nms) detector = Detector(net, prefix, epoch, \ diff --git a/symbol/symbol_darknet19_yolo.py b/symbol/symbol_darknet19_yolo.py index d921bdb..c33c299 100644 --- a/symbol/symbol_darknet19_yolo.py +++ b/symbol/symbol_darknet19_yolo.py @@ -29,7 +29,11 @@ def get_symbol(num_classes=20, nms_thresh=0.5, force_nms=False, **kwargs): act_type='leaky') # re-organze conv5_6 and concat conv7_2 - conv5_7 = mx.sym.stack_neighbor(data=conv5_6, kernel=(2, 2), name='stack_downsample') + # conv5_7 = mx.sym.stack_neighbor(data=conv5_6, kernel=(2, 2), name='stack_downsample') + conv5_7 = mx.sym.reshape(conv5_6, shape=(0, 0, -4, -1, 2, -4, -1, 2)) # (b, c, h/2, 2, w/2, 2) + conv5_7 = mx.sym.transpose(conv5_7, axes=(0, 1, 3, 5, 2, 4)) # (b, c, 2, 2, h/2, w/2) + conv5_7 = mx.sym.reshape(conv5_7, shape=(0, -2, 0, 0, 0)) # (b, c * 2, 2, h/2, w/2) + conv5_7 = mx.sym.reshape(conv5_7, shape=(0, -2, 0, 0)) # (b, c * 4, h/2, w/2) concat = mx.sym.Concat(*[conv5_7, conv7_2], dim=1) # concat = conv7_2 conv8_1 = conv_act_layer(concat, 'conv8_1', 1024, kernel=(3, 3), pad=(1, 1), From 3430276ce809c4aa6e745f2b9b612ad34c5f20f5 Mon Sep 17 00:00:00 2001 From: Joshua Zhang Date: Wed, 20 Sep 2017 22:12:44 -0700 Subject: [PATCH 06/12] fix --- symbol/symbol_darknet19_yolo.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/symbol/symbol_darknet19_yolo.py b/symbol/symbol_darknet19_yolo.py index c33c299..91dc20e 100644 --- a/symbol/symbol_darknet19_yolo.py +++ b/symbol/symbol_darknet19_yolo.py @@ -30,10 +30,12 @@ def get_symbol(num_classes=20, nms_thresh=0.5, force_nms=False, **kwargs): # re-organze conv5_6 and concat conv7_2 # conv5_7 = mx.sym.stack_neighbor(data=conv5_6, kernel=(2, 2), name='stack_downsample') - conv5_7 = mx.sym.reshape(conv5_6, shape=(0, 0, -4, -1, 2, -4, -1, 2)) # (b, c, h/2, 2, w/2, 2) - conv5_7 = mx.sym.transpose(conv5_7, axes=(0, 1, 3, 5, 2, 4)) # (b, c, 2, 2, h/2, w/2) - conv5_7 = mx.sym.reshape(conv5_7, shape=(0, -2, 0, 0, 0)) # (b, c * 2, 2, h/2, w/2) - conv5_7 = mx.sym.reshape(conv5_7, shape=(0, -2, 0, 0)) # (b, c * 4, h/2, w/2) + conv5_7 = mx.sym.reshape(conv5_6, shape=(0, 0, -4, -1, 2, 0)) # (b, c, h/2, 2, w) + conv5_7 = mx.sym.transpose(conv5_7, axes=(0, 1, 3, 2, 4)) # (b, c, 2, h/2, w) + conv5_7 = mx.sym.reshape(conv5_7, shape=(0, -2, 0, 0)) # (b, c * 2, h/2, w) + conv5_7 = mx.sym.reshape(conv5_7, shape=(0, 0, 0, -4, -1, 2)) # (b, c * 2, h/2, w/2, 2) + conv5_7 = mx.sym.transpose(conv5_7, axes=(0, 1, 4, 2, 3)) # (b, c*2, 2, h/2, w/2) + conv5_7 = mx.sym.reshape(conv5_7, shape=(0, -2, 0, 0)) # (b, c*4, h/2, w/2) concat = mx.sym.Concat(*[conv5_7, conv7_2], dim=1) # concat = conv7_2 conv8_1 = conv_act_layer(concat, 'conv8_1', 1024, kernel=(3, 3), pad=(1, 1), From 29c82bd8db520878e1f9a949321007ce31d83ef2 Mon Sep 17 00:00:00 2001 From: Joshua Zhang Date: Wed, 20 Sep 2017 22:19:45 -0700 Subject: [PATCH 07/12] fix reshape --- symbol/symbol_darknet19_yolo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/symbol/symbol_darknet19_yolo.py b/symbol/symbol_darknet19_yolo.py index 91dc20e..45c1b1e 100644 --- a/symbol/symbol_darknet19_yolo.py +++ b/symbol/symbol_darknet19_yolo.py @@ -30,12 +30,12 @@ def get_symbol(num_classes=20, nms_thresh=0.5, force_nms=False, **kwargs): # re-organze conv5_6 and concat conv7_2 # conv5_7 = mx.sym.stack_neighbor(data=conv5_6, kernel=(2, 2), name='stack_downsample') - conv5_7 = mx.sym.reshape(conv5_6, shape=(0, 0, -4, -1, 2, 0)) # (b, c, h/2, 2, w) + conv5_7 = mx.sym.reshape(conv5_6, shape=(0, 0, -4, -1, 2, -2)) # (b, c, h/2, 2, w) conv5_7 = mx.sym.transpose(conv5_7, axes=(0, 1, 3, 2, 4)) # (b, c, 2, h/2, w) - conv5_7 = mx.sym.reshape(conv5_7, shape=(0, -2, 0, 0)) # (b, c * 2, h/2, w) + conv5_7 = mx.sym.reshape(conv5_7, shape=(0, -3, -1, -2)) # (b, c * 2, h/2, w) conv5_7 = mx.sym.reshape(conv5_7, shape=(0, 0, 0, -4, -1, 2)) # (b, c * 2, h/2, w/2, 2) conv5_7 = mx.sym.transpose(conv5_7, axes=(0, 1, 4, 2, 3)) # (b, c*2, 2, h/2, w/2) - conv5_7 = mx.sym.reshape(conv5_7, shape=(0, -2, 0, 0)) # (b, c*4, h/2, w/2) + conv5_7 = mx.sym.reshape(conv5_7, shape=(0, -3, -1, -2)) # (b, c*4, h/2, w/2) concat = mx.sym.Concat(*[conv5_7, conv7_2], dim=1) # concat = conv7_2 conv8_1 = conv_act_layer(concat, 'conv8_1', 1024, kernel=(3, 3), pad=(1, 1), From 8241a2eec56625d9d01051c000db291a89fc3b54 Mon Sep 17 00:00:00 2001 From: Joshua Zhang Date: Wed, 20 Sep 2017 22:34:31 -0700 Subject: [PATCH 08/12] fix lr-factor to float --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index 396b118..08e5504 100644 --- a/train.py +++ b/train.py @@ -63,7 +63,7 @@ def parse_args(): help='blue mean value') parser.add_argument('--lr-steps', dest='lr_refactor_step', type=str, default='90, 180', help='refactor learning rate at specified epochs') - parser.add_argument('--lr-factor', dest='lr_refactor_ratio', type=str, default=.1, + parser.add_argument('--lr-factor', dest='lr_refactor_ratio', type=float, default=.1, help='ratio to refactor learning rate') parser.add_argument('--freeze', dest='freeze_pattern', type=str, default="", help='freeze layer pattern') From bd0503554d46e587870d905e877a957124e84722 Mon Sep 17 00:00:00 2001 From: Joshua Zhang Date: Fri, 22 Sep 2017 11:00:50 -0700 Subject: [PATCH 09/12] add small yolo --- symbol/symbol_darknet_syolo.py | 37 ++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 symbol/symbol_darknet_syolo.py diff --git a/symbol/symbol_darknet_syolo.py b/symbol/symbol_darknet_syolo.py new file mode 100644 index 0000000..71742dc --- /dev/null +++ b/symbol/symbol_darknet_syolo.py @@ -0,0 +1,37 @@ +""" +Reference: +Redmon, Joseph, and Ali Farhadi. "YOLO9000: Better, Faster, Stronger." +"https://arxiv.org/pdf/1612.08242.pdf" +""" +import mxnet as mx +from symbol_darknet19 import get_symbol as get_darknet19 +from symbol_darknet19 import conv_act_layer + +def get_symbol(num_classes=20, nms_thresh=0.5, force_nms=False, **kwargs): + bone = get_darknet19(num_classes=num_classes, **kwargs) + conv5_5 = bone.get_internals()["leaky_conv5_5_output"] + conv6_5 = bone.get_internals()["leaky_conv6_5_output"] + # anchors + anchors = [ + 1.3221, 1.73145, + 3.19275, 4.00944, + 5.05587, 8.09892, + 9.47112, 4.84053, + 11.2364, 10.0071] + num_anchor = len(anchors) // 2 + + # extra layers + conv7_1 = conv_act_layer(conv6_5, 'conv7_1', 1024, kernel=(3, 3), pad=(1, 1), + act_type='leaky') + conv7_2 = conv_act_layer(conv7_1, 'conv7_2', 1024, kernel=(3, 3), pad=(1, 1), + act_type='leaky') + conv8_1 = conv_act_layer(conv7_2, 'conv8_1', 1024, kernel=(3, 3), pad=(1, 1), + act_type='leaky') + pred = mx.symbol.Convolution(data=conv8_1, name='conv_pred', kernel=(1, 1), + num_filter=num_anchor * (num_classes + 4 + 1)) + + out = mx.contrib.symbol.YoloOutput(data=pred, num_class=num_classes, + num_anchor=num_anchor, object_grad_scale=5.0, background_grad_scale=1.0, + coord_grad_scale=1.0, class_grad_scale=1.0, anchors=anchors, + nms_topk=400, warmup_samples=12800, name='yolo_output') + return out From 554b2ae3b64cb4a14dfdb05b7da57332118103f2 Mon Sep 17 00:00:00 2001 From: Joshua Zhang Date: Wed, 24 Jan 2018 13:23:10 -0600 Subject: [PATCH 10/12] update mxnet and python compatiblility --- config/config.py | 2 +- dataset/concat_db.py | 2 +- dataset/pascal_voc.py | 2 +- dataset/testdb.py | 2 +- dataset/yolo_format.py | 2 +- mxnet | 2 +- symbol/common.py | 156 ++----------------------------- symbol/symbol_darknet19_lyolo.py | 4 +- symbol/symbol_darknet19_yolo.py | 4 +- symbol/symbol_darknet_syolo.py | 4 +- symbol/symbol_resnet50_yolo.py | 6 +- 11 files changed, 25 insertions(+), 161 deletions(-) diff --git a/config/config.py b/config/config.py index 0522e64..7328cdf 100644 --- a/config/config.py +++ b/config/config.py @@ -1,5 +1,5 @@ import os -from utils import DotDict, namedtuple_with_defaults, zip_namedtuple, config_as_dict +from config.utils import DotDict, namedtuple_with_defaults, zip_namedtuple, config_as_dict RandCropper = namedtuple_with_defaults('RandCropper', 'min_crop_scales, max_crop_scales, \ diff --git a/dataset/concat_db.py b/dataset/concat_db.py index da9e151..4c1d92a 100644 --- a/dataset/concat_db.py +++ b/dataset/concat_db.py @@ -1,4 +1,4 @@ -from imdb import Imdb +from dataset.imdb import Imdb import random class ConcatDB(Imdb): diff --git a/dataset/pascal_voc.py b/dataset/pascal_voc.py index 2c61be7..9f6ba7a 100644 --- a/dataset/pascal_voc.py +++ b/dataset/pascal_voc.py @@ -1,7 +1,7 @@ from __future__ import print_function import os import numpy as np -from imdb import Imdb +from dataset.imdb import Imdb import xml.etree.ElementTree as ET from evaluate.eval_voc import voc_eval import cv2 diff --git a/dataset/testdb.py b/dataset/testdb.py index 7477d77..a7e1735 100644 --- a/dataset/testdb.py +++ b/dataset/testdb.py @@ -1,5 +1,5 @@ import os -from imdb import Imdb +from dataset.imdb import Imdb class TestDB(Imdb): diff --git a/dataset/yolo_format.py b/dataset/yolo_format.py index e82e5ca..f5dd821 100644 --- a/dataset/yolo_format.py +++ b/dataset/yolo_format.py @@ -1,6 +1,6 @@ import os import numpy as np -from imdb import Imdb +from dataset.imdb import Imdb class YoloFormat(Imdb): diff --git a/mxnet b/mxnet index 37455ca..27598c9 160000 --- a/mxnet +++ b/mxnet @@ -1 +1 @@ -Subproject commit 37455ca2b2c19ff88b3d1a3075a02a4f49db9dd6 +Subproject commit 27598c951ae9f0455baae9092a08dca5f001aae6 diff --git a/symbol/common.py b/symbol/common.py index 9c97eee..a8e80a1 100644 --- a/symbol/common.py +++ b/symbol/common.py @@ -37,150 +37,12 @@ def conv_act_layer(from_layer, name, num_filter, kernel=(1,1), pad=(0,0), \ name="{}{}".format(act_type, name)) return relu -def multibox_layer(from_layers, num_classes, sizes=[.2, .95], - ratios=[1], normalization=-1, num_channels=[], - clip=True, interm_layer=0, steps=[]): - """ - the basic aggregation module for SSD detection. Takes in multiple layers, - generate multiple object detection targets by customized layers - - Parameters: - ---------- - from_layers : list of mx.symbol - generate multibox detection from layers - num_classes : int - number of classes excluding background, will automatically handle - background in this function - sizes : list or list of list - [min_size, max_size] for all layers or [[], [], []...] for specific layers - ratios : list or list of list - [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers - normalizations : int or list of int - use normalizations value for all layers or [...] for specific layers, - -1 indicate no normalizations and scales - num_channels : list of int - number of input layer channels, used when normalization is enabled, the - length of list should equals to number of normalization layers - clip : bool - whether to clip out-of-image boxes - interm_layer : int - if > 0, will add a intermediate Convolution layer - steps : list - specify steps for each MultiBoxPrior layer, leave empty, it will calculate - according to layer dimensions - - Returns: - ---------- - list of outputs, as [loc_preds, cls_preds, anchor_boxes] - loc_preds : localization regression prediction - cls_preds : classification prediction - anchor_boxes : generated anchor boxes - """ - assert len(from_layers) > 0, "from_layers must not be empty list" - assert num_classes > 0, \ - "num_classes {} must be larger than 0".format(num_classes) - - assert len(ratios) > 0, "aspect ratios must not be empty list" - if not isinstance(ratios[0], list): - # provided only one ratio list, broadcast to all from_layers - ratios = [ratios] * len(from_layers) - assert len(ratios) == len(from_layers), \ - "ratios and from_layers must have same length" - - assert len(sizes) > 0, "sizes must not be empty list" - if len(sizes) == 2 and not isinstance(sizes[0], list): - # provided size range, we need to compute the sizes for each layer - assert sizes[0] > 0 and sizes[0] < 1 - assert sizes[1] > 0 and sizes[1] < 1 and sizes[1] > sizes[0] - tmp = np.linspace(sizes[0], sizes[1], num=(len(from_layers)-1)) - min_sizes = [start_offset] + tmp.tolist() - max_sizes = tmp.tolist() + [tmp[-1]+start_offset] - sizes = zip(min_sizes, max_sizes) - assert len(sizes) == len(from_layers), \ - "sizes and from_layers must have same length" - - if not isinstance(normalization, list): - normalization = [normalization] * len(from_layers) - assert len(normalization) == len(from_layers) - - assert sum(x > 0 for x in normalization) == len(num_channels), \ - "must provide number of channels for each normalized layer" - - if steps: - assert len(steps) == len(from_layers), "provide steps for all layers or leave empty" - - loc_pred_layers = [] - cls_pred_layers = [] - anchor_layers = [] - num_classes += 1 # always use background as label 0 - - for k, from_layer in enumerate(from_layers): - from_name = from_layer.name - # normalize - if normalization[k] > 0: - from_layer = mx.symbol.L2Normalization(data=from_layer, \ - mode="channel", name="{}_norm".format(from_name)) - scale = mx.symbol.Variable(name="{}_scale".format(from_name), - shape=(1, num_channels.pop(0), 1, 1), - init=mx.init.Constant(normalization[k])) - from_layer = mx.symbol.broadcast_mul(lhs=scale, rhs=from_layer) - if interm_layer > 0: - from_layer = mx.symbol.Convolution(data=from_layer, kernel=(3,3), \ - stride=(1,1), pad=(1,1), num_filter=interm_layer, \ - name="{}_inter_conv".format(from_name)) - from_layer = mx.symbol.Activation(data=from_layer, act_type="relu", \ - name="{}_inter_relu".format(from_name)) - - # estimate number of anchors per location - # here I follow the original version in caffe - # TODO: better way to shape the anchors?? - size = sizes[k] - assert len(size) > 0, "must provide at least one size" - size_str = "(" + ",".join([str(x) for x in size]) + ")" - ratio = ratios[k] - assert len(ratio) > 0, "must provide at least one ratio" - ratio_str = "(" + ",".join([str(x) for x in ratio]) + ")" - num_anchors = len(size) -1 + len(ratio) - - # create location prediction layer - num_loc_pred = num_anchors * 4 - bias = mx.symbol.Variable(name="{}_loc_pred_conv_bias".format(from_name), - init=mx.init.Constant(0.0), attr={'__lr_mult__': '2.0'}) - loc_pred = mx.symbol.Convolution(data=from_layer, bias=bias, kernel=(3,3), \ - stride=(1,1), pad=(1,1), num_filter=num_loc_pred, \ - name="{}_loc_pred_conv".format(from_name)) - loc_pred = mx.symbol.transpose(loc_pred, axes=(0,2,3,1)) - loc_pred = mx.symbol.Flatten(data=loc_pred) - loc_pred_layers.append(loc_pred) - - # create class prediction layer - num_cls_pred = num_anchors * num_classes - bias = mx.symbol.Variable(name="{}_cls_pred_conv_bias".format(from_name), - init=mx.init.Constant(0.0), attr={'__lr_mult__': '2.0'}) - cls_pred = mx.symbol.Convolution(data=from_layer, bias=bias, kernel=(3,3), \ - stride=(1,1), pad=(1,1), num_filter=num_cls_pred, \ - name="{}_cls_pred_conv".format(from_name)) - cls_pred = mx.symbol.transpose(cls_pred, axes=(0,2,3,1)) - cls_pred = mx.symbol.Flatten(data=cls_pred) - cls_pred_layers.append(cls_pred) - - # create anchor generation layer - if steps: - step = (steps[k], steps[k]) - else: - step = '(-1.0, -1.0)' - anchors = mx.contrib.symbol.MultiBoxPrior(from_layer, sizes=size_str, ratios=ratio_str, \ - clip=clip, name="{}_anchors".format(from_name), steps=step) - anchors = mx.symbol.Flatten(data=anchors) - anchor_layers.append(anchors) - - loc_preds = mx.symbol.Concat(*loc_pred_layers, num_args=len(loc_pred_layers), \ - dim=1, name="multibox_loc_pred") - cls_preds = mx.symbol.Concat(*cls_pred_layers, num_args=len(cls_pred_layers), \ - dim=1) - cls_preds = mx.symbol.Reshape(data=cls_preds, shape=(0, -1, num_classes)) - cls_preds = mx.symbol.transpose(cls_preds, axes=(0, 2, 1), name="multibox_cls_pred") - anchor_boxes = mx.symbol.Concat(*anchor_layers, \ - num_args=len(anchor_layers), dim=1) - anchor_boxes = mx.symbol.Reshape(data=anchor_boxes, shape=(0, -1, 4), name="multibox_anchors") - return [loc_preds, cls_preds, anchor_boxes] +def stack_neighbor(from_layer, factor=2): + """Downsample spatial dimentions and collapse to channel dimention by factor""" + out = mx.sym.reshape(from_layer, shape=(0, 0, -4, -1, factor, -2)) # (b, c, h/2, 2, w) + out = mx.sym.transpose(out, axes=(0, 1, 3, 2, 4)) # (b, c, 2, h/2, w) + out = mx.sym.reshape(out, shape=(0, -3, -1, -2)) # (b, c * 2, h/2, w) + out = mx.sym.reshape(out, shape=(0, 0, 0, -4, -1, factor)) # (b, c * 2, h/2, w/2, 2) + out = mx.sym.transpose(out, axes=(0, 1, 4, 2, 3)) # (b, c*2, 2, h/2, w/2) + out = mx.sym.reshape(out, shape=(0, -3, -1, -2)) # (b, c*4, h/2, w/2) + return out diff --git a/symbol/symbol_darknet19_lyolo.py b/symbol/symbol_darknet19_lyolo.py index eda5b38..aa17094 100644 --- a/symbol/symbol_darknet19_lyolo.py +++ b/symbol/symbol_darknet19_lyolo.py @@ -4,8 +4,8 @@ "https://arxiv.org/pdf/1612.08242.pdf" """ import mxnet as mx -from symbol_darknet19 import get_symbol as get_darknet19 -from symbol_darknet19 import conv_act_layer +from symbol.symbol_darknet19 import get_symbol as get_darknet19 +from symbol.symbol_darknet19 import conv_act_layer def get_symbol(num_classes=20, nms_thresh=0.5, force_nms=False, **kwargs): bone = get_darknet19(num_classes=num_classes, **kwargs) diff --git a/symbol/symbol_darknet19_yolo.py b/symbol/symbol_darknet19_yolo.py index 45c1b1e..5a03ddb 100644 --- a/symbol/symbol_darknet19_yolo.py +++ b/symbol/symbol_darknet19_yolo.py @@ -4,8 +4,8 @@ "https://arxiv.org/pdf/1612.08242.pdf" """ import mxnet as mx -from symbol_darknet19 import get_symbol as get_darknet19 -from symbol_darknet19 import conv_act_layer +from symbol.symbol_darknet19 import get_symbol as get_darknet19 +from symbol.symbol_darknet19 import conv_act_layer def get_symbol(num_classes=20, nms_thresh=0.5, force_nms=False, **kwargs): bone = get_darknet19(num_classes=num_classes, **kwargs) diff --git a/symbol/symbol_darknet_syolo.py b/symbol/symbol_darknet_syolo.py index 71742dc..9624d3b 100644 --- a/symbol/symbol_darknet_syolo.py +++ b/symbol/symbol_darknet_syolo.py @@ -4,8 +4,8 @@ "https://arxiv.org/pdf/1612.08242.pdf" """ import mxnet as mx -from symbol_darknet19 import get_symbol as get_darknet19 -from symbol_darknet19 import conv_act_layer +from symbol.symbol_darknet19 import get_symbol as get_darknet19 +from symbol.symbol_darknet19 import conv_act_layer def get_symbol(num_classes=20, nms_thresh=0.5, force_nms=False, **kwargs): bone = get_darknet19(num_classes=num_classes, **kwargs) diff --git a/symbol/symbol_resnet50_yolo.py b/symbol/symbol_resnet50_yolo.py index cf632a6..35bda2e 100644 --- a/symbol/symbol_resnet50_yolo.py +++ b/symbol/symbol_resnet50_yolo.py @@ -1,5 +1,6 @@ import mxnet as mx -import resnet +import symbol.resnet +from symbol.common import stack_neighbor def conv_act_layer(from_layer, name, num_filter, kernel=(3, 3), pad=(1, 1), \ stride=(1,1), act_type="relu", use_batchnorm=True): @@ -59,7 +60,8 @@ def get_symbol(num_classes=20, nms_thresh=0.5, force_nms=False, **kwargs): act_type='leaky') # re-organize - conv5_6 = mx.sym.stack_neighbor(data=conv1, kernel=(2, 2), name='stack_downsample') + # conv5_6 = mx.sym.stack_neighbor(data=conv1, kernel=(2, 2), name='stack_downsample') + conv5_6 = stack_neighbor(conv1, factor=2) concat = mx.sym.Concat(*[conv5_6, conv7_2], dim=1) # concat = conv7_2 conv8_1 = conv_act_layer(concat, 'conv8_1', 1024, kernel=(3, 3), pad=(1, 1), From 5ebb666fc7853a86ba40478e897580fc3875107a Mon Sep 17 00:00:00 2001 From: Joshua Zhang Date: Tue, 30 Jan 2018 09:35:01 -0600 Subject: [PATCH 11/12] fix python3 --- config/{config.py => default_config.py} | 0 evaluate.py | 8 ++++---- evaluate/evaluate_net.py | 8 ++++---- symbol/symbol_resnet50_yolo.py | 4 ++-- train/train_net.py | Bin 11597 -> 11605 bytes 5 files changed, 10 insertions(+), 10 deletions(-) rename config/{config.py => default_config.py} (100%) diff --git a/config/config.py b/config/default_config.py similarity index 100% rename from config/config.py rename to config/default_config.py diff --git a/evaluate.py b/evaluate.py index a38a7f6..918e3aa 100644 --- a/evaluate.py +++ b/evaluate.py @@ -17,8 +17,8 @@ def parse_args(): default=os.path.join(os.getcwd(), 'data', 'val.rec'), type=str) parser.add_argument('--list-path', dest='list_path', help='which list file to use', default="", type=str) - parser.add_argument('--network', dest='network', type=str, default='vgg16_ssd_300', - choices=['vgg16_ssd_300', 'vgg16_ssd_512'], help='which network to use') + parser.add_argument('--network', dest='network', type=str, default='resnet50_yolo', + help='which network to use') parser.add_argument('--batch-size', dest='batch_size', type=int, default=32, help='evaluation batch size') parser.add_argument('--num-class', dest='num_class', type=int, default=20, @@ -28,12 +28,12 @@ def parse_args(): parser.add_argument('--epoch', dest='epoch', help='epoch of pretrained model', default=0, type=int) parser.add_argument('--prefix', dest='prefix', help='load model prefix', - default=os.path.join(os.getcwd(), 'model', 'ssd'), type=str) + default=os.path.join(os.getcwd(), 'model', 'yolo2_resnet50'), type=str) parser.add_argument('--gpus', dest='gpu_id', help='GPU devices to evaluate with', default='0', type=str) parser.add_argument('--cpu', dest='cpu', help='use cpu to evaluate, this can be slow', action='store_true') - parser.add_argument('--data-shape', dest='data_shape', type=int, default=300, + parser.add_argument('--data-shape', dest='data_shape', type=int, default=416, help='set image shape') parser.add_argument('--mean-r', dest='mean_r', type=float, default=123, help='red mean value') diff --git a/evaluate/evaluate_net.py b/evaluate/evaluate_net.py index 8d86f8e..9de25d9 100644 --- a/evaluate/evaluate_net.py +++ b/evaluate/evaluate_net.py @@ -4,7 +4,7 @@ import importlib import mxnet as mx from dataset.iterator import DetRecordIter -from config.config import cfg +from config.default_config import cfg from evaluate.eval_metric import MApMetric, VOC07MApMetric import logging @@ -74,12 +74,12 @@ class names in string, must correspond to num_classes if set sys.path.append(os.path.join(cfg.ROOT_DIR, 'symbol')) net = importlib.import_module("symbol_" + net) \ .get_symbol(num_classes, nms_thresh, force_nms) - if not 'label' in net.list_arguments(): - label = mx.sym.Variable(name='label') + if not 'yolo_output_label' in net.list_arguments(): + label = mx.sym.Variable(name='yolo_output_label') net = mx.sym.Group([net, label]) # init module - mod = mx.mod.Module(net, label_names=('label',), logger=logger, context=ctx, + mod = mx.mod.Module(net, label_names=('yolo_output_label',), logger=logger, context=ctx, fixed_param_names=net.list_arguments()) mod.bind(data_shapes=eval_iter.provide_data, label_shapes=eval_iter.provide_label) mod.set_params(args, auxs, allow_missing=False, force_init=True) diff --git a/symbol/symbol_resnet50_yolo.py b/symbol/symbol_resnet50_yolo.py index 35bda2e..3fe2bd4 100644 --- a/symbol/symbol_resnet50_yolo.py +++ b/symbol/symbol_resnet50_yolo.py @@ -1,5 +1,5 @@ import mxnet as mx -import symbol.resnet +from symbol import resnet from symbol.common import stack_neighbor def conv_act_layer(from_layer, name, num_filter, kernel=(3, 3), pad=(1, 1), \ @@ -69,7 +69,7 @@ def get_symbol(num_classes=20, nms_thresh=0.5, force_nms=False, **kwargs): pred = mx.symbol.Convolution(data=conv8_1, name='conv_pred', kernel=(1, 1), num_filter=num_anchor * (num_classes + 4 + 1)) - out = mx.contrib.symbol.YoloOutput(data=pred, num_class=num_classes, + out = mx.contrib.symbol.Yolo2Output(data=pred, num_class=num_classes, num_anchor=num_anchor, object_grad_scale=5.0, background_grad_scale=1.0, coord_grad_scale=1.0, class_grad_scale=1.0, anchors=anchors, nms_topk=400, warmup_samples=12800, name='yolo_output') diff --git a/train/train_net.py b/train/train_net.py index b7a246b721614602059a8461fd1eb4db16c81ddb..87be9e96b8465b3553dac0e0969b8d048035784a 100644 GIT binary patch delta 21 ccmX>bbv0^(5F Date: Tue, 30 Jan 2018 09:45:40 -0600 Subject: [PATCH 12/12] update readme --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a586c84..b0520b6 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # YOLO-v2: Real-Time Object Detection -Still under development. 71 mAP on VOC2007 achieved so far. +Still under development. 71 mAP(darknet) and 74mAP(resnet50) on VOC2007 achieved so far. This is a pre-released version. @@ -21,7 +21,7 @@ custom operators are not presented in official MXNet. [Instructions](http://mxne - Download the pretrained [model](https://github.com/zhreshold/mxnet-yolo/releases/download/0.1-alpha/yolo2_darknet19_416_pascalvoc0712_trainval.zip), and extract to `model/` directory. - Run ``` -# cd /paht/to/mxnet-yolo +# cd /path/to/mxnet-yolo python demo.py --cpu # available options python demo.py -h @@ -29,6 +29,7 @@ python demo.py -h ### Train the model - Grab a pretrained model, e.g. [`darknet19`](https://github.com/zhreshold/mxnet-yolo/releases/download/0.1-alpha/darknet19_416_ILSVRC2012.zip) +- (optional) Grab a pretrained resnet50 model, [`resnet-50-0000.params`](http://data.dmlc.ml/models/imagenet/resnet/50-layers/resnet-50-0000.params),[`resnet-50-symbol.json`](http://data.dmlc.ml/models/imagenet/resnet/50-layers/resnet-50-symbol.json), this will produce slightly better mAP than `darknet` in my experiments. - Download PASCAL VOC dataset. ``` cd /path/to/where_you_store_datasets/ @@ -52,4 +53,8 @@ python tools/prepare_dataset.py --dataset pascal --year 2007 --set test --target - Start training ``` python train.py --gpus 0,1,2,3 --epoch 0 +# choose different networks, such as resnet50_yolo +python train.py --gpus 0,1,2,3 --network resnet50_yolo --data-shape 416 --pretrained model/resnet-50 --epoch 0 +# see advanced arguments for training +python train.py -h ```