forked from Azure/ObjectDetectionUsingCntk
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPARAMETERS.py
148 lines (120 loc) · 6.72 KB
/
PARAMETERS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
from __future__ import print_function
from helpers import *
from imdb_data import imdb_data
import fastRCNN, time, datetime
from fastRCNN.pascal_voc import pascal_voc
print(datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
############################
# Adjust these parameters
# to make scripts run
############################
rootDir = os.path.dirname(os.path.realpath(sys.argv[0]))
############################
# default parameters
############################
datasetName = "grocery"
#directories
imgDir = rootDir + "/data/" + datasetName + "/"
procDir = rootDir + "/proc/" + datasetName + "/"
resultsDir = rootDir + "/results/" + datasetName + "/"
roiDir = procDir + "rois/"
modelDir = procDir + "models/"
cntkFilesDir = procDir + "cntkFiles/"
trainedSvmDir = procDir + "trainedSvm/"
cntkResourcesDir = rootDir + "/resources/cntk/"
# ROI generation
roi_maxImgDim = 200 # image size used for ROI generation
roi_minDimRel = 0.01 # minimum relative width/height of a ROI
roi_maxDimRel = 1.0 # maximum relative width/height of a ROI
roi_minNrPixelsRel = 0 # minimum relative area covered by a ROI
roi_maxNrPixelsRel = 1.0 # maximum relative area covered by a ROI
roi_maxAspectRatio = 4.0 # maximum aspect Ratio of a ROI, both vertically and horizontally
ss_minSize = 20 # for a description of the selective search parameters see:
ss_kvals = (50, 500, 6) # http://dlib.net/dlib/image_transforms/segment_image_abstract.h.html#find_candidate_object_locations
ss_max_merging_iterations = 20 #
ss_nmsThreshold = 0.85 # non-maxima surpression threshold run after selective search
grid_nrScales = 7 # uniform grid ROIs: number of iterations from largest possible ROI to smaller ROIs
grid_stepSizeRel = 0.5 # uniform grid ROIs: step size for sliding windows
grid_aspectRatios = [1.0, 2.0, 0.5] # uniform grid ROIs: allowed aspect ratio of ROIs
grid_downscaleRatioPerIteration = 1.5 # uniform grid ROIs: relative ROI width/height reduction per iteration, starting from largest possible ROI
# cntk model
cntk_nrRois = 2000 # DNN input number of ROIs per image. Zero-padded/truncated if necessary
cntk_padWidth = 1000 # DNN input image width [pixels]
cntk_padHeight = 1000 # DNN input image height [pixels]
cntk_featureDimensions = {'svm': 4096} # DNN output, dimension of each ROI
# nn and svm training
classifier = 'svm' # Options: 'svm', 'nn'. Train either a Support Vector Machine, or directly the Neural Network
train_posOverlapThres = 0.5 # DNN and SVM threshold for marking ROIs with significant overlap with a GT object as positive
# nn training
cntk_max_epochs = 18 # number of training epochs (only relevant if 'lassifier' is set to: 'nn')
cntk_mb_size = 5 # minibatch size
cntk_l2_reg_weight = 0.0005 # l2 regularizer weight
cntk_lr_per_image = [0.01] * 10 + [0.001] * 5 + [0.0001] #learning rate per image
cntk_momentum_time_constant = 10 # momentum
# svm training
svm_C = 0.001 # regularization parameter of the soft-margin error term
svm_B = 10.0 # intercept scaling
svm_nrEpochs = 2 # number of training iterations
svm_retrainLimit = 2000 # number of new items to trigger SVM training
svm_evictThreshold = -1.1 # remove easy negatives with decision value below this threshold
svm_posWeight = "balanced"# automatically balance training set to correct for the majority of ROIs being negative
svm_targetNorm = 20.0 # magic value from traditional R-CNN (helps with convergence)
svm_penality = 'l2' # penalty norm
svm_loss = 'l1' # loss norm
svm_rngSeed = 3 # seed for randomization
# postprocessing
nmsThreshold = 0.3 # Non-Maxima suppression threshold (in range [0,1])
# The lower the more ROIs will be combined. Used during evaluation and visualization (scripts 5_)
vis_decisionThresholds = {'svm' : 0.5, # Reject detections with low confidence, used only in 5_visualizeResults
'nn' : None}
# evaluation
evalVocOverlapThreshold = 0.5 # voc-style intersection-over-union threshold used to determine if object was found
############################
# project-specific
# parameters / overrides
############################
if datasetName.startswith("grocery"):
classes = ('__background__', # always have '__background__' be at index 0
"orange", "eggBox", "joghurt", "ketchup", "squash", "mushroom", "water", "mustard")
# roi generation
cntk_nrRois = 200 #this number is too low to get good accuracy but allows for fast training and scoring (for demo purposes)
roi_minDimRel = 0.04
roi_maxDimRel = 0.4
roi_minNrPixelsRel = 2 * roi_minDimRel * roi_minDimRel
roi_maxNrPixelsRel = 0.33 * roi_maxDimRel * roi_maxDimRel
# postprocessing
nmsThreshold = 0.01
# database
imdbs = dict() # database provider of images and image annotations
for image_set in ["train", "test"]:
imdbs[image_set] = imdb_data(image_set, classes, cntk_nrRois, imgDir, roiDir, cntkFilesDir, boAddGroundTruthRois = (image_set!='test'))
elif datasetName.startswith("pascalVoc"):
classes = ('__background__',
'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')
lutImageSet = {"train": "trainval", "test": "test"}
# model training / scoring
classifier = 'nn'
# cntk model (Should train a model with mean-AP around 0.45)
# more than 99% of the test images have less than 4000 rois, but 50% more than 2000
cntk_mb_size = 2
cntk_nrRois = 4000
cntk_lr_per_image = [0.05] * 10 + [0.005] * 5 + [0.0005]
# database
imdbs = dict()
for image_set, year in zip(["train", "test"], ["2007", "2007"]):
imdbs[image_set] = fastRCNN.pascal_voc(lutImageSet[image_set], year, classes, cntk_nrRois, cacheDir = cntkFilesDir)
print("Number of {} images: {}".format(image_set, imdbs[image_set].num_images))
else:
ERROR
############################
# computed parameters
############################
nrClasses = len(classes)
cntk_featureDimensions['nn'] = nrClasses
lutClass2Id = dict(zip(classes, range(len(classes))))
print("PARAMETERS: datasetName = " + datasetName)
assert cntk_padWidth == cntk_padHeight, "ERROR: different width and height for padding not supported."
assert classifier.lower() in ['svm','nn'], "ERROR: only 'nn' or 'svm' classifier supported."
assert not (datasetName == 'pascalVoc' and classifier == 'svm'), "ERROR: 'svm' classifier for pascal VOC not supported."
assert(train_posOverlapThres >= 0 and train_posOverlapThres <= 1)