-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathterra_rgbmask.py
481 lines (368 loc) · 18.7 KB
/
terra_rgbmask.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
import os
import numpy as np
import tempfile
import shutil
from osgeo import gdal
from PIL import Image
from pyclowder.utils import CheckMessage
from pyclowder.datasets import download_metadata, upload_metadata, remove_metadata
from terrautils.metadata import get_extractor_metadata, get_terraref_metadata
from terrautils.extractors import TerrarefExtractor, is_latest_file, check_file_in_dataset, \
build_metadata, upload_to_dataset, file_exists, contains_required_files, \
confirm_clowder_info, timestamp_to_terraref
from terrautils.formats import create_geotiff, compress_geotiff
from terrautils.spatial import geojson_to_tuples
from terrautils.imagefile import file_is_image_type, image_get_geobounds, get_epsg
from skimage import morphology
import cv2
SATURATE_THRESHOLD = 245
MAX_PIXEL_VAL = 255
SMALL_AREA_THRESHOLD = 200
def getImageQuality(imgfile):
img = Image.open(imgfile)
img = np.array(img)
NRMAC = MAC(img, img, img)
return NRMAC
def gen_plant_mask(colorImg, kernelSize=3):
r = colorImg[:, :, 2]
g = colorImg[:, :, 1]
b = colorImg[:, :, 0]
sub_img = (g.astype('int') - r.astype('int') - 0) > 0 # normal: -2
mask = np.zeros_like(b)
mask[sub_img] = MAX_PIXEL_VAL
blur = cv2.blur(mask, (kernelSize, kernelSize))
pix = np.array(blur)
sub_mask = pix > 128
mask_1 = np.zeros_like(b)
mask_1[sub_mask] = MAX_PIXEL_VAL
return mask_1
def remove_small_area_mask(maskImg, min_area_size):
mask_array = maskImg > 0
rel_array = morphology.remove_small_objects(mask_array, min_area_size)
rel_img = np.zeros_like(maskImg)
rel_img[rel_array] = MAX_PIXEL_VAL
return rel_img
def remove_small_holes_mask(maskImg, max_hole_size):
mask_array = maskImg > 0
rel_array = morphology.remove_small_holes(mask_array, max_hole_size)
rel_img = np.zeros_like(maskImg)
rel_img[rel_array] = MAX_PIXEL_VAL
return rel_img
def saturated_pixel_classification(gray_img, baseMask, saturatedMask, dilateSize=0):
# add saturated area into basic mask
saturatedMask = morphology.binary_dilation(saturatedMask, morphology.diamond(dilateSize))
rel_img = np.zeros_like(gray_img)
rel_img[saturatedMask] = MAX_PIXEL_VAL
label_img, num = morphology.label(rel_img, connectivity=2, return_num=True)
rel_mask = baseMask
for i in range(1, num):
x = (label_img == i)
if np.sum(x) > 100000: # if the area is too large, do not add it into basic mask
continue
if not (x & baseMask).any():
continue
rel_mask = rel_mask | x
return rel_mask
def over_saturation_pocess(rgb_img, init_mask, threshold=SATURATE_THRESHOLD):
# connected component analysis for over saturation pixels
gray_img = cv2.cvtColor(rgb_img, cv2.COLOR_BGR2GRAY)
mask_over = gray_img > threshold
mask_0 = gray_img < threshold
src_mask_array = init_mask > 0
mask_1 = src_mask_array & mask_0
mask_1 = morphology.remove_small_objects(mask_1, SMALL_AREA_THRESHOLD)
mask_over = morphology.remove_small_objects(mask_over, SMALL_AREA_THRESHOLD)
rel_mask = saturated_pixel_classification(gray_img, mask_1, mask_over, 1)
rel_img = np.zeros_like(gray_img)
rel_img[rel_mask] = MAX_PIXEL_VAL
return rel_img
def gen_saturated_mask(img, kernelSize):
binMask = gen_plant_mask(img, kernelSize)
binMask = remove_small_area_mask(binMask,
500) # 500 is a parameter for number of pixels to be removed as small area
binMask = remove_small_holes_mask(binMask,
300) # 300 is a parameter for number of pixels to be filled as small holes
binMask = over_saturation_pocess(img, binMask, SATURATE_THRESHOLD)
binMask = remove_small_holes_mask(binMask, 4000)
return binMask
def gen_mask(img, kernelSize):
binMask = gen_plant_mask(img, kernelSize)
binMask = remove_small_area_mask(binMask, SMALL_AREA_THRESHOLD)
binMask = remove_small_holes_mask(binMask,
3000) # 3000 is a parameter for number of pixels to be filled as small holes
return binMask
def gen_rgb_mask(img, binMask):
rgbMask = cv2.bitwise_and(img, img, mask=binMask)
return rgbMask
def rgb2gray(rgb):
r, g, b = rgb[:,:,0], rgb[:,:,1], rgb[:,:,2]
gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
return gray
def MAC(im1, im2, im): # main function: Multiscale Autocorrelation (MAC)
h, v, c = im1.shape
if c > 1:
im = np.matrix.round(rgb2gray(im))
im1 = np.matrix.round(rgb2gray(im1))
im2 = np.matrix.round(rgb2gray(im2))
# multiscale parameters
scales = np.array([2, 3, 5])
FM = np.zeros(len(scales))
for s in range(len(scales)):
im1[0: h - 1, :] = im[1:h, :]
im2[0: h - scales[s], :] = im[scales[s]:h, :]
dif = im * (im1 - im2)
FM[s] = np.mean(dif)
NRMAC = np.mean(FM)
return NRMAC
def check_saturation(img):
# check how many percent of pix close to 255 or 0
grayImg = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
m1 = grayImg > SATURATE_THRESHOLD
m2 = grayImg < 20 # 20 is a threshold to classify low pixel value
over_rate = float(np.sum(m1)) / float(grayImg.size)
low_rate = float(np.sum(m2)) / float(grayImg.size)
return over_rate, low_rate
def check_brightness(img):
# gen average pixel value from grayscale image
grayImg = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
aveValue = np.average(grayImg)
return aveValue
def gen_cc_enhanced(input_path, kernelSize=3):
# abandon low quality images, mask enhanced
# TODO: cv2 has problems with some RGB geotiffs...
# img = cv2.imread(input_path)
img = np.rollaxis(gdal.Open(input_path).ReadAsArray().astype(np.uint8), 0, 3)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# calculate image scores
over_rate, low_rate = check_saturation(img)
# TODO: disabling this check for now because it's crashing extractor - generate mask regardless
# if low score, return None
# low_rate is percentage of low value pixels(lower than 20) in the grayscale image, if low_rate > 0.1, return
# aveValue is average pixel value of grayscale image, if aveValue lower than 30 or higher than 195, return
# quality_score is a score from Multiscale Autocorrelation (MAC), if quality_score lower than 13, return
#aveValue = check_brightness(img)
#quality_score = getImageQuality(input_path)
#if low_rate > 0.1 or aveValue < 30 or aveValue > 195 or quality_score < 13:
# return None, None, None
# saturated image process
# over_rate is percentage of high value pixels(higher than SATUTATE_THRESHOLD) in the grayscale image, if over_rate > 0.15, try to fix it use gen_saturated_mask()
if over_rate > 0.15:
binMask = gen_saturated_mask(img, kernelSize)
else: # nomal image process
binMask = gen_mask(img, kernelSize)
c = np.count_nonzero(binMask)
ratio = c / float(binMask.size)
rgbMask = gen_rgb_mask(img, binMask)
return ratio, rgbMask
def find_terraref_files(resource):
"""Returns the left, and right image file names
Args:
resource(dict): the resource associated with the process request
Return:
A tuple containing the left file name followed by the right filename
Exception:
A ValueError is raised if all the fields aren't found
"""
img_left, img_right = None, None
for fname in resource['local_paths']:
if fname.endswith('_left.tif'):
img_left = fname
elif fname.endswith('_right.tif'):
img_right = fname
if None in [img_left, img_right]:
raise ValueError("could not locate all files in processing")
return (img_left, img_right)
def find_image_files(identify_binary, resource, file_metadata_ending):
"""Returns the left, and right image file names
Args:
identify_binary(str): path to the executable which will return a MIME type on an image file
resource(dict): the resource associated with the process request
file_metadata_ending(str): the filename ending identifying an associated metadata file
Return:
A tuple of found image files
"""
return_files = []
for fname in resource['local_paths']:
if file_is_image_type(identify_binary, fname, fname + file_metadata_ending):
return_files.append(fname)
return tuple(return_files)
def add_local_arguments(parser):
""" Add any additional arguments to parser for rgbEnhancementExtractor class
Args:
parse: the command argument parser
"""
# For only processing the TERRA REF left image
parser.add_argument('--left', type=bool, default=os.getenv('LEFT_ONLY', True),
help="only generate a mask for the left image")
# Name of an image, or file, MIME type app
identify_binary = os.getenv('IDENTIFY_BINARY', '/usr/bin/identify')
# Command line override of an image or file MIME type app
parser.add_argument('--identify-binary', nargs='?', dest='identify_binary',
default=identify_binary,
help='Identify executable used to for image type capture ' +
'(default=' + identify_binary + ')')
class rgbEnhancementExtractor(TerrarefExtractor):
def __init__(self):
super(rgbEnhancementExtractor, self).__init__()
add_local_arguments(self.parser)
# parse command line and load default logging configuration
self.setup(sensor='rgb_mask')
# assign local arguments
self.leftonly = self.args.left
def get_maskfilename_bounds(self, file_name, datestamp):
"""Determines the name of the masking file and loads the boundaries of the file
Args:
file_name(str): path of the file to create a mask from
datestamp(str): the date to use when creating file paths
Return:
"""
mask_name, bounds = (None, None)
if not self.get_terraref_metadata is None:
key = 'left' if file_name.endswith('_left.tif') else 'right'
mask_name = self.sensors.create_sensor_path(datestamp, opts=[key])
bounds = geojson_to_tuples(self.get_terraref_metadata['spatial_metadata'][key]['bounding_box'])
else:
mask_name = self.sensors.create_sensor_path(datestamp)
bounds = image_get_geobounds(file_name)
bounds_len = len(bounds)
if bounds_len <= 0 or bounds[0] == np.nan:
bounds = None
return (mask_name, bounds)
def check_message(self, connector, host, secret_key, resource, parameters):
if "rulechecked" in parameters and parameters["rulechecked"]:
return CheckMessage.download
self.start_check(resource)
if not is_latest_file(resource):
self.log_skip(resource, "not latest file")
return CheckMessage.ignore
# Check metadata to verify we have what we need
md = download_metadata(connector, host, secret_key, resource['id'])
if get_terraref_metadata(md):
# Check for a left and right TIF file - skip if not found
# If we're only processing the left files, don't check for the right file
needed_files = ['_left.tif']
if not self.leftonly:
needed_files.append('_right.tif')
if not contains_required_files(resource, needed_files):
self.log_skip(resource, "missing required files")
return CheckMessage.ignore
if get_extractor_metadata(md, self.extractor_info['name'],
self.extractor_info['version']):
# Make sure outputs properly exist
timestamp = resource['dataset_info']['name'].split(" - ")[1]
left_mask_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
right_mask_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
if (self.leftonly and file_exists(left_mask_tiff)) or \
(not (file_exists(left_mask_tiff) and file_exists(right_mask_tiff))):
self.log_skip(resource, "metadata v%s and outputs already exist" % \
self.extractor_info['version'])
return CheckMessage.ignore
# Check for other images to create a mask on
elif not contains_required_files(resource, ['.tif']):
self.log_skip(resource, "missing required tiff file")
return CheckMessage.ignore
# Have TERRA-REF metadata, but not any from this extractor
return CheckMessage.download
def process_message(self, connector, host, secret_key, resource, parameters):
super(rgbEnhancementExtractor, self).process_message(connector, host, secret_key,
resource, parameters)
self.start_message(resource)
# Get left/right files and metadata
process_files = []
if not self.get_terraref_metadata is None:
process_files = find_terraref_files(resource)
else:
process_files = find_image_files(self.args.identify_binary, resource,
self.file_infodata_file_ending)
# Get the best username, password, and space
old_un, old_pw, old_space = (self.clowder_user, self.clowder_pass, self.clowderspace)
self.clowder_user, self.clowder_pass, self.clowderspace = self.get_clowder_context()
# Ensure that the clowder information is valid
if not confirm_clowder_info(host, secret_key, self.clowderspace, self.clowder_user,
self.clowder_pass):
self.log_error(resource, "Clowder configuration is invalid. Not processing " +\
"request")
self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
self.end_message(resource)
return
# Change the base path of files to include the user by tweaking the sensor's value
sensor_old_base = None
if self.get_terraref_metadata is None:
_, new_base = self.get_username_with_base_path(host, secret_key, resource['id'],
self.sensors.base)
sensor_old_base = self.sensors.base
self.sensors.base = new_base
# Prepare for processing files
timestamp = timestamp_to_terraref(self.find_timestamp(resource['dataset_info']['name']))
target_dsid = resource['id']
uploaded_file_ids = []
ratios = []
try:
for one_file in process_files:
mask_source = one_file
# Make sure the source image is in the correct EPSG space
epsg = get_epsg(one_file)
if epsg != self.default_epsg:
self.log_info(resource, "Reprojecting from " + str(epsg) +
" to default " + str(self.default_epsg))
_, tmp_name = tempfile.mkstemp()
src = gdal.Open(one_file)
gdal.Warp(tmp_name, src, dstSRS='EPSG:'+str(self.default_epsg))
mask_source = tmp_name
# Get the bounds of the image to see if we can process it. Also get the mask filename
rgb_mask_tif, bounds = self.get_maskfilename_bounds(mask_source, timestamp)
if bounds is None:
self.log_skip(resource, "Skipping non-georeferenced image: " + \
os.path.basename(one_file))
if mask_source != one_file:
os.remove(mask_source)
continue
if not file_exists(rgb_mask_tif) or self.overwrite:
self.log_info(resource, "creating %s" % rgb_mask_tif)
mask_ratio, mask_rgb = gen_cc_enhanced(mask_source)
ratios.append(mask_ratio)
# Bands must be reordered to avoid swapping R and B
mask_rgb = cv2.cvtColor(mask_rgb, cv2.COLOR_BGR2RGB)
create_geotiff(mask_rgb, bounds, rgb_mask_tif, None, False, self.extractor_info,
self.get_terraref_metadata)
compress_geotiff(rgb_mask_tif)
# Remove any temporary file
if mask_source != one_file:
os.remove(mask_source)
self.created += 1
self.bytes += os.path.getsize(rgb_mask_tif)
found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid,
rgb_mask_tif, remove=self.overwrite)
if not found_in_dest:
self.log_info(resource, "uploading %s" % rgb_mask_tif)
fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
target_dsid, rgb_mask_tif)
uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") +
"files/" + fileid)
# Tell Clowder this is completed so subsequent file updates don't daisy-chain
if not self.get_terraref_metadata is None:
ratios_len = len(ratios)
left_ratio = (ratios[0] if ratios_len > 0 else None)
right_ratio = (ratios[1] if ratios_len > 1 else None)
md = {
"files_created": uploaded_file_ids
}
if not left_ratio is None:
md["left_mask_ratio"] = left_ratio
if not self.leftonly and not right_ratio is None:
md["right_mask_ratio"] = right_ratio
extractor_md = build_metadata(host, self.extractor_info, target_dsid, md, 'dataset')
self.log_info(resource, "uploading extractor metadata to Lv1 dataset")
remove_metadata(connector, host, secret_key, resource['id'],
self.extractor_info['name'])
upload_metadata(connector, host, secret_key, resource['id'], extractor_md)
finally:
# Signal end of processing message and restore changed variables. Be sure to restore
# changed variables above with early returns
if not sensor_old_base is None:
self.sensors.base = sensor_old_base
self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
self.end_message(resource)
if __name__ == "__main__":
extractor = rgbEnhancementExtractor()
extractor.start()