Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
vardanagarwal
GitHub Repository: vardanagarwal/Proctoring-AI
Path: blob/master/coco models/tflite mobnetv1 ssd/visualization_utils.py
455 views
1
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
#
3
# Licensed under the Apache License, Version 2.0 (the "License");
4
# you may not use this file except in compliance with the License.
5
# You may obtain a copy of the License at
6
#
7
# http://www.apache.org/licenses/LICENSE-2.0
8
#
9
# Unless required by applicable law or agreed to in writing, software
10
# distributed under the License is distributed on an "AS IS" BASIS,
11
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
# See the License for the specific language governing permissions and
13
# limitations under the License.
14
# ==============================================================================
15
16
"""A set of functions that are used for visualization.
17
18
These functions often receive an image, perform some visualization on the image.
19
The functions do not return a value, instead they modify the image itself.
20
21
"""
22
from __future__ import absolute_import
23
from __future__ import division
24
from __future__ import print_function
25
26
import abc
27
import collections
28
# Set headless-friendly backend.
29
import matplotlib; matplotlib.use('Agg') # pylint: disable=multiple-statements
30
import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top
31
import numpy as np
32
import PIL.Image as Image
33
import PIL.ImageColor as ImageColor
34
import PIL.ImageDraw as ImageDraw
35
import PIL.ImageFont as ImageFont
36
import six
37
from six.moves import range
38
from six.moves import zip
39
import tensorflow.compat.v1 as tf
40
41
from object_detection.core import keypoint_ops
42
from object_detection.core import standard_fields as fields
43
from object_detection.utils import shape_utils
44
45
# Title margins. Not referenced in the visible part of this file; presumably
# pixel offsets used by plotting helpers further down -- TODO confirm.
_TITLE_LEFT_MARGIN = 10
_TITLE_TOP_MARGIN = 10

# Palette of color names. Entries are looked up by position (for example one
# color per heatmap channel in draw_heatmaps_on_image) and handed to PIL as
# fill colors, so the order of this list determines which color an index gets.
STANDARD_COLORS = [
    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
    'WhiteSmoke', 'Yellow', 'YellowGreen'
]
72
73
74
def _get_multiplier_for_color_randomness():
  """Picks a stride for stepping through STANDARD_COLORS non-consecutively.

  The stride is the prime p among 5, 7, 11, 13 and 17 that
    - does not divide len(STANDARD_COLORS), and
    - lies closest to len(STANDARD_COLORS) / 10 (the earlier candidate wins
      a tie).

  The stride is intended to be used as
    colors = [(p * i) % len(STANDARD_COLORS) for i in range(20)]
  so that successive indices select colors that are not adjacent in the list.

  Returns:
    The selected prime, or 1 when every candidate divides the palette size.
  """
  palette_size = len(STANDARD_COLORS)
  target = palette_size / 10.

  # Discard candidates that divide the palette size.
  usable_primes = [
      prime for prime in (5, 7, 11, 13, 17) if palette_size % prime != 0
  ]
  if not usable_primes:
    return 1

  # min() yields the first minimal element, which is the same choice a sort on
  # (distance, position) would put first.
  return min(usable_primes, key=lambda prime: np.abs(target - prime))
100
101
102
def save_image_array_as_png(image, output_path):
  """Writes a numpy image to `output_path` in PNG format.

  The array is cast to uint8 and converted to RGB before it is saved.

  Args:
    image: a numpy array with shape [height, width, 3].
    output_path: path to which image should be written; opened via tf.gfile.
  """
  pixels = np.uint8(image)
  rgb_image = Image.fromarray(pixels).convert('RGB')
  with tf.gfile.Open(output_path, 'w') as png_file:
    rgb_image.save(png_file, 'PNG')
112
113
114
def encode_image_array_as_png_str(image):
  """Serializes a numpy image into PNG-encoded bytes.

  Args:
    image: a numpy array with shape [height, width, 3].

  Returns:
    PNG encoded image string.
  """
  pil_image = Image.fromarray(np.uint8(image))
  # The in-memory buffer is closed as soon as its contents have been read out.
  with six.BytesIO() as buffer:
    pil_image.save(buffer, format='PNG')
    return buffer.getvalue()
129
130
131
def draw_bounding_box_on_image_array(image,
                                     ymin,
                                     xmin,
                                     ymax,
                                     xmax,
                                     color='red',
                                     thickness=4,
                                     display_str_list=(),
                                     use_normalized_coordinates=True):
  """Draws a single bounding box onto a numpy image, in place.

  The array is turned into an RGB PIL image, the box is drawn on that image by
  draw_bounding_box_on_image, and the pixels are copied back into `image`.
  Coordinates may be absolute (pixels) or normalized, depending on
  `use_normalized_coordinates`.

  Args:
    image: a numpy array with shape [height, width, 3]. Modified in place.
    ymin: ymin of bounding box.
    xmin: xmin of bounding box.
    ymax: ymax of bounding box.
    xmax: xmax of bounding box.
    color: color to draw bounding box. Default is red.
    thickness: line thickness. Default value is 4.
    display_str_list: list of strings to display in box
      (each to be shown on its own line).
    use_normalized_coordinates: If True (default), treat coordinates
      ymin, xmin, ymax, xmax as relative to the image. Otherwise treat
      coordinates as absolute.
  """
  canvas = Image.fromarray(np.uint8(image)).convert('RGB')
  draw_bounding_box_on_image(
      canvas,
      ymin,
      xmin,
      ymax,
      xmax,
      color=color,
      thickness=thickness,
      display_str_list=display_str_list,
      use_normalized_coordinates=use_normalized_coordinates)
  np.copyto(image, np.array(canvas))
164
165
166
def _get_text_size(font, text):
  """Returns the (width, height) that `text` occupies when drawn with `font`.

  Uses `font.getbbox` when the font object provides it and falls back to the
  older `font.getsize` API otherwise, so both old and new PIL releases work.
  """
  if hasattr(font, 'getbbox'):
    bbox = font.getbbox(text)
    # (left, top, right, bottom): right/bottom are measured from the origin.
    return bbox[2], bbox[3]
  return font.getsize(text)


def draw_bounding_box_on_image(image,
                               ymin,
                               xmin,
                               ymax,
                               xmax,
                               color='red',
                               thickness=4,
                               display_str_list=(),
                               use_normalized_coordinates=True):
  """Adds a bounding box to an image.

  Bounding box coordinates can be specified in either absolute (pixel) or
  normalized coordinates by setting the use_normalized_coordinates argument.

  Each string in display_str_list is displayed on a separate line above the
  bounding box in black text on a rectangle filled with the input 'color'.
  If the top of the bounding box extends to the edge of the image, the strings
  are displayed below the bounding box.

  Args:
    image: a PIL.Image object. Drawn on in place.
    ymin: ymin of bounding box.
    xmin: xmin of bounding box.
    ymax: ymax of bounding box.
    xmax: xmax of bounding box.
    color: color to draw bounding box. Default is red.
    thickness: line thickness. Default value is 4. The outline is skipped
      when this is not positive.
    display_str_list: list of strings to display in box
      (each to be shown on its own line).
    use_normalized_coordinates: If True (default), treat coordinates
      ymin, xmin, ymax, xmax as relative to the image. Otherwise treat
      coordinates as absolute.
  """
  draw = ImageDraw.Draw(image)
  im_width, im_height = image.size
  if use_normalized_coordinates:
    (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                  ymin * im_height, ymax * im_height)
  else:
    (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
  if thickness > 0:
    draw.line([(left, top), (left, bottom), (right, bottom), (right, top),
               (left, top)],
              width=thickness,
              fill=color)
  try:
    font = ImageFont.truetype('arial.ttf', 24)
  except IOError:
    # arial.ttf is not available everywhere; use PIL's built-in font instead.
    font = ImageFont.load_default()

  # If the total height of the display strings added to the top of the bounding
  # box exceeds the top of the image, stack the strings below the bounding box
  # instead of above.
  display_str_heights = [
      _get_text_size(font, ds)[1] for ds in display_str_list
  ]
  # Each display_str has a top and bottom margin of 0.05x.
  total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

  if top > total_display_str_height:
    text_bottom = top
  else:
    text_bottom = bottom + total_display_str_height
  # Walk the labels in reverse so they are laid out from bottom to top.
  for display_str in reversed(display_str_list):
    text_width, text_height = _get_text_size(font, display_str)
    margin = np.ceil(0.05 * text_height)
    draw.rectangle(
        [(left, text_bottom - text_height - 2 * margin),
         (left + text_width, text_bottom)],
        fill=color)
    draw.text(
        (left + margin, text_bottom - text_height - margin),
        display_str,
        fill='black',
        font=font)
    # Move up by the full height of the rectangle just drawn (text plus both
    # margins) so the next label sits directly above it without overlapping.
    text_bottom -= text_height + 2 * margin
241
242
243
def draw_bounding_boxes_on_image_array(image,
                                       boxes,
                                       color='red',
                                       thickness=4,
                                       display_str_list_list=()):
  """Draws several bounding boxes onto a numpy image, in place.

  The array is wrapped in a PIL image, the boxes are drawn on it by
  draw_bounding_boxes_on_image, and the pixels are copied back into `image`.

  Args:
    image: a numpy array object. Modified in place.
    boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax).
      The coordinates are in normalized format between [0, 1].
    color: color to draw bounding box. Default is red.
    thickness: line thickness. Default value is 4.
    display_str_list_list: list of list of strings; one list of strings per
      bounding box, since a box may carry multiple labels.

  Raises:
    ValueError: if boxes is not a [N, 4] array
  """
  canvas = Image.fromarray(image)
  draw_bounding_boxes_on_image(
      canvas,
      boxes,
      color=color,
      thickness=thickness,
      display_str_list_list=display_str_list_list)
  np.copyto(image, np.array(canvas))
269
270
271
def draw_bounding_boxes_on_image(image,
                                 boxes,
                                 color='red',
                                 thickness=4,
                                 display_str_list_list=()):
  """Draws every box in `boxes` onto a PIL image.

  Args:
    image: a PIL.Image object.
    boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax).
      The coordinates are in normalized format between [0, 1].
    color: color to draw bounding box. Default is red.
    thickness: line thickness. Default value is 4.
    display_str_list_list: list of list of strings; one list of strings per
      bounding box, since a box may carry multiple labels. When empty, boxes
      are drawn without labels.

  Raises:
    ValueError: if boxes is not a [N, 4] array
  """
  shape = boxes.shape
  # An empty shape tuple (0-d input) means there is nothing to draw.
  if not shape:
    return
  if not (len(shape) == 2 and shape[1] == 4):
    raise ValueError('Input must be of size [N, 4]')

  for box_index in range(shape[0]):
    labels = display_str_list_list[box_index] if display_str_list_list else ()
    draw_bounding_box_on_image(image, boxes[box_index, 0], boxes[box_index, 1],
                               boxes[box_index, 2], boxes[box_index, 3], color,
                               thickness, labels)
304
305
306
def create_visualization_fn(category_index,
                            include_masks=False,
                            include_keypoints=False,
                            include_keypoint_scores=False,
                            include_track_ids=False,
                            **kwargs):
  """Builds a positional-only visualization function for use with py_func.

  py_funcs only accept positional arguments, so the returned function maps
  its positional inputs onto the keyword arguments of
  visualize_boxes_and_labels_on_image_array. The positional arguments, in
  order, are:
    0: image
    1: boxes
    2: classes
    3: scores
    [4]: masks (optional)
    [4-5]: keypoints (optional)
    [4-6]: keypoint_scores (optional)
    [4-7]: track_ids (optional)
  Optional tensors that are not enabled are simply left out, so the ones that
  follow move forward.

  -- Example 1 --
  vis_only_masks_fn = create_visualization_fn(category_index,
    include_masks=True, include_keypoints=False, include_track_ids=False,
    **kwargs)
  image = tf.py_func(vis_only_masks_fn,
                     inp=[image, boxes, classes, scores, masks],
                     Tout=tf.uint8)

  -- Example 2 --
  vis_masks_and_track_ids_fn = create_visualization_fn(category_index,
    include_masks=True, include_keypoints=False, include_track_ids=True,
    **kwargs)
  image = tf.py_func(vis_masks_and_track_ids_fn,
                     inp=[image, boxes, classes, scores, masks, track_ids],
                     Tout=tf.uint8)

  Args:
    category_index: a dict that maps integer ids to category dicts. e.g.
      {1: {1: 'dog'}, 2: {2: 'cat'}, ...}
    include_masks: Whether masks should be expected as a positional argument in
      the returned function.
    include_keypoints: Whether keypoints should be expected as a positional
      argument in the returned function.
    include_keypoint_scores: Whether keypoint scores should be expected as a
      positional argument in the returned function.
    include_track_ids: Whether track ids should be expected as a positional
      argument in the returned function.
    **kwargs: Additional kwargs that will be passed to
      visualize_boxes_and_labels_on_image_array.

  Returns:
    Returns a function that only takes tensors as positional arguments.
  """
  # (keyword name, enabled) pairs, in the order the optional tensors arrive.
  optional_inputs = (
      ('instance_masks', include_masks),
      ('keypoints', include_keypoints),
      ('keypoint_scores', include_keypoint_scores),
      ('track_ids', include_track_ids),
  )

  def visualization_py_func_fn(*args):
    """Visualization function that can be wrapped in a tf.py_func.

    Args:
      *args: First 4 positional arguments must be:
        image - uint8 numpy array with shape (img_height, img_width, 3).
        boxes - a numpy array of shape [N, 4].
        classes - a numpy array of shape [N].
        scores - a numpy array of shape [N] or None.
        -- Optional positional arguments --
        instance_masks - a numpy array of shape [N, image_height, image_width].
        keypoints - a numpy array of shape [N, num_keypoints, 2].
        keypoint_scores - a numpy array of shape [N, num_keypoints].
        track_ids - a numpy array of shape [N] with unique track ids.

    Returns:
      uint8 numpy array with shape (img_height, img_width, 3) with overlaid
      boxes.
    """
    image = args[0]
    boxes = args[1]
    classes = args[2]
    scores = args[3]

    # Consume the optional tensors left to right, starting right after scores.
    extras = {}
    next_position = 4
    for keyword, is_expected in optional_inputs:
      if is_expected:
        extras[keyword] = args[next_position]
        next_position += 1
      else:
        extras[keyword] = None

    return visualize_boxes_and_labels_on_image_array(
        image,
        boxes,
        classes,
        scores,
        category_index=category_index,
        instance_masks=extras['instance_masks'],
        keypoints=extras['keypoints'],
        keypoint_scores=extras['keypoint_scores'],
        track_ids=extras['track_ids'],
        **kwargs)

  return visualization_py_func_fn
409
410
411
def draw_heatmaps_on_image(image, heatmaps):
  """Draws heatmaps on an image.

  The heatmaps are handled channel by channel and different colors are used to
  paint different heatmap channels. Colors are taken from STANDARD_COLORS by
  channel index and wrap around when there are more channels than colors.

  Args:
    image: a PIL.Image object. Drawn on in place.
    heatmaps: a numpy array with shape [image_height, image_width, channel].
      Note that the image_height and image_width should match the size of input
      image.
  """
  draw = ImageDraw.Draw(image)
  num_colors = len(STANDARD_COLORS)
  for channel_index in range(heatmaps.shape[2]):
    # Scale to 8 bits; assumes heatmap values lie in [0, 1] -- TODO confirm
    # with callers, values outside that range are not clipped here.
    heatmap = (heatmaps[:, :, channel_index] * 255).astype('uint8')
    # The 8-bit grayscale bitmap is used as the paint mask directly. (An
    # earlier `bitmap.convert('1')` call here threw its result away and so had
    # no effect; it has been dropped.)
    bitmap = Image.fromarray(heatmap, 'L')
    draw.bitmap(
        xy=[(0, 0)],
        bitmap=bitmap,
        # Wrap the index so any number of channels can be painted.
        fill=STANDARD_COLORS[channel_index % num_colors])
434
435
436
def draw_heatmaps_on_image_array(image, heatmaps):
  """Overlays heatmaps on an image given as a numpy array.

  Each heatmap channel is painted on top of the image in its own color. This
  is the array-based counterpart of draw_heatmaps_on_image; inputs that are
  not numpy arrays are converted by calling their `.numpy()` method first.

  Args:
    image: a numpy array with shape [height, width, 3].
    heatmaps: a numpy array with shape [height, width, channel].

  Returns:
    An uint8 numpy array representing the input image painted with heatmap
    colors.
  """
  image_np = image if isinstance(image, np.ndarray) else image.numpy()
  heatmaps_np = (
      heatmaps if isinstance(heatmaps, np.ndarray) else heatmaps.numpy())

  canvas = Image.fromarray(np.uint8(image_np)).convert('RGB')
  draw_heatmaps_on_image(canvas, heatmaps_np)
  return np.array(canvas)
458
459
460
def draw_heatmaps_on_image_tensors(images,
                                   heatmaps,
                                   apply_sigmoid=False):
  """Overlays heatmaps on a batch of image tensors.

  Args:
    images: A 4D uint8 image tensor of shape [N, H, W, C]. If C > 3, additional
      channels will be ignored. If C = 1, then we convert the images to RGB
      images.
    heatmaps: [N, h, w, channel] float32 tensor of heatmaps. The heatmaps are
      resized to the image size before being overlaid, so their height/width
      should ideally share the image's aspect ratio to avoid misalignment
      introduced by the resize.
    apply_sigmoid: Whether to apply a sigmoid layer on top of the heatmaps. If
      the heatmaps come directly from the prediction logits, then we should
      apply the sigmoid layer to make sure the values are in between [0.0, 1.0].

  Returns:
    4D image tensor of type uint8, with heatmaps overlaid on top.
  """
  # Reduce the input to exactly three channels.
  num_channels = images.shape[3]
  if num_channels > 3:
    images = images[:, :, :, 0:3]
  elif num_channels == 1:
    images = tf.image.grayscale_to_rgb(images)

  _, image_height, image_width, _ = (
      shape_utils.combined_static_and_dynamic_shape(images))
  if apply_sigmoid:
    heatmaps = tf.math.sigmoid(heatmaps)
  heatmaps_at_image_size = tf.image.resize(
      heatmaps, size=[image_height, image_width])

  def overlay_single(image_and_heatmaps):
    """Paints the heatmaps of one example onto its image."""
    return tf.py_function(
        draw_heatmaps_on_image_array,
        image_and_heatmaps,
        tf.uint8)

  return tf.map_fn(
      overlay_single,
      [images, heatmaps_at_image_size],
      dtype=tf.uint8,
      back_prop=False)
503
504
505
def _resize_original_image(image, image_shape):
  """Resizes a single image with nearest-neighbor sampling.

  Args:
    image: an unbatched image tensor; a batch dimension is added for the
      resize and removed again afterwards.
    image_shape: target size, passed to tf.image.resize_images.

  Returns:
    The resized image cast to uint8.
  """
  batched = tf.expand_dims(image, 0)
  resized = tf.image.resize_images(
      batched,
      image_shape,
      method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
      align_corners=True)
  unbatched = tf.squeeze(resized, 0)
  return tf.cast(unbatched, tf.uint8)
513
514
515
def draw_bounding_boxes_on_image_tensors(images,
                                         boxes,
                                         classes,
                                         scores,
                                         category_index,
                                         original_image_spatial_shape=None,
                                         true_image_shape=None,
                                         instance_masks=None,
                                         keypoints=None,
                                         keypoint_scores=None,
                                         keypoint_edges=None,
                                         track_ids=None,
                                         max_boxes_to_draw=20,
                                         min_score_thresh=0.2,
                                         use_normalized_coordinates=True):
  """Draws bounding boxes, masks, and keypoints on batch of image tensors.

  Args:
    images: A 4D uint8 image tensor of shape [N, H, W, C]. If C > 3, additional
      channels will be ignored. If C = 1, then we convert the images to RGB
      images.
    boxes: [N, max_detections, 4] float32 tensor of detection boxes.
    classes: [N, max_detections] int tensor of detection classes. Note that
      classes are 1-indexed.
    scores: [N, max_detections] float32 tensor of detection scores.
    category_index: a dict that maps integer ids to category dicts. e.g.
      {1: {1: 'dog'}, 2: {2: 'cat'}, ...}
    original_image_spatial_shape: [N, 2] tensor containing the spatial size of
      the original image. When given, each image is resized to this size
      before drawing.
    true_image_shape: [N, 3] tensor containing the spatial size of unpadded
      original_image.
    instance_masks: A 4D uint8 tensor of shape [N, max_detection, H, W] with
      instance masks.
    keypoints: A 4D float32 tensor of shape [N, max_detection, num_keypoints, 2]
      with keypoints.
    keypoint_scores: A 3D float32 tensor of shape [N, max_detection,
      num_keypoints] with keypoint scores.
    keypoint_edges: A list of tuples with keypoint indices that specify which
      keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws
      edges from keypoint 0 to 1 and from keypoint 2 to 4.
    track_ids: [N, max_detections] int32 tensor of unique tracks ids (i.e.
      instance ids for each object). If provided, the color-coding of boxes is
      dictated by these ids, and not classes.
    max_boxes_to_draw: Maximum number of boxes to draw on an image. Default 20.
    min_score_thresh: Minimum score threshold for visualization. Default 0.2.
    use_normalized_coordinates: Whether to assume boxes and keypoints are in
      normalized coordinates (as opposed to absolute coordinates).
      Default is True.

  Returns:
    4D image tensor of type uint8, with boxes drawn on top.
  """
  # Additional channels are being ignored.
  if images.shape[3] > 3:
    images = images[:, :, :, 0:3]
  elif images.shape[3] == 1:
    images = tf.image.grayscale_to_rgb(images)
  visualization_keyword_args = {
      'use_normalized_coordinates': use_normalized_coordinates,
      'max_boxes_to_draw': max_boxes_to_draw,
      'min_score_thresh': min_score_thresh,
      'agnostic_mode': False,
      'line_thickness': 4,
      'keypoint_edges': keypoint_edges
  }
  # tf.map_fn needs a tensor in every slot, so missing shapes are replaced by
  # placeholders filled with -1; draw_boxes never reads the placeholders.
  if true_image_shape is None:
    true_shapes = tf.constant(-1, shape=[images.shape.as_list()[0], 3])
  else:
    true_shapes = true_image_shape
  if original_image_spatial_shape is None:
    original_shapes = tf.constant(-1, shape=[images.shape.as_list()[0], 2])
  else:
    original_shapes = original_image_spatial_shape

  visualize_boxes_fn = create_visualization_fn(
      category_index,
      include_masks=instance_masks is not None,
      include_keypoints=keypoints is not None,
      include_keypoint_scores=keypoint_scores is not None,
      include_track_ids=track_ids is not None,
      **visualization_keyword_args)

  # Order matters: the two shape tensors come first, everything from index 2
  # on is forwarded positionally to visualize_boxes_fn.
  elems = [true_shapes, original_shapes, images, boxes, classes, scores]
  if instance_masks is not None:
    elems.append(instance_masks)
  if keypoints is not None:
    elems.append(keypoints)
  if keypoint_scores is not None:
    elems.append(keypoint_scores)
  if track_ids is not None:
    elems.append(track_ids)

  def draw_boxes(image_and_detections):
    """Draws boxes on image."""
    true_shape = image_and_detections[0]
    original_shape = image_and_detections[1]
    # Start from the image as given so that the resize below also works when
    # only `original_image_spatial_shape` was supplied (previously `image` was
    # unbound in that case).
    image = image_and_detections[2]
    if true_image_shape is not None:
      image = shape_utils.pad_or_clip_nd(image,
                                         [true_shape[0], true_shape[1], 3])
    if original_image_spatial_shape is not None:
      image_and_detections[2] = _resize_original_image(image, original_shape)

    image_with_boxes = tf.py_func(visualize_boxes_fn, image_and_detections[2:],
                                  tf.uint8)
    return image_with_boxes

  images = tf.map_fn(draw_boxes, elems, dtype=tf.uint8, back_prop=False)
  return images
623
624
625
def draw_side_by_side_evaluation_image(eval_dict,
                                       category_index,
                                       max_boxes_to_draw=20,
                                       min_score_thresh=0.2,
                                       use_normalized_coordinates=True,
                                       keypoint_edges=None):
  """Builds, per example, an image showing detections next to groundtruth.

  Bounding boxes (and instance masks, if available) are visualized on both
  subimages. When `eval_dict` carries additional image channels, a third
  subimage with the groundtruth drawn on those channels is appended.

  Note that for a single (unbatched) example the entries of `eval_dict` are
  replaced in place by their batched versions.

  Args:
    eval_dict: The evaluation dictionary returned by
      eval_util.result_dict_for_batched_example() or
      eval_util.result_dict_for_single_example().
    category_index: A category index (dictionary) produced from a labelmap.
    max_boxes_to_draw: The maximum number of boxes to draw for detections.
    min_score_thresh: The minimum score threshold for showing detections.
    use_normalized_coordinates: Whether to assume boxes and keypoints are in
      normalized coordinates (as opposed to absolute coordinates).
      Default is True.
    keypoint_edges: A list of tuples with keypoint indices that specify which
      keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws
      edges from keypoint 0 to 1 and from keypoint 2 to 4.

  Returns:
    A list of [1, H, 2 * W, C] uint8 tensor. The subimage on the left
      corresponds to detections, while the subimage on the right corresponds to
      groundtruth.
  """
  det_keys = fields.DetectionResultFields()
  in_keys = fields.InputDataFields()

  # A rank-1 class tensor means a single example: add the batch dimension to
  # every entry except the two image entries.
  if len(eval_dict[det_keys.detection_classes].shape) == 1:
    image_keys = (in_keys.original_image, in_keys.image_additional_channels)
    for key in eval_dict:
      if key not in image_keys:
        eval_dict[key] = tf.expand_dims(eval_dict[key], 0)

  def batch_of_one(key, indx):
    """Returns example `indx` of eval_dict[key] with a leading batch dim."""
    return tf.expand_dims(eval_dict[key][indx], axis=0)

  def unit_scores(indx):
    """Returns a score of 1.0 for each groundtruth class of example `indx`."""
    return tf.expand_dims(
        tf.ones_like(
            eval_dict[in_keys.groundtruth_classes][indx], dtype=tf.float32),
        axis=0)

  gt_visibility_key = in_keys.groundtruth_keypoint_visibilities
  side_by_side_images = []

  for indx in range(eval_dict[in_keys.original_image].shape[0]):
    # ---- Detection-side optional inputs.
    instance_masks = None
    if det_keys.detection_masks in eval_dict:
      instance_masks = tf.cast(
          batch_of_one(det_keys.detection_masks, indx), tf.uint8)

    keypoints = None
    keypoint_scores = None
    if det_keys.detection_keypoints in eval_dict:
      keypoints = batch_of_one(det_keys.detection_keypoints, indx)
      if det_keys.detection_keypoint_scores in eval_dict:
        keypoint_scores = batch_of_one(det_keys.detection_keypoint_scores,
                                       indx)
      else:
        # No scores supplied: derive them from the keypoints themselves.
        keypoint_scores = tf.cast(
            keypoint_ops.set_keypoint_visibilities(keypoints),
            dtype=tf.float32)

    # ---- Groundtruth-side optional inputs.
    groundtruth_instance_masks = None
    if in_keys.groundtruth_instance_masks in eval_dict:
      groundtruth_instance_masks = tf.cast(
          batch_of_one(in_keys.groundtruth_instance_masks, indx), tf.uint8)

    groundtruth_keypoints = None
    groundtruth_keypoint_scores = None
    if in_keys.groundtruth_keypoints in eval_dict:
      groundtruth_keypoints = batch_of_one(in_keys.groundtruth_keypoints, indx)
      if gt_visibility_key in eval_dict:
        groundtruth_keypoint_scores = tf.expand_dims(
            tf.cast(eval_dict[gt_visibility_key][indx], dtype=tf.float32),
            axis=0)
      else:
        groundtruth_keypoint_scores = tf.cast(
            keypoint_ops.set_keypoint_visibilities(groundtruth_keypoints),
            dtype=tf.float32)

    # ---- Left subimage: detections.
    images_with_detections = draw_bounding_boxes_on_image_tensors(
        batch_of_one(in_keys.original_image, indx),
        batch_of_one(det_keys.detection_boxes, indx),
        batch_of_one(det_keys.detection_classes, indx),
        batch_of_one(det_keys.detection_scores, indx),
        category_index,
        original_image_spatial_shape=batch_of_one(
            in_keys.original_image_spatial_shape, indx),
        true_image_shape=batch_of_one(in_keys.true_image_shape, indx),
        instance_masks=instance_masks,
        keypoints=keypoints,
        keypoint_scores=keypoint_scores,
        keypoint_edges=keypoint_edges,
        max_boxes_to_draw=max_boxes_to_draw,
        min_score_thresh=min_score_thresh,
        use_normalized_coordinates=use_normalized_coordinates)

    # ---- Right subimage: groundtruth, drawn without box limit or threshold.
    images_with_groundtruth = draw_bounding_boxes_on_image_tensors(
        batch_of_one(in_keys.original_image, indx),
        batch_of_one(in_keys.groundtruth_boxes, indx),
        batch_of_one(in_keys.groundtruth_classes, indx),
        unit_scores(indx),
        category_index,
        original_image_spatial_shape=batch_of_one(
            in_keys.original_image_spatial_shape, indx),
        true_image_shape=batch_of_one(in_keys.true_image_shape, indx),
        instance_masks=groundtruth_instance_masks,
        keypoints=groundtruth_keypoints,
        keypoint_scores=groundtruth_keypoint_scores,
        keypoint_edges=keypoint_edges,
        max_boxes_to_draw=None,
        min_score_thresh=0.0,
        use_normalized_coordinates=use_normalized_coordinates)

    # Concatenate along the width axis.
    images_to_visualize = tf.concat(
        [images_with_detections, images_with_groundtruth], axis=2)

    # ---- Optional third subimage: groundtruth on the additional channels.
    if in_keys.image_additional_channels in eval_dict:
      images_with_additional_channels_groundtruth = (
          draw_bounding_boxes_on_image_tensors(
              batch_of_one(in_keys.image_additional_channels, indx),
              batch_of_one(in_keys.groundtruth_boxes, indx),
              batch_of_one(in_keys.groundtruth_classes, indx),
              unit_scores(indx),
              category_index,
              original_image_spatial_shape=batch_of_one(
                  in_keys.original_image_spatial_shape, indx),
              true_image_shape=batch_of_one(in_keys.true_image_shape, indx),
              instance_masks=groundtruth_instance_masks,
              keypoints=None,
              keypoint_edges=None,
              max_boxes_to_draw=None,
              min_score_thresh=0.0,
              use_normalized_coordinates=use_normalized_coordinates))
      images_to_visualize = tf.concat(
          [images_to_visualize, images_with_additional_channels_groundtruth],
          axis=2)

    side_by_side_images.append(images_to_visualize)

  return side_by_side_images
791
792
793
def draw_densepose_visualizations(eval_dict,
                                  max_boxes_to_draw=20,
                                  min_score_thresh=0.2,
                                  num_parts=24,
                                  dp_coord_to_visualize=0):
  """Draws DensePose visualizations.

  For every image, the part masks are painted on the image and the selected
  surface coordinate channel is painted on a copy of it; the two results are
  placed next to each other along the width axis.

  Args:
    eval_dict: The evaluation dictionary returned by
      eval_util.result_dict_for_batched_example().
    max_boxes_to_draw: The maximum number of boxes to draw for detections.
    min_score_thresh: The minimum score threshold for showing detections.
    num_parts: The number of different densepose parts.
    dp_coord_to_visualize: Whether to visualize v-coordinates (0) or
      u-coordinates (1) overlaid on the person masks.

  Returns:
    A list of [1, H, W, C] uint8 tensor, each element corresponding to an image
    in the batch.

  Raises:
    ValueError: If `dp_coord_to_visualize` is not 0 or 1, or if `eval_dict`
      lacks detection masks or detection surface coordinates.
  """
  if dp_coord_to_visualize not in (0, 1):
    raise ValueError('`dp_coord_to_visualize` must be either 0 (for v '
                     'coordinates), or 1 (for u coordinates), but instead got '
                     '{}'.format(dp_coord_to_visualize))
  detection_fields = fields.DetectionResultFields()
  input_data_fields = fields.InputDataFields()

  if detection_fields.detection_masks not in eval_dict:
    raise ValueError('Expected `detection_masks` in `eval_dict`.')
  if detection_fields.detection_surface_coords not in eval_dict:
    raise ValueError('Expected `detection_surface_coords` in `eval_dict`.')

  # Defined once, outside the per-image loop: it only depends on its own
  # arguments and on this function's parameters.
  def draw_densepose_py_func(image, detection_masks, surface_coords, scores):
    """Overlays part masks and surface coords on original images."""
    # Copy before any masks are drawn so the coordinate view starts from the
    # untouched image.
    surface_coord_image = np.copy(image)
    for i, (score, surface_coord, mask) in enumerate(
        zip(scores, surface_coords, detection_masks)):
      if i == max_boxes_to_draw:
        break
      if score > min_score_thresh:
        draw_part_mask_on_image_array(image, mask, num_parts=num_parts)
        draw_float_channel_on_image_array(
            surface_coord_image, surface_coord[:, :, dp_coord_to_visualize],
            mask)
    return np.concatenate([image, surface_coord_image], axis=1)

  images_with_detections_list = []
  for indx in range(eval_dict[input_data_fields.original_image].shape[0]):
    # Note that detection masks have already been resized to the original image
    # shapes, but `original_image` has not.
    # TODO(ronnyvotel): Consider resizing `original_image` in
    # eval_util.result_dict_for_batched_example().
    true_shape = eval_dict[input_data_fields.true_image_shape][indx]
    original_shape = eval_dict[
        input_data_fields.original_image_spatial_shape][indx]
    image = eval_dict[input_data_fields.original_image][indx]
    image = shape_utils.pad_or_clip_nd(image, [true_shape[0], true_shape[1], 3])
    image = _resize_original_image(image, original_shape)

    scores = eval_dict[detection_fields.detection_scores][indx]
    detection_masks = eval_dict[detection_fields.detection_masks][indx]
    surface_coords = eval_dict[detection_fields.detection_surface_coords][indx]

    image_with_densepose = tf.py_func(
        draw_densepose_py_func,
        [image, detection_masks, surface_coords, scores],
        tf.uint8)
    # Restore the batch dimension expected by callers.
    images_with_detections_list.append(
        image_with_densepose[tf.newaxis, :, :, :])
  return images_with_detections_list
866
867
868
def draw_keypoints_on_image_array(image,
                                  keypoints,
                                  keypoint_scores=None,
                                  min_score_thresh=0.5,
                                  color='red',
                                  radius=2,
                                  use_normalized_coordinates=True,
                                  keypoint_edges=None,
                                  keypoint_edge_color='green',
                                  keypoint_edge_width=2):
  """Draws keypoints on an image (numpy array).

  The array is converted to an RGB PIL image, drawn on with
  `draw_keypoints_on_image`, and the result is copied back into `image`.
  The input array is therefore modified in place and nothing is returned.

  Args:
    image: a numpy array with shape [height, width, 3].
    keypoints: a numpy array with shape [num_keypoints, 2].
    keypoint_scores: a numpy array with shape [num_keypoints]. If provided, only
      those keypoints with a score above `min_score_thresh` will be visualized.
    min_score_thresh: A scalar indicating the minimum keypoint score required
      for a keypoint to be visualized. Note that keypoint_scores must be
      provided for this threshold to take effect.
    color: color to draw the keypoints with. Default is red.
    radius: keypoint radius. Default value is 2.
    use_normalized_coordinates: if True (default), treat keypoint values as
      relative to the image. Otherwise treat them as absolute.
    keypoint_edges: A list of tuples with keypoint indices that specify which
      keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws
      edges from keypoint 0 to 1 and from keypoint 2 to 4.
    keypoint_edge_color: color to draw the keypoint edges with. Default is
      green.
    keypoint_edge_width: width of the edges drawn between keypoints. Default
      value is 2.
  """
  image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
  draw_keypoints_on_image(
      image_pil,
      keypoints,
      keypoint_scores=keypoint_scores,
      min_score_thresh=min_score_thresh,
      color=color,
      radius=radius,
      use_normalized_coordinates=use_normalized_coordinates,
      keypoint_edges=keypoint_edges,
      keypoint_edge_color=keypoint_edge_color,
      keypoint_edge_width=keypoint_edge_width)
  # Write the drawn pixels back into the caller's array.
  np.copyto(image, np.array(image_pil))
911
912
913
def draw_keypoints_on_image(image,
                            keypoints,
                            keypoint_scores=None,
                            min_score_thresh=0.5,
                            color='red',
                            radius=2,
                            use_normalized_coordinates=True,
                            keypoint_edges=None,
                            keypoint_edge_color='green',
                            keypoint_edge_width=2):
  """Draws keypoints on an image.

  The image is drawn on in place; nothing is returned.

  Args:
    image: a PIL.Image object.
    keypoints: a numpy array with shape [num_keypoints, 2]. Each row is read
      as (y, x).
    keypoint_scores: a numpy array with shape [num_keypoints]. When omitted,
      a keypoint is drawn unless one of its coordinates is NaN.
    min_score_thresh: a score threshold for visualizing keypoints. Only used if
      keypoint_scores is provided.
    color: color to draw the keypoints with. Default is red.
    radius: keypoint radius. Default value is 2.
    use_normalized_coordinates: if True (default), treat keypoint values as
      relative to the image. Otherwise treat them as absolute.
    keypoint_edges: A list of tuples with keypoint indices that specify which
      keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws
      edges from keypoint 0 to 1 and from keypoint 2 to 4. Edges that
      reference an out-of-range index or a keypoint that is not drawn are
      skipped.
    keypoint_edge_color: color to draw the keypoint edges with. Default is
      green.
    keypoint_edge_width: width of the edges drawn between keypoints. Default
      value is 2.
  """
  keypoints = np.array(keypoints)
  if keypoints.size == 0:
    # An empty input becomes a 1-D array, which the NaN check below
    # (`axis=1`, `keypoints[:, 0]`) cannot index. There is nothing to draw.
    return

  draw = ImageDraw.Draw(image)
  im_width, im_height = image.size
  keypoints_x = [k[1] for k in keypoints]
  keypoints_y = [k[0] for k in keypoints]
  if use_normalized_coordinates:
    keypoints_x = tuple(im_width * x for x in keypoints_x)
    keypoints_y = tuple(im_height * y for y in keypoints_y)

  if keypoint_scores is not None:
    valid_kpt = np.greater(np.array(keypoint_scores), min_score_thresh)
  else:
    # Without scores, a keypoint is valid unless any coordinate is NaN.
    valid_kpt = np.where(np.any(np.isnan(keypoints), axis=1),
                         np.zeros_like(keypoints[:, 0]),
                         np.ones_like(keypoints[:, 0]))
  valid_kpt = list(valid_kpt)

  for keypoint_x, keypoint_y, valid in zip(keypoints_x, keypoints_y, valid_kpt):
    if valid:
      draw.ellipse([(keypoint_x - radius, keypoint_y - radius),
                    (keypoint_x + radius, keypoint_y + radius)],
                   outline=color, fill=color)

  if keypoint_edges is not None:
    num_keypoints = len(keypoints)
    for keypoint_start, keypoint_end in keypoint_edges:
      if not (0 <= keypoint_start < num_keypoints and
              0 <= keypoint_end < num_keypoints):
        continue
      if not (valid_kpt[keypoint_start] and valid_kpt[keypoint_end]):
        continue
      edge_coordinates = [
          keypoints_x[keypoint_start], keypoints_y[keypoint_start],
          keypoints_x[keypoint_end], keypoints_y[keypoint_end]
      ]
      draw.line(
          edge_coordinates, fill=keypoint_edge_color, width=keypoint_edge_width)
977
978
979
def draw_mask_on_image_array(image, mask, color='red', alpha=0.4):
  """Draws mask on an image.

  A solid-color layer is blended over every pixel where `mask > 0`, and the
  result is copied back into `image` (modified in place; nothing is
  returned).

  Args:
    image: uint8 numpy array with shape (img_height, img_width, 3)
    mask: a uint8 numpy array of shape (img_height, img_width) with
      values of either 0 or 1. Any value greater than 0 is treated as
      foreground.
    color: color to draw the mask with. Default is red.
    alpha: transparency value between 0 and 1. (default: 0.4)

  Raises:
    ValueError: On incorrect data type for image or masks, or when the mask's
      shape does not match the image's spatial dimensions.
  """
  if image.dtype != np.uint8:
    raise ValueError('`image` not of type np.uint8')
  if mask.dtype != np.uint8:
    raise ValueError('`mask` not of type np.uint8')
  if image.shape[:2] != mask.shape:
    raise ValueError('The image has spatial dimensions %s but the mask has '
                     'dimensions %s' % (image.shape[:2], mask.shape))
  rgb = ImageColor.getrgb(color)
  pil_image = Image.fromarray(image)

  # Full-size layer filled with the requested color.
  solid_color = np.expand_dims(
      np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3])
  pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA')
  # Per-pixel blend weight: 255 * alpha inside the mask, 0 elsewhere.
  pil_mask = Image.fromarray(np.uint8(255.0 * alpha * (mask > 0))).convert('L')
  pil_image = Image.composite(pil_solid_color, pil_image, pil_mask)
  np.copyto(image, np.array(pil_image.convert('RGB')))
1008
1009
1010
def draw_part_mask_on_image_array(image, mask, alpha=0.4, num_parts=24):
  """Draws part mask on an image.

  Part `i + 1` is painted with `STANDARD_COLORS[i]`, blended over the image
  wherever `mask > 0`. The result is copied back into `image` (modified in
  place; nothing is returned).

  Args:
    image: uint8 numpy array with shape (img_height, img_width, 3)
    mask: a uint8 numpy array of shape (img_height, img_width) with
      1-indexed parts (0 for background).
    alpha: transparency value between 0 and 1 (default: 0.4)
    num_parts: the maximum number of parts that may exist in the image (default
      24 for DensePose).

  Raises:
    ValueError: On incorrect data type for image or masks, or when the mask's
      shape does not match the image's spatial dimensions.
  """
  if image.dtype != np.uint8:
    raise ValueError('`image` not of type np.uint8')
  if mask.dtype != np.uint8:
    raise ValueError('`mask` not of type np.uint8')
  if image.shape[:2] != mask.shape:
    raise ValueError('The image has spatial dimensions %s but the mask has '
                     'dimensions %s' % (image.shape[:2], mask.shape))

  pil_image = Image.fromarray(image)
  part_colors = np.zeros_like(image)
  mask_1_channel = mask[:, :, np.newaxis]
  for i, color in enumerate(STANDARD_COLORS[:num_parts]):
    rgb = np.array(ImageColor.getrgb(color), dtype=np.uint8)
    # Part ids are 1-indexed, so color `i` belongs to part `i + 1`.
    part_colors += (mask_1_channel == i + 1) * rgb[np.newaxis, np.newaxis, :]
  pil_part_colors = Image.fromarray(np.uint8(part_colors)).convert('RGBA')
  # Per-pixel blend weight: 255 * alpha on any part, 0 on background.
  pil_mask = Image.fromarray(np.uint8(255.0 * alpha * (mask > 0))).convert('L')
  pil_image = Image.composite(pil_part_colors, pil_image, pil_mask)
  np.copyto(image, np.array(pil_image.convert('RGB')))
1042
1043
1044
def draw_float_channel_on_image_array(image, channel, mask, alpha=0.9,
                                      cmap='YlGn'):
  """Draws a floating point channel on an image array.

  The channel is mapped to colors with a matplotlib colormap and blended over
  the image wherever `mask > 0`. The result is copied back into `image`
  (modified in place; nothing is returned).

  Args:
    image: uint8 numpy array with shape (img_height, img_width, 3)
    channel: float32 numpy array with shape (img_height, img_width). The values
      should be in the range [0, 1], and will be mapped to colors using the
      provided colormap `cmap` argument.
    mask: a uint8 numpy array of shape (img_height, img_width). The channel is
      drawn only where the mask is greater than 0.
    alpha: transparency value between 0 and 1 (default: 0.9)
    cmap: string with the colormap to use.

  Raises:
    ValueError: On incorrect data type for image, channel or mask, or when the
      channel's or mask's shape does not match the image's spatial
      dimensions.
  """
  if image.dtype != np.uint8:
    raise ValueError('`image` not of type np.uint8')
  if channel.dtype != np.float32:
    raise ValueError('`channel` not of type np.float32')
  if mask.dtype != np.uint8:
    raise ValueError('`mask` not of type np.uint8')
  if image.shape[:2] != channel.shape:
    raise ValueError('The image has spatial dimensions %s but the channel has '
                     'dimensions %s' % (image.shape[:2], channel.shape))
  if image.shape[:2] != mask.shape:
    raise ValueError('The image has spatial dimensions %s but the mask has '
                     'dimensions %s' % (image.shape[:2], mask.shape))

  cm = plt.get_cmap(cmap)
  pil_image = Image.fromarray(image)
  # The colormap returns RGBA floats; keep only the RGB channels.
  colored_channel = cm(channel)[:, :, :3]
  pil_colored_channel = Image.fromarray(
      np.uint8(colored_channel * 255)).convert('RGBA')
  # Per-pixel blend weight: 255 * alpha inside the mask, 0 elsewhere.
  pil_mask = Image.fromarray(np.uint8(255.0 * alpha * (mask > 0))).convert('L')
  pil_image = Image.composite(pil_colored_channel, pil_image, pil_mask)
  np.copyto(image, np.array(pil_image.convert('RGB')))
1082
1083
1084
def visualize_boxes_and_labels_on_image_array(
    image,
    boxes,
    classes,
    scores,
    category_index,
    instance_masks=None,
    instance_boundaries=None,
    keypoints=None,
    keypoint_scores=None,
    keypoint_edges=None,
    track_ids=None,
    use_normalized_coordinates=False,
    max_boxes_to_draw=20,
    min_score_thresh=.5,
    agnostic_mode=False,
    line_thickness=4,
    groundtruth_box_visualization_color='black',
    skip_boxes=False,
    skip_scores=False,
    skip_labels=False,
    skip_track_ids=False):
  """Overlay labeled boxes on an image with formatted scores and label names.

  This function groups boxes that correspond to the same location
  and creates a display string for each detection and overlays these
  on the image. Note that this function modifies the image in place, and returns
  that same image.

  Args:
    image: uint8 numpy array with shape (img_height, img_width, 3)
    boxes: a numpy array of shape [N, 4]. Each row is read as
      (ymin, xmin, ymax, xmax).
    classes: a numpy array of shape [N]. Note that class indices are 1-based,
      and match the keys in the label map.
    scores: a numpy array of shape [N] or None. If scores=None, then
      this function assumes that the boxes to be plotted are groundtruth
      boxes and plot all boxes as black with no classes or scores.
    category_index: a dict containing category dictionaries (each holding
      category index `id` and category name `name`) keyed by category indices.
    instance_masks: a uint8 numpy array of shape [N, image_height, image_width],
      can be None.
    instance_boundaries: a numpy array of shape [N, image_height, image_width]
      with values ranging between 0 and 1, can be None.
    keypoints: a numpy array of shape [N, num_keypoints, 2], can
      be None.
    keypoint_scores: a numpy array of shape [N, num_keypoints], can be None.
    keypoint_edges: A list of tuples with keypoint indices that specify which
      keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws
      edges from keypoint 0 to 1 and from keypoint 2 to 4.
    track_ids: a numpy array of shape [N] with unique track ids. If provided,
      color-coding of boxes will be determined by these ids, and not the class
      indices.
    use_normalized_coordinates: whether boxes is to be interpreted as
      normalized coordinates or not.
    max_boxes_to_draw: maximum number of boxes to visualize. If None (or any
      other falsy value), draw all boxes.
    min_score_thresh: minimum score threshold for a box or keypoint to be
      visualized.
    agnostic_mode: boolean (default: False) controlling whether to evaluate in
      class-agnostic mode or not. This mode will display scores but ignore
      classes.
    line_thickness: integer (default: 4) controlling line width of the boxes.
    groundtruth_box_visualization_color: box color for visualizing groundtruth
      boxes
    skip_boxes: whether to skip the drawing of bounding boxes.
    skip_scores: whether to skip score when drawing a single detection
    skip_labels: whether to skip label when drawing a single detection
    skip_track_ids: whether to skip track id when drawing a single detection

  Returns:
    uint8 numpy array with shape (img_height, img_width, 3) with overlaid boxes.
  """
  # Create a display string (and color) for every box location, group any boxes
  # that correspond to the same location.
  box_to_display_str_map = collections.defaultdict(list)
  box_to_color_map = collections.defaultdict(str)
  box_to_instance_masks_map = {}
  box_to_instance_boundaries_map = {}
  box_to_keypoints_map = collections.defaultdict(list)
  box_to_keypoint_scores_map = collections.defaultdict(list)
  box_to_track_ids_map = {}
  if not max_boxes_to_draw:
    max_boxes_to_draw = boxes.shape[0]
  for i in range(boxes.shape[0]):
    # The limit counts distinct box locations, not detections.
    if max_boxes_to_draw == len(box_to_color_map):
      break
    if scores is None or scores[i] > min_score_thresh:
      box = tuple(boxes[i].tolist())
      if instance_masks is not None:
        box_to_instance_masks_map[box] = instance_masks[i]
      if instance_boundaries is not None:
        box_to_instance_boundaries_map[box] = instance_boundaries[i]
      if keypoints is not None:
        box_to_keypoints_map[box].extend(keypoints[i])
      if keypoint_scores is not None:
        box_to_keypoint_scores_map[box].extend(keypoint_scores[i])
      if track_ids is not None:
        box_to_track_ids_map[box] = track_ids[i]
      if scores is None:
        # Groundtruth box: fixed color, no label.
        box_to_color_map[box] = groundtruth_box_visualization_color
      else:
        display_str = ''
        if not skip_labels:
          if not agnostic_mode:
            if classes[i] in category_index:
              class_name = category_index[classes[i]]['name']
            else:
              class_name = 'N/A'
            display_str = str(class_name)
        if not skip_scores:
          if not display_str:
            display_str = '{}%'.format(round(100*scores[i]))
          else:
            display_str = '{}: {}%'.format(display_str, round(100*scores[i]))
        if not skip_track_ids and track_ids is not None:
          if not display_str:
            display_str = 'ID {}'.format(track_ids[i])
          else:
            display_str = '{}: ID {}'.format(display_str, track_ids[i])
        box_to_display_str_map[box].append(display_str)
        if agnostic_mode:
          box_to_color_map[box] = 'DarkOrange'
        elif track_ids is not None:
          prime_multipler = _get_multiplier_for_color_randomness()
          box_to_color_map[box] = STANDARD_COLORS[
              (prime_multipler * track_ids[i]) % len(STANDARD_COLORS)]
        else:
          box_to_color_map[box] = STANDARD_COLORS[
              classes[i] % len(STANDARD_COLORS)]

  # Draw all boxes onto image.
  for box, color in box_to_color_map.items():
    ymin, xmin, ymax, xmax = box
    if instance_masks is not None:
      draw_mask_on_image_array(
          image,
          box_to_instance_masks_map[box],
          color=color
      )
    if instance_boundaries is not None:
      draw_mask_on_image_array(
          image,
          box_to_instance_boundaries_map[box],
          color='red',
          alpha=1.0
      )
    draw_bounding_box_on_image_array(
        image,
        ymin,
        xmin,
        ymax,
        xmax,
        color=color,
        thickness=0 if skip_boxes else line_thickness,
        display_str_list=box_to_display_str_map[box],
        use_normalized_coordinates=use_normalized_coordinates)
    if keypoints is not None:
      keypoint_scores_for_box = None
      if box_to_keypoint_scores_map:
        keypoint_scores_for_box = box_to_keypoint_scores_map[box]
      draw_keypoints_on_image_array(
          image,
          box_to_keypoints_map[box],
          keypoint_scores_for_box,
          min_score_thresh=min_score_thresh,
          color=color,
          radius=line_thickness / 2,
          use_normalized_coordinates=use_normalized_coordinates,
          keypoint_edges=keypoint_edges,
          keypoint_edge_color=color,
          keypoint_edge_width=line_thickness // 2)

  return image
1257
1258
1259
def add_cdf_image_summary(values, name):
  """Adds a tf.summary.image for a CDF plot of the values.

  Normalizes `values` such that they sum to 1, plots the cumulative distribution
  function and creates a tf image summary.

  Args:
    values: a 1-D float32 tensor containing the values.
    name: name for the image summary.
  """
  def cdf_plot(values):
    """Numpy function to plot CDF."""
    normalized_values = values / np.sum(values)
    sorted_values = np.sort(normalized_values)
    cumulative_values = np.cumsum(sorted_values)
    fraction_of_examples = (np.arange(cumulative_values.size, dtype=np.float32)
                            / cumulative_values.size)
    fig = plt.figure(frameon=False)
    try:
      # Integer subplot spec; the string form '111' is rejected by current
      # Matplotlib releases.
      ax = fig.add_subplot(111)
      ax.plot(fraction_of_examples, cumulative_values)
      ax.set_ylabel('cumulative normalized values')
      ax.set_xlabel('fraction of examples')
      fig.canvas.draw()
      width, height = fig.get_size_inches() * fig.get_dpi()
      # frombuffer replaces the deprecated binary mode of np.fromstring; the
      # copy yields a writable array that owns its data, as fromstring did.
      image = np.frombuffer(
          fig.canvas.tostring_rgb(), dtype='uint8').reshape(
              1, int(height), int(width), 3).copy()
    finally:
      # pyplot keeps every figure alive until it is closed explicitly.
      plt.close(fig)
    return image
  cdf_plot = tf.py_func(cdf_plot, [values], tf.uint8)
  tf.summary.image(name, cdf_plot)
1288
1289
1290
def add_hist_image_summary(values, bins, name):
  """Adds a tf.summary.image for a histogram plot of the values.

  Plots the histogram of values and creates a tf image summary.

  Args:
    values: a 1-D float32 tensor containing the values.
    bins: bin edges which will be directly passed to np.histogram.
    name: name for the image summary.
  """

  def hist_plot(values, bins):
    """Numpy function to plot hist."""
    fig = plt.figure(frameon=False)
    try:
      # Integer subplot spec; the string form '111' is rejected by current
      # Matplotlib releases.
      ax = fig.add_subplot(111)
      y, x = np.histogram(values, bins=bins)
      # np.histogram returns one more edge than counts; plot against the left
      # edge of each bin.
      ax.plot(x[:-1], y)
      ax.set_ylabel('count')
      ax.set_xlabel('value')
      fig.canvas.draw()
      width, height = fig.get_size_inches() * fig.get_dpi()
      # frombuffer replaces the deprecated binary mode of np.fromstring; the
      # copy yields a writable array that owns its data, as fromstring did.
      image = np.frombuffer(
          fig.canvas.tostring_rgb(), dtype='uint8').reshape(
              1, int(height), int(width), 3).copy()
    finally:
      # pyplot keeps every figure alive until it is closed explicitly.
      plt.close(fig)
    return image
  hist_plot = tf.py_func(hist_plot, [values, bins], tf.uint8)
  tf.summary.image(name, hist_plot)
1317
1318
1319
class EvalMetricOpsVisualization(six.with_metaclass(abc.ABCMeta, object)):
  """Abstract base class responsible for visualizations during evaluation.

  Currently, summary images are not run during evaluation. One way to produce
  evaluation images in Tensorboard is to provide tf.summary.image strings as
  `value_ops` in tf.estimator.EstimatorSpec's `eval_metric_ops`. This class is
  responsible for accruing images (with overlaid detections and groundtruth)
  and returning a dictionary that can be passed to `eval_metric_ops`.
  """

  def __init__(self,
               category_index,
               max_examples_to_draw=5,
               max_boxes_to_draw=20,
               min_score_thresh=0.2,
               use_normalized_coordinates=True,
               summary_name_prefix='evaluation_image',
               keypoint_edges=None):
    """Creates an EvalMetricOpsVisualization.

    Args:
      category_index: A category index (dictionary) produced from a labelmap.
      max_examples_to_draw: The maximum number of example summaries to produce.
      max_boxes_to_draw: The maximum number of boxes to draw for detections.
      min_score_thresh: The minimum score threshold for showing detections.
      use_normalized_coordinates: Whether to assume boxes and keypoints are in
        normalized coordinates (as opposed to absolute coordinates).
        Default is True.
      summary_name_prefix: A string prefix for each image summary.
      keypoint_edges: A list of tuples with keypoint indices that specify which
        keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws
        edges from keypoint 0 to 1 and from keypoint 2 to 4.
    """

    self._category_index = category_index
    self._max_examples_to_draw = max_examples_to_draw
    self._max_boxes_to_draw = max_boxes_to_draw
    self._min_score_thresh = min_score_thresh
    self._use_normalized_coordinates = use_normalized_coordinates
    self._summary_name_prefix = summary_name_prefix
    self._keypoint_edges = keypoint_edges
    # Images accumulated by add_images() until get_images() drains them.
    self._images = []

  def clear(self):
    """Discards all stored images."""
    self._images = []

  def add_images(self, images):
    """Store a list of images, each with shape [1, H, W, C].

    Images beyond `max_examples_to_draw` are dropped.

    Args:
      images: an iterable of images to append to the stored list.
    """
    if len(self._images) >= self._max_examples_to_draw:
      return

    # Store images and clip list if necessary.
    self._images.extend(images)
    if len(self._images) > self._max_examples_to_draw:
      del self._images[self._max_examples_to_draw:]

  def get_estimator_eval_metric_ops(self, eval_dict):
    """Returns metric ops for use in tf.estimator.EstimatorSpec.

    Args:
      eval_dict: A dictionary that holds an image, groundtruth, and detections
        for a batched example. Note that, we use only the first example for
        visualization. See eval_util.result_dict_for_batched_example() for a
        convenient method for constructing such a dictionary. The dictionary
        contains
        fields.InputDataFields.original_image: [batch_size, H, W, 3] image.
        fields.InputDataFields.original_image_spatial_shape: [batch_size, 2]
          tensor containing the size of the original image.
        fields.InputDataFields.true_image_shape: [batch_size, 3]
          tensor containing the spatial size of the unpadded original image.
        fields.InputDataFields.groundtruth_boxes - [batch_size, num_boxes, 4]
          float32 tensor with groundtruth boxes in range [0.0, 1.0].
        fields.InputDataFields.groundtruth_classes - [batch_size, num_boxes]
          int64 tensor with 1-indexed groundtruth classes.
        fields.InputDataFields.groundtruth_instance_masks - (optional)
          [batch_size, num_boxes, H, W] int64 tensor with instance masks.
        fields.InputDataFields.groundtruth_keypoints - (optional)
          [batch_size, num_boxes, num_keypoints, 2] float32 tensor with
          keypoint coordinates in format [y, x].
        fields.InputDataFields.groundtruth_keypoint_visibilities - (optional)
          [batch_size, num_boxes, num_keypoints] bool tensor with
          keypoint visibilities.
        fields.DetectionResultFields.detection_boxes - [batch_size,
          max_num_boxes, 4] float32 tensor with detection boxes in range [0.0,
          1.0].
        fields.DetectionResultFields.detection_classes - [batch_size,
          max_num_boxes] int64 tensor with 1-indexed detection classes.
        fields.DetectionResultFields.detection_scores - [batch_size,
          max_num_boxes] float32 tensor with detection scores.
        fields.DetectionResultFields.detection_masks - (optional) [batch_size,
          max_num_boxes, H, W] float32 tensor of binarized masks.
        fields.DetectionResultFields.detection_keypoints - (optional)
          [batch_size, max_num_boxes, num_keypoints, 2] float32 tensor with
          keypoints.
        fields.DetectionResultFields.detection_keypoint_scores - (optional)
          [batch_size, max_num_boxes, num_keypoints] float32 tensor with
          keypoints scores.

    Returns:
      A dictionary of image summary names to tuple of (value_op, update_op). The
      `update_op` is the same for all items in the dictionary, and is
      responsible for saving a single side-by-side image with detections and
      groundtruth. Each `value_op` holds the tf.summary.image string for a given
      image. An empty dictionary is returned when `max_examples_to_draw` is 0.
    """
    if self._max_examples_to_draw == 0:
      return {}
    images = self.images_from_evaluation_dict(eval_dict)

    def get_images():
      """Returns a list of images, padded to self._max_examples_to_draw."""
      stored_images = self._images
      # Pad with scalar placeholders so the list length always matches the
      # number of outputs declared for tf.py_func below.
      while len(stored_images) < self._max_examples_to_draw:
        stored_images.append(np.array(0, dtype=np.uint8))
      self.clear()
      return stored_images

    def image_summary_or_default_string(summary_name, image):
      """Returns image summaries for non-padded elements."""
      # Real images are rank 4; the scalar padding entries are not.
      return tf.cond(
          tf.equal(tf.size(tf.shape(image)), 4),
          lambda: tf.summary.image(summary_name, image),
          lambda: tf.constant(''))

    if tf.executing_eagerly():
      # NOTE(review): add_images() has no return value, so `update_op` is None
      # in eager mode -- confirm callers tolerate that.
      update_op = self.add_images([[images[0]]])
      image_tensors = get_images()
    else:
      update_op = tf.py_func(self.add_images, [[images[0]]], [])
      image_tensors = tf.py_func(
          get_images, [], [tf.uint8] * self._max_examples_to_draw)
    eval_metric_ops = {}
    for i, image in enumerate(image_tensors):
      summary_name = self._summary_name_prefix + '/' + str(i)
      value_op = image_summary_or_default_string(summary_name, image)
      eval_metric_ops[summary_name] = (value_op, update_op)
    return eval_metric_ops

  @abc.abstractmethod
  def images_from_evaluation_dict(self, eval_dict):
    """Converts evaluation dictionary into a list of image tensors.

    To be overridden by implementations.

    Args:
      eval_dict: A dictionary with all the necessary information for producing
        visualizations.

    Returns:
      A list of [1, H, W, C] uint8 tensors.
    """
    raise NotImplementedError
1471
1472
1473
class VisualizeSingleFrameDetections(EvalMetricOpsVisualization):
  """Class responsible for single-frame object detection visualizations."""

  def __init__(self,
               category_index,
               max_examples_to_draw=5,
               max_boxes_to_draw=20,
               min_score_thresh=0.2,
               use_normalized_coordinates=True,
               summary_name_prefix='Detections_Left_Groundtruth_Right',
               keypoint_edges=None):
    """Creates a VisualizeSingleFrameDetections.

    All arguments are forwarded unchanged to the base class constructor; only
    the default `summary_name_prefix` differs.

    Args:
      category_index: A category index (dictionary) produced from a labelmap.
      max_examples_to_draw: The maximum number of example summaries to produce.
      max_boxes_to_draw: The maximum number of boxes to draw for detections.
      min_score_thresh: The minimum score threshold for showing detections.
      use_normalized_coordinates: Whether to assume boxes and keypoints are in
        normalized coordinates (as opposed to absolute coordinates).
        Default is True.
      summary_name_prefix: A string prefix for each image summary.
      keypoint_edges: A list of tuples with keypoint indices that specify which
        keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws
        edges from keypoint 0 to 1 and from keypoint 2 to 4.
    """
    super(VisualizeSingleFrameDetections, self).__init__(
        category_index=category_index,
        max_examples_to_draw=max_examples_to_draw,
        max_boxes_to_draw=max_boxes_to_draw,
        min_score_thresh=min_score_thresh,
        use_normalized_coordinates=use_normalized_coordinates,
        summary_name_prefix=summary_name_prefix,
        keypoint_edges=keypoint_edges)

  def images_from_evaluation_dict(self, eval_dict):
    """Builds the evaluation images for `eval_dict`.

    Delegates to `draw_side_by_side_evaluation_image` using the settings
    stored on this instance.

    Args:
      eval_dict: A dictionary with all the necessary information for producing
        visualizations.

    Returns:
      The value returned by `draw_side_by_side_evaluation_image`.
    """
    return draw_side_by_side_evaluation_image(
        eval_dict,
        self._category_index,
        self._max_boxes_to_draw,
        self._min_score_thresh,
        self._use_normalized_coordinates,
        self._keypoint_edges)
1499
1500