CoCalc -- visualization

GitHub Repository: vardanagarwal/Proctoring-AI
Path: blob/master/coco models/tflite mobnetv1 ssd/visualization_utils.py
⁴⁵⁵ views
1
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
#
3
# Licensed under the Apache License, Version 2.0 (the "License");
4
# you may not use this file except in compliance with the License.
5
# You may obtain a copy of the License at
6
#
7
#     http://www.apache.org/licenses/LICENSE-2.0
8
#
9
# Unless required by applicable law or agreed to in writing, software
10
# distributed under the License is distributed on an "AS IS" BASIS,
11
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
# See the License for the specific language governing permissions and
13
# limitations under the License.
14
# ==============================================================================
15

16
"""A set of functions that are used for visualization.
17

18
These functions often receive an image, perform some visualization on the image.
19
The functions do not return a value, instead they modify the image itself.
20

21
"""
22
from __future__ import absolute_import
23
from __future__ import division
24
from __future__ import print_function
25

26
import abc
27
import collections
28
# Set headless-friendly backend.
29
import matplotlib; matplotlib.use('Agg')  # pylint: disable=multiple-statements
30
import matplotlib.pyplot as plt  # pylint: disable=g-import-not-at-top
31
import numpy as np
32
import PIL.Image as Image
33
import PIL.ImageColor as ImageColor
34
import PIL.ImageDraw as ImageDraw
35
import PIL.ImageFont as ImageFont
36
import six
37
from six.moves import range
38
from six.moves import zip
39
import tensorflow.compat.v1 as tf
40

41
from object_detection.core import keypoint_ops
42
from object_detection.core import standard_fields as fields
43
from object_detection.utils import shape_utils
44

45
# NOTE(review): these two margins are not referenced in the visible part of
# this file; presumably pixel offsets for a title drawn onto an image --
# confirm against their callers.
_TITLE_LEFT_MARGIN = 10
_TITLE_TOP_MARGIN = 10
# Color names used for drawing. draw_heatmaps_on_image() hands one entry per
# heatmap channel to PIL as the `fill` color, and
# _get_multiplier_for_color_randomness() derives its multiplier from the length
# of this list, so adding or removing entries changes which colors get picked.
STANDARD_COLORS = [
    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
    'WhiteSmoke', 'Yellow', 'YellowGreen'
]
72

73

74
def _get_multiplier_for_color_randomness():
  """Picks a stride for walking STANDARD_COLORS in a scattered order.

  The stride is chosen among the primes 5, 7, 11, 13 and 17. Primes that divide
  len(STANDARD_COLORS) are discarded; of the remaining ones, the prime closest
  to len(STANDARD_COLORS) / 10 wins (the smaller prime on a tie).

  Typical use of the returned stride p:
  colors = [(p * i) % len(STANDARD_COLORS) for i in range(20)]

  Returns:
    The selected prime, or 1 when every candidate divides the number of colors.
  """
  palette_size = len(STANDARD_COLORS)

  # Keep only the primes that do not divide the palette size.
  usable_primes = []
  for prime in (5, 7, 11, 13, 17):
    if palette_size % prime != 0:
      usable_primes.append(prime)
  if len(usable_primes) == 0:
    return 1

  target = palette_size / 10.
  # min() returns the first of several equally distant primes, and the
  # candidates are in ascending order, so ties resolve to the smaller prime.
  return min(usable_primes, key=lambda prime: np.abs(target - prime))
100

101

102
def save_image_array_as_png(image, output_path):
  """Writes a numpy image to `output_path` as a PNG file.

  The array is cast to uint8 and converted to an RGB PIL image before it is
  encoded.

  Args:
    image: a numpy array with shape [height, width, 3].
    output_path: path to which image should be written.
  """
  pixels = np.uint8(image)
  rgb_image = Image.fromarray(pixels).convert('RGB')
  with tf.gfile.Open(output_path, 'w') as png_file:
    rgb_image.save(png_file, 'PNG')
112

113

114
def encode_image_array_as_png_str(image):
  """Returns the PNG encoding of a numpy image.

  The array is cast to uint8, wrapped in a PIL image and serialized into an
  in-memory buffer.

  Args:
    image: a numpy array with shape [height, width, 3].

  Returns:
    PNG encoded image string.
  """
  pil_image = Image.fromarray(np.uint8(image))
  with six.BytesIO() as png_buffer:
    pil_image.save(png_buffer, format='PNG')
    # Read the bytes out before the buffer is closed on leaving the block.
    return png_buffer.getvalue()
129

130

131
def draw_bounding_box_on_image_array(image,
                                     ymin,
                                     xmin,
                                     ymax,
                                     xmax,
                                     color='red',
                                     thickness=4,
                                     display_str_list=(),
                                     use_normalized_coordinates=True):
  """Draws one bounding box onto a numpy image, in place.

  The array is rendered through a temporary RGB PIL image: the box is drawn on
  that image by draw_bounding_box_on_image and the result is copied back into
  `image`.

  Args:
    image: a numpy array with shape [height, width, 3].
    ymin: ymin of bounding box.
    xmin: xmin of bounding box.
    ymax: ymax of bounding box.
    xmax: xmax of bounding box.
    color: color to draw bounding box. Default is red.
    thickness: line thickness. Default value is 4.
    display_str_list: list of strings to display in box
                      (each to be shown on its own line).
    use_normalized_coordinates: If True (default), treat coordinates
      ymin, xmin, ymax, xmax as relative to the image.  Otherwise treat
      coordinates as absolute.
  """
  canvas = Image.fromarray(np.uint8(image)).convert('RGB')
  draw_bounding_box_on_image(
      canvas,
      ymin,
      xmin,
      ymax,
      xmax,
      color=color,
      thickness=thickness,
      display_str_list=display_str_list,
      use_normalized_coordinates=use_normalized_coordinates)
  # Write the rendered pixels back into the caller's array.
  np.copyto(image, np.array(canvas))
164

165

166
def _measure_text(font, text):
  """Returns the (width, height) of `text` when rendered with `font`.

  Uses `font.getsize` when the installed Pillow still provides it and falls
  back to the right/bottom edges of `font.getbbox` otherwise (Pillow 10
  removed `getsize`).
  """
  if hasattr(font, 'getsize'):
    return font.getsize(text)
  _, _, right, bottom = font.getbbox(text)
  return right, bottom


def draw_bounding_box_on_image(image,
                               ymin,
                               xmin,
                               ymax,
                               xmax,
                               color='red',
                               thickness=4,
                               display_str_list=(),
                               use_normalized_coordinates=True):
  """Adds a bounding box to an image.

  Bounding box coordinates can be specified in either absolute (pixel) or
  normalized coordinates by setting the use_normalized_coordinates argument.

  Each string in display_str_list is displayed on a separate line above the
  bounding box in black text on a rectangle filled with the input 'color'.
  If the top of the bounding box extends to the edge of the image, the strings
  are displayed below the bounding box.

  Successive label lines are stacked by the full height of the label rectangle
  (text height plus a margin above and below), so they do not overlap.

  Args:
    image: a PIL.Image object.
    ymin: ymin of bounding box.
    xmin: xmin of bounding box.
    ymax: ymax of bounding box.
    xmax: xmax of bounding box.
    color: color to draw bounding box. Default is red.
    thickness: line thickness. Default value is 4.
    display_str_list: list of strings to display in box
                      (each to be shown on its own line).
    use_normalized_coordinates: If True (default), treat coordinates
      ymin, xmin, ymax, xmax as relative to the image.  Otherwise treat
      coordinates as absolute.
  """
  draw = ImageDraw.Draw(image)
  im_width, im_height = image.size
  if use_normalized_coordinates:
    (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                  ymin * im_height, ymax * im_height)
  else:
    (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
  if thickness > 0:
    draw.line([(left, top), (left, bottom), (right, bottom), (right, top),
               (left, top)],
              width=thickness,
              fill=color)

  # Nothing to label: skip the font lookup entirely.
  if len(display_str_list) == 0:
    return

  try:
    font = ImageFont.truetype('arial.ttf', 24)
  except IOError:
    # arial.ttf is not available; use PIL's built-in font instead.
    font = ImageFont.load_default()

  # Measure every label once; the sizes are needed both for the placement
  # decision and for drawing.
  text_sizes = [_measure_text(font, ds) for ds in display_str_list]

  # If the total height of the display strings added to the top of the bounding
  # box exceeds the top of the image, stack the strings below the bounding box
  # instead of above.
  # Each display_str has a top and bottom margin of 0.05x.
  total_display_str_height = (1 + 2 * 0.05) * sum(
      height for _, height in text_sizes)

  if top > total_display_str_height:
    text_bottom = top
  else:
    text_bottom = bottom + total_display_str_height

  # Reverse list and print from bottom to top.
  for display_str, (text_width, text_height) in zip(display_str_list[::-1],
                                                    text_sizes[::-1]):
    margin = np.ceil(0.05 * text_height)
    draw.rectangle(
        [(left, text_bottom - text_height - 2 * margin), (left + text_width,
                                                          text_bottom)],
        fill=color)
    draw.text(
        (left + margin, text_bottom - text_height - margin),
        display_str,
        fill='black',
        font=font)
    # Move up by the full height of the rectangle just drawn (text plus both
    # margins) so the next label sits directly above it.
    text_bottom -= text_height + 2 * margin
241

242

243
def draw_bounding_boxes_on_image_array(image,
                                       boxes,
                                       color='red',
                                       thickness=4,
                                       display_str_list_list=()):
  """Draws several bounding boxes onto a numpy image, in place.

  The array is wrapped in a PIL image, draw_bounding_boxes_on_image renders the
  boxes on it, and the rendered pixels are copied back into `image`.

  Args:
    image: a numpy array object.
    boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax).
           The coordinates are in normalized format between [0, 1].
    color: color to draw bounding box. Default is red.
    thickness: line thickness. Default value is 4.
    display_str_list_list: list of list of strings.
                           a list of strings for each bounding box.
                           The reason to pass a list of strings for a
                           bounding box is that it might contain
                           multiple labels.

  Raises:
    ValueError: if boxes is not a [N, 4] array
  """
  canvas = Image.fromarray(image)
  draw_bounding_boxes_on_image(
      canvas,
      boxes,
      color=color,
      thickness=thickness,
      display_str_list_list=display_str_list_list)
  # Write the rendered pixels back into the caller's array.
  np.copyto(image, np.array(canvas))
269

270

271
def draw_bounding_boxes_on_image(image,
                                 boxes,
                                 color='red',
                                 thickness=4,
                                 display_str_list_list=()):
  """Draws every box of `boxes` onto a PIL image.

  Each row of `boxes` is forwarded to draw_bounding_box_on_image together with
  the matching entry of `display_str_list_list` (or no labels when that
  argument is empty). A `boxes` array with an empty shape is ignored.

  Args:
    image: a PIL.Image object.
    boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax).
           The coordinates are in normalized format between [0, 1].
    color: color to draw bounding box. Default is red.
    thickness: line thickness. Default value is 4.
    display_str_list_list: list of list of strings.
                           a list of strings for each bounding box.
                           The reason to pass a list of strings for a
                           bounding box is that it might contain
                           multiple labels.

  Raises:
    ValueError: if boxes is not a [N, 4] array
  """
  shape = boxes.shape
  if not shape:
    return
  if not (len(shape) == 2 and shape[1] == 4):
    raise ValueError('Input must be of size [N, 4]')

  for row in range(shape[0]):
    # Labels are optional; fall back to an empty tuple when none were given.
    labels = display_str_list_list[row] if display_str_list_list else ()
    ymin, xmin, ymax, xmax = (boxes[row, col] for col in range(4))
    draw_bounding_box_on_image(image, ymin, xmin, ymax, xmax, color, thickness,
                               labels)
304

305

306
def create_visualization_fn(category_index,
                            include_masks=False,
                            include_keypoints=False,
                            include_keypoint_scores=False,
                            include_track_ids=False,
                            **kwargs):
  """Builds a positional-only visualization function for use in a py_func.

  py_funcs only accept positional arguments, so the returned function maps its
  positional arguments onto the keyword arguments of
  visualize_boxes_and_labels_on_image_array. The positional arguments are, in
  order:
  0: image
  1: boxes
  2: classes
  3: scores
  followed by whichever of these were enabled, in this order and without gaps:
  masks, keypoints, keypoint_scores, track_ids.

  -- Example 1 --
  vis_only_masks_fn = create_visualization_fn(category_index,
    include_masks=True, include_keypoints=False, include_track_ids=False,
    **kwargs)
  image = tf.py_func(vis_only_masks_fn,
                     inp=[image, boxes, classes, scores, masks],
                     Tout=tf.uint8)

  -- Example 2 --
  vis_masks_and_track_ids_fn = create_visualization_fn(category_index,
    include_masks=True, include_keypoints=False, include_track_ids=True,
    **kwargs)
  image = tf.py_func(vis_masks_and_track_ids_fn,
                     inp=[image, boxes, classes, scores, masks, track_ids],
                     Tout=tf.uint8)

  Args:
    category_index: a dict that maps integer ids to category dicts. e.g.
      {1: {1: 'dog'}, 2: {2: 'cat'}, ...}
    include_masks: Whether masks should be expected as a positional argument in
      the returned function.
    include_keypoints: Whether keypoints should be expected as a positional
      argument in the returned function.
    include_keypoint_scores: Whether keypoint scores should be expected as a
      positional argument in the returned function.
    include_track_ids: Whether track ids should be expected as a positional
      argument in the returned function.
    **kwargs: Additional kwargs that will be passed to
      visualize_boxes_and_labels_on_image_array.

  Returns:
    Returns a function that only takes tensors as positional arguments.
  """

  def visualization_py_func_fn(*args):
    """Visualization function that can be wrapped in a tf.py_func.

    Args:
      *args: First 4 positional arguments must be:
        image - uint8 numpy array with shape (img_height, img_width, 3).
        boxes - a numpy array of shape [N, 4].
        classes - a numpy array of shape [N].
        scores - a numpy array of shape [N] or None.
        -- Optional positional arguments --
        instance_masks - a numpy array of shape [N, image_height, image_width].
        keypoints - a numpy array of shape [N, num_keypoints, 2].
        keypoint_scores - a numpy array of shape [N, num_keypoints].
        track_ids - a numpy array of shape [N] with unique track ids.

    Returns:
      uint8 numpy array with shape (img_height, img_width, 3) with overlaid
      boxes.
    """
    image = args[0]
    boxes = args[1]
    classes = args[2]
    scores = args[3]

    # Walk the optional inputs in their fixed order; each enabled one consumes
    # the next positional argument, each disabled one stays None.
    enabled_flags = (include_masks, include_keypoints, include_keypoint_scores,
                     include_track_ids)
    optional_values = []
    next_arg = 4
    for is_enabled in enabled_flags:
      if is_enabled:
        optional_values.append(args[next_arg])
        next_arg += 1
      else:
        optional_values.append(None)
    masks, keypoints, keypoint_scores, track_ids = optional_values

    return visualize_boxes_and_labels_on_image_array(
        image,
        boxes,
        classes,
        scores,
        category_index=category_index,
        instance_masks=masks,
        keypoints=keypoints,
        keypoint_scores=keypoint_scores,
        track_ids=track_ids,
        **kwargs)

  return visualization_py_func_fn
409

410

411
def draw_heatmaps_on_image(image, heatmaps):
  """Draws heatmaps on an image.

  The heatmaps are handled channel by channel and different colors are used to
  paint different heatmap channels. Each channel is scaled by 255, cast to
  uint8 and drawn as an 8-bit ('L') bitmap filled with that channel's color.
  Colors are taken from STANDARD_COLORS and wrap around when there are more
  channels than colors.

  Args:
    image: a PIL.Image object. It is modified in place.
    heatmaps: a numpy array with shape [image_height, image_width, channel].
      Note that the image_height and image_width should match the size of input
      image. Values are not clipped before the uint8 cast, so they are expected
      to lie in [0.0, 1.0].
  """
  draw = ImageDraw.Draw(image)
  num_colors = len(STANDARD_COLORS)
  for c in range(heatmaps.shape[2]):
    scaled = (heatmaps[:, :, c] * 255).astype('uint8')
    # The 'L' image is passed to draw.bitmap as is. An earlier
    # `bitmap.convert('1')` call discarded its result and therefore never had
    # any effect, so it has been dropped.
    bitmap = Image.fromarray(scaled, 'L')
    draw.bitmap(
        xy=[(0, 0)],
        bitmap=bitmap,
        fill=STANDARD_COLORS[c % num_colors])
434

435

436
def draw_heatmaps_on_image_array(image, heatmaps):
  """Overlays heatmaps on an image given as a numpy array.

  Array counterpart of "draw_heatmaps_on_image": the image is converted to an
  RGB PIL image, the heatmap channels are painted on it in different colors,
  and the result is returned as a new array. Inputs that are not numpy arrays
  are converted through their `.numpy()` method first.

  Args:
    image: a numpy array with shape [height, width, 3].
    heatmaps: a numpy array with shape [height, width, channel].

  Returns:
    An uint8 numpy array representing the input image painted with heatmap
    colors.
  """
  image_np = image if isinstance(image, np.ndarray) else image.numpy()
  heatmaps_np = (
      heatmaps if isinstance(heatmaps, np.ndarray) else heatmaps.numpy())

  canvas = Image.fromarray(np.uint8(image_np)).convert('RGB')
  draw_heatmaps_on_image(canvas, heatmaps_np)
  return np.array(canvas)
458

459

460
def draw_heatmaps_on_image_tensors(images,
                                   heatmaps,
                                   apply_sigmoid=False):
  """Overlays heatmaps on every image of a batch.

  Args:
    images: A 4D uint8 image tensor of shape [N, H, W, C]. If C > 3, additional
      channels will be ignored. If C = 1, then we convert the images to RGB
      images.
    heatmaps: [N, h, w, channel] float32 tensor of heatmaps. Note that the
      heatmaps will be resized to match the input image size before overlaying
      the heatmaps with input images. Theoretically the heatmap height width
      should have the same aspect ratio as the input image to avoid potential
      misalignment introduced by the image resize.
    apply_sigmoid: Whether to apply a sigmoid layer on top of the heatmaps. If
      the heatmaps come directly from the prediction logits, then we should
      apply the sigmoid layer to make sure the values are in between [0.0, 1.0].

  Returns:
    4D image tensor of type uint8, with heatmaps overlaid on top.
  """
  # Reduce the images to exactly three channels.
  num_channels = images.shape[3]
  if num_channels > 3:
    images = images[:, :, :, 0:3]
  elif num_channels == 1:
    images = tf.image.grayscale_to_rgb(images)

  _, image_height, image_width, _ = (
      shape_utils.combined_static_and_dynamic_shape(images))
  if apply_sigmoid:
    heatmaps = tf.math.sigmoid(heatmaps)
  # Bring the heatmaps to the spatial size of the images.
  heatmaps_at_image_size = tf.image.resize(
      heatmaps, size=[image_height, image_width])

  def draw_heatmaps(image_and_heatmaps):
    """Draws heatmaps on image."""
    return tf.py_function(
        draw_heatmaps_on_image_array,
        image_and_heatmaps,
        tf.uint8)

  return tf.map_fn(
      draw_heatmaps,
      [images, heatmaps_at_image_size],
      dtype=tf.uint8,
      back_prop=False)
503

504

505
def _resize_original_image(image, image_shape):
  """Resizes one image to `image_shape` using nearest-neighbor sampling.

  A batch dimension is added for the resize op and removed again afterwards.

  Args:
    image: a single image tensor (no batch dimension).
    image_shape: the target size handed to tf.image.resize_images.

  Returns:
    The resized image, cast to uint8.
  """
  batched = tf.expand_dims(image, 0)
  resized = tf.image.resize_images(
      batched,
      image_shape,
      method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
      align_corners=True)
  unbatched = tf.squeeze(resized, 0)
  return tf.cast(unbatched, tf.uint8)
513

514

515
def draw_bounding_boxes_on_image_tensors(images,
                                         boxes,
                                         classes,
                                         scores,
                                         category_index,
                                         original_image_spatial_shape=None,
                                         true_image_shape=None,
                                         instance_masks=None,
                                         keypoints=None,
                                         keypoint_scores=None,
                                         keypoint_edges=None,
                                         track_ids=None,
                                         max_boxes_to_draw=20,
                                         min_score_thresh=0.2,
                                         use_normalized_coordinates=True):
  """Draws bounding boxes, masks, and keypoints on batch of image tensors.

  When `original_image_spatial_shape` is given, each image is resized to that
  shape before drawing; if `true_image_shape` is given as well, the image is
  first padded or clipped to its true shape. `original_image_spatial_shape`
  may be passed without `true_image_shape`, in which case the image is resized
  as is.

  Args:
    images: A 4D uint8 image tensor of shape [N, H, W, C]. If C > 3, additional
      channels will be ignored. If C = 1, then we convert the images to RGB
      images.
    boxes: [N, max_detections, 4] float32 tensor of detection boxes.
    classes: [N, max_detections] int tensor of detection classes. Note that
      classes are 1-indexed.
    scores: [N, max_detections] float32 tensor of detection scores.
    category_index: a dict that maps integer ids to category dicts. e.g.
      {1: {1: 'dog'}, 2: {2: 'cat'}, ...}
    original_image_spatial_shape: [N, 2] tensor containing the spatial size of
      the original image.
    true_image_shape: [N, 3] tensor containing the spatial size of unpadded
      original_image.
    instance_masks: A 4D uint8 tensor of shape [N, max_detection, H, W] with
      instance masks.
    keypoints: A 4D float32 tensor of shape [N, max_detection, num_keypoints, 2]
      with keypoints.
    keypoint_scores: A 3D float32 tensor of shape [N, max_detection,
      num_keypoints] with keypoint scores.
    keypoint_edges: A list of tuples with keypoint indices that specify which
      keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws
      edges from keypoint 0 to 1 and from keypoint 2 to 4.
    track_ids: [N, max_detections] int32 tensor of unique tracks ids (i.e.
      instance ids for each object). If provided, the color-coding of boxes is
      dictated by these ids, and not classes.
    max_boxes_to_draw: Maximum number of boxes to draw on an image. Default 20.
    min_score_thresh: Minimum score threshold for visualization. Default 0.2.
    use_normalized_coordinates: Whether to assume boxes and keypoints are in
      normalized coordinates (as opposed to absolute coordinates).
      Default is True.

  Returns:
    4D image tensor of type uint8, with boxes drawn on top.
  """
  # Additional channels are being ignored.
  if images.shape[3] > 3:
    images = images[:, :, :, 0:3]
  elif images.shape[3] == 1:
    images = tf.image.grayscale_to_rgb(images)
  visualization_keyword_args = {
      'use_normalized_coordinates': use_normalized_coordinates,
      'max_boxes_to_draw': max_boxes_to_draw,
      'min_score_thresh': min_score_thresh,
      'agnostic_mode': False,
      'line_thickness': 4,
      'keypoint_edges': keypoint_edges
  }
  # tf.map_fn needs one entry per image for every element, so missing shapes
  # are replaced by placeholder tensors filled with -1; draw_boxes ignores
  # them in that case.
  if true_image_shape is None:
    true_shapes = tf.constant(-1, shape=[images.shape.as_list()[0], 3])
  else:
    true_shapes = true_image_shape
  if original_image_spatial_shape is None:
    original_shapes = tf.constant(-1, shape=[images.shape.as_list()[0], 2])
  else:
    original_shapes = original_image_spatial_shape

  visualize_boxes_fn = create_visualization_fn(
      category_index,
      include_masks=instance_masks is not None,
      include_keypoints=keypoints is not None,
      include_keypoint_scores=keypoint_scores is not None,
      include_track_ids=track_ids is not None,
      **visualization_keyword_args)

  # The optional tensors are appended in the positional order expected by the
  # function returned from create_visualization_fn.
  elems = [true_shapes, original_shapes, images, boxes, classes, scores]
  if instance_masks is not None:
    elems.append(instance_masks)
  if keypoints is not None:
    elems.append(keypoints)
  if keypoint_scores is not None:
    elems.append(keypoint_scores)
  if track_ids is not None:
    elems.append(track_ids)

  def draw_boxes(image_and_detections):
    """Draws boxes on image."""
    true_shape = image_and_detections[0]
    original_shape = image_and_detections[1]
    # Start from the incoming image so the resize below also works when
    # true_image_shape was not supplied (previously `image` was unbound then).
    image = image_and_detections[2]
    if true_image_shape is not None:
      image = shape_utils.pad_or_clip_nd(image,
                                         [true_shape[0], true_shape[1], 3])
    if original_image_spatial_shape is not None:
      image_and_detections[2] = _resize_original_image(image, original_shape)

    # The two leading shape entries are not inputs of the visualization fn.
    image_with_boxes = tf.py_func(visualize_boxes_fn, image_and_detections[2:],
                                  tf.uint8)
    return image_with_boxes

  images = tf.map_fn(draw_boxes, elems, dtype=tf.uint8, back_prop=False)
  return images
623

624

625
def draw_side_by_side_evaluation_image(eval_dict,
                                       category_index,
                                       max_boxes_to_draw=20,
                                       min_score_thresh=0.2,
                                       use_normalized_coordinates=True,
                                       keypoint_edges=None):
  """Renders detections next to groundtruth for every example in eval_dict.

  Bounding boxes (and instance masks and keypoints, if available) are drawn on
  both subimages. When eval_dict also holds additional image channels, a third
  subimage showing the groundtruth on those channels is appended.

  Note that a single-example eval_dict is modified in place: a batch dimension
  is added to its entries.

  Args:
    eval_dict: The evaluation dictionary returned by
      eval_util.result_dict_for_batched_example() or
      eval_util.result_dict_for_single_example().
    category_index: A category index (dictionary) produced from a labelmap.
    max_boxes_to_draw: The maximum number of boxes to draw for detections.
    min_score_thresh: The minimum score threshold for showing detections.
    use_normalized_coordinates: Whether to assume boxes and keypoints are in
      normalized coordinates (as opposed to absolute coordinates).
      Default is True.
    keypoint_edges: A list of tuples with keypoint indices that specify which
      keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws
      edges from keypoint 0 to 1 and from keypoint 2 to 4.

  Returns:
    A list of [1, H, 2 * W, C] uint8 tensor. The subimage on the left
      corresponds to detections, while the subimage on the right corresponds to
      groundtruth.
  """
  det_fields = fields.DetectionResultFields()
  in_fields = fields.InputDataFields()

  # Add the batch dimension if the eval_dict is for single example. The image
  # entries are left untouched.
  if len(eval_dict[det_fields.detection_classes].shape) == 1:
    for key in eval_dict:
      if (key == in_fields.original_image or
          key == in_fields.image_additional_channels):
        continue
      eval_dict[key] = tf.expand_dims(eval_dict[key], 0)

  def _take(key, example_index):
    """Returns one example of eval_dict[key] as a batch of size one."""
    return tf.expand_dims(eval_dict[key][example_index], axis=0)

  def _visibility_as_scores(example_keypoints):
    """Derives float32 keypoint scores from keypoint visibilities."""
    return tf.cast(
        keypoint_ops.set_keypoint_visibilities(example_keypoints),
        dtype=tf.float32)

  def _groundtruth_scores(example_index):
    """Returns a score of 1.0 for every groundtruth box of the example."""
    return tf.expand_dims(
        tf.ones_like(
            eval_dict[in_fields.groundtruth_classes][example_index],
            dtype=tf.float32),
        axis=0)

  gt_kpt_vis_fld = in_fields.groundtruth_keypoint_visibilities
  side_by_side_images = []

  for indx in range(eval_dict[in_fields.original_image].shape[0]):
    # Optional detection inputs.
    instance_masks = None
    if det_fields.detection_masks in eval_dict:
      instance_masks = tf.cast(
          _take(det_fields.detection_masks, indx), tf.uint8)
    keypoints = None
    keypoint_scores = None
    if det_fields.detection_keypoints in eval_dict:
      keypoints = _take(det_fields.detection_keypoints, indx)
      if det_fields.detection_keypoint_scores in eval_dict:
        keypoint_scores = _take(det_fields.detection_keypoint_scores, indx)
      else:
        keypoint_scores = _visibility_as_scores(keypoints)

    # Optional groundtruth inputs.
    groundtruth_instance_masks = None
    if in_fields.groundtruth_instance_masks in eval_dict:
      groundtruth_instance_masks = tf.cast(
          _take(in_fields.groundtruth_instance_masks, indx), tf.uint8)
    groundtruth_keypoints = None
    groundtruth_keypoint_scores = None
    if in_fields.groundtruth_keypoints in eval_dict:
      groundtruth_keypoints = _take(in_fields.groundtruth_keypoints, indx)
      if gt_kpt_vis_fld in eval_dict:
        groundtruth_keypoint_scores = tf.expand_dims(
            tf.cast(eval_dict[gt_kpt_vis_fld][indx], dtype=tf.float32), axis=0)
      else:
        groundtruth_keypoint_scores = _visibility_as_scores(
            groundtruth_keypoints)

    # Left subimage: detections.
    detections_view = draw_bounding_boxes_on_image_tensors(
        _take(in_fields.original_image, indx),
        _take(det_fields.detection_boxes, indx),
        _take(det_fields.detection_classes, indx),
        _take(det_fields.detection_scores, indx),
        category_index,
        original_image_spatial_shape=_take(
            in_fields.original_image_spatial_shape, indx),
        true_image_shape=_take(in_fields.true_image_shape, indx),
        instance_masks=instance_masks,
        keypoints=keypoints,
        keypoint_scores=keypoint_scores,
        keypoint_edges=keypoint_edges,
        max_boxes_to_draw=max_boxes_to_draw,
        min_score_thresh=min_score_thresh,
        use_normalized_coordinates=use_normalized_coordinates)

    # Right subimage: groundtruth, drawn without a box limit or score cut.
    groundtruth_view = draw_bounding_boxes_on_image_tensors(
        _take(in_fields.original_image, indx),
        _take(in_fields.groundtruth_boxes, indx),
        _take(in_fields.groundtruth_classes, indx),
        _groundtruth_scores(indx),
        category_index,
        original_image_spatial_shape=_take(
            in_fields.original_image_spatial_shape, indx),
        true_image_shape=_take(in_fields.true_image_shape, indx),
        instance_masks=groundtruth_instance_masks,
        keypoints=groundtruth_keypoints,
        keypoint_scores=groundtruth_keypoint_scores,
        keypoint_edges=keypoint_edges,
        max_boxes_to_draw=None,
        min_score_thresh=0.0,
        use_normalized_coordinates=use_normalized_coordinates)

    combined_view = tf.concat([detections_view, groundtruth_view], axis=2)

    # Optional third subimage: groundtruth on the additional channels.
    if in_fields.image_additional_channels in eval_dict:
      additional_channels_view = draw_bounding_boxes_on_image_tensors(
          _take(in_fields.image_additional_channels, indx),
          _take(in_fields.groundtruth_boxes, indx),
          _take(in_fields.groundtruth_classes, indx),
          _groundtruth_scores(indx),
          category_index,
          original_image_spatial_shape=_take(
              in_fields.original_image_spatial_shape, indx),
          true_image_shape=_take(in_fields.true_image_shape, indx),
          instance_masks=groundtruth_instance_masks,
          keypoints=None,
          keypoint_edges=None,
          max_boxes_to_draw=None,
          min_score_thresh=0.0,
          use_normalized_coordinates=use_normalized_coordinates)
      combined_view = tf.concat(
          [combined_view, additional_channels_view], axis=2)

    side_by_side_images.append(combined_view)

  return side_by_side_images
791

792

793
def draw_densepose_visualizations(eval_dict,
                                  max_boxes_to_draw=20,
                                  min_score_thresh=0.2,
                                  num_parts=24,
                                  dp_coord_to_visualize=0):
  """Draws DensePose visualizations.

  For every image in the batch, the detected part masks are overlaid on one
  copy of the image and the selected surface coordinate channel is overlaid on
  a second copy. The two copies are concatenated along the width axis.

  Args:
    eval_dict: The evaluation dictionary returned by
      eval_util.result_dict_for_batched_example(). It must contain the
      `detection_masks` and `detection_surface_coords` entries.
    max_boxes_to_draw: The maximum number of boxes to draw for detections. Only
      the first `max_boxes_to_draw` detections of an image are considered.
    min_score_thresh: The minimum score threshold for showing detections.
    num_parts: The number of different densepose parts.
    dp_coord_to_visualize: Whether to visualize v-coordinates (0) or
      u-coordinates (1) overlaid on the person masks.

  Returns:
    A list of [1, H, W, C] uint8 tensor, each element corresponding to an image
    in the batch.

  Raises:
    ValueError: If `dp_coord_to_visualize` is not 0 or 1, or if
      `detection_masks` or `detection_surface_coords` is missing from
      `eval_dict`.
  """
  # Validate the plain-Python argument first, before touching any tensors.
  if dp_coord_to_visualize not in (0, 1):
    raise ValueError('`dp_coord_to_visualize` must be either 0 (for v '
                     'coordinates), or 1 (for u coordinates), but instead got '
                     '{}'.format(dp_coord_to_visualize))
  detection_fields = fields.DetectionResultFields()
  input_data_fields = fields.InputDataFields()

  if detection_fields.detection_masks not in eval_dict:
    raise ValueError('Expected `detection_masks` in `eval_dict`.')
  if detection_fields.detection_surface_coords not in eval_dict:
    raise ValueError('Expected `detection_surface_coords` in `eval_dict`.')

  def draw_densepose_py_func(image, detection_masks, surface_coords, scores):
    """Overlays part masks and surface coords on original images."""
    # The part masks are drawn on `image` itself; the surface coordinates are
    # drawn on an independent copy so the two overlays do not mix.
    surface_coord_image = np.copy(image)
    for i, (score, surface_coord, mask) in enumerate(
        zip(scores, surface_coords, detection_masks)):
      if i == max_boxes_to_draw:
        break
      if score > min_score_thresh:
        draw_part_mask_on_image_array(image, mask, num_parts=num_parts)
        draw_float_channel_on_image_array(
            surface_coord_image, surface_coord[:, :, dp_coord_to_visualize],
            mask)
    # Side-by-side layout: part masks on the left, surface coords on the right.
    return np.concatenate([image, surface_coord_image], axis=1)

  images_with_detections_list = []
  for indx in range(eval_dict[input_data_fields.original_image].shape[0]):
    # Note that detection masks have already been resized to the original image
    # shapes, but `original_image` has not.
    # TODO(ronnyvotel): Consider resizing `original_image` in
    # eval_util.result_dict_for_batched_example().
    true_shape = eval_dict[input_data_fields.true_image_shape][indx]
    original_shape = eval_dict[
        input_data_fields.original_image_spatial_shape][indx]
    image = eval_dict[input_data_fields.original_image][indx]
    image = shape_utils.pad_or_clip_nd(image, [true_shape[0], true_shape[1], 3])
    image = _resize_original_image(image, original_shape)

    scores = eval_dict[detection_fields.detection_scores][indx]
    detection_masks = eval_dict[detection_fields.detection_masks][indx]
    surface_coords = eval_dict[detection_fields.detection_surface_coords][indx]

    image_with_densepose = tf.py_func(
        draw_densepose_py_func,
        [image, detection_masks, surface_coords, scores],
        tf.uint8)
    # Restore the leading batch dimension expected by the image summaries.
    images_with_detections_list.append(
        image_with_densepose[tf.newaxis, :, :, :])
  return images_with_detections_list
866

867

868
def draw_keypoints_on_image_array(image,
                                  keypoints,
                                  keypoint_scores=None,
                                  min_score_thresh=0.5,
                                  color='red',
                                  radius=2,
                                  use_normalized_coordinates=True,
                                  keypoint_edges=None,
                                  keypoint_edge_color='green',
                                  keypoint_edge_width=2):
  """Draws keypoints (and optional edges) on a numpy image, in place.

  The array is converted to an RGB PIL image, handed to
  `draw_keypoints_on_image`, and the rendered pixels are copied back into
  `image`.

  Args:
    image: a numpy array with shape [height, width, 3].
    keypoints: a numpy array with shape [num_keypoints, 2].
    keypoint_scores: a numpy array with shape [num_keypoints]. If provided, only
      those keypoints with a score above `min_score_thresh` will be visualized.
    min_score_thresh: A scalar indicating the minimum keypoint score required
      for a keypoint to be visualized. Note that keypoint_scores must be
      provided for this threshold to take effect.
    color: color to draw the keypoints with. Default is red.
    radius: keypoint radius. Default value is 2.
    use_normalized_coordinates: if True (default), treat keypoint values as
      relative to the image.  Otherwise treat them as absolute.
    keypoint_edges: A list of tuples with keypoint indices that specify which
      keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws
      edges from keypoint 0 to 1 and from keypoint 2 to 4.
    keypoint_edge_color: color to draw the keypoint edges with. Default is
      green.
    keypoint_edge_width: width of the edges drawn between keypoints. Default
      value is 2.
  """
  canvas = Image.fromarray(np.uint8(image)).convert('RGB')
  draw_keypoints_on_image(
      canvas,
      keypoints,
      keypoint_scores=keypoint_scores,
      min_score_thresh=min_score_thresh,
      color=color,
      radius=radius,
      use_normalized_coordinates=use_normalized_coordinates,
      keypoint_edges=keypoint_edges,
      keypoint_edge_color=keypoint_edge_color,
      keypoint_edge_width=keypoint_edge_width)
  # Write the rendered pixels back into the caller's array.
  rendered = np.array(canvas)
  np.copyto(image, rendered)
911

912

913
def draw_keypoints_on_image(image,
                            keypoints,
                            keypoint_scores=None,
                            min_score_thresh=0.5,
                            color='red',
                            radius=2,
                            use_normalized_coordinates=True,
                            keypoint_edges=None,
                            keypoint_edge_color='green',
                            keypoint_edge_width=2):
  """Draws keypoints on an image.

  Args:
    image: a PIL.Image object.
    keypoints: a numpy array with shape [num_keypoints, 2]. Each keypoint is
      read as [y, x]. An empty collection is accepted and draws nothing.
    keypoint_scores: a numpy array with shape [num_keypoints]. When omitted, a
      keypoint is drawn unless one of its coordinates is NaN.
    min_score_thresh: a score threshold for visualizing keypoints. Only used if
      keypoint_scores is provided.
    color: color to draw the keypoints with. Default is red.
    radius: keypoint radius. Default value is 2.
    use_normalized_coordinates: if True (default), treat keypoint values as
      relative to the image.  Otherwise treat them as absolute.
    keypoint_edges: A list of tuples with keypoint indices that specify which
      keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws
      edges from keypoint 0 to 1 and from keypoint 2 to 4. Edges that reference
      an out-of-range or non-visible keypoint are skipped.
    keypoint_edge_color: color to draw the keypoint edges with. Default is
      green.
    keypoint_edge_width: width of the edges drawn between keypoints. Default
      value is 2.
  """
  keypoints = np.array(keypoints)
  # An empty list becomes a 1-D array, which cannot be indexed as [:, 0] or
  # reduced along axis 1 below. There is nothing to draw in that case anyway.
  if keypoints.size == 0:
    return

  draw = ImageDraw.Draw(image)
  im_width, im_height = image.size
  # Keypoints are stored as [y, x].
  keypoints_x = [k[1] for k in keypoints]
  keypoints_y = [k[0] for k in keypoints]
  if use_normalized_coordinates:
    keypoints_x = tuple([im_width * x for x in keypoints_x])
    keypoints_y = tuple([im_height * y for y in keypoints_y])
  if keypoint_scores is not None:
    keypoint_scores = np.array(keypoint_scores)
    valid_kpt = np.greater(keypoint_scores, min_score_thresh)
  else:
    # Without scores, a keypoint is valid unless a coordinate is NaN.
    valid_kpt = np.logical_not(np.any(np.isnan(keypoints), axis=1))
  valid_kpt = list(valid_kpt)

  for keypoint_x, keypoint_y, valid in zip(keypoints_x, keypoints_y, valid_kpt):
    if valid:
      draw.ellipse([(keypoint_x - radius, keypoint_y - radius),
                    (keypoint_x + radius, keypoint_y + radius)],
                   outline=color, fill=color)
  if keypoint_edges is not None:
    num_keypoints = len(keypoints)
    for keypoint_start, keypoint_end in keypoint_edges:
      if (keypoint_start < 0 or keypoint_start >= num_keypoints or
          keypoint_end < 0 or keypoint_end >= num_keypoints):
        continue
      if not (valid_kpt[keypoint_start] and valid_kpt[keypoint_end]):
        continue
      edge_coordinates = [
          keypoints_x[keypoint_start], keypoints_y[keypoint_start],
          keypoints_x[keypoint_end], keypoints_y[keypoint_end]
      ]
      draw.line(
          edge_coordinates, fill=keypoint_edge_color, width=keypoint_edge_width)
977

978

979
def draw_mask_on_image_array(image, mask, color='red', alpha=0.4):
  """Blends a single-colour mask into an image, in place.

  Args:
    image: uint8 numpy array with shape (img_height, img_width, 3)
    mask: a uint8 numpy array of shape (img_height, img_width). Every pixel
      with a value greater than 0 is treated as part of the mask.
    color: color to draw the mask with. Default is red.
    alpha: transparency value between 0 and 1. (default: 0.4)

  Raises:
    ValueError: On incorrect data type for image or masks, or when the spatial
      dimensions of image and mask differ.
  """
  if image.dtype != np.uint8:
    raise ValueError('`image` not of type np.uint8')
  if mask.dtype != np.uint8:
    raise ValueError('`mask` not of type np.uint8')
  spatial_shape = image.shape[:2]
  if spatial_shape != mask.shape:
    raise ValueError('The image has spatial dimensions %s but the mask has '
                     'dimensions %s' % (spatial_shape, mask.shape))

  rgb = ImageColor.getrgb(color)
  background = Image.fromarray(image)

  # A full-size layer filled with the requested colour.
  colour_pixel = np.reshape(list(rgb), [1, 1, 3])
  colour_layer = np.expand_dims(np.ones_like(mask), axis=2) * colour_pixel
  foreground = Image.fromarray(np.uint8(colour_layer)).convert('RGBA')

  # Blend weight: 255 * alpha inside the mask, 0 outside.
  blend_weights = np.uint8(255.0*alpha*(mask > 0))
  weight_image = Image.fromarray(blend_weights).convert('L')

  blended = Image.composite(foreground, background, weight_image)
  np.copyto(image, np.array(blended.convert('RGB')))
1008

1009

1010
def draw_part_mask_on_image_array(image, mask, alpha=0.4, num_parts=24):
  """Blends a colour-coded part mask into an image, in place.

  Part `k` (1-indexed) is painted with the k-th entry of `STANDARD_COLORS`;
  only the first `num_parts` colours are used.

  Args:
    image: uint8 numpy array with shape (img_height, img_width, 3)
    mask: a uint8 numpy array of shape (img_height, img_width) with
      1-indexed parts (0 for background).
    alpha: transparency value between 0 and 1 (default: 0.4)
    num_parts: the maximum number of parts that may exist in the image (default
      24 for DensePose).

  Raises:
    ValueError: On incorrect data type for image or masks, or when the spatial
      dimensions of image and mask differ.
  """
  if image.dtype != np.uint8:
    raise ValueError('`image` not of type np.uint8')
  if mask.dtype != np.uint8:
    raise ValueError('`mask` not of type np.uint8')
  spatial_shape = image.shape[:2]
  if spatial_shape != mask.shape:
    raise ValueError('The image has spatial dimensions %s but the mask has '
                     'dimensions %s' % (spatial_shape, mask.shape))

  background = Image.fromarray(image)
  colour_layer = np.zeros_like(image)
  part_ids = mask[:, :, np.newaxis]
  # Part ids start at 1; id 0 is background and stays black in the layer.
  for part_id, colour_name in enumerate(STANDARD_COLORS[:num_parts], 1):
    rgb = np.array(ImageColor.getrgb(colour_name), dtype=np.uint8)
    part_pixels = (part_ids == part_id) * rgb[np.newaxis, np.newaxis, :]
    np.add(colour_layer, part_pixels, out=colour_layer)
  foreground = Image.fromarray(np.uint8(colour_layer)).convert('RGBA')

  # Blend weight: 255 * alpha on any non-background pixel, 0 elsewhere.
  blend_weights = np.uint8(255.0 * alpha * (mask > 0))
  weight_image = Image.fromarray(blend_weights).convert('L')

  blended = Image.composite(foreground, background, weight_image)
  np.copyto(image, np.array(blended.convert('RGB')))
1042

1043

1044
def draw_float_channel_on_image_array(image, channel, mask, alpha=0.9,
                                      cmap='YlGn'):
  """Blends a colour-mapped floating point channel into an image, in place.

  Args:
    image: uint8 numpy array with shape (img_height, img_width, 3)
    channel: float32 numpy array with shape (img_height, img_width). The values
      should be in the range [0, 1], and will be mapped to colors using the
      provided colormap `cmap` argument.
    mask: a uint8 numpy array of shape (img_height, img_width) with
      1-indexed parts (0 for background). The channel is only drawn where the
      mask is greater than 0.
    alpha: transparency value between 0 and 1 (default: 0.9)
    cmap: string with the colormap to use.

  Raises:
    ValueError: On incorrect data type for image, channel or mask, or when the
      spatial dimensions of channel or mask differ from the image.
  """
  if image.dtype != np.uint8:
    raise ValueError('`image` not of type np.uint8')
  if channel.dtype != np.float32:
    raise ValueError('`channel` not of type np.float32')
  if mask.dtype != np.uint8:
    raise ValueError('`mask` not of type np.uint8')
  spatial_shape = image.shape[:2]
  if spatial_shape != channel.shape:
    raise ValueError('The image has spatial dimensions %s but the channel has '
                     'dimensions %s' % (spatial_shape, channel.shape))
  if spatial_shape != mask.shape:
    raise ValueError('The image has spatial dimensions %s but the mask has '
                     'dimensions %s' % (spatial_shape, mask.shape))

  colormap = plt.get_cmap(cmap)
  background = Image.fromarray(image)

  # Keep only the first three (colour) channels of the colormap output.
  channel_rgb = colormap(channel)[:, :, :3]
  foreground = Image.fromarray(np.uint8(channel_rgb * 255)).convert('RGBA')

  # Blend weight: 255 * alpha on any non-background pixel, 0 elsewhere.
  blend_weights = np.uint8(255.0 * alpha * (mask > 0))
  weight_image = Image.fromarray(blend_weights).convert('L')

  blended = Image.composite(foreground, background, weight_image)
  np.copyto(image, np.array(blended.convert('RGB')))
1082

1083

1084
def visualize_boxes_and_labels_on_image_array(
    image,
    boxes,
    classes,
    scores,
    category_index,
    instance_masks=None,
    instance_boundaries=None,
    keypoints=None,
    keypoint_scores=None,
    keypoint_edges=None,
    track_ids=None,
    use_normalized_coordinates=False,
    max_boxes_to_draw=20,
    min_score_thresh=.5,
    agnostic_mode=False,
    line_thickness=4,
    groundtruth_box_visualization_color='black',
    skip_boxes=False,
    skip_scores=False,
    skip_labels=False,
    skip_track_ids=False):
  """Overlay labeled boxes on an image with formatted scores and label names.

  Boxes that share the same coordinates are grouped together: they are drawn
  once and their display strings are stacked. The image is modified in place
  and the same array is returned.

  Args:
    image: uint8 numpy array with shape (img_height, img_width, 3)
    boxes: a numpy array of shape [N, 4]
    classes: a numpy array of shape [N]. Note that class indices are 1-based,
      and match the keys in the label map.
    scores: a numpy array of shape [N] or None.  If scores=None, then
      this function assumes that the boxes to be plotted are groundtruth
      boxes and plot all boxes as black with no classes or scores.
    category_index: a dict containing category dictionaries (each holding
      category index `id` and category name `name`) keyed by category indices.
    instance_masks: a uint8 numpy array of shape [N, image_height, image_width],
      can be None.
    instance_boundaries: a numpy array of shape [N, image_height, image_width]
      with values ranging between 0 and 1, can be None.
    keypoints: a numpy array of shape [N, num_keypoints, 2], can
      be None.
    keypoint_scores: a numpy array of shape [N, num_keypoints], can be None.
    keypoint_edges: A list of tuples with keypoint indices that specify which
      keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws
      edges from keypoint 0 to 1 and from keypoint 2 to 4.
    track_ids: a numpy array of shape [N] with unique track ids. If provided,
      color-coding of boxes will be determined by these ids, and not the class
      indices.
    use_normalized_coordinates: whether boxes is to be interpreted as
      normalized coordinates or not.
    max_boxes_to_draw: maximum number of boxes to visualize.  If None, draw
      all boxes.
    min_score_thresh: minimum score threshold for a box or keypoint to be
      visualized.
    agnostic_mode: boolean (default: False) controlling whether to evaluate in
      class-agnostic mode or not.  This mode will display scores but ignore
      classes.
    line_thickness: integer (default: 4) controlling line width of the boxes.
    groundtruth_box_visualization_color: box color for visualizing groundtruth
      boxes
    skip_boxes: whether to skip the drawing of bounding boxes.
    skip_scores: whether to skip score when drawing a single detection
    skip_labels: whether to skip label when drawing a single detection
    skip_track_ids: whether to skip track id when drawing a single detection

  Returns:
    uint8 numpy array with shape (img_height, img_width, 3) with overlaid boxes.
  """
  # Per-location bookkeeping, keyed by the box coordinates as a tuple.
  display_strs_by_box = collections.defaultdict(list)
  color_by_box = collections.defaultdict(str)
  masks_by_box = {}
  boundaries_by_box = {}
  keypoints_by_box = collections.defaultdict(list)
  keypoint_scores_by_box = collections.defaultdict(list)
  track_id_by_box = {}

  num_boxes = boxes.shape[0]
  box_limit = max_boxes_to_draw if max_boxes_to_draw else num_boxes
  is_groundtruth = scores is None

  # Pass 1: decide which boxes are shown and what accompanies each of them.
  for i in range(num_boxes):
    if len(color_by_box) == box_limit:
      break
    if not is_groundtruth and not (scores[i] > min_score_thresh):
      continue

    box = tuple(boxes[i].tolist())
    if instance_masks is not None:
      masks_by_box[box] = instance_masks[i]
    if instance_boundaries is not None:
      boundaries_by_box[box] = instance_boundaries[i]
    if keypoints is not None:
      keypoints_by_box[box].extend(keypoints[i])
    if keypoint_scores is not None:
      keypoint_scores_by_box[box].extend(keypoint_scores[i])
    if track_ids is not None:
      track_id_by_box[box] = track_ids[i]

    if is_groundtruth:
      # Groundtruth boxes carry no label text, only a fixed colour.
      color_by_box[box] = groundtruth_box_visualization_color
      continue

    # Assemble "<label>: <score>%: ID <track>" from whichever parts are enabled.
    label_parts = []
    if not skip_labels and not agnostic_mode:
      if classes[i] in six.viewkeys(category_index):
        class_name = category_index[classes[i]]['name']
      else:
        class_name = 'N/A'
      label_parts.append(str(class_name))
    if not skip_scores:
      label_parts.append('{}%'.format(round(100*scores[i])))
    if not skip_track_ids and track_ids is not None:
      label_parts.append('ID {}'.format(track_ids[i]))
    display_strs_by_box[box].append(
        ': '.join(part for part in label_parts if part))

    if agnostic_mode:
      box_color = 'DarkOrange'
    elif track_ids is not None:
      multiplier = _get_multiplier_for_color_randomness()
      box_color = STANDARD_COLORS[
          (multiplier * track_ids[i]) % len(STANDARD_COLORS)]
    else:
      box_color = STANDARD_COLORS[classes[i] % len(STANDARD_COLORS)]
    color_by_box[box] = box_color

  # Pass 2: render masks, boundaries, the box itself and keypoints.
  for box, color in color_by_box.items():
    ymin, xmin, ymax, xmax = box
    if instance_masks is not None:
      draw_mask_on_image_array(image, masks_by_box[box], color=color)
    if instance_boundaries is not None:
      draw_mask_on_image_array(
          image, boundaries_by_box[box], color='red', alpha=1.0)
    draw_bounding_box_on_image_array(
        image,
        ymin,
        xmin,
        ymax,
        xmax,
        color=color,
        thickness=0 if skip_boxes else line_thickness,
        display_str_list=display_strs_by_box[box],
        use_normalized_coordinates=use_normalized_coordinates)
    if keypoints is not None:
      # Scores are only forwarded when at least one box recorded some.
      scores_for_box = (
          keypoint_scores_by_box[box] if keypoint_scores_by_box else None)
      draw_keypoints_on_image_array(
          image,
          keypoints_by_box[box],
          scores_for_box,
          min_score_thresh=min_score_thresh,
          color=color,
          radius=line_thickness / 2,
          use_normalized_coordinates=use_normalized_coordinates,
          keypoint_edges=keypoint_edges,
          keypoint_edge_color=color,
          keypoint_edge_width=line_thickness // 2)

  return image
1257

1258

1259
def add_cdf_image_summary(values, name):
  """Adds a tf.summary.image for a CDF plot of the values.

  Normalizes `values` such that they sum to 1, plots the cumulative distribution
  function and creates a tf image summary.

  Args:
    values: a 1-D float32 tensor containing the values.
    name: name for the image summary.
  """
  def cdf_plot(values):
    """Numpy function to plot CDF."""
    normalized_values = values / np.sum(values)
    sorted_values = np.sort(normalized_values)
    cumulative_values = np.cumsum(sorted_values)
    fraction_of_examples = (np.arange(cumulative_values.size, dtype=np.float32)
                            / cumulative_values.size)
    fig = plt.figure(frameon=False)
    try:
      # Integer subplot spec: string specs such as '111' are rejected by newer
      # matplotlib releases.
      ax = fig.add_subplot(111)
      ax.plot(fraction_of_examples, cumulative_values)
      ax.set_ylabel('cumulative normalized values')
      ax.set_xlabel('fraction of examples')
      fig.canvas.draw()
      width, height = fig.get_size_inches() * fig.get_dpi()
      # np.frombuffer replaces the deprecated binary mode of np.fromstring.
      image = np.frombuffer(fig.canvas.tostring_rgb(), dtype='uint8').reshape(
          1, int(height), int(width), 3)
    finally:
      # pyplot keeps every figure alive until it is closed; release it so
      # repeated summary evaluations do not accumulate figures.
      plt.close(fig)
    return image
  cdf_image = tf.py_func(cdf_plot, [values], tf.uint8)
  tf.summary.image(name, cdf_image)
1288

1289

1290
def add_hist_image_summary(values, bins, name):
  """Adds a tf.summary.image for a histogram plot of the values.

  Plots the histogram of values and creates a tf image summary.

  Args:
    values: a 1-D float32 tensor containing the values.
    bins: bin edges which will be directly passed to np.histogram.
    name: name for the image summary.
  """

  def hist_plot(values, bins):
    """Numpy function to plot hist."""
    fig = plt.figure(frameon=False)
    try:
      # Integer subplot spec: string specs such as '111' are rejected by newer
      # matplotlib releases.
      ax = fig.add_subplot(111)
      y, x = np.histogram(values, bins=bins)
      # np.histogram returns one more edge than counts; plot against the left
      # edge of each bin.
      ax.plot(x[:-1], y)
      ax.set_ylabel('count')
      ax.set_xlabel('value')
      fig.canvas.draw()
      width, height = fig.get_size_inches() * fig.get_dpi()
      # np.frombuffer replaces the deprecated binary mode of np.fromstring.
      image = np.frombuffer(
          fig.canvas.tostring_rgb(), dtype='uint8').reshape(
              1, int(height), int(width), 3)
    finally:
      # pyplot keeps every figure alive until it is closed; release it so
      # repeated summary evaluations do not accumulate figures.
      plt.close(fig)
    return image
  hist_image = tf.py_func(hist_plot, [values, bins], tf.uint8)
  tf.summary.image(name, hist_image)
1317

1318

1319
class EvalMetricOpsVisualization(six.with_metaclass(abc.ABCMeta, object)):
  """Abstract base class responsible for visualizations during evaluation.

  Currently, summary images are not run during evaluation. One way to produce
  evaluation images in Tensorboard is to provide tf.summary.image strings as
  `value_ops` in tf.estimator.EstimatorSpec's `eval_metric_ops`. This class is
  responsible for accruing images (with overlaid detections and groundtruth)
  and returning a dictionary that can be passed to `eval_metric_ops`.

  Subclasses supply the actual rendering by implementing
  `images_from_evaluation_dict`.
  """

  def __init__(self,
               category_index,
               max_examples_to_draw=5,
               max_boxes_to_draw=20,
               min_score_thresh=0.2,
               use_normalized_coordinates=True,
               summary_name_prefix='evaluation_image',
               keypoint_edges=None):
    """Creates an EvalMetricOpsVisualization.

    Args:
      category_index: A category index (dictionary) produced from a labelmap.
      max_examples_to_draw: The maximum number of example summaries to produce.
      max_boxes_to_draw: The maximum number of boxes to draw for detections.
      min_score_thresh: The minimum score threshold for showing detections.
      use_normalized_coordinates: Whether to assume boxes and keypoints are in
        normalized coordinates (as opposed to absolute coordinates).
        Default is True.
      summary_name_prefix: A string prefix for each image summary.
      keypoint_edges: A list of tuples with keypoint indices that specify which
        keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws
        edges from keypoint 0 to 1 and from keypoint 2 to 4.
    """

    self._category_index = category_index
    self._max_examples_to_draw = max_examples_to_draw
    self._max_boxes_to_draw = max_boxes_to_draw
    self._min_score_thresh = min_score_thresh
    self._use_normalized_coordinates = use_normalized_coordinates
    self._summary_name_prefix = summary_name_prefix
    self._keypoint_edges = keypoint_edges
    # Images accumulated so far, capped at `max_examples_to_draw` entries.
    self._images = []

  def clear(self):
    """Drops all accumulated images by starting a fresh list."""
    self._images = []

  def add_images(self, images):
    """Store a list of images, each with shape [1, H, W, C]."""
    capacity = self._max_examples_to_draw
    if len(self._images) >= capacity:
      return

    # Append everything, then trim whatever exceeds the capacity.
    self._images.extend(images)
    if len(self._images) > capacity:
      del self._images[capacity:]

  def get_estimator_eval_metric_ops(self, eval_dict):
    """Returns metric ops for use in tf.estimator.EstimatorSpec.

    Args:
      eval_dict: A dictionary that holds an image, groundtruth, and detections
        for a batched example. Note that, we use only the first example for
        visualization. See eval_util.result_dict_for_batched_example() for a
        convenient method for constructing such a dictionary. The dictionary
        contains
        fields.InputDataFields.original_image: [batch_size, H, W, 3] image.
        fields.InputDataFields.original_image_spatial_shape: [batch_size, 2]
          tensor containing the size of the original image.
        fields.InputDataFields.true_image_shape: [batch_size, 3]
          tensor containing the spatial size of the unpadded original image.
        fields.InputDataFields.groundtruth_boxes - [batch_size, num_boxes, 4]
          float32 tensor with groundtruth boxes in range [0.0, 1.0].
        fields.InputDataFields.groundtruth_classes - [batch_size, num_boxes]
          int64 tensor with 1-indexed groundtruth classes.
        fields.InputDataFields.groundtruth_instance_masks - (optional)
          [batch_size, num_boxes, H, W] int64 tensor with instance masks.
        fields.InputDataFields.groundtruth_keypoints - (optional)
          [batch_size, num_boxes, num_keypoints, 2] float32 tensor with
          keypoint coordinates in format [y, x].
        fields.InputDataFields.groundtruth_keypoint_visibilities - (optional)
          [batch_size, num_boxes, num_keypoints] bool tensor with
          keypoint visibilities.
        fields.DetectionResultFields.detection_boxes - [batch_size,
          max_num_boxes, 4] float32 tensor with detection boxes in range [0.0,
          1.0].
        fields.DetectionResultFields.detection_classes - [batch_size,
          max_num_boxes] int64 tensor with 1-indexed detection classes.
        fields.DetectionResultFields.detection_scores - [batch_size,
          max_num_boxes] float32 tensor with detection scores.
        fields.DetectionResultFields.detection_masks - (optional) [batch_size,
          max_num_boxes, H, W] float32 tensor of binarized masks.
        fields.DetectionResultFields.detection_keypoints - (optional)
          [batch_size, max_num_boxes, num_keypoints, 2] float32 tensor with
          keypoints.
        fields.DetectionResultFields.detection_keypoint_scores - (optional)
          [batch_size, max_num_boxes, num_keypoints] float32 tensor with
          keypoints scores.

    Returns:
      A dictionary of image summary names to tuple of (value_op, update_op). The
      `update_op` is the same for all items in the dictionary, and is
      responsible for saving a single side-by-side image with detections and
      groundtruth. Each `value_op` holds the tf.summary.image string for a given
      image. An empty dictionary is returned when `max_examples_to_draw` is 0.
    """
    num_slots = self._max_examples_to_draw
    if num_slots == 0:
      return {}
    rendered = self.images_from_evaluation_dict(eval_dict)

    def get_images():
      """Returns a list of images, padded to self._max_examples_to_draw."""
      collected = self._images
      # Scalar placeholders fill unused slots; they are told apart from real
      # images by their rank when the summaries are built.
      missing = self._max_examples_to_draw - len(collected)
      collected.extend(np.array(0, dtype=np.uint8) for _ in range(missing))
      # `clear` rebinds `self._images`, so `collected` keeps its contents.
      self.clear()
      return collected

    def image_summary_or_default_string(summary_name, image):
      """Returns image summaries for non-padded elements."""
      is_real_image = tf.equal(tf.size(tf.shape(image)), 4)
      return tf.cond(
          is_real_image,
          lambda: tf.summary.image(summary_name, image),
          lambda: tf.constant(''))

    # Only the first rendered example of the batch is recorded.
    if tf.executing_eagerly():
      update_op = self.add_images([[rendered[0]]])
      image_tensors = get_images()
    else:
      update_op = tf.py_func(self.add_images, [[rendered[0]]], [])
      image_tensors = tf.py_func(get_images, [], [tf.uint8] * num_slots)

    eval_metric_ops = {}
    for slot, image_tensor in enumerate(image_tensors):
      summary_name = self._summary_name_prefix + '/' + str(slot)
      eval_metric_ops[summary_name] = (
          image_summary_or_default_string(summary_name, image_tensor),
          update_op)
    return eval_metric_ops

  @abc.abstractmethod
  def images_from_evaluation_dict(self, eval_dict):
    """Converts evaluation dictionary into a list of image tensors.

    To be overridden by implementations.

    Args:
      eval_dict: A dictionary with all the necessary information for producing
        visualizations.

    Returns:
      A list of [1, H, W, C] uint8 tensors.
    """
    raise NotImplementedError
1471

1472

1473
class VisualizeSingleFrameDetections(EvalMetricOpsVisualization):
  """Class responsible for single-frame object detection visualizations."""

  def __init__(self,
               category_index,
               max_examples_to_draw=5,
               max_boxes_to_draw=20,
               min_score_thresh=0.2,
               use_normalized_coordinates=True,
               summary_name_prefix='Detections_Left_Groundtruth_Right',
               keypoint_edges=None):
    """Forwards every setting unchanged to the base class constructor."""
    base_settings = dict(
        category_index=category_index,
        max_examples_to_draw=max_examples_to_draw,
        max_boxes_to_draw=max_boxes_to_draw,
        min_score_thresh=min_score_thresh,
        use_normalized_coordinates=use_normalized_coordinates,
        summary_name_prefix=summary_name_prefix,
        keypoint_edges=keypoint_edges)
    super(VisualizeSingleFrameDetections, self).__init__(**base_settings)

  def images_from_evaluation_dict(self, eval_dict):
    """Renders `eval_dict` via `draw_side_by_side_evaluation_image`."""
    side_by_side_images = draw_side_by_side_evaluation_image(
        eval_dict,
        self._category_index,
        self._max_boxes_to_draw,
        self._min_score_thresh,
        self._use_normalized_coordinates,
        self._keypoint_edges)
    return side_by_side_images
1499

1500
Product

Resources

Company