CoCalc -- datasets.py

hukaixuan19970627
GitHub Repository: hukaixuan19970627/yolov5_obb
Path: blob/master/utils/datasets.py
²²⁸⁸ views
1
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
"""
3
Dataloaders and dataset utils
4
"""
5

6
import glob
7
import hashlib
8
import json
9
import os
10
import random
11
import shutil
12
import time
13
from itertools import repeat
14
from multiprocessing.pool import Pool, ThreadPool
15
from pathlib import Path
16
from threading import Thread
17
from zipfile import ZipFile
18

19
import cv2
20
import numpy as np
21
import torch
22
import torch.nn.functional as F
23
import yaml
24
from PIL import ExifTags, Image, ImageOps
25
from torch.utils.data import DataLoader, Dataset, dataloader, distributed
26
from tqdm import tqdm
27

28
from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective
29
from utils.general import (LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str,
30
                           segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn)
31
from utils.torch_utils import torch_distributed_zero_first
32
from utils.rboxs_utils import poly_filter, poly2rbox
33

34
# Parameters
HELP_URL = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
IMG_FORMATS = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo']  # acceptable image suffixes
VID_FORMATS = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv']  # acceptable video suffixes
WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))  # DDP world size read from the environment (1 when unset)

# Look up the numeric EXIF tag id named 'Orientation'; `orientation` holds that id once the loop breaks
for orientation, _tag_name in ExifTags.TAGS.items():
    if _tag_name == 'Orientation':
        break
44

45

46
def get_hash(paths):
    """Return a single MD5 hex digest for a list of paths (files or dirs).

    The digest is seeded with the summed size of every path that exists on disk
    and then updated with the concatenation of all path strings.
    """
    total_bytes = 0
    for p in paths:
        if os.path.exists(p):
            total_bytes += os.path.getsize(p)
    digest = hashlib.md5(str(total_bytes).encode())  # hash the total size first
    digest.update(''.join(paths).encode())  # then the path names themselves
    return digest.hexdigest()
52

53

54
def exif_size(img):
    """Return the PIL image size as (width, height), corrected for EXIF rotation.

    Orientation values 6 and 8 swap width and height. If the EXIF data cannot be
    read for any reason, the uncorrected size is returned.
    """
    s = img.size  # (width, height)
    try:
        # 0x0112 is the EXIF Orientation tag id, the same constant exif_transpose() uses
        rotation = dict(img._getexif().items())[0x0112]
        if rotation in (6, 8):  # rotation 270 or 90
            s = (s[1], s[0])
    except Exception:  # best effort: any failure reading EXIF leaves the size unchanged
        pass

    return s
67

68

69
def exif_transpose(image):
    """
    Transpose a PIL image according to its EXIF Orientation tag, if it has one.
    Inplace version of https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py exif_transpose()

    :param image: The image to transpose.
    :return: The transposed image, or the input image when no transposition applies.
    """
    exif = image.getexif()
    tag_value = exif.get(0x0112, 1)  # Orientation tag, 1 when absent
    if not tag_value > 1:
        return image

    transpose_ops = {
        2: Image.FLIP_LEFT_RIGHT,
        3: Image.ROTATE_180,
        4: Image.FLIP_TOP_BOTTOM,
        5: Image.TRANSPOSE,
        6: Image.ROTATE_270,
        7: Image.TRANSVERSE,
        8: Image.ROTATE_90,
    }
    method = transpose_ops.get(tag_value)
    if method is None:  # unknown orientation value: leave the image alone
        return image

    image = image.transpose(method)
    del exif[0x0112]  # pixels are now upright, so drop the tag
    image.info["exif"] = exif.tobytes()
    return image
93

94

95
def create_dataloader(path, imgsz, batch_size, stride, names, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0,
                      rect=False, rank=-1, workers=8, image_weights=False, quad=False, prefix='', shuffle=False):
    """Build a LoadImagesAndLabels dataset and a loader over it.

    Returns:
        (loader, dataset): the loader is a plain DataLoader when image_weights is set,
        otherwise an InfiniteDataLoader.
    """
    if rect and shuffle:
        LOGGER.warning('WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False')
        shuffle = False

    with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
        dataset = LoadImagesAndLabels(
            path, names, imgsz, batch_size,
            augment=augment,  # augmentation
            hyp=hyp,  # hyperparameters
            rect=rect,  # rectangular batches
            cache_images=cache,
            single_cls=single_cls,
            stride=int(stride),
            pad=pad,
            image_weights=image_weights,
            prefix=prefix)

    batch_size = min(batch_size, len(dataset))
    # number of workers: bounded by CPUs per process, by the batch size (0 for batch size <= 1) and by `workers`
    worker_caps = [os.cpu_count() // WORLD_SIZE, batch_size if batch_size > 1 else 0, workers]
    nw = min(worker_caps)

    if rank == -1:
        sampler = None
    else:
        sampler = distributed.DistributedSampler(dataset, shuffle=shuffle)

    loader_cls = DataLoader if image_weights else InfiniteDataLoader  # only DataLoader allows for attribute updates
    loader = loader_cls(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle and sampler is None,
        num_workers=nw,
        sampler=sampler,
        pin_memory=True,
        collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn)
    return loader, dataset
123

124

125
class InfiniteDataLoader(dataloader.DataLoader):
    """ Dataloader that reuses workers

    Takes the same arguments as the vanilla DataLoader. The batch sampler is wrapped
    in a _RepeatSampler and a single underlying iterator is kept for the lifetime
    of the loader.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        endless_sampler = _RepeatSampler(self.batch_sampler)
        # Set through object.__setattr__ to bypass DataLoader's own __setattr__
        # (presumably because it rejects re-assigning batch_sampler after init - confirm)
        object.__setattr__(self, 'batch_sampler', endless_sampler)
        self.iterator = super().__iter__()

    def __len__(self):
        # length of the wrapped (original) batch sampler
        return len(self.batch_sampler.sampler)

    def __iter__(self):
        remaining = len(self)
        while remaining > 0:
            yield next(self.iterator)
            remaining -= 1
142

143

144
class _RepeatSampler:
    """ Sampler that repeats forever

    Args:
        sampler (Sampler): the sampler to iterate over again and again
    """

    def __init__(self, sampler):
        self.sampler = sampler

    def __iter__(self):
        while True:
            for item in self.sampler:
                yield item
157

158

159
class LoadImages:
    # YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`
    def __init__(self, path, img_size=640, stride=32, auto=True):
        """Collect image and video files from a glob pattern, a directory or a single file path."""
        p = str(Path(path).resolve())  # os-agnostic absolute path
        if '*' in p:  # glob pattern
            files = sorted(glob.glob(p, recursive=True))
        elif os.path.isdir(p):  # directory
            files = sorted(glob.glob(os.path.join(p, '*.*')))
        elif os.path.isfile(p):  # single file
            files = [p]
        else:
            raise Exception(f'ERROR: {p} does not exist')

        # Split the files by extension; images are listed before videos
        images, videos = [], []
        for f in files:
            ext = f.split('.')[-1].lower()
            if ext in IMG_FORMATS:
                images.append(f)
            if ext in VID_FORMATS:
                videos.append(f)
        ni, nv = len(images), len(videos)

        self.img_size = img_size
        self.stride = stride
        self.files = images + videos
        self.nf = ni + nv  # number of files
        self.video_flag = [False] * ni + [True] * nv
        self.mode = 'image'
        self.auto = auto
        if any(videos):
            self.new_video(videos[0])  # open the first video
        else:
            self.cap = None
        assert self.nf > 0, f'No images or videos found in {p}. ' \
                            f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'

    def __iter__(self):
        self.count = 0
        return self

    def __next__(self):
        """Return (path, letterboxed CHW RGB image, original BGR image, capture object, log string)."""
        if self.count == self.nf:
            raise StopIteration
        path = self.files[self.count]

        if not self.video_flag[self.count]:
            # Still image
            self.count += 1
            img0 = cv2.imread(path)  # BGR
            assert img0 is not None, f'Image Not Found {path}'
            s = f'image {self.count}/{self.nf} {path}: '
        else:
            # Video frame; when the current video is exhausted, move on to the next file
            self.mode = 'video'
            ret_val, img0 = self.cap.read()
            while not ret_val:
                self.count += 1
                self.cap.release()
                if self.count == self.nf:  # that was the last video
                    raise StopIteration
                path = self.files[self.count]
                self.new_video(path)
                ret_val, img0 = self.cap.read()

            self.frame += 1
            s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: '

        # Padded resize
        img = letterbox(img0, self.img_size, stride=self.stride, auto=self.auto)[0]

        # HWC to CHW, BGR to RGB, then make the result contiguous
        img = np.ascontiguousarray(img.transpose((2, 0, 1))[::-1])

        return path, img, img0, self.cap, s

    def new_video(self, path):
        # Open `path` as the current video and reset the frame counter
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def __len__(self):
        return self.nf  # number of files
239

240

241
class LoadWebcam:  # for inference
    # YOLOv5 local webcam dataloader, i.e. `python detect.py --source 0`
    def __init__(self, pipe='0', img_size=640, stride=32):
        """Open a capture on `pipe`; a numeric string is converted to an integer first."""
        self.img_size = img_size
        self.stride = stride
        # int() instead of eval(): same result for plain digit strings, also accepts leading zeros,
        # and never evaluates text as code
        self.pipe = int(pipe) if pipe.isnumeric() else pipe
        self.cap = cv2.VideoCapture(self.pipe)  # video capture object
        self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3)  # set buffer size

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        """Return (img_path, letterboxed CHW RGB frame, mirrored BGR frame, None, log string).

        Raises:
            StopIteration: when 'q' is pressed.
            AssertionError: when the capture fails to deliver a frame.
        """
        self.count += 1
        if cv2.waitKey(1) == ord('q'):  # q to quit
            self.cap.release()
            cv2.destroyAllWindows()
            raise StopIteration

        # Read frame; check success before touching the frame so a failed read
        # reports 'Camera Error' instead of failing inside cv2.flip
        ret_val, img0 = self.cap.read()
        assert ret_val, f'Camera Error {self.pipe}'
        img0 = cv2.flip(img0, 1)  # flip left-right

        img_path = 'webcam.jpg'
        s = f'webcam {self.count}: '

        # Padded resize
        img = letterbox(img0, self.img_size, stride=self.stride)[0]

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return img_path, img, img0, None, s

    def __len__(self):
        return 0
281

282

283
class LoadStreams:
    # YOLOv5 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP streams`
    def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True):
        """Open every source and start one daemon thread per source that keeps its latest frame.

        Args:
            sources: path of a text file with one source per line, or a single source string.
            img_size: letterbox target size.
            stride: letterbox stride.
            auto: forwarded to letterbox (only effective when all streams share one shape).
        """
        self.mode = 'stream'
        self.img_size = img_size
        self.stride = stride

        if os.path.isfile(sources):
            with open(sources) as f:
                sources = [x.strip() for x in f.read().strip().splitlines() if x.strip()]
        else:
            sources = [sources]

        n = len(sources)
        self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n
        self.sources = [clean_str(x) for x in sources]  # clean source names for later
        self.auto = auto
        for i, s in enumerate(sources):  # index, source
            # Start thread to read frames from video stream
            st = f'{i + 1}/{n}: {s}... '
            if 'youtube.com/' in s or 'youtu.be/' in s:  # if source is YouTube video
                check_requirements(('pafy', 'youtube_dl'))
                import pafy
                s = pafy.new(s).getbest(preftype="mp4").url  # YouTube URL
            # int() instead of eval(): never evaluates source text as code and accepts leading zeros
            s = int(s) if s.isnumeric() else s  # i.e. s = '0' local webcam
            cap = cv2.VideoCapture(s)
            assert cap.isOpened(), f'{st}Failed to open {s}'
            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            self.fps[i] = max(cap.get(cv2.CAP_PROP_FPS) % 100, 0) or 30.0  # 30 FPS fallback
            self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf')  # infinite stream fallback

            _, self.imgs[i] = cap.read()  # guarantee first frame
            self.threads[i] = Thread(target=self.update, args=([i, cap, s]), daemon=True)
            LOGGER.info(f"{st} Success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)")
            self.threads[i].start()
        LOGGER.info('')  # newline

        # check for common shapes
        s = np.stack([letterbox(x, self.img_size, stride=self.stride, auto=self.auto)[0].shape for x in self.imgs])
        self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
        if not self.rect:
            LOGGER.warning('WARNING: Stream shapes differ. For optimal performance supply similarly-shaped streams.')

    def update(self, i, cap, stream):
        # Read stream `i` frames in daemon thread
        n, f, read = 0, self.frames[i], 1  # frame number, total frame count, inference every 'read' frame
        while cap.isOpened() and n < f:
            n += 1
            cap.grab()
            if n % read == 0:
                success, im = cap.retrieve()
                if success:
                    self.imgs[i] = im
                else:
                    LOGGER.warning('WARNING: Video stream unresponsive, please check your IP camera connection.')
                    self.imgs[i] = np.zeros_like(self.imgs[i])  # blank frame with the previous frame's shape
                    cap.open(stream)  # re-open stream if signal was lost
            time.sleep(1 / self.fps[i])  # wait time

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        """Return (sources, stacked BCHW RGB batch, list of latest BGR frames, None, '').

        Raises:
            StopIteration: when any reader thread has stopped or 'q' is pressed.
        """
        self.count += 1
        if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'):  # q to quit
            cv2.destroyAllWindows()
            raise StopIteration

        # Letterbox
        img0 = self.imgs.copy()
        img = [letterbox(x, self.img_size, stride=self.stride, auto=self.rect and self.auto)[0] for x in img0]

        # Stack
        img = np.stack(img, 0)

        # Convert
        img = img[..., ::-1].transpose((0, 3, 1, 2))  # BGR to RGB, BHWC to BCHW
        img = np.ascontiguousarray(img)

        return self.sources, img, img0, None, ''

    def __len__(self):
        return len(self.sources)  # number of sources, not number of frames
369

370

371
def img2label_paths(img_paths):
    # Derive label paths from image paths: the last '/images/' directory component is
    # replaced by '/labelTxt/' and the file extension by '.txt'
    img_dir = f'{os.sep}images{os.sep}'
    label_dir = f'{os.sep}labelTxt{os.sep}'
    label_paths = []
    for img_path in img_paths:
        swapped = label_dir.join(img_path.rsplit(img_dir, 1))
        stem = swapped.rsplit('.', 1)[0]
        label_paths.append(stem + '.txt')
    return label_paths
375

376

377
class LoadImagesAndLabels(Dataset):
    # YOLOv5 train_loader/val_loader, loads images and labels for training and validation
    cache_version = 0.6  # dataset labels *.cache version

    def __init__(self, path, cls_names, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                 cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):
        """
        Collect image files, load or build the label cache and prepare per-batch bookkeeping.

        Args:
            path: dataset directory, text file listing images, or a list of either.
            cls_names: class names forwarded to verify_image_label when the label cache is built.
            img_size: target image size in pixels (also defines the mosaic border).
            batch_size: used to assign every image a batch index.
            augment: enables augmentation; mosaic is used when augment is set and rect is not.
            hyp: hyperparameter dict read in __getitem__.
            rect: sort images by aspect ratio and compute one letterbox shape per batch
                (forced to False when image_weights is set).
            image_weights: stored on the instance; disables rect.
            cache_images: falsy = no caching, 'disk' = *.npy files, any other truthy value = RAM.
            single_cls: overwrite every class id with 0.
            stride: rectangular batch shapes are rounded up to a multiple of this.
            pad: padding, in stride units, added to rectangular batch shapes.
            prefix: text prepended to log and error messages.

        Attributes (as documented by the original author):
            Dataset.labels (list): n_imgs * array(num_gt_perimg, [cls_id, poly])
            Dataset.shapes (array): (n_imgs, [ori_img_width, ori_img_height])

            Dataset.batch_shapes (array): (n_batches, [h_rect, w_rect])
        """
        self.img_size = img_size
        self.augment = augment
        self.hyp = hyp
        self.image_weights = image_weights
        self.rect = False if image_weights else rect
        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
        self.mosaic_border = [-img_size // 2, -img_size // 2]
        self.stride = stride
        self.path = path
        self.albumentations = Albumentations() if augment else None
        self.cls_names = cls_names

        try:
            f = []  # image files
            for p in path if isinstance(path, list) else [path]:
                p = Path(p)  # os-agnostic
                if p.is_dir():  # dir
                    f += glob.glob(str(p / '**' / '*.*'), recursive=True)
                elif p.is_file():  # file
                    with open(p) as t:
                        t = t.read().strip().splitlines()
                        parent = str(p.parent) + os.sep
                        f += [x.replace('./', parent) if x.startswith('./') else x for x in t]  # local to global path
                else:
                    raise Exception(f'{prefix}{p} does not exist')
            self.img_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS)
            assert self.img_files, f'{prefix}No images found'
        except Exception as e:
            # chain the original error so its traceback is preserved
            raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {HELP_URL}') from e

        # Check cache
        self.label_files = img2label_paths(self.img_files)  # labels
        cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')
        try:
            # NOTE: allow_pickle=True unpickles arbitrary objects; only use caches from trusted dataset directories
            cache, exists = np.load(cache_path, allow_pickle=True).item(), True  # load dict
            # Explicit checks instead of assert so a stale cache is still rejected under `python -O`
            if cache['version'] != self.cache_version:
                raise ValueError('label cache version mismatch')
            if cache['hash'] != get_hash(self.label_files + self.img_files):
                raise ValueError('label cache hash mismatch')
        except Exception:  # missing, unreadable or stale cache: rebuild it
            cache, exists = self.cache_labels(cache_path, prefix), False  # cache

        # Display cache
        nf, nm, ne, nc, n = cache.pop('results')  # found, missing, empty, corrupted, total
        if exists:
            d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
            tqdm(None, desc=prefix + d, total=n, initial=n)  # display cache results
            if cache['msgs']:
                LOGGER.info('\n'.join(cache['msgs']))  # display warnings
        assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {HELP_URL}'

        # Read cache
        for k in ('hash', 'version', 'msgs'):  # remove bookkeeping items, leaving only per-image entries
            cache.pop(k)
        labels, shapes, self.segments = zip(*cache.values())
        self.labels = list(labels)  # labels(list[array]): n_imgs * array(num_gt_perimg, [cls_id, poly])
        self.shapes = np.array(shapes, dtype=np.float64)  # img_ori shape
        self.img_files = list(cache.keys())  # update
        self.label_files = img2label_paths(cache.keys())  # update
        n = len(shapes)  # number of images
        bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index (np.int no longer exists in NumPy)
        nb = bi[-1] + 1  # number of batches
        self.batch = bi  # batch index of image
        self.n = n
        self.indices = range(n)

        # Update labels
        include_class = []  # filter labels to include only these classes (optional)
        include_class_array = np.array(include_class).reshape(1, -1)
        for i, (label, segment) in enumerate(zip(self.labels, self.segments)):
            if include_class:
                j = (label[:, 0:1] == include_class_array).any(1)
                self.labels[i] = label[j]
                if segment:
                    self.segments[i] = segment[j]
            if single_cls:  # single-class training, merge all classes into 0
                self.labels[i][:, 0] = 0
                if segment:
                    self.segments[i][:, 0] = 0

        # Rectangular Training
        if self.rect:
            # Sort by aspect ratio
            s = self.shapes  # wh
            ar = s[:, 1] / s[:, 0]  # aspect ratio (h / w)
            irect = ar.argsort()
            self.img_files = [self.img_files[i] for i in irect]
            self.label_files = [self.label_files[i] for i in irect]
            self.labels = [self.labels[i] for i in irect]
            self.shapes = s[irect]  # wh
            ar = ar[irect]

            # Set training image shapes
            shapes = [[1, 1]] * nb
            for i in range(nb):
                ari = ar[bi == i]
                mini, maxi = ari.min(), ari.max()
                if maxi < 1:  # every image in the batch has h/w < 1: shape = [h/w, 1] = [h_ratio, w_ratio]
                    shapes[i] = [maxi, 1]
                elif mini > 1:  # every image in the batch has h/w > 1: shape = [1, w/h] = [h_ratio, w_ratio]
                    shapes[i] = [1, 1 / mini]

            # (nb, [h_rect, w_rect]), rounded up to a multiple of stride
            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride

        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
        self.imgs, self.img_npy = [None] * n, [None] * n
        if cache_images:
            if cache_images == 'disk':
                self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + '_npy')
                self.img_npy = [self.im_cache_dir / Path(f).with_suffix('.npy').name for f in self.img_files]
                self.im_cache_dir.mkdir(parents=True, exist_ok=True)
            gb = 0  # bytes of cached images (reported in GB below)
            self.img_hw0, self.img_hw = [None] * n, [None] * n
            results = ThreadPool(NUM_THREADS).imap(lambda x: load_image_label(*x), zip(repeat(self), range(n)))
            pbar = tqdm(enumerate(results), total=n)
            for i, x in pbar:
                if cache_images == 'disk':
                    if not self.img_npy[i].exists():
                        np.save(self.img_npy[i].as_posix(), x[0])
                    gb += self.img_npy[i].stat().st_size
                else:
                    # im, hw_orig, hw_resized, label_resized = load_image_label(self, i)
                    self.imgs[i], self.img_hw0[i], self.img_hw[i], self.labels[i] = x
                    gb += self.imgs[i].nbytes
                pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})'
            pbar.close()

    def cache_labels(self, path=Path('./labels.cache'), prefix=''):
        """Verify every image/label pair in a process pool, then build, save and return the cache dict.

        The returned dict maps image file -> [labels, shape, segments] and additionally holds
        the 'hash', 'results', 'msgs' and 'version' entries. A failure to write the cache file
        is logged as a warning and does not raise.
        """
        x = {}  # dict
        nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
        desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..."
        with Pool(NUM_THREADS) as pool:
            pbar = tqdm(pool.imap(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix), repeat(self.cls_names))),
                        desc=desc, total=len(self.img_files))
            for im_file, l, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar:
                nm += nm_f
                nf += nf_f
                ne += ne_f
                nc += nc_f
                if im_file:
                    x[im_file] = [l, shape, segments]
                if msg:
                    msgs.append(msg)
                pbar.desc = f"{desc}{nf} found, {nm} missing, {ne} empty, {nc} corrupted"

        pbar.close()
        if msgs:
            LOGGER.info('\n'.join(msgs))
        if nf == 0:
            LOGGER.warning(f'{prefix}WARNING: No labels found in {path}. See {HELP_URL}')
        x['hash'] = get_hash(self.label_files + self.img_files)
        x['results'] = nf, nm, ne, nc, len(self.img_files)
        x['msgs'] = msgs  # warnings
        x['version'] = self.cache_version  # cache version
        try:
            np.save(path, x)  # save cache for next time
            path.with_suffix('.cache.npy').rename(path)  # remove .npy suffix
            LOGGER.info(f'{prefix}New cache created: {path}')
        except Exception as e:
            LOGGER.warning(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}')  # not writeable
        return x

    def __len__(self):
        return len(self.img_files)

    @staticmethod
    def _rbox_keep_mask(rboxes, img_h, img_w, min_side=5):
        """Return a boolean mask of rotated boxes to keep.

        A box [cx, cy, l, s, ...] is kept when its centre lies inside the image and at
        least one of its two sides is longer than min_side pixels.
        NOTE(review): the original inline expression had no parentheses around the size
        test and compared cx (not cy) with the image height; this form is assumed to be
        the intent - confirm.
        """
        cx, cy = rboxes[:, 0], rboxes[:, 1]
        centre_inside = (cx >= 0) & (cx < img_w) & (cy >= 0) & (cy < img_h)
        large_enough = (rboxes[:, 2] > min_side) | (rboxes[:, 3] > min_side)
        return centre_inside & large_enough

    def __getitem__(self, index):
        """
        Augment the [clsid poly] labels and trans label format to rbox.
        Returns:
            img (tensor): (3, height, width), RGB
            labels_out (tensor): (n, [None clsid cx cy l s theta gaussian_θ_labels]) θ∈[-pi/2, pi/2)
            img_file (str): img_dir
            shapes : None or [(h_raw, w_raw), (hw_ratios, wh_paddings)], for COCO mAP rescaling
        """
        index = self.indices[index]  # linear, shuffled, or image_weights

        hyp = self.hyp
        mosaic = self.mosaic and random.random() < hyp['mosaic']
        if mosaic:
            # Load mosaic
            img, labels = load_mosaic(self, index)
            shapes = None

            # MixUp augmentation
            if random.random() < hyp['mixup']:
                img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.n - 1)))

        else:
            # Load image and label
            img, (h0, w0), (h, w), img_label = load_image_label(self, index)

            # Letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape [h_rect, w_rect]
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)  # ratio[w_ratio, h_ratio], pad[w_padding, h_padding]
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling [(h_raw, w_raw), (hw_ratios, wh_paddings)]

            labels = img_label.copy()  # labels (array): (num_gt_perimg, [cls_id, poly])
            if labels.size:
                # scale and shift the polygon x (odd columns) and y (even columns) coordinates
                labels[:, [1, 3, 5, 7]] = img_label[:, [1, 3, 5, 7]] * ratio[0] + pad[0]
                labels[:, [2, 4, 6, 8]] = img_label[:, [2, 4, 6, 8]] * ratio[1] + pad[1]

            if self.augment:
                img, labels = random_perspective(img, labels,
                                                 degrees=hyp['degrees'],
                                                 translate=hyp['translate'],
                                                 scale=hyp['scale'],
                                                 shear=hyp['shear'],
                                                 perspective=hyp['perspective'])

        nl = len(labels)  # number of labels

        if self.augment:
            # HSV color-space
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            img_h, img_w = img.shape[0], img.shape[1]
            # Flip up-down
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if nl:
                    labels[:, 2::2] = img_h - labels[:, 2::2] - 1  # mirror the y coordinates

            # Flip left-right
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if nl:
                    labels[:, 1::2] = img_w - labels[:, 1::2] - 1  # mirror the x coordinates

        if nl:
            # *[clsid poly] to *[clsid cx cy l s theta gaussian_θ_labels] θ∈[-pi/2, pi/2) non-normalized
            rboxes, csl_labels = poly2rbox(polys=labels[:, 1:],
                                           num_cls_thata=hyp['cls_theta'] if hyp else 180,
                                           radius=hyp['csl_radius'] if hyp else 6.0,
                                           use_pi=True, use_gaussian=True)
            labels_obb = np.concatenate((labels[:, :1], rboxes, csl_labels), axis=1)
            labels_mask = self._rbox_keep_mask(rboxes, img.shape[0], img.shape[1])
            labels_obb = labels_obb[labels_mask]
            nl = len(labels_obb)  # update after filter

        if hyp:
            c_num = 7 + hyp['cls_theta']  # [index_of_batch clsid cx cy l s theta gaussian_θ_labels]
        else:
            c_num = 187  # same layout with the default of 180 theta classes

        labels_out = torch.zeros((nl, c_num))
        if nl:
            labels_out[:, 1:] = torch.from_numpy(labels_obb)  # column 0 is filled in by collate_fn

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes

    @staticmethod
    def collate_fn(batch):
        # Stack images, concatenate labels and write each label's position in the batch into column 0
        img, label, path, shapes = zip(*batch)  # transposed; (tuple(b*tensor))
        for i, l in enumerate(label):
            l[:, 0] = i  # add target image index for build_targets()
        return torch.stack(img, 0), torch.cat(label, 0), path, shapes

    @staticmethod
    def collate_fn4(batch):
        # Quad collate: merge every 4 samples into one, either one upscaled image or a 2x2 tile
        # NOTE(review): ho/wo/s below have 6 columns while __getitem__ emits 7 + cls_theta label
        # columns, so the tiled branch looks incompatible with these labels - confirm before using quad
        img, label, path, shapes = zip(*batch)  # transposed
        n = len(shapes) // 4
        img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]

        ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]])
        wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]])
        s = torch.tensor([[1, 1, 0.5, 0.5, 0.5, 0.5]])  # scale
        for i in range(n):  # zidane torch.zeros(16,3,720,1280)  # BCHW
            i *= 4
            if random.random() < 0.5:
                im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2.0, mode='bilinear', align_corners=False)[
                    0].type(img[i].type())
                l = label[i]
            else:
                im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2)
                l = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
            img4.append(im)
            label4.append(l)

        for i, l in enumerate(label4):
            l[:, 0] = i  # add target image index for build_targets()

        return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4
699

700

701
# Ancillary functions --------------------------------------------------------------------------------------------------
702
def load_image_label(self, i):
    """Load image ``i`` of the dataset together with its labels.

    If the image is cached in ``self.imgs`` the cached image, its cached original and
    resized sizes and the stored labels are returned as they are. Otherwise the image is
    read from its ``.npy`` file when one exists, or from the image file with OpenCV, and
    resized so that its longer side equals ``self.img_size``; label columns 1.. are
    scaled by the same ratio.

    Returns:
        (im, hw_original, hw_resized, label)
    """
    label = self.labels[i].copy()  # labels (array): (num_gt_perimg, [cls_id, poly])
    if self.imgs[i] is not None:  # cached in RAM
        return self.imgs[i], self.img_hw0[i], self.img_hw[i], self.labels[i]

    npy_file = self.img_npy[i]
    if npy_file and npy_file.exists():  # a saved .npy copy takes precedence
        im = np.load(npy_file)
    else:
        img_path = self.img_files[i]
        im = cv2.imread(img_path)  # BGR
        assert im is not None, f'Image Not Found {img_path}'

    orig_h, orig_w = im.shape[:2]
    ratio = self.img_size / max(orig_h, orig_w)
    if ratio != 1:  # longer side differs from the target size
        if ratio < 1 and not self.augment:
            interp = cv2.INTER_AREA  # shrinking without augmentation
        else:
            interp = cv2.INTER_LINEAR
        im = cv2.resize(im, (int(orig_w * ratio), int(orig_h * ratio)), interpolation=interp)
        label[:, 1:] *= ratio  # keep the polygon in step with the resized image
    return im, (orig_h, orig_w), im.shape[:2], label
723

724

725
def load_mosaic(self, index):
    """Build a 4-image mosaic sample around dataset image ``index``.

    The requested image and three randomly chosen ones are pasted, in shuffled order,
    into the four quadrants around a random centre of a ``2 * img_size`` square canvas
    filled with the value 114. Labels are shifted together with their tiles, filtered
    with ``poly_filter``, and the mosaic is then passed through ``copy_paste`` and
    ``random_perspective``.

    Returns:
        (img4, labels4) as returned by ``random_perspective``.
    """
    size = self.img_size
    canvas = size * 2  # side length of the mosaic canvas
    # one random draw per entry of self.mosaic_border, unpacked as (y, x)
    yc, xc = (int(random.uniform(-border, 2 * size + border)) for border in self.mosaic_border)
    tile_ids = [index] + random.choices(self.indices, k=3)  # 3 additional image indices
    random.shuffle(tile_ids)

    x_cols, y_cols = [1, 3, 5, 7], [2, 4, 6, 8]  # label columns shifted by the x / y padding
    labels4, segments4 = [], []
    for quadrant, tile_id in enumerate(tile_ids):
        tile, _, (h, w), tile_label = load_image_label(self, tile_id)

        # "a" coordinates index the canvas, "b" coordinates index the tile;
        # both are (xmin, ymin, xmax, ymax)
        if quadrant == 0:  # top left
            img4 = np.full((canvas, canvas, tile.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h
        elif quadrant == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, canvas), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif quadrant == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(canvas, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif quadrant == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, canvas), min(canvas, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = tile[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        padw = x1a - x1b  # offset from tile coordinates to canvas coordinates
        padh = y1a - y1b

        # Labels
        labels = tile_label.copy()  # labels (array): (num_gt_perimg, [cls_id, poly])
        segments = self.segments[tile_id].copy()
        if labels.size:
            labels[:, x_cols] = tile_label[:, x_cols] + padw
            labels[:, y_cols] = tile_label[:, y_cols] + padh
            segments = [xyn2xy(seg, w, h, padw, padh) for seg in segments]
        labels4.append(labels)
        segments4.extend(segments)

    # Concat labels; only the segments are clipped, labels are filtered by poly_filter instead
    labels4 = np.concatenate(labels4, 0)
    for seg in segments4:
        np.clip(seg, 0, 2 * size, out=seg)  # clip when using random_perspective()
    keep = poly_filter(polys=labels4[:, 1:].copy(), h=2 * size, w=2 * size)
    labels4 = labels4[keep]

    # Augment
    img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp['copy_paste'])
    img4, labels4 = random_perspective(img4, labels4, segments4,
                                       degrees=self.hyp['degrees'],
                                       translate=self.hyp['translate'],
                                       scale=self.hyp['scale'],
                                       shear=self.hyp['shear'],
                                       perspective=self.hyp['perspective'],
                                       border=self.mosaic_border)  # border to remove

    return img4, labels4
787

788

789
def load_mosaic9(self, index):
    """Build a 9-image mosaic sample around dataset image ``index``.

    The requested image and eight randomly chosen ones are shuffled and laid out on a
    ``3 * img_size`` square canvas filled with the value 114: the first tile goes at the
    canvas centre and the others are placed around it. A random ``2 * img_size`` window
    is then cropped out, labels are shifted accordingly and filtered with
    ``poly_filter``, and the result is passed through ``random_perspective``.

    Returns:
        (img9, labels9) as returned by ``random_perspective``.
    """
    size = self.img_size
    tile_ids = [index] + random.choices(self.indices, k=8)  # 8 additional image indices
    random.shuffle(tile_ids)

    x_cols, y_cols = [1, 3, 5, 7], [2, 4, 6, 8]  # label columns shifted by the x / y offset
    labels9, segments9 = [], []
    for slot, tile_id in enumerate(tile_ids):
        tile, _, (h, w), tile_label = load_image_label(self, tile_id)

        # box = (xmin, ymin, xmax, ymax) of this tile on the canvas; h0/w0 are the centre
        # tile's size and hp/wp the previous tile's size
        if slot == 0:  # center
            img9 = np.full((size * 3, size * 3, tile.shape[2]), 114, dtype=np.uint8)  # base image with 9 tiles
            h0, w0 = h, w
            box = size, size, size + w, size + h
        elif slot == 1:  # top
            box = size, size - h, size + w, size
        elif slot == 2:  # top right
            box = size + wp, size - h, size + wp + w, size
        elif slot == 3:  # right
            box = size + w0, size, size + w0 + w, size + h
        elif slot == 4:  # bottom right
            box = size + w0, size + hp, size + w0 + w, size + hp + h
        elif slot == 5:  # bottom
            box = size + w0 - w, size + h0, size + w0, size + h0 + h
        elif slot == 6:  # bottom left
            box = size + w0 - wp - w, size + h0, size + w0 - wp, size + h0 + h
        elif slot == 7:  # left
            box = size - w, size + h0 - h, size, size + h0
        elif slot == 8:  # top left
            box = size - w, size + h0 - hp - h, size, size + h0 - hp

        padx, pady = box[:2]
        x1, y1, x2, y2 = (max(coord, 0) for coord in box)  # allocate coords

        # Labels
        labels = tile_label.copy()  # labels (array): (num_gt_perimg, [cls_id, poly])
        segments = self.segments[tile_id].copy()
        if labels.size:
            segments = [xyn2xy(seg, w, h, padx, pady) for seg in segments]
            shifted = labels.clone() if isinstance(labels, torch.Tensor) else np.copy(labels)
            shifted[:, x_cols] = labels[:, x_cols] + padx
            shifted[:, y_cols] = labels[:, y_cols] + pady
            labels = shifted

        labels9.append(labels)
        segments9.extend(segments)

        # Image
        img9[y1:y2, x1:x2] = tile[y1 - pady:, x1 - padx:]  # img9[ymin:ymax, xmin:xmax]
        hp, wp = h, w  # height, width previous

    # Offset: one random draw per entry of self.mosaic_border, unpacked as (y, x)
    yc, xc = (int(random.uniform(0, size)) for _ in self.mosaic_border)
    img9 = img9[yc:yc + 2 * size, xc:xc + 2 * size]

    # Concat labels and move them into the cropped window's coordinates
    labels9 = np.concatenate(labels9, 0)
    labels9[:, x_cols] -= xc
    labels9[:, y_cols] -= yc

    window_origin = np.array([xc, yc])
    segments9 = [seg - window_origin for seg in segments9]

    # only the segments are clipped; labels are filtered by poly_filter instead
    for seg in segments9:
        np.clip(seg, 0, 2 * size, out=seg)  # clip when using random_perspective()
    keep = poly_filter(polys=labels9[:, 1:].copy(), h=2 * size, w=2 * size)
    labels9 = labels9[keep]

    # Augment
    img9, labels9 = random_perspective(img9, labels9, segments9,
                                       degrees=self.hyp['degrees'],
                                       translate=self.hyp['translate'],
                                       scale=self.hyp['scale'],
                                       shear=self.hyp['shear'],
                                       perspective=self.hyp['perspective'],
                                       border=self.mosaic_border)  # border to remove

    return img9, labels9
874

875

876
def create_folder(path='./new'):
    """Create an empty directory at ``path``, removing any existing directory tree first."""
    already_there = os.path.exists(path)
    if already_there:
        shutil.rmtree(path)  # delete the previous output folder with all of its contents
    os.makedirs(path)  # make the new, empty output folder
881

882

883
def flatten_recursive(path='../datasets/coco128'):
    """Copy every file below ``path`` into one flat directory named ``<path>_flat``.

    The destination is created with ``create_folder``, so an existing ``<path>_flat``
    directory is deleted first. Files are matched with the recursive glob ``**/*.*``
    (names containing a dot) and copied under their bare file name, so files that share
    a name in different sub-directories overwrite one another.

    Args:
        path: source directory, as a ``str`` or ``pathlib.Path``.
    """
    source_dir = Path(path)  # accept Path objects as well as strings
    new_path = Path(str(source_dir) + '_flat')
    create_folder(new_path)
    pattern = str(source_dir) + '/**/*.*'
    # '*.*' also matches directories whose name contains a dot; copyfile() cannot copy those
    files = [f for f in glob.glob(pattern, recursive=True) if os.path.isfile(f)]
    for file in tqdm(files):
        shutil.copyfile(file, new_path / Path(file).name)
889

890

891
def extract_boxes(path='../datasets/coco128'):  # from utils.datasets import *; extract_boxes()
    """Convert a detection dataset into a classification dataset, one directory per class.

    Every image below ``path`` whose label file exists is cropped once per label row and
    the crop is written to ``path/classifier/<class>/<path.stem>_<image stem>_<row>.jpg``.
    An existing ``path/classifier`` directory is removed first.

    Label rows are parsed as floats and read as ``class, x, y, w, h`` with the box scaled
    by the image width/height, i.e. normalized xywh boxes.
    NOTE(review): verify_image_label() in this file parses DOTA-style polygon rows with
    class names, which would not parse here - confirm this helper is still meant to be
    used with this fork's label files.

    Args:
        path: dataset root directory containing the images.
    """
    path = Path(path)  # images dir
    classifier_dir = path / 'classifier'
    if classifier_dir.is_dir():
        shutil.rmtree(classifier_dir)  # remove existing
    files = list(path.rglob('*.*'))
    n = len(files)  # number of files
    for im_file in tqdm(files, total=n):
        # compare the suffix case-insensitively, as autosplit() does
        if im_file.suffix[1:].lower() not in IMG_FORMATS:
            continue

        # image
        # NOTE(review): the array is reversed to RGB here but later written with
        # cv2.imwrite(), which expects BGR - presumably the saved crops have red and blue
        # swapped; left unchanged, confirm whether that is intended.
        im = cv2.imread(str(im_file))[..., ::-1]  # BGR to RGB
        h, w = im.shape[:2]

        # labels
        lb_file = Path(img2label_paths([str(im_file)])[0])
        if not lb_file.exists():
            continue
        with open(lb_file) as f:
            lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32)  # labels

        for j, x in enumerate(lb):
            c = int(x[0])  # class
            crop_file = classifier_dir / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg'  # new filename
            crop_file.parent.mkdir(parents=True, exist_ok=True)

            b = x[1:] * [w, h, w, h]  # box
            # b[2:] = b[2:].max()  # rectangle to square
            b[2:] = b[2:] * 1.2 + 3  # pad
            # np.int was removed from NumPy; the builtin int selects the same default integer type
            b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)

            b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
            b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
            assert cv2.imwrite(str(crop_file), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {crop_file}'
923

924

925
def autosplit(path='../datasets/coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
    """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
    Usage: from utils.datasets import *; autosplit()
    Arguments
        path:            Path to images directory
        weights:         Train, val, test weights (list, tuple)
        annotated_only:  Only use images with an annotated txt file

    The split lists are written next to the images directory (in ``path.parent``), one
    ``./``-prefixed relative image path per line. Existing split files are removed first.
    The global ``random`` generator is re-seeded with 0 so the assignment is reproducible.
    """
    path = Path(path)  # images dir
    image_files = sorted(p for p in path.rglob('*.*') if p.suffix[1:].lower() in IMG_FORMATS)  # image files only
    n_images = len(image_files)
    random.seed(0)  # for reproducibility
    split_ids = random.choices([0, 1, 2], weights=weights, k=n_images)  # assign each image to a split

    split_names = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt']  # 3 txt files
    for name in split_names:
        (path.parent / name).unlink(missing_ok=True)  # remove existing

    print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only)
    for split_id, image_file in tqdm(zip(split_ids, image_files), total=n_images):
        if annotated_only and not Path(img2label_paths([str(image_file)])[0]).exists():
            continue  # no label file for this image
        with open(path.parent / split_names[split_id], 'a') as f:
            f.write('./' + image_file.relative_to(path.parent).as_posix() + '\n')  # add image to txt file
947

948

949
def verify_image_label(args):
    """Verify one image-label pair and parse its DOTA-style label file.

    Args:
        args: tuple ``(im_file, lb_file, prefix, cls_name_list)``: image path, label
            path, a prefix for warning messages, and the list of class names whose index
            becomes the class id.

    Returns:
        On success ``(im_file, l, shape, segments, nm, nf, ne, nc, msg)`` where ``l`` is a
        float32 array with one row ``[cls_id, 8 polygon values]`` per kept label,
        ``segments`` is always an empty list (segment labels are not supported), and
        ``nm, nf, ne, nc`` flag a missing / found / empty / corrupt label file.
        If anything raises, ``[None, None, None, None, nm, nf, ne, 1, msg]`` is returned
        with the error text in ``msg``.

    Each label row must have 10 whitespace-separated columns: 8 polygon values, the class
    name and a final flag. Rows whose final column is ``"2"`` are skipped.
    """
    im_file, lb_file, prefix, cls_name_list = args
    nm, nf, ne, nc, msg, segments = 0, 0, 0, 0, '', []  # number (missing, found, empty, corrupt), message, segments
    try:
        # verify images
        im = Image.open(im_file)
        im.verify()  # PIL verify
        shape = exif_size(im)  # image size
        assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'
        assert im.format.lower() in IMG_FORMATS, f'invalid image format {im.format}'
        if im.format.lower() in ('jpg', 'jpeg'):
            with open(im_file, 'rb') as f:
                f.seek(-2, 2)  # the last two bytes of a complete JPEG are the end-of-image marker
                if f.read() != b'\xff\xd9':  # corrupt JPEG
                    ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100)
                    msg = f'{prefix}WARNING: {im_file}: corrupt JPEG restored and saved'

        # verify labels
        if os.path.isfile(lb_file):
            nf = 1  # label found
            with open(lb_file) as f:
                labels = [x.split() for x in f.read().strip().splitlines() if len(x)]

            # Yolov5-obb does not support segment labels yet
            l_ = []
            for label in labels:
                # validate every row before indexing into it, so a malformed row is
                # reported with this message rather than an IndexError
                assert len(label) == 10, f'Yolov5-OBB labels require 10 columns, which same as DOTA Dataset, {len(label)} columns detected'
                if label[-1] == "2":  # difficult
                    continue
                cls_id = cls_name_list.index(label[8])  # raises ValueError for an unknown class name
                l_.append(np.concatenate((cls_id, label[:8]), axis=None))
            l = np.array(l_, dtype=np.float32)
            nl = len(l)
            if nl:
                assert (l >= 0).all(), f'negative label values {l[l < 0]}, please check your dota format labels'
                _, i = np.unique(l, axis=0, return_index=True)
                if len(i) < nl:  # duplicate row check
                    l = l[i]  # remove duplicates
                    if segments:
                        segments = [segments[x] for x in i]  # a list cannot be indexed by an index array
                    msg = f'{prefix}WARNING: {im_file}: {nl - len(i)} duplicate labels removed'
            else:
                ne = 1  # label empty
                l = np.zeros((0, 9), dtype=np.float32)
        else:
            nm = 1  # label missing
            l = np.zeros((0, 9), dtype=np.float32)
        return im_file, l, shape, segments, nm, nf, ne, nc, msg
    except Exception as e:
        # any failure marks the pair as corrupt; the caller receives the reason in msg
        nc = 1
        msg = f'{prefix}WARNING: {im_file}: ignoring corrupt image/label: {e}'
        return [None, None, None, None, nm, nf, ne, nc, msg]
1009

1010

1011
def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False, profile=False, hub=False):
    """ Return dataset statistics dictionary with images and instances counts per split per class
    To run in parent directory: export PYTHONPATH="$PWD/yolov5"
    Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', autodownload=True)
    Usage2: from utils.datasets import *; dataset_stats('../datasets/coco128_with_yaml.zip')
    Arguments
        path:           Path to data.yaml or data.zip (with data.yaml inside data.zip)
        autodownload:   Attempt to download dataset if not found locally
        verbose:        Print stats dictionary
        profile:        Time saving and re-loading the stats as *.npy and *.json
        hub:            Write resized images and stats.json to a '<dataset path>-hub' directory
    Returns
        stats dictionary with 'nc', 'names' and one entry per split ('train', 'val', 'test');
        a split that is absent from the data yaml maps to None
    """

    def round_labels(labels):
        # Update labels to integer class and 4 decimal place floats
        return [[int(c), *(round(x, 4) for x in points)] for c, *points in labels]

    def unzip(path):
        # Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/'
        # Returns (zipped, data_dir, yaml_path)
        if str(path).endswith('.zip'):  # path is data.zip
            assert Path(path).is_file(), f'Error unzipping {path}, file not found'
            with ZipFile(path) as archive:  # close the archive once extracted
                archive.extractall(path=path.parent)  # unzip
            extracted_dir = path.with_suffix('')  # dataset directory == zip name
            return True, str(extracted_dir), next(extracted_dir.rglob('*.yaml'))  # zipped, data_dir, yaml_path
        else:  # path is data.yaml
            return False, None, path

    def hub_ops(f, max_dim=1920):
        # HUB ops for 1 image 'f': resize and save at reduced quality in /dataset-hub for web/app viewing
        # im_dir is read from the enclosing function; it is assigned before this helper is called
        f_new = im_dir / Path(f).name  # dataset-hub image filename
        try:  # use PIL
            im = Image.open(f)
            r = max_dim / max(im.height, im.width)  # ratio
            if r < 1.0:  # image too large
                im = im.resize((int(im.width * r), int(im.height * r)))
            im.save(f_new, 'JPEG', quality=75, optimize=True)  # save
        except Exception as e:  # use OpenCV
            print(f'WARNING: HUB ops PIL failure {f}: {e}')
            im = cv2.imread(f)
            im_height, im_width = im.shape[:2]
            r = max_dim / max(im_height, im_width)  # ratio
            if r < 1.0:  # image too large
                im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_AREA)
            cv2.imwrite(str(f_new), im)

    zipped, data_dir, yaml_path = unzip(Path(path))
    with open(check_yaml(yaml_path), errors='ignore') as f:
        data = yaml.safe_load(f)  # data dict
        if zipped:
            data['path'] = data_dir  # TODO: should this be dir.resolve()?
    check_dataset(data, autodownload)  # download dataset if missing
    hub_dir = Path(data['path'] + ('-hub' if hub else ''))
    stats = {'nc': data['nc'], 'names': data['names']}  # statistics dictionary
    for split in 'train', 'val', 'test':
        if data.get(split) is None:
            stats[split] = None  # i.e. no test set
            continue
        dataset = LoadImagesAndLabels(data[split])  # load dataset
        # one row of per-class instance counts per image (label column 0 is the class id)
        x = np.array([np.bincount(label[:, 0].astype(int), minlength=data['nc'])
                      for label in tqdm(dataset.labels, total=dataset.n, desc='Statistics')])
        stats[split] = {'instance_stats': {'total': int(x.sum()), 'per_class': x.sum(0).tolist()},
                        'image_stats': {'total': dataset.n, 'unlabelled': int(np.all(x == 0, 1).sum()),
                                        'per_class': (x > 0).sum(0).tolist()},
                        'labels': [{str(Path(k).name): round_labels(v.tolist())} for k, v in
                                   zip(dataset.img_files, dataset.labels)]}

        if hub:
            im_dir = hub_dir / 'images'
            im_dir.mkdir(parents=True, exist_ok=True)
            # the context manager shuts the worker threads down once every image is processed
            with ThreadPool(NUM_THREADS) as pool:
                for _ in tqdm(pool.imap(hub_ops, dataset.img_files), total=dataset.n, desc='HUB Ops'):
                    pass

    # Profile
    stats_path = hub_dir / 'stats.json'
    if profile:
        file = stats_path.with_suffix('.npy')
        t1 = time.time()
        np.save(file, stats)
        t2 = time.time()
        np.load(file, allow_pickle=True)  # re-reads the file written just above; result only timed
        print(f'stats.npy times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write')

        file = stats_path.with_suffix('.json')
        t1 = time.time()
        with open(file, 'w') as f:
            json.dump(stats, f)  # save stats *.json
        t2 = time.time()
        with open(file) as f:
            json.load(f)  # result only timed
        print(f'stats.json times: {time.time() - t2:.3f}s read, {t2 - t1:.3f}s write')

    # Save, print and return
    if hub:
        print(f'Saving {stats_path.resolve()}...')
        with open(stats_path, 'w') as f:
            json.dump(stats, f)  # save stats.json
    if verbose:
        print(json.dumps(stats, indent=2, sort_keys=False))
    return stats
1111

1112
Product

Resources

Company