yolov5源码地址:https://github.com/ultralytics/yolov5 文末有完整代码
克隆下最新的yolov5,在根目录下创建landmark文件夹

文件夹中创建三个文件(predict, test ,train_landmark),类似yolov5的分类和语义分割任务

没有提及修改的都是直接添加就行
landmark/train_landmark.py
import argparseimport loggingimport mathimport osimport sysimport randomimport timefrom pathlib import Pathfrom threading import Threadfrom warnings import warn import numpy as npimport torch.distributed as distimport torch.nn as nnimport torch.nn.functional as Fimport torch.optim as optimimport torch.optim.lr_scheduler as lr_schedulerimport torch.utils.dataimport yamlfrom torch.cuda import ampfrom torch.nn.parallel import DistributedDataParallel as DDPfrom torch.utils.tensorboard import SummaryWriterfrom tqdm import tqdm FILE = Path(__file__).resolve()ROOT = FILE.parents[1] # YOLOv5 root directoryif str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATHROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative import landmark.test as test # import test.py to get mAP after each epochfrom models.experimental import attempt_loadfrom models.yolo import Modelfrom utils.callbacks import Callbacksfrom utils.autoanchor import check_anchors# from utils.facedataloaders import create_dataloaderfrom utils.landmark.dataloaders import create_dataloaderfrom utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \ fitness, strip_optimizer, get_latest_run, check_dataset, check_file, check_git_status, check_img_size, \ print_mutation, set_logging, intersect_dicts, check_suffix, check_yaml,colorstr,LOGGERfrom utils.downloads import attempt_download,is_urlfrom utils.landmark.loss import compute_lossfrom utils.landmark.plots import plot_labelsf, plot_resultsf, plot_evolutionfrom utils.plots import plot_imagesfrom utils.torch_utils import ModelEMA, select_device, torch_distributed_zero_first logger = logging.getLogger(__name__) try: import wandbexcept ImportError: wandb = None # logger.info("Install Weights & Biases for experiment logging via 'pip install wandb' (recommended)") def train(hyp, opt, device, tb_writer=None, wandb=None): logger.info(f'Hyperparameters {hyp}') save_dir, epochs, batch_size, total_batch_size, weights, rank = \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank # Directories wdir = save_dir / 'weights' wdir.mkdir(parents=True, exist_ok=True) # make dir last = wdir / 'last.pt' best = wdir / 'best.pt' results_file = save_dir / 'results.txt' # Save run settings with open(save_dir / 'hyp.yaml', 'w') as f: yaml.dump(hyp, f, sort_keys=False) with open(save_dir / 'opt.yaml', 'w') as f: yaml.dump(vars(opt), f, sort_keys=False) # Configure plots = not opt.evolve # create plots cuda = device.type != 'cpu' init_seeds(2 + rank) with open(opt.data) as f: data_dict = yaml.load(f, Loader=yaml.FullLoader) # data dict with torch_distributed_zero_first(rank): check_dataset(data_dict) # check train_path = data_dict['train'] test_path = data_dict['val'] nc = 1 if opt.single_cls else int(data_dict['nc']) # number of classes names = ['item'] if opt.single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data) # check # Model check_suffix(weights, '.pt') # check weights pretrained = weights.endswith('.pt') if pretrained: with torch_distributed_zero_first(rank): attempt_download(weights) # download if not found locally ckpt = torch.load(weights, map_location=device) # load checkpoint if hyp.get('anchors'): ckpt['model'].yaml['anchors'] = round(hyp['anchors']) # force autoanchor model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device) # create exclude = ['anchor'] if opt.cfg or hyp.get('anchors') else [] # exclude keys state_dict = ckpt['model'].float().state_dict() # to FP32 state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect model.load_state_dict(state_dict, strict=False) # load logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights)) # report else: model = Model(opt.cfg, ch=3, nc=nc).to(device) # create # Freeze freeze = [] # parameter names to freeze (full or partial) for k, v in model.named_parameters(): v.requires_grad = True # train all layers if any(x in k for x in freeze): print('freezing %s' % k) v.requires_grad = False # Optimizer nbs = 64 # nominal batch size accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay pg0, pg1, pg2 = [], [], [] # optimizer parameter groups for k, v in model.named_modules(): if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): pg2.append(v.bias) # biases if isinstance(v, nn.BatchNorm2d): pg0.append(v.weight) # no decay elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): pg1.append(v.weight) # apply decay if opt.adam: optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum else: optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay optimizer.add_param_group({'params': pg2}) # add pg2 (biases) logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) del pg0, pg1, pg2 # Scheduler https://arxiv.org/pdf/1812.01187.pdf # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp['lrf']) + hyp['lrf'] # cosine scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) # Logging if wandb and wandb.run is None: opt.hyp = hyp # add hyperparameters wandb_run = wandb.init(config=opt, resume="allow", project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem, name=save_dir.stem, id=ckpt.get('wandb_id') if 'ckpt' in locals() else None) loggers = {'wandb': wandb} # loggers dict # Resume start_epoch, best_fitness = 0, 0.0 if pretrained: # Optimizer if ckpt['optimizer'] is not None: optimizer.load_state_dict(ckpt['optimizer']) best_fitness = ckpt['best_fitness'] # Results if ckpt.get('training_results') is not None: with open(results_file, 'w') as file: file.write(ckpt['training_results']) # write results.txt # Epochs # start_epoch = ckpt['epoch'] + 1 if opt.resume: assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs) if epochs < start_epoch: logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' % (weights, ckpt['epoch'], epochs)) epochs += ckpt['epoch'] # finetune additional epochs del ckpt, state_dict # Image sizes gs = int(max(model.stride)) # grid size (max stride) imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size] # verify imgsz are gs-multiples # DP mode if cuda and rank == -1 and torch.cuda.device_count() > 1: model = torch.nn.DataParallel(model) # SyncBatchNorm if opt.sync_bn and cuda and rank != -1: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) logger.info('Using SyncBatchNorm()') # EMA ema = ModelEMA(model) if rank in [-1, 0] else None # DDP mode if cuda and rank != -1: model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank) # Trainloader dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank, world_size=opt.world_size, workers=opt.workers, image_weights=opt.image_weights,prefix=colorstr('train: ')) mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class nb = len(dataloader) # number of batches assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1) # Process 0 if rank in [-1, 0]: ema.updates = start_epoch * nb // accumulate # set EMA updates testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt, # testloader hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True, rank=-1, world_size=opt.world_size, workers=opt.workers, pad=0.5,prefix=colorstr('val: '))[0] if not opt.resume: labels = np.concatenate(dataset.labels, 0) c = torch.tensor(labels[:, 0]) # classes # cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency # model._initialize_biases(cf.to(device)) if plots: plot_labelsf(labels, save_dir, loggers) if tb_writer: tb_writer.add_histogram('classes', c, 0) # Anchors if not opt.noautoanchor: check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) # Model parameters hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou) model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights model.names = names # Start training t0 = time.time() nw = max(round(hyp['warmup_epochs'] * nb), 1000) # number of warmup iterations, max(3 epochs, 1k iterations) # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training maps = np.zeros(nc) # mAP per class results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) scheduler.last_epoch = start_epoch - 1 # do not move scaler = amp.GradScaler(enabled=cuda) logger.info('Image sizes %g train, %g test\n' 'Using %g dataloader workers\nLogging results to %s\n' 'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, save_dir, epochs)) for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ model.train() # Update image weights (optional) if opt.image_weights: # Generate indices if rank in [-1, 0]: cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx # Broadcast if DDP if rank != -1: indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int() dist.broadcast(indices, 0) if rank != 0: dataset.indices = indices.cpu().numpy() # Update mosaic border # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders mloss = torch.zeros(5, device=device) # mean losses if rank != -1: dataloader.sampler.set_epoch(epoch) pbar = enumerate(dataloader) logger.info( ('\n' + '%10s' * 9) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'landmark', 'total', 'targets', 'img_size')) if rank in [-1, 0]: pbar = tqdm(pbar, total=nb) # progress bar optimizer.zero_grad() for i, (imgs, targets, paths, _) in pbar: # batch ------------------------------------------------------------- ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 # Warmup if ni <= nw: xi = [0, nw] # x interp # model.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()) for j, x in enumerate(optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)]) if 'momentum' in x: x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) # Multi-scale if opt.multi_scale: sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size sf = sz / max(imgs.shape[2:]) # scale factor if sf != 1: ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) # Forward with amp.autocast(enabled=cuda): pred = model(imgs) # forward loss, loss_items = compute_loss(pred, targets.to(device), model) # loss scaled by batch_size if rank != -1: loss *= opt.world_size # gradient averaged between devices in DDP mode # Backward scaler.scale(loss).backward() # Optimize if ni % accumulate == 0: scaler.step(optimizer) # optimizer.step scaler.update() optimizer.zero_grad() if ema: ema.update(model) # Print if rank in [-1, 0]: mloss = (mloss * i + loss_items) / (i + 1) # update mean losses mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB) s = ('%10s' * 2 + '%10.4g' * 7) % ( '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) pbar.set_description(s) # Plot if plots and ni < 3: f = save_dir / f'train_batch{ni}.jpg' # filename Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start() # if tb_writer: # tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch) # tb_writer.add_graph(model, imgs) # add model to tensorboard elif plots and ni == 3 and wandb: wandb.log({"Mosaics": [wandb.Image(str(x), caption=x.name) for x in save_dir.glob('train*.jpg')]}) # end batch ------------------------------------------------------------------------------------------------ # end epoch ---------------------------------------------------------------------------------------------------- # Scheduler lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard scheduler.step() # DDP process 0 or single-GPU if rank in [-1, 0] and epoch >= 0: # mAP if ema: ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights']) final_epoch = epoch + 1 == epochs if not opt.notest or final_epoch: # Calculate mAP results, maps, times = test.test(opt.data, batch_size=total_batch_size, imgsz=imgsz_test, model=ema.ema, single_cls=opt.single_cls, dataloader=testloader, save_dir=save_dir, plots=False, log_imgs=opt.log_imgs if wandb else 0) # Write with open(results_file, 'a') as f: f.write(s + '%10.4g' * 7 % results + '\n') # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) if len(opt.name) and opt.bucket: os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name)) # Log tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss', # train loss 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 'val/box_loss', 'val/obj_loss', 'val/cls_loss', # val loss 'x/lr0', 'x/lr1', 'x/lr2'] # params for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags): if tb_writer: tb_writer.add_scalar(tag, x, epoch) # tensorboard if wandb: wandb.log({tag: x}) # W&B # Update best mAP fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] if fi > best_fitness: best_fitness = fi # Save model save = (not opt.nosave) or (final_epoch and not opt.evolve) if save: with open(results_file, 'r') as f: # create checkpoint ckpt = {'epoch': epoch, 'best_fitness': best_fitness, 'training_results': f.read(), 'model': ema.ema, 'optimizer': None if final_epoch else optimizer.state_dict(), 'wandb_id': wandb_run.id if wandb else None} # Save last, best and delete torch.save(ckpt, last) if best_fitness == fi: torch.save(ckpt, best) del ckpt # end epoch ---------------------------------------------------------------------------------------------------- # end training if rank in [-1, 0]: # Strip optimizers final = best if best.exists() else last # final model for f in [last, best]: if f.exists(): strip_optimizer(f) # strip optimizers if opt.bucket: os.system(f'gsutil cp {final} gs://{opt.bucket}/weights') # upload # Plots if plots: plot_resultsf(save_dir=save_dir) # save as results.png if wandb: files = ['results.png', 'precision_recall_curve.png', 'confusion_matrix.png'] wandb.log({"Results": [wandb.Image(str(save_dir / f), caption=f) for f in files if (save_dir / f).exists()]}) if opt.log_artifacts: wandb.log_artifact(artifact_or_path=str(final), type='model', name=save_dir.stem) # Test best.pt logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) for f in last, best: if f.exists(): strip_optimizer(f) # strip optimizers if f is best: LOGGER.info(f'\nValidating {f}...') results, _, _ = test.test( opt.data, batch_size=total_batch_size, imgsz=imgsz_test, conf_thres=0.001, iou_thres=0.65, model=attempt_load(final, device), single_cls=opt.single_cls, dataloader=testloader, save_dir=save_dir, save_json=False, plots=True) # val best model with plots else: dist.destroy_process_group() wandb.run.finish() if wandb and wandb.run else None torch.cuda.empty_cache() return results def parse_opt(known=False): parser = argparse.ArgumentParser() parser.add_argument('--weights', type=str, default=ROOT / 'yolov5n-lmk.pt', help='initial weights path') parser.add_argument('--cfg', type=str, default=ROOT / 'models/landmark/yolov5n-landmark.yaml',help='model.yaml path') parser.add_argument('--data', type=str, default=ROOT / 'data/widerface.yaml', help='data.yaml path') parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch.yaml', help='hyperparameters path') parser.add_argument('--epochs', type=int, default=300) parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs') parser.add_argument('--img-size', nargs='+', type=int, default=[800, 800], help='[train, test] image sizes') parser.add_argument('--rect', action='store_true', help='rectangular training') parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') parser.add_argument('--notest', action='store_true', help='only test final epoch') parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check') parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations') parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') parser.add_argument('--cache-images', action='store_true', help='cache images for faster training') parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training') parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') parser.add_argument('--multi-scale', action='store_true', default=False, help='vary img-size +/- 50%%') parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class') parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer') parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify') parser.add_argument('--log-imgs', type=int, default=16, help='number of images for W&B logging, max 100') parser.add_argument('--log-artifacts', action='store_true', help='log artifacts, i.e. final trained model') parser.add_argument('--workers', type=int, default=4, help='maximum number of dataloader workers') parser.add_argument('--project', default=ROOT / 'runs/train-lmk', help='save to project/name') parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') return parser.parse_known_args()[0] if known else parser.parse_args() def main(opt, callbacks=Callbacks()): # Set DDP variables opt.total_batch_size = opt.batch_size opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1 opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1 set_logging(str(opt.global_rank)) if opt.global_rank in [-1, 0]: check_git_status() # Resume if opt.resume and not opt.evolve: # resume from specified or most recent last.pt last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run()) opt_yaml = last.parent.parent / 'opt.yaml' # train options yaml opt_data = opt.data # original dataset if opt_yaml.is_file(): with open(opt_yaml, errors='ignore') as f: d = yaml.safe_load(f) else: d = torch.load(last, map_location='cpu')['opt'] opt = argparse.Namespace(**d) # replace opt.cfg, opt.weights, opt.resume = '', str(last), True # reinstate if is_url(opt_data): opt.data = check_file(opt_data) # avoid HUB resume auth timeout else: opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \ check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project) # checks assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified' if opt.evolve: if opt.project == str(ROOT / 'runs/train'): # if default project name, rename to runs/evolve opt.project = str(ROOT / 'runs/evolve') opt.exist_ok, opt.resume = opt.resume, False # pass resume to exist_ok and disable resume if opt.name == 'cfg': opt.name = Path(opt.cfg).stem # use model.yaml as name opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # DDP mode device = select_device(opt.device, batch_size=opt.batch_size) if opt.local_rank != -1: assert torch.cuda.device_count() > opt.local_rank torch.cuda.set_device(opt.local_rank) device = torch.device('cuda', opt.local_rank) dist.init_process_group(backend='nccl', init_method='env://') # distributed backend assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count' opt.batch_size = opt.total_batch_size // opt.world_size # Hyperparameters with open(opt.hyp) as f: hyp = yaml.load(f, Loader=yaml.FullLoader) # load hyps if 'box' not in hyp: warn('Compatibility: %s missing "box" which was renamed from "giou" in %s' % (opt.hyp, 'https://github.com/ultralytics/yolov5/pull/1120')) hyp['box'] = hyp.pop('giou') # Train logger.info(opt) if not opt.evolve: tb_writer = None # init loggers if opt.global_rank in [-1, 0]: logger.info(f'Start Tensorboard with "tensorboard --logdir {opt.project}", view at http://localhost:6006/') tb_writer = SummaryWriter(opt.save_dir) # Tensorboard train(hyp, opt, device, tb_writer, wandb) # Evolve hyperparameters (optional) else: # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit) meta = {'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3) 'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf) 'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1 'weight_decay': (1, 0.0, 0.001), # optimizer weight decay 'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok) 'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum 'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr 'box': (1, 0.02, 0.2), # box loss gain 'cls': (1, 0.2, 4.0), # cls loss gain 'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight 'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels) 'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight 'iou_t': (0, 0.1, 0.7), # IoU training threshold 'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore) 'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5) 'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction) 'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction) 'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction) 'degrees': (1, 0.0, 45.0), # image rotation (+/- deg) 'translate': (1, 0.0, 0.9), # image translation (+/- fraction) 'scale': (1, 0.0, 0.9), # image scale (+/- gain) 'shear': (1, 0.0, 10.0), # image shear (+/- deg) 'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001 'flipud': (1, 0.0, 1.0), # image flip up-down (probability) 'fliplr': (0, 0.0, 1.0), # image flip left-right (probability) 'mosaic': (1, 0.0, 1.0), # image mixup (probability) 'mixup': (1, 0.0, 1.0)} # image mixup (probability) assert opt.local_rank == -1, 'DDP mode not implemented for --evolve' opt.notest, opt.nosave = True, True # only test/save final epoch # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices yaml_file = Path(opt.save_dir) / 'hyp_evolved.yaml' # save best result here if opt.bucket: os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists for _ in range(opt.evolve): # generations to evolve if Path('evolve.txt').exists(): # if evolve.txt exists: select best hyps and mutate # Select parent(s) parent = 'single' # parent selection method: 'single' or 'weighted' x = np.loadtxt('evolve.txt', ndmin=2) n = min(5, len(x)) # number of previous results to consider x = x[np.argsort(-fitness(x))][:n] # top n mutations w = fitness(x) - fitness(x).min() + 1E-6 # weights if parent == 'single' or len(x) == 1: # x = x[random.randint(0, n - 1)] # random selection x = x[random.choices(range(n), weights=w)[0]] # weighted selection elif parent == 'weighted': x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination # Mutate mp, s = 0.8, 0.2 # mutation probability, sigma npr = np.random npr.seed(int(time.time())) g = np.array([x[0] for x in meta.values()]) # gains 0-1 ng = len(meta) v = np.ones(ng) while all(v == 1): # mutate until a change occurs (prevent duplicates) v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0) for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300) hyp[k] = float(x[i + 7] * v[i]) # mutate # Constrain to limits for k, v in meta.items(): hyp[k] = max(hyp[k], v[1]) # lower limit hyp[k] = min(hyp[k], v[2]) # upper limit hyp[k] = round(hyp[k], 5) # significant digits # Train mutation results = train(hyp.copy(), opt, device, wandb=wandb) # Write mutation results # keys = ('metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 'val/box_loss', # 'val/obj_loss', 'val/cls_loss') print_mutation(hyp.copy(), results, yaml_file, opt.bucket) # print_mutation(keys, results, hyp.copy(), save_dir, opt.bucket) # Plot results plot_evolution(yaml_file) print(f'Hyperparameter evolution complete. Best results saved as: {yaml_file}\n' f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}') def run(**kwargs): # Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt') opt = parse_opt(True) for k, v in kwargs.items(): setattr(opt, k, v) main(opt) return opt if __name__ == "__main__": opt = parse_opt() main(opt)landmark/test.py
import argparseimport jsonimport osimport sysfrom pathlib import Pathfrom threading import Thread import numpy as npimport torchimport yamlfrom tqdm import tqdm FILE = Path(__file__).resolve()ROOT = FILE.parents[1] # YOLOv5 root directoryif str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATHROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative from models.experimental import attempt_loadfrom utils.landmark.dataloaders import create_dataloaderfrom utils.general import coco80_to_coco91_class, check_dataset, check_file, check_img_size, box_iou, \ scale_boxes, xyxy2xywh, xywh2xyxy, set_logging, increment_pathfrom utils.landmark.general import non_max_suppression_facefrom utils.landmark.loss import compute_lossfrom utils.metrics import ConfusionMatrixfrom utils.landmark.metrics import ap_per_classffrom utils.landmark.plots import plot_imagesf, output_to_targetf, plot_study_txtfrom utils.torch_utils import select_device, time_sync def test(data, weights=None, batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.6, # for NMS save_json=False, single_cls=False, augment=False, verbose=False, model=None, dataloader=None, save_dir=Path(''), # for saving images save_txt=False, # for auto-labelling save_hybrid=False, # for hybrid auto-labelling save_conf=False, # save auto-label confidences plots=True, log_imgs=0): # number of logged images # Initialize/load model and set device training = model is not None if training: # called by train.py device = next(model.parameters()).device # get model device else: # called directly set_logging() device = select_device(opt.device, batch_size=batch_size) # Directories save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model model = attempt_load(weights) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99 # if device.type != 'cpu' and torch.cuda.device_count() > 1: # model = nn.DataParallel(model) # Half half = device.type != 'cpu' # half precision only supported on CUDA if half: model.half() # Configure model.eval() is_coco = data.endswith('coco.yaml') # is COCO dataset with open(data) as f: data = yaml.load(f, Loader=yaml.FullLoader) # model dict check_dataset(data) # check nc = 1 if single_cls else int(data['nc']) # number of classes iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95 niou = iouv.numel() # Logging log_imgs, wandb = min(log_imgs, 100), None # ceil try: import wandb # Weights & Biases except ImportError: log_imgs = 0 # Dataloader if not training: img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once path = data['test'] if opt.task == 'test' else data['val'] # path to val/test images dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt, pad=0.5, rect=True)[0] seen = 0 confusion_matrix = ConfusionMatrix(nc=nc) names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)} coco91class = coco80_to_coco91_class() s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95') p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0. loss = torch.zeros(3, device=device) jdict, stats, ap, ap_class, wandb_images = [], [], [], [], [] for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): img = img.to(device, non_blocking=True) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 targets = targets.to(device) nb, _, height, width = img.shape # batch size, channels, height, width with torch.no_grad(): # Run model t = time_sync() inf_out, train_out = model(img, augment=augment) # inference and training outputs t0 += time_sync() - t # Compute loss if training: loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3] # box, obj, cls # Run NMS targets[:, 2:6] *= torch.Tensor([width, height, width, height]).to(device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling t = time_sync() #output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb) output = non_max_suppression_face(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb) t1 += time_sync() - t # Statistics per image for si, pred in enumerate(output): pred = torch.cat((pred[:, :5], pred[:, 15:]), 1) # throw landmark in thresh labels = targets[targets[:, 0] == si, 1:] nl = len(labels) tcls = labels[:, 0].tolist() if nl else [] # target class path = Path(paths[si]) seen += 1 if len(pred) == 0: if nl: stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) if plots: confusion_matrix.process_batch(detections=None, labels=labels[:, 0]) continue # Predictions predn = pred.clone() scale_boxes(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1]) # native-space pred # Append to text file if save_txt: gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]] # normalization gain whwh for *xyxy, conf, cls in predn.tolist(): xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') # W&B logging if plots and len(wandb_images) < log_imgs: box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]}, "class_id": int(cls), "box_caption": "%s %.3f" % (names[cls], conf), "scores": {"class_score": conf}, "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()] boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space wandb_images.append(wandb.Image(img[si], boxes=boxes, caption=path.name)) # Append to pycocotools JSON dictionary if save_json: # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... image_id = int(path.stem) if path.stem.isnumeric() else path.stem box = xyxy2xywh(predn[:, :4]) # xywh box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner for p, b in zip(pred.tolist(), box.tolist()): jdict.append({'image_id': image_id, 'category_id': coco91class[int(p[15])] if is_coco else int(p[15]), 'bbox': [round(x, 3) for x in b], 'score': round(p[4], 5)}) # Assign all predictions as incorrect correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device) if nl: detected = [] # target indices tcls_tensor = labels[:, 0] # target boxes tbox = xywh2xyxy(labels[:, 1:5]) scale_boxes(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1]) # native-space labels if plots: confusion_matrix.process_batch(pred, torch.cat((labels[:, 0:1], tbox), 1)) # Per target class for cls in torch.unique(tcls_tensor): ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1) # prediction indices pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1) # target indices # Search for detections if pi.shape[0]: # Prediction to target ious ious, i = box_iou(predn[pi, :4], tbox[ti]).max(1) # best ious, indices # Append detections detected_set = set() for j in (ious > iouv[0]).nonzero(as_tuple=False): d = ti[i[j]] # detected target if d.item() not in detected_set: detected_set.add(d.item()) detected.append(d) correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn if len(detected) == nl: # all targets already located in image break # Append statistics (correct, conf, pcls, tcls) stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) # Plot images if plots and batch_i < 3: plot_imagesf(img, targets, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) # labels plot_imagesf(img, output_to_targetf(output), paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): p, r, ap, f1, ap_class = ap_per_classf(*stats, plot=plots, save_dir=save_dir, names=names) p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1) # [P, R, AP@0.5, AP@0.5:0.95] mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class else: nt = torch.zeros(1) # Print results pf = '%20s' + '%12.3g' * 6 # print format print(pf % ('all', seen, nt.sum(), mp, mr, map50, map)) # Print results per class if verbose and nc > 1 and len(stats): for i, c in enumerate(ap_class): print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) # Print speeds t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple if not training: print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t) # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) if wandb and wandb.run: wandb.log({"Images": wandb_images}) wandb.log({"Validation": [wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('test*.jpg'))]}) # Save JSON if save_json and len(jdict): w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights anno_json = '../coco/annotations/instances_val2017.json' # annotations json pred_json = str(save_dir / f"{w}_predictions.json") # predictions json print('\nEvaluating pycocotools mAP... saving %s...' % pred_json) with open(pred_json, 'w') as f: json.dump(jdict, f) try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api eval = COCOeval(anno, pred, 'bbox') if is_coco: eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files] # image IDs to evaluate eval.evaluate() eval.accumulate() eval.summarize() map, map50 = eval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) except Exception as e: print(f'pycocotools unable to run: {e}') # Return results if not training: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {save_dir}{s}") model.float() # for training maps = np.zeros(nc) + map for i, c in enumerate(ap_class): maps[c] = ap[i] return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t if __name__ == '__main__': parser = argparse.ArgumentParser(prog='test.py') parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5n-lmk.pt', help='model.pt path(s)') parser.add_argument('--data', type=str, default=ROOT / 'data/widerface.yaml', help='*.data path') parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch') parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)') parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold') parser.add_argument('--iou-thres', type=float, default=0.6, help='IOU threshold for NMS') parser.add_argument('--task', default='val', help="'val', 'test', 'study'") parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset') parser.add_argument('--augment', action='store_true', help='augmented inference') parser.add_argument('--verbose', action='store_true', help='report mAP by class') parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt') parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file') parser.add_argument('--project', default='runs/test', help='save to project/name') parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') opt = parser.parse_args() opt.save_json |= opt.data.endswith('coco.yaml') opt.data = check_file(opt.data) # check file print(opt) if opt.task in ['val', 'test']: # run normally test(opt.data, opt.weights, opt.batch_size, opt.img_size, opt.conf_thres, opt.iou_thres, opt.save_json, opt.single_cls, opt.augment, opt.verbose, save_txt=opt.save_txt | opt.save_hybrid, save_hybrid=opt.save_hybrid, save_conf=opt.save_conf, ) elif opt.task == 'study': # run over a range of settings and save/plot for weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']: f = 'study_%s_%s.txt' % (Path(opt.data).stem, Path(weights).stem) # filename to save to x = list(range(320, 800, 64)) # x axis y = [] # y axis for i in x: # img-size print('\nRunning %s point %s...' % (f, i)) r, _, t = test(opt.data, weights, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json, plots=False) y.append(r + t) # results and times np.savetxt(f, y, fmt='%10.4g') # save os.system('zip -r study.zip study_*.txt') plot_study_txt(f, x) # plotlandmark/predict.py
import argparseimport osimport platformimport sysfrom pathlib import Path import torch FILE = Path(__file__).resolve()ROOT = FILE.parents[1] # YOLOv5 root directoryif str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATHROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative from models.common import DetectMultiBackendfrom utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreamsfrom utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, increment_path, print_args, scale_boxes,strip_optimizer)from utils.landmark.general import non_max_suppression_face,scale_coords_landmarksfrom utils.plots import Annotator, colors, save_one_boxfrom utils.torch_utils import select_device, smart_inference_mode @smart_inference_mode()def run( weights=ROOT / 'yolov5n-lmk.pt', # model.pt path(s) source=ROOT / 'data/images', # file/dir/URL/glob/screen/0(webcam) data=ROOT / 'data/coco128.yaml', # dataset.yaml path imgsz=(640, 640), # inference size (height, width) conf_thres=0.25, # confidence threshold iou_thres=0.45, # NMS IOU threshold max_det=1000, # maximum detections per image device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu view_img=False, # show results save_txt=False, # save results to *.txt save_conf=False, # save confidences in --save-txt labels save_crop=False, # save cropped prediction boxes nosave=False, # do not save images/videos classes=None, # filter by class: --class 0, or --class 0 2 3 agnostic_nms=False, # class-agnostic NMS augment=False, # augmented inference visualize=False, # visualize features update=False, # update all models project=ROOT / 'runs/predict-lmk', # save results to project/name name='exp', # save results to project/name exist_ok=False, # existing project/name ok, do not increment line_thickness=3, # bounding box thickness (pixels) hide_labels=False, # hide labels hide_conf=False, # hide confidences half=False, # use FP16 half-precision inference dnn=False, # use OpenCV DNN for ONNX inference vid_stride=1, # video frame-rate stride): source = str(source) save_img = not nosave and not source.endswith('.txt') # save inference images is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS) is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://')) webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file) screenshot = source.lower().startswith('screen') if is_url and is_file: source = check_file(source) # download # Directories save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model device = select_device(device) model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) stride, names, pt = model.stride, model.names, model.pt imgsz = check_img_size(imgsz, s=stride) # check image size # Dataloader bs = 1 # batch_size if webcam: view_img = check_imshow() dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride) bs = len(dataset) elif screenshot: dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt) else: dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride) vid_path, vid_writer = [None] * bs, [None] * bs # Run inference model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup seen, windows, dt = 0, [], (Profile(), Profile(), Profile()) for path, im, im0s, vid_cap, s in dataset: with dt[0]: im = torch.from_numpy(im).to(model.device) im = im.half() if model.fp16 else im.float() # uint8 to fp16/32 im /= 255 # 0 - 255 to 0.0 - 1.0 if len(im.shape) == 3: im = im[None] # expand for batch dim # Inference with dt[1]: visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False pred, proto = model(im, augment=augment, visualize=visualize)[:2] # NMS with dt[2]: pred = non_max_suppression_face(pred, conf_thres, iou_thres) # Second-stage classifier (optional) # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s) # Process predictions for i, det in enumerate(pred): # per image seen += 1 if webcam: # batch_size >= 1 p, im0, frame = path[i], im0s[i].copy(), dataset.count s += f'{i}: ' else: p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # im.jpg txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # im.txt s += '%gx%g ' % im.shape[2:] # print string imc = im0.copy() if save_crop else im0 # for save_crop annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, 5].unique(): n = (det[:, 5] == c).sum() # detections per class # s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string det[:, 5:15] = scale_coords_landmarks(im.shape[2:], det[:, 5:15], im0.shape).round() # Write results for j in range(det.size()[0]): xyxy = det[j, :4].view(-1).tolist() conf = det[j, 4].cpu().detach().numpy() landmarks = det[j, 5:15].view(-1).tolist() cls = det[j, 15].cpu().detach().numpy() if save_txt: line = (cls, *xyxy, conf) if save_conf else (cls, *xyxy) with open(f'{txt_path}.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if save_img or save_crop or view_img: # Add bbox to image c = int(cls) # integer class label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}') annotator.box_label(xyxy, label, color=colors(c, True)) annotator.landmark(landmarks) if save_crop: save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) # Stream results im0 = annotator.result() if view_img: if platform.system() == 'Linux' and p not in windows: windows.append(p) cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0]) cv2.imshow(str(p), im0) cv2.waitKey(1) # 1 millisecond # Save results (image with detections) if save_img: if dataset.mode == 'image': cv2.imwrite(save_path, im0) else: # 'video' or 'stream' if vid_path[i] != save_path: # new video vid_path[i] = save_path if isinstance(vid_writer[i], cv2.VideoWriter): vid_writer[i].release() # release previous video writer if vid_cap: # video fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) else: # stream fps, w, h = 30, im0.shape[1], im0.shape[0] save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) vid_writer[i].write(im0) # Print time (inference-only) LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms") # Print results t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") if update: strip_optimizer(weights[0]) # update model (to fix SourceChangeWarning) def parse_opt(): parser = argparse.ArgumentParser() parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5n-lmk.pt', help='model path(s)') parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob/screen/0(webcam)') parser.add_argument('--data', type=str, default=ROOT / 'data/widerface.yaml', help='(optional) dataset.yaml path') parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold') parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image') parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') parser.add_argument('--view-img', action='store_true', help='show results') parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes') parser.add_argument('--nosave', action='store_true', help='do not save images/videos') parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3') parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') parser.add_argument('--augment', action='store_true', help='augmented inference') parser.add_argument('--visualize', action='store_true', help='visualize features') parser.add_argument('--update', action='store_true', help='update all models') parser.add_argument('--project', default=ROOT / 'runs/predict-lmk', help='save results to project/name') parser.add_argument('--name', default='exp', help='save results to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)') parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels') parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences') parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride') opt = parser.parse_args() opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand print_args(vars(opt)) return opt def main(opt): check_requirements(exclude=('tensorboard', 'thop')) run(**vars(opt)) if __name__ == "__main__": opt = parse_opt() main(opt) landmark/export.py
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license"""Export a YOLOv5 PyTorch model to other formats. TensorFlow exports authored by https://github.com/zldrobitFormat | `export.py --include` | Model--- | --- | ---PyTorch | - | yolov5s.ptTorchScript | `torchscript` | yolov5s.torchscriptONNX | `onnx` | yolov5s.onnxOpenVINO | `openvino` | yolov5s_openvino_model/TensorRT | `engine` | yolov5s.engineCoreML | `coreml` | yolov5s.mlmodelTensorFlow SavedModel | `saved_model` | yolov5s_saved_model/TensorFlow GraphDef | `pb` | yolov5s.pbTensorFlow Lite | `tflite` | yolov5s.tfliteTensorFlow Edge TPU | `edgetpu` | yolov5s_edgetpu.tfliteTensorFlow.js | `tfjs` | yolov5s_web_model/PaddlePaddle | `paddle` | yolov5s_paddle_model/Requirements: $ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime openvino-dev tensorflow-cpu # CPU $ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime-gpu openvino-dev tensorflow # GPUUsage: $ python export.py --weights yolov5s.pt --include torchscript onnx openvino engine coreml tflite ...Inference: $ python detect.py --weights yolov5s.pt # PyTorch yolov5s.torchscript # TorchScript yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn yolov5s.xml # OpenVINO yolov5s.engine # TensorRT yolov5s.mlmodel # CoreML (macOS-only) yolov5s_saved_model # TensorFlow SavedModel yolov5s.pb # TensorFlow GraphDef yolov5s.tflite # TensorFlow Lite yolov5s_edgetpu.tflite # TensorFlow Edge TPU yolov5s_paddle_model # PaddlePaddleTensorFlow.js: $ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example $ npm install $ ln -s ../../yolov5/yolov5s_web_model public/yolov5s_web_model $ npm start""" import argparseimport jsonimport osimport platformimport reimport subprocessimport sysimport timeimport warningsfrom pathlib import Path import pandas as pdimport torchimport torch.nn as nnfrom torch.utils.mobile_optimizer import optimize_for_mobile FILE = Path(__file__).resolve()ROOT = FILE.parents[1] # YOLOv5 root directoryif str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATHif platform.system() != 'Windows': ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative from models.experimental import attempt_loadffrom models.yolo import ClassificationModel, Detect, DetectionModel, SegmentationModel, LmkDetectfrom utils.dataloaders import LoadImagesfrom utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_version, check_yaml, colorstr, file_size, get_default_args, print_args, url2file, yaml_save)from utils.torch_utils import select_device, smart_inference_mode MACOS = platform.system() == 'Darwin' # macOS environment def export_formats(): # YOLOv5 export formats x = [ ['PyTorch', '-', '.pt', True, True], ['TorchScript', 'torchscript', '.torchscript', True, True], ['ONNX', 'onnx', '.onnx', True, True], ['OpenVINO', 'openvino', '_openvino_model', True, False], ['TensorRT', 'engine', '.engine', False, True], ['CoreML', 'coreml', '.mlmodel', True, False], ['TensorFlow SavedModel', 'saved_model', '_saved_model', True, True], ['TensorFlow GraphDef', 'pb', '.pb', True, True], ['TensorFlow Lite', 'tflite', '.tflite', True, False], ['TensorFlow Edge TPU', 'edgetpu', '_edgetpu.tflite', False, False], ['TensorFlow.js', 'tfjs', '_web_model', False, False], ['PaddlePaddle', 'paddle', '_paddle_model', True, True], ] return pd.DataFrame(x, columns=['Format', 'Argument', 'Suffix', 'CPU', 'GPU']) def try_export(inner_func): # YOLOv5 export decorator, i..e @try_export inner_args = get_default_args(inner_func) def outer_func(*args, **kwargs): prefix = inner_args['prefix'] try: with Profile() as dt: f, model = inner_func(*args, **kwargs) LOGGER.info(f'{prefix} export success ✅ {dt.t:.1f}s, saved as {f} ({file_size(f):.1f} MB)') return f, model except Exception as e: LOGGER.info(f'{prefix} export failure ❌ {dt.t:.1f}s: {e}') return None, None return outer_func @try_exportdef export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:')): # YOLOv5 TorchScript model export LOGGER.info(f'\n{prefix} starting export with torch {torch.__version__}...') f = file.with_suffix('.torchscript') ts = torch.jit.trace(model, im, strict=False) d = {"shape": im.shape, "stride": int(max(model.stride)), "names": model.names} extra_files = {'config.txt': json.dumps(d)} # torch._C.ExtraFilesMap() if optimize: # https://pytorch.org/tutorials/recipes/mobile_interpreter.html optimize_for_mobile(ts)._save_for_lite_interpreter(str(f), _extra_files=extra_files) else: ts.save(str(f), _extra_files=extra_files) return f, None @try_exportdef export_onnx(model, im, file, opset, dynamic, simplify, prefix=colorstr('ONNX:')): # YOLOv5 ONNX export check_requirements('onnx') import onnx LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...') f = file.with_suffix('.onnx') output_names = ['output0', 'output1'] if isinstance(model, SegmentationModel) else ['output0'] if dynamic: dynamic = {'images': {0: 'batch', 2: 'height', 3: 'width'}} # shape(1,3,640,640) if isinstance(model, SegmentationModel): dynamic['output0'] = {0: 'batch', 1: 'anchors'} # shape(1,25200,85) dynamic['output1'] = {0: 'batch', 2: 'mask_height', 3: 'mask_width'} # shape(1,32,160,160) elif isinstance(model, DetectionModel): dynamic['output0'] = {0: 'batch', 1: 'anchors'} # shape(1,25200,85) torch.onnx.export( model.cpu() if dynamic else model, # --dynamic only compatible with cpu im.cpu() if dynamic else im, f, verbose=False, opset_version=opset, do_constant_folding=True, input_names=['images'], output_names=output_names, dynamic_axes=dynamic or None) # Checks model_onnx = onnx.load(f) # load onnx model onnx.checker.check_model(model_onnx) # check onnx model # Metadata d = {'stride': int(max(model.stride)), 'names': model.names} for k, v in d.items(): meta = model_onnx.metadata_props.add() meta.key, meta.value = k, str(v) onnx.save(model_onnx, f) # Simplify if simplify: try: cuda = torch.cuda.is_available() check_requirements(('onnxruntime-gpu' if cuda else 'onnxruntime', 'onnx-simplifier>=0.4.1')) import onnxsim LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...') model_onnx, check = onnxsim.simplify(model_onnx) assert check, 'assert check failed' onnx.save(model_onnx, f) except Exception as e: LOGGER.info(f'{prefix} simplifier failure: {e}') return f, model_onnx @try_exportdef export_openvino(file, metadata, half, prefix=colorstr('OpenVINO:')): # YOLOv5 OpenVINO export check_requirements('openvino-dev') # requires openvino-dev: https://pypi.org/project/openvino-dev/ import openvino.inference_engine as ie LOGGER.info(f'\n{prefix} starting export with openvino {ie.__version__}...') f = str(file).replace('.pt', f'_openvino_model{os.sep}') cmd = f"mo --input_model {file.with_suffix('.onnx')} --output_dir {f} --data_type {'FP16' if half else 'FP32'}" subprocess.run(cmd.split(), check=True, env=os.environ) # export yaml_save(Path(f) / file.with_suffix('.yaml').name, metadata) # add metadata.yaml return f, None @try_exportdef export_paddle(model, im, file, metadata, prefix=colorstr('PaddlePaddle:')): # YOLOv5 Paddle export check_requirements(('paddlepaddle', 'x2paddle')) import x2paddle from x2paddle.convert import pytorch2paddle LOGGER.info(f'\n{prefix} starting export with X2Paddle {x2paddle.__version__}...') f = str(file).replace('.pt', f'_paddle_model{os.sep}') pytorch2paddle(module=model, save_dir=f, jit_type='trace', input_examples=[im]) # export yaml_save(Path(f) / file.with_suffix('.yaml').name, metadata) # add metadata.yaml return f, None @try_exportdef export_coreml(model, im, file, int8, half, prefix=colorstr('CoreML:')): # YOLOv5 CoreML export check_requirements('coremltools') import coremltools as ct LOGGER.info(f'\n{prefix} starting export with coremltools {ct.__version__}...') f = file.with_suffix('.mlmodel') ts = torch.jit.trace(model, im, strict=False) # TorchScript model ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255, bias=[0, 0, 0])]) bits, mode = (8, 'kmeans_lut') if int8 else (16, 'linear') if half else (32, None) if bits < 32: if MACOS: # quantization only supported on macOS with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=DeprecationWarning) # suppress numpy==1.20 float warning ct_model = ct.models.neural_network.quantization_utils.quantize_weights(ct_model, bits, mode) else: print(f'{prefix} quantization only supported on macOS, skipping...') ct_model.save(f) return f, ct_model @try_exportdef export_engine(model, im, file, half, dynamic, simplify, workspace=4, verbose=False, prefix=colorstr('TensorRT:')): # YOLOv5 TensorRT export https://developer.nvidia.com/tensorrt assert im.device.type != 'cpu', 'export running on CPU but must be on GPU, i.e. `python export.py --device 0`' try: import tensorrt as trt except Exception: if platform.system() == 'Linux': check_requirements('nvidia-tensorrt', cmds='-U --index-url https://pypi.ngc.nvidia.com') import tensorrt as trt if trt.__version__[0] == '7': # TensorRT 7 handling https://github.com/ultralytics/yolov5/issues/6012 grid = model.model[-1].anchor_grid model.model[-1].anchor_grid = [a[..., :1, :1, :] for a in grid] export_onnx(model, im, file, 12, dynamic, simplify) # opset 12 model.model[-1].anchor_grid = grid else: # TensorRT >= 8 check_version(trt.__version__, '8.0.0', hard=True) # require tensorrt>=8.0.0 export_onnx(model, im, file, 12, dynamic, simplify) # opset 12 onnx = file.with_suffix('.onnx') LOGGER.info(f'\n{prefix} starting export with TensorRT {trt.__version__}...') assert onnx.exists(), f'failed to export ONNX file: {onnx}' f = file.with_suffix('.engine') # TensorRT engine file logger = trt.Logger(trt.Logger.INFO) if verbose: logger.min_severity = trt.Logger.Severity.VERBOSE builder = trt.Builder(logger) config = builder.create_builder_config() config.max_workspace_size = workspace * 1 << 30 # config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30) # fix TRT 8.4 deprecation notice flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) network = builder.create_network(flag) parser = trt.OnnxParser(network, logger) if not parser.parse_from_file(str(onnx)): raise RuntimeError(f'failed to load ONNX file: {onnx}') inputs = [network.get_input(i) for i in range(network.num_inputs)] outputs = [network.get_output(i) for i in range(network.num_outputs)] for inp in inputs: LOGGER.info(f'{prefix} input "{inp.name}" with shape{inp.shape} {inp.dtype}') for out in outputs: LOGGER.info(f'{prefix} output "{out.name}" with shape{out.shape} {out.dtype}') if dynamic: if im.shape[0] <= 1: LOGGER.warning(f"{prefix} WARNING ⚠️ --dynamic model requires maximum --batch-size argument") profile = builder.create_optimization_profile() for inp in inputs: profile.set_shape(inp.name, (1, *im.shape[1:]), (max(1, im.shape[0] // 2), *im.shape[1:]), im.shape) config.add_optimization_profile(profile) LOGGER.info(f'{prefix} building FP{16 if builder.platform_has_fast_fp16 and half else 32} engine as {f}') if builder.platform_has_fast_fp16 and half: config.set_flag(trt.BuilderFlag.FP16) with builder.build_engine(network, config) as engine, open(f, 'wb') as t: t.write(engine.serialize()) return f, None @try_exportdef export_saved_model(model, im, file, dynamic, tf_nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45, conf_thres=0.25, keras=False, prefix=colorstr('TensorFlow SavedModel:')): # YOLOv5 TensorFlow SavedModel export try: import tensorflow as tf except Exception: check_requirements(f"tensorflow{'' if torch.cuda.is_available() else '-macos' if MACOS else '-cpu'}") import tensorflow as tf from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 from models.tf import TFModel LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...') f = str(file).replace('.pt', '_saved_model') batch_size, ch, *imgsz = list(im.shape) # BCHW tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz) im = tf.zeros((batch_size, *imgsz, ch)) # BHWC order for TensorFlow _ = tf_model.predict(im, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres) inputs = tf.keras.Input(shape=(*imgsz, ch), batch_size=None if dynamic else batch_size) outputs = tf_model.predict(inputs, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres) keras_model = tf.keras.Model(inputs=inputs, outputs=outputs) keras_model.trainable = False keras_model.summary() if keras: keras_model.save(f, save_format='tf') else: spec = tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype) m = tf.function(lambda x: keras_model(x)) # full model m = m.get_concrete_function(spec) frozen_func = convert_variables_to_constants_v2(m) tfm = tf.Module() tfm.__call__ = tf.function(lambda x: frozen_func(x)[:4] if tf_nms else frozen_func(x), [spec]) tfm.__call__(im) tf.saved_model.save(tfm, f, options=tf.saved_model.SaveOptions(experimental_custom_gradients=False) if check_version( tf.__version__, '2.6') else tf.saved_model.SaveOptions()) return f, keras_model @try_exportdef export_pb(keras_model, file, prefix=colorstr('TensorFlow GraphDef:')): # YOLOv5 TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow import tensorflow as tf from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...') f = file.with_suffix('.pb') m = tf.function(lambda x: keras_model(x)) # full model m = m.get_concrete_function(tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype)) frozen_func = convert_variables_to_constants_v2(m) frozen_func.graph.as_graph_def() tf.io.write_graph(graph_or_graph_def=frozen_func.graph, logdir=str(f.parent), name=f.name, as_text=False) return f, None @try_exportdef export_tflite(keras_model, im, file, int8, data, nms, agnostic_nms, prefix=colorstr('TensorFlow Lite:')): # YOLOv5 TensorFlow Lite export import tensorflow as tf LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...') batch_size, ch, *imgsz = list(im.shape) # BCHW f = str(file).replace('.pt', '-fp16.tflite') converter = tf.lite.TFLiteConverter.from_keras_model(keras_model) converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS] converter.target_spec.supported_types = [tf.float16] converter.optimizations = [tf.lite.Optimize.DEFAULT] if int8: from models.tf import representative_dataset_gen dataset = LoadImages(check_dataset(check_yaml(data))['train'], img_size=imgsz, auto=False) converter.representative_dataset = lambda: representative_dataset_gen(dataset, ncalib=100) converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] converter.target_spec.supported_types = [] converter.inference_input_type = tf.uint8 # or tf.int8 converter.inference_output_type = tf.uint8 # or tf.int8 converter.experimental_new_quantizer = True f = str(file).replace('.pt', '-int8.tflite') if nms or agnostic_nms: converter.target_spec.supported_ops.append(tf.lite.OpsSet.SELECT_TF_OPS) tflite_model = converter.convert() open(f, "wb").write(tflite_model) return f, None @try_exportdef export_edgetpu(file, prefix=colorstr('Edge TPU:')): # YOLOv5 Edge TPU export https://coral.ai/docs/edgetpu/models-intro/ cmd = 'edgetpu_compiler --version' help_url = 'https://coral.ai/docs/edgetpu/compiler/' assert platform.system() == 'Linux', f'export only supported on Linux. See {help_url}' if subprocess.run(f'{cmd} >/dev/null', shell=True).returncode != 0: LOGGER.info(f'\n{prefix} export requires Edge TPU compiler. Attempting install from {help_url}') sudo = subprocess.run('sudo --version >/dev/null', shell=True).returncode == 0 # sudo installed on system for c in ( 'curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -', 'echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | sudo tee /etc/apt/sources.list.d/coral-edgetpu.list', 'sudo apt-get update', 'sudo apt-get install edgetpu-compiler'): subprocess.run(c if sudo else c.replace('sudo ', ''), shell=True, check=True) ver = subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1] LOGGER.info(f'\n{prefix} starting export with Edge TPU compiler {ver}...') f = str(file).replace('.pt', '-int8_edgetpu.tflite') # Edge TPU model f_tfl = str(file).replace('.pt', '-int8.tflite') # TFLite model cmd = f"edgetpu_compiler -s -d -k 10 --out_dir {file.parent} {f_tfl}" subprocess.run(cmd.split(), check=True) return f, None @try_exportdef export_tfjs(file, prefix=colorstr('TensorFlow.js:')): # YOLOv5 TensorFlow.js export check_requirements('tensorflowjs') import tensorflowjs as tfjs LOGGER.info(f'\n{prefix} starting export with tensorflowjs {tfjs.__version__}...') f = str(file).replace('.pt', '_web_model') # js dir f_pb = file.with_suffix('.pb') # *.pb path f_json = f'{f}/model.json' # *.json path cmd = f'tensorflowjs_converter --input_format=tf_frozen_model ' \ f'--output_node_names=Identity,Identity_1,Identity_2,Identity_3 {f_pb} {f}' subprocess.run(cmd.split()) json = Path(f_json).read_text() with open(f_json, 'w') as j: # sort JSON Identity_* in ascending order subst = re.sub( r'{"outputs": {"Identity.?.?": {"name": "Identity.?.?"}, ' r'"Identity.?.?": {"name": "Identity.?.?"}, ' r'"Identity.?.?": {"name": "Identity.?.?"}, ' r'"Identity.?.?": {"name": "Identity.?.?"}}}', r'{"outputs": {"Identity": {"name": "Identity"}, ' r'"Identity_1": {"name": "Identity_1"}, ' r'"Identity_2": {"name": "Identity_2"}, ' r'"Identity_3": {"name": "Identity_3"}}}', json) j.write(subst) return f, None @smart_inference_mode()def run( data=ROOT / 'data/coco128.yaml', # 'dataset.yaml path' weights=ROOT / 'yolov5s.pt', # weights path imgsz=(640, 640), # image (height, width) batch_size=1, # batch size device='cpu', # cuda device, i.e. 0 or 0,1,2,3 or cpu include=('torchscript', 'onnx'), # include formats half=False, # FP16 half-precision export inplace=False, # set YOLOv5 Detect() inplace=True keras=False, # use Keras optimize=False, # TorchScript: optimize for mobile int8=False, # CoreML/TF INT8 quantization dynamic=False, # ONNX/TF/TensorRT: dynamic axes simplify=False, # ONNX: simplify model opset=12, # ONNX: opset version verbose=False, # TensorRT: verbose log workspace=4, # TensorRT: workspace size (GB) nms=False, # TF: add NMS to model agnostic_nms=False, # TF: add agnostic NMS to model topk_per_class=100, # TF.js NMS: topk per class to keep topk_all=100, # TF.js NMS: topk for all classes to keep iou_thres=0.45, # TF.js NMS: IoU threshold conf_thres=0.25, # TF.js NMS: confidence threshold): t = time.time() include = [x.lower() for x in include] # to lowercase fmts = tuple(export_formats()['Argument'][1:]) # --include arguments flags = [x in include for x in fmts] assert sum(flags) == len(include), f'ERROR: Invalid --include {include}, valid --include arguments are {fmts}' jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle = flags # export booleans file = Path(url2file(weights) if str(weights).startswith(('http:/', 'https:/')) else weights) # PyTorch weights # Load PyTorch model device = select_device(device) if half: assert device.type != 'cpu' or coreml, '--half only compatible with GPU export, i.e. use --device 0' assert not dynamic, '--half not compatible with --dynamic, i.e. use either --half or --dynamic but not both' model = attempt_loadf(weights, device=device, inplace=True, fuse=True) # load FP32 model # Checks imgsz *= 2 if len(imgsz) == 1 else 1 # expand if optimize: assert device.type == 'cpu', '--optimize not compatible with cuda devices, i.e. use --device cpu' # Input gs = int(max(model.stride)) # grid size (max stride) imgsz = [check_img_size(x, gs) for x in imgsz] # verify img_size are gs-multiples im = torch.zeros(batch_size, 3, *imgsz).to(device) # image size(1,3,320,192) BCHW iDetection # Update model model.eval() for k, m in model.named_modules(): m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility if isinstance(m, (Detect, LmkDetect)): m.inplace = inplace m.dynamic = dynamic m.export = True for _ in range(2): y = model(im) # dry runs if half and not coreml: im, model = im.half(), model.half() # to FP16 shape = tuple((y[0] if isinstance(y, tuple) else y).shape) # model output shape metadata = {'stride': int(max(model.stride)), 'names': model.names} # model metadata LOGGER.info(f"\n{colorstr('PyTorch:')} starting from {file} with output shape {shape} ({file_size(file):.1f} MB)") # Exports f = [''] * len(fmts) # exported filenames warnings.filterwarnings(action='ignore', category=torch.jit.TracerWarning) # suppress TracerWarning if jit: # TorchScript f[0], _ = export_torchscript(model, im, file, optimize) if engine: # TensorRT required before ONNX f[1], _ = export_engine(model, im, file, half, dynamic, simplify, workspace, verbose) if onnx or xml: # OpenVINO requires ONNX f[2], _ = export_onnx(model, im, file, opset, dynamic, simplify) if xml: # OpenVINO f[3], _ = export_openvino(file, metadata, half) if coreml: # CoreML f[4], _ = export_coreml(model, im, file, int8, half) if any((saved_model, pb, tflite, edgetpu, tfjs)): # TensorFlow formats assert not tflite or not tfjs, 'TFLite and TF.js models must be exported separately, please pass only one type.' assert not isinstance(model, ClassificationModel), 'ClassificationModel export to TF formats not yet supported.' f[5], s_model = export_saved_model(model.cpu(), im, file, dynamic, tf_nms=nms or agnostic_nms or tfjs, agnostic_nms=agnostic_nms or tfjs, topk_per_class=topk_per_class, topk_all=topk_all, iou_thres=iou_thres, conf_thres=conf_thres, keras=keras) if pb or tfjs: # pb prerequisite to tfjs f[6], _ = export_pb(s_model, file) if tflite or edgetpu: f[7], _ = export_tflite(s_model, im, file, int8 or edgetpu, data=data, nms=nms, agnostic_nms=agnostic_nms) if edgetpu: f[8], _ = export_edgetpu(file) if tfjs: f[9], _ = export_tfjs(file) if paddle: # PaddlePaddle f[10], _ = export_paddle(model, im, file, metadata) # Finish f = [str(x) for x in f if x] # filter out '' and None if any(f): cls, det, seg = (isinstance(model, x) for x in (ClassificationModel, DetectionModel, SegmentationModel)) # type dir = Path('segment' if seg else 'classify' if cls else '') h = '--half' if half else '' # --half FP16 inference arg s = "# WARNING ⚠️ ClassificationModel not yet supported for PyTorch Hub AutoShape inference" if cls else \ "# WARNING ⚠️ SegmentationModel not yet supported for PyTorch Hub AutoShape inference" if seg else '' LOGGER.info(f'\nExport complete ({time.time() - t:.1f}s)' f"\nResults saved to {colorstr('bold', file.parent.resolve())}" f"\nDetect: python {dir / ('detect.py' if det else 'predict.py')} --weights {f[-1]} {h}" f"\nValidate: python {dir / 'val.py'} --weights {f[-1]} {h}" f"\nPyTorch Hub: model = torch.hub.load('ultralytics/yolov5', 'custom', '{f[-1]}') {s}" f"\nVisualize: https://netron.app") return f # return list of exported files/dirs def parse_opt(): parser = argparse.ArgumentParser() parser.add_argument('--data', type=str, default=ROOT / 'data/widerface.yaml', help='dataset.yaml path') parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5n-lmk.pt', help='model.pt path(s)') parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640, 640], help='image (h, w)') parser.add_argument('--batch-size', type=int, default=1, help='batch size') parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') parser.add_argument('--half', action='store_true', help='FP16 half-precision export') parser.add_argument('--inplace', action='store_true', help='set YOLOv5 Detect() inplace=True') parser.add_argument('--keras', action='store_true', help='TF: use Keras') parser.add_argument('--optimize', action='store_true', help='TorchScript: optimize for mobile') parser.add_argument('--int8', action='store_true', help='CoreML/TF INT8 quantization') parser.add_argument('--dynamic', action='store_true', help='ONNX/TF/TensorRT: dynamic axes') parser.add_argument('--simplify', action='store_true', help='ONNX: simplify model') parser.add_argument('--opset', type=int, default=11, help='ONNX: opset version') parser.add_argument('--verbose', action='store_true', help='TensorRT: verbose log') parser.add_argument('--workspace', type=int, default=4, help='TensorRT: workspace size (GB)') parser.add_argument('--nms', action='store_true', help='TF: add NMS to model') parser.add_argument('--agnostic-nms', action='store_true', help='TF: add agnostic NMS to model') parser.add_argument('--topk-per-class', type=int, default=100, help='TF.js NMS: topk per class to keep') parser.add_argument('--topk-all', type=int, default=100, help='TF.js NMS: topk for all classes to keep') parser.add_argument('--iou-thres', type=float, default=0.45, help='TF.js NMS: IoU threshold') parser.add_argument('--conf-thres', type=float, default=0.25, help='TF.js NMS: confidence threshold') parser.add_argument( '--include', nargs='+', default=['onnx'], help='torchscript, onnx, openvino, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle') opt = parser.parse_args() print_args(vars(opt)) return opt def main(opt): for opt.weights in (opt.weights if isinstance(opt.weights, list) else [opt.weights]): run(**vars(opt)) if __name__ == "__main__": opt = parse_opt() main(opt)添加完以上三个文件后在utils文件夹中添加landmark文件夹


utils/landmark/general.py
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license"""General utils""" import time import torchimport torchvision from ..metrics import box_ioufrom ..general import xywh2xyxy # for face landmarkdef non_max_suppression_face(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, labels=()): """Performs Non-Maximum Suppression (NMS) on inference results Returns: detections with shape: nx16 (x1, y1, x2, y2, conf, keypoint*10, cls) """ nc = prediction.shape[2] - 15 # number of classes xc = prediction[..., 4] > conf_thres # candidates # Settings min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height time_limit = 10.0 # seconds to quit after redundant = True # require redundant detections multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img) merge = False # use merge-NMS t = time.time() output = [torch.zeros((0, 16), device=prediction.device)] * prediction.shape[0] for xi, x in enumerate(prediction): # image index, image inference # Apply constraints # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height x = x[xc[xi]] # confidence # Cat apriori labels if autolabelling if labels and len(labels[xi]): l = labels[xi] v = torch.zeros((len(l), nc + 15), device=x.device) v[:, :4] = l[:, 1:5] # box v[:, 4] = 1.0 # conf v[range(len(l)), l[:, 0].long() + 15] = 1.0 # cls x = torch.cat((x, v), 0) # If none remain process next image if not x.shape[0]: continue # Compute conf x[:, 15:] *= x[:, 4:5] # conf = obj_conf * cls_conf # Box (center x, center y, width, height) to (x1, y1, x2, y2) box = xywh2xyxy(x[:, :4]) # Detections matrix nx6 (xyxy, conf, landmarks, cls) if multi_label: i, j = (x[:, 15:] > conf_thres).nonzero(as_tuple=False).T x = torch.cat((box[i], x[i, j + 15, None], x[i, 5:15], j[:, None].float()), 1) else: # best class only conf, j = x[:, 15:].max(1, keepdim=True) x = torch.cat((box, conf, x[:, 5:15], j.float()), 1)[conf.view(-1) > conf_thres] # Filter by class if classes is not None: x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] # If none remain process next image n = x.shape[0] # number of boxes if not n: continue # Batched NMS c = x[:, 15:16] * (0 if agnostic else max_wh) # classes boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS # if i.shape[0] > max_det: # limit detections # i = i[:max_det] if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix weights = iou * scores[None] # box weights x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes if redundant: i = i[iou.sum(1) > 1] # require redundancy output[xi] = x[i] if (time.time() - t) > time_limit: break # time limit exceeded return output def scale_coords_landmarks(img1_shape, coords, img0_shape, ratio_pad=None): # Rescale coords (xyxy) from img1_shape to img0_shape if ratio_pad is None: # calculate from img0_shape gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding else: gain = ratio_pad[0][0] pad = ratio_pad[1] coords[:, [0, 2, 4, 6, 8]] -= pad[0] # x padding coords[:, [1, 3, 5, 7, 9]] -= pad[1] # y padding coords[:, :10] /= gain # clip_coords(coords, img0_shape) coords[:, 0].clamp_(0, img0_shape[1]) # x1 coords[:, 1].clamp_(0, img0_shape[0]) # y1 coords[:, 2].clamp_(0, img0_shape[1]) # x2 coords[:, 3].clamp_(0, img0_shape[0]) # y2 coords[:, 4].clamp_(0, img0_shape[1]) # x3 coords[:, 5].clamp_(0, img0_shape[0]) # y3 coords[:, 6].clamp_(0, img0_shape[1]) # x4 coords[:, 7].clamp_(0, img0_shape[0]) # y4 coords[:, 8].clamp_(0, img0_shape[1]) # x5 coords[:, 9].clamp_(0, img0_shape[0]) # y5 return coordsutils/landmark/loss.py
import numpy as npimport torchimport torch.nn as nn from ..loss import FocalLoss, smooth_BCEfrom ..landmark.metrics import bbox_iouffrom ..torch_utils import is_parallel class WingLoss(nn.Module): def __init__(self, w=10, e=2): super(WingLoss, self).__init__() # https://arxiv.org/pdf/1711.06753v4.pdf Figure 5 self.w = w self.e = e self.C = self.w - self.w * np.log(1 + self.w / self.e) def forward(self, x, t, sigma=1): weight = torch.ones_like(t) weight[torch.where(t == -1)] = 0 diff = weight * (x - t) abs_diff = diff.abs() flag = (abs_diff.data < self.w).float() y = flag * self.w * torch.log(1 + abs_diff / self.e) + (1 - flag) * (abs_diff - self.C) return y.sum() class LandmarksLoss(nn.Module): # BCEwithLogitLoss() with reduced missing label effects. def __init__(self, alpha=1.0): super(LandmarksLoss, self).__init__() self.loss_fcn = WingLoss() # nn.SmoothL1Loss(reduction='sum') self.alpha = alpha def forward(self, pred, truel, mask): loss = self.loss_fcn(pred * mask, truel * mask) return loss / (torch.sum(mask) + 10e-14) def compute_loss(p, targets, model): # predictions, targets, model device = targets.device lcls, lbox, lobj, lmark = torch.zeros(1, device=device), \ torch.zeros(1, device=device), \ torch.zeros(1,device=device), \ torch.zeros(1, device=device) tcls, tbox, indices, anchors, tlandmarks, lmks_mask = build_targets(p, targets, model) # targets h = model.hyp # hyperparameters # Define criteria BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) # weight=model.class_weights) BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) landmarks_loss = LandmarksLoss(1.0) # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 cp, cn = smooth_BCE(eps=0.0) # Focal loss g = h['fl_gamma'] # focal loss gamma if g > 0: BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) # Losses nt = 0 # number of targets no = len(p) # number of outputs balance = [4.0, 1.0, 0.4] if no == 3 else [4.0, 1.0, 0.4, 0.1] # P3-5 or P3-6 for i, pi in enumerate(p): # layer index, layer predictions b, a, gj, gi = indices[i] # image, anchor, gridy, gridx tobj = torch.zeros_like(pi[..., 0], device=device) # target obj n = b.shape[0] # number of targets if n: nt += n # cumulative targets l_ = [b, a, gj, gi] ps = pi[b, a, gj, gi] # prediction subset corresponding to targets # Regression pxy = ps[:, :2].sigmoid() * 2. - 0.5 pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] pbox = torch.cat((pxy, pwh), 1) # predicted box iou = bbox_iouf(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target) lbox += (1.0 - iou).mean() # iou loss # Objectness # 获取target所对应的obj,网格中存在gt目标的会被标记为iou与gt的交并比 (这个obj相当于前景和背景的关系) tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio # Classification if model.nc > 1: # cls loss (only if multiple classes) t = torch.full_like(ps[:, 15:], cn, device=device) # targets t[range(n), tcls[i]] = cp lcls += BCEcls(ps[:, 15:], t) # BCE # Append targets to text file # with open('targets.txt', 'a') as file: # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] # landmarks loss # plandmarks = ps[:,5:15].sigmoid() * 8. - 4. plandmarks = ps[:, 5:15] plandmarks[:, 0:2] = plandmarks[:, 0:2] * anchors[i] plandmarks[:, 2:4] = plandmarks[:, 2:4] * anchors[i] plandmarks[:, 4:6] = plandmarks[:, 4:6] * anchors[i] plandmarks[:, 6:8] = plandmarks[:, 6:8] * anchors[i] plandmarks[:, 8:10] = plandmarks[:, 8:10] * anchors[i] lmark += landmarks_loss(plandmarks, tlandmarks[i], lmks_mask[i]) lobj += BCEobj(pi[..., 4], tobj) * balance[i] # obj loss s = 3 / no # output count scaling lbox *= h['box'] * s lobj *= h['obj'] * s * (1.4 if no == 4 else 1.) lcls *= h['cls'] * s lmark *= h['landmark'] * s bs = tobj.shape[0] # batch size loss = lbox + lobj + lcls + lmark return loss * bs, torch.cat((lbox, lobj, lcls, lmark, loss)).detach() def build_targets(p, targets, model): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module na, nt = det.na, targets.shape[0] # number of anchors, targets tcls, tbox, indices, anch, landmarks, lmks_mask = [], [], [], [], [], [] # gain = torch.ones(7, device=targets.device) # normalized to gridspace gain gain = torch.ones(17, device=targets.device) ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices g = 0.5 # bias off = torch.tensor([[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm ], device=targets.device).float() * g # offsets for i in range(det.nl): anchors = det.anchors[i] gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain # landmarks 10 gain[6:16] = torch.tensor(p[i].shape)[[3, 2, 3, 2, 3, 2, 3, 2, 3, 2]] # xyxy gain # Match targets to anchors t = targets * gain if nt: # Matches r = t[:, :, 4:6] / anchors[:, None] # wh ratio j = torch.max(r, 1. / r).max(2)[0] < model.hyp['anchor_t'] # compare # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) t = t[j] # filter # Offsets gxy = t[:, 2:4] # grid xy gxi = gain[[2, 3]] - gxy # inverse j, k = ((gxy % 1. < g) & (gxy > 1.)).T l, m = ((gxi % 1. < g) & (gxi > 1.)).T j = torch.stack((torch.ones_like(j), j, k, l, m)) t = t.repeat((5, 1, 1))[j] offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] else: t = targets[0] offsets = 0 # Define b, c = t[:, :2].long().T # image, class gxy = t[:, 2:4] # grid xy gwh = t[:, 4:6] # grid wh gij = (gxy - offsets).long() gi, gj = gij.T # grid xy indices # Append a = t[:, 16].long() # anchor indices indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices tbox.append(torch.cat((gxy - gij, gwh), 1)) # box anch.append(anchors[a]) # anchors tcls.append(c) # class # landmarks lks = t[:, 6:16] # lks_mask = lks > 0 # lks_mask = lks_mask.float() lks_mask = torch.where(lks < 0, torch.full_like(lks, 0.), torch.full_like(lks, 1.0)) # 应该是关键点的坐标除以anch的宽高才对,便于模型学习。使用gwh会导致不同关键点的编码不同,没有统一的参考标准 lks[:, [0, 1]] = (lks[:, [0, 1]] - gij) lks[:, [2, 3]] = (lks[:, [2, 3]] - gij) lks[:, [4, 5]] = (lks[:, [4, 5]] - gij) lks[:, [6, 7]] = (lks[:, [6, 7]] - gij) lks[:, [8, 9]] = (lks[:, [8, 9]] - gij) lks_mask_new = lks_mask lmks_mask.append(lks_mask_new) landmarks.append(lks) # print('lks: ', lks.size()) return tcls, tbox, indices, anch, landmarks, lmks_mask utils/landmark/metrics.py
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license"""Model validation metrics""" import mathfrom pathlib import Path import numpy as npimport torch from ..metrics import plot_pr_curve, compute_ap def ap_per_classf(tp, conf, pred_cls, target_cls, plot=False, save_dir='precision-recall_curve.png', names=(), prefix=""): """ Compute the average precision, given the recall and precision curves. Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. # Arguments tp: True positives (nparray, nx1 or nx10). conf: Objectness value from 0-1 (nparray). pred_cls: Predicted object classes (nparray). target_cls: True object classes (nparray). plot: Plot precision-recall curve at mAP@0.5 save_dir: Plot save directory # Returns The average precision as computed in py-faster-rcnn. """ # Sort by objectness i = np.argsort(-conf) tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] # Find unique classes unique_classes = np.unique(target_cls) # Create Precision-Recall curve and compute AP for each class px, py = np.linspace(0, 1, 1000), [] # for plotting pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898 s = [unique_classes.shape[0], tp.shape[1]] # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95) ap, p, r = np.zeros(s), np.zeros(s), np.zeros(s) for ci, c in enumerate(unique_classes): i = pred_cls == c n_l = (target_cls == c).sum() # number of labels n_p = i.sum() # number of predictions if n_p == 0 or n_l == 0: continue else: # Accumulate FPs and TPs fpc = (1 - tp[i]).cumsum(0) tpc = tp[i].cumsum(0) # Recall recall = tpc / (n_l + 1e-16) # recall curve r[ci] = np.interp(-pr_score, -conf[i], recall[:, 0]) # r at pr_score, negative x, xp because xp decreases # Precision precision = tpc / (tpc + fpc) # precision curve p[ci] = np.interp(-pr_score, -conf[i], precision[:, 0]) # p at pr_score # AP from recall-precision curve for j in range(tp.shape[1]): ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) if plot and (j == 0): py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 # Compute F1 score (harmonic mean of precision and recall) f1 = 2 * p * r / (p + r + 1e-16) if plot: # plot_pr_curve(px, py, ap, save_dir, names) plot_pr_curve(px, py, ap, Path(save_dir) / f'{prefix}PR_curve.png', names) return p, r, ap, f1, unique_classes.astype('int32') def bbox_iouf(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-9): # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4 box2 = box2.T # Get the coordinates of bounding boxes if x1y1x2y2: # x1, y1, x2, y2 = box1 b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] else: # transform from xywh to xyxy b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 # Intersection area inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) # Union Area w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps union = w1 * h1 + w2 * h2 - inter + eps iou = inter / union if GIoU or DIoU or CIoU: # convex (smallest enclosing box) width cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared if DIoU: return iou - rho2 / c2 # DIoU elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 v = (4 / math.pi ** 2) * \ torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) with torch.no_grad(): alpha = v / ((1 + eps) - iou + v) return iou - (rho2 / c2 + v * alpha) # CIoU else: # GIoU https://arxiv.org/pdf/1902.09630.pdf c_area = cw * ch + eps # convex area return iou - (c_area - union) / c_area # GIoU else: return iou # IoUutils/landmark/plots.py
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license"""Plotting utils""" import mathimport osimport randomfrom pathlib import Path import cv2import matplotlibimport matplotlib.pyplot as pltimport numpy as npimport pandas as pdimport seaborn as snimport torchimport yamlfrom PIL import Image from ..metrics import fitnessfrom .. import threadedfrom ..general import xywh2xyxy,xyxy2xywhfrom ..plots import Annotator, colors,hist2d # add for landmarkdef plot_evolution(yaml_file='data/hyp.finetune.yaml'): # from utils.plots import *; plot_evolution() # Plot hyperparameter evolution results in evolve.txt with open(yaml_file) as f: hyp = yaml.load(f, Loader=yaml.SafeLoader) x = np.loadtxt('evolve.txt', ndmin=2) f = fitness(x) # weights = (f - f.min()) ** 2 # for weighted results plt.figure(figsize=(10, 12), tight_layout=True) matplotlib.rc('font', **{'size': 8}) for i, (k, v) in enumerate(hyp.items()): y = x[:, i + 7] # mu = (y * weights).sum() / weights.sum() # best weighted result mu = y[f.argmax()] # best single result plt.subplot(6, 5, i + 1) plt.scatter(y, f, c=hist2d(y, f, 20), cmap='viridis', alpha=.8, edgecolors='none') plt.plot(mu, f.max(), 'k+', markersize=15) plt.title('%s = %.3g' % (k, mu), fontdict={'size': 9}) # limit to 40 characters if i % 5 != 0: plt.yticks([]) print('%15s: %.3g' % (k, mu)) plt.savefig('evolve.png', dpi=200) print('\nPlot saved as evolve.png') def plot_study_txt(path='study/', x=None): # from utils.plots import *; plot_study_txt() # Plot study.txt generated by test.py fig, ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True) ax = ax.ravel() fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True) for f in [Path(path) / f'study_coco_{x}.txt' for x in ['yolov5s', 'yolov5m', 'yolov5l', 'yolov5x']]: y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T x = np.arange(y.shape[1]) if x is None else np.array(x) s = ['P', 'R', 'mAP@.5', 'mAP@.5:.95', 't_inference (ms/img)', 't_NMS (ms/img)', 't_total (ms/img)'] for i in range(7): ax[i].plot(x, y[i], '.-', linewidth=2, markersize=8) ax[i].set_title(s[i]) j = y[3].argmax() + 1 ax2.plot(y[6, :j], y[3, :j] * 1E2, '.-', linewidth=2, markersize=8, label=f.stem.replace('study_coco_', '').replace('yolo', 'YOLO')) ax2.plot(1E3 / np.array([209, 140, 97, 58, 35, 18]), [34.6, 40.5, 43.0, 47.5, 49.7, 51.5], 'k.-', linewidth=2, markersize=8, alpha=.25, label='EfficientDet') ax2.grid() ax2.set_yticks(np.arange(30, 60, 5)) ax2.set_xlim(0, 30) ax2.set_ylim(29, 51) ax2.set_xlabel('GPU Speed (ms/img)') ax2.set_ylabel('COCO AP val') ax2.legend(loc='lower right') plt.savefig('test_study.png', dpi=300) def plot_resultsf(start=0, stop=0, bucket='', id=(), labels=(), save_dir=''): # Plot training 'results*.txt'. from utils.plots import *; plot_results(save_dir='runs/train/exp') fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True) ax = ax.ravel() s = ['Box', 'Objectness', 'Classification', 'Precision', 'Recall', 'val Box', 'val Objectness', 'val Classification', 'mAP@0.5', 'mAP@0.5:0.95'] if bucket: # files = ['https://storage.googleapis.com/%s/results%g.txt' % (bucket, x) for x in id] files = ['results%g.txt' % x for x in id] c = ('gsutil cp ' + '%s ' * len(files) + '.') % tuple('gs://%s/results%g.txt' % (bucket, x) for x in id) os.system(c) else: files = list(Path(save_dir).glob('results*.txt')) assert len(files), 'No results.txt files found in %s, nothing to plot.' % os.path.abspath(save_dir) for fi, f in enumerate(files): try: results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T n = results.shape[1] # number of rows x = range(start, min(stop, n) if stop else n) for i in range(10): y = results[i, x] if i in [0, 1, 2, 5, 6, 7]: y[y == 0] = np.nan # don't show zero loss values # y /= y[0] # normalize label = labels[fi] if len(labels) else f.stem ax[i].plot(x, y, marker='.', label=label, linewidth=2, markersize=8) ax[i].set_title(s[i]) # if i in [5, 6, 7]: # share train and val loss y axes # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) except Exception as e: print('Warning: Plotting error for %s; %s' % (f, e)) ax[1].legend() fig.savefig(Path(save_dir) / 'results.png', dpi=200) def plot_one_box(x, img, color=None, label=None, line_thickness=None): # Plots one bounding box on image img tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness color = color or [random.randint(0, 255) for _ in range(3)] c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) if label: tf = max(tl - 1, 1) # font thickness t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) def plot_labelsf(labels, save_dir=Path(''), loggers=None): # plot dataset labels print('Plotting labels... ') c, b = labels[:, 0], labels[:, 1:5].transpose() # classes, boxes nc = int(c.max() + 1) # number of classes x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height']) # seaborn correlogram sn.pairplot(x, corner=True, diag_kind='auto', kind='hist', diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9)) plt.savefig(save_dir / 'labels_correlogram.jpg', dpi=200) plt.close() # matplotlib labels matplotlib.use('svg') # faster ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel() ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8) ax[0].set_xlabel('classes') sn.histplot(x, x='x', y='y', ax=ax[2], bins=50, pmax=0.9) sn.histplot(x, x='width', y='height', ax=ax[3], bins=50, pmax=0.9) # rectangles labels[:, 1:3] = 0.5 # center labels[:, 1:] = xywh2xyxy(labels[:, 1:]) * 2000 img = Image.fromarray(np.ones((2000, 2000, 3), dtype=np.uint8) * 255) # for cls, *box in labels[:1000]: # ImageDraw.Draw(img).rectangle(box, width=1, outline=colors[int(cls) % 10]) # plot ax[1].imshow(img) ax[1].axis('off') for a in [0, 1, 2, 3]: for s in ['top', 'right', 'left', 'bottom']: ax[a].spines[s].set_visible(False) plt.savefig(save_dir / 'labels.jpg', dpi=200) matplotlib.use('Agg') plt.close() # loggers for k, v in loggers.items() or {}: if k == 'wandb' and v: v.log({"Labels": [v.Image(str(x), caption=x.name) for x in save_dir.glob('*labels*.jpg')]}) def output_to_targetf(output): # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] targets = [] for i, o in enumerate(output): # for *box, conf, cls in o.cpu().numpy(): # targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf]) for item in o.cpu().numpy(): # (x, y, w, h), conf, *keypoints, cls box = item[:4] conf = item[4] keypoints = item[5:15] cls = item[15] targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])),*keypoints, conf]) return np.array(targets) @threadeddef plot_imagesf(images, targets, paths=None, fname='images.jpg', names=None): # Plot image grid with labels if isinstance(images, torch.Tensor): images = images.cpu().float().numpy() if isinstance(targets, torch.Tensor): targets = targets.cpu().numpy() max_size = 1920 # max image size max_subplots = 16 # max image subplots, i.e. 4x4 bs, _, h, w = images.shape # batch size, _, height, width bs = min(bs, max_subplots) # limit plot images ns = np.ceil(bs ** 0.5) # number of subplots (square) if np.max(images[0]) <= 1: images *= 255 # de-normalise (optional) # Build Image mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init for i, im in enumerate(images): if i == max_subplots: # if last batch has fewer images than we expect break x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin im = im.transpose(1, 2, 0) mosaic[y:y + h, x:x + w, :] = im # Resize (optional) scale = max_size / ns / max(h, w) if scale < 1: h = math.ceil(scale * h) w = math.ceil(scale * w) mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) # Annotate fs = int((h + w) * ns * 0.01) # font size annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names) for i in range(i + 1): x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders if paths: annotator.text((x + 5, y + 5), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames if len(targets) > 0: ti = targets[targets[:, 0] == i] # image targets boxes = xywh2xyxy(ti[:, 2:6]).T classes = ti[:, 1].astype('int') labels = ti.shape[1] == 16 # labels if no conf column conf = None if labels else ti[:, 16] # check for confidence presence (label vs pred) keypoints = ti[:, 6:16].T if boxes.shape[1]: if boxes.max() <= 1.01: # if normalized with tolerance 0.01 boxes[[0, 2]] *= w # scale to pixels boxes[[1, 3]] *= h elif scale < 1: # absolute coords need scale if image scales boxes *= scale boxes[[0, 2]] += x boxes[[1, 3]] += y if keypoints.shape[1]: if keypoints.max() <= 1.01: # if normalized with tolerance 0.01 keypoints[[0,2,4,6,8]] *= w # scale to pixels keypoints[[1,3,5,7,9]] *= h elif scale < 1: # absolute coords need scale if image scales keypoints *= scale keypoints[[0,2,4,6,8]] += x keypoints[[1,3,5,7,9]] += y for j, (box,kpy) in enumerate(zip(boxes.T.tolist(),keypoints.T.tolist())): cls = classes[j] color = colors(cls) cls = names[cls] if names else cls if labels or conf[j] > 0.25: # 0.25 conf thresh label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}' annotator.box_label(box, label, color=color) annotator.landmark(kpy) annotator.im.save(fname) # saveutils/plots.py还有个函数需要修改
class Annotator: # YOLOv5 Annotator for train/val mosaics and jpgs and detect/hub inference annotations def __init__(self, im, line_width=None, font_size=None, font='Arial.ttf', pil=False, example='abc'): assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images.' non_ascii = not is_ascii(example) # non-latin labels, i.e. asian, arabic, cyrillic self.pil = pil or non_ascii if self.pil: # use PIL self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) self.draw = ImageDraw.Draw(self.im) self.font = check_pil_font(font='Arial.Unicode.ttf' if non_ascii else font, size=font_size or max(round(sum(self.im.size) / 2 * 0.035), 12)) else: # use cv2 self.im = im self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)): # Add one xyxy box to image with label if self.pil or not is_ascii(label): self.draw.rectangle(box, width=self.lw, outline=color) # box if label: w, h = self.font.getsize(label) # text width, height outside = box[1] - h >= 0 # label fits outside box self.draw.rectangle( (box[0], box[1] - h if outside else box[1], box[0] + w + 1, box[1] + 1 if outside else box[1] + h + 1), fill=color, ) # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls') # for PIL>8.0 self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font) else: # cv2 p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA) if label: tf = max(self.lw - 1, 1) # font thickness w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0] # text width, height outside = p1[1] - h >= 3 p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3 cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA) # filled cv2.putText(self.im, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, self.lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA) # 新增landmark def landmark(self,landmarks): clors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255)] if self.pil: for i in range(len(landmarks)//2): point_x = int(landmarks[2 * i]) point_y = int(landmarks[2 * i + 1]) self.draw.point((point_x,point_y),clors[i]) else: # cv2 h, w, c = self.im.shape tl = 1 or round(0.002 * (h + w) / 2) + 1 # line/font thickness for i in range(5): point_x = int(landmarks[2 * i]) point_y = int(landmarks[2 * i + 1]) cv2.circle(self.im, (point_x, point_y), tl + 1, clors[i], -1) def masks(self, masks, colors, im_gpu=None, alpha=0.5): """Plot masks at once. Args: masks (tensor): predicted masks on cuda, shape: [n, h, w] colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n] im_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1] alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque """ if self.pil: # convert to numpy first self.im = np.asarray(self.im).copy() if im_gpu is None: # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...) if len(masks) == 0: return if isinstance(masks, torch.Tensor): masks = torch.as_tensor(masks, dtype=torch.uint8) masks = masks.permute(1, 2, 0).contiguous() masks = masks.cpu().numpy() # masks = np.ascontiguousarray(masks.transpose(1, 2, 0)) masks = scale_image(masks.shape[:2], masks, self.im.shape) masks = np.asarray(masks, dtype=np.float32) colors = np.asarray(colors, dtype=np.float32) # shape(n,3) s = masks.sum(2, keepdims=True).clip(0, 1) # add all masks together masks = (masks @ colors).clip(0, 255) # (h,w,n) @ (n,3) = (h,w,3) self.im[:] = masks * alpha + self.im * (1 - s * alpha) else: if len(masks) == 0: self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255 colors = torch.tensor(colors, device=im_gpu.device, dtype=torch.float32) / 255.0 colors = colors[:, None, None] # shape(n,1,1,3) masks = masks.unsqueeze(3) # shape(n,h,w,1) masks_color = masks * (colors * alpha) # shape(n,h,w,3) inv_alph_masks = (1 - masks * alpha).cumprod(0) # shape(n,h,w,1) mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(n,h,w,3) im_gpu = im_gpu.flip(dims=[0]) # flip channel im_gpu = im_gpu.permute(1, 2, 0).contiguous() # shape(h,w,3) im_gpu = im_gpu * inv_alph_masks[-1] + mcs im_mask = (im_gpu * 255).byte().cpu().numpy() self.im[:] = scale_image(im_gpu.shape, im_mask, self.im.shape) if self.pil: # convert im back to PIL and update draw self.fromarray(self.im) def rectangle(self, xy, fill=None, outline=None, width=1): # Add rectangle to image (PIL-only) self.draw.rectangle(xy, fill, outline, width) def text(self, xy, text, txt_color=(255, 255, 255), anchor='top'): # Add text to image (PIL-only) if anchor == 'bottom': # start y from font bottom w, h = self.font.getsize(text) # text width, height xy[1] += 1 - h self.draw.text(xy, text, fill=txt_color, font=self.font) def fromarray(self, im): # Update self.im from a numpy array self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) self.draw = ImageDraw.Draw(self.im) def result(self): # Return annotated image as array return np.asarray(self.im)utils/landmark/dataloaders.py
import globimport loggingimport mathimport osimport randomimport shutilfrom itertools import repeatfrom multiprocessing.pool import ThreadPoolfrom pathlib import Path import cv2import numpy as npimport torchimport torch.nn.functional as Ffrom PIL import Image, ExifTagsfrom torch.utils.data import Datasetfrom tqdm import tqdm from ..general import xyxy2xywh, xywh2xyxyfrom ..torch_utils import torch_distributed_zero_first # Parametershelp_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng'] # acceptable image suffixesvid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'] # acceptable video suffixeslogger = logging.getLogger(__name__) # Get orientation exif tagfor orientation in ExifTags.TAGS.keys(): if ExifTags.TAGS[orientation] == 'Orientation': break def get_hash(files): # Returns a single hash value of a list of files return sum(os.path.getsize(f) for f in files if os.path.isfile(f)) def img2label_paths(img_paths): # Define label paths as a function of image paths sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings return [x.replace(sa, sb, 1).replace('.' + x.split('.')[-1], '.txt') for x in img_paths] def exif_size(img): # Returns exif-corrected PIL size s = img.size # (width, height) try: rotation = dict(img._getexif().items())[orientation] if rotation == 6: # rotation 270 s = (s[1], s[0]) elif rotation == 8: # rotation 90 s = (s[1], s[0]) except: pass return s def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False, rank=-1, world_size=1, workers=8, image_weights=False, quad=False, prefix=''): # Make sure only the first process in DDP process the dataset first, and the following others can use the cache with torch_distributed_zero_first(rank): dataset = LoadFaceImagesAndLabels(path, imgsz, batch_size, augment=augment, # augment images hyp=hyp, # augmentation hyperparameters rect=rect, # rectangular training cache_images=cache, single_cls=opt.single_cls, stride=int(stride), pad=pad, image_weights=image_weights, prefix=prefix) batch_size = min(batch_size, len(dataset)) nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers]) # number of workers sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None loader = torch.utils.data.DataLoader if image_weights else InfiniteDataLoader # Use torch.utils.data.DataLoader() if dataset.properties will update during training else InfiniteDataLoader() dataloader = loader(dataset, batch_size=batch_size, num_workers=nw, sampler=sampler, pin_memory=True, collate_fn=LoadFaceImagesAndLabels.collate_fn4 if quad else LoadFaceImagesAndLabels.collate_fn) return dataloader, dataset class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader): """ Dataloader that reuses workers Uses same syntax as vanilla DataLoader """ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler)) self.iterator = super().__iter__() def __len__(self): return len(self.batch_sampler.sampler) def __iter__(self): for i in range(len(self)): yield next(self.iterator) class _RepeatSampler(object): """ Sampler that repeats forever Args: sampler (Sampler) """ def __init__(self, sampler): self.sampler = sampler def __iter__(self): while True: yield from iter(self.sampler) class LoadFaceImagesAndLabels(Dataset): # for training/testing def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''): self.img_size = img_size self.augment = augment self.hyp = hyp self.image_weights = image_weights self.rect = False if image_weights else rect self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training) self.mosaic_border = [-img_size // 2, -img_size // 2] self.stride = stride try: f = [] # image files for p in path if isinstance(path, list) else [path]: p = Path(p) # os-agnostic if p.is_dir(): # dir f += glob.glob(str(p / '**' / '*.*'), recursive=True) elif p.is_file(): # file with open(p, 'r') as t: t = t.read().strip().splitlines() parent = str(p.parent) + os.sep f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path else: raise FileNotFoundError(f'{prefix}{p} does not exist') self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats]) assert self.img_files, f'{prefix}No images found' except Exception as e: raise Exception(f'{prefix}Error loading data from {path}: {e}\n{help_url}') # Check cache self.label_files = img2label_paths(self.img_files) # labels cache_path = Path(self.label_files[0]).parent.with_suffix('.cache') # cached labels if cache_path.is_file(): cache = torch.load(cache_path) # load if cache['hash'] != get_hash(self.label_files + self.img_files) or 'results' not in cache: # changed cache = self.cache_labels(cache_path) # re-cache else: cache = self.cache_labels(cache_path) # cache # Display cache [nf, nm, ne, nc, n] = cache.pop('results') # found, missing, empty, corrupted, total desc = f"Scanning '{cache_path}' for images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted" tqdm(None, desc=prefix + desc, total=n, initial=n) assert nf > 0 or not augment, f'{prefix}No labels found in {cache_path}, can not start training. {help_url}' # Read cache cache.pop('hash') # remove hash labels, shapes = zip(*cache.values()) self.labels = list(labels) self.shapes = np.array(shapes, dtype=np.float64) self.img_files = list(cache.keys()) # update self.label_files = img2label_paths(cache.keys()) # update if single_cls: for x in self.labels: x[:, 0] = 0 n = len(shapes) # number of images bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index nb = bi[-1] + 1 # number of batches self.batch = bi # batch index of image self.n = n self.indices = range(n) # Rectangular Training if self.rect: # Sort by aspect ratio s = self.shapes # wh ar = s[:, 1] / s[:, 0] # aspect ratio irect = ar.argsort() self.img_files = [self.img_files[i] for i in irect] self.label_files = [self.label_files[i] for i in irect] self.labels = [self.labels[i] for i in irect] self.shapes = s[irect] # wh ar = ar[irect] # Set training image shapes shapes = [[1, 1]] * nb for i in range(nb): ari = ar[bi == i] mini, maxi = ari.min(), ari.max() if maxi < 1: shapes[i] = [maxi, 1] elif mini > 1: shapes[i] = [1, 1 / mini] self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM) self.imgs = [None] * n if cache_images: gb = 0 # Gigabytes of cached images self.img_hw0, self.img_hw = [None] * n, [None] * n results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n))) # 8 threads pbar = tqdm(enumerate(results), total=n) for i, x in pbar: self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i) gb += self.imgs[i].nbytes pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB)' def cache_labels(self, path=Path('./labels.cache'),prefix=''): # Cache dataset labels, check images and read shapes x = {} # dict nm, nf, ne, nc = 0, 0, 0, 0 # number missing, found, empty, duplicate desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..." pbar = tqdm(zip(self.img_files, self.label_files), desc=desc, total=len(self.img_files)) for i, (im_file, lb_file) in enumerate(pbar): try: # verify images im = Image.open(im_file) im.verify() # PIL verify shape = exif_size(im) # image size assert (shape[0] > 9) & (shape[1] > 9), 'image size <10 pixels' # verify labels if os.path.isfile(lb_file): nf += 1 # label found with open(lb_file, 'r') as f: l = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32) # labels if len(l): assert l.shape[1] == 15, 'labels require 15 columns each' assert (l >= -1).all(), 'negative labels' assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels' assert np.unique(l, axis=0).shape[0] == l.shape[0], 'duplicate labels' else: ne += 1 # label empty l = np.zeros((0, 15), dtype=np.float32) else: nm += 1 # label missing l = np.zeros((0, 15), dtype=np.float32) x[im_file] = [l, shape] except Exception as e: nc += 1 print(f'{prefix}WARNING: Ignoring corrupted image and/or label %s: %s' % (im_file, e)) pbar.desc = f"{prefix}Scanning '{path.parent / path.stem}' for images and labels... " \ f"{nf} found, {nm} missing, {ne} empty, {nc} corrupted" if nf == 0: print(f'{prefix}WARNING: No labels found in {path}. See {help_url}') x['hash'] = get_hash(self.label_files + self.img_files) x['results'] = [nf, nm, ne, nc, i + 1] torch.save(x, path) # save for next time logging.info(f"{prefix}New cache created: {path}") return x def __len__(self): return len(self.img_files) # def __iter__(self): # self.count = -1 # print('ran dataset iter') # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF) # return self def __getitem__(self, index): index = self.indices[index] # linear, shuffled, or image_weights hyp = self.hyp mosaic = self.mosaic and random.random() < hyp['mosaic'] if mosaic: # Load mosaic img, labels = load_mosaic_face(self, index) shapes = None # MixUp https://arxiv.org/pdf/1710.09412.pdf if random.random() < hyp['mixup']: img2, labels2 = load_mosaic_face(self, random.randint(0, self.n - 1)) r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0 img = (img * r + img2 * (1 - r)).astype(np.uint8) labels = np.concatenate((labels, labels2), 0) else: # Load image img, (h0, w0), (h, w) = load_image(self, index) # Letterbox shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling # Load labels labels = [] x = self.labels[index] if x.size > 0: # Normalized xywh to pixel xyxy format labels = x.copy() labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0] labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1] # labels[:, 5] = ratio[0] * w * x[:, 5] + pad[0] # pad width labels[:, 5] = np.array(x[:, 5] > 0, dtype=np.int32) * (ratio[0] * w * x[:, 5] + pad[0]) + ( np.array(x[:, 5] > 0, dtype=np.int32) - 1) labels[:, 6] = np.array(x[:, 6] > 0, dtype=np.int32) * (ratio[1] * h * x[:, 6] + pad[1]) + ( np.array(x[:, 6] > 0, dtype=np.int32) - 1) labels[:, 7] = np.array(x[:, 7] > 0, dtype=np.int32) * (ratio[0] * w * x[:, 7] + pad[0]) + ( np.array(x[:, 7] > 0, dtype=np.int32) - 1) labels[:, 8] = np.array(x[:, 8] > 0, dtype=np.int32) * (ratio[1] * h * x[:, 8] + pad[1]) + ( np.array(x[:, 8] > 0, dtype=np.int32) - 1) labels[:, 9] = np.array(x[:, 5] > 0, dtype=np.int32) * (ratio[0] * w * x[:, 9] + pad[0]) + ( np.array(x[:, 9] > 0, dtype=np.int32) - 1) labels[:, 10] = np.array(x[:, 5] > 0, dtype=np.int32) * (ratio[1] * h * x[:, 10] + pad[1]) + ( np.array(x[:, 10] > 0, dtype=np.int32) - 1) labels[:, 11] = np.array(x[:, 11] > 0, dtype=np.int32) * (ratio[0] * w * x[:, 11] + pad[0]) + ( np.array(x[:, 11] > 0, dtype=np.int32) - 1) labels[:, 12] = np.array(x[:, 12] > 0, dtype=np.int32) * (ratio[1] * h * x[:, 12] + pad[1]) + ( np.array(x[:, 12] > 0, dtype=np.int32) - 1) labels[:, 13] = np.array(x[:, 13] > 0, dtype=np.int32) * (ratio[0] * w * x[:, 13] + pad[0]) + ( np.array(x[:, 13] > 0, dtype=np.int32) - 1) labels[:, 14] = np.array(x[:, 14] > 0, dtype=np.int32) * (ratio[1] * h * x[:, 14] + pad[1]) + ( np.array(x[:, 14] > 0, dtype=np.int32) - 1) if self.augment: # Augment imagespace if not mosaic: img, labels = random_perspective(img, labels, degrees=hyp['degrees'], translate=hyp['translate'], scale=hyp['scale'], shear=hyp['shear'], perspective=hyp['perspective']) # Augment colorspace augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']) # Apply cutouts # if random.random() < 0.9: # labels = cutout(img, labels) nL = len(labels) # number of labels if nL: labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # convert xyxy to xywh labels[:, [2, 4]] /= img.shape[0] # normalized height 0-1 labels[:, [1, 3]] /= img.shape[1] # normalized width 0-1 labels[:, [5, 7, 9, 11, 13]] /= img.shape[1] # normalized landmark x 0-1 labels[:, [5, 7, 9, 11, 13]] = np.where(labels[:, [5, 7, 9, 11, 13]] < 0, -1, labels[:, [5, 7, 9, 11, 13]]) labels[:, [6, 8, 10, 12, 14]] /= img.shape[0] # normalized landmark y 0-1 labels[:, [6, 8, 10, 12, 14]] = np.where(labels[:, [6, 8, 10, 12, 14]] < 0, -1, labels[:, [6, 8, 10, 12, 14]]) if self.augment: # flip up-down if random.random() < hyp['flipud']: img = np.flipud(img) if nL: labels[:, 2] = 1 - labels[:, 2] labels[:, 6] = np.where(labels[:, 6] < 0, -1, 1 - labels[:, 6]) labels[:, 8] = np.where(labels[:, 8] < 0, -1, 1 - labels[:, 8]) labels[:, 10] = np.where(labels[:, 10] < 0, -1, 1 - labels[:, 10]) labels[:, 12] = np.where(labels[:, 12] < 0, -1, 1 - labels[:, 12]) labels[:, 14] = np.where(labels[:, 14] < 0, -1, 1 - labels[:, 14]) # flip left-right if random.random() < hyp['fliplr']: img = np.fliplr(img) if nL: labels[:, 1] = 1 - labels[:, 1] labels[:, 5] = np.where(labels[:, 5] < 0, -1, 1 - labels[:, 5]) labels[:, 7] = np.where(labels[:, 7] < 0, -1, 1 - labels[:, 7]) labels[:, 9] = np.where(labels[:, 9] < 0, -1, 1 - labels[:, 9]) labels[:, 11] = np.where(labels[:, 11] < 0, -1, 1 - labels[:, 11]) labels[:, 13] = np.where(labels[:, 13] < 0, -1, 1 - labels[:, 13]) # 左右镜像的时候,左眼、右眼, 左嘴角、右嘴角无法区分, 应该交换位置,便于网络学习 eye_left = np.copy(labels[:, [5, 6]]) mouth_left = np.copy(labels[:, [11, 12]]) labels[:, [5, 6]] = labels[:, [7, 8]] labels[:, [7, 8]] = eye_left labels[:, [11, 12]] = labels[:, [13, 14]] labels[:, [13, 14]] = mouth_left labels_out = torch.zeros((nL, 16)) if nL: labels_out[:, 1:] = torch.from_numpy(labels) # showlabels(img, labels[:, 1:5], labels[:, 5:15]) # Convert img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 img = np.ascontiguousarray(img) # print(index, ' --- labels_out: ', labels_out) # if nL: # print( ' : landmarks : ', torch.max(labels_out[:, 5:15]), ' --- ', torch.min(labels_out[:, 5:15])) return torch.from_numpy(img), labels_out, self.img_files[index], shapes @staticmethod def collate_fn(batch): img, label, path, shapes = zip(*batch) # transposed for i, l in enumerate(label): l[:, 0] = i # add target image index for build_targets() return torch.stack(img, 0), torch.cat(label, 0), path, shapes @staticmethod def collate_fn4(batch): im, label, path, shapes = zip(*batch) # transposed n = len(shapes) // 4 im4, label4, path4, shapes4 = [], [], path[:n], shapes[:n] ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]]) wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]]) s = torch.tensor([[1, 1, 0.5, 0.5, 0.5, 0.5]]) # scale for i in range(n): # zidane torch.zeros(16,3,720,1280) # BCHW i *= 4 if random.random() < 0.5: im1 = F.interpolate(im[i].unsqueeze(0).float(), scale_factor=2.0, mode='bilinear', align_corners=False)[0].type(im[i].type()) lb = label[i] else: im1 = torch.cat((torch.cat((im[i], im[i + 1]), 1), torch.cat((im[i + 2], im[i + 3]), 1)), 2) lb = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s im4.append(im1) label4.append(lb) for i, lb in enumerate(label4): lb[:, 0] = i # add target image index for build_targets() return torch.stack(im4, 0), torch.cat(label4, 0), path4, shapes4 def showlabels(img, boxs, landmarks): for box in boxs: x, y, w, h = box[0] * img.shape[1], box[1] * img.shape[0], box[2] * img.shape[1], box[3] * img.shape[0] # cv2.rectangle(image, (x,y), (x+w,y+h), (0,255,0), 2) cv2.rectangle(img, (int(x - w / 2), int(y - h / 2)), (int(x + w / 2), int(y + h / 2)), (0, 255, 0), 2) for landmark in landmarks: # cv2.circle(img,(60,60),30,(0,0,255)) for i in range(5): cv2.circle(img, (int(landmark[2 * i] * img.shape[1]), int(landmark[2 * i + 1] * img.shape[0])), 3, (0, 0, 255), -1) cv2.imshow('test', img) cv2.waitKey(0) def load_mosaic_face(self, index): # loads images in a mosaic labels4 = [] s = self.img_size yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y indices = [index] + [self.indices[random.randint(0, self.n - 1)] for _ in range(3)] # 3 additional image indices for i, index in enumerate(indices): # Load image img, _, (h, w) = load_image(self, index) # place img in img4 if i == 0: # top left img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) elif i == 1: # top right x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h elif i == 2: # bottom left x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) elif i == 3: # bottom right x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] padw = x1a - x1b padh = y1a - y1b # Labels x = self.labels[index] labels = x.copy() if x.size > 0: # Normalized xywh to pixel xyxy format # box, x1,y1,x2,y2 labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh # 10 landmarks labels[:, 5] = np.array(x[:, 5] > 0, dtype=np.int32) * (w * x[:, 5] + padw) + ( np.array(x[:, 5] > 0, dtype=np.int32) - 1) labels[:, 6] = np.array(x[:, 6] > 0, dtype=np.int32) * (h * x[:, 6] + padh) + ( np.array(x[:, 6] > 0, dtype=np.int32) - 1) labels[:, 7] = np.array(x[:, 7] > 0, dtype=np.int32) * (w * x[:, 7] + padw) + ( np.array(x[:, 7] > 0, dtype=np.int32) - 1) labels[:, 8] = np.array(x[:, 8] > 0, dtype=np.int32) * (h * x[:, 8] + padh) + ( np.array(x[:, 8] > 0, dtype=np.int32) - 1) labels[:, 9] = np.array(x[:, 9] > 0, dtype=np.int32) * (w * x[:, 9] + padw) + ( np.array(x[:, 9] > 0, dtype=np.int32) - 1) labels[:, 10] = np.array(x[:, 10] > 0, dtype=np.int32) * (h * x[:, 10] + padh) + ( np.array(x[:, 10] > 0, dtype=np.int32) - 1) labels[:, 11] = np.array(x[:, 11] > 0, dtype=np.int32) * (w * x[:, 11] + padw) + ( np.array(x[:, 11] > 0, dtype=np.int32) - 1) labels[:, 12] = np.array(x[:, 12] > 0, dtype=np.int32) * (h * x[:, 12] + padh) + ( np.array(x[:, 12] > 0, dtype=np.int32) - 1) labels[:, 13] = np.array(x[:, 13] > 0, dtype=np.int32) * (w * x[:, 13] + padw) + ( np.array(x[:, 13] > 0, dtype=np.int32) - 1) labels[:, 14] = np.array(x[:, 14] > 0, dtype=np.int32) * (h * x[:, 14] + padh) + ( np.array(x[:, 14] > 0, dtype=np.int32) - 1) labels4.append(labels) # Concat/clip labels if len(labels4): labels4 = np.concatenate(labels4, 0) np.clip(labels4[:, 1:5], 0, 2 * s, out=labels4[:, 1:5]) # use with random_perspective # img4, labels4 = replicate(img4, labels4) # replicate # landmarks labels4[:, 5:] = np.where(labels4[:, 5:] < 0, -1, labels4[:, 5:]) labels4[:, 5:] = np.where(labels4[:, 5:] > 2 * s, -1, labels4[:, 5:]) labels4[:, 5] = np.where(labels4[:, 6] == -1, -1, labels4[:, 5]) labels4[:, 6] = np.where(labels4[:, 5] == -1, -1, labels4[:, 6]) labels4[:, 7] = np.where(labels4[:, 8] == -1, -1, labels4[:, 7]) labels4[:, 8] = np.where(labels4[:, 7] == -1, -1, labels4[:, 8]) labels4[:, 9] = np.where(labels4[:, 10] == -1, -1, labels4[:, 9]) labels4[:, 10] = np.where(labels4[:, 9] == -1, -1, labels4[:, 10]) labels4[:, 11] = np.where(labels4[:, 12] == -1, -1, labels4[:, 11]) labels4[:, 12] = np.where(labels4[:, 11] == -1, -1, labels4[:, 12]) labels4[:, 13] = np.where(labels4[:, 14] == -1, -1, labels4[:, 13]) labels4[:, 14] = np.where(labels4[:, 13] == -1, -1, labels4[:, 14]) # Augment img4, labels4 = random_perspective(img4, labels4, degrees=self.hyp['degrees'], translate=self.hyp['translate'], scale=self.hyp['scale'], shear=self.hyp['shear'], perspective=self.hyp['perspective'], border=self.mosaic_border) # border to remove return img4, labels4 # Ancillary functions --------------------------------------------------------------------------------------------------def load_image(self, index): # loads 1 image from dataset, returns img, original hw, resized hw img = self.imgs[index] if img is None: # not cached path = self.img_files[index] img = cv2.imread(path) # BGR assert img is not None, 'Image Not Found ' + path h0, w0 = img.shape[:2] # orig hw r = self.img_size / max(h0, w0) # resize image to img_size if r != 1: # always resize down, only resize up if training with augmentation interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp) return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized else: return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw_original, hw_resized def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5): r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV)) dtype = img.dtype # uint8 x = np.arange(0, 256, dtype=np.int16) lut_hue = ((x * r[0]) % 180).astype(dtype) lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) lut_val = np.clip(x * r[2], 0, 255).astype(dtype) img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype) cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed # Histogram equalization # if random.random() < 0.2: # for i in range(3): # img[:, :, i] = cv2.equalizeHist(img[:, :, i]) def replicate(img, labels): # Replicate labels h, w = img.shape[:2] boxes = labels[:, 1:].astype(int) x1, y1, x2, y2 = boxes.T s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels) for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices x1b, y1b, x2b, y2b = boxes[i] bh, bw = y2b - y1b, x2b - x1b yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh] img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0) return img, labels def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True): # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232 shape = img.shape[:2] # current shape [height, width] if isinstance(new_shape, int): new_shape = (new_shape, new_shape) # Scale ratio (new / old) r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) if not scaleup: # only scale down, do not scale up (for better test mAP) r = min(r, 1.0) # Compute padding ratio = r, r # width, height ratios new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding if auto: # minimum rectangle dw, dh = np.mod(dw, 64), np.mod(dh, 64) # wh padding elif scaleFill: # stretch dw, dh = 0.0, 0.0 new_unpad = (new_shape[1], new_shape[0]) ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios dw /= 2 # divide padding into 2 sides dh /= 2 if shape[::-1] != new_unpad: # resize img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border return img, ratio, (dw, dh) def random_perspective(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, border=(0, 0)): # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) # targets = [cls, xyxy] height = img.shape[0] + border[0] * 2 # shape(h,w,c) width = img.shape[1] + border[1] * 2 # Center C = np.eye(3) C[0, 2] = -img.shape[1] / 2 # x translation (pixels) C[1, 2] = -img.shape[0] / 2 # y translation (pixels) # Perspective P = np.eye(3) P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) # Rotation and Scale R = np.eye(3) a = random.uniform(-degrees, degrees) # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations s = random.uniform(1 - scale, 1 + scale) # s = 2 ** random.uniform(-scale, scale) R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) # Shear S = np.eye(3) S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) # Translation T = np.eye(3) T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels) T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels) # Combined rotation matrix M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed if perspective: img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114)) else: # affine img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) # Visualize # import matplotlib.pyplot as plt # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() # ax[0].imshow(img[:, :, ::-1]) # base # ax[1].imshow(img2[:, :, ::-1]) # warped # Transform label coordinates n = len(targets) if n: # warp points # xy = np.ones((n * 4, 3)) xy = np.ones((n * 9, 3)) xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]].reshape(n * 9, 2) # x1y1, x2y2, x1y2, x2y1 xy = xy @ M.T # transform if perspective: xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 18) # rescale else: # affine xy = xy[:, :2].reshape(n, 18) # create new boxes x = xy[:, [0, 2, 4, 6]] y = xy[:, [1, 3, 5, 7]] landmarks = xy[:, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17]] mask = np.array(targets[:, 5:] > 0, dtype=np.int32) landmarks = landmarks * mask landmarks = landmarks + mask - 1 landmarks = np.where(landmarks < 0, -1, landmarks) landmarks[:, [0, 2, 4, 6, 8]] = np.where(landmarks[:, [0, 2, 4, 6, 8]] > width, -1, landmarks[:, [0, 2, 4, 6, 8]]) landmarks[:, [1, 3, 5, 7, 9]] = np.where(landmarks[:, [1, 3, 5, 7, 9]] > height, -1, landmarks[:, [1, 3, 5, 7, 9]]) landmarks[:, 0] = np.where(landmarks[:, 1] == -1, -1, landmarks[:, 0]) landmarks[:, 1] = np.where(landmarks[:, 0] == -1, -1, landmarks[:, 1]) landmarks[:, 2] = np.where(landmarks[:, 3] == -1, -1, landmarks[:, 2]) landmarks[:, 3] = np.where(landmarks[:, 2] == -1, -1, landmarks[:, 3]) landmarks[:, 4] = np.where(landmarks[:, 5] == -1, -1, landmarks[:, 4]) landmarks[:, 5] = np.where(landmarks[:, 4] == -1, -1, landmarks[:, 5]) landmarks[:, 6] = np.where(landmarks[:, 7] == -1, -1, landmarks[:, 6]) landmarks[:, 7] = np.where(landmarks[:, 6] == -1, -1, landmarks[:, 7]) landmarks[:, 8] = np.where(landmarks[:, 9] == -1, -1, landmarks[:, 8]) landmarks[:, 9] = np.where(landmarks[:, 8] == -1, -1, landmarks[:, 9]) targets[:, 5:] = landmarks xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T # # apply angle-based reduction of bounding boxes # radians = a * math.pi / 180 # reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5 # x = (xy[:, 2] + xy[:, 0]) / 2 # y = (xy[:, 3] + xy[:, 1]) / 2 # w = (xy[:, 2] - xy[:, 0]) * reduction # h = (xy[:, 3] - xy[:, 1]) * reduction # xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T # clip boxes xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width) xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height) # filter candidates i = box_candidates(box1=targets[:, 1:5].T * s, box2=xy.T) targets = targets[i] targets[:, 1:5] = xy[i] return img, targets def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1): # box1(4,n), box2(4,n) # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio w1, h1 = box1[2] - box1[0], box1[3] - box1[1] w2, h2 = box2[2] - box2[0], box2[3] - box2[1] ar = np.maximum(w2 / (h2 + 1e-16), h2 / (w2 + 1e-16)) # aspect ratio return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + 1e-16) > area_thr) & (ar < ar_thr) # candidates def cutout(image, labels): # Applies image cutout augmentation https://arxiv.org/abs/1708.04552 h, w = image.shape[:2] def bbox_ioa(box1, box2): # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2 box2 = box2.transpose() # Get the coordinates of bounding boxes b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] # Intersection area inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \ (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0) # box2 area box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16 # Intersection over box2 area return inter_area / box2_area # create random masks scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction for s in scales: mask_h = random.randint(1, int(h * s)) mask_w = random.randint(1, int(w * s)) # box xmin = max(0, random.randint(0, w) - mask_w // 2) ymin = max(0, random.randint(0, h) - mask_h // 2) xmax = min(w, xmin + mask_w) ymax = min(h, ymin + mask_h) # apply random color mask image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)] # return unobscured labels if len(labels) and s > 0.03: box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32) ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area labels = labels[ioa < 0.60] # remove >60% obscured labels return labels def create_folder(path='./new'): # Create folder if os.path.exists(path): shutil.rmtree(path) # delete output folder os.makedirs(path) # make new output folder def flatten_recursive(path='../coco128'): # Flatten a recursive directory by bringing all files to top level new_path = Path(path + '_flat') create_folder(new_path) for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)): shutil.copyfile(file, new_path / Path(file).name) def extract_boxes(path='../coco128/'): # from utils.datasets import *; extract_boxes('../coco128') # Convert detection dataset into classification dataset, with one directory per class path = Path(path) # images dir shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None # remove existing files = list(path.rglob('*.*')) n = len(files) # number of files for im_file in tqdm(files, total=n): if im_file.suffix[1:] in img_formats: # image im = cv2.imread(str(im_file))[..., ::-1] # BGR to RGB h, w = im.shape[:2] # labels lb_file = Path(img2label_paths([str(im_file)])[0]) if Path(lb_file).exists(): with open(lb_file, 'r') as f: lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32) # labels for j, x in enumerate(lb): c = int(x[0]) # class f = (path / 'classifier') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg' # new filename if not f.parent.is_dir(): f.parent.mkdir(parents=True) b = x[1:] * [w, h, w, h] # box # b[2:] = b[2:].max() # rectangle to square b[2:] = b[2:] * 1.2 + 3 # pad b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image b[[1, 3]] = np.clip(b[[1, 3]], 0, h) assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}' def autosplit(path='../coco128', weights=(0.9, 0.1, 0.0)): # from utils.datasets import *; autosplit('../coco128') """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files # Arguments path: Path to images directory weights: Train, val, test weights (list) """ path = Path(path) # images dir files = list(path.rglob('*.*')) n = len(files) # number of files indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt'] # 3 txt files [(path / x).unlink() for x in txt if (path / x).exists()] # remove existing for i, img in tqdm(zip(indices, files), total=n): if img.suffix[1:] in img_formats: with open(path / txt[i], 'a') as f: f.write(str(img) + '\n') # add image to txt filemodels/ yolo .py
class LmkDetect(nn.Module): stride = None # strides computed during build export = False # onnx export cat output def __init__(self, nc=80, anchors=(), ch=()): # detection layer super(LmkDetect, self).__init__() self.nc = nc # number of classes # self.no = nc + 5 # number of outputs per anchor self.no = nc + 5 + 10 # number of outputs per anchor self.nl = len(anchors) # number of detection layers self.na = len(anchors[0]) // 2 # number of anchors self.grid = [torch.zeros(1)] * self.nl # init grid a = torch.tensor(anchors).float().view(self.nl, -1, 2) self.register_buffer('anchors', a) # shape(nl,na,2) self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv def forward(self, x): # x = x.copy() # for profiling z = [] # inference output if self.export: for i in range(self.nl): x[i] = self.m[i](x[i]) # conv bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() if self.grid[i].shape[2:4] != x[i].shape[2:4]: # self.grid[i] = self._make_grid(nx, ny).to(x[i].device) self.grid[i], self.anchor_grid[i] = self._make_grid_new(nx, ny, i) y = torch.full_like(x[i], 0) y = y + torch.cat((x[i][:, :, :, :, 0:5].sigmoid(), torch.cat((x[i][:, :, :, :, 5:15], x[i][:, :, :, :, 15:15 + self.nc].sigmoid()), 4)), 4) box_xy = (y[:, :, :, :, 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy box_wh = (y[:, :, :, :, 2:4] * 2) ** 2 * self.anchor_grid[i] # wh # box_conf = torch.cat((box_xy, torch.cat((box_wh, y[:, :, :, :, 4:5]), 4)), 4) landm1 = y[:, :, :, :, 5:7] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[ i] # landmark x1 y1 landm2 = y[:, :, :, :, 7:9] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[ i] # landmark x2 y2 landm3 = y[:, :, :, :, 9:11] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[ i] # landmark x3 y3 landm4 = y[:, :, :, :, 11:13] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[ i] # landmark x4 y4 landm5 = y[:, :, :, :, 13:15] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[ i] # landmark x5 y5 # landm = torch.cat((landm1, torch.cat((landm2, torch.cat((landm3, torch.cat((landm4, landm5), 4)), 4)), 4)), 4) # y = torch.cat((box_conf, torch.cat((landm, y[:, :, :, :, 15:15+self.nc]), 4)), 4) y = torch.cat([box_xy, box_wh, y[:, :, :, :, 4:5], landm1, landm2, landm3, landm4, landm5, y[:, :, :, :, 15:15 + self.nc]], -1) z.append(y.view(bs, -1, self.no)) return torch.cat(z, 1) for i in range(self.nl): x[i] = self.m[i](x[i]) # conv bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() if not self.training: # inference if self.grid[i].shape[2:4] != x[i].shape[2:4]: self.grid[i] = self._make_grid(nx, ny).to(x[i].device) y = torch.full_like(x[i], 0) class_range = list(range(5)) + list(range(15, 15 + self.nc)) y[..., class_range] = x[i][..., class_range].sigmoid() y[..., 5:15] = x[i][..., 5:15] # y = x[i].sigmoid() y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh # y[..., 5:15] = y[..., 5:15] * 8 - 4 y[..., 5:7] = y[..., 5:7] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[ i] # landmark x1 y1 y[..., 7:9] = y[..., 7:9] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[ i] # landmark x2 y2 y[..., 9:11] = y[..., 9:11] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[ i] # landmark x3 y3 y[..., 11:13] = y[..., 11:13] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[ i] # landmark x4 y4 y[..., 13:15] = y[..., 13:15] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[ i] # landmark x5 y5 # y[..., 5:7] = (y[..., 5:7] * 2 -1) * self.anchor_grid[i] # landmark x1 y1 # y[..., 7:9] = (y[..., 7:9] * 2 -1) * self.anchor_grid[i] # landmark x2 y2 # y[..., 9:11] = (y[..., 9:11] * 2 -1) * self.anchor_grid[i] # landmark x3 y3 # y[..., 11:13] = (y[..., 11:13] * 2 -1) * self.anchor_grid[i] # landmark x4 y4 # y[..., 13:15] = (y[..., 13:15] * 2 -1) * self.anchor_grid[i] # landmark x5 y5 z.append(y.view(bs, -1, self.no)) return x if self.training else (torch.cat(z, 1), x) @staticmethod def _make_grid(nx=20, ny=20): yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() def _make_grid_new(self, nx=20, ny=20, i=0): d = self.anchors[i].device if '1.10.0' in torch.__version__: # torch>=1.10.0 meshgrid workaround for torch>=0.7 compatibility yv, xv = torch.meshgrid([torch.arange(ny).to(d), torch.arange(nx).to(d)], indexing='ij') else: yv, xv = torch.meshgrid([torch.arange(ny).to(d), torch.arange(nx).to(d)]) grid = torch.stack((xv, yv), 2).expand((1, self.na, ny, nx, 2)).float() anchor_grid = (self.anchors[i].clone() * self.stride[i]).view((1, self.na, 1, 1, 2)).expand( (1, self.na, ny, nx, 2)).float() return grid, anchor_grid同时在以下两个函数中增加LmkDetect
1、DetectionModel中 if isinstance(m, (Detect, Segment, LmkDetect)) 2、parse_model中 elif m in {Detect, Segment, LmkDetect}
models/experimental.py添加
def attempt_loadf(weights, device=None, inplace=True, fuse=True): # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a from models.yolo import Detect, Model, LmkDetect model = Ensemble() for w in weights if isinstance(weights, list) else [weights]: ckpt = torch.load(attempt_download(w), map_location='cpu') # load ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float() # FP32 model # Model compatibility updates if not hasattr(ckpt, 'stride'): ckpt.stride = torch.tensor([32.]) if hasattr(ckpt, 'names') and isinstance(ckpt.names, (list, tuple)): ckpt.names = dict(enumerate(ckpt.names)) # convert to dict model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, 'fuse') else ckpt.eval()) # model in eval mode # Module compatibility updates for m in model.modules(): t = type(m) if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model,LmkDetect): m.inplace = inplace # torch 1.7.0 compatibility if t is Detect and not isinstance(m.anchor_grid, list): delattr(m, 'anchor_grid') setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl) elif t is LmkDetect and not isinstance(m.anchor_grid, list): delattr(m, 'anchor_grid') setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl) elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'): m.recompute_scale_factor = None # torch 1.11.0 compatibility # Return model if len(model) == 1: return model[-1] # Return detection ensemble print(f'Ensemble created with {weights}\n') for k in 'names', 'nc', 'yaml': setattr(model, k, getattr(model[0], k)) model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}' return model模型 配置文件
models/landmark/yolov5n-landmark.yaml
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license # Parametersnc: 80 # number of classesdepth_multiple: 0.33 # model depth multiplewidth_multiple: 0.25 # layer channel multipleanchors: - [4,5, 8,10, 13,16] # P3/8 - [23,29, 43,55, 73,105] # P4/16 - [146,217, 231,300, 335,433] # P5/32 # YOLOv5 v6.0 backbonebackbone: #channel * (1/width_multiple) # [from, number, module, args] input size [N, 3, 640, 640] [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2 [N, 16, 320, 320] [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 [N, 32, 160, 160] [-1, 3, C3, [128]], # [N, 32, 160, 160] [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 [N, 64, 80, 80] [-1, 6, C3, [256]], # [N, 64, 80, 80] [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 [N, 128, 40, 40] [-1, 9, C3, [512]], # [N, 128, 40, 40] [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 [N, 256, 20, 20] [-1, 1, SPPF, [1024, 5]], # 9 [N, 256, 20, 20] [-1, 3, C3, [1024, False]],# [N, 256, 20, 20] ] # YOLOv5 v6.0 headhead: [[-1, 1, Conv, [512, 1, 1]], # [N, 128, 20, 20] [-1, 1, nn.Upsample, [None, 2, 'nearest']], # [N, 128, 40, 40] [[-1, 6], 1, Concat, [1]], # cat backbone P4 [N, 256, 40, 40] [-1, 3, C3, [512, False]], # 13 [N, 128, 40, 40] [-1, 1, Conv, [256, 1, 1]], # [N, 64, 40, 40] [-1, 1, nn.Upsample, [None, 2, 'nearest']], # [N, 64, 80, 80] [[-1, 4], 1, Concat, [1]], # cat backbone P3 [N, 128, 80, 80] [-1, 3, C3, [256, False]], # 17 (P3/8-small) [N, 64, 80, 80] [-1, 1, Conv, [256, 3, 2]], # [N, 64, 40, 40] [[-1, 14], 1, Concat, [1]], # cat head P4 [N, 128, 40, 40] [-1, 3, C3, [512, False]], # 20 (P4/16-medium) [N, 128, 40, 40] [-1, 1, Conv, [512, 3, 2]], # [N, 128, 20, 20] [[-1, 10], 1, Concat, [1]], # cat head P5 [N, 256, 20, 20] [-1, 3, C3, [1024, False]], # 23 (P5/32-large) [N, 256, 20, 20] [[17, 20, 23], 1, LmkDetect, [nc, anchors]], # Detect(P3, P4, P5) ] #YOLOv5n summary: 270 layers, 1872157 parameters, 1872157 gradients, 4.5 GFLOPs超参 data /hyps/hyp.scratch.yaml
# Hyperparameters for COCO training from scratch# python train.py --batch 40 --cfg yolov5m.yaml --weights '' --data coco.yaml --img 640 --epochs 300# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)momentum: 0.937 # SGD momentum/Adam beta1weight_decay: 0.0005 # optimizer weight decay 5e-4warmup_epochs: 3.0 # warmup epochs (fractions ok)warmup_momentum: 0.8 # warmup initial momentumwarmup_bias_lr: 0.1 # warmup initial bias lrbox: 0.05 # box loss gaincls: 0.5 # cls loss gainlandmark: 0.005 # landmark loss gaincls_pw: 1.0 # cls BCELoss positive_weightobj: 1.0 # obj loss gain (scale with pixels)obj_pw: 1.0 # obj BCELoss positive_weightiou_t: 0.20 # IoU training thresholdanchor_t: 4.0 # anchor-multiple threshold# anchors: 3 # anchors per output layer (0 to ignore)fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)hsv_h: 0.015 # image HSV-Hue augmentation (fraction)hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)hsv_v: 0.4 # image HSV-Value augmentation (fraction)degrees: 0.0 # image rotation (+/- deg)translate: 0.1 # image translation (+/- fraction)scale: 0.5 # image scale (+/- gain)shear: 0.5 # image shear (+/- deg)perspective: 0.0 # image perspective (+/- fraction), range 0-0.001flipud: 0.0 # image flip up-down (probability)fliplr: 0.5 # image flip left-right (probability)mosaic: 0.5 # image mosaic (probability)mixup: 0.0 # image mixup (probability)
五个关键点(眼睛,鼻子,嘴)
完整代码:GitHub
免责声明:本文系网络转载或改编,未找到原创作者,版权归原作者所有。如涉及版权,请联系删