This is a continuation of "Representation learning with SimSiam and CIFAR-10 on a Jetson Xavier NX (5): modifying the train function from the facebookresearch GitHub repo".
This is the final post in the series.
technoxs-stacker.hatenablog.com
1. Environment
Jetson Xavier NX
Ubuntu 18.04
Docker
Python 3.x
PyTorch
-> I covered setting up the PyTorch environment on the Jetson Xavier NX in the post below, so feel free to use it as a reference (^^)/
technoxs-stacker.hatenablog.com
2. Modified code
The complete modified code is below.
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import argparse
import math
import os
import random
import shutil
import time
import warnings

from PIL import Image  # needed only by the (unused) Transform class below

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.multiprocessing as mp
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision

import simsiam.loader
import simsiam.builder

model_names = sorted(name for name in models.__dict__
                     if name.islower() and not name.startswith("__")
                     and callable(models.__dict__[name]))

parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet50',
                    choices=model_names,
                    help='model architecture: ' +
                         ' | '.join(model_names) +
                         ' (default: resnet50)')
parser.add_argument('-j', '--workers', default=0, type=int, metavar='N',
                    help='number of data loading workers (default: 0)')
parser.add_argument('--epochs', default=100, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=512, type=int,
                    metavar='N',
                    help='mini-batch size (default: 512), this is the total '
                         'batch size of all GPUs on the current node when '
                         'using Data Parallel or Distributed Data Parallel')
parser.add_argument('--lr', '--learning-rate', default=0.05, type=float,
                    metavar='LR', help='initial (base) learning rate', dest='lr')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum of SGD solver')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)',
                    dest='weight_decay')
parser.add_argument('-p', '--print-freq', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='./checkpoint/', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: ./checkpoint/)')
parser.add_argument('--seed', default=None, type=int,
                    help='seed for initializing training.')
parser.add_argument('--gpu', default=None, type=int,
                    help='GPU id to use.')

# simsiam specific configs:
parser.add_argument('--dim', default=2048, type=int,
                    help='feature dimension (default: 2048)')
parser.add_argument('--pred-dim', default=512, type=int,
                    help='hidden dimension of the predictor (default: 512)')
parser.add_argument('--fix-pred-lr', action='store_true',
                    help='Fix learning rate for the predictor')


def main():
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    main_worker(args.gpu, args)


def main_worker(gpu, args):
    args.gpu = gpu

    # create model
    print("=> creating model '{}'".format(args.arch))
    model = simsiam.builder.SimSiam(
        models.__dict__[args.arch],
        args.dim, args.pred_dim)

    # infer learning rate before changing batch size
    init_lr = args.lr * args.batch_size / 256

    if gpu is not None:
        torch.cuda.set_device(gpu)  # set_device(None) would raise, so guard it
    model = model.cuda(gpu)

    # define loss function (criterion) and optimizer
    criterion = nn.CosineSimilarity(dim=1).cuda(gpu)

    if args.fix_pred_lr:
        # the model is not wrapped in DataParallel in this single-GPU version,
        # so access the submodules directly (model.module does not exist here)
        optim_params = [{'params': model.encoder.parameters(), 'fix_lr': False},
                        {'params': model.predictor.parameters(), 'fix_lr': True}]
    else:
        optim_params = model.parameters()

    optimizer = torch.optim.SGD(optim_params, init_lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # MoCo v2's aug: similar to SimCLR https://arxiv.org/abs/2002.05709
    augmentation = [
        transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
        transforms.RandomApply([
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)  # not strengthened
        ], p=0.8),
        transforms.RandomGrayscale(p=0.2),
        transforms.RandomApply([simsiam.loader.GaussianBlur([.1, 2.])], p=0.5),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ]

    train_dataset = torchvision.datasets.CIFAR10(
        root='./data', train=True, download=True,
        transform=simsiam.loader.TwoCropsTransform(transforms.Compose(augmentation)))

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        num_workers=args.workers, pin_memory=True)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, init_lr, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args, gpu)

        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, is_best=False, filename='checkpoint_{:04d}.pth.tar'.format(epoch))
        # keep a copy of the latest weights in the working directory
        torch.save(model.state_dict(), 'latest.pth')


def train(train_loader, model, criterion, optimizer, epoch, args, gpu):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses],
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for i, ((y1, y2), _) in enumerate(train_loader, start=epoch * len(train_loader)):
        # measure data loading time
        data_time.update(time.time() - end)

        y1 = y1.cuda(gpu, non_blocking=True)
        y2 = y2.cuda(gpu, non_blocking=True)

        # compute output and loss
        p1, p2, z1, z2 = model(x1=y1, x2=y2)
        loss = -(criterion(p1, z2).mean() + criterion(p2, z1).mean()) * 0.5

        # losses.update(loss.item(), images[0].size(0))
        losses.update(loss.item(), y1.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)


# NOTE: this Transform class is not used anywhere in this script; it appears
# to be left over from a Barlow Twins-style two-view pipeline. Instantiating
# it would require GaussianBlur/Solarization helpers that are not defined
# here (they are not the ones in simsiam.loader).
class Transform:
    def __init__(self):
        self.transform = transforms.Compose([
            transforms.RandomResizedCrop(224, interpolation=Image.BICUBIC),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomApply(
                [transforms.ColorJitter(brightness=0.4, contrast=0.4,
                                        saturation=0.2, hue=0.1)],
                p=0.8
            ),
            transforms.RandomGrayscale(p=0.2),
            GaussianBlur(p=1.0),
            Solarization(p=0.0),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        self.transform_prime = transforms.Compose([
            transforms.RandomResizedCrop(224, interpolation=Image.BICUBIC),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomApply(
                [transforms.ColorJitter(brightness=0.4, contrast=0.4,
                                        saturation=0.2, hue=0.1)],
                p=0.8
            ),
            transforms.RandomGrayscale(p=0.2),
            GaussianBlur(p=0.1),
            Solarization(p=0.2),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

    def __call__(self, x):
        y1 = self.transform(x)
        y2 = self.transform_prime(x)
        return y1, y2


def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')


class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)


class ProgressMeter(object):
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'


def adjust_learning_rate(optimizer, init_lr, epoch, args):
    """Decay the learning rate based on schedule"""
    cur_lr = init_lr * 0.5 * (1. + math.cos(math.pi * epoch / args.epochs))
    for param_group in optimizer.param_groups:
        if 'fix_lr' in param_group and param_group['fix_lr']:
            param_group['lr'] = init_lr
        else:
            param_group['lr'] = cur_lr


if __name__ == '__main__':
    main()
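For reference, the loss line in train() implements SimSiam's symmetric negative cosine similarity: p1 and p2 come from the predictor, while z1 and z2 are the projector outputs with the stop-gradient already applied inside the model's forward (this is what prevents representational collapse). A perfectly aligned pair of views would drive this loss to -1. Here is a minimal standalone sketch of the computation; the batch size and feature dimension are illustrative, not taken from the training run:

import torch
import torch.nn as nn

criterion = nn.CosineSimilarity(dim=1)

# toy stand-ins: predictor outputs p1, p2 and detached projector outputs z1, z2
p1, p2 = torch.randn(8, 2048), torch.randn(8, 2048)
z1, z2 = torch.randn(8, 2048), torch.randn(8, 2048)

# symmetric SimSiam loss: average of the two view-crossed similarities, negated
loss = -(criterion(p1, z2).mean() + criterion(p2, z1).mean()) * 0.5
print(loss.item())  # near 0 for random vectors, approaches -1.0 as views align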
3. Running the script
Run it with the following command:
python3 main_simsiam_single_v2.py -a resnet50 --gpu 0 -b 8 --print-freq 10
Command-line options:
-a resnet50: use ResNet-50 as the model
--gpu 0: use GPU 0
-b 8: set the batch size to 8 (see the note on the learning rate after this list)
--print-freq 10: print progress to the console every 10 iterations
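One thing to note with -b 8: before training starts, the script rescales the base learning rate with the linear scaling rule, so a small batch also means a proportionally small initial learning rate:

# from main_worker(): init_lr = args.lr * args.batch_size / 256
lr, batch_size = 0.05, 8
init_lr = lr * batch_size / 256
print(init_lr)  # 0.0015625

This init_lr is then decayed over the epochs by the cosine schedule in adjust_learning_rate().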
Impressions
The loss starts to saturate at around 50 epochs, so it looks like the model has learned useful representations.
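If you want to poke at the learned representation yourself, here is a minimal sketch (my addition, not part of the training script) that loads the latest.pth saved by the training loop above and extracts an embedding for one CIFAR-10 test image; it assumes you run it from the same simsiam repo checkout:

import torch
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
import simsiam.builder

# rebuild the same architecture and load the weights saved during training
model = simsiam.builder.SimSiam(models.__dict__['resnet50'], 2048, 512)
model.load_state_dict(torch.load('latest.pth', map_location='cpu'))
model.eval()

tf = transforms.Compose([
    transforms.Resize(224),  # match the 224x224 training resolution
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=tf)
x, label = testset[0]
with torch.no_grad():
    z = model.encoder(x.unsqueeze(0))  # frozen 2048-d embedding
print(z.shape)  # torch.Size([1, 2048])

Nearest neighbors in this embedding space (for example, a simple k-NN over the test set) give a quick qualitative check that same-class images cluster together.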