import math
import os
import pickle as pkl
import sys
import time

import torch
from tqdm import tqdm
def train_val(net, optimizer, n_epochs, trainBS, valBS,
              trainDataset, trainLoader, valDataset, valLoader,
              expLrScheduler, modelPath, modelName):
    """Train ``net`` for ``n_epochs`` epochs, validating after every epoch.

    Each epoch runs a 'train' phase followed by a 'val' phase.  The model with
    the highest validation accuracy is checkpointed to
    ``modelPath/modelName``; the model at the last epoch is additionally
    saved as ``modelPath/final-<modelName>``.  Accuracy/loss histories are
    pickled to ``modelPath/log.pkl`` and plotted after every epoch.

    Besides its arguments, this function reads the module-level names
    ``criterion`` (loss function) and ``classes`` (stored in the checkpoint),
    and uses the helpers ``AverageMeter``, ``accuracy``,
    ``convert_secs2time`` and ``plot_acc_loss`` defined elsewhere.
    Inputs and labels are moved to the GPU with ``.cuda()``.

    Args:
        net (torchvision.models): Net model for training/validation.
        optimizer (torch.optim): Optimizer for training.
        n_epochs (int): Training epochs.
        trainBS (int): Training batchsize.
        valBS (int): Validation batchsize.
        trainDataset (torch.Dataset): Training dataset (only its length is used).
        trainLoader (torch.DataLoader): Loader yielding training batches.
        valDataset (torch.Dataset): Validation dataset (only its length is used).
        valLoader (torch.DataLoader): Loader yielding validation batches.
        expLrScheduler (torch.optim.lr_scheduler): Learning rate decay strategy.
        modelPath (str): Path to save the learnt model.
        modelName (str): Model name.

    Returns:
        float: Best validation accuracy (in percent) seen during training.
    """
    phases = ['train', 'val']
    lossLog = {'train': [], 'val': []}
    accLog = {'train': [], 'val': []}
    dataSet = {'train': trainDataset, 'val': valDataset}
    dataLoader = {'train': trainLoader, 'val': valLoader}
    dataSize = {x: len(dataSet[x]) for x in phases}
    batchSize = {'train': trainBS, 'val': valBS}
    # Number of batches per phase; math.ceil avoids depending on numpy and
    # yields plain Python ints.
    iterNum = {x: int(math.ceil(dataSize[x] / batchSize[x])) for x in phases}

    print('dataSize: {}'.format(dataSize))
    print('batchSize: {}'.format(batchSize))
    print('iterNum: {}'.format(iterNum))

    # Make sure checkpoints and logs have somewhere to go before spending
    # an epoch of compute.
    if modelPath:
        os.makedirs(modelPath, exist_ok=True)
    logFile = os.path.join(modelPath, 'log.pkl')

    best_acc = 0.0
    start = time.time()
    for epoch in tqdm(range(n_epochs), desc='Epoch'):
        isLastEpoch = (epoch + 1 == n_epochs)
        print('Epoch {}/{}, lr = {} [best_acc = {:.4f}%]'.format(
            epoch + 1, n_epochs, optimizer.param_groups[0]['lr'], best_acc))
        print('-' * 10)
        epochStart = time.time()

        for phase in phases:
            isTrain = (phase == 'train')
            if isTrain:
                net.train()
            else:
                net.eval()

            losses = AverageMeter()
            top1 = AverageMeter()

            for i, (inputs, labels) in enumerate(dataLoader[phase], 0):
                inputs = inputs.cuda()
                labels = labels.cuda()
                nSamples = inputs.size(0)

                # Gradients are only tracked during the training phase.
                with torch.set_grad_enabled(isTrain):
                    outputs = net(inputs)
                    loss = criterion(outputs, labels)

                    if isTrain:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                batchLoss = loss.item()
                # NOTE(review): the meters appear to accumulate raw sums
                # (update(total, n)) rather than val * n - confirm against
                # AverageMeter.
                losses.update(batchLoss * nSamples, nSamples)

                sys.stdout.write(' \r')
                sys.stdout.flush()
                # Labels are reduced with argmax over dim 1 before scoring,
                # i.e. they are expected to be one-hot / per-class scores.
                prec1 = accuracy(outputs, torch.argmax(labels, 1), topk=(1,))[0]
                batchCorrect = prec1.item()
                top1.update(batchCorrect, nSamples)
                sys.stdout.write('Iter: {} / {} ({:.0f}s)\tLoss= {:.4f} ({:.4f})\tAcc= {:.2f}% ({:.0f}/{:.0f})\r'
                                 .format(i + 1, iterNum[phase], time.time() - epochStart,
                                         batchLoss, losses.avg,
                                         batchCorrect / nSamples * 100, top1.sum, top1.count))
                sys.stdout.flush()

            if isTrain:
                # Step the LR schedule once per epoch, *after* the optimizer
                # updates: since PyTorch 1.1 stepping the scheduler first
                # skips the first value of the schedule.
                expLrScheduler.step()

            sys.stdout.write(' \r')
            sys.stdout.flush()

            epoch_loss = losses.avg
            epoch_acc = top1.avg * 100
            accLog[phase].append(epoch_acc / 100)
            lossLog[phase].append(epoch_loss)

            epochDuration = time.time() - epochStart
            epochStart = time.time()
            hour, minute, second = convert_secs2time(epochDuration)
            print('[ {} ] Loss: {:.4f} Acc: {:.3f}% ({:.0f}/{:.0f}) ({:.0f}h {:.0f}m {:.2f}s)'
                  .format(phase, epoch_loss, epoch_acc, top1.sum, top1.count,
                          hour, minute, second))

            if isTrain:
                continue

            # ---- validation phase only: checkpointing ----
            isBest = epoch_acc > best_acc
            if isBest or isLastEpoch:
                # NOTE(review): the optimizer object itself is pickled (not
                # optimizer.state_dict()); kept as-is so existing checkpoint
                # readers keep working.
                state = {'net': net.state_dict(),
                         'opt': optimizer,
                         'acc': epoch_acc,
                         'epoch': epoch,
                         'classes': classes}
                if isBest:
                    bestFile = os.path.join(modelPath, modelName)
                    print('Saving best model to {}'.format(bestFile))
                    torch.save(state, bestFile)
                    best_acc = epoch_acc
                if isLastEpoch:
                    finalFile = os.path.join(modelPath, 'final-{}'.format(modelName))
                    print('Saving final model to {}'.format(finalFile))
                    torch.save(state, finalFile)

        print('')

        # Persist and plot the histories after every epoch so that partial
        # runs still leave usable logs behind.
        log = {'acc': accLog, 'loss': lossLog}
        with open(logFile, 'wb') as f:
            pkl.dump(log, f)
        if isLastEpoch:
            print("Training logs saved to : {}".format(logFile))
        for plotKind in ('both', 'loss', 'accuracy'):
            plot_acc_loss(log, plotKind, modelPath, '{}_'.format(modelName), '', isLastEpoch)

    duration = time.time() - start
    print('Training complete in {:.0f}h {:.0f}m {:.2f}s'.format(
        duration // 3600, (duration % 3600) // 60, duration % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    return best_acc
# ---------------------------------------------------------------------------
# Experiment setup: data pipeline, network, optimisation, and training run.
# ---------------------------------------------------------------------------

# Batch sizes and the (tensor-conversion only) input transform.
trainBS = testBS = 100
myTransform = transforms.Compose([transforms.ToTensor()])

# Datasets and loaders; only the training loader shuffles.
trainDataset = myDataset(
    trainImage,
    torch.Tensor(trainLabel).long(),
    classes,
    transform=myTransform,
    to_onehot=True,
)
trainLoader = DataLoader(
    trainDataset,
    batch_size=trainBS,
    shuffle=True,
    num_workers=8,
)
testDataset = myDataset(
    testImage,
    torch.Tensor(testLabel).long(),
    classes,
    transform=myTransform,
    to_onehot=True,
)
testLoader = DataLoader(
    testDataset,
    batch_size=testBS,
    shuffle=False,
    num_workers=8,
)

# ResNet-18 trained from scratch, with the classifier head resized to the
# number of classes and an adaptive average pool before it.
net = models.resnet18(pretrained=False)
net.avgpool = nn.AdaptiveAvgPool2d((1, 1))
fc_features = net.fc.in_features
net.fc = nn.Linear(fc_features, len(classes))
net = net.cuda()

# Loss, SGD with Nesterov momentum, and a step-wise learning-rate decay.
lr, momentum, weightDecay = 0.1, 0.9, 5e-4
criterion = myLoss().cuda()
optimizer = optim.SGD(
    net.parameters(),
    lr=lr,
    momentum=momentum,
    weight_decay=weightDecay,
    nesterov=True,
)
expLrScheduler = optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[50, 75],
    gamma=0.1,
)

# Where checkpoints and logs are written.
modelPath = 'xxxxxxxxxxxxxx/pytorch_model_learnt'
modelName = 'resnet18.ckpt'

# Run training; the test split doubles as the validation split.
best_acc = train_val(
    net=net,
    optimizer=optimizer,
    n_epochs=10,
    trainBS=trainBS,
    valBS=testBS,
    trainDataset=trainDataset,
    trainLoader=trainLoader,
    valDataset=testDataset,
    valLoader=testLoader,
    expLrScheduler=expLrScheduler,
    modelPath=modelPath,
    modelName=modelName,
)