# Counting and locating zero / non-zero entries of a tensor.
torch.nonzero(tensor)                # indices of the non-zero elements
torch.nonzero(tensor == 0)           # indices of the zero elements
torch.nonzero(tensor).size(0)        # number of non-zero elements
torch.nonzero(tensor == 0).size(0)   # number of zero elements
判断两个张量相等
1 2
# Testing whether two tensors are equal.
torch.allclose(tensor1, tensor2)  # for float tensors (tolerance-based)
torch.equal(tensor1, tensor2)     # for int tensors (exact match)
张量扩展
1 2 3
# Expand a tensor of shape (64, 512) to shape (64, 512, 7, 7):
# first add two singleton dims, then broadcast them with expand (no copy).
tensor = torch.rand(64, 512)
torch.reshape(tensor, (64, 512, 1, 1)).expand(64, 512, 7, 7)
# Copy into a new model only the weights it shares with a saved model.
# model_new:   the freshly constructed model.
# model_saved: a saved state-dict, e.g. the result of torch.load.
# NOTE(review): model_saved must already be a dict of tensors (a
# state_dict), not an nn.Module — .items() is called on it directly;
# confirm against the caller.
model_new_dict = model_new.state_dict()
model_common_dict = {k: v for k, v in model_saved.items() if k in model_new_dict}
model_new_dict.update(model_common_dict)
model_new.load_state_dict(model_new_dict)
# Loss and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model.
# BUG FIX: the original text had extraction-mangled keywords
# (`for epoch inrange(...)`, `inenumerate(...)`) which are syntax
# errors; restored `in range` / `in enumerate`.
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
# NOTE(review): truncated fragment of a label-smoothing loss __init__ —
# the enclosing `def __init__(...)` header was lost in extraction, and the
# three statements below were collapsed onto one line (invalid as-is).
# Presumably `e` is the smoothing factor and `reduction` selects
# none/mean/sum (see the forward fragment further down) — TODO confirm.
self.log_softmax = nn.LogSoftmax(dim=1) self.e = e self.reduction = reduction
def _one_hot(self, labels, classes, value=1):
    """Convert labels to one-hot vectors.

    Args:
        labels: torch tensor in format [label1, label2, label3, ...]
        classes: int, number of classes
        value: label value placed in the one-hot vector, default to 1

    Returns:
        One-hot format labels of shape [batchsize, classes].
    """
    one_hot = torch.zeros(labels.size(0), classes)

    # labels and value_added sizes must match: both [batchsize, 1].
    labels = labels.view(labels.size(0), -1)
    value_added = torch.Tensor(labels.size(0), 1).fill_(value)

    # NOTE(review): the tail of this method (the scatter and the return)
    # was lost in extraction; reconstructed the standard
    # scatter-and-return — confirm against the original implementation.
    # labels must be an integer (int64) tensor for the scatter index.
    one_hot.scatter_add_(1, labels, value_added)
    return one_hot
def _smooth_label(self, target, length, smooth_factor):
    """Convert targets to one-hot format and smooth them.

    Fixed the extraction-mangled header `def_smooth_label` -> `def _smooth_label`.

    Args:
        target: targets in the form [label1, label2, ..., label_batchsize]
        length: length of the one-hot format (number of classes)
        smooth_factor: smoothing factor for label smoothing

    Returns:
        Smoothed labels in one-hot format, moved to target's device.
    """
    # True class starts at 1 - smooth_factor; afterwards EVERY entry
    # (including the true class) gains smooth_factor / (length - 1).
    one_hot = self._one_hot(target, length, value=1 - smooth_factor)
    one_hot += smooth_factor / (length - 1)
    return one_hot.to(target.device)
def forward(self, x, target):
    """Compute the label-smoothed cross-entropy loss.

    Fixed the extraction-mangled header `defforward` -> `def forward`.

    Args:
        x: raw (un-normalized) logits of shape [batch, classes].
        target: integer class labels of shape [batch].

    Returns:
        The per-sample loss, reduced according to self.reduction.

    Raises:
        ValueError: on batch-size mismatch, non-2D input, or an
            unrecognized reduction option.
    """
    if x.size(0) != target.size(0):
        raise ValueError('Expected input batchsize ({}) to match target batch_size({})'
                         .format(x.size(0), target.size(0)))

    if x.dim() < 2:
        raise ValueError('Expected input tensor to have least 2 dimensions(got {})'
                         .format(x.size(0)))

    if x.dim() != 2:
        raise ValueError('Only 2 dimension tensor are implemented, (got {})'
                         .format(x.size()))

    smoothed_target = self._smooth_label(target, x.size(1), self.e)
    x = self.log_softmax(x)
    loss = torch.sum(- x * smoothed_target, dim=1)

    # NOTE(review): the reduction branches between the loss computation and
    # the final `else` were lost in extraction; reconstructed from the
    # surviving error message — confirm against the original implementation.
    if self.reduction == 'none':
        return loss
    elif self.reduction == 'sum':
        return torch.sum(loss)
    elif self.reduction == 'mean':
        return torch.mean(loss)
    else:
        raise ValueError('unrecognized option, expect reduction to be one of none, mean, sum')
或者直接在训练文件里做label smoothing
1 2 3 4 5 6 7 8 9 10 11 12 13
# Label smoothing done directly inside the training loop:
# the true class receives probability 0.9, the remaining 0.1 is split
# evenly over the other C - 1 classes.
for images, labels in train_loader:
    images, labels = images.cuda(), labels.cuda()
    N = labels.size(0)
    # C is the number of classes.
    smoothed_labels = torch.full(size=(N, C), fill_value=0.1 / (C - 1)).cuda()
    smoothed_labels.scatter_(dim=1, index=torch.unsqueeze(labels, dim=1), value=0.9)

    score = model(images)
    log_prob = torch.nn.functional.log_softmax(score, dim=1)
    # Cross-entropy against the smoothed distribution, averaged over the batch.
    loss = -torch.sum(log_prob * smoothed_labels) / N
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
Mixup训练
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
# Mixup training: the mixing coefficient is sampled from Beta(alpha, alpha).
beta_distribution = torch.distributions.beta.Beta(alpha, alpha)
for images, labels in train_loader:
    images, labels = images.cuda(), labels.cuda()
    # NOTE(review): the rest of the mixup body (sampling lambda, mixing the
    # images/labels and computing the mixed loss) appears truncated in this
    # source extraction.
# L1 regularization: add the sum of absolute weights to the task loss.
# BUG FIX: the original also built `torch.nn.L1Loss(reduction='sum')` but
# never used it (dead code) — removed. Consider scaling the penalty by a
# small coefficient (e.g. 1e-4) before adding it, otherwise it can
# dominate the task loss.
loss = ...  # standard cross-entropy loss
for param in model.parameters():
    loss += torch.sum(torch.abs(param))
loss.backward()
不对偏置项进行权重衰减(weight decay)
pytorch里的weight decay相当于l2正则
1 2 3 4 5
# Apply weight decay to all parameters except biases
# (weight decay in torch.optim is equivalent to L2 regularization).
bias_list = (param for name, param in model.named_parameters()
             if name.endswith('bias'))
others_list = (param for name, param in model.named_parameters()
               if not name.endswith('bias'))
# BUG FIX: optimizer parameter groups use the key 'params', not
# 'parameters' — the original dicts would make torch.optim.SGD raise.
parameters = [{'params': bias_list, 'weight_decay': 0},
              {'params': others_list}]
optimizer = torch.optim.SGD(parameters, lr=1e-2, momentum=0.9, weight_decay=1e-4)
class FeatureExtractor(torch.nn.Module):
    """Extract several intermediate features from a pre-trained model.

    Fixed the extraction-mangled headers (`classFeatureExtractor`,
    `def__init__`, `defforward`); behavior unchanged.

    Attributes:
        _model: torch.nn.Module, the backbone (switched to eval mode).
        _layers_to_extract: set of child-module names whose outputs
            are collected during forward.

    Example:
        >>> model = torchvision.models.resnet152(pretrained=True)
        >>> model = torch.nn.Sequential(collections.OrderedDict(
        ...     list(model.named_children())[:-1]))
        >>> conv_representation = FeatureExtractor(
        ...     pretrained_model=model,
        ...     layers_to_extract={'layer1', 'layer2', 'layer3', 'layer4'})(image)
    """

    def __init__(self, pretrained_model, layers_to_extract):
        super().__init__()  # idiomatic replacement for torch.nn.Module.__init__(self)
        self._model = pretrained_model
        self._model.eval()  # inference mode: freezes batch-norm/dropout behavior
        self._layers_to_extract = set(layers_to_extract)

    def forward(self, x):
        """Run x through the children in order; return the outputs of the
        children whose names are in _layers_to_extract."""
        with torch.no_grad():
            conv_representation = []
            for name, layer in self._model.named_children():
                x = layer(x)
                if name in self._layers_to_extract:
                    conv_representation.append(x)
            return conv_representation
微调全连接层
1 2 3 4 5
# Fine-tune only the final fully-connected layer of a pre-trained ResNet-18.
model = torchvision.models.resnet18(pretrained=True)
# Freeze all pre-trained weights.
for param in model.parameters():
    param.requires_grad = False
# Replace the last fc layer; a newly constructed layer has requires_grad=True.
model.fc = nn.Linear(512, 100)
# Optimize only the parameters of the new fc layer.
optimizer = torch.optim.SGD(model.fc.parameters(), lr=1e-2,
                            momentum=0.9, weight_decay=1e-4)
以较大学习率微调全连接层,较小学习率微调卷积层
1 2 3 4 5 6
# Fine-tune the fc layer with a larger lr (the group default 1e-2) and the
# convolutional layers with a smaller one (1e-3).
model = torchvision.models.resnet18(pretrained=True)
# Collect the ids of the fc parameters so they can be excluded below.
finetuned_parameters = list(map(id, model.fc.parameters()))
# BUG FIX: the original had extraction-mangled `ifid(p) notin ...`;
# restored `if id(p) not in ...`.
conv_parameters = (p for p in model.parameters() if id(p) not in finetuned_parameters)
parameters = [{'params': conv_parameters, 'lr': 1e-3},
              {'params': model.fc.parameters()}]
optimizer = torch.optim.SGD(parameters, lr=1e-2, momentum=0.9, weight_decay=1e-4)