resnest50模型的图像分类baseline

深度学习是在b站跟着李沐老师学的,选的kaggle竞赛也是动手学深度学习里的.

因为大二要跟老师做大创,所以做些kaggle竞赛来检验一下学习成果. 同时也是记录一下图像分类的方法方便以后再使用,代码是NEKO KIKU的simple resnet baseline. 自己完全理解后写的本文.

关于数据

训练样本数	预测样本数	类别数	图片尺寸
18353	8800	176	224*224

数据图片1 数据图片2 数据图片3

创建自己的Dataset

class MyDataset(Dataset):
    def __init__(self, csv_path, file_path, mode='train', valid_ratio=0.2,
                 resize_height=224, resize_width=224):
        # csv_path (string): csv 文件路径
        
        # img_path (string): 图像文件所在路径
        
        # mode (string): 训练模式还是测试模式
        
        # valid_ratio (float): 验证集比例
        
        # resize_height, resize_width (int): 调整图片尺寸
        
        self.resize_height = resize_height
        self.resize_width = resize_width
        self.file_path = file_path
        self.mode = mode
        
        self.data_info = pd.read_csv(csv.path, header=None)
        # header=None为去掉表头部分
        
        self.data_len = len(self.data_info.index) - 1
        self.train_len = int(self.data_len * (1 - valid_ratio)) 
        #训练样本数 
        
        if mode == 'train': 
            self.train_image = np.asarray(self.data_info.iloc[1:self.train_len, 0])
            self.train_label = np.asarray(self.data_info.iloc[1:self.train_len, 1])
            self.image_arr = self.train_image
            self.label_arr = self.train_label
        elif mode == 'valid': 
            self.test_image = np.asarray(self.data_info.iloc[self.train_len:, 0])
            self.test_label = np.asarray(self.data_info.iloc[self.train_len:, 1])
            self.image_arr = self.train_image
            self.label_arr = self.train_label
        elif mode == 'test': 
            self.test_image = np.asarray(self.data_info.iloc[1:, 0])
            self.image_arr = self.test_image
            
        self.len = len(self.image_arr)
        
        print('{}数据集,{}个样本'.format(mode, self.len))
        
    def __getitem__(self, index):
        # 获取文件名
        
        image_name = self.image_arr[index] 
        
        # 读取图片
        
        img = Image.open(self.file_path + image_name) 
        
        # 训练模式进行图像增广
        
        if self.mode == 'train': 
            transform = transforms.Compose([
                # 图像裁剪
                
                transforms.RandomResizedCrop(64, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0)),
                # 随机水平翻转
                
                transforms.RandomHorizontalFlip(),
                # 修改亮度,对比度,饱和度
                
                transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])
        else: 
            # 验证集与测试集无需图像增广 
            
            transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])
            
        # 图像增广
        
        img = transform(img) 
        
        if self.mode == 'test':
            # 测试集仅返回图片
            
            return img
        else:
            # 训练集返回图片与对应的类别标签
            
            num_labels = class_to_num[self.label_arr[index]]
            # 此处需先进行类别到数字的映射
            
            return img, num_labels
        
    def __len__(self):
        return self.len

继承pytorch的dataset,创建自己的dataset. 可以将数据转换为可迭代的iter,便于训练和预测.
主要实现为在__init__中读取csv文件,提取特征与标签.
在__getitem__中返回特征与标签(测试集仅返回特征)并进行图像增广.
在__len__中返回样本数.

resnest50模型使用

ResNeSt50残差网络

ResNeSt50 ResNeSt 的全称是：Split-Attention Networks，特别引入了Split-Attention模块。
具体还不懂,爬去看论文了:(

resnest库安装

pip install resnest --pre

resnest50导入

from resnest.torch import resnest50

模型使用

def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        model = model
        for param in model.parameters():
            param.requires_grad = False
            
def res_model(num_classes, feature_extract=False, use_pretrained=True):

    # pretrained=True导入模型预训练参数
    
    model_ft = resnest50(pretrained=use_pretrained)
    # set_parameter_requires_grad函数冻住前面的层
    
    set_parameter_requires_grad(model_ft, feature_extract)
    
    # 修改全连接层
    
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Sequential(nn.Linear(num_ftrs, num_classes))

    return model_ft

# 176为当前数据集类别数

model = res_model(176)

训练/测试/预测

超参数设置

学习率	权重衰减	训练轮数	损失函数	优化器
1e-4	1e-3	50	交叉熵损失	Adam

训练与测试

learning_rate = 1e-4
weight_decay = 1e-3
num_epoch = 50
model_path = '/kaggle/working/pre_res_model.ckpt'

model = res_model(176)
model = model.to(device)
model.device = device

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate, weight_decay=weight_decay)

n_epochs = num_epoch
best_acc = 0.0
for epoch in range(n_epochs):
    
    model.train() 
    train_loss = []
    train_accs = []
    for batch in tqdm(train_loader):
        imgs, labels = batch
        imgs = imgs.to(device)
        labels = labels.to(device)
        
        logits = model(imgs)
        loss = criterion(logits, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        acc = (logits.argmax(dim=-1) == labels).float().mean()
        train_loss.append(loss.item())
        train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")
   
    model.eval()
    valid_loss = []
    valid_accs = []
    # tqdm为进度条显示
    
    for batch in tqdm(val_loader):
        imgs, labels = batch
        with torch.no_grad():
            logits = model(imgs.to(device))
            
        loss = criterion(logits, labels.to(device))
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        valid_loss.append(loss.item())
        valid_accs.append(acc)
        
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")
    # 更新最佳模型并保存
    
    if valid_acc > best_acc:
        best_acc = valid_acc
        torch.save(model.state_dict(), model_path)
        print('saving model with acc {:.3f}'.format(best_acc))

tqdm显示训练进度条很好用:D
每轮训练时储存最好的模型参数,发现损失不下降了可以提前终止训练,直接进行预测.

预测

saveFileName = '/kaggle/working/submission.csv'
model = res_model(176)
model = model.to(device)
# 读取保存的最佳模型 

model.load_state_dict(torch.load(model_path))

model.eval()
predictions = []
for batch in tqdm(test_loader):
    
    imgs = batch
    with torch.no_grad():
        logits = model(imgs.to(device))
    
    predictions.extend(logits.argmax(dim=-1).cpu().numpy().tolist())

preds = []
for i in predictions:
    preds.append(num_to_class[i])

test_data = pd.read_csv(test_path)
test_data['label'] = pd.Series(preds)
submission = pd.concat([test_data['image'], test_data['label']], axis=1)
submission.to_csv(saveFileName, index=False)