PyTorch model loading problems (2019-04-15)


Errors when loading my own model

The problem is as described above; I searched all over the web and couldn't find a solution.

But my model definition isn't wrong, because the model runs fine.

And I didn't add anything extra either. For reference, here is the model file:

```python
import torch
from torch import nn
from torch.nn import init
import torch.nn.functional as F

# VGG-16 layout for 352x352 inputs; 'M' marks a max-pool layer
base = {'352': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']}
# conv-layer indices used to size the side branches (see extra_layer)
extra = {'352': [2, 7, 14, 21, 28]}

# build the VGG backbone as a flat list of layers
def vgg(cfg, i, batch_norm=False):
    layers = []
    in_channels = i
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return layers


# conv block plus local-contrast branch: returns the feature map and x - avgpool(x)
class ConvConstract(nn.Module):
    def __init__(self, in_channel):
        super(ConvConstract, self).__init__()
        self.conv1 = nn.Conv2d(in_channel, 128, kernel_size=3, padding=1)
        self.cons1 = nn.AvgPool2d(3, stride=1, padding=1)

    def forward(self, x):
        x = F.relu(self.conv1(x), inplace=True)
        x2 = self.cons1(x)
        return x, x - x2

# one contrast branch per tap point, plus 1x1 fusion and transposed-conv upsampling layers
def extra_layer(vgg, cfg):
    feat_layers, pool_layers = [], []
    for k, v in enumerate(cfg):
        feat_layers += [ConvConstract(vgg[v].out_channels)]
        if k == 0:
            pool_layers += [nn.Conv2d(128 * (6 - k), 128 * (5 - k), 1)]
        else:
            # TODO: change this to sampling
            pool_layers += [nn.ConvTranspose2d(128 * (6 - k), 128 * (5 - k), 3, 2, 1, 1)]
    return vgg, feat_layers, pool_layers


# NLDF saliency model: VGG trunk, multi-scale contrast features, global branch, fused score
class NLDF(nn.Module):
    def __init__(self, base, feat_layers, pool_layers):
        super(NLDF, self).__init__()
        self.pos = [4, 9, 16, 23, 30]
        self.base = nn.ModuleList(base)
        self.feat = nn.ModuleList(feat_layers)
        self.pool = nn.ModuleList(pool_layers)
        self.glob = nn.Sequential(nn.Conv2d(512, 128, 5), nn.ReLU(inplace=True), nn.Conv2d(128, 128, 5),
                                  nn.ReLU(inplace=True), nn.Conv2d(128, 128, 3))
        self.conv_g = nn.Conv2d(128, 1, 1)
        self.conv_l = nn.Conv2d(640, 1, 1)

    def forward(self, x):
        sources, num = list(), 0
        for k in range(len(self.base)):
            x = self.base[k](x)
            if k in self.pos:
                sources.append(self.feat[num](x))
                num = num + 1
        for k in range(4, -1, -1):
            if k == 4:
                out = F.relu(self.pool[k](torch.cat([sources[k][0], sources[k][1]], dim=1)), inplace=True)
            else:
                out = self.pool[k](torch.cat([sources[k][0], sources[k][1], out], dim=1)) if k == 0 else F.relu(
                    self.pool[k](torch.cat([sources[k][0], sources[k][1], out], dim=1)), inplace=True)

        score = self.conv_g(self.glob(x)) + self.conv_l(out)
        prob = torch.sigmoid(score)
        return prob


def build_model():
    return NLDF(*extra_layer(vgg(base['352'], 3), extra['352']))


def xavier(param):
    init.xavier_uniform_(param)


def weights_init(m):
    if isinstance(m, nn.Conv2d):
        xavier(m.weight.data)
        m.bias.data.zero_()
```
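For context, `weights_init` is the usual hook for `Module.apply`, which visits every submodule; a minimal usage sketch (this call site is my assumption, it does not appear in the original):

```python
model = build_model()
model.apply(weights_init)  # Xavier-initialize every Conv2d weight and zero its bias
```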
And here is the visualization script where the loading fails:

```python
import os
import numpy as np

import torch
import torch.nn as nn
import torchvision.models as models
from torch.autograd import Variable
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
from visualization.nldf1 import build_model
from PIL import Image
TARGET_IMG_SIZE = 352

def make_model():
    model = build_model()
    model.load_state_dict(torch.load('E:\\实验数据\\1cr:1bou\\best.pth', map_location=lambda storage, loc: storage))
    # yy = torch.sum(model.features[2].weight.data)
    # print(yy)
    # model.load_state_dict(torch.load('E:\\实验数据\\1cr:1bou\\best.pth', map_location=lambda storage, loc: storage),
    #                       strict=False)

    # a ModuleList slice is not callable, so wrap the first four VGG layers
    # (conv1_1, ReLU, conv1_2, ReLU) in nn.Sequential before using them
    model = nn.Sequential(*model.base[:4])
    model = model.eval()  # keep this: otherwise layers like BatchNorm/Dropout stay in training mode and change the results

    return model



def extract_feature(model, imgpath):
    model.eval()  # must be in eval mode, otherwise the extracted features change

    img = Image.open(imgpath)  # load the image
    img = img.resize((TARGET_IMG_SIZE, TARGET_IMG_SIZE))
    tensor = transforms.ToTensor()(img).unsqueeze(0)  # convert the image to a [1, C, H, W] tensor

    result = model(Variable(tensor))
    result_npy = result.data.cpu().numpy()  # move to CPU before converting to numpy, or it may fail

    return result_npy[0]  # drop the batch dimension: [1, C, H, W] -> [C, H, W]

# arrange num_pic feature maps in a near-square grid
def get_row_col(num_pic):
    squr = num_pic ** 0.5
    row = round(squr)
    col = row + 1 if squr - row > 0 else row
    return row, col

if __name__ == "__main__":
    plt.figure(figsize=(20, 20))
    model = make_model()
    imgpath = '001420.jpg'
    tmp = extract_feature(model, imgpath)

    row, col = get_row_col(64)
    for i in range(0, 64):
        feature_map_split = tmp[i, :, :]

        plt.subplot(row, col, i + 1)
        plt.imshow(feature_map_split)
        plt.axis('off')
    plt.savefig('123.png')  # save the figure to disk
    plt.show()
```
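One aside on the eval() comments above: model.eval() only switches layers such as BatchNorm and Dropout to inference mode; the slowdown from gradient bookkeeping is avoided with torch.no_grad(). A sketch of how the forward call in extract_feature could be written (my suggestion, not the original code; it also drops the deprecated Variable wrapper):

```python
with torch.no_grad():            # no autograd bookkeeping during feature extraction
    result = model(tensor)
result_npy = result.cpu().numpy()
```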

I can't tell where the problem is... I'm at a loss.

Model device-type problems

PyTorch lets you load a model trained on the GPU onto the CPU, and a model trained on the CPU onto the GPU, but you cannot load it directly without a map_location.

GPU->CPU

```python
torch.load('XXX.pth', map_location=lambda storage, loc: storage)
```

CPU->GPU

```python
torch.load('XXX.pth', map_location=lambda storage, loc: storage.cuda())
```
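As a side note, torch.load also accepts a device string or torch.device for map_location, which reads a bit more clearly (a small sketch; 'XXX.pth' is the placeholder from above):

```python
import torch

state_cpu = torch.load('XXX.pth', map_location='cpu')                   # GPU -> CPU
state_gpu = torch.load('XXX.pth', map_location=torch.device('cuda:0'))  # CPU -> GPU
```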

Using your own checkpoint with a similar model

This usually produces an error like:

```
unexpected key module.xxx.weight
```

This is where `strict` comes in:

```python
model.load_state_dict(torch.load(file_path, map_location=lambda storage, loc: storage), strict=False)
```

You must pass strict=False. One caveat: for a one-off run this works fine, but if you load this way repeatedly, the values you read out differ from run to run, and I haven't figured out why. Loading with strict=False means "keep what fits, drop what doesn't": load_state_dict matches parameters by key, and any mismatched key normally raises an error. With strict=False, mismatched keys are simply ignored, while matching keys are assigned as usual.
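A likely explanation for the run-to-run differences (my inference, not confirmed in the source): with strict=False, any parameter missing from the checkpoint keeps its random initialization, which changes on every run unless you seed the RNG. Also, the `module.` prefix on the unexpected keys usually means the checkpoint was saved from a model wrapped in nn.DataParallel. Below is a minimal sketch of the manual equivalent of strict=False that also strips that prefix; `file_path` is the placeholder from above:

```python
import torch

torch.manual_seed(0)          # seed before building the model so its random init repeats
model = build_model()

state = torch.load(file_path, map_location=lambda storage, loc: storage)
# strip a DataParallel 'module.' prefix from checkpoint keys, if present
state = {k[len('module.'):] if k.startswith('module.') else k: v for k, v in state.items()}

model_dict = model.state_dict()
# keep only entries whose key and shape both match the current model
filtered = {k: v for k, v in state.items() if k in model_dict and v.shape == model_dict[k].shape}
model_dict.update(filtered)
model.load_state_dict(model_dict)  # a strict load now succeeds: every key matches
```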

PyTorch multi-process data loading errors on Windows

```
AttributeError: Can't pickle local object 'get_loader.<locals>.<lambda>'
```

The problem is as shown above, but everything Google turns up is some other variant of the error, none of them mine.

It also raises a second error:

```
EOFError: Ran out of input
```


No solution anywhere: the error is in the data-loading module, but my program works fine on Linux; only on Windows does it fail, and I don't know why.

Solution

```python
data.DataLoader(dataset=dataset, batch_size=1, shuffle=False, num_workers=num_thread)
```

Set num_workers to 0, or simply don't pass it at all. I don't know why this is needed: this was on a Windows server with 16 CPU cores, so I don't understand why it can't run several worker processes, while on Linux, also with 16 cores, it works fine.
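The likely cause, judging by the traceback (this is my reading; the source leaves it open): on Windows, multiprocessing starts DataLoader workers with spawn rather than fork, so everything handed to a worker must be picklable, and a lambda defined inside get_loader, which is exactly what 'get_loader.<locals>.<lambda>' points at, cannot be pickled; on Linux, fork simply inherits it. num_workers=0 sidesteps multiprocessing entirely. Here is a sketch that keeps multiple workers by moving the lambda to module level; get_loader's signature, the collate role of the lambda, and the toy dataset are assumptions for illustration:

```python
import torch
from torch.utils import data

def my_collate(batch):
    # module-level (picklable) replacement for the original in-function lambda;
    # with batch_size=1 it just unwraps the single sample
    return batch[0]

def get_loader(dataset, num_thread):
    return data.DataLoader(dataset=dataset, batch_size=1, shuffle=False,
                           num_workers=num_thread, collate_fn=my_collate)

if __name__ == '__main__':  # the entry-point guard is also required on Windows
    dataset = data.TensorDataset(torch.arange(8).float())
    for sample in get_loader(dataset, num_thread=2):
        print(sample)
```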