初步接触深度学习,使用 pytorch 框架,关键代码如下:
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 50, 5, stride=1, padding=1, bias=False)
self.pool1 = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(50, 100, 7, stride=1, padding=1, bias=False)
self.pool2 = nn.MaxPool2d(2, 2)
self.fc1 = nn.Linear(1600 , 400)
self.fc2 = nn.Linear(400, 100)
self.fc3 = nn.Linear(100, 2)
def forward(self, x):
x = self.pool1(F.relu(self.conv1(x)))
x = self.pool2(F.relu(self.conv2(x)))
x = x.view(-1, self.num_flat_features(x))
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
def num_flat_features(self, x):
size = x.size()[1:] # all dimensions except the batch dimension
num_features = 1
for s in size:
num_features *= s
return num_features
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
for epoch in range(5): # loop over the dataset multiple times
running_loss = 0.0
for i, data in enumerate(data_loader, 0):
# get the inputs; data is a list of [inputs, labels]
inputs = data['image']
labels = data['label']
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
# print statistics
running_loss += loss.item()
if i % 2000 == 1999: # print every 2000 mini-batches
print('[%d, %5d] loss: %.3f' %
(epoch + 1, i + 1, running_loss / 2000))
running_loss = 0.0
print('Finished Training')
inputs(输入值):
tensor([[[[-0.3725, -0.3412, -0.3098, ..., 0.5451, 0.5686, 0.6314],
[-0.3020, -0.3098, -0.3333, ..., 0.4039, 0.5451, 0.5294],
[-0.2078, -0.2784, -0.3255, ..., -0.0118, 0.2471, 0.5216],
...,
[-0.4431, -0.5294, -0.4902, ..., -1.0000, -0.1216, 0.4588],
[-0.4196, -0.4431, -0.5451, ..., -0.1843, 0.5451, 0.5294],
[-0.2549, -0.4039, -0.5686, ..., 0.4824, 0.5294, 0.5137]]],
[[[ 0.4588, 0.3882, 0.3490, ..., 0.0745, 0.3725, 0.5451],
[ 0.5451, 0.4902, 0.4745, ..., 0.0902, 0.4588, 0.5922],
[ 0.5608, 0.5373, 0.5373, ..., 0.2784, 0.5216, 0.6314],
...,
label(标签):
tensor([3, 4, 1, 5, 6, 3, 4, 0, 4, 3, 6, 3, 3, 3, 3, 3, 6, 3, 0, 5, 3, 3, 6, 0,
5, 3, 0, 0, 2, 4, 3, 3, 4, 5, 4, 2, 2, 6, 3, 3, 3, 3, 3, 0, 3, 5, 3, 3,
6, 5])
output(网络输出值?)
tensor([[ 0.0508, -0.0647],
[ 0.0380, -0.0434],
[ 0.0412, -0.0596],
[ 0.0499, -0.0559],
[ 0.0579, -0.0619],
[ 0.0455, -0.0614],
[ 0.0494, -0.0628],
[ 0.0494, -0.0491],
[ 0.0439, -0.0690],
[ 0.0512, -0.0562],
[ 0.0491, -0.0516],
[ 0.0493, -0.0699],
[ 0.0468, -0.0654],
[ 0.0606, -0.0682],
[ 0.0603, -0.0597],
[ 0.0522, -0.0604],
[ 0.0422, -0.0535],
[ 0.0616, -0.0678],
[ 0.0366, -0.0472],
[ 0.0388, -0.0439],
[ 0.0575, -0.0728],
[ 0.0408, -0.0586],
[ 0.0426, -0.0641],
[ 0.0467, -0.0601],
[ 0.0389, -0.0470],
[ 0.0481, -0.0654],
[ 0.0575, -0.0677],
[ 0.0484, -0.0633],
[ 0.0398, -0.0525],
[ 0.0490, -0.0641],
[ 0.0438, -0.0625],
[ 0.0429, -0.0583],
[ 0.0426, -0.0521],
[ 0.0692, -0.0447],
[ 0.0541, -0.0779],
[ 0.0330, -0.0317],
[ 0.0473, -0.0488],
[ 0.0484, -0.0528],
[ 0.0438, -0.0542],
[ 0.0414, -0.0508],
[ 0.0443, -0.0413],
[ 0.0483, -0.0577],
[ 0.0426, -0.0607],
[ 0.0430, -0.0570],
[ 0.0472, -0.0560],
[ 0.0452, -0.0608],
[ 0.0381, -0.0407],
[ 0.0396, -0.0379],
[ 0.0444, -0.0607],
[ 0.0564, -0.0578]], grad_fn=<AddmmBackward>)
CrossEntropyLoss
就会报错:indexError: Target 2 is out of bounds.
1
heart4lor 2020-03-13 23:41:42 +08:00
看起来 output 和 label 的 shape 不一样啊?
|
2
heart4lor 2020-03-13 23:47:47 +08:00
把最后一个全连接层 fc3 改成(100, 1)试试
|
3
huntzhan 2020-03-14 00:08:06 +08:00 1
|
4
ipwx 2020-03-14 00:11:47 +08:00 1
self.fc3 = nn.Linear(100, 7)
|
5
neosfung 2020-03-14 00:31:02 +08:00 1
self.fc3 = nn.Linear(100, 2) 输出是两个类别的 probability
但是你的 label 是有 7 种 |
6
longbye0 2020-03-14 01:38:54 +08:00 1
@heart4lor 别误导人。pytorch 的 CE 会自动做 softmax 和 nll 的,所以 label 的 shape 是(N,)。
如果你真要做 7 分类,也就是 label 是[0,1,2,3,4,5,6] 中的之一,#4#5 就是对的。 to 楼主,至少学个 ufldl 或者 ng 的机器学习再来调包吧,科班一点看个几章 prml 或者花书总要吧。 |
7
SlipStupig OP |
8
ipwx 2020-03-14 14:33:32 +08:00
顺便提一句,如果是二分类,self.fc3 = nn.Linear(100, 1) 就行了,然后用 nn. BCEWithLogitsLoss
|
9
chizuo 2020-03-16 13:54:05 +08:00
@ipwx 二分类 loutdim=2 也可以的,配 crossentropy 和 outdim=1 与 BCEWithLogitsLoss 效果一样的。
|
10
ipwx 2020-03-16 13:59:19 +08:00
@chizuo PyTorch 有特殊处理嘛? softmax 和 sigmoid 的自由度还是不一样的。二分类的 softmax 自由度是 2,sigmoid 自由度是 1。这种场景下我觉得自由度少 1 更好。
|
11
chizuo 2020-03-16 14:17:02 +08:00
@ipwx 二分类这跟自由度有什么关系呢?? softmax 在二分类问题中与 sigmoid 效果“一模一样”。具体你把 p1, p2 分别用 sigmoid 和 softmax 写出来就明白了
|
12
ipwx 2020-03-16 14:58:01 +08:00
|
13
ipwx 2020-03-16 14:59:13 +08:00
第二种自由度大了以后,x_1 和 x_2 也许会发生 covariance shift,导致训练一直在向没有意义的方向走。虽然 momemtum 方法以及后续的 adaxxx 方法都在这方面有长足改进,还有 batch norm 之类的抵抗 covariance shift。但是能去掉一点风险就是一点风险,不是么?
|
14
chizuo 2020-03-16 15:44:58 +08:00
|