# ================================================================ #
# Torch Autograd #
# ================================================================ #
import torch
a = torch.tensor([2., 3.], requires_grad=True)
b = torch.tensor([6., 4.], requires_grad=True)
# We create the tensor `Q` from `a` and `b`
Q = 3 * a ** 3 - b ** 2
# Assume `a` and `b` are parameters of a model and `Q` is an error function.
# During training we want the gradients of the error w.r.t. the parameters `a` and `b`.
external_grad = torch.tensor([1., 1.])
Q.backward(gradient=external_grad)
# Calling `.backward()` computes the derivatives of `Q` w.r.t. `a` and `b`.
# Because `Q` is a vector, we pass `external_grad` (i.e. dQ/dQ = [1., 1.]) as the `gradient` argument.
# The gradients are now deposited in `a.grad` and `b.grad`.
# Check the derivatives: dQ/da = 9*a**2 and dQ/db = -2*b
print(9 * a ** 2 == a.grad)
print(-2 * b == b.grad)
'''Result:
tensor([True, True])
tensor([True, True])
'''
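# A minimal equivalent sketch: reducing `Q` to a scalar with `.sum()` yields the
# same gradients without an explicit `gradient` argument, because the upstream
# gradient of a scalar loss is implicitly 1.
a2 = torch.tensor([2., 3.], requires_grad=True)
b2 = torch.tensor([6., 4.], requires_grad=True)
Q2 = 3 * a2 ** 3 - b2 ** 2
Q2.sum().backward()
print(torch.allclose(a2.grad, 9 * a2.detach() ** 2))
print(torch.allclose(b2.grad, -2 * b2.detach()))
'''Result:
True
True
'''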
# In the examples above the derivatives are computed because the input tensors were created
# with `requires_grad=True`. Without it, autograd does not track operations on a tensor and
# no gradients are computed for it.
# In the example below, `x` and `y` have `requires_grad=False` by default.
# The output of an operation requires gradients if at least one of its inputs has `requires_grad=True`.
x = torch.rand(5, 5)
y = torch.rand(5, 5)
z = torch.rand((5, 5), requires_grad=True)
a = x + y # False + False = False
print(f"Does `a` require gradients? : {a.requires_grad}")
b = x + z # False + True = True
print(f"Does `b` require gradients?: {b.requires_grad}")
'''Result:
Does `a` require gradients? : False
Does `b` require gradients?: True
'''
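# A small related sketch: gradient tracking can also be switched off locally with the
# `torch.no_grad()` context manager, even when an input has `requires_grad=True`.
with torch.no_grad():
    c = x + z
print(f"Does `c` require gradients?: {c.requires_grad}")
'''Result:
Does `c` require gradients?: False
'''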
# ================================================================ #
#               Finetuning, Freeze the model weights               #
# ================================================================ #
# In finetuning, we freeze most of the model and typically only modify the classifier layers to make predictions on new labels.
import torchvision
from torch import nn, optim
model = torchvision.models.resnet18(pretrained=True)
# Freeze all the parameters in the network
for param in model.parameters():
    param.requires_grad = False
# Let's say we want to finetune the model on a new dataset with 10 labels.
# In resnet, the classifier is the last linear layer ``model.fc``.
# We can simply replace it with a new linear layer (unfrozen by default) that acts as our classifier.
model.fc = nn.Linear(512, 10)
# Optimize only the classifier: all parameters are registered with the optimizer,
# but only the unfrozen `model.fc` parameters receive gradients and get updated.
optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
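# A quick verification sketch: after freezing, only the new classifier parameters
# should still require gradients.
trainable = [name for name, p in model.named_parameters() if p.requires_grad]
print(trainable)
'''Result:
['fc.weight', 'fc.bias']
'''
# A common alternative is to hand the optimizer only the trainable parameters:
optimizer_fc_only = optim.SGD((p for p in model.parameters() if p.requires_grad),
                              lr=1e-2, momentum=0.9)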