Sekarang kita mencoba hands-on ke kode. Di sini kita menggunakan framework PyTorch untuk membuat Neural Network.
A. Forward
1. First Implementation
Pada implementasi pertama ini kita mencoba mengimplementasikan sebuah NN dengan spesifikasi berikut:
input : 5
output : 1
aktivasi: sigmoid
# First, import PyTorch
import torch
def activation(x):
    """Sigmoid activation function.

    Arguments
    ---------
    x: torch.Tensor
        Input tensor; the sigmoid is applied element-wise.

    Returns
    -------
    torch.Tensor
        ``1 / (1 + exp(-x))`` computed element-wise.
    """
    return 1/(1+torch.exp(-x))
### Generate some data
# Fix the random seed so every run draws the same numbers
torch.manual_seed(7)

# One sample with 5 features, each drawn from a standard normal distribution
features = torch.randn(1, 5)
# "True" weights for the data: same shape as the features, standard normal again
weights = torch.randn_like(features)
# ... and a single true bias term
bias = torch.randn(1, 1)

# Weighted sum of the inputs plus the bias, passed through the sigmoid
y = activation((features * weights).sum() + bias)
2. Second Implementation
input : 3
hidden : 2
output : 1
aktivasi: sigmoid
# First, import PyTorch
import torch
def activation(x):
    """Sigmoid activation function.

    Arguments
    ---------
    x: torch.Tensor
        Input tensor; the sigmoid is applied element-wise.

    Returns
    -------
    torch.Tensor
        ``1 / (1 + exp(-x))`` computed element-wise.
    """
    return 1/(1+torch.exp(-x))
### Generate some data
torch.manual_seed(7)  # Set the random seed so things are predictable

# Features are 3 random normal variables (a single sample)
features = torch.randn((1, 3))

# Define the size of each layer in our network
n_input = features.shape[1]  # Number of input units, must match number of input features
n_hidden = 2                 # Number of hidden units
n_output = 1                 # Number of output units

# Weights for inputs to hidden layer
W1 = torch.randn(n_input, n_hidden)
# Weights for hidden layer to output layer
W2 = torch.randn(n_hidden, n_output)

# and bias terms for hidden and output layers
B1 = torch.randn((1, n_hidden))
B2 = torch.randn((1, n_output))

# Hidden layer: (1, n_input) x (n_input, n_hidden) -> (1, n_hidden), then sigmoid
h = activation(torch.mm(features, W1) + B1)
# Output layer: (1, n_hidden) x (n_hidden, n_output) -> (1, n_output), then sigmoid
output = activation(torch.mm(h, W2) + B2)
3. Third Implementation
input : 784
hidden : 256
output : 10
- hidden layer : sigmoid
- output : softmax
# First, import PyTorch
import torch
def activation(x):
    """Sigmoid activation function.

    Arguments
    ---------
    x: torch.Tensor
        Input tensor; the sigmoid is applied element-wise.

    Returns
    -------
    torch.Tensor
        ``1 / (1 + exp(-x))`` computed element-wise.
    """
    return 1/(1+torch.exp(-x))
def softmax(x):
    """Row-wise softmax.

    Arguments
    ---------
    x: torch.Tensor
        2-D tensor of scores; the reduction runs over ``dim=1``.

    Returns
    -------
    torch.Tensor
        Tensor of the same shape as ``x`` in which every row sums to 1.
    """
    # Subtracting each row's maximum does not change the result mathematically,
    # but it keeps exp() from overflowing to inf (which would produce nan).
    shifted = x - x.max(dim=1, keepdim=True)[0]
    exp_x = torch.exp(shifted)
    return exp_x/torch.sum(exp_x, dim=1).view(-1, 1)
# Flatten the input images
# NOTE(review): `images` is not defined in this snippet; presumably it is a batch
# taken from a DataLoader elsewhere (e.g. 64 images of 28x28 -> shape 64, 784) - confirm.
inputs = images.view(images.shape[0], -1)

# Create parameters
w1 = torch.randn(784, 256)
b1 = torch.randn(256)

w2 = torch.randn(256, 10)
b2 = torch.randn(10)

# Hidden layer: linear transformation followed by the sigmoid
h = activation(torch.mm(inputs, w1) + b1)

# Output layer: raw scores, one column per class
out = torch.mm(h, w2) + b2
# Turn each row of scores into a probability distribution
y = softmax(out)
4. Fourth Implementation (using nn)
input : 784
hidden : 256
output : 10
- hidden layer : sigmoid
- output : softmax
from torch import nn
class Network(nn.Module):
    """Fully connected classifier: 784 inputs -> 256 hidden (sigmoid) -> 10 outputs (softmax)."""

    def __init__(self):
        # nn.Module must be initialised before sub-modules are assigned,
        # otherwise the layers below cannot be registered.
        super().__init__()

        # Inputs to hidden layer linear transformation
        self.hidden = nn.Linear(784, 256)
        # Output layer, 10 units - one for each digit
        self.output = nn.Linear(256, 10)

        # Define sigmoid activation and softmax output
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return class probabilities for a batch ``x`` whose last dimension is 784."""
        # Pass the input tensor through each of our operations
        x = self.hidden(x)
        x = self.sigmoid(x)
        x = self.output(x)
        x = self.softmax(x)

        return x


# Create the network (printing `model` shows its text representation)
model = Network()
5. Fifth Implementation (using Functional)
input : 784
hidden : 256
output : 10
- hidden layer : sigmoid
- output : softmax
import torch.nn.functional as F
from torch import nn
class Network(nn.Module):
    """Same 784 -> 256 -> 10 classifier, with activations applied via torch.nn.functional."""

    def __init__(self):
        # nn.Module must be initialised before sub-modules are assigned,
        # otherwise the layers below cannot be registered.
        super().__init__()

        # Inputs to hidden layer linear transformation
        self.hidden = nn.Linear(784, 256)
        # Output layer, 10 units - one for each digit
        self.output = nn.Linear(256, 10)

    def forward(self, x):
        """Return class probabilities for a batch ``x`` whose last dimension is 784."""
        # Hidden layer with sigmoid activation
        x = F.sigmoid(self.hidden(x))
        # Output layer with softmax activation
        x = F.softmax(self.output(x), dim=1)

        return x
# Create the network (printing `model` shows its text representation)
model = Network()

# TODO: replace this placeholder with a real batch of flattened images (last dimension 784)
data = None

# Call the module itself rather than .forward() so that registered hooks run,
# and skip the forward pass while the placeholder is in place (the layers reject None).
ps = model(data) if data is not None else None
6. Sixth Implementation (using Sequential)
input : 784
hidden : 128, 64
output : 10
- hidden layer : ReLU
- output : softmax
from torch import nn
# Hyperparameters for our network
input_size = 784
hidden_sizes = [128, 64]
output_size = 10

# Build a feed-forward network.
# The activation layers mirror the OrderedDict variant of this same model:
# ReLU after each hidden layer and a softmax over the class dimension at the end.
model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[1], output_size),
                      nn.Softmax(dim=1))

# Forward pass through the network
# TODO: replace this placeholder with a real batch of shape (batch, input_size)
data = None
# Skip the forward pass while the placeholder is in place (nn.Linear rejects None)
ps = model(data) if data is not None else None
# Sequential + orderDict
from torch import nn
from collections import OrderedDict
# Same network as the plain nn.Sequential version, but every layer gets a name,
# so it can be accessed as e.g. model.fc1 instead of model[0].
# input_size, hidden_sizes and output_size are the hyperparameters defined earlier.
model = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(input_size, hidden_sizes[0])),
    ('relu1', nn.ReLU()),
    ('fc2', nn.Linear(hidden_sizes[0], hidden_sizes[1])),
    ('relu2', nn.ReLU()),
    ('output', nn.Linear(hidden_sizes[1], output_size)),
    ('softmax', nn.Softmax(dim=1))]))

# Forward pass through the network
# TODO: replace this placeholder with a real batch of shape (batch, input_size)
data = None
# Skip the forward pass while the placeholder is in place (nn.Linear rejects None)
ps = model(data) if data is not None else None
B. Losses and optimize
1. Losses
# Build a feed-forward network
model = nn.Sequential(nn.Linear(784, 128),
                      nn.ReLU(),
                      nn.Linear(128, 64),
                      nn.ReLU(),
                      nn.Linear(64, 10),
                      nn.LogSoftmax(dim=1))

# NLLLoss expects log-probabilities, which the final LogSoftmax layer provides.
# (nn.CrossEntropyLoss is the alternative when the model outputs raw scores.)
criterion = nn.NLLLoss()

# TODO: put the data here - a batch of flattened images with 784 values each.
# `labels` must hold the matching class indices and is expected to be defined
# outside this snippet.
data = None

# Only run the passes once real data is supplied (nn.Linear rejects None)
if data is not None:
    logps = model(data)
    loss = criterion(logps, labels)

    # No gradient exists yet, so this prints None
    print('Before backward pass: \n', model[0].weight.grad)

    # Backpropagate: fills .grad on every parameter that contributed to the loss
    loss.backward()

    print('After backward pass: \n', model[0].weight.grad)
2. Optimizer
from torch import optim
from torch import nn
model = nn.Sequential(nn.Linear(784, 128),
                      nn.ReLU(),
                      nn.Linear(128, 64),
                      nn.ReLU(),
                      nn.Linear(64, 10),
                      nn.LogSoftmax(dim=1))

# NLLLoss pairs with the LogSoftmax output above
criterion = nn.NLLLoss()
# Optimizers require the parameters to optimize and a learning rate
optimizer = optim.SGD(model.parameters(), lr=0.01)

print('Initial weights - ', model[0].weight)

# `trainloader` is expected to be defined elsewhere (a DataLoader over the training set)
images, labels = next(iter(trainloader))
# Flatten each image to a 784-long vector; using the actual batch size keeps
# this valid even if the batch holds fewer than 64 images
images = images.view(images.shape[0], -1)

# Clear the gradients, do this because gradients are accumulated
optimizer.zero_grad()

# Forward pass, then backward pass, then update weights
output = model(images)
loss = criterion(output, labels)
loss.backward()
print('Gradient -', model[0].weight.grad)

# Take an update step and view the new weights
optimizer.step()
print('Updated weights - ', model[0].weight)
C. Train
1. Train
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import datasets, transforms
# Define a transform to normalize the data.
# MNIST images have a single channel, so mean and std are 1-tuples;
# (x - 0.5) / 0.5 maps the [0, 1] output of ToTensor to [-1, 1].
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,)),
                                ])

# Download and load the training data
trainset = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
## Your solution here
# LogSoftmax on the output so the model emits the log-probabilities NLLLoss expects
model = nn.Sequential(nn.Linear(784, 128),
                      nn.ReLU(),
                      nn.Linear(128, 64),
                      nn.ReLU(),
                      nn.Linear(64, 10),
                      nn.LogSoftmax(dim=1))

criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.003)

epochs = 5
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        # Flatten MNIST images into a 784 long vector
        images = images.view(images.shape[0], -1)

        # Training pass
        # Gradients accumulate across backward() calls, so reset them every batch
        optimizer.zero_grad()

        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Average loss per batch over this epoch
    print("Training loss: {}".format(running_loss/len(trainloader)))
%matplotlib inline
import helper
# Take one batch and keep only its first image, flattened to a (1, 784) row
images, labels = next(iter(trainloader))

img = images[0].view(1, 784)
# Turn off gradients to speed up this part
with torch.no_grad():
    logits = model(img)

# Softmax over the class dimension turns the network output into probabilities.
# (This also holds when the model ends in LogSoftmax: softmax of log-probabilities
# returns the probabilities themselves.)
ps = F.softmax(logits, dim=1)
helper.view_classify(img.view(1, 28, 28), ps)
Sumber: link
2. Train + validation
3. Train + validation +dropout
D. Model post processing
4. Save and Load model
Save model:
Load model:
Informasi tentang arsitektur NN harus ikut disimpan bersama bobotnya. Hal itu memudahkan kita membangun ulang model saat memuatnya kembali, termasuk jika kita ingin melakukan perubahan pada arsitekturnya.
Save model:
import torch
from torchvision import datasets, transforms
# Define a transform to normalize the data.
# Fashion-MNIST images have a single channel, so mean and std are 1-tuples;
# (x - 0.5) / 0.5 maps the [0, 1] output of ToTensor to [-1, 1].
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))])

# Download and load the training data
trainset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Download and load the test data
testset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)
from torch import nn, optim
import torch.nn.functional as F
class Classifier(nn.Module):
    """Four-layer fully connected classifier: 784 -> 256 -> 128 -> 64 -> 10."""

    def __init__(self):
        # nn.Module must be initialised before sub-modules are assigned,
        # otherwise the layers below cannot be registered.
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)

    def forward(self, x):
        """Return log-probabilities of shape (batch, 10) for a batch of inputs."""
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        # log-softmax over the class dimension, suitable for nn.NLLLoss
        x = F.log_softmax(self.fc4(x), dim=1)

        return x
model = Classifier()
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.003)

epochs = 30
steps = 0

# Per-epoch average losses, kept so the curves can be inspected afterwards
train_losses, test_losses = [], []
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        # Gradients accumulate across backward() calls, so reset them every batch
        optimizer.zero_grad()

        log_ps = model(images)
        loss = criterion(log_ps, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    test_loss = 0
    accuracy = 0

    # Turn off gradients for validation, saves memory and computations
    with torch.no_grad():
        for images, labels in testloader:
            log_ps = model(images)
            test_loss += criterion(log_ps, labels).item()

            # exp() turns log-probabilities back into probabilities
            ps = torch.exp(log_ps)
            # Most likely class per sample, shape (batch, 1)
            top_p, top_class = ps.topk(1, dim=1)
            equals = top_class == labels.view(*top_class.shape)
            # Fraction of correct predictions in this batch
            accuracy += torch.mean(equals.type(torch.FloatTensor))

    train_losses.append(running_loss/len(trainloader))
    test_losses.append(test_loss/len(testloader))

    print("Epoch: {}/{}.. ".format(e+1, epochs),
          "Training Loss: {:.3f}.. ".format(running_loss/len(trainloader)),
          "Test Loss: {:.3f}.. ".format(test_loss/len(testloader)),
          "Test Accuracy: {:.3f}".format(accuracy/len(testloader)))
class Classifier(nn.Module):
    """Four-layer classifier (784 -> 256 -> 128 -> 64 -> 10) with dropout on the hidden layers."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)

        # Dropout module with 0.2 drop probability
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Return log-probabilities of shape (batch, 10) for a batch of inputs."""
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)

        # Now with dropout
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))

        # output so no dropout here
        x = F.log_softmax(self.fc4(x), dim=1)

        return x
model = Classifier()
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.003)

epochs = 30
steps = 0

# Per-epoch average losses, kept so the curves can be inspected afterwards
train_losses, test_losses = [], []
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        # Gradients accumulate across backward() calls, so reset them every batch
        optimizer.zero_grad()

        log_ps = model(images)
        loss = criterion(log_ps, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    test_loss = 0
    accuracy = 0

    # Turn off gradients for validation, saves memory and computations
    with torch.no_grad():
        # Evaluation mode disables dropout so validation uses the full network
        model.eval()
        for images, labels in testloader:
            log_ps = model(images)
            test_loss += criterion(log_ps, labels).item()

            # exp() turns log-probabilities back into probabilities
            ps = torch.exp(log_ps)
            # Most likely class per sample, shape (batch, 1)
            top_p, top_class = ps.topk(1, dim=1)
            equals = top_class == labels.view(*top_class.shape)
            # Fraction of correct predictions in this batch
            accuracy += torch.mean(equals.type(torch.FloatTensor))

    # Back to training mode so dropout is active again in the next epoch
    model.train()

    train_losses.append(running_loss/len(trainloader))
    test_losses.append(test_loss/len(testloader))

    print("Epoch: {}/{}.. ".format(e+1, epochs),
          "Training Loss: {:.3f}.. ".format(running_loss/len(trainloader)),
          "Test Loss: {:.3f}.. ".format(test_loss/len(testloader)),
          "Test Accuracy: {:.3f}".format(accuracy/len(testloader)))
# Store the architecture next to the weights so the network can be rebuilt on load.
# Hidden sizes are read from the model's nn.Linear layers in registration order;
# the last Linear is the output layer, so it is dropped from the list.
checkpoint = {'input_size': 784,
              'output_size': 10,
              'hidden_layers': [each.out_features for each in model.modules()
                                if isinstance(each, nn.Linear)][:-1],
              'state_dict': model.state_dict()}

torch.save(checkpoint, 'checkpoint.pth')
Load model:
def load_checkpoint(filepath):
    """Rebuild a network from a checkpoint file and restore its weights.

    Arguments
    ---------
    filepath: str
        Path to a checkpoint holding the keys 'input_size', 'output_size',
        'hidden_layers' and 'state_dict'.

    Returns
    -------
    The reconstructed model with the saved parameters loaded.
    """
    # torch.load unpickles the file, so only load checkpoints from a trusted source.
    checkpoint = torch.load(filepath)
    # NOTE(review): `fc_model` is not imported anywhere in this file; presumably it is
    # a helper module providing Network(input_size, output_size, hidden_layers), and
    # the saved state_dict keys must match that class's parameter names - confirm.
    model = fc_model.Network(checkpoint['input_size'],
                             checkpoint['output_size'],
                             checkpoint['hidden_layers'])
    model.load_state_dict(checkpoint['state_dict'])

    return model


model = load_checkpoint('checkpoint.pth')
E. Transfer Learning
Di sini kita belajar untuk mencoba menggunakan pre-trained network yang sudah jadi sebagai modal awal untuk kasus khusus kita. Yang kita gunakan adalah network-nya (arsitektur beserta bobot hasil training), bukan training set-nya.
Pada contoh di bawah, kita menggunakan pre-trained model dari ImageNet untuk klasifikasi gambar. Kita mengganti bagian classifier-nya agar network tersebut mengklasifikasikan gambar kita menjadi hanya 2 output:
# Local import keeps this snippet runnable on its own (torchvision is already used by this file)
from torchvision import models

# DenseNet-121 with weights pre-trained on ImageNet.
# NOTE(review): newer torchvision releases prefer the `weights=` argument over
# `pretrained=True` - confirm against the installed version.
model = models.densenet121(pretrained=True)

# Freeze parameters so we don't backprop through them
for param in model.parameters():
    param.requires_grad = False

from collections import OrderedDict
# New classification head with 2 outputs. It is created after the freeze above,
# so its parameters still require gradients and are the only ones that get trained.
# NOTE(review): 1024 is presumably the feature size DenseNet-121 feeds its classifier - confirm.
classifier = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(1024, 500)),
    ('relu', nn.ReLU()),
    ('fc2', nn.Linear(500, 2)),
    ('output', nn.LogSoftmax(dim=1))
]))

# Replace the pre-trained classifier with the new head
model.classifier = classifier
