This lab covers transfer learning: taking a pre-trained deep learning model and adapting it to a specific task.
## Torch libraries
import torch
import torchvision
import numpy as np
Our examples will continue to use the Fashion MNIST data set, and there will be questions involving the cats vs. dogs images you worked with in our previous two labs. The goal is to investigate how we can improve upon our past performance by exploiting the features learned by pre-trained models.
Convolutional neural networks have proven to be extremely successful at working with image and video data. However, training an effective network from scratch requires a massive amount of training data and computational resources.
For these reasons, many applications of machine learning use transfer learning, a general term describing the adaptation or reuse of a previously trained model on a new task. For example, we'll adapt a model that performed well on the ImageNet data to classify the 150 dogs and cats from our previous labs. The basic idea is that the pre-trained model has already learned to identify a number of low-level features that can be useful for distinguishing between cats and dogs.
Transfer Learning Approach #1 - Feature Extraction:
There are two common approaches to transfer learning. The first is to use the pretrained model as a "feature extractor" by removing one or more of its later layers and replacing them with new layers that will be trained using the new data. The parameters in the earlier layers of the model are "frozen", meaning they aren't updated via back-propagation as the model trains on the new data. Thus, the newly added layers learn how to use the features identified by the existing layers to make accurate predictions.
The diagram below provides a visual illustration of this framework:
from IPython.display import HTML
HTML('<img src="https://learnopencv.com/wp-content/uploads/2019/05/transfer-learning-1024x574.jpg">')
Transfer Learning Approach #2 - Fine Tuning:
This approach also requires replacing one or more of the final layers with ones of your own; however, this time you'll allow the weights and biases in the earlier layers of the model to be updated during back-propagation while training the network on your new data (rather than being frozen in place as was done in the feature extraction approach).
You could view the approach of using the weights from a pretrained network as an excellent choice of initial values (rather than randomly initializing weights as our previous "from scratch" models had done). However, when adopting a "fine-tuning" approach, you should be especially careful with the learning rate you choose. It's generally most effective to set a small learning rate, as it's easy to overfit if the early layers of the network are allowed to substantially change to adapt to small details found in your training data.
The torch library includes a variety of pretrained models across several different machine learning domains:
- This documentation page provides additional information on the available models and their intended applications.
For our examples, we'll use the "EfficientNet" model introduced in this paper. For context, EfficientNet is a convolutional neural network that uses unique scaling approaches in order to achieve better computational efficiency and greater generalizability. There are several versions of EfficientNet, and the top EfficientNet models have proven themselves to be very effective in transfer learning, achieving near state-of-the-art performance levels on benchmark computer vision datasets like CIFAR-100.
To keep the run times in this lab as short as possible, we'll use the "b0" version of this model, which is the smallest of the EfficientNet models. That said, this model still contains roughly 5.3 million parameters, so the notion of it being "small" is relative.
The code below obtains the estimated weights that EfficientNet b0 learned from training on the ImageNet database. It then sets up a model with the EfficientNet b0 architecture using those weights:
## Load efficient net b0 weights
efnet_weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
## Set up efficientnet_b0 model using those weights
efnet_model = torchvision.models.efficientnet_b0(weights = efnet_weights)
If you're curious, you can print efnet_model (by uncommenting the code below). From this you'll see that the model's architecture is deep and contains several convolutional layers. It's important to notice that the final layer outputs a vector of length 1000, one score for each of the ImageNet classes.
# print(efnet_model) # Uncomment if curious, the output is very long
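If you'd rather not scroll through the full printout, you can inspect just the final "classifier" component, whose last layer maps 1280 features to the 1000 ImageNet classes (this is where the in_features=1280 value used later in this lab comes from):
## Inspect only the final "classifier" component of the model
print(efnet_model.classifier)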
Because the architecture of any neural network is designed for the characteristics of the data that it was trained on, we should be aware of the structure of the input data used to train EfficientNet.
Fortunately, the pretrained models in torch are distributed alongside the preprocessing transformations that were applied to their training data. We can print these transformations to better understand the format of the data used to train the model:
## Efficient net preprocessing transforms
print(efnet_weights.transforms())
ImageClassification( crop_size=[224] resize_size=[256] mean=[0.485, 0.456, 0.406] std=[0.229, 0.224, 0.225] interpolation=InterpolationMode.BICUBIC )
We can see that EfficientNet resizes each image to 256 pixels using bicubic interpolation, applies a central crop to produce a 224 by 224 pixel image, and normalizes each of the three color channels using means of roughly 0.45 and standard deviations of roughly 0.22.
In this lab we won't worry about applying most of these steps to our data. The one thing we do need to address is that EfficientNet expects input images with 3 color channels. Since the Fashion MNIST images only have a single color channel, we'll need to duplicate this channel 3 times to make our input data suitable for the EfficientNet architecture.
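For reference, if you ever did want to apply EfficientNet's own preprocessing yourself, the transforms object returned by efnet_weights.transforms() can be called directly on an image tensor. A small illustration using a randomly generated 3-channel image (the image itself is just a placeholder):
## Illustration only: apply EfficientNet's preprocessing to a randomly generated image
preprocess = efnet_weights.transforms()
fake_img = torch.rand(3, 300, 300)        # placeholder 3-channel image
print(preprocess(fake_img).shape)         # should be torch.Size([3, 224, 224])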
### Read flattened, processed data
import pandas as pd
fash_mnist = pd.read_csv("https://remiller1450.github.io/data/fashion_mnist_train.csv")
## Train-test split
from sklearn.model_selection import train_test_split
train_fash, test_fash = train_test_split(fash_mnist, test_size=0.1, random_state=5)
### Separate the label column (outcome)
train_y = train_fash['y']
train_X = train_fash.drop(['y'], axis=1)
test_y = test_fash['y']
test_X = test_fash.drop(['y'], axis=1)
### Convert to numpy array then reshape to 900 by 28 by 28
mnist_unflattened = train_X.to_numpy()
mnist_unflattened = mnist_unflattened.reshape(900,28,28)
## Convert to tensor
mnist_tensor = torch.from_numpy(mnist_unflattened)
mnist_tensor = torch.unsqueeze(mnist_tensor, dim=1)
## Transform to proper input shape (duplicate the single color channel to produce 3 channels)
new_mnist_tensors = mnist_tensor.expand(-1, 3, -1, -1)
## Store in DataLoader
from torch.utils.data import DataLoader, TensorDataset
y_tensor = torch.Tensor(train_y)
train_loader = DataLoader(TensorDataset(new_mnist_tensors.type(torch.FloatTensor), y_tensor.type(torch.LongTensor)), batch_size=100)
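As a quick sanity check, we can pull a single batch from the DataLoader and confirm that the images now have the 3-channel shape that EfficientNet expects (optional, but useful for catching reshaping mistakes):
## Optional sanity check: one batch should have shape (100, 3, 28, 28)
batch_X, batch_y = next(iter(train_loader))
print(batch_X.shape, batch_y.shape)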
Question #1:
There are two basic operations we must consider when performing transfer learning with a pretrained model:
If you had previously printed the architecture of EfficientNet, you may have noticed that the model contains three different named components, each of which contains one or more of the building blocks used in torch. The first and largest of these components is named "features", while the others are named "avgpool" and "classifier".
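You can list these top-level components directly without printing the full architecture:
## Print the names of the model's top-level components
for name, component in efnet_model.named_children():
    print(name)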
In this portion of the lab we'll try a "feature extraction" approach on the Fashion MNIST data. This will require us to "freeze" everything in the "features" portion of the network by changing the requires_grad attribute of those parameters to False:
## Loop through each parameter and set `requires_grad` to false
for param in efnet_model.features.parameters():
param.requires_grad = False
Next, we need to modify the final layer of the network so that its outputs are appropriate for our application. More specifically, because the Fashion MNIST data contains 10 classes, we'll want 10 outputs in the model's final layer. The code given below also removes the dropout layer, but you could keep it if you wanted to.
## Replace the existing "classifier" layer with our own creation
efnet_model.classifier = torch.nn.Sequential(
torch.nn.Linear(in_features=1280, out_features=10, bias=True))
Perhaps surprisingly, this is all that's necessary to implement the feature extraction approach and we're ready to try out our model.
Question #2: Why did we set the requires_grad attribute to False for this portion of the model? Briefly explain.
The training loop below uses the Fashion MNIST data to learn the parameters that remain trainable in our model. Because this network is far more complex than the models we built from scratch, training can take a long time. Even though back-propagation is only performed for the trainable layers, each training example must still be forward propagated through the entire network.
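Before training, it can be helpful to confirm how many parameters are actually trainable after freezing the "features" component and replacing the classifier. The exact counts depend on the model version, but the trainable number should be a tiny fraction of the total:
## Count the total number of parameters vs. the number that will be trained
total_params = sum(p.numel() for p in efnet_model.parameters())
trainable_params = sum(p.numel() for p in efnet_model.parameters() if p.requires_grad)
print(total_params, trainable_params)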
## Hyperparms
epochs = 100
lrate = 0.1
## Cost Function
from torch import nn
cost_fn = nn.CrossEntropyLoss()
## Network model
torch.manual_seed(7) # For reproduction purposes (should be minor since only the last layers are randomly initialized)
net = efnet_model
## Optimizer (using ADAM, a more flexible algorithm than SGD this time)
optimizer = torch.optim.Adam(net.parameters(), lr=lrate)
## Initial values for cost tracking
track_cost = np.zeros(epochs)
cur_cost = 0.0
## Loop through the data
for epoch in range(epochs):
cur_cost = 0.0
correct = 0.0
## train_loader is iterable; enumerate gives us the batch index
for i, data in enumerate(train_loader, 0):
## The input tensor and labels tensor for the current batch
inputs, labels = data
## Clear the gradient from the previous batch
optimizer.zero_grad()
## Provide the input tensor into the network to get outputs
outputs = net(inputs)
## Calculate the cost for the current batch
## nn.Softmax is used because net outputs prediction scores and our cost function expects probabilities and labels
cost = cost_fn(nn.Softmax(dim=1)(outputs), labels)
## Calculate the gradient
cost.backward()
## Update the model parameters using the gradient
optimizer.step()
## Track the current cost (accumulating across batches)
cur_cost += cost.item()
## Store the accumulated cost at each epoch
track_cost[epoch] = cur_cost
# print(f"Epoch: {epoch} Cost: {cur_cost}")
## Print the cost curve
import matplotlib.pyplot as plt
plt.plot(np.linspace(0, epochs, epochs), track_cost)
plt.show()
Looking at the graph shown above, the cost still seems to be improving, but we'll stop here since training this network takes much longer than the networks we've trained in our previous labs.
Furthermore, as seen below, this network already achieves a reasonable level of classification accuracy on the Fashion MNIST data. But it's reasonable to believe that better performance could be achieved with more training, and we might not be overly concerned with overfitting since most of the network's parameters are "frozen".
## Initialize objects for counting correct/total
correct = 0
total = 0
# Specify no changes to the gradient in the subsequent steps (since we're not using these data for training)
with torch.no_grad():
for data in train_loader:
# Current batch of data
images, labels = data
# pass each batch into the network
outputs = net(images)
# the class with the maximum score is what we choose as prediction
_, predicted = torch.max(outputs.data, 1)
# add size of the current batch
total += labels.size(0)
# add the number of correct predictions in the current batch
correct += (predicted == labels).sum().item()
## Calculate and print the proportion correct
print(correct/total)
0.7255555555555555
If we wanted to adopt the "fine-tuning" approach to transfer learning, all we would need to do is reinitialize our model and modify the model's final layers. That is, we'd repeat the same initial steps shown in this lab without modifying the requires_grad attribute.
Alternatively, we could start with our feature extraction transfer learning model and "unfreeze" the parameters that were previously frozen. This approach is preferable because the model's final layers now have effective initial values rather than randomly generated weights.
## Start w/ our prior model and "unfreeze" parameters
for param in net.features.parameters():
param.requires_grad = True
As was mentioned earlier in the lab, we'll want to be more careful to choose a small learning rate here, since it's much easier to overfit our training data now that all 5.3 million parameters can be updated.
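One alternative to using a single, very small learning rate (sketched below, but not used in this lab's code) is to assign the pretrained "features" layers a smaller learning rate than the replaced classifier by passing parameter groups to the optimizer; the specific values here are purely illustrative:
## Illustrative alternative: different learning rates for pretrained vs. new layers
optimizer_alt = torch.optim.Adam([
    {'params': net.features.parameters(), 'lr': 1e-5},    # pretrained backbone: small steps
    {'params': net.classifier.parameters(), 'lr': 1e-3}   # replaced classifier: larger steps
])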
Let's now train the entire model for 100 additional epochs and see how it does on the Fashion MNIST data:
## Hyperparms
epochs = 100
lrate = 0.00001
## Cost Function
cost_fn = nn.CrossEntropyLoss()
## Optimizer (using a more flexible algorithm than SGD this time)
optimizer = torch.optim.Adam(net.parameters(), lr=lrate)
## Initial values for cost tracking
import numpy as np
track_cost = np.zeros(epochs)
cur_cost = 0.0
## Loop through the data
for epoch in range(epochs):
cur_cost = 0.0
correct = 0.0
## train_loader is iterable; enumerate gives us the batch index
for i, data in enumerate(train_loader, 0):
## The input tensor and labels tensor for the current batch
inputs, labels = data
## Clear the gradient from the previous batch
optimizer.zero_grad()
## Provide the input tensor into the network to get outputs
outputs = net(inputs)
## Calculate the cost for the current batch
## nn.Softmax is used because net outputs prediction scores and our cost function expects probabilities and labels
cost = cost_fn(nn.Softmax(dim=1)(outputs), labels)
## Calculate the gradient
cost.backward()
## Update the model parameters using the gradient
optimizer.step()
## Track the current cost (accumulating across batches)
cur_cost += cost.item()
## Store the accumulated cost at each epoch
track_cost[epoch] = cur_cost
# print(f"Epoch: {epoch} Cost: {cur_cost}") ## Uncomment this if you want printed updates
As you've surely noticed, training all of the network's 5.3 million parameters is substantially more computationally intensive than "feature extraction", so let's hope that this approach paid off with an improved classification accuracy:
## Initialize objects for counting correct/total
correct = 0
total = 0
# Specify no changes to the gradient in the subsequent steps (since we're not using these data for training)
with torch.no_grad():
for data in train_loader:
# Current batch of data
images, labels = data
# pass each batch into the network
outputs = net(images)
# the class with the maximum score is what we choose as prediction
_, predicted = torch.max(outputs.data, 1)
# add size of the current batch
total += labels.size(0)
# add the number of correct predictions in the current batch
correct += (predicted == labels).sum().item()
## Calculate and print the proportion correct
print(correct/total)
0.7988888888888889
This model seems to perform better than any model we've built up until this point, or at least it does so on the training data.
Let's now use the test data to get an unbiased assessment of its performance:
## Make test outcomes into a tensor
test_y_tensor = torch.Tensor(test_y.to_numpy())
## Convert to numpy array then reshape
test_unflattened = test_X.to_numpy().reshape(len(test_y),1,28,28)
## Convert test images into a tensor
test_tensor = torch.from_numpy(test_unflattened)
## Expand to have 3 channels
test_tensor = test_tensor.expand(-1, 3, -1, -1)
## Combine X and y tensors into a TensorDataset and DataLoader
test_loader = DataLoader(TensorDataset(test_tensor.type(torch.FloatTensor),
test_y_tensor.type(torch.LongTensor)), batch_size=100)
## Repeat evaluation loop using the test data
correct = 0
total = 0
with torch.no_grad():
for data in test_loader:
images, labels = data
outputs = net(images)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
print(correct/total)
0.51
Unfortunately, our model overfit the training data. This is a common problem in transfer learning, and a popular strategy is to use a single validation set and keep track of the cost (error) on that set during each training epoch. If the cost on the validation set starts to increase by a significant amount, the training process is halted.
If you were exclusively interested in a single model (as you might be here), it's reasonable to use what we had previously designated to be the test set as our validation set. In general, you might want separate validation and test sets if you wanted a truly unbiased estimate of your model's performance on new data.
The code below provides a demonstration of this approach:
## Reload the EfficientNet model
efnet_weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
efnet_model = torchvision.models.efficientnet_b0(weights = efnet_weights)
## Freeze parameters
for param in efnet_model.features.parameters():
param.requires_grad = False
## Set up new classifier layer
efnet_model.classifier = torch.nn.Sequential(torch.nn.Linear(in_features=1280, out_features=10, bias=True))
## Initialize with same random seed as before
torch.manual_seed(7)
net = efnet_model
## Hyperparms
epochs = 100
lrate = 0.1
## Cost Function
from torch import nn
cost_fn = nn.CrossEntropyLoss()
## Optimizer (using a more flexible algorithm than SGD this time)
optimizer = torch.optim.Adam(net.parameters(), lr=lrate)
## Initial values for cost tracking
track_cost = np.zeros(epochs)
track_val_cost = np.zeros(epochs)
cur_cost = 0.0
val_cur_cost = 0.0
## Loop through the data
for epoch in range(epochs):
cur_cost = 0.0
val_cur_cost = 0.0
## train_loader is iterable; enumerate gives us the batch index
for i, data in enumerate(train_loader, 0):
## The input tensor and labels tensor for the current batch
inputs, labels = data
## Clear the gradient from the previous batch
optimizer.zero_grad()
## Provide the input tensor into the network to get outputs
outputs = net(inputs)
## Calculate the cost for the current batch
## nn.Softmax is used because net outputs prediction scores and our cost function expects probabilities and labels
cost = cost_fn(nn.Softmax(dim=1)(outputs), labels)
## Calculate the gradient
cost.backward()
## Update the model parameters using the gradient
optimizer.step()
cur_cost += cost.item()
for i, data in enumerate(test_loader, 0):
inputs, labels = data
val_outputs = net(inputs)
val_cost = cost_fn(nn.Softmax(dim=1)(val_outputs), labels)
val_cur_cost += val_cost.item()
## Store the accumulated cost at each epoch
track_cost[epoch] = cur_cost
track_val_cost[epoch] = 9*val_cur_cost ## Multiplying by 9 puts the validation cost on the same scale as the training cost (9 training batches vs. 1 validation batch)
# print(f"Epoch: {epoch} Cost: {cur_cost} Validation Cost: {val_cur_cost}") ## Uncomment this if you want printed updates
## Plot cost curves for both training and validation sets
plt.plot(np.linspace(0, epochs, epochs), np.column_stack((track_cost, track_val_cost)))
plt.show()
Here we can see that while the cost continues to decrease on the training data, it doesn't seem to improve much for the validation data.
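If we wanted the training process to halt on its own when this happens, one common recipe is an early-stopping rule with a "patience" parameter: stop once the validation cost hasn't improved for several consecutive epochs. The self-contained sketch below illustrates the logic using made-up validation costs; in the loop above you would apply the same check to val_cur_cost at the end of each epoch.
## Sketch of early stopping based on validation cost (the costs below are made up)
val_costs = [2.1, 1.8, 1.7, 1.69, 1.72, 1.75, 1.80, 1.85]
patience = 3                          # epochs without improvement we'll tolerate
best_val_cost = float('inf')
epochs_without_improvement = 0
for epoch, val_cost in enumerate(val_costs):
    if val_cost < best_val_cost:      # validation cost improved
        best_val_cost = val_cost
        epochs_without_improvement = 0
    else:                             # no improvement this epoch
        epochs_without_improvement += 1
    if epochs_without_improvement >= patience:
        print(f"Stopping early after epoch {epoch}")
        break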
Question #3: For this question you will again revisit the cats vs. dogs data contained in the zipped folder at this link. I promise you this is the last time I'll ask you to classify these cats and dogs.
Previously, we saw that we weren't able to learn anything useful with a vanilla artificial neural network, but a convolutional neural network and data augmentation allowed us to achieve classification performance that was better than random guessing. We'll now see if transfer learning can help us do even better.
random_state=5. Then, store a properly formatted version of the training data in a DataLoader object that uses a batch size of 28.