Neuron (Perceptron)¶
A neuron is the basic unit of a neural network. A neuron identified by index $j$ has the following structure: \begin{align*} z &= w_j^T x + b_j \\ h &= \sigma(z). \end{align*} Above,
- $x$ is the input vector,
- $w_j$ is the weight vector, which corresponds to the synapses of the neuron that receive input signals from the previous layer,
- $\sigma$ is the activation function that determines whether the neuron is activated or not,
- $b_j$ is the bias, which is a constant,
- $z$ is the linear activation (also called the pre-activation) of the neuron,
- $h$ is the activation output of the neuron.
The first-generation activation function was the Heaviside step function:
$$\sigma(z) = \begin{cases} 1 & \text{if } z \geq 0, \\ 0 & \text{otherwise.} \end{cases}$$
This construction, called a perceptron, is rarely used nowadays because the step function is not differentiable, which rules out gradient-based training. Modern neural networks use differentiable activation functions such as the following (a small numerical sketch of a single neuron with these activations is given after the list):
- Sigmoid function: $\sigma(z) = \frac{1}{1+e^{-z}}$,
- Hyperbolic tangent function: $\sigma(z) = \tanh(z)$,
- Rectified linear unit (ReLU): $\sigma(z) = \max(0, z)$.
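To make the definitions above concrete, here is a minimal sketch of a single neuron's forward computation in PyTorch. The input, weight, and bias values are arbitrary and chosen only for illustration; the same pre-activation $z$ is passed through the step function and the three differentiable activations listed above.
import torch as th
# Arbitrary illustrative values for a neuron with three inputs
x = th.tensor([0.5, -1.0, 2.0])   # input vector
w = th.tensor([0.3, 0.8, -0.2])   # weight vector (one entry per synapse)
b = th.tensor(0.1)                # bias
# Linear activation z = w^T x + b
z = w @ x + b
print(f"z = {z.item():.4f}")
# The same z passed through different activation functions
print(f"step   : {float(z >= 0):.0f}")
print(f"sigmoid: {th.sigmoid(z).item():.4f}")
print(f"tanh   : {th.tanh(z).item():.4f}")
print(f"relu   : {th.relu(z).item():.4f}")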
import torch as th
import matplotlib.pyplot as plt
# The ReLU, Sigmoid, and Tanh activation functions can be plotted in different panels of a figure as below:
x = th.linspace(-10, 10, 100)
x.requires_grad = True # Enable gradient tracking for x
# Create a figure with two rows and three columns of subplots
fig, axes = plt.subplots(2, 3, figsize=(12, 8))
# Plot the original functions in the top row
# ReLU
y_relu = th.relu(x)
axes[0, 0].plot(x.detach().numpy(), y_relu.detach().numpy())
axes[0, 0].set_title('ReLU')
axes[0, 0].set_ylim(-1, 10.1)
# Sigmoid
y_sigmoid = th.sigmoid(x)
axes[0, 1].plot(x.detach().numpy(), y_sigmoid.detach().numpy())
axes[0, 1].set_title('Sigmoid')
axes[0, 1].set_ylim(-0.1, 1.1)
# Tanh
y_tanh = th.tanh(x)
axes[0, 2].plot(x.detach().numpy(), y_tanh.detach().numpy())
axes[0, 2].set_title('Tanh')
axes[0, 2].set_ylim(-1.1, 1.1)
# Compute gradients
# Compute gradients using the automatic differentiation provided by PyTorch
grad_relu = th.autograd.grad(y_relu.sum(), x)[0].detach().numpy()
grad_sigmoid = th.autograd.grad(y_sigmoid.sum(), x)[0].detach().numpy()
grad_tanh = th.autograd.grad(y_tanh.sum(), x)[0].detach().numpy()
# Plot the gradients in the bottom row
axes[1, 0].plot(x.detach().numpy(), grad_relu)
axes[1, 0].set_title('ReLU Gradient')
axes[1, 0].set_ylim(-0.1, 1.2)
axes[1, 1].plot(x.detach().numpy(), grad_sigmoid)
axes[1, 1].set_title('Sigmoid Gradient')
axes[1, 1].set_ylim(-0.02, 0.26) # Adjusted y-axis limits for sigmoid gradient
axes[1, 2].plot(x.detach().numpy(), grad_tanh)
axes[1, 2].set_title('Tanh Gradient')
axes[1, 2].set_ylim(-0.1, 1.1)
plt.tight_layout()
plt.show()
Neural Network (Multilayer Perceptron)¶
A neural network is a collection of neurons. The neurons are organized in layers such that the activation outputs of the neurons in one layer become the synaptic inputs of the next layer. The operation of the $k$ neurons sharing the same layer $l$ on the $i$th input can be given in the vector form below:
\begin{align*} z_i^l &= W_l^T h_i^{l-1} + b_l, \\ h_i^l &= \sigma(z_i^l), \end{align*}
where
$W_l= \begin{bmatrix} w_1^l & w_2^l & \cdots & w_k^l \end{bmatrix}$,
is the matrix of weight vectors and
$b_l=(b_1^l, \ldots, b_k^l)$
is the vector of biases. The vector $h_i^l$ is called the activation map of layer $l$ for the $i$th input. For $l=0$, the activation map is the input vector itself, i.e. $h_i^0 = x_i$. The activation map of the last layer is the output $y_i$ of the neural network.
Assume we have a neural network with two layers. We can express its whole operation as:
$$y_i = W_2^T \sigma(W_1^T x_i + b_1) + b_2.$$
Likewise, we can express the operation of a neural network with three layers as:
$$y_i = W_3^T \sigma(W_2^T \sigma(W_1^T x_i + b_1) + b_2) + b_3.$$
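As a quick illustration, the two-layer expression above can be written directly in code. The sketch below uses arbitrary layer sizes and randomly initialized weights (all names and dimensions are chosen only for illustration); it follows the convention of the formulas, applying the weight matrices as $W^T$, using ReLU for $\sigma$, and leaving the last layer without an activation.
import torch as th
th.manual_seed(0)
# Arbitrary illustrative sizes: 4 inputs, 3 hidden neurons, 2 outputs
n_in, n_hidden, n_out = 4, 3, 2
# Each weight matrix stores one column per neuron, as in W_l = [w_1 ... w_k]
W1 = th.randn(n_in, n_hidden)
b1 = th.randn(n_hidden)
W2 = th.randn(n_hidden, n_out)
b2 = th.randn(n_out)
x = th.randn(n_in)            # a single input vector
# y = W_2^T sigma(W_1^T x + b_1) + b_2, with sigma = ReLU here
h1 = th.relu(W1.T @ x + b1)   # activation map of layer 1
y = W2.T @ h1 + b2            # output of the two-layer network
print(h1)
print(y)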
Training a Neural Network: Backpropagation¶
As with the other machine learning models we have covered thus far, neural networks are trained to minimize a loss function. Denote the loss function by $L(y, \widehat{y})$ for a prediction $\widehat{y}$ and the true value $y$. Due to the cascaded application of the nonlinear activation functions, the loss function is in general not convex in the weights. Therefore, we cannot find the optimal solution analytically. Instead, we use gradient descent:
\begin{align*} w_{ij}^l := w_{ij}^l - \alpha \frac{\partial L}{\partial w_{ij}^l}, \end{align*}
for all synaptic connections $i \rightarrow j$ in all layers $l$. Given a neural network with $H$ layers, define $\widehat{y}_j = z_j^H$ as the output of the $j$ th neuron in the last layer. Also define
\begin{align*} \delta_j^l = \frac{\partial L}{\partial z_j^l}. \end{align*}
This expression is called the error of the $j$th neuron in the $l$th layer. To see why, consider the case where $l=H$ and the loss function is the mean squared error: $L(y, \hat{y}) = \frac{1}{2} \sum_{j} (y_j - \hat{y}_j)^2$. Since $\hat{y}_j = z_j^H$ and only the $j$th term of the sum depends on $z_j^H$, we have
\begin{align*} \delta_j^H &= \frac{\partial}{\partial z_j^H} \frac{1}{2} \sum_{j'} (y_{j'} - \hat{y}_{j'})^2 \\ &= \frac{\partial}{\partial \hat{y}_j} \frac{1}{2} (y_j - \hat{y}_j)^2 \\ &= \hat{y}_j - y_j. \end{align*}
This is the error of the prediction of the neural network on the $j$th output channel. The gradient of the loss with respect to an arbitrary weight is
\begin{align*} \frac{\partial L}{\partial w_{ij}^l} &= \frac{\partial L}{\partial z_j^l} \frac{\partial z_j^l}{\partial w_{ij}^l} \\ &= \delta_j^l \frac{\partial z_j^l}{\partial w_{ij}^l}\\ &= \delta_j^l h_i^{l-1}. \end{align*}
Let us next calculate the error for an intermediate layer $l$. We have
\begin{align*} \delta_j^l &= \frac{\partial L}{\partial z_j^l} \\ &= \sum_{k} \frac{\partial L}{\partial z_k^{l+1}} \frac{\partial z_k^{l+1}}{\partial z_j^l} \\ &= \sum_{k} \delta_k^{l+1} \frac{\partial z_k^{l+1}}{\partial z_j^l} \end{align*}
where $k$ ranges over the neurons in the next layer. Placing the definition of $z_k^{l+1}$ into the above equation, we have
\begin{align*} \frac{\partial z_k^{l+1}}{\partial z_j^l} &= \frac{\partial}{\partial z_j^l} \left( \sum_{j'} w_{j'k}^{l+1} \sigma(z_{j'}^l) + b_k^{l+1} \right) \\ &= w_{jk}^{l+1} \frac{\partial \sigma(z_j^l)}{\partial z_j^l}. \end{align*}
Placing this result into the expression for the error $\delta_j^l$ above, we get
\begin{align*} \delta_j^l &= \frac{\partial \sigma(z_j^l)}{\partial z_j^l} \sum_{k} \delta_k^{l+1} w_{jk}^{l+1} \end{align*}
This way we obtain a recursive formula for the error of a neuron in an intermediate layer, where the recursion runs in the backward direction through the layers. There are remarkable facts regarding the computational efficiency of this formula, first observed by Rumelhart et al. in 1986, that made the training of multilayer neural networks feasible:
- It reuses $z_j^l$, which is already calculated during the forward pass, and the errors $\delta_k^{l+1}$, which are already available from the previous step of the backward recursion.
- Its computational cost is linear in the number of weights in the network, i.e. $O(|W|)$, where $|W|$ is the number of weights.
The findings above can be put together into the algorithm below, where the input is fed in as $h^0 = x$ and the network output is $\widehat{y}_j = z_j^H$.
Forward Pass. For $l=1 \rightarrow H$ do:
- Compute and save $z_j^l = \sum_{i} w_{ij}^l h_i^{l-1} + b_j^l$ and, for $l < H$, $h_j^l = \sigma(z_j^l)$ for every neuron $j$ in layer $l$.
Backward Pass. For $l=H \rightarrow 1$ do:
- Compute $\delta_j^l$: for $l=H$, $\delta_j^H = \frac{\partial L}{\partial \widehat{y}_j}$ (which equals $\widehat{y}_j - y_j$ for the squared error loss); for $l<H$, $\delta_j^l = \frac{\partial \sigma(z_j^l)}{\partial z_j^l} \sum_{k} \delta_k^{l+1} w_{jk}^{l+1}$.
- Compute $\frac{\partial L}{\partial w_{ij}^l} = \delta_j^l h_i^{l-1}$.
This algorithm is called backpropagation. The name comes from the fact that the error term $\delta_j^l$ is propagated backward from the output layer to the input layer.
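To make the algorithm concrete, here is a minimal sketch of backpropagation for a tiny two-layer network with sigmoid hidden units and the squared error loss. All sizes and values are arbitrary and chosen only for illustration; the manually computed gradients are compared against PyTorch's automatic differentiation as a sanity check.
import torch as th
th.manual_seed(0)
# Tiny network: 3 inputs -> 4 hidden (sigmoid) -> 2 linear outputs
x = th.randn(3)
y = th.randn(2)                       # target
W1 = th.randn(3, 4, requires_grad=True)
b1 = th.randn(4, requires_grad=True)
W2 = th.randn(4, 2, requires_grad=True)
b2 = th.randn(2, requires_grad=True)
# Forward pass: save z and h of every layer
z1 = W1.T @ x + b1
h1 = th.sigmoid(z1)
z2 = W2.T @ h1 + b2                   # y_hat = z^H (no activation on the output)
loss = 0.5 * ((y - z2) ** 2).sum()
# Backward pass using the backpropagation formulas
delta2 = (z2 - y).detach()                            # output error: y_hat - y
grad_W2 = th.outer(h1.detach(), delta2)               # dL/dW2[i, j] = delta_j * h_i
grad_b2 = delta2
sigma_prime = (h1 * (1 - h1)).detach()                # sigmoid'(z1) = h1 * (1 - h1)
delta1 = sigma_prime * (W2.detach() @ delta2)         # delta_j = sigma'(z_j) * sum_k delta_k * w_jk
grad_W1 = th.outer(x, delta1)                         # dL/dW1[i, j] = delta_j * x_i
grad_b1 = delta1
# Sanity check against autograd
loss.backward()
for manual, auto in [(grad_W1, W1.grad), (grad_b1, b1.grad),
                     (grad_W2, W2.grad), (grad_b2, b2.grad)]:
    print(th.allclose(manual, auto, atol=1e-6))
If the formulas are implemented correctly, every comparison should print True.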
Stochastic Gradient Descent¶
Above we explained the backpropagation algorithm for a single data point for simplicity. For a data set with $m$ samples, the gradient descent algorithm becomes:
\begin{align*} w_{ij}^l := w_{ij}^l - \alpha \frac{1}{m} \sum_{n=1}^m \frac{\partial L(y_n, \widehat{y}_n)}{\partial w_{ij}^l}. \end{align*}
This means we need to do a forward pass and a backward pass for all weights and all data points. This is computationally expensive for large data sets. Instead, we can use a technique called stochastic gradient descent. In this technique, we use a subset of the data set, called a mini-batch, to calculate the gradient. The size of the mini-batch is denoted by $b$. The stochastic gradient descent algorithm can be expressed as:
\begin{align*} B &= \{ (x_n, y_n) \}_{n=1}^b \sim D \\ w_{ij}^l &:= w_{ij}^l - \alpha \frac{1}{b} \sum_{n=1}^b \frac{\partial L(y_n, \widehat{y}_n)}{\partial w_{ij}^l}, \end{align*}
where the first step randomly samples a mini-batch $B$ from the data set $D$ and the second step performs a gradient descent update using the backpropagation algorithm. The random selection makes the gradient signal a random variable, which is why the algorithm is called stochastic gradient descent. Because the mini-batch is sampled uniformly at random, this random gradient is an unbiased estimate of the true, full-data gradient. Hence, under mild conditions on the step size clarified by Robbins and Monro in 1951, the algorithm converges to the same solution as plain deterministic gradient descent.
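The unbiasedness of the mini-batch gradient can be checked numerically. The sketch below uses a toy linear regression problem with arbitrary synthetic data (all names and sizes are illustrative) and compares the full-data gradient of the squared error loss with the average of many mini-batch gradients.
import torch as th
th.manual_seed(0)
# Synthetic linear regression data (arbitrary, for illustration)
m, d, b = 1000, 5, 32              # data set size, input dimension, mini-batch size
X = th.randn(m, d)
y_true = X @ th.randn(d) + 0.1 * th.randn(m)
w = th.randn(d)                    # current parameter vector
def gradient(Xb, yb, w):
    # Gradient of the mean squared error 0.5 * mean((Xb @ w - yb)^2) w.r.t. w
    return Xb.T @ (Xb @ w - yb) / len(yb)
full_grad = gradient(X, y_true, w)
# Average many mini-batch gradients; their mean should approach the full gradient
estimates = []
for _ in range(5000):
    idx = th.randint(0, m, (b,))   # sample a mini-batch (with replacement)
    estimates.append(gradient(X[idx], y_true[idx], w))
avg_grad = th.stack(estimates).mean(dim=0)
print(full_grad)
print(avg_grad)                    # close to full_grad, illustrating unbiasedness
The averaged mini-batch gradient approaches the full-data gradient as more mini-batches are sampled, which is exactly the unbiasedness property exploited by stochastic gradient descent.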
Nearly every neural network used nowadays is trained by minimizing a loss function using stochastic gradient descent with backpropagation. The common practice is to use an automatic differentiation library such as TensorFlow or PyTorch that automates the calculation of the gradients. These libraries also provide a variety of neural network architectures and optimization algorithms. Below, we give an example implementation of a neural network in PyTorch on a handwritten digit classification problem. The data set used, called MNIST, is among the most famous data sets in machine learning. It contains 70,000 images of handwritten digits. Each image is a 28x28 grayscale image. The goal is to classify the images into 10 classes, one for each digit.
# MNIST digits classification with a simple deep neural network
import torch as th
from sklearn.metrics import accuracy_score
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
# Load the data from torchvision
# Define the transformations
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))  # MNIST mean and standard deviation
])
# Download the data
train_ds = datasets.MNIST('data', train=True,
                          download=True, transform=transform)
test_ds = datasets.MNIST('data', train=False,
                         download=True, transform=transform)
# Create the data loaders
batch_size = 64 # Define the batch size
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False)
# Define the model
model = nn.Sequential(
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 512),
    nn.ReLU(),
    nn.Linear(512, 1024),
    nn.ReLU(),
    nn.Linear(1024, 10)
)
# Define the loss function
loss_fn = nn.CrossEntropyLoss()
# Define the optimizer
optimizer = th.optim.SGD(model.parameters(), lr=1e-4)
# Train the model
for epoch in range(50):  # train for 50 epochs, matching the results shown below
    # Iterate over randomly shuffled mini-batches of data samples
    for xb, yb in train_dl:
        # Forward pass: flatten the 28x28 images into 784-dimensional vectors
        y_pred = model(xb.view(-1, 784))
        loss = loss_fn(y_pred, yb)
        # Backward pass and parameter update
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    # Print the loss of the last mini-batch of the epoch
    print(f'Epoch {epoch+1}, loss: {loss.item():.4f}')
# Evaluate the model on the test set
def evaluate(model, test_dl):
    model.eval()  # switch to evaluation mode
    correct = 0
    with th.no_grad():  # no gradients are needed for evaluation
        for xb, yb in test_dl:
            y_pred = model(xb.view(-1, 784))
            _, preds = th.max(y_pred, dim=1)
            correct += accuracy_score(yb, preds)
    return correct / len(test_dl)  # average per-batch accuracy
print(f'Test accuracy: {evaluate(model, test_dl):.4f}')
Epoch 1, loss: 2.2655
Epoch 2, loss: 2.2440
Epoch 3, loss: 2.2269
Epoch 4, loss: 2.2115
Epoch 5, loss: 2.1360
Epoch 6, loss: 2.1052
Epoch 7, loss: 2.0069
Epoch 8, loss: 1.9794
Epoch 9, loss: 1.8881
Epoch 10, loss: 1.8433
Epoch 11, loss: 1.7294
Epoch 12, loss: 1.6692
Epoch 13, loss: 1.6743
Epoch 14, loss: 1.4617
Epoch 15, loss: 1.3049
Epoch 16, loss: 1.5321
Epoch 17, loss: 1.4860
Epoch 18, loss: 1.1417
Epoch 19, loss: 1.3915
Epoch 20, loss: 1.1111
Epoch 21, loss: 1.0564
Epoch 22, loss: 0.9029
Epoch 23, loss: 0.9960
Epoch 24, loss: 0.8241
Epoch 25, loss: 0.7931
Epoch 26, loss: 0.8374
Epoch 27, loss: 0.9556
Epoch 28, loss: 0.7198
Epoch 29, loss: 0.7623
Epoch 30, loss: 0.9868
Epoch 31, loss: 0.6667
Epoch 32, loss: 0.8076
Epoch 33, loss: 0.7017
Epoch 34, loss: 0.4743
Epoch 35, loss: 0.9481
Epoch 36, loss: 0.4997
Epoch 37, loss: 0.5927
Epoch 38, loss: 0.4575
Epoch 39, loss: 0.5152
Epoch 40, loss: 0.3891
Epoch 41, loss: 0.5633
Epoch 42, loss: 0.4361
Epoch 43, loss: 0.4209
Epoch 44, loss: 0.5948
Epoch 45, loss: 0.4152
Epoch 46, loss: 0.5346
Epoch 47, loss: 0.3340
Epoch 48, loss: 0.4292
Epoch 49, loss: 0.2602
Epoch 50, loss: 0.5033
Test accuracy: 0.8802