Rozpoznávání číslic pomocí neuronových sítí#

!pip install -r requirements.txt

import matplotlib.pyplot as plt
import numpy as np
import torch
# Make device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device
'cpu'
#device = 'cpu'  # Force CPU for this example
torch.device(device)
device(type='cpu')

Příprava dat#

from torchvision.datasets import MNIST

dataset = MNIST(root = 'datasets/', download = True)
print(len(dataset))
100.0%
100.0%
100.0%
100.0%
60000

N = 64
plt.figure(figsize=(6, 8))
for i in range(N):
    a = int(np.sqrt(N))
    plt.subplot(a, a, i + 1)
    image, label = dataset[i]
    #image = np.array(image)
    #image = image.reshape((28, 28))
    plt.axis('off')
    plt.imshow(image, cmap = 'gray')
    plt.title(f'label: {label}')
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.suptitle('MNIST Dataset Samples', fontsize=16)
plt.show()
../_images/3866cc90231b168a5f6227765555771ceedfa51fb76fe9ad79b54bde05975e4f.png
test_fraction = 0.2

Ntrain = int(len(dataset) * (1 - test_fraction))
print(f'Train size: {Ntrain}, Test size: {len(dataset) - Ntrain}')

X_train, y_train = dataset.data[:Ntrain], dataset.targets[:Ntrain]
X_test, y_test = dataset.data[Ntrain:], dataset.targets[Ntrain:]
Train size: 48000, Test size: 12000
# Scale images to the [0, 1] range
X_train = X_train.float() / 255
X_test = X_test.float() / 255

X_train = X_train.unsqueeze(1)  # Add channel dimension
X_test = X_test.unsqueeze(1)    # Add channel dimension

def to_categorical(y, num_classes):
    """ 1-hot encodes a tensor """
    return torch.eye(num_classes)[y].float()

num_classes = 10

# convert class vectors to binary class matrices
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)
X_train.shape
torch.Size([48000, 1, 28, 28])
y_train[0], y_train.shape
(tensor([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]), torch.Size([48000, 10]))

Konstrukce modelu#

MLP model#

import torch.nn as nn
import torch.optim as optim

input_size = (1, 28, 28)  # (channels, height, width)

hidden_size = 64  # Example hidden layer size

num_classes = 10

# Define the CNN model
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(input_size[-1]**2, hidden_size),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(hidden_size, 20),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(20, num_classes),
    #nn.ReLU(),
    #nn.Softmax(dim=0)  # Softmax for multi-class classification
).to(device)
print(model)

# loss function and optimizer
loss_fn = nn.CrossEntropyLoss().to(device)  # cross entropy
optimizer = optim.Adam(model.parameters(), lr=0.001)
Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=64, bias=True)
  (2): ReLU()
  (3): Dropout(p=0.2, inplace=False)
  (4): Linear(in_features=64, out_features=20, bias=True)
  (5): ReLU()
  (6): Dropout(p=0.2, inplace=False)
  (7): Linear(in_features=20, out_features=10, bias=True)
)

CNN model#

torch.cuda.empty_cache()
import torch.nn as nn
import torch.optim as optim

input_size = (1, 28, 28)  # (channels, height, width)

kernel_size = 5
padding = kernel_size // 2  # Padding to keep the output size same as input size

channels_1 = 64
channels_2 = 128

kernel_size_pool = 3
stride_pool = 3

num_classes = 10

# Define the CNN model
model = nn.Sequential(
    nn.Conv2d(kernel_size=kernel_size, in_channels=1, out_channels=channels_1, padding=padding),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=kernel_size_pool, stride=stride_pool),
    nn.Dropout(0.4),
    nn.Conv2d(kernel_size=kernel_size, in_channels=channels_1, out_channels=channels_2, padding=padding),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=kernel_size_pool, stride=stride_pool),
    nn.Flatten(),  # Flatten the output for the fully connected layer
    nn.Dropout(0.4),
    nn.Linear((input_size[-1]//stride_pool**2)**2*channels_2, num_classes),
    #nn.ReLU(),
    #nn.Linear(40, 10),
    #nn.Softmax(dim=0)  # Softmax for multi-class classification
).to(device)
print(model)

# loss function and optimizer
loss_fn = nn.CrossEntropyLoss().to(device)  # cross entropy
optimizer = optim.Adam(model.parameters(), lr=0.001)
Sequential(
  (0): Conv2d(1, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
  (3): Dropout(p=0.4, inplace=False)
  (4): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (5): ReLU()
  (6): MaxPool2d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
  (7): Flatten(start_dim=1, end_dim=-1)
  (8): Dropout(p=0.4, inplace=False)
  (9): Linear(in_features=1152, out_features=10, bias=True)
)
from torchsummary import summary

summary(model, input_size=input_size, device=device)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1           [-1, 64, 28, 28]           1,664
              ReLU-2           [-1, 64, 28, 28]               0
         MaxPool2d-3             [-1, 64, 9, 9]               0
           Dropout-4             [-1, 64, 9, 9]               0
            Conv2d-5            [-1, 128, 9, 9]         204,928
              ReLU-6            [-1, 128, 9, 9]               0
         MaxPool2d-7            [-1, 128, 3, 3]               0
           Flatten-8                 [-1, 1152]               0
           Dropout-9                 [-1, 1152]               0
           Linear-10                   [-1, 10]          11,530
================================================================
Total params: 218,122
Trainable params: 218,122
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.03
Params size (MB): 0.83
Estimated Total Size (MB): 1.86
----------------------------------------------------------------

Trénování neuronové sítě#

To keep track of how long training takes I am adding a timeSince(timestamp) function which returns a human readable string:

import time
import math

def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)
# Calculate accuracy (a classification metric)
def accuracy_fn(y_true, y_pred):
    y_true = torch.argmax(y_true, dim=1)  # Convert one-hot encoded labels to class indices
    y_pred = torch.argmax(y_pred, dim=1)  # Convert model outputs to class indices
    correct = torch.eq(y_true, y_pred).sum().item() # torch.eq() calculates where two tensors are equal
    acc = (correct / len(y_pred)) * 100 
    return acc
n_epochs = 20    # number of epochs to run
batch_size = 256  # size of each batch
batches_per_epoch = len(X_train) // batch_size

loss_array = []  # to store loss values
acc_array = []  # to store accuracy values
test_acc_array = []  # to store test accuracy values

# Put data to target device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

start_time = time.time()

# Build training and evaluation loop
for epoch in range(n_epochs):
    ### Training
    model.train()
    
    for i in range(batches_per_epoch):    
        start = i * batch_size
        # take a batch
        Xbatch = X_train[start:start+batch_size]
        ybatch = y_train[start:start+batch_size]
        # forward pass
        y_pred = model(Xbatch)
        #print(y_pred.shape, y_pred[0,:])
        loss = loss_fn(y_pred, ybatch)
        acc = accuracy_fn(ybatch, y_pred) 
        # backward pass
        optimizer.zero_grad()
        loss.backward()
        # update weights
        optimizer.step()
        
        loss_array.append(loss.item())
        acc_array.append(acc)
        
        print(f'Epoch {epoch + 1}/{n_epochs}, Batch {i + 1}/{batches_per_epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}%', end='\r')    
        
    ### Testing
    model.eval()
    with torch.inference_mode():
        # 1. Forward pass
        idx = torch.randperm(X_test.shape[0])[:batch_size]
        y_pred = model(X_test[idx]).squeeze()
        # 2. Caculate loss/accuracy
        test_loss = loss_fn(y_pred, y_test[idx])
        test_acc = accuracy_fn(y_test[idx], y_pred)
        
        test_acc_array.append(test_acc)

        # Print out what's happening every epoch
        print(f"{timeSince(start_time)} | Epoch: {epoch + 1} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%")
        
Epoch 1/20, Batch 73/187 | Loss: 0.12768, Accuracy: 96.88%
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[22], line 26
     24 ybatch = y_train[start:start+batch_size]
     25 # forward pass
---> 26 y_pred = model(Xbatch)
     27 #print(y_pred.shape, y_pred[0,:])
     28 loss = loss_fn(y_pred, ybatch)

File c:\Users\jiral\Home\TydenVedy\TVJBook\.venv\lib\site-packages\torch\nn\modules\module.py:1751, in Module._wrapped_call_impl(self, *args, **kwargs)
   1749     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1750 else:
-> 1751     return self._call_impl(*args, **kwargs)

File c:\Users\jiral\Home\TydenVedy\TVJBook\.venv\lib\site-packages\torch\nn\modules\module.py:1762, in Module._call_impl(self, *args, **kwargs)
   1757 # If we don't have any hooks, we want to skip the rest of the logic in
   1758 # this function, and just call forward.
   1759 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1760         or _global_backward_pre_hooks or _global_backward_hooks
   1761         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1762     return forward_call(*args, **kwargs)
   1764 result = None
   1765 called_always_called_hooks = set()

File c:\Users\jiral\Home\TydenVedy\TVJBook\.venv\lib\site-packages\torch\nn\modules\container.py:240, in Sequential.forward(self, input)
    238 def forward(self, input):
    239     for module in self:
--> 240         input = module(input)
    241     return input

File c:\Users\jiral\Home\TydenVedy\TVJBook\.venv\lib\site-packages\torch\nn\modules\module.py:1751, in Module._wrapped_call_impl(self, *args, **kwargs)
   1749     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1750 else:
-> 1751     return self._call_impl(*args, **kwargs)

File c:\Users\jiral\Home\TydenVedy\TVJBook\.venv\lib\site-packages\torch\nn\modules\module.py:1762, in Module._call_impl(self, *args, **kwargs)
   1757 # If we don't have any hooks, we want to skip the rest of the logic in
   1758 # this function, and just call forward.
   1759 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1760         or _global_backward_pre_hooks or _global_backward_hooks
   1761         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1762     return forward_call(*args, **kwargs)
   1764 result = None
   1765 called_always_called_hooks = set()

File c:\Users\jiral\Home\TydenVedy\TVJBook\.venv\lib\site-packages\torch\nn\modules\pooling.py:213, in MaxPool2d.forward(self, input)
    212 def forward(self, input: Tensor):
--> 213     return F.max_pool2d(
    214         input,
    215         self.kernel_size,
    216         self.stride,
    217         self.padding,
    218         self.dilation,
    219         ceil_mode=self.ceil_mode,
    220         return_indices=self.return_indices,
    221     )

File c:\Users\jiral\Home\TydenVedy\TVJBook\.venv\lib\site-packages\torch\_jit_internal.py:622, in boolean_dispatch.<locals>.fn(*args, **kwargs)
    620     return if_true(*args, **kwargs)
    621 else:
--> 622     return if_false(*args, **kwargs)

File c:\Users\jiral\Home\TydenVedy\TVJBook\.venv\lib\site-packages\torch\nn\functional.py:830, in _max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode, return_indices)
    828 if stride is None:
    829     stride = torch.jit.annotate(list[int], [])
--> 830 return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)

KeyboardInterrupt: 
def running_mean(x, N):
    cumsum = np.cumsum(np.insert(x, 0, 0)) 
    r_avg = (cumsum[N:] - cumsum[:-N]) / float(N)
    res = np.zeros(len(x))
    res[N-1:] = r_avg
    for i in range(N-1):
        res[i] = (cumsum[i+1]) / float(i+1)
    return res

plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(loss_array, label='Training Loss')
N_avg = 50  # Number of batches to average over
plt.plot(running_mean(loss_array, N_avg), '-', label=f'Running mean ({N_avg} batches)')
plt.title('Training Loss')
plt.xlabel('Batch Number')
plt.ylabel('Loss')
plt.yscale('log')
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(acc_array, label='Train Accuracy')
plt.plot((np.arange(n_epochs)+1)*batches_per_epoch, test_acc_array, '.r', label='Test Accuracy')
plt.legend()
plt.title('Accuracy')
plt.xlabel('Batch Number')
plt.ylabel('Accuracy (%)')
plt.tight_layout()
plt.savefig('loss_accuracy.png')
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[67], line 22
     20 plt.subplot(1, 2, 2)
     21 plt.plot(acc_array, label='Train Accuracy')
---> 22 plt.plot((np.arange(n_epochs)+1)*batches_per_epoch, test_acc_array, '.r', label='Test Accuracy')
     23 plt.legend()
     24 plt.title('Accuracy')

File c:\Users\jiral\Home\TydenVedy\TVJBook\.venv\lib\site-packages\matplotlib\pyplot.py:3838, in plot(scalex, scaley, data, *args, **kwargs)
   3830 @_copy_docstring_and_deprecators(Axes.plot)
   3831 def plot(
   3832     *args: float | ArrayLike | str,
   (...)
   3836     **kwargs,
   3837 ) -> list[Line2D]:
-> 3838     return gca().plot(
   3839         *args,
   3840         scalex=scalex,
   3841         scaley=scaley,
   3842         **({"data": data} if data is not None else {}),
   3843         **kwargs,
   3844     )

File c:\Users\jiral\Home\TydenVedy\TVJBook\.venv\lib\site-packages\matplotlib\axes\_axes.py:1777, in Axes.plot(self, scalex, scaley, data, *args, **kwargs)
   1534 """
   1535 Plot y versus x as lines and/or markers.
   1536 
   (...)
   1774 (``'green'``) or hex strings (``'#008000'``).
   1775 """
   1776 kwargs = cbook.normalize_kwargs(kwargs, mlines.Line2D)
-> 1777 lines = [*self._get_lines(self, *args, data=data, **kwargs)]
   1778 for line in lines:
   1779     self.add_line(line)

File c:\Users\jiral\Home\TydenVedy\TVJBook\.venv\lib\site-packages\matplotlib\axes\_base.py:297, in _process_plot_var_args.__call__(self, axes, data, return_kwargs, *args, **kwargs)
    295     this += args[0],
    296     args = args[1:]
--> 297 yield from self._plot_args(
    298     axes, this, kwargs, ambiguous_fmt_datakey=ambiguous_fmt_datakey,
    299     return_kwargs=return_kwargs
    300 )

File c:\Users\jiral\Home\TydenVedy\TVJBook\.venv\lib\site-packages\matplotlib\axes\_base.py:494, in _process_plot_var_args._plot_args(self, axes, tup, kwargs, return_kwargs, ambiguous_fmt_datakey)
    491     axes.yaxis.update_units(y)
    493 if x.shape[0] != y.shape[0]:
--> 494     raise ValueError(f"x and y must have same first dimension, but "
    495                      f"have shapes {x.shape} and {y.shape}")
    496 if x.ndim > 2 or y.ndim > 2:
    497     raise ValueError(f"x and y can be no greater than 2D, but have "
    498                      f"shapes {x.shape} and {y.shape}")

ValueError: x and y must have same first dimension, but have shapes (20,) and (0,)
../_images/bb980ea4036c9ce8228f11ae9a4ab475248e508d96e50a445ec710633f040968.png

Ohodnocení modelu#

# Make predictions
correct = 0

model.eval()
# Evaluete model using batch_size, to minimize GPU RAM usage !
with torch.inference_mode():
    for i in range(len(X_test) // batch_size):
        y_preds = model(X_test[i*batch_size:i*batch_size+batch_size])  # Apply softmax to get probabilities
        y_pred_nums = np.argmax(y_preds.cpu(), axis=1)
        correct += np.sum((y_pred_nums == np.argmax(y_test[i*batch_size:i*batch_size+batch_size].cpu(), axis=1)).numpy())  # compare with true labels
    i = len(X_test) // batch_size
    y_preds = model(X_test[i*batch_size:])  # Apply softmax to get probabilities
    y_pred_nums = np.argmax(y_preds.cpu(), axis=1)
    correct += np.sum((y_pred_nums == np.argmax(y_test[i*batch_size:].cpu(), axis=1)).numpy())  # compare with true labels
    
print(f'Accuracy: {correct / len(y_test) * 100:.2f}%')
Accuracy: 97.07%
y_preds = model(X_test[:500])
N = 64
plt.figure(figsize=(6, 8))
for i in range(N):
    a = int(np.sqrt(N))
    plt.subplot(a, a, i + 1)
    image = X_test[i].cpu().squeeze().numpy()  # Remove channel dimension
    label = np.argmax(y_preds[i].cpu().detach().numpy())
    plt.axis('off')
    plt.imshow(image, cmap = 'gray')
    plt.title(f'label: {label}')
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.suptitle('MNIST Predictions', fontsize=16)
plt.savefig('predicted.png')
plt.show()
../_images/ece22c636cc571fac5f7ec32046eeafdee873a1dea80f9c745b4e11c4dcd5a25.png
import datetime

dt = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
print(dt)

import os
os.makedirs('results', exist_ok=True)

torch.save(model.state_dict(), f'results/model_mnist_cnn_{dt}.pth')
20250624095837
#model.load_state_dict(torch.load('results/model_mnist_cnn_20250621162737.pth'))

Vizualizace#

!pip install torchview

from torchviz import make_dot
import os
os.environ["PATH"] += os.pathsep + 'C:/Program Files/Graphviz/bin/'

y_pred = model(X_train[0:1])  # Get predictions for the first training sample

make_dot(y_pred, params=dict(list(model.named_parameters()))).render("cnn_torchviz", format="png")
'cnn_torchviz.png'
import torchvision
from torchview import draw_graph

model_graph = draw_graph(model, input_size=(1,1,28,28), expand_nested=True, device=device)
graph = model_graph.visual_graph
graph_svg = graph.pipe(format='svg') # convert to binary data
with open('graph.svg', 'wb') as f:
    f.write(graph_svg)
(process:42512): Pango-WARNING **: 11:26:13.667: couldn't load font "Linux libertine Not-Rotated 10", falling back to "Sans Not-Rotated 10", expect ugly output.

Váhy konvolučních vrstev#

weight_list = []

for m in model.modules():
    if isinstance(m, nn.Conv2d):
        print(m.weight.shape)
        weight_list.append(m.weight)
torch.Size([64, 1, 5, 5])
torch.Size([128, 64, 5, 5])
cnv1_weights = weight_list[0]
plt.figure(figsize=(8, 8))
filter_count = np.minimum(cnv1_weights.shape[0], 4)
for i in range(filter_count):
    plt.subplot(filter_count, 1, i + 1)
    plt.imshow(cnv1_weights[i].cpu().squeeze().detach().numpy(), cmap='gray')
    plt.axis('off')
    plt.title(f'Filter {i + 1}')
plt.tight_layout()
../_images/cfb781fe61476cf4b8ef79b7d45fa95014b142ae93fda13adf9443b7ca174fa5.png
cnv2_weights = weight_list[1]
plt.figure(figsize=(12, 8))
filter_count1 = np.minimum(cnv2_weights.shape[0], 4)
filter_count2 = np.minimum(cnv2_weights.shape[1], 8)
for i in range(filter_count1):
    for j in range(filter_count2):
        plt.subplot(filter_count1, filter_count2, i*filter_count2 + j + 1)
        plt.imshow(cnv2_weights[i,j].cpu().squeeze().detach().numpy(), cmap='gray')
        plt.axis('off')
        plt.title(f'Filter ({i + 1}, {j + 1})')
plt.tight_layout()
../_images/66db4291a503ade20bf2db757e9c289b509d8fea5bd9f562f2ce54c6e1b70750.png

Aktivační hodnoty v neuronech#

idx = 47 #58

plt.figure(figsize=(3, 4))
image = X_test[idx].cpu().squeeze().numpy()  # Remove channel dimension
label = np.argmax(y_preds[idx].cpu().detach().numpy())
plt.axis('off')
plt.imshow(image, cmap = 'gray')
plt.title(f'label: {label}')
#plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.tight_layout()
#plt.suptitle('MNIST Predictions', fontsize=16)
plt.show()
../_images/fda05e25f7096bcc9bdd35339ca05c081f702b6009cf087f090e5ee41e52af39.png
conv2d_layers = [m for m in model.modules() if isinstance(m, nn.Conv2d)]

activation = {}
def get_activation(name):
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook

conv2d_layers[0].register_forward_hook(get_activation('cnv1'))
conv2d_layers[1].register_forward_hook(get_activation('cnv2'))

y_pred = model(X_test[idx:idx+1].to(device))
num_pred = torch.argmax(y_pred, dim=1).detach().cpu().numpy().item()
print(f'Predicted label: {num_pred}, True label: {y_test[idx].argmax().item()}')

print(activation['cnv1'].shape)
print(activation['cnv2'].shape)
Predicted label: 4, True label: 4
torch.Size([1, 64, 28, 28])
torch.Size([1, 128, 9, 9])
plt.figure(figsize=(8, 3))
filter_count = np.minimum(activation['cnv1'].shape[1], 4)
for i in range(filter_count):
    plt.subplot(1, filter_count, i + 1)
    plt.imshow(activation['cnv1'][0, i].cpu().squeeze().numpy(), cmap='gray')
    plt.axis('off')
    plt.title(f'Filter {i + 1}')
plt.tight_layout()
plt.suptitle('Activation Maps for cnv2', fontsize=16);
../_images/7d8bd7517f7c1dc43ae5b71cfc95a4f31be72c40359e750a42f672ce653d7c62.png
plt.figure(figsize=(16, 3))
filter_count = np.minimum(activation['cnv2'].shape[1], 8)
for i in range(filter_count):
    plt.subplot(1, filter_count, i + 1)
    plt.imshow(activation['cnv2'][0, i].cpu().squeeze().numpy(), cmap='gray')
    plt.axis('off')
    plt.title(f'Filter {i + 1}')
plt.tight_layout()
plt.suptitle('Activation Maps for cnv2', fontsize=16);
../_images/2c06077308c42c4e5176b9151852e169768e7d6ca893dd0b2573e52d9c3741f8.png

#

Analýza aktivací pro jednotlivé číslice#

activation['cnv1'].cpu().numpy().shape[1:]  # Get the shape of the activation map for cnv1
(64, 28, 28)
activ_cnv1_stats = np.zeros((*activation['cnv1'].cpu().numpy().shape[1:], 10))  # (filters_shape, num_classes)
activ_cnv2_stats = np.zeros((*activation['cnv2'].cpu().numpy().shape[1:], 10))  # (filters_shape, num_classes)

for i in range(X_test.shape[0]):
    y_pred = model(X_test[i:i+1].to(device))
    num_pred = torch.argmax(y_pred, dim=1).detach().cpu().numpy().item()
    if num_pred == y_test[i].argmax().item():
        # Add to statistics
        activ_cnv1_stats[:,:,:,num_pred] += activation['cnv1'].cpu().numpy().squeeze()
        activ_cnv2_stats[:,:,:,num_pred] += activation['cnv2'].cpu().numpy().squeeze()
        
for i in range(num_classes):
    activ_cnv1_stats[:,:,:,i] /= np.sum(y_test.cpu().numpy()[:, i])  # Normalize by the number of samples for each class
    activ_cnv2_stats[:,:,:,i] /= np.sum(y_test.cpu().numpy()[:, i])  # Normalize by the number of samples for each class
activ_cnv1_stats.shape
(64, 28, 28, 10)
activ_cnv1_num = np.argmax(activ_cnv1_stats, axis=-1)  # Get the index of the maximum value along the last axis (num_classes)
#print(activ_cnv1_num[1, :, :])

import matplotlib

cmap = plt.get_cmap('tab10', 10)  # Use a colormap with 10 distinct colors

# define the bins and normalize
bounds = np.arange(0, 11)-0.5
norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N)

fig = plt.figure(figsize=(12, 4))
filter_count = np.minimum(activ_cnv1_num.shape[0], 4)
for i in range(filter_count):
    plt.subplot(1, filter_count, i + 1)
    im = plt.imshow(activ_cnv1_num[i], cmap=cmap, norm=norm)
    plt.axis('off')
    plt.title(f'Filter {i + 1}')
cbar_ax = fig.add_axes([0.92, 0.2, 0.02, 0.65])
fig.colorbar(im, cax=cbar_ax, ticks=np.arange(11))
plt.suptitle('Most Activated Class for cnv1 Filters', fontsize=16)
#plt.tight_layout(rect=[0, 0.03, 0.9, 0.95])
plt.show()
../_images/5eaaf491abacce8c67dc632f66575c6548775924b1afeb640e75dff147cb7478.png
activ_cnv2_num = np.argmax(activ_cnv2_stats, axis=-1)  # Get the index of the maximum value along the last axis (num_classes)

fig = plt.figure(figsize=(18, 5))
filter_count = np.minimum(activ_cnv2_num.shape[0], 4)
for i in range(filter_count):
    plt.subplot(1, filter_count, i + 1)
    im = plt.imshow(activ_cnv2_num[i], cmap=cmap, norm=norm)
    plt.axis('off')
    plt.title(f'Filter {i + 1}')
cbar_ax = fig.add_axes([0.92, 0.2, 0.015, 0.65])
fig.colorbar(im, cax=cbar_ax, ticks=np.arange(11))
plt.suptitle('Most Activated Class for cnv2 Filters', fontsize=16)
#plt.tight_layout(rect=[0, 0.03, 0.9, 0.95])
plt.show()
../_images/24633e232b2c45c3c1ab54027269299cd9aea46fbe4f0cfc6ada500c823dccd7.png

Chybné klasifikace#

y_test.shape
torch.Size([12000, 10])
def draw_output(idx_nums):
    plt.figure(figsize = (20, 20))
    plt.xticks( range(10) )
    x = np.ceil(np.sqrt(len(idx_nums)))
    cnt = 1
    for ph in idx_nums:
        plt.subplot(x, x, cnt)
        curr_photo = y_test[ph]
        
        plt.xlim(0, 10)
        plt.title("Digit: {0}\n idx: {1} ".format(np.argmax(y_test[ph]), ph), fontsize = 10) 
        plt.bar(range(10), y_pred[ph])
        
        cnt += 1
y_test_cpu = y_test.cpu().detach().numpy()
y_pred = model(X_test.to(device))
y_pred_cpu = y_pred.cpu().detach().numpy()

idx_noteq = np.argmax(y_test_cpu, axis=1) != np.argmax(y_pred_cpu, axis=1)
print(idx_noteq.sum(), "samples are misclassified")

cnt_error = np.argmax(y_test_cpu[idx_noteq], axis=1)
cnt_error = np.unique(cnt_error, return_counts = True)

plt.figure(figsize = (15, 7))
plt.title("Error distribution", fontsize=20)
plt.bar(cnt_error[0], cnt_error[1], color=plt.get_cmap('tab10').colors)
plt.xticks(np.arange(10))
plt.show()
83 samples are misclassified
../_images/95aea2727fab476803c6ea7d4b5aced649aa6015f36f44785495254801b4a59a.png
cnt_ind = 1
list_idx = []
fig = plt.figure(figsize=(8, 8))

# Find indices of misclassified samples
misclassified_indices = np.where(np.argmax(y_test_cpu, axis=1) != np.argmax(y_pred_cpu, axis=1))[0]

# Randomly select 64 misclassified indices (or fewer if not enough)
num_to_plot = np.minimum(36, len(misclassified_indices))
#np.random.seed(42)
selected_indices = np.random.choice(misclassified_indices, size=num_to_plot, replace=False)

for i, idx in enumerate(selected_indices, 1):
    plt.subplot(6, 6, i)
    plt.imshow(X_test[idx, 0, :, :].cpu().numpy(), cmap='gray', interpolation='none')
    true_label = np.argmax(y_test_cpu[idx])
    pred_label = np.argmax(y_pred_cpu[idx])
    plt.title(f'True:{true_label}, Predicted:{pred_label}\nidx={idx}', fontsize=8)
    plt.axis('off')
plt.suptitle('Selected Misclassified Samples', fontsize=16)
plt.tight_layout()
../_images/0e6e7caed049b9e5dd729142d519dc0e9c8259eaacb91d94ff8a25cecae32345.png

Zdroje#

https://www.kaggle.com/code/jedrzejdudzicz/mnist-dataset-100-accuracy