shakes76 · CMWL4501 · Sep 19, 2023 · Sep 19, 2023 · Sep 19, 2023 · Sep 19, 2023
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,9 @@
+recognition/SiameseNetwork/__pycache__/dataset.cpython-311.pyc
+recognition/SiameseNetwork/__pycache__/modules.cpython-311.pyc
+recognition/s4627382_SiameseNetwork/__pycache__/dataset.cpython-311.pyc
+recognition/s4627382_SiameseNetwork/__pycache__/modules.cpython-311.pyc
+recognition/s4627382_SiameseNetwork/SiameseNet.pth
+test/dataset.py
+test/modules.py
+test/train.py
+recognition/s4627382_SiameseNetwork/knn.pkl
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "python.analysis.typeCheckingMode": "off"
+}
diff --git a/README.md b/README.md
diff --git a/recognition/README.md b/recognition/README.md
diff --git a/recognition/s4627382_SiameseNetwork/Images/AD_sample.jpeg b/recognition/s4627382_SiameseNetwork/Images/AD_sample.jpeg
diff --git a/recognition/s4627382_SiameseNetwork/Images/NC_sample.jpeg b/recognition/s4627382_SiameseNetwork/Images/NC_sample.jpeg
diff --git a/recognition/s4627382_SiameseNetwork/Images/SiameseNet.png b/recognition/s4627382_SiameseNetwork/Images/SiameseNet.png
diff --git a/recognition/s4627382_SiameseNetwork/Images/ep1.png b/recognition/s4627382_SiameseNetwork/Images/ep1.png
diff --git a/recognition/s4627382_SiameseNetwork/Images/ep3.png b/recognition/s4627382_SiameseNetwork/Images/ep3.png
diff --git a/recognition/s4627382_SiameseNetwork/Images/ep5.png b/recognition/s4627382_SiameseNetwork/Images/ep5.png
diff --git a/recognition/s4627382_SiameseNetwork/Images/ep7.png b/recognition/s4627382_SiameseNetwork/Images/ep7.png
diff --git a/recognition/s4627382_SiameseNetwork/Images/ep9.png b/recognition/s4627382_SiameseNetwork/Images/ep9.png
diff --git a/recognition/s4627382_SiameseNetwork/Images/input_image.png b/recognition/s4627382_SiameseNetwork/Images/input_image.png
diff --git a/recognition/s4627382_SiameseNetwork/README.md b/recognition/s4627382_SiameseNetwork/README.md
@@ -0,0 +1,153 @@
+# KNN classifier based on siamese network embedding
+Liang Kaige s4627382
+## Introduction
+The purpose of this project is to construct a [Siamese network](#siamese-network) and use its embedding to train a [knn classifier](#k-nearest-neighbour-classifier) to classify the [Alzheimer's Disease Neuroimaging Initiative (ADNI)](#adni-dataset) brain dataset.
+
+### ADNI Dataset
+The ADNI dataset used here comprises 30,520 MRI brain slices in total. Of these, 14,860 images are associated with Alzheimer’s disease (AD), while 15,660 images correspond to cognitively normal (NC) conditions.  
+![AD sample](Images/AD_sample.jpeg) ![NC sample](Images/NC_sample.jpeg).  
+AD sample and NC sample
+
+### Siamese Network
+A Siamese network is a distance-based neural network. It consists of two weight-shared subnetworks and a designated loss function. The network takes two images as inputs, which are passed through their corresponding subnetworks for feature extraction. These subnetworks produce two flattened layers, called embeddings, which are then fed into the loss function. 
+![Siamese Network Architecture](Images/SiameseNet.png).
+
+In this project, contrastive loss is used. It is defined as $$L(x_1, x_2, y) = (1 - y) \times \frac{1}{2} D^2 + y \times \frac{1}{2} \max(0, m - D)^2$$ where $y$ is the label, $D$ is the distance between the two embeddings and $m$ is the margin.  
+When the distance between two inputs is smaller than the margin, they are considered similar (y = 0), and dissimilar otherwise (y = 1). This loss function pulls similar samples closer to each other while pushing dissimilar samples apart.
+
+### K Nearest Neighbour classifier
+The knn classifier utilizes the embeddings from the Siamese network as its dataset. It predicts the label of a new sample based on the majority vote of its k nearest neighbors. 
+
+## Training process
+### Data loading
+The images should be saved in a folder with the following structure:  
+- AD_NC
+  - Train
+    - AD
+      - images
+    - NC
+      - images
+  - Test
+    - AD
+      - images
+    - NC
+      - images 
+
+```python
+def load_data(train_folder_path, train_ad_path, train_nc_path, test_ad_path, test_nc_path, batch_size=batch_size):
+    """Build the train, validation and test data loaders.
+
+    The mean and standard deviation are computed from ``train_folder_path``
+    only and are then used to normalize all three datasets. The train and
+    validation sets are both built from the training directories with a
+    split ratio of 0.8; the test set uses the test directories in full.
+
+    Returns:
+        (train_loader, validation_loader, test_loader); only the training
+        loader shuffles.
+    """
+    # calculate mean and std for train set
+    mean, std = compute_mean_std(train_folder_path)
+
+    # define transform
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize((mean,), (std,))
+    ])
+
+    # create dataset
+    train_set = CustomDataset(ad_dir=train_ad_path, nc_dir=train_nc_path, transform=transform, validate=False, split_ratio=0.8)
+    validation_set = CustomDataset(ad_dir=train_ad_path, nc_dir=train_nc_path, transform=transform, validate=True, split_ratio=0.8)
+    test_set = CustomDataset(ad_dir=test_ad_path, nc_dir=test_nc_path, transform=transform, validate=False, split_ratio=1)
+
+    # create dataloader
+    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
+    validation_loader = DataLoader(validation_set, batch_size=batch_size, shuffle=False)
+    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
+
+    return train_loader, validation_loader, test_loader
+```
+The function above defines how to load the data. It uses the **Train** folder to get all images inside and calculates their mean and standard deviation. Then it splits the training data into two parts: one part (80%) for training and the other part (20%) for validation. This keeps the test data completely unseen, to prevent overfitting to it.  
+
+The data are arranged into four types of pairs, labelled 0 when the two images are considered similar and 1 otherwise:  
+(ad, ad, 0), (nc, nc, 0), (ad, nc, 1), (nc, ad, 1)  
+These four types of pairs are selected evenly, but the images in each pair are chosen randomly from their respective classes.  
+
+### Model training
+The embedding model is a simple convolutional neural network, which accepts images of size 256*240 as input and returns a flattened embedding of size 256.
+```python
+class Embedding(nn.Module):
+    """Convolutional embedding network used as the shared Siamese branch.
+
+    Three Conv-BatchNorm-ReLU-MaxPool stages are followed by two linear
+    layers that produce a 256-dimensional embedding. The first convolution
+    takes one input channel and the first linear layer expects 64*32*30
+    features, i.e. a single-channel 256*240 input image.
+    """
+    def __init__(self):
+        super(Embedding, self).__init__()
+        # feature extractor: each max-pool (kernel 3, stride 2, padding 1)
+        # halves both spatial dimensions
+        self.conv = nn.Sequential(
+
+            nn.Conv2d(1, 32, kernel_size=3, padding=1),
+            nn.BatchNorm2d(32),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=3, stride=2, padding=1), # size: 256*240 -> 128*120
+
+            nn.Conv2d(32, 64, kernel_size=3, padding=1),
+            nn.BatchNorm2d(64),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=3, stride=2, padding=1), # size: 128*120 -> 64*60
+
+            nn.Conv2d(64, 64, kernel_size=3, padding=1),
+            nn.BatchNorm2d(64),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=3, stride=2, padding=1), # size: 64*60 -> 32*30
+            )
+
+        # projection head: flattened feature map -> 512 -> 256
+        self.fc = nn.Sequential(
+            nn.Linear(64*32*30, 512),
+            nn.ReLU(inplace=True),
+
+            nn.Linear(512, 256),
+            )
+
+    def forward(self, x):
+        """Return the embedding of the image batch ``x``."""
+        out = self.conv(x)
+        # flatten everything except the batch dimension
+        out = out.view(out.size()[0], -1)
+        out = self.fc(out)
+        return out
+```
+After training, the Siamese net and knn model are saved to the given directory, but the trained models are not uploaded to GitHub.
+
+I trained the model for 10 epochs with a margin of 1 and drew the embedding visualization every two epochs. Here are the accuracies and the losses.  
+Epoch [1/10], Loss: 0.8536, Accuracy: 0.5056, validate loss: 0.1353, validate accuracy: 0.5180  
+![embedding visualization in epoch 1](Images/ep1.png).  
+Epoch [2/10], Loss: 0.2237, Accuracy: 0.5203, validate loss: 0.1375, validate accuracy: 0.4966  
+Epoch [3/10], Loss: 0.1366, Accuracy: 0.5546, validate loss: 0.1245, validate accuracy: 0.5851  
+![embedding visualization in epoch 3](Images/ep3.png).  
+Epoch [4/10], Loss: 0.1373, Accuracy: 0.6181, validate loss: 0.1189, validate accuracy: 0.6135  
+Epoch [5/10], Loss: 0.1001, Accuracy: 0.7056, validate loss: 0.1202, validate accuracy: 0.6541  
+![embedding visualization in epoch 5](Images/ep5.png).  
+Epoch [6/10], Loss: 0.1335, Accuracy: 0.6911, validate loss: 0.1147, validate accuracy: 0.6692  
+Epoch [7/10], Loss: 0.0730, Accuracy: 0.7964, validate loss: 0.1131, validate accuracy: 0.6788  
+![embedding visualization in epoch 7](Images/ep7.png).  
+Epoch [8/10], Loss: 0.0565, Accuracy: 0.8521, validate loss: 0.1001, validate accuracy: 0.7147  
+Epoch [9/10], Loss: 0.0654, Accuracy: 0.8594, validate loss: 0.1076, validate accuracy: 0.6868  
+![embedding visualization in epoch 9](Images/ep9.png).  
+Epoch [10/10], Loss: 0.0594, Accuracy: 0.8644, validate loss: 0.1047, validate accuracy: 0.7055  
+KNN Accuracy: 0.4992
+
+As can be seen, the model starts overfitting at around epoch 7. The data tend to separate into two parts but finally become a squiggle. However, the two classes are still evenly mixed together and show no sign of separating. I tried many different hyperparameters (different combinations of learning rate, margin and epochs), different models (many different ways of constructing the embedding network), and different loss functions (triplet loss, contrastive loss), but they all performed similarly. I also normalized the data and used hard sample mining and semi-hard sample mining, but none of them worked well. I have done everything I could to improve the model, but with little success, so this is my final version.
+
+### Prediction
+```python
+predict_image(image_path)
+```
+This function takes the path of an image as input and returns the predicted label.  
+Here is the example usage:  
+```python
+image_path = "D:/Study/MLDataSet/AD_NC/test/AD/388206_78.jpeg"
+predicted_label = predict_image(image_path)
+display_image(image_path)
+print(f"Predicted label: {predicted_label}, Ture label: AD")
+```
+
+The return is:  
+
+Predicted label: ad, Ture label: ad
+
+![Input image](Images/input_image.png).  
+
+## Dependencies
+- python 3.11.5
+- pytorch 2.1.0 py3.11_cuda11.8_cudnn8_0 pytorch
+- torchvision 0.16.0
+- matplotlib 3.7.2
+- Pillow (PIL) 10.0.1
+- scikit-learn (sklearn) 1.2.2
+
+
+## Reference
+Khandelwal, R. (2021, January 28). One Shot Learning with Siamese Network. The Startup. Medium. Retrieved from https://medium.com/swlh/one-shot-learning-with-siamese-network-1c7404c35fda
diff --git a/recognition/s4627382_SiameseNetwork/dataset.py b/recognition/s4627382_SiameseNetwork/dataset.py
@@ -0,0 +1,132 @@
+import os
+import random
+import torch
+import torchvision.transforms as transforms
+from torch.utils.data import Dataset, DataLoader
+from PIL import Image
+
+device = torch.device('cuda')
+batch_size = 32
+
+class CustomDataset(Dataset):
+    def __init__(self, ad_dir, nc_dir, transform=None, validate=False, split_ratio=0.8):
+        # get the file path
+        self.ad_folder = ad_dir
+        self.nc_folder = nc_dir
+
+        # get the samples' name
+        self.ad_names = os.listdir(ad_dir)
+        self.nc_names = os.listdir(nc_dir)
+
+        # define the transform
+        self.transform = transform
+
+        # splite data to train set and validation set
+        total_ad_samples = len(self.ad_names)
+        split_ad_samples = int(total_ad_samples * split_ratio)
+        total_nc_samples = len(self.nc_names)
+        split_nc_samples = int(total_nc_samples * split_ratio)
+
+        if validate:
+            self.ad_names = self.ad_names[split_ad_samples:]
+            self.nc_names = self.nc_names[split_nc_samples:]
+        else:
+            self.ad_names = self.ad_names[:split_ad_samples]
+            self.nc_names = self.nc_names[:split_nc_samples]
+
+
+    def __len__(self):
+        return 2 * min(len(self.ad_names), len(self.nc_names))
+
+    def __getitem__(self, index):
+        # Depending on the index, choose the type of pair
+        pair_type = index % 4
+
+        if pair_type == 0:  # (ad, ad, 0)
+            img1_path = os.path.join(self.ad_folder, random.choice(self.ad_names))
+            img2_path = os.path.join(self.ad_folder, random.choice(self.ad_names))
+            label = 0
+        elif pair_type == 1:  # (nc, nc, 0)
+            img1_path = os.path.join(self.nc_folder, random.choice(self.nc_names))
+            img2_path = os.path.join(self.nc_folder, random.choice(self.nc_names))
+            label = 0
+        elif pair_type == 2:  # (ad, nc, 1)
+            img1_path = os.path.join(self.ad_folder, random.choice(self.ad_names))
+            img2_path = os.path.join(self.nc_folder, random.choice(self.nc_names))
+            label = 1
+        else:  # (nc, ad, 1)
+            img1_path = os.path.join(self.nc_folder, random.choice(self.nc_names))
+            img2_path = os.path.join(self.ad_folder, random.choice(self.ad_names))
+            label = 1
+
+        # open images
+        with Image.open(img1_path) as img1, Image.open(img2_path) as img2:
+            # apply transformation
+            if self.transform:
+                img1 = self.transform(img1)
+                img2 = self.transform(img2)
+
+        return img1, img2, torch.tensor(label, dtype=torch.float32)
+
+
+# calculate the mean and std of the dataset
+# input: The folder containing folders containing images
+# outupt: mean and std of all images across all subfolders
+def compute_mean_std(img_folder):
+    # get subfolders
+    subfolders = [dir for dir in os.listdir(img_folder) if os.path.isdir(os.path.join(img_folder, dir))]
+
+    # transformer
+    transform = transforms.Compose([
+        # transform image from numpy.ndarray to tensor
+        # and normalize pixels from 0~255 to 0~1
+        transforms.ToTensor()
+        ])
+
+    num_px = torch.tensor(0, dtype=torch.float64)
+    sum_px = torch.tensor(0, dtype=torch.float64)
+    sum_px_sq = torch.tensor(0, dtype=torch.float64)
+
+    for subfolder in subfolders:
+        subfolder_path = os.path.join(img_folder, subfolder)
+        img_names = os.listdir(subfolder_path)
+
+        for img_name in img_names:
+            # open the image and put them into GPU
+            img_path = os.path.join(subfolder_path, img_name)
+            img = Image.open(img_path)
+            img_tensor = transform(img)
+            img.close()
+
+            num_px += img_tensor.numel()  # get the # of px
+            sum_px += torch.sum(img_tensor)
+            sum_px_sq += torch.sum(img_tensor ** 2)
+
+    # calculate mean and std for all images across all subfolders
+    mean = sum_px / num_px
+    std = torch.sqrt((sum_px_sq / num_px) - (mean ** 2))
+
+    print("mean: ", mean.item(), "std: ", std.item())
+    return mean.item(), std.item()
+
+def load_data(train_folder_path, train_ad_path, train_nc_path, test_ad_path, test_nc_path, batch_size=batch_size):
+    # calculate mean and std for train set
+    mean, std = compute_mean_std(train_folder_path)
+
+    # define transform
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize((mean,), (std,))
+    ])
+
+    # create dataset
+    train_set = CustomDataset(ad_dir=train_ad_path, nc_dir=train_nc_path, transform=transform, validate=False, split_ratio=0.8)
+    validation_set = CustomDataset(ad_dir=train_ad_path, nc_dir=train_nc_path, transform=transform, validate=True, split_ratio=0.8)
+    test_set = CustomDataset(ad_dir=test_ad_path, nc_dir=test_nc_path, transform=transform, validate=False, split_ratio=1)
+
+    # create dataloader
+    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
+    validation_loader = DataLoader(validation_set, batch_size=batch_size, shuffle=False)
+    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
+
+    return train_loader, validation_loader, test_loader