diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..15fef1351 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +recognition/SiameseNetwork/__pycache__/dataset.cpython-311.pyc +recognition/SiameseNetwork/__pycache__/modules.cpython-311.pyc +recognition/s4627382_SiameseNetwork/__pycache__/dataset.cpython-311.pyc +recognition/s4627382_SiameseNetwork/__pycache__/modules.cpython-311.pyc +recognition/s4627382_SiameseNetwork/SiameseNet.pth +test/dataset.py +test/modules.py +test/train.py +recognition/s4627382_SiameseNetwork/knn.pkl diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..a6735e59a --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.analysis.typeCheckingMode": "off" +} \ No newline at end of file diff --git a/README.md b/README.md deleted file mode 100644 index 4a064f841..000000000 --- a/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# Pattern Analysis -Pattern Analysis of various datasets by COMP3710 students at the University of Queensland. - -We create pattern recognition and image processing library for Tensorflow (TF), PyTorch or JAX. - -This library is created and maintained by The University of Queensland [COMP3710](https://my.uq.edu.au/programs-courses/course.html?course_code=comp3710) students. - -The library includes the following implemented in Tensorflow: -* fractals -* recognition problems - -In the recognition folder, you will find many recognition problems solved including: -* OASIS brain segmentation -* Classification -etc. diff --git a/recognition/README.md b/recognition/README.md deleted file mode 100644 index 5c646231c..000000000 --- a/recognition/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# Recognition Tasks -Various recognition tasks solved in deep learning frameworks. 
-
-Tasks may include:
-* Image Segmentation
-* Object detection
-* Graph node classification
-* Image super resolution
-* Disease classification
-* Generative modelling with StyleGAN and Stable Diffusion
diff --git a/recognition/s4627382_SiameseNetwork/Images/AD_sample.jpeg b/recognition/s4627382_SiameseNetwork/Images/AD_sample.jpeg
new file mode 100644
index 000000000..70dd9a5f8
Binary files /dev/null and b/recognition/s4627382_SiameseNetwork/Images/AD_sample.jpeg differ
diff --git a/recognition/s4627382_SiameseNetwork/Images/NC_sample.jpeg b/recognition/s4627382_SiameseNetwork/Images/NC_sample.jpeg
new file mode 100644
index 000000000..6792673a1
Binary files /dev/null and b/recognition/s4627382_SiameseNetwork/Images/NC_sample.jpeg differ
diff --git a/recognition/s4627382_SiameseNetwork/Images/SiameseNet.png b/recognition/s4627382_SiameseNetwork/Images/SiameseNet.png
new file mode 100644
index 000000000..f160f344c
Binary files /dev/null and b/recognition/s4627382_SiameseNetwork/Images/SiameseNet.png differ
diff --git a/recognition/s4627382_SiameseNetwork/Images/ep1.png b/recognition/s4627382_SiameseNetwork/Images/ep1.png
new file mode 100644
index 000000000..b627678b8
Binary files /dev/null and b/recognition/s4627382_SiameseNetwork/Images/ep1.png differ
diff --git a/recognition/s4627382_SiameseNetwork/Images/ep3.png b/recognition/s4627382_SiameseNetwork/Images/ep3.png
new file mode 100644
index 000000000..edfaafc83
Binary files /dev/null and b/recognition/s4627382_SiameseNetwork/Images/ep3.png differ
diff --git a/recognition/s4627382_SiameseNetwork/Images/ep5.png b/recognition/s4627382_SiameseNetwork/Images/ep5.png
new file mode 100644
index 000000000..df47f9cae
Binary files /dev/null and b/recognition/s4627382_SiameseNetwork/Images/ep5.png differ
diff --git a/recognition/s4627382_SiameseNetwork/Images/ep7.png b/recognition/s4627382_SiameseNetwork/Images/ep7.png
new file mode 100644
index 000000000..7bae2117a
Binary files /dev/null and b/recognition/s4627382_SiameseNetwork/Images/ep7.png differ
diff --git a/recognition/s4627382_SiameseNetwork/Images/ep9.png b/recognition/s4627382_SiameseNetwork/Images/ep9.png
new file mode 100644
index 000000000..80fece35a
Binary files /dev/null and b/recognition/s4627382_SiameseNetwork/Images/ep9.png differ
diff --git a/recognition/s4627382_SiameseNetwork/Images/input_image.png b/recognition/s4627382_SiameseNetwork/Images/input_image.png
new file mode 100644
index 000000000..ef256a41a
Binary files /dev/null and b/recognition/s4627382_SiameseNetwork/Images/input_image.png differ
diff --git a/recognition/s4627382_SiameseNetwork/README.md b/recognition/s4627382_SiameseNetwork/README.md
new file mode 100644
index 000000000..f1eaea9af
--- /dev/null
+++ b/recognition/s4627382_SiameseNetwork/README.md
@@ -0,0 +1,153 @@
+# KNN classifier based on Siamese network embedding
Liang Kaige s4627382
## Introduction
The purpose of this project is to construct a [Siamese network](#siamese-network) and use its embedding to train a [KNN classifier](#k-nearest-neighbour-classifier) to classify the [Alzheimer's Disease Neuroimaging Initiative (ADNI)](#adni-dataset) brain dataset.

### ADNI Dataset
The ADNI dataset used here comprises 30,520 MRI brain slices in total. Of these, 14,860 images are associated with Alzheimer's disease (AD), while 15,660 images correspond to cognitively normal (NC) conditions.

![AD sample](Images/AD_sample.jpeg) ![NC sample](Images/NC_sample.jpeg)

AD sample (left) and NC sample (right)

### Siamese Network
A Siamese network is a distance-based neural network. It consists of two weight-sharing subnetworks and a designated loss function. The network takes two images as inputs and passes each through its subnetwork for feature extraction. The subnetworks produce two flattened vectors, called embeddings, which are then fed into the loss function.

![Siamese Network Architecture](Images/SiameseNet.png)

In this project, contrastive loss is used. It is defined as $$L(x_1, x_2, y) = (1 - y) \times \frac{1}{2} D^2 + y \times \frac{1}{2} \max(0, m - D)^2$$ where $y$ is the pair label, $D$ is the distance between the two embeddings, and $m$ is the margin.
Each pair is labelled similar ($y = 0$) or dissimilar ($y = 1$). The loss pulls similar samples closer together while pushing dissimilar samples apart until their distance exceeds the margin.
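As a minimal standalone sketch of this formula (the project's full implementation is the `ContrastiveLoss` class in `modules.py` below; `emb1`, `emb2` and the 0/1 labels `y` are assumed to be batched tensors):
```python
import torch

def contrastive_loss(emb1, emb2, y, margin=1.0):
    # Euclidean distance between the two embeddings of each pair
    d = (emb1 - emb2).pow(2).sum(1).sqrt()
    # y = 0 (similar): penalise the squared distance
    # y = 1 (dissimilar): penalise only pairs closer than the margin
    loss = (1 - y) * 0.5 * d.pow(2) + y * 0.5 * torch.relu(margin - d).pow(2)
    return loss.mean()
```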
### K Nearest Neighbour classifier
The KNN classifier uses the embeddings from the Siamese network as its training data. It predicts the label of a new sample by majority vote among its k nearest neighbours in embedding space.
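A minimal sketch of this classification step with scikit-learn (mirroring the `knn` function in `modules.py` below; the embedding matrices and labels are assumed to come from the trained network):
```python
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(train_embeddings, train_labels)  # one embedding row per sample
pred = knn.predict(new_embeddings)       # majority vote among the 5 nearest neighbours
```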
## Training process
### Data loading
The data images should be saved in a folder with the following structure:
- AD_NC
  - Train
    - AD
      - images
    - NC
      - images
  - Test
    - AD
      - images
    - NC
      - images

```python
def load_data(train_folder_path, train_ad_path, train_nc_path, test_ad_path, test_nc_path, batch_size=batch_size):
    # calculate mean and std for the train set
    mean, std = compute_mean_std(train_folder_path)

    # define transform
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((mean,), (std,))
    ])

    # create datasets
    train_set = CustomDataset(ad_dir=train_ad_path, nc_dir=train_nc_path, transform=transform, validate=False, split_ratio=0.8)
    validation_set = CustomDataset(ad_dir=train_ad_path, nc_dir=train_nc_path, transform=transform, validate=True, split_ratio=0.8)
    test_set = CustomDataset(ad_dir=test_ad_path, nc_dir=test_nc_path, transform=transform, validate=False, split_ratio=1)

    # create dataloaders
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    validation_loader = DataLoader(validation_set, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

    return train_loader, validation_loader, test_loader
```
The function above defines how the data are loaded. It takes all images inside the **Train** folder and computes their mean and standard deviation from running sums, via $\sigma = \sqrt{\mathbb{E}[x^2] - \mathbb{E}[x]^2}$, for normalization. It then splits the training data into two parts: 80% for training and 20% for validation. The test data stays completely unseen during training, so the final evaluation is unbiased.

The data are formed into four types of pairs, labelled 0 when the two images are considered similar and 1 otherwise:
(ad, ad, 0), (nc, nc, 0), (ad, nc, 1), (nc, ad, 1)
The four pair types are sampled evenly, with each image chosen at random from its class, as sketched below.
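A sketch of that selection logic (the full version is `CustomDataset.__getitem__` in `dataset.py` below):
```python
import random

def make_pair(index, ad_names, nc_names):
    pair_type = index % 4  # cycle evenly through the four pair types
    if pair_type == 0:     # (ad, ad, 0)
        return random.choice(ad_names), random.choice(ad_names), 0
    if pair_type == 1:     # (nc, nc, 0)
        return random.choice(nc_names), random.choice(nc_names), 0
    if pair_type == 2:     # (ad, nc, 1)
        return random.choice(ad_names), random.choice(nc_names), 1
    return random.choice(nc_names), random.choice(ad_names), 1  # (nc, ad, 1)
```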
### Model training
The embedding model is a simple convolutional neural network that accepts 256×240 images as input and returns a flattened embedding vector of size 256.
```python
class Embedding(nn.Module):
    def __init__(self):
        super(Embedding, self).__init__()
        self.conv = nn.Sequential(

            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),  # size: 256*240 -> 128*120

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),  # size: 128*120 -> 64*60

            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),  # size: 64*60 -> 32*30
        )

        self.fc = nn.Sequential(
            nn.Linear(64*32*30, 512),
            nn.ReLU(inplace=True),

            nn.Linear(512, 256),
        )

    def forward(self, x):
        out = self.conv(x)
        out = out.view(out.size()[0], -1)
        out = self.fc(out)
        return out
```
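A quick sanity check on the flattened size: the three stride-2 poolings take 256×240 down to 32×30, so the first linear layer sees 64×32×30 = 61,440 features. A sketch, assuming the class above is defined:
```python
import torch

model = Embedding()
dummy = torch.randn(1, 1, 256, 240)  # one greyscale 256x240 image
print(model.conv(dummy).shape)       # torch.Size([1, 64, 32, 30])
print(model(dummy).shape)            # torch.Size([1, 256])
```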
After training, the Siamese network and the KNN model are saved to the given directory; the trained weights are not uploaded to GitHub.

I trained the model for 10 epochs with a margin of 1 and plotted the embedding visualization every two epochs. "Accuracy" here is pair-level accuracy: a pair is predicted similar when the embedding distance is below the 0.5 threshold used in `calculate_accuracy` in `train.py`. The accuracies and losses were:

Epoch [1/10], Loss: 0.8536, Accuracy: 0.5056, validate loss: 0.1353, validate accuracy: 0.5180
![embedding visualization in epoch 1](Images/ep1.png)
Epoch [2/10], Loss: 0.2237, Accuracy: 0.5203, validate loss: 0.1375, validate accuracy: 0.4966
Epoch [3/10], Loss: 0.1366, Accuracy: 0.5546, validate loss: 0.1245, validate accuracy: 0.5851
![embedding visualization in epoch 3](Images/ep3.png)
Epoch [4/10], Loss: 0.1373, Accuracy: 0.6181, validate loss: 0.1189, validate accuracy: 0.6135
Epoch [5/10], Loss: 0.1001, Accuracy: 0.7056, validate loss: 0.1202, validate accuracy: 0.6541
![embedding visualization in epoch 5](Images/ep5.png)
Epoch [6/10], Loss: 0.1335, Accuracy: 0.6911, validate loss: 0.1147, validate accuracy: 0.6692
Epoch [7/10], Loss: 0.0730, Accuracy: 0.7964, validate loss: 0.1131, validate accuracy: 0.6788
![embedding visualization in epoch 7](Images/ep7.png)
Epoch [8/10], Loss: 0.0565, Accuracy: 0.8521, validate loss: 0.1001, validate accuracy: 0.7147
Epoch [9/10], Loss: 0.0654, Accuracy: 0.8594, validate loss: 0.1076, validate accuracy: 0.6868
![embedding visualization in epoch 9](Images/ep9.png)
Epoch [10/10], Loss: 0.0594, Accuracy: 0.8644, validate loss: 0.1047, validate accuracy: 0.7055
KNN Accuracy: 0.4992

As can be seen, the model starts overfitting at around epoch 7. The embeddings tend to separate into two parts but finally collapse into a squiggle, and the two classes remain evenly mixed with no sign of separating. I tried many combinations of hyperparameters (learning rate, margin and number of epochs), different models (many ways of constructing the embedding network) and different loss functions (triplet loss, contrastive loss), but they all performed similarly. I also normalized the data and used hard and semi-hard sample mining, but none of these worked well. I have done everything I could to improve the model, with little success, so this is my final version.

### Prediction
```python
predict_image(image_path)
```
This function takes the path of an image as input and returns the predicted label.
Here is an example usage:
```python
image_path = "D:/Study/MLDataSet/AD_NC/test/AD/388206_78.jpeg"
predicted_label = predict_image(image_path)
display_image(image_path)
print(f"Predicted label: {predicted_label}, True label: AD")
```

The output is:

Predicted label: ad, True label: AD

![Input image](Images/input_image.png)

## Dependencies
- python 3.11.5
- pytorch 2.1.0 py3.11_cuda11.8_cudnn8_0 pytorch
- torchvision 0.16.0
- matplotlib 3.7.2
- Pillow (PIL) 10.0.1
- scikit-learn (sklearn) 1.2.2


## Reference
Khandelwal, R. (2021, January 28). One shot learning with Siamese network. The Startup, Medium. https://medium.com/swlh/one-shot-learning-with-siamese-network-1c7404c35fda
diff --git a/recognition/s4627382_SiameseNetwork/dataset.py b/recognition/s4627382_SiameseNetwork/dataset.py
new file mode 100644
index 000000000..58517ed9f
--- /dev/null
+++ b/recognition/s4627382_SiameseNetwork/dataset.py
@@ -0,0 +1,132 @@
+import os
import random
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image

device = torch.device('cuda')
batch_size = 32

class CustomDataset(Dataset):
    def __init__(self, ad_dir, nc_dir, transform=None, validate=False, split_ratio=0.8):
        # get the file paths
        self.ad_folder = ad_dir
        self.nc_folder = nc_dir

        # get the samples' names
        self.ad_names = os.listdir(ad_dir)
        self.nc_names = os.listdir(nc_dir)

        # define the transform
        self.transform = transform

        # split data into train set and validation set
        total_ad_samples = len(self.ad_names)
        split_ad_samples = int(total_ad_samples * split_ratio)
        total_nc_samples = len(self.nc_names)
        split_nc_samples = int(total_nc_samples * split_ratio)

        if validate:
            self.ad_names = self.ad_names[split_ad_samples:]
            self.nc_names = self.nc_names[split_nc_samples:]
        else:
            self.ad_names = self.ad_names[:split_ad_samples]
            self.nc_names = self.nc_names[:split_nc_samples]


    def __len__(self):
        return 2 * min(len(self.ad_names), len(self.nc_names))

    def __getitem__(self, index):
        # depending on the index, choose the type of pair
        pair_type = index % 4

        if pair_type == 0:  # (ad, ad, 0)
            img1_path = os.path.join(self.ad_folder, random.choice(self.ad_names))
            img2_path = os.path.join(self.ad_folder, random.choice(self.ad_names))
            label = 0
        elif pair_type == 1:  # (nc, nc, 0)
            img1_path = os.path.join(self.nc_folder, random.choice(self.nc_names))
            img2_path = os.path.join(self.nc_folder, random.choice(self.nc_names))
            label = 0
        elif pair_type == 2:  # (ad, nc, 1)
            img1_path = os.path.join(self.ad_folder, random.choice(self.ad_names))
            img2_path = os.path.join(self.nc_folder, random.choice(self.nc_names))
            label = 1
        else:  # (nc, ad, 1)
            img1_path = os.path.join(self.nc_folder, random.choice(self.nc_names))
            img2_path = os.path.join(self.ad_folder, random.choice(self.ad_names))
            label = 1

        # open images
        with Image.open(img1_path) as img1, Image.open(img2_path) as img2:
            # apply transformation
            if self.transform:
                img1 = self.transform(img1)
                img2 = self.transform(img2)

        return img1, img2, torch.tensor(label, dtype=torch.float32)


# calculate the mean and std of the dataset
# input: the folder containing subfolders of images
# output: mean and std of all images across all subfolders
def compute_mean_std(img_folder):
    # get subfolders
    subfolders = [dir for dir in os.listdir(img_folder) if os.path.isdir(os.path.join(img_folder, dir))]

    # transform images from numpy.ndarray to tensor
    # and normalize pixels from 0~255 to 0~1
    transform = transforms.Compose([
        transforms.ToTensor()
    ])

    num_px = torch.tensor(0, dtype=torch.float64)
    sum_px = torch.tensor(0, dtype=torch.float64)
    sum_px_sq = torch.tensor(0, dtype=torch.float64)

    for subfolder in subfolders:
        subfolder_path = os.path.join(img_folder, subfolder)
        img_names = os.listdir(subfolder_path)

        for img_name in img_names:
            # open the image and convert it to a tensor
            img_path = os.path.join(subfolder_path, img_name)
            img = Image.open(img_path)
            img_tensor = transform(img)
            img.close()

            num_px += img_tensor.numel()  # number of pixels
            sum_px += torch.sum(img_tensor)
            sum_px_sq += torch.sum(img_tensor ** 2)

    # calculate mean and std for all images across all subfolders
    mean = sum_px / num_px
    std = torch.sqrt((sum_px_sq / num_px) - (mean ** 2))

    print("mean: ", mean.item(), "std: ", std.item())
    return mean.item(), std.item()

def load_data(train_folder_path, train_ad_path, train_nc_path, test_ad_path, test_nc_path, batch_size=batch_size):
    # calculate mean and std for the train set
    mean, std = compute_mean_std(train_folder_path)

    # define transform
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((mean,), (std,))
    ])

    # create datasets
    train_set = CustomDataset(ad_dir=train_ad_path, nc_dir=train_nc_path, transform=transform, validate=False, split_ratio=0.8)
    validation_set = CustomDataset(ad_dir=train_ad_path, nc_dir=train_nc_path, transform=transform, validate=True, split_ratio=0.8)
    test_set = CustomDataset(ad_dir=test_ad_path, nc_dir=test_nc_path, transform=transform, validate=False, split_ratio=1)

    # create dataloaders
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    validation_loader = DataLoader(validation_set, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

    return train_loader, validation_loader, test_loader
\ No newline at end of file
diff --git a/recognition/s4627382_SiameseNetwork/modules.py b/recognition/s4627382_SiameseNetwork/modules.py
new file mode 100644
index 000000000..28d878571
--- /dev/null
+++ b/recognition/s4627382_SiameseNetwork/modules.py
@@ -0,0 +1,126 @@
+# containing the source code of the components of your model.
# Each component is implemented as a class or a function

import torch
import torch.nn as nn
import pickle
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

device = torch.device('cuda')

# Build the CNN network whose output is the embedding vector
class Embedding(nn.Module):
    def __init__(self):
        super(Embedding, self).__init__()
        self.conv = nn.Sequential(

            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),  # size: 256*240 -> 128*120

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),  # size: 128*120 -> 64*60

            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),  # size: 64*60 -> 32*30
        )

        self.fc = nn.Sequential(
            nn.Linear(64*32*30, 512),
            nn.ReLU(inplace=True),

            nn.Linear(512, 256),
        )

    def forward(self, x):
        out = self.conv(x)
        out = out.view(out.size()[0], -1)
        out = self.fc(out)
        return out


# construct the contrastive loss
# formula: L = (1 - y) * 1/2 * D^2 + y * 1/2 * max(0, m - D)^2
# where D = sample distance, m = margin, y = label; same: label = 0, different: label = 1
class ContrastiveLoss(nn.Module):
    def __init__(self, margin=0.5):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, img1, img2, label):
        # calculate the euclidean distance
        distance = (img1 - img2).pow(2).sum(1).sqrt()

        # calculate loss; use relu to keep the margin term non-negative
        loss_same = (1 - label) * 0.5 * (distance ** 2)
        loss_diff = label * 0.5 * torch.relu(self.margin - distance).pow(2)
        loss = loss_same + loss_diff

        return loss.mean()


# extract embeddings and labels from a loader using the trained model
def extract_embeddings(loader, model):
    model.eval()
    embeddings = []
    labels_list = []

    with torch.no_grad():
        for img1, img2, labels in loader:
            img1, img2, labels = img1.to(device), img2.to(device), labels.to(device)

            emb1 = model.get_embedding(img1)
            emb2 = model.get_embedding(img2)

            embeddings.append(emb1.cpu())
            embeddings.append(emb2.cpu())

            labels_list.extend(labels.cpu().numpy())
            labels_list.extend(labels.cpu().numpy())

    embeddings = torch.cat(embeddings, dim=0)
    return embeddings, labels_list


# construct the siamese network
class SiameseNet(nn.Module):
    def __init__(self, embedding):
        super(SiameseNet, self).__init__()
        self.embedding = embedding

    def forward(self, img1, img2):
        emb1 = self.embedding(img1)
        emb2 = self.embedding(img2)
        return emb1, emb2

    def get_embedding(self, x):
        return self.embedding(x)


# use the embedding net to train a knn classifier
def knn(train_loader, val_loader, model, n_neighbors=5):

    # extract embeddings from the train set
    train_embeddings, train_labels = extract_embeddings(train_loader, model)

    # train a KNN classifier
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    knn.fit(train_embeddings, train_labels)

    # extract embeddings from the validation set
    val_embeddings, val_labels = extract_embeddings(val_loader, model)

    # predict the labels of the validation set
    val_preds = knn.predict(val_embeddings)

    # calculate the accuracy
    accuracy = accuracy_score(val_labels, val_preds)
    print(f"KNN Accuracy: {accuracy:.4f}")
    with open("D:/Study/GitHubDTClone/COMP3710A3/PatternAnalysis-2023/recognition/s4627382_SiameseNetwork/knn.pkl", "wb") as f:
        pickle.dump(knn, f)
    return accuracy
\ No newline at end of file
"wb") as f: + pickle.dump(knn, f) + return accuracy \ No newline at end of file diff --git a/recognition/s4627382_SiameseNetwork/predict.py b/recognition/s4627382_SiameseNetwork/predict.py new file mode 100644 index 000000000..34d389e6e --- /dev/null +++ b/recognition/s4627382_SiameseNetwork/predict.py @@ -0,0 +1,62 @@ +# showing example usage of your trained model. +# Print out any results and / or provide visualisations where applicable + +import modules +import torch +import pickle +import torchvision.transforms as transforms +import matplotlib.image as mpimg +import matplotlib.pyplot as plt +from PIL import Image + +device = torch.device('cuda') + +def predict_image(image_path): + # load the image + image = Image.open(image_path) + + # trainsform the data + transform = transforms.Compose([ + transforms.ToTensor(), + ]) + + # move to gpu + image_tensor = transform(image).unsqueeze(0).to(device) + + # define and load model + embeddingNet = modules.Embedding() + model = modules.SiameseNet(embeddingNet) + model = model.to(device) + model.load_state_dict(torch.load("D:/Study/GitHubDTClone/COMP3710A3/PatternAnalysis-2023/recognition/s4627382_SiameseNetwork/SiameseNet.pth")) + with open("D:/Study/GitHubDTClone/COMP3710A3/PatternAnalysis-2023/recognition/s4627382_SiameseNetwork/knn.pkl", "rb") as f: + knn = pickle.load(f) + + # extract embedding + embedding = model.get_embedding(image_tensor) + embedding_numpy = embedding.detach().cpu().numpy() + + # predict using KNN + prediction = knn.predict(embedding_numpy) + + # convert prediction to 'ad' or 'nc' + label_map = {0: 'ad', 1: 'nc'} + predicted_label = label_map[prediction[0]] + + return predicted_label + + +# display given image +def display_image(image_path): + # load image from given path + img = mpimg.imread(image_path) + + # plot img + plt.imshow(img) + plt.axis("off") + plt.show + +# Example usage: +image_path = "D:/Study/MLDataSet/AD_NC/test/AD/388206_78.jpeg" +predicted_label = predict_image(image_path) +display_image(image_path) +print(f"Predicted label: {predicted_label}, Ture label: ad") diff --git a/recognition/s4627382_SiameseNetwork/train.py b/recognition/s4627382_SiameseNetwork/train.py new file mode 100644 index 000000000..418242923 --- /dev/null +++ b/recognition/s4627382_SiameseNetwork/train.py @@ -0,0 +1,171 @@ +import dataset, modules +import torch +import torch.optim as optim +import matplotlib.pyplot as plt +from sklearn.manifold import TSNE + +device = torch.device('cuda') + +# data path +train_folder_path = "D:/Study/MLDataSet/AD_NC/train" +train_ad_path = "D:/Study/MLDataSet/AD_NC/train/AD" +train_nc_path = "D:/Study/MLDataSet/AD_NC/train/NC" +test_ad_path = "D:/Study/MLDataSet/AD_NC/test/AD" +test_nc_path = "D:/Study/MLDataSet/AD_NC/test/NC" + +margin = 1 +epoches = 10 + +# create data loader +train_loader, validation_loader, test_loader = dataset.load_data( + train_folder_path, train_ad_path, train_nc_path, test_ad_path, test_nc_path, batch_size=32) + +# define models +embbeding = modules.Embedding() +model = modules.SiameseNet(embbeding) +model.to(device) + +# define loss function +criterion = modules.ContrastiveLoss(margin) +optimizer = optim.Adam(model.parameters(), lr=0.001) + +def train(train_loader, epoches): + for epoch in range(epoches): + # set model to train mode + model.train() + total_loss = 0 + total_accuracy = 0 + total_samples = 0 + + for img1, img2, labels in train_loader: + # move data to gpu + img1, img2, labels = img1.to(device), img2.to(device), labels.to(device) + + # front propagation + emb1, 
def train(train_loader, epochs):
    for epoch in range(epochs):
        # set model to train mode
        model.train()
        total_loss = 0
        total_accuracy = 0
        total_samples = 0

        for img1, img2, labels in train_loader:
            # move data to gpu
            img1, img2, labels = img1.to(device), img2.to(device), labels.to(device)

            # forward propagation
            emb1, emb2 = model(img1, img2)
            loss = criterion(emb1, emb2, labels)

            # accumulate loss and accuracy
            batch_accuracy = calculate_accuracy(emb1, emb2, labels)
            total_loss += loss.item() * img1.size(0)
            total_accuracy += batch_accuracy * img1.size(0)
            total_samples += img1.size(0)

            # back propagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # calculate average accuracy and loss
        avg_accuracy = total_accuracy / total_samples
        avg_loss = total_loss / total_samples

        validate_loss, validate_accuracy = validate(validation_loader)

        print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, Accuracy: {avg_accuracy:.4f}, validate loss: {validate_loss:.4f}, validate accuracy: {validate_accuracy:.4f}")

        if epoch % 2 == 0:
            visualize_embeddings(train_loader, model)

    # save the model
    torch.save(model.state_dict(),
               "D:/Study/GitHubDTClone/COMP3710A3/PatternAnalysis-2023/recognition/s4627382_SiameseNetwork/SiameseNet.pth")
    print("Model saved")

def validate(validation_loader):
    # set model to evaluation mode
    model.eval()
    total_loss = 0
    total_accuracy = 0

    with torch.no_grad():
        for img1, img2, labels in validation_loader:
            # move data to gpu
            img1, img2, labels = img1.to(device), img2.to(device), labels.to(device)

            # forward propagation
            emb1, emb2 = model(img1, img2)
            loss = criterion(emb1, emb2, labels)

            # accumulate loss and accuracy
            total_loss += loss.item()
            total_accuracy += calculate_accuracy(emb1, emb2, labels)

    # calculate average loss and average accuracy
    validate_loss = total_loss / len(validation_loader)
    validate_accuracy = total_accuracy / len(validation_loader)

    return validate_loss, validate_accuracy


# calculate pair accuracy:
# if distance < threshold, the two samples are considered the same
def calculate_accuracy(emb1, emb2, labels, threshold=0.5):

    # calculate the distance between the two embeddings
    distance = (emb1 - emb2).pow(2).sum(1).sqrt()

    # predict similarity: 0 for same (distance < threshold), 1 for different
    predicts = (distance >= threshold).float()

    # calculate accuracy by comparing predictions to labels
    correct = (predicts == labels).float()
    accuracy = correct.sum().item() / len(labels)

    return accuracy

def visualize_embeddings(loader, model, num_samples=300):
    model.eval()
    embeddings = []
    labels_list = []

    # define label-to-colour mapping
    label_to_color = {0: 'red', 1: 'blue'}

    with torch.no_grad():
        for i, (img1, img2, labels) in enumerate(loader):
            if i * loader.batch_size > num_samples:
                break

            img1, img2 = img1.to(device), img2.to(device)

            emb1 = model.get_embedding(img1)
            emb2 = model.get_embedding(img2)

            embeddings.append(emb1.cpu())
            embeddings.append(emb2.cpu())

            labels_list.extend(labels.cpu().numpy())
            labels_list.extend(labels.cpu().numpy())

    # convert labels to colours
    color_labels = [label_to_color[label] for label in labels_list]

    embeddings = torch.cat(embeddings, dim=0)
    tsne = TSNE(n_components=2, random_state=42)
    tsne_results = tsne.fit_transform(embeddings)

    plt.figure(figsize=(10, 7))
    plt.scatter(tsne_results[:, 0], tsne_results[:, 1], c=color_labels, s=50, alpha=0.6)
    plt.title("Embedding visualization")
    plt.show()


def main():
    mode = 0
    if mode == 0:
        print("Training")
        train(train_loader, epochs)
        modules.knn(train_loader, validation_loader, model, n_neighbors=5)

    if mode == 1:
        print("Train classifier")
        model.load_state_dict(torch.load("D:/Study/GitHubDTClone/COMP3710A3/PatternAnalysis-2023/recognition/s4627382_SiameseNetwork/SiameseNet.pth"))
        modules.knn(train_loader, validation_loader, model, n_neighbors=5)

    elif mode == 2:
        print("Testing")


if __name__ == "__main__":
    main()
\ No newline at end of file