diff --git a/recognition/README.md b/recognition/README.md new file mode 100644 index 000000000..5c646231c --- /dev/null +++ b/recognition/README.md @@ -0,0 +1,10 @@ +# Recognition Tasks +Various recognition tasks solved in deep learning frameworks. + +Tasks may include: +* Image Segmentation +* Object detection +* Graph node classification +* Image super resolution +* Disease classification +* Generative modelling with StyleGAN and Stable Diffusion diff --git a/recognition/vision-transformer-4696689/README.md b/recognition/vision-transformer-4696689/README.md new file mode 100644 index 000000000..e836a8439 --- /dev/null +++ b/recognition/vision-transformer-4696689/README.md @@ -0,0 +1,79 @@ +# ADNI brain data classification with Vision Transformer + +## Summary + +The goal of the project is to classify Alzheimer's disease (normal or AD) in the ADNI +brain data using a Vision Transformer. Each sample consists of 20 slices of 240x256 +greyscale images corresponding to a patient, which is to be classified as either NC +or AD. Experiments were also done with data augmentation. + +## How to use + +There are four files: dataset.py, modules.py, train.py and predict.py. The only files which +need to be run are train.py or predict.py. train.py is responsible for training (and +testing) the model, with the option of saving the model as well as the loss and +validation accuracy of each epoch, for use in predict.py. predict.py is able to load +this data and retest the model on any of the dataloaders (train, validation, test) or +graph the loss/accuracy curves with matplotlib. + +Key point: Inside the dataset.py file, there are directory paths for the images +(local). Make sure that these point to the right location. + +Key point: The save model section of the train.py file is commented out. Make sure to +uncomment it to use this functionality. + +Key point: The test section of the predict.py file is commented out. Make sure to uncomment +it to use this functionality. 
+ +Key point: Since the dataset is so large, training might need to be done on 4x P100 GPUs +(rangpur). + +## Architecture + +The default Vision Transformer is upgraded to include a pre-convolutional module, of +which there are two designs. The convolutional layers result in fewer, smaller patches +so the model is sped up. It is also supposed to introduce inductive bias into the +model. 3D patches are utilised, offering massive boosts to speed. Data augmentation is +done by flipping images to result in 4x as much data, which is said to be very important +for transformer models. + +![Basic Transformer Model](extra/ViT.png) + +The standard vision transformer works by inputting embeddings of patches of images, along +with a positional encoding, into a transformer model. Only the encoder is used, and +cross entropy loss is used for the classification. Switching the order of normalisation +allows for better propagation of gradients and training stability. If using this patch-based +model it is important to use 3D patches for both speed and performance. The later design +instead used a CNN to reduce the image into channels (similar in size to patches) which are +used as the input tokens. This further improves speed without impacting performance. + +## Training + +Training is done for 100 epochs, which was found experimentally to be long enough. +The AdamW optimiser is used with a learning rate of 3e-4; this was decreased from 1e-3 +(which did not train well) but also increased from 1e-4. The data is split into train, +validation and test sets. The majority of the data is in the train set, and the validation and +test sets are of equal size. + +Hyperparameter tuning was done manually. Learning rate schedulers, e.g. cyclic and warm-up, +were found to be ineffective. A learning rate of 1e-3 didn't permit training, but 1e-4 +was too slow and didn't perform as well as the final 3e-4. The 20 slices for each image +correspond to the patient-level split. + +## Result + +Overall, the test accuracy was 68.0% which is ok. 
def _loadVolumes(DIRs, tlist, slices):
    """Load each directory in DIRs as stacks of `slices` consecutive images.

    Files are visited in sorted filename order, once per transform in
    `tlist`. Every full run of `slices` transformed images is stacked into
    one tensor (values divided by 255); a trailing run shorter than `slices`
    is dropped, exactly as the original counter-based loop did.

    Returns:
        (volumes, size): the list of stacked tensors, and a two-element list
        with the number of volumes produced from DIRs[0] and DIRs[1].
    """
    volumes = []
    size = [0, 0]
    for i, DIR in enumerate(DIRs):
        # The listing does not depend on the transform, so read it once.
        filenames = sorted(os.listdir(DIR))
        for t in tlist:
            px = []
            for filename in filenames:
                # Close the file handle as soon as the pixels are copied out.
                with Image.open(os.path.join(DIR, filename)) as img:
                    px.append(t(img).float() / 255)
                if len(px) == slices:
                    volumes.append(torch.stack(px))
                    px = []
                    size[i] += 1
    return volumes, size


def getImages(trainDIRs, testDIRS, slices=20, augment_test=True):
    """Load the train/test image folders and split the test data in half.

    Assumes images have pixel values in range [0,255].

    Args:
        trainDIRs: two directories; volumes from the first are labelled 1,
            volumes from the second are labelled 0.
        testDIRS: same layout, used for the validation/test data.
        slices: number of consecutive image files that form one sample.
        augment_test: when True (the original behaviour) the flipped copies
            are generated for the test directories as well.

    Returns:
        xtrain, ytrain, xtest, ytest, xval, yval. The x tensors are stacks of
        volumes; the y tensors are float64 label vectors built with numpy.

    Fixes relative to the previous version:
        * the `testDIRS` argument is now actually used (the body used to read
          the module-level global `testDIRs` instead);
        * the misspelled no-op `tensor.require_grad = True` is gone - input
          images do not need gradients;
        * image files are closed after reading;
        * an odd number of test volumes no longer breaks the val/test split
          (`Tensor.split` returned three chunks in that case).
    """
    to_tensor = transforms.PILToTensor()
    transform = transforms.Compose([to_tensor])
    hflip = transforms.Compose([transforms.RandomHorizontalFlip(p=1.0), to_tensor])
    vflip = transforms.Compose([transforms.RandomVerticalFlip(p=1.0), to_tensor])
    dflip = transforms.Compose([
        transforms.RandomHorizontalFlip(p=1.0),
        transforms.RandomVerticalFlip(p=1.0),
        to_tensor,
    ])
    tlist = [transform, hflip, vflip, dflip]

    # training data
    xtrain, size = _loadVolumes(trainDIRs, tlist, slices)
    xtrain = torch.stack(xtrain)
    ytrain = torch.from_numpy(np.concatenate((np.ones(size[0]), np.zeros(size[1])), axis=0))

    # testing data
    # NOTE(review): with augment_test=True the flipped copies of one test
    # volume can end up on both sides of the random val/test split below,
    # which makes the two sets non-independent - confirm this is intended.
    xtest, size = _loadVolumes(testDIRS, tlist if augment_test else [transform], slices)
    xtest = torch.stack(xtest)
    ytest = torch.from_numpy(np.concatenate((np.ones(size[0]), np.zeros(size[1])), axis=0))

    # Shuffle images and labels with the same permutation, then cut in half:
    # first half -> validation, remainder -> test.
    idx = torch.randperm(xtest.size(0))
    xtest, ytest = xtest[idx], ytest[idx]
    splitsize = xtest.shape[0] // 2
    xval, xtest = xtest[:splitsize], xtest[splitsize:]
    yval, ytest = ytest[:splitsize], ytest[splitsize:]
    return xtrain, ytrain, xtest, ytest, xval, yval


class DatasetWrapper(Dataset):
    """Minimal map-style Dataset over an indexable X and optional labels y."""

    def __init__(self, X, y=None):
        self.X, self.y = X, y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        # Unlabelled data yields the sample alone; labelled data yields a pair.
        if self.y is None:
            return self.X[idx]
        else:
            return self.X[idx], self.y[idx]
class ConvLayer2(nn.Module):
    """Conv v2: three 2D convolutional branches, one per volume axis.

    The input is documented by the original author as (N, C, L, W, H). Each
    branch folds one spatial axis into the channel dimension, runs two
    conv -> max-pool -> ReLU stages, flattens the remaining 2D map and
    projects it to 32 features. The branches are concatenated on the last
    dimension. The hard-coded channel counts (20, 240, 256) and projection
    inputs (30, 12, 10) fix the expected input to C*L=20, C*W=240, C*H=256.
    """

    def __init__(self):
        super().__init__()
        # parameter-free stages shared by every branch
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2)
        self.relu = nn.ReLU()
        # stage 1: large-kernel stem for each orientation
        self.conv11_x = nn.Conv2d(20, 48, kernel_size=11, stride=4, padding=0)
        self.conv11_y = nn.Conv2d(240, 48, kernel_size=(11, 3), stride=(4, 1), padding=0)
        self.conv11_z = nn.Conv2d(256, 48, kernel_size=(3, 11), stride=(1, 4), padding=0)
        # stage 2: widen 48 -> 192 channels
        self.conv5_x = nn.Conv2d(48, 192, kernel_size=5, stride=2, padding=0)
        self.conv5_y = nn.Conv2d(48, 192, kernel_size=(5, 3), stride=(2, 1), padding=0)
        self.conv5_z = nn.Conv2d(48, 192, kernel_size=(3, 5), stride=(1, 2), padding=0)
        # per-branch projection of the flattened map to 32 features
        self.l_x = nn.Linear(30, 32)
        self.l_y = nn.Linear(12, 32)
        self.l_z = nn.Linear(10, 32)

    def forward(self, imgs):
        # One (view, stem, widen, project) tuple per orientation; the views
        # move a different axis next to C before merging dims 1 and 2.
        branches = (
            (imgs, self.conv11_x, self.conv5_x, self.l_x),
            (imgs.permute(0, 1, 3, 4, 2), self.conv11_y, self.conv5_y, self.l_y),
            (imgs.permute(0, 1, 4, 2, 3), self.conv11_z, self.conv5_z, self.l_z),
        )
        projected = []
        for view, stem, widen, project in branches:
            feat = self.relu(self.pool(stem(view.flatten(1, 2))))
            feat = self.relu(self.pool(widen(feat)))
            projected.append(project(feat.flatten(2, 3)))
        return torch.cat(projected, dim=2)
0.04044, 0.02642, 0.02621, 0.00663, 0.00604, 0.00071, 0.00035, 0.00026, 0.00022, 0.0002, 0.00018, 0.00016, 0.00015, 0.00014, 0.00013, 0.00012, 0.00011, 0.0001, 0.0001, 9e-05, 8e-05, 8e-05, 7e-05, 7e-05, 7e-05, 6e-05, 6e-05, 6e-05, 5e-05, 5e-05, 5e-05, 5e-05, 4e-05, 4e-05, 4e-05, 4e-05, 4e-05, 4e-05, 3e-05, 3e-05, 3e-05, 3e-05, 3e-05, 3e-05, 3e-05, 3e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ACC = [50.67, 51.11, 58.67, 63.11, 57.78, 62.67, 63.56, 66.22, 66.22, 67.11, 66.67, 65.78, 67.56, 65.33, 68.0, 68.44, 67.11, 64.89, 64.89, 67.56, 68.0, 69.33, 67.11, 67.56, 68.0, 67.56, 66.22, 71.11, 69.33, 67.11, 66.67, 69.78, 69.33, 69.78, 69.78, 68.0, 66.67, 68.89, 69.78, 69.78, 68.44, 67.56, 67.11, 67.56, 67.56, 67.56, 68.0, 68.0, 68.0, 68.0, 68.0, 67.56, 67.56, 68.0, 66.22, 70.67, 67.56, 66.67, 68.89, 65.33, 66.67, 70.22, 68.0, 69.78, 68.89, 68.0, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 
68.44, 68.44, 68.44, 68.44] + +to plot: +import matplotlib.pyplot as plt +steps = range(175) +plt.plot(steps, LOSS) +plt.ylabel('LOSS') +plt.xlabel('epoch') +plt.show() +plt.plot(steps, ACC) +plt.ylabel('ACCURACY') +plt.xlabel('epoch') +plt.show() + +cuda +training time: 27699.315416812897 +test acc: tensor(0.6800) +TIME = [147.563, 146.343, 144.501, 147.546, 144.388, 143.652, 146.672, 144.336, 145.402, 146.032, 144.47, 144.527, 145.94, 145.326, 144.034, 145.458, 146.047, 143.858, 146.212, 144.663, 144.781, 146.169, 143.851, 146.982, 143.694, 145.329, 145.16, 146.066, 144.08, 145.364, 145.876, 143.906, 145.965, 144.99, 144.381, 147.893, 146.199, 144.357, 145.847, 144.55, 144.047, 145.702, 144.852, 143.926, 145.867, 144.55, 144.213, 146.131, 144.313, 144.568, 145.913, 144.292, 147.893, 147.291, 148.067, 148.66, 149.459, 148.164, 148.963, 149.543, 144.27, 145.208, 145.364, 143.899, 146.17, 143.49, 146.005, 144.319, 144.524, 145.954, 143.908, 145.923, 149.609, 148.143, 149.126, 147.25, 143.868, 145.934, 144.889, 144.385, 146.232, 144.071, 145.286, 145.871, 143.787, 145.719, 148.777, 147.816, 149.28, 148.8, 148.009, 149.313, 149.438, 147.923, 148.943, 149.355, 148.399, 148.242, 149.209, 149.388, 148.377, 148.594, 149.603, 148.353, 148.588, 149.617, 148.425, 148.436, 149.528, 148.536, 148.31, 149.578, 148.509, 148.387, 149.569, 148.542, 148.188, 149.53, 148.641, 148.101, 149.468, 148.894, 148.149, 148.935, 149.422, 148.588, 148.187, 149.229, 149.147, 149.19, 148.44, 148.16, 149.419, 148.88, 148.568, 148.514, 148.583, 148.594, 148.789, 148.996, 149.07, 149.142, 148.768, 148.309, 148.454, 148.685, 149.076, 149.272, 148.759, 148.253, 148.44, 149.121, 149.245, 148.525, 148.261, 148.695, 149.247, 149.253, 148.579, 148.307, 149.357, 147.468, 148.775, 147.945, 149.511, 148.644, 148.232, 149.552, 148.53, 148.147, 149.467, 148.824, 148.064, 149.387, 149.3] \ No newline at end of file diff --git a/recognition/vision-transformer-4696689/extra/train.png 
class Attention(nn.Module):
    """Multi-head self-attention preceded by bias-free Q/K/V projections.

    The input is projected by three Linear(embed, embed, bias=False) layers
    and the results are handed to nn.MultiheadAttention (batch_first=True) as
    query, key and value. Only the attention output is returned.
    """

    def __init__(self, heads, embed):
        super().__init__()
        self.heads = heads
        self.attn = nn.MultiheadAttention(embed, heads, batch_first=True)
        self.Q = nn.Linear(embed, embed, bias=False)
        self.K = nn.Linear(embed, embed, bias=False)
        self.V = nn.Linear(embed, embed, bias=False)

    def forward(self, x):
        Q = self.Q(x)
        K = self.K(x)
        V = self.V(x)
        attnout, attnweights = self.attn(Q, K, V)
        return attnout


class TransBlock(nn.Module):
    """One pre-norm transformer encoder block (attention + feed-forward)."""

    def __init__(self, heads, embed, fflsize):
        super().__init__()
        self.fnorm = nn.LayerNorm(embed)
        self.snorm = nn.LayerNorm(embed)
        self.attn = Attention(heads, embed)
        self.ffl = nn.Sequential(
            nn.Linear(embed, fflsize),
            nn.GELU(),
            nn.Linear(fflsize, embed)
        )

    def forward(self, x):
        """
        Switching to pre-MHA LayerNorm is supposed to give better performance,
        this is used in other models such as LLMs like GPT. Gradients are meant
        to be stabilised. This is different to the original ViT paper.
        """
        x = x + self.attn(self.fnorm(x))
        x = x + self.ffl(self.snorm(x))
        return x


class ConvLayer(nn.Module):
    """Convolutional pre-processing: two Conv3d -> MaxPool3d -> ReLU stages.

    Takes a single-channel volume and produces 192 feature channels; the
    transformer later treats each channel as one token.
    """

    def __init__(self):
        super().__init__()
        self.pool = nn.MaxPool3d(kernel_size=3, stride=2)
        self.relu = nn.ReLU()
        self.conv11 = nn.Conv3d(1, 48, kernel_size=(3,11,11), stride=(1,4,4), padding=(1,0,0))
        self.conv5 = nn.Conv3d(48, 192, kernel_size=(3,5,5), stride=(1,2,2), padding=(1,0,0))

    def forward(self, imgs):
        x = self.conv11(imgs)
        x = self.relu(self.pool(x))
        x = self.conv5(x)
        x = self.relu(self.pool(x))
        return x


class VisionTransformer(nn.Module):
    """Vision Transformer classifier fed by the ConvLayer feature extractor.

    Args:
        classes: number of output logits.
        inputsize: (N, Np, P) - P is the flattened size of one token after
            the conv stage (input features of the linear projection) and Np
            the number of tokens; N is stored but not used by this class.
        heads: attention heads per block (embed must be divisible by it).
        embed: token embedding width.
        fflscale: feed-forward hidden width as a multiple of embed.
        nblocks: number of transformer blocks.

    Fixes relative to the previous version:
        * every block is now its own TransBlock instance. The old
          `(TransBlock(...),) * nblocks` put the *same* module in the
          Sequential nblocks times, so all blocks shared one set of weights.
          state_dict keys are unchanged, so old checkpoints still load.
        * `posembed` is a non-persistent buffer, so it follows
          `model.to(device)` while staying out of the state_dict as before.
    """

    def __init__(self, classes=2, inputsize=(1,1,1), heads=2, embed=64, fflscale=2, nblocks=1):
        super().__init__()
        (self.N, self.Np, self.P) = inputsize
        # components
        self.proj = nn.Linear(self.P, embed)
        self.clstoken = nn.Parameter(torch.randn(1, 1, embed))
        # +1 position for the class token prepended in forward()
        self.register_buffer("posembed", self.embedding(self.Np + 1, embed), persistent=False)
        self.transformer = nn.Sequential(
            *(TransBlock(heads, embed, int(fflscale * embed)) for _ in range(nblocks))
        )
        self.classifier = nn.Sequential(
            nn.LayerNorm(embed),
            nn.Linear(embed, classes)
        )
        # convolutional components
        self.conv = ConvLayer()

    def embedding(self, npatches, embed, freq=10000):
        """Return the fixed sinusoidal position table of shape (npatches, embed).

        Even column j holds sin(i / freq**(j/embed)); odd column j holds
        cos(i / freq**((j-1)/embed)), for row (position) i.
        NOTE(review): this is the sinusoidal scheme (base 10000) from
        "Attention Is All You Need"; the ViT paper itself uses learned
        position embeddings.
        """
        position = torch.arange(npatches, dtype=torch.float64).unsqueeze(1)
        column = torch.arange(embed)
        # each sin/cos pair shares the exponent of its even column
        exponent = (column - column % 2).to(torch.float64) / embed
        angle = position / torch.pow(torch.tensor(float(freq), dtype=torch.float64), exponent)
        posembed = torch.where(column % 2 == 0, torch.sin(angle), torch.cos(angle))
        return posembed.to(torch.float32)

    def forward(self, imgs):
        # Convolutional layer; each of the resulting channels becomes one
        # token whose features are its flattened feature map.
        imgs = self.conv(imgs)
        imgs = imgs.flatten(2, 4)
        # Linear projection and positional embedding
        tokens = self.proj(imgs)
        clstoken = self.clstoken.expand(imgs.shape[0], -1, -1)
        tokens = torch.cat([clstoken, tokens], dim=1)  # prepend the class token
        x = tokens + self.posembed  # broadcasts over the batch dimension
        # Transformer
        x = self.transformer(x)
        # Classification from the class-token position only
        y = x[:, 0]
        return self.classifier(y)
+ " xtrain = torch.stack(xtrain)\n", + " ytrain = torch.from_numpy(np.concatenate((np.ones(size[0]), np.zeros(size[1])), axis=0))\n", + "\n", + " \n", + " \"\"\"testing data\"\"\"\n", + " size = [0, 0]\n", + " for i, DIR in enumerate(testDIRs):\n", + " px = []\n", + " j = 0\n", + " for filename in sorted(os.listdir(DIR)):\n", + " f = os.path.join(DIR, filename)\n", + " img = Image.open(f)\n", + " tensor = transform(img).float()\n", + " tensor.require_grad = True\n", + " px.append(tensor/255)\n", + " j = (j+1) % 20\n", + " if j == 0:\n", + " xtest.append(torch.stack(px))\n", + " px = []\n", + " size[i] += 1\n", + " xtest = torch.stack(xtest)\n", + " idx = torch.randperm(xtest.size(0))\n", + " xtest = xtest[idx, :]\n", + " splitsize = int(xtest.shape[0]/2)\n", + " xval, xtest = xtest.split(splitsize, dim=0)\n", + " ytest = torch.from_numpy(np.concatenate((np.ones(size[0]), np.zeros(size[1])), axis=0))\n", + " ytest = ytest[idx]\n", + " yval, ytest = ytest.split(splitsize, dim=0)\n", + " return xtrain, ytrain, xtest, ytest, xval, yval\n", + "\n", + "\"\"\"\n", + "Dataloader\n", + "\"\"\"\n", + "class DatasetWrapper(Dataset):\n", + " def __init__(self, X, y=None):\n", + " self.X, self.y = X, y\n", + "\n", + " def __len__(self):\n", + " return len(self.X)\n", + "\n", + " def __getitem__(self, idx):\n", + " if self.y is None:\n", + " return self.X[idx]\n", + " else:\n", + " return self.X[idx], self.y[idx]\n", + "\n", + "trainDIRs = ['AD_NC/train/AD/', 'AD_NC/train/NC']\n", + "testDIRs = ['AD_NC/test/AD/', 'AD_NC/test/NC']\n", + "xtrain, ytrain, xtest, ytest, xval, yval = getImages(trainDIRs, testDIRs)\n", + "ytrain, ytest = ytrain.type(torch.LongTensor), ytest.type(torch.LongTensor)\n", + "xtrain = xtrain.permute(0, 2, 1, 3, 4)\n", + "xtest = xtest.permute(0, 2, 1, 3, 4)\n", + "xval = xval.permute(0, 2, 1, 3, 4)\n", + "\n", + "def trainloader(batchsize=16):\n", + " return DataLoader(DatasetWrapper(xtrain, ytrain), batch_size=batchsize, shuffle=True, pin_memory=True)\n", 
+ "\n", + "def valloader():\n", + " return DataLoader(DatasetWrapper(xval, yval), batch_size=1, shuffle=True, pin_memory=True)\n", + "\n", + "def testloader():\n", + " return DataLoader(DatasetWrapper(xtest, ytest), batch_size=1, shuffle=True, pin_memory=True)\n", + "\n", + "def trainshape():\n", + " return xtrain.shape\n", + "\n", + "def testshape():\n", + " return xtest.shape" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recognition/vision-transformer-4696689/old/.ipynb_checkpoints/dataset-checkpoint.ipynb b/recognition/vision-transformer-4696689/old/.ipynb_checkpoints/dataset-checkpoint.ipynb new file mode 100644 index 000000000..024c99e75 --- /dev/null +++ b/recognition/vision-transformer-4696689/old/.ipynb_checkpoints/dataset-checkpoint.ipynb @@ -0,0 +1,277 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 10, + "id": "338da719", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "Imports Here\n", + "\"\"\"\n", + "\"\"\"numpy and torch\"\"\"\n", + "import numpy as np\n", + "import torch\n", + "\n", + "\"\"\"PIL\"\"\"\n", + "from PIL import Image\n", + "\n", + "\"\"\"torchvision and utils\"\"\"\n", + "import torchvision.transforms as transforms\n", + "from torch.utils.data import DataLoader, Dataset\n", + "\n", + "\"\"\"os\"\"\"\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "65011ff4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\nLoading data from local file\\n'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + 
"source": [ + "\"\"\"\n", + "Loading data from local file\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "206e485b", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"Assumes images have pixel values in range [0,255]\"\"\"\n", + "def getImages(trainDIRs, testDIRS):\n", + " \"\"\"Get image to tensor\"\"\"\n", + " transform = transforms.Compose([\n", + " transforms.PILToTensor()\n", + " ])\n", + " \"\"\"Loading data into arrays\"\"\"\n", + " xtrain, xtrain, xtest, ytest = [], [], [], []\n", + " \"\"\"training data\"\"\"\n", + " size = [0, 0]\n", + " for i, DIR in enumerate(trainDIRs):\n", + " px = []\n", + " j = 0\n", + " for filename in sorted(os.listdir(DIR)):\n", + " f = os.path.join(DIR, filename)\n", + " img = Image.open(f)\n", + " tensor = transform(img).float()\n", + " tensor.require_grad = True\n", + " px.append(tensor/255)\n", + " j = (j+1) % 20\n", + " if j == 0:\n", + " xtrain.append(torch.stack(px))\n", + " px = []\n", + " size[i] += 1\n", + " xtrain = torch.stack(xtrain)\n", + " ytrain = torch.from_numpy(np.concatenate((np.ones(size[0]), np.zeros(size[1])), axis=0))\n", + " \n", + " \"\"\"testing data\"\"\"\n", + " size = [0, 0]\n", + " for i, DIR in enumerate(testDIRs):\n", + " px = []\n", + " j = 0\n", + " for filename in sorted(os.listdir(DIR)):\n", + " f = os.path.join(DIR, filename)\n", + " img = Image.open(f)\n", + " tensor = transform(img).float()\n", + " tensor.require_grad = True\n", + " px.append(tensor/255)\n", + " if j == 0:\n", + " xtest.append(torch.stack(px))\n", + " px = []\n", + " size[i] += 1\n", + " xtest = torch.stack(xtest)\n", + " ytest = torch.from_numpy(np.concatenate((np.ones(size[0]), np.zeros(size[1])), axis=0))\n", + " return xtrain, ytrain, xtest, ytest" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "a3c45c1a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([1076, 20, 1, 240, 256])\n", + 
"torch.Size([9000, 1, 1, 240, 256])\n" + ] + } + ], + "source": [ + "trainDIRs = ['../../../AD_NC/train/AD/', '../../../AD_NC/train/NC']\n", + "testDIRs = ['../../../AD_NC/test/AD/', '../../../AD_NC/test/NC']\n", + "xtrain, ytrain, xtest, ytest = getImages(trainDIRs, testDIRs)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "292100c2", + "metadata": {}, + "outputs": [], + "source": [ + "def createPatches(imgs, patchsize):\n", + " (N, M, C, W, H) = imgs.shape\n", + " (wsize, hsize) = patchsize\n", + " \"\"\"check for errors with sizing\"\"\"\n", + " if (W % wsize != 0) or (H % hsize != 0):\n", + " raise Exception(\"patchsize is not appropriate\")\n", + " if (C != C) or (H != H):\n", + " raise Exception(\"given sizes do not match\")\n", + " size = (N, M, C, W // wsize, wsize, H // hsize, hsize)\n", + " perm = (0, 1, 3, 5, 2, 4, 6) #bring col, row index of patch to front\n", + " flat = (2, 3) #flatten (col, row) index into col*row entry index for patches\n", + " imgs = imgs.reshape(size).permute(perm).flatten(*flat)\n", + " return imgs #in format Nimgs, Npatches, C, Wpatch, Hpatch\n", + " \n", + "def flattenPatches(imgs): #takes input (N, M, Npatches, C, W, H) returns (N, M*Npatches, C*W*H)\n", + " return imgs.flatten(3, 5).flatten(1, 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "e0897522", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\nDataloader\\n'" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"\n", + "Dataloader\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "05c80732", + "metadata": {}, + "outputs": [], + "source": [ + "class DatasetWrapper(Dataset):\n", + " def __init__(self, X, y=None):\n", + " self.X, self.y = X, y\n", + "\n", + " def __len__(self):\n", + " return len(self.X)\n", + "\n", + " def __getitem__(self, idx):\n", + " if self.y is None:\n", + " return self.X[idx]\n", + 
" else:\n", + " return self.X[idx], self.y[idx]" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "ea41eef5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([1076, 20, 1, 240, 256])\n", + "torch.Size([9000, 1, 1, 240, 256])\n" + ] + } + ], + "source": [ + "trainDIRs = ['../../../AD_NC/train/AD/', '../../../AD_NC/train/NC']\n", + "testDIRs = ['../../../AD_NC/test/AD/', '../../../AD_NC/test/NC']\n", + "xtrain, ytrain, xtest, ytest = getImages(trainDIRs, testDIRs)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "1f077f43", + "metadata": {}, + "outputs": [], + "source": [ + "xtrain = flattenPatches(createPatches(xtrain, (16,16)))\n", + "xtest = flattenPatches(createPatches(xtest, (16,16)))" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "a02e05bd", + "metadata": {}, + "outputs": [], + "source": [ + "def trainloader(batchsize=16):\n", + " return DataLoader(DatasetWrapper(xtrain, ytrain), batchsize=batchsize, shuffle=True)\n", + "\n", + "def testloader():\n", + " return DataLoader(DatasetWrapper(xtest, ytest), batchsize=1, shuffle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "18d6ca10", + "metadata": {}, + "outputs": [], + "source": [ + "def trainshape():\n", + " return xtrain.shape\n", + "\n", + "def testshape():\n", + " return xtest.shape" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recognition/vision-transformer-4696689/old/.ipynb_checkpoints/dataset3d-checkpoint.ipynb 
b/recognition/vision-transformer-4696689/old/.ipynb_checkpoints/dataset3d-checkpoint.ipynb new file mode 100644 index 000000000..1cf393de9 --- /dev/null +++ b/recognition/vision-transformer-4696689/old/.ipynb_checkpoints/dataset3d-checkpoint.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "338da719", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/oliver/opt/anaconda3/lib/python3.9/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: 'dlopen(/Users/oliver/opt/anaconda3/lib/python3.9/site-packages/torchvision/image.so, 6): Library not loaded: @rpath/libpng16.16.dylib\n", + " Referenced from: /Users/oliver/opt/anaconda3/lib/python3.9/site-packages/torchvision/image.so\n", + " Reason: Incompatible library version: image.so requires version 56.0.0 or later, but libpng16.16.dylib provides version 54.0.0'If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. 
Did you have `libjpeg` or `libpng` installed before building `torchvision` from source?\n", + " warn(\n" + ] + } + ], + "source": [ + "\"\"\"\n", + "Imports Here\n", + "\"\"\"\n", + "\"\"\"numpy and torch\"\"\"\n", + "import numpy as np\n", + "import torch\n", + "\n", + "\"\"\"PIL\"\"\"\n", + "from PIL import Image\n", + "\n", + "\"\"\"torchvision and utils\"\"\"\n", + "import torchvision.transforms as transforms\n", + "from torch.utils.data import DataLoader, Dataset\n", + "\n", + "\"\"\"os\"\"\"\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "65011ff4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\nLoading data from local file\\n'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"\n", + "Loading data from local file\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "206e485b", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"Assumes images have pixel values in range [0,255]\"\"\"\n", + "def getImages(trainDIRs, testDIRS):\n", + " \"\"\"Get image to tensor\"\"\"\n", + " transform = transforms.Compose([\n", + " transforms.PILToTensor()\n", + " ])\n", + " \"\"\"Loading data into arrays\"\"\"\n", + " xtrain, xtrain, xtest, ytest = [], [], [], []\n", + " \"\"\"training data\"\"\"\n", + " size = [0, 0]\n", + " for i, DIR in enumerate(trainDIRs):\n", + " px = []\n", + " j = 0\n", + " for filename in sorted(os.listdir(DIR)):\n", + " f = os.path.join(DIR, filename)\n", + " img = Image.open(f)\n", + " tensor = transform(img).float()\n", + " tensor.require_grad = True\n", + " px.append(tensor/255)\n", + " j = (j+1) % 20\n", + " if j == 0:\n", + " xtrain.append(torch.stack(px))\n", + " px = []\n", + " size[i] += 1\n", + " xtrain = torch.stack(xtrain)\n", + " ytrain = torch.from_numpy(np.concatenate((np.ones(size[0]), np.zeros(size[1])), axis=0))\n", + " \n", + " \"\"\"testing data\"\"\"\n", + " size = 
[0, 0]\n", + " for i, DIR in enumerate(testDIRs):\n", + " px = []\n", + " j = 0\n", + " for filename in sorted(os.listdir(DIR)):\n", + " f = os.path.join(DIR, filename)\n", + " img = Image.open(f)\n", + " tensor = transform(img).float()\n", + " tensor.require_grad = True\n", + " px.append(tensor/255)\n", + " if j == 0:\n", + " xtest.append(torch.stack(px))\n", + " px = []\n", + " size[i] += 1\n", + " xtest = torch.stack(xtest)\n", + " ytest = torch.from_numpy(np.concatenate((np.ones(size[0]), np.zeros(size[1])), axis=0))\n", + " return xtrain, ytrain, xtest, ytest" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a3c45c1a", + "metadata": {}, + "outputs": [], + "source": [ + "trainDIRs = ['../../../AD_NC/train/AD/', '../../../AD_NC/train/NC']\n", + "testDIRs = ['../../../AD_NC/test/AD/', '../../../AD_NC/test/NC']\n", + "xtrain, ytrain, xtest, ytest = getImages(trainDIRs, testDIRs)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "292100c2", + "metadata": {}, + "outputs": [], + "source": [ + "def createPatches(imgs, patchsize):\n", + " (N, M, C, W, H) = imgs.shape\n", + " (wsize, hsize) = patchsize\n", + " \"\"\"check for errors with sizing\"\"\"\n", + " if (W % wsize != 0) or (H % hsize != 0):\n", + " raise Exception(\"patchsize is not appropriate\")\n", + " if (C != C) or (H != H):\n", + " raise Exception(\"given sizes do not match\")\n", + " size = (N, M, C, W // wsize, wsize, H // hsize, hsize)\n", + " perm = (0, 1, 3, 5, 2, 4, 6) #bring col, row index of patch to front\n", + " flat = (2, 3) #flatten (col, row) index into col*row entry index for patches\n", + " imgs = imgs.reshape(size).permute(perm).flatten(*flat)\n", + " return imgs #in format Nimgs, Npatches, C, Wpatch, Hpatch\n", + " \n", + "def flattenPatches(imgs): #takes input (N, M, Npatches, C, W, H) returns (N, M*Npatches, C*W*H)\n", + " return imgs.flatten(3, 5).flatten(1, 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e0897522", + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\nDataloader\\n'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"\n", + "Dataloader\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "05c80732", + "metadata": {}, + "outputs": [], + "source": [ + "class DatasetWrapper(Dataset):\n", + " def __init__(self, X, y=None):\n", + " self.X, self.y = X, y\n", + "\n", + " def __len__(self):\n", + " return len(self.X)\n", + "\n", + " def __getitem__(self, idx):\n", + " if self.y is None:\n", + " return self.X[idx]\n", + " else:\n", + " return self.X[idx], self.y[idx]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ea41eef5", + "metadata": {}, + "outputs": [], + "source": [ + "trainDIRs = ['../../../AD_NC/train/AD/', '../../../AD_NC/train/NC']\n", + "testDIRs = ['../../../AD_NC/test/AD/', '../../../AD_NC/test/NC']\n", + "xtrain, ytrain, xtest, ytest = getImages(trainDIRs, testDIRs)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "1f077f43", + "metadata": {}, + "outputs": [], + "source": [ + "xtrain = flattenPatches(createPatches(xtrain, (16,16)))\n", + "xtest = flattenPatches(createPatches(xtest, (16,16)))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "a02e05bd", + "metadata": {}, + "outputs": [], + "source": [ + "def trainloader(batchsize=16):\n", + " return DataLoader(DatasetWrapper(xtrain, ytrain), batchsize=batchsize, shuffle=True)\n", + "\n", + "def testloader():\n", + " return DataLoader(DatasetWrapper(xtest, ytest), batchsize=1, shuffle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "18d6ca10", + "metadata": {}, + "outputs": [], + "source": [ + "def trainshape():\n", + " return xtrain.shape\n", + "\n", + "def testshape():\n", + " return xtest.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "8979dcd1", + "metadata": {}, + 
"outputs": [], + "source": [ + "import numpy as np\n", + "import torch\n", + "import torch.nn as nn" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "00d6d9fb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([1076, 4800, 256])\n" + ] + }, + { + "ename": "RuntimeError", + "evalue": "Given groups=1, weight of size [8, 1, 3, 3], expected input[1, 1076, 4800, 256] to have 1 channels, but got 1076 channels instead", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [17]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconv(x)\u001b[38;5;241m.\u001b[39mshape)\n\u001b[1;32m 14\u001b[0m imgsize \u001b[38;5;241m=\u001b[39m xtrain\u001b[38;5;241m.\u001b[39mshape\n\u001b[0;32m---> 15\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mTest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimgsize\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[43mxtrain\u001b[49m\u001b[43m)\u001b[49m\n", + "Input \u001b[0;32mIn [17]\u001b[0m, in \u001b[0;36mTest.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, x):\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28mprint\u001b[39m(x\u001b[38;5;241m.\u001b[39mshape)\n\u001b[0;32m---> 12\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconv\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mshape)\n", + "File 
\u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1500\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n", + "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/container.py:217\u001b[0m, in \u001b[0;36mSequential.forward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m 
\u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m):\n\u001b[1;32m 216\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m:\n\u001b[0;32m--> 217\u001b[0m \u001b[38;5;28minput\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[43mmodule\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28minput\u001b[39m\n", + "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1500\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n", + "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/conv.py:463\u001b[0m, in \u001b[0;36mConv2d.forward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 462\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m: Tensor) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Tensor:\n\u001b[0;32m--> 463\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_conv_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbias\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/conv.py:459\u001b[0m, in \u001b[0;36mConv2d._conv_forward\u001b[0;34m(self, input, weight, bias)\u001b[0m\n\u001b[1;32m 455\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpadding_mode \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mzeros\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[1;32m 456\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m F\u001b[38;5;241m.\u001b[39mconv2d(F\u001b[38;5;241m.\u001b[39mpad(\u001b[38;5;28minput\u001b[39m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reversed_padding_repeated_twice, 
mode\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpadding_mode),\n\u001b[1;32m 457\u001b[0m weight, bias, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstride,\n\u001b[1;32m 458\u001b[0m _pair(\u001b[38;5;241m0\u001b[39m), \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdilation, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgroups)\n\u001b[0;32m--> 459\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconv2d\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbias\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstride\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 460\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpadding\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdilation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgroups\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mRuntimeError\u001b[0m: Given groups=1, weight of size [8, 1, 3, 3], expected input[1, 1076, 4800, 256] to have 1 channels, but got 1076 channels instead" + ] + } + ], + "source": [ + "class Test(nn.Module):\n", + " def __init__(self, imgsize):\n", + " super().__init__()\n", + " (self.N, self.Np, self.P) = imgsize\n", + " self.conv = nn.Sequential(\n", + " nn.Conv2d(1, 8, kernel_size=3, padding=1),\n", + " nn.ReLU(),\n", + " nn.MaxPool2d(2, 2)\n", + " )\n", + " def forward(self, x):\n", + " print(x.shape)\n", + " print(self.conv(x).shape)\n", + "\n", + "imgsize = xtrain.shape\n", + "model = Test(imgsize).forward(xtrain)\n" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "690cd78c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recognition/vision-transformer-4696689/old/.ipynb_checkpoints/datasetconv-checkpoint.ipynb b/recognition/vision-transformer-4696689/old/.ipynb_checkpoints/datasetconv-checkpoint.ipynb new file mode 100644 index 000000000..786bbfade --- /dev/null +++ b/recognition/vision-transformer-4696689/old/.ipynb_checkpoints/datasetconv-checkpoint.ipynb @@ -0,0 +1,291 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "338da719", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/oliver/opt/anaconda3/lib/python3.9/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: 'dlopen(/Users/oliver/opt/anaconda3/lib/python3.9/site-packages/torchvision/image.so, 6): Library not loaded: @rpath/libpng16.16.dylib\n", + " Referenced from: /Users/oliver/opt/anaconda3/lib/python3.9/site-packages/torchvision/image.so\n", + " Reason: Incompatible library version: image.so requires version 56.0.0 or later, but libpng16.16.dylib provides version 54.0.0'If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. 
Did you have `libjpeg` or `libpng` installed before building `torchvision` from source?\n", + " warn(\n" + ] + } + ], + "source": [ + "\"\"\"\n", + "Imports Here\n", + "\"\"\"\n", + "\"\"\"numpy and torch\"\"\"\n", + "import numpy as np\n", + "import torch\n", + "\n", + "\"\"\"PIL\"\"\"\n", + "from PIL import Image\n", + "\n", + "\"\"\"torchvision and utils\"\"\"\n", + "import torchvision.transforms as transforms\n", + "from torch.utils.data import DataLoader, Dataset\n", + "\n", + "\"\"\"os\"\"\"\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "65011ff4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\nLoading data from local file\\n'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"\n", + "Loading data from local file\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "206e485b", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"Assumes images have pixel values in range [0,255]\"\"\"\n", + "def getImages(trainDIRs, testDIRS):\n", + " \"\"\"Get image to tensor\"\"\"\n", + " transform = transforms.Compose([\n", + " transforms.PILToTensor()\n", + " ])\n", + " \"\"\"Loading data into arrays\"\"\"\n", + " xtrain, xtrain, xtest, ytest = [], [], [], []\n", + " \"\"\"training data\"\"\"\n", + " size = [0, 0]\n", + " for i, DIR in enumerate(trainDIRs):\n", + " px = []\n", + " j = 0\n", + " for filename in sorted(os.listdir(DIR)):\n", + " f = os.path.join(DIR, filename)\n", + " img = Image.open(f)\n", + " tensor = transform(img).float()\n", + " tensor.require_grad = True\n", + " px.append(tensor/255)\n", + " j = (j+1) % 20\n", + " if j == 0:\n", + " xtrain.append(torch.stack(px))\n", + " px = []\n", + " size[i] += 1\n", + " xtrain = torch.stack(xtrain)\n", + " ytrain = torch.from_numpy(np.concatenate((np.ones(size[0]), np.zeros(size[1])), axis=0))\n", + " \n", + " \"\"\"testing data\"\"\"\n", + " size = 
[0, 0]\n", + " for i, DIR in enumerate(testDIRs):\n", + " px = []\n", + " j = 0\n", + " for filename in sorted(os.listdir(DIR)):\n", + " f = os.path.join(DIR, filename)\n", + " img = Image.open(f)\n", + " tensor = transform(img).float()\n", + " tensor.require_grad = True\n", + " px.append(tensor/255)\n", + " j = (j+1) % 20\n", + " if j == 0:\n", + " xtest.append(torch.stack(px))\n", + " px = []\n", + " size[i] += 1\n", + " xtest = torch.stack(xtest)\n", + " ytest = torch.from_numpy(np.concatenate((np.ones(size[0]), np.zeros(size[1])), axis=0))\n", + " return xtrain, ytrain, xtest, ytest" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a3c45c1a", + "metadata": {}, + "outputs": [], + "source": [ + "trainDIRs = ['../../../AD_NC/train/AD/', '../../../AD_NC/train/NC']\n", + "testDIRs = ['../../../AD_NC/test/AD/', '../../../AD_NC/test/NC']\n", + "xtrain, ytrain, xtest, ytest = getImages(trainDIRs, testDIRs)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "292100c2", + "metadata": {}, + "outputs": [], + "source": [ + "def createPatches(imgs, patchsize):\n", + " (N, M, C, W, H) = imgs.shape\n", + " (wsize, hsize) = patchsize\n", + " \"\"\"check for errors with sizing\"\"\"\n", + " if (W % wsize != 0) or (H % hsize != 0):\n", + " raise Exception(\"patchsize is not appropriate\")\n", + " if (C != C) or (H != H):\n", + " raise Exception(\"given sizes do not match\")\n", + " size = (N, M, C, W // wsize, wsize, H // hsize, hsize)\n", + " perm = (0, 1, 3, 5, 2, 4, 6) #bring col, row index of patch to front\n", + " flat = (2, 3) #flatten (col, row) index into col*row entry index for patches\n", + " imgs = imgs.reshape(size).permute(perm).flatten(*flat)\n", + " return imgs #in format Nimgs, Npatches, C, Wpatch, Hpatch\n", + " \n", + "def flattenPatches(imgs): #takes input (N, M, Npatches, C, W, H) returns (N, M*Npatches, C*W*H)\n", + " return imgs.flatten(3, 5).flatten(1, 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + 
"id": "e0897522", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\nDataloader\\n'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"\n", + "Dataloader\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "05c80732", + "metadata": {}, + "outputs": [], + "source": [ + "class DatasetWrapper(Dataset):\n", + " def __init__(self, X, y=None):\n", + " self.X, self.y = X, y\n", + "\n", + " def __len__(self):\n", + " return len(self.X)\n", + "\n", + " def __getitem__(self, idx):\n", + " if self.y is None:\n", + " return self.X[idx]\n", + " else:\n", + " return self.X[idx], self.y[idx]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ea41eef5", + "metadata": {}, + "outputs": [], + "source": [ + "trainDIRs = ['../../../AD_NC/train/AD/', '../../../AD_NC/train/NC']\n", + "testDIRs = ['../../../AD_NC/test/AD/', '../../../AD_NC/test/NC']\n", + "xtrain, ytrain, xtest, ytest = getImages(trainDIRs, testDIRs)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "1f077f43", + "metadata": {}, + "outputs": [], + "source": [ + "xtrain = flattenPatches(createPatches(xtrain, (16,16)))\n", + "xtest = flattenPatches(createPatches(xtest, (16,16)))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "a02e05bd", + "metadata": {}, + "outputs": [], + "source": [ + "def trainloader(batchsize=16):\n", + " return DataLoader(DatasetWrapper(xtrain, ytrain), batchsize=batchsize, shuffle=True)\n", + "\n", + "def testloader():\n", + " return DataLoader(DatasetWrapper(xtest, ytest), batchsize=1, shuffle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "18d6ca10", + "metadata": {}, + "outputs": [], + "source": [ + "def trainshape():\n", + " return xtrain.shape\n", + "\n", + "def testshape():\n", + " return xtest.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "8979dcd1", + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([1076, 4800, 256])\n", + "torch.Size([450, 4800, 256])\n" + ] + } + ], + "source": [ + "print(xtrain.shape)\n", + "print(xtest.shape)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recognition/vision-transformer-4696689/old/.ipynb_checkpoints/matplots-checkpoint.ipynb b/recognition/vision-transformer-4696689/old/.ipynb_checkpoints/matplots-checkpoint.ipynb new file mode 100644 index 000000000..363fcab7e --- /dev/null +++ b/recognition/vision-transformer-4696689/old/.ipynb_checkpoints/matplots-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recognition/vision-transformer-4696689/old/.ipynb_checkpoints/model-checkpoint.ipynb b/recognition/vision-transformer-4696689/old/.ipynb_checkpoints/model-checkpoint.ipynb new file mode 100644 index 000000000..3263fcf63 --- /dev/null +++ b/recognition/vision-transformer-4696689/old/.ipynb_checkpoints/model-checkpoint.ipynb @@ -0,0 +1,182 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 37, + "id": "fc1d26a6", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "Imports Here\n", + "\"\"\"\n", + "import numpy as np\n", + "import torch\n", + "import torch.nn as nn" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "00044d75", + "metadata": {}, + "outputs": [], + "source": [ + "class Attention(nn.Module):\n", + " def __init__(self, heads, EMBED_DIMENSION):\n", + " super().__init__()\n", + " self.heads = 
heads\n", + " self.attn = nn.MultiheadAttention(EMBED_DIMENSION, heads, batch_first=True)\n", + " self.Q = nn.Linear(EMBED_DIMENSION, EMBED_DIMENSION, bias=False)\n", + " self.K = nn.Linear(EMBED_DIMENSION, EMBED_DIMENSION, bias=False)\n", + " self.V = nn.Linear(EMBED_DIMENSION, EMBED_DIMENSION, bias=False)\n", + " \n", + " def forward(self, x):\n", + " Q = self.Q(x)\n", + " K = self.K(x)\n", + " V = self.V(x)\n", + " \n", + " attnout, attnweights = self.attn(Q, K, V)\n", + " return attnout" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "733599f9", + "metadata": {}, + "outputs": [], + "source": [ + "class TransBlock(nn.Module):\n", + " def __init__(self, heads, EMBED_DIMENSION, fflsize):\n", + " super().__init__()\n", + " self.fnorm = nn.LayerNorm(EMBED_DIMENSION)\n", + " self.snorm = nn.LayerNorm(EMBED_DIMENSION)\n", + " self.attn = Attention(heads, EMBED_DIMENSION)\n", + " self.ffl = nn.Sequential(\n", + " nn.Linear(EMBED_DIMENSION, fflsize),\n", + " nn.GELU(),\n", + " nn.Linear(fflsize, EMBED_DIMENSION)\n", + " )\n", + " \n", + " def forward(self, x):\n", + " \"\"\"\n", + " Switching to pre-MHA LayerNorm is supposed to give better performance,\n", + " this is used in other models such as LLMs like GPT. Gradients are meant\n", + " to be stabilised. 
This is different to the original ViT paper.\n", + " \"\"\"\n", + " x = x + self.attn(self.fnorm(x))[0]\n", + " x = x + self.ffl(self.snorm(x))\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2a5e050", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "Inception module for efficient 7x7 convolution\n", + "\"\"\"\n", + "class Inception(nn.Module):\n", + " def __init__(self, dimin, dimout):\n", + " super().__init__()\n", + " self.branch1 = nn.Sequential(\n", + " nn.Conv2d(dimin, dimout[0], 1, stride=(1,1)),\n", + " nn.Conv2d(dimout[0], dimout[0], 3, stride=(1,1), padding=1),\n", + " nn.Conv2d(dimout[0], dimout[0], 3, stride=(1,1), padding=1)\n", + " )\n", + " self.branch2 = nn.Sequential(\n", + " nn.Conv2d(dimin, dimout[1]), 1, stride=(1,1),\n", + " nn.Conv2d(dimout[1], dimout[1], 3, stride=(1,1), padding=1)\n", + " )\n", + " self.branch3 = nn.Sequential(\n", + " nn.AvgPool2d(3, stride=(1,1), padding=1),\n", + " nn.Conv2d(dimin, dimout[2], 1, stride=(1,1))\n", + " )\n", + " self.branch4 = nn.Sequential(\n", + " nn.Conv2d(dimin, dimout[3], 1, stride=(1,1))\n", + " )\n", + " def forward(self, imgs)\n", + " x1 = self.branch1(imgs)\n", + " x2 = self.branch2(imgs)\n", + " x3 = self.branch3(imgs)\n", + " x4 = self.branch4(imgs)\n", + " return torch.cat([x1, x2, x3, x4], dim=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "e6ac9e2b", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "Vision Transformer Class to create a vision transformer model\n", + "\"\"\"\n", + "class VisionTransformer(nn.Module):\n", + " def __init__(self, classes=2, inputsize=(1,1,1), heads=2, fflscale=2, nblocks=1):\n", + " super().__init__()\n", + " (self.N, self.Np, self.P) = inputsize\n", + " \"\"\"components\"\"\"\n", + " self.proj = nn.Linear(self.P, EMBED_DIMENSION)\n", + " self.clstoken = nn.Parameter(torch.zeros(1, 1, EMBED_DIMENSION))\n", + " self.posembed = self.embedding(self.Np+1, 
EMBED_DIMENSION, freq=10000) #10000 is described in ViT paper\n", + " self.posembed = self.posembed.repeat(self.N, 1, 1)\n", + " self.transformer = nn.Sequential(\n", + " *((TransBlock(heads, EMBED_DIMENSION, int(fflscale*EMBED_DIMENSION)),)*nblocks)\n", + " )\n", + " self.classifier = nn.Sequential(\n", + " nn.LayerNorm(EMBED_DIMENSION),\n", + " nn.Linear(EMBED_DIMENSION, classes)\n", + " )\n", + " \n", + " def embedding(npatches, EMBED_DIMENSION, freq):\n", + " posembed = torch.zeros(npatches, EMBED_DIMENSION)\n", + " for i in range(npatches):\n", + " for j in range(EMBED_DIMENSION):\n", + " if j % 2 == 0:\n", + " posembed[i][j] = np.sin(i/(freq**(j/EMBED_DIMENSION)))\n", + " else:\n", + " posembed[i][j] = np.cos(i/(freq**((j-1)/EMBED_DIMENSION)))\n", + " return posembed\n", + " \n", + " def forward(self, imgs): #assume size checking done by createPatches\n", + " \"\"\"Linear Projection and Positional Embedding\"\"\"\n", + " tokens = self.proj(imgs) #perform linear projection\n", + " clstoken = self.clstoken.repeat(self.N, 1, 1)\n", + " tokens = torch.cat([clstoken, tokens], dim=1) #concat the class token\n", + " x = tokens + self.posembed #add positional encoding\n", + " \"\"\"Transformer\"\"\"\n", + " x = self.transformer(x)\n", + " \"\"\"Classification\"\"\"\n", + " y = x[0]\n", + " return self.classifier(y)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recognition/vision-transformer-4696689/old/.ipynb_checkpoints/train-checkpoint.ipynb b/recognition/vision-transformer-4696689/old/.ipynb_checkpoints/train-checkpoint.ipynb new file mode 100644 index 
000000000..04b276bce --- /dev/null +++ b/recognition/vision-transformer-4696689/old/.ipynb_checkpoints/train-checkpoint.ipynb @@ -0,0 +1,246 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "73ebb771", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/oliver/opt/anaconda3/lib/python3.9/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: 'dlopen(/Users/oliver/opt/anaconda3/lib/python3.9/site-packages/torchvision/image.so, 6): Library not loaded: @rpath/libpng16.16.dylib\n", + " Referenced from: /Users/oliver/opt/anaconda3/lib/python3.9/site-packages/torchvision/image.so\n", + " Reason: Incompatible library version: image.so requires version 56.0.0 or later, but libpng16.16.dylib provides version 54.0.0'If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. 
Did you have `libjpeg` or `libpng` installed before building `torchvision` from source?\n", + " warn(\n" + ] + } + ], + "source": [ + "\"\"\"\n", + "Imports Here\n", + "\"\"\"\n", + "from dataset import trainloader\n", + "from dataset import testloader\n", + "from dataset import trainaccloader\n", + "from dataset import trainshape\n", + "from dataset import testshape" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "df0ea69a", + "metadata": {}, + "outputs": [], + "source": [ + "from model import VisionTransformer\n", + "from model import Attention\n", + "from model import TransBlock\n", + "from model3d import Inception" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "ae8aebe7", + "metadata": {}, + "outputs": [], + "source": [ + "TRAIN_LOSS = []\n", + "TRAIN_ACC = []\n", + "\n", + "def train(model, dataloader, accloader, lossfunc, optimiser, lr=0.1, momentum=0.9, batchsize=16, nepochs=10):\n", + " device = next(model.parameters()).device # check what device the net parameters are on\n", + " \n", + " \"\"\"training\"\"\"\n", + " for i in range(nepochs): # for each epoch\n", + " epoch_loss = 0\n", + " model.train()\n", + " n_batches = 0\n", + " time1 = time.time()\n", + " for (x, y) in dataloader: # for each mini-batch\n", + " optimiser.zero_grad(set_to_none=True)\n", + " loss = lossfunc(model.forward(x), y)\n", + " loss.backward()\n", + " optimiser.step()\n", + " epoch_loss += loss\n", + " n_batches += 1\n", + " time2 = time.time()\n", + " print(\"Done an epoch\", time2-time1)\n", + " epoch_loss /= n_batches\n", + " \n", + " \"\"\"evaluating\"\"\"\n", + " model.eval()\n", + " accuracy = test(model, accloader)\n", + "\n", + " \"\"\"get performance\"\"\"\n", + " TRAIN_LOSS.append(epoch_loss.item())\n", + " TRAIN_ACC.append(accuracy)\n", + "\n", + "def test(model, dataloader):\n", + " with torch.no_grad(): # disable automatic gradient computation for efficiency\n", + " device = next(model.parameters()).device\n", + " \n", + " 
\"\"\"make predictions\"\"\"\n", + " pcls = []\n", + " items = 0\n", + " time1=time.time()\n", + " for x, y in dataloader:\n", + " x = x.to(device)\n", + " pcls.append(abs(y.cpu()-torch.max(model(x), 1)[1].cpu()))\n", + " items += 1\n", + " time2 = time.time()\n", + " print(\"found accuracy in:\", time2-time1)\n", + "\n", + " \"\"\"get accuracy\"\"\"\n", + " pcls = torch.cat(pcls) # concat predictions on the mini-batches\n", + " accuracy = 1 - (pcls.sum().float() / items)\n", + " print(\"accuracy:\", accuracy)\n", + " return accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "75a45973", + "metadata": {}, + "outputs": [], + "source": [ + "batchsize=16\n", + "N, Np, P = trainshape()\n", + "model = VisionTransformer(inputsize=(batchsize, Np, P), embed=128, fflscale=2, nblocks=4)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "7b54a6f0", + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.optim as optim\n", + "\n", + "criterion = nn.CrossEntropyLoss()\n", + "optimiser = optim.AdamW(model.parameters(), lr=1e-4)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "18488555", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done an epoch 346.20038080215454\n", + "found accuracy in: 135.9069368839264\n", + "accuracy: tensor(0.5288)\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [43]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m start \u001b[38;5;241m=\u001b[39m time\u001b[38;5;241m.\u001b[39mtime()\n\u001b[0;32m----> 2\u001b[0m 
\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrainloader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbatchsize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbatchsize\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrainaccloader\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcriterion\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moptimiser\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnepochs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m10\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m end \u001b[38;5;241m=\u001b[39m time\u001b[38;5;241m.\u001b[39mtime()\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtraining time: \u001b[39m\u001b[38;5;124m\"\u001b[39m, end\u001b[38;5;241m-\u001b[39mstart)\n", + "Input \u001b[0;32mIn [40]\u001b[0m, in \u001b[0;36mtrain\u001b[0;34m(model, dataloader, accloader, lossfunc, optimiser, lr, momentum, batchsize, nepochs)\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m (x, y) \u001b[38;5;129;01min\u001b[39;00m dataloader: \u001b[38;5;66;03m# for each mini-batch\u001b[39;00m\n\u001b[1;32m 14\u001b[0m optimiser\u001b[38;5;241m.\u001b[39mzero_grad(set_to_none\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m---> 15\u001b[0m loss \u001b[38;5;241m=\u001b[39m lossfunc(\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m, y)\n\u001b[1;32m 16\u001b[0m loss\u001b[38;5;241m.\u001b[39mbackward()\n\u001b[1;32m 17\u001b[0m optimiser\u001b[38;5;241m.\u001b[39mstep()\n", + "File \u001b[0;32m~/Desktop/COMP3710 Project/PatternAnalysis-2023/recognition/vision-transformer-4696689/model.py:84\u001b[0m, in 
\u001b[0;36mVisionTransformer.forward\u001b[0;34m(self, imgs)\u001b[0m\n\u001b[1;32m 82\u001b[0m x \u001b[38;5;241m=\u001b[39m tokens \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mposembed\u001b[38;5;241m.\u001b[39mrepeat(imgs\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m], \u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m1\u001b[39m) \u001b[38;5;66;03m#add positional encoding\u001b[39;00m\n\u001b[1;32m 83\u001b[0m \u001b[38;5;124;03m\"\"\"Transformer\"\"\"\u001b[39;00m\n\u001b[0;32m---> 84\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtransformer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 85\u001b[0m \u001b[38;5;124;03m\"\"\"Classification\"\"\"\u001b[39;00m\n\u001b[1;32m 86\u001b[0m y \u001b[38;5;241m=\u001b[39m x[:,\u001b[38;5;241m0\u001b[39m]\n", + "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 
1500\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n", + "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/container.py:217\u001b[0m, in \u001b[0;36mSequential.forward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m):\n\u001b[1;32m 216\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m:\n\u001b[0;32m--> 217\u001b[0m \u001b[38;5;28minput\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[43mmodule\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28minput\u001b[39m\n", + "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m 
(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1500\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n", + "File \u001b[0;32m~/Desktop/COMP3710 Project/PatternAnalysis-2023/recognition/vision-transformer-4696689/model.py:43\u001b[0m, in \u001b[0;36mTransBlock.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 37\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, x):\n\u001b[1;32m 38\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 39\u001b[0m \u001b[38;5;124;03m Switching to pre-MHA LayerNorm is supposed to give better performance,\u001b[39;00m\n\u001b[1;32m 40\u001b[0m \u001b[38;5;124;03m this is used in other models such as LLMs like GPT. Gradients are meant\u001b[39;00m\n\u001b[1;32m 41\u001b[0m \u001b[38;5;124;03m to be stabilised. 
This is different to the original ViT paper.\u001b[39;00m\n\u001b[1;32m 42\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 43\u001b[0m x \u001b[38;5;241m=\u001b[39m x \u001b[38;5;241m+\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfnorm\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 44\u001b[0m x \u001b[38;5;241m=\u001b[39m x \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mffl(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msnorm(x))\n\u001b[1;32m 45\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m x\n", + "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1500\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m 
_global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n", + "File \u001b[0;32m~/Desktop/COMP3710 Project/PatternAnalysis-2023/recognition/vision-transformer-4696689/model.py:22\u001b[0m, in \u001b[0;36mAttention.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 19\u001b[0m K \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mK(x)\n\u001b[1;32m 20\u001b[0m V \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mV(x)\n\u001b[0;32m---> 22\u001b[0m attnout, attnweights \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mQ\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mK\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mV\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 23\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m attnout\n", + "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m 
(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1500\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n", + "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/activation.py:1189\u001b[0m, in \u001b[0;36mMultiheadAttention.forward\u001b[0;34m(self, query, key, value, key_padding_mask, need_weights, attn_mask, average_attn_weights, is_causal)\u001b[0m\n\u001b[1;32m 1175\u001b[0m attn_output, attn_output_weights \u001b[38;5;241m=\u001b[39m F\u001b[38;5;241m.\u001b[39mmulti_head_attention_forward(\n\u001b[1;32m 1176\u001b[0m query, key, value, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39membed_dim, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnum_heads,\n\u001b[1;32m 1177\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39min_proj_weight, 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39min_proj_bias,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1186\u001b[0m average_attn_weights\u001b[38;5;241m=\u001b[39maverage_attn_weights,\n\u001b[1;32m 1187\u001b[0m is_causal\u001b[38;5;241m=\u001b[39mis_causal)\n\u001b[1;32m 1188\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1189\u001b[0m attn_output, attn_output_weights \u001b[38;5;241m=\u001b[39m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmulti_head_attention_forward\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1190\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43membed_dim\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnum_heads\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1191\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43min_proj_weight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43min_proj_bias\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1192\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbias_k\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbias_v\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd_zero_attn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1193\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdropout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mout_proj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mout_proj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbias\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1194\u001b[0m \u001b[43m \u001b[49m\u001b[43mtraining\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtraining\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1195\u001b[0m \u001b[43m \u001b[49m\u001b[43mkey_padding_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkey_padding_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1196\u001b[0m \u001b[43m \u001b[49m\u001b[43mneed_weights\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mneed_weights\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1197\u001b[0m \u001b[43m \u001b[49m\u001b[43mattn_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattn_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1198\u001b[0m \u001b[43m \u001b[49m\u001b[43maverage_attn_weights\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maverage_attn_weights\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1199\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_causal\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_causal\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1200\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbatch_first \u001b[38;5;129;01mand\u001b[39;00m is_batched:\n\u001b[1;32m 1201\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m attn_output\u001b[38;5;241m.\u001b[39mtranspose(\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m0\u001b[39m), attn_output_weights\n", + "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/functional.py:5313\u001b[0m, in 
\u001b[0;36mmulti_head_attention_forward\u001b[0;34m(query, key, value, embed_dim_to_check, num_heads, in_proj_weight, in_proj_bias, bias_k, bias_v, add_zero_attn, dropout_p, out_proj_weight, out_proj_bias, training, key_padding_mask, need_weights, attn_mask, use_separate_proj_weight, q_proj_weight, k_proj_weight, v_proj_weight, static_k, static_v, average_attn_weights, is_causal)\u001b[0m\n\u001b[1;32m 5311\u001b[0m attn_output_weights \u001b[38;5;241m=\u001b[39m attn_output_weights\u001b[38;5;241m.\u001b[39mview(bsz, num_heads, tgt_len, src_len)\n\u001b[1;32m 5312\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m average_attn_weights:\n\u001b[0;32m-> 5313\u001b[0m attn_output_weights \u001b[38;5;241m=\u001b[39m \u001b[43mattn_output_weights\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmean\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdim\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5315\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_batched:\n\u001b[1;32m 5316\u001b[0m \u001b[38;5;66;03m# squeeze the output if input was unbatched\u001b[39;00m\n\u001b[1;32m 5317\u001b[0m attn_output \u001b[38;5;241m=\u001b[39m attn_output\u001b[38;5;241m.\u001b[39msqueeze(\u001b[38;5;241m1\u001b[39m)\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "start = time.time()\n", + "train(model, trainloader(batchsize=batchsize), trainaccloader(), criterion, optimiser, nepochs=10)\n", + "end = time.time()\n", + "print(\"training time: \", end-start)\n", + "test(model, testloader())" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "bbaac2fc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[]\n", + "[]\n" + ] + } + ], + "source": [ + "print(TRAIN_LOSS)\n", + "print(TRAIN_ACC)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "94178617", + "metadata": {}, + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "280706\n" + ] + } + ], + "source": [ + "print(sum(p.numel() for p in model.parameters()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2ccfcbae", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recognition/vision-transformer-4696689/old/conv b/recognition/vision-transformer-4696689/old/conv new file mode 100644 index 000000000..e69de29bb diff --git a/recognition/vision-transformer-4696689/old/dataloader_torch.ipynb b/recognition/vision-transformer-4696689/old/dataloader_torch.ipynb new file mode 100644 index 000000000..48b59d112 --- /dev/null +++ b/recognition/vision-transformer-4696689/old/dataloader_torch.ipynb @@ -0,0 +1,181 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 8, + "id": "b8467df9", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "Imports Here\n", + "\"\"\"\n", + "\"\"\"numpy and torch\"\"\"\n", + "import numpy as np\n", + "import torch\n", + "\n", + "\"\"\"PIL\"\"\"\n", + "from PIL import Image\n", + "\n", + "\"\"\"torchvision and utils\"\"\"\n", + "import torchvision.transforms as transforms\n", + "from torch.utils.data import DataLoader, Dataset\n", + "\n", + "\"\"\"os\"\"\"\n", + "import os\n", + "\n", + "\"\"\"\n", + "Loading data from local file\n", + "\"\"\"\n", + "\n", + "\"\"\"Assumes images have pixel values in range [0,255]\"\"\"\n", + "def getImages(trainDIRs, testDIRS):\n", + " \"\"\"Get image to tensor\"\"\"\n", + " transform = transforms.Compose([\n", + " 
transforms.PILToTensor()\n", + " ])\n", + " hflip = transforms.Compose([\n", + " transforms.RandomHorizontalFlip(p=1.0),\n", + " transforms.PILToTensor()\n", + " ])\n", + " vflip = transforms.Compose([\n", + " transforms.RandomVerticalFlip(p=1.0),\n", + " transforms.PILToTensor()\n", + " ])\n", + " dflip = transforms.Compose([\n", + " transforms.RandomHorizontalFlip(p=1.0),\n", + " transforms.RandomVerticalFlip(p=1.0),\n", + " transforms.PILToTensor()\n", + " ])\n", + " tlist = [transform, hflip, vflip, dflip]\n", + " \"\"\"Loading data into arrays\"\"\"\n", + " xtrain, xtrain, xtest, ytest = [], [], [], []\n", + " \"\"\"training data\"\"\"\n", + " size = [0, 0]\n", + " for i, DIR in enumerate(trainDIRs):\n", + " for t in tlist:\n", + " px = []\n", + " j = 0\n", + " for filename in sorted(os.listdir(DIR)):\n", + " f = os.path.join(DIR, filename)\n", + " img = Image.open(f)\n", + " tensor = t(img).float()\n", + " tensor.require_grad = True\n", + " px.append(tensor/255)\n", + " j = (j+1) % 20\n", + " if j == 0:\n", + " xtrain.append(torch.stack(px))\n", + " px = []\n", + " size[i] += 1\n", + " xtrain = torch.stack(xtrain)\n", + " ytrain = torch.from_numpy(np.concatenate((np.ones(size[0]), np.zeros(size[1])), axis=0))\n", + "\n", + "\n", + " \"\"\"testing data\"\"\"\n", + " size = [0, 0]\n", + " for i, DIR in enumerate(testDIRs):\n", + " for t in tlist:\n", + " px = []\n", + " j = 0\n", + " for filename in sorted(os.listdir(DIR)):\n", + " f = os.path.join(DIR, filename)\n", + " img = Image.open(f)\n", + " tensor = transform(img).float()\n", + " tensor.require_grad = True\n", + " px.append(tensor/255)\n", + " j = (j+1) % 20\n", + " if j == 0:\n", + " xtest.append(torch.stack(px))\n", + " px = []\n", + " size[i] += 1\n", + " xtest = torch.stack(xtest)\n", + " idx = torch.randperm(xtest.size(0))\n", + " xtest = xtest[idx, :]\n", + " splitsize = int(xtest.shape[0]/2)\n", + " xval, xtest = xtest.split(splitsize, dim=0)\n", + " ytest = 
torch.from_numpy(np.concatenate((np.ones(size[0]), np.zeros(size[1])), axis=0))\n", + " ytest = ytest[idx]\n", + " yval, ytest = ytest.split(splitsize, dim=0)\n", + " return xtrain, ytrain, xtest, ytest, xval, yval\n", + "\n", + "\"\"\"\n", + "Dataloader\n", + "\"\"\"\n", + "class DatasetWrapper(Dataset):\n", + " def __init__(self, X, y=None):\n", + " self.X, self.y = X, y\n", + "\n", + " def __len__(self):\n", + " return len(self.X)\n", + "\n", + " def __getitem__(self, idx):\n", + " if self.y is None:\n", + " return self.X[idx]\n", + " else:\n", + " return self.X[idx], self.y[idx]\n", + "\n", + "trainDIRs = ['../../../AD_NC/train/AD/', '../../../AD_NC/train/NC']\n", + "testDIRs = ['../../../AD_NC/test/AD/', '../../../AD_NC/test/NC']\n", + "xtrain, ytrain, xtest, ytest, xval, yval = getImages(trainDIRs, testDIRs)\n", + "ytrain, ytest = ytrain.type(torch.LongTensor), ytest.type(torch.LongTensor)\n", + "xtrain = xtrain.permute(0, 2, 1, 3, 4)\n", + "xtest = xtest.permute(0, 2, 1, 3, 4)\n", + "xval = xval.permute(0, 2, 1, 3, 4)\n", + "\n", + "def trainloader(batchsize=16):\n", + " return DataLoader(DatasetWrapper(xtrain, ytrain), batch_size=batchsize, shuffle=True, pin_memory=True)\n", + "\n", + "def valloader():\n", + " return DataLoader(DatasetWrapper(xval, yval), batch_size=1, shuffle=True, pin_memory=True)\n", + "\n", + "def testloader():\n", + " return DataLoader(DatasetWrapper(xtest, ytest), batch_size=1, shuffle=True, pin_memory=True)\n", + "\n", + "def trainshape():\n", + " return xtrain.shape\n", + "\n", + "def testshape():\n", + " return xtest.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "0334d1ac", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([4304, 1, 20, 240, 256]) torch.Size([900, 1, 20, 240, 256])\n" + ] + } + ], + "source": [ + "print(trainshape(), testshape())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + 
"language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recognition/vision-transformer-4696689/old/dataset.ipynb b/recognition/vision-transformer-4696689/old/dataset.ipynb new file mode 100644 index 000000000..f0321fffd --- /dev/null +++ b/recognition/vision-transformer-4696689/old/dataset.ipynb @@ -0,0 +1,304 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "338da719", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/oliver/opt/anaconda3/lib/python3.9/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: 'dlopen(/Users/oliver/opt/anaconda3/lib/python3.9/site-packages/torchvision/image.so, 6): Library not loaded: @rpath/libpng16.16.dylib\n", + " Referenced from: /Users/oliver/opt/anaconda3/lib/python3.9/site-packages/torchvision/image.so\n", + " Reason: Incompatible library version: image.so requires version 56.0.0 or later, but libpng16.16.dylib provides version 54.0.0'If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. 
Did you have `libjpeg` or `libpng` installed before building `torchvision` from source?\n", + " warn(\n" + ] + } + ], + "source": [ + "\"\"\"\n", + "Imports Here\n", + "\"\"\"\n", + "\"\"\"numpy and torch\"\"\"\n", + "import numpy as np\n", + "import torch\n", + "\n", + "\"\"\"PIL\"\"\"\n", + "from PIL import Image\n", + "\n", + "\"\"\"torchvision and utils\"\"\"\n", + "import torchvision.transforms as transforms\n", + "from torch.utils.data import DataLoader, Dataset\n", + "\n", + "\"\"\"os\"\"\"\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97e1d5de", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "65011ff4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\nLoading data from local file\\n'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"\n", + "Loading data from local file\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "206e485b", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"Assumes images have pixel values in range [0,255]\"\"\"\n", + "def getImages(trainDIRs, testDIRS):\n", + " \"\"\"Get image to tensor\"\"\"\n", + " transform = transforms.Compose([\n", + " transforms.PILToTensor()\n", + " ])\n", + " augment = transforms.Compose([\n", + " Rescale(256), \n", + " RandomCrop(224), \n", + " ToTensor()\n", + " ])\n", + " \"\"\"Loading data into arrays\"\"\"\n", + " xtrain, xtrain, xtest, ytest = [], [], [], []\n", + " \"\"\"training data\"\"\"\n", + " size = [0, 0]\n", + " for i, DIR in enumerate(trainDIRs):\n", + " px = []\n", + " j = 0\n", + " for filename in sorted(os.listdir(DIR)):\n", + " f = os.path.join(DIR, filename)\n", + " img = Image.open(f)\n", + " tensor = transform(img).float()\n", + " tensor.require_grad = True\n", + " px.append(tensor/255)\n", + " j = (j+1) % 20\n", + " if j == 0:\n", + " 
xtrain.append(torch.stack(px))\n", + " px = []\n", + " size[i] += 1\n", + " xtrain = torch.stack(xtrain)\n", + " ytrain = torch.from_numpy(np.concatenate((np.ones(size[0]), np.zeros(size[1])), axis=0))\n", + " \n", + " \"\"\"testing data\"\"\"\n", + " size = [0, 0]\n", + " for i, DIR in enumerate(testDIRs):\n", + " px = []\n", + " j = 0\n", + " for filename in sorted(os.listdir(DIR)):\n", + " f = os.path.join(DIR, filename)\n", + " img = Image.open(f)\n", + " tensor = transform(img).float()\n", + " tensor.require_grad = True\n", + " px.append(tensor/255)\n", + " j = (j+1) % 20\n", + " if j == 0:\n", + " xtest.append(torch.stack(px))\n", + " px = []\n", + " size[i] += 1\n", + " xtest = torch.stack(xtest)\n", + " ytest = torch.from_numpy(np.concatenate((np.ones(size[0]), np.zeros(size[1])), axis=0))\n", + " return xtrain, ytrain, xtest, ytest" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a3c45c1a", + "metadata": {}, + "outputs": [], + "source": [ + "trainDIRs = ['../../../AD_NC/train/AD/', '../../../AD_NC/train/NC']\n", + "testDIRs = ['../../../AD_NC/test/AD/', '../../../AD_NC/test/NC']\n", + "xtrain, ytrain, xtest, ytest = getImages(trainDIRs, testDIRs)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "292100c2", + "metadata": {}, + "outputs": [], + "source": [ + "def createPatches(imgs, patchsize):\n", + " (N, M, C, W, H) = imgs.shape\n", + " (wsize, hsize) = patchsize\n", + " \"\"\"check for errors with sizing\"\"\"\n", + " if (W % wsize != 0) or (H % hsize != 0):\n", + " raise Exception(\"patchsize is not appropriate\")\n", + " if (C != C) or (H != H):\n", + " raise Exception(\"given sizes do not match\")\n", + " size = (N, M, C, W // wsize, wsize, H // hsize, hsize)\n", + " perm = (0, 1, 3, 5, 2, 4, 6) #bring col, row index of patch to front\n", + " flat = (2, 3) #flatten (col, row) index into col*row entry index for patches\n", + " imgs = imgs.reshape(size).permute(perm).flatten(*flat)\n", + " return imgs #in format 
Nimgs, Npatches, C, Wpatch, Hpatch\n", + " \n", + "def flattenPatches(imgs): #takes input (N, M, Npatches, C, W, H) returns (N, M*Npatches, C*W*H)\n", + " return imgs.flatten(3, 5).flatten(1, 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e0897522", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\nDataloader\\n'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"\n", + "Dataloader\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "05c80732", + "metadata": {}, + "outputs": [], + "source": [ + "class DatasetWrapper(Dataset):\n", + " def __init__(self, X, y=None):\n", + " self.X, self.y = X, y\n", + "\n", + " def __len__(self):\n", + " return len(self.X)\n", + "\n", + " def __getitem__(self, idx):\n", + " if self.y is None:\n", + " return self.X[idx]\n", + " else:\n", + " return self.X[idx], self.y[idx]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ea41eef5", + "metadata": {}, + "outputs": [], + "source": [ + "trainDIRs = ['../../../AD_NC/train/AD/', '../../../AD_NC/train/NC']\n", + "testDIRs = ['../../../AD_NC/test/AD/', '../../../AD_NC/test/NC']\n", + "xtrain, ytrain, xtest, ytest = getImages(trainDIRs, testDIRs)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "1f077f43", + "metadata": {}, + "outputs": [], + "source": [ + "xtrain = flattenPatches(createPatches(xtrain, (16,16)))\n", + "xtest = flattenPatches(createPatches(xtest, (16,16)))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "a02e05bd", + "metadata": {}, + "outputs": [], + "source": [ + "def trainloader(batchsize=16):\n", + " return DataLoader(DatasetWrapper(xtrain, ytrain), batchsize=batchsize, shuffle=True)\n", + "\n", + "def testloader():\n", + " return DataLoader(DatasetWrapper(xtest, ytest), batchsize=1, shuffle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": 
"18d6ca10", + "metadata": {}, + "outputs": [], + "source": [ + "def trainshape():\n", + " return xtrain.shape\n", + "\n", + "def testshape():\n", + " return xtest.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "8979dcd1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([1076, 4800, 256])\n", + "torch.Size([450, 4800, 256])\n" + ] + } + ], + "source": [ + "print(xtrain.shape)\n", + "print(xtest.shape)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recognition/vision-transformer-4696689/old/dataset3d.ipynb b/recognition/vision-transformer-4696689/old/dataset3d.ipynb new file mode 100644 index 000000000..11983c0d9 --- /dev/null +++ b/recognition/vision-transformer-4696689/old/dataset3d.ipynb @@ -0,0 +1,319 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "338da719", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/oliver/opt/anaconda3/lib/python3.9/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: 'dlopen(/Users/oliver/opt/anaconda3/lib/python3.9/site-packages/torchvision/image.so, 6): Library not loaded: @rpath/libpng16.16.dylib\n", + " Referenced from: /Users/oliver/opt/anaconda3/lib/python3.9/site-packages/torchvision/image.so\n", + " Reason: Incompatible library version: image.so requires version 56.0.0 or later, but libpng16.16.dylib provides version 54.0.0'If you don't plan on using image functionality from `torchvision.io`, you can ignore this 
warning. Otherwise, there might be something wrong with your environment. Did you have `libjpeg` or `libpng` installed before building `torchvision` from source?\n", + " warn(\n" + ] + } + ], + "source": [ + "\"\"\"\n", + "Imports Here\n", + "\"\"\"\n", + "\"\"\"numpy and torch\"\"\"\n", + "import numpy as np\n", + "import torch\n", + "\n", + "\"\"\"PIL\"\"\"\n", + "from PIL import Image\n", + "\n", + "\"\"\"torchvision and utils\"\"\"\n", + "import torchvision.transforms as transforms\n", + "from torch.utils.data import DataLoader, Dataset\n", + "\n", + "\"\"\"os\"\"\"\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "65011ff4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\nLoading data from local file\\n'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"\n", + "Loading data from local file\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "206e485b", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"Assumes images have pixel values in range [0,255]\"\"\"\n", + "def getImages(trainDIRs, testDIRS):\n", + " \"\"\"Get image to tensor\"\"\"\n", + " transform = transforms.Compose([\n", + " transforms.PILToTensor()\n", + " ])\n", + " \"\"\"Loading data into arrays\"\"\"\n", + " xtrain, xtrain, xtest, ytest = [], [], [], []\n", + " \"\"\"training data\"\"\"\n", + " size = [0, 0]\n", + " for i, DIR in enumerate(trainDIRs):\n", + " px = []\n", + " j = 0\n", + " for filename in sorted(os.listdir(DIR)):\n", + " f = os.path.join(DIR, filename)\n", + " img = Image.open(f)\n", + " tensor = transform(img).float()\n", + " tensor.require_grad = True\n", + " px.append(tensor/255)\n", + " j = (j+1) % 20\n", + " if j == 0:\n", + " xtrain.append(torch.stack(px))\n", + " px = []\n", + " size[i] += 1\n", + " xtrain = torch.stack(xtrain)\n", + " ytrain = torch.from_numpy(np.concatenate((np.ones(size[0]), 
np.zeros(size[1])), axis=0))\n", + " \n", + " \"\"\"testing data\"\"\"\n", + " size = [0, 0]\n", + " for i, DIR in enumerate(testDIRs):\n", + " px = []\n", + " j = 0\n", + " for filename in sorted(os.listdir(DIR)):\n", + " f = os.path.join(DIR, filename)\n", + " img = Image.open(f)\n", + " tensor = transform(img).float()\n", + " tensor.require_grad = True\n", + " px.append(tensor/255)\n", + " j = (j+1) % 20\n", + " if j == 0:\n", + " xtest.append(torch.stack(px))\n", + " px = []\n", + " size[i] += 1\n", + " xtest = torch.stack(xtest)\n", + " splitsize = int(xtest.shape[0]/2)\n", + " xval, xtest = xtest.split(splitsize, dim=0)\n", + " yval, ytest = torch.from_numpy(np.concatenate((np.ones(size[0]), np.zeros(size[1])), axis=0)).split(splitsize)\n", + " return xtrain, ytrain, xtest, ytest, xval, yval" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "292100c2", + "metadata": {}, + "outputs": [], + "source": [ + "def createPatches(imgs, patchsize):\n", + " (N, M, C, W, H) = imgs.shape\n", + " (dsize, wsize, hsize) = patchsize\n", + " \"\"\"check for errors with sizing\"\"\"\n", + " if (M % dsize != 0) or (W % wsize != 0) or (H % hsize != 0):\n", + " raise Exception(\"patchsize is not appropriate\")\n", + " imgs = imgs.permute(0, 2, 1, 3, 4) # switch M and C\n", + " size = (N, C, M // dsize, dsize, W // wsize, wsize, H // hsize, hsize)\n", + " perm = (0, 2, 4, 6, 1, 3, 5, 7)\n", + " imgs = imgs.reshape(size).permute(perm).flatten(1, 3).flatten(2, 5)\n", + " return imgs #in format Nimgs, Npatches, patchsize" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "e0897522", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\nDataloader\\n'" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"\n", + "Dataloader\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "05c80732", + "metadata": {}, + "outputs": [], + "source": 
[ + "class DatasetWrapper(Dataset):\n", + " def __init__(self, X, y=None):\n", + " self.X, self.y = X, y\n", + "\n", + " def __len__(self):\n", + " return len(self.X)\n", + "\n", + " def __getitem__(self, idx):\n", + " if self.y is None:\n", + " return self.X[idx]\n", + " else:\n", + " return self.X[idx], self.y[idx]" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "ea41eef5", + "metadata": {}, + "outputs": [], + "source": [ + "trainDIRs = ['../../../AD_NC/train/AD/', '../../../AD_NC/train/NC']\n", + "testDIRs = ['../../../AD_NC/test/AD/', '../../../AD_NC/test/NC']\n", + "xtrain, ytrain, xtest, ytest, xval, yval = getImages(trainDIRs, testDIRs)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "1f077f43", + "metadata": {}, + "outputs": [], + "source": [ + "#xtrain = createPatches(xtrain, (4,16,16))\n", + "#xtest = createPatches(xtest, (4,16,16))\n", + "xval = createPatches(xval, (4,16,16))" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "a02e05bd", + "metadata": {}, + "outputs": [], + "source": [ + "def trainloader(batchsize=16):\n", + " return DataLoader(DatasetWrapper(xtrain, ytrain), batchsize=batchsize, shuffle=True)\n", + "\n", + "def testloader():\n", + " return DataLoader(DatasetWrapper(xtest, ytest), batchsize=1, shuffle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "18d6ca10", + "metadata": {}, + "outputs": [], + "source": [ + "def trainshape():\n", + " return xtrain.shape\n", + "\n", + "def testshape():\n", + " return xtest.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "690cd78c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([1076, 1200, 1024])\n" + ] + } + ], + "source": [ + "print(xtrain.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "c24aa902", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ 
+ "torch.Size([225, 1200, 1024])\n" + ] + } + ], + "source": [ + "print(xtest.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "a39f7b82", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([225, 1200, 1024])\n" + ] + } + ], + "source": [ + "print(xval.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9183e53b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recognition/vision-transformer-4696689/old/datasetconv.ipynb b/recognition/vision-transformer-4696689/old/datasetconv.ipynb new file mode 100644 index 000000000..715659242 --- /dev/null +++ b/recognition/vision-transformer-4696689/old/datasetconv.ipynb @@ -0,0 +1,365 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "id": "338da719", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/oliver/opt/anaconda3/lib/python3.9/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: 'dlopen(/Users/oliver/opt/anaconda3/lib/python3.9/site-packages/torchvision/image.so, 6): Library not loaded: @rpath/libpng16.16.dylib\n", + " Referenced from: /Users/oliver/opt/anaconda3/lib/python3.9/site-packages/torchvision/image.so\n", + " Reason: Incompatible library version: image.so requires version 56.0.0 or later, but libpng16.16.dylib provides version 54.0.0'If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. 
Otherwise, there might be something wrong with your environment. Did you have `libjpeg` or `libpng` installed before building `torchvision` from source?\n", + " warn(\n" + ] + } + ], + "source": [ + "\"\"\"\n", + "Imports Here\n", + "\"\"\"\n", + "\"\"\"numpy and torch\"\"\"\n", + "import numpy as np\n", + "import torch\n", + "\n", + "\"\"\"PIL\"\"\"\n", + "from PIL import Image\n", + "\n", + "\"\"\"torchvision and utils\"\"\"\n", + "import torchvision.transforms as transforms\n", + "from torch.utils.data import DataLoader, Dataset\n", + "\n", + "\"\"\"os\"\"\"\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "65011ff4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\nLoading data from local file\\n'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"\n", + "Loading data from local file\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "206e485b", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"Assumes images have pixel values in range [0,255]\"\"\"\n", + "def getImages(trainDIRs, testDIRS):\n", + " \"\"\"Get image to tensor\"\"\"\n", + " transform = transforms.Compose([\n", + " transforms.PILToTensor()\n", + " ])\n", + " \"\"\"Loading data into arrays\"\"\"\n", + " xtrain, xtrain, xtest, ytest = [], [], [], []\n", + " \"\"\"training data\"\"\"\n", + " size = [0, 0]\n", + " for i, DIR in enumerate(trainDIRs):\n", + " px = []\n", + " j = 0\n", + " for filename in sorted(os.listdir(DIR)):\n", + " f = os.path.join(DIR, filename)\n", + " img = Image.open(f)\n", + " tensor = transform(img).float()\n", + " tensor.require_grad = True\n", + " px.append(tensor/255)\n", + " j = (j+1) % 20\n", + " if j == 0:\n", + " xtrain.append(torch.stack(px))\n", + " px = []\n", + " size[i] += 1\n", + " xtrain = torch.stack(xtrain)\n", + " ytrain = torch.from_numpy(np.concatenate((np.ones(size[0]), np.zeros(size[1])), 
axis=0))\n", + " \n", + " \"\"\"testing data\"\"\"\n", + " size = [0, 0]\n", + " for i, DIR in enumerate(testDIRs):\n", + " px = []\n", + " j = 0\n", + " for filename in sorted(os.listdir(DIR)):\n", + " f = os.path.join(DIR, filename)\n", + " img = Image.open(f)\n", + " tensor = transform(img).float()\n", + " tensor.require_grad = True\n", + " px.append(tensor/255)\n", + " j = (j+1) % 20\n", + " if j == 0:\n", + " xtest.append(torch.stack(px))\n", + " px = []\n", + " size[i] += 1\n", + " xtest = torch.stack(xtest)\n", + " ytest = torch.from_numpy(np.concatenate((np.ones(size[0]), np.zeros(size[1])), axis=0))\n", + " return xtrain, ytrain, xtest, ytest" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e0897522", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\nDataloader\\n'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"\n", + "Dataloader\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "05c80732", + "metadata": {}, + "outputs": [], + "source": [ + "class DatasetWrapper(Dataset):\n", + " def __init__(self, X, y=None):\n", + " self.X, self.y = X, y\n", + "\n", + " def __len__(self):\n", + " return len(self.X)\n", + "\n", + " def __getitem__(self, idx):\n", + " if self.y is None:\n", + " return self.X[idx]\n", + " else:\n", + " return self.X[idx], self.y[idx]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ea41eef5", + "metadata": {}, + "outputs": [], + "source": [ + "trainDIRs = ['../../../AD_NC/train/AD/', '../../../AD_NC/train/NC']\n", + "testDIRs = ['../../../AD_NC/test/AD/', '../../../AD_NC/test/NC']\n", + "xtrain, ytrain, xtest, ytest = getImages(trainDIRs, testDIRs)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "a161d76a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([1076, 20, 1, 240, 256])\n", + 
"torch.Size([450, 20, 1, 240, 256])\n", + "torch.Size([450])\n" + ] + } + ], + "source": [ + "print(xtrain.shape)\n", + "print(xtest.shape)\n", + "print(ytest.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "id": "848190bc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([16, 192, 96])\n", + "0.2741\n" + ] + } + ], + "source": [ + "class ConvLayer2(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " #pool\n", + " self.pool = nn.MaxPool2d(kernel_size=3, stride=2)\n", + " self.relu = nn.ReLU()\n", + " #first layer\n", + " self.conv11_x = nn.Conv2d(20, 48, kernel_size=(11,11), stride=(4,4), padding=(0,0))\n", + " self.conv11_y = nn.Conv2d(240, 48, kernel_size=(11,3), stride=(4,1), padding=(0,0))\n", + " self.conv11_z = nn.Conv2d(256, 48, kernel_size=(3,11), stride=(1,4), padding=(0,0))\n", + " #second layer\n", + " self.conv5_x = nn.Conv2d(48, 192, kernel_size=(5,5), stride=(2,2), padding=(0,0))\n", + " self.conv5_y = nn.Conv2d(48, 192, kernel_size=(5,3), stride=(2,1), padding=(0,0))\n", + " self.conv5_z = nn.Conv2d(48, 192, kernel_size=(3,5), stride=(1,2), padding=(0,0))\n", + " #projection\n", + " self.l_x = nn.Linear(30, 32)\n", + " self.l_y = nn.Linear(12, 32)\n", + " self.l_z = nn.Linear(10, 32)\n", + "\n", + " def forward(self, imgs):\n", + " #input N, C, L, W, H\n", + " #first layer\n", + " x_x = self.relu(self.pool(self.conv11_x(imgs.flatten(1,2))))\n", + " x_y = self.relu(self.pool(self.conv11_y(imgs.permute(0,1,3,4,2).flatten(1,2))))\n", + " x_z = self.relu(self.pool(self.conv11_z(imgs.permute(0,1,4,2,3).flatten(1,2))))\n", + " #second layer\n", + " x_x = self.relu(self.pool(self.conv5_x(x_x)))\n", + " x_y = self.relu(self.pool(self.conv5_y(x_y)))\n", + " x_z = self.relu(self.pool(self.conv5_z(x_z)))\n", + " #projection\n", + " x_x = self.l_x(x_x.flatten(2,3))\n", + " x_y = self.l_y(x_y.flatten(2,3))\n", + " x_z = self.l_z(x_z.flatten(2,3))\n", + " 
return torch.cat([x_x, x_y, x_z], dim=2)\n", + "import time\n", + "start = time.time()\n", + "conv=ConvLayer2()\n", + "print(conv(xtrain[0:16,:].permute(0,2,1,3,4)).shape)\n", + "end = time.time()\n", + "print(round(end-start, 4))" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "f295ee82", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.004753589630127\n" + ] + } + ], + "source": [ + "conv_11 = nn.Conv2d(20, 64, kernel_size=(11,11), stride=(4,4), padding=(0,0))\n", + "import time\n", + "total = 0\n", + "for i in range(10):\n", + " start=time.time()\n", + " x = conv_11(xtrain[0:16, :].permute(0,2,1,3,4).flatten(1,2))\n", + " end = time.time()\n", + " total += end-start\n", + "print(total)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "c5e39d11", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([1076, 256, 4, 5, 6])\n" + ] + } + ], + "source": [ + "import torch.nn as nn\n", + "class ConvLayer(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.conv11 = nn.Conv3d(1, 64, kernel_size=(3,11,11), stride=(1,4,4), padding=(1,0,0))\n", + " self.firstpool = nn.MaxPool3d(kernel_size=3, stride=2)\n", + " self.conv5 = nn.Conv3d(64, 256, kernel_size=(3,5,5), stride=(1,2,2), padding=(1,0,0))\n", + " self.secondpool = nn.MaxPool3d(kernel_size=3, stride=2)\n", + "\n", + " def forward(self, imgs):\n", + " x = self.conv11(imgs)\n", + " x = self.firstpool(x)\n", + " x = self.conv5(x)\n", + " x = self.secondpool(x)\n", + " return x\n", + " \n", + "conv = ConvLayer()\n", + "x = conv(xtrain.permute(0,2,1,3,4))\n", + "print(x.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "a02e05bd", + "metadata": {}, + "outputs": [], + "source": [ + "def trainloader(batchsize=16):\n", + " return DataLoader(DatasetWrapper(xtrain, ytrain), batchsize=batchsize, shuffle=True)\n", + "\n", + "def 
testloader():\n", + " return DataLoader(DatasetWrapper(xtest, ytest), batchsize=1, shuffle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "18d6ca10", + "metadata": {}, + "outputs": [], + "source": [ + "def trainshape():\n", + " return xtrain.shape\n", + "\n", + "def testshape():\n", + " return xtest.shape" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recognition/vision-transformer-4696689/old/matplots.ipynb b/recognition/vision-transformer-4696689/old/matplots.ipynb new file mode 100644 index 000000000..2fed751f6 --- /dev/null +++ b/recognition/vision-transformer-4696689/old/matplots.ipynb @@ -0,0 +1,88 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "id": "46c2d9b9", + "metadata": {}, + "outputs": [], + "source": [ + "LOSS = [0.72875, 0.70531, 0.66767, 0.61233, 0.53435, 0.49842, 0.43119, 0.45669, 0.38625, 0.35263, 0.36537, 0.32514, 0.26318, 0.2506, 0.24311, 0.18782, 0.17435, 0.13011, 0.14882, 0.17382, 0.10999, 0.13796, 0.07506, 0.06944, 0.06198, 0.03524, 0.07395, 0.09999, 0.04692, 0.03988, 0.0566, 0.02929, 0.01366, 0.01277, 0.01246, 0.01824, 0.04371, 0.0791, 0.04064, 0.04082, 0.01846, 0.00784, 0.00725, 0.00714, 0.0071, 0.00703, 0.00697, 0.00684, 0.00686, 0.00677, 0.00665, 0.00629, 0.00595, 0.01606, 0.11788, 0.21843, 0.02893, 0.01473, 0.04044, 0.02642, 0.02621, 0.00663, 0.00604, 0.00071, 0.00035, 0.00026, 0.00022, 0.0002, 0.00018, 0.00016, 0.00015, 0.00014, 0.00013, 0.00012, 0.00011, 0.0001, 0.0001, 9e-05, 8e-05, 8e-05, 7e-05, 7e-05, 7e-05, 6e-05, 6e-05, 6e-05, 5e-05, 5e-05, 5e-05, 5e-05, 4e-05, 4e-05, 4e-05, 
4e-05, 4e-05, 4e-05, 3e-05, 3e-05, 3e-05, 3e-05, 3e-05, 3e-05, 3e-05, 3e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n", + "ACC = [50.67, 51.11, 58.67, 63.11, 57.78, 62.67, 63.56, 66.22, 66.22, 67.11, 66.67, 65.78, 67.56, 65.33, 68.0, 68.44, 67.11, 64.89, 64.89, 67.56, 68.0, 69.33, 67.11, 67.56, 68.0, 67.56, 66.22, 71.11, 69.33, 67.11, 66.67, 69.78, 69.33, 69.78, 69.78, 68.0, 66.67, 68.89, 69.78, 69.78, 68.44, 67.56, 67.11, 67.56, 67.56, 67.56, 68.0, 68.0, 68.0, 68.0, 68.0, 67.56, 67.56, 68.0, 66.22, 70.67, 67.56, 66.67, 68.89, 65.33, 66.67, 70.22, 68.0, 69.78, 68.89, 68.0, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6983675d", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "steps = range(175)\n", + "plt.plot(steps, LOSS)\n", + "plt.ylabel('LOSS')\n", + "plt.xlabel('epoch')\n", + "plt.show()\n", + "plt.plot(steps, ACC)\n", + "plt.ylabel('ACCURACY')\n", + "plt.xlabel('epoch')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d7c1506", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recognition/vision-transformer-4696689/old/model.ipynb b/recognition/vision-transformer-4696689/old/model.ipynb new file mode 100644 index 000000000..3263fcf63 --- /dev/null +++ b/recognition/vision-transformer-4696689/old/model.ipynb @@ -0,0 +1,182 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 37, + "id": "fc1d26a6", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "Imports Here\n", + "\"\"\"\n", + "import numpy as np\n", + "import torch\n", + "import torch.nn as nn" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "00044d75", + "metadata": {}, + "outputs": [], + "source": [ + "class Attention(nn.Module):\n", + " def __init__(self, heads, EMBED_DIMENSION):\n", + " super().__init__()\n", + " self.heads = heads\n", + " self.attn = nn.MultiheadAttention(EMBED_DIMENSION, heads, batch_first=True)\n", + " self.Q = nn.Linear(EMBED_DIMENSION, EMBED_DIMENSION, bias=False)\n", + " self.K = nn.Linear(EMBED_DIMENSION, EMBED_DIMENSION, bias=False)\n", + " self.V = 
nn.Linear(EMBED_DIMENSION, EMBED_DIMENSION, bias=False)\n", + " \n", + " def forward(self, x):\n", + " Q = self.Q(x)\n", + " K = self.K(x)\n", + " V = self.V(x)\n", + " \n", + " attnout, attnweights = self.attn(Q, K, V)\n", + " return attnout" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "733599f9", + "metadata": {}, + "outputs": [], + "source": [ + "class TransBlock(nn.Module):\n", + " def __init__(self, heads, EMBED_DIMENSION, fflsize):\n", + " super().__init__()\n", + " self.fnorm = nn.LayerNorm(EMBED_DIMENSION)\n", + " self.snorm = nn.LayerNorm(EMBED_DIMENSION)\n", + " self.attn = Attention(heads, EMBED_DIMENSION)\n", + " self.ffl = nn.Sequential(\n", + " nn.Linear(EMBED_DIMENSION, fflsize),\n", + " nn.GELU(),\n", + " nn.Linear(fflsize, EMBED_DIMENSION)\n", + " )\n", + " \n", + " def forward(self, x):\n", + " \"\"\"\n", + " Switching to pre-MHA LayerNorm is supposed to give better performance,\n", + " this is used in other models such as LLMs like GPT. Gradients are meant\n", + " to be stabilised. 
This is different to the original ViT paper.\n", + " \"\"\"\n", + " x = x + self.attn(self.fnorm(x))[0]\n", + " x = x + self.ffl(self.snorm(x))\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2a5e050", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "Inception module for efficient 7x7 convolution\n", + "\"\"\"\n", + "class Inception(nn.Module):\n", + " def __init__(self, dimin, dimout):\n", + " super().__init__()\n", + " self.branch1 = nn.Sequential(\n", + " nn.Conv2d(dimin, dimout[0], 1, stride=(1,1)),\n", + " nn.Conv2d(dimout[0], dimout[0], 3, stride=(1,1), padding=1),\n", + " nn.Conv2d(dimout[0], dimout[0], 3, stride=(1,1), padding=1)\n", + " )\n", + " self.branch2 = nn.Sequential(\n", + " nn.Conv2d(dimin, dimout[1]), 1, stride=(1,1),\n", + " nn.Conv2d(dimout[1], dimout[1], 3, stride=(1,1), padding=1)\n", + " )\n", + " self.branch3 = nn.Sequential(\n", + " nn.AvgPool2d(3, stride=(1,1), padding=1),\n", + " nn.Conv2d(dimin, dimout[2], 1, stride=(1,1))\n", + " )\n", + " self.branch4 = nn.Sequential(\n", + " nn.Conv2d(dimin, dimout[3], 1, stride=(1,1))\n", + " )\n", + " def forward(self, imgs)\n", + " x1 = self.branch1(imgs)\n", + " x2 = self.branch2(imgs)\n", + " x3 = self.branch3(imgs)\n", + " x4 = self.branch4(imgs)\n", + " return torch.cat([x1, x2, x3, x4], dim=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "e6ac9e2b", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "Vision Transformer Class to create a vision transformer model\n", + "\"\"\"\n", + "class VisionTransformer(nn.Module):\n", + " def __init__(self, classes=2, inputsize=(1,1,1), heads=2, fflscale=2, nblocks=1):\n", + " super().__init__()\n", + " (self.N, self.Np, self.P) = inputsize\n", + " \"\"\"components\"\"\"\n", + " self.proj = nn.Linear(self.P, EMBED_DIMENSION)\n", + " self.clstoken = nn.Parameter(torch.zeros(1, 1, EMBED_DIMENSION))\n", + " self.posembed = self.embedding(self.Np+1, 
EMBED_DIMENSION, freq=10000) #10000 is described in ViT paper\n", + " self.posembed = self.posembed.repeat(self.N, 1, 1)\n", + " self.transformer = nn.Sequential(\n", + " *((TransBlock(heads, EMBED_DIMENSION, int(fflscale*EMBED_DIMENSION)),)*nblocks)\n", + " )\n", + " self.classifier = nn.Sequential(\n", + " nn.LayerNorm(EMBED_DIMENSION),\n", + " nn.Linear(EMBED_DIMENSION, classes)\n", + " )\n", + " \n", + " def embedding(npatches, EMBED_DIMENSION, freq):\n", + " posembed = torch.zeros(npatches, EMBED_DIMENSION)\n", + " for i in range(npatches):\n", + " for j in range(EMBED_DIMENSION):\n", + " if j % 2 == 0:\n", + " posembed[i][j] = np.sin(i/(freq**(j/EMBED_DIMENSION)))\n", + " else:\n", + " posembed[i][j] = np.cos(i/(freq**((j-1)/EMBED_DIMENSION)))\n", + " return posembed\n", + " \n", + " def forward(self, imgs): #assume size checking done by createPatches\n", + " \"\"\"Linear Projection and Positional Embedding\"\"\"\n", + " tokens = self.proj(imgs) #perform linear projection\n", + " clstoken = self.clstoken.repeat(self.N, 1, 1)\n", + " tokens = torch.cat([clstoken, tokens], dim=1) #concat the class token\n", + " x = tokens + self.posembed #add positional encoding\n", + " \"\"\"Transformer\"\"\"\n", + " x = self.transformer(x)\n", + " \"\"\"Classification\"\"\"\n", + " y = x[0]\n", + " return self.classifier(y)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/recognition/vision-transformer-4696689/old/model.py b/recognition/vision-transformer-4696689/old/model.py new file mode 100644 index 000000000..b8fd3eeca --- /dev/null +++ 
"""
Vision Transformer (encoder-only, pre-LayerNorm) building blocks.
"""
import numpy as np
import torch
import torch.nn as nn


class Attention(nn.Module):
    """Multi-head self-attention preceded by bias-free Q/K/V projections.

    The input is projected by three separate linear layers and the results
    are handed to ``nn.MultiheadAttention`` (batch-first layout) as query,
    key and value.

    Args:
        heads: number of attention heads.
        embed: embedding width of every token.
    """

    def __init__(self, heads, embed):
        super().__init__()
        self.heads = heads
        self.attn = nn.MultiheadAttention(embed, heads, batch_first=True)
        self.Q = nn.Linear(embed, embed, bias=False)
        self.K = nn.Linear(embed, embed, bias=False)
        self.V = nn.Linear(embed, embed, bias=False)

    def forward(self, x):
        """Return the attention output for ``x`` (same shape as ``x``)."""
        # The averaged attention map was never used by callers, so skip
        # computing it (need_weights=False makes the second value None).
        attnout, _ = self.attn(self.Q(x), self.K(x), self.V(x), need_weights=False)
        return attnout


class TransBlock(nn.Module):
    """One transformer encoder block: attention + feed-forward, both residual.

    Args:
        heads: number of attention heads.
        embed: embedding width of every token.
        fflsize: hidden width of the feed-forward layer.
    """

    def __init__(self, heads, embed, fflsize):
        super().__init__()
        self.fnorm = nn.LayerNorm(embed)
        self.snorm = nn.LayerNorm(embed)
        self.attn = Attention(heads, embed)
        self.ffl = nn.Sequential(
            nn.Linear(embed, fflsize),
            nn.GELU(),
            nn.Linear(fflsize, embed)
        )

    def forward(self, x):
        """
        Switching to pre-MHA LayerNorm is supposed to give better performance,
        this is used in other models such as LLMs like GPT. Gradients are meant
        to be stabilised. This is different to the original ViT paper.
        """
        x = x + self.attn(self.fnorm(x))
        x = x + self.ffl(self.snorm(x))
        return x


class VisionTransformer(nn.Module):
    """Vision Transformer classifier over pre-extracted, flattened patches.

    Args:
        classes: number of output classes.
        inputsize: ``(N, Np, P)`` tuple. ``Np`` is the number of patches per
            sample and ``P`` the flattened patch length. ``N`` is only stored
            on the instance; ``forward`` takes the batch size from its input.
        heads: number of attention heads per block.
        embed: embedding width of every token.
        fflscale: feed-forward hidden width as a multiple of ``embed``.
        nblocks: number of stacked encoder blocks.
    """

    def __init__(self, classes=2, inputsize=(1,1,1), heads=2, embed=64, fflscale=2, nblocks=1):
        super().__init__()
        (self.N, self.Np, self.P) = inputsize
        """components"""
        self.proj = nn.Linear(self.P, embed)
        self.clstoken = nn.Parameter(torch.zeros(1, 1, embed))
        # Registered as a buffer so it follows .to()/.cuda() with the module.
        # Non-persistent keeps state_dict keys the same as when this was a
        # plain attribute.
        self.register_buffer("posembed", self.embedding(self.Np+1, embed), persistent=False)
        # Build a fresh block per layer. Multiplying a one-element tuple would
        # repeat the SAME module object, silently tying all layers' weights.
        self.transformer = nn.Sequential(
            *(TransBlock(heads, embed, int(fflscale*embed)) for _ in range(nblocks))
        )
        self.classifier = nn.Sequential(
            nn.LayerNorm(embed),
            nn.Linear(embed, classes)
        )

    def embedding(self, npatches, embed, freq=10000):
        """Build the fixed sinusoidal position table.

        Column ``j`` of row ``i`` is ``sin(i / freq**(j/embed))`` for even
        ``j`` and ``cos(i / freq**((j-1)/embed))`` for odd ``j``. The base
        10000 is the constant from the sinusoidal encoding of "Attention Is
        All You Need" (the ViT paper itself uses learned embeddings).

        Returns:
            float32 tensor of shape ``(npatches, embed)``.
        """
        positions = np.arange(npatches, dtype=np.float64)[:, None]
        even_dims = np.arange(0, embed, 2, dtype=np.float64)
        angles = positions / (freq ** (even_dims / embed))
        table = np.zeros((npatches, embed), dtype=np.float32)
        table[:, 0::2] = np.sin(angles)
        # Each odd column reuses the frequency of the even column before it;
        # for odd `embed` there is one fewer odd column than even columns.
        table[:, 1::2] = np.cos(angles[:, :embed // 2])
        return torch.from_numpy(table)

    def forward(self, imgs): #assume size checking done by createPatches
        """Classify a batch of patch sequences.

        Args:
            imgs: tensor of shape ``(batch, Np, P)``.

        Returns:
            Logits of shape ``(batch, classes)`` taken from the class token.
        """
        """Linear Projection and Positional Embedding"""
        tokens = self.proj(imgs) #perform linear projection
        clstoken = self.clstoken.expand(imgs.shape[0], -1, -1) #view, no copy
        tokens = torch.cat([clstoken, tokens], dim=1) #concat the class token
        x = tokens + self.posembed #positional encoding broadcasts over batch
        """Transformer"""
        x = self.transformer(x)
        """Classification"""
        y = x[:,0]
        return self.classifier(y)
0.32514, 0.26318, 0.2506, 0.24311, 0.18782, 0.17435, 0.13011, 0.14882, 0.17382, 0.10999, 0.13796, 0.07506, 0.06944, 0.06198, 0.03524, 0.07395, 0.09999, 0.04692, 0.03988, 0.0566, 0.02929, 0.01366, 0.01277, 0.01246, 0.01824, 0.04371, 0.0791, 0.04064, 0.04082, 0.01846, 0.00784, 0.00725, 0.00714, 0.0071, 0.00703, 0.00697, 0.00684, 0.00686, 0.00677, 0.00665, 0.00629, 0.00595, 0.01606, 0.11788, 0.21843, 0.02893, 0.01473, 0.04044, 0.02642, 0.02621, 0.00663, 0.00604, 0.00071, 0.00035, 0.00026, 0.00022, 0.0002, 0.00018, 0.00016, 0.00015, 0.00014, 0.00013, 0.00012, 0.00011, 0.0001, 0.0001, 9e-05, 8e-05, 8e-05, 7e-05, 7e-05, 7e-05, 6e-05, 6e-05, 6e-05, 5e-05, 5e-05, 5e-05, 5e-05, 4e-05, 4e-05, 4e-05, 4e-05, 4e-05, 4e-05, 3e-05, 3e-05, 3e-05, 3e-05, 3e-05, 3e-05, 3e-05, 3e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 2e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 1e-05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +[50.67, 51.11, 58.67, 63.11, 57.78, 62.67, 63.56, 66.22, 66.22, 67.11, 66.67, 65.78, 67.56, 65.33, 68.0, 68.44, 67.11, 64.89, 64.89, 67.56, 68.0, 69.33, 67.11, 67.56, 68.0, 67.56, 66.22, 71.11, 69.33, 67.11, 66.67, 69.78, 69.33, 69.78, 69.78, 68.0, 66.67, 68.89, 69.78, 69.78, 68.44, 67.56, 67.11, 67.56, 67.56, 67.56, 68.0, 68.0, 68.0, 68.0, 68.0, 67.56, 67.56, 68.0, 66.22, 70.67, 67.56, 66.67, 68.89, 65.33, 66.67, 70.22, 68.0, 69.78, 68.89, 68.0, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 
68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44, 68.44] +[147.563, 146.343, 144.501, 147.546, 144.388, 143.652, 146.672, 144.336, 145.402, 146.032, 144.47, 144.527, 145.94, 145.326, 144.034, 145.458, 146.047, 143.858, 146.212, 144.663, 144.781, 146.169, 143.851, 146.982, 143.694, 145.329, 145.16, 146.066, 144.08, 145.364, 145.876, 143.906, 145.965, 144.99, 144.381, 147.893, 146.199, 144.357, 145.847, 144.55, 144.047, 145.702, 144.852, 143.926, 145.867, 144.55, 144.213, 146.131, 144.313, 144.568, 145.913, 144.292, 147.893, 147.291, 148.067, 148.66, 149.459, 148.164, 148.963, 149.543, 144.27, 145.208, 145.364, 143.899, 146.17, 143.49, 146.005, 144.319, 144.524, 145.954, 143.908, 145.923, 149.609, 148.143, 149.126, 147.25, 143.868, 145.934, 144.889, 144.385, 146.232, 144.071, 145.286, 145.871, 143.787, 145.719, 148.777, 147.816, 149.28, 148.8, 148.009, 149.313, 149.438, 147.923, 148.943, 149.355, 148.399, 148.242, 149.209, 149.388, 148.377, 148.594, 149.603, 148.353, 148.588, 149.617, 148.425, 148.436, 149.528, 148.536, 148.31, 149.578, 148.509, 148.387, 149.569, 148.542, 148.188, 149.53, 148.641, 148.101, 149.468, 148.894, 148.149, 148.935, 149.422, 148.588, 148.187, 149.229, 149.147, 149.19, 148.44, 148.16, 149.419, 148.88, 148.568, 148.514, 148.583, 148.594, 148.789, 148.996, 149.07, 149.142, 148.768, 148.309, 148.454, 148.685, 149.076, 149.272, 148.759, 148.253, 148.44, 149.121, 149.245, 148.525, 148.261, 148.695, 149.247, 149.253, 148.579, 148.307, 149.357, 147.468, 148.775, 147.945, 149.511, 148.644, 148.232, 149.552, 148.53, 148.147, 149.467, 148.824, 148.064, 149.387, 149.3] \ No 
"""
Vision Transformer (encoder-only, pre-LayerNorm) building blocks, plus an
Inception-style convolutional module.
"""
import numpy as np
import torch
import torch.nn as nn


class Attention(nn.Module):
    """Multi-head self-attention preceded by bias-free Q/K/V projections.

    The input is projected by three separate linear layers and the results
    are handed to ``nn.MultiheadAttention`` (batch-first layout) as query,
    key and value.

    Args:
        heads: number of attention heads.
        embed: embedding width of every token.
    """

    def __init__(self, heads, embed):
        super().__init__()
        self.heads = heads
        self.attn = nn.MultiheadAttention(embed, heads, batch_first=True)
        self.Q = nn.Linear(embed, embed, bias=False)
        self.K = nn.Linear(embed, embed, bias=False)
        self.V = nn.Linear(embed, embed, bias=False)

    def forward(self, x):
        """Return the attention output for ``x`` (same shape as ``x``)."""
        # The averaged attention map was never used by callers, so skip
        # computing it (need_weights=False makes the second value None).
        attnout, _ = self.attn(self.Q(x), self.K(x), self.V(x), need_weights=False)
        return attnout


class TransBlock(nn.Module):
    """One transformer encoder block: attention + feed-forward, both residual.

    Args:
        heads: number of attention heads.
        embed: embedding width of every token.
        fflsize: hidden width of the feed-forward layer.
    """

    def __init__(self, heads, embed, fflsize):
        super().__init__()
        self.fnorm = nn.LayerNorm(embed)
        self.snorm = nn.LayerNorm(embed)
        self.attn = Attention(heads, embed)
        self.ffl = nn.Sequential(
            nn.Linear(embed, fflsize),
            nn.GELU(),
            nn.Linear(fflsize, embed)
        )

    def forward(self, x):
        """
        Switching to pre-MHA LayerNorm is supposed to give better performance,
        this is used in other models such as LLMs like GPT. Gradients are meant
        to be stabilised. This is different to the original ViT paper.
        """
        x = x + self.attn(self.fnorm(x))
        x = x + self.ffl(self.snorm(x))
        return x


class Inception(nn.Module):
    """Inception-style module: four parallel 2D branches, concatenated, pooled.

    Branches (all preserve the spatial size):
      1. 1x1 conv followed by two 3x3 convs (padding 1),
      2. 1x1 conv followed by one 3x3 conv (padding 1),
      3. 3x3 average pool (stride 1, padding 1) followed by a 1x1 conv,
      4. a single 1x1 conv.
    The branch outputs are concatenated on the channel axis and passed
    through a 2x2 max pool with stride 2.

    NOTE(review): the original header described this as an "efficient 7x7
    convolution"; two stacked 3x3 kernels give a 5x5 receptive field, so
    confirm the intended design.

    Args:
        dimin: number of input channels.
        dimout: indexable of four ints, the output channels of branches 1-4.
    """

    def __init__(self, dimin, dimout):
        super().__init__()
        self.branch1 = nn.Sequential(
            nn.Conv2d(dimin, dimout[0], 1, stride=(1,1)),
            nn.Conv2d(dimout[0], dimout[0], 3, stride=(1,1), padding=1),
            nn.Conv2d(dimout[0], dimout[0], 3, stride=(1,1), padding=1)
        )
        self.branch2 = nn.Sequential(
            nn.Conv2d(dimin, dimout[1], 1, stride=(1,1)),
            nn.Conv2d(dimout[1], dimout[1], 3, stride=(1,1), padding=1)
        )
        self.branch3 = nn.Sequential(
            nn.AvgPool2d(3, stride=(1,1), padding=1),
            nn.Conv2d(dimin, dimout[2], 1, stride=(1,1))
        )
        self.branch4 = nn.Sequential(
            nn.Conv2d(dimin, dimout[3], 1, stride=(1,1))
        )
        self.maxpool = nn.MaxPool2d(2, 2)

    def forward(self, imgs):
        """Run all branches on ``imgs`` and return the pooled concatenation."""
        x1 = self.branch1(imgs)
        x2 = self.branch2(imgs)
        x3 = self.branch3(imgs)
        x4 = self.branch4(imgs)
        return self.maxpool(torch.cat([x1, x2, x3, x4], dim=1))


class VisionTransformer(nn.Module):
    """Vision Transformer classifier over pre-extracted, flattened patches.

    Args:
        classes: number of output classes.
        inputsize: ``(N, Np, P)`` tuple. ``Np`` is the number of patches per
            sample and ``P`` the flattened patch length. ``N`` is only stored
            on the instance; ``forward`` takes the batch size from its input.
        heads: number of attention heads per block.
        embed: embedding width of every token.
        fflscale: feed-forward hidden width as a multiple of ``embed``.
        nblocks: number of stacked encoder blocks.
    """

    def __init__(self, classes=2, inputsize=(1,1,1), heads=2, embed=64, fflscale=2, nblocks=1):
        super().__init__()
        (self.N, self.Np, self.P) = inputsize
        """components"""
        self.proj = nn.Linear(self.P, embed)
        self.clstoken = nn.Parameter(torch.zeros(1, 1, embed))
        # Registered as a buffer so it follows .to()/.cuda() with the module.
        # Non-persistent keeps state_dict keys the same as when this was a
        # plain attribute.
        self.register_buffer("posembed", self.embedding(self.Np+1, embed), persistent=False)
        # Build a fresh block per layer. Multiplying a one-element tuple would
        # repeat the SAME module object, silently tying all layers' weights.
        self.transformer = nn.Sequential(
            *(TransBlock(heads, embed, int(fflscale*embed)) for _ in range(nblocks))
        )
        self.classifier = nn.Sequential(
            nn.LayerNorm(embed),
            nn.Linear(embed, classes)
        )

    def embedding(self, npatches, embed, freq=10000):
        """Build the fixed sinusoidal position table.

        Column ``j`` of row ``i`` is ``sin(i / freq**(j/embed))`` for even
        ``j`` and ``cos(i / freq**((j-1)/embed))`` for odd ``j``. The base
        10000 is the constant from the sinusoidal encoding of "Attention Is
        All You Need" (the ViT paper itself uses learned embeddings).

        Returns:
            float32 tensor of shape ``(npatches, embed)``.
        """
        positions = np.arange(npatches, dtype=np.float64)[:, None]
        even_dims = np.arange(0, embed, 2, dtype=np.float64)
        angles = positions / (freq ** (even_dims / embed))
        table = np.zeros((npatches, embed), dtype=np.float32)
        table[:, 0::2] = np.sin(angles)
        # Each odd column reuses the frequency of the even column before it;
        # for odd `embed` there is one fewer odd column than even columns.
        table[:, 1::2] = np.cos(angles[:, :embed // 2])
        return torch.from_numpy(table)

    def forward(self, imgs): #assume size checking done by createPatches
        """Classify a batch of patch sequences.

        Args:
            imgs: tensor of shape ``(batch, Np, P)``.

        Returns:
            Logits of shape ``(batch, classes)`` taken from the class token.
        """
        """Linear Projection and Positional Embedding"""
        tokens = self.proj(imgs) #perform linear projection
        clstoken = self.clstoken.expand(imgs.shape[0], -1, -1) #view, no copy
        tokens = torch.cat([clstoken, tokens], dim=1) #concat the class token
        x = tokens + self.posembed #positional encoding broadcasts over batch
        """Transformer"""
        x = self.transformer(x)
        """Classification"""
        y = x[:,0]
        return self.classifier(y)
Did you have `libjpeg` or `libpng` installed before building `torchvision` from source?\n", + " warn(\n" + ] + } + ], + "source": [ + "\"\"\"\n", + "Imports Here\n", + "\"\"\"\n", + "from dataset import trainloader\n", + "from dataset import testloader\n", + "from dataset import trainaccloader\n", + "from dataset import trainshape\n", + "from dataset import testshape" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "df0ea69a", + "metadata": {}, + "outputs": [], + "source": [ + "from model import VisionTransformer\n", + "from model import Attention\n", + "from model import TransBlock\n", + "from model3d import Inception" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "ae8aebe7", + "metadata": {}, + "outputs": [], + "source": [ + "TRAIN_LOSS = []\n", + "TRAIN_ACC = []\n", + "\n", + "def train(model, dataloader, accloader, lossfunc, optimiser, lr=0.1, momentum=0.9, batchsize=16, nepochs=10):\n", + " device = next(model.parameters()).device # check what device the net parameters are on\n", + " \n", + " \"\"\"training\"\"\"\n", + " for i in range(nepochs): # for each epoch\n", + " epoch_loss = 0\n", + " model.train()\n", + " n_batches = 0\n", + " time1 = time.time()\n", + " for (x, y) in dataloader: # for each mini-batch\n", + " optimiser.zero_grad(set_to_none=True)\n", + " loss = lossfunc(model.forward(x), y)\n", + " loss.backward()\n", + " optimiser.step()\n", + " epoch_loss += loss.detach().item()\n", + " n_batches += 1\n", + " time2 = time.time()\n", + " print(\"Done an epoch\", time2-time1)\n", + " epoch_loss /= n_batches\n", + " \n", + " \"\"\"evaluating\"\"\"\n", + " model.eval()\n", + " accuracy = test(model, accloader).detach().item()\n", + "\n", + " \"\"\"get performance\"\"\"\n", + " TRAIN_LOSS.append(epoch_loss)\n", + " TRAIN_ACC.append(accuracy)\n", + "\n", + "def test(model, dataloader):\n", + " with torch.no_grad(): # disable automatic gradient computation for efficiency\n", + " device = 
next(model.parameters()).device\n", + " \n", + " \"\"\"make predictions\"\"\"\n", + " pcls = []\n", + " items = 0\n", + " time1=time.time()\n", + " for x, y in dataloader:\n", + " x = x.to(device)\n", + " pcls.append(abs(y.cpu()-torch.max(model(x), 1)[1].cpu()))\n", + " items += 1\n", + " time2 = time.time()\n", + " print(\"found accuracy in:\", time2-time1)\n", + "\n", + " \"\"\"get accuracy\"\"\"\n", + " pcls = torch.cat(pcls) # concat predictions on the mini-batches\n", + " accuracy = 1 - (pcls.sum().float() / items)\n", + " print(\"accuracy:\", accuracy)\n", + " return accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "26cde279", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([16, 1, 20, 240, 256])\n" + ] + } + ], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "rand = torch.rand((16, 1, 20, 240, 256))\n", + "print(rand.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "f6d1dc89", + "metadata": {}, + "outputs": [], + "source": [ + "class ConvLayer(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.conv11 = nn.Conv3d(1, 64, kernel_size=(3,11,11), stride=(1,4,4), padding=(1,0,0))\n", + " self.firstpool = nn.MaxPool3d(kernel_size=3, stride=2)\n", + " self.conv5 = nn.Conv3d(64, 256, kernel_size=(3,5,5), stride=(1,2,2), padding=(1,0,0))\n", + " self.secondpool = nn.MaxPool3d(kernel_size=3, stride=2)\n", + " \n", + " def forward(self, imgs):\n", + " x = self.conv11(imgs)\n", + " x = self.firstpool(x)\n", + " x = self.conv5(x)\n", + " x = self.secondpool(x)\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "e1bd3d40", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "time: 0.8001\n", + "torch.Size([16, 256, 4, 5, 6])\n" + ] + } + ], + "source": [ + "import time\n", + "conv = ConvLayer()\n", + "start = time.time()\n", + 
"out = conv(rand)\n", + "end = time.time()\n", + "print(\"time:\", round(end-start,4))\n", + "print(out.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75a45973", + "metadata": {}, + "outputs": [], + "source": [ + "batchsize=16\n", + "N, Np, P = trainshape()\n", + "model = VisionTransformer(inputsize=(batchsize, Np, P), embed=128, fflscale=2, nblocks=4)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "7b54a6f0", + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.optim as optim\n", + "\n", + "criterion = nn.CrossEntropyLoss()\n", + "optimiser = optim.AdamW(model.parameters(), lr=1e-4)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "18488555", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done an epoch 346.20038080215454\n", + "found accuracy in: 135.9069368839264\n", + "accuracy: tensor(0.5288)\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [43]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m start \u001b[38;5;241m=\u001b[39m time\u001b[38;5;241m.\u001b[39mtime()\n\u001b[0;32m----> 2\u001b[0m \u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrainloader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbatchsize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbatchsize\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrainaccloader\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mcriterion\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moptimiser\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnepochs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m10\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m end \u001b[38;5;241m=\u001b[39m time\u001b[38;5;241m.\u001b[39mtime()\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtraining time: \u001b[39m\u001b[38;5;124m\"\u001b[39m, end\u001b[38;5;241m-\u001b[39mstart)\n", + "Input \u001b[0;32mIn [40]\u001b[0m, in \u001b[0;36mtrain\u001b[0;34m(model, dataloader, accloader, lossfunc, optimiser, lr, momentum, batchsize, nepochs)\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m (x, y) \u001b[38;5;129;01min\u001b[39;00m dataloader: \u001b[38;5;66;03m# for each mini-batch\u001b[39;00m\n\u001b[1;32m 14\u001b[0m optimiser\u001b[38;5;241m.\u001b[39mzero_grad(set_to_none\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m---> 15\u001b[0m loss \u001b[38;5;241m=\u001b[39m lossfunc(\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m, y)\n\u001b[1;32m 16\u001b[0m loss\u001b[38;5;241m.\u001b[39mbackward()\n\u001b[1;32m 17\u001b[0m optimiser\u001b[38;5;241m.\u001b[39mstep()\n", + "File \u001b[0;32m~/Desktop/COMP3710 Project/PatternAnalysis-2023/recognition/vision-transformer-4696689/model.py:84\u001b[0m, in \u001b[0;36mVisionTransformer.forward\u001b[0;34m(self, imgs)\u001b[0m\n\u001b[1;32m 82\u001b[0m x \u001b[38;5;241m=\u001b[39m tokens \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mposembed\u001b[38;5;241m.\u001b[39mrepeat(imgs\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m], \u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m1\u001b[39m) \u001b[38;5;66;03m#add positional encoding\u001b[39;00m\n\u001b[1;32m 
83\u001b[0m \u001b[38;5;124;03m\"\"\"Transformer\"\"\"\u001b[39;00m\n\u001b[0;32m---> 84\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtransformer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 85\u001b[0m \u001b[38;5;124;03m\"\"\"Classification\"\"\"\u001b[39;00m\n\u001b[1;32m 86\u001b[0m y \u001b[38;5;241m=\u001b[39m x[:,\u001b[38;5;241m0\u001b[39m]\n", + "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1500\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n", + "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/container.py:217\u001b[0m, in \u001b[0;36mSequential.forward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m):\n\u001b[1;32m 216\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m:\n\u001b[0;32m--> 217\u001b[0m \u001b[38;5;28minput\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[43mmodule\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28minput\u001b[39m\n", + "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1500\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n", + "File \u001b[0;32m~/Desktop/COMP3710 Project/PatternAnalysis-2023/recognition/vision-transformer-4696689/model.py:43\u001b[0m, in \u001b[0;36mTransBlock.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 37\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, x):\n\u001b[1;32m 38\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 39\u001b[0m \u001b[38;5;124;03m Switching to pre-MHA LayerNorm is supposed to give better performance,\u001b[39;00m\n\u001b[1;32m 40\u001b[0m \u001b[38;5;124;03m this is used in other models such as LLMs like GPT. Gradients are meant\u001b[39;00m\n\u001b[1;32m 41\u001b[0m \u001b[38;5;124;03m to be stabilised. 
This is different to the original ViT paper.\u001b[39;00m\n\u001b[1;32m 42\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 43\u001b[0m x \u001b[38;5;241m=\u001b[39m x \u001b[38;5;241m+\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfnorm\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 44\u001b[0m x \u001b[38;5;241m=\u001b[39m x \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mffl(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msnorm(x))\n\u001b[1;32m 45\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m x\n", + "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1500\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m 
_global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n", + "File \u001b[0;32m~/Desktop/COMP3710 Project/PatternAnalysis-2023/recognition/vision-transformer-4696689/model.py:22\u001b[0m, in \u001b[0;36mAttention.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 19\u001b[0m K \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mK(x)\n\u001b[1;32m 20\u001b[0m V \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mV(x)\n\u001b[0;32m---> 22\u001b[0m attnout, attnweights \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mQ\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mK\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mV\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 23\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m attnout\n", + "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m 
(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1500\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n", + "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/activation.py:1189\u001b[0m, in \u001b[0;36mMultiheadAttention.forward\u001b[0;34m(self, query, key, value, key_padding_mask, need_weights, attn_mask, average_attn_weights, is_causal)\u001b[0m\n\u001b[1;32m 1175\u001b[0m attn_output, attn_output_weights \u001b[38;5;241m=\u001b[39m F\u001b[38;5;241m.\u001b[39mmulti_head_attention_forward(\n\u001b[1;32m 1176\u001b[0m query, key, value, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39membed_dim, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnum_heads,\n\u001b[1;32m 1177\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39min_proj_weight, 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39min_proj_bias,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1186\u001b[0m average_attn_weights\u001b[38;5;241m=\u001b[39maverage_attn_weights,\n\u001b[1;32m 1187\u001b[0m is_causal\u001b[38;5;241m=\u001b[39mis_causal)\n\u001b[1;32m 1188\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1189\u001b[0m attn_output, attn_output_weights \u001b[38;5;241m=\u001b[39m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmulti_head_attention_forward\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1190\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43membed_dim\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnum_heads\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1191\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43min_proj_weight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43min_proj_bias\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1192\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbias_k\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbias_v\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd_zero_attn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1193\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdropout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mout_proj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mout_proj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbias\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1194\u001b[0m \u001b[43m \u001b[49m\u001b[43mtraining\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtraining\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1195\u001b[0m \u001b[43m \u001b[49m\u001b[43mkey_padding_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkey_padding_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1196\u001b[0m \u001b[43m \u001b[49m\u001b[43mneed_weights\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mneed_weights\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1197\u001b[0m \u001b[43m \u001b[49m\u001b[43mattn_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattn_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1198\u001b[0m \u001b[43m \u001b[49m\u001b[43maverage_attn_weights\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maverage_attn_weights\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1199\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_causal\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_causal\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1200\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbatch_first \u001b[38;5;129;01mand\u001b[39;00m is_batched:\n\u001b[1;32m 1201\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m attn_output\u001b[38;5;241m.\u001b[39mtranspose(\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m0\u001b[39m), attn_output_weights\n", + "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/functional.py:5313\u001b[0m, in 
\u001b[0;36mmulti_head_attention_forward\u001b[0;34m(query, key, value, embed_dim_to_check, num_heads, in_proj_weight, in_proj_bias, bias_k, bias_v, add_zero_attn, dropout_p, out_proj_weight, out_proj_bias, training, key_padding_mask, need_weights, attn_mask, use_separate_proj_weight, q_proj_weight, k_proj_weight, v_proj_weight, static_k, static_v, average_attn_weights, is_causal)\u001b[0m\n\u001b[1;32m 5311\u001b[0m attn_output_weights \u001b[38;5;241m=\u001b[39m attn_output_weights\u001b[38;5;241m.\u001b[39mview(bsz, num_heads, tgt_len, src_len)\n\u001b[1;32m 5312\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m average_attn_weights:\n\u001b[0;32m-> 5313\u001b[0m attn_output_weights \u001b[38;5;241m=\u001b[39m \u001b[43mattn_output_weights\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmean\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdim\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5315\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_batched:\n\u001b[1;32m 5316\u001b[0m \u001b[38;5;66;03m# squeeze the output if input was unbatched\u001b[39;00m\n\u001b[1;32m 5317\u001b[0m attn_output \u001b[38;5;241m=\u001b[39m attn_output\u001b[38;5;241m.\u001b[39msqueeze(\u001b[38;5;241m1\u001b[39m)\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "start = time.time()\n", + "train(model, trainloader(batchsize=batchsize), trainaccloader(), criterion, optimiser, nepochs=1)\n", + "end = time.time()\n", + "print(\"training time: \", end-start)\n", + "test(model, testloader())" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "bbaac2fc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[]\n", + "[]\n" + ] + } + ], + "source": [ + "print(TRAIN_LOSS)\n", + "print(TRAIN_ACC)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "94178617", + "metadata": {}, + "outputs": [ + { + 
# Load the TorchScript model exported by train.py and switch it to inference mode.
model = torch.jit.load('model_trained.pt')
model.eval()


def _plot_curve(path, ylabel, title, missing_message):
    """Plot the per-epoch values stored in the text file *path*.

    Args:
        path: text file written with ``numpy.savetxt`` (one value per epoch).
        ylabel: label for the y axis.
        title: title of the figure.
        missing_message: printed instead of plotting when the file cannot be
            read or holds no values.
    """
    try:
        # ndmin=1 keeps a single-epoch file (one value) as a 1-element array,
        # so len() below works for any number of epochs.
        values = loadtxt(path, ndmin=1)
    except (OSError, ValueError):
        # Missing/unreadable file or malformed content: nothing to plot.
        print(missing_message)
        return
    if len(values) == 0:
        print(missing_message)
        return

    # x axis is the 1-based epoch index, matching one saved value per epoch.
    epochs = range(1, len(values) + 1)
    plt.plot(epochs, values)
    plt.ylabel(ylabel)
    plt.xlabel('epoch')
    plt.title(title)
    plt.show()


#try load and plot loss curve
_plot_curve('loss.txt', 'LOSS', 'Training Loss', "No training loss!")

#try load and plot accuracy curve
_plot_curve('acc.txt', 'ACCURACY', 'Validation Accuracy', "No accuracy!")

#try load and plot train accuracy curve
# NOTE(review): train.py saves only loss.txt and acc.txt, so train.txt is
# expected to be missing unless it is produced separately.
_plot_curve('train.txt', 'ACCURACY', 'Training Accuracy', "No training accuracy")

# Optional: re-test the loaded model on each split (uncomment to use).
# The loaders are functions in dataset.py and must be called to obtain a
# dataloader -- TODO confirm trainloader() has a usable default batch size.
# train_acc = test(model, trainloader()) #test on train set
# val_acc = test(model, valloader()) #test on validation set
# test_acc = test(model, testloader()) #test on test set

# print("accuracy on training set:", train_acc)
# print("accuracy on validation set:", val_acc)
# print("accuracy on test set:", test_acc)
# Per-epoch results, appended to by train(): mean loss, accuracy in percent
# on the accuracy loader, and wall-clock seconds spent in the training loop.
TRAIN_LOSS = []
TRAIN_ACC = []
TRAIN_TIMES = []


def train(model, dataloader, accloader, lossfunc, optimiser, nepochs=10):
    """Train *model* for *nepochs* epochs and record per-epoch statistics.

    Args:
        model: the network to optimise (called as ``model(x)``).
        dataloader: iterable of ``(x, y)`` training mini-batches.
        accloader: iterable of ``(x, y)`` batches used to measure accuracy
            after every epoch.
        lossfunc: callable ``lossfunc(prediction, y)`` returning a scalar loss.
        optimiser: optimiser that updates the parameters of *model*.
        nepochs: number of passes over *dataloader*.

    Side effects:
        Appends one entry per epoch to TRAIN_TIMES, TRAIN_LOSS and TRAIN_ACC.

    Raises:
        ZeroDivisionError: if *dataloader* yields no mini-batches.
    """
    for _ in range(nepochs):  # for each epoch
        model.train()
        epoch_loss = 0.0
        n_batches = 0
        epoch_start = time.time()
        for x, y in dataloader:  # for each mini-batch
            optimiser.zero_grad(set_to_none=True)
            # Call the module itself rather than .forward() so hooks run.
            loss = lossfunc(model(x), y)
            loss.backward()
            optimiser.step()
            epoch_loss += loss.item()
            n_batches += 1
        TRAIN_TIMES.append(round(time.time() - epoch_start, 3))
        epoch_loss /= n_batches  # mean loss over the mini-batches

        # evaluate with the model in inference mode
        model.eval()
        accuracy = test(model, accloader).item()

        # record performance
        TRAIN_LOSS.append(round(epoch_loss, 5))
        TRAIN_ACC.append(round(accuracy * 100, 2))


def test(model, dataloader):
    """Return the classification accuracy of *model* over *dataloader*.

    The predicted class of each sample is the index of the largest model
    output along dimension 1. Accuracy is the fraction of samples whose
    predicted class equals the label, counted per sample so that the result
    is correct for any batch size and any number of classes.

    Args:
        model: callable mapping a batch ``x`` to per-class scores.
        dataloader: iterable of ``(x, y)`` batches.

    Returns:
        A 0-dim float tensor holding the accuracy in ``[0, 1]``.

    Raises:
        ValueError: if *dataloader* yields no samples.
    """
    errors = 0
    items = 0
    with torch.no_grad():  # disable automatic gradient computation for efficiency
        for x, y in dataloader:
            predicted = torch.max(model(x), 1)[1].cpu()
            errors += (predicted != y.cpu()).sum().item()
            # count samples, not mini-batches
            items += predicted.numel()

    if items == 0:
        raise ValueError("dataloader yielded no samples")
    return torch.tensor(1 - errors / items)


# This is a library helper, not a unit test: keep pytest from collecting it
# when a test module imports it by name.
test.__test__ = False


# Run the experiment only when executed as a script, so that importing
# train/test from another module does not start a training run.
if __name__ == "__main__":
    # model training
    batchsize=16
    N, Np, L, W, H = trainshape()  # NOTE(review): values are currently unused
    model = VisionTransformer(inputsize=(batchsize, 192, 120), heads=4, embed=360, fflscale=2, nblocks=4)
    criterion = nn.CrossEntropyLoss()
    optimiser = optim.AdamW(model.parameters(), lr=3e-4)
    start = time.time()
    train(model, trainloader(batchsize=batchsize), valloader(), criterion, optimiser, nepochs=100)
    end = time.time()
    print("training time: ", end-start)
    print("test acc: ", test(model, testloader()))
    print(TRAIN_LOSS)
    print(TRAIN_ACC)
    print(TRAIN_TIMES)

    # saving model (uncomment to export the trained model)
    # model_trained = torch.jit.script(model)
    # model_trained.save('model_trained.pt')
    savetxt('loss.txt', TRAIN_LOSS)
    savetxt('acc.txt', TRAIN_ACC)