diff --git a/code/Datafusion1.0.ipynb b/code/Datafusion1.0.ipynb
new file mode 100644
index 0000000..8485feb
--- /dev/null
+++ b/code/Datafusion1.0.ipynb
@@ -0,0 +1,559 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 95,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Funktionen comparee_all_coefficients(smiles) tar fram en matris\n",
+    "# som visar nivån av likhet mellan alla molekyler två och två med datafused\n",
+    "# likhetskoefficienter.\n",
+    "# Anledningen till att hälften av matrisen har NaN-värden är att det annars blir\n",
+    "# en upprepning av alla värden.\n",
+    "\n",
+    "# Funktionen compare_all_coefficients(smiles)\n",
+    "# 1. Skriver ut värdena för 4 olika likhetsmått. En datafusion görs som \n",
+    "# skrivs ut. \n",
+    "\n",
+    "# 2. Kollar om de olika likhetskoefficienterna kommer fram till \n",
+    "# samma molekyl som är mest lik referensmolekylen. Om de inte gör det \n",
+    "# skrivs ett meddelande ut som säger vilka likhetskoefficienter som \n",
+    "# inte överensstämmer.\n",
+    "\n",
+    "# 3. Radar upp alla molekyler och motsvarande molekyl som är mest lik."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from rdkit import Chem\n",
+    "from rdkit import DataStructs\n",
+    "from rdkit.Chem import AllChem, MACCSkeys\n",
+    "from rdkit.Chem.Fingerprints import FingerprintMols\n",
+    "# Original fingerprint setup; it does not take into account whether the\n",
+    "# reference is itself part of the SMILES list.\n",
+    "# Maps a descriptor name to a callable that turns an RDKit molecule into a\n",
+    "# fingerprint.\n",
+    "descriptors = {\n",
+    "    'maccs': MACCSkeys.GenMACCSKeys,\n",
+    "    'morgan3': lambda mol: AllChem.GetMorganFingerprintAsBitVect(mol, 3),\n",
+    "    'morgan5': lambda mol: AllChem.GetMorganFingerprintAsBitVect(mol, 5),\n",
+    "    'rdkit': FingerprintMols.FingerprintMol,\n",
+    "}\n",
+    "\n",
+    "# Maps a metric name to an RDKit bulk similarity function, i.e. one that\n",
+    "# scores a single fingerprint against a list of fingerprints.\n",
+    "metrics = {\n",
+    "    # fpss_sim passes a list of fingerprints, so the bulk variant is needed\n",
+    "    # here just like for every other entry.\n",
+    "    'asymmetric':    DataStructs.BulkAsymmetricSimilarity,\n",
+    "    'braunblanquet': DataStructs.BulkBraunBlanquetSimilarity,\n",
+    "    'cosine':        DataStructs.BulkCosineSimilarity,\n",
+    "    'dice':          DataStructs.BulkDiceSimilarity,\n",
+    "    'kulczynski':    DataStructs.BulkKulczynskiSimilarity,\n",
+    "    'mcconnaughey':  DataStructs.BulkMcConnaugheySimilarity,\n",
+    "    'rogotgoldberg': DataStructs.BulkRogotGoldbergSimilarity,\n",
+    "    'russel':        DataStructs.BulkRusselSimilarity,\n",
+    "    'sokal':         DataStructs.BulkSokalSimilarity,\n",
+    "    'tanimoto':      DataStructs.BulkTanimotoSimilarity\n",
+    "}\n",
+    "\n",
+    "def fpss_sim(ref, smiles, descriptor='rdkit', metric='tanimoto'):\n",
+    "    \"\"\"Score every SMILES in a list against a reference compound.\n",
+    "\n",
+    "    The reference is compared with every entry of ``smiles``; an entry that\n",
+    "    equals the reference is scored like any other.\n",
+    "\n",
+    "    Args:\n",
+    "        ref: SMILES string of the reference compound.\n",
+    "        smiles: iterable of SMILES strings to compare with ``ref``.\n",
+    "        descriptor: fingerprint type, a key of ``descriptors``\n",
+    "            (default 'rdkit').\n",
+    "        metric: similarity coefficient, a key of ``metrics``\n",
+    "            (default 'tanimoto').\n",
+    "\n",
+    "    Returns:\n",
+    "        Whatever the selected function in ``metrics`` returns for the\n",
+    "        reference fingerprint and the list of fingerprints built from\n",
+    "        ``smiles`` (in input order).\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: if ``descriptor`` or ``metric`` is unknown, or if a\n",
+    "            SMILES string cannot be parsed.\n",
+    "    \"\"\"\n",
+    "    if descriptor not in descriptors:\n",
+    "        raise ValueError('Invalid descriptor name ' + descriptor)\n",
+    "\n",
+    "    if metric not in metrics:\n",
+    "        raise ValueError('Invalid metric ' + metric)\n",
+    "\n",
+    "    # Loop-invariant lookups are done once; this also keeps the function\n",
+    "    # well-defined when ``smiles`` is empty.\n",
+    "    fingerprint = descriptors[descriptor]\n",
+    "    score = metrics[metric]\n",
+    "\n",
+    "    ref_mol = Chem.MolFromSmiles(ref)\n",
+    "    # MolFromSmiles signals a parse failure by returning None.\n",
+    "    if ref_mol is None:\n",
+    "        raise ValueError('Invalid reference SMILES ' + str(ref))\n",
+    "    ref_fp = fingerprint(ref_mol)\n",
+    "\n",
+    "    fps = []\n",
+    "    for smi in smiles:\n",
+    "        mol = Chem.MolFromSmiles(smi)\n",
+    "        if mol is None:\n",
+    "            raise ValueError('Invalid SMILES ' + str(smi))\n",
+    "        fps.append(fingerprint(mol))\n",
+    "\n",
+    "    return score(ref_fp, fps)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from sklearn.feature_extraction import DictVectorizer\n",
+    "import pandas as pd\n",
+    "from copy import copy\n",
+    "def comparee_all_coefficients(smiles, sort_by=None):\n",
+    "    \"\"\"Print the pairwise matrix of data-fused similarity coefficients.\n",
+    "\n",
+    "    Each molecule in turn is used as reference and scored against all\n",
+    "    molecules with ``fpss_sim`` using the 'rdkit' fingerprint and the cosine,\n",
+    "    Dice, Sokal and Tanimoto coefficients. The fused score of a pair is the\n",
+    "    sum of the four coefficients divided by four.\n",
+    "\n",
+    "    The diagonal and the lower triangle of the matrix are set to NaN so that\n",
+    "    every pair of molecules shows up only once.\n",
+    "\n",
+    "    Args:\n",
+    "        smiles: list of SMILES strings.\n",
+    "        sort_by: column label (one of ``smiles``) whose values order the\n",
+    "            printed rows, highest first. When omitted, 'C1CCCCC1' is used if\n",
+    "            it is in the list (the previous hard-coded behaviour), otherwise\n",
+    "            the last column.\n",
+    "\n",
+    "    Returns:\n",
+    "        None. The matrix is printed.\n",
+    "    \"\"\"\n",
+    "    smiles = list(smiles)\n",
+    "    size = len(smiles)\n",
+    "\n",
+    "    fused_rows = []\n",
+    "    for ref in smiles:\n",
+    "        per_metric = pd.DataFrame(\n",
+    "            {\n",
+    "                'Cos': fpss_sim(ref, smiles, descriptor='rdkit', metric='cosine'),\n",
+    "                'Dice': fpss_sim(ref, smiles, descriptor='rdkit', metric='dice'),\n",
+    "                'Sokal': fpss_sim(ref, smiles, descriptor='rdkit', metric='sokal'),\n",
+    "                'Tan': fpss_sim(ref, smiles, descriptor='rdkit', metric='tanimoto'),\n",
+    "            },\n",
+    "            index=smiles,\n",
+    "        )\n",
+    "        # Data fusion: average the coefficients of each molecule.\n",
+    "        fused_rows.append(per_metric.sum(axis=1).values / len(per_metric.columns))\n",
+    "\n",
+    "    matrix = np.array(fused_rows, dtype=float).reshape(size, size)\n",
+    "    # Blank out the diagonal and everything below it (row >= column).\n",
+    "    matrix[np.tril_indices(size)] = np.nan\n",
+    "    fused = pd.DataFrame(matrix, columns=smiles, index=smiles, dtype=float)\n",
+    "\n",
+    "    if sort_by is None and size:\n",
+    "        sort_by = 'C1CCCCC1' if 'C1CCCCC1' in smiles else smiles[-1]\n",
+    "    if sort_by is not None:\n",
+    "        fused = fused.sort_values(sort_by, ascending=False)\n",
+    "    print(fused)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "           CC(=O)OC  CC(=O)OO  CC(=O)OCC     O=C=O  CCN(CC)CC  C1CCCCC1\n",
+      "CC(=O)OO        NaN       NaN   0.483232  0.079922   0.220703  0.200719\n",
+      "CC(=O)OC        NaN  0.523807   0.785214  0.144338   0.244964  0.181995\n",
+      "CC(=O)OCC       NaN       NaN        NaN  0.180797   0.246912  0.154282\n",
+      "CCN(CC)CC       NaN       NaN        NaN       NaN        NaN  0.153655\n",
+      "O=C=O           NaN       NaN        NaN       NaN   0.000000  0.000000\n",
+      "C1CCCCC1        NaN       NaN        NaN       NaN        NaN       NaN\n"
+     ]
+    }
+   ],
+   "source": [
+    "comparee_all_coefficients([\"CC(=O)OC\",\"CC(=O)OO\",\"CC(=O)OCC\", \"O=C=O\", \"CCN(CC)CC\", \"C1CCCCC1\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 100,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from sklearn.feature_extraction import DictVectorizer\n",
+    "import pandas as pd\n",
+    "from copy import copy\n",
+    "\n",
+    "def compare_all_coefficients(smiles):\n",
+    "    \"\"\"Print per-reference similarity tables and the best match for each.\n",
+    "\n",
+    "    For every molecule used as reference, this:\n",
+    "\n",
+    "    1. Prints the cosine, Dice, Sokal and Tanimoto coefficients ('rdkit'\n",
+    "       fingerprint) against all other molecules, followed by the data-fused\n",
+    "       coefficient (sum of the four divided by four).\n",
+    "    2. Checks whether the four coefficients pick the same most-similar\n",
+    "       molecule and prints a message for every pair that disagrees.\n",
+    "\n",
+    "    After the loop it prints a table listing each reference, the molecule\n",
+    "    with the highest data-fused score and that score.\n",
+    "\n",
+    "    Args:\n",
+    "        smiles: list of SMILES strings.\n",
+    "\n",
+    "    Returns:\n",
+    "        None. All results are printed.\n",
+    "    \"\"\"\n",
+    "    # Local import: this cell does not import itertools at the top.\n",
+    "    from itertools import combinations\n",
+    "\n",
+    "    coefficient_names = sorted(['Tan', 'Cos', 'Dice', 'Sokal'])\n",
+    "    references = []\n",
+    "    best_matches = []\n",
+    "    best_scores = []\n",
+    "\n",
+    "    for ref in smiles:\n",
+    "        print('REFERENSEN ÄR ' + ref)\n",
+    "\n",
+    "        # Every other molecule; the reference itself is never scored.\n",
+    "        others = [smi for smi in smiles if smi != ref]\n",
+    "        if not others:\n",
+    "            # Nothing to compare against (e.g. a single-molecule list).\n",
+    "            continue\n",
+    "\n",
+    "        # fps_sim may modify the list it receives, so each call gets its own\n",
+    "        # copy and ``others`` stays a valid index for the scores.\n",
+    "        df = pd.DataFrame(\n",
+    "            {\n",
+    "                'Cos': fps_sim(ref, list(others), descriptor='rdkit', metric='cosine'),\n",
+    "                'Dice': fps_sim(ref, list(others), descriptor='rdkit', metric='dice'),\n",
+    "                'Sokal': fps_sim(ref, list(others), descriptor='rdkit', metric='sokal'),\n",
+    "                'Tan': fps_sim(ref, list(others), descriptor='rdkit', metric='tanimoto'),\n",
+    "            },\n",
+    "            index=others,\n",
+    "        )\n",
+    "        print(df)\n",
+    "        print(' ')\n",
+    "\n",
+    "        # Data fusion: average the four coefficients of each molecule.\n",
+    "        fused = df.sum(axis=1) / 4\n",
+    "        print('Datafused coefficients:')\n",
+    "        print(fused)\n",
+    "        print(' ')\n",
+    "\n",
+    "        references.append(ref)\n",
+    "        best_matches.append(fused.idxmax())\n",
+    "        best_scores.append(np.amax(fused.values))\n",
+    "\n",
+    "        # Spacer lines kept so the printed layout is unchanged.\n",
+    "        for _ in coefficient_names:\n",
+    "            print(' ')\n",
+    "\n",
+    "        # Compare each pair of coefficients exactly once. The list is not\n",
+    "        # modified while looping, so no pair is skipped.\n",
+    "        top_pick = {name: df[name].idxmax() for name in coefficient_names}\n",
+    "        for first, second in combinations(coefficient_names, 2):\n",
+    "            if top_pick[first] != top_pick[second]:\n",
+    "                print(\"The similarity coefficients doesn't agree on which molecule is most similar to the query\")\n",
+    "                print(second + ' and ' + first + ' shows different molecules')\n",
+    "\n",
+    "    index = pd.MultiIndex.from_arrays(\n",
+    "        [references, best_matches],\n",
+    "        names=('Referens', 'Molekyl mest lik referensen'),\n",
+    "    )\n",
+    "    print(pd.DataFrame(best_scores, index=index, dtype=float))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 101,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "REFERENSEN ÄR CC(=O)OC\n",
+      "                Cos      Dice     Sokal       Tan\n",
+      "CC(=O)OO   0.650791  0.648649  0.315789  0.480000\n",
+      "CC(=O)OCC  0.877058  0.869565  0.625000  0.769231\n",
+      "O=C=O      0.258199  0.173913  0.050000  0.095238\n",
+      "CCN(CC)CC  0.335410  0.333333  0.111111  0.200000\n",
+      "C1CCCCC1   0.258199  0.250000  0.076923  0.142857\n",
+      " \n",
+      "Datafused coefficients:\n",
+      "CC(=O)OO     0.523807\n",
+      "CC(=O)OCC    0.785214\n",
+      "O=C=O        0.144338\n",
+      "CCN(CC)CC    0.244964\n",
+      "C1CCCCC1     0.181995\n",
+      "dtype: float64\n",
+      " \n",
+      " \n",
+      " \n",
+      " \n",
+      " \n",
+      "REFERENSEN ÄR CC(=O)OO\n",
+      "                Cos      Dice     Sokal       Tan\n",
+      "CC(=O)OC   0.650791  0.648649  0.315789  0.480000\n",
+      "CC(=O)OCC  0.618347  0.604651  0.276596  0.433333\n",
+      "O=C=O      0.140028  0.100000  0.027027  0.052632\n",
+      "CCN(CC)CC  0.303170  0.303030  0.098039  0.178571\n",
+      "C1CCCCC1   0.280056  0.275862  0.086957  0.160000\n",
+      " \n",
+      "Datafused coefficients:\n",
+      "CC(=O)OC     0.523807\n",
+      "CC(=O)OCC    0.483232\n",
+      "O=C=O        0.079922\n",
+      "CCN(CC)CC    0.220703\n",
+      "C1CCCCC1     0.200719\n",
+      "dtype: float64\n",
+      " \n",
+      " \n",
+      " \n",
+      " \n",
+      " \n",
+      "REFERENSEN ÄR CC(=O)OCC\n",
+      "                Cos      Dice     Sokal       Tan\n",
+      "CC(=O)OC   0.877058  0.869565  0.625000  0.769231\n",
+      "CC(=O)OO   0.618347  0.604651  0.276596  0.433333\n",
+      "O=C=O      0.339683  0.206897  0.061224  0.115385\n",
+      "CCN(CC)CC  0.343203  0.333333  0.111111  0.200000\n",
+      "C1CCCCC1   0.226455  0.210526  0.062500  0.117647\n",
+      " \n",
+      "Datafused coefficients:\n",
+      "CC(=O)OC     0.785214\n",
+      "CC(=O)OO     0.483232\n",
+      "O=C=O        0.180797\n",
+      "CCN(CC)CC    0.246912\n",
+      "C1CCCCC1     0.154282\n",
+      "dtype: float64\n",
+      " \n",
+      " \n",
+      " \n",
+      " \n",
+      " \n",
+      "REFERENSEN ÄR O=C=O\n",
+      "                Cos      Dice     Sokal       Tan\n",
+      "CC(=O)OC   0.258199  0.173913  0.050000  0.095238\n",
+      "CC(=O)OO   0.140028  0.100000  0.027027  0.052632\n",
+      "CC(=O)OCC  0.339683  0.206897  0.061224  0.115385\n",
+      "CCN(CC)CC  0.000000  0.000000  0.000000  0.000000\n",
+      "C1CCCCC1   0.000000  0.000000  0.000000  0.000000\n",
+      " \n",
+      "Datafused coefficients:\n",
+      "CC(=O)OC     0.144338\n",
+      "CC(=O)OO     0.079922\n",
+      "CC(=O)OCC    0.180797\n",
+      "CCN(CC)CC    0.000000\n",
+      "C1CCCCC1     0.000000\n",
+      "dtype: float64\n",
+      " \n",
+      " \n",
+      " \n",
+      " \n",
+      " \n",
+      "REFERENSEN ÄR CCN(CC)CC\n",
+      "                Cos      Dice     Sokal       Tan\n",
+      "CC(=O)OC   0.335410  0.333333  0.111111  0.200000\n",
+      "CC(=O)OO   0.303170  0.303030  0.098039  0.178571\n",
+      "CC(=O)OCC  0.343203  0.333333  0.111111  0.200000\n",
+      "O=C=O      0.000000  0.000000  0.000000  0.000000\n",
+      "C1CCCCC1   0.216506  0.214286  0.063830  0.120000\n",
+      " \n",
+      "Datafused coefficients:\n",
+      "CC(=O)OC     0.244964\n",
+      "CC(=O)OO     0.220703\n",
+      "CC(=O)OCC    0.246912\n",
+      "O=C=O        0.000000\n",
+      "C1CCCCC1     0.153655\n",
+      "dtype: float64\n",
+      " \n",
+      " \n",
+      " \n",
+      " \n",
+      " \n",
+      "The similarity coefficients doesn't agree on which molecule is most similar to the query\n",
+      "Dice and Cos shows different molecules\n",
+      "The similarity coefficients doesn't agree on which molecule is most similar to the query\n",
+      "Sokal and Cos shows different molecules\n",
+      "The similarity coefficients doesn't agree on which molecule is most similar to the query\n",
+      "Tan and Cos shows different molecules\n",
+      "REFERENSEN ÄR C1CCCCC1\n",
+      "                Cos      Dice     Sokal       Tan\n",
+      "CC(=O)OC   0.258199  0.250000  0.076923  0.142857\n",
+      "CC(=O)OO   0.280056  0.275862  0.086957  0.160000\n",
+      "CC(=O)OCC  0.226455  0.210526  0.062500  0.117647\n",
+      "O=C=O      0.000000  0.000000  0.000000  0.000000\n",
+      "CCN(CC)CC  0.216506  0.214286  0.063830  0.120000\n",
+      " \n",
+      "Datafused coefficients:\n",
+      "CC(=O)OC     0.181995\n",
+      "CC(=O)OO     0.200719\n",
+      "CC(=O)OCC    0.154282\n",
+      "O=C=O        0.000000\n",
+      "CCN(CC)CC    0.153655\n",
+      "dtype: float64\n",
+      " \n",
+      " \n",
+      " \n",
+      " \n",
+      " \n",
+      "                                                0\n",
+      "Referens  Molekyl mest\\n lik referensen          \n",
+      "CC(=O)OC  CC(=O)OCC                      0.785214\n",
+      "CC(=O)OO  CC(=O)OC                       0.523807\n",
+      "CC(=O)OCC CC(=O)OC                       0.785214\n",
+      "O=C=O     CC(=O)OCC                      0.180797\n",
+      "CCN(CC)CC CC(=O)OCC                      0.246912\n",
+      "C1CCCCC1  CC(=O)OO                       0.200719\n"
+     ]
+    }
+   ],
+   "source": [
+    "compare_all_coefficients([\"CC(=O)OC\",\"CC(=O)OO\",\"CC(=O)OCC\", \"O=C=O\", \"CCN(CC)CC\", \"C1CCCCC1\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from rdkit import Chem\n",
+    "from rdkit import DataStructs\n",
+    "from rdkit.Chem import AllChem, MACCSkeys\n",
+    "from rdkit.Chem.Fingerprints import FingerprintMols\n",
+    "\n",
+    "# Maps a descriptor name to a callable that turns an RDKit molecule into a\n",
+    "# fingerprint.\n",
+    "descriptors = {\n",
+    "    'maccs': MACCSkeys.GenMACCSKeys,\n",
+    "    'morgan3': lambda mol: AllChem.GetMorganFingerprintAsBitVect(mol, 3),\n",
+    "    'morgan5': lambda mol: AllChem.GetMorganFingerprintAsBitVect(mol, 5),\n",
+    "    'rdkit': FingerprintMols.FingerprintMol,\n",
+    "}\n",
+    "\n",
+    "# Maps a metric name to an RDKit bulk similarity function, i.e. one that\n",
+    "# scores a single fingerprint against a list of fingerprints.\n",
+    "metrics = {\n",
+    "    # fps_sim passes a list of fingerprints, so the bulk variant is needed\n",
+    "    # here just like for every other entry.\n",
+    "    'asymmetric':    DataStructs.BulkAsymmetricSimilarity,\n",
+    "    'braunblanquet': DataStructs.BulkBraunBlanquetSimilarity,\n",
+    "    'cosine':        DataStructs.BulkCosineSimilarity,\n",
+    "    'dice':          DataStructs.BulkDiceSimilarity,\n",
+    "    'kulczynski':    DataStructs.BulkKulczynskiSimilarity,\n",
+    "    'mcconnaughey':  DataStructs.BulkMcConnaugheySimilarity,\n",
+    "    'rogotgoldberg': DataStructs.BulkRogotGoldbergSimilarity,\n",
+    "    'russel':        DataStructs.BulkRusselSimilarity,\n",
+    "    'sokal':         DataStructs.BulkSokalSimilarity,\n",
+    "    'tanimoto':      DataStructs.BulkTanimotoSimilarity\n",
+    "}\n",
+    "\n",
+    "def fps_sim(ref, smiles, descriptor='rdkit', metric='tanimoto'):\n",
+    "    \"\"\"Score the SMILES in a list against a reference, skipping the reference.\n",
+    "\n",
+    "    Entries of ``smiles`` that equal ``ref`` are not scored.\n",
+    "\n",
+    "    Side effect: when ``ref`` occurs in ``smiles``, every occurrence is\n",
+    "    removed from the list in place, so that afterwards the list lines up\n",
+    "    with the returned scores. Callers that use the list as an index for the\n",
+    "    scores depend on this; pass a copy to keep the original list intact.\n",
+    "\n",
+    "    Args:\n",
+    "        ref: SMILES string of the reference compound.\n",
+    "        smiles: list of SMILES strings to compare with ``ref``.\n",
+    "        descriptor: fingerprint type, a key of ``descriptors``\n",
+    "            (default 'rdkit').\n",
+    "        metric: similarity coefficient, a key of ``metrics``\n",
+    "            (default 'tanimoto').\n",
+    "\n",
+    "    Returns:\n",
+    "        Whatever the selected function in ``metrics`` returns for the\n",
+    "        reference fingerprint and the list of fingerprints of the remaining\n",
+    "        entries (in input order).\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: if ``descriptor`` or ``metric`` is unknown, or if a\n",
+    "            SMILES string cannot be parsed.\n",
+    "    \"\"\"\n",
+    "    if descriptor not in descriptors:\n",
+    "        raise ValueError('Invalid descriptor name ' + descriptor)\n",
+    "\n",
+    "    if metric not in metrics:\n",
+    "        raise ValueError('Invalid metric ' + metric)\n",
+    "\n",
+    "    # Loop-invariant lookups are done once; this also keeps the function\n",
+    "    # well-defined when nothing is left to compare with.\n",
+    "    fingerprint = descriptors[descriptor]\n",
+    "    score = metrics[metric]\n",
+    "\n",
+    "    ref_mol = Chem.MolFromSmiles(ref)\n",
+    "    # MolFromSmiles signals a parse failure by returning None.\n",
+    "    if ref_mol is None:\n",
+    "        raise ValueError('Invalid reference SMILES ' + str(ref))\n",
+    "    ref_fp = fingerprint(ref_mol)\n",
+    "\n",
+    "    others = [smi for smi in smiles if smi != ref]\n",
+    "    fps = []\n",
+    "    for smi in others:\n",
+    "        mol = Chem.MolFromSmiles(smi)\n",
+    "        if mol is None:\n",
+    "            raise ValueError('Invalid SMILES ' + str(smi))\n",
+    "        fps.append(fingerprint(mol))\n",
+    "\n",
+    "    # Drop the reference from the caller's list in one step instead of\n",
+    "    # removing items while iterating, which would skip adjacent duplicates.\n",
+    "    if ref in smiles:\n",
+    "        smiles[:] = others\n",
+    "\n",
+    "    return score(ref_fp, fps)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}