Horje
tanimoto coefficient rdkit Code Example
tanimoto coefficient rdkit
from rdkit import Chem
from rdkit import DataStructs
from rdkit.Chem.Fingerprints import FingerprintMols
import pandas as pd

# Pairwise Tanimoto similarity of all molecules listed in two CSV files.
#
# Input:  'first.csv' and 'second.csv', each with a 'smiles' column.
# Output: 'third.csv' with one row per unordered pair of valid molecules
#         (columns: query, target, Similarity), sorted by descending
#         similarity.

# Read and concatenate the CSVs.
df_1 = pd.read_csv('first.csv')
df_2 = pd.read_csv('second.csv')
df_3 = pd.concat([df_1, df_2])

# Validate the SMILES and collect their canonical forms.
df_smiles = df_3['smiles']
c_smiles = []
for ds in df_smiles:
    # Chem.MolFromSmiles returns None for a string it cannot parse, so the
    # result is checked explicitly instead of relying on a bare ``except``
    # (which would also swallow KeyboardInterrupt and unrelated errors).
    # Non-string cells (e.g. NaN from an empty CSV field) are reported as
    # invalid as well.
    mol = Chem.MolFromSmiles(ds) if isinstance(ds, str) else None
    if mol is None:
        print('Invalid SMILES:', ds)
        continue
    c_smiles.append(Chem.MolToSmiles(mol))
print()

# Build the molecules from the canonical SMILES so that the fingerprints are
# computed from exactly the strings that are reported in the output.
ms = [Chem.MolFromSmiles(x) for x in c_smiles]

# One fingerprint (fp) per molecule, index-aligned with c_smiles.
fps = [FingerprintMols.FingerprintMol(x) for x in ms]

# Columns of the result dataframe.
qu, ta, sim = [], [], []

# Compare every fp with all fps that follow it, so each unordered pair is
# scored exactly once. The last fp has nothing after it and is skipped as a
# query (fps[:-1] is also empty when there are no fingerprints at all).
for n, fp in enumerate(fps[:-1]):
    targets = c_smiles[n + 1:]  # sliced once per query, not once per pair
    s = DataStructs.BulkTanimotoSimilarity(fp, fps[n + 1:])
    print(c_smiles[n], targets)  # which mol is compared with which group
    # Collect the SMILES and similarity values; s is index-aligned with
    # targets because fps and c_smiles have the same length and order.
    qu.extend([c_smiles[n]] * len(s))
    ta.extend(targets)
    sim.extend(s)
print()

# Build the dataframe and sort it, most similar pairs first.
d = {'query': qu, 'target': ta, 'Similarity': sim}
df_final = pd.DataFrame(data=d)
df_final = df_final.sort_values('Similarity', ascending=False)
print(df_final)

# Save as CSV.
df_final.to_csv('third.csv', index=False, sep=',')




Python

Related
python separate int into digit array Code Example python separate int into digit array Code Example
python import list from py file Code Example python import list from py file Code Example
WAP THAT ASKS A USER FOR A NUMBER OF YEARS AND THEN PRINTS OUT THE NUMBER OF DAYS, HOURS ,MINUTES AND SECONDS IN THAT NO. OF YEARS. Code Example WAP THAT ASKS A USER FOR A NUMBER OF YEARS AND THEN PRINTS OUT THE NUMBER OF DAYS, HOURS ,MINUTES AND SECONDS IN THAT NO. OF YEARS. Code Example
get the list of column names whose data type is float python Code Example get the list of column names whose data type is float python Code Example
def Dijsktra(graph,source): dist = [0]*5 dist[0] = source v = 1 unvisited = {place: None for place in graph.keys()} visited = {} current = source currentDistance = 0 unvisited[current] = curr def Dijsktra(graph,source): dist = [0]*5 dist[0] = source v = 1 unvisited = {place: None for place in graph.keys()} visited = {} current = source currentDistance = 0 unvisited[current] = curr

Type:
Code Example
Category:
Coding
Sub Category:
Code Example
Uploaded by:
Admin
Views:
8