-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchemutils.py
106 lines (85 loc) · 2.91 KB
/
chemutils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import rdkit.Chem as Chem
from rdkit.Chem import BRICS
def get_mol(smiles):
mol = Chem.MolFromSmiles(smiles)
if mol is None:
return None
# Chem.Kekulize(mol)
return mol
def get_smiles(mol):
return Chem.MolToSmiles(mol, kekuleSmiles=True)
def sanitize(mol):
try:
smiles = get_smiles(mol)
mol = get_mol(smiles)
except Exception as e:
return None
return mol
def copy_atom(atom):
new_atom = Chem.Atom(atom.GetSymbol())
new_atom.SetFormalCharge(atom.GetFormalCharge())
new_atom.SetAtomMapNum(atom.GetAtomMapNum())
return new_atom
def copy_edit_mol(mol):
new_mol = Chem.RWMol(Chem.MolFromSmiles(''))
for atom in mol.GetAtoms():
new_atom = copy_atom(atom)
new_mol.AddAtom(new_atom)
for bond in mol.GetBonds():
a1 = bond.GetBeginAtom().GetIdx()
a2 = bond.GetEndAtom().GetIdx()
bt = bond.GetBondType()
new_mol.AddBond(a1, a2, bt)
return new_mol
def get_clique_mol(mol, atoms):
# get the fragment of clique
Chem.Kekulize(mol)
smiles = Chem.MolFragmentToSmiles(mol, atoms, kekuleSmiles=True)
new_mol = Chem.MolFromSmiles(smiles, sanitize=False)
new_mol = copy_edit_mol(new_mol).GetMol()
new_mol = sanitize(new_mol) # We assume this is not None
Chem.SanitizeMol(mol)
return new_mol
def motif_decomp(mol):
n_atoms = mol.GetNumAtoms()
if n_atoms == 1:
return [[0]]
cliques = []
for bond in mol.GetBonds():
a1 = bond.GetBeginAtom().GetIdx()
a2 = bond.GetEndAtom().GetIdx()
cliques.append([a1, a2])
res = list(BRICS.FindBRICSBonds(mol))
if len(res) != 0:
for bond in res:
if [bond[0][0], bond[0][1]] in cliques:
cliques.remove([bond[0][0], bond[0][1]])
else:
cliques.remove([bond[0][1], bond[0][0]])
cliques.append([bond[0][0]])
cliques.append([bond[0][1]])
# merge cliques
for c in range(len(cliques) - 1):
if c >= len(cliques):
break
for k in range(c + 1, len(cliques)):
if k >= len(cliques):
break
if len(set(cliques[c]) & set(cliques[k])) > 0:
cliques[c] = list(set(cliques[c]) | set(cliques[k]))
cliques[k] = []
cliques = [c for c in cliques if len(c) > 0]
cliques = [c for c in cliques if n_atoms> len(c) > 0]
num_cli = len(cliques)
ssr_mol = Chem.GetSymmSSSR(mol)
for i in range(num_cli):
c = cliques[i]
cmol = get_clique_mol(mol, c)
ssr = Chem.GetSymmSSSR(cmol)
if len(ssr)>1:
for ring in ssr_mol:
if len(set(list(ring)) & set(c)) == len(list(ring)):
cliques.append(list(ring))
cliques[i] = list(set(cliques[i]) - set(list(ring)))
cliques = [c for c in cliques if n_atoms> len(c) > 0]
return cliques