forked from mwormleonhard/ChemSpiPy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchemspipy.py
221 lines (188 loc) · 8.06 KB
/
chemspipy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# -*- coding: utf-8 -*-
"""
ChemSpiPy
Python wrapper for the ChemSpider API.
https://github.com/mcs07/ChemSpiPy
Forked from ChemSpiPy by Cameron Neylon
https://github.com/cameronneylon/ChemSpiPy
"""
import urllib2
from xml.etree import ElementTree as ET
__author__ = 'Matt Swain'
__email__ = '[email protected]'
__version__ = '1.0'
__license__ = 'MIT'
from private_token import TOKEN
class Compound(object):
    """ A class for retrieving record details about a compound by CSID.

    The purpose of this class is to provide access to various parts of the
    ChemSpider API that return information about a compound given its CSID.
    Information is loaded lazily when requested, and cached for future access.
    """

    # XML namespace prefix used when looking up elements in API responses.
    _NS = '{http://www.chemspider.com/}'

    # (instance attribute, XML tag, convert to float?) for every value that
    # loadextendedcompoundinfo() extracts from the GetExtendedCompoundInfo reply.
    _EXTENDED_FIELDS = (
        ('_mf', 'MF', False),
        ('_smiles', 'SMILES', False),
        ('_inchi', 'InChI', False),
        ('_inchikey', 'InChIKey', False),
        ('_averagemass', 'AverageMass', True),
        ('_molecularweight', 'MolecularWeight', True),
        ('_monoisotopicmass', 'MonoisotopicMass', True),
        ('_nominalmass', 'NominalMass', True),
        ('_alogp', 'ALogP', True),
        ('_xlogp', 'XLogP', True),
        ('_commonname', 'CommonName', False),
    )

    # Becomes True (per instance) once the extended info has been fetched, so
    # that fields missing from the response are not re-requested on every
    # access. Kept as a class-level default so instances created without
    # running __init__ still behave sensibly.
    _extended_loaded = False

    def __init__(self, csid):
        """ Initialize with a CSID as an int or str.

        csid -- an int, or a str consisting only of digits.

        Raises TypeError for anything else (including bool and non-digit
        strings).
        """
        if isinstance(csid, str) and csid.isdigit():
            self.csid = csid
        elif isinstance(csid, int) and not isinstance(csid, bool):
            # bool is a subclass of int; it was never accepted as a CSID.
            self.csid = str(csid)
        else:
            raise TypeError('Compound must be initialised with a CSID as an int or str')
        self._imageurl = None
        self._mf = None
        self._smiles = None
        self._inchi = None
        self._inchikey = None
        self._averagemass = None
        self._molecularweight = None
        self._monoisotopicmass = None
        self._nominalmass = None
        self._alogp = None
        self._xlogp = None
        self._commonname = None
        self._image = None
        self._mol = None
        self._mol3d = None

    def __repr__(self):
        return "Compound(%r)" % self.csid

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @classmethod
    def _element_value(cls, tree, tag, as_float=False):
        """ Return the value of the namespaced element `tag` found via tree.find().

        Returns None when the element is missing or has no text. Text is
        returned UTF-8 encoded; with as_float=True it is converted to float
        (blank text yields None).
        """
        elem = tree.find(cls._NS + tag)
        if elem is None or elem.text is None:
            return None
        text = elem.text.encode('utf-8')
        if not as_float:
            return text
        text = text.strip()
        return float(text) if text else None

    @staticmethod
    def _fetch_root_text(apiurl):
        """ Request apiurl, parse the XML reply and return the root element's text """
        response = urllib2.urlopen(apiurl)
        try:
            return ET.parse(response).getroot().text
        finally:
            # Always release the connection, even if parsing fails.
            response.close()

    def _ensure_extended_info(self):
        """ Fetch the extended compound info unless it has already been fetched """
        if not self._extended_loaded:
            self.loadextendedcompoundinfo()

    # ------------------------------------------------------------------
    # Lazily loaded properties
    # ------------------------------------------------------------------

    @property
    def imageurl(self):
        """ Return the URL of a png image of the 2D structure """
        if self._imageurl is None:
            self._imageurl = 'http://www.chemspider.com/ImagesHandler.ashx?id=%s' % self.csid
        return self._imageurl

    @property
    def mf(self):
        """ Retrieve molecular formula from ChemSpider """
        if self._mf is None:
            self._ensure_extended_info()
        return self._mf

    @property
    def smiles(self):
        """ Retrieve SMILES string from ChemSpider """
        if self._smiles is None:
            self._ensure_extended_info()
        return self._smiles

    @property
    def inchi(self):
        """ Retrieve InChI string from ChemSpider """
        if self._inchi is None:
            self._ensure_extended_info()
        return self._inchi

    @property
    def inchikey(self):
        """ Retrieve InChIKey string from ChemSpider """
        if self._inchikey is None:
            self._ensure_extended_info()
        return self._inchikey

    @property
    def averagemass(self):
        """ Retrieve average mass from ChemSpider """
        if self._averagemass is None:
            self._ensure_extended_info()
        return self._averagemass

    @property
    def molecularweight(self):
        """ Retrieve molecular weight from ChemSpider """
        if self._molecularweight is None:
            self._ensure_extended_info()
        return self._molecularweight

    @property
    def monoisotopicmass(self):
        """ Retrieve monoisotopic mass from ChemSpider """
        if self._monoisotopicmass is None:
            self._ensure_extended_info()
        return self._monoisotopicmass

    @property
    def nominalmass(self):
        """ Retrieve nominal mass from ChemSpider """
        if self._nominalmass is None:
            self._ensure_extended_info()
        return self._nominalmass

    @property
    def alogp(self):
        """ Retrieve ALogP from ChemSpider """
        if self._alogp is None:
            self._ensure_extended_info()
        return self._alogp

    @property
    def xlogp(self):
        """ Retrieve XLogP from ChemSpider """
        if self._xlogp is None:
            self._ensure_extended_info()
        return self._xlogp

    @property
    def commonname(self):
        """ Retrieve common name from ChemSpider """
        if self._commonname is None:
            self._ensure_extended_info()
        return self._commonname

    def loadextendedcompoundinfo(self):
        """ Load extended compound info from the Mass Spec API.

        Always performs a request (calling it directly refreshes the cached
        values). Every field listed in _EXTENDED_FIELDS is overwritten; fields
        that are missing or empty in the response are set to None.
        """
        apiurl = 'http://www.chemspider.com/MassSpecAPI.asmx/GetExtendedCompoundInfo?CSID=%s&token=%s' % (self.csid, TOKEN)
        response = urllib2.urlopen(apiurl)
        try:
            tree = ET.parse(response)
        finally:
            response.close()
        for attr, tag, as_float in self._EXTENDED_FIELDS:
            setattr(self, attr, self._element_value(tree, tag, as_float))
        # Only mark as loaded after a successful fetch and parse.
        self._extended_loaded = True

    @property
    def image(self):
        """ Return the 2D structure thumbnail as delivered by GetCompoundThumbnail.

        The value is the text of the response's root element, cached after the
        first successful request.
        NOTE(review): earlier documentation called this "PNG binary image
        data"; the service presumably delivers it base64-encoded inside the
        XML -- confirm before writing it to a file.
        """
        if self._image is None:
            apiurl = 'http://www.chemspider.com/Search.asmx/GetCompoundThumbnail?id=%s&token=%s' % (self.csid, TOKEN)
            self._image = self._fetch_root_text(apiurl)
        return self._image

    @property
    def mol(self):
        """ Return record in MOL format """
        if self._mol is None:
            apiurl = 'http://www.chemspider.com/MassSpecAPI.asmx/GetRecordMol?csid=%s&calc3d=false&token=%s' % (self.csid, TOKEN)
            self._mol = self._fetch_root_text(apiurl)
        return self._mol

    @property
    def mol3d(self):
        """ Return record in MOL format with 3D coordinates calculated """
        if self._mol3d is None:
            apiurl = 'http://www.chemspider.com/MassSpecAPI.asmx/GetRecordMol?csid=%s&calc3d=true&token=%s' % (self.csid, TOKEN)
            self._mol3d = self._fetch_root_text(apiurl)
        return self._mol3d
def find(query):
    """ Search by Name, SMILES, InChI, InChIKey, etc. Returns first 100 Compounds

    query -- the search text as a str or unicode object.

    Returns a list with one Compound per CSID in the SimpleSearch response,
    or None when the response contains no CSIDs. No limit is applied here;
    the size of the result is whatever the service returns.

    Raises TypeError if query is not a string object.
    """
    # Explicit check instead of assert, which is stripped when running with -O.
    if not isinstance(query, (str, unicode)):
        raise TypeError('query not a string object')
    if isinstance(query, unicode):
        # urllib2.quote cannot handle non-ASCII unicode objects; encode first.
        query = query.encode('utf-8')
    searchurl = 'http://www.chemspider.com/Search.asmx/SimpleSearch?query=%s&token=%s' % (urllib2.quote(query), TOKEN)
    response = urllib2.urlopen(searchurl)
    try:
        tree = ET.parse(response)
    finally:
        # Always release the connection, even if parsing fails.
        response.close()
    # Element.iter() replaces the deprecated getiterator().
    csid_tags = tree.getroot().iter('{http://www.chemspider.com/}int')
    compoundlist = [Compound(tag.text) for tag in csid_tags]
    return compoundlist if compoundlist else None
def find_one(query):
    """ Search by Name, SMILES, InChI, InChIKey, etc. Returns a single Compound

    Returns the first result of find(query), or None when find() yields
    nothing.
    """
    matches = find(query)
    if not matches:
        return None
    return matches[0]