-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
203447c
commit 678160e
Showing
6 changed files
with
359 additions
and
130 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,114 +1,114 @@ | ||
group|detection_smarts|smarts|contribute|composed|molecular_weight | ||
CH3|[CX4H3]||"{""CH3"": 1}"|n|15.035 | ||
CH2|[CX4H2]||"{""CH2"": 1}"|n|14.027 | ||
CH|[CX4H]||"{""CH"": 1}"|n|13.019 | ||
C|[CX4H0]||"{""C"": 1}"|n|12.011 | ||
CH2=CH|[CH2]=[CH]||"{""CH2=CH"": 1}"|n|27.046 | ||
CH=CH|[CH]=[CH]||"{""CH=CH"": 1}"|n|26.038 | ||
CH2=C|[CH2]=[CH0]||"{""CH2=C"": 1}"|n|26.038 | ||
CH=C|[CH]=[CH0]||"{""CH=C"": 1}"|n|25.03 | ||
ACH|[cH]||"{""ACH"": 1}"|n|13.019 | ||
AC|[cH0]||"{""AC"": 1}"|n|12.011 | ||
ACCH3|[c][CX4H3]||"{""ACCH3"": 1, ""CH3"": -1, ""AC"": -1}"|y|27.046 | ||
ACCH2|[c][CX4H2]||"{""ACCH2"": 1, ""CH2"": -1, ""AC"": -1}"|y|26.038 | ||
ACCH|[c][CX4H]||"{""ACCH"": 1, ""CH"": -1, ""AC"": -1}"|y|25.03 | ||
OH|[OH]||"{""OH"": 1}"|n|17.007 | ||
CH3OH|[CH3][OH]||"{""CH3OH"": 1, ""CH3"": -1, ""OH"": -1}"|n|32.042 | ||
H2O|[OH2]||"{""H2O"": 1}"|n|18.015 | ||
ACOH|[cH0][OH]||"{""ACOH"": 1, ""OH"": -1, ""AC"": -1}"|y|29.018 | ||
CH3CO|[CH3]C(=O)([#6,Si])|[CH3]C(=O)|"{""CH3CO"": 1, ""CH3"" : -1}"|n|43.045 | ||
CH2CO|[CH2]C(=O)[$([#6,Si]);!$([CH3])]|[CH2]C(=O)|"{""CH2CO"": 1, ""CH2"": -1}"|n|42.037 | ||
HCO|[CH](=O)([#6,Si])|[CH](=O)|"{""HCO"": 1}"|n|29.018 | ||
CH3COO|[CH3][C](=O)[OH0]||"{""CH3COO"": 1, ""CH3"": -1, ""COO"": -1}"|y|59.044 | ||
CH2COO|[CH2][C](=O)[OH0]||"{""CH2COO"": 1, ""CH2"": -1, ""COO"": -1}"|y|58.036 | ||
HCOO|[CH](=O)[OH0]||"{""HCOO"": 1, ""COO"": -1}"|n|45.017 | ||
CH3O|[CH3]O[$([Si,#6]);!$([CH](=O));!$(C(=O)[!O])]|[CH3][OH0]|"{""CH3O"": 1, ""CH3"": -1}"|n|31.034 | ||
CH2O|[CH2]O[$([Si,#6]);!$([CH](=O));!$(C(=O)[!O]);!$([CH3])]|[CH2][OH0]|"{""CH2O"": 1, ""CH2"": -1}"|n|30.026 | ||
CHO|[CH]O[$([Si,#6]);!$([CH](=O));!$(C(=O)[!O]);!$([CH3]);!$([CH2])]|[CH][OH0]|"{""CHO"": 1, ""CH"": -1}"|n|29.018 | ||
THF|[C]1[CH2]O[CH2][C]1|[CH2;R][O;R]|"{""THF"": 1, ""CH2O"": -1}"|n|30.026 | ||
CH3NH2|[CH3][NH2]||"{""CH3NH2"": 1, ""CH3"": -1}"|n|31.058 | ||
CH2NH2|[CH2][NH2]||"{""CH2NH2"": 1, ""CH2"": -1}"|n|30.05 | ||
CHNH2|[CH][NH2]||"{""CHNH2"": 1, ""CH"": -1}"|n|29.042 | ||
CH3NH|[CH3][NH][#6,Si;!$([CH](=O));!$(C(=O)[!N])]|[CH3][NH]|"{""CH3NH"": 1, ""CH3"": -1}"|n|30.05 | ||
CH2NH|[CH2][NH][$([#6,Si;!$([CH](=O));!$(C(=O)[!N])])&!$([CH3])]|[CH2][NH]|"{""CH2NH"": 1, ""CH2"": -1}"|n|29.042 | ||
CHNH|[CH][NH][$([#6,Si;!$([CH](=O));!$(C(=O)[!N])])&!$([CH3])&!$([CH2])]|[CH][NH]|"{""CHNH"": 1, ""CH"": -1}"|n|28.034 | ||
CH3N|[CH3][NH0]([#6,Si;!$([CH](=O));!$(C(=O)[!N])])[#6,Si;!$([CH](=O));!$(C(=O)[!N])]|[CH3][NH0]|"{""CH3N"": 1, ""CH3"": -1}"|n|29.042 | ||
CH2N|[CH2][NH0]([$([#6,Si;!$([CH](=O));!$(C(=O)[!N])])&!$([CH3])])[$([#6,Si;!$([CH](=O));!$(C(=O)[!N])])&!$([CH3])]|[CH2][NH0]|"{""CH2N"": 1, ""CH2"": -1}"|n|28.034 | ||
ACNH2|[cH0][NH2]||"{""ACNH2"": 1, ""AC"": -1}"|n|28.034 | ||
C5H5N|[n](:[cH]:[cH]1):[cH]:[cH]:[cH]:1||"{""C5H5N"": 1, ""ACH"": -5}"|n|79.102 | ||
C5H4N|[n](:[cH0]:[cH]1):[cH]:[cH]:[cH]:1,[n](:[cH]:[cH0]1):[cH]:[cH]:[cH]:1,[n](:[cH]:[cH]1):[cH0]:[cH]:[cH]:1,[n](:[cH]:[cH]1):[cH]:[cH]:[cH0]:1||"{""C5H4N"": 1, ""ACH"": -4, ""AC"":-1}"|n|78.094 | ||
C5H3N|[n](:[cH]:[cH]1):[cH0]:[cH0]:[cH]:1,[n](:[cH]:[cH]1):[cH0]:[cH]:[cH0]:1,[n](:[cH]:[cH0]1):[cH0]:[cH]:[cH]:1,[n](:[cH0]:[cH]1):[cH0]:[cH]:[cH]:1,[n](:[cH]:[cH]1):[cH]:[cH0]:[cH0]:1||"{""C5H3N"": 1, ""ACH"": -3, ""AC"": -2}"|n|77.086 | ||
CH3CN|[CH3][C]#[N]||"{""CH3CN"": 1, ""CH3"": -1}"|n|41.053 | ||
CH2CN|[CH2][C]#[N]||"{""CH2CN"": 1, ""CH2"": -1}"|n|40.045 | ||
COOH|[CH0](*)(=O)(-[OH])|[CH0](=O)(-[OH])|"{""COOH"": 1, ""OH"": -1}"|n|45.017 | ||
HCOOH|[CH](=O)(-[OH])||"{""HCOOH"": 1, ""OH"": -1}"|n|46.025 | ||
CH2CL|[CH2](Cl)([!Cl])|[CH2][Cl]|"{""CH2CL"": 1, ""CH2"": -1}"|n|49.48 | ||
CHCL|[CH](Cl)([!Cl])([!Cl])|[CH][Cl]|"{""CHCL"": 1, ""CH"": -1}"|n|48.472 | ||
CCL|[CH0](Cl)([!Cl])([!Cl])([!Cl])|[CH0][Cl]|"{""CCL"": 1, ""C"": -1}"|n|47.464 | ||
CH2CL2|[CH2](Cl)(Cl)||"{""CH2CL2"": 1, ""CH2"": -1}"|n|84.933 | ||
CHCL2|[CH](Cl)(Cl)([!Cl])|[CH](Cl)(Cl)|"{""CHCL2"": 1, ""CH"": -1}"|n|83.925 | ||
CCL2|[CH0](Cl)(Cl)([!Cl])([!Cl])|[CH0](Cl)(Cl)|"{""CCL2"": 1, ""C"": -1}"|n|82.917 | ||
CHCL3|[CH](Cl)(Cl)(Cl)||"{""CHCL3"": 1, ""CH"": -1}"|n|119.378 | ||
CCL3|[CH0](Cl)(Cl)(Cl)([!Cl])|[CH0](Cl)(Cl)(Cl)|"{""CCL3"": 1, ""C"": -1}"|n|118.37 | ||
CCL4|[CH0](Cl)(Cl)(Cl)(Cl)||"{""CCL4"": 1, ""C"": -1}"|n|153.823 | ||
ACCL|[cH0](Cl)||"{""ACCL"": 1, ""AC"": -1}"|n|47.464 | ||
CH3NO2|[CH3]N(=O)(O)||"{""CH3NO2"": 1, ""CH3"": -1}"|n|61.04 | ||
CH2NO2|[CH2]N(=O)(O)||"{""CH2NO2"": 1, ""CH2"": -1}"|n|60.032 | ||
CHNO2|[CH]N(=O)(O)||"{""CHNO2"": 1, ""CH"": -1}"|n|59.024 | ||
ACNO2|[cH0]N(=O)(O)||"{""ACNO2"": 1, ""AC"": -1}"|n|58.016 | ||
CS2|C(=S)(=S)||"{""CS2"": 1}"|n|76.141 | ||
CH3SH|[CH3][SH]||"{""CH3SH"": 1, ""CH3"": -1}"|n|48.1072 | ||
CH2SH|[CH2][SH]||"{""CH2SH"": 1, ""CH2"": -1}"|n|47.0994 | ||
FURFURAL|c1coc(c1)[CH]=O||"{""FURFURAL"": 1, ""ACH"": -3, ""HCO"": -1, ""CH"": 1, ""AC"": -1}"|n|96.0842 | ||
DOH|[CH2]([OH])[CH2][OH]||"{""DOH"": 1, ""CH2"": -2, ""OH"": -2}"|n|62.0668 | ||
I|[I][#6,Si]|[I]|"{""I"": 1}"|n|126.9 | ||
BR|[Br][#6,Si]|[Br]|"{""BR"": 1}"|n|79.904 | ||
CH=-C|[CH]#[C][*]|[CH]#[C]|"{""CH=-C"": 1}"|n|25.0298 | ||
C=-C|[*][CH0]#[CH0][*]|[CH0]#[CH0]|"{""C=-C"": 1}"|n|24.022 | ||
DMSO|[CH3][S](=O)[CH3]||"{""DMSO"": 1, ""CH3"": -1, ""CH3S"": -1}"|n|78.1328 | ||
ACRY|[CH2]=[CH]-C#N||"{""ACRY"": 1, ""CH2=CH"": -1}"|n|53.0634 | ||
CL-(C=C)|Cl[C]=[C]||"{""CL-(C=C)"": 1}"|n|35.453 | ||
C=C|[CH0]=[CH0]||"{""C=C"": 1}"|n|24.022 | ||
ACF|[cH0]F||"{""ACF"": 1, ""AC"": -1}"|n|31.009 | ||
DMF|[CH3]N([CH3])[CH]=O||"{""DMF"": 1, ""CH3"": -2}"|n|73.0936 | ||
HCON(CH2)2|[*][CH2]N([CH2][*])[CH]=O|[CH2]N([CH2])[CH]=O|"{""HCON(CH2)2"": 1, ""CH2"": -2}"|n|71.078 | ||
CF3|[CH0](F)(F)(F)[!F]|[CH0](F)(F)(F)|"{""CF3"": 1, ""C"": -1}"|n|69.005 | ||
CF2|[CH0](F)(F)([!F])[!F]|[CH0](F)(F)|"{""CF2"": 1, ""C"": -1}"|n|50.007 | ||
CF|[CH0](F)([!F])([!F])[!F]|[CH0](F)|"{""CF"": 1, ""C"": -1}"|n|31.009 | ||
COO|[C](*)(=O)[OH0][#6,Si]|[C](=O)[OH0]|"{""COO"": 1}"|n|44.009 | ||
SIH3|[SiX4H3]||"{""SIH3"": 1}"|n|31.1094 | ||
SIH2|[SiX4H2]||"{""SIH2"": 1}"|n|30.1016 | ||
SIH|[SiX4H]||"{""SIH"": 1}"|n|29.0938 | ||
SI|[SiX4H0]||"{""SI"": 1}"|n|28.086 | ||
SIH2O|[SiX4H2]O[Si,CH0;!$([CH](=O));!$(C(=O)[!O])]|[SiX4H2]O|"{""SIH2O"": 1, ""SIH2"": -1}"|n|46.1006 | ||
SIHO|[SiX4H]O[Si,CH0;!$([SiH2]);!$([CH](=O));!$(C(=O)[!O])]|[SiX4H]O|"{""SIHO"": 1, ""SIH"": -1}"|n|45.0928 | ||
SIO|[SiX4H0]O[Si,CH0;!$([SiH]);!$([SiH2]);!$([CH](=O));!$(C(=O)[!O])]|[SiX4H0]O|"{""SIO"": 1, ""SI"": -1}"|n|44.085 | ||
NMP|[CH2]1N([CH3])[C](=O)[CH2][CH2]1||"{""NMP"": 1, ""CH2"": -2, ""AMCH3CH2"": -1}"|n|99.1312 | ||
CCL3F|[CH0](Cl)(Cl)(Cl)(F)||"{""CCL3F"": 1, ""CCL3"": -1, ""CF"": -1, ""C"": 1}"|n|137.368 | ||
CCL2F|[CH0](Cl)(Cl)(F)([!Cl&!F])|[CH0](Cl)(Cl)(F)|"{""CCL2F"": 1, ""CCL2"": -1, ""CF"": -1, ""C"": 1}"|n|101.915 | ||
HCCL2F|[CH](Cl)(Cl)(F)||"{""HCCL2F"": 1, ""CHCL2"": -1}"|n|102.9228 | ||
HCCLF|[CH](Cl)(F)([!Cl&!F])|[CH](Cl)(F)|"{""HCCLF"": 1, ""CHCL"": -1}"|n|67.4698 | ||
CCLF2|[CH0](Cl)(F)(F)([!Cl&!F])|[CH0](Cl)(F)(F)|"{""CCLF2"": 1, ""CCL"": -1, ""CF2"": -1, ""C"": 1}"|n|85.46 | ||
HCCLF2|[CH](Cl)(F)(F)||"{""HCCLF2"": 1, ""CHCL"": -1}"|n|86.4678 | ||
CCLF3|[CH0](Cl)(F)(F)(F)||"{""CCLF3"": 1, ""CCL"": -1, ""CF3"": -1, ""C"": 1}"|n|104.458 | ||
CCL2F2|[CH0](Cl)(Cl)(F)(F)||"{""CCL2F2"": 1, ""CCL2"": -1, ""CF2"": -1, ""C"": 1}"|n|120.913 | ||
AMH2|[CH0](=O)([NH2])([!O])|[CH0](=O)([NH2])|"{""AMH2"": 1}"|n|44.0326 | ||
AMHCH3|[CH0](=O)([NH][CH3])([!O])|[CH0](=O)([NH][CH3])|"{""AMHCH3"": 1, ""CH3"": -1}"|n|58.0592 | ||
AMHCH2|[CH0](=O)([NH][CH2])([!O])|[CH0](=O)([NH][CH2])|"{""AMHCH2"": 1, ""CH2"": -1}"|n|57.0514 | ||
AM(CH3)2|[CH0](=O)([N]([CH3])[CH3])([!O])|[CH0](=O)([N]([CH3])[CH3])|"{""AM(CH3)2"": 1, ""CH3"": -2}"|n|72.0858 | ||
AMCH3CH2|[CH0](=O)([N]([CH3])[CH2])([!O])|[CH0](=O)([N]([CH3])[CH2])|"{""AMCH3CH2"": 1, ""CH3"": -1, ""CH2"": -1}"|n|71.078 | ||
AM(CH2)2|[CH0](=O)([N]([CH2])[CH2])([!O])|[CH0](=O)([N]([CH2])[CH2])|"{""AM(CH2)2"": 1, ""CH2"": -2}"|n|70.0702 | ||
C2H5O2|[OH][CH2][CH2]O[#6,Si]|[OH][CH2][CH2]O|"{""C2H5O2"": 1, ""OH"": -1, ""CH2"": -1, ""CH2O"": -1}"|y|61.059 | ||
C2H4O2|[OH][CH2][CH]O[#6,Si]|[OH][CH2][CH]O|"{""C2H4O2"": 1, ""OH"": -1, ""CHO"": -1, ""CH2"": -1}"|y|60.0512 | ||
CH3S|[CH3][S][#6,Si]|[CH3][S]|"{""CH3S"": 1, ""CH3"": -1}"|n|47.0994 | ||
CH2S|[CH2][S][$([#6,Si])&!$([CH3])]|[CH2][S]|"{""CH2S"": 1, ""CH2"": -1}"|n|46.0916 | ||
CHS|[CH][S][$([#6,Si])&!$([CH3])&!$([CH2])]|[CH][S]|"{""CHS"": 1, ""CH"": -1}"|n|45.0838 | ||
MORPH|[CH2]1O[CH2][CH2][NH][CH2]1||"{""MORPH"": 1, ""CH2"": -2, ""CH2O"": -1, ""CH2NH"": -1}"|n|87.1202 | ||
C4H4S|[s](:[cH]:[cH]1):[cH]:[cH]:1||"{""C4H4S"": 1, ""ACH"": -4}"|n|84.1402 | ||
C4H3S|[s](:[cH0]:[cH]1):[cH]:[cH]:1,[s](:[cH]:[cH0]1):[cH]:[cH]:1||"{""C4H3S"": 1, ""ACH"": -3, ""AC"": -1}"|n|83.1324 | ||
C4H2S|[s](:[cH]:[cH]1):[cH0]:[cH0]:1,[s](:[cH]:[cH0]1):[cH0]:[cH]:1,[s](:[cH0]:[cH]1):[cH0]:[cH]:1,[s](:[cH]:[cH0]1):[cH]:[cH0]:1||"{""C4H2S"": 1, ""ACH"": -2, ""AC"": -2}"|n|82.1246 | ||
NCO|[NX2H0]=[CX2H0]=[OX1H0]||"{""NCO"": 1}"|n|42.017 | ||
(CH2)2SU|[CH2]S(=O)(=O)[CH2]||"{""(CH2)2SU"": 1, ""CH2"": -1, ""CH2S"": -1}"|n|92.1162 | ||
CH2CHSU|[CH2]S(=O)(=O)[CH]||"{""CH2CHSU"": 1, ""CH"": -1, ""CH2S"": -1}"|n|91.1084 | ||
IMIDAZOL|[c]1:[c]:[n]:[c]:[n]:1||"{""IMIDAZOL"": 1, ""ACH"": -3}"|n|68.0782 | ||
BTI|C(F)(F)(F)S(=O)(=O)[N-]S(=O)(=O)C(F)(F)F||"{""BTI"": 1, ""CF3"": -2}"|n|279.91784 | ||
group|smarts|molecular_weight | ||
CH3|[CX4H3]|15.035 | ||
CH2|[CX4H2]|14.027 | ||
CH|[CX4H]|13.019 | ||
C|[CX4H0]|12.011 | ||
CH2=CH|[CH2]=[CH]|27.046 | ||
CH=CH|[CH]=[CH]|26.038 | ||
CH2=C|[CH2]=[CH0]|26.038 | ||
CH=C|[CH]=[CH0]|25.03 | ||
ACH|[cH]|13.019 | ||
AC|[cH0]|12.011 | ||
ACCH3|[c][CX4H3]|27.046 | ||
ACCH2|[c][CX4H2]|26.038 | ||
ACCH|[c][CX4H]|25.03 | ||
OH|[OH]|17.007 | ||
CH3OH|[CH3][OH]|32.042 | ||
H2O|[OH2]|18.015 | ||
ACOH|[cH0][OH]|29.018 | ||
CH3CO|[CH3]C(=O)|43.045 | ||
CH2CO|[CH2]C(=O)|42.037 | ||
HCO|[CH](=O)|29.018 | ||
CH3COO|[CH3][C](=O)[OH0]|59.044 | ||
CH2COO|[CH2][C](=O)[OH0]|58.036 | ||
HCOO|[CH](=O)[OH0]|45.017 | ||
CH3O|[CH3][OH0]|31.034 | ||
CH2O|[CH2][OH0]|30.026 | ||
CHO|[CH][OH0]|29.018 | ||
THF|[CH2;R][O;R]|30.026 | ||
CH3NH2|[CH3][NH2]|31.058 | ||
CH2NH2|[CH2][NH2]|30.05 | ||
CHNH2|[CH][NH2]|29.042 | ||
CH3NH|[CH3][NH]|30.05 | ||
CH2NH|[CH2][NH]|29.042 | ||
CHNH|[CH][NH]|28.034 | ||
CH3N|[CH3][NH0]|29.042 | ||
CH2N|[CH2][NH0]|28.034 | ||
ACNH2|[cH0][NH2]|28.034 | ||
C5H5N|[n](:[cH]:[cH]1):[cH]:[cH]:[cH]:1|79.102 | ||
C5H4N|[n](:[cH0]:[cH]1):[cH]:[cH]:[cH]:1,[n](:[cH]:[cH0]1):[cH]:[cH]:[cH]:1,[n](:[cH]:[cH]1):[cH0]:[cH]:[cH]:1,[n](:[cH]:[cH]1):[cH]:[cH]:[cH0]:1|78.094 | ||
C5H3N|[n](:[cH]:[cH]1):[cH0]:[cH0]:[cH]:1,[n](:[cH]:[cH]1):[cH0]:[cH]:[cH0]:1,[n](:[cH]:[cH0]1):[cH0]:[cH]:[cH]:1,[n](:[cH0]:[cH]1):[cH0]:[cH]:[cH]:1,[n](:[cH]:[cH]1):[cH]:[cH0]:[cH0]:1|77.086 | ||
CH3CN|[CH3][C]#[N]|41.053 | ||
CH2CN|[CH2][C]#[N]|40.045 | ||
COOH|[CH0](=O)[OH]|45.017 | ||
HCOOH|[CH](=O)[OH]|46.025 | ||
CH2CL|[CH2][Cl]|49.48 | ||
CHCL|[CH][Cl]|48.472 | ||
CCL|[CH0][Cl]|47.464 | ||
CH2CL2|[CH2](Cl)(Cl)|84.933 | ||
CHCL2|[CH](Cl)(Cl)|83.925 | ||
CCL2|[CH0](Cl)(Cl)|82.917 | ||
CHCL3|[CH](Cl)(Cl)(Cl)|119.378 | ||
CCL3|[CH0](Cl)(Cl)(Cl)|118.37 | ||
CCL4|[CH0](Cl)(Cl)(Cl)(Cl)|153.823 | ||
ACCL|[cH0](Cl)|47.464 | ||
CH3NO2|[CH3]N(=O)(O)|61.04 | ||
CH2NO2|[CH2]N(=O)(O)|60.032 | ||
CHNO2|[CH]N(=O)(O)|59.024 | ||
ACNO2|[cH0]N(=O)(O)|58.016 | ||
CS2|C(=S)(=S)|76.141 | ||
CH3SH|[CH3][SH]|48.1072 | ||
CH2SH|[CH2][SH]|47.0994 | ||
FURFURAL|c1coc(c1)[CH]=O|96.0842 | ||
DOH|[CH2]([OH])[CH2][OH]|62.0668 | ||
I|[I]|126.9 | ||
BR|[Br]|79.904 | ||
CH=-C|[CH]#[C]|25.0298 | ||
C=-C|[CH0]#[CH0]|24.022 | ||
DMSO|[CH3][S](=O)[CH3]|78.1328 | ||
ACRY|[CH2]=[CH]-C#N|53.0634 | ||
CL-(C=C)|Cl[C]=[C]|35.453 | ||
C=C|[CH0]=[CH0]|24.022 | ||
ACF|[cH0]F|31.009 | ||
DMF|[CH3]N([CH3])[CH]=O|73.0936 | ||
HCON(CH2)2|[CH2]N([CH2])[CH]=O|71.078 | ||
CF3|[CH0](F)(F)(F)|69.005 | ||
CF2|[CH0](F)(F)|50.007 | ||
CF|[CH0](F)|31.009 | ||
COO|[C](=O)[OH0]|44.009 | ||
SIH3|[SiX4H3]|31.1094 | ||
SIH2|[SiX4H2]|30.1016 | ||
SIH|[SiX4H]|29.0938 | ||
SI|[SiX4H0]|28.086 | ||
SIH2O|[SiX4H2][OH0]|46.1006 | ||
SIHO|[SiX4H][OH0]|45.0928 | ||
SIO|[SiX4H0][OH0]|44.085 | ||
NMP|[CH2]1N([CH3])[C](=O)[CH2][CH2]1|99.1312 | ||
CCL3F|[CH0](Cl)(Cl)(Cl)(F)|137.368 | ||
CCL2F|[CH0](Cl)(Cl)(F)|101.915 | ||
HCCL2F|[CH](Cl)(Cl)(F)|102.9228 | ||
HCCLF|[CH](Cl)(F)|67.4698 | ||
CCLF2|[CH0](Cl)(F)(F)|85.46 | ||
HCCLF2|[CH](Cl)(F)(F)|86.4678 | ||
CCLF3|[CH0](Cl)(F)(F)(F)|104.458 | ||
CCL2F2|[CH0](Cl)(Cl)(F)(F)|120.913 | ||
AMH2|[CH0](=O)([NH2])|44.0326 | ||
AMHCH3|[CH0](=O)([NH][CH3])|58.0592 | ||
AMHCH2|[CH0](=O)([NH][CH2])|57.0514 | ||
AM(CH3)2|[CH0](=O)([N]([CH3])[CH3])|72.0858 | ||
AMCH3CH2|[CH0](=O)([N]([CH3])[CH2])|71.078 | ||
AM(CH2)2|[CH0](=O)([N]([CH2])[CH2])|70.0702 | ||
C2H5O2|[OH][CH2][CH2][OH0]|61.059 | ||
C2H4O2|[OH][CH2][CH][OH0]|60.0512 | ||
CH3S|[CH3][SH0]|47.0994 | ||
CH2S|[CH2][SH0]|46.0916 | ||
CHS|[CH][SH0]|45.0838 | ||
MORPH|[CH2]1O[CH2][CH2][NH][CH2]1|87.1202 | ||
C4H4S|[s](:[cH]:[cH]1):[cH]:[cH]:1|84.1402 | ||
C4H3S|[s](:[cH0]:[cH]1):[cH]:[cH]:1,[s](:[cH]:[cH0]1):[cH]:[cH]:1|83.1324 | ||
C4H2S|[s](:[cH]:[cH]1):[cH0]:[cH0]:1,[s](:[cH]:[cH0]1):[cH0]:[cH]:1,[s](:[cH0]:[cH]1):[cH0]:[cH]:1,[s](:[cH]:[cH0]1):[cH]:[cH0]:1|82.1246 | ||
NCO|[NX2H0]=[CX2H0]=[OX1H0]|42.017 | ||
(CH2)2SU|[CH2]S(=O)(=O)[CH2]|92.1162 | ||
CH2CHSU|[CH2]S(=O)(=O)[CH]|91.1084 | ||
IMIDAZOL|[c]1:[c]:[n]:[c]:[n]:1|68.0782 | ||
BTI|C(F)(F)(F)S(=O)(=O)[N-]S(=O)(=O)C(F)(F)F|279.91784 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
# Public API of the package: every name imported below is re-exported.
from .fragment import Fragment
from .fragmentation_model import FragmentationModel
from .fragmentation_unifac import unifac
from .fragmentation_unifac import unifac2


# `unifac2` is imported above, so it is listed here as well; otherwise
# `from <package> import *` would silently skip it.
__all__ = ["Fragment", "FragmentationModel", "unifac", "unifac2"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,118 @@ | ||
from typing import List | ||
|
||
import itertools | ||
|
||
from ugropy.refactor.fragment import Fragment | ||
|
||
from rdkit import Chem | ||
|
||
import numpy as np | ||
|
||
import pulp | ||
|
||
|
||
class FragmentationModel:
    """Detect the occurrences of a set of fragments in a molecule.

    Parameters
    ----------
    fragments : List[Fragment]
        Fragments to look for. Each one must expose a ``name`` and a
        ``mol_object`` usable as a substructure query.
    """

    def __init__(self, fragments: List[Fragment]):
        self.fragments = fragments

    def detect_fragments(self, molecule: Chem.rdchem.Mol):
        """Search every fragment of the model in ``molecule``.

        Parameters
        ----------
        molecule : Chem.rdchem.Mol
            Molecule in which the fragments are searched.

        Returns
        -------
        DetectionBatch
            Batch holding one entry per match of every fragment that was
            found at least once. Fragments without matches are not added.
        """
        batch = DetectionBatch(molecule)

        for fragment in self.fragments:
            matches = molecule.GetSubstructMatches(fragment.mol_object)

            # An empty result means the fragment is absent from the molecule.
            if matches:
                batch.add_fragment(fragment.name, matches)

        return batch
|
||
class DetectionBatch:
    """Store the fragment matches found on one molecule and resolve overlaps.

    Every match is stored in ``fragments`` under the key ``"<group>_<i>"``
    together with the list of atom indexes it covers. ``get_groups`` keeps,
    among the matches that share atoms with other matches, only the ones
    selected by a binary linear program, and then counts the surviving
    matches per group.

    Parameters
    ----------
    molecule : Chem.rdchem.Mol
        Molecule the matches belong to. Only its number of atoms is read.

    Attributes
    ----------
    n : int
        Number of atoms of the molecule.
    fragments : dict
        ``"<group>_<i>"`` -> list of atom indexes of that match.
    overlaped_fragments : dict
        Subset of ``fragments`` touching at least one shared atom.
    selected_fragments : list
        Keys of ``overlaped_fragments`` kept by ``solve_overlap``.
    overlaped_atoms : array-like
        Indexes of the atoms covered by more than one match.
    atoms_matrix : numpy.ndarray
        Matrix of shape (matches, atoms); 1 where a match covers an atom.
    solution_atoms : dict
        Group name -> list with the atom indexes of each kept match.
    solution : dict
        Group name -> number of kept matches.
    has_overlap : bool
        Initialised to ``False``.
    """

    def __init__(self, molecule: Chem.rdchem.Mol):
        self.n = molecule.GetNumAtoms()
        self.fragments = {}
        self.overlaped_fragments = {}
        self.selected_fragments = []
        self.overlaped_atoms = []
        self.atoms_matrix = np.array([])
        self.solution_atoms = {}
        self.solution = {}

        # NOTE(review): no method of this class updates this flag; confirm
        # whether callers expect it to be set when overlaps are detected.
        self.has_overlap = False

    def get_groups(self):
        """Resolve the overlaps and fill ``solution_atoms`` and ``solution``.

        Matches that overlap with others and were not selected by
        ``solve_overlap`` are removed from ``fragments``. The results are
        left on the instance; nothing is returned.
        """
        # Start from a clean result so that calling this method more than
        # once does not accumulate duplicated entries.
        self.solution_atoms = {}
        self.solution = {}

        self.build_overlap_matrix()
        self.get_overlaped_fragments()
        self.solve_overlap()

        selected = set(self.selected_fragments)
        for frag in self.overlaped_fragments:
            if frag not in selected:
                self.fragments.pop(frag, None)

        for frag, atoms in self.fragments.items():
            # Keys are "<group>_<i>": strip only the trailing index so that
            # group names containing underscores are preserved.
            name = frag.rsplit("_", 1)[0]
            self.solution_atoms.setdefault(name, []).append(atoms)

        for name, matches in self.solution_atoms.items():
            self.solution[name] = len(matches)

    def add_fragment(self, fragment_name: str, fragments: tuple):
        """Register every match of a fragment.

        Parameters
        ----------
        fragment_name : str
            Name of the fragment (group).
        fragments : tuple
            Matches of the fragment; each match is an iterable of atom
            indexes and is stored as a list under ``"<fragment_name>_<i>"``.
        """
        for i, match in enumerate(fragments):
            self.fragments[f"{fragment_name}_{i}"] = list(match)

    def build_overlap_matrix(self):
        """Build ``atoms_matrix`` with one row per match and one column per atom."""
        self.atoms_matrix = np.zeros((len(self.fragments), self.n))

        for row, atoms in enumerate(self.fragments.values()):
            self.atoms_matrix[row, atoms] = 1

    def get_overlaped_fragments(self):
        """Find the atoms covered more than once and the matches touching them.

        Requires ``atoms_matrix`` to be up to date (see
        ``build_overlap_matrix``). Replaces ``overlaped_atoms`` and
        ``overlaped_fragments``.
        """
        coverage = np.sum(self.atoms_matrix, axis=0)
        self.overlaped_atoms = np.argwhere(coverage > 1).flatten()

        # Rebuilt from scratch so entries from a previous call cannot linger.
        self.overlaped_fragments = {
            name: atoms
            for name, atoms in self.fragments.items()
            if np.isin(atoms, self.overlaped_atoms).any()
        }

    def solve_overlap(self):
        """Choose which overlapping matches to keep.

        A binary linear program is solved with PuLP's CBC solver: one
        variable per overlapping match, minimising the number of selected
        matches subject to every atom covered by those matches belonging to
        exactly one selected match. The names of the selected matches are
        stored in ``selected_fragments``.
        """
        names = list(self.overlaped_fragments)

        # Nothing overlaps: there is no problem to solve.
        if not names:
            self.selected_fragments = []
            return

        subsets = [set(atoms) for atoms in self.overlaped_fragments.values()]

        universe = {int(atom) for atom in self.overlaped_atoms}
        universe.update(itertools.chain.from_iterable(subsets))

        problem = pulp.LpProblem("Set_Cover_Problem", pulp.LpMinimize)

        x = pulp.LpVariable.dicts("x", range(len(names)), cat="Binary")

        # Objective: keep as few matches as possible.
        problem += pulp.lpSum([x[i] for i in range(len(names))])

        # Each atom must belong to exactly one of the selected matches.
        for elem in sorted(universe):
            covering = [x[i] for i, subset in enumerate(subsets) if elem in subset]
            problem += pulp.lpSum(covering) == 1

        solver = pulp.getSolver("PULP_CBC_CMD", msg=False)

        problem.solve(solver)

        selected = []
        for i, name in enumerate(names):
            value = pulp.value(x[i])
            # The solver reports floats (or None when no value is available),
            # so compare after rounding instead of testing for exactly 1.
            if value is not None and round(value) == 1:
                selected.append(name)

        self.selected_fragments = selected
|
||
|
Oops, something went wrong.