Skip to content

Commit

Permalink
very promising
Browse files Browse the repository at this point in the history
  • Loading branch information
SalvadorBrandolin committed Sep 6, 2024
1 parent 203447c commit 678160e
Show file tree
Hide file tree
Showing 6 changed files with 359 additions and 130 deletions.
2 changes: 1 addition & 1 deletion ugropy/groupscsv/unifac/unifac_info.csv
Original file line number Diff line number Diff line change
Expand Up @@ -111,4 +111,4 @@ NCO|109|51|1.0567|0.732
(CH2)2SU|118|55|2.6869|2.12
CH2CHSU|119|55|2.4595|1.808
IMIDAZOL|178|84|2.026|0.868
BTI|179|85|5.774|4.932
BTI|179|85|5.774|4.932
228 changes: 114 additions & 114 deletions ugropy/groupscsv/unifac/unifac_subgroups.csv
Original file line number Diff line number Diff line change
@@ -1,114 +1,114 @@
group|detection_smarts|smarts|contribute|composed|molecular_weight
CH3|[CX4H3]||"{""CH3"": 1}"|n|15.035
CH2|[CX4H2]||"{""CH2"": 1}"|n|14.027
CH|[CX4H]||"{""CH"": 1}"|n|13.019
C|[CX4H0]||"{""C"": 1}"|n|12.011
CH2=CH|[CH2]=[CH]||"{""CH2=CH"": 1}"|n|27.046
CH=CH|[CH]=[CH]||"{""CH=CH"": 1}"|n|26.038
CH2=C|[CH2]=[CH0]||"{""CH2=C"": 1}"|n|26.038
CH=C|[CH]=[CH0]||"{""CH=C"": 1}"|n|25.03
ACH|[cH]||"{""ACH"": 1}"|n|13.019
AC|[cH0]||"{""AC"": 1}"|n|12.011
ACCH3|[c][CX4H3]||"{""ACCH3"": 1, ""CH3"": -1, ""AC"": -1}"|y|27.046
ACCH2|[c][CX4H2]||"{""ACCH2"": 1, ""CH2"": -1, ""AC"": -1}"|y|26.038
ACCH|[c][CX4H]||"{""ACCH"": 1, ""CH"": -1, ""AC"": -1}"|y|25.03
OH|[OH]||"{""OH"": 1}"|n|17.007
CH3OH|[CH3][OH]||"{""CH3OH"": 1, ""CH3"": -1, ""OH"": -1}"|n|32.042
H2O|[OH2]||"{""H2O"": 1}"|n|18.015
ACOH|[cH0][OH]||"{""ACOH"": 1, ""OH"": -1, ""AC"": -1}"|y|29.018
CH3CO|[CH3]C(=O)([#6,Si])|[CH3]C(=O)|"{""CH3CO"": 1, ""CH3"" : -1}"|n|43.045
CH2CO|[CH2]C(=O)[$([#6,Si]);!$([CH3])]|[CH2]C(=O)|"{""CH2CO"": 1, ""CH2"": -1}"|n|42.037
HCO|[CH](=O)([#6,Si])|[CH](=O)|"{""HCO"": 1}"|n|29.018
CH3COO|[CH3][C](=O)[OH0]||"{""CH3COO"": 1, ""CH3"": -1, ""COO"": -1}"|y|59.044
CH2COO|[CH2][C](=O)[OH0]||"{""CH2COO"": 1, ""CH2"": -1, ""COO"": -1}"|y|58.036
HCOO|[CH](=O)[OH0]||"{""HCOO"": 1, ""COO"": -1}"|n|45.017
CH3O|[CH3]O[$([Si,#6]);!$([CH](=O));!$(C(=O)[!O])]|[CH3][OH0]|"{""CH3O"": 1, ""CH3"": -1}"|n|31.034
CH2O|[CH2]O[$([Si,#6]);!$([CH](=O));!$(C(=O)[!O]);!$([CH3])]|[CH2][OH0]|"{""CH2O"": 1, ""CH2"": -1}"|n|30.026
CHO|[CH]O[$([Si,#6]);!$([CH](=O));!$(C(=O)[!O]);!$([CH3]);!$([CH2])]|[CH][OH0]|"{""CHO"": 1, ""CH"": -1}"|n|29.018
THF|[C]1[CH2]O[CH2][C]1|[CH2;R][O;R]|"{""THF"": 1, ""CH2O"": -1}"|n|30.026
CH3NH2|[CH3][NH2]||"{""CH3NH2"": 1, ""CH3"": -1}"|n|31.058
CH2NH2|[CH2][NH2]||"{""CH2NH2"": 1, ""CH2"": -1}"|n|30.05
CHNH2|[CH][NH2]||"{""CHNH2"": 1, ""CH"": -1}"|n|29.042
CH3NH|[CH3][NH][#6,Si;!$([CH](=O));!$(C(=O)[!N])]|[CH3][NH]|"{""CH3NH"": 1, ""CH3"": -1}"|n|30.05
CH2NH|[CH2][NH][$([#6,Si;!$([CH](=O));!$(C(=O)[!N])])&!$([CH3])]|[CH2][NH]|"{""CH2NH"": 1, ""CH2"": -1}"|n|29.042
CHNH|[CH][NH][$([#6,Si;!$([CH](=O));!$(C(=O)[!N])])&!$([CH3])&!$([CH2])]|[CH][NH]|"{""CHNH"": 1, ""CH"": -1}"|n|28.034
CH3N|[CH3][NH0]([#6,Si;!$([CH](=O));!$(C(=O)[!N])])[#6,Si;!$([CH](=O));!$(C(=O)[!N])]|[CH3][NH0]|"{""CH3N"": 1, ""CH3"": -1}"|n|29.042
CH2N|[CH2][NH0]([$([#6,Si;!$([CH](=O));!$(C(=O)[!N])])&!$([CH3])])[$([#6,Si;!$([CH](=O));!$(C(=O)[!N])])&!$([CH3])]|[CH2][NH0]|"{""CH2N"": 1, ""CH2"": -1}"|n|28.034
ACNH2|[cH0][NH2]||"{""ACNH2"": 1, ""AC"": -1}"|n|28.034
C5H5N|[n](:[cH]:[cH]1):[cH]:[cH]:[cH]:1||"{""C5H5N"": 1, ""ACH"": -5}"|n|79.102
C5H4N|[n](:[cH0]:[cH]1):[cH]:[cH]:[cH]:1,[n](:[cH]:[cH0]1):[cH]:[cH]:[cH]:1,[n](:[cH]:[cH]1):[cH0]:[cH]:[cH]:1,[n](:[cH]:[cH]1):[cH]:[cH]:[cH0]:1||"{""C5H4N"": 1, ""ACH"": -4, ""AC"":-1}"|n|78.094
C5H3N|[n](:[cH]:[cH]1):[cH0]:[cH0]:[cH]:1,[n](:[cH]:[cH]1):[cH0]:[cH]:[cH0]:1,[n](:[cH]:[cH0]1):[cH0]:[cH]:[cH]:1,[n](:[cH0]:[cH]1):[cH0]:[cH]:[cH]:1,[n](:[cH]:[cH]1):[cH]:[cH0]:[cH0]:1||"{""C5H3N"": 1, ""ACH"": -3, ""AC"": -2}"|n|77.086
CH3CN|[CH3][C]#[N]||"{""CH3CN"": 1, ""CH3"": -1}"|n|41.053
CH2CN|[CH2][C]#[N]||"{""CH2CN"": 1, ""CH2"": -1}"|n|40.045
COOH|[CH0](*)(=O)(-[OH])|[CH0](=O)(-[OH])|"{""COOH"": 1, ""OH"": -1}"|n|45.017
HCOOH|[CH](=O)(-[OH])||"{""HCOOH"": 1, ""OH"": -1}"|n|46.025
CH2CL|[CH2](Cl)([!Cl])|[CH2][Cl]|"{""CH2CL"": 1, ""CH2"": -1}"|n|49.48
CHCL|[CH](Cl)([!Cl])([!Cl])|[CH][Cl]|"{""CHCL"": 1, ""CH"": -1}"|n|48.472
CCL|[CH0](Cl)([!Cl])([!Cl])([!Cl])|[CH0][Cl]|"{""CCL"": 1, ""C"": -1}"|n|47.464
CH2CL2|[CH2](Cl)(Cl)||"{""CH2CL2"": 1, ""CH2"": -1}"|n|84.933
CHCL2|[CH](Cl)(Cl)([!Cl])|[CH](Cl)(Cl)|"{""CHCL2"": 1, ""CH"": -1}"|n|83.925
CCL2|[CH0](Cl)(Cl)([!Cl])([!Cl])|[CH0](Cl)(Cl)|"{""CCL2"": 1, ""C"": -1}"|n|82.917
CHCL3|[CH](Cl)(Cl)(Cl)||"{""CHCL3"": 1, ""CH"": -1}"|n|119.378
CCL3|[CH0](Cl)(Cl)(Cl)([!Cl])|[CH0](Cl)(Cl)(Cl)|"{""CCL3"": 1, ""C"": -1}"|n|118.37
CCL4|[CH0](Cl)(Cl)(Cl)(Cl)||"{""CCL4"": 1, ""C"": -1}"|n|153.823
ACCL|[cH0](Cl)||"{""ACCL"": 1, ""AC"": -1}"|n|47.464
CH3NO2|[CH3]N(=O)(O)||"{""CH3NO2"": 1, ""CH3"": -1}"|n|61.04
CH2NO2|[CH2]N(=O)(O)||"{""CH2NO2"": 1, ""CH2"": -1}"|n|60.032
CHNO2|[CH]N(=O)(O)||"{""CHNO2"": 1, ""CH"": -1}"|n|59.024
ACNO2|[cH0]N(=O)(O)||"{""ACNO2"": 1, ""AC"": -1}"|n|58.016
CS2|C(=S)(=S)||"{""CS2"": 1}"|n|76.141
CH3SH|[CH3][SH]||"{""CH3SH"": 1, ""CH3"": -1}"|n|48.1072
CH2SH|[CH2][SH]||"{""CH2SH"": 1, ""CH2"": -1}"|n|47.0994
FURFURAL|c1coc(c1)[CH]=O||"{""FURFURAL"": 1, ""ACH"": -3, ""HCO"": -1, ""CH"": 1, ""AC"": -1}"|n|96.0842
DOH|[CH2]([OH])[CH2][OH]||"{""DOH"": 1, ""CH2"": -2, ""OH"": -2}"|n|62.0668
I|[I][#6,Si]|[I]|"{""I"": 1}"|n|126.9
BR|[Br][#6,Si]|[Br]|"{""BR"": 1}"|n|79.904
CH=-C|[CH]#[C][*]|[CH]#[C]|"{""CH=-C"": 1}"|n|25.0298
C=-C|[*][CH0]#[CH0][*]|[CH0]#[CH0]|"{""C=-C"": 1}"|n|24.022
DMSO|[CH3][S](=O)[CH3]||"{""DMSO"": 1, ""CH3"": -1, ""CH3S"": -1}"|n|78.1328
ACRY|[CH2]=[CH]-C#N||"{""ACRY"": 1, ""CH2=CH"": -1}"|n|53.0634
CL-(C=C)|Cl[C]=[C]||"{""CL-(C=C)"": 1}"|n|35.453
C=C|[CH0]=[CH0]||"{""C=C"": 1}"|n|24.022
ACF|[cH0]F||"{""ACF"": 1, ""AC"": -1}"|n|31.009
DMF|[CH3]N([CH3])[CH]=O||"{""DMF"": 1, ""CH3"": -2}"|n|73.0936
HCON(CH2)2|[*][CH2]N([CH2][*])[CH]=O|[CH2]N([CH2])[CH]=O|"{""HCON(CH2)2"": 1, ""CH2"": -2}"|n|71.078
CF3|[CH0](F)(F)(F)[!F]|[CH0](F)(F)(F)|"{""CF3"": 1, ""C"": -1}"|n|69.005
CF2|[CH0](F)(F)([!F])[!F]|[CH0](F)(F)|"{""CF2"": 1, ""C"": -1}"|n|50.007
CF|[CH0](F)([!F])([!F])[!F]|[CH0](F)|"{""CF"": 1, ""C"": -1}"|n|31.009
COO|[C](*)(=O)[OH0][#6,Si]|[C](=O)[OH0]|"{""COO"": 1}"|n|44.009
SIH3|[SiX4H3]||"{""SIH3"": 1}"|n|31.1094
SIH2|[SiX4H2]||"{""SIH2"": 1}"|n|30.1016
SIH|[SiX4H]||"{""SIH"": 1}"|n|29.0938
SI|[SiX4H0]||"{""SI"": 1}"|n|28.086
SIH2O|[SiX4H2]O[Si,CH0;!$([CH](=O));!$(C(=O)[!O])]|[SiX4H2]O|"{""SIH2O"": 1, ""SIH2"": -1}"|n|46.1006
SIHO|[SiX4H]O[Si,CH0;!$([SiH2]);!$([CH](=O));!$(C(=O)[!O])]|[SiX4H]O|"{""SIHO"": 1, ""SIH"": -1}"|n|45.0928
SIO|[SiX4H0]O[Si,CH0;!$([SiH]);!$([SiH2]);!$([CH](=O));!$(C(=O)[!O])]|[SiX4H0]O|"{""SIO"": 1, ""SI"": -1}"|n|44.085
NMP|[CH2]1N([CH3])[C](=O)[CH2][CH2]1||"{""NMP"": 1, ""CH2"": -2, ""AMCH3CH2"": -1}"|n|99.1312
CCL3F|[CH0](Cl)(Cl)(Cl)(F)||"{""CCL3F"": 1, ""CCL3"": -1, ""CF"": -1, ""C"": 1}"|n|137.368
CCL2F|[CH0](Cl)(Cl)(F)([!Cl&!F])|[CH0](Cl)(Cl)(F)|"{""CCL2F"": 1, ""CCL2"": -1, ""CF"": -1, ""C"": 1}"|n|101.915
HCCL2F|[CH](Cl)(Cl)(F)||"{""HCCL2F"": 1, ""CHCL2"": -1}"|n|102.9228
HCCLF|[CH](Cl)(F)([!Cl&!F])|[CH](Cl)(F)|"{""HCCLF"": 1, ""CHCL"": -1}"|n|67.4698
CCLF2|[CH0](Cl)(F)(F)([!Cl&!F])|[CH0](Cl)(F)(F)|"{""CCLF2"": 1, ""CCL"": -1, ""CF2"": -1, ""C"": 1}"|n|85.46
HCCLF2|[CH](Cl)(F)(F)||"{""HCCLF2"": 1, ""CHCL"": -1}"|n|86.4678
CCLF3|[CH0](Cl)(F)(F)(F)||"{""CCLF3"": 1, ""CCL"": -1, ""CF3"": -1, ""C"": 1}"|n|104.458
CCL2F2|[CH0](Cl)(Cl)(F)(F)||"{""CCL2F2"": 1, ""CCL2"": -1, ""CF2"": -1, ""C"": 1}"|n|120.913
AMH2|[CH0](=O)([NH2])([!O])|[CH0](=O)([NH2])|"{""AMH2"": 1}"|n|44.0326
AMHCH3|[CH0](=O)([NH][CH3])([!O])|[CH0](=O)([NH][CH3])|"{""AMHCH3"": 1, ""CH3"": -1}"|n|58.0592
AMHCH2|[CH0](=O)([NH][CH2])([!O])|[CH0](=O)([NH][CH2])|"{""AMHCH2"": 1, ""CH2"": -1}"|n|57.0514
AM(CH3)2|[CH0](=O)([N]([CH3])[CH3])([!O])|[CH0](=O)([N]([CH3])[CH3])|"{""AM(CH3)2"": 1, ""CH3"": -2}"|n|72.0858
AMCH3CH2|[CH0](=O)([N]([CH3])[CH2])([!O])|[CH0](=O)([N]([CH3])[CH2])|"{""AMCH3CH2"": 1, ""CH3"": -1, ""CH2"": -1}"|n|71.078
AM(CH2)2|[CH0](=O)([N]([CH2])[CH2])([!O])|[CH0](=O)([N]([CH2])[CH2])|"{""AM(CH2)2"": 1, ""CH2"": -2}"|n|70.0702
C2H5O2|[OH][CH2][CH2]O[#6,Si]|[OH][CH2][CH2]O|"{""C2H5O2"": 1, ""OH"": -1, ""CH2"": -1, ""CH2O"": -1}"|y|61.059
C2H4O2|[OH][CH2][CH]O[#6,Si]|[OH][CH2][CH]O|"{""C2H4O2"": 1, ""OH"": -1, ""CHO"": -1, ""CH2"": -1}"|y|60.0512
CH3S|[CH3][S][#6,Si]|[CH3][S]|"{""CH3S"": 1, ""CH3"": -1}"|n|47.0994
CH2S|[CH2][S][$([#6,Si])&!$([CH3])]|[CH2][S]|"{""CH2S"": 1, ""CH2"": -1}"|n|46.0916
CHS|[CH][S][$([#6,Si])&!$([CH3])&!$([CH2])]|[CH][S]|"{""CHS"": 1, ""CH"": -1}"|n|45.0838
MORPH|[CH2]1O[CH2][CH2][NH][CH2]1||"{""MORPH"": 1, ""CH2"": -2, ""CH2O"": -1, ""CH2NH"": -1}"|n|87.1202
C4H4S|[s](:[cH]:[cH]1):[cH]:[cH]:1||"{""C4H4S"": 1, ""ACH"": -4}"|n|84.1402
C4H3S|[s](:[cH0]:[cH]1):[cH]:[cH]:1,[s](:[cH]:[cH0]1):[cH]:[cH]:1||"{""C4H3S"": 1, ""ACH"": -3, ""AC"": -1}"|n|83.1324
C4H2S|[s](:[cH]:[cH]1):[cH0]:[cH0]:1,[s](:[cH]:[cH0]1):[cH0]:[cH]:1,[s](:[cH0]:[cH]1):[cH0]:[cH]:1,[s](:[cH]:[cH0]1):[cH]:[cH0]:1||"{""C4H2S"": 1, ""ACH"": -2, ""AC"": -2}"|n|82.1246
NCO|[NX2H0]=[CX2H0]=[OX1H0]||"{""NCO"": 1}"|n|42.017
(CH2)2SU|[CH2]S(=O)(=O)[CH2]||"{""(CH2)2SU"": 1, ""CH2"": -1, ""CH2S"": -1}"|n|92.1162
CH2CHSU|[CH2]S(=O)(=O)[CH]||"{""CH2CHSU"": 1, ""CH"": -1, ""CH2S"": -1}"|n|91.1084
IMIDAZOL|[c]1:[c]:[n]:[c]:[n]:1||"{""IMIDAZOL"": 1, ""ACH"": -3}"|n|68.0782
BTI|C(F)(F)(F)S(=O)(=O)[N-]S(=O)(=O)C(F)(F)F||"{""BTI"": 1, ""CF3"": -2}"|n|279.91784
group|smarts|molecular_weight
CH3|[CX4H3]|15.035
CH2|[CX4H2]|14.027
CH|[CX4H]|13.019
C|[CX4H0]|12.011
CH2=CH|[CH2]=[CH]|27.046
CH=CH|[CH]=[CH]|26.038
CH2=C|[CH2]=[CH0]|26.038
CH=C|[CH]=[CH0]|25.03
ACH|[cH]|13.019
AC|[cH0]|12.011
ACCH3|[c][CX4H3]|27.046
ACCH2|[c][CX4H2]|26.038
ACCH|[c][CX4H]|25.03
OH|[OH]|17.007
CH3OH|[CH3][OH]|32.042
H2O|[OH2]|18.015
ACOH|[cH0][OH]|29.018
CH3CO|[CH3]C(=O)|43.045
CH2CO|[CH2]C(=O)|42.037
HCO|[CH](=O)|29.018
CH3COO|[CH3][C](=O)[OH0]|59.044
CH2COO|[CH2][C](=O)[OH0]|58.036
HCOO|[CH](=O)[OH0]|45.017
CH3O|[CH3][OH0]|31.034
CH2O|[CH2][OH0]|30.026
CHO|[CH][OH0]|29.018
THF|[CH2;R][O;R]|30.026
CH3NH2|[CH3][NH2]|31.058
CH2NH2|[CH2][NH2]|30.05
CHNH2|[CH][NH2]|29.042
CH3NH|[CH3][NH]|30.05
CH2NH|[CH2][NH]|29.042
CHNH|[CH][NH]|28.034
CH3N|[CH3][NH0]|29.042
CH2N|[CH2][NH0]|28.034
ACNH2|[cH0][NH2]|28.034
C5H5N|[n](:[cH]:[cH]1):[cH]:[cH]:[cH]:1|79.102
C5H4N|[n](:[cH0]:[cH]1):[cH]:[cH]:[cH]:1,[n](:[cH]:[cH0]1):[cH]:[cH]:[cH]:1,[n](:[cH]:[cH]1):[cH0]:[cH]:[cH]:1,[n](:[cH]:[cH]1):[cH]:[cH]:[cH0]:1|78.094
C5H3N|[n](:[cH]:[cH]1):[cH0]:[cH0]:[cH]:1,[n](:[cH]:[cH]1):[cH0]:[cH]:[cH0]:1,[n](:[cH]:[cH0]1):[cH0]:[cH]:[cH]:1,[n](:[cH0]:[cH]1):[cH0]:[cH]:[cH]:1,[n](:[cH]:[cH]1):[cH]:[cH0]:[cH0]:1|77.086
CH3CN|[CH3][C]#[N]|41.053
CH2CN|[CH2][C]#[N]|40.045
COOH|[CH0](=O)[OH]|45.017
HCOOH|[CH](=O)[OH]|46.025
CH2CL|[CH2][Cl]|49.48
CHCL|[CH][Cl]|48.472
CCL|[CH0][Cl]|47.464
CH2CL2|[CH2](Cl)(Cl)|84.933
CHCL2|[CH](Cl)(Cl)|83.925
CCL2|[CH0](Cl)(Cl)|82.917
CHCL3|[CH](Cl)(Cl)(Cl)|119.378
CCL3|[CH0](Cl)(Cl)(Cl)|118.37
CCL4|[CH0](Cl)(Cl)(Cl)(Cl)|153.823
ACCL|[cH0](Cl)|47.464
CH3NO2|[CH3]N(=O)(O)|61.04
CH2NO2|[CH2]N(=O)(O)|60.032
CHNO2|[CH]N(=O)(O)|59.024
ACNO2|[cH0]N(=O)(O)|58.016
CS2|C(=S)(=S)|76.141
CH3SH|[CH3][SH]|48.1072
CH2SH|[CH2][SH]|47.0994
FURFURAL|c1coc(c1)[CH]=O|96.0842
DOH|[CH2]([OH])[CH2][OH]|62.0668
I|[I]|126.9
BR|[Br]|79.904
CH=-C|[CH]#[C]|25.0298
C=-C|[CH0]#[CH0]|24.022
DMSO|[CH3][S](=O)[CH3]|78.1328
ACRY|[CH2]=[CH]-C#N|53.0634
CL-(C=C)|Cl[C]=[C]|35.453
C=C|[CH0]=[CH0]|24.022
ACF|[cH0]F|31.009
DMF|[CH3]N([CH3])[CH]=O|73.0936
HCON(CH2)2|[CH2]N([CH2])[CH]=O|71.078
CF3|[CH0](F)(F)(F)|69.005
CF2|[CH0](F)(F)|50.007
CF|[CH0](F)|31.009
COO|[C](=O)[OH0]|44.009
SIH3|[SiX4H3]|31.1094
SIH2|[SiX4H2]|30.1016
SIH|[SiX4H]|29.0938
SI|[SiX4H0]|28.086
SIH2O|[SiX4H2][OH0]|46.1006
SIHO|[SiX4H][OH0]|45.0928
SIO|[SiX4H0][OH0]|44.085
NMP|[CH2]1N([CH3])[C](=O)[CH2][CH2]1|99.1312
CCL3F|[CH0](Cl)(Cl)(Cl)(F)|137.368
CCL2F|[CH0](Cl)(Cl)(F)|101.915
HCCL2F|[CH](Cl)(Cl)(F)|102.9228
HCCLF|[CH](Cl)(F)|67.4698
CCLF2|[CH0](Cl)(F)(F)|85.46
HCCLF2|[CH](Cl)(F)(F)|86.4678
CCLF3|[CH0](Cl)(F)(F)(F)|104.458
CCL2F2|[CH0](Cl)(Cl)(F)(F)|120.913
AMH2|[CH0](=O)([NH2])|44.0326
AMHCH3|[CH0](=O)([NH][CH3])|58.0592
AMHCH2|[CH0](=O)([NH][CH2])|57.0514
AM(CH3)2|[CH0](=O)([N]([CH3])[CH3])|72.0858
AMCH3CH2|[CH0](=O)([N]([CH3])[CH2])|71.078
AM(CH2)2|[CH0](=O)([N]([CH2])[CH2])|70.0702
C2H5O2|[OH][CH2][CH2][OH0]|61.059
C2H4O2|[OH][CH2][CH][OH0]|60.0512
CH3S|[CH3][SH0]|47.0994
CH2S|[CH2][SH0]|46.0916
CHS|[CH][SH0]|45.0838
MORPH|[CH2]1O[CH2][CH2][NH][CH2]1|87.1202
C4H4S|[s](:[cH]:[cH]1):[cH]:[cH]:1|84.1402
C4H3S|[s](:[cH0]:[cH]1):[cH]:[cH]:1,[s](:[cH]:[cH0]1):[cH]:[cH]:1|83.1324
C4H2S|[s](:[cH]:[cH]1):[cH0]:[cH0]:1,[s](:[cH]:[cH0]1):[cH0]:[cH]:1,[s](:[cH0]:[cH]1):[cH0]:[cH]:1,[s](:[cH]:[cH0]1):[cH]:[cH0]:1|82.1246
NCO|[NX2H0]=[CX2H0]=[OX1H0]|42.017
(CH2)2SU|[CH2]S(=O)(=O)[CH2]|92.1162
CH2CHSU|[CH2]S(=O)(=O)[CH]|91.1084
IMIDAZOL|[c]1:[c]:[n]:[c]:[n]:1|68.0782
BTI|C(F)(F)(F)S(=O)(=O)[N-]S(=O)(=O)C(F)(F)F|279.91784
2 changes: 1 addition & 1 deletion ugropy/refactor/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .fragment import Fragment
from .fragmentation_model import FragmentationModel
from .fragmentation_unifac import unifac
from .fragmentation_unifac import unifac2


# Public API of the package. ``unifac2`` is imported above alongside
# ``unifac`` and is listed here so that ``from ugropy.refactor import *``
# exposes it too.
__all__ = ["Fragment", "FragmentationModel", "unifac", "unifac2"]
103 changes: 100 additions & 3 deletions ugropy/refactor/fragmentation_model.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,118 @@
from typing import List

import itertools

from ugropy.refactor.fragment import Fragment

from rdkit import Chem

import numpy as np

import pulp


class FragmentationModel:
    """Group-contribution model described by a list of fragments.

    Parameters
    ----------
    fragments : List[Fragment]
        Fragments of the model. Each one must expose a ``name`` and a
        ``mol_object`` usable as an RDKit substructure query.

    Attributes
    ----------
    fragments : List[Fragment]
        The fragments given at construction, kept in the same order.
    """

    def __init__(self, fragments: "List[Fragment]"):
        self.fragments = fragments

    def detect_fragments(
        self, molecule: "Chem.rdchem.Mol"
    ) -> "DetectionBatch":
        """Collect every occurrence of every model fragment in a molecule.

        Parameters
        ----------
        molecule : Chem.rdchem.Mol
            Molecule to search.

        Returns
        -------
        DetectionBatch
            Batch holding one entry per match of each fragment that occurs
            at least once. Overlaps between matches are NOT resolved here.
        """
        batch = DetectionBatch(molecule)

        for fragment in self.fragments:
            matches = molecule.GetSubstructMatches(fragment.mol_object)

            # GetSubstructMatches returns an empty tuple when nothing matches;
            # such fragments are simply left out of the batch.
            if matches:
                batch.add_fragment(fragment.name, matches)

        return batch

class DetectionBatch:
    """Store the fragment matches of one molecule and resolve their overlaps.

    Every match is stored in ``fragments`` under the key
    ``"<fragment name>_<match index>"`` together with the list of atom
    indexes it covers. ``get_groups`` then drops matches until every atom
    that was claimed by more than one match is covered exactly once, using
    as few matches as possible.

    Parameters
    ----------
    molecule : Chem.rdchem.Mol
        Molecule being fragmented. Only its number of atoms is read.
    """

    def __init__(self, molecule: "Chem.rdchem.Mol"):
        # Number of atoms of the molecule (number of matrix columns).
        self.n = molecule.GetNumAtoms()
        # "<name>_<index>" -> atom indexes of that match.
        self.fragments = {}
        # Subset of ``fragments`` touching at least one overlapped atom.
        self.overlaped_fragments = {}
        # Keys of ``overlaped_fragments`` kept by the solver.
        self.selected_fragments = []
        # Indexes of the atoms covered by more than one match.
        self.overlaped_atoms = []
        # (number of matches, n) matrix; 1 where a match covers an atom.
        self.atoms_matrix = np.array([])
        # Fragment name -> list of atom-index lists of the kept matches.
        self.solution_atoms = {}
        # Fragment name -> number of kept matches.
        self.solution = {}

        # True once ``get_overlaped_fragments`` finds overlapping matches.
        self.has_overlap = False

    def get_groups(self):
        """Resolve the overlaps and fill ``solution`` and ``solution_atoms``.

        Overlapping matches that the solver does not select are removed from
        ``fragments``. The results are rebuilt from scratch on every call, so
        calling this method more than once does not double count.

        Raises
        ------
        RuntimeError
            If the overlapped atoms cannot be covered exactly once.
        """
        self.build_overlap_matrix()
        self.get_overlaped_fragments()
        self.solve_overlap()

        selected = set(self.selected_fragments)
        for frag in self.overlaped_fragments:
            if frag not in selected:
                self.fragments.pop(frag, None)

        self.solution_atoms = {}
        for frag, atoms in self.fragments.items():
            # Keys are "<name>_<index>": split from the right so that names
            # which themselves contain "_" are kept whole.
            name = frag.rsplit("_", 1)[0]
            self.solution_atoms.setdefault(name, []).append(atoms)

        self.solution = {
            name: len(matches) for name, matches in self.solution_atoms.items()
        }

    def add_fragment(self, fragment_name: str, fragments: tuple):
        """Register all the matches found for one fragment.

        Parameters
        ----------
        fragment_name : str
            Name of the fragment.
        fragments : tuple
            One tuple of atom indexes per match.
        """
        for i, atoms in enumerate(fragments):
            self.fragments[f"{fragment_name}_{i}"] = list(atoms)

    def build_overlap_matrix(self):
        """Build the matches-by-atoms coverage matrix ``atoms_matrix``."""
        self.atoms_matrix = np.zeros((len(self.fragments), self.n))

        for i, atoms in enumerate(self.fragments.values()):
            self.atoms_matrix[i, atoms] = 1

    def get_overlaped_fragments(self):
        """Find the overlapped atoms and the matches that contain them.

        Updates ``overlaped_atoms``, ``overlaped_fragments`` (rebuilt, not
        accumulated) and ``has_overlap``.
        """
        coverage = np.sum(self.atoms_matrix, axis=0)
        self.overlaped_atoms = np.argwhere(coverage > 1).flatten()

        self.overlaped_fragments = {
            name: atoms
            for name, atoms in self.fragments.items()
            if np.isin(atoms, self.overlaped_atoms).any()
        }
        self.has_overlap = bool(self.overlaped_fragments)

    def solve_overlap(self):
        """Select the overlapping matches to keep.

        Solves a binary program that minimizes the number of selected
        matches subject to every atom of the overlapping matches being
        covered by exactly one selected match. The selected keys are stored
        in ``selected_fragments``.

        Raises
        ------
        RuntimeError
            If the solver does not reach an optimal solution (for example
            when no exact cover exists).
        """
        if not self.overlaped_fragments:
            # Nothing overlaps: skip the external solver entirely.
            self.selected_fragments = []
            return

        names = list(self.overlaped_fragments)
        atom_sets = [set(atoms) for atoms in self.overlaped_fragments.values()]

        universe = set(map(int, self.overlaped_atoms))
        universe.update(
            itertools.chain.from_iterable(self.overlaped_fragments.values())
        )

        problem = pulp.LpProblem("Set_Cover_Problem", pulp.LpMinimize)

        n_frag = len(names)
        x = pulp.LpVariable.dicts("x", range(n_frag), cat="Binary")

        # Objective: use as few matches as possible.
        problem += pulp.lpSum([x[i] for i in range(n_frag)])

        # One constraint per atom: exactly one selected match may cover it.
        for atom in universe:
            covering = [
                x[i] for i, atoms in enumerate(atom_sets) if atom in atoms
            ]
            problem += pulp.lpSum(covering) == 1

        solver = pulp.getSolver("PULP_CBC_CMD", msg=False)
        status = problem.solve(solver)

        if status != pulp.LpStatusOptimal:
            # Without this check the variable values are meaningless and
            # an arbitrary set of matches would be silently selected.
            raise RuntimeError(
                "Could not resolve the overlapping fragments "
                f"(solver status: {pulp.LpStatus[status]})."
            )

        chosen = []
        for i, name in enumerate(names):
            value = pulp.value(x[i])
            # Round to absorb solver floating point noise on binaries.
            if value is not None and round(value) == 1:
                chosen.append(name)

        self.selected_fragments = chosen


Loading

0 comments on commit 678160e

Please sign in to comment.