Skip to content

Commit

Permalink
#2287 Wrong smiles in HELM (#2288)
Browse files: browse the repository at this point in the history
  • Loading branch information
AliaksandrDziarkach authored Aug 31, 2024
1 parent 8b28699 commit 757fbb3
Show file tree
Hide file tree
Showing 3 changed files with 222 additions and 89 deletions.
2 changes: 1 addition & 1 deletion api/tests/integration/tests/formats/ket_to_helm.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def find_diff(a, b):
"aminoacids_variants": "PEPTIDE1{(D+N).(L+I).(E+Q).(A+C+D+E+F+G+H+I+K+L+M+N+O+P+Q+R+S+T+U+V+W+Y)}$$$$V2.0",
"dna_variants": "RNA1{[dR](C+G+T)P.[dR](A+C+G+T)}$$$$V2.0",
"rna_variants": "RNA1{R(G+T)P.R(A+C+G+T)}$$$$V2.0",
"helm_monomer_molecule": "PEPTIDE1{A}|CHEM1{[C(N%91)=C%92.[*:1]%92.[*:2]%91 |$;;;_R1;_R2$|}$CHEM1,PEPTIDE1,1:R2-1:R1$$$V2.0",
"helm_monomer_molecule": "PEPTIDE1{A}|PEPTIDE2{G}|CHEM1{[C(N[*:2])=C[*:1] |$;;_R2;;_R1$|]}$CHEM1,PEPTIDE1,1:R2-1:R1$$$V2.0",
}

for filename in sorted(helm_data.keys()):
Expand Down
164 changes: 148 additions & 16 deletions api/tests/integration/tests/formats/ref/helm_monomer_molecule.ket
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
"root": {
"nodes": [
{
"$ref": "monomer2"
"$ref": "monomer98"
},
{
"$ref": "monomer102"
},
{
"$ref": "mol0"
Expand All @@ -16,14 +19,17 @@
"atomId": "3"
},
"endpoint2": {
"monomerId": "monomer2",
"monomerId": "monomer98",
"attachmentPointId": "R1"
}
}
],
"templates": [
{
"$ref": "monomerTemplate-A___Alanine"
},
{
"$ref": "monomerTemplate-G___Glycine"
}
]
},
Expand All @@ -33,40 +39,40 @@
{
"label": "C",
"location": [
15.525000000000002,
-7.1499999999999995,
1.25,
-1.5,
0
]
},
{
"label": "C",
"location": [
16.39102540378444,
-6.649999999999999,
2.1160254037844375,
-0.9999999999999982,
0
]
},
{
"label": "H",
"location": [
17.25705080756888,
-7.150000000000001,
2.9820508075688785,
-1.5,
0
]
},
{
"label": "N",
"location": [
14.658974596215565,
-6.649999999999999,
0.3839745962155625,
-0.9999999999999982,
0
]
},
{
"label": "H",
"location": [
13.792949192431125,
-7.1499999999999995,
-0.482050807568875,
-1.5,
0
]
}
Expand Down Expand Up @@ -129,12 +135,12 @@
}
]
},
"monomer2": {
"monomer98": {
"type": "monomer",
"id": "2",
"id": "98",
"position": {
"x": 17.88125,
"y": -6.8812500000000005
"x": 2.75,
"y": -1.25
},
"alias": "A",
"templateId": "A___Alanine"
Expand Down Expand Up @@ -271,5 +277,131 @@
}
],
"naturalAnalogShort": "A"
},
"monomer102": {
"type": "monomer",
"id": "102",
"position": {
"x": 6.706250000000001,
"y": -12.01875
},
"alias": "G",
"templateId": "G___Glycine"
},
"monomerTemplate-G___Glycine": {
"type": "monomerTemplate",
"atoms": [
{
"label": "C",
"location": [
-0.27106427853591913,
0.43089789861820504,
0
]
},
{
"label": "C",
"location": [
0.8002965273812491,
-0.08922633254215358,
0
]
},
{
"label": "O",
"location": [
0.869049970613821,
-1.0389588314980664,
0
]
},
{
"label": "O",
"location": [
1.5885833677265626,
0.44492263381453273,
0
]
},
{
"label": "N",
"location": [
-1.0687815442718667,
-0.08922633254215358,
0
]
},
{
"label": "H",
"location": [
-1.918084042913847,
0.3415909641496358,
0
]
}
],
"bonds": [
{
"type": 1,
"atoms": [
0,
1
]
},
{
"type": 2,
"atoms": [
1,
2
]
},
{
"type": 1,
"atoms": [
1,
3
]
},
{
"type": 1,
"atoms": [
0,
4
]
},
{
"type": 1,
"atoms": [
4,
5
]
}
],
"class": "AminoAcid",
"classHELM": "PEPTIDE",
"id": "G___Glycine",
"fullName": "Glycine",
"alias": "G",
"attachmentPoints": [
{
"attachmentAtom": 4,
"leavingGroup": {
"atoms": [
5
]
},
"type": "left"
},
{
"attachmentAtom": 1,
"leavingGroup": {
"atoms": [
3
]
},
"type": "right"
}
],
"naturalAnalogShort": "G"
}
}
145 changes: 73 additions & 72 deletions core/indigo-core/molecule/src/sequence_saver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1168,84 +1168,85 @@ std::string SequenceSaver::saveHELM(KetDocument& document, std::vector<std::dequ
}
if (monomer_idx)
helm_string += '}'; // Finish polymer
auto& molecules = document.jsonMolecules();
int molecule_idx = 0;
rapidjson::Document json{};
for (rapidjson::SizeType i = 0; i < molecules.Size(); i++)
}
auto& molecules = document.jsonMolecules();
int molecule_idx = 0;
rapidjson::Document json{};
for (rapidjson::SizeType i = 0; i < molecules.Size(); i++)
{
const auto& molecule = molecules[i];
std::string mol_id = "mol" + std::to_string(molecule_idx++);
rapidjson::Value marr(rapidjson::kArrayType);
marr.PushBack(json.CopyFrom(molecule, json.GetAllocator()), json.GetAllocator());
MoleculeJsonLoader loader(marr);
BaseMolecule* pbmol;
Molecule mol;
QueryMolecule qmol;
try
{
const auto& molecule = molecules[i];
std::string mol_id = "mol" + std::to_string(molecule_idx++);
rapidjson::Value marr(rapidjson::kArrayType);
marr.PushBack(json.CopyFrom(molecule, json.GetAllocator()), json.GetAllocator());
MoleculeJsonLoader loader(marr);
BaseMolecule* pbmol;
Molecule mol;
QueryMolecule qmol;
try
{
loader.loadMolecule(mol);
pbmol = &mol;
}
catch (...)
{
loader.loadMolecule(qmol);
pbmol = &qmol;
}
// convert Sup sgroup without name attachment points to rg-labels
auto& sgroups = pbmol->sgroups;
for (int i = sgroups.begin(); i != sgroups.end(); i = sgroups.next(i))
loader.loadMolecule(mol);
pbmol = &mol;
}
catch (...)
{
loader.loadMolecule(qmol);
pbmol = &qmol;
}
// convert Sup sgroup without name attachment points to rg-labels
auto& sgroups = pbmol->sgroups;
for (int i = sgroups.begin(); i != sgroups.end(); i = sgroups.next(i))
{
auto& sgroup = sgroups.getSGroup(i);
if (sgroup.sgroup_type != SGroup::SG_TYPE_SUP)
continue;
Superatom& sa = static_cast<Superatom&>(sgroup);
if (sa.subscript.size() != 0 && sa.subscript.ptr()[0] != 0)
continue;
// convert leaving atom H to rg-ref
auto res = mol_atom_to_ap.try_emplace(mol_id);
auto& atom_to_ap = res.first;
static std::string apid_prefix{'R'};
Array<int> leaving_atoms;
for (int ap_id = sa.attachment_points.begin(); ap_id != sa.attachment_points.end(); ap_id = sa.attachment_points.next(ap_id))
{
auto& sgroup = sgroups.getSGroup(i);
if (sgroup.sgroup_type != SGroup::SG_TYPE_SUP)
continue;
Superatom& sa = static_cast<Superatom&>(sgroup);
if (sa.subscript.size() != 0 && sa.subscript.ptr()[0] != 0)
continue;
// convert leaving atom H to rg-ref
auto res = mol_atom_to_ap.try_emplace(mol_id);
auto& atom_to_ap = res.first;
static std::string apid_prefix{'R'};
Array<int> leaving_atoms;
for (int ap_id = sa.attachment_points.begin(); ap_id != sa.attachment_points.end(); ap_id = sa.attachment_points.next(ap_id))
auto& ap = sa.attachment_points.at(ap_id);
std::string apid = apid_prefix + ap.apid.ptr();
atom_to_ap->second.emplace(ap.aidx, apid);
int leaving_atom = ap.lvidx;
int ap_idx = std::stoi(ap.apid.ptr());
if (pbmol == &mol)
{
auto& ap = sa.attachment_points.at(ap_id);
std::string apid = apid_prefix + ap.apid.ptr();
atom_to_ap->second.emplace(ap.aidx, apid);
int leaving_atom = ap.lvidx;
int ap_idx = std::stoi(ap.apid.ptr());
if (pbmol == &mol)
{
mol.resetAtom(leaving_atom, ELEM_RSITE);
mol.allowRGroupOnRSite(leaving_atom, ap_idx);
}
else
{
auto rsite = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_RSITE, 0);
qmol.resetAtom(leaving_atom, rsite.release());
qmol.allowRGroupOnRSite(leaving_atom, ap_idx);
}
mol.resetAtom(leaving_atom, ELEM_RSITE);
mol.allowRGroupOnRSite(leaving_atom, ap_idx);
}
else
{
auto rsite = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_RSITE, 0);
qmol.resetAtom(leaving_atom, rsite.release());
qmol.allowRGroupOnRSite(leaving_atom, ap_idx);
}
sgroups.remove(i);
}
// generate smiles
std::string smiles;
StringOutput s_out(smiles);
SmilesSaver saver(s_out);
if (pbmol == &mol)
saver.saveMolecule(mol);
else
saver.saveQueryMolecule(qmol);
// save as chem
if (helm_string.size() > 0)
helm_string += '|';
helm_string += "CHEM";
polymer_idx = ++chem_idx;
helm_string += std::to_string(polymer_idx);
helm_string += "{[";
helm_string += smiles;
helm_string += '}';
monomer_id_to_monomer_info.emplace(std::make_pair(mol_id, std::make_tuple(HELMType::Chem, polymer_idx, 1)));
sgroups.remove(i);
}
// generate smiles
std::string smiles;
StringOutput s_out(smiles);
SmilesSaver saver(s_out);
saver.separate_rsites = false;
if (pbmol == &mol)
saver.saveMolecule(mol);
else
saver.saveQueryMolecule(qmol);
// save as chem
if (helm_string.size() > 0)
helm_string += '|';
helm_string += "CHEM";
int polymer_idx = ++chem_idx;
helm_string += std::to_string(polymer_idx);
helm_string += "{[";
helm_string += smiles;
helm_string += "]}";
monomer_id_to_monomer_info.emplace(std::make_pair(mol_id, std::make_tuple(HELMType::Chem, polymer_idx, 1)));
}
helm_string += '$';
// Add connections
Expand Down

0 comments on commit 757fbb3

Please sign in to comment.