Skip to content

Commit

Permalink
#2681 - Molfile V3000 with SGROUP type DAT fails to load in Ketcher due to missing spaces in FIELDDISP (#2683)
Browse files
Browse the repository at this point in the history
  • Loading branch information
AliaksandrDziarkach authored Dec 11, 2024
1 parent 07ab49b commit 2c40a7f
Show file tree
Hide file tree
Showing 4 changed files with 363 additions and 37 deletions.
134 changes: 134 additions & 0 deletions api/tests/integration/ref/basic/sgroups_basic.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -3346,3 +3346,137 @@ k
38 C
39 C
40 C

molecules/basic/2681-mol3000-fielddisp.mol

1 A G meS A G
-INDIGO-01000000002D

0 0 0 0 0 0 0 0 0 0 0 V3000
M V30 BEGIN CTAB
M V30 COUNTS 5 4 2 0 0
M V30 BEGIN ATOM
M V30 1 Ala 1.5563 -9.2328 0.0 0 CLASS=AA SEQID=1 ATTCHORD=(2 2 Br)
M V30 2 Gly 2.7137 -9.23 0.0 0 CLASS=AA SEQID=2 ATTCHORD=(4 3 Br 1 Al)
M V30 3 meS 3.8712 -9.23 0.0 0 CLASS=AA SEQID=3 ATTCHORD=(4 4 Br 2 Al)
M V30 4 Ala 5.4539 -9.23 0.0 0 CLASS=AA SEQID=4 ATTCHORD=(4 3 Al 5 Br)
M V30 5 Gly 6.6114 -9.23 0.0 0 CLASS=AA SEQID=5 ATTCHORD=(2 4 Al)
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 2 1
M V30 2 1 3 2
M V30 3 1 4 3
M V30 4 1 5 4
M V30 END BOND
M V30 BEGIN SGROUP
M V30 1 DAT 1 ATOMS=(1 1) FIELDNAME="SMMX:sequence position data" FIELDDISP=-
M V30 " 1.5563 -9.2328 ARU ALL 1 1 "
M V30 2 DAT 2 ATOMS=(1 2) FIELDNAME="SMMX:sequence pos data 2" FIELDDISP=" -
M V30 2.5563 -9.2328 AA ALL 1 1 "
M V30 END SGROUP
M V30 END CTAB
M V30 BEGIN TEMPLATE
M V30 TEMPLATE 1 AA/Ala/A/
M V30 BEGIN CTAB
M V30 COUNTS 7 6 3 0 0
M V30 BEGIN ATOM
M V30 1 O 6.6266 -2.0662 0.0 0
M V30 2 H 5.0016 -2.0876 0.0 0
M V30 3 N 5.1358 -2.0784 0.0 0
M V30 4 C 5.7844 -1.5983 0.0 0 CFG=2
M V30 5 C 6.4753 -2.0653 0.0 0
M V30 6 O 6.4753 -2.8977 0.0 0
M V30 7 C 5.7844 -0.7662 0.0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 3 4
M V30 2 1 4 5
M V30 3 2 5 6
M V30 4 1 4 7 CFG=1
M V30 5 1 3 2
M V30 6 1 5 1
M V30 END BOND
M V30 BEGIN COLLECTION
M V30 MDLV30/STEABS ATOMS=(1 4)
M V30 END COLLECTION
M V30 BEGIN SGROUP
M V30 1 SUP 1 ATOMS=(1 1) XBONDS=(1 6) BRKXYZ=(9 7.020000 -2.260000 0.000000-
M V30 7.020000 -1.850000 0.000000 0.000000 0.000000 0.000000) CSTATE=(4 6 --
M V30 0.820000 -0.010000 0.000000) LABEL=OH CLASS=LGRP
M V30 2 SUP 2 ATOMS=(1 2) XBONDS=(1 5) BRKXYZ=(9 4.580000 -1.870000 0.000000-
M V30 4.600000 -2.280000 0.000000 0.000000 0.000000 0.000000) CSTATE=(4 5 0-
M V30 .800000 0.020000 0.000000) LABEL=H CLASS=LGRP
M V30 3 SUP 3 ATOMS=(5 3 4 5 6 7) XBONDS=(2 5 6) BRKXYZ=(9 3.950000 -3.33000-
M V30 0 0.000000 3.950000 -0.380000 0.000000 0.000000 0.000000 0.000000) CST-
M V30 ATE=(4 5 -0.800000 -0.020000 0.000000) CSTATE=(4 6 0.820000 0.010000 0-
M V30 .000000) LABEL=A CLASS=AA SAP=(3 3 2 Al) SAP=(3 5 1 Br)
M V30 END SGROUP
M V30 END CTAB
M V30 TEMPLATE 2 AA/Gly/G/
M V30 BEGIN CTAB
M V30 COUNTS 6 5 3 0 0
M V30 BEGIN ATOM
M V30 1 N 3.676 -12.5274 0.0 0
M V30 2 C 4.2675 -12.095 0.0 0
M V30 3 O 4.8932 -13.2691 0.0 0
M V30 4 C 4.8904 -12.5161 0.0 0
M V30 5 O 5.1042 -12.5167 0.0 0
M V30 6 H 3.4542 -12.5125 0.0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 2 4
M V30 3 2 4 3
M V30 4 1 4 5
M V30 5 1 1 6
M V30 END BOND
M V30 BEGIN SGROUP
M V30 1 SUP 1 ATOMS=(1 5) XBONDS=(1 4) CSTATE=(4 4 -0.820000 -0.010000 0.000-
M V30 000) LABEL=OH CLASS=LGRP
M V30 2 SUP 2 ATOMS=(4 1 2 3 4) XBONDS=(2 4 5) CSTATE=(4 4 0.820000 0.010000-
M V30 0.000000) CSTATE=(4 5 -0.830000 0.010000 0.000000) LABEL=G CLASS=AA S-
M V30 AP=(3 4 5 Br) SAP=(3 1 6 Al)
M V30 3 SUP 3 ATOMS=(1 6) XBONDS=(1 5) CSTATE=(4 5 0.830000 -0.010000 0.0000-
M V30 00) LABEL=H CLASS=LGRP
M V30 END SGROUP
M V30 END CTAB
M V30 TEMPLATE 3 AA/meS/meS/ NATREPLACE=AA/S
M V30 BEGIN CTAB
M V30 COUNTS 9 8 3 0 0
M V30 BEGIN ATOM
M V30 1 C 9.9525 -5.6641 0.0 0
M V30 2 O 9.9451 -6.8641 0.0 0
M V30 3 N 7.3518 -5.6442 0.0 0
M V30 4 C 8.6579 -4.9049 0.0 0 CFG=1
M V30 5 C 8.6671 -3.4041 0.0 0
M V30 6 O 7.6319 -2.7971 0.0 0
M V30 7 C 6.3173 -5.0361 0.0 0
M V30 8 O 10.8217 -5.1697 0.0 0
M V30 9 H 7.3436 -6.6442 0.0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 2 2 1
M V30 2 1 1 4
M V30 3 1 4 3
M V30 4 1 4 5 CFG=1
M V30 5 1 5 6
M V30 6 1 3 7
M V30 7 1 1 8
M V30 8 1 3 9
M V30 END BOND
M V30 BEGIN COLLECTION
M V30 MDLV30/STEABS ATOMS=(1 4)
M V30 END COLLECTION
M V30 BEGIN SGROUP
M V30 1 SUP 1 ATOMS=(7 1 2 3 4 5 6 7) XBONDS=(2 7 8) CSTATE=(4 7 2.170000 0.-
M V30 480000 0.000000) CSTATE=(4 8 -1.310000 -0.990000 0.000000) LABEL=meS C-
M V30 LASS=AA SAP=(3 1 8 Br) SAP=(3 3 9 Al) NATREPLACE=AA/S
M V30 2 SUP 2 ATOMS=(1 8) XBONDS=(1 7) CSTATE=(4 7 -2.170000 -0.480000 0.000-
M V30 000) LABEL=OH CLASS=LGRP
M V30 3 SUP 3 ATOMS=(1 9) XBONDS=(1 8) CSTATE=(4 8 1.310000 0.990000 0.00000-
M V30 0) LABEL=H CLASS=LGRP
M V30 END SGROUP
M V30 END CTAB
M V30 END TEMPLATE
M END

7 changes: 6 additions & 1 deletion api/tests/integration/tests/basic/sgroups_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
os.path.join(os.path.abspath(__file__), "..", "..", "..", "common")
)
)
from env_indigo import * # noqa
from env_indigo import Indigo, dataPath # noqa

indigo = Indigo()
indigo.setOption("molfile-saving-skip-date", True)
Expand Down Expand Up @@ -151,3 +151,8 @@
print(g.getSGroupMultiplier())
for a in g.iterateAtoms():
print("{0} {1}".format(a.index(), a.symbol()))

fname = "molecules/basic/2681-mol3000-fielddisp.mol"
m = indigo.loadMoleculeFromFile(dataPath(fname))
print("\n%s\n" % fname)
print(m.molfile())
132 changes: 96 additions & 36 deletions core/indigo-core/molecule/src/molfile_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3600,6 +3600,8 @@ void MolfileLoader::_readSGroup3000(const char* str)
QS_DEF(Array<char>, substr);
substr.clear();
_readStringInQuotes(scanner, &substr);
if (substr.size() > 0)
substr.pop(); // remove trailing 0
if (dsg != 0)
{
BufferScanner subscan(substr);
Expand Down Expand Up @@ -3903,46 +3905,104 @@ void MolfileLoader::_readTGroups3000()

// Parses the display information of a Data SGroup from `scanner` and stores it
// in `dsg`: display position (x, y), the detached / relative / display_units
// flags, num_chars, tag and dasp_pos.
// Presumably the input is the quoted V3000 FIELDDISP value - TODO confirm
// against the caller.
//
// NOTE(review): this listing is a rendered diff with the +/- markers stripped,
// so statements of the pre-change and post-change versions of the body appear
// interleaved below and the text does not compile exactly as shown.
void MolfileLoader::_readSGroupDisplay(Scanner& scanner, DataSGroup& dsg)
{
// Fixed-offset parsing: two readFloatFix(10) reads for the position, then the
// flag characters are taken at hard-coded offsets.
dsg.display_pos.x = scanner.readFloatFix(10);
dsg.display_pos.y = scanner.readFloatFix(10);
scanner.skip(4);
if (scanner.readChar() == 'A') // means "attached"
dsg.detached = false;
else
dsg.detached = true;
if (scanner.readChar() == 'R')
dsg.relative = true;
if (scanner.readChar() == 'U')
dsg.display_units = true;

// Remember the current offset, measure the end offset, then return.
long long cur = scanner.tell();
scanner.seek(0LL, SEEK_END);
long long end = scanner.tell();
scanner.seek(cur, SEEK_SET);

scanner.skip(3);

// The next three characters are either the literal "ALL" (stored as
// num_chars == 0) or are re-read as a number via readInt1().
char chars[4] = {0, 0, 0, 0};
scanner.readCharsFix(3, chars);
if (strncmp(chars, "ALL", 3) == 0)
dsg.num_chars = 0;
else
try
{
scanner.seek(cur + 3, SEEK_CUR);
dsg.num_chars = scanner.readInt1();
}
// NOTE(review): from here on the statements repeat the steps above in a more
// lenient form; presumably this is the post-change version of the body.
//
// Input of at least MIN_SDD_SIZE characters is parsed by fixed offsets;
// shorter input is parsed by stepping over runs of spaces instead.
int constexpr MIN_SDD_SIZE = 36;
bool well_formatted = scanner.length() >= MIN_SDD_SIZE;
dsg.display_pos.x = scanner.readFloatFix(10);
dsg.display_pos.y = scanner.readFloatFix(10);
int ch = ' ';
if (well_formatted)
{
scanner.skip(4);
ch = scanner.readChar();
}
else
{
// Read at most 5 characters, stopping after the first non-space one.
for (int i = 0; i < 5 && ch == ' '; i++)
ch = scanner.readChar();
}
// First flag character: 'A' or 'D'; anything else is rejected.
if (ch == 'A') // means "attached"
dsg.detached = false;
else if (ch == 'D')
dsg.detached = true;
else
throw Error("Expected 'A' or 'D' but got '%c'.", ch);
// Second flag character: 'R' sets dsg.relative, 'A' leaves it untouched.
ch = scanner.readChar();
if (ch == 'R')
dsg.relative = true;
else if (ch != 'A')
throw Error("Expected 'A' or 'R' but got '%c'.", ch);
// Third flag character: 'U' sets dsg.display_units, a space leaves it untouched.
ch = scanner.readChar();
if (ch == 'U')
dsg.display_units = true;
else if (ch != ' ')
throw Error("Expected 'U' or ' ' but got '%c'.", ch);

if (well_formatted)
{
scanner.skip(3);
}
else
{
// Step over at most 4 spaces without consuming the first non-space character.
for (int i = 0; i < 4; i++)
{
ch = scanner.lookNext();
if (ch != ' ')
break;
scanner.skip(1);
}
}

// Offset of the three-character field that is either "ALL" or a number.
long long cur = scanner.tell();

char chars[4] = {0, 0, 0, 0};
scanner.readCharsFix(3, chars);
if (strncmp(chars, "ALL", 3) == 0)
dsg.num_chars = 0;
else
{
// NOTE(review): `cur` comes from tell() but is passed with SEEK_CUR rather
// than SEEK_SET - confirm that rewinding to the start of the field is what
// is intended here.
scanner.seek(cur, SEEK_CUR);
dsg.num_chars = scanner.readInt1();
}

if (well_formatted)
{
scanner.skip(7);
dsg.tag = scanner.readChar();
}
else
{
ch = ' ';
// read kkk: Number of lines to display (unused, always 1)
for (int i = 0; i < 3 && ch == ' '; i++)
ch = scanner.readChar();
ch = ' ';
// read tag
for (int i = 0; i < 5 && ch == ' '; i++)
ch = scanner.readChar();
// dsg.tag is only assigned when a non-space character was found.
if (ch != ' ')
dsg.tag = ch;
}

scanner.skip(7);
dsg.tag = scanner.readChar();
// Measure how much input remains after the tag, then return to that offset.
cur = scanner.tell();
scanner.seek(0LL, SEEK_END);
long long end = scanner.tell();
scanner.seek(cur, SEEK_SET);

if (end - cur + 1 > 16)
if (end - cur + 1 > 2)
{
// Skip two characters; a following digit '1'..'9' is stored in dsg.dasp_pos
// as its numeric value, and a line break ends parsing.
scanner.skip(2);
if (scanner.lookNext() == '\n' || scanner.lookNext() == '\r')
return;
int c = scanner.readChar();
if (c >= '1' && c <= '9')
dsg.dasp_pos = c - '0';
}
}
// NOTE(review): the exception is caught by value; catching by const reference
// is the usual C++ idiom.
catch (Scanner::Error)
{
scanner.skip(2);
if (scanner.lookNext() == '\n' || scanner.lookNext() == '\r')
return;
int c = scanner.readChar();
if (c >= '1' && c <= '9')
dsg.dasp_pos = c - '0';
// Ignore scanner error - just use default values.
}
}

Expand Down
Loading

0 comments on commit 2c40a7f

Please sign in to comment.