From d1616f09b2d455cf9db5af50eb85f024a1a6abb2 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 16 Oct 2024 12:23:12 +0200 Subject: [PATCH] have cg charges --- cgsmiles/read_cgsmiles.py | 12 +++++++++- cgsmiles/tests/test_cgsmile_parsing.py | 31 ++++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/cgsmiles/read_cgsmiles.py b/cgsmiles/read_cgsmiles.py index f56180c..e3368df 100644 --- a/cgsmiles/read_cgsmiles.py +++ b/cgsmiles/read_cgsmiles.py @@ -185,6 +185,16 @@ def read_cgsmiles(pattern): # the fragname starts at the second character and ends # one before the last according to the above pattern fragname = match.group(0)[2:-1] + # check for charge + charge = 0.0 + for sign in ["+", "-"]: + if sign in fragname: + fragname, charge = fragname.split(sign) + if len(charge) == 0: + charge = float(sign+"1") + else: + charge = float(sign+charge) + # if this residue is part of a branch we store it in # the recipe dict together with the anchor residue # and expansion number @@ -194,7 +204,7 @@ def read_cgsmiles(pattern): # new we add new residue as often as required connection = [] for _ in range(0, n_mon): - mol_graph.add_node(current, fragname=fragname) + mol_graph.add_node(current, fragname=fragname, charge=charge) if prev_node is not None: mol_graph.add_edge(prev_node, current, order=1) diff --git a/cgsmiles/tests/test_cgsmile_parsing.py b/cgsmiles/tests/test_cgsmile_parsing.py index ccedcc0..f66e934 100644 --- a/cgsmiles/tests/test_cgsmile_parsing.py +++ b/cgsmiles/tests/test_cgsmile_parsing.py @@ -3,50 +3,65 @@ from cgsmiles import read_cgsmiles from cgsmiles.read_fragments import strip_bonding_descriptors, fragment_iter -@pytest.mark.parametrize('smile, nodes, edges, orders',( +@pytest.mark.parametrize('smile, nodes, charges, edges, orders',( # smiple linear sequence ("{[#PMA][#PEO][#PMA]}", ["PMA", "PEO", "PMA"], + None, + [(0, 1), (1, 2)], + [1, 1]), + # smiple charges + ("{[#PMA+][#PEO][#PMA-0.25]}", + ["PMA", "PEO", "PMA"], + {0: 1.0, 1: 0.0, 2:-0.25}, [(0, 1), (1, 2)], [1, 1]), # smiple linear sequenece with multi-edge ("{[#PMA]1[#PEO]1}", ["PMA", "PEO"], + None, [(0, 1)], [2]), # simple branched sequence ("{[#PMA][#PMA]([#PEO][#PEO])[#PMA]}", ["PMA", "PMA", "PEO", "PEO", "PMA"], + None, [(0, 1), (1, 2), (2, 3), (1, 4)], [1, 1, 1, 1]), # simple sequence two branches ("{[#PMA][#PMA][#PMA]([#PEO][#PEO])([#CH3])[#PMA]}", ["PMA", "PMA", "PMA", "PEO", "PEO", "CH3", "PMA"], + None, [(0, 1), (1, 2), (2, 3), (3, 4), (2, 5), (2, 6)], [1, 1, 1, 1, 1, 1]), # simple linear sequence with expansion ("{[#PMA]|3}", ["PMA", "PMA", "PMA"], + None, [(0, 1), (1, 2)], [1, 1]), # smiple cycle sequence ("{[#PMA]1[#PEO][#PMA]1}", ["PMA", "PEO", "PMA"], + None, [(0, 1), (1, 2), (0, 2)], [1, 1, 1]), # smiple cycle sequence with % ("{[#PMA]%123[#PEO][#PMA]%123}", ["PMA", "PEO", "PMA"], + None, [(0, 1), (1, 2), (0, 2)], [1, 1, 1]), # complex cycle ("{[#PMA]1[#PEO]2[#PMA]1[#PEO]2}", ["PMA", "PEO", "PMA", "PEO"], + None, [(0, 1), (1, 2), (0, 2), (1, 3), (2, 3)], [1, 1, 1, 1, 1]), # complex cycle with % ("{[#PMA]%134[#PEO]%256[#PMA]%134[#PEO]%256}", ["PMA", "PEO", "PMA", "PEO"], + None, [(0, 1), (1, 2), (0, 2), (1, 3), (2, 3)], [1, 1, 1, 1, 1]), # # complex cycle with three times same ID @@ -59,6 +74,7 @@ # in cycle ("{[#PMA]12[#PMA][#PMA][#PEO]12}", ["PMA", "PMA", "PMA", "PEO"], + None, [(0, 1), (1, 2), (2, 3), (0, 3)], [1, 1, 1, 2]), # simple branch expension @@ -66,6 +82,7 @@ ["PMA", "PEO", "PEO", "OHter", "PMA", "PEO", "PEO", "OHter", "PMA", "PEO", "PEO", "OHter"], + None, [(0, 1), (1, 2), (2, 3), (0, 4), (4, 5), (5, 6), (6, 7), (4, 8), (8, 9), (9, 10), (10, 11)], @@ -74,6 +91,7 @@ ("{[#PMA]([#PEO]|3)|2}", ["PMA", "PEO", "PEO", "PEO", "PMA", "PEO", "PEO", "PEO"], + None, [(0, 1), (1, 2), (2, 3), (0, 4), (4, 5), (5, 6), (6, 7)], [1, 1, 1, 1, 1, 1, 1]), @@ -82,6 +100,7 @@ ("{[#PMA][#PMA]([#PEO][#PEO]([#OH])[#PEO])[#PMA]}", ["PMA", "PMA", "PEO", "PEO", "OH", "PEO", "PMA"], + None, [(0, 1), (1, 2), (2, 3), (3, 4), (3, 5), (1, 6)], [1, 1, 1, 1, 1, 1]), @@ -90,6 +109,7 @@ ("{[#PMA][#PMA]([#PEO][#PEO]([#OH]|2)[#PEO])[#PMA]}", ["PMA", "PMA", "PEO", "PEO", "OH", "OH", "PEO", "PMA"], + None, [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (3, 6), (1, 7)], [1, 1, 1, 1, 1, 1, 1]), @@ -99,6 +119,7 @@ ("{[#PMA][#PMA]([#PEO][#PEO]([#OH])[#PEO])|2[#PMA]}", ["PMA", "PMA", "PEO", "PEO", "OH", "PEO", "PMA", "PEO", "PEO", "PEO", "OH", "PMA"], + None, [(0, 1), (1, 2), (2, 3), (3, 4), (3, 5), (1, 6), (6, 7), (7, 8), (8, 9), (8, 10), (6, 11)], @@ -115,6 +136,7 @@ ("{[#PMA][#PMA]([#PEO][#PQ]([#OH])|3[#PEO])[#PMA]}", ["PMA", "PMA", "PEO", "PQ", "OH", "PQ", "OH", "PQ", "OH", "PEO", "PMA"], + None, [(0, 1), (1, 2), (1, 10), (2, 3), (3, 4), (3, 5), (5, 6), (5, 7), (7, 8), (7, 9)], @@ -134,12 +156,13 @@ ("{[#PMA][#PMA]([#PEO][#PQ]([#OH])|3[#PEO])[#PMA]([#CH3])|2}", ["PMA", "PMA", "PEO", "PQ", "OH", "PQ", "OH", "PQ", "OH", "PEO", "PMA", "CH3", "PMA", "CH3"], + None, [(0, 1), (1, 2), (1, 10), (2, 3), (3, 4), (3, 5), (5, 6), (5, 7), (7, 8), (7, 9), (10, 11), (10, 12), (12, 13)], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), )) -def test_read_cgsmiles(smile, nodes, edges, orders): +def test_read_cgsmiles(smile, nodes, charges, edges, orders): """ Test that the meta-molecule is correctly reproduced from the simplified smile string syntax. @@ -153,6 +176,10 @@ def test_read_cgsmiles(smile, nodes, edges, orders): fragnames = nx.get_node_attributes(meta_mol, 'fragname') assert nodes == list(fragnames.values()) + if charges: + set_charges = nx.get_node_attributes(meta_mol, 'charge') + assert set_charges == charges + @pytest.mark.parametrize('big_smile, smile, bonding, rs, ez',( # smiple symmetric bonding ("[$]COC[$]",