From d1616f09b2d455cf9db5af50eb85f024a1a6abb2 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 16 Oct 2024 12:23:12 +0200
Subject: [PATCH] Parse CG charges from fragment names in read_cgsmiles

---
 cgsmiles/read_cgsmiles.py              | 12 +++++++++-
 cgsmiles/tests/test_cgsmile_parsing.py | 31 ++++++++++++++++++++++++--
 2 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/cgsmiles/read_cgsmiles.py b/cgsmiles/read_cgsmiles.py
index f56180c..e3368df 100644
--- a/cgsmiles/read_cgsmiles.py
+++ b/cgsmiles/read_cgsmiles.py
@@ -185,6 +185,16 @@ def read_cgsmiles(pattern):
         # the fragname starts at the second character and ends
         # one before the last according to the above pattern
         fragname = match.group(0)[2:-1]
+        # check for charge
+        charge = 0.0
+        for sign in ["+", "-"]:
+            if sign in fragname:
+                fragname, charge = fragname.split(sign)
+                if len(charge) == 0:
+                    charge = float(sign+"1")
+                else:
+                    charge = float(sign+charge)
+
         # if this residue is part of a branch we store it in
         # the recipe dict together with the anchor residue
         # and expansion number
@@ -194,7 +204,7 @@ def read_cgsmiles(pattern):
         # new we add new residue as often as required
         connection = []
         for _ in range(0, n_mon):
-            mol_graph.add_node(current, fragname=fragname)
+            mol_graph.add_node(current, fragname=fragname, charge=charge)
 
             if prev_node is not None:
                 mol_graph.add_edge(prev_node, current, order=1)
diff --git a/cgsmiles/tests/test_cgsmile_parsing.py b/cgsmiles/tests/test_cgsmile_parsing.py
index ccedcc0..f66e934 100644
--- a/cgsmiles/tests/test_cgsmile_parsing.py
+++ b/cgsmiles/tests/test_cgsmile_parsing.py
@@ -3,50 +3,65 @@
 from cgsmiles import read_cgsmiles
 from cgsmiles.read_fragments import strip_bonding_descriptors, fragment_iter
 
-@pytest.mark.parametrize('smile, nodes, edges, orders',(
+@pytest.mark.parametrize('smile, nodes, charges, edges, orders',(
                         # smiple linear sequence
                         ("{[#PMA][#PEO][#PMA]}",
                         ["PMA", "PEO", "PMA"],
+                        None,
+                        [(0, 1), (1, 2)],
+                        [1, 1]),
+                        # simple charges
+                        ("{[#PMA+][#PEO][#PMA-0.25]}",
+                        ["PMA", "PEO", "PMA"],
+                        {0: 1.0, 1: 0.0, 2:-0.25},
                         [(0, 1), (1, 2)],
                         [1, 1]),
                         # smiple linear sequenece with multi-edge
                         ("{[#PMA]1[#PEO]1}",
                         ["PMA", "PEO"],
+                        None,
                         [(0, 1)],
                         [2]),
                         # simple branched sequence
                         ("{[#PMA][#PMA]([#PEO][#PEO])[#PMA]}",
                         ["PMA", "PMA", "PEO", "PEO", "PMA"],
+                        None,
                         [(0, 1), (1, 2), (2, 3), (1, 4)],
                         [1, 1, 1, 1]),
                         # simple sequence two branches
                         ("{[#PMA][#PMA][#PMA]([#PEO][#PEO])([#CH3])[#PMA]}",
                         ["PMA", "PMA", "PMA", "PEO", "PEO", "CH3", "PMA"],
+                        None,
                         [(0, 1), (1, 2), (2, 3), (3, 4), (2, 5), (2, 6)],
                         [1, 1, 1, 1, 1, 1]),
                         # simple linear sequence with expansion
                         ("{[#PMA]|3}",
                         ["PMA", "PMA", "PMA"],
+                        None,
                         [(0, 1), (1, 2)],
                         [1, 1]),
                         # smiple cycle sequence
                         ("{[#PMA]1[#PEO][#PMA]1}",
                         ["PMA", "PEO", "PMA"],
+                        None,
                         [(0, 1), (1, 2), (0, 2)],
                         [1, 1, 1]),
                         # smiple cycle sequence with %
                         ("{[#PMA]%123[#PEO][#PMA]%123}",
                         ["PMA", "PEO", "PMA"],
+                        None,
                         [(0, 1), (1, 2), (0, 2)],
                         [1, 1, 1]),
                         # complex cycle
                         ("{[#PMA]1[#PEO]2[#PMA]1[#PEO]2}",
                         ["PMA", "PEO", "PMA", "PEO"],
+                        None,
                         [(0, 1), (1, 2), (0, 2), (1, 3), (2, 3)],
                         [1, 1, 1, 1, 1]),
                         # complex cycle with %
                         ("{[#PMA]%134[#PEO]%256[#PMA]%134[#PEO]%256}",
                         ["PMA", "PEO", "PMA", "PEO"],
+                        None,
                         [(0, 1), (1, 2), (0, 2), (1, 3), (2, 3)],
                         [1, 1, 1, 1, 1]),
                      #  # complex cycle with three times same ID
@@ -59,6 +74,7 @@
                         # in cycle
                         ("{[#PMA]12[#PMA][#PMA][#PEO]12}",
                         ["PMA", "PMA", "PMA", "PEO"],
+                        None,
                         [(0, 1), (1, 2), (2, 3), (0, 3)],
                         [1, 1, 1, 2]),
                         # simple branch expension
@@ -66,6 +82,7 @@
                         ["PMA", "PEO", "PEO", "OHter",
                          "PMA", "PEO", "PEO", "OHter",
                          "PMA", "PEO", "PEO", "OHter"],
+                        None,
                         [(0, 1), (1, 2), (2, 3),
                          (0, 4), (4, 5), (5, 6), (6, 7),
                          (4, 8), (8, 9), (9, 10), (10, 11)],
@@ -74,6 +91,7 @@
                         ("{[#PMA]([#PEO]|3)|2}",
                         ["PMA", "PEO", "PEO", "PEO",
                          "PMA", "PEO", "PEO", "PEO"],
+                        None,
                         [(0, 1), (1, 2), (2, 3),
                          (0, 4), (4, 5), (5, 6), (6, 7)],
                         [1, 1, 1, 1, 1, 1, 1]),
@@ -82,6 +100,7 @@
                         ("{[#PMA][#PMA]([#PEO][#PEO]([#OH])[#PEO])[#PMA]}",
                         ["PMA", "PMA", "PEO", "PEO", "OH",
                          "PEO", "PMA"],
+                        None,
                         [(0, 1), (1, 2), (2, 3),
                          (3, 4), (3, 5), (1, 6)],
                         [1, 1, 1, 1, 1, 1]),
@@ -90,6 +109,7 @@
                         ("{[#PMA][#PMA]([#PEO][#PEO]([#OH]|2)[#PEO])[#PMA]}",
                         ["PMA", "PMA", "PEO", "PEO", "OH", "OH",
                          "PEO", "PMA"],
+                        None,
                         [(0, 1), (1, 2), (2, 3),
                          (3, 4), (4, 5), (3, 6), (1, 7)],
                         [1, 1, 1, 1, 1, 1, 1]),
@@ -99,6 +119,7 @@
                         ("{[#PMA][#PMA]([#PEO][#PEO]([#OH])[#PEO])|2[#PMA]}",
                         ["PMA", "PMA", "PEO", "PEO", "OH", "PEO",
                          "PMA", "PEO", "PEO", "PEO", "OH", "PMA"],
+                        None,
                         [(0, 1), (1, 2), (2, 3),
                          (3, 4), (3, 5), (1, 6), (6, 7), (7, 8),
                          (8, 9), (8, 10), (6, 11)],
@@ -115,6 +136,7 @@
                         ("{[#PMA][#PMA]([#PEO][#PQ]([#OH])|3[#PEO])[#PMA]}",
                         ["PMA", "PMA", "PEO", "PQ", "OH",
                          "PQ", "OH", "PQ", "OH", "PEO", "PMA"],
+                        None,
                         [(0, 1), (1, 2), (1, 10),
                          (2, 3), (3, 4), (3, 5), (5, 6),
                          (5, 7), (7, 8), (7, 9)],
@@ -134,12 +156,13 @@
                         ("{[#PMA][#PMA]([#PEO][#PQ]([#OH])|3[#PEO])[#PMA]([#CH3])|2}",
                         ["PMA", "PMA", "PEO", "PQ", "OH",
                          "PQ", "OH", "PQ", "OH", "PEO", "PMA", "CH3", "PMA", "CH3"],
+                        None,
                         [(0, 1), (1, 2), (1, 10),
                          (2, 3), (3, 4), (3, 5), (5, 6),
                          (5, 7), (7, 8), (7, 9), (10, 11), (10, 12), (12, 13)],
                         [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
 ))
-def test_read_cgsmiles(smile, nodes, edges, orders):
+def test_read_cgsmiles(smile, nodes, charges, edges, orders):
     """
     Test that the meta-molecule is correctly reproduced
     from the simplified smile string syntax.
@@ -153,6 +176,10 @@ def test_read_cgsmiles(smile, nodes, edges, orders):
     fragnames = nx.get_node_attributes(meta_mol, 'fragname')
     assert nodes == list(fragnames.values())
 
+    if charges:
+        set_charges = nx.get_node_attributes(meta_mol, 'charge')
+        assert set_charges == charges
+
 @pytest.mark.parametrize('big_smile, smile, bonding, rs, ez',(
                         # smiple symmetric bonding
                         ("[$]COC[$]",