From de750eeffdf45f56ac95f17e4c7177586c941d87 Mon Sep 17 00:00:00 2001
From: "A.H. Kole" <a.h.kole@uu.nl>
Date: Fri, 23 Feb 2024 16:24:22 +0100
Subject: [PATCH 01/13] Implemented parsing of total mulliken charges from
 siesta stdout

---
 src/sisl/io/siesta/stdout.py | 92 +++++++++++++++++++++++++++++++++++-
 1 file changed, 91 insertions(+), 1 deletion(-)

diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py
index e35f22a18f..ca55c4cb67 100644
--- a/src/sisl/io/siesta/stdout.py
+++ b/src/sisl/io/siesta/stdout.py
@@ -1126,7 +1126,97 @@ def _parse_charge(line):
         # define helper function for reading voronoi+hirshfeld charges
         def _mulliken_charges():
             """Read output from Mulliken charges"""
-            raise NotImplementedError("Mulliken charges are not implemented currently")
+            nonlocal pd
+
+            # Expecting something like this (NC/SOC)
+            # mulliken: Atomic and Orbital Populations:
+
+            # Species: Cl
+
+            # Atom      Orb        Charge      Spin       Svec
+            # ----------------------------------------------------------------
+            #     1  1 3s         1.75133   0.00566      0.004   0.004  -0.000
+            #     1  2 3s         0.09813   0.00658     -0.005  -0.005   0.000
+            #     1  3 3py        1.69790   0.21531     -0.161  -0.142   0.018
+            #     1  4 3pz        1.72632   0.15770     -0.086  -0.132  -0.008
+            #     1  5 3px        1.81369   0.01618     -0.004   0.015  -0.004
+            #     1  6 3py       -0.04663   0.02356     -0.017  -0.016  -0.000
+            #     1  7 3pz       -0.04167   0.01560     -0.011  -0.011   0.001
+            #     1  8 3px       -0.02977   0.00920     -0.006  -0.007   0.000
+            #     1  9 3Pdxy      0.00595   0.00054     -0.000  -0.000  -0.000
+            #     1 10 3Pdyz      0.00483   0.00073     -0.001  -0.001  -0.000
+            #     1 11 3Pdz2      0.00515   0.00098     -0.001  -0.001  -0.000
+            #     1 12 3Pdxz      0.00604   0.00039     -0.000  -0.000   0.000
+            #     1 13 3Pdx2-y2   0.00607   0.00099     -0.001  -0.001   0.000
+            #     1     Total     6.99733   0.41305     -0.288  -0.296   0.007
+
+            # Define the function that parses the charges
+            def _parse_charge_total(line):
+                atom_idx, _, *vals = line.split()
+                # assert that this is a proper line
+                # this should catch cases where the following line of charge output
+                # is still parseable
+                # atom_idx = int(atom_idx)
+                return int(atom_idx), list(map(float, vals))
+
+            # Define the function that parses a single species
+            def _parse_species():
+                nonlocal header, atom_idx, atom_charges
+
+                # The mulliken charges are organized per species where the charges
+                # for each species are enclosed by dashes (-----)
+
+                # Step to header
+                _, line = self.step_to('Atom', allow_reread=False)
+                if header is None:
+                    header = (
+                        line
+                        .replace("Orb", "")  # For now only total is parsed
+                        .replace("Svec", "Sx Sy Sz")  # Qatom in 4.1
+                        .split()
+                    )
+
+                # Skip over the starting ---- line
+                self.readline()
+
+                # Read until closing ---- line
+                line = ""
+                while "----" not in line:
+                    line = self.readline()
+                    if "Total" in line:
+                        ia, charge_vals = _parse_charge_total(line)
+                        atom_idx.append(ia)
+                        atom_charges.append(charge_vals)
+
+            # Parse as long as we find new species
+            atom_idx = []
+            atom_charges = []
+            header = None
+            found, _ = self.step_to("Species:", allow_reread=False)
+            while found:
+                _parse_species()
+                found, _ = self.step_to("Species:", allow_reread=False)
+
+            # Convert to array and sort in the order of the atoms
+            sort_idx = np.argsort(atom_idx)
+            atom_charges_array = _a.arrayf(atom_charges)[sort_idx]
+
+            if pd is None:
+                # not as_dataframe
+                return atom_charges_array
+
+            # determine how many columns we have
+            # this will remove atom indices and species, so only inside
+            ncols = len(atom_charges[0])
+            assert ncols == len(header)
+
+            # the precision is limited, so no need for double precision
+            return pd.DataFrame(
+                atom_charges_array,
+                columns=header,
+                dtype=np.float32,
+                index=pd.RangeIndex(stop=len(atom_charges), name="atom"),
+            )
 
         # Check that a known charge has been requested
         if namel == "voronoi":

From aa0385ac0e4df78586245232870ee03dc70f16a4 Mon Sep 17 00:00:00 2001
From: "A.H. Kole" <a.h.kole@uu.nl>
Date: Fri, 23 Feb 2024 16:37:51 +0100
Subject: [PATCH 02/13] Fix incorrect header for mulliken charges from stdout

---
 src/sisl/io/siesta/stdout.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py
index ca55c4cb67..a3d97d18d1 100644
--- a/src/sisl/io/siesta/stdout.py
+++ b/src/sisl/io/siesta/stdout.py
@@ -1171,10 +1171,9 @@ def _parse_species():
                 if header is None:
                     header = (
                         line
-                        .replace("Orb", "")  # For now only total is parsed
                         .replace("Svec", "Sx Sy Sz")  # Qatom in 4.1
                         .split()
-                    )
+                    )[2:]
 
                 # Skip over the starting ---- line
                 self.readline()

From 33b63a7bbcc06a22514cfad4753a1bce1f006481 Mon Sep 17 00:00:00 2001
From: "A.H. Kole" <a.h.kole@uu.nl>
Date: Fri, 23 Feb 2024 16:40:21 +0100
Subject: [PATCH 03/13] Fix comment in parsing mulliken charges from stdout

---
 src/sisl/io/siesta/stdout.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py
index a3d97d18d1..7860c96668 100644
--- a/src/sisl/io/siesta/stdout.py
+++ b/src/sisl/io/siesta/stdout.py
@@ -1171,7 +1171,7 @@ def _parse_species():
                 if header is None:
                     header = (
                         line
-                        .replace("Svec", "Sx Sy Sz")  # Qatom in 4.1
+                        .replace("Svec", "Sx Sy Sz")  # Split Svec into Cartesian components
                         .split()
                     )[2:]
 

From a0518940c96e759a33ad7b0878aa3fe3b33dea4e Mon Sep 17 00:00:00 2001
From: "A.H. Kole" <a.h.kole@uu.nl>
Date: Thu, 7 Mar 2024 19:57:45 +0100
Subject: [PATCH 04/13] Updated docstring of read_charge to reflect parsing of
 total Mulliken charges

---
 src/sisl/io/siesta/stdout.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py
index 7860c96668..69c29560cd 100644
--- a/src/sisl/io/siesta/stdout.py
+++ b/src/sisl/io/siesta/stdout.py
@@ -985,7 +985,7 @@ def read_charge(
 
         Siesta enables many different modes of writing out charges.
 
-        NOTE: currently Mulliken charges are not implemented.
+        NOTE: currently only Mulliken total atomic charges are implemented.
 
         The below table shows a list of different cases that
         may be encountered, the letters are referred to in the
@@ -1010,12 +1010,12 @@ def read_charge(
         the SCF charges are not present. For `Opt.ANY` it will return
         the most information, effectively SCF will be returned if present.
 
-        Currently Mulliken is not implemented, any help in reading this would be
-        very welcome.
+        Currently orbitally-resolved Mulliken is not implemented, any help in
+        reading this would be very welcome.
 
         Parameters
         ----------
-        name: {"voronoi", "hirshfeld"}
+        name: {"voronoi", "hirshfeld", "mulliken"}
             the name of the charges that you want to read
         iscf: int or Opt, optional
             index (0-based) of the scf iteration you want the charges for.
@@ -1167,12 +1167,12 @@ def _parse_species():
                 # for each species are enclosed by dashes (-----)
 
                 # Step to header
-                _, line = self.step_to('Atom', allow_reread=False)
+                _, line = self.step_to("Atom", allow_reread=False)
                 if header is None:
                     header = (
-                        line
-                        .replace("Svec", "Sx Sy Sz")  # Split Svec into Cartesian components
-                        .split()
+                        line.replace(
+                            "Svec", "Sx Sy Sz"
+                        ).split()  # Split Svec into Cartesian components
                     )[2:]
 
                 # Skip over the starting ---- line

From 5b2c284ae425e0da55467c34ff5173b4c9513b65 Mon Sep 17 00:00:00 2001
From: "A.H. Kole" <a.h.kole@uu.nl>
Date: Thu, 7 Mar 2024 20:10:49 +0100
Subject: [PATCH 05/13] Made Mulliken charges header consistent with Voronoi
 and Hirshfeld

---
 src/sisl/io/siesta/stdout.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py
index 69c29560cd..c7ce2878e8 100644
--- a/src/sisl/io/siesta/stdout.py
+++ b/src/sisl/io/siesta/stdout.py
@@ -1170,9 +1170,12 @@ def _parse_species():
                 _, line = self.step_to("Atom", allow_reread=False)
                 if header is None:
                     header = (
-                        line.replace(
+                        line.replace("Charge", "e")
+                        .replace("Spin", "S")
+                        .replace(
                             "Svec", "Sx Sy Sz"
-                        ).split()  # Split Svec into Cartesian components
+                        )  # Split Svec into Cartesian components
+                        .split()
                     )[2:]
 
                 # Skip over the starting ---- line

From 66c4da39e0bde62681798d0d3709fbfbdefca86e Mon Sep 17 00:00:00 2001
From: "A.H. Kole" <a.h.kole@uu.nl>
Date: Thu, 7 Mar 2024 20:14:43 +0100
Subject: [PATCH 06/13] Added parsing of Mulliken charges to CHANGELOG.md

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3f2e901472..6dd0b40244 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -34,6 +34,7 @@ we hit release version 1.0.0.
 - Creation of [n]-triangulenes (`sisl.geom.triangulene`)
 - added `offset` argument in `Geometry.add_vacuum` to enable shifting atomic coordinates
 - A new `AtomicMatrixPlot` to plot sparse matrices, #668
+- Parsing of total Mulliken charges in `stdoutSileSiesta`, #691
 
 ### Fixed
 - `txtSileOrca.info.no` used a wrong regex, added a test

From 48e8cb751d36708a38d283afa780ddba2247c330 Mon Sep 17 00:00:00 2001
From: "A.H. Kole" <a.h.kole@uu.nl>
Date: Fri, 8 Mar 2024 22:37:38 +0100
Subject: [PATCH 07/13] Tried to add support for parsing Mulliken charges of
 simpler spin types

---
 src/sisl/io/siesta/stdout.py | 89 ++++++++++++++++++++++++++++++++----
 1 file changed, 79 insertions(+), 10 deletions(-)

diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py
index c7ce2878e8..eecc6ac8a2 100644
--- a/src/sisl/io/siesta/stdout.py
+++ b/src/sisl/io/siesta/stdout.py
@@ -1151,7 +1151,7 @@ def _mulliken_charges():
             #     1     Total     6.99733   0.41305     -0.288  -0.296   0.007
 
             # Define the function that parses the charges
-            def _parse_charge_total(line):
+            def _parse_charge_total_nc(line):  # non-colinear and soc
                 atom_idx, _, *vals = line.split()
                 # assert that this is a proper line
                 # this should catch cases where the following line of charge output
@@ -1159,8 +1159,12 @@ def _parse_charge_total(line):
                 # atom_idx = int(atom_idx)
                 return int(atom_idx), list(map(float, vals))
 
+            def _parse_charge_total(line):  # unpolarized and colinear spin
+                atom_idx, val, *_ = line.split()
+                return int(atom_idx), float(val)
+
             # Define the function that parses a single species
-            def _parse_species():
+            def _parse_species_nc():  # non-colinear and soc
                 nonlocal header, atom_idx, atom_charges
 
                 # The mulliken charges are organized per species where the charges
@@ -1186,18 +1190,83 @@ def _parse_species():
                 while "----" not in line:
                     line = self.readline()
                     if "Total" in line:
-                        ia, charge_vals = _parse_charge_total(line)
+                        ia, charge_vals = _parse_charge_total_nc(line)
                         atom_idx.append(ia)
                         atom_charges.append(charge_vals)
 
-            # Parse as long as we find new species
-            atom_idx = []
-            atom_charges = []
-            header = None
-            found, _ = self.step_to("Species:", allow_reread=False)
-            while found:
-                _parse_species()
+            def _parse_spin_pol():  # unpolarized and colinear spin
+                nonlocal atom_idx, atom_charges
+
+                # The mulliken charges are organized per spin
+                # polarization (UP/DOWN). The end of a spin
+                # block is marked by a Qtot
+
+                # Read until we encounter "mulliken: Qtot"
+                def try_parse_int(s):
+                    try:
+                        int(s)
+                    except ValueError:
+                        return False
+                    else:
+                        return True
+
+                line = ""
+                while "mulliken: Qtot" not in line:
+                    line = self.readline()
+                    words = line.split()
+                    if len(words) > 0 and try_parse_int(words[0]):
+                        # This should be a line containing the total charge for an atom
+                        ia, charge = _parse_charge_total(line)
+                        atom_idx.append(ia)
+                        atom_charges.append(charge)
+
+            # Determine with which spin type we are dealing
+            IDX_NON_POL = 0
+            IDX_POL = 1
+            IDX_NC = 2
+            search_keys_spin = [
+                "Qatom",  # if we find Qatom and no "mulliken: Spin UP" we are dealing with spin unpolarized
+                "mulliken: Spin UP",
+                "Svec",
+            ]
+            loc = self.fh.tell()
+            ret = self.step_to(
+                search_keys_spin, case=True, ret_index=True, allow_reread=False
+            )
+            self.fh.seek(loc)
+            if ret[2] == IDX_NON_POL:
+                # No spin components so just parse charge
+                atom_charges = []
+                atom_idx = []
+                header = ["e"]
+                _parse_spin_pol()
+            elif ret[2] == IDX_POL:
+                # Parse both spin polarizations
+                atom_charges_pol = []
+                header = ["e", "Sz"]
+                for s in ["UP", "DOWN"]:
+                    atom_charges = []
+                    atom_idx = []
+                    self.step_to(f"mulliken: Spin {s}", allow_reread=False)
+                    _parse_spin_pol()
+                    atom_charges_pol.append(atom_charges)
+
+                # Compute the charge and spin of each atom
+                atom_charges_pol_array = _a.arrayf(atom_charges_pol)
+                atom_q = atom_charges_pol_array[0, :] + atom_charges_pol_array[1, :]
+                atom_s = atom_charges_pol_array[0, :] - atom_charges_pol_array[1, :]
+                atom_charges[:] = np.stack((atom_q, atom_s), axis=-1)
+            elif ret[2] == IDX_NC:
+                # Parse as long as we find new species
+                atom_charges = []
+                atom_idx = []
+                header = None
                 found, _ = self.step_to("Species:", allow_reread=False)
+                while found:
+                    _parse_species_nc()
+                    found, _ = self.step_to("Species:", allow_reread=False)
+            else:
+                assert False  # It should never reach here
 
             # Convert to array and sort in the order of the atoms
             sort_idx = np.argsort(atom_idx)

From d6f6ca9cae1ebbfec72e11e57a28ff155126f16a Mon Sep 17 00:00:00 2001
From: "A.H. Kole" <a.h.kole@uu.nl>
Date: Fri, 15 Mar 2024 19:04:06 +0100
Subject: [PATCH 08/13] Use self.info.spin to determine spin type for Mulliken
 charges

---
 src/sisl/io/siesta/stdout.py | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py
index eecc6ac8a2..b491b24070 100644
--- a/src/sisl/io/siesta/stdout.py
+++ b/src/sisl/io/siesta/stdout.py
@@ -1221,26 +1221,13 @@ def try_parse_int(s):
                         atom_charges.append(charge)
 
             # Determine with which spin type we are dealing
-            IDX_NON_POL = 0
-            IDX_POL = 1
-            IDX_NC = 2
-            search_keys_spin = [
-                "Qatom",  # if we find Qatom and no "mulliken: Spin UP" we are dealing with spin unpolarized
-                "mulliken: Spin UP",
-                "Svec",
-            ]
-            loc = self.fh.tell()
-            ret = self.step_to(
-                search_keys_spin, case=True, ret_index=True, allow_reread=False
-            )
-            self.fh.seek(loc)
-            if ret[2] == IDX_NON_POL:
+            if self.info.spin == Spin.UNPOLARIZED:
                 # No spin components so just parse charge
                 atom_charges = []
                 atom_idx = []
                 header = ["e"]
                 _parse_spin_pol()
-            elif ret[2] == IDX_POL:
+            elif self.info.spin == Spin.POLARIZED:
                 # Parse both spin polarizations
                 atom_charges_pol = []
                 header = ["e", "Sz"]
@@ -1256,7 +1243,7 @@ def try_parse_int(s):
                 atom_q = atom_charges_pol_array[0, :] + atom_charges_pol_array[1, :]
                 atom_s = atom_charges_pol_array[0, :] - atom_charges_pol_array[1, :]
                 atom_charges[:] = np.stack((atom_q, atom_s), axis=-1)
-            elif ret[2] == IDX_NC:
+            elif self.info.spin == Spin.NONCOLINEAR or self.info.spin == Spin.SPINORBIT:
                 # Parse as long as we find new species
                 atom_charges = []
                 atom_idx = []

From 604f297018380697d348cb2a6e002dd264e69e0f Mon Sep 17 00:00:00 2001
From: "A.H. Kole" <a.h.kole@uu.nl>
Date: Tue, 13 Aug 2024 19:11:56 +0200
Subject: [PATCH 09/13] Fix determination of spin in reading of mulliken
 moments

---
 src/sisl/io/siesta/stdout.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py
index d055e8a2d4..c9419edf9a 100644
--- a/src/sisl/io/siesta/stdout.py
+++ b/src/sisl/io/siesta/stdout.py
@@ -42,7 +42,7 @@ def _ensure_atoms(atoms):
 
 def _parse_spin(attr, match):
     """Parse 'redata: Spin configuration *= <value>'"""
-    opt = match.string.split("=")[-1]
+    opt = match.string.split("=")[-1].strip()
 
     if opt.startswith("spin-orbit"):
         return Spin("spin-orbit")
@@ -1233,16 +1233,16 @@ def try_parse_int(s):
                         # This should be a line containing the total charge for an atom
                         ia, charge = _parse_charge_total(line)
                         atom_idx.append(ia)
-                        atom_charges.append(charge)
+                        atom_charges.append([charge])
 
             # Determine with which spin type we are dealing
-            if self.info.spin == Spin.UNPOLARIZED:
+            if self.info.spin == Spin():  # UNPOLARIZED
                 # No spin components so just parse charge
                 atom_charges = []
                 atom_idx = []
                 header = ["e"]
                 _parse_spin_pol()
-            elif self.info.spin == Spin.POLARIZED:
+            elif self.info.spin == Spin("polarized"):
                 # Parse both spin polarizations
                 atom_charges_pol = []
                 header = ["e", "Sz"]
@@ -1255,10 +1255,14 @@ def try_parse_int(s):
 
                 # Compute the charge and spin of each atom
                 atom_charges_pol_array = _a.arrayf(atom_charges_pol)
-                atom_q = atom_charges_pol_array[0, :] + atom_charges_pol_array[1, :]
-                atom_s = atom_charges_pol_array[0, :] - atom_charges_pol_array[1, :]
+                atom_q = (
+                    atom_charges_pol_array[0, :, 0] + atom_charges_pol_array[1, :, 0]
+                )
+                atom_s = (
+                    atom_charges_pol_array[0, :, 0] - atom_charges_pol_array[1, :, 0]
+                )
                 atom_charges[:] = np.stack((atom_q, atom_s), axis=-1)
-            elif self.info.spin == Spin.NONCOLINEAR or self.info.spin == Spin.SPINORBIT:
+            elif self.info.spin in [Spin("non-colinear"), Spin("spin-orbit")]:
                 # Parse as long as we find new species
                 atom_charges = []
                 atom_idx = []

From 3368f655b61681dffda47e50f7272fc016517960 Mon Sep 17 00:00:00 2001
From: "A.H. Kole" <a.h.kole@uu.nl>
Date: Fri, 27 Sep 2024 18:34:00 +0200
Subject: [PATCH 10/13] Fix parsing of nc/soc Mulliken charges

---
 src/sisl/io/siesta/stdout.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py
index 0d8c15c189..e027dc7fe9 100644
--- a/src/sisl/io/siesta/stdout.py
+++ b/src/sisl/io/siesta/stdout.py
@@ -123,6 +123,12 @@ class stdoutSileSiesta(SileSiesta):
             lambda attr, instance, match: int(match.string.split()[-2]),
             not_found="warn",
         ),
+        _A(
+            "ns",
+            r"^redata: Number of Atomic Species",
+            lambda attr, instance, match: int(match.string.split("=")[-1]),
+            not_found="warn",
+        ),
         _A(
             "completed",
             r".*Job completed",
@@ -1263,14 +1269,13 @@ def try_parse_int(s):
                 )
                 atom_charges[:] = np.stack((atom_q, atom_s), axis=-1)
             elif self.info.spin in [Spin("non-colinear"), Spin("spin-orbit")]:
-                # Parse as long as we find new species
+                # Parse each species
                 atom_charges = []
                 atom_idx = []
                 header = None
-                found, _ = self.step_to("Species:", allow_reread=False)
-                while found:
-                    _parse_species_nc()
+                for _ in range(self.info.ns):
                     found, _ = self.step_to("Species:", allow_reread=False)
+                    _parse_species_nc()
             else:
                 assert False  # It should never reach here
 

From ce7b9feb10669c98da8adf111a2647a92ed88b08 Mon Sep 17 00:00:00 2001
From: "A.H. Kole" <a.h.kole@uu.nl>
Date: Fri, 27 Sep 2024 18:34:39 +0200
Subject: [PATCH 11/13] Add tests for parsing of total Mulliken charges

---
 src/sisl/io/siesta/tests/test_stdout_charges.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/sisl/io/siesta/tests/test_stdout_charges.py b/src/sisl/io/siesta/tests/test_stdout_charges.py
index 73865c85c6..a05149b06f 100644
--- a/src/sisl/io/siesta/tests/test_stdout_charges.py
+++ b/src/sisl/io/siesta/tests/test_stdout_charges.py
@@ -13,6 +13,7 @@
 
 # tests here tests charge reads for output
 #  voronoi + hirshfeld: test_vh_*
+#  voronoi + hirshfeld + mulliken: test_vhm_*
 #  voronoi: test_v_*
 #  hirshfeld: test_h_*
 #  mulliken: test_m_*
@@ -30,8 +31,8 @@ def with_pandas():
         return False
 
 
-@pytest.mark.parametrize("name", ("voronoi", "Hirshfeld"))
-def test_vh_empty_file(name, sisl_files):
+@pytest.mark.parametrize("name", ("voronoi", "Hirshfeld", "mulliken"))
+def test_vhm_empty_file(name, sisl_files):
     f = sisl_files("siesta", "ancient", "voronoi_hirshfeld_4.1_none.out")
     out = stdoutSileSiesta(f)
 
@@ -76,9 +77,11 @@ def test_vh_final(fname, name, sisl_files):
 
 
 @pytest.mark.parametrize("fname", ("md", "pol_md", ("ancient", "pol_md"), "nc_md"))
-@pytest.mark.parametrize("name", ("voronoi", "Hirshfeld"))
-def test_vh_md(name, fname, sisl_files):
+@pytest.mark.parametrize("name", ("voronoi", "Hirshfeld", "mulliken"))
+def test_vhm_md(name, fname, sisl_files):
     if isinstance(fname, tuple):
+        if name == "mulliken":
+            return
         f = sisl_files("siesta", "ancient", f"voronoi_hirshfeld_4.1_{fname[1]}.out")
     else:
         f = sisl_files("siesta", "charges", fname, "RUN.out")

From d305e09457185d1c8dd8369e36140c92d71758e0 Mon Sep 17 00:00:00 2001
From: Nick Papior <nickpapior@gmail.com>
Date: Thu, 24 Oct 2024 15:17:44 +0200
Subject: [PATCH 12/13] implemented whalrus and changed ns to nspecies

nspecies is what is currently used in Atom, so better
stick with that name.

Signed-off-by: Nick Papior <nickpapior@gmail.com>
---
 src/sisl/io/siesta/stdout.py | 63 ++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py
index e027dc7fe9..f3b4bc10ab 100644
--- a/src/sisl/io/siesta/stdout.py
+++ b/src/sisl/io/siesta/stdout.py
@@ -124,7 +124,7 @@ class stdoutSileSiesta(SileSiesta):
             not_found="warn",
         ),
         _A(
-            "ns",
+            "nspecies",
             r"^redata: Number of Atomic Species",
             lambda attr, instance, match: int(match.string.split("=")[-1]),
             not_found="warn",
@@ -1000,14 +1000,17 @@ def construct_data(d, data):
 
     @sile_fh_open(True)
     def read_charge(
-        self, name, iscf=Opt.ANY, imd=Opt.ANY, key_scf="scf", as_dataframe=False
+        self,
+        name: Literal["voronoi", "hirshfeld", "mulliken"],
+        iscf=Opt.ANY,
+        imd=Opt.ANY,
+        key_scf: str = "scf",
+        as_dataframe: bool = False,
     ):
         r"""Read charges calculated in SCF loop or MD loop (or both)
 
         Siesta enables many different modes of writing out charges.
 
-        NOTE: currently only Mulliken total atomic charges are implemented.
-
         The below table shows a list of different cases that
         may be encountered, the letters are referred to in the
         return section to indicate what is returned.
@@ -1036,7 +1039,7 @@ def read_charge(
 
         Parameters
         ----------
-        name: {"voronoi", "hirshfeld", "mulliken"}
+        name:
             the name of the charges that you want to read
         iscf: int or Opt, optional
             index (0-based) of the scf iteration you want the charges for.
@@ -1050,9 +1053,9 @@ def read_charge(
             the returned quantities depend on what is present.
             If ``None/Opt.NONE`` it will not return any MD charges.
             If both `imd` and `iscf` are ``None`` then only the final charges will be returned.
-        key_scf : str, optional
+        key_scf :
             the key lookup for the scf iterations (a ":" will automatically be appended)
-        as_dataframe: boolean, optional
+        as_dataframe:
             whether charges should be returned as a pandas dataframe.
 
         Returns
@@ -1061,7 +1064,7 @@ def read_charge(
             if a specific MD+SCF index is requested (or special cases where output is
             not complete)
         list of numpy.ndarray
-            if one both `iscf` or `imd` is different from ``None/Opt.NONE``.
+            if `iscf` or `imd` is different from ``None/Opt.NONE``.
         pandas.DataFrame
             if `as_dataframe` is requested. The dataframe will have multi-indices if multiple
             SCF or MD steps are requested.
@@ -1117,16 +1120,15 @@ def _parse_charge(line):
 
             # We have found the header, prepare a list to read the charges
             atom_charges = []
-            line = " "
-            while line != "":
+            while (line := self.readline()) != "":
                 try:
-                    line = self.readline()
                     charge_vals = _parse_charge(line)
                     atom_charges.append(charge_vals)
                 except Exception:
                     # We already have the charge values and we reached a line that can't be parsed,
                     # this means we have reached the end.
                     break
+
             if pd is None:
                 # not as_dataframe
                 return _a.arrayf(atom_charges)
@@ -1207,9 +1209,7 @@ def _parse_species_nc():  # non-colinear and soc
                 self.readline()
 
                 # Read until closing ---- line
-                line = ""
-                while "----" not in line:
-                    line = self.readline()
+                while "----" not in (line := self.readline()):
                     if "Total" in line:
                         ia, charge_vals = _parse_charge_total_nc(line)
                         atom_idx.append(ia)
@@ -1231,9 +1231,7 @@ def try_parse_int(s):
                     else:
                         return True
 
-                line = ""
-                while "mulliken: Qtot" not in line:
-                    line = self.readline()
+                while "mulliken: Qtot" not in (line := self.readline()):
                     words = line.split()
                     if len(words) > 0 and try_parse_int(words[0]):
                         # This should be a line containing the total charge for an atom
@@ -1242,17 +1240,18 @@ def try_parse_int(s):
                         atom_charges.append([charge])
 
             # Determine with which spin type we are dealing
-            if self.info.spin == Spin():  # UNPOLARIZED
+            if self.info.spin.is_unpolarized:  # UNPOLARIZED
                 # No spin components so just parse charge
                 atom_charges = []
                 atom_idx = []
                 header = ["e"]
                 _parse_spin_pol()
-            elif self.info.spin == Spin("polarized"):
+
+            elif self.info.spin.is_polarized:
                 # Parse both spin polarizations
                 atom_charges_pol = []
                 header = ["e", "Sz"]
-                for s in ["UP", "DOWN"]:
+                for s in ("UP", "DOWN"):
                     atom_charges = []
                     atom_idx = []
                     self.step_to(f"mulliken: Spin {s}", allow_reread=False)
@@ -1268,16 +1267,20 @@ def try_parse_int(s):
                     atom_charges_pol_array[0, :, 0] - atom_charges_pol_array[1, :, 0]
                 )
                 atom_charges[:] = np.stack((atom_q, atom_s), axis=-1)
-            elif self.info.spin in [Spin("non-colinear"), Spin("spin-orbit")]:
+
+            elif not self.info.spin.is_diagonal:
                 # Parse each species
                 atom_charges = []
                 atom_idx = []
                 header = None
-                for _ in range(self.info.ns):
+                for _ in range(self.info.nspecies):
                     found, _ = self.step_to("Species:", allow_reread=False)
                     _parse_species_nc()
+
             else:
-                assert False  # It should never reach here
+                raise NotImplementedError(
+                    "Something went wrong... Couldn't parse file."
+                )
 
             # Convert to array and sort in the order of the atoms
             sort_idx = np.argsort(atom_idx)
@@ -1335,6 +1338,7 @@ def try_parse_int(s):
             key_scf,
             *charge_keys,
         ]
+
         # adjust the below while loop to take into account any additional
         # segments of search_keys
         IDX_SCF_END = [0, 1]
@@ -1364,9 +1368,11 @@ def try_parse_int(s):
         FOUND_MD = False
         FOUND_FINAL = False
 
-        # TODO whalrus
-        ret = self.step_to(search_keys, case=True, ret_index=True, allow_reread=False)
-        while ret[0]:
+        while (
+            ret := self.step_to(
+                search_keys, case=True, ret_index=True, allow_reread=False
+            )
+        )[0]:
             if ret[2] in IDX_SCF_END:
                 # we finished all SCF iterations
                 current_state = state.MD
@@ -1427,11 +1433,6 @@ def try_parse_int(s):
 
                 current_state = state.CHARGE
 
-            # step to next entry
-            ret = self.step_to(
-                search_keys, case=True, ret_index=True, allow_reread=False
-            )
-
         if not any((FOUND_SCF, FOUND_MD, FOUND_FINAL)):
             raise SileError(f"{self!s} does not contain any charges ({name})")
 

From 236c33fdb672201d8f1c0d30f54ba0d80dd07348 Mon Sep 17 00:00:00 2001
From: Nick Papior <nickpapior@gmail.com>
Date: Thu, 24 Oct 2024 15:23:08 +0200
Subject: [PATCH 13/13] small comment on test

Signed-off-by: Nick Papior <nickpapior@gmail.com>
---
 src/sisl/io/siesta/tests/test_stdout_charges.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/sisl/io/siesta/tests/test_stdout_charges.py b/src/sisl/io/siesta/tests/test_stdout_charges.py
index a05149b06f..3e001d5252 100644
--- a/src/sisl/io/siesta/tests/test_stdout_charges.py
+++ b/src/sisl/io/siesta/tests/test_stdout_charges.py
@@ -81,6 +81,8 @@ def test_vh_final(fname, name, sisl_files):
 def test_vhm_md(name, fname, sisl_files):
     if isinstance(fname, tuple):
         if name == "mulliken":
+            # we don't have this in the test, so simply return
+            # (this isn't a test after all)
             return
         f = sisl_files("siesta", "ancient", f"voronoi_hirshfeld_4.1_{fname[1]}.out")
     else: