From de750eeffdf45f56ac95f17e4c7177586c941d87 Mon Sep 17 00:00:00 2001 From: "A.H. Kole" Date: Fri, 23 Feb 2024 16:24:22 +0100 Subject: [PATCH 01/13] Implemented parsing of total mulliken charges from siesta stdout --- src/sisl/io/siesta/stdout.py | 92 +++++++++++++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py index e35f22a18f..ca55c4cb67 100644 --- a/src/sisl/io/siesta/stdout.py +++ b/src/sisl/io/siesta/stdout.py @@ -1126,7 +1126,97 @@ def _parse_charge(line): # define helper function for reading voronoi+hirshfeld charges def _mulliken_charges(): """Read output from Mulliken charges""" - raise NotImplementedError("Mulliken charges are not implemented currently") + nonlocal pd + + # Expecting something like this (NC/SOC) + # mulliken: Atomic and Orbital Populations: + + # Species: Cl + + # Atom Orb Charge Spin Svec + # ---------------------------------------------------------------- + # 1 1 3s 1.75133 0.00566 0.004 0.004 -0.000 + # 1 2 3s 0.09813 0.00658 -0.005 -0.005 0.000 + # 1 3 3py 1.69790 0.21531 -0.161 -0.142 0.018 + # 1 4 3pz 1.72632 0.15770 -0.086 -0.132 -0.008 + # 1 5 3px 1.81369 0.01618 -0.004 0.015 -0.004 + # 1 6 3py -0.04663 0.02356 -0.017 -0.016 -0.000 + # 1 7 3pz -0.04167 0.01560 -0.011 -0.011 0.001 + # 1 8 3px -0.02977 0.00920 -0.006 -0.007 0.000 + # 1 9 3Pdxy 0.00595 0.00054 -0.000 -0.000 -0.000 + # 1 10 3Pdyz 0.00483 0.00073 -0.001 -0.001 -0.000 + # 1 11 3Pdz2 0.00515 0.00098 -0.001 -0.001 -0.000 + # 1 12 3Pdxz 0.00604 0.00039 -0.000 -0.000 0.000 + # 1 13 3Pdx2-y2 0.00607 0.00099 -0.001 -0.001 0.000 + # 1 Total 6.99733 0.41305 -0.288 -0.296 0.007 + + # Define the function that parses the charges + def _parse_charge_total(line): + atom_idx, _, *vals = line.split() + # assert that this is a proper line + # this should catch cases where the following line of charge output + # is still parseable + # atom_idx = int(atom_idx) + return int(atom_idx), list(map(float, vals)) + + # Define the function that parses a single species + def _parse_species(): + nonlocal header, atom_idx, atom_charges + + # The mulliken charges are organized per species where the charges + # for each species are enclosed by dashes (-----) + + # Step to header + _, line = self.step_to('Atom', allow_reread=False) + if header is None: + header = ( + line + .replace("Orb", "") # For now only total is parsed + .replace("Svec", "Sx Sy Sz") # Qatom in 4.1 + .split() + ) + + # Skip over the starting ---- line + self.readline() + + # Read until closing ---- line + line = "" + while "----" not in line: + line = self.readline() + if "Total" in line: + ia, charge_vals = _parse_charge_total(line) + atom_idx.append(ia) + atom_charges.append(charge_vals) + + # Parse as long as we find new species + atom_idx = [] + atom_charges = [] + header = None + found, _ = self.step_to("Species:", allow_reread=False) + while found: + _parse_species() + found, _ = self.step_to("Species:", allow_reread=False) + + # Convert to array and sort in the order of the atoms + sort_idx = np.argsort(atom_idx) + atom_charges_array = _a.arrayf(atom_charges)[sort_idx] + + if pd is None: + # not as_dataframe + return atom_charges_array + + # determine how many columns we have + # this will remove atom indices and species, so only inside + ncols = len(atom_charges[0]) + assert ncols == len(header) + + # the precision is limited, so no need for double precision + return pd.DataFrame( + atom_charges_array, + columns=header, + dtype=np.float32, + index=pd.RangeIndex(stop=len(atom_charges), name="atom"), + ) # Check that a known charge has been requested if namel == "voronoi": From aa0385ac0e4df78586245232870ee03dc70f16a4 Mon Sep 17 00:00:00 2001 From: "A.H. Kole" Date: Fri, 23 Feb 2024 16:37:51 +0100 Subject: [PATCH 02/13] Fix incorrect header for mulliken charges from stdout --- src/sisl/io/siesta/stdout.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py index ca55c4cb67..a3d97d18d1 100644 --- a/src/sisl/io/siesta/stdout.py +++ b/src/sisl/io/siesta/stdout.py @@ -1171,10 +1171,9 @@ def _parse_species(): if header is None: header = ( line - .replace("Orb", "") # For now only total is parsed .replace("Svec", "Sx Sy Sz") # Qatom in 4.1 .split() - ) + )[2:] # Skip over the starting ---- line self.readline() From 33b63a7bbcc06a22514cfad4753a1bce1f006481 Mon Sep 17 00:00:00 2001 From: "A.H. Kole" Date: Fri, 23 Feb 2024 16:40:21 +0100 Subject: [PATCH 03/13] Fix comment in parsing mulliken charges from stdout --- src/sisl/io/siesta/stdout.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py index a3d97d18d1..7860c96668 100644 --- a/src/sisl/io/siesta/stdout.py +++ b/src/sisl/io/siesta/stdout.py @@ -1171,7 +1171,7 @@ def _parse_species(): if header is None: header = ( line - .replace("Svec", "Sx Sy Sz") # Qatom in 4.1 + .replace("Svec", "Sx Sy Sz") # Split Svec into Cartesian components .split() )[2:] From a0518940c96e759a33ad7b0878aa3fe3b33dea4e Mon Sep 17 00:00:00 2001 From: "A.H. Kole" Date: Thu, 7 Mar 2024 19:57:45 +0100 Subject: [PATCH 04/13] Updated docstring of read_charge to reflect parsing of total Mulliken charges --- src/sisl/io/siesta/stdout.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py index 7860c96668..69c29560cd 100644 --- a/src/sisl/io/siesta/stdout.py +++ b/src/sisl/io/siesta/stdout.py @@ -985,7 +985,7 @@ def read_charge( Siesta enables many different modes of writing out charges. - NOTE: currently Mulliken charges are not implemented. + NOTE: currently only Mulliken total atomic charges are implemented. The below table shows a list of different cases that may be encountered, the letters are referred to in the @@ -1010,12 +1010,12 @@ def read_charge( the SCF charges are not present. For `Opt.ANY` it will return the most information, effectively SCF will be returned if present. - Currently Mulliken is not implemented, any help in reading this would be - very welcome. + Currently orbitally-resolved Mulliken is not implemented, any help in + reading this would be very welcome. Parameters ---------- - name: {"voronoi", "hirshfeld"} + name: {"voronoi", "hirshfeld", "mulliken"} the name of the charges that you want to read iscf: int or Opt, optional index (0-based) of the scf iteration you want the charges for. @@ -1167,12 +1167,12 @@ def _parse_species(): # for each species are enclosed by dashes (-----) # Step to header - _, line = self.step_to('Atom', allow_reread=False) + _, line = self.step_to("Atom", allow_reread=False) if header is None: header = ( - line - .replace("Svec", "Sx Sy Sz") # Split Svec into Cartesian components - .split() + line.replace( + "Svec", "Sx Sy Sz" + ).split() # Split Svec into Cartesian components )[2:] # Skip over the starting ---- line From 5b2c284ae425e0da55467c34ff5173b4c9513b65 Mon Sep 17 00:00:00 2001 From: "A.H. Kole" Date: Thu, 7 Mar 2024 20:10:49 +0100 Subject: [PATCH 05/13] Made Mulliken charges header consistent with Voronoi and Hirshfeld --- src/sisl/io/siesta/stdout.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py index 69c29560cd..c7ce2878e8 100644 --- a/src/sisl/io/siesta/stdout.py +++ b/src/sisl/io/siesta/stdout.py @@ -1170,9 +1170,12 @@ def _parse_species(): _, line = self.step_to("Atom", allow_reread=False) if header is None: header = ( - line.replace( + line.replace("Charge", "e") + .replace("Spin", "S") + .replace( "Svec", "Sx Sy Sz" - ).split() # Split Svec into Cartesian components + ) # Split Svec into Cartesian components + .split() )[2:] # Skip over the starting ---- line From 66c4da39e0bde62681798d0d3709fbfbdefca86e Mon Sep 17 00:00:00 2001 From: "A.H. Kole" Date: Thu, 7 Mar 2024 20:14:43 +0100 Subject: [PATCH 06/13] Added parsing of Mulliken charges to CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f2e901472..6dd0b40244 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ we hit release version 1.0.0. - Creation of [n]-triangulenes (`sisl.geom.triangulene`) - added `offset` argument in `Geometry.add_vacuum` to enable shifting atomic coordinates - A new `AtomicMatrixPlot` to plot sparse matrices, #668 +- Parsing of total Mulliken charges in `stdoutSileSiesta`, #691 ### Fixed - `txtSileOrca.info.no` used a wrong regex, added a test From 48e8cb751d36708a38d283afa780ddba2247c330 Mon Sep 17 00:00:00 2001 From: "A.H. Kole" Date: Fri, 8 Mar 2024 22:37:38 +0100 Subject: [PATCH 07/13] Tried to add support for parsing Mulliken charges of simpler spin types --- src/sisl/io/siesta/stdout.py | 89 ++++++++++++++++++++++++++++++++---- 1 file changed, 79 insertions(+), 10 deletions(-) diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py index c7ce2878e8..eecc6ac8a2 100644 --- a/src/sisl/io/siesta/stdout.py +++ b/src/sisl/io/siesta/stdout.py @@ -1151,7 +1151,7 @@ def _mulliken_charges(): # 1 Total 6.99733 0.41305 -0.288 -0.296 0.007 # Define the function that parses the charges - def _parse_charge_total(line): + def _parse_charge_total_nc(line): # non-colinear and soc atom_idx, _, *vals = line.split() # assert that this is a proper line # this should catch cases where the following line of charge output @@ -1159,8 +1159,12 @@ def _parse_charge_total(line): # atom_idx = int(atom_idx) return int(atom_idx), list(map(float, vals)) + def _parse_charge_total(line): # unpolarized and colinear spin + atom_idx, val, *_ = line.split() + return int(atom_idx), float(val) + # Define the function that parses a single species - def _parse_species(): + def _parse_species_nc(): # non-colinear and soc nonlocal header, atom_idx, atom_charges # The mulliken charges are organized per species where the charges @@ -1186,18 +1190,83 @@ def _parse_species(): while "----" not in line: line = self.readline() if "Total" in line: - ia, charge_vals = _parse_charge_total(line) + ia, charge_vals = _parse_charge_total_nc(line) atom_idx.append(ia) atom_charges.append(charge_vals) - # Parse as long as we find new species - atom_idx = [] - atom_charges = [] - header = None - found, _ = self.step_to("Species:", allow_reread=False) - while found: - _parse_species() + def _parse_spin_pol(): # unpolarized and colinear spin + nonlocal atom_idx, atom_charges + + # The mulliken charges are organized per spin + # polarization (UP/DOWN). The end of a spin + # block is marked by a Qtot + + # Read until we encounter "mulliken: Qtot" + def try_parse_int(s): + try: + int(s) + except ValueError: + return False + else: + return True + + line = "" + while "mulliken: Qtot" not in line: + line = self.readline() + words = line.split() + if len(words) > 0 and try_parse_int(words[0]): + # This should be a line containing the total charge for an atom + ia, charge = _parse_charge_total(line) + atom_idx.append(ia) + atom_charges.append(charge) + + # Determine with which spin type we are dealing + IDX_NON_POL = 0 + IDX_POL = 1 + IDX_NC = 2 + search_keys_spin = [ + "Qatom", # if we find Qatom and no "mulliken: Spin UP" we are dealing with spin unpolarized + "mulliken: Spin UP", + "Svec", + ] + loc = self.fh.tell() + ret = self.step_to( + search_keys_spin, case=True, ret_index=True, allow_reread=False + ) + self.fh.seek(loc) + if ret[2] == IDX_NON_POL: + # No spin components so just parse charge + atom_charges = [] + atom_idx = [] + header = ["e"] + _parse_spin_pol() + elif ret[2] == IDX_POL: + # Parse both spin polarizations + atom_charges_pol = [] + header = ["e", "Sz"] + for s in ["UP", "DOWN"]: + atom_charges = [] + atom_idx = [] + self.step_to(f"mulliken: Spin {s}", allow_reread=False) + _parse_spin_pol() + atom_charges_pol.append(atom_charges) + + # Compute the charge and spin of each atom + atom_charges_pol_array = _a.arrayf(atom_charges_pol) + atom_q = atom_charges_pol_array[0, :] + atom_charges_pol_array[1, :] + atom_s = atom_charges_pol_array[0, :] - atom_charges_pol_array[1, :] + atom_charges[:] = np.stack((atom_q, atom_s), axis=-1) + elif ret[2] == IDX_NC: + # Parse as long as we find new species + atom_charges = [] + atom_idx = [] + header = None found, _ = self.step_to("Species:", allow_reread=False) + while found: + _parse_species_nc() + found, _ = self.step_to("Species:", allow_reread=False) + else: + assert False # It should never reach here # Convert to array and sort in the order of the atoms sort_idx = np.argsort(atom_idx) From d6f6ca9cae1ebbfec72e11e57a28ff155126f16a Mon Sep 17 00:00:00 2001 From: "A.H. Kole" Date: Fri, 15 Mar 2024 19:04:06 +0100 Subject: [PATCH 08/13] Use self.info.spin to determine spin type for Mulliken charges --- src/sisl/io/siesta/stdout.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py index eecc6ac8a2..b491b24070 100644 --- a/src/sisl/io/siesta/stdout.py +++ b/src/sisl/io/siesta/stdout.py @@ -1221,26 +1221,13 @@ def try_parse_int(s): atom_charges.append(charge) # Determine with which spin type we are dealing - IDX_NON_POL = 0 - IDX_POL = 1 - IDX_NC = 2 - search_keys_spin = [ - "Qatom", # if we find Qatom and no "mulliken: Spin UP" we are dealing with spin unpolarized - "mulliken: Spin UP", - "Svec", - ] - loc = self.fh.tell() - ret = self.step_to( - search_keys_spin, case=True, ret_index=True, allow_reread=False - ) - self.fh.seek(loc) - if ret[2] == IDX_NON_POL: + if self.info.spin == Spin.UNPOLARIZED: # No spin components so just parse charge atom_charges = [] atom_idx = [] header = ["e"] _parse_spin_pol() - elif ret[2] == IDX_POL: + elif self.info.spin == Spin.POLARIZED: # Parse both spin polarizations atom_charges_pol = [] header = ["e", "Sz"] @@ -1256,7 +1243,7 @@ def try_parse_int(s): atom_q = atom_charges_pol_array[0, :] + atom_charges_pol_array[1, :] atom_s = atom_charges_pol_array[0, :] - atom_charges_pol_array[1, :] atom_charges[:] = np.stack((atom_q, atom_s), axis=-1) - elif ret[2] == IDX_NC: + elif self.info.spin == Spin.NONCOLINEAR or self.info.spin == Spin.SPINORBIT: # Parse as long as we find new species atom_charges = [] atom_idx = [] From 604f297018380697d348cb2a6e002dd264e69e0f Mon Sep 17 00:00:00 2001 From: "A.H. Kole" Date: Tue, 13 Aug 2024 19:11:56 +0200 Subject: [PATCH 09/13] Fix determination of spin in reading of mulliken moments --- src/sisl/io/siesta/stdout.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py index d055e8a2d4..c9419edf9a 100644 --- a/src/sisl/io/siesta/stdout.py +++ b/src/sisl/io/siesta/stdout.py @@ -42,7 +42,7 @@ def _ensure_atoms(atoms): def _parse_spin(attr, match): """Parse 'redata: Spin configuration *= '""" - opt = match.string.split("=")[-1] + opt = match.string.split("=")[-1].strip() if opt.startswith("spin-orbit"): return Spin("spin-orbit") @@ -1233,16 +1233,16 @@ def try_parse_int(s): # This should be a line containing the total charge for an atom ia, charge = _parse_charge_total(line) atom_idx.append(ia) - atom_charges.append(charge) + atom_charges.append([charge]) # Determine with which spin type we are dealing - if self.info.spin == Spin.UNPOLARIZED: + if self.info.spin == Spin(): # UNPOLARIZED # No spin components so just parse charge atom_charges = [] atom_idx = [] header = ["e"] _parse_spin_pol() - elif self.info.spin == Spin.POLARIZED: + elif self.info.spin == Spin("polarized"): # Parse both spin polarizations atom_charges_pol = [] header = ["e", "Sz"] @@ -1255,10 +1255,14 @@ def try_parse_int(s): # Compute the charge and spin of each atom atom_charges_pol_array = _a.arrayf(atom_charges_pol) - atom_q = atom_charges_pol_array[0, :] + atom_charges_pol_array[1, :] - atom_s = atom_charges_pol_array[0, :] - atom_charges_pol_array[1, :] + atom_q = ( + atom_charges_pol_array[0, :, 0] + atom_charges_pol_array[1, :, 0] + ) + atom_s = ( + atom_charges_pol_array[0, :, 0] - atom_charges_pol_array[1, :, 0] + ) atom_charges[:] = np.stack((atom_q, atom_s), axis=-1) - elif self.info.spin == Spin.NONCOLINEAR or self.info.spin == Spin.SPINORBIT: + elif self.info.spin in [Spin("non-colinear"), Spin("spin-orbit")]: # Parse as long as we find new species atom_charges = [] atom_idx = [] From 3368f655b61681dffda47e50f7272fc016517960 Mon Sep 17 00:00:00 2001 From: "A.H. Kole" Date: Fri, 27 Sep 2024 18:34:00 +0200 Subject: [PATCH 10/13] Fix parsing of nc/soc Mulliken charges --- src/sisl/io/siesta/stdout.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py index 0d8c15c189..e027dc7fe9 100644 --- a/src/sisl/io/siesta/stdout.py +++ b/src/sisl/io/siesta/stdout.py @@ -123,6 +123,12 @@ class stdoutSileSiesta(SileSiesta): lambda attr, instance, match: int(match.string.split()[-2]), not_found="warn", ), + _A( + "ns", + r"^redata: Number of Atomic Species", + lambda attr, instance, match: int(match.string.split("=")[-1]), + not_found="warn", + ), _A( "completed", r".*Job completed", @@ -1263,14 +1269,13 @@ def try_parse_int(s): ) atom_charges[:] = np.stack((atom_q, atom_s), axis=-1) elif self.info.spin in [Spin("non-colinear"), Spin("spin-orbit")]: - # Parse as long as we find new species + # Parse each species atom_charges = [] atom_idx = [] header = None - found, _ = self.step_to("Species:", allow_reread=False) - while found: - _parse_species_nc() + for _ in range(self.info.ns): found, _ = self.step_to("Species:", allow_reread=False) + _parse_species_nc() else: assert False # It should never reach here From ce7b9feb10669c98da8adf111a2647a92ed88b08 Mon Sep 17 00:00:00 2001 From: "A.H. Kole" Date: Fri, 27 Sep 2024 18:34:39 +0200 Subject: [PATCH 11/13] Add tests for parsing of total Mulliken charges --- src/sisl/io/siesta/tests/test_stdout_charges.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/sisl/io/siesta/tests/test_stdout_charges.py b/src/sisl/io/siesta/tests/test_stdout_charges.py index 73865c85c6..a05149b06f 100644 --- a/src/sisl/io/siesta/tests/test_stdout_charges.py +++ b/src/sisl/io/siesta/tests/test_stdout_charges.py @@ -13,6 +13,7 @@ # tests here tests charge reads for output # voronoi + hirshfeld: test_vh_* +# voronoi + hirshfeld + mulliken: test_vhm_* # voronoi: test_v_* # hirshfeld: test_h_* # mulliken: test_m_* @@ -30,8 +31,8 @@ def with_pandas(): return False -@pytest.mark.parametrize("name", ("voronoi", "Hirshfeld")) -def test_vh_empty_file(name, sisl_files): +@pytest.mark.parametrize("name", ("voronoi", "Hirshfeld", "mulliken")) +def test_vhm_empty_file(name, sisl_files): f = sisl_files("siesta", "ancient", "voronoi_hirshfeld_4.1_none.out") out = stdoutSileSiesta(f) @@ -76,9 +77,11 @@ def test_vh_final(fname, name, sisl_files): @pytest.mark.parametrize("fname", ("md", "pol_md", ("ancient", "pol_md"), "nc_md")) -@pytest.mark.parametrize("name", ("voronoi", "Hirshfeld")) -def test_vh_md(name, fname, sisl_files): +@pytest.mark.parametrize("name", ("voronoi", "Hirshfeld", "mulliken")) +def test_vhm_md(name, fname, sisl_files): if isinstance(fname, tuple): + if name == "mulliken": + return f = sisl_files("siesta", "ancient", f"voronoi_hirshfeld_4.1_{fname[1]}.out") else: f = sisl_files("siesta", "charges", fname, "RUN.out") From d305e09457185d1c8dd8369e36140c92d71758e0 Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Thu, 24 Oct 2024 15:17:44 +0200 Subject: [PATCH 12/13] implemented whalrus and changed ns to nspecies nspecies is what is currently used in Atom, so better stick with that name. Signed-off-by: Nick Papior --- src/sisl/io/siesta/stdout.py | 63 ++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py index e027dc7fe9..f3b4bc10ab 100644 --- a/src/sisl/io/siesta/stdout.py +++ b/src/sisl/io/siesta/stdout.py @@ -124,7 +124,7 @@ class stdoutSileSiesta(SileSiesta): not_found="warn", ), _A( - "ns", + "nspecies", r"^redata: Number of Atomic Species", lambda attr, instance, match: int(match.string.split("=")[-1]), not_found="warn", @@ -1000,14 +1000,17 @@ def construct_data(d, data): @sile_fh_open(True) def read_charge( - self, name, iscf=Opt.ANY, imd=Opt.ANY, key_scf="scf", as_dataframe=False + self, + name: Literal["voronoi", "hirshfeld", "mulliken"], + iscf=Opt.ANY, + imd=Opt.ANY, + key_scf: str = "scf", + as_dataframe: bool = False, ): r"""Read charges calculated in SCF loop or MD loop (or both) Siesta enables many different modes of writing out charges. - NOTE: currently only Mulliken total atomic charges are implemented. - The below table shows a list of different cases that may be encountered, the letters are referred to in the return section to indicate what is returned. @@ -1036,7 +1039,7 @@ def read_charge( Parameters ---------- - name: {"voronoi", "hirshfeld", "mulliken"} + name: the name of the charges that you want to read iscf: int or Opt, optional index (0-based) of the scf iteration you want the charges for. @@ -1050,9 +1053,9 @@ def read_charge( the returned quantities depend on what is present. If ``None/Opt.NONE`` it will not return any MD charges. If both `imd` and `iscf` are ``None`` then only the final charges will be returned. - key_scf : str, optional + key_scf : the key lookup for the scf iterations (a ":" will automatically be appended) - as_dataframe: boolean, optional + as_dataframe: whether charges should be returned as a pandas dataframe. Returns @@ -1061,7 +1064,7 @@ def read_charge( if a specific MD+SCF index is requested (or special cases where output is not complete) list of numpy.ndarray - if one both `iscf` or `imd` is different from ``None/Opt.NONE``. + if `iscf` or `imd` is different from ``None/Opt.NONE``. pandas.DataFrame if `as_dataframe` is requested. The dataframe will have multi-indices if multiple SCF or MD steps are requested. @@ -1117,16 +1120,15 @@ def _parse_charge(line): # We have found the header, prepare a list to read the charges atom_charges = [] - line = " " - while line != "": + while (line := self.readline()) != "": try: - line = self.readline() charge_vals = _parse_charge(line) atom_charges.append(charge_vals) except Exception: # We already have the charge values and we reached a line that can't be parsed, # this means we have reached the end. break + if pd is None: # not as_dataframe return _a.arrayf(atom_charges) @@ -1207,9 +1209,7 @@ def _parse_species_nc(): # non-colinear and soc self.readline() # Read until closing ---- line - line = "" - while "----" not in line: - line = self.readline() + while "----" not in (line := self.readline()): if "Total" in line: ia, charge_vals = _parse_charge_total_nc(line) atom_idx.append(ia) @@ -1231,9 +1231,7 @@ def try_parse_int(s): else: return True - line = "" - while "mulliken: Qtot" not in line: - line = self.readline() + while "mulliken: Qtot" not in (line := self.readline()): words = line.split() if len(words) > 0 and try_parse_int(words[0]): # This should be a line containing the total charge for an atom @@ -1242,17 +1240,18 @@ def try_parse_int(s): atom_charges.append([charge]) # Determine with which spin type we are dealing - if self.info.spin == Spin(): # UNPOLARIZED + if self.info.spin.is_unpolarized: # UNPOLARIZED # No spin components so just parse charge atom_charges = [] atom_idx = [] header = ["e"] _parse_spin_pol() - elif self.info.spin == Spin("polarized"): + + elif self.info.spin.is_polarized: # Parse both spin polarizations atom_charges_pol = [] header = ["e", "Sz"] - for s in ["UP", "DOWN"]: + for s in ("UP", "DOWN"): atom_charges = [] atom_idx = [] self.step_to(f"mulliken: Spin {s}", allow_reread=False) @@ -1268,16 +1267,20 @@ def try_parse_int(s): atom_charges_pol_array[0, :, 0] - atom_charges_pol_array[1, :, 0] ) atom_charges[:] = np.stack((atom_q, atom_s), axis=-1) - elif self.info.spin in [Spin("non-colinear"), Spin("spin-orbit")]: + + elif not self.info.spin.is_diagonal: # Parse each species atom_charges = [] atom_idx = [] header = None - for _ in range(self.info.ns): + for _ in range(self.info.nspecies): found, _ = self.step_to("Species:", allow_reread=False) _parse_species_nc() + else: - assert False # It should never reach here + raise NotImplementedError( + "Something went wrong... Couldn't parse file." + ) # Convert to array and sort in the order of the atoms sort_idx = np.argsort(atom_idx) @@ -1335,6 +1338,7 @@ def try_parse_int(s): key_scf, *charge_keys, ] + # adjust the below while loop to take into account any additional # segments of search_keys IDX_SCF_END = [0, 1] @@ -1364,9 +1368,11 @@ def try_parse_int(s): FOUND_MD = False FOUND_FINAL = False - # TODO whalrus - ret = self.step_to(search_keys, case=True, ret_index=True, allow_reread=False) - while ret[0]: + while ( + ret := self.step_to( + search_keys, case=True, ret_index=True, allow_reread=False + ) + )[0]: if ret[2] in IDX_SCF_END: # we finished all SCF iterations current_state = state.MD @@ -1427,11 +1433,6 @@ def try_parse_int(s): current_state = state.CHARGE - # step to next entry - ret = self.step_to( - search_keys, case=True, ret_index=True, allow_reread=False - ) - if not any((FOUND_SCF, FOUND_MD, FOUND_FINAL)): raise SileError(f"{self!s} does not contain any charges ({name})") From 236c33fdb672201d8f1c0d30f54ba0d80dd07348 Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Thu, 24 Oct 2024 15:23:08 +0200 Subject: [PATCH 13/13] small comment on test Signed-off-by: Nick Papior --- src/sisl/io/siesta/tests/test_stdout_charges.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/sisl/io/siesta/tests/test_stdout_charges.py b/src/sisl/io/siesta/tests/test_stdout_charges.py index a05149b06f..3e001d5252 100644 --- a/src/sisl/io/siesta/tests/test_stdout_charges.py +++ b/src/sisl/io/siesta/tests/test_stdout_charges.py @@ -81,6 +81,8 @@ def test_vh_final(fname, name, sisl_files): def test_vhm_md(name, fname, sisl_files): if isinstance(fname, tuple): if name == "mulliken": + # we don't have this in the test, so simply return + # (this isn't a test after all) return f = sisl_files("siesta", "ancient", f"voronoi_hirshfeld_4.1_{fname[1]}.out") else: