diff --git a/CHANGELOG.md b/CHANGELOG.md index 142b8eb9e9..71a361e66c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -92,6 +92,7 @@ we hit release version 1.0.0. - Creation of [n]-triangulenes (`sisl.geom.triangulene`) - added `offset` argument in `Geometry.add_vacuum` to enable shifting atomic coordinates - A new `AtomicMatrixPlot` to plot sparse matrices, #668 +- Parsing of total Mulliken charges in `stdoutSileSiesta`, #691 ### Fixed - PEP-585 compliant diff --git a/src/sisl/io/siesta/stdout.py b/src/sisl/io/siesta/stdout.py index 9064a6a082..f3b4bc10ab 100644 --- a/src/sisl/io/siesta/stdout.py +++ b/src/sisl/io/siesta/stdout.py @@ -42,7 +42,7 @@ def _ensure_atoms(atoms): def _parse_spin(attr, instance, match): """Parse 'redata: Spin configuration *= '""" - opt = match.string.split("=")[-1] + opt = match.string.split("=")[-1].strip() if opt.startswith("spin-orbit"): return Spin("spin-orbit") @@ -123,6 +123,12 @@ class stdoutSileSiesta(SileSiesta): lambda attr, instance, match: int(match.string.split()[-2]), not_found="warn", ), + _A( + "nspecies", + r"^redata: Number of Atomic Species", + lambda attr, instance, match: int(match.string.split("=")[-1]), + not_found="warn", + ), _A( "completed", r".*Job completed", @@ -994,14 +1000,17 @@ def construct_data(d, data): @sile_fh_open(True) def read_charge( - self, name, iscf=Opt.ANY, imd=Opt.ANY, key_scf="scf", as_dataframe=False + self, + name: Literal["voronoi", "hirshfeld", "mulliken"], + iscf=Opt.ANY, + imd=Opt.ANY, + key_scf: str = "scf", + as_dataframe: bool = False, ): r"""Read charges calculated in SCF loop or MD loop (or both) Siesta enables many different modes of writing out charges. - NOTE: currently Mulliken charges are not implemented. - The below table shows a list of different cases that may be encountered, the letters are referred to in the return section to indicate what is returned. @@ -1025,12 +1034,12 @@ def read_charge( the SCF charges are not present. For `Opt.ANY` it will return the most information, effectively SCF will be returned if present. - Currently Mulliken is not implemented, any help in reading this would be - very welcome. + Currently orbitally-resolved Mulliken is not implemented, any help in + reading this would be very welcome. Parameters ---------- - name: {"voronoi", "hirshfeld"} + name: the name of the charges that you want to read iscf: int or Opt, optional index (0-based) of the scf iteration you want the charges for. @@ -1044,9 +1053,9 @@ def read_charge( the returned quantities depend on what is present. If ``None/Opt.NONE`` it will not return any MD charges. If both `imd` and `iscf` are ``None`` then only the final charges will be returned. - key_scf : str, optional + key_scf : the key lookup for the scf iterations (a ":" will automatically be appended) - as_dataframe: boolean, optional + as_dataframe: whether charges should be returned as a pandas dataframe. Returns @@ -1055,7 +1064,7 @@ def read_charge( if a specific MD+SCF index is requested (or special cases where output is not complete) list of numpy.ndarray - if one both `iscf` or `imd` is different from ``None/Opt.NONE``. + if `iscf` or `imd` is different from ``None/Opt.NONE``. pandas.DataFrame if `as_dataframe` is requested. The dataframe will have multi-indices if multiple SCF or MD steps are requested. @@ -1111,16 +1120,15 @@ def _parse_charge(line): # We have found the header, prepare a list to read the charges atom_charges = [] - line = " " - while line != "": + while (line := self.readline()) != "": try: - line = self.readline() charge_vals = _parse_charge(line) atom_charges.append(charge_vals) except Exception: # We already have the charge values and we reached a line that can't be parsed, # this means we have reached the end. break + if pd is None: # not as_dataframe return _a.arrayf(atom_charges) @@ -1141,7 +1149,159 @@ def _parse_charge(line): # define helper function for reading voronoi+hirshfeld charges def _mulliken_charges(): """Read output from Mulliken charges""" - raise NotImplementedError("Mulliken charges are not implemented currently") + nonlocal pd + + # Expecting something like this (NC/SOC) + # mulliken: Atomic and Orbital Populations: + + # Species: Cl + + # Atom Orb Charge Spin Svec + # ---------------------------------------------------------------- + # 1 1 3s 1.75133 0.00566 0.004 0.004 -0.000 + # 1 2 3s 0.09813 0.00658 -0.005 -0.005 0.000 + # 1 3 3py 1.69790 0.21531 -0.161 -0.142 0.018 + # 1 4 3pz 1.72632 0.15770 -0.086 -0.132 -0.008 + # 1 5 3px 1.81369 0.01618 -0.004 0.015 -0.004 + # 1 6 3py -0.04663 0.02356 -0.017 -0.016 -0.000 + # 1 7 3pz -0.04167 0.01560 -0.011 -0.011 0.001 + # 1 8 3px -0.02977 0.00920 -0.006 -0.007 0.000 + # 1 9 3Pdxy 0.00595 0.00054 -0.000 -0.000 -0.000 + # 1 10 3Pdyz 0.00483 0.00073 -0.001 -0.001 -0.000 + # 1 11 3Pdz2 0.00515 0.00098 -0.001 -0.001 -0.000 + # 1 12 3Pdxz 0.00604 0.00039 -0.000 -0.000 0.000 + # 1 13 3Pdx2-y2 0.00607 0.00099 -0.001 -0.001 0.000 + # 1 Total 6.99733 0.41305 -0.288 -0.296 0.007 + + # Define the function that parses the charges + def _parse_charge_total_nc(line): # non-colinear and soc + atom_idx, _, *vals = line.split() + # assert that this is a proper line + # this should catch cases where the following line of charge output + # is still parseable + # atom_idx = int(atom_idx) + return int(atom_idx), list(map(float, vals)) + + def _parse_charge_total(line): # unpolarized and colinear spin + atom_idx, val, *_ = line.split() + return int(atom_idx), float(val) + + # Define the function that parses a single species + def _parse_species_nc(): # non-colinear and soc + nonlocal header, atom_idx, atom_charges + + # The mulliken charges are organized per species where the charges + # for each species are enclosed by dashes (-----) + + # Step to header + _, line = self.step_to("Atom", allow_reread=False) + if header is None: + header = ( + line.replace("Charge", "e") + .replace("Spin", "S") + .replace( + "Svec", "Sx Sy Sz" + ) # Split Svec into Cartesian components + .split() + )[2:] + + # Skip over the starting ---- line + self.readline() + + # Read until closing ---- line + while "----" not in (line := self.readline()): + if "Total" in line: + ia, charge_vals = _parse_charge_total_nc(line) + atom_idx.append(ia) + atom_charges.append(charge_vals) + + def _parse_spin_pol(): # unpolarized and colinear spin + nonlocal atom_idx, atom_charges + + # The mulliken charges are organized per spin + # polarization (UP/DOWN). The end of a spin + # block is marked by a Qtot + + # Read until we encounter "mulliken: Qtot" + def try_parse_int(s): + try: + int(s) + except ValueError: + return False + else: + return True + + while "mulliken: Qtot" not in (line := self.readline()): + words = line.split() + if len(words) > 0 and try_parse_int(words[0]): + # This should be a line containing the total charge for an atom + ia, charge = _parse_charge_total(line) + atom_idx.append(ia) + atom_charges.append([charge]) + + # Determine with which spin type we are dealing + if self.info.spin.is_unpolarized: # UNPOLARIZED + # No spin components so just parse charge + atom_charges = [] + atom_idx = [] + header = ["e"] + _parse_spin_pol() + + elif self.info.spin.is_polarized: + # Parse both spin polarizations + atom_charges_pol = [] + header = ["e", "Sz"] + for s in ("UP", "DOWN"): + atom_charges = [] + atom_idx = [] + self.step_to(f"mulliken: Spin {s}", allow_reread=False) + _parse_spin_pol() + atom_charges_pol.append(atom_charges) + + # Compute the charge and spin of each atom + atom_charges_pol_array = _a.arrayf(atom_charges_pol) + atom_q = ( + atom_charges_pol_array[0, :, 0] + atom_charges_pol_array[1, :, 0] + ) + atom_s = ( + atom_charges_pol_array[0, :, 0] - atom_charges_pol_array[1, :, 0] + ) + atom_charges[:] = np.stack((atom_q, atom_s), axis=-1) + + elif not self.info.spin.is_diagonal: + # Parse each species + atom_charges = [] + atom_idx = [] + header = None + for _ in range(self.info.nspecies): + found, _ = self.step_to("Species:", allow_reread=False) + _parse_species_nc() + + else: + raise NotImplementedError( + "Something went wrong... Couldn't parse file." + ) + + # Convert to array and sort in the order of the atoms + sort_idx = np.argsort(atom_idx) + atom_charges_array = _a.arrayf(atom_charges)[sort_idx] + + if pd is None: + # not as_dataframe + return atom_charges_array + + # determine how many columns we have + # this will remove atom indices and species, so only inside + ncols = len(atom_charges[0]) + assert ncols == len(header) + + # the precision is limited, so no need for double precision + return pd.DataFrame( + atom_charges_array, + columns=header, + dtype=np.float32, + index=pd.RangeIndex(stop=len(atom_charges), name="atom"), + ) # Check that a known charge has been requested if namel == "voronoi": @@ -1178,6 +1338,7 @@ def _mulliken_charges(): key_scf, *charge_keys, ] + # adjust the below while loop to take into account any additional # segments of search_keys IDX_SCF_END = [0, 1] @@ -1207,9 +1368,11 @@ def _mulliken_charges(): FOUND_MD = False FOUND_FINAL = False - # TODO whalrus - ret = self.step_to(search_keys, case=True, ret_index=True, allow_reread=False) - while ret[0]: + while ( + ret := self.step_to( + search_keys, case=True, ret_index=True, allow_reread=False + ) + )[0]: if ret[2] in IDX_SCF_END: # we finished all SCF iterations current_state = state.MD @@ -1270,11 +1433,6 @@ def _mulliken_charges(): current_state = state.CHARGE - # step to next entry - ret = self.step_to( - search_keys, case=True, ret_index=True, allow_reread=False - ) - if not any((FOUND_SCF, FOUND_MD, FOUND_FINAL)): raise SileError(f"{self!s} does not contain any charges ({name})") diff --git a/src/sisl/io/siesta/tests/test_stdout_charges.py b/src/sisl/io/siesta/tests/test_stdout_charges.py index 73865c85c6..3e001d5252 100644 --- a/src/sisl/io/siesta/tests/test_stdout_charges.py +++ b/src/sisl/io/siesta/tests/test_stdout_charges.py @@ -13,6 +13,7 @@ # tests here tests charge reads for output # voronoi + hirshfeld: test_vh_* +# voronoi + hirshfeld + mulliken: test_vhm_* # voronoi: test_v_* # hirshfeld: test_h_* # mulliken: test_m_* @@ -30,8 +31,8 @@ def with_pandas(): return False -@pytest.mark.parametrize("name", ("voronoi", "Hirshfeld")) -def test_vh_empty_file(name, sisl_files): +@pytest.mark.parametrize("name", ("voronoi", "Hirshfeld", "mulliken")) +def test_vhm_empty_file(name, sisl_files): f = sisl_files("siesta", "ancient", "voronoi_hirshfeld_4.1_none.out") out = stdoutSileSiesta(f) @@ -76,9 +77,13 @@ def test_vh_final(fname, name, sisl_files): @pytest.mark.parametrize("fname", ("md", "pol_md", ("ancient", "pol_md"), "nc_md")) -@pytest.mark.parametrize("name", ("voronoi", "Hirshfeld")) -def test_vh_md(name, fname, sisl_files): +@pytest.mark.parametrize("name", ("voronoi", "Hirshfeld", "mulliken")) +def test_vhm_md(name, fname, sisl_files): if isinstance(fname, tuple): + if name == "mulliken": + # we don't have this in the test, so simply return + # (this isn't a test after all) + return f = sisl_files("siesta", "ancient", f"voronoi_hirshfeld_4.1_{fname[1]}.out") else: f = sisl_files("siesta", "charges", fname, "RUN.out")