diff --git a/docs/source/conf.py b/docs/source/conf.py index ca9b2dcd5..e37c73742 100755 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -171,6 +171,7 @@ (r'.*', r'Tuple.*'), ] nitpick_ignore = [ + ('py:class', 'numpy.typing.ArrayLike'), ('py:class', 'AttributeDict'), ('py:class', 'ExitCode'), ('py:class', 'StructureData'), diff --git a/src/aiida_quantumespresso/calculations/projwfc.py b/src/aiida_quantumespresso/calculations/projwfc.py index 41bc77726..5a65cbfdc 100644 --- a/src/aiida_quantumespresso/calculations/projwfc.py +++ b/src/aiida_quantumespresso/calculations/projwfc.py @@ -31,12 +31,11 @@ class ProjwfcCalculation(NamelistsCalculation): xml_path = Path(NamelistsCalculation._default_parent_output_folder ).joinpath(f'{NamelistsCalculation._PREFIX}.save', 'data-file-schema.xml') - _internal_retrieve_list = [ - NamelistsCalculation._PREFIX + '.pdos*', - ] + # The XML file is added to the temporary retrieve list since it is required for parsing, but already in the # repository of a an ancestor calculation. _retrieve_temporary_list = [ + NamelistsCalculation._PREFIX + '.pdos*', xml_path.as_posix(), ] @@ -60,8 +59,16 @@ def define(cls, spec): spec.default_output_node = 'output_parameters' spec.exit_code(301, 'ERROR_NO_RETRIEVED_TEMPORARY_FOLDER', message='The retrieved temporary folder could not be accessed.') + spec.exit_code(302, 'ERROR_OUTPUT_STDOUT_MISSING', + message='The retrieved folder did not contain the required stdout output file.') spec.exit_code(303, 'ERROR_OUTPUT_XML_MISSING', message='The retrieved folder did not contain the required XML file.') + spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ', + message='The stdout output file could not be read.') + spec.exit_code(311, 'ERROR_OUTPUT_STDOUT_PARSE', + message='The stdout output file could not be parsed.') + spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE', + message='The stdout output file was incomplete probably because the calculation got interrupted.') spec.exit_code(320, 'ERROR_OUTPUT_XML_READ', message='The XML output file could not be read.') spec.exit_code(321, 'ERROR_OUTPUT_XML_PARSE', @@ -72,4 +79,6 @@ def define(cls, spec): message='The pdos_tot file could not be read from the retrieved folder.') spec.exit_code(340, 'ERROR_PARSING_PROJECTIONS', message='An exception was raised parsing bands and projections.') + spec.exit_code(350, 'ERROR_UNEXPECTED_PARSER_EXCEPTION', + message='The parser raised an unexpected exception: {exception}') # yapf: enable diff --git a/src/aiida_quantumespresso/parsers/projwfc.py b/src/aiida_quantumespresso/parsers/projwfc.py index 6b7bc2959..57eab5bb7 100644 --- a/src/aiida_quantumespresso/parsers/projwfc.py +++ b/src/aiida_quantumespresso/parsers/projwfc.py @@ -2,458 +2,433 @@ import fnmatch from pathlib import Path import re +from typing import List, Optional, Tuple -from aiida.orm import BandsData, Dict, ProjectionData, XyData -from aiida.plugins import OrbitalFactory +from aiida.common.extendeddicts import AttributeDict +from aiida.engine import ExitCode +from aiida.orm import BandsData, Dict, KpointsData, ProjectionData, StructureData, XyData +from aiida.plugins import DataFactory, OrbitalFactory +from aiida.tools.data.orbital.orbital import Orbital import numpy as np +from numpy.typing import ArrayLike -from aiida_quantumespresso.parsers import QEOutputParsingError -from aiida_quantumespresso.parsers.parse_raw.base import convert_qe_to_aiida_structure, convert_qe_to_kpoints +from aiida_quantumespresso.parsers.parse_raw.base import ( + convert_qe_time_to_sec, + convert_qe_to_aiida_structure, + convert_qe_to_kpoints, +) from aiida_quantumespresso.utils.mapping import get_logging_container from .base import BaseParser -def find_orbitals_from_statelines(out_info_dict): - """This function reads in all the state_lines, that is, the lines describing which atomic states, taken from the - pseudopotential, are used for the projection. Then it converts these state_lines into a set of orbitals. - - :param out_info_dict: contains various technical internals useful in parsing - :return: orbitals, a list of orbitals suitable for setting ProjectionData - """ - - # Format of statelines - # From PP/src/projwfc.f90: (since Oct. 8 2019) - # - # 1000 FORMAT (5x,"state #",i4,": atom ",i3," (",a3,"), wfc ",i2," (l=",i1) - # IF (lspinorb) THEN - # 1001 FORMAT (" j=",f3.1," m_j=",f4.1,")") - # ELSE IF (noncolin) THEN - # 1002 FORMAT (" m=",i2," s_z=",f4.1,")") - # ELSE - # 1003 FORMAT (" m=",i2,")") - # ENDIF - # - # Before: - # IF (lspinorb) THEN - # ... - # 1000 FORMAT (5x,"state #",i4,": atom ",i3," (",a3,"), wfc ",i2, & - # " (j=",f3.1," l=",i1," m_j=",f4.1,")") - # ELSE - # ... - # 1500 FORMAT (5x,"state #",i4,": atom ",i3," (",a3,"), wfc ",i2, & - # " (l=",i1," m=",i2," s_z=",f4.1,")") - # ENDIF - - out_file = out_info_dict['out_file'] - atomnum_re = re.compile(r'atom\s*([0-9]+?)[^0-9]') - element_re = re.compile(r'atom\s*[0-9]+\s*\(\s*([A-Za-z0-9-_]+?)\s*\)') - if out_info_dict['spinorbit']: - # spinorbit - lnum_re = re.compile(r'l=\s*([0-9]+?)[^0-9]') - jnum_re = re.compile(r'j=\s*([0-9.]+?)[^0-9.]') - mjnum_re = re.compile(r'm_j=\s*([-0-9.]+?)[^-0-9.]') - elif not out_info_dict['collinear']: - # non-collinear - lnum_re = re.compile(r'l=\s*([0-9]+?)[^0-9]') - mnum_re = re.compile(r'm=\s*([-0-9]+?)[^-0-9]') - sznum_re = re.compile(r's_z=\s*([-0-9.]*?)[^-0-9.]') - else: - # collinear / no spin - lnum_re = re.compile(r'l=\s*([0-9]+?)[^0-9]') - mnum_re = re.compile(r'm=\s*([-0-9]+?)[^-0-9]') - wfc_lines = out_info_dict['wfc_lines'] - state_lines = [out_file[wfc_line] for wfc_line in wfc_lines] - state_dicts = [] - for state_line in state_lines: - try: - state_dict = {} - state_dict['atomnum'] = int(atomnum_re.findall(state_line)[0]) - state_dict['atomnum'] -= 1 # to keep with orbital indexing - state_dict['kind_name'] = element_re.findall(state_line)[0].strip() - state_dict['angular_momentum'] = int(lnum_re.findall(state_line)[0]) - if out_info_dict['spinorbit']: - state_dict['total_angular_momentum'] = float(jnum_re.findall(state_line)[0]) - state_dict['magnetic_number'] = float(mjnum_re.findall(state_line)[0]) - else: - if not out_info_dict['collinear']: - state_dict['spin'] = float(sznum_re.findall(state_line)[0]) - state_dict['magnetic_number'] = int(mnum_re.findall(state_line)[0]) - state_dict['magnetic_number'] -= 1 # to keep with orbital indexing - except ValueError: - raise QEOutputParsingError('State lines are not formatted in a standard way.') - state_dicts.append(state_dict) - - # here is some logic to figure out the value of radial_nodes to use - new_state_dicts = [] - for i in range(len(state_dicts)): - radial_nodes = 0 - state_dict = state_dicts[i].copy() - for j in range(i - 1, -1, -1): - if state_dict == state_dicts[j]: - radial_nodes += 1 - state_dict['radial_nodes'] = radial_nodes - new_state_dicts.append(state_dict) - state_dicts = new_state_dicts - - # here is some logic to assign positions based on the atom_index - structure = out_info_dict['structure'] - for state_dict in state_dicts: - site_index = state_dict.pop('atomnum') - state_dict['position'] = structure.sites[site_index].position - - # here we set the resulting state_dicts to a new set of orbitals - orbitals = [] - if out_info_dict['spinorbit']: - OrbitalCls = OrbitalFactory('spinorbithydrogen') - elif not out_info_dict['collinear']: - OrbitalCls = OrbitalFactory('noncollinearhydrogen') - else: - OrbitalCls = OrbitalFactory('core.realhydrogen') - for state_dict in state_dicts: - orbitals.append(OrbitalCls(**state_dict)) - - return orbitals - - -def spin_dependent_subparser(out_info_dict): - """This find the projection and bands arrays from the out_file and out_info_dict. Used to handle the different - possible spin-cases in a convenient manner. - - :param out_info_dict: contains various technical internals useful in parsing - :return: ProjectionData, BandsData parsed from out_file - """ - out_file = out_info_dict['out_file'] - spin_down = out_info_dict['spin_down'] - od = out_info_dict # using a shorter name for convenience - # regular expressions needed for later parsing - WaveFraction1_re = re.compile(r'\=(.*?)\*') # state composition 1 - WaveFractionremain_re = re.compile(r'\+(.*?)\*') # state comp 2 - FunctionId_re = re.compile(r'\#(.*?)\]') # state identity - # primes arrays for the later parsing - num_wfc = len(od['wfc_lines']) - bands = np.zeros([od['k_states'], od['num_bands']]) - projection_arrays = np.zeros([od['k_states'], od['num_bands'], num_wfc]) - - try: - for i in range(od['k_states']): - if spin_down: - i += od['k_states'] - # grabs band energy - for j in range(i * od['num_bands'], (i + 1) * od['num_bands'], 1): - out_ind = od['e_lines'][j] - try: - # post ~6.3 <6.5 output format "e =" - val = out_file[out_ind].split()[2] - float(val) - except ValueError: - # pre ~6.3 and 6.5+ output format "==== e(" - val = out_file[out_ind].split(' = ')[1].split()[0] - bands[i % od['k_states']][j % od['num_bands']] = val - #subloop grabs pdos - wave_fraction = [] - wave_id = [] - for k in range(od['e_lines'][j] + 1, od['psi_lines'][j], 1): - out_line = out_file[k] - wave_fraction += WaveFraction1_re.findall(out_line) - wave_fraction += WaveFractionremain_re.findall(out_line) - wave_id += FunctionId_re.findall(out_line) - if len(wave_id) != len(wave_fraction): - raise IndexError - for l in range(len(wave_id)): - wave_id[l] = int(wave_id[l]) - wave_fraction[l] = float(wave_fraction[l]) - #sets relevant values in pdos_array - projection_arrays[i % od['k_states']][j % od['num_bands']][wave_id[l] - 1] = wave_fraction[l] - except IndexError: - raise QEOutputParsingError('the standard out file does not comply with the official documentation.') - - bands_data = BandsData() - kpoints = od['kpoints'] - try: - if len(od['k_vect']) != len(kpoints.get_kpoints()): - raise AttributeError - bands_data.set_kpointsdata(kpoints) - except AttributeError: - bands_data.set_kpoints(od['k_vect'].astype(float)) - - bands_data.set_bands(bands, units='eV') - - orbitals = out_info_dict['orbitals'] - if len(orbitals) != np.shape(projection_arrays[0, 0, :])[0]: - raise QEOutputParsingError( - 'There was an internal parsing error, ' - ' the projection array shape does not agree' - ' with the number of orbitals' - ) - projection_data = ProjectionData() - projection_data.set_reference_bandsdata(bands_data) - projections = [projection_arrays[:, :, i] for i in range(len(orbitals))] - - # Do the bands_check manually here - for projection in projections: - if np.shape(projection) != np.shape(bands): - raise AttributeError('Projections not the same shape as the bands') - - #insert here some logic to assign pdos to the orbitals - pdos_arrays = spin_dependent_pdos_subparser(out_info_dict) - energy_arrays = [out_info_dict['energy']] * len(orbitals) - projection_data.set_projectiondata( - orbitals, - list_of_projections=projections, - list_of_energy=energy_arrays, - list_of_pdos=pdos_arrays, - bands_check=False - ) - # pdos=pdos_arrays - return bands_data, projection_data - - -def natural_sort_key(sort_key, _nsre=re.compile('([0-9]+)')): - """Pass to ``key`` for ``str.sort`` to achieve natural sorting. For example, ``["2", "11", "1"]`` will be sorted to - ``["1", "2", "11"]`` instead of ``["1", "11", "2"]`` - - :param sort_key: Original key to be processed - :return: A list of string and integers. - """ - keys = [] - for text in _nsre.split(sort_key): - if text.isdigit(): - keys.append(int(text)) - else: - keys.append(text) - return keys - - -def spin_dependent_pdos_subparser(out_info_dict): - """Finds and labels the pdos arrays associated with the out_info_dict. - - :param out_info_dict: contains various technical internals useful in parsing - :return: (pdos_name, pdos_array) tuples for all the specific pdos - """ - spin = out_info_dict['spin'] - collinear = out_info_dict.get('collinear', True) - spinorbit = out_info_dict.get('spinorbit', False) - spin_down = out_info_dict['spin_down'] - pdos_atm_array_dict = out_info_dict['pdos_atm_array_dict'] - if spin: - mult_factor = 2 - if spin_down: - first_array = 4 - else: - first_array = 3 - else: - mult_factor = 1 - first_array = 2 - mf = mult_factor - fa = first_array - pdos_file_names = [k for k in pdos_atm_array_dict] - pdos_file_names.sort(key=natural_sort_key) - out_arrays = [] - # we can keep the pdos in synch with the projections by relying on the fact - # both are produced in the same order (thus the sorted file_names) - for name in pdos_file_names: - this_array = pdos_atm_array_dict[name] - if not collinear and not spinorbit: - # In the non-collinear, non-spinorbit case, the "up"-spin orbitals - # come first, followed by all "down" orbitals - for i in range(3, np.shape(this_array)[1], 2): - out_arrays.append(this_array[:, i]) - for i in range(4, np.shape(this_array)[1], 2): - out_arrays.append(this_array[:, i]) - else: - for i in range(fa, np.shape(this_array)[1], mf): - out_arrays.append(this_array[:, i]) - - return out_arrays - - class ProjwfcParser(BaseParser): - """``Parser`` implementation for the ``ProjwfcCalculation`` calculation job class. + """This class is the implementation of the Parser class for the ``projwfc.x`` code in Quantum ESPRESSO. Parses projection arrays that map the projection onto each point in the bands structure, as well as pdos arrays, which map the projected density of states onto an energy axis. """ def parse(self, **kwargs): - """Parse the retrieved files from a ``ProjwfcCalculation`` into output nodes.""" - # we create a dictionary the progressively accumulates more info - out_info_dict = {} - + """Parses the retrieved files of the ``ProjwfcCalculation`` and converts them into output nodes.""" logs = get_logging_container() stdout, parsed_data, logs = self.parse_stdout_from_retrieved(logs) - out_info_dict['out_file'] = stdout.split('\n') - - base_exit_code = self.check_base_errors(logs) - if base_exit_code: - return self.exit(base_exit_code, logs) - self.out('output_parameters', Dict(parsed_data)) + self.out('output_parameters', Dict(dict=parsed_data)) - if 'ERROR_OUTPUT_STDOUT_INCOMPLETE'in logs.error: - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE, logs) + # Split the stdout into header and k-point blocks - TODO: Lowdin + stdout_blocks = stdout.split('Lowdin Charges:')[0].split('k = ') + header = stdout_blocks[0] + kpoint_blocks = stdout_blocks[1:] + # Check that the temporary retrieved folder is there try: - retrieved_temporary_folder = kwargs['retrieved_temporary_folder'] + retrieved_temporary_folder = Path(kwargs['retrieved_temporary_folder']) except KeyError: - return self.exit(self.exit_codes.ERROR_NO_RETRIEVED_TEMPORARY_FOLDER, logs) + return self.exit(self.exit_codes.ERROR_NO_RETRIEVED_TEMPORARY_FOLDER) # Parse the XML to obtain the `structure`, `kpoints` and spin-related settings from the parent calculation - self.exit_code_xml = None - parsed_xml, logs_xml = self._parse_xml(retrieved_temporary_folder) + parsed_xml, logs_xml, xml_exit_code = self._parse_xml(retrieved_temporary_folder) self.emit_logs(logs_xml) + if xml_exit_code is not None: + return xml_exit_code - if self.exit_code_xml: - return self.exit(self.exit_code_xml) + structure = convert_qe_to_aiida_structure(parsed_xml['structure']) + kpoints = convert_qe_to_kpoints(parsed_xml, structure) - out_info_dict['structure'] = convert_qe_to_aiida_structure(parsed_xml['structure']) - out_info_dict['kpoints'] = convert_qe_to_kpoints(parsed_xml, out_info_dict['structure']) - out_info_dict['nspin'] = parsed_xml.get('number_of_spin_components') - out_info_dict['collinear'] = not parsed_xml.get('non_colinear_calculation') - out_info_dict['spinorbit'] = parsed_xml.get('spin_orbit_calculation') - out_info_dict['spin'] = out_info_dict['nspin'] == 2 + nspin = parsed_xml.get('number_of_spin_components') + spinorbit = parsed_xml.get('spin_orbit_calculation') + non_collinear = parsed_xml.get('non_colinear_calculation') - # check and read pdos_tot file - out_filenames = self.retrieved.base.repository.list_object_names() - try: - pdostot_filename = fnmatch.filter(out_filenames, '*pdos_tot*')[0] - with self.retrieved.base.repository.open(pdostot_filename, 'r') as pdostot_file: - # Columns: Energy(eV), Ldos, Pdos - pdostot_array = np.atleast_2d(np.genfromtxt(pdostot_file)) - energy = pdostot_array[:, 0] - dos = pdostot_array[:, 1] - except (OSError, KeyError): - return self.exit(self.exit_codes.ERROR_READING_PDOSTOT_FILE, logs) + orbitals = self._parse_orbitals(header, structure, non_collinear, spinorbit) + bands, projections = self._parse_bands_and_projections(kpoint_blocks, len(orbitals)) + energy, dos_node, pdos_array = self._parse_pdos_files(retrieved_temporary_folder, nspin, spinorbit, logs) - # check and read all of the individual pdos_atm files - pdos_atm_filenames = fnmatch.filter(out_filenames, '*pdos_atm*') - pdos_atm_array_dict = {} - for name in pdos_atm_filenames: - with self.retrieved.base.repository.open(name, 'r') as pdosatm_file: - pdos_atm_array_dict[name] = np.atleast_2d(np.genfromtxt(pdosatm_file)) + self.out('Dos', dos_node) - # finding the bands and projections - out_info_dict['energy'] = energy - out_info_dict['pdos_atm_array_dict'] = pdos_atm_array_dict - try: - new_nodes_list = self._parse_bands_and_projections(out_info_dict) - except QEOutputParsingError as err: - self.logger.error(f'Error parsing bands and projections: {err}') - return self.exit(self.exit_codes.ERROR_PARSING_PROJECTIONS, logs) - for linkname, node in new_nodes_list: + output_node_dict = self._build_bands_and_projections( + kpoints, bands, energy, orbitals, projections, pdos_array, nspin + ) + for linkname, node in output_node_dict.items(): self.out(linkname, node) - Dos_out = XyData() - Dos_out.set_x(energy, 'Energy', 'eV') - Dos_out.set_y(dos, 'Dos', 'states/eV') - self.out('Dos', Dos_out) + for exit_code in [ + 'ERROR_OUTPUT_STDOUT_MISSING', + 'ERROR_OUTPUT_STDOUT_READ', + 'ERROR_OUTPUT_STDOUT_PARSE', + 'ERROR_OUTPUT_STDOUT_INCOMPLETE', + 'ERROR_READING_PDOSTOT_FILE' + ]: + if exit_code in logs.error: + return self.exit(self.exit_codes.get(exit_code), logs) return self.exit(logs=logs) - def _parse_xml(self, retrieved_temporary_folder): + def _parse_xml(self, retrieved_temporary_folder: Path) -> Tuple[dict, AttributeDict, Optional[ExitCode]]: """Parse the XML file. - The XML must be parsed in order to obtain the required information for the orbital parsing. + The XML must be parsed in order to obtain the required information for the other parser methods. + + :param retrieved_temporary_folder: temporary folder of retrieved files that is deleted after parsing. + + :return: tuple with the parsed_xml and parsing logs. """ from .parse_xml.exceptions import XMLParseError, XMLUnsupportedFormatError from .parse_xml.pw.parse import parse_xml logs = get_logging_container() - parsed_xml = {} - xml_filepath = Path(retrieved_temporary_folder) / self.node.process_class.xml_path.name + xml_filepath = retrieved_temporary_folder / self.node.process_class.xml_path.name if not xml_filepath.exists(): - self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_MISSING - return parsed_xml, logs + return {}, logs, self.exit(self.exit_codes.ERROR_OUTPUT_XML_MISSING) try: with xml_filepath.open('r') as handle: parsed_xml, logs = parse_xml(handle, None) + return parsed_xml, logs, None except IOError: - self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_READ + return {}, logs, self.exit(self.exit_codes.ERROR_OUTPUT_XML_READ) except XMLParseError: - self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_PARSE + return {}, logs, self.exit(self.exit_codes.ERROR_OUTPUT_XML_PARSE) except XMLUnsupportedFormatError: - self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_FORMAT - except Exception as exc: - self.exit_code_xml = self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION.format(exception=exc) + return {}, logs, self.exit(self.exit_codes.ERROR_OUTPUT_XML_FORMAT) + except Exception: + return {}, logs, self.exit(self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION) + + @staticmethod + def _parse_orbitals(header: str, structure: StructureData, non_collinear: bool, spinorbit: bool) -> list: + """Parse the orbitals from the stdout header. + + This method reads in all the state lines - that is, the lines describing which atomic states taken from the + pseudopotential are used for the projections. Then it converts these state lines into a set of orbitals. + + :param header: the header block of text before the first k-point is printed. + :param structure: the input structure. + :param non_collinear: True if the calculation is non-collinear. + :param spinorbit: True if the calculation used spin-orbit coupling. + + :return: a list of orbitals suitable for setting ProjectionData. + """ + # Format of statelines + # From PP/src/projwfc.f90: (since Oct. 8 2019) + # + # 1000 FORMAT (5x,"state #",i4,": atom ",i3," (",a3,"), wfc ",i2," (l=",i1) + # IF (lspinorb) THEN + # 1001 FORMAT (" j=",f3.1," m_j=",f4.1,")") + # ELSE IF (noncolin) THEN + # 1002 FORMAT (" m=",i2," s_z=",f4.1,")") + # ELSE + # 1003 FORMAT (" m=",i2,")") + # ENDIF + # + # Before: + # IF (lspinorb) THEN + # ... + # 1000 FORMAT (5x,"state #",i4,": atom ",i3," (",a3,"), wfc ",i2, & + # " (j=",f3.1," l=",i1," m_j=",f4.1,")") + # ELSE + # ... + # 1500 FORMAT (5x,"state #",i4,": atom ",i3," (",a3,"), wfc ",i2, & + # " (l=",i1," m=",i2," s_z=",f4.1,")") + # ENDIF + + # Based on the formats above, set up the orbital regex patterns, with the corresponding keys and types + if spinorbit: + # FORMAT (5x,"state #",i4,": atom ",i3," (",a3,"), wfc ",i2," (l=",i1," j=",f3.1," m_j=",f4.1,")") + orbital_pattern = re.compile( + r'state\s#\s*\d+:\satom\s+(\d+)\s\((\S+)\s*\),\swfc\s+\d+\s\(l=(\d)\sj=\s*(.*)\sm_j=(.*)\)' + ) + orbital_keys = ('atomnum', 'kind_name', 'angular_momentum', 'total_angular_momentum', 'magnetic_number') + orbital_types = (int, str, int, float, float) + elif non_collinear: + # FORMAT (5x,"state #",i4,": atom ",i3," (",a3,"), wfc ",i2," (l=",i1," m=",i2," s_z=",f4.1,")") + orbital_pattern = re.compile( + r'state\s#\s*\d+:\satom\s+(\d+)\s\((\S+)\s*\),\swfc\s+\d+\s\(l=(\d)\sm=\s?(\d+)\ss_z=(.*)\)' + ) + orbital_keys = ('atomnum', 'kind_name', 'angular_momentum', 'magnetic_number', 'spin') + orbital_types = (int, str, int, int, float) + else: + # This works for both collinear / no spin + # FORMAT (5x,"state #",i4,": atom ",i3," (",a3,"), wfc ",i2," (l=",i1," m=",i2,")") + orbital_pattern = re.compile( + r'state\s#\s*\d+:\satom\s+(\d+)\s\((\S+)\s*\),\swfc\s+\d+\s\(l=(\d)\sm=\s?(\d+)\)' + ) + orbital_keys = ('atomnum', 'kind_name', 'angular_momentum', 'magnetic_number') + orbital_types = (int, str, int, int) + + orbital_dicts = [] + + for orbital_data in orbital_pattern.findall(header): + orbital_dict = { + key: data_type(data) for key, data_type, data in zip(orbital_keys, orbital_types, orbital_data) + } + orbital_dict['atomnum'] -= 1 # convert to zero indexing + orbital_dict['magnetic_number'] -= int(not spinorbit) # convert to zero indexing, except for spinorbit + orbital_dicts.append(orbital_dict) + + # Figure out the value of radial_nodes + new_orbital_dicts = [] + for i, orbital_dict in enumerate(orbital_dicts): + radial_nodes = 0 + new_orbital_dict = orbital_dict.copy() + for j in range(i - 1, -1, -1): + if new_orbital_dict == orbital_dicts[j]: + radial_nodes += 1 + new_orbital_dict['radial_nodes'] = radial_nodes + new_orbital_dicts.append(new_orbital_dict) + orbital_dicts = new_orbital_dicts + + # Assign positions based on the atom_index + for new_orbital_dict in orbital_dicts: + site_index = new_orbital_dict.pop('atomnum') + new_orbital_dict['position'] = structure.sites[site_index].position + + # Convert the resulting orbital_dicts into the list of orbitals + orbitals = [] + if spinorbit: + orbital_class = OrbitalFactory('spinorbithydrogen') + elif non_collinear: + orbital_class = OrbitalFactory('noncollinearhydrogen') + else: + orbital_class = OrbitalFactory('realhydrogen') + for new_orbital_dict in orbital_dicts: + orbitals.append(orbital_class(**new_orbital_dict)) + + return orbitals + + @staticmethod + def _parse_bands_and_projections(kpoint_blocks: list, num_orbitals: int) -> Tuple[ArrayLike, ArrayLike]: + """Parse the bands energies and orbital projections from the kpoint blocks in the stdout. - return parsed_xml, logs + :param kpoint_blocks: list of blocks for each k-point that contain the energies and projections in the stdout. + :param num_orbitals: number of orbitals used for the projections. - def _parse_bands_and_projections(self, out_info_dict): - """Function that parses the standard output into bands and projection data. + :return: tuple with two arrays containing the band energies and projection values + """ + + # Parse the bands energies + energy_pattern = re.compile(r'====\se\(\s*\d+\)\s=\s*(\S+)\seV\s====') + bands = np.array([energy_pattern.findall(block) for block in kpoint_blocks]) + + # Parse the projection arrays + energy_pattern = re.compile(r'\n====.+==== \n') + psi_pattern = re.compile(r'([.\d]+)\*\[#\s*(\d+)\]') + + projections = [] + + for block in kpoint_blocks: + + kpoint_projections = [] + + for band_projections in re.split(energy_pattern, block)[1:]: + + proj_array = np.zeros(num_orbitals) + + for [projection_value, orbital_index] in psi_pattern.findall(band_projections): + proj_array[int(orbital_index) - 1] = projection_value + + kpoint_projections.append(proj_array) + + projections.append(kpoint_projections) + + projections = np.array(projections) + + return bands, projections + + def _parse_pdos_files(self, retrieved_temporary_folder: Path, nspin: int, + spinorbit: bool, logs: AttributeDict) -> Tuple[ArrayLike, XyData, ArrayLike]: + """Parse the PDOS files and convert them into arrays. + + Reads in all of the ``*.pdos*`` files and converts the data into arrays. The PDOS arrays are then concatenated + and reordered so the order of the columns matches the order of the orbitals read from the statelines. This is + done in three steps: + + 1. Sort the filenames by the atom # and wfc #, in that order. + 2. Concatenate all the PDOS files into one large array. + 3. Reorder the columns depending on the ``npsin`` value and if spin-orbit is used. + + :param retrieved_temporary_folder: temporary folder of retrieved files that is deleted after parsing. + :param nspin: nspin value of the parent calculation. + :param spinorbit: True if the calculation used spin-orbit coupling. + + :return: tuple of three containing the energy grid, the total DOS as a node and the PDOS + """ + + def natural_sort_key(sort_key, _nsre=re.compile('([0-9]+)')): + """Pass to ``key`` for ``str.sort`` to achieve natural sorting. For example, ``["2", "11", "1"]`` will be sorted to + ``["1", "2", "11"]`` instead of ``["1", "11", "2"]`` + + :param sort_key: Original key to be processed + :return: A list of string and integers. + """ + keys = [] + for text in _nsre.split(sort_key): + if text.isdigit(): + keys.append(int(text)) + else: + keys.append(text) + return keys + + # Read the `pdos_tot` file + try: + pdostot_filepath = next(retrieved_temporary_folder.glob('*pdos_tot*')) + with pdostot_filepath.open('r') as pdostot_file: + # Columns: Energy(eV), Ldos, Pdos + pdostot_array = np.atleast_2d(np.genfromtxt(pdostot_file)) + except (OSError, KeyError): + logs.error.append('ERROR_READING_PDOSTOT_FILE') + return np.array([]), XyData(), np.array([]) + + energy = pdostot_array[:, 0] + + dos_node = XyData() + dos_node.set_x(energy, 'Energy', 'eV') + + # For spin-polarised calculations the total DOS is split up in spin up and down + if nspin == 2: + dos_node.set_y( + (pdostot_array[:, 1], pdostot_array[:, 2]), + ('dos_up', 'dos_down'), + ('states/eV', 'states/eV') + ) + else: + dos_node.set_y(pdostot_array[:, 1], 'Dos', 'states/eV') + + # We're only interested in the PDOS, so we skip the first columns corresponding to the energy and LDOS + if nspin == 1 or spinorbit: + first_pdos_column = 2 + else: + first_pdos_column = 3 - :param out_info_dict: used to pass technical internal variables - to helper functions in compact form - :return: append_nodes_list a list containing BandsData and - ProjectionData parsed from standard_out + # Read the `pdos_atm` files + pdos_atm_array_dict = {} + for path in retrieved_temporary_folder.glob('*pdos_atm*'): + with path.open('r') as pdosatm_file: + pdos_atm_array_dict[path.name] = np.atleast_2d(np.genfromtxt(pdosatm_file))[:, first_pdos_column:] + + # Keep the pdos in sync with the orbitals by properly sorting the filenames + pdos_file_names = [k for k in pdos_atm_array_dict] + pdos_file_names.sort(key=natural_sort_key) + + # Make sure the order of the PDOS columns matches with the orbitals + if nspin != 4 or spinorbit: + # Simply concatenate the PDOS columns as they are ordered by the filenames + pdos_array = np.concatenate([pdos_atm_array_dict[name] for name in pdos_file_names], axis=1) + if nspin == 2: + # Reorder the columns so the 'up' spin columns are first + pdos_array = np.concatenate([pdos_array[:, 0::2], pdos_array[:, 1::2]], axis=1) + if nspin == 4: + # Reorder the columns like the order of orbitals for spin-orbit + pdos_array = np.concatenate([pdos_array[:, 1::2], pdos_array[:, 0::2]], axis=1) + else: + # Here all the 'up' orbitals for each l number come first, so the PDOS columns must be sorted accordingly + pdos_list = [] + for wfc_pdos_array in [pdos_atm_array_dict[name] for name in pdos_file_names]: + # Reorder the columns so the 'up' spin columns _for this l number_ come first + pdos_list.append(np.concatenate([wfc_pdos_array[:, 0::2], wfc_pdos_array[:, 1::2]], axis=1)) + pdos_array = np.concatenate(pdos_list, axis=1) + + return energy, dos_node, pdos_array + + @classmethod + def _build_bands_and_projections( + cls, kpoints: KpointsData, bands: ArrayLike, energy: ArrayLike, orbitals: List[Orbital], projections: ArrayLike, + pdos_array: ArrayLike, nspin: int + ) -> dict: + """Build the ``BandsData`` and ``ProjectionData`` output nodes. + + :param kpoints: data node that contains the list of k-points. + :param bands: array that contains all of the band energies. + :param orbitals: list of orbitals used for the projections. + :param projections: array that contains all of the projection values. + :param pdos_array: array that contains all of the PDOS values. + :param nspin: nspin value of the parent calculation. + + :return: dictionary with the output links (keys) and the corresponding output nodes (values). """ - out_file = out_info_dict['out_file'] # Note: we expect a list of lines - out_info_dict['k_lines'] = [] - out_info_dict['e_lines'] = [] - out_info_dict['psi_lines'] = [] - out_info_dict['wfc_lines'] = [] - append_nodes_list = [] - - for i, line in enumerate(out_file): - if 'k =' in line: - out_info_dict['k_lines'].append(i) - if '==== e(' in line or line.strip().startswith('e ='): - # The energy format in output was changed in QE6.3 - # this check supports old and new format - out_info_dict['e_lines'].append(i) - if '|psi|^2' in line: - out_info_dict['psi_lines'].append(i) - if 'state #' in line: - out_info_dict['wfc_lines'].append(i) - - # Basic check - if len(out_info_dict['e_lines']) != len(out_info_dict['psi_lines']): - raise QEOutputParsingError('e-lines and psi-lines are in different number') - if len(out_info_dict['psi_lines']) % len(out_info_dict['k_lines']) != 0: - raise QEOutputParsingError('Band Energy Points is not a multiple of kpoints') - # calculates the number of bands - out_info_dict['num_bands'] = len(out_info_dict['psi_lines']) // len(out_info_dict['k_lines']) - - # changes k-numbers to match spin - # because if spin is on, k points double for up and down - out_info_dict['k_states'] = len(out_info_dict['k_lines']) - if out_info_dict['spin']: - if out_info_dict['k_states'] % 2 != 0: - raise QEOutputParsingError('Internal formatting error regarding spin') - out_info_dict['k_states'] = out_info_dict['k_states'] // 2 - - # adds in the k-vector for each kpoint - k_vect = [out_file[out_info_dict['k_lines'][i]].split()[2:] for i in range(out_info_dict['k_states'])] - out_info_dict['k_vect'] = np.array(k_vect) - out_info_dict['orbitals'] = find_orbitals_from_statelines(out_info_dict) - - spin = out_info_dict['spin'] - - if spin: - # I had to guess what the ordering of the spin is, because - # the projwfc.x documentation doesn't say, but looking at the - # source code I found: + num_kpoints = len(kpoints.get_kpoints()) + num_orbitals = len(orbitals) + + bands_data, projection_data = cls._intialize_bands_projection_data( + kpoints, bands[:num_kpoints, :], energy, orbitals, projections[:num_kpoints, :, :], + pdos_array[:, :num_orbitals] + ) + if nspin == 2: + # For collinear spin-polarised calculations, each orbital has an 'up' and 'down' projection. + # Based on the Quantum ESPRESSO source code: # # DO is=1,nspin # IF (nspin==2) THEN # IF (is==1) filename=trim(filproj)//'.up' # IF (is==2) filename=trim(filproj)//'.down' # - # Which would say that it is reasonable to assume that the - # spin up states are written first, then spin down - # - out_info_dict['spin_down'] = False - bands_data1, projection_data1 = spin_dependent_subparser(out_info_dict) - append_nodes_list += [('projections_up', projection_data1), ('bands_up', bands_data1)] - out_info_dict['spin_down'] = True - bands_data2, projection_data2 = spin_dependent_subparser(out_info_dict) - append_nodes_list += [('projections_down', projection_data2), ('bands_down', bands_data2)] - else: - out_info_dict['spin_down'] = False - bands_data, projection_data = spin_dependent_subparser(out_info_dict) - append_nodes_list += [('projections', projection_data), ('bands', bands_data)] - - return append_nodes_list + # It is reasonable to assume that the spin up states are written first, then spin down in the stdout. + # So the first/second half of the `bands` and `projections` correspond to spin up/down. + # The `pdos_arrays` are constructed to match this order, i.e. the first len(orbitals) correspond to 'up'. + # The 'up' bands and projections have already been initialized above, the 'down' we do now. + bands_data_down, projection_data_down = cls._intialize_bands_projection_data( + kpoints, bands[num_kpoints:, :], energy, orbitals, projections[num_kpoints:, :, :], + pdos_array[:, num_orbitals:] + ) + return { + 'bands_up': bands_data, + 'bands_down': bands_data_down, + 'projections_up': projection_data, + 'projections_down': projection_data_down, + } + + return {'bands': bands_data, 'projections': projection_data} + + @staticmethod + def _intialize_bands_projection_data( + kpoints: KpointsData, bands: ArrayLike, energy: ArrayLike, orbitals: List[Orbital], projections: ArrayLike, + pdos_array: ArrayLike + ) -> Tuple[BandsData, ProjectionData]: + """Initialize an instance of ``BandsData`` and corresponding ``ProjectionData``. + + :param kpoints: data node that contains the list of k-points. + :param bands: array that contains all of the band energies. + :param orbitals: list of orbitals used for the projections. + :param projections: array that contains all of the projection values. + :param pdos_array: array that contains all of the PDOS values. + + :return: tuple with the ``BandsData`` and ``ProjectionData`` nodes. + """ + bands_data = BandsData() + bands_data.set_kpointsdata(kpoints) + bands_data.set_bands(bands, units='eV') + + projection_data = ProjectionData() + projection_data.set_reference_bandsdata(bands_data) + + energy_arrays = [energy] * len(orbitals) + projection_data.set_projectiondata( + orbitals, + list_of_projections=[projections[:, :, i] for i in range(len(orbitals))], + list_of_energy=energy_arrays, + list_of_pdos=[pdos_array[:, i] for i in range(len(orbitals))], + bands_check=False + ) + return bands_data, projection_data diff --git a/tests/conftest.py b/tests/conftest.py index ee465315d..166b3173c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,6 +5,7 @@ import io import os import pathlib +from pathlib import Path import shutil import tempfile @@ -287,11 +288,13 @@ def _generate_calc_job_node( if retrieve_temporary: dirpath, filenames = retrieve_temporary + dirpath = Path(dirpath) + filepaths = [] for filename in filenames: - try: - shutil.copy(os.path.join(filepath_folder, filename), os.path.join(dirpath, filename)) - except FileNotFoundError: - pass # To test the absence of files in the retrieve_temporary folder + filepaths.extend(Path(filepath_folder).glob(filename)) + + for filepath in filepaths: + shutil.copy(filepath, dirpath / filepath.name) if filepath_folder: retrieved = orm.FolderData() @@ -299,11 +302,8 @@ def _generate_calc_job_node( # Remove files that are supposed to be only present in the retrieved temporary folder if retrieve_temporary: - for filename in filenames: - try: - retrieved.base.repository.delete_object(filename) - except OSError: - pass # To test the absence of files in the retrieve_temporary folder + for filepath in filepaths: + retrieved.delete_object(filepath.name) retrieved.base.links.add_incoming(node, link_type=LinkType.CREATE, link_label='retrieved') retrieved.store() diff --git a/tests/parsers/test_projwfc.py b/tests/parsers/test_projwfc.py index ce24ac6e4..6bfe98268 100644 --- a/tests/parsers/test_projwfc.py +++ b/tests/parsers/test_projwfc.py @@ -16,7 +16,7 @@ def _generate_projwfc_node(test_name): """ entry_point_calc_job = 'quantumespresso.projwfc' - retrieve_temporary_list = ['data-file-schema.xml'] + retrieve_temporary_list = ['data-file-schema.xml', '*.pdos*'] attributes = {'retrieve_temporary_list': retrieve_temporary_list} node = generate_calc_job_node( @@ -65,7 +65,8 @@ def test_projwfc_spinpolarised(generate_projwfc_node, generate_parser, data_regr assert link_name in results, list(results.keys()) data_regression.check({ - 'Dos': results['Dos'].base.attributes.all, + 'Dos': + {array_name: results['Dos'].get_array(array_name).tolist() for array_name in results['Dos'].get_arraynames()}, 'bands_up': results['bands_up'].base.attributes.all, 'bands_down': results['bands_down'].base.attributes.all, 'projections_up': { diff --git a/tests/parsers/test_projwfc/test_projwfc_spinpolarised.yml b/tests/parsers/test_projwfc/test_projwfc_spinpolarised.yml index 124203088..161f1f7ac 100644 --- a/tests/parsers/test_projwfc/test_projwfc_spinpolarised.yml +++ b/tests/parsers/test_projwfc/test_projwfc_spinpolarised.yml @@ -1,14 +1,172 @@ Dos: - array|x_array: - - 55 - array|y_array_0: - - 55 - x_name: Energy - x_units: eV - y_names: - - Dos - y_units: - - states/eV + x_array: + - -70.062 + - -68.062 + - -66.062 + - -64.062 + - -62.062 + - -60.062 + - -58.062 + - -56.062 + - -54.062 + - -52.062 + - -50.062 + - -48.062 + - -46.062 + - -44.062 + - -42.062 + - -40.062 + - -38.062 + - -36.062 + - -34.062 + - -32.062 + - -30.062 + - -28.062 + - -26.062 + - -24.062 + - -22.062 + - -20.062 + - -18.062 + - -16.062 + - -14.062 + - -12.062 + - -10.062 + - -8.062 + - -6.062 + - -4.062 + - -2.062 + - -0.062 + - 1.938 + - 3.938 + - 5.938 + - 7.938 + - 9.938 + - 11.938 + - 13.938 + - 15.938 + - 17.938 + - 19.938 + - 21.938 + - 23.938 + - 25.938 + - 27.938 + - 29.938 + - 31.938 + - 33.938 + - 35.938 + - 37.938 + y_array_0: + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 6.48 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.256 + - 0.256 + - 0.518 + - 4.55 + - 1.58 + - 0.108 + - 0.108 + - 0.108 + - 0.108 + - 0.278 + - 0.278 + - 0.278 + - 1.8 + - 0.0 + - 0.0 + y_array_1: + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.0 + - 0.238 + - 0.238 + - 0.438 + - 0.438 + - 1.42 + - 0.131 + - 0.131 + - 0.131 + - 0.131 + - 0.264 + - 0.264 + - 0.264 + - 2.15 + - 0.0 bands_down: array|bands: - 2