aiidateam · mbercx · Oct 8, 2021 · Oct 3, 2021 · Oct 3, 2021 · Oct 6, 2021
diff --git a/aiida_quantumespresso/calculations/namelists.py b/aiida_quantumespresso/calculations/namelists.py
@@ -31,11 +31,12 @@ class NamelistsCalculation(CalcJob):
     # parent_folder is of type RemoteData or FolderData
     _OUTPUT_SUBFOLDER = './out/'
     _PREFIX = 'aiida'
-    _internal_retrieve_list = []
     _default_namelists = ['INPUTPP']
     _blocked_keywords = []  # a list of tuples with key and value fixed
 
+    _internal_retrieve_list = []
     _retrieve_singlefile_list = []
+    _retrieve_temporary_list = []
 
     _DEFAULT_INPUT_FILE = 'aiida.in'
     _DEFAULT_OUTPUT_FILE = 'aiida.out'
@@ -218,10 +219,10 @@ def prepare_for_submission(self, folder):
         # Retrieve by default the output file and the xml file
         calcinfo.retrieve_list = []
         calcinfo.retrieve_list.append(self.inputs.metadata.options.output_filename)
-        settings_retrieve_list = settings.pop('ADDITIONAL_RETRIEVE_LIST', [])
-        calcinfo.retrieve_list += settings_retrieve_list
+        calcinfo.retrieve_list += settings.pop('ADDITIONAL_RETRIEVE_LIST', [])
         calcinfo.retrieve_list += self._internal_retrieve_list
 
+        calcinfo.retrieve_temporary_list = self._retrieve_temporary_list
         calcinfo.retrieve_singlefile_list = self._retrieve_singlefile_list
 
         # We might still have parser options in the settings dictionary: pop them.

diff --git a/aiida_quantumespresso/calculations/projwfc.py b/aiida_quantumespresso/calculations/projwfc.py
@@ -1,5 +1,7 @@
 # -*- coding: utf-8 -*-
 """`CalcJob` implementation for the projwfc.x code of Quantum ESPRESSO."""
+from pathlib import Path
+
 from aiida.orm import RemoteData, FolderData, Dict, XyData
 from aiida_quantumespresso.calculations.namelists import NamelistsCalculation
 
@@ -25,7 +27,17 @@ class ProjwfcCalculation(NamelistsCalculation):
         ('PROJWFC', 'plotboxes', False),
     ]
     _default_parser = 'quantumespresso.projwfc'
-    _internal_retrieve_list = [NamelistsCalculation._PREFIX + '.pdos*']
+
+    xml_path = Path(NamelistsCalculation._default_parent_output_folder
+                    ).joinpath(f'{NamelistsCalculation._PREFIX}.save', 'data-file-schema.xml')
+    _internal_retrieve_list = [
+        NamelistsCalculation._PREFIX + '.pdos*',
+    ]
+    # The XML file is added to the temporary retrieve list since it is required for parsing, but already in the
+    # repository of an ancestor calculation.
+    _retrieve_temporary_list = [
+        xml_path.as_posix(),
+    ]
 
     @classmethod
     def define(cls, spec):
@@ -45,10 +57,20 @@ def define(cls, spec):
         spec.output('projections', valid_type=ProjectionData, required=False)
         spec.output('bands', valid_type=BandsData, required=False)
         spec.default_output_node = 'output_parameters'
+        spec.exit_code(301, 'ERROR_NO_RETRIEVED_TEMPORARY_FOLDER',
+            message='The retrieved temporary folder could not be accessed.')
+        spec.exit_code(303, 'ERROR_OUTPUT_XML_MISSING',
+            message='The retrieved folder did not contain the required XML file.')
         spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
             message='The stdout output file could not be read.')
         spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
             message='The stdout output file was incomplete probably because the calculation got interrupted.')
+        spec.exit_code(320, 'ERROR_OUTPUT_XML_READ',
+            message='The XML output file could not be read.')
+        spec.exit_code(321, 'ERROR_OUTPUT_XML_PARSE',
+            message='The XML output file could not be parsed.')
+        spec.exit_code(322, 'ERROR_OUTPUT_XML_FORMAT',
+            message='The XML output file has an unsupported format.')
         spec.exit_code(330, 'ERROR_READING_PDOSTOT_FILE',
             message='The pdos_tot file could not be read from the retrieved folder.')
         spec.exit_code(340, 'ERROR_PARSING_PROJECTIONS',

diff --git a/aiida_quantumespresso/calculations/pw.py b/aiida_quantumespresso/calculations/pw.py
@@ -89,7 +89,7 @@ def define(cls, spec):
         spec.exit_code(302, 'ERROR_OUTPUT_STDOUT_MISSING',
             message='The retrieved folder did not contain the required stdout output file.')
         spec.exit_code(303, 'ERROR_OUTPUT_XML_MISSING',
-            message='The retrieved folder did not contain the required required XML file.')
+            message='The retrieved folder did not contain the required XML file.')
         spec.exit_code(304, 'ERROR_OUTPUT_XML_MULTIPLE',
             message='The retrieved folder contained multiple XML files.')
         spec.exit_code(305, 'ERROR_OUTPUT_FILES',

diff --git a/aiida_quantumespresso/parsers/parse_raw/base.py b/aiida_quantumespresso/parsers/parse_raw/base.py
@@ -156,3 +156,30 @@ def convert_qe2aiida_structure(output_dict, input_structure=None):
         s.reset_sites_positions(new_pos)
 
     return s
+
+
+def convert_qe_to_kpoints(xml_dict, structure):
+    """Build the output kpoints from the raw parsed data.
+
+    :param xml_dict: the raw parsed data, read for `k_points`, `k_points_units` and `k_points_weights`
+    :param structure: the structure passed to `set_cell_from_structure` to define the cell of the k-points
+    :return: a `KpointsData`, or None if `xml_dict` has no `k_points` or no `k_points_weights`
+    :raises ValueError: if `k_points_units` is not `'1 / angstrom'`
+    """
+    from aiida.plugins import DataFactory
+
+    KpointsData = DataFactory('array.kpoints')
+
+    k_points_list = xml_dict.get('k_points', None)
+    k_points_units = xml_dict.get('k_points_units', None)
+    k_points_weights_list = xml_dict.get('k_points_weights', None)
+
+    # Both the k-points and their weights are needed; without either one nothing is built.
+    if k_points_list is None or k_points_weights_list is None:
+        return None
+
+    # The k-points are set below with `cartesian=True`, so any other unit is rejected.
+    if k_points_units != '1 / angstrom':
+        raise ValueError('k-points are not expressed in reciprocal cartesian coordinates')
+
+    kpoints = KpointsData()
+    kpoints.set_cell_from_structure(structure)
+    kpoints.set_kpoints(k_points_list, cartesian=True, weights=k_points_weights_list)
+
+    return kpoints
diff --git a/aiida_quantumespresso/parsers/projwfc.py b/aiida_quantumespresso/parsers/projwfc.py
@@ -1,15 +1,19 @@
 # -*- coding: utf-8 -*-
+from pathlib import Path
 import re
 import fnmatch
 
 import numpy as np
 
-from aiida.common import LinkType
-from aiida.orm import Dict, ProjectionData, BandsData, XyData, CalcJobNode
+from aiida.orm import Dict, ProjectionData, BandsData, XyData
 from aiida.plugins import OrbitalFactory
 
 from aiida_quantumespresso.parsers import QEOutputParsingError
-from aiida_quantumespresso.parsers.parse_raw.base import parse_output_base
+from aiida_quantumespresso.parsers.parse_raw.base import (
+    parse_output_base, convert_qe2aiida_structure, convert_qe_to_kpoints
+)
+from aiida_quantumespresso.utils.mapping import get_logging_container
+
 from .base import Parser
 
 
@@ -168,16 +172,11 @@ def spin_dependent_subparser(out_info_dict):
         raise QEOutputParsingError('the standard out file does not comply with the official documentation.')
 
     bands_data = BandsData()
-    # Attempts to retrieve the kpoints from the parent calc
-    parent_calc = out_info_dict['parent_calc']
-    try:
-        parent_kpoints = parent_calc.get_incoming(link_label_filter='kpoints').one().node
-    except ValueError:
-        raise QEOutputParsingError('The parent had no input kpoints! Cannot parse from this!')
+    kpoints = od['kpoints']
     try:
-        if len(od['k_vect']) != len(parent_kpoints.get_kpoints()):
+        if len(od['k_vect']) != len(kpoints.get_kpoints()):
             raise AttributeError
-        bands_data.set_kpointsdata(parent_kpoints)
+        bands_data.set_kpointsdata(kpoints)
     except AttributeError:
         bands_data.set_kpoints(od['k_vect'].astype(float))
 
@@ -284,8 +283,12 @@ def parse(self, **kwargs):
 
         Retrieves projwfc output, and some basic information from the out_file, such as warnings and wall_time
         """
-        # Check that the retrieved folder is there
         retrieved = self.retrieved
+        # Get the temporary retrieved folder
+        try:
+            retrieved_temporary_folder = kwargs['retrieved_temporary_folder']
+        except KeyError:
+            return self.exit(self.exit_codes.ERROR_NO_RETRIEVED_TEMPORARY_FOLDER)
 
         # Read standard out
         try:
@@ -309,6 +312,24 @@ def parse(self, **kwargs):
         self.emit_logs(logs)
         self.out('output_parameters', Dict(dict=parsed_data))
 
+        # Parse the XML to obtain the `structure`, `kpoints` and spin-related settings from the parent calculation
+        self.exit_code_xml = None
+        parsed_xml, logs_xml = self._parse_xml(retrieved_temporary_folder)
+        self.emit_logs(logs_xml)
+
+        if self.exit_code_xml:
+            return self.exit(self.exit_code_xml)
+
+        # we create a dictionary that progressively accumulates more info
+        out_info_dict = {}
+
+        out_info_dict['structure'] = convert_qe2aiida_structure(parsed_xml['structure'])
+        out_info_dict['kpoints'] = convert_qe_to_kpoints(parsed_xml, out_info_dict['structure'])
+        out_info_dict['nspin'] = parsed_xml.get('number_of_spin_components')
+        out_info_dict['collinear'] = not parsed_xml.get('non_colinear_calculation')
+        out_info_dict['spinorbit'] = parsed_xml.get('spin_orbit_calculation')
+        out_info_dict['spin'] = out_info_dict['nspin'] == 2
+
         # check and read pdos_tot file
         out_filenames = retrieved.list_object_names()
         try:
@@ -329,8 +350,6 @@ def parse(self, **kwargs):
                 pdos_atm_array_dict[name] = np.atleast_2d(np.genfromtxt(pdosatm_file))
 
         # finding the bands and projections
-        # we create a dictionary the progressively accumulates more info
-        out_info_dict = {}
         out_info_dict['out_file'] = out_file
         out_info_dict['energy'] = energy
         out_info_dict['pdos_atm_array_dict'] = pdos_atm_array_dict
@@ -347,6 +366,37 @@ def parse(self, **kwargs):
         Dos_out.set_y(dos, 'Dos', 'states/eV')
         self.out('Dos', Dos_out)
 
+    def _parse_xml(self, retrieved_temporary_folder):
+        """Parse the XML file.
+
+        The XML must be parsed in order to obtain the required information for the orbital parsing.
+
+        A missing file, or an exception raised while opening or parsing it, is not propagated. Instead
+        `self.exit_code_xml` is set to the matching exit code, and the initial empty dictionary is returned together
+        with the container obtained from `get_logging_container`.
+
+        :param retrieved_temporary_folder: path of the folder in which the XML file is looked up
+        :return: tuple of the parsed XML data and the logs
+        """
+        from .parse_xml.exceptions import XMLParseError, XMLUnsupportedFormatError
+        from .parse_xml.pw.parse import parse_xml
+
+        logs = get_logging_container()
+        parsed_xml = {}
+
+        # Only the file name of `xml_path` is used: the file is looked up directly inside the temporary folder.
+        xml_filepath = Path(retrieved_temporary_folder) / self.node.process_class.xml_path.name
+
+        if not xml_filepath.exists():
+            self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_MISSING
+            return parsed_xml, logs
+
+        try:
+            with xml_filepath.open('r') as handle:
+                parsed_xml, logs = parse_xml(handle, None)
+        except IOError:
+            self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_READ
+        except XMLParseError:
+            self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_PARSE
+        except XMLUnsupportedFormatError:
+            self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_FORMAT
+        # NOTE(review): any other exception is mapped to a generic exit code without being recorded in `logs` --
+        # confirm that dropping the original error is intended.
+        except Exception:
+            self.exit_code_xml = self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION
+
+        return parsed_xml, logs
+
     def _parse_bands_and_projections(self, out_info_dict):
         """Function that parses the standard output into bands and projection data.
 
@@ -382,57 +432,21 @@ def _parse_bands_and_projections(self, out_info_dict):
         # calculates the number of bands
         out_info_dict['num_bands'] = len(out_info_dict['psi_lines']) // len(out_info_dict['k_lines'])
 
-        # Uses the parent input parameters, and checks if the parent used
-        # spin calculations. Try to replace with a query, if possible.
-        try:
-            parent_calc = (
-                self.node.inputs.parent_folder.get_incoming(node_class=CalcJobNode,
-                                                            link_type=LinkType.CREATE).one().node
-            )
-        except ValueError as e:
-            raise QEOutputParsingError(f'Could not get parent calculation of input folder: {e}')
-        out_info_dict['parent_calc'] = parent_calc
-        try:
-            parent_param = parent_calc.get_outgoing(link_label_filter='output_parameters').one().node
-        except ValueError:
-            raise QEOutputParsingError('The parent had no output_parameters! Cannot parse from this!')
-        try:
-            structure = parent_calc.get_incoming(link_label_filter='structure').one().node
-        except ValueError:
-            raise QEOutputParsingError('The parent had no input structure! Cannot parse from this!')
-        try:
-            nspin = parent_param.get_dict()['number_of_spin_components']
-            if nspin != 1:
-                spin = True
-            else:
-                spin = False
-            out_info_dict['spinorbit'] = parent_param.get_dict().get('spin_orbit_calculation', False)
-            out_info_dict['collinear'] = not parent_param.get_dict().get('non_colinear_calculation', False)
-            if not out_info_dict['collinear']:
-                # Sanity check
-                if nspin != 4:
-                    raise QEOutputParsingError('The calculation is non-collinear, but nspin is not set to 4!')
-                spin = False
-        except KeyError:
-            spin = False
-            out_info_dict['spinorbit'] = False
-            out_info_dict['collinear'] = True
-        out_info_dict['spin'] = spin
-
         # changes k-numbers to match spin
         # because if spin is on, k points double for up and down
         out_info_dict['k_states'] = len(out_info_dict['k_lines'])
-        if spin:
+        if out_info_dict['spin']:
             if out_info_dict['k_states'] % 2 != 0:
                 raise QEOutputParsingError('Internal formatting error regarding spin')
             out_info_dict['k_states'] = out_info_dict['k_states'] // 2
 
-        #   adds in the k-vector for each kpoint
+        # adds in the k-vector for each kpoint
         k_vect = [out_file[out_info_dict['k_lines'][i]].split()[2:] for i in range(out_info_dict['k_states'])]
         out_info_dict['k_vect'] = np.array(k_vect)
-        out_info_dict['structure'] = structure
         out_info_dict['orbitals'] = find_orbitals_from_statelines(out_info_dict)
 
+        spin = out_info_dict['spin']
+
         if spin:
             # I had to guess what the ordering of the spin is, because
             # the projwfc.x documentation doesn't say, but looking at the

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -265,7 +265,10 @@ def _generate_calc_job_node(
         if retrieve_temporary:
             dirpath, filenames = retrieve_temporary
             for filename in filenames:
-                shutil.copy(os.path.join(filepath_folder, filename), os.path.join(dirpath, filename))
+                try:
+                    shutil.copy(os.path.join(filepath_folder, filename), os.path.join(dirpath, filename))
+                except FileNotFoundError:
+                    pass  # To test the absence of files in the retrieve_temporary folder
 
         if filepath_folder:
             retrieved = orm.FolderData()
@@ -274,7 +277,10 @@ def _generate_calc_job_node(
             # Remove files that are supposed to be only present in the retrieved temporary folder
             if retrieve_temporary:
                 for filename in filenames:
-                    retrieved.delete_object(filename)
+                    try:
+                        retrieved.delete_object(filename)
+                    except OSError:
+                        pass  # To test the absence of files in the retrieve_temporary folder
 
             retrieved.add_incoming(node, link_type=LinkType.CREATE, link_label='retrieved')
             retrieved.store()