Skip to content

Commit

Permalink
projwfc.x: parse from XML instead of parent calc (#747)
Browse files Browse the repository at this point in the history
The current `ProjwfcParser` uses several input and output nodes from the
parent calculation. This increased the complexity of the tests for this
parser, and made running `opengrid.x` in between the `pw.x` and
`projwfc.x` run impossible without adding these input and output nodes to
the calculation job of `opengrid.x`.

Here we switch to parsing the XML instead of relying on the parent
calculation. The `data-file-schema.xml` of the parent calculation is
retrieved and parsed, providing the required information for the
subsequent parsing of the `projwfc.x` output. All the parsing tests are
updated to include the XML output file and remove the in/output links
for the parent calculation.

Note that the XML file is added to the temporary retrieve list since,
although it is required for parsing, it is already in the repository of an
ancestor calculation.

The `convert_qe_to_kpoints` function is added to convert the k-points
data in the XML to a `KpointsData` node.
  • Loading branch information
mbercx authored Oct 8, 2021
1 parent e22fcba commit 0874d95
Show file tree
Hide file tree
Showing 93 changed files with 9,401 additions and 3,036 deletions.
7 changes: 4 additions & 3 deletions aiida_quantumespresso/calculations/namelists.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,12 @@ class NamelistsCalculation(CalcJob):
# parent_folder is of type RemoteData or FolderData
_OUTPUT_SUBFOLDER = './out/'
_PREFIX = 'aiida'
_internal_retrieve_list = []
_default_namelists = ['INPUTPP']
_blocked_keywords = [] # a list of tuples with key and value fixed

_internal_retrieve_list = []
_retrieve_singlefile_list = []
_retrieve_temporary_list = []

_DEFAULT_INPUT_FILE = 'aiida.in'
_DEFAULT_OUTPUT_FILE = 'aiida.out'
Expand Down Expand Up @@ -218,10 +219,10 @@ def prepare_for_submission(self, folder):
# Retrieve by default the output file and the xml file
calcinfo.retrieve_list = []
calcinfo.retrieve_list.append(self.inputs.metadata.options.output_filename)
settings_retrieve_list = settings.pop('ADDITIONAL_RETRIEVE_LIST', [])
calcinfo.retrieve_list += settings_retrieve_list
calcinfo.retrieve_list += settings.pop('ADDITIONAL_RETRIEVE_LIST', [])
calcinfo.retrieve_list += self._internal_retrieve_list

calcinfo.retrieve_temporary_list = self._retrieve_temporary_list
calcinfo.retrieve_singlefile_list = self._retrieve_singlefile_list

# We might still have parser options in the settings dictionary: pop them.
Expand Down
24 changes: 23 additions & 1 deletion aiida_quantumespresso/calculations/projwfc.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-
"""`CalcJob` implementation for the projwfc.x code of Quantum ESPRESSO."""
from pathlib import Path

from aiida.orm import RemoteData, FolderData, Dict, XyData
from aiida_quantumespresso.calculations.namelists import NamelistsCalculation

Expand All @@ -25,7 +27,17 @@ class ProjwfcCalculation(NamelistsCalculation):
('PROJWFC', 'plotboxes', False),
]
_default_parser = 'quantumespresso.projwfc'
_internal_retrieve_list = [NamelistsCalculation._PREFIX + '.pdos*']

xml_path = Path(NamelistsCalculation._default_parent_output_folder
).joinpath(f'{NamelistsCalculation._PREFIX}.save', 'data-file-schema.xml')
_internal_retrieve_list = [
NamelistsCalculation._PREFIX + '.pdos*',
]
# The XML file is added to the temporary retrieve list since it is required for parsing, but already in the
# repository of an ancestor calculation.
_retrieve_temporary_list = [
xml_path.as_posix(),
]

@classmethod
def define(cls, spec):
Expand All @@ -45,10 +57,20 @@ def define(cls, spec):
spec.output('projections', valid_type=ProjectionData, required=False)
spec.output('bands', valid_type=BandsData, required=False)
spec.default_output_node = 'output_parameters'
spec.exit_code(301, 'ERROR_NO_RETRIEVED_TEMPORARY_FOLDER',
message='The retrieved temporary folder could not be accessed.')
spec.exit_code(303, 'ERROR_OUTPUT_XML_MISSING',
message='The retrieved folder did not contain the required XML file.')
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
spec.exit_code(320, 'ERROR_OUTPUT_XML_READ',
message='The XML output file could not be read.')
spec.exit_code(321, 'ERROR_OUTPUT_XML_PARSE',
message='The XML output file could not be parsed.')
spec.exit_code(322, 'ERROR_OUTPUT_XML_FORMAT',
message='The XML output file has an unsupported format.')
spec.exit_code(330, 'ERROR_READING_PDOSTOT_FILE',
message='The pdos_tot file could not be read from the retrieved folder.')
spec.exit_code(340, 'ERROR_PARSING_PROJECTIONS',
Expand Down
2 changes: 1 addition & 1 deletion aiida_quantumespresso/calculations/pw.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def define(cls, spec):
spec.exit_code(302, 'ERROR_OUTPUT_STDOUT_MISSING',
message='The retrieved folder did not contain the required stdout output file.')
spec.exit_code(303, 'ERROR_OUTPUT_XML_MISSING',
message='The retrieved folder did not contain the required required XML file.')
message='The retrieved folder did not contain the required XML file.')
spec.exit_code(304, 'ERROR_OUTPUT_XML_MULTIPLE',
message='The retrieved folder contained multiple XML files.')
spec.exit_code(305, 'ERROR_OUTPUT_FILES',
Expand Down
27 changes: 27 additions & 0 deletions aiida_quantumespresso/parsers/parse_raw/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,30 @@ def convert_qe2aiida_structure(output_dict, input_structure=None):
s.reset_sites_positions(new_pos)

return s


def convert_qe_to_kpoints(xml_dict, structure):
    """Build a `KpointsData` node from the k-points stored in the parsed XML data.

    :param xml_dict: the parsed XML data; the `k_points`, `k_points_units` and `k_points_weights` entries are read
    :param structure: the structure handed to `KpointsData.set_cell_from_structure` to define the cell
    :return: a `KpointsData`, or None if the `k_points` or `k_points_weights` entry is absent or None
    :raises ValueError: if the `k_points_units` entry is anything other than '1 / angstrom'
    """
    from aiida.plugins import DataFactory

    KpointsData = DataFactory('array.kpoints')

    points = xml_dict.get('k_points')
    units = xml_dict.get('k_points_units')
    weights = xml_dict.get('k_points_weights')

    # Without both the k-points and their weights there is nothing to build.
    if points is None or weights is None:
        return None

    # The k-points are set as cartesian below, so only the reciprocal cartesian unit is accepted.
    if units != '1 / angstrom':
        raise ValueError('k-points are not expressed in reciprocal cartesian coordinates')

    result = KpointsData()
    result.set_cell_from_structure(structure)
    result.set_kpoints(points, cartesian=True, weights=weights)

    return result
122 changes: 68 additions & 54 deletions aiida_quantumespresso/parsers/projwfc.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
# -*- coding: utf-8 -*-
from pathlib import Path
import re
import fnmatch

import numpy as np

from aiida.common import LinkType
from aiida.orm import Dict, ProjectionData, BandsData, XyData, CalcJobNode
from aiida.orm import Dict, ProjectionData, BandsData, XyData
from aiida.plugins import OrbitalFactory

from aiida_quantumespresso.parsers import QEOutputParsingError
from aiida_quantumespresso.parsers.parse_raw.base import parse_output_base
from aiida_quantumespresso.parsers.parse_raw.base import (
parse_output_base, convert_qe2aiida_structure, convert_qe_to_kpoints
)
from aiida_quantumespresso.utils.mapping import get_logging_container

from .base import Parser


Expand Down Expand Up @@ -168,16 +172,11 @@ def spin_dependent_subparser(out_info_dict):
raise QEOutputParsingError('the standard out file does not comply with the official documentation.')

bands_data = BandsData()
# Attempts to retrieve the kpoints from the parent calc
parent_calc = out_info_dict['parent_calc']
try:
parent_kpoints = parent_calc.get_incoming(link_label_filter='kpoints').one().node
except ValueError:
raise QEOutputParsingError('The parent had no input kpoints! Cannot parse from this!')
kpoints = od['kpoints']
try:
if len(od['k_vect']) != len(parent_kpoints.get_kpoints()):
if len(od['k_vect']) != len(kpoints.get_kpoints()):
raise AttributeError
bands_data.set_kpointsdata(parent_kpoints)
bands_data.set_kpointsdata(kpoints)
except AttributeError:
bands_data.set_kpoints(od['k_vect'].astype(float))

Expand Down Expand Up @@ -284,8 +283,12 @@ def parse(self, **kwargs):
Retrieves projwfc output, and some basic information from the out_file, such as warnings and wall_time
"""
# Check that the retrieved folder is there
retrieved = self.retrieved
# Get the temporary retrieved folder
try:
retrieved_temporary_folder = kwargs['retrieved_temporary_folder']
except KeyError:
return self.exit(self.exit_codes.ERROR_NO_RETRIEVED_TEMPORARY_FOLDER)

# Read standard out
try:
Expand All @@ -309,6 +312,24 @@ def parse(self, **kwargs):
self.emit_logs(logs)
self.out('output_parameters', Dict(dict=parsed_data))

# Parse the XML to obtain the `structure`, `kpoints` and spin-related settings from the parent calculation
self.exit_code_xml = None
parsed_xml, logs_xml = self._parse_xml(retrieved_temporary_folder)
self.emit_logs(logs_xml)

if self.exit_code_xml:
return self.exit(self.exit_code_xml)

# we create a dictionary that progressively accumulates more info
out_info_dict = {}

out_info_dict['structure'] = convert_qe2aiida_structure(parsed_xml['structure'])
out_info_dict['kpoints'] = convert_qe_to_kpoints(parsed_xml, out_info_dict['structure'])
out_info_dict['nspin'] = parsed_xml.get('number_of_spin_components')
out_info_dict['collinear'] = not parsed_xml.get('non_colinear_calculation')
out_info_dict['spinorbit'] = parsed_xml.get('spin_orbit_calculation')
out_info_dict['spin'] = out_info_dict['nspin'] == 2

# check and read pdos_tot file
out_filenames = retrieved.list_object_names()
try:
Expand All @@ -329,8 +350,6 @@ def parse(self, **kwargs):
pdos_atm_array_dict[name] = np.atleast_2d(np.genfromtxt(pdosatm_file))

# finding the bands and projections
# we create a dictionary the progressively accumulates more info
out_info_dict = {}
out_info_dict['out_file'] = out_file
out_info_dict['energy'] = energy
out_info_dict['pdos_atm_array_dict'] = pdos_atm_array_dict
Expand All @@ -347,6 +366,37 @@ def parse(self, **kwargs):
Dos_out.set_y(dos, 'Dos', 'states/eV')
self.out('Dos', Dos_out)

def _parse_xml(self, retrieved_temporary_folder):
    """Parse the XML file located in the temporary retrieved folder.

    The XML must be parsed in order to obtain the required information for the orbital parsing.

    When the file is missing or cannot be read or parsed, the matching exit code is stored in
    `self.exit_code_xml`; the attribute is not touched when parsing succeeds.

    :param retrieved_temporary_folder: path of the folder holding the temporarily retrieved files
    :return: tuple of the parsed XML dictionary (empty if the file is missing or parsing failed) and the
        logging container
    """
    from .parse_xml.exceptions import XMLParseError, XMLUnsupportedFormatError
    from .parse_xml.pw.parse import parse_xml

    log_container = get_logging_container()
    xml_data = {}

    # Only the file name of the class-level `xml_path` is used: the file is looked up directly inside the folder.
    folder = Path(retrieved_temporary_folder)
    xml_file = folder / self.node.process_class.xml_path.name

    if not xml_file.exists():
        self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_MISSING
        return xml_data, log_container

    try:
        with xml_file.open('r') as handle:
            xml_data, log_container = parse_xml(handle, None)
    except IOError:
        self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_READ
    except XMLParseError:
        self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_PARSE
    except XMLUnsupportedFormatError:
        self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_FORMAT
    except Exception:
        # Any other failure is reported through the generic parser exit code rather than raised.
        self.exit_code_xml = self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION

    return xml_data, log_container

def _parse_bands_and_projections(self, out_info_dict):
"""Function that parses the standard output into bands and projection data.
Expand Down Expand Up @@ -382,57 +432,21 @@ def _parse_bands_and_projections(self, out_info_dict):
# calculates the number of bands
out_info_dict['num_bands'] = len(out_info_dict['psi_lines']) // len(out_info_dict['k_lines'])

# Uses the parent input parameters, and checks if the parent used
# spin calculations. Try to replace with a query, if possible.
try:
parent_calc = (
self.node.inputs.parent_folder.get_incoming(node_class=CalcJobNode,
link_type=LinkType.CREATE).one().node
)
except ValueError as e:
raise QEOutputParsingError(f'Could not get parent calculation of input folder: {e}')
out_info_dict['parent_calc'] = parent_calc
try:
parent_param = parent_calc.get_outgoing(link_label_filter='output_parameters').one().node
except ValueError:
raise QEOutputParsingError('The parent had no output_parameters! Cannot parse from this!')
try:
structure = parent_calc.get_incoming(link_label_filter='structure').one().node
except ValueError:
raise QEOutputParsingError('The parent had no input structure! Cannot parse from this!')
try:
nspin = parent_param.get_dict()['number_of_spin_components']
if nspin != 1:
spin = True
else:
spin = False
out_info_dict['spinorbit'] = parent_param.get_dict().get('spin_orbit_calculation', False)
out_info_dict['collinear'] = not parent_param.get_dict().get('non_colinear_calculation', False)
if not out_info_dict['collinear']:
# Sanity check
if nspin != 4:
raise QEOutputParsingError('The calculation is non-collinear, but nspin is not set to 4!')
spin = False
except KeyError:
spin = False
out_info_dict['spinorbit'] = False
out_info_dict['collinear'] = True
out_info_dict['spin'] = spin

# changes k-numbers to match spin
# because if spin is on, k points double for up and down
out_info_dict['k_states'] = len(out_info_dict['k_lines'])
if spin:
if out_info_dict['spin']:
if out_info_dict['k_states'] % 2 != 0:
raise QEOutputParsingError('Internal formatting error regarding spin')
out_info_dict['k_states'] = out_info_dict['k_states'] // 2

# adds in the k-vector for each kpoint
# adds in the k-vector for each kpoint
k_vect = [out_file[out_info_dict['k_lines'][i]].split()[2:] for i in range(out_info_dict['k_states'])]
out_info_dict['k_vect'] = np.array(k_vect)
out_info_dict['structure'] = structure
out_info_dict['orbitals'] = find_orbitals_from_statelines(out_info_dict)

spin = out_info_dict['spin']

if spin:
# I had to guess what the ordering of the spin is, because
# the projwfc.x documentation doesn't say, but looking at the
Expand Down
10 changes: 8 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,10 @@ def _generate_calc_job_node(
if retrieve_temporary:
dirpath, filenames = retrieve_temporary
for filename in filenames:
shutil.copy(os.path.join(filepath_folder, filename), os.path.join(dirpath, filename))
try:
shutil.copy(os.path.join(filepath_folder, filename), os.path.join(dirpath, filename))
except FileNotFoundError:
pass # To test the absence of files in the retrieve_temporary folder

if filepath_folder:
retrieved = orm.FolderData()
Expand All @@ -274,7 +277,10 @@ def _generate_calc_job_node(
# Remove files that are supposed to be only present in the retrieved temporary folder
if retrieve_temporary:
for filename in filenames:
retrieved.delete_object(filename)
try:
retrieved.delete_object(filename)
except OSError:
pass # To test the absence of files in the retrieve_temporary folder

retrieved.add_incoming(node, link_type=LinkType.CREATE, link_label='retrieved')
retrieved.store()
Expand Down
Loading

0 comments on commit 0874d95

Please sign in to comment.