Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

projwfc.x: parse from XML instead of parent calc #747

Merged
merged 5 commits into from
Oct 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions aiida_quantumespresso/calculations/namelists.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,12 @@ class NamelistsCalculation(CalcJob):
# parent_folder is of type RemoteData or FolderData
_OUTPUT_SUBFOLDER = './out/'
_PREFIX = 'aiida'
_internal_retrieve_list = []
_default_namelists = ['INPUTPP']
_blocked_keywords = [] # a list of tuples with key and value fixed

_internal_retrieve_list = []
_retrieve_singlefile_list = []
_retrieve_temporary_list = []

_DEFAULT_INPUT_FILE = 'aiida.in'
_DEFAULT_OUTPUT_FILE = 'aiida.out'
Expand Down Expand Up @@ -218,10 +219,10 @@ def prepare_for_submission(self, folder):
# Retrieve by default the output file and the xml file
calcinfo.retrieve_list = []
calcinfo.retrieve_list.append(self.inputs.metadata.options.output_filename)
settings_retrieve_list = settings.pop('ADDITIONAL_RETRIEVE_LIST', [])
calcinfo.retrieve_list += settings_retrieve_list
calcinfo.retrieve_list += settings.pop('ADDITIONAL_RETRIEVE_LIST', [])
calcinfo.retrieve_list += self._internal_retrieve_list

calcinfo.retrieve_temporary_list = self._retrieve_temporary_list
calcinfo.retrieve_singlefile_list = self._retrieve_singlefile_list

# We might still have parser options in the settings dictionary: pop them.
Expand Down
24 changes: 23 additions & 1 deletion aiida_quantumespresso/calculations/projwfc.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-
"""`CalcJob` implementation for the projwfc.x code of Quantum ESPRESSO."""
from pathlib import Path

from aiida.orm import RemoteData, FolderData, Dict, XyData
from aiida_quantumespresso.calculations.namelists import NamelistsCalculation

Expand All @@ -25,7 +27,17 @@ class ProjwfcCalculation(NamelistsCalculation):
('PROJWFC', 'plotboxes', False),
]
_default_parser = 'quantumespresso.projwfc'
_internal_retrieve_list = [NamelistsCalculation._PREFIX + '.pdos*']

xml_path = Path(NamelistsCalculation._default_parent_output_folder
).joinpath(f'{NamelistsCalculation._PREFIX}.save', 'data-file-schema.xml')
_internal_retrieve_list = [
NamelistsCalculation._PREFIX + '.pdos*',
]
# The XML file is added to the temporary retrieve list since it is required for parsing, but is already stored in the
# repository of an ancestor calculation.
_retrieve_temporary_list = [
xml_path.as_posix(),
]

@classmethod
def define(cls, spec):
Expand All @@ -45,10 +57,20 @@ def define(cls, spec):
spec.output('projections', valid_type=ProjectionData, required=False)
spec.output('bands', valid_type=BandsData, required=False)
spec.default_output_node = 'output_parameters'
spec.exit_code(301, 'ERROR_NO_RETRIEVED_TEMPORARY_FOLDER',
message='The retrieved temporary folder could not be accessed.')
spec.exit_code(303, 'ERROR_OUTPUT_XML_MISSING',
message='The retrieved folder did not contain the required XML file.')
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
spec.exit_code(320, 'ERROR_OUTPUT_XML_READ',
message='The XML output file could not be read.')
spec.exit_code(321, 'ERROR_OUTPUT_XML_PARSE',
message='The XML output file could not be parsed.')
spec.exit_code(322, 'ERROR_OUTPUT_XML_FORMAT',
message='The XML output file has an unsupported format.')
spec.exit_code(330, 'ERROR_READING_PDOSTOT_FILE',
message='The pdos_tot file could not be read from the retrieved folder.')
spec.exit_code(340, 'ERROR_PARSING_PROJECTIONS',
Expand Down
2 changes: 1 addition & 1 deletion aiida_quantumespresso/calculations/pw.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def define(cls, spec):
spec.exit_code(302, 'ERROR_OUTPUT_STDOUT_MISSING',
message='The retrieved folder did not contain the required stdout output file.')
spec.exit_code(303, 'ERROR_OUTPUT_XML_MISSING',
message='The retrieved folder did not contain the required required XML file.')
message='The retrieved folder did not contain the required XML file.')
spec.exit_code(304, 'ERROR_OUTPUT_XML_MULTIPLE',
message='The retrieved folder contained multiple XML files.')
spec.exit_code(305, 'ERROR_OUTPUT_FILES',
Expand Down
27 changes: 27 additions & 0 deletions aiida_quantumespresso/parsers/parse_raw/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,30 @@ def convert_qe2aiida_structure(output_dict, input_structure=None):
s.reset_sites_positions(new_pos)

return s


def convert_qe_to_kpoints(xml_dict, structure):
    """Create a `KpointsData` instance from the k-point entries of the parsed XML data.

    The keys `k_points`, `k_points_units` and `k_points_weights` are read from `xml_dict`.

    :param xml_dict: dictionary with the raw data parsed from the XML output
    :param structure: structure from which the cell of the k-points is set
    :return: a `KpointsData`, or None if `k_points` or `k_points_weights` is absent from `xml_dict`
    :raises ValueError: if the k-points are present but `k_points_units` is not `1 / angstrom`
    """
    from aiida.plugins import DataFactory

    kpoints_class = DataFactory('array.kpoints')

    points = xml_dict.get('k_points')
    units = xml_dict.get('k_points_units')
    weights = xml_dict.get('k_points_weights')

    # Without both the coordinates and the weights there is nothing to build.
    if points is None or weights is None:
        return None

    # Only cartesian coordinates in reciprocal angstrom are accepted, as they are passed on with `cartesian=True`.
    if units != '1 / angstrom':
        raise ValueError('k-points are not expressed in reciprocal cartesian coordinates')

    result = kpoints_class()
    result.set_cell_from_structure(structure)
    result.set_kpoints(points, cartesian=True, weights=weights)

    return result
122 changes: 68 additions & 54 deletions aiida_quantumespresso/parsers/projwfc.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
# -*- coding: utf-8 -*-
from pathlib import Path
import re
import fnmatch

import numpy as np

from aiida.common import LinkType
from aiida.orm import Dict, ProjectionData, BandsData, XyData, CalcJobNode
from aiida.orm import Dict, ProjectionData, BandsData, XyData
from aiida.plugins import OrbitalFactory

from aiida_quantumespresso.parsers import QEOutputParsingError
from aiida_quantumespresso.parsers.parse_raw.base import parse_output_base
from aiida_quantumespresso.parsers.parse_raw.base import (
parse_output_base, convert_qe2aiida_structure, convert_qe_to_kpoints
)
from aiida_quantumespresso.utils.mapping import get_logging_container

from .base import Parser


Expand Down Expand Up @@ -168,16 +172,11 @@ def spin_dependent_subparser(out_info_dict):
raise QEOutputParsingError('the standard out file does not comply with the official documentation.')

bands_data = BandsData()
# Attempts to retrieve the kpoints from the parent calc
parent_calc = out_info_dict['parent_calc']
try:
parent_kpoints = parent_calc.get_incoming(link_label_filter='kpoints').one().node
except ValueError:
raise QEOutputParsingError('The parent had no input kpoints! Cannot parse from this!')
kpoints = od['kpoints']
sphuber marked this conversation as resolved.
Show resolved Hide resolved
try:
if len(od['k_vect']) != len(parent_kpoints.get_kpoints()):
if len(od['k_vect']) != len(kpoints.get_kpoints()):
raise AttributeError
bands_data.set_kpointsdata(parent_kpoints)
bands_data.set_kpointsdata(kpoints)
except AttributeError:
bands_data.set_kpoints(od['k_vect'].astype(float))

Expand Down Expand Up @@ -284,8 +283,12 @@ def parse(self, **kwargs):

Retrieves projwfc output, and some basic information from the out_file, such as warnings and wall_time
"""
# Check that the retrieved folder is there
retrieved = self.retrieved
# Get the temporary retrieved folder
try:
retrieved_temporary_folder = kwargs['retrieved_temporary_folder']
except KeyError:
return self.exit(self.exit_codes.ERROR_NO_RETRIEVED_TEMPORARY_FOLDER)

# Read standard out
try:
Expand All @@ -309,6 +312,24 @@ def parse(self, **kwargs):
self.emit_logs(logs)
self.out('output_parameters', Dict(dict=parsed_data))

# Parse the XML to obtain the `structure`, `kpoints` and spin-related settings from the parent calculation
self.exit_code_xml = None
parsed_xml, logs_xml = self._parse_xml(retrieved_temporary_folder)
self.emit_logs(logs_xml)

if self.exit_code_xml:
return self.exit(self.exit_code_xml)

# we create a dictionary that progressively accumulates more info
out_info_dict = {}

out_info_dict['structure'] = convert_qe2aiida_structure(parsed_xml['structure'])
out_info_dict['kpoints'] = convert_qe_to_kpoints(parsed_xml, out_info_dict['structure'])
out_info_dict['nspin'] = parsed_xml.get('number_of_spin_components')
out_info_dict['collinear'] = not parsed_xml.get('non_colinear_calculation')
out_info_dict['spinorbit'] = parsed_xml.get('spin_orbit_calculation')
out_info_dict['spin'] = out_info_dict['nspin'] == 2

# check and read pdos_tot file
out_filenames = retrieved.list_object_names()
try:
Expand All @@ -329,8 +350,6 @@ def parse(self, **kwargs):
pdos_atm_array_dict[name] = np.atleast_2d(np.genfromtxt(pdosatm_file))

# finding the bands and projections
# we create a dictionary the progressively accumulates more info
out_info_dict = {}
out_info_dict['out_file'] = out_file
out_info_dict['energy'] = energy
out_info_dict['pdos_atm_array_dict'] = pdos_atm_array_dict
Expand All @@ -347,6 +366,37 @@ def parse(self, **kwargs):
Dos_out.set_y(dos, 'Dos', 'states/eV')
self.out('Dos', Dos_out)

def _parse_xml(self, retrieved_temporary_folder):
    """Parse the XML output file located in the temporary retrieved folder.

    The file is looked up by the file name of the `xml_path` attribute of the process class of the node. On any
    failure the corresponding exit code is stored in `self.exit_code_xml` instead of raising, and the returned
    dictionary is empty.

    :param retrieved_temporary_folder: path of the folder containing the temporarily retrieved files
    :return: tuple of the parsed XML dictionary and the logging container
    """
    from .parse_xml.exceptions import XMLParseError, XMLUnsupportedFormatError
    from .parse_xml.pw.parse import parse_xml

    # Defaults that are returned unchanged whenever the file is missing or parsing fails.
    logs = get_logging_container()
    parsed_xml = {}

    # Only the file name is used: the XML is expected directly inside the temporary folder.
    xml_filename = self.node.process_class.xml_path.name
    xml_filepath = Path(retrieved_temporary_folder) / xml_filename

    if not xml_filepath.exists():
        self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_MISSING
        return parsed_xml, logs

    try:
        with xml_filepath.open('r') as xml_handle:
            parsed_xml, logs = parse_xml(xml_handle, None)
    except IOError:
        self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_READ
    except XMLParseError:
        self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_PARSE
    except XMLUnsupportedFormatError:
        self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_FORMAT
    except Exception:
        # Catch-all so that an unforeseen parser failure is reported through an exit code rather than raised.
        # NOTE(review): confirm that `ERROR_UNEXPECTED_PARSER_EXCEPTION` is defined on the calculation spec.
        self.exit_code_xml = self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION

    return parsed_xml, logs

def _parse_bands_and_projections(self, out_info_dict):
"""Function that parses the standard output into bands and projection data.

Expand Down Expand Up @@ -382,57 +432,21 @@ def _parse_bands_and_projections(self, out_info_dict):
# calculates the number of bands
out_info_dict['num_bands'] = len(out_info_dict['psi_lines']) // len(out_info_dict['k_lines'])

# Uses the parent input parameters, and checks if the parent used
# spin calculations. Try to replace with a query, if possible.
try:
parent_calc = (
self.node.inputs.parent_folder.get_incoming(node_class=CalcJobNode,
link_type=LinkType.CREATE).one().node
)
except ValueError as e:
raise QEOutputParsingError(f'Could not get parent calculation of input folder: {e}')
out_info_dict['parent_calc'] = parent_calc
try:
parent_param = parent_calc.get_outgoing(link_label_filter='output_parameters').one().node
except ValueError:
raise QEOutputParsingError('The parent had no output_parameters! Cannot parse from this!')
try:
structure = parent_calc.get_incoming(link_label_filter='structure').one().node
except ValueError:
raise QEOutputParsingError('The parent had no input structure! Cannot parse from this!')
try:
nspin = parent_param.get_dict()['number_of_spin_components']
if nspin != 1:
spin = True
else:
spin = False
out_info_dict['spinorbit'] = parent_param.get_dict().get('spin_orbit_calculation', False)
out_info_dict['collinear'] = not parent_param.get_dict().get('non_colinear_calculation', False)
if not out_info_dict['collinear']:
# Sanity check
if nspin != 4:
raise QEOutputParsingError('The calculation is non-collinear, but nspin is not set to 4!')
spin = False
except KeyError:
spin = False
out_info_dict['spinorbit'] = False
out_info_dict['collinear'] = True
out_info_dict['spin'] = spin

# changes k-numbers to match spin
# because if spin is on, k points double for up and down
out_info_dict['k_states'] = len(out_info_dict['k_lines'])
if spin:
if out_info_dict['spin']:
if out_info_dict['k_states'] % 2 != 0:
raise QEOutputParsingError('Internal formatting error regarding spin')
out_info_dict['k_states'] = out_info_dict['k_states'] // 2

# adds in the k-vector for each kpoint
# adds in the k-vector for each kpoint
k_vect = [out_file[out_info_dict['k_lines'][i]].split()[2:] for i in range(out_info_dict['k_states'])]
out_info_dict['k_vect'] = np.array(k_vect)
out_info_dict['structure'] = structure
out_info_dict['orbitals'] = find_orbitals_from_statelines(out_info_dict)

spin = out_info_dict['spin']

if spin:
# I had to guess what the ordering of the spin is, because
# the projwfc.x documentation doesn't say, but looking at the
Expand Down
10 changes: 8 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,10 @@ def _generate_calc_job_node(
if retrieve_temporary:
dirpath, filenames = retrieve_temporary
for filename in filenames:
shutil.copy(os.path.join(filepath_folder, filename), os.path.join(dirpath, filename))
try:
shutil.copy(os.path.join(filepath_folder, filename), os.path.join(dirpath, filename))
except FileNotFoundError:
pass # To test the absence of files in the retrieve_temporary folder

if filepath_folder:
retrieved = orm.FolderData()
Expand All @@ -274,7 +277,10 @@ def _generate_calc_job_node(
# Remove files that are supposed to be only present in the retrieved temporary folder
if retrieve_temporary:
for filename in filenames:
retrieved.delete_object(filename)
try:
retrieved.delete_object(filename)
except OSError:
pass # To test the absence of files in the retrieve_temporary folder

retrieved.add_incoming(node, link_type=LinkType.CREATE, link_label='retrieved')
retrieved.store()
Expand Down
Loading