aiidateam · sphuber · Feb 10, 2023 · Feb 24, 2023
diff --git a/src/aiida_quantumespresso/parsers/cp.py b/src/aiida_quantumespresso/parsers/cp.py
@@ -69,6 +69,7 @@ def parse(self, **kwargs):
         out_dict, _raw_successful = parse_cp_raw_output(
             output_stdout, output_xml, output_xml_counter, print_counter_xml
         )
+        out_dict.pop('trajectory', None)
 
         if not no_trajectory_output:
             # parse the trajectory. Units in Angstrom, picoseconds and eV.

diff --git a/src/aiida_quantumespresso/parsers/neb.py b/src/aiida_quantumespresso/parsers/neb.py
@@ -130,8 +130,9 @@ def parse(self, **kwargs):
                 return self.exit(self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION.format(exception=exc))
 
             parsed_structure = parsed_data_stdout.pop('structure', {})
-            parsed_trajectory = parsed_data_stdout.pop('trajectory', {})
-            parsed_parameters = PwParser.build_output_parameters(parsed_data_xml, parsed_data_stdout)
+            parsed_trajectory = parsed_data_xml.pop('trajectory', {})
+            parsed_parameters = parsed_data_xml
+            PwParser.backwards_compatibility_parameters(parsed_parameters, parsed_data_stdout)
 
             # Explicit information about k-points does not need to be queryable so we remove it from the parameters
             parsed_parameters.pop('k_points', None)

diff --git a/src/aiida_quantumespresso/parsers/parse_raw/pw.py b/src/aiida_quantumespresso/parsers/parse_raw/pw.py
@@ -309,7 +309,7 @@ def parse_stdout(stdout, input_parameters, parser_options=None, parsed_xml=None,
 
     parsed_data = {}
     vdw_correction = False
-    bands_data = parsed_xml.pop('bands', {})
+    bands_data = parsed_xml.get('bands', {})
     structure_data = parsed_xml.pop('structure', {})
     trajectory_data = {}
 

diff --git a/src/aiida_quantumespresso/parsers/parse_xml/parse.py b/src/aiida_quantumespresso/parsers/parse_xml/parse.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+import collections
 from urllib.error import URLError
 
 import numpy as np
@@ -75,20 +76,6 @@ def parse_xml_post_6_2(xml):
     #  xml_dictionary['key']['@attr'] returns its attribute 'attr'
     #  xml_dictionary['key']['nested_key'] goes one level deeper.
 
-    # Fix a bug of QE 6.8: the output XML is not consistent with schema, see
-    # https://github.com/aiidateam/aiida-quantumespresso/pull/717
-    xml_creator = xml.find('./general_info/creator')
-    if xml_creator is not None and 'VERSION' in xml_creator.attrib:
-        creator_version = xml_creator.attrib['VERSION']
-        if creator_version == '6.8':
-            root = xml.getroot()
-            timing_info = root.find('./timing_info')
-            partial_pwscf = timing_info.find("partial[@label='PWSCF'][@calls='0']")
-            try:
-                timing_info.remove(partial_pwscf)
-            except (TypeError, ValueError):
-                pass
-
     xml_dictionary, errors = xsd.to_dict(xml, validation='lax')
     if errors:
         logs.error.append(f'{len(errors)} XML schema validation error(s) schema: {schema_filepath}:')
@@ -99,6 +86,17 @@ def parse_xml_post_6_2(xml):
     inputs = xml_dictionary.get('input', {})
     outputs = xml_dictionary['output']
 
+    # Fix for QE 6.8 XML not matching the schema: https://github.com/aiidateam/aiida-quantumespresso/pull/717
+    # NOTE(review): `timing_info` now comes from `xsd.to_dict`; if it is a plain dict, `.find` will fail - confirm.
+    if xml_version == '6.8':
+        if 'timing_info' in xml_dictionary:
+            timing_info = xml_dictionary['timing_info']
+            partial_pwscf = timing_info.find("partial[@label='PWSCF'][@calls='0']")
+            try:
+                timing_info.remove(partial_pwscf)
+            except (TypeError, ValueError):
+                pass
+
     lattice_vectors = [
         [x * CONSTANTS.bohr_to_ang for x in outputs['atomic_structure']['cell']['a1']],
         [x * CONSTANTS.bohr_to_ang for x in outputs['atomic_structure']['cell']['a2']],
@@ -263,8 +261,22 @@ def parse_xml_post_6_2(xml):
         # WARNING: this is different between old XML and new XML
         'spin_orbit_calculation': spin_orbit_calculation,
         'q_real_space': outputs['algorithmic_info']['real_space_q'],
+
+        'energy_units': 'eV',
+        'energy_accuracy_units': 'eV',
+        'energy_ewald_units': 'eV',
+        'energy_hartree_units': 'eV',
+        'energy_one_electron_units': 'eV',
+        'energy_xc_units': 'eV',
+
+        'number_of_atoms': inputs['atomic_structure']['@nat'],
+        'number_of_species': inputs['atomic_species']['@ntyp'],
+
     }
 
+    if 'timing_info' in xml_dictionary:
+        xml_data['wall_time_seconds'] = xml_dictionary['timing_info']['total']['wall']
+
     # alat is technically an optional attribute according to the schema,
     # but I don't know what to do if it's missing. atomic_structure is mandatory.
     output_alat_bohr = outputs['atomic_structure']['@alat']
@@ -494,35 +506,33 @@ def parse_xml_post_6_2(xml):
         # - individual electronic phases and weights
 
     # TODO: We should put the `non_periodic_cell_correction` string in (?)
-    atoms = [[atom['@name'], [coord * CONSTANTS.bohr_to_ang
-                              for coord in atom['$']]]
-             for atom in outputs['atomic_structure']['atomic_positions']['atom']]
+    atomic_species_name = []
+    atoms = []
+
+    for atom in outputs['atomic_structure']['atomic_positions']['atom']:
+        atomic_species_name.append(atom['@name'])
+        atoms.append([atom['@name'], [coord * CONSTANTS.bohr_to_ang for coord in atom['$']]])
+
     species = outputs['atomic_species']['species']
     structure_data = {
-        'atomic_positions_units':
-        'Angstrom',
-        'direct_lattice_vectors_units':
-        'Angstrom',
+        'atomic_positions_units': 'Angstrom',
+        'direct_lattice_vectors_units': 'Angstrom',
         # ??? 'atoms_if_pos_list': [[1, 1, 1], [1, 1, 1]],
-        'number_of_atoms':
-        outputs['atomic_structure']['@nat'],
-        'lattice_parameter':
-        output_alat_angstrom,
+        'number_of_atoms': outputs['atomic_structure']['@nat'],
+        'lattice_parameter': output_alat_angstrom,
         'reciprocal_lattice_vectors': [
-            outputs['basis_set']['reciprocal_lattice']['b1'], outputs['basis_set']['reciprocal_lattice']['b2'],
+            outputs['basis_set']['reciprocal_lattice']['b1'],
+            outputs['basis_set']['reciprocal_lattice']['b2'],
             outputs['basis_set']['reciprocal_lattice']['b3']
         ],
-        'atoms':
-        atoms,
+        'atoms': atoms,
         'cell': {
             'lattice_vectors': lattice_vectors,
             'volume': cell_volume(*lattice_vectors),
             'atoms': atoms,
         },
-        'lattice_parameter_xml':
-        output_alat_bohr,
-        'number_of_species':
-        outputs['atomic_species']['@ntyp'],
+        'lattice_parameter_xml': output_alat_bohr,
+        'number_of_species': outputs['atomic_species']['@ntyp'],
         'species': {
             'index': [i + 1 for i, specie in enumerate(species)],
             'pseudo': [specie['pseudo_file'] for specie in species],
@@ -531,6 +541,68 @@ def parse_xml_post_6_2(xml):
         },
     }
 
+    xml_data['volume'] = structure_data['cell']['volume']
     xml_data['structure'] = structure_data
+    xml_data['trajectory'] = collections.defaultdict(list)
+    xml_data['trajectory']['atomic_species_name'] = atomic_species_name
+
+    for frame in xml_dictionary.get('step', []):
+        parse_step_to_trajectory(xml_data['trajectory'], frame)
+
+    calculation_type = inputs.get('control_variables', {}).get('calculation', 'scf')
+
+    # In case of an SCF calculation, there are no trajectory steps so parse from the final outputs. For a vc-relax, the
+    # code performs a final SCF, the results of which are not added as a step but are part of the final outputs.
+    if calculation_type in ['scf', 'vc-relax']:
+        parse_step_to_trajectory(xml_data['trajectory'], outputs, skip_structure=True)
+
+    # For some reason, the legacy trajectory structure contained a key `steps` which was a list of integers from 0 to
+    # N - 1 where N is the number of steps in the trajectory.
+    if 'step' in xml_dictionary:
+        xml_data['trajectory']['steps'] = list(range(len(xml_dictionary['step'])))
+
+    xml_data['total_number_of_scf_iterations'] = sum(xml_data['trajectory']['scf_iterations'])
 
     return xml_data, logs
+
+
+def parse_step_to_trajectory(trajectory, data, skip_structure=False):
+    """Append the quantities found in ``data`` to the matching lists of ``trajectory``, which is modified in place.
+
+    :param trajectory: mapping of lists, where a missing key must yield a new list (e.g. ``defaultdict(list)``).
+    :param data: parsed XML dictionary of a single ``step`` element or of the final ``output`` element.
+    :param skip_structure: if True, the positions and cell in ``data`` are not appended.
+    """
+    # Count found directly under ``scf_conv``: it can be zero in case of an initialization-only calculation, in
+    # which case nothing is appended.
+    if 'scf_conv' in data and 'n_scf_steps' in data['scf_conv'] and data['scf_conv']['n_scf_steps']:
+        trajectory['scf_iterations'].append(data['scf_conv']['n_scf_steps'])
+
+    # Count nested under ``convergence_info``: it is appended as is, even when it is zero.
+    nested = data['convergence_info'] if 'convergence_info' in data else {}
+    if 'scf_conv' in nested and 'n_scf_steps' in nested['scf_conv']:
+        trajectory['scf_iterations'].append(nested['scf_conv']['n_scf_steps'])
+
+    structure = data['atomic_structure'] if 'atomic_structure' in data and not skip_structure else {}
+
+    if 'atomic_positions' in structure:
+        bohr_positions = np.array([atom['$'] for atom in structure['atomic_positions']['atom']])
+        trajectory['positions'].append(bohr_positions * CONSTANTS.bohr_to_ang)
+
+    if 'cell' in structure:
+        bohr_cell = np.array([structure['cell'][vector] for vector in ('a1', 'a2', 'a3')])
+        trajectory['cells'].append(bohr_cell * CONSTANTS.bohr_to_ang)
+
+    # Map the energy names of the XML onto the names of the trajectory arrays.
+    energy_names = {'etot': 'energy', 'ehart': 'energy_hartree', 'ewald': 'energy_ewald', 'etxc': 'energy_xc'}
+    energies = data['total_energy'] if 'total_energy' in data else {}
+
+    for xml_name, array_name in energy_names.items():
+        if xml_name in energies:
+            trajectory[array_name].append(energies[xml_name] * CONSTANTS.hartree_to_ev)
+
+    # ``@dims`` is like [3, 2] for forces and [3, 3] for stress and should be reversed to reshape the array.
+    for name in ('forces', 'stress'):
+        if name in data and '$' in data[name]:
+            flat, dimensions = np.array(data[name]['$']), data[name]['@dims']
+            trajectory[name].append(flat.reshape(dimensions[::-1]))
diff --git a/src/aiida_quantumespresso/parsers/pw.py b/src/aiida_quantumespresso/parsers/pw.py
@@ -53,12 +53,18 @@ def parse(self, **kwargs):
 
         parameters = self.node.inputs.parameters.get_dict()
         parsed_xml, logs_xml = self.parse_xml(dir_with_bands, parser_options)
-        parsed_stdout, logs_stdout = self.parse_stdout(parameters, parser_options, parsed_xml, crash_file)
+        parsed_stdout, logs_stdout = self.parse_stdout(parameters, parser_options, crash_file)
+
+        if not parsed_xml and self.node.get_option('without_xml'):
+            parsed_xml = parsed_stdout
+
+        parsed_bands = parsed_xml.pop('bands', {})
+        parsed_structure = parsed_xml.pop('structure', {})
+        parsed_trajectory = parsed_xml.pop('trajectory', {})
+        self.backwards_compatibility_trajectory(parsed_trajectory, parsed_stdout)
 
-        parsed_bands = parsed_stdout.pop('bands', {})
-        parsed_structure = parsed_stdout.pop('structure', {})
-        parsed_trajectory = parsed_stdout.pop('trajectory', {})
         parsed_parameters = self.build_output_parameters(parsed_stdout, parsed_xml)
+        self.backwards_compatibility_parameters(parsed_parameters, parsed_stdout)
 
         # Append the last frame of some of the smaller trajectory arrays to the parameters for easy querying
         self.final_trajectory_frame_to_parameters(parsed_parameters, parsed_trajectory)
@@ -152,6 +158,53 @@ def parse(self, **kwargs):
             if exit_code:
                 return self.exit(exit_code)
 
+    def backwards_compatibility_trajectory(self, parsed_trajectory, parsed_stdout):
+        """Complete ``parsed_trajectory`` in place with arrays of the trajectory that was parsed from the stdout.
+
+        For QE v7.0 and lower, the stress is not reported in the trajectory steps in the XML, so for a relaxation a
+        missing or single-frame stress is replaced by the stdout one. Other listed arrays are copied only if absent.
+        """
+        if 'trajectory' not in parsed_stdout:
+            return
+
+        fallback = parsed_stdout['trajectory']
+        if self.get_calculation_type() in ('relax', 'vc-relax'):
+            if ('stress' not in parsed_trajectory or len(parsed_trajectory['stress']) == 1) and 'stress' in fallback:
+                parsed_trajectory['stress'] = fallback['stress']
+
+        for key in (
+            'energy_accuracy', 'energy_one_electron', 'energy_threshold', 'energy_smearing', 'energy_one_center_paw',
+            'energy_vdw', 'fermi_energy', 'scf_accuracy', 'steps', 'total_force', 'stress'
+        ):
+            if key not in parsed_trajectory and key in fallback:
+                parsed_trajectory[key] = fallback[key]
+
+    @staticmethod
+    def backwards_compatibility_parameters(parsed_parameters, parsed_stdout):
+        """Copy a fixed set of keys from ``parsed_stdout`` into ``parsed_parameters``, which is modified in place.
+
+        A key is copied only if ``parsed_stdout`` has it and ``parsed_parameters`` does not: existing values are kept.
+        """
+        for key in (
+            'energy_smearing_units',
+            'energy_one_center_paw_units',
+            'init_wall_time_seconds',
+            'stress_units',
+            'wall_time',
+            'wall_time_seconds',
+            'number_ionic_steps',
+            'estimated_ram_per_process',
+            'estimated_ram_per_process_units',
+            'estimated_ram_total',
+            'estimated_ram_total_units',
+            'forces_units',
+            'total_force_units',
+            'number_of_bands',
+            'number_of_k_points',
+        ):
+            if key not in parsed_parameters and key in parsed_stdout:
+                parsed_parameters[key] = parsed_stdout[key]
+
     def get_calculation_type(self):
         """Return the type of the calculation."""
         return self.node.inputs.parameters.base.attributes.get('CONTROL', {}).get('calculation', 'scf')
@@ -187,8 +240,9 @@ def validate_electronic(self, trajectory, parameters, logs):
 
         if 'ERROR_ELECTRONIC_CONVERGENCE_NOT_REACHED' in logs['error']:
             scf_must_converge = self.node.inputs.parameters.base.attributes.get('ELECTRONS',
-                                                                          {}).get('scf_must_converge', True)
-            electron_maxstep = self.node.inputs.parameters.base.attributes.get('ELECTRONS', {}).get('electron_maxstep', 1)
+                                                                                {}).get('scf_must_converge', True)
+            electron_maxstep = self.node.inputs.parameters.base.attributes.get('ELECTRONS',
+                                                                               {}).get('electron_maxstep', 1)
 
             if electron_maxstep == 0 or not scf_must_converge:
                 return self.exit_codes.WARNING_ELECTRONIC_CONVERGENCE_NOT_REACHED
@@ -287,10 +341,7 @@ def is_ionically_converged(self, trajectory, except_final_scf=False):
 
         if relax_type == 'relax':
             return verify_convergence_trajectory(
-                trajectory=trajectory,
-                index=-1,
-                threshold_forces=threshold_forces,
-                fixed_coords=fixed_coords
+                trajectory=trajectory, index=-1, threshold_forces=threshold_forces, fixed_coords=fixed_coords
             )
 
         if relax_type == 'vc-relax':
@@ -342,12 +393,11 @@ def parse_xml(self, dir_with_bands=None, parser_options=None):
 
         return parsed_data, logs
 
-    def parse_stdout(self, parameters, parser_options=None, parsed_xml=None, crash_file=None):
+    def parse_stdout(self, parameters, parser_options=None, crash_file=None):
         """Parse the stdout output file.
 
         :param parameters: the input parameters dictionary
         :param parser_options: optional dictionary with parser options
-        :param parsed_xml: the raw parsed data from the XML output
         :return: tuple of two dictionaries, first with raw parsed data and second with log messages
         """
         from aiida_quantumespresso.parsers.parse_raw.pw import parse_stdout
@@ -368,7 +418,7 @@ def parse_stdout(self, parameters, parser_options=None, parsed_xml=None, crash_f
             return parsed_data, logs
 
         try:
-            parsed_data, logs = parse_stdout(stdout, parameters, parser_options, parsed_xml, crash_file)
+            parsed_data, logs = parse_stdout(stdout, parameters, parser_options, crash_file=crash_file)
         except Exception as exc:
             logs.critical.append(traceback.format_exc())
             self.exit_code_stdout = self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION.format(exception=exc)
@@ -404,18 +454,7 @@ def build_output_parameters(parsed_stdout, parsed_xml):
         :param parsed_xml: the raw parsed data dictionary from the XML output file
         :return: the union of the two raw parsed data dictionaries
         """
-        for key in list(parsed_stdout.keys()):
-            if key in list(parsed_xml.keys()):
-                if parsed_stdout[key] != parsed_xml[key]:
-                    raise AssertionError(
-                        '{} found in both dictionaries with different values: {} vs. {}'.format(
-                            key, parsed_stdout[key], parsed_xml[key]
-                        )
-                    )
-
-        parameters = dict(list(parsed_xml.items()) + list(parsed_stdout.items()))
-
-        return parameters
+        return parsed_xml
 
     def build_output_structure(self, parsed_structure):
         """Build the output structure from the raw parsed data.
@@ -578,7 +617,7 @@ def final_trajectory_frame_to_parameters(parameters, parsed_trajectory):
 
         for property_key, property_values in parsed_trajectory.items():
 
-            if property_key not in include_keys:
+            if property_key not in include_keys or not property_values:
                 continue
 
             parameters[property_key] = property_values[-1]