From 710b57d193ad4aae644b1d880c443479dff3acd7 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 15 Aug 2024 13:05:31 +0200 Subject: [PATCH 1/8] Respect suppress_errors during matching and rename --- pyiron_contrib/repair/__init__.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/pyiron_contrib/repair/__init__.py b/pyiron_contrib/repair/__init__.py index 60445a937..162763667 100644 --- a/pyiron_contrib/repair/__init__.py +++ b/pyiron_contrib/repair/__init__.py @@ -13,6 +13,7 @@ from pyiron_atomistics.atomistics.job.atomistic import AtomisticGenericJob from pyiron_base import GenericJob, GenericMaster from pyiron_snippets.logger import logger +from pyiron_snippets.deprecate import deprecate from tqdm.auto import tqdm @@ -140,11 +141,13 @@ def failed(self): class HandyMan: + @deprecate(suppress_fix_errors="Use suppress_errors instead") def __init__( - self, tools: Union[None, Iterable[RepairTool]] = None, suppress_fix_errors=True + self, tools: Union[None, Iterable[RepairTool]] = None, + suppress_errors=True, suppress_fix_errors=True ): self.shed = defaultdict(list) - self._suppress_fix_errors = suppress_fix_errors + self._suppress_errors = suppress_fix_errors and suppress_errors if tools is None: tools = DEFAULT_SHED @@ -182,7 +185,7 @@ def fix_job(self, tool, job, graveyard=None): try: tool.fix(job, new_job) except Exception as e: - if self._suppress_fix_errors: + if self._suppress_errors: raise FixFailed(e) from None else: raise @@ -228,7 +231,10 @@ def find_tool(self, job): if tool.match(job): return tool except Exception as e: - logger.warn(f"Matching {tool} on job {job.id} failed with {e}!") + if self._suppress_errors: + logger.warn(f"Matching {tool} on job {job.id} failed with {e}!") + else: + raise raise NoMatchingTool("Cannot find stuitable tool!") def fix_project( From 21282c35b65237de9a0be69be6f3a2330a68192a Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 10 Oct 2024 11:37:17 +0200 Subject: [PATCH 2/8] Add 
description to tqdm bar when repairing --- pyiron_contrib/repair/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyiron_contrib/repair/__init__.py b/pyiron_contrib/repair/__init__.py index 162763667..609a74501 100644 --- a/pyiron_contrib/repair/__init__.py +++ b/pyiron_contrib/repair/__init__.py @@ -258,7 +258,8 @@ def fix_project( fixing = defaultdict(list) status_list = set([k[0] for k in self.shed.keys()]) job_ids = tqdm( - project.job_table(**kwargs).query("status.isin(@status_list)").id + project.job_table(**kwargs).query("status.isin(@status_list)").id, + desc="Repairing Jobs" ) for jid in job_ids: try: From 744b3a01464ed59d8dff9c2a0fb7e7f7cc665d81 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 10 Oct 2024 11:37:36 +0200 Subject: [PATCH 3/8] Catch tar errors when dealing with timeout jobs --- pyiron_contrib/repair/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyiron_contrib/repair/__init__.py b/pyiron_contrib/repair/__init__.py index 609a74501..767190c7a 100644 --- a/pyiron_contrib/repair/__init__.py +++ b/pyiron_contrib/repair/__init__.py @@ -293,6 +293,11 @@ def fix_project( hopeless.append(job.id) except RepairError as e: failed[job.id] = e + except tarfile.ReadError as e: + TimeoutTool().fix_inplace(job, self) + except EOFError as e: + if e.args[0] == "Compressed file ended before the end-of-stream marker was reached": + TimeoutTool().fix_inplace(job, self) return ConstructionSite(fixing, hopeless, failed) From 52ef3cb5269e570f6169b1f2b83777e81faea06e Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 10 Oct 2024 11:38:19 +0200 Subject: [PATCH 4/8] Update some error messages for vasp6 --- pyiron_contrib/repair/__init__.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pyiron_contrib/repair/__init__.py b/pyiron_contrib/repair/__init__.py index 767190c7a..0c39ebca9 100644 --- a/pyiron_contrib/repair/__init__.py +++ b/pyiron_contrib/repair/__init__.py @@ -459,12
+459,10 @@ def match(self, job): " VERY BAD NEWS! internal error in subroutine PRICEL " "(probably precision problem, try to change SYMPREC in INCAR ?):", " VERY BAD NEWS! internal error in subroutine INVGRP:", - PartialLine( - "VERY BAD NEWS! internal error in subroutine POSMAP: symmetry" - ), - PartialLine( - "Inconsistent Bravais lattice types found for crystalline and" - ), + PartialLine("PRICELV: current lattice and primitive lattice are incommensurate"), + PartialLine("IBZKPT: not all point group operations associated with the symmetry"), + PartialLine("VERY BAD NEWS! internal error in subroutine POSMAP: symmetry"), + PartialLine("Inconsistent Bravais lattice types found for crystalline and"), PartialLine("Found some non-integer element in rotation matrix"), ], job, From 5ebb62702e1ba629e711b15581e8bfca8317729b Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 10 Oct 2024 11:38:34 +0200 Subject: [PATCH 5/8] Change minimization ALGO on ZBRENT error --- pyiron_contrib/repair/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyiron_contrib/repair/__init__.py b/pyiron_contrib/repair/__init__.py index 0c39ebca9..8e43bf478 100644 --- a/pyiron_contrib/repair/__init__.py +++ b/pyiron_contrib/repair/__init__.py @@ -525,6 +525,8 @@ def fix(self, old_job, new_job): ediff = old_job.input.incar.get("EDIFF", 1e-4) if ediff > 1e-6: new_job.input.incar["EDIFF"] = 1e-6 + if old_job.input.incar.get("IBRION", 2) != 1: + new_job.input.incar["IBRION"] = 1 else: contcar = ase_to_pyiron(ase_read(str(old_job.files.CONTCAR))) # VASP manual recommend to copy CONTCAR to POSCAR, but if we From 8882a04dc170fb14e725991f1edb67c75e7cedf1 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 10 Oct 2024 11:39:18 +0200 Subject: [PATCH 6/8] Generalize NCORE handling in Memory tool Old values were specific to cmti and could lead to inconsistent values with server.cores --- pyiron_contrib/repair/__init__.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff 
--git a/pyiron_contrib/repair/__init__.py b/pyiron_contrib/repair/__init__.py index 8e43bf478..6d761c242 100644 --- a/pyiron_contrib/repair/__init__.py +++ b/pyiron_contrib/repair/__init__.py @@ -762,10 +762,13 @@ def fix(self, old_job, new_job): else: new_cores = old_job.server.cores new_job.server.cores = new_cores - if new_cores >= 40: - new_job.input.incar["NCORE"] = 20 - elif new_cores >= 20: - new_job.input.incar["NCORE"] = 10 + old_ncore = old_job.input.incar.get("NCORE", 1) + if old_ncore > 1: + # keep NCORE below smallest node size on our cluster, so that wave + # info is kept in one cache + new_job.input.incar["NCORE"] = min(old_ncore*2, 40) + else: + new_job.input.incar["NCORE"] = int(new_cores // 2) applicable_status = ("aborted",) From bf6148485e59a738a1ea5dd652fcb44ec9c8086b Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 10 Oct 2024 11:40:02 +0200 Subject: [PATCH 7/8] Expand electronic convergence tool and use it by default Apply the MetaGGA tool at most once. --- pyiron_contrib/repair/__init__.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/pyiron_contrib/repair/__init__.py b/pyiron_contrib/repair/__init__.py index 6d761c242..54fcf0e93 100644 --- a/pyiron_contrib/repair/__init__.py +++ b/pyiron_contrib/repair/__init__.py @@ -948,28 +948,33 @@ def fix(self, old_job, new_job): class VaspElectronicConvergenceTool(VaspTool): - def __init__(self, factor=2, max_steps=200, reset_ediff=None, **kwargs): + def __init__(self, factor=2, max_steps=200, reset_ediff=None, reset_algo=None, **kwargs): + super().__init__(**kwargs) self.factor = factor self.max_steps = max_steps self.reset_ediff = reset_ediff + self.reset_algo = reset_algo def match(self, job): ef = job.content["output/generic/dft/scf_energy_free"] n = job.input.incar.get("NELM", 60) electronically_converged = all(len(l) < n for l in ef) - return ( - super().match(job) and n < self.max_steps and not electronically_converged - ) + try_fix = n < 
self.max_steps + if self.reset_ediff is not None: + try_fix |= job.input.incar.get("EDIFF") < self.reset_ediff + if self.reset_algo is not None: + try_fix |= job.input.incar.get("ALGO", "Fast") != self.reset_algo + return super().match(job) and try_fix and not electronically_converged def fix(self, old_job, new_job): super().fix(old_job, new_job) - new_job.input.incar["NELM"] = old_job.input.incar("NELM", 60) * self.max_factor + new_job.input.incar['NELM'] = old_job.input.incar.get('NELM', 60) * self.factor if self.reset_ediff is not None: - new_job.input.incar["EDIFF"] = max( - old_job.input.incar.get("EDIFF"), self.reset_ediff - ) + new_job.input.incar["EDIFF"] = max(old_job.input.incar.get("EDIFF"), self.reset_ediff) + if self.reset_algo is not None: + new_job.input.incar["ALGO"] = self.reset_algo - applicable_status = ("not_converged", "aborted") + applicable_status = ("not_converged",) class VaspMetaGGAElectronicConvergenceTool(VaspTool): @@ -988,6 +993,11 @@ def __init__(self, *args, reset_ediff=None, **kwargs): self.reset_ediff = reset_ediff def match(self, job): + try: + if job.content['user/handyman/last'] == type(self).__name__: + return False + except KeyError: + pass def electronically_converged(job): ef = job.content["output/generic/dft/scf_energy_free"] n = job.input.incar.get("NELM", 60) @@ -1033,4 +1043,5 @@ def fix(self, old_job, new_job): VaspNbandsTool(1.5), VaspMinimizeStepsTool(2), VaspEddrmmTool(), + VaspElectronicConvergenceTool(reset_algo="Normal"), ] From e80cb7e62c987d7a5d5ad43a85317a4320ff68f1 Mon Sep 17 00:00:00 2001 From: pyiron-runner Date: Thu, 10 Oct 2024 09:49:17 +0000 Subject: [PATCH 8/8] Format black --- pyiron_contrib/repair/__init__.py | 44 ++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/pyiron_contrib/repair/__init__.py b/pyiron_contrib/repair/__init__.py index 54fcf0e93..cb62bcb04 100644 --- a/pyiron_contrib/repair/__init__.py +++ b/pyiron_contrib/repair/__init__.py @@ -143,8 
+143,10 @@ class HandyMan: @deprecate(suppress_fix_errors="Use suppress_errors instead") def __init__( - self, tools: Union[None, Iterable[RepairTool]] = None, - suppress_errors=True, suppress_fix_errors=True + self, + tools: Union[None, Iterable[RepairTool]] = None, + suppress_errors=True, + suppress_fix_errors=True, ): self.shed = defaultdict(list) self._suppress_errors = suppress_fix_errors and suppress_errors @@ -259,7 +261,7 @@ def fix_project( status_list = set([k[0] for k in self.shed.keys()]) job_ids = tqdm( project.job_table(**kwargs).query("status.isin(@status_list)").id, - desc="Repairing Jobs" + desc="Repairing Jobs", ) for jid in job_ids: try: @@ -296,7 +298,10 @@ def fix_project( except tarfile.ReadError as e: TimeoutTool().fix_inplace(job, self) except EOFError as e: - if e.args[0] == "Compressed file ended before the end-of-stream marker was reached": + if ( + e.args[0] + == "Compressed file ended before the end-of-stream marker was reached" + ): TimeoutTool().fix_inplace(job, self) return ConstructionSite(fixing, hopeless, failed) @@ -459,10 +464,18 @@ def match(self, job): " VERY BAD NEWS! internal error in subroutine PRICEL " "(probably precision problem, try to change SYMPREC in INCAR ?):", " VERY BAD NEWS! internal error in subroutine INVGRP:", - PartialLine("PRICELV: current lattice and primitive lattice are incommensurate"), - PartialLine("IBZKPT: not all point group operations associated with the symmetry"), - PartialLine("VERY BAD NEWS! internal error in subroutine POSMAP: symmetry"), - PartialLine("Inconsistent Bravais lattice types found for crystalline and"), + PartialLine( + "PRICELV: current lattice and primitive lattice are incommensurate" + ), + PartialLine( + "IBZKPT: not all point group operations associated with the symmetry" + ), + PartialLine( + "VERY BAD NEWS! 
internal error in subroutine POSMAP: symmetry" + ), + PartialLine( + "Inconsistent Bravais lattice types found for crystalline and" + ), PartialLine("Found some non-integer element in rotation matrix"), ], job, @@ -766,7 +779,7 @@ def fix(self, old_job, new_job): if old_ncore > 1: # keep NCORE below smallest node size on our cluster, so that wave # info is kept in one cache - new_job.input.incar["NCORE"] = min(old_ncore*2, 40) + new_job.input.incar["NCORE"] = min(old_ncore * 2, 40) else: new_job.input.incar["NCORE"] = int(new_cores // 2) @@ -948,7 +961,9 @@ def fix(self, old_job, new_job): class VaspElectronicConvergenceTool(VaspTool): - def __init__(self, factor=2, max_steps=200, reset_ediff=None, reset_algo=None, **kwargs): + def __init__( + self, factor=2, max_steps=200, reset_ediff=None, reset_algo=None, **kwargs + ): super().__init__(**kwargs) self.factor = factor self.max_steps = max_steps @@ -968,9 +983,11 @@ def match(self, job): def fix(self, old_job, new_job): super().fix(old_job, new_job) - new_job.input.incar['NELM'] = old_job.input.incar.get('NELM', 60) * self.factor + new_job.input.incar["NELM"] = old_job.input.incar.get("NELM", 60) * self.factor if self.reset_ediff is not None: - new_job.input.incar["EDIFF"] = max(old_job.input.incar.get("EDIFF"), self.reset_ediff) + new_job.input.incar["EDIFF"] = max( + old_job.input.incar.get("EDIFF"), self.reset_ediff + ) if self.reset_algo is not None: new_job.input.incar["ALGO"] = self.reset_algo @@ -994,10 +1011,11 @@ def __init__(self, *args, reset_ediff=None, **kwargs): def match(self, job): try: - if job.content['user/handyman/last'] == type(self).__name__: + if job.content["user/handyman/last"] == type(self).__name__: return False except KeyError: pass + def electronically_converged(job): ef = job.content["output/generic/dft/scf_energy_free"] n = job.input.incar.get("NELM", 60)