Skip to content

Commit

Permalink
Merge branch 'main' into 311-workflow-execution-record-in-database-is-missing-ended_at_time
Browse files (browse the repository at this point in the history)
  • Loading branch information
mbthornton-lbl committed Dec 4, 2024
2 parents 0a6ea07 + 270afce commit 6d2e06e
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions nmdc_automation/workflow_automation/watch_nmdc.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ def process_failed_job(self, job: WorkflowJob) -> Optional[str]:
job.workflow.state["failed_count"] = job.workflow.state.get("failed_count", 0) + 1
job.workflow.state["last_status"] = job.job_status
self.save_checkpoint()
- logger.info(f"Job {job.opid} failed {job.workflow.state['failed_count']} times. Retrying.")
+ logger.warning(f"Job {job.opid} failed {job.workflow.state['failed_count']} times. Retrying.")
jobid = job.job.submit_job()
return jobid

Expand Down Expand Up @@ -320,7 +320,7 @@ def update_operation(self, opid, done, meta):
class Watcher:
""" Watcher class for monitoring and managing jobs """
def __init__(self, site_configuration_file: Union[str, Path], state_file: Union[str, Path] = None):
- self._POLL = 60
+ self._POLL_INTERVAL_SEC = 60
self._MAX_FAILS = 2
self.should_skip_claim = False
self.config = SiteConfig(site_configuration_file)
Expand Down Expand Up @@ -395,7 +395,7 @@ def watch(self):
self.cycle()
except (IOError, ValueError, TypeError, AttributeError) as e:
logger.exception(f"Error occurred during cycle: {e}", exc_info=True)
- sleep(self._POLL)
+ sleep(self._POLL_INTERVAL_SEC)

def claim_jobs(self, unclaimed_jobs: List[WorkflowJob] = None) -> None:
""" Claim unclaimed jobs, prepare them, and submit them. Write a checkpoint after claiming jobs. """
Expand Down

0 comments on commit 6d2e06e

Please sign in to comment.