diff --git a/.travis.yml b/.travis.yml index acff49a..9dfffe0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,11 +1,44 @@ matrix: include: - - os: linux + - os: linux + language: perl + perl: "5.22" + install: + - which cpanm + - cpanm -nq --installdeps . + - perl Build.PL + - perl ./Build + - perl ./Build test + - perl ./Build install + script: + - hpcrunner.pl submit_jobs -h + - hpcrunner.pl execute_job -h + - hpcrunner.pl execute_array -h + - hpcrunner.pl stats -h + - hpcrunner.pl new -h + #script: ci-tests/slurm/travis-setup.sh; ci-tests/slurm/travis-run.sh # slurm testing + - os: osx language: c - sudo: required - services: - - docker - script: ci-tests/slurm/travis-setup.sh; ci-tests/slurm/travis-run.sh # slurm testing - - os: osx - language: c - script: ci-tests/run-travis-perl-osx.sh #osx testing + install: + - wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh + - bash miniconda.sh -b -p $HOME/miniconda + - export PATH="$HOME/miniconda/bin:$PATH" + - hash -r + - conda config --set always_yes yes --set changeps1 no + - conda config --add channels conda-forge + - conda config --add channels bioconda + - conda update -q conda + - conda info -a + - conda create -q -n build perl perl-app-cpanminus + - source activate build + - cpanm -nq --installdeps . + - perl Build.PL + - perl ./Build + - perl ./Build test + - perl ./Build install + script: + - hpcrunner.pl submit_jobs -h + - hpcrunner.pl execute_job -h + - hpcrunner.pl execute_array -h + - hpcrunner.pl stats -h + - hpcrunner.pl new -h diff --git a/Changes b/Changes index 26d3889..683e68b 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,7 @@ Revision history for HPC-Runner-Command {{$NEXT}} + - added back in stats command 3.2.11 2017-09-11 13:50:17 GST - Milestones - https://github.com/biosails/HPC-Runner-Command/milestone/3 diff --git a/META.json b/META.json index 7dca4a5..1627d63 100644 --- a/META.json +++ b/META.json @@ -58,6 +58,7 @@ "File::Basename" : "0", "File::Copy" : "0", "File::Find::Rule" : "0", + "File::Glob" : "0", "File::Path" : "0", "File::Slurp" : "0", "File::Spec" : "0", @@ -117,6 +118,7 @@ "Test::Class::Moose" : "0", "Test::Class::Moose::Load" : "0", "Test::Class::Moose::Runner" : "0", + "Test::Exception" : "0.43", "Test::More" : "0", "Text::Diff" : "0", "strict" : "0", diff --git a/README.md b/README.md index c1dd97f..315ccde 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ Get help by heading on over to github and raising an issue. [GitHub ](https://me https:#github.com-biosails-HPC-Runner-Command-issues). Please see the complete documentation at [HPC::Runner::Command GitBooks ](https://metacpan.org/pod/ -https:#jerowe.gitbooks.io-hpc-runner-command-docs-content). +https:#biosails.gitbooks.io-hpc-runner-command-docs-content). # Quick Start - Create a New Project @@ -136,11 +136,11 @@ Within a job type we can declare dependencies on particular tasks. Each scheduler has its own set of variables. HPC::Runner::Command has a set of generalized variables for declaring types across templates. 
For more information please see [Job Scheduler -Comparison](https://jerowe.gitbooks.io/hpc-runner-command-docs/content/job_submission/comparison.html) +Comparison](https://biosails.gitbooks.io/hpc-runner-command-docs/content/job_submission/comparison.html) Additionally, for workflows with a large number of tasks, please see [Considerations for Workflows with a Large Number of -Tasks](https://jerowe.gitbooks.io/hpc-runner-command-docs/content/design_workflow.html#considerations-for-workflows-with-a-large-number-of-tasks) +Tasks](https://biosails.gitbooks.io/hpc-runner-command-docs/content/design_workflow.html#considerations-for-workflows-with-a-large-number-of-tasks) for information on how to group tasks together. ### Workflow file diff --git a/_docs/README.md b/_docs/README.md deleted file mode 100644 index 0304c81..0000000 --- a/_docs/README.md +++ /dev/null @@ -1,29 +0,0 @@ -# HPC-Runner-Command - -![HPC::Runner::Command](images/rabbit.jpeg) - -Create composable bioinformatics hpc analyses. - -While the code is mostly stable, the documentation is still under heavy development. - -# SYNOPSIS - -To create a new project - - hpcrunner.pl new MyNewProject - -To submit jobs to a cluster - - hpcrunner.pl submit_jobs - -To run jobs on an interactive queue or workstation - - hpcrunner.pl execute_job - -# DESCRIPTION - -HPC::Runner::Command is a set of libraries for scaffolding data analysis projects, -submitting and executing jobs on an HPC cluster or workstation, and obsessively -logging results. - -Please see the complete documentation at [https://jerowe.gitbooks.io/hpc-runner-command-docs/content/](https://jerowe.gitbooks.io/hpc-runner-command-docs/content/) diff --git a/_docs/SUMMARY.md b/_docs/SUMMARY.md deleted file mode 100644 index 7e66cef..0000000 --- a/_docs/SUMMARY.md +++ /dev/null @@ -1,20 +0,0 @@ -# Summary - -* [Overview](overview.md) -* [Quick Start](quick_start.md) -* [Why use HPC::Runner::Command](why.md) -* [Changes](changes.md) -* [Glossary of Terms](glossary.md) -* [Job Submission](job_submission/job_submission.md) - * [Scheduler Comparison](job_submission/comparison.md) - * [Job Submission Parameters](job_submission/hpcrunner_submission_parameters.md) - * [Job Submission Dependencies](job_submission/dependencies.md) -* [Job Execution](job_execution/job_execution.md) -* [Designing your Workflows](design_workflow.md) -* [Examples](examples/examples.md) - * [Example 01 - Simple](examples/example_01.md) - * [Example 02 - Multiple Nodes](examples/example_02.md) - * [Example 03 - Linear Workflow](examples/example_03.md) - * [Example 04 - Nested Workflow](examples/example_04.md) - * [Example 04 - Task Dependencies](examples/example_05.md) -* [Acknowledgements](acknowledgements.md) diff --git a/_docs/Usage.pod b/_docs/Usage.pod deleted file mode 100644 index b387c71..0000000 --- a/_docs/Usage.pod +++ /dev/null @@ -1,327 +0,0 @@ -=head1 Name - -HPC::Runner::Usage - -=head1 HPC-Runner-Scheduler - -=head2 Overview - -The HPC::Runner modules are wrappers around running Gnu Parallel, -Parallel::ForkManager, MCE/MCE::Queue, and job submission to a Slurm or PBS -queue - -=head2 Submit your commands - - ##Submit a job to slurm to spread across nodes - slurmrunner.pl --infile /path/to/fileofcommands --outdir slurmoutput --jobname slurmjob - - ##Run in parallel using MCE on a single node - mcerunner.pl --infile /path/to/fileofcommands --outdir threadsoutput --procs 4 - -=head3 Run Your Command - -The idea behind the HPC::Runner modules is to be able to run arbitrary -bash with proper logging, 
catching STDOUT/ERROR and exit status, and, when possible, running jobs in parallel with some job flow.

The modules are written with Moose, and can be overridden and extended.

Logging is done with Log::Log4perl.

HPC::Runner is a base class that has the variables common among HPC::Runner::Threads, HPC::Runner::MCE, and HPC::Runner::Slurm. All three modules use a similar philosophy, but different technologies to implement it. For myself this was a workaround so I didn't have to learn to write MPI scripts, or have every tool be written into some sort of workflow manager.

The different runners each come with an executable script that should be installed in your path: mcerunner.pl, parallelrunner.pl, and slurmrunner.pl.

=head1 An In-Depth Look

=head2 Single Node Execution

If you only have a single node to execute on, but still have many threads/processes available, you can use HPC::Runner::MCE or HPC::Runner::Threads. Both have job logging and workflow control.

=head2 Example for Runner::Threads and Runner::MCE

An example infile would contain any command that can be executed from the command line. All the modules have a basic level of workflow management, meaning you can use the command 'wait' to wait for all other threads/processes to finish.

In the example directory there is a script called testioselect.pl. It is taken 100% from a thread on perlmonks discussing the proper use of IPC::Open3, found here: http://www.perlmonks.org/?node_id=151886. I based all the usage of running bash commands on the user abstract's post, only adding in the parts for logging.

You could create a test_threads/mce.in with the following.

    test_threads/mce.in

It is ALWAYS a good idea to use full paths when running arbitrary scripts. Jobs will always be run from your current working directory, but it's still a good idea!

Then submit that to Runner::MCE/Threads with the following.

    #Using MCE

    mcerunner.pl --infile test_mce.in --outdir `pwd`/test --procs 4

    # OR
    # Using Parallel::ForkManager

    parallelrunner.pl --infile test_mce.in --outdir `pwd`/test --procs 4

This would generate the test directory and logs for the commands, detailing STDOUT/STDERR and time and date, and run those commands 4 threads/processes at a time.

Each command gets its own log file, as well as a MAIN log file to detail how the job is running overall.

=head3 Troubleshooting mcerunner and parallelrunner

First of all, make sure your jobs run without the wrapper script. Runner::Threads/MCE only makes sure your threads/processes start. It does not make sure your jobs exit successfully, but the exit code will be in your log.

View your logs in outdir/prunner_current_date_time. This will give you the STDOUT/STDERR.

=head3 Full Path Names

Please give all your commands, infiles, and outdirs full path names. If you are executing an arbitrary script you should give either the full path name, or the path should be in your ENV{PATH}. HPC::Runner::Init will do some guessing on the infile and outdir parameters using File::Spec, but this is no guarantee!

If you are using Runner::Threads your perl must be installed with thread capabilities.

=head2 MultiNode Job Submission

This documentation was initially written for Slurm, but for the most part the concepts and requirements are the same across schedulers (Slurm, PBS, SGE, etc.).
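Before moving on to the scheduler-specific examples, here is a minimal sketch of the kind of infile described above (the commands are hypothetical placeholders); 'wait' is the only flow-control keyword, and everything after it starts only once everything before it has finished:

    #test_threads/mce.in
    echo "stage 1: sample A" && sleep 5
    echo "stage 1: sample B" && sleep 5
    wait
    echo "stage 2: runs only after both stage 1 tasks have exited"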
=head2 HPC::Runner::Slurm Example

HPC::Runner::Slurm adds another layer to HPC::Runner::MCE or HPC::Runner::Threads by submitting jobs to the queuing system Slurm (https://computing.llnl.gov/linux/slurm/). Slurm submits its jobs to different machines, or nodes, across a cluster. It is common for many users to share the same space.

When I was first using slurm I wanted something that would automatically distribute my jobs across the cluster in a way that would get them done reasonably quickly. Most of the jobs being submitted were 'embarrassingly parallel' and did not require much of the fine-tuning slurm is capable of. For most jobs what we wanted to be able to do was take a list of jobs, chop them into pieces, send each piece to a node, and then on that node run those jobs in parallel.

=head3 alljobs.in

    job1
    job2
    job3
    job4
    # Let's tell mcerunner/parallelrunner/slurmrunner.pl to execute jobs 5-8 AFTER jobs 1-4 have completed
    # wait
    job5
    job6
    job7
    job8

What I want is for Slurm to take 4 jobs at a time and submit those to a node. I don't want to do this all manually, for obvious reasons.

=head3 Slurm Template

    #!/bin/bash
    #SBATCH --share
    #SBATCH --get-user-env
    #SBATCH --job-name=alljobs_batch1
    #SBATCH --output=batch1.out
    #SBATCH --partition=bigpartition
    #SBATCH --nodelist=node1onbigpartion

    #Here are the jobs!
    job1
    job2
    job3
    job4

Ok, I don't really want that. I want all the logging, and since those jobs don't depend on one another I want to run them all in parallel, because that is what HPC is all about. ;) So I run this command instead, which uses the script that comes with Runner::Slurm.

    slurmrunner.pl --infile `pwd`/alljobs.in --jobname alljobs --outdir `pwd`/alljobs

This creates the following template files and submits them to the queue.

Although it is not required to supply a jobname or an outdir, it is strongly recommended, especially if you are submitting multiple jobs.

=head3 Slurm Template with Batched Job

    #!/bin/bash
    ##alljobs_batch1.sh
    #SBATCH --share
    #SBATCH --get-user-env
    #SBATCH --job-name=alljobs_batch1
    #SBATCH --output=batch1.out
    #SBATCH --partition=bigpartition
    #SBATCH --nodelist=node1onbigpartion
    #SBATCH --cpus-per-task=4

    #Take our jobs, batch them out to a node, and run them in parallel
    mcerunner.pl --infile batch1.in --procs 4 --outdir /outdir/we/set/in/slurmrunner.pl

Where batch1.in contains our jobs 1-4. The number in --cpus-per-task should be greater than or equal to the maximum number of threads/processes that are run in parallel (procs). The default values in HPC::Runner::Slurm are fine, but if you change them make sure you stick with that rule.

This template and batch1.in are generated by the command, and the batch is submitted with the slurm jobid 123.

Then the next job batch is generated as alljobs_batch2.sh, and we tell slurm to submit it only after jobs 1-4 have exited successfully.
=head3 Slurm Template with Dependency

    #!/bin/bash
    ##alljobs_batch2.sh
    #SBATCH --share
    #SBATCH --get-user-env
    #SBATCH --job-name=alljobs_batch2
    #SBATCH --output=batch2.out
    #SBATCH --partition=bigpartition
    #SBATCH --nodelist=node2onbigpartion
    #SBATCH --cpus-per-task=4

    #Don't start this job until job 123 has exited successfully
    #SBATCH --dependency=afterok:123

    mcerunner.pl --infile batch2.in --procs 4 --outdir /outdir/we/set/in/slurmrunner.pl

=head2 Customizing HPC::Runner::Slurm Input

Since the HPC::Runner modules are written in Moose, they can be overridden and extended in the usual fashion. Logging is done with Log4perl, so any of the appenders can be used. The default is to log to files, but what if you want to log to rsyslog or a database?

=head3 Extend slurmrunner.pl to add your own custom logging

    #!/usr/bin/env perl
    #slurmrunner_rsyslog.pl

    package Main;
    use Moose;

    extends 'HPC::Runner::Slurm';

    use Log::Log4perl;

    #Let's override init_log with our own subroutine...
    #The appender config below is an example - adjust it for your site.

    sub init_log {
        my $self = shift;
        my $log_conf = <<"CONF";
    log4perl.category.Main             = DEBUG, Syslog
    log4perl.appender.Syslog           = Log::Dispatch::Syslog
    log4perl.appender.Syslog.min_level = debug
    log4perl.appender.Syslog.ident     = slurmrunner
    log4perl.appender.Syslog.layout    = Log::Log4perl::Layout::SimpleLayout
    CONF
        Log::Log4perl->init( \$log_conf );
        my $log = Log::Log4perl->get_logger();
        return $log;
    }

    Main->new_with_options->run;

    1;

=head2 Troubleshooting Runner::Slurm

Make sure your paths are sourced correctly for slurm. The easiest way to do this is to add all your paths to your ~/.bashrc, source it, and add the line

    #SBATCH --get-user-env

to your submit script. By default this is already placed in the template, but if you decide to supply your own template you may want to add it.

If you are submitting a script that is not in your path, you probably want to give the full pathname for it, especially if supplying the outdir option. In general I think it's always best to give the full pathname.

If you are in the directory already and submitting from bash, just use backticks around pwd.

Another common error is 'This node configuration is not available'. This could mean several things.

    1. The node is down at the time of job submission.
    2. You are asking for more resources on a node than it has. If you ask for --cpus-per-task=32 and the node only has 16 cpus, you will get this error.
    3. You misspelled the partition or nodename.

Point 2 will be improved upon in the next release, so that slurm is queried for the number of cpus available on a node at the time of submission. For now it must be set manually with --cpus-per-task.

=head2 Authors and Contributors

Jillian Rowe, in collaboration with the ITS Advanced Computing Team at Weill Cornell Medical College in Qatar.

=head2 Acknowledgements

This module was originally developed at and for Weill Cornell Medical College in Qatar. With approval from WCMC-Q, this information was generalized and put on github, for which the authors would like to express their gratitude. Thanks also to all the HPC users at WCMC-Q, who gave their input.

The continued development of the HPC::Runner modules is supported by NYUAD, at the Center for Genomics and Systems Biology.

=cut

diff --git a/_docs/acknowledgements.md b/_docs/acknowledgements.md deleted file mode 100644 index ac9ef2b..0000000 --- a/_docs/acknowledgements.md +++ /dev/null @@ -1,29 +0,0 @@

# Acknowledgements

## As of Version 2.41

This module's continuing development is supported by NYU Abu Dhabi in the Center for Genomics and Systems Biology.
With approval from NYUAD, this information was generalized and put on github, for which the authors would like to express their gratitude.

## Before Version 2.41

This module was originally developed at and for Weill Cornell Medical College in Qatar, within the ITS Advanced Computing Team. With approval from WCMC-Q, this information was generalized and put on github, for which the authors would like to express their gratitude.

diff --git a/_docs/changes.md b/_docs/changes.md deleted file mode 100644 index 1cba196..0000000 --- a/_docs/changes.md +++ /dev/null @@ -1,20 +0,0 @@

## Changes from 2.0

### New (and hopefully clearer!) Syntax

Instead of calling various shell scripts (slurmrunner.pl for slurm submission, pbsrunner.pl for pbs submission, mcerunner.pl for job execution), you now call a single script with subcommands.

```
hpcrunner.pl new ProjectName
hpcrunner.pl submit_jobs --infile submission_file
hpcrunner.pl execute_job --infile job_file
```

### Nested workflow submission

Previously workflows could only have linear dependencies, with each job depending upon the previous one. Now jobs can depend upon any job in the workflow.

### Git versioning of job runs

If chosen, each run can be a git version. You can use all the usual git tools to track differences between job submissions, archive certain submissions, create branches of analyses, etc.

diff --git a/_docs/design_workflow.md b/_docs/design_workflow.md deleted file mode 100644 index 20c3fea..0000000 --- a/_docs/design_workflow.md +++ /dev/null @@ -1,131 +0,0 @@

# Overview

When designing a workflow you should keep several key components in mind.

1. Job computation requirements should be explicitly and verbosely stated.
2. Jobs should be grouped by their order in the analysis and by their computational requirements.
3. Tasks should be broken down into their smallest components.
4. Job dependencies must be verbosely stated.

The reasoning behind this is that each job type will have similar computation requirements.

```
#HPC jobname=qc
#HPC procs=1
#HPC commands_per_node=1
#HPC ntasks=1
#HPC cpus_per_task=2

qc --threads 2 Sample1
qc --threads 2 Sample2
qc --threads 2 Sample3
..
qc --threads 2 Sample16

#HPC jobname=gzip
#HPC procs=1
#HPC commands_per_node=1
#HPC ntasks=1
#HPC cpus_per_task=1

gzip Sample1_results
gzip Sample2_results
..
gzip Sample16_results
```

Let's break this down job by job.

## Jobname qc - Submission

Each QC task is a multithreaded process. #HPC procs=N is the number of tasks that can run in parallel. Each qc task runs in 2 threads, so if the 16 tasks were packed onto a single 16-cpu node you could run 16 cpus / 2 threads = 8 of them concurrently (#HPC procs=8); here, with commands_per_node=1 and procs=1, each task gets its own job. Either way, we make the most efficient use of the cluster resources.

## Jobname gzip - Submission

Gzip runs in a single thread, and therefore we have cpus_per_task=1.

## Workflow Submission

```
hpcrunner.pl submit_jobs --infile my_submission
```

This would produce 001_qc.sh, 001_qc.in, 002_gzip.sh, and 002_gzip.in.

## Jobname qc - Execution

Once submitted to the scheduler, the QC job would be executed as

```
hpcrunner.pl execute_job --infile 001_qc.in --procs 1
```

telling our job runner to execute 1 task (qc --threads 2 Sample_N) per batch, with the overall number of concurrent tasks governed by the resources of the computational environment.
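The same generated batch file can also be run by hand, outside the scheduler; this sketch assumes the file name produced above (001_qc.in) and raises the concurrency to show what --procs controls:

```
# Run the generated qc batch locally rather than through the scheduler.
# --procs 4 lets the runner keep up to 4 tasks from the infile in
# flight at once; each 2-threaded qc task then occupies 2 cpus.
hpcrunner.pl execute_job --infile 001_qc.in --procs 4
```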
## Jobname gzip - Execution

The gzip job would be executed as

```
hpcrunner.pl execute_job --infile 002_gzip.in --procs 1
```

telling our job runner to execute 1 task per batch; since commands_per_node=1, each of the 16 gzip tasks is its own job, and the scheduler runs them in parallel as resources allow.

More information on HPC-Runner-Command parameters is available at: SOMELINKE

## Considerations for workflows with a large number of tasks

Above, ntasks, commands_per_node and procs are always 1.

When submitting a large number of tasks, 1000+ (the exact threshold depends on the limits set by the scheduler), chances are we want to batch these differently.

For instance, let's say we are submitting thousands of blast jobs, with each task taking 1 hour to complete.

Our slurm configuration would look like this:

```
#HPC commands_per_node=10
#HPC walltime=05:00:00
#HPC procs=2
#HPC ntasks=2
#HPC cpus_per_task=6
blastx --threads 6 --db env_nr --infile Sample1.fasta
blastx --threads 6 --db env_nr --infile Sample2.fasta
blastx --threads 6 --db env_nr --infile Sample3.fasta
blastx --threads 6 --db env_nr --infile Sample4.fasta
blastx --threads 6 --db env_nr --infile Sample5.fasta
...
blastx --threads 6 --db env_nr --infile Sample1000.fasta
```

And our PBS configuration would look like this:

```
#HPC commands_per_node=10
#HPC walltime=05:00:00
#HPC procs=2
#HPC cpus_per_task=12
blastx --threads 6 --db env_nr --infile Sample1.fasta
blastx --threads 6 --db env_nr --infile Sample2.fasta
blastx --threads 6 --db env_nr --infile Sample3.fasta
blastx --threads 6 --db env_nr --infile Sample4.fasta
blastx --threads 6 --db env_nr --infile Sample5.fasta
...
blastx --threads 6 --db env_nr --infile Sample1000.fasta
```

With both SLURM and PBS we are submitting 10 tasks to a node and running 2 tasks concurrently with 6 threads per task. Slurm handles this a bit differently than PBS. With SLURM you give the number of concurrent tasks, ntasks, in this case 2, and the number of cpus in a given task, in this case 6. With PBS you leave out the ntasks, and just set cpus_per_task to the number of concurrent tasks * the number of threads per task. This is only a consideration when grouping tasks.

diff --git a/_docs/examples/example_01.md b/_docs/examples/example_01.md deleted file mode 100644 index 523e5c4..0000000 --- a/_docs/examples/example_01.md +++ /dev/null @@ -1,162 +0,0 @@

# Example 01 - Simple

The simplest example is one job type, where each task has a single thread.

```
#Filename - preprocess.sh
#HPC jobname=preprocess
#HPC commands_per_node=1
```

Here is a bird's-eye view of a simple example.

### Submission and output directory structure

![Example 01-1](../images/HPC_Runner_Diagrams_-_Example_01-1.jpg)

### Job execution

![Example 01-2](../images/HPC_Runner_Diagrams_-_Example_01-2.jpg)

## Examine the output

You can examine the [Full Jupyterhub Notebook](https://jerowe.gitbooks.io/hpc-runner-command-docs/content/examples/example_01_jupyterhub.html) here, and it is included below.
- - - -## HPC Runner Submission - -### Submission file - - -```python -%%bash - -cat example_001/preprocess.sh -``` - - #HPC jobname=preprocess - #HPC commands_per_node=1 - #HPC walltime=00:00:30 - #HPC module=gencore/1 gencore_dev - echo "preprocess sample1" && sleep 30 - echo "preprocess sample2" && sleep 30 - echo "preprocess sample3" && sleep 30 - echo "preprocess sample4" && sleep 30 - echo "preprocess sample5" && sleep 30 - echo "preprocess sample6" && sleep 30 - - -### Submit to Slurm - -``` -hpcrunner.pl submit_jobs --infile preprocess.sh -``` - -``` -[2016/11/03 08:50:03] Beginning to submit jobs to the scheduler -[2016/11/03 08:50:03] Schedule is preprocess -[2016/11/03 08:50:03] Submitting all preprocess job types -[2016/11/03 08:50:04] Submitted batch job 23162 - -[2016/11/03 08:50:04] Submited job /scratch/gencore/nov_dalma_training/example_001/hpc-runner/scratch/001_preprocess.sh - With Slurm jobid 23162 -[2016/11/03 08:50:04] There are 6 batches for job type preprocess -``` - -## Directory Structure - - -```python -%%bash - -tree example_001 -``` - - example_001 - ├── hpc-runner - │   ├── logs - │   │   ├── 2016-11-03-001_preprocess - │   │   │   ├── 2016-11-03-CMD_001-PID_13774.md - │   │   │   ├── 2016-11-03-CMD_002-PID_13778.md - │   │   │   ├── 2016-11-03-CMD_003-PID_13776.md - │   │   │   ├── 2016-11-03-CMD_004-PID_16555.md - │   │   │   ├── 2016-11-03-CMD_005-PID_16553.md - │   │   │   ├── 2016-11-03-CMD_006-PID_16551.md - │   │   │   └── MAIN_2016-11-03.log - │   │   └── 2016-11-03-hpcrunner_logs - │   │   ├── 001_preprocess.log - │   │   └── 001-process_table.md - │   └── scratch - │   ├── 001_preprocess_001.in - │   ├── 001_preprocess_002.in - │   ├── 001_preprocess_003.in - │   ├── 001_preprocess_004.in - │   ├── 001_preprocess_005.in - │   ├── 001_preprocess_006.in - │   └── 001_preprocess.sh - └── preprocess.sh - - 5 directories, 17 files - - -## Task Log Output - -Each individual task gets its own output file. The structure is date of submission, jobtype, date of executition, task count, and processID. - - -```python -%%bash - -cat example_001/hpc-runner/logs/2016-11-03-001_preprocess/2016-11-03-CMD_001-PID_13774.md -``` - - 2016/11/03 08:50:11: INFO Starting Job: 1 - Cmd is echo "preprocess sample1" && sleep 30 - 2016/11/03 08:50:11: INFO preprocess sample1 - 2016/11/03 08:50:41: INFO Finishing job 1 with ExitCode 0 - 2016/11/03 08:50:41: INFO Total execution time 0 years, 00 months, 0 days, 00 hours, 00 minutes, 30 seconds - - -## Slurm Log Output - -Additionally, all output from the scheduler is logged. This is useful when debugging submissions. If, for instance, we had mistyped a module name, submitted to the wrong queue, or requested impossible resources, this would be recorded here. - - -```python -%%bash - -cat example_001/hpc-runner/logs/2016-11-03-hpcrunner_logs/001_preprocess.log -``` - - Module 'gencore/1' is already loaded - Module 'gencore/1' is already loaded - Module 'gencore/1' is already loaded - Module 'gencore/1' is already loadedModule 'gencore/1' is already loadedModule 'gencore/1' is already loaded - -## Process Table Output - -The process table is a table for the whole job. It records version ( more on this later), the Slurm scheduler ID, the Slurm jobname, any task tags, and process pid, the exit code and the duration. - -It is highly recommended to record this table in a project management tool. 
- - -```python -%%bash - -ls example_001/hpc-runner/logs/2016-11-03-hpcrunner_logs/001-process_table.md -``` - - example_001/hpc-runner/logs/2016-11-03-hpcrunner_logs/001-process_table.md - - -|Version | Scheduler Id | Jobname | Task Tags | ProcessID | ExitCode | Duration | -| --- | --- | --- | --- | --- | --- | --- | -|0.0|23162|001_preprocess||16551|0|0 years, 00 months, 0 days, 00 hours, 00 minutes, 30 seconds| -|0.0|23167|001_preprocess||16553|0|0 years, 00 months, 0 days, 00 hours, 00 minutes, 30 seconds| -|0.0|23163|001_preprocess||13774|0|0 years, 00 months, 0 days, 00 hours, 00 minutes, 30 seconds| -|0.0|23166|001_preprocess||16555|0|0 years, 00 months, 0 days, 00 hours, 00 minutes, 30 seconds| -|0.0|23165|001_preprocess||13776|0|0 years, 00 months, 0 days, 00 hours, 00 minutes, 30 seconds| -|0.0|23164|001_preprocess||13778|0|0 years, 00 months, 0 days, 00 hours, 00 minutes, 30 seconds| diff --git a/_docs/examples/example_01_jupyterhub.html b/_docs/examples/example_01_jupyterhub.html deleted file mode 100644 index 2622b5f..0000000 --- a/_docs/examples/example_01_jupyterhub.html +++ /dev/null @@ -1,577 +0,0 @@ - - - -Example 001 - - - - - - - - - - - - - - - - - - - - -
[deleted HTML: the exported "Example 001" Jupyter notebook page. Its body duplicated _docs/examples/example_01.md: the preprocess.sh submission file, the hpcrunner.pl submit_jobs log, the hpc-runner directory tree, a per-task log, the scheduler log, and the process table.]
- - diff --git a/_docs/examples/example_02.md b/_docs/examples/example_02.md deleted file mode 100644 index 5862e85..0000000 --- a/_docs/examples/example_02.md +++ /dev/null @@ -1,59 +0,0 @@ -# Example 02 - Multiple Nodes - -## Usual case - 1 command per node -Special considerations should be used when submitting a large number of tasks. -Depending on the configuration of your scheduler you will hit limits very -quickly. In this case you would group tasks together, using the commands_per_node option. -You may also want to increase the number of concurrent tasks using the procs -option. Consider the case when having a large number of single threaded tasks. -Normally you would submit the job as follows: - -``` -#HPC commands_per_node=1 -#HPC procs=1 -#HPC ntasks=1 -#HPC walltime=01:00:00 -#HPC cpus_per_task=1 -#HPC mem=100GB -``` - -## Two commands per node - Concurrency 1 - -Now, you could pack 2 commands onto a node, but only run 1 task concurrently. In -this case you would increase the walltime by a factor of commands_per_node, in -this case two, but leave the other variables the same. - -``` -#HPC commands_per_node=2 -#HPC procs=1 -#HPC ntasks=1 -#HPC walltime=02:00:00 -#HPC cpus_per_task=1 -#HPC mem=100GB -``` - -## Two commands per node - Concurrency 2 - -Now, you are packing 2 commands per node, but increasing the concurrency to 2. -Since our tasks are running concurrently you would set the walltime to the time -it takes 1 task to complete, but increase the cpus_per_task, ntasks (if using -SLURM) and memory by a factor of procs. - -``` -#HPC commands_per_node=2 -#HPC procs=2 -#HPC ntasks=2 -#HPC walltime=01:00:00 -#HPC cpus_per_task=2 -#HPC mem=200GB -``` - -Here is a birds eye view of a simple example. - -### Submission and output directory structure - -![Example 02-1](../images/HPC_Runner_Diagrams_-_Example_02-1.jpg) - -### Job execution - -![Example 02-2](../images/HPC_Runner_Diagrams_-_Example_02-2.jpg) diff --git a/_docs/examples/example_03.md b/_docs/examples/example_03.md deleted file mode 100644 index 61f9e6f..0000000 --- a/_docs/examples/example_03.md +++ /dev/null @@ -1,17 +0,0 @@ -# Example 03 - Linear Dependencies - -Dependencies can be linear, meaning one job depends upon the next, or nested, -where one job can depend upon any other. This example describes linear -dependencies. - -### Submission and output directory structure - -![Example 03-1](../images/HPC_Runner_Diagrams_-_Example_03-1.jpg) - -### Dependency Tree - -![Example 03-2](../images/HPC_Runner_Diagrams_-_Example_03-2.jpg) - -### Job execution - -![Example 03-3](../images/HPC_Runner_Diagrams_-_Example_03-3.jpg) diff --git a/_docs/examples/example_04.md b/_docs/examples/example_04.md deleted file mode 100644 index dbf5083..0000000 --- a/_docs/examples/example_04.md +++ /dev/null @@ -1,26 +0,0 @@ -# Example 04 - Non Linear Dependencies - -An example of non linear dependencies would be to have 3 job types, where two -of those jobtypes depend upon the same job. - -``` -#HPC jobname=preprocess - -#HPC jobname=analyze -#HPC deps=preprocess - -#HPC jobname=qc -#HPC deps=preprocess -``` - -The jobtype preprocess would execute first, followed by either analyze, qc or -both if there is room in the scheduler. Qc and analyze would have the same job -weight. 
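A sketch of submitting the workflow above (the infile name and the generated file names are assumptions, following the NNN_jobname convention used elsewhere in these docs):

```
hpcrunner.pl submit_jobs --infile my_workflow.sh
# hpc-runner/scratch would then contain something like:
#   001_preprocess.sh  002_analyze.sh  003_qc.sh
# analyze and qc are each submitted with an afterok dependency on the
# preprocess job, so either (or both) can start once it succeeds.
```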
- -### Submission and output directory structure - -![Example 04-1](../images/HPC_Runner_Diagrams_-_Example_04-1.jpg) - -### Dependency Tree - -![Example 04-2](../images/HPC_Runner_Diagrams_-_Example_04-2.jpg) diff --git a/_docs/examples/example_05.md b/_docs/examples/example_05.md deleted file mode 100644 index 34b5ce2..0000000 --- a/_docs/examples/example_05.md +++ /dev/null @@ -1,38 +0,0 @@ -# Example 05 - Declaring Task Dependencies - - -HPC Runner has two levels of dependencies. The first is the job type, declared as - -``` -#HPC jobname=preprocess - -#HPC jobname=qc -#HPC deps=preprocess -``` - -The second level declares within job type task dependencies. - -``` -#HPC jobname=preprocess -#TASK tags=sample1 -preprocess sample1 -#TASK tags=sample2 -preprocess sample2 - -#HPC jobname=qc -#HPC deps=preprocess -#TASK tags=sample1 -qc sample1 -#TASK tags=sample2 -qc sample2 -``` -Now, the qc job tagged with sample1 would only depend upon the sample1 task in -preprocess, instead of the entire preprocess job. - -### Submission and output directory structure - -![Example 05-1](../images/HPC_Runner_Diagrams_-_Example_05-1.jpg) - -### Dependency Tree - -![Example 05-2](../images/HPC_Runner_Diagrams_-_Example_05-2.jpg) diff --git a/_docs/examples/examples.md b/_docs/examples/examples.md deleted file mode 100644 index b4c0721..0000000 --- a/_docs/examples/examples.md +++ /dev/null @@ -1,3 +0,0 @@ -# Examples - -Currently, these examples are mostly conceptual. A repository of examples is coming along shortly! diff --git a/_docs/glossary.md b/_docs/glossary.md deleted file mode 100644 index b0dbc6c..0000000 --- a/_docs/glossary.md +++ /dev/null @@ -1,67 +0,0 @@ -# Glossary - -The HPC-Runner-Command libraries use certain naming conventions. - -### Submission - -A submission, in HPC-Runner-Command terms, is the single input given as --infile - -``` -hpcrunner.pl submit_jobs --infile resequencing.sh -``` - -### Job Types - -A single HPC-Runner-Command submission is comprised of one or more jobtypes, specified as - -``` -#HPC jobname=gzip -``` - -### Job Batches - -Job types are batched into one or more jobs based on the total number of jobs -and the commands_per_node. If using job arrays, which is the default, there is also -a max array size. - -If we have 100 tasks in a submission, with a single jobtype of gzip, and a max -array size of 50, we would submit 2 job arrays of length 50, 001_gzip and -002_gzip. - -### Jobs - -Jobs are the actual jobs submitted to the cluster. Each 'sbatch' call is a single job. - -### Tasks - -Job and job batches are made up of one or more tasks. - -``` -gzip file -``` - -is a task. - -``` - blastx --help -``` - -is another task. - - -### Concurrent/Parallel tasks - -This is defined as - -``` -#HPC procs=X -``` - -With x as the number of tasks we can run at any given time. It can be thought -of as the amount of multitasking we are able to do. 
- -If you are familiar with gnuparallel, it is very similar to - -``` -parallel --jobs X :::: infile -``` diff --git a/_docs/images/HPC_Runner_Diagrams_-_001_Heirarchy.jpg b/_docs/images/HPC_Runner_Diagrams_-_001_Heirarchy.jpg deleted file mode 100644 index 84517a3..0000000 Binary files a/_docs/images/HPC_Runner_Diagrams_-_001_Heirarchy.jpg and /dev/null differ diff --git a/_docs/images/HPC_Runner_Diagrams_-_Example_01-1.jpg b/_docs/images/HPC_Runner_Diagrams_-_Example_01-1.jpg deleted file mode 100644 index 5d3b234..0000000 Binary files a/_docs/images/HPC_Runner_Diagrams_-_Example_01-1.jpg and /dev/null differ diff --git a/_docs/images/HPC_Runner_Diagrams_-_Example_01-2.jpg b/_docs/images/HPC_Runner_Diagrams_-_Example_01-2.jpg deleted file mode 100644 index 0be5944..0000000 Binary files a/_docs/images/HPC_Runner_Diagrams_-_Example_01-2.jpg and /dev/null differ diff --git a/_docs/images/HPC_Runner_Diagrams_-_Example_01-3.jpg b/_docs/images/HPC_Runner_Diagrams_-_Example_01-3.jpg deleted file mode 100644 index 0be5944..0000000 Binary files a/_docs/images/HPC_Runner_Diagrams_-_Example_01-3.jpg and /dev/null differ diff --git a/_docs/images/HPC_Runner_Diagrams_-_Example_02-1.jpg b/_docs/images/HPC_Runner_Diagrams_-_Example_02-1.jpg deleted file mode 100644 index 906fb0f..0000000 Binary files a/_docs/images/HPC_Runner_Diagrams_-_Example_02-1.jpg and /dev/null differ diff --git a/_docs/images/HPC_Runner_Diagrams_-_Example_02-2.jpg b/_docs/images/HPC_Runner_Diagrams_-_Example_02-2.jpg deleted file mode 100644 index f5e026f..0000000 Binary files a/_docs/images/HPC_Runner_Diagrams_-_Example_02-2.jpg and /dev/null differ diff --git a/_docs/images/HPC_Runner_Diagrams_-_Example_03-1.jpg b/_docs/images/HPC_Runner_Diagrams_-_Example_03-1.jpg deleted file mode 100644 index 308c30a..0000000 Binary files a/_docs/images/HPC_Runner_Diagrams_-_Example_03-1.jpg and /dev/null differ diff --git a/_docs/images/HPC_Runner_Diagrams_-_Example_03-2.jpg b/_docs/images/HPC_Runner_Diagrams_-_Example_03-2.jpg deleted file mode 100644 index a2ff968..0000000 Binary files a/_docs/images/HPC_Runner_Diagrams_-_Example_03-2.jpg and /dev/null differ diff --git a/_docs/images/HPC_Runner_Diagrams_-_Example_03-3.jpg b/_docs/images/HPC_Runner_Diagrams_-_Example_03-3.jpg deleted file mode 100644 index 047cc55..0000000 Binary files a/_docs/images/HPC_Runner_Diagrams_-_Example_03-3.jpg and /dev/null differ diff --git a/_docs/images/HPC_Runner_Diagrams_-_Example_03-4.jpg b/_docs/images/HPC_Runner_Diagrams_-_Example_03-4.jpg deleted file mode 100644 index bf2bef3..0000000 Binary files a/_docs/images/HPC_Runner_Diagrams_-_Example_03-4.jpg and /dev/null differ diff --git a/_docs/images/HPC_Runner_Diagrams_-_Example_04-1.jpg b/_docs/images/HPC_Runner_Diagrams_-_Example_04-1.jpg deleted file mode 100644 index 009769d..0000000 Binary files a/_docs/images/HPC_Runner_Diagrams_-_Example_04-1.jpg and /dev/null differ diff --git a/_docs/images/HPC_Runner_Diagrams_-_Example_04-2.jpg b/_docs/images/HPC_Runner_Diagrams_-_Example_04-2.jpg deleted file mode 100644 index bec60ec..0000000 Binary files a/_docs/images/HPC_Runner_Diagrams_-_Example_04-2.jpg and /dev/null differ diff --git a/_docs/images/HPC_Runner_Diagrams_-_Example_05-1.jpg b/_docs/images/HPC_Runner_Diagrams_-_Example_05-1.jpg deleted file mode 100644 index 9e58262..0000000 Binary files a/_docs/images/HPC_Runner_Diagrams_-_Example_05-1.jpg and /dev/null differ diff --git a/_docs/images/HPC_Runner_Diagrams_-_Example_05-2.jpg b/_docs/images/HPC_Runner_Diagrams_-_Example_05-2.jpg deleted 
file mode 100644 index 9dbdce2..0000000 Binary files a/_docs/images/HPC_Runner_Diagrams_-_Example_05-2.jpg and /dev/null differ diff --git a/_docs/images/dependency.jpg b/_docs/images/dependency.jpg deleted file mode 100644 index 07769ff..0000000 Binary files a/_docs/images/dependency.jpg and /dev/null differ diff --git a/_docs/images/rabbit.jpeg b/_docs/images/rabbit.jpeg deleted file mode 100644 index 551d52f..0000000 Binary files a/_docs/images/rabbit.jpeg and /dev/null differ diff --git a/_docs/job_execution/job_execution.md b/_docs/job_execution/job_execution.md deleted file mode 100644 index 59b8158..0000000 --- a/_docs/job_execution/job_execution.md +++ /dev/null @@ -1,10 +0,0 @@ -# Job Execution - -Each time a job is executed a threadpool is created, with the maximum number of -tasks running concurrently at any time being equal to the number of procs given -(supplied by #HPC procs=N). - -Each task is run in isolation in its own thead to keep it from interfering with -other tasks. In this way if one task fails the entire job does not necessarily -fail. Each task has its stdout, stderr, exit code, and duration logged, along -with any task tags. diff --git a/_docs/job_submission/comparison.md b/_docs/job_submission/comparison.md deleted file mode 100644 index 92e8afa..0000000 --- a/_docs/job_submission/comparison.md +++ /dev/null @@ -1,19 +0,0 @@ -Side by side comparison of submission parameters by scheduler type - -| Variable Name | HPC Runner Variable | Torque/PBS | SLURM | -| -- | -- | -- | -- | -| Job Name | #HPC jobname=$JOBNAME | #PBS -N $JOBNAME | #SBATCH --job-name=$JOBNAME | -| Job Dependencies | #HPC deps=$JOBNAME | #PBS -W depend=afterok=$JOBID | #SBATCH --dependency=afterok=$JOBID | -| CPUS | #HPC cpus_per_task=$CPUS_PER_TASK | #PBS -l nodes=$NODES_COUNT:ppn=$CPUS_PER_TASK | #SBATCH --cpus-per-task=$CPUS_PER_TASK | -| Queue/Partition | #HPC partition=$PARTTION or #HPC queue=$PARTITION | #PBS -q queue=$PARTITION | #SBATCH --partition=$PARTITION | -| ntasks | #HPC ntasks=$NTASKS | NA | #SBATCH --ntasks=$NTASKS | -| Number of Nodes | #HPC nodes_count=$NODES | #PBS -l nodes=$NODES_COUNT:ppn=$CPUS_PER_TASK | #SBATCH --nodes=$NODES_COUNT | -| Walltime | #HPC walltime=$WALLTIME | #PBS --walltime=$WALLTIME | #SBATCH --time=$WALLTIME | - -### Resources - -http://www.sdsc.edu/~hocks/FG/PBS.slurm.html - -http://slurm.schedmd.com/sbatch.html - Simple Linux Utility for Resource Management - -https://wiki.hpcc.msu.edu/display/hpccdocs/Advanced+Scripting+Using+PBS+Environment+Variables Advanced Scripting Using PBS Environment Variables - HPCC Documentation and User Manual - HPCC Wiki diff --git a/_docs/job_submission/dependencies.md b/_docs/job_submission/dependencies.md deleted file mode 100644 index 3f2b51b..0000000 --- a/_docs/job_submission/dependencies.md +++ /dev/null @@ -1,175 +0,0 @@ -# Overview - -An analysis can be thought of as a series of inputs and outputs, and can be mapped like so. 
- -![Dependency Tree](../images/dependency.jpg) - -``` -#INPUT <- OUTPUT -QC Raw Reads -Trim Reads <- QC Trim Reads -QC Trim Reads <- Align Reads -Align Reads <- Convert Alignment -Convert Reads <- Call Variants -``` - -## Job Type dependency Resolution - -Using the HPC Runner Syntax our dependencies would look like: - -``` -#HPC jobname=qc_raw_reads - -fastqc --read1 Sample1.raw_read1.fastq --read2 Sample1.raw_read2.fastq -fastqc --read1 Sample2.raw_read1.fastq --read2 Sample2.raw_read2.fastq -fastqc --read1 Sample3.raw_read1.fastq --read2 Sample3.raw_read2.fastq -fastqc --read1 Sample4.raw_read1.fastq --read2 Sample2.raw_read2.fastq - -#HPC jobname=trim_reads - -java -jar trimmomatic Sample1 -java -jar trimmomatic Sample2 -java -jar trimmomatic Sample3 -java -jar trimmomatic Sample4 - -#HPC jobname=qc_trim_reads -#HPC deps=trim_reads - -fastqc --read1 Sample1.trim_read1.fastq --read2 Sample1.trim_read2.fastq -fastqc --read1 Sample2.trim_read1.fastq --read2 Sample2.trim_read2.fastq -fastqc --read1 Sample3.trim_read1.fastq --read2 Sample3.trim_read2.fastq -fastqc --read1 Sample4.trim_read1.fastq --read2 Sample4.trim_read2.fastq - -#HPC jobname=align_reads -#HPC deps=trim_reads - -bowtie2 -x homo_sapiens_ensembl -1 Sample1.trim_read1.fastq -2 Sample1.trim_read2.fastq -out Sample1.sam -bowtie2 -x homo_sapiens_ensembl -1 Sample2.trim_read1.fastq -2 Sample2.trim_read2.fastq -out Sample2.sam -bowtie2 -x homo_sapiens_ensembl -1 Sample3.trim_read1.fastq -2 Sample3.trim_read2.fastq -out Sample3.sam -bowtie2 -x homo_sapiens_ensembl -1 Sample4.trim_read1.fastq -2 Sample4.trim_read2.fastq -out Sample4.sam - -#HPC jobname=convert_alignment -#HPC deps=align_reads - -samtools view Sample1.sam Sample1.bam -samtools view Sample2.sam Sample2.bam -samtools view Sample3.sam Sample3.bam -samtools view Sample4.sam Sample4.bam - -#HPC jobname=call_variants -#HPC deps=convert_alignment - -samtools mpileup Sample1.bam > Sample1.vcf -samtools mpileup Sample2.bam > Sample2.vcf -samtools mpileup Sample3.bam > Sample3.vcf -samtools mpileup Sample4.bam > Sample4.vcf -``` - -This would take each jobtype, and make it depend upon all job batches from its dependencies. - -For instance, the job type align_reads depends upon all trim_reads completing. - -We drew our dependency tree but mapping our input and outputs. This is always a very helpful and informative exercise! - -| Job Type Name | Variable | INPUT | OUTPUT | -| -- | -- | -- | -- | -| QC Raw Reads | qc_raw_reads | Sample1.raw_read1.fastq, Sample1.raw_read2.fastq | raw_fastqc_folder | -| Trim Raw Reads | trim_raw_reads | Sample1.raw_read1.fastq, Sample1.raw_read2.fastq | Sample1.trim_read1.fastq, Sample1.trim_read2.fastq | -| QC Trimmed Reads | qc_trim_reads | Sample1.raw_read1.fastq, Sample1.raw_read2.fastq | trimmomatic_fastqc_folder | -| Align Trimmed Reads | align_reads | Sample1.trim_read1.fastq, Sample1.trim_read2.fastq | Sample1.sam | -| Convert Alignment | convert_alignment | Sample1.sam | Sample1.bam | - -## Task Dependency Resolution - - -In addition to using job type dependency resolution, we can also defined task -dependencies. Please note that task dependencies are only supported when using -job arrays, which is the default HPC-Runner-Command configuration for SLURM and -PBS. - -Let's take our previous example. 
- -``` -#HPC jobname=qc_raw_reads - -fastqc --read1 Sample1.raw_read1.fastq --read2 Sample1.raw_read2.fastq -fastqc --read1 Sample2.raw_read1.fastq --read2 Sample2.raw_read2.fastq -fastqc --read1 Sample3.raw_read1.fastq --read2 Sample3.raw_read2.fastq -fastqc --read1 Sample4.raw_read1.fastq --read2 Sample2.raw_read2.fastq - -#HPC jobname=trim_reads - -#TASK tags=Sample1 -java -jar trimmomatic Sample1 -#TASK tags=Sample2 -java -jar trimmomatic Sample2 -#TASK tags=Sample3 -java -jar trimmomatic Sample3 -#TASK tags=Sample4 -java -jar trimmomatic Sample4 - -#HPC jobname=qc_trim_reads -#HPC deps=trim_reads - -#TASK tags=Sample1 -fastqc --read1 Sample1.trim_read1.fastq --read2 Sample1.trim_read2.fastq -#TASK tags=Sample2 -fastqc --read1 Sample2.trim_read1.fastq --read2 Sample2.trim_read2.fastq -#TASK tags=Sample3 -fastqc --read1 Sample3.trim_read1.fastq --read2 Sample3.trim_read2.fastq -#TASK tags=Sample4 -fastqc --read1 Sample4.trim_read1.fastq --read2 Sample4.trim_read2.fastq - -#HPC jobname=align_reads -#HPC deps=trim_reads - -#TASK tags=Sample1 -bowtie2 -x homo_sapiens_ensembl -1 Sample1.trim_read1.fastq -2 Sample1.trim_read2.fastq -out Sample1.sam -#TASK tags=Sample2 -bowtie2 -x homo_sapiens_ensembl -1 Sample2.trim_read1.fastq -2 Sample2.trim_read2.fastq -out Sample2.sam -#TASK tags=Sample3 -bowtie2 -x homo_sapiens_ensembl -1 Sample3.trim_read1.fastq -2 Sample3.trim_read2.fastq -out Sample3.sam -#TASK tags=Sample4 -bowtie2 -x homo_sapiens_ensembl -1 Sample4.trim_read1.fastq -2 Sample4.trim_read2.fastq -out Sample4.sam - -#HPC jobname=convert_alignment -#HPC deps=align_reads - -#TASK tags=Sample1 -samtools view Sample1.sam Sample1.bam -#TASK tags=Sample2 -samtools view Sample2.sam Sample2.bam -#TASK tags=Sample3 -samtools view Sample3.sam Sample3.bam -#TASK tags=Sample4 -samtools view Sample4.sam Sample4.bam - -#HPC jobname=call_variants -#HPC deps=convert_alignment - -#TASK tags=Sample1 -samtools mpileup Sample1.bam > Sample1.vcf -#TASK tags=Sample2 -samtools mpileup Sample2.bam > Sample2.vcf -#TASK tags=Sample3 -samtools mpileup Sample3.bam > Sample3.vcf -#TASK tags=Sample4 -samtools mpileup Sample4.bam > Sample4.vcf -``` - -Now we will take trim_reads and align_reads to figure out exactly what is happening. - -For the sake of this exercise let's say trim_reads is submitted and has a scheduler id of 1234. - -Each task (when commands_per_node=1) is an element in the array. - -| JobName | SchedulerID | Task Tag | Scheduler Array ID | Scheduler Array Dependencies | -| -- | -- | -- | -- | -- | -| trim_reads | 1234 | Sample1 | 1234_1 | | -| trim_reads | 1234 | Sample2 | 1234_2 | | -| trim_reads | 1234 | Sample3 | 1234_3 | | -| trim_reads | 1234 | Sample4 | 1234_4 | | -| align_reads | 2345 | Sample1 | 2345_1 | 1234_1 | -| align_reads | 2345 | Sample2 | 2345_2 | 1234_2 | -| align_reads | 2345 | Sample3 | 2345_3 | 1234_3 | -| align_reads | 2345 | Sample4 | 2345_4 | 1234_4 | diff --git a/_docs/job_submission/hpcrunner_submission_parameters.md b/_docs/job_submission/hpcrunner_submission_parameters.md deleted file mode 100644 index 0246df6..0000000 --- a/_docs/job_submission/hpcrunner_submission_parameters.md +++ /dev/null @@ -1,70 +0,0 @@ -``` -hpcrunner.pl submit_jobs --help -``` - -``` - --afterok afterok switch in slurm. 
--afterok 123,134 will - tell slurm to start this job after 123,134 have - exited successfully [Multiple; Split by ","] - --autocommit Run a git add -A on dirty build [Flag] - --config Path to command config file - --cpus_per_task --cpus-per-task switch in slurm [Default:"4"; - Integer] - --custom_command - --help -h --usage -? Prints this usage information. [Flag] - --hpc_plugins Load hpc_plugins [Multiple; Split by ","] - --hpc_plugins_opts Options for hpc_plugins [Key-Value] - --infile File of commands separated by newline. The command - 'wait' indicates all previous commands should - finish before starting the next one. [Required] - --job_plugins Load job execution plugins [Multiple; Split by ","] - --job_plugins_opts Options for job_plugins [Key-Value] - --job_scheduler_id This defaults to your current Job Scheduler ID. - Ignore this if running on a single node - --jobname Specify a job name, each job will be appended with - its batch order [Default:"hpcjob_001"] - --logdir Directory where logfiles are written. Defaults to - current_working_directory/prunner_current_date_time - /log1 .. log2 .. log3' - --logname [Default:"hpcrunner_logs"] - --max_array_size [Default:"200"; Integer] - --mem Supply a memory limit [Default:"10GB"] - --metastr Meta str passed from HPC::Runner::Scheduler - --module List of modules to load ex. R2, samtools, etc [ - Multiple; Split by ","] - --nodes_count Number of nodes requested. You should only use this - if submitting parallel jobs. [Default:"1"; Integer - ] - --ntasks --ntasks switch in slurm. This is equal to the - number of concurrent tasks on each node * the - number of nodes, not the total number of tasks [ - Default:"1"; Integer] - --ntasks_per_node --ntasks-per-node switch in slurm. total concurrent - tasks on a node. [Default:"1"; Integer] - --outdir Directory to write out files. - --partition Slurm partition to submit jobs to. Defaults to the - partition with the most available nodes - --plugins Load aplication plugins [Multiple; Split by ","] - --plugins_opts Options for application plugins [Key-Value] - --process_table - --procs Total number of concurrently running jobs allowed - at any time. [Default:"1"; Integer] - --serial Use this if you wish to run each job run one after - another, with each job starting only after the - previous has completed successfully [Flag] - --show_processid Show the process ID per logging message. This is - useful when aggregating logs. [Flag] - --tags Tags for the whole submission [Multiple; Split by " - ,"] - --use_batches Switch to use batches. The default is to use job - arrays. Warning! This was the default way of - submitting before 3.0, but is not well supported. [ - Flag] - --user This defaults to your current user ID. This can - only be changed if running as an admin user - --version Submission version. Each version has a - corresponding git tag. See the difference between - tags with `git diff tag1 tag2`. Tags are always - version numbers, starting with 0.01. - --walltime [Default:"00:20:00"] -``` diff --git a/_docs/job_submission/job_submission.md b/_docs/job_submission/job_submission.md deleted file mode 100644 index 9e66029..0000000 --- a/_docs/job_submission/job_submission.md +++ /dev/null @@ -1,12 +0,0 @@ -HPC-Runner-Command efficiently chunks your HPC workflow into batches of HPC scheduler (PBS, SLURM) jobs. The submission script, supplied by --infile, is split based on the number of commands_per_node. - -``` -#HPC commands_per_node=2 -job_001 -job_002 -... 
job_009
job_010
```

This would be split into 5 HPC job submissions, with 2 commands (job_001-job_002, job_003-job_004 ... job_009-job_010) per job.

diff --git a/_docs/overview.md b/_docs/overview.md deleted file mode 100644 index d2bf06e..0000000 --- a/_docs/overview.md +++ /dev/null @@ -1,22 +0,0 @@

# Overview

1. Chunk Workflow
2. Submit jobs to scheduler
3. Execute jobs

HPC-Runner-Command uses a predefined set of parameters to chunk your commands into HPC scheduler (PBS, SLURM) jobs. This chunking is done based on the job type (qc, alignment, postprocessing), the job dependencies (alignment should not run until qc has completed), and the number of tasks (if we have a node with 16 cpus, we should run 16 single-threaded processes). Each of these jobs is submitted to the scheduler using that scheduler's standard template. Each job is then executed, with one or more processes running in parallel based on the availability of resources.

    hpcrunner.pl submit_jobs --infile my_submission_script.sh

Chunks and submits your jobs.

    hpcrunner.pl execute_job --infile 001_batch_my_submission_script.sh --procs N

Executes your job on a given node with N processes running in parallel.

diff --git a/_docs/quick_start.md b/_docs/quick_start.md deleted file mode 100644 index 5c7ef4b..0000000 --- a/_docs/quick_start.md +++ /dev/null @@ -1,133 +0,0 @@

# Quick Start

# Create a New Project

You can create a new project, with a sane directory structure, by using

    hpcrunner.pl new MyNewProject

# Submit Workflows

## Simple Example

Our simplest example is a single job type with no dependencies: each task is independent of all other tasks.

### Workflow file

    #preprocess.sh

    echo "preprocess" && sleep 10;
    echo "preprocess" && sleep 10;
    echo "preprocess" && sleep 10;

### Submit to the scheduler

    hpcrunner.pl submit_jobs --infile preprocess.sh

### Look at results!

    tree hpc-runner

## Job Type Dependency Declaration

Most of the time we have jobs that depend upon other jobs.

### Workflow file

    #blastx.sh

    #HPC jobname=unzip
    unzip Sample1.zip
    unzip Sample2.zip
    unzip Sample3.zip

    #HPC jobname=blastx
    #HPC deps=unzip
    blastx --db env_nr --sample Sample1.fasta
    blastx --db env_nr --sample Sample2.fasta
    blastx --db env_nr --sample Sample3.fasta

### Submit to the scheduler

    hpcrunner.pl submit_jobs --infile blastx.sh

### Look at results!

    tree hpc-runner

## Task Dependency Declaration

Within a job type we can declare dependencies on particular tasks.

### Workflow file

    #blastx.sh

    #HPC jobname=unzip
    #TASK tags=Sample1
    unzip Sample1.zip
    #TASK tags=Sample2
    unzip Sample2.zip
    #TASK tags=Sample3
    unzip Sample3.zip

    #HPC jobname=blastx
    #HPC deps=unzip
    #TASK tags=Sample1
    blastx --db env_nr --sample Sample1.fasta
    #TASK tags=Sample2
    blastx --db env_nr --sample Sample2.fasta
    #TASK tags=Sample3
    blastx --db env_nr --sample Sample3.fasta

### Submit to the scheduler

    hpcrunner.pl submit_jobs --infile blastx.sh

### Look at results!

    tree hpc-runner

## Declare Scheduler Variables

Each scheduler has its own set of variables. HPC::Runner::Command has a set of generalized variables for declaring types across templates.
For more -information please see [Job Scheduler Comparison](https://jerowe.gitbooks.io/hpc-runner-command-docs/content/job_submission/comparison.html) - -Additionally, for workflows with a large number of tasks, please see [Considerations for Workflows with a Large Number of Tasks](https://jerowe.gitbooks.io/hpc-runner-command-docs/content/design_workflow.html#considerations-for-workflows-with-a-large-number-of-tasks) for information on how to group tasks together. - -### Workflow file - - #blastx.sh - - #HPC jobname=unzip - #HPC cpus_per_task=1 - #HPC partition=serial - #HPC commands_per_node=1 - #TASK tags=Sample1 - unzip Sample1.zip - #TASK tags=Sample2 - unzip Sample2.zip - #TASK tags=Sample3 - unzip Sample3.zip - - #HPC jobname=blastx - #HPC cpus_per_task=6 - #HPC deps=unzip - #TASK tags=Sample1 - blastx --threads 6 --db env_nr --sample Sample1.fasta - #TASK tags=Sample2 - blastx --threads 6 --db env_nr --sample Sample2.fasta - #TASK tags=Sample3 - blastx --threads 6 --db env_nr --sample Sample3.fasta - -### Submit to the scheduler - - hpcrunner.pl submit_jobs --infile blastx.sh - -### Look at results! - - tree hpc-runner - diff --git a/_docs/why.md deleted file mode 100644 index 4c5edaa..0000000 --- a/_docs/why.md +++ /dev/null @@ -1,39 +0,0 @@ -# Why use HPC::Runner? - -HPC-Runner-App optimizes your workflows by batching them and submitting -them as jobs across an HPC cluster. It does this by completing the -following objectives: - -1. Templates away your business logic - without the need to rewrite any - of your existing analysis. -2. Makes your workflows reproducible and easily shared. -3. Takes advantage of HPC resources by splitting your jobs into - components. -4. Total transparency - because the workflow itself is not rewritten, - your workflow is transparent and easily understood by support staff. - - -## Template away your business logic - -Many HPC submission wrappers require a user to rewrite their job into a -specific syntax. HPC-Runner-App borrows from the simple syntax of HPC -schedulers. There is no need to re-engineer your code into Python or complex -configuration files. - -## Reproducible Workflows - -Workflows are saved in a single file that is easily shared with collaborators. -Parameters can be changed with a simple text substitution. - -## Make the best use of HPC resources - -The logic of your workflows remains the same whether processing 1 or 1000 -samples. HPC-Runner-App splits your workflow into efficient chunks, and lets -the scheduler take care of the rest. - -## Total Transparency - -Your code is passed to the scheduler and run as is. Everything is bash-based -and uses standard templates. Troubleshooting your jobs and getting support -from your HPC admins becomes a straightforward task without the need to dig -through a separate layer of logic. 
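To make the directive syntax described above concrete: the `#HPC key=value` comment lines are plain key/value pairs, pulled out with a one-line regex (the same pattern used by the `parse_meta` helper further down in this diff). Below is a minimal, self-contained Perl sketch of that parsing; the workflow text in the heredoc is illustrative only, not part of this commit.

```perl
#!/usr/bin/env perl
use strict;
use warnings;

# Extract key/value pairs from #HPC directive lines, using the same
# regex as the parse_meta helper in this diff: m/ (\w+)=(.+)$/
my $workflow = <<'EOF';
#HPC jobname=blastx
#HPC cpus_per_task=6
#HPC deps=unzip
blastx --threads 6 --db env_nr --sample Sample1.fasta
EOF

for my $line ( split /\n/, $workflow ) {
    next unless $line =~ m/^#HPC /;
    my ( $key, $value ) = $line =~ m/ (\w+)=(.+)$/;
    print "$key => $value\n";    # e.g. jobname => blastx
}
```

Because the directives live in comments, the same file still runs as plain bash when handed to `bash` directly - which is the transparency argument made above.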
diff --git a/book.json b/book.json deleted file mode 100644 index d7e5733..0000000 --- a/book.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - - "root": "./_docs" -} diff --git a/inotify-hookable.sh b/inotify-hookable.sh index 024c484..469c06e 100755 --- a/inotify-hookable.sh +++ b/inotify-hookable.sh @@ -4,6 +4,10 @@ export DEV='DEV' RSYNC_HPC="rsync -avz ../HPC-Runner-Command gencore@dalma.abudhabi.nyu.edu:/home/gencore/hpcrunner-test/" RSYNC_BIOSAILS="rsync -avz ../BioSAILs gencore@dalma.abudhabi.nyu.edu:/home/gencore/hpcrunner-test/" RSYNC_SQL="rsync -avz ../HPC-Runner-Command-Plugin-Logger-Sqlite gencore@dalma.abudhabi.nyu.edu:/home/gencore/hpcrunner-test/" + +$RSYNC_HPC +$RSYNC_BIOSAILS + inotify-hookable \ --watch-directories /home/jillian/Dropbox/projects/HPC-Runner-Libs/New/BioSAILs/lib \ --watch-directories lib \ diff --git a/lib/HPC/Runner/Command.pm b/lib/HPC/Runner/Command.pm index d3ca681..c76ab0e 100644 --- a/lib/HPC/Runner/Command.pm +++ b/lib/HPC/Runner/Command.pm @@ -7,6 +7,19 @@ with 'BioSAILs::Utils::LoadConfigs'; use MooseX::Types::Path::Tiny qw/Path Paths AbsPath AbsFile/; +app_strict 0; +app_exclude( + 'HPC::Runner::Command::Utils', + 'HPC::Runner::Command::Logger', + 'HPC::Runner::Command::submit_jobs::Utils', + 'HPC::Runner::Command::submit_jobs::Plugin', + 'HPC::Runner::Command::submit_jobs::Logger', + 'HPC::Runner::Command::stats::Logger', + 'HPC::Runner::Command::execute_job::Utils', + 'HPC::Runner::Command::execute_job::Logger', + 'HPC::Runner::Command::execute_job::Base', +); + option '+config_base' => ( default => '.hpcrunner', ); =head3 project @@ -22,7 +35,7 @@ option 'project' => ( . '#HPC jobname=gzip will be submitted as 001_project_gzip', required => 0, predicate => 'has_project', - cmd_aliases => ['p'], + cmd_aliases => ['pr'], ); option 'no_log_json' => ( @@ -54,7 +67,7 @@ sub BUILD { } =begin HTML

[image: HPC::Runner::Command]

=end HTML @@ -85,7 +98,7 @@ Get help by heading on over to github and raising an issue. L. Please see the complete documentation at L. +https://biosails.gitbooks.io/hpc-runner-command-docs/content/>. =head1 Quick Start - Create a New Project @@ -193,11 +206,11 @@ Within a job type we can declare dependencies on particular tasks. Each scheduler has its own set of variables. HPC::Runner::Command has a set of generalized variables for declaring types across templates. For more information please see L +Comparison|https://biosails.gitbooks.io/hpc-runner-command-docs/content/job_submission/comparison.html> Additionally, for workflows with a large number of tasks, please see L +Tasks|https://biosails.gitbooks.io/hpc-runner-command-docs/content/design_workflow.html#considerations-for-workflows-with-a-large-number-of-tasks> for information on how to group tasks together. =head3 Workflow file diff --git a/lib/HPC/Runner/Command/Logger/JSON.pm b/lib/HPC/Runner/Command/Logger/JSON.pm index 7c64c5e..be25140 100644 --- a/lib/HPC/Runner/Command/Logger/JSON.pm +++ b/lib/HPC/Runner/Command/Logger/JSON.pm @@ -39,35 +39,13 @@ sub create_data_dir { return; } - my $data_dir = File::Spec->catdir( $self->cache_dir, '.hpcrunner-data' ); - my $project = "NULL_PROJECT"; - $project = $self->project if $self->has_project; - - ##Create initial document - my $dt = DateTime->now( time_zone => 'local' ); - $dt = "$dt"; - $dt =~ s/:/-/g; - + my $data_dir = File::Spec->catdir( $self->logdir, 'stats' ); my $ug = Data::UUID->new; my $uuid = $ug->create(); $uuid = $ug->to_string($uuid); - $self->submission_uuid($uuid); - - my $path = File::Spec->catdir( $data_dir, $project ); - make_path($path); - - if ( $self->has_project ) { - $path = - File::Spec->catdir( $data_dir, $project, - $dt . '__PR_' . $project . '__UID_' . $uuid ); - } - else { - $path = - File::Spec->catdir( $data_dir, $project, $dt . '__UID_' . $uuid ); - } - - return $path; + + return $data_dir; } 1; diff --git a/lib/HPC/Runner/Command/Utils/Log.pm b/lib/HPC/Runner/Command/Utils/Log.pm index f85502a..93a592f 100644 --- a/lib/HPC/Runner/Command/Utils/Log.pm +++ b/lib/HPC/Runner/Command/Utils/Log.pm @@ -112,7 +112,7 @@ option 'metastr' => ( option 'logname' => ( isa => 'Str', is => 'rw', - default => 'hpcrunner_logs', + default => '000_hpcrunner_logs', required => 0, ); diff --git a/lib/HPC/Runner/Command/execute_job/Base.pm b/lib/HPC/Runner/Command/execute_job/Base.pm index 5df1189..78041ea 100644 --- a/lib/HPC/Runner/Command/execute_job/Base.pm +++ b/lib/HPC/Runner/Command/execute_job/Base.pm @@ -36,6 +36,7 @@ has 'job_scheduler_id' => ( || $ENV{SBATCH_JOB_ID} || $ENV{PBS_JOBID} || $ENV{JOB_ID} + || $ENV{AWS_BATCH_JOB_ARRAY_INDEX} || ''; if ( $self->can('task_id') && $self->task_id ) { $scheduler_id = $scheduler_id . '_' . 
$self->task_id; diff --git a/lib/HPC/Runner/Command/execute_job/Logger/JSON.pm b/lib/HPC/Runner/Command/execute_job/Logger/JSON.pm index f62c6ed..d328e72 100644 --- a/lib/HPC/Runner/Command/execute_job/Logger/JSON.pm +++ b/lib/HPC/Runner/Command/execute_job/Logger/JSON.pm @@ -68,7 +68,7 @@ sub create_json_task { $self->write_lock; $self->add_to_running( $data_dir, $task_obj ); try { - $self->lock_file->remove; + $self->lock_file->remove; }; } @@ -97,39 +97,30 @@ sub update_json_task { } } - # my $task_obj = $self->get_from_running($data_dir); - my $task_obj = {}; - - $task_obj->{exit_time} = $self->table_data->{exit_time}; - $task_obj->{duration} = $self->table_data->{duration}; - $task_obj->{exit_code} = $self->table_data->{exitcode}; - $task_obj->{task_tags} = $tags; - $task_obj->{cmdpid} = $self->table_data->{cmdpid}; - $task_obj->{start_time} = $self->table_data->{start_time}; - $task_obj->{task_id} = $self->table_data->{task_id}; - - # $task_obj->{memory_profile} = {}; - # - # foreach my $stat (@stats) { - # $task_obj->{memory_profile}->{$stat}->{low} = - # $self->task_mem_data->{low}->{$stat}; - # $task_obj->{memory_profile}->{$stat}->{high} = - # $self->task_mem_data->{high}->{$stat}; - # $task_obj->{memory_profile}->{$stat}->{mean} = - # $self->task_mem_data->{mean}->{$stat}; - # $task_obj->{memory_profile}->{$stat}->{count} = - # $self->task_mem_data->{count}->{$stat}; - # } + my $task_obj = $self->get_from_running($data_dir); + + # my $task_obj = {}; + + $task_obj->{exit_time} = $self->table_data->{exit_time}; + $task_obj->{duration} = $self->table_data->{duration}; + $task_obj->{exit_code} = $self->table_data->{exitcode}; + $task_obj->{task_tags} = $tags; + $task_obj->{cmdpid} = $self->table_data->{cmdpid}; + $task_obj->{start_time} = $self->table_data->{start_time}; + $task_obj->{task_id} = $self->table_data->{task_id}; + $task_obj->{scheduler_id} = $self->table_data->{scheduler_id} + if $self->table_data->{scheduler_id}; + $task_obj->{hostname} = $self->hostname; if ( !$self->no_log_json ) { $self->check_lock; $self->write_lock; - $self->remove_from_running($data_dir); + # $self->remove_from_running($data_dir); ##TODO Add in mem for job $self->add_to_complete( $data_dir, $task_obj ); try { - $self->lock_file->remove; + $self->lock_file->remove; }; } @@ -143,7 +134,8 @@ sub add_to_complete { my $data_dir = shift; my $task_data = shift; - my $c_file = File::Spec->catfile( $data_dir, 'complete.json' ); + my $pad = sprintf "%.4d", $self->counter; + my $c_file = File::Spec->catfile( $data_dir, $pad . '.json' ); my $json_obj = $self->read_json($c_file); @@ -161,7 +153,7 @@ sub create_task_file { my $json_obj = shift; my $t_file = File::Spec->catfile( $data_dir, $self->counter . '.json' ); - $self->write_json( $t_file, $json_obj, ); + $self->write_json( $t_file, $json_obj, ); } sub add_to_running { @@ -169,9 +161,10 @@ sub add_to_running { my $data_dir = shift; my $task_data = shift; - my $r_file = File::Spec->catfile( $data_dir, 'running.json' ); + my $pad = sprintf "%.4d", $self->counter; + my $r_file = File::Spec->catfile( $data_dir, $pad . 
'.json' ); - my $json_obj = $self->read_json( $r_file, ); + my $json_obj = $self->read_json( $r_file, ); $json_obj->{ $self->counter } = $task_data; $self->write_json( $r_file, $json_obj, ); @@ -183,7 +176,7 @@ sub remove_from_running { my $r_file = File::Spec->catfile( $data_dir, 'running.json' ); - my $json_obj = $self->read_json( $r_file, ); + my $json_obj = $self->read_json( $r_file, ); delete $json_obj->{ $self->table_data->{task_id} }; $self->write_json( $r_file, $json_obj, ); @@ -193,8 +186,8 @@ sub get_from_running { my $self = shift; my $data_dir = shift; - my $r_file = File::Spec->catfile( $data_dir, 'running.json' ); - my $json_obj = $self->read_json( $r_file, ); + my $r_file = File::Spec->catfile( $data_dir, 'running.json' ); + my $json_obj = $self->read_json( $r_file, ); return $json_obj->{ $self->table_data->{task_id} }; } @@ -231,7 +224,7 @@ sub write_json { $json_text = ''; }; - write_file($file, $json_text); + write_file( $file, $json_text ); } 1; diff --git a/lib/HPC/Runner/Command/execute_job/Utils/MCE.pm b/lib/HPC/Runner/Command/execute_job/Utils/MCE.pm index 4d2ebbb..4bbae61 100644 --- a/lib/HPC/Runner/Command/execute_job/Utils/MCE.pm +++ b/lib/HPC/Runner/Command/execute_job/Utils/MCE.pm @@ -45,10 +45,22 @@ has 'read_command' => ( lazy => 1, default => sub { my $self = shift; + if ( $self->can('task_id') && !defined $self->task_id ) { + $self->single_node(1); + return; + } + if ( $self->can('batch_index_start') + && !defined $self->batch_index_start ) + { + $self->single_node(1); + return; + } if ( $self->can('task_id') && $self->can('batch_index_start') ) { return $self->task_id - $self->batch_index_start; } - elsif ( $self->can('batch_index_start') ) { + elsif ( $self->can('batch_index_start') + && defined $self->batch_index_start ) + { return $self->batch_index_start - 1; } else { @@ -167,6 +179,10 @@ sub run_mce { $self->append_logfile(".log"); $self->log( $self->init_log ); + if($self->single_node){ + print "Logging to ".$self->logfile."\n"; + } + $self->mce->spawn; #MCE specific @@ -220,14 +236,7 @@ sub parse_file_mce { "Error opening file " . $self->infile . " " . $! ); die print "The infile does not exist!\n" unless $fh; - if ( $self->single_node ) { - $self->log_main_messages( 'info', 'Running in single node mode' ); - while (<$fh>) { - my $line = $_; - $self->process_lines($line); - } - } - elsif ( defined $self->read_command ) { + if ( defined $self->read_command ) { $self->log_main_messages( 'info', 'Executing Command # ' . $self->read_command ); my $cmds = $self->parse_cmd_file($fh); @@ -237,6 +246,13 @@ sub parse_file_mce { $self->wait(0); } } + elsif ( $self->single_node ) { + $self->log_main_messages( 'info', 'Running in single node mode' ); + while (<$fh>) { + my $line = $_; + $self->process_lines($line); + } + } else { $self->log_main_messages( 'fatal', 'No running mode found. 
Exiting' ); exit 1; @@ -265,21 +281,22 @@ sub parse_cmd_file { my $line = $_; next unless $line; next unless $line =~ m/\S/; + next if $line =~ /^\s*$/ ; $cmd .= $line; next if $line =~ m/\\$/; next if $line =~ m/^#/; if ( $x == $self->read_command && $cmd_count < $self->commands ) { - $add_cmds = 1; + $add_cmds = 1; } if ($add_cmds) { - push( @cmds, $cmd ); - $cmd_count++; + push( @cmds, $cmd ); + $cmd_count++; } $x++; if ( $x >= $self->read_command && $cmd_count >= $self->commands ) { - last; + last; } $cmd = ''; } @@ -290,92 +307,108 @@ ##TODO separate out single node mode sub process_lines { - my $self = shift; - my $line = shift; - - if ( $line =~ m/^#TASK/ ) { - $self->add_cmd($line); - } - - $self->check_single_node($line) if $self->single_node; - - return if $line =~ m/^#/; - $self->add_cmd($line); - - ##Bash style we continue to the next line if the current line ends in \ - return if $line =~ m/\\$/; - if ( $self->match_cmd(qr/^wait$/) ) { - $self->hold_pool; - } - else { - $self->add_pool; - } + my $self = shift; + my $line = shift; + + return unless $line; + if ( $line =~ m/^#TASK/ ) { + $self->add_cmd($line); + } + + $self->check_single_node($line) if $self->single_node; + + return if $line =~ m/^#/; + $self->add_cmd($line); + + ##Bash style we continue to the next line if the current line ends in \ + return if $line =~ m/\\$/; + return unless $self->has_cmd; + if ( $self->match_cmd(qr/^wait$/) ) { + $self->hold_pool; + } + else { + $self->add_pool; + } } sub check_single_node { - my $self = shift; - my $line = shift; - - if ( $line =~ m/^#HPC jobname=/ ) { - $self->hold_pool; - $self->_clear_mce; - my ( $t1, $t2 ) = parse_meta($line); - $self->jobname($t2); - ##Trigger outdir - $self->logname($t2); - $self->logfile( $self->set_logfile ); - $self->logdir( $self->set_logdir ); - } - if ( $line =~ m/^#HPC procs=/ ) { - $self->hold_pool; - $self->_clear_mce; - my ( $t1, $t2 ) = parse_meta($line); - $self->procs($t2); - $self->hold_pool; - } + my $self = shift; + my $line = shift; + + if ( $line =~ m/^#HPC jobname=/ ) { + $self->hold_pool; + $self->_clear_mce; + my ( $t1, $t2 ) = parse_meta($line); + $self->jobname($t2); + print "Starting jobname $t2\n"; + ##Trigger outdir + $self->logname($t2); + $self->logfile( $self->set_logfile ); + $self->logdir( $self->set_logdir ); + } + if ( $line =~ m/^#HPC procs=/ ) { + $self->hold_pool; + $self->_clear_mce; + my ( $t1, $t2 ) = parse_meta($line); + $self->procs($t2); + $self->hold_pool; + } } sub add_pool { - my $self = shift; - $self->log_main_messages( 'debug', "Enqueuing command:\n\t" . $self->cmd ); + my $self = shift; - ##Task ID is the counter for the array - $self->task_id( $self->counter ) if $self->can('task_id'); + return unless $self->has_cmd; - $self->queue->enqueue( $self->counter, $self->cmd ); - $self->clear_cmd; - $self->inc_counter; + my $cmd = $self->cmd; + if ( $cmd =~ /^\s*$/ ) { + $self->clear_cmd; + return; + } + + $self->log_main_messages( 'debug', + "Enqueuing command:\n\t" . $self->cmd ); + + ##Task ID is the counter for the array + $self->task_id( $self->counter ) if $self->can('task_id'); + + $self->queue->enqueue( $self->counter, $self->cmd ); + $self->clear_cmd; + $self->inc_counter; } sub hold_pool { - my $self = shift; + my $self = shift; - $self->log_main_messages( 'debug', "Beginning command:\n\t" . $self->cmd ) - if $self->has_cmd; - $self->log_main_messages( 'debug', - 'Waiting for all threads to complete...' 
) - if $self->has_cmd; + $self->log_main_messages( 'debug', "Beginning command:\n\t" . $self->cmd ) + if $self->has_cmd; + $self->log_main_messages( 'debug', + 'Waiting for all threads to complete...' ) + if $self->has_cmd; - $self->wait(1); - push( @{ $self->jobref }, [] ); - $self->queue->enqueue( (undef) x ( $self->procs * 2 ) ); - $self->mce->run(0); # 0 indicates do not shutdown after running + $self->wait(1); + push( @{ $self->jobref }, [] ); + $self->queue->enqueue( (undef) x ( $self->procs * 2 ) ); + $self->mce->run(0); # 0 indicates do not shutdown after running - $self->log_main_messages( 'debug', - 'All children have completed processing!' ); - $self->clear_cmd; + ##Don't need this for first message + $self->log_main_messages( 'debug', + 'All children have completed processing!' ) + if $self->has_cmd; + + $self->clear_cmd; } memoize('parse_meta'); sub parse_meta { - my $line = shift; - my ( @match, $t1, $t2 ); + my $line = shift; + my ( @match, $t1, $t2 ); - @match = $line =~ m/ (\w+)=(.+)$/; - ( $t1, $t2 ) = ( $match[0], $match[1] ); + @match = $line =~ m/ (\w+)=(.+)$/; + ( $t1, $t2 ) = ( $match[0], $match[1] ); - return ( $t1, $2 ); + return ( $t1, $2 ); } =head3 run_command_mce @@ -385,16 +418,16 @@ MCE knows which subcommand to use from Runner/MCE - object mce =cut sub run_command_mce { - my $self = shift; + my $self = shift; - my $pid = $$; + my $pid = $$; - #$DB::single = 2; + #$DB::single = 2; - push( @{ $self->jobref->[-1] }, $pid ); - $self->_log_commands($pid); + push( @{ $self->jobref->[-1] }, $pid ); + $self->_log_commands($pid); - return; + return; } =head1 AUTHOR diff --git a/lib/HPC/Runner/Command/stats/Logger/JSON/Long.pm b/lib/HPC/Runner/Command/stats/Logger/JSON/Long.pm index 5a91a40..efae025 100644 --- a/lib/HPC/Runner/Command/stats/Logger/JSON/Long.pm +++ b/lib/HPC/Runner/Command/stats/Logger/JSON/Long.pm @@ -3,7 +3,10 @@ package HPC::Runner::Command::stats::Logger::JSON::Long; use Moose::Role; use namespace::autoclean; +with 'HPC::Runner::Command::stats::Logger::JSON::Utils'; + use JSON; +use File::Glob; use File::Slurp; sub get_tasks { @@ -11,33 +14,15 @@ sub get_tasks { my $submission_id = shift; my $jobname = shift; - ##Get the running tasks - my $running_file = - File::Spec->catdir( $self->data_dir, $jobname, 'running.json' ); - - my $running = {}; - if ( -e $running_file ) { - my $running_json = read_file($running_file); - $running = decode_json($running_json); - } - - my $complete = {}; - my $complete_file = - File::Spec->catdir( $self->data_dir, $jobname, 'complete.json' ); - if ( -e $complete_file ) { - my $complete_json = read_file($complete_file); - $complete = decode_json($complete_json); - } + my $running = $self->read_json_files($submission_id, $jobname); my $total_tasks = []; - foreach ( sort { $a <=> $b } keys(%{$running}) ) { - push(@{$total_tasks}, $running->{$_}); - } - foreach ( sort { $a <=> $b } keys(%{$complete}) ) { - push(@{$total_tasks}, $complete->{$_}); + foreach ( sort { $a <=> $b } keys( %{$running} ) ) { + push( @{$total_tasks}, $running->{$_} ); } return $total_tasks; } + 1; diff --git a/lib/HPC/Runner/Command/stats/Logger/JSON/Summary.pm b/lib/HPC/Runner/Command/stats/Logger/JSON/Summary.pm index 86b6ff6..475eed0 100644 --- a/lib/HPC/Runner/Command/stats/Logger/JSON/Summary.pm +++ b/lib/HPC/Runner/Command/stats/Logger/JSON/Summary.pm @@ -3,6 +3,8 @@ package HPC::Runner::Command::stats::Logger::JSON::Summary; use Moose::Role; use namespace::autoclean; +with 'HPC::Runner::Command::stats::Logger::JSON::Utils'; + use JSON; use 
Try::Tiny; use File::Slurp; @@ -13,9 +15,11 @@ sub iter_tasks_summary { my $submission_id = shift; my $jobname = shift; - my $running = $self->count_running_tasks( $submission_id, $jobname ); - my $success = $self->count_successful_tasks( $submission_id, $jobname ); - my $fail = $self->count_failed_tasks( $submission_id, $jobname ); + my $tasks = $self->read_json_files( $submission_id, $jobname ); + my $running = + $self->count_running_tasks( $submission_id, $jobname, $tasks ); + my $success = $self->count_successful_tasks( $submission_id, $jobname, $tasks ); + my $fail = $self->count_failed_tasks( $submission_id, $jobname, $tasks ); my $complete = $success + $fail; $self->task_data->{$jobname} = { @@ -30,82 +34,73 @@ sub count_running_tasks { my $self = shift; my $submission_id = shift; my $jobname = shift; + my $tasks = shift; - my $running_file = - File::Spec->catdir( $self->data_dir, $jobname, 'running.json' ); + my @task_ids = keys %{$tasks}; + my $running = 0; - if ( -e $running_file ) { - my $running_json = read_file($running_file); - ##TODO Add in some error checking - my $running; - try { - $running = decode_json($running_json); - } - catch { - $running = {}; - }; - my @keys = keys %{$running}; - return scalar @keys; - } - else { - return 0; + foreach my $task_id (@task_ids) { + my $task = $tasks->{$task_id}; + $running++ unless exists $task->{exit_code}; } + + return $running; } sub get_running_tasks { my $self = shift; my $submission_id = shift; my $jobname = shift; + my $tasks = shift; - my $running_file = - File::Spec->catdir( $self->data_dir, $jobname, 'running.json' ); + my @task_ids = keys %{$tasks}; + my $running = {}; - if ( -e $running_file ) { - my $running_json = read_file($running_file); - ##TODO Add in some error checking - my $running = decode_json($running_json); - return $running; - } - else { - return {}; + foreach my $task_id (@task_ids) { + my $task = $tasks->{$task_id}; + if ( !exists $task->{exit_code} ) { + $running->{$task_id} = $task; + } } + return $running; } sub get_completed_tasks { my $self = shift; my $submission_id = shift; my $jobname = shift; + my $tasks = shift; - my $complete_file = - File::Spec->catdir( $self->data_dir, $jobname, 'complete.json' ); + my @task_ids = keys %{$tasks}; + my $complete = {}; - if ( -e $complete_file ) { - my $complete_json = read_file($complete_file); - ##TODO Add in some error checking - my $complete = decode_json($complete_json); - return $complete; - } - else { - return {}; + foreach my $task_id (@task_ids) { + my $task = $tasks->{$task_id}; + if ( exists $task->{exit_code} ) { + $complete->{$task_id} = $task; + } } + return $complete; } sub count_successful_tasks { my $self = shift; my $submission_id = shift; my $jobname = shift; + my $tasks = shift; - return $self->search_complete( $jobname, 1 ); + return $self->search_complete( $tasks, $jobname, 1 ); } sub count_failed_tasks { my $self = shift; my $submission_id = shift; my $jobname = shift; + my $tasks = shift; - return $self->search_complete( $jobname, 0 ); + return $self->search_complete( $tasks, $jobname, 0 ); } =head3 search_complete @@ -116,27 +111,12 @@ See which jobs completed successfully sub search_complete { my $self = shift; + my $tasks = shift; my $jobname = shift; my $success = shift; - my $complete_file = - File::Spec->catdir( $self->data_dir, $jobname, 'complete.json' ); - - if ( -e $complete_file ) { - my $complete_json = read_file($complete_file); - my $complete; - try { - $complete = decode_json($complete_json); - } - catch { - $complete = {}; 
- }; - ##TODO Add in some error checking - return $self->look_for_exit_code( $complete, $success ); - } - else { - return 0; - } + return 0 unless $tasks; + return $self->look_for_exit_code( $tasks, $success ); } sub look_for_exit_code { diff --git a/lib/HPC/Runner/Command/stats/Logger/JSON/Utils.pm b/lib/HPC/Runner/Command/stats/Logger/JSON/Utils.pm new file mode 100644 index 0000000..7236105 --- /dev/null +++ b/lib/HPC/Runner/Command/stats/Logger/JSON/Utils.pm @@ -0,0 +1,30 @@ +package HPC::Runner::Command::stats::Logger::JSON::Utils; + +use Moose::Role; +use namespace::autoclean; + +use JSON; +use Try::Tiny; +use File::Slurp; + +sub read_json_files { + my $self = shift; + my $submission_id = shift; + my $jobname = shift; + + my @json_files = + glob( File::Spec->catdir( $self->data_dir, $jobname, '*json' ) ); + + my $running = {}; + foreach my $file (@json_files) { + my $running_json = read_file($file); + my $trun = decode_json($running_json); + foreach my $key ( keys %{$trun} ) { + $running->{$key} = $trun->{$key}; + } + } + + return $running; +} + +1; diff --git a/lib/HPC/Runner/Command/submit_jobs/Logger/JSON.pm b/lib/HPC/Runner/Command/submit_jobs/Logger/JSON.pm index 354e842..e0bdfa3 100644 --- a/lib/HPC/Runner/Command/submit_jobs/Logger/JSON.pm +++ b/lib/HPC/Runner/Command/submit_jobs/Logger/JSON.pm @@ -47,10 +47,10 @@ sub update_json_submission { my $hpc_meta = $self->gen_hpc_meta; my $json_text = encode_json $hpc_meta; - my $file_name = File::Spec->catdir( $self->logdir, 'submission.json' ); + # my $file_name = File::Spec->catdir( $self->logdir, 'submission.json' ); $self->_make_the_dirs( $self->logdir ); - write_file($file_name, $json_text); + # write_file($file_name, $json_text); write_file(File::Spec->catdir($self->data_dir, 'submission.json'), $json_text); return $hpc_meta; diff --git a/lib/HPC/Runner/Command/submit_jobs/Plugin/Dummy.pm b/lib/HPC/Runner/Command/submit_jobs/Plugin/Dummy.pm index 9a25cae..9966824 100644 --- a/lib/HPC/Runner/Command/submit_jobs/Plugin/Dummy.pm +++ b/lib/HPC/Runner/Command/submit_jobs/Plugin/Dummy.pm @@ -148,7 +148,7 @@ sub update_job_deps { return unless $self->has_array_deps; - my $array_deps_file = File::Spec->catdir( $self->logdir, 'array_deps.txt' ); + my $array_deps_file = File::Spec->catdir( $self->logdir, 'array_deps.tsv' ); foreach my $current_task ( sort keys %{ $self->array_deps } ) { my $v = $self->array_deps->{$current_task}; diff --git a/lib/HPC/Runner/Command/submit_jobs/Plugin/Slurm.pm b/lib/HPC/Runner/Command/submit_jobs/Plugin/Slurm.pm index 1f4d82c..07c1d1e 100644 --- a/lib/HPC/Runner/Command/submit_jobs/Plugin/Slurm.pm +++ b/lib/HPC/Runner/Command/submit_jobs/Plugin/Slurm.pm @@ -80,6 +80,7 @@ sub update_job_deps { my $array_deps_file = File::Spec->catdir( $self->logdir, 'array_deps.tsv' ); my $array_log_file = File::Spec->catdir( $self->logdir, 'array_deps.log' ); + ##TODO Add TaskID, TaskTags, Task Tags while ( my ( $current_task, $v ) = each %{ $self->array_deps } ) { my $dep_tasks = join( ':', @{$v} ); my $cmd = @@ -95,7 +96,7 @@ sub update_job_deps { my $info = "Task Deps:\t" . $current_task . "\t" - . $dep_tasks . "\n" + . $dep_tasks . "\t" . 
"ExitCode: $exitcode\n"; $info .= "Stderr: $stderr\n" if $stderr; $info .= "Stdout: $stdout\n" if $stdout; diff --git a/lib/HPC/Runner/Command/submit_jobs/Utils/Log.pm b/lib/HPC/Runner/Command/submit_jobs/Utils/Log.pm index 1f52bd8..236d113 100644 --- a/lib/HPC/Runner/Command/submit_jobs/Utils/Log.pm +++ b/lib/HPC/Runner/Command/submit_jobs/Utils/Log.pm @@ -61,12 +61,12 @@ sub summarize_jobs { my $x = 0; my @rows = (); - #SIGHS #cmd_start is zero indexes #But batches are 1 indexes #WHY DO I DO THIS TO MYSELF foreach my $job ( $self->all_schedules ) { + $DB::single = 2; my $cmd_start = $self->jobs->{$job}->{cmd_start}; my $commands_per_node = $self->jobs->{$job}->commands_per_node; @@ -81,13 +81,13 @@ sub summarize_jobs { my $batch_index_end = $batch_indexes->{batch_index_end} - 1; my $start_array = - $self->jobs->{$job}->batches->[$batch_index_start]->{cmd_start} + - $cmd_start; + $cmd_start + + $self->jobs->{$job}->batches->[$batch_index_start]->{cmd_start}; my $end_array = + $cmd_start + $self->jobs->{$job}->batches->[$batch_index_end]->{cmd_start} + - $cmd_start - 1 + - $self->jobs->{$job}->commands_per_node; + $self->jobs->{$job}->batches->[$batch_index_end]->{cmd_count} - 1; my $len = $end_array - $start_array + 1; @@ -112,27 +112,32 @@ sub write_job_project_table { my $self = shift; my $job_file = File::Spec->catdir( $self->logdir, 'project_job_table.md' ); - write_file( $job_file, '| Job | Status | Notes |'."\n" ); + write_file( $job_file, '| Job | Status | Notes |' . "\n" ); foreach my $job ( $self->all_schedules ) { - write_file($job_file, {append => 1}, '| '.$job.' | | |'."\n"); + write_file( $job_file, { append => 1 }, '| ' . $job . ' | | |' . "\n" ); } - $self->app_log->info('Project table per job: '.$job_file); + $self->app_log->info( 'Project table per job: ' . $job_file ); } sub write_task_project_table { my $self = shift; - my $task_file = File::Spec->catdir( $self->logdir, 'project_task_table.md' ); - write_file( $task_file, '| Job | TaskID | Status | Notes |'."\n" ); + my $task_file = + File::Spec->catdir( $self->logdir, 'project_task_table.md' ); + write_file( $task_file, '| Job | TaskID | Status | Notes |' . "\n" ); foreach my $job ( $self->all_schedules ) { - my $cmd_start = $self->jobs->{$job}->{cmd_start} + 1; - my $cmd_end = $self->jobs->{$job}->cmd_counter + $cmd_start - 1; - for(my $x=$cmd_start; $x<=$cmd_end; $x++){ - write_file($task_file, {append => 1}, '| '.$job.' | '.$x.' | | |'."\n"); + my $cmd_start = $self->jobs->{$job}->{cmd_start} + 1; + my $cmd_end = $self->jobs->{$job}->cmd_counter + $cmd_start - 1; + for ( my $x = $cmd_start ; $x <= $cmd_end ; $x++ ) { + write_file( + $task_file, + { append => 1 }, + '| ' . $job . ' | ' . $x . ' | | |' . "\n" + ); } } - $self->app_log->info('Project table per task: '.$task_file); + $self->app_log->info( 'Project table per task: ' . $task_file ); } 1; diff --git a/lib/HPC/Runner/Command/submit_jobs/Utils/Scheduler/Batch.pm b/lib/HPC/Runner/Command/submit_jobs/Utils/Scheduler/Batch.pm index 1d42308..e47364e 100644 --- a/lib/HPC/Runner/Command/submit_jobs/Utils/Scheduler/Batch.pm +++ b/lib/HPC/Runner/Command/submit_jobs/Utils/Scheduler/Batch.pm @@ -3,38 +3,6 @@ package HPC::Runner::Command::submit_jobs::Utils::Scheduler::Batch; use Moose; use Moose::Util::TypeConstraints; -#cmd_count = Int -#job_deps = ArrayRef -#batch_str = Str -#job = Str -#cmds = ArrayRef -#batch_tags = ArrayRef -#scheduler_index = Hashref[ArrayRef] -#array_deps = ArrayRef <- Do I need this? 
- -#TODO batch_tags is going to be batch_tags and batch_dep_tags - -#Begin Example -#my $href = { -#'cmds' => [ -#'#TASK tags=Sample1 -##TASK deps=Sample1 -#blastx -db env_nr -query Sample1 -#' -#], -#'cmd_count' => '1', -#'job_deps' => ['pyfasta'], -#'batch_tags' => ['Sample1'], -#'batch_str' => '#TASK tags=Sample1 -##TASK deps=Sample1 -#blastx -db env_nr -query Sample1 -#', -#'job' => 'blastx_scratch', -#'scheduler_index' => { 'pyfasta' => ['0'], }, -#'array_deps' => [ [ '1237_7', '1234_1' ], ], -#}; -#End Example - has batch_tags => ( traits => ['Array'], is => 'rw', diff --git a/lib/HPC/Runner/Command/submit_jobs/Utils/Scheduler/ResolveDeps.pm b/lib/HPC/Runner/Command/submit_jobs/Utils/Scheduler/ResolveDeps.pm index 1d06048..bc96321 100644 --- a/lib/HPC/Runner/Command/submit_jobs/Utils/Scheduler/ResolveDeps.pm +++ b/lib/HPC/Runner/Command/submit_jobs/Utils/Scheduler/ResolveDeps.pm @@ -104,7 +104,7 @@ Run a sanity check on the schedule. All the job deps should have existing job na sub sanity_check_schedule { my $self = shift; - $DB::single = 2; + # $DB::single = 2; my @jobnames = keys %{ $self->graph_job_deps }; @jobnames = sort(@jobnames); @@ -117,7 +117,7 @@ sub sanity_check_schedule { #Search the dependencies for matching jobs foreach my $job (@jobnames) { - $DB::single = 2; + # $DB::single = 2; my $row = []; my $ref = $self->graph_job_deps->{$job}; push( @$row, $job ); @@ -127,7 +127,7 @@ sub sanity_check_schedule { #TODO This should be a proper error foreach my $r (@$ref) { - $DB::single = 2; + # $DB::single = 2; if ( !exists $self->graph_job_deps->{$r} ) { $ref->[$y] = "**$r**"; @@ -229,6 +229,11 @@ sub chunk_commands_jobs { my @cmds = @{ $self->parse_cmd_file }; + if($commands_per_node > scalar @cmds){ + $commands_per_node = scalar @cmds; + $self->jobs->{$self->current_job}->commands_per_node($commands_per_node); + } + my $iter = natatime $commands_per_node, @cmds; $self->assign_batches($iter); diff --git a/lib/HPC/Runner/Command/submit_jobs/Utils/Scheduler/UseArrays.pm b/lib/HPC/Runner/Command/submit_jobs/Utils/Scheduler/UseArrays.pm index 7e626e3..257d420 100644 --- a/lib/HPC/Runner/Command/submit_jobs/Utils/Scheduler/UseArrays.pm +++ b/lib/HPC/Runner/Command/submit_jobs/Utils/Scheduler/UseArrays.pm @@ -54,8 +54,9 @@ sub gen_array_str { ->{cmd_start} + $self->jobs->{ $self->current_job }->{cmd_start}; my $end_array = - $self->jobs->{ $self->current_job }->{batches}->[$batch_index_end] - ->{cmd_start} + $self->jobs->{ $self->current_job }->{cmd_start}; + $self->jobs->{$self->current_job}->batches->[$batch_index_end]->{cmd_start} + + $self->jobs->{$self->current_job}->{cmd_start} - 1 + + $self->jobs->{$self->current_job}->commands_per_node; my $array_str = $start_array . 
"-" diff --git a/t/lib/TestsFor/HPC/Runner/Command/Test014.pm b/t/lib/TestsFor/HPC/Runner/Command/Test014.pm index 8218e55..80e60e1 100644 --- a/t/lib/TestsFor/HPC/Runner/Command/Test014.pm +++ b/t/lib/TestsFor/HPC/Runner/Command/Test014.pm @@ -180,18 +180,9 @@ sub test_005 : Tags(use_batches) { $test->_log_commands; my $complete_file = - File::Spec->catdir( $test->data_dir, 'job', 'complete.json' ); - my $running_file = - File::Spec->catdir( $test->data_dir, 'job', 'running.json' ); + File::Spec->catdir( $test->data_dir, 'job', '0001.json' ); ok( -e $complete_file ); - ok( -e $running_file ); - - # $test->lock_file->touchpath; - # my $ret = $test->check_lock; - # is( $ret, 0, 'Lock file exists and should not be removed' ); - - # diag($test->archive->get_content($complete_file)); ok(1); chdir($Bin); diff --git a/t/test_class_tests.t b/t/test_class_tests.t index 41061c1..63bd5d5 100644 --- a/t/test_class_tests.t +++ b/t/test_class_tests.t @@ -4,6 +4,8 @@ use Test::Class::Moose::Load catdir( $Bin, 'lib' ); use Test::Class::Moose::Runner; ##Tests fail on travis without this use IO::Interactive; +use Test::Exception 0.43; + ##Run the main applications tests if ( $ENV{'TRAVIS'} || $ENV{'DEV'} ) {