Skip to content

Commit

Permalink
Merge pull request #276 from argonne-lcf/polaris
Browse files: browse the repository at this point in the history
Update filesystems options for ALCF systems
  • Loading branch information
tomuram authored Oct 26, 2022
2 parents 1f8405e + 93ff90b commit 2b63d83
Show file tree
Hide file tree
Showing 7 changed files with 10 additions and 48 deletions.
2 changes: 1 addition & 1 deletion balsam/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from balsam.util import config_root_logger

__version__ = "0.7.0.a17"
__version__ = "0.7.0.a18"
config_root_logger()
10 changes: 5 additions & 5 deletions balsam/_api/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This file was auto-generated via /Users/misha/workflow/balsam/.venv/bin/python balsam/schemas/api_generator.py
# [git rev 2e64e31]
# This file was auto-generated via /Users/turam/opt/miniconda3/bin/python balsam/schemas/api_generator.py
# [git rev 8578c92]
# Do *not* make changes to the API by changing this file!

import datetime
Expand Down Expand Up @@ -765,7 +765,7 @@ class BatchJob(balsam._api.bases.BatchJobBase):
job_mode = Field[balsam.schemas.batchjob.JobMode]()
optional_params = Field[typing.Dict[str, str]]()
filter_tags = Field[typing.Dict[str, str]]()
partitions = Field[typing.Optional[typing.List[balsam.schemas.batchjob.BatchJobPartition]]]()
partitions = Field[Optional[typing.Union[typing.List[balsam.schemas.batchjob.BatchJobPartition], None]]]()
site_id = Field[int]()
project = Field[str]()
queue = Field[str]()
Expand All @@ -786,7 +786,7 @@ def __init__(
queue: str,
optional_params: Optional[typing.Dict[str, str]] = None,
filter_tags: Optional[typing.Dict[str, str]] = None,
partitions: Optional[typing.Optional[typing.List[balsam.schemas.batchjob.BatchJobPartition]]] = None,
partitions: Optional[typing.Union[typing.List[balsam.schemas.batchjob.BatchJobPartition], None]] = None,
**kwargs: Any,
) -> None:
"""
Expand Down Expand Up @@ -918,7 +918,7 @@ def create(
queue: str,
optional_params: Optional[typing.Dict[str, str]] = None,
filter_tags: Optional[typing.Dict[str, str]] = None,
partitions: Optional[typing.Optional[typing.List[balsam.schemas.batchjob.BatchJobPartition]]] = None,
partitions: Optional[typing.Union[typing.List[balsam.schemas.batchjob.BatchJobPartition], None]] = None,
) -> BatchJob:
"""
Create a new BatchJob object and save it to the API in one step.
Expand Down
1 change: 1 addition & 0 deletions balsam/cmdline/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ def count_by_state(job_qs: "JobQuery", verbose: bool) -> None:
def list_verbose(job_qs: "JobQuery") -> None:
    """Echo a detailed, multi-line report for each job in ``job_qs``.

    For every job, three things are written with ``click.echo``:
    a YAML dump of ``job.display_dict()`` (keys left unsorted, indent of 4),
    the string form of ``job.get_parameters()``, and a ``---`` separator.
    """
    # NOTE(review): indentation was lost in the extracted source; the
    # separator is assumed to be emitted once per job -- confirm upstream.
    for entry in job_qs:
        rendered = yaml.dump(entry.display_dict(), sort_keys=False, indent=4)
        click.echo(rendered)
        click.echo(f"deserialized parameters: {entry.get_parameters()!s}")
        click.echo("---\n")


Expand Down
2 changes: 1 addition & 1 deletion balsam/config/defaults/alcf_cooley/job-template.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#COBALT -n {{ num_nodes }}
#COBALT -q {{ queue }}
#COBALT -t {{ wall_time_min }}
#COBALT --attrs pubnet
#COBALT --attrs pubnet:filesystems=home,grand,eagle,theta-fs0

# Uncomment this if the server is on an external network
# (Note that https_proxy is set to use an `http://` protocol!
Expand Down
39 changes: 0 additions & 39 deletions balsam/config/defaults/alcf_polaris/job-template.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,49 +4,10 @@
#PBS -l filesystems=home:grand:eagle
#PBS -A {{ project }}
#PBS -q {{ queue }}
# #COBALT --attrs pubnet=true:enable_ssh=1:{% if optional_params.get("mig_count") %}mig-mode=true{% endif %}

export http_proxy=http://proxy:3128
export https_proxy=http://proxy:3128

{% if optional_params.mig_count == "2" %}
cgi=9
mig_count=2
echo "Creating 2x MIG 3g.20gb (ID 9)"
{% elif optional_params.mig_count == "3" %}
cgi=14
mig_count=3
echo "Creating 3x MIG 2g.10gb (ID 14)"
{% elif optional_params.mig_count == "7" %}
cgi=19
mig_count=7
echo "Creating 7x MIG 1g.5gb (ID 19)"
{% else %}
mig_count=0
echo "Not using MIG"
{% endif %}

if [ "$mig_count" -gt "0" ]
then
# Create MIG Compute Instances
for i in $(seq 1 $mig_count)
do
mpirun -hostfile $COBALT_NODEFILE \
-n {{ num_nodes }} -npernode 1 \
nvidia-smi_mig -cgi "$cgi" -C
done

# Record Instance IDs in local /var/tmp:
for host in $(cat $COBALT_NODEFILE)
do
gpu_file="/var/tmp/balsam-$host-gpulist.txt"
mpirun -hostfile $COBALT_NODEFILE \
--host $host -n 1 nvidia-smi -L > $gpu_file
echo "Recorded GPU list for $host in $gpu_file"
cat $gpu_file
done
fi

#remove export PMI_NO_FORK=1
export BALSAM_SITE_PATH={{balsam_site_path}}
cd $BALSAM_SITE_PATH
Expand Down
2 changes: 1 addition & 1 deletion balsam/config/defaults/alcf_theta/job-template.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#COBALT -n {{ num_nodes }}
#COBALT -q {{ queue }}
#COBALT -t {{ wall_time_min }}
#COBALT --attrs ssds=required:ssd_size=128
#COBALT --attrs ssds=required:ssd_size=128:filesystems=home,grand,eagle,theta-fs0


{% if optional_params.get("singularity_prime_cache") %}
Expand Down
2 changes: 1 addition & 1 deletion balsam/config/defaults/alcf_thetagpu/job-template.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#COBALT -n {{ num_nodes }}
#COBALT -q {{ queue }}
#COBALT -t {{ wall_time_min }}
#COBALT --attrs pubnet=true:enable_ssh=1:{% if optional_params.get("mig_count") %}mig-mode=true{% endif %}
#COBALT --attrs pubnet=true:enable_ssh=1:filesystems=home,grand,eagle,theta-fs0:{% if optional_params.get("mig_count") %}mig-mode=true{% endif %}

export https_proxy=http://theta-proxy.tmi.alcf.anl.gov:3128

Expand Down

0 comments on commit 2b63d83

Please sign in to comment.