Skip to content

Commit

Permalink
improved remote glob (to cut out unnecessary file operations)
Browse files: browse the repository at this point in the history
  • Loading branch information
chrisjsewell committed Sep 23, 2017
1 parent 4c5ea7e commit 3d6e067
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 4 deletions.
2 changes: 1 addition & 1 deletion atomic_hpc/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from atomic_hpc import config_yaml, deploy_runs, mockssh, context_folder, utils

__version__ = "0.1.6"
__version__ = "0.1.7"

6 changes: 5 additions & 1 deletion atomic_hpc/config_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,14 @@ def format_config_yaml(file_obj, errormsg_only=False):

ryaml = YAML()
dct = ryaml.load(file_obj)

logger.info("validating & formatting config: {}".format(file_obj))

validate(dct, _config_schema)
runs = []
defaults = edict.merge([_global_defaults, dct.get('defaults', {})], overwrite=True)


for i, run in enumerate(dct['runs']):

new_run = edict.merge([defaults, run], overwrite=True)
Expand Down Expand Up @@ -250,7 +254,7 @@ def renumber_config_yaml(in_file_obj, out_file_obj, start_num=1):
ryaml = YAML()
config = ryaml.load(in_file_obj)
validate(config, _config_schema)
for i, run in enumerate(config["runs"]):
for i, _ in enumerate(config["runs"]):
config["runs"][i]["id"] = i+start_num

logger.info("outputting renumbered config to: {}".format(out_file_obj))
Expand Down
21 changes: 19 additions & 2 deletions atomic_hpc/context_folder/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,14 +194,29 @@ def glob(self, pattern):
-------
"""
logger.debug("yielding files for pattern: {}".format(pattern))

if pattern.startswith(".."):
raise IOError("cannot go outside folder context")

def walk_func(apath):
return walk_path(apath, listdir=self._sftp.listdir, isfile=self.isfile, isfolder=self.isdir)

for path in glob_path("", pattern, walk_func):
yield path
# Walking remote paths can be time-consuming, so start from the longest wildcard-free prefix of the pattern instead of the root where possible.
# TODO: this could be written more cleanly (or moved into glob_path)
init_path = []
for pattern_piece in splitall(pattern):
if any(c in pattern_piece for c in ('*', '?', "[", "]")):
break
init_path.append(pattern_piece)
if init_path and init_path[0] == ".":
init_path = init_path[1:]
init_path = os.path.join(*init_path) if init_path else ""
if self.exists(init_path):
for path in glob_path(init_path, pattern, walk_func):
yield path

logger.debug("finished yielding files for pattern: {}".format(pattern))

@renew_connection
def rmtree(self, path):
Expand Down Expand Up @@ -332,8 +347,10 @@ def copy_to(self, path, target):
targetchild = target.joinpath(os.path.basename(path))
if self.isfile(path):
targetchild.touch()
#logger.debug("started copying file")
with targetchild.open("wb") as file_obj:
self._sftp.getfo(path, file_obj)
#logger.debug("finished copying file")
else:
targetchild.mkdir()
for childpath in self.glob(os.path.join(path, "*")):
Expand Down
2 changes: 2 additions & 0 deletions atomic_hpc/deploy_runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,8 @@ def retrieve_outputs(runs, local_path, root_path, if_exists="abort"):
logger.info("copying {0} to {1}".format(outname, local_path))
for pname in folder.glob(os.path.join(outname, "*")):
folder.copy_to(pname, local_path.joinpath(outname))

logger.info("finished copying {0} to {1}".format(outname, local_path))

if failed_runs:
raise RuntimeError("The following runs did not complete: \n{}".format("\n".join(failed_runs)))

0 comments on commit 3d6e067

Please sign in to comment.