Skip to content

Commit

Permalink
[Execution] Add the ability to include other files when writing a CON…
Browse files Browse the repository at this point in the history
…D file (#78)

* Add the ability to include() other files.

This commit also improves the task parsing error messages.

* Use relative paths in error messages

* Cache included files

* Add test cases for error scenarios

* Add a test for the include() functionality

* Add documentation for include()

* Finish up the new documentation

* Fix tests for Python 3.8

* Increase test timeout to 60 s
  • Loading branch information
geoffxy authored Jul 24, 2022
1 parent ad300ff commit 83b1606
Show file tree
Hide file tree
Showing 29 changed files with 467 additions and 20 deletions.
19 changes: 17 additions & 2 deletions errors/errors.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Task parsing errors (error code 1xxx)
1000:
name: TaskParseError
message: "An unknown error occurred when parsing a COND file."
message: "An error occurred when parsing a COND file or a file that it includes: {error_details}."

1001:
name: MissingTaskParameter
Expand Down Expand Up @@ -29,7 +29,8 @@

1007:
name: TaskSyntaxError
message: "Encountered a syntax error when parsing a COND file."
message: >-
Encountered a syntax error when parsing a COND file or a file that it includes.
1008:
name: ParsingUnknownNameError
Expand Down Expand Up @@ -87,6 +88,12 @@
All arguments must be either a string, integer, floating point number, or
boolean.
1017:
name: IncludeFileInvalidExtension
message: >-
Encountered an include() of '{included_file}', which does not have a '.cond'
extension. Conductor only supports including .cond files.
# Task graph loading errors (error code 2xxx)
2001:
Expand All @@ -105,6 +112,14 @@
name: UnsupportedVersionIndexFormat
message: "Detected an unsupported version index ({version}). Please make sure that you are using the latest version of Conductor."

2005:
name: IncludeFileNotFound
message: Encountered an include() of '{included_file}'. However, that file does not exist.

2006:
name: IncludeFileNotInProject
message: Encountered an include() of '{included_file}'. However, that file is not inside the project.


# Execution errors (error code 3xxx)
3001:
Expand Down
4 changes: 2 additions & 2 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[pytest]
# Configuration option for `pytest-timeout`
# An individual unit test cannot take longer than 30 seconds.
timeout = 30
# An individual unit test cannot take longer than 60 seconds.
timeout = 60
3 changes: 3 additions & 0 deletions src/conductor/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,6 @@

# The name of the experiment arguments serialized JSON file.
EXP_ARGS_JSON_FILE_NAME = "args.json"

# COND files can only include files with this extension.
COND_INCLUDE_EXTENSION = ".cond"
5 changes: 5 additions & 0 deletions src/conductor/errors/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ def add_file_context(self, file_path, line_number=None):
self.file_context = FileContext(file_path, line_number)
return self

def add_file_context_if_missing(self, file_path, line_number=None):
    """
    Attaches file context to this error unless it already has some.

    When `self.file_context` is already set, the error is returned
    untouched. Otherwise this defers to `add_file_context()` and returns
    whatever that call returns.
    """
    if self.file_context is None:
        return self.add_file_context(file_path, line_number)
    return self

def add_extra_context(self, context_string):
self.extra_context = context_string
return self
Expand Down
50 changes: 46 additions & 4 deletions src/conductor/errors/generated.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ class TaskParseError(ConductorError):

def __init__(self, **kwargs):
super().__init__()

self.error_details = kwargs["error_details"]

def _message(self):
return "An unknown error occurred when parsing a COND file.".format(

return "An error occurred when parsing a COND file or a file that it includes: {error_details}.".format(
error_details=self.error_details,
)


Expand Down Expand Up @@ -111,7 +111,7 @@ def __init__(self, **kwargs):


def _message(self):
return "Encountered a syntax error when parsing a COND file.".format(
return "Encountered a syntax error when parsing a COND file or a file that it includes.".format(

)

Expand Down Expand Up @@ -241,6 +241,19 @@ def _message(self):
)


class IncludeFileInvalidExtension(ConductorError):
    """Error 1017: an include() named a file without a '.cond' extension."""

    error_code = 1017

    def __init__(self, **kwargs):
        super().__init__()
        # Required keyword argument; a missing key raises `KeyError`.
        self.included_file = kwargs["included_file"]

    def _message(self):
        template = (
            "Encountered an include() of '{included_file}', which does not "
            "have a '.cond' extension. Conductor only supports including "
            ".cond files."
        )
        return template.format(included_file=self.included_file)


class TaskNotFound(ConductorError):
error_code = 2001

Expand Down Expand Up @@ -293,6 +306,32 @@ def _message(self):
)


class IncludeFileNotFound(ConductorError):
    """Error 2005: an include() named a file that does not exist."""

    error_code = 2005

    def __init__(self, **kwargs):
        super().__init__()
        # Required keyword argument; a missing key raises `KeyError`.
        self.included_file = kwargs["included_file"]

    def _message(self):
        template = (
            "Encountered an include() of '{included_file}'. "
            "However, that file does not exist."
        )
        return template.format(included_file=self.included_file)


class IncludeFileNotInProject(ConductorError):
    """Error 2006: an include() named a file that is outside the project."""

    error_code = 2006

    def __init__(self, **kwargs):
        super().__init__()
        # Required keyword argument; a missing key raises `KeyError`.
        self.included_file = kwargs["included_file"]

    def _message(self):
        template = (
            "Encountered an include() of '{included_file}'. "
            "However, that file is not inside the project."
        )
        return template.format(included_file=self.included_file)


class TaskNonZeroExit(ConductorError):
error_code = 3001

Expand Down Expand Up @@ -586,10 +625,13 @@ def _message(self):
"ExperimentGroupDuplicateName",
"ExperimentGroupInvalidExperimentInstance",
"RunArgumentsNonPrimitiveValue",
"IncludeFileInvalidExtension",
"TaskNotFound",
"MissingProjectRoot",
"CyclicDependency",
"UnsupportedVersionIndexFormat",
"IncludeFileNotFound",
"IncludeFileNotInProject",
"TaskNonZeroExit",
"TaskFailed",
"OutputDirTaken",
Expand Down
2 changes: 1 addition & 1 deletion src/conductor/parsing/task_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
class TaskIndex:
def __init__(self, project_root: pathlib.Path):
self._project_root = project_root
self._task_loader = TaskLoader()
self._task_loader = TaskLoader(project_root)
# Keyed by the relative path to the COND file
self._loaded_raw_tasks: Dict[pathlib.Path, Dict[str, Dict]] = {}
# Keyed by task identifier
Expand Down
126 changes: 115 additions & 11 deletions src/conductor/parsing/task_loader.py
Original file line number Diff line number Diff line change
@@ -1,57 +1,87 @@
import pathlib
from typing import Any, Dict, Optional
from conductor.config import COND_INCLUDE_EXTENSION
from conductor.task_types import raw_task_types
from conductor.errors import (
ConductorError,
DuplicateTaskName,
MissingCondFile,
ParsingUnknownNameError,
TaskSyntaxError,
TaskParseError,
IncludeFileInvalidExtension,
IncludeFileNotFound,
IncludeFileNotInProject,
)
from conductor.task_types.stdlib import STDLIB_FILES


class TaskLoader:
def __init__(self):
self._tasks = None
self._current_cond_file_path = None
def __init__(self, project_root: pathlib.Path):
self._project_root = project_root
self._tasks: Optional[Dict[str, Dict]] = None
self._current_cond_file_path: Optional[pathlib.Path] = None
self._conductor_scope = self._compile_scope()
self._curr_exec_scope = None

def parse_cond_file(self, cond_file_path):
# We cache the results from evaluating `include()`s so that if a file is
# included across multiple `COND` files, we do not repeatedly evaluate
# the file. The code being included is expected to be deterministic.
#
# Key is the absolute path to the file (e.g. /home/user/path/to/file.cond).
# Value is the resulting scope object.
self._include_cache: Dict[str, Any] = {}

def parse_cond_file(self, cond_file_path: pathlib.Path):
"""
Parses all the tasks in a single COND file.
"""
tasks = {}
tasks: Dict[str, Dict] = {}
self._tasks = tasks
self._current_cond_file_path = cond_file_path
try:
with open(cond_file_path, encoding="UTF-8") as file:
code = file.read()
self._curr_exec_scope = self._conductor_scope.copy()
# pylint: disable=exec-used
exec(code, self._conductor_scope.copy())
exec(code, self._curr_exec_scope)
return tasks
except ConductorError as ex:
ex.add_file_context(file_path=cond_file_path)
ex.add_file_context_if_missing(
file_path=self._to_project_path(cond_file_path)
)
raise ex
except SyntaxError as ex:
syntax_err = TaskSyntaxError()
syntax_err.add_file_context(
file_path=cond_file_path,
file_path=self._to_project_path(cond_file_path),
line_number=ex.lineno,
)
raise syntax_err from ex
except NameError as ex:
name_err = ParsingUnknownNameError(error_message=str(ex))
name_err.add_file_context(file_path=cond_file_path)
name_err.add_file_context(file_path=self._to_project_path(cond_file_path))
raise name_err from ex
except FileNotFoundError as ex:
missing_file_err = MissingCondFile()
missing_file_err.add_file_context(file_path=cond_file_path)
missing_file_err.add_file_context(
file_path=self._to_project_path(cond_file_path)
)
raise missing_file_err from ex
except Exception as ex:
run_err = TaskParseError(error_details=str(ex))
run_err.add_file_context(file_path=self._to_project_path(cond_file_path))
raise run_err from ex
finally:
self._tasks = None
self._current_cond_file_path = None
self._curr_exec_scope = None

def _compile_scope(self):
scope = {}
scope = {
# Used to handle included files.
"include": self._run_include,
}
# Create the task constructors for Conductor's foundational task types.
for raw_task_type in raw_task_types.values():
scope[raw_task_type.name] = self._wrap_task_function(
Expand All @@ -76,3 +106,77 @@ def shim(**kwargs):
self._tasks[raw_task["name"]] = raw_task

return shim

def _run_include(self, candidate_path: str):
    """
    Implements the `include()` function that is exposed to COND files.

    Evaluates the included file and copies the symbols it defines into the
    scope of the COND file that is currently being parsed. Results are
    cached by resolved path, so a file that is included more than once is
    only executed the first time.

    Args:
        candidate_path: The path passed to `include()`. A path starting
            with `//` is taken relative to the project root; any other
            path is taken relative to the directory of the COND file that
            is currently being parsed.

    Raises:
        IncludeFileInvalidExtension: `candidate_path` does not end with
            `COND_INCLUDE_EXTENSION`.
        IncludeFileNotFound: The path does not resolve to an existing file.
        IncludeFileNotInProject: The resolved path is outside the project
            root.
        TaskSyntaxError: The included file contains a syntax error.
        TaskParseError: Any other exception was raised while executing the
            included file.
    """
    # This method is only reachable while `parse_cond_file()` is executing
    # a COND file, which sets both of these beforehand.
    assert self._current_cond_file_path is not None
    assert self._curr_exec_scope is not None

    # 1. Validate `candidate_path`.
    if not candidate_path.endswith(COND_INCLUDE_EXTENSION):
        raise IncludeFileInvalidExtension(included_file=candidate_path)

    # 2. Parse `candidate_path`.
    if candidate_path.startswith("//"):
        include_path = self._project_root.joinpath(candidate_path[2:])
    else:
        include_path = self._current_cond_file_path.parent.joinpath(
            candidate_path
        )
    try:
        include_path = include_path.resolve(strict=True)
    except FileNotFoundError as ex:
        raise IncludeFileNotFound(included_file=candidate_path) from ex

    # 3. Make sure `include_path` is inside our project.
    # If `include_path` is not relative to `self._project_root` then the
    # method will raise a `ValueError`. For compatibility with Python 3.8,
    # we do not use `is_relative_to()` (it is a Python 3.9+ method).
    try:
        include_path.relative_to(self._project_root)
    except ValueError as ex:
        raise IncludeFileNotInProject(included_file=candidate_path) from ex

    # 4. Check if the file is in our cache. If so, just use the cached
    # results.
    cache_key = str(include_path)
    cached_scope = self._include_cache.get(cache_key)
    if cached_scope is not None:
        self._curr_exec_scope.update(cached_scope)
        return

    # 5. Run the included file. We purposely use a separate scope so that
    # the Conductor task symbols (e.g., run_experiment()) are not available
    # in the included file.
    #
    # NOTE: Because separate globals and locals are passed to `exec()`, the
    # file's top-level names land in `scope` (the locals). Functions defined
    # in the included file therefore cannot see the file's other top-level
    # names when they are called.
    with open(include_path, encoding="UTF-8") as file:
        include_code = file.read()
    scope: Dict[str, Any] = {}
    try:
        # pylint: disable=exec-used
        exec(include_code, {}, scope)
    except SyntaxError as ex:
        syntax_err = TaskSyntaxError()
        syntax_err.add_file_context(
            file_path=self._to_project_path(include_path),
            line_number=ex.lineno,
        ).add_extra_context(
            "This error occurred while parsing a file included by {}.".format(
                self._to_project_path(self._current_cond_file_path)
            )
        )
        raise syntax_err from ex
    except Exception as ex:
        run_err = TaskParseError(error_details=str(ex))
        run_err.add_file_context(
            file_path=self._to_project_path(include_path)
        ).add_extra_context(
            "This error occurred while parsing a file included by {}.".format(
                self._to_project_path(self._current_cond_file_path)
            )
        )
        raise run_err from ex

    # 6. Update the current scope with the new symbols.
    self._curr_exec_scope.update(scope)

    # 7. Update the cache. The result must go into `_include_cache` (not the
    # current exec scope) so that step 4 can find it on later includes and
    # so that no path-named key leaks into the COND file's namespace.
    self._include_cache[cache_key] = scope

def _to_project_path(self, path: pathlib.Path) -> str:
"""Converts the given path to a path that is relative to the project root."""
rel_path = path.relative_to(self._project_root)
return "//{}".format(rel_path)
Loading

0 comments on commit 83b1606

Please sign in to comment.