Add t5common package #29

Open · wants to merge 6 commits into main

45 changes: 45 additions & 0 deletions .github/workflows/common-tests.yml
@@ -0,0 +1,45 @@
name: Run tests for common library

on:
  push:
    branches:
      - main
  pull_request:

jobs:
  run-tests:
    name: common-${{ matrix.name }}
    runs-on: ${{ matrix.os }}
    defaults:
      run:
        shell: bash
    concurrency:
      group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.name }}
      cancel-in-progress: true
    strategy:
      fail-fast: false
      matrix:
        include:
          - { name: linux-python3.10-minimum , requirements: minimum, python-ver: "3.10", os: ubuntu-latest }
          - { name: linux-python3.13         , requirements: pinned , python-ver: "3.13", os: ubuntu-latest }

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-ver }}

      - name: Install dependencies
        run: |
          cd common
          pip install -e .
          pip install -r tests/requirements.txt

      - name: Run tests
        run: |
          cd common
          pytest tests

72 changes: 72 additions & 0 deletions common/README.md
@@ -0,0 +1,72 @@
# The t5common package
`t5common` is a Python package for common operations in the Taskforce5 commons. It provides:

- A class for connecting to and interacting with Jira (Python class `t5common.jira.JiraConnector`; see the sketch below)
- A framework for polling Jira for new issues and starting workflows (commands `init-db`, `check-jira`, and `mark-job`)
- A class for building Slurm sbatch scripts (Python class `t5common.job.SlurmJob`)
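
As a quick illustration, `JiraConnector` can be used on its own. This is a minimal sketch modeled on how `check_jira.py` (included in this PR) constructs and queries it; the host, user, token file, and JQL string are placeholders:

```python
from t5common.jira import JiraConnector

# Placeholder credentials -- substitute your own Jira instance and account
with open("jira_token.txt") as f:
    token = f.read().strip()

jc = JiraConnector(jira_host="https://yourcompany.atlassian.net",
                   jira_user="[email protected]",
                   jira_token=token)

# query() takes a JQL string; the 'issues' key of the response holds the matches
for issue in jc.query('project = T5 AND status = "New"')['issues']:
    print(issue['key'])
```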


## Jira-based workflow automation

Workflows can be automatically triggered using the `init-db`, `check-jira`, and `mark-job` commands that come with
the `t5common` package.

### Configuring workflow automation

Workflow automation with Jira is configured using a YAML file. The YAML file must contain the following keys:

- `host` - the Jira host to get issues for running jobs from
- `user` - the username for connecting to Jira
- `token_file` - the path to a file containing the Jira API token. This file should contain the token on a single line
- `database` - the path to the SQLite database to use for tracking jobs associated with issues
- `job_directory` - the directory to run jobs from
- `projects` - a list of objects containing the information needed to automate workflows from a Jira project. These objects must contain the following keys:
  - `project` - the project to query new issues for
  - `new_status` - the issue status indicating an issue is new and should have a workflow run for it
  - `command` - the command to run to start a new workflow. This command should take the issue key as the first and only positional argument

For more details, refer to the JSON schema in `t5common/jira/schema/config.json`.
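
For concreteness, the sketch below writes a configuration file containing every required key. All of the values (host, account, paths, project key, status, and command) are placeholders to adapt to your own setup:

```python
import yaml

# Placeholder values -- every field below must be adapted to your Jira instance
config = {
    'host': 'https://yourcompany.atlassian.net',
    'user': '[email protected]',
    'token_file': '/home/t5bot/.jira_token',
    'database': '/data/t5/jobs.db',
    'job_directory': '/data/t5/jobs',
    'projects': [
        {
            'project': 'T5',
            'new_status': 'New',
            'command': 'bash /data/t5/scripts/start_workflow.sh',
        },
    ],
}

# Write the configuration that init-db and check-jira will consume
with open('config.yaml', 'w') as f:
    yaml.safe_dump(config, f, sort_keys=False)
```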


### Initializing workflow automation

Once you have defined your configuration file, you will need to initialize the SQLite database using the `init-db` command.

```bash
init-db config.yaml
```

This database maintains jobs in three states:

- `STARTED` - Job has been picked up from Jira and started
- `FINISHED` - Job execution has finished
- `PUBLISHED` - Job results have been published
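
The job database can also be inspected programmatically. Below is a minimal sketch based on the `DBConnector` usage in `check_jira.py` (included in this PR); the database path and issue key are placeholders:

```python
from t5common.jira.database import DBConnector

# Open the SQLite database named by the `database` key of the configuration file
dbc = DBConnector("sqlite:///jobs.db")

# job_state() returns None for an unknown issue, otherwise the current state:
# 'STARTED', 'FINISHED', or 'PUBLISHED'
print(dbc.job_state("T5-123"))

dbc.close()
```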


### Starting jobs

**This section documents the command that must run as a cron job to automate workflow execution**

Jobs can be started using the `check-jira` command.

```bash
check-jira config.yaml
```

This will check each project specified with the `projects` key of the configuration file and start a job for each new issue. Each
job is started from a subdirectory named after the issue key, created in the directory specified by the `job_directory`
key of the configuration file.
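
If you prefer to drive the same logic from Python (for example, from a larger scheduler script rather than the cron job), the CLI shown above reduces to roughly the following, mirroring `main()` in `check_jira.py` later in this PR:

```python
import asyncio

from t5common.jira.check_jira import check_jira
from t5common.jira.utils import load_config

# Equivalent to running `check-jira config.yaml`
config = load_config("config.yaml")
asyncio.run(check_jira(config))
```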

### Updating jobs

**This section documents the command that workflows are required to use to connect to the workflow automation system**

Workflows need to indicate that jobs have finished or been published using the `mark-job` command. The first argument to this command
must be `finished` or `published`, indicating that the job has finished running or that its results have been published, respectively. The `mark-job`
command also takes an optional second argument specifying the job directory, which defaults to the current working directory.

```bash
mark-job finished
```

Workflows must ensure that the `t5common` package is installed in their environments, and that they call `mark-job` when steps are complete.
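
For workflow steps written in Python, calling the CLI at the end of a step can look like the following minimal sketch (the job directory path is a placeholder; the shell form `mark-job finished` shown above works just as well):

```python
import subprocess

# Mark the job in the current working directory as finished
subprocess.run(["mark-job", "finished"], check=True)

# Mark results as published for a specific job directory
subprocess.run(["mark-job", "published", "/data/t5/jobs/T5-123"], check=True)
```
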
34 changes: 34 additions & 0 deletions common/pyproject.toml
@@ -0,0 +1,34 @@
[build-system]
requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2"]
build-backend = "setuptools.build_meta"

[project]
name = "t5common"
version = "0.1.0"
description = "A library for common routines needed for Taskforce5 code"
readme = "README.md"
requires-python = ">=3.7"
license = { text = "Modified BSD" }
authors = [
    { name = "Andrew Tritt", email = "[email protected]" }
]
keywords = ["t5", "common", "library"]
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: BSD License",
    "Operating System :: OS Independent",
]
dependencies = [
    "requests",
    "jsonschema",
    "sqlalchemy",
    "pyyaml",
]

[project.scripts]
init-db = "t5common.jira.database:init_db"
check-jira = "t5common.jira.check_jira:main"
mark-job = "t5common.jira.mark_job:main"

[tool.setuptools.package-data]
"t5common.jira" = ["schema/*.json"]
Empty file added common/t5common/__init__.py
Empty file.
1 change: 1 addition & 0 deletions common/t5common/jira/__init__.py
@@ -0,0 +1 @@
from .connector import JiraConnector
123 changes: 123 additions & 0 deletions common/t5common/jira/check_jira.py
@@ -0,0 +1,123 @@
import argparse
import asyncio
import json
import os
from os.path import abspath, relpath
import re

from .connector import JiraConnector
from .database import DBConnector
from .utils import load_config, WF_FILENAME
from ..utils import get_logger, read_token


logger = get_logger()


def format_query(config):
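    # e.g. a project config of {'project': 'T5', 'new_status': 'New'} (placeholder values)
    # yields the JQL string: project = T5 AND status = "New"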
    return 'project = {project} AND status = "{new_status}"'.format(**config)


async def process_issue(issue, project_config, config):
    # Set up environment to run subprocess in
    env = os.environ.copy()
    env['JIRA_HOST'] = config['host']
    env['JIRA_USER'] = config['user']
    env['JIRA_TOKEN'] = read_token(config['token_file'])

    # Set up the command to run in the subprocess
    command = re.split(r'\s+', project_config['command'])
    command.append(issue)

    # Set up the working directory to run the job in
    wd = os.path.join(config.get('job_directory', '.'), issue)
    if os.path.exists(wd):
        raise RuntimeError(f"workflow already started for {issue} - {wd} already exists")
    else:
        os.mkdir(wd)

    # Add workflow info to the working directory for subsequent steps
    wf_info = {
        'issue': issue,
        'database': relpath(abspath(config['database']), abspath(wd)),
    }
    with open(os.path.join(wd, WF_FILENAME), 'w') as f:
        json.dump(wf_info, f)

    # Call the job command in a subprocess
    logger.info(f"Processing {issue}: {' '.join(command)}")
    process = await asyncio.create_subprocess_exec(
        command[0], *command[1:],
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.STDOUT,
        env=env,
        cwd=wd,
    )
    # Read the output and error streams
    stdout, _ = await process.communicate()

    if process.returncode != 0:
        logger.error(f"Processing {issue} failed:\n{stdout.decode()}")
    else:
        msg = f"Processing {issue} succeeded:"
        if len(stdout) > 0:
            msg += f"\n{stdout.decode()}"
        logger.info(msg)

    return process.returncode, wd


async def check_jira(config):
    # Connect to Jira
    jc = JiraConnector(jira_host=config['host'],
                       jira_user=config['user'],
                       jira_token=read_token(config['token_file']))

    database = config['database']
    dbc = DBConnector(f"sqlite:///{database}")

    # Check each project queue, and create a new job for each new issue
    tasks = list()
    issues = list()
    for project_config in config['projects']:
        query = format_query(project_config)
        proj_issues = jc.query(query)['issues']
        for issue in proj_issues:
            key = issue['key']
            state = dbc.job_state(key)
            if state is not None:
                if state != 'STARTED':
                    logger.error(f"Issue {key} still has new_status, but is not in STARTED state: state = {state}")
                continue
            issues.append(key)
            tasks.append(process_issue(key, project_config, config))

    results = await asyncio.gather(*tasks)
    for issue, (retcode, wd) in zip(issues, results):
        if retcode == 0:
            logger.info(f"Issue {issue} marked as started")
            dbc.start_job(issue, wd)
        else:
            logger.info(f"Issue {issue} failed -- not marking as started")

    dbc.close()


def main():
    parser = argparse.ArgumentParser(description="Poll Jira projects and run a script for each issue.")
    parser.add_argument('config', type=str, help='Path to the YAML configuration file')
    args = parser.parse_args()

    config = load_config(args.config)
    asyncio.run(check_jira(config))


if __name__ == "__main__":
    main()