Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement new organise handlers #47

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,15 @@ jobs:
python -m pip install --upgrade pip
pip install wheel
pip install -r requirements/dev .
npm install -g pajv
- name: Launch tests
run: |
nosetests ./tests
- name: Lint config files
run: |
yamllint -c .yamllint.yml config/organise_configs/*.yml
- name: Validate config files
run: >
pajv
-s config/organise_configs/schema.json
-d config/organise_configs/*.yml
6 changes: 6 additions & 0 deletions .yammlint.yml
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice!

Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
extends: default

rules:
line-length:
max: 80
level: warning
1 change: 1 addition & 0 deletions config/app.config
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ staging_directory: /tmp/
project_links_directory: /tmp/
dds_conf:
log_path: dds.log
organise_config_dir: config/organise_configs/
port: 9999
43 changes: 0 additions & 43 deletions config/organise_config/organise_runfolder.yml

This file was deleted.

46 changes: 46 additions & 0 deletions config/organise_configs/runfolder.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
---
# Input: runfolder_name

# Currently named runfolderpath and runfolder to harmonize with rnaseq config
# by Monika, but for specific runfolder delivery, technically, these could be
# "merged" to one.
variables:
runfolderpath: /proj/ngi2016001/incoming
runfolder: "{runfolderpath}/{runfolder_name}"
organised: "{runfolder}/Projects/"

# current path is
# /proj/ngi2016001/incoming/<RUNFOLDER>/Projects/<PROJECT>/<RUNFOLDER>/ The
# following is based on that we are to keep the same directory structure as
# above.
files_to_organise:

# The fastq files
- source: "{runfolder}/Unaligned/*"
destination: "{organised}/{projectid}/{runfolder_name}/Sample_{samplename}/"
options:
required: true
link_type: softlink
filter: (?P<projectid>[\w-]+)/Sample_(?P<samplename>[\w-]+)/(?P=samplename)_S(?P<samplenumber>\d+)_L(?P<lanes>\d+)_R(?P<read>\d)_001.fastq.gz # yamllint disable-line

# The MultiQC files
- source: "{runfolder}/seqreports/project/*"
destination: "{organised}/{projectid}/<runfolder_name>/"
options:
required: true
link_type: softlink
filter: (?P<projectid>[\w-]+)/\w+_(?P=projectid)_multiqc_report[\w.-]+

# what we are lacking, and what might need to be created outside the config is:
# 1. checksums.md5
# 2. Encrypted samplesheet
# As far as I know, these don't exist prior to organization.

# hypothethical undetermined, needs an input of lane(s) connected to project
# We will not include this at this point, as it requires additional input. Will
# maybe end up in its own config or something.
# - source: <RUNFOLDER>/Unaligned/Undetermined/Undetermined_S0_L<lanes>_R<read>_001.fastq.gz
# destination: <RUNFOLDER>/Unaligned/<PROJECT>/Undetermined/
# options:
# required: False
# symlink: True
34 changes: 34 additions & 0 deletions config/organise_configs/schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"type": "object",
"required": ["variables", "files_to_organise"],
"properties": {
"variables": {
"type": "object",
"additionalProperties": {"type": "string"}
},
"files_to_organise": {
"type": "array",
"items": {
"type": "object",
"required": ["source", "destination", "options"],
"properties": {
"source": {"type": "string"},
"destination": {"type": "string"},
"options": {
"type": "object",
"properties": {
"required": {"type": "boolean"},
"link_type": {
"type": "string",
"enum": ["softlink", "hardlink", "copy"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we decided on removing the hardlink option

},
"filter": {"type": "string"}
},
"additionalProperties": false
}
}
}
}
},
"additionalProperties": false
}
19 changes: 18 additions & 1 deletion delivery/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@
from delivery.handlers.delivery_handlers import DeliverByStageIdHandler, DeliveryStatusHandler
from delivery.handlers.staging_handlers import StagingRunfolderHandler, StagingHandler,\
StageGeneralDirectoryHandler, StagingProjectRunfoldersHandler
from delivery.handlers.organise_handlers import OrganiseRunfolderHandler
from delivery.handlers.organise_handlers import \
OrganiseRunfolderHandler, \
OrganiseProjectHandler, \
OrganiseProjectAnalysisHandler, \
OrganiseRunfolderConfigHandler

from delivery.repositories.runfolder_repository import FileSystemBasedRunfolderRepository, \
FileSystemBasedUnorganisedRunfolderRepository
Expand Down Expand Up @@ -57,9 +61,22 @@ def routes(**kwargs):
url(r"/api/1.0/project/([^/]+)/best_practice_samples$", BestPracticeProjectSampleHandler,
name="best_practice_samples", kwargs=kwargs),

# I'm keeping this endpoint for backward compatibility but we should be
# able to remove it once we have updated our workflows to use the new
# endpoints /AC230510
url(r"/api/1.0/organise/runfolder/([^/]+)", OrganiseRunfolderHandler,
name="organise_runfolder", kwargs=kwargs),

url(r"/api/1.0/organise/delivery/runfolder/([^/]+)",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we actually need one endpoint for every type of organization we want to do? Shouldn't it be enough to have one OrganiseRunfolderConfigHandler since the organization it self is defined in the config? Please correct me if I get it all wrong :)

OrganiseRunfolderConfigHandler,
name="organise_runfolder", kwargs=kwargs),
url(r"/api/1.0/organise/delivery/project/([^/]+)/([^/]+)",
OrganiseProjectAnalysisHandler,
name="organise_project", kwargs=kwargs),
url(r"/api/1.0/organise/delivery/project/([^/]+)",
OrganiseProjectHandler,
name="organise_project_custom", kwargs=kwargs),

url(r"/api/1.0/stage/project/runfolders/(.+)", StagingProjectRunfoldersHandler,
name="stage_multiple_runfolders_one_project", kwargs=kwargs),
url(r"/api/1.0/stage/runfolder/(.+)", StagingRunfolderHandler,
Expand Down
22 changes: 21 additions & 1 deletion delivery/handlers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,30 @@

import jsonschema

# Status codes
OK = 200
ACCEPTED = 202
NO_CONTENT = 204

BAD_REQUEST = 400
FORBIDDEN = 403
NOT_FOUND = 404
INTERNAL_SERVER_ERROR = 500
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you can of course import these codes directly from the http.HTTPStatus module to avoid re-defining them



def exception_handler(postget_request):
def wrapper(self, *args, **kwargs):
try:
postget_request(self, *args, **kwargs)
except FileNotFoundError as file_not_found_error:
self.set_status(NOT_FOUND)
self.write(str(file_not_found_error))
except PermissionError as permission_error:
self.set_status(FORBIDDEN)
self.write(str(permission_error))
except RuntimeError as runtime_error:
self.set_status(INTERNAL_SERVER_ERROR)
self.write(str(runtime_error))
except jsonschema.ValidationError as validation_error:
self.set_status(BAD_REQUEST)
self.write(str(validation_error))
return wrapper
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 Very nice!

111 changes: 110 additions & 1 deletion delivery/handlers/organise_handlers.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@

import logging
import pathlib
import yaml
import json
import jsonschema

from arteria.web.handlers import BaseRestHandler
from delivery.exceptions import ProjectsDirNotfoundException, ChecksumFileNotFoundException, FileNameParsingException, \
SamplesheetNotFoundException, ProjectReportNotFoundException, ProjectAlreadyOrganisedException
from delivery.handlers import OK, NOT_FOUND, INTERNAL_SERVER_ERROR, FORBIDDEN
from delivery.handlers import OK, NOT_FOUND, INTERNAL_SERVER_ERROR, FORBIDDEN, exception_handler

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -81,3 +85,108 @@ def post(self, runfolder_id):
except FileNameParsingException as e:
log.error(str(e), exc_info=e)
self.set_status(INTERNAL_SERVER_ERROR, reason=str(e))


class OrganiseProjectAnalysisHandler(BaseOrganiseHandler):
"""
Handler class for organizing a project after analysis in preparation for
staging and delivery.
"""

def initialize(self, **kwargs):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it an expected pattern in the handlers to have a initialize-function instead of/in addition to the constructor?

self.config = kwargs["config"]
self.organise_service = kwargs["organise_service"]

@exception_handler
def post(self, analysis_pipeline, project):
organise_config_dir = pathlib.Path(self.config["organise_config_dir"])
organise_config_path = organise_config_dir / f"{analysis_pipeline}.md"

project_path = pathlib.Path(self.config["general_project_directory"]) / project

# TODO once we have not found exceptions, use these here
if not organise_config_path.is_file():
raise FileNotFoundError(
f"Config file not found at {organise_config_path}")
with open(organise_config_path, 'r') as organise_config_file:
config = yaml.load(organise_config_file, Loader=yaml.CLoader)
with open(organise_config_dir / "schema.json", 'r') as organise_config_schema:
schema = json.load(organise_config_schema)
jsonschema.validate(config, schema)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for re-usability, it could be nice to break out the validation code to its own (static) function, perhaps even move it to another service if that fits with the overall code structure

maybe it's up for consideration if the validation should happen here in the handler or downstream in the service?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also, this code is more or less repeated (with slight variations) in all three organize handlers, could you consider breaking it out to a helper function that can be called instead?


if not project_path.is_dir():
raise FileNotFoundError(
f"Project {project} not found at {project_path}")

self.organise_service.organise_with_config(
str(organise_config_path), str(project_path))


class OrganiseProjectHandler(BaseOrganiseHandler):
"""
Handler class for organizing a project from a custom config file.
"""

def initialize(self, **kwargs):
self.config = kwargs["config"]
self.organise_service = kwargs["organise_service"]

@exception_handler
def post(self, project):
required_members = ["config"]
request_data = self.body_as_object(required_members=required_members)
organise_config_path = pathlib.Path(request_data["config"])
project_path = pathlib.Path(self.config["general_project_directory"]) / project

if not organise_config_path.is_file():
raise FileNotFoundError(
f"Config file not found at {organise_config_path}")
with open(organise_config_path, 'r') as organise_config_file:
config = yaml.load(organise_config_file, Loader=yaml.CLoader)
with open(pathlib.Path(self.config["organise_config_dir"]) / "schema.json", 'r') as organise_config_schema:
schema = json.load(organise_config_schema)
jsonschema.validate(config, schema)

if not project_path.is_dir():
raise FileNotFoundError(
f"Project {project} not found at {project_path}")

self.organise_service.organise_with_config(
str(organise_config_path), str(project_path))

class OrganiseRunfolderConfigHandler(BaseOrganiseHandler):
"""
Handler class for organizing a runfolder from a config file.
"""

def initialize(self, **kwargs):
self.config = kwargs["config"]
self.organise_service = kwargs["organise_service"]

@exception_handler
def post(self, runfolder):
request_data = self.body_as_object()

try:
organise_config_path = pathlib.Path(request_data["config"])
except KeyError:
organise_config_dir = pathlib.Path(self.config["organise_config_dir"])
organise_config_path = organise_config_dir / "runfolder.yml"

runfolder_path = pathlib.Path(self.config["runfolder_directory"]) / runfolder

if not organise_config_path.is_file():
raise FileNotFoundError(
f"Config file not found at {organise_config_path}")
with open(organise_config_path, 'r') as organise_config_file:
config = yaml.load(organise_config_file, Loader=yaml.CLoader)
with open(organise_config_dir / "schema.json", 'r') as organise_config_schema:
schema = json.load(organise_config_schema)
jsonschema.validate(config, schema)

if not runfolder_path.is_dir():
raise FileNotFoundError(
f"Runfolder {runfolder} not found at {runfolder_path}")

self.organise_service.organise_with_config(
str(organise_config_path), str(runfolder_path))
2 changes: 1 addition & 1 deletion requirements/dev
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-r prod
mock==4.0.3
nose==1.3.7

yamllint==1.32.0
2 changes: 2 additions & 0 deletions requirements/prod
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,6 @@ sqlalchemy==1.4.35
alembic==1.7.7
enum34==1.1.10
arteria==1.1.4
jsonschema==4.19
dds-cli
PyYAML==6.0.1
Loading