-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement new organise handlers #47
base: master
Are you sure you want to change the base?
Changes from all commits
dd7f9fb
ca0608f
1822c5f
6e49202
a08d129
f649b10
7777dd1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
extends: default | ||
|
||
rules: | ||
line-length: | ||
max: 80 | ||
level: warning |
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
--- | ||
# Input: runfolder_name | ||
|
||
# Currently named runfolderpath and runfolder to harmonize with rnaseq config | ||
# by Monika, but for specific runfolder delivery, technically, these could be | ||
# "merged" to one. | ||
variables: | ||
runfolderpath: /proj/ngi2016001/incoming | ||
runfolder: "{runfolderpath}/{runfolder_name}" | ||
organised: "{runfolder}/Projects/" | ||
|
||
# current path is | ||
# /proj/ngi2016001/incoming/<RUNFOLDER>/Projects/<PROJECT>/<RUNFOLDER>/ The | ||
# following is based on that we are to keep the same directory structure as | ||
# above. | ||
files_to_organise: | ||
|
||
# The fastq files | ||
- source: "{runfolder}/Unaligned/*" | ||
destination: "{organised}/{projectid}/{runfolder_name}/Sample_{samplename}/" | ||
options: | ||
required: true | ||
link_type: softlink | ||
filter: (?P<projectid>[\w-]+)/Sample_(?P<samplename>[\w-]+)/(?P=samplename)_S(?P<samplenumber>\d+)_L(?P<lanes>\d+)_R(?P<read>\d)_001.fastq.gz # yamllint disable-line | ||
|
||
# The MultiQC files | ||
- source: "{runfolder}/seqreports/project/*" | ||
destination: "{organised}/{projectid}/<runfolder_name>/" | ||
options: | ||
required: true | ||
link_type: softlink | ||
filter: (?P<projectid>[\w-]+)/\w+_(?P=projectid)_multiqc_report[\w.-]+ | ||
|
||
# what we are lacking, and what might need to be created outside the config is: | ||
# 1. checksums.md5 | ||
# 2. Encrypted samplesheet | ||
# As far as I know, these don't exist prior to organization. | ||
|
||
# hypothethical undetermined, needs an input of lane(s) connected to project | ||
# We will not include this at this point, as it requires additional input. Will | ||
# maybe end up in its own config or something. | ||
# - source: <RUNFOLDER>/Unaligned/Undetermined/Undetermined_S0_L<lanes>_R<read>_001.fastq.gz | ||
# destination: <RUNFOLDER>/Unaligned/<PROJECT>/Undetermined/ | ||
# options: | ||
# required: False | ||
# symlink: True |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
{ | ||
"type": "object", | ||
"required": ["variables", "files_to_organise"], | ||
"properties": { | ||
"variables": { | ||
"type": "object", | ||
"additionalProperties": {"type": "string"} | ||
}, | ||
"files_to_organise": { | ||
"type": "array", | ||
"items": { | ||
"type": "object", | ||
"required": ["source", "destination", "options"], | ||
"properties": { | ||
"source": {"type": "string"}, | ||
"destination": {"type": "string"}, | ||
"options": { | ||
"type": "object", | ||
"properties": { | ||
"required": {"type": "boolean"}, | ||
"link_type": { | ||
"type": "string", | ||
"enum": ["softlink", "hardlink", "copy"] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we decided on removing the hardlink option |
||
}, | ||
"filter": {"type": "string"} | ||
}, | ||
"additionalProperties": false | ||
} | ||
} | ||
} | ||
} | ||
}, | ||
"additionalProperties": false | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,11 @@ | |
from delivery.handlers.delivery_handlers import DeliverByStageIdHandler, DeliveryStatusHandler | ||
from delivery.handlers.staging_handlers import StagingRunfolderHandler, StagingHandler,\ | ||
StageGeneralDirectoryHandler, StagingProjectRunfoldersHandler | ||
from delivery.handlers.organise_handlers import OrganiseRunfolderHandler | ||
from delivery.handlers.organise_handlers import \ | ||
OrganiseRunfolderHandler, \ | ||
OrganiseProjectHandler, \ | ||
OrganiseProjectAnalysisHandler, \ | ||
OrganiseRunfolderConfigHandler | ||
|
||
from delivery.repositories.runfolder_repository import FileSystemBasedRunfolderRepository, \ | ||
FileSystemBasedUnorganisedRunfolderRepository | ||
|
@@ -57,9 +61,22 @@ def routes(**kwargs): | |
url(r"/api/1.0/project/([^/]+)/best_practice_samples$", BestPracticeProjectSampleHandler, | ||
name="best_practice_samples", kwargs=kwargs), | ||
|
||
# I'm keeping this endpoint for backward compatibility but we should be | ||
# able to remove it once we have updated our workflows to use the new | ||
# endpoints /AC230510 | ||
url(r"/api/1.0/organise/runfolder/([^/]+)", OrganiseRunfolderHandler, | ||
name="organise_runfolder", kwargs=kwargs), | ||
|
||
url(r"/api/1.0/organise/delivery/runfolder/([^/]+)", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we actually need one endpoint for every type of organization we want to do? Shouldn't it be enough to have one OrganiseRunfolderConfigHandler since the organization it self is defined in the config? Please correct me if I get it all wrong :) |
||
OrganiseRunfolderConfigHandler, | ||
name="organise_runfolder", kwargs=kwargs), | ||
url(r"/api/1.0/organise/delivery/project/([^/]+)/([^/]+)", | ||
OrganiseProjectAnalysisHandler, | ||
name="organise_project", kwargs=kwargs), | ||
url(r"/api/1.0/organise/delivery/project/([^/]+)", | ||
OrganiseProjectHandler, | ||
name="organise_project_custom", kwargs=kwargs), | ||
|
||
url(r"/api/1.0/stage/project/runfolders/(.+)", StagingProjectRunfoldersHandler, | ||
name="stage_multiple_runfolders_one_project", kwargs=kwargs), | ||
url(r"/api/1.0/stage/runfolder/(.+)", StagingRunfolderHandler, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,30 @@ | ||
|
||
import jsonschema | ||
|
||
# Status codes | ||
OK = 200 | ||
ACCEPTED = 202 | ||
NO_CONTENT = 204 | ||
|
||
BAD_REQUEST = 400 | ||
FORBIDDEN = 403 | ||
NOT_FOUND = 404 | ||
INTERNAL_SERVER_ERROR = 500 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can of course import these codes directly from the |
||
|
||
|
||
def exception_handler(postget_request): | ||
def wrapper(self, *args, **kwargs): | ||
try: | ||
postget_request(self, *args, **kwargs) | ||
except FileNotFoundError as file_not_found_error: | ||
self.set_status(NOT_FOUND) | ||
self.write(str(file_not_found_error)) | ||
except PermissionError as permission_error: | ||
self.set_status(FORBIDDEN) | ||
self.write(str(permission_error)) | ||
except RuntimeError as runtime_error: | ||
self.set_status(INTERNAL_SERVER_ERROR) | ||
self.write(str(runtime_error)) | ||
except jsonschema.ValidationError as validation_error: | ||
self.set_status(BAD_REQUEST) | ||
self.write(str(validation_error)) | ||
return wrapper | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 Very nice! |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,14 @@ | ||
|
||
import logging | ||
import pathlib | ||
import yaml | ||
import json | ||
import jsonschema | ||
|
||
from arteria.web.handlers import BaseRestHandler | ||
from delivery.exceptions import ProjectsDirNotfoundException, ChecksumFileNotFoundException, FileNameParsingException, \ | ||
SamplesheetNotFoundException, ProjectReportNotFoundException, ProjectAlreadyOrganisedException | ||
from delivery.handlers import OK, NOT_FOUND, INTERNAL_SERVER_ERROR, FORBIDDEN | ||
from delivery.handlers import OK, NOT_FOUND, INTERNAL_SERVER_ERROR, FORBIDDEN, exception_handler | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
|
@@ -81,3 +85,108 @@ def post(self, runfolder_id): | |
except FileNameParsingException as e: | ||
log.error(str(e), exc_info=e) | ||
self.set_status(INTERNAL_SERVER_ERROR, reason=str(e)) | ||
|
||
|
||
class OrganiseProjectAnalysisHandler(BaseOrganiseHandler): | ||
""" | ||
Handler class for organizing a project after analysis in preparation for | ||
staging and delivery. | ||
""" | ||
|
||
def initialize(self, **kwargs): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is it an expected pattern in the handlers to have a |
||
self.config = kwargs["config"] | ||
self.organise_service = kwargs["organise_service"] | ||
|
||
@exception_handler | ||
def post(self, analysis_pipeline, project): | ||
organise_config_dir = pathlib.Path(self.config["organise_config_dir"]) | ||
organise_config_path = organise_config_dir / f"{analysis_pipeline}.md" | ||
|
||
project_path = pathlib.Path(self.config["general_project_directory"]) / project | ||
|
||
# TODO once we have not found exceptions, use these here | ||
if not organise_config_path.is_file(): | ||
raise FileNotFoundError( | ||
f"Config file not found at {organise_config_path}") | ||
with open(organise_config_path, 'r') as organise_config_file: | ||
config = yaml.load(organise_config_file, Loader=yaml.CLoader) | ||
with open(organise_config_dir / "schema.json", 'r') as organise_config_schema: | ||
schema = json.load(organise_config_schema) | ||
jsonschema.validate(config, schema) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. for re-usability, it could be nice to break out the validation code to its own (static) function, perhaps even move it to another service if that fits with the overall code structure maybe it's up for consideration if the validation should happen here in the handler or downstream in the service? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also, this code is more or less repeated (with slight variations) in all three organize handlers, could you consider breaking it out to a helper function that can be called instead? |
||
|
||
if not project_path.is_dir(): | ||
raise FileNotFoundError( | ||
f"Project {project} not found at {project_path}") | ||
|
||
self.organise_service.organise_with_config( | ||
str(organise_config_path), str(project_path)) | ||
|
||
|
||
class OrganiseProjectHandler(BaseOrganiseHandler): | ||
""" | ||
Handler class for organizing a project from a custom config file. | ||
""" | ||
|
||
def initialize(self, **kwargs): | ||
self.config = kwargs["config"] | ||
self.organise_service = kwargs["organise_service"] | ||
|
||
@exception_handler | ||
def post(self, project): | ||
required_members = ["config"] | ||
request_data = self.body_as_object(required_members=required_members) | ||
organise_config_path = pathlib.Path(request_data["config"]) | ||
project_path = pathlib.Path(self.config["general_project_directory"]) / project | ||
|
||
if not organise_config_path.is_file(): | ||
raise FileNotFoundError( | ||
f"Config file not found at {organise_config_path}") | ||
with open(organise_config_path, 'r') as organise_config_file: | ||
config = yaml.load(organise_config_file, Loader=yaml.CLoader) | ||
with open(pathlib.Path(self.config["organise_config_dir"]) / "schema.json", 'r') as organise_config_schema: | ||
schema = json.load(organise_config_schema) | ||
jsonschema.validate(config, schema) | ||
|
||
if not project_path.is_dir(): | ||
raise FileNotFoundError( | ||
f"Project {project} not found at {project_path}") | ||
|
||
self.organise_service.organise_with_config( | ||
str(organise_config_path), str(project_path)) | ||
|
||
class OrganiseRunfolderConfigHandler(BaseOrganiseHandler): | ||
""" | ||
Handler class for organizing a runfolder from a config file. | ||
""" | ||
|
||
def initialize(self, **kwargs): | ||
self.config = kwargs["config"] | ||
self.organise_service = kwargs["organise_service"] | ||
|
||
@exception_handler | ||
def post(self, runfolder): | ||
request_data = self.body_as_object() | ||
|
||
try: | ||
organise_config_path = pathlib.Path(request_data["config"]) | ||
except KeyError: | ||
organise_config_dir = pathlib.Path(self.config["organise_config_dir"]) | ||
organise_config_path = organise_config_dir / "runfolder.yml" | ||
|
||
runfolder_path = pathlib.Path(self.config["runfolder_directory"]) / runfolder | ||
|
||
if not organise_config_path.is_file(): | ||
raise FileNotFoundError( | ||
f"Config file not found at {organise_config_path}") | ||
with open(organise_config_path, 'r') as organise_config_file: | ||
config = yaml.load(organise_config_file, Loader=yaml.CLoader) | ||
with open(organise_config_dir / "schema.json", 'r') as organise_config_schema: | ||
schema = json.load(organise_config_schema) | ||
jsonschema.validate(config, schema) | ||
|
||
if not runfolder_path.is_dir(): | ||
raise FileNotFoundError( | ||
f"Runfolder {runfolder} not found at {runfolder_path}") | ||
|
||
self.organise_service.organise_with_config( | ||
str(organise_config_path), str(runfolder_path)) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
-r prod | ||
mock==4.0.3 | ||
nose==1.3.7 | ||
|
||
yamllint==1.32.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,4 +5,6 @@ sqlalchemy==1.4.35 | |
alembic==1.7.7 | ||
enum34==1.1.10 | ||
arteria==1.1.4 | ||
jsonschema==4.19 | ||
dds-cli | ||
PyYAML==6.0.1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice!