Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dataops 467: Removed runfolder directory created in delivery_runfolders_for_project_workflow #57

Merged
merged 12 commits into from
Oct 11, 2024
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: Run Unit Tests

on: [push]
on: [push, pull_request]

jobs:
build:
Expand Down
4 changes: 3 additions & 1 deletion delivery/services/delivery_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,9 @@ def _create_links_area_for_project_runfolders(self, project_name, projects, batc
for project in projects:
try:
link_name = os.path.join(project_dir, project.runfolder_name)
self.file_system_service.symlink(project.path, link_name)
self.file_system_service.symlink(
os.path.join(project.path, project.runfolder_name), link_name
)
except FileExistsError as e:
log.error("Project link: {} already exists".format(project_dir))
raise e
Expand Down
3 changes: 1 addition & 2 deletions delivery/services/organise_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def organise_project(
sample,
organised_project_runfolder_path,
lanes))
# symlink the project files
# symlink the project files
organised_project_files = []
if project.project_files:
for project_file in project.project_files:
Expand Down Expand Up @@ -147,7 +147,6 @@ def organise_project_file(self, project_file, organised_project_path):
before organisation
:param organised_project_path: path where the project will be organised
"""

# the relative path from the project file base to the project file (e.g. plots/filename.png)
relpath = self.file_system_service.relpath(
project_file.file_path,
Expand Down
10 changes: 5 additions & 5 deletions delivery/services/staging_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ def _copy_dir(staging_order_id, external_program_service, session_factory, stagi
cmd = ['rsync', '--stats', '-r', '--copy-links', '--times',
staging_source_with_trailing_slash, staging_order.staging_target]
log.debug("Running rsync with command: {}".format(" ".join(cmd)))

execution = external_program_service.run(cmd)

staging_order.pid = execution.pid
Expand All @@ -103,11 +102,12 @@ def _copy_dir(staging_order_id, external_program_service, session_factory, stagi

# Parse the file size from the output of rsync stats:
# Total file size: 207,707,566 bytes
match = re.search(r'Total file size: ([\d,]+) bytes',
execution_result.stdout,
re.MULTILINE)

match = re.search(r'Total file size: ([\d,.]+) bytes',
execution_result.stdout,
re.MULTILINE)
size_of_transfer = match.group(1)
size_of_transfer = int(size_of_transfer.replace(",", ""))
size_of_transfer = int(size_of_transfer.replace(",", "").replace(".", ""))
staging_order.size = size_of_transfer

staging_order.status = StagingStatus.staging_successful
Expand Down
4 changes: 3 additions & 1 deletion tests/integration_tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,10 @@ def __init__(self, *args):
# Default duration of mock delivery
self.mock_duration = 0.1

def _create_projects_dir_with_random_data(self, base_dir, proj_name='ABC_123'):
def _create_projects_dir_with_random_data(self, base_dir, proj_name='ABC_123', runfolder_name=None):
tmp_proj_dir = os.path.join(base_dir, 'Projects', proj_name)
if runfolder_name:
tmp_proj_dir = os.path.join(tmp_proj_dir, runfolder_name)
os.makedirs(tmp_proj_dir)
with open(os.path.join(tmp_proj_dir, 'test_file'), 'wb') as f:
f.write(os.urandom(1024))
Expand Down
50 changes: 38 additions & 12 deletions tests/integration_tests/test_integration_dds.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import json
import time
import tempfile
Expand All @@ -7,7 +8,7 @@
from delivery.models.db_models import StagingStatus, DeliveryStatus

from tests.integration_tests.base import BaseIntegration

from tests.test_utils import unorganised_runfolder

class TestIntegrationDDS(BaseIntegration):
@gen_test
Expand Down Expand Up @@ -118,8 +119,8 @@ def test_can_stage_and_deliver_clean_flowcells(self):
prefix='160930_ST-E00216_0555_BH37CWALXX_') as tmpdir1,\
tempfile.TemporaryDirectory(dir='./tests/resources/runfolders/',
prefix='160930_ST-E00216_0556_BH37CWALXX_') as tmpdir2:
self._create_projects_dir_with_random_data(tmpdir1, 'XYZ_123')
self._create_projects_dir_with_random_data(tmpdir2, 'XYZ_123')
self._create_projects_dir_with_random_data(tmpdir1, 'XYZ_123', os.path.basename(tmpdir1))
self._create_projects_dir_with_random_data(tmpdir2, 'XYZ_123', os.path.basename(tmpdir2))

url = "/".join([self.API_BASE, "stage", "project", 'runfolders', 'XYZ_123'])
payload = {'delivery_mode': 'CLEAN'}
Expand All @@ -146,12 +147,13 @@ def test_can_stage_and_deliver_batch_flowcells(self):
prefix='160930_ST-E00216_0555_BH37CWALXX_') as tmpdir1, \
tempfile.TemporaryDirectory(dir='./tests/resources/runfolders/',
prefix='160930_ST-E00216_0556_BH37CWALXX_') as tmpdir2:
self._create_projects_dir_with_random_data(tmpdir1, 'XYZ_123')
self._create_projects_dir_with_random_data(tmpdir2, 'XYZ_123')
self._create_projects_dir_with_random_data(tmpdir1, 'XYZ_123', os.path.basename(tmpdir1))
self._create_projects_dir_with_random_data(tmpdir2, 'XYZ_123', os.path.basename(tmpdir2))

url = "/".join([self.API_BASE, "stage", "project", 'runfolders', 'XYZ_123'])
payload = {'delivery_mode': 'BATCH'}
response = yield self.http_client.fetch(self.get_url(url), method='POST', body=json.dumps(payload))

self.assertEqual(response.code, 202)

payload = {'delivery_mode': 'BATCH'}
Expand All @@ -171,17 +173,35 @@ def test_can_stage_and_deliver_batch_flowcells(self):
status_response = yield self.http_client.fetch(link)
self.assertEqual(json.loads(status_response.body)["status"], StagingStatus.staging_successful.name)

def create_unorganised_test_runfolders(self, tmpdir):
return(
unorganised_runfolder(
name=os.path.basename(tmpdir),
root_path=os.path.dirname(tmpdir))
)
Aratz marked this conversation as resolved.
Show resolved Hide resolved
@gen_test
def test_can_stage_and_deliver_force_flowcells(self):
Aratz marked this conversation as resolved.
Show resolved Hide resolved
with tempfile.TemporaryDirectory(dir='./tests/resources/runfolders/',
prefix='160930_ST-E00216_0555_BH37CWALXX_') as tmpdir1, \
prefix='160930_ST-E00216_0555_BH37CWALXX_') as tmpdir1, \
Aratz marked this conversation as resolved.
Show resolved Hide resolved
tempfile.TemporaryDirectory(dir='./tests/resources/runfolders/',
prefix='160930_ST-E00216_0556_BH37CWALXX_') as tmpdir2:
self._create_projects_dir_with_random_data(tmpdir1, 'XYZ_123')
self._create_projects_dir_with_random_data(tmpdir2, 'XYZ_123')

# First just stage it
url = "/".join([self.API_BASE, "stage", "project", 'runfolders', 'XYZ_123'])
# First organise
unorganised_runfolder1 = self.create_unorganised_test_runfolders(tmpdir1)
unorganised_runfolder2 = self.create_unorganised_test_runfolders(tmpdir2)

self._create_runfolder_structure_on_disk(unorganised_runfolder1)
self._create_runfolder_structure_on_disk(unorganised_runfolder2)

url = "/".join([self.API_BASE, "organise", "runfolder", unorganised_runfolder1.name])
response1 = yield self.http_client.fetch(self.get_url(url), method='POST', body='')
self.assertEqual(response1.code, 200)

url = "/".join([self.API_BASE, "organise", "runfolder", unorganised_runfolder2.name])
response2 = yield self.http_client.fetch(self.get_url(url), method='POST', body='')
self.assertEqual(response2.code, 200)

# Then stage it
url = "/".join([self.API_BASE, "stage", "project", 'runfolders', 'JKL_123'])
payload = {'delivery_mode': 'BATCH'}
response = yield self.http_client.fetch(self.get_url(url), method='POST', body=json.dumps(payload))
self.assertEqual(response.code, 202)
Expand All @@ -199,12 +219,18 @@ def test_can_stage_and_deliver_force_flowcells(self):
response_json = json.loads(response_forced.body)

staging_status_links = response_json.get("staging_order_links")
staging_order_ids = response_json.get("staging_order_ids")

# Assert the staged folder structure has only one runfolder folder
temp_staging_dir = f"/tmp/{staging_order_ids.get('JKL_123')}/JKL_123"
for runfolder in os.listdir(temp_staging_dir):
self.assertFalse(set(runfolder).issuperset(set(os.listdir(f"{temp_staging_dir}/{runfolder}"))))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a question here, is it necessary to check this using sets? Since listdir returns a list, can't you do this:

Suggested change
self.assertFalse(set(runfolder).issuperset(set(os.listdir(f"{temp_staging_dir}/{runfolder}"))))
self.assertFalse(runfolder in os.listdir(f"{temp_staging_dir}/{runfolder}"))


# Insert a pause to allow staging to complete
time.sleep(1)
matrulda marked this conversation as resolved.
Show resolved Hide resolved

for project, link in staging_status_links.items():
self.assertEqual(project, 'XYZ_123')
self.assertEqual(project, 'JKL_123')

status_response = yield self.http_client.fetch(link)
self.assertEqual(json.loads(status_response.body)["status"], StagingStatus.staging_successful.name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ This delivery includes sequencing data in FASTQ format, a summary report created
└── <SampleN id>
├── <sampleN name>_<sample number>_R1_001.fastq.gz
└── <sampleN name>_<sample number>_R2_001.fastq.gz
```
```
Loading