From 5c1a7ca13b741044a654dec9eed94e40b69a4b26 Mon Sep 17 00:00:00 2001 From: Anandashankar Anil Date: Mon, 8 Jan 2024 10:34:04 +0100 Subject: [PATCH 1/7] Fix a few bugs and replace dates_prio with value from config --- daily_read/ngi_data.py | 47 +++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/daily_read/ngi_data.py b/daily_read/ngi_data.py index f739f49..19266a4 100644 --- a/daily_read/ngi_data.py +++ b/daily_read/ngi_data.py @@ -128,7 +128,7 @@ def save_data(self): abs_path = os.path.join(self.data_location, project_record.relative_path) if os.path.dirname(abs_path) != source_year_dir: # This should really never happen - raise ValueError(f"Error with paths, dirname of {abs_path} should be {year_dir}") + raise ValueError(f"Error with paths, dirname of {abs_path} should be {source_year_dir}") with open(abs_path, mode="w") as fh: log.debug(f"Writing data for {project_record.project_id} to {abs_path}") @@ -188,7 +188,9 @@ def get_modified_or_new_projects(self): internal_id, internal_name, ) = ProjectDataRecord.data_from_file(project_path) - project_record = ProjectDataRecord(project_path, orderer, project_dates, internal_id, internal_name) + project_record = ProjectDataRecord( + project_path, orderer, project_dates, internal_id, internal_name, self.config.STATUS_PRIORITY_REV + ) projects_list.append(project_record) return projects_list @@ -218,23 +220,7 @@ class ProjectDataRecord(object): Raises ValueError if orderer is not present in data, if data is given """ - dates_prio = { - "All Raw data Delivered": 5, - "All Samples Sequenced": 4, - "Library QC finished": 3, - "Reception Control finished": 2, - "Samples Received": 1, - "None": 0, - } - - def __init__( - self, - relative_path, - orderer, - project_dates, - internal_id=None, - internal_name=None, - ): + def __init__(self, relative_path, orderer, project_dates, internal_id=None, internal_name=None, dates_prio=None): """relative_path: e.g. "NGIS/2023/NGI0002313.json" """ node_year, file_name = os.path.split(relative_path) node, year = os.path.split(node_year) @@ -268,7 +254,7 @@ def __init__( # If multiple statuses for the same date, choose the one with highest prio if len(latest_statuses) > 1: - self.status = sorted(latest_statuses, key=lambda s: self.dates_prio[s], reverse=True)[0] + self.status = sorted(latest_statuses, key=lambda s: dates_prio[s], reverse=True)[0] else: self.status = latest_statuses[0] else: @@ -326,6 +312,7 @@ def __init__(self, config): self.name = "NGI Stockholm" self.dirname = "NGIS" self.statusdb_session = statusdb.StatusDBSession(config) + self.dates_prio = config.STATUS_PRIORITY_REV def get_data(self, project_id=None, close_date=None): """Fetch data from Stockholm StatusDB. @@ -350,7 +337,7 @@ def get_data(self, project_id=None, close_date=None): internal_name = row.value["project_name"] self.data[portal_id] = ProjectDataRecord( - relative_path, orderer, project_dates, internal_id, internal_name + relative_path, orderer, project_dates, internal_id, internal_name, self.dates_prio ) return self.data @@ -364,7 +351,15 @@ def get_entry(self, project_id): order_year = row.value["order_year"] portal_id = row.value["portal_id"] relative_path = f"{self.dirname}/{order_year}/{portal_id}.json" - self.data[portal_id] = ProjectDataRecord(relative_path, data=row.value) + + project_dates = row.value["proj_dates"] + orderer = row.value["orderer"] + internal_id = row.value["project_id"] + internal_name = row.value["project_name"] + + self.data[portal_id] = ProjectDataRecord( + relative_path, orderer, project_dates, internal_id, internal_name, self.dates_prio + ) return raise ValueError(f"Project {project_id} not found in statusdb") @@ -376,8 +371,8 @@ def __init__(self, config): self.name = "SNP&SEQ" self.dirname = "SNPSEQ" - def get_data(self, project_id=None): - pass + def get_data(self, project_id=None, close_date=None): + return {} class UGCProjectData(object): @@ -387,5 +382,5 @@ def __init__(self, config): self.name = "Uppsala Genome Center" self.dirname = "UGC" - def get_data(self, project_id=None): - pass + def get_data(self, project_id=None, close_date=None): + return {} From 5c4cce4c05b46b55291d83efad6ea9a7c5b337ab Mon Sep 17 00:00:00 2001 From: Anandashankar Anil Date: Mon, 8 Jan 2024 10:35:20 +0100 Subject: [PATCH 2/7] Reorder and cleanup existing tests a bit --- tests/test_daily_report.py | 6 +++--- tests/test_order_portal.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/test_daily_report.py b/tests/test_daily_report.py index 0bc62bc..e5490f1 100644 --- a/tests/test_daily_report.py +++ b/tests/test_daily_report.py @@ -1,9 +1,9 @@ -import dotenv import datetime +import dotenv import os -from unittest import mock -from conftest import mocked_requests_get +from conftest import mocked_requests_get +from unittest import mock from daily_read import daily_report, config, ngi_data, order_portal diff --git a/tests/test_order_portal.py b/tests/test_order_portal.py index 5a0f232..c8c77b9 100644 --- a/tests/test_order_portal.py +++ b/tests/test_order_portal.py @@ -1,8 +1,9 @@ +import base64 import dotenv import pytest -from unittest import mock + from conftest import mocked_requests_get -import base64 +from unittest import mock from daily_read import order_portal, config, ngi_data @@ -119,9 +120,10 @@ def test_get_and_process_orders_mult_reports(data_repo_full, mock_project_data_r op.get_orders(orderer=orderer) assert op.all_orders[2]["identifier"] == order_id - with pytest.raises(ValueError) as err: + with pytest.raises( + ValueError, match=f"Multiple reports for Project Progress found in the Order Portal for order {order_id}" + ) as err: op.process_orders(config_values.STATUS_PRIORITY_REV) - assert err.value == f"Multiple reports for Project Progress found in the Order Portal for order {order_id}" def test_base_url_and_api_key_not_set(data_repo_full, mock_project_data_record): @@ -137,13 +139,11 @@ def test_base_url_and_api_key_not_set(data_repo_full, mock_project_data_record): data_master.data = {order_id: mock_project_data_record("open")} config_values.ORDER_PORTAL_URL = None config_values.ORDER_PORTAL_API_KEY = None - with pytest.raises(ValueError) as err: + with pytest.raises(ValueError, match="environment variable ORDER_PORTAL_URL not set") as err: order_portal.OrderPortal(config_values, data_master) - assert err.value == "environment variable ORDER_PORTAL_URL not set" config_values.ORDER_PORTAL_URL = order_portal_url - with pytest.raises(ValueError) as err: + with pytest.raises(ValueError, match="Environment variable ORDER_PORTAL_API_KEY not set") as err: order_portal.OrderPortal(config_values, data_master) - assert err.value == "Environment variable ORDER_PORTAL_API_KEY not set" config_values.ORDER_PORTAL_API_KEY = api_key config_values.ORDER_PORTAL_URL = order_portal_url.rstrip("/") From 6904820e857718388d9e67852ab1b0b8325914bf Mon Sep 17 00:00:00 2001 From: Anandashankar Anil Date: Mon, 8 Jan 2024 10:36:02 +0100 Subject: [PATCH 3/7] Add more test for ngi_data --- tests/.test.env | 2 +- tests/conftest.py | 58 +++++++++++++++++++--- tests/test_ngi_data.py | 108 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 157 insertions(+), 11 deletions(-) diff --git a/tests/.test.env b/tests/.test.env index 45589e5..a57cabf 100644 --- a/tests/.test.env +++ b/tests/.test.env @@ -7,4 +7,4 @@ DAILY_READ_REPORTS_LOCATION='tests/test_reports_location' DAILY_READ_FETCH_FROM_NGIS = true DAILY_READ_FETCH_FROM_SNPSEQ = true DAILY_READ_FETCH_FROM_UGC = true -#DAILY_READ_LOG_LOCATION='tests/log_location' +DAILY_READ_USERS_LIST_LOCATION= \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 642c2fb..b5bb614 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,11 +1,13 @@ +import copy +import couchdb import git +import json import os import pytest -import json -from daily_read import ngi_data + from datetime import date, timedelta -import copy +from daily_read import ngi_data, config dummy_order_open = { "orderer": "dummy@dummy.se", @@ -25,8 +27,8 @@ "2023-07-28": ["All Samples Sequenced"], "2023-07-29": ["All Raw data Delivered"], }, - "internal_id": "P123456", - "internal_name": "D.Dummysson_23_01", + "internal_id": "P123455", + "internal_name": "D.Dummysson_23_02", } order_portal_resp_order_processing = { @@ -159,7 +161,7 @@ "fields": { "assigned_node": "Stockholm", "project_ngi_identifier": "P123455", - "project_ngi_name": "D.Dummysson_23_01", + "project_ngi_name": "D.Dummysson_23_02", }, } @@ -298,6 +300,7 @@ def data_repo_full( @pytest.fixture def mock_project_data_record(): def _method(status): + config_values = config.Config() if status == "open": mock_record = ngi_data.ProjectDataRecord( "NGIS/2023/NGI123456.json", @@ -305,6 +308,7 @@ def _method(status): dummy_order_open["project_dates"], dummy_order_open["internal_id"], dummy_order_open["internal_name"], + config_values.STATUS_PRIORITY_REV, ) if status == "closed": mock_record = ngi_data.ProjectDataRecord( @@ -313,6 +317,7 @@ def _method(status): dummy_order_closed["project_dates"], dummy_order_closed["internal_id"], dummy_order_closed["internal_name"], + config_values.STATUS_PRIORITY_REV, ) if status == "open_with_report": mock_record = ngi_data.ProjectDataRecord( @@ -321,6 +326,7 @@ def _method(status): dummy_order_open["project_dates"], dummy_order_open["internal_id"], dummy_order_open["internal_name"], + config_values.STATUS_PRIORITY_REV, ) return mock_record @@ -357,3 +363,43 @@ def json(self): ) return MockResponse(None, 404) + + +@pytest.fixture +def mocked_statusdb_conn_rows(): + """To substitute return value for the daily_read.statusdb.StatusDBSession.rows method""" + row1 = couchdb.client.Row( + id="b77d4f", + key=["XXXX-XX-XX", "P123457"], + value={ + "orderer": "dummy@dummy.se", + "portal_id": "NGI123457", + "order_year": "2023", + "project_id": "P123457", + "project_name": "D.Dummysson_23_03", + "proj_dates": { + "2023-06-15": ["Samples Received"], + "2023-06-28": ["Reception Control finished", "Library QC finished"], + }, + "status": "Ongoing", + }, + ) + row2 = couchdb.client.Row( + id="b77d4g", + key=[(date.today() - timedelta(days=31)).strftime("%Y-%m-%d"), "P123458"], + value={ + "orderer": "test@dummy.se", + "portal_id": "NGI123458", + "order_year": "2023", + "project_id": "P123458", + "project_name": "T.Dummysson_23_04", + "proj_dates": { + "2023-06-15": ["Samples Received"], + "2023-06-28": ["Reception Control finished", "Library QC finished"], + "2023-07-28": ["All Samples Sequenced"], + "2023-07-29": ["All Raw data Delivered"], + }, + "status": "Closed", + }, + ) + return [row1, row2] diff --git a/tests/test_ngi_data.py b/tests/test_ngi_data.py index d4b44ac..4544daa 100644 --- a/tests/test_ngi_data.py +++ b/tests/test_ngi_data.py @@ -2,11 +2,14 @@ import dotenv from unittest import mock +import pytest +import logging from daily_read import ngi_data, config dotenv.load_dotenv() +LOGGER = logging.getLogger(__name__) ####################################################### TESTS ######################################################### @@ -129,12 +132,111 @@ def test_modified_or_new_tracked(data_repo_tracked): assert len(set(file_names)) == 0 +def test_get_unique_orderers(data_repo_full): + """Test getting unique orders in the project data from statusdb""" + config_values = config.Config() + with mock.patch("daily_read.statusdb.StatusDBSession"): + data_master = ngi_data.ProjectDataMaster(config_values) + data_master.get_data() + orderers = data_master.find_unique_orderers() + assert orderers == set(["dummy@dummy.se"]) + + +def test_user_list(data_repo_full, tmp_path, mocked_statusdb_conn_rows): + """Test getting and reading users from the user list url""" + config_values = config.Config() + temp_file = tmp_path / "test_file.txt" + temp_file.write_text("dummy@dummy.se\ntest@dummy.se") + config_values.USERS_LIST_LOCATION = temp_file + with mock.patch("daily_read.statusdb.StatusDBSession"): + data_master = ngi_data.ProjectDataMaster(config_values) + assert not set(["dummy@dummy.se", "test@dummy.se"]) ^ set(data_master.user_list) + + data_master.sources[0].statusdb_session.rows.return_value = mocked_statusdb_conn_rows + data_master.get_data() + data_master.save_data() + orderers = data_master.find_unique_orderers() + assert orderers == set(["dummy@dummy.se", "test@dummy.se"]) + + +def test_save_data_to_disk(data_repo_full, mocked_statusdb_conn_rows): + """Test saving in git repo the data gotten from statusdb""" + config_values = config.Config() + with mock.patch("daily_read.statusdb.StatusDBSession"): + data_master = ngi_data.ProjectDataMaster(config_values) + data_master.sources[0].statusdb_session.rows.return_value = mocked_statusdb_conn_rows + data_master.get_data() + data_master.save_data() + assert os.path.exists(os.path.join(config_values.DATA_LOCATION, "NGIS/2023/NGI123457.json")) + assert os.path.exists(os.path.join(config_values.DATA_LOCATION, "NGIS/2023/NGI123458.json")) + + +def test_get_data_with_project(data_repo_full, mocked_statusdb_conn_rows): + """Test getting data for a specific order""" + config_values = config.Config() + with mock.patch("daily_read.statusdb.StatusDBSession"): + data_master = ngi_data.ProjectDataMaster(config_values) + data_master.sources[0].statusdb_session.rows.return_value = mocked_statusdb_conn_rows + data_master.get_data("NGI123457") + assert len(data_master.data.keys()) == 1 + assert "NGI123457" in data_master.data + + +def test_get_data_with_project_unknown(data_repo_full, mocked_statusdb_conn_rows): + """Test error thrown when the order specified is not found in statusdb""" + config_values = config.Config() + with mock.patch("daily_read.statusdb.StatusDBSession"): + data_master = ngi_data.ProjectDataMaster(config_values) + data_master.sources[0].statusdb_session.rows.return_value = mocked_statusdb_conn_rows + with pytest.raises(ValueError, match="Project NGI123 not found in statusdb") as err: + data_master.get_data("NGI123") + + +def test_data_loc_not_abs(): + """Test error thrown when given data location is not an absolute path""" + config_values = config.Config() + config_values.DATA_LOCATION = "tests/test_data_location" + with pytest.raises( + ValueError, match=f"Data location is not an absolute path: {config_values.DATA_LOCATION}" + ) as err: + ngi_data.ProjectDataMaster(config_values) + + +def test_data_loc_not_dir(tmp_path): + """Test error thrown when data location is not a directory""" + config_values = config.Config() + temp_file = tmp_path / "test_file.txt" + temp_file.write_text("test") + config_values.DATA_LOCATION = temp_file + with pytest.raises( + ValueError, match=f"Data Location exists but is not a directory: {config_values.DATA_LOCATION}" + ) as err: + ngi_data.ProjectDataMaster(config_values) + + +def test_get_data_with_no_project_dates(data_repo_full, mocked_statusdb_conn_rows, caplog): + """Test log output when no project dates are found in statusdb for a specifi project""" + from copy import deepcopy + + config_values = config.Config() + with mock.patch("daily_read.statusdb.StatusDBSession"): + data_master = ngi_data.ProjectDataMaster(config_values) + data_master.sources[0].statusdb_session.rows.return_value = mocked_statusdb_conn_rows + proj_no_dates = deepcopy(data_master.sources[0].statusdb_session.rows.return_value[0]) + proj_no_dates.value["proj_dates"] = {} + proj_no_dates.value["portal_id"] = "NGI123459" + data_master.sources[0].statusdb_session.rows.return_value.append(proj_no_dates) + with caplog.at_level(logging.INFO): + data_master.get_data("NGI123459") + assert len(data_master.data.keys()) == 1 + assert "NGI123459" in data_master.data + assert "No project dates found for NGI123459" in caplog.text + + # Planned tests # def test_getting_data(): - # Test getting data - # When everything is ok # When some source fails # When all sources fails # When some source is not enabled @@ -145,8 +247,6 @@ def test_getting_data(): pass -# Test saving data - # Test add data for staging # If other things are added already # Test adding something that isn't changed From 7fd0458b865f44b5dc68b3d5bc68f330cb9176d6 Mon Sep 17 00:00:00 2001 From: Anandashankar Anil Date: Mon, 8 Jan 2024 11:07:49 +0100 Subject: [PATCH 4/7] Mock statusdb conn --- tests/test_ngi_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_ngi_data.py b/tests/test_ngi_data.py index 4544daa..3d8aa5c 100644 --- a/tests/test_ngi_data.py +++ b/tests/test_ngi_data.py @@ -196,7 +196,7 @@ def test_data_loc_not_abs(): """Test error thrown when given data location is not an absolute path""" config_values = config.Config() config_values.DATA_LOCATION = "tests/test_data_location" - with pytest.raises( + with mock.patch("daily_read.statusdb.StatusDBSession") and pytest.raises( ValueError, match=f"Data location is not an absolute path: {config_values.DATA_LOCATION}" ) as err: ngi_data.ProjectDataMaster(config_values) @@ -208,7 +208,7 @@ def test_data_loc_not_dir(tmp_path): temp_file = tmp_path / "test_file.txt" temp_file.write_text("test") config_values.DATA_LOCATION = temp_file - with pytest.raises( + with mock.patch("daily_read.statusdb.StatusDBSession") and pytest.raises( ValueError, match=f"Data Location exists but is not a directory: {config_values.DATA_LOCATION}" ) as err: ngi_data.ProjectDataMaster(config_values) From 296b7fde725707b9455f25b7d78b7aee89f40d64 Mon Sep 17 00:00:00 2001 From: Anandashankar Anil Date: Mon, 8 Jan 2024 11:12:50 +0100 Subject: [PATCH 5/7] Try something with mock --- tests/test_ngi_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_ngi_data.py b/tests/test_ngi_data.py index 3d8aa5c..831a07c 100644 --- a/tests/test_ngi_data.py +++ b/tests/test_ngi_data.py @@ -208,7 +208,7 @@ def test_data_loc_not_dir(tmp_path): temp_file = tmp_path / "test_file.txt" temp_file.write_text("test") config_values.DATA_LOCATION = temp_file - with mock.patch("daily_read.statusdb.StatusDBSession") and pytest.raises( + with mock.patch("daily_read.ngi_data.statusdb.StatusDBSession") and pytest.raises( ValueError, match=f"Data Location exists but is not a directory: {config_values.DATA_LOCATION}" ) as err: ngi_data.ProjectDataMaster(config_values) From ab5f2a96bb737025cebca227a2ee530ee02072b5 Mon Sep 17 00:00:00 2001 From: Anandashankar Anil Date: Mon, 8 Jan 2024 11:39:38 +0100 Subject: [PATCH 6/7] Try more mocking --- tests/test_ngi_data.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_ngi_data.py b/tests/test_ngi_data.py index 831a07c..3c4f7e0 100644 --- a/tests/test_ngi_data.py +++ b/tests/test_ngi_data.py @@ -192,25 +192,28 @@ def test_get_data_with_project_unknown(data_repo_full, mocked_statusdb_conn_rows data_master.get_data("NGI123") +@mock.patch("daily_read.statusdb.StatusDBSession") def test_data_loc_not_abs(): """Test error thrown when given data location is not an absolute path""" config_values = config.Config() config_values.DATA_LOCATION = "tests/test_data_location" - with mock.patch("daily_read.statusdb.StatusDBSession") and pytest.raises( + with pytest.raises( ValueError, match=f"Data location is not an absolute path: {config_values.DATA_LOCATION}" ) as err: ngi_data.ProjectDataMaster(config_values) +@mock.patch("daily_read.statusdb.StatusDBSession") def test_data_loc_not_dir(tmp_path): """Test error thrown when data location is not a directory""" config_values = config.Config() temp_file = tmp_path / "test_file.txt" temp_file.write_text("test") config_values.DATA_LOCATION = temp_file - with mock.patch("daily_read.ngi_data.statusdb.StatusDBSession") and pytest.raises( + with pytest.raises( ValueError, match=f"Data Location exists but is not a directory: {config_values.DATA_LOCATION}" ) as err: + print("hi") ngi_data.ProjectDataMaster(config_values) From b680759de1e4636f747d1c8ddba69c85cb2df813 Mon Sep 17 00:00:00 2001 From: Anandashankar Anil Date: Mon, 8 Jan 2024 12:00:15 +0100 Subject: [PATCH 7/7] Mock mock --- tests/test_ngi_data.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_ngi_data.py b/tests/test_ngi_data.py index 3c4f7e0..19d33d7 100644 --- a/tests/test_ngi_data.py +++ b/tests/test_ngi_data.py @@ -193,7 +193,7 @@ def test_get_data_with_project_unknown(data_repo_full, mocked_statusdb_conn_rows @mock.patch("daily_read.statusdb.StatusDBSession") -def test_data_loc_not_abs(): +def test_data_loc_not_abs(mock_status): """Test error thrown when given data location is not an absolute path""" config_values = config.Config() config_values.DATA_LOCATION = "tests/test_data_location" @@ -204,7 +204,7 @@ def test_data_loc_not_abs(): @mock.patch("daily_read.statusdb.StatusDBSession") -def test_data_loc_not_dir(tmp_path): +def test_data_loc_not_dir(mock_status, tmp_path): """Test error thrown when data location is not a directory""" config_values = config.Config() temp_file = tmp_path / "test_file.txt" @@ -213,7 +213,6 @@ def test_data_loc_not_dir(tmp_path): with pytest.raises( ValueError, match=f"Data Location exists but is not a directory: {config_values.DATA_LOCATION}" ) as err: - print("hi") ngi_data.ProjectDataMaster(config_values)