diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index e1b85f9f4..1bc390f37 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -39,8 +39,8 @@ - [ ] Merged - [ ] No blocking PRs - PR to `master` branch - - [ ] Yes: Read [the release instructions](../doc/procedures/new_release.md) - - [ ] I have followed steps 1-7. + - [ ] Yes: Read [the release instructions](https://github.com/ScilifelabDataCentre/dds_web/blob/master/doc/procedures/new_release.md) + - [ ] I have followed steps 1-8. - [ ] No ## Actions / Scans diff --git a/.github/workflows/docker-compose-tests.yml b/.github/workflows/docker-compose-tests.yml index 55ded68d9..5b5db2891 100644 --- a/.github/workflows/docker-compose-tests.yml +++ b/.github/workflows/docker-compose-tests.yml @@ -24,6 +24,7 @@ jobs: - name: Run tests against database container run: docker-compose -f docker-compose.yml -f tests/docker-compose-test.yml up --build --exit-code-from backend - - uses: codecov/codecov-action@v2 + - uses: codecov/codecov-action@v3 with: + token: ${{ secrets.CODECOV_TOKEN }} files: ./coverage/report.xml diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 44eea0588..f71c69244 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,25 @@ Changelog ========== +.. _2.3.1: + +2.3.1 - 2023-07-05 +~~~~~~~~~~~~~~~~~~~ + +- Dependencies: + - `requests` from `2.27.1` to `2.31.0` + - `redis` from `4.4.4` to `4.5.5` + - `Flask` from `2.0.3` to `2.2.5` +- Statistics: + - Number of TBHours stored in the last month calculated and stored in DB + - Number of TBHours stored since start calculated and stored in DB + - Endpoint `Statistics` to return rows stored in the Reporting table +- Full name of Unit Admins-, Personnel and Super Admins not shown to Researchers; Only display Unit name when... 
+ - Listing projects + - Sending invites + - Releasing projects +- Backend Flask command `lost-files` changed to group command with subcommands `ls`, `delete`, `add-missing-bucket` + .. _2.3.0: 2.3.0 - 2023-06-07 diff --git a/SPRINTLOG.md b/SPRINTLOG.md index 261a57de3..77e5e29d4 100644 --- a/SPRINTLOG.md +++ b/SPRINTLOG.md @@ -254,4 +254,20 @@ _Nothing merged in CLI during this sprint_ - Save number of unique Project Owners ([#1421](https://github.com/ScilifelabDataCentre/dds_web/pull/1421)) - Save amount of TB's currently stored in system ([#1424](https://github.com/ScilifelabDataCentre/dds_web/pull/1424)) - Save amount of TB's uploaded since start ([#1430](https://github.com/ScilifelabDataCentre/dds_web/pull/1430)) + - Save number of TBHours stored in the last month ([#1431](https://github.com/ScilifelabDataCentre/dds_web/pull/1431)) + - Save number of TBHours stored in since start ([#1434](https://github.com/ScilifelabDataCentre/dds_web/pull/1434)) - New version: 2.3.0 ([#1433](https://github.com/ScilifelabDataCentre/dds_web/pull/1433)) +- Dependency: Bump `requests` to 2.31.0 due to security vulnerability alert ([#1427](https://github.com/ScilifelabDataCentre/dds_web/pull/1427)) +- Endpoint: Statistics; Return all rows stored in the Reporting table ([#1435](https://github.com/ScilifelabDataCentre/dds_web/pull/1435)) + +# 2023-06-09 - 2023-06-23 + +- Dependency: Bump `Flask` to 2.2.5 due to security vulnerability alert(s) ([#1425](https://github.com/ScilifelabDataCentre/dds_web/pull/1425)) +- Dependency: Bump `redis-py` to 4.5.5 due to security vulnerability alert(s) ([#1437](https://github.com/ScilifelabDataCentre/dds_web/pull/1437)) +- Change from personal name to unit name if / where it's displayed in emails ([#1439](https://github.com/ScilifelabDataCentre/dds_web/pull/1439)) +- Refactoring: `lost_files_s3_db` flask command changed to group with subcommands ([#1438](https://github.com/ScilifelabDataCentre/dds_web/pull/1438)) + +# 2023-06-26 - 2023-07-14 + +- 
Change display project info depending on the user role ([#1440](https://github.com/ScilifelabDataCentre/dds_web/pull/1440)) +- New version: 2.4.0 ([#1443](https://github.com/ScilifelabDataCentre/dds_web/pull/1443)) diff --git a/dds_web/api/__init__.py b/dds_web/api/__init__.py index 926c6ec39..c5824ea00 100644 --- a/dds_web/api/__init__.py +++ b/dds_web/api/__init__.py @@ -90,6 +90,7 @@ def output_json(data, code, headers=None): superadmin_only.ResetTwoFactor, "/user/totp/deactivate", endpoint="reset_user_hotp" ) api.add_resource(superadmin_only.AnyProjectsBusy, "/proj/busy/any", endpoint="projects_busy_any") +api.add_resource(superadmin_only.Statistics, "/stats", endpoint="stats") # Invoicing ############################################################################ Invoicing # api.add_resource(user.ShowUsage, "/usage", endpoint="usage") diff --git a/dds_web/api/project.py b/dds_web/api/project.py index 3a97c5a2d..fdefa0fcc 100644 --- a/dds_web/api/project.py +++ b/dds_web/api/project.py @@ -488,15 +488,23 @@ def format_project_dict(self, current_user): # Apply the filters user_projects = models.Project.query.filter(sqlalchemy.and_(*all_filters)).all() + researcher = False + if auth.current_user().role not in ["Super Admin", "Unit Admin", "Unit Personnel"]: + researcher = True + # Get info for all projects for p in user_projects: + project_creator = p.creator.name if p.creator else None + if researcher: + project_creator = p.responsible_unit.external_display_name + project_info = { "Project ID": p.public_id, "Title": p.title, "PI": p.pi, "Status": p.current_status, "Last updated": p.date_updated if p.date_updated else p.date_created, - "Created by": p.creator.name if p.creator else "Former User", + "Created by": project_creator or "Former User", } # Get proj size and update total size @@ -967,10 +975,15 @@ def get(self): project = dds_web.utils.collect_project(project_id=project_id) dds_web.utils.verify_project_access(project=project) + # if current user 
Researcher, show unit name instead of creator name + project_creator = project.creator.name if project.creator else None + if auth.current_user().role not in ["Super Admin", "Unit Admin", "Unit Personnel"]: + project_creator = project.responsible_unit.external_display_name + # Construct a dict with info items project_info = { "Project ID": project.public_id, - "Created by": project.creator.name if project.creator else "Former User", + "Created by": project_creator or "Former User", "Status": project.current_status, "Last updated": project.date_updated, "Size": project.size, diff --git a/dds_web/api/superadmin_only.py b/dds_web/api/superadmin_only.py index ae197034e..13e5b618a 100644 --- a/dds_web/api/superadmin_only.py +++ b/dds_web/api/superadmin_only.py @@ -311,3 +311,37 @@ def get(self): return_info.update({"projects": {p.public_id: p.date_updated for p in projects_busy}}) return return_info + + +class Statistics(flask_restful.Resource): + """Get rows from Reporting table.""" + + @auth.login_required(role=["Super Admin"]) + @logging_bind_request + @handle_db_error + def get(self): + """Collect rows from reporting table and return them.""" + stat_rows: typing.List = models.Reporting.query.all() + return { + "stats": [ + { + "Date": str(row.date), + "Units": row.unit_count, + "Researchers": row.researcher_count, + "Project Owners": row.project_owner_unique_count, + "Unit Personnel": row.unit_personnel_count, + "Unit Admins": row.unit_admin_count, + "Super Admins": row.superadmin_count, + "Total Users": row.total_user_count, + "Total Projects": row.total_project_count, + "Active Projects": row.active_project_count, + "Inactive Projects": row.inactive_project_count, + "Data Now (TB)": row.tb_stored_now, + "Data Uploaded (TB)": row.tb_uploaded_since_start, + "TBHours Last Month": row.tbhours, + "TBHours Total": row.tbhours_since_start, + } + for row in stat_rows + if stat_rows + ] + } diff --git a/dds_web/api/user.py b/dds_web/api/user.py index 0eaf764f3..aaedce3b9 
100644 --- a/dds_web/api/user.py +++ b/dds_web/api/user.py @@ -416,26 +416,24 @@ def compose_and_send_email_to_user(userobj, mail_type, link=None, project=None): # userobj likely an invite recipients = [userobj.email] - unit_name = None unit_email = None project_id = None deadline = None + + # Don't display unit admins or personnels name if auth.current_user().role in ["Unit Admin", "Unit Personnel"]: unit = auth.current_user().unit - unit_name = unit.external_display_name unit_email = unit.contact_email - sender_name = auth.current_user().name - subject_subject = unit_name - + displayed_sender = unit.external_display_name + # Display name if Super admin or Project owners else: - sender_name = auth.current_user().name - subject_subject = sender_name + displayed_sender = auth.current_user().name # Fill in email subject with sentence subject if mail_type == "invite": - subject = f"{subject_subject} invites you to the SciLifeLab Data Delivery System" + subject = f"{displayed_sender} invites you to the SciLifeLab Data Delivery System" elif mail_type == "project_release": - subject = f"Project made available by {subject_subject} in the SciLifeLab Data Delivery System" + subject = f"Project made available by {displayed_sender} in the SciLifeLab Data Delivery System" project_id = project.public_id deadline = project.current_deadline.astimezone(datetime.timezone.utc).strftime( "%Y-%m-%d %H:%M:%S %Z" @@ -464,8 +462,7 @@ def compose_and_send_email_to_user(userobj, mail_type, link=None, project=None): msg.body = flask.render_template( f"mail/{mail_type}.txt", link=link, - sender_name=sender_name, - unit_name=unit_name, + displayed_sender=displayed_sender, unit_email=unit_email, project_id=project_id, deadline=deadline, @@ -473,8 +470,7 @@ def compose_and_send_email_to_user(userobj, mail_type, link=None, project=None): msg.html = flask.render_template( f"mail/{mail_type}.html", link=link, - sender_name=sender_name, - unit_name=unit_name, + displayed_sender=displayed_sender, 
unit_email=unit_email, project_id=project_id, deadline=deadline, diff --git a/dds_web/commands.py b/dds_web/commands.py index 3692343c8..930c65c67 100644 --- a/dds_web/commands.py +++ b/dds_web/commands.py @@ -7,12 +7,14 @@ import re import sys import datetime +from dateutil.relativedelta import relativedelta # Installed import click import flask import flask_mail import sqlalchemy +import botocore # Own from dds_web import db @@ -218,131 +220,234 @@ def update_uploaded_file_with_log(project, path_to_log_file): flask.current_app.logger.info(f"Errors while adding files: {errors}") -@click.command("lost-files") -@click.argument("action_type", type=click.Choice(["find", "list", "delete", "add-missing-buckets"])) +@click.group(name="lost-files") @flask.cli.with_appcontext -def lost_files_s3_db(action_type: str): - """Identify (and optionally delete) files that are present in S3 or in the db, but not both. +def lost_files_s3_db(): + """Group command for handling lost files: Either in db or s3, but not in both.""" + pass - Args: - action_type (str): "find", "list", or "delete" - """ - from dds_web.database import models + +@lost_files_s3_db.command(name="ls") +@click.option("--project-id", "-p", type=str, required=False) +@flask.cli.with_appcontext +def list_lost_files(project_id: str): + """List lost files: Existing either in DB or S3, not in both.""" + # Imports import boto3 - from dds_web.utils import bucket_is_valid + from dds_web.database import models + from dds_web.utils import list_lost_files_in_project + + if project_id: + flask.current_app.logger.debug(f"Searching for lost files in project '{project_id}'.") + # Get project if option used + project: models.Project = models.Project.query.filter_by(public_id=project_id).one_or_none() + if not project: + flask.current_app.logger.error(f"No such project: '{project_id}'") + sys.exit(1) - # Interate through the units - for unit in models.Unit.query: + # Start s3 session session = boto3.session.Session() # Connect to 
S3 resource = session.resource( service_name="s3", - endpoint_url=unit.safespring_endpoint, - aws_access_key_id=unit.safespring_access, - aws_secret_access_key=unit.safespring_secret, + endpoint_url=project.responsible_unit.safespring_endpoint, + aws_access_key_id=project.responsible_unit.safespring_access, + aws_secret_access_key=project.responsible_unit.safespring_secret, ) - # Variables - db_count = 0 # Files not found in s3 - s3_count = 0 # Files not found in db + # List the lost files + try: + in_db_but_not_in_s3, in_s3_but_not_in_db = list_lost_files_in_project( + project=project, s3_resource=resource + ) + except (botocore.exceptions.ClientError, sqlalchemy.exc.OperationalError): + flask.current_app.logger.info("Not listing files due to error above.") + sys.exit(1) + + # Number of lost files listed + num_lost_files: int = sum([len(in_db_but_not_in_s3), len(in_s3_but_not_in_db)]) - # Iterate through unit projects - for project in unit.projects: - # Check for objects in bucket - try: - s3_filenames = set( - entry.key for entry in resource.Bucket(project.bucket).objects.all() - ) - except resource.meta.client.exceptions.NoSuchBucket: - if project.is_active: - flask.current_app.logger.warning("Missing bucket %s", project.bucket) - # Create a missing bucket if argument chosen - if action_type == "add-missing-buckets": - valid, message = bucket_is_valid(bucket_name=project.bucket) - if not valid: - flask.current_app.logger.warning( - f"Could not create bucket '{project.bucket}' for project '{project.public_id}': {message}" - ) - else: - resource.create_bucket(Bucket=project.bucket) - flask.current_app.logger.info(f"Bucket '{project.bucket}' created.") - continue - - # Get objects in project - try: - db_filenames = set(entry.name_in_bucket for entry in project.files) - except sqlalchemy.exc.OperationalError: - flask.current_app.logger.critical("Unable to connect to db") - - # Differences - diff_db = db_filenames.difference(s3_filenames) # In db but not in S3 - 
diff_s3 = s3_filenames.difference(db_filenames) # In S3 but not in db - - # List all files which are missing in either db of s3 - # or delete the files from the s3 if missing in db, or db if missing in s3 - if action_type == "list": - for file_entry in diff_db: - flask.current_app.logger.info( - "Entry %s (%s, %s) not found in S3", file_entry, project, unit - ) - for file_entry in diff_s3: - flask.current_app.logger.info( - "Entry %s (%s, %s) not found in database", file_entry, project, unit - ) - elif action_type == "delete": - # s3 can only delete 1000 objects per request - batch_size = 1000 - s3_to_delete = list(diff_s3) - for i in range(0, len(s3_to_delete), batch_size): - resource.meta.client.delete_objects( - Bucket=project.bucket, - Delete={ - "Objects": [ - {"Key": entry} for entry in s3_to_delete[i : i + batch_size] - ] - }, - ) + # Print out message if no lost files + if not num_lost_files: + flask.current_app.logger.info(f"No lost files in project '{project_id}'") - db_entries = models.File.query.filter( - sqlalchemy.and_( - models.File.name_in_bucket.in_(diff_db), - models.File.project_id == project.id, + flask.current_app.logger.info( + f"Lost files in project: {project_id}\t" + f"\tIn DB but not S3: {len(in_db_but_not_in_s3)}\t" + f"In S3 but not DB: {len(in_s3_but_not_in_db)}\n" + ) + else: + flask.current_app.logger.debug( + "No project specified, searching for lost files in all units." 
+ ) + + # Interate through the units + for unit in models.Unit.query: + flask.current_app.logger.info(f"Listing lost files in unit: {unit.public_id}") + + num_proj_errors: int = 0 + + # Start s3 session + session = boto3.session.Session() + + # Connect to S3 + resource_unit = session.resource( + service_name="s3", + endpoint_url=unit.safespring_endpoint, + aws_access_key_id=unit.safespring_access, + aws_secret_access_key=unit.safespring_secret, + ) + + # Counts + in_db_but_not_in_s3_count: int = 0 + in_s3_but_not_in_db_count: int = 0 + + # List files in all projects + for proj in unit.projects: + # List the lost files + try: + in_db_but_not_in_s3, in_s3_but_not_in_db = list_lost_files_in_project( + project=proj, s3_resource=resource_unit ) - ) - for db_entry in db_entries: - try: - for db_entry_version in db_entry.versions: - if db_entry_version.time_deleted is None: - db_entry_version.time_deleted = datetime.datetime.utcnow() - db.session.delete(db_entry) - db.session.commit() - except (sqlalchemy.exc.SQLAlchemyError, sqlalchemy.exc.OperationalError): - db.session.rollback() - flask.current_app.logger.critical("Unable to delete the database entries") - sys.exit(1) + except (botocore.exceptions.ClientError, sqlalchemy.exc.OperationalError): + num_proj_errors += 1 + continue - # update the counters at the end of the loop to have accurate numbers for delete - s3_count += len(diff_s3) - db_count += len(diff_db) + # Add to sum + in_db_but_not_in_s3_count += len(in_db_but_not_in_s3) + in_s3_but_not_in_db_count += len(in_s3_but_not_in_db) - # Print out information about actions performed in cronjob - if s3_count or db_count: - action_word = ( - "Found" if action_type in ("find", "list", "add-missing-buckets") else "Deleted" - ) + if not sum([in_db_but_not_in_s3_count, in_s3_but_not_in_db_count]): + flask.current_app.logger.info(f"No lost files for unit '{unit.public_id}'") + + flask.current_app.logger.info( + f"Lost files for unit: {unit.public_id}\t" + f"\tIn DB but not 
S3: {in_db_but_not_in_s3_count}\t" + f"In S3 but not DB: {in_s3_but_not_in_db_count}\t" + f"Project errors: {num_proj_errors}\n" + ) + + +@lost_files_s3_db.command(name="add-missing-bucket") +@click.option("--project-id", "-p", type=str, required=True) +@flask.cli.with_appcontext +def add_missing_bucket(project_id: str): + """Add project bucket if project is active and bucket is missing.""" + # Imports + import boto3 + from botocore.client import ClientError + from dds_web.database import models + from dds_web.utils import bucket_is_valid + + # Get project object + project: models.Project = models.Project.query.filter_by(public_id=project_id).one_or_none() + if not project: + flask.current_app.logger.error(f"No such project: '{project_id}'") + sys.exit(1) + + # Only create new bucket if project is active + if not project.is_active: + flask.current_app.logger.error(f"Project '{project_id}' is not an active project.") + sys.exit(1) + + # Start s3 session + session = boto3.session.Session() + + # Connect to S3 + resource = session.resource( + service_name="s3", + endpoint_url=project.responsible_unit.safespring_endpoint, + aws_access_key_id=project.responsible_unit.safespring_access, + aws_secret_access_key=project.responsible_unit.safespring_secret, + ) + + # Check if bucket exists + try: + resource.meta.client.head_bucket(Bucket=project.bucket) + except ClientError: + flask.current_app.logger.info("Project bucket is missing. Proceeding...") + + # Verify that bucket name is valid and if so create bucket + valid, message = bucket_is_valid(bucket_name=project.bucket) + if not valid: + flask.current_app.logger.warning( + f"Invalid bucket name: '{project.bucket}'. Details: {message}. Bucket not created." 
+ ) + sys.exit(1) + else: + resource.create_bucket(Bucket=project.bucket) + flask.current_app.logger.info(f"Bucket '{project.bucket}' created.") + else: flask.current_app.logger.info( - "%s %d entries for lost files (%d in db, %d in s3)", - action_word, - s3_count + db_count, - db_count, - s3_count, + f"Bucket for project '{project.public_id}' found; Bucket not missing. Will not create bucket." ) - if action_type in ("find", "list", "add-missing-buckets"): + + +@lost_files_s3_db.command(name="delete") +@click.option("--project-id", "-p", type=str, required=True) +@flask.cli.with_appcontext +def delete_lost_files(project_id: str): + """Delete files that are located in only s3 or db.""" + # Imports + import boto3 + from dds_web.database import models + from dds_web.utils import list_lost_files_in_project + + # Get project object + project: models.Project = models.Project.query.filter_by(public_id=project_id).one_or_none() + if not project: + flask.current_app.logger.error(f"No such project: '{project_id}'") + sys.exit(1) + + # Start s3 session + session = boto3.session.Session() + + # Connect to S3 + resource = session.resource( + service_name="s3", + endpoint_url=project.responsible_unit.safespring_endpoint, + aws_access_key_id=project.responsible_unit.safespring_access, + aws_secret_access_key=project.responsible_unit.safespring_secret, + ) + + # Get list of lost files + in_db_but_not_in_s3, in_s3_but_not_in_db = list_lost_files_in_project( + project=project, s3_resource=resource + ) + + # S3 can only delete 1000 objects per request + batch_size = 1000 + s3_to_delete = list(in_s3_but_not_in_db) + + # Delete items from S3 + for i in range(0, len(s3_to_delete), batch_size): + resource.meta.client.delete_objects( + Bucket=project.bucket, + Delete={"Objects": [{"Key": entry} for entry in s3_to_delete[i : i + batch_size]]}, + ) + + # Delete items from DB + db_entries = models.File.query.filter( + sqlalchemy.and_( + models.File.name_in_bucket.in_(in_db_but_not_in_s3), + 
models.File.project_id == project.id, + ) + ) + for db_entry in db_entries: + try: + for db_entry_version in db_entry.versions: + if db_entry_version.time_deleted is None: + db_entry_version.time_deleted = datetime.datetime.utcnow() + db.session.delete(db_entry) + db.session.commit() + except (sqlalchemy.exc.SQLAlchemyError, sqlalchemy.exc.OperationalError): + db.session.rollback() + flask.current_app.logger.critical("Unable to delete the database entries") sys.exit(1) - else: - flask.current_app.logger.info("Found no lost files") + flask.current_app.logger.info(f"Files deleted from S3: {len(in_s3_but_not_in_db)}") + flask.current_app.logger.info(f"Files deleted from DB: {len(in_db_but_not_in_s3)}") @click.command("set-available-to-expired") @@ -682,6 +787,7 @@ def collect_stats(): # Own import dds_web.utils + from dds_web.utils import bytehours_in_last_month, page_query, calculate_bytehours from dds_web.database.models import ( Unit, UnitUser, @@ -730,13 +836,36 @@ def collect_stats(): unit_count = Unit.query.count() # Amount of data + # Currently stored bytes_stored_now: int = sum(proj.size for proj in Project.query.filter_by(is_active=True)) tb_stored_now: float = round(bytes_stored_now / 1e12, 2) + # Uploaded since start bytes_uploaded_since_start = db.session.query( func.sum(Version.size_stored).label("sum_bytes") ).first() tb_uploaded_since_start: float = round(int(bytes_uploaded_since_start.sum_bytes) / 1e12, 2) + # TBHours + # In last month + byte_hours_sum = sum( + bytehours_in_last_month(version=version) + for version in page_query(Version.query) + if version.time_deleted is None + or version.time_deleted > (dds_web.utils.current_time() - relativedelta(months=1)) + ) + tbhours = round(byte_hours_sum / 1e12, 2) + # Since start + time_now = dds_web.utils.current_time() + byte_hours_sum_total = sum( + calculate_bytehours( + minuend=version.time_deleted or time_now, + subtrahend=version.time_uploaded, + size_bytes=version.size_stored, + ) + for version in 
page_query(Version.query) + ) + tbhours_total = round(byte_hours_sum_total / 1e12, 2) + # Add to database new_reporting_row = Reporting( unit_count=unit_count, @@ -751,10 +880,13 @@ def collect_stats(): inactive_project_count=inactive_project_count, tb_stored_now=tb_stored_now, tb_uploaded_since_start=tb_uploaded_since_start, + tbhours=tbhours, + tbhours_since_start=tbhours_total, ) db.session.add(new_reporting_row) db.session.commit() except BaseException as err: # We want to know if there's any error + db.session.rollback() flask.current_app.logger.warning( f"Exception raised during reporting cronjob. Preparing email. Error: {err}" ) diff --git a/dds_web/database/models.py b/dds_web/database/models.py index 3f72a4052..ef92a4be1 100644 --- a/dds_web/database/models.py +++ b/dds_web/database/models.py @@ -1090,3 +1090,5 @@ class Reporting(db.Model): inactive_project_count = db.Column(db.Integer, unique=False, nullable=True) tb_stored_now = db.Column(db.Float, unique=False, nullable=True) tb_uploaded_since_start = db.Column(db.Float, unique=False, nullable=True) + tbhours = db.Column(db.Float, unique=False, nullable=True) + tbhours_since_start = db.Column(db.Float, unique=False, nullable=True) diff --git a/dds_web/templates/mail/invite.html b/dds_web/templates/mail/invite.html index d5faf8fa0..f9d20232b 100644 --- a/dds_web/templates/mail/invite.html +++ b/dds_web/templates/mail/invite.html @@ -16,11 +16,10 @@ The DDS is a system for SciLifeLab infrastructures to deliver data to researchers in a fast, secure and simple way.

- {% if unit_name is none %} - The invite was sent to you by {{sender_name}}. + {% if unit_email %} + The invite was sent to you on behalf of {{displayed_sender}} ({{unit_email}}). {% else %} - The invite was sent to you by {{sender_name}} on behalf of {{unit_name}}{% if unit_email is not none %} ({{unit_email}}){% endif %}. + The invite was sent to you by {{displayed_sender}}. {% endif %} Please press the 'Sign Up' button to create your account:

diff --git a/dds_web/templates/mail/invite.txt b/dds_web/templates/mail/invite.txt index fbd099d2d..d6d5b2c17 100644 --- a/dds_web/templates/mail/invite.txt +++ b/dds_web/templates/mail/invite.txt @@ -1,9 +1,9 @@ You have been invited to join the SciLifeLab Data Delivery System (DDS). The DDS is a system for SciLifeLab infrastructures to deliver data to researchers in a fast, secure and simple way. -{% if unit_name is none %} -The invite was sent to you by {{sender_name}}. +{% if unit_email %} +The invite was sent to you on behalf of {{displayed_sender}} ({{unit_email}}). {% else %} -The invite was sent to you by {{sender_name}} on behalf of {{unit_name}}{% if unit_email is not none %} ({{unit_email}}){% endif %}. +The invite was sent to you by {{displayed_sender}}. {% endif %} Please copy paste the following link into your browser to create your account: {{link}} \ No newline at end of file diff --git a/dds_web/templates/mail/project_release.html b/dds_web/templates/mail/project_release.html index 107c359f5..24262a413 100644 --- a/dds_web/templates/mail/project_release.html +++ b/dds_web/templates/mail/project_release.html @@ -16,11 +16,10 @@ The DDS is a system for SciLifeLab infrastructures to deliver data to researchers in a fast, secure and simple way.

- {% if unit_name is none %} - You were added to this project by {{sender_name}}. + {% if unit_email %} + You were added to this project on behalf of {{displayed_sender}} ({{unit_email}}). {% else %} - You were added to this project by {{sender_name}} on behalf of {{unit_name}}{% if unit_email is not none %} ({{unit_email}}){% endif %}. + You were added to this project by {{displayed_sender}}. {% endif %}

diff --git a/dds_web/templates/mail/project_release.txt b/dds_web/templates/mail/project_release.txt index 2ba8451d3..121d5ea6f 100644 --- a/dds_web/templates/mail/project_release.txt +++ b/dds_web/templates/mail/project_release.txt @@ -1,10 +1,10 @@ The project {{project_id}} is now available for your access in the SciLifeLab Data Delivery System (DDS). The DDS is a system for SciLifeLab infrastructures to deliver data to researchers in a fast, secure and simple way. -{% if unit_name is none %} -You were added to this project by {{sender_name}}. +{% if unit_email %} +You were added to this project on behalf of {{displayed_sender}} ({{unit_email}}). {% else %} -You were added to this project by {{sender_name}} on behalf of {{unit_name}}{% if unit_email is not none %} ({{unit_email}}){% endif %}. +You were added to this project by {{displayed_sender}}. {% endif %} The DDS CLI command 'dds ls -p {{project_id}}' can be used to list the files in this project. diff --git a/dds_web/utils.py b/dds_web/utils.py index 8d19902bf..f7269d22c 100644 --- a/dds_web/utils.py +++ b/dds_web/utils.py @@ -12,8 +12,10 @@ import urllib.parse import time import smtplib +from dateutil.relativedelta import relativedelta # Installed +import botocore from contextlib import contextmanager import flask from dds_web.errors import ( @@ -587,6 +589,69 @@ def calculate_version_period_usage(version): return bytehours +def format_timestamp( + timestamp_string: str = None, timestamp_object=None, timestamp_format: str = "%Y-%m-%d %H:%M:%S" +): + """Change timestamp format.""" + if not timestamp_string and not timestamp_object: + return + + if timestamp_string and timestamp_format != "%Y-%m-%d %H:%M:%S": + raise ValueError( + "Timestamp strings need to contain year, month, day, hour, minute and seconds." 
+ ) + + if timestamp_object: + timestamp_string = timestamp_object.strftime(timestamp_format) + + return datetime.datetime.strptime(timestamp_string, timestamp_format) + + +def bytehours_in_last_month(version): + """Calculate number of terrabyte hours stored in last month.""" + # Current date and date a month ago + now = format_timestamp(timestamp_object=current_time()) + a_month_ago = now - relativedelta(months=1) + byte_hours: int = 0 + + # 1. File uploaded after start (a month ago) + if version.time_uploaded > a_month_ago: + # A. File not deleted --> now - uploaded + if not version.time_deleted: + byte_hours = calculate_bytehours( + minuend=now, + subtrahend=version.time_uploaded, + size_bytes=version.size_stored, + ) + + # B. File deleted --> deleted - uploaded + else: + byte_hours += calculate_bytehours( + minuend=version.time_deleted, + subtrahend=version.time_uploaded, + size_bytes=version.size_stored, + ) + + # 2. File uploaded prior to start (a month ago) + else: + # A. File not deleted --> now - thirty_days_ago + if not version.time_deleted: + byte_hours += calculate_bytehours( + minuend=now, subtrahend=a_month_ago, size_bytes=version.size_stored + ) + + # B. 
File deleted --> deleted - thirty_days_ago + else: + if version.time_deleted > a_month_ago: + byte_hours += calculate_bytehours( + minuend=version.time_deleted, + subtrahend=a_month_ago, + size_bytes=version.size_stored, + ) + + return byte_hours + + # maintenance check def block_if_maintenance(user=None): """Block API requests if maintenance is ongoing and projects are busy.""" @@ -611,3 +676,52 @@ def block_if_maintenance(user=None): else: if user.role != "Super Admin": raise MaintenanceOngoingException() + + +def list_lost_files_in_project(project, s3_resource): + """List lost files in project.""" + s3_filenames: set = set() + db_filenames: set = set() + + # Check if bucket exists + try: + s3_resource.meta.client.head_bucket(Bucket=project.bucket) + except botocore.exceptions.ClientError: + missing_expected: bool = not project.is_active + flask.current_app.logger.error( + f"Project '{project.public_id}' bucket is missing. Expected: {missing_expected}" + ) + raise + + # Get items in s3 + s3_filenames = set(entry.key for entry in s3_resource.Bucket(project.bucket).objects.all()) + + # Get items in db + try: + db_filenames = set(entry.name_in_bucket for entry in project.files) + except sqlalchemy.exc.OperationalError: + flask.current_app.logger.critical("Unable to connect to db") + raise + + # Differences + diff_db = db_filenames.difference(s3_filenames) # In db but not in S3 + diff_s3 = s3_filenames.difference(db_filenames) # In S3 but not in db + + # List items + if any([diff_db, diff_s3]): + for file_entry in diff_db: + flask.current_app.logger.info( + "Entry %s (%s, %s) not found in S3 (but found in db)", + file_entry, + project.public_id, + project.responsible_unit, + ) + for file_entry in diff_s3: + flask.current_app.logger.info( + "Entry %s (%s, %s) not found in database (but found in s3)", + file_entry, + project.public_id, + project.responsible_unit, + ) + + return diff_db, diff_s3 diff --git a/dds_web/version.py b/dds_web/version.py index 
55e470907..3d67cd6bb 100644 --- a/dds_web/version.py +++ b/dds_web/version.py @@ -1 +1 @@ -__version__ = "2.3.0" +__version__ = "2.4.0" diff --git a/doc/procedures/new_release.md b/doc/procedures/new_release.md index e5b9fd3d0..cdb83dc91 100644 --- a/doc/procedures/new_release.md +++ b/doc/procedures/new_release.md @@ -1,21 +1,22 @@ # How to create a new release -**Is the release planned for the next cluster maintenance?** Please read point 8 first. - -1. Confirm that the development instance works +1. Create a PR from `dev` to `master`: "New release" +2. Confirm that the development instance works and that the newest changes have been deployed 1. _In general_, e.g. that it's up and running 2. _Specific feature has been added or changed:_ Confirm that it also works in the development instance 3. _The change is in the API:_ Confirm that the development instance works together with the CLI -2. Fork a new branch from `dev` -3. Update the version [changelog](../../CHANGELOG.rst) +3. Fork a new branch from `dev` +4. Update the version [changelog](../../CHANGELOG.rst) + + **Tip:** Use the PR to `master` to see all changes since last release. - The new version should be at the top of the page - List the changes that the users will / may notice - Do not add information regarding workflow (e.g. GitHub Actions) etc -4. Update the version in [`version.py`](../../dds_web/version.py) +5. Update the version in [`version.py`](../../dds_web/version.py) - _Minor changes, e.g. bug fix_: Minor version upgrade, e.g. `1.0.1 --> 1.0.2` - _Small changes, e.g. new feature_: Mid version upgrade, e.g. `1.1.0 --> 1.2.0` @@ -23,12 +24,12 @@ > Will break if CLI version not bumped as well -5. Push version change to branch -6. Create a new PR from `` to `dev` +6. Push version change to branch +7. Create a new PR from `` to `dev` Wait for approval and merge by Product Owner or admin. -7. Create a PR from `dev` to `master` +8. 
Go back to the PR to `master` ("New release", step 1 above) - Are you bumping the major version (e.g. 1.x.x to 2.x.x)? - Yes: Add this info to the PR. @@ -42,7 +43,7 @@ - There should be at least one approval of the PR. - _Everything looks ok and there's at least one approval?_ Merge it. -8. [Draft a new release](https://github.com/ScilifelabDataCentre/dds_web/releases) +9. [Draft a new release](https://github.com/ScilifelabDataCentre/dds_web/releases) 1. `Choose a tag` → `Find or create a new tag` → Fill in the new version, e.g. if the new version is `1.0.0`, you should fill in `v1.0.0`. 2. `Target` should be set to `master` @@ -55,7 +56,7 @@ An image of the web / api will be published to the [GitHub Container Registry](https://codefresh.io/csdp-docs/docs/integrations/container-registries/github-cr/) -9. Perform redeployment +10. Perform redeployment The method for this _depends on the situation_ / size of and reason for the upgrade. diff --git a/docker-compose.yml b/docker-compose.yml index 3b8550421..d4a9c540d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,7 +5,7 @@ version: "3.9" services: db: container_name: dds_database - image: mariadb:latest + image: mariadb:10.7.8 environment: - MYSQL_ROOT_PASSWORD=${DDS_MYSQL_ROOT_PASS} - MYSQL_USER=${DDS_MYSQL_USER} @@ -56,7 +56,7 @@ services: environment: - DDS_VERSION=local-dev - DDS_APP_CONFIG=/code/dds_web/sensitive/dds_app.cfg - - FLASK_ENV=development + - FLASK_DEBUG=true - FLASK_APP=dds_web - DB_TYPE=${DDS_DB_TYPE} # - RATELIMIT_STORAGE_URI=redis://dds_redis diff --git a/migrations/versions/2cefec51b9bb_add_tbhours_since_start.py b/migrations/versions/2cefec51b9bb_add_tbhours_since_start.py new file mode 100644 index 000000000..1da10b121 --- /dev/null +++ b/migrations/versions/2cefec51b9bb_add_tbhours_since_start.py @@ -0,0 +1,28 @@ +"""add_tbhours_since_start + +Revision ID: 2cefec51b9bb +Revises: c8f25f39b62f +Create Date: 2023-06-02 12:07:01.372912 + +""" +from alembic import op +import 
sqlalchemy as sa +from sqlalchemy.dialects import mysql + +# revision identifiers, used by Alembic. +revision = "2cefec51b9bb" +down_revision = "c8f25f39b62f" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("reporting", sa.Column("tbhours_since_start", sa.Float(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column("reporting", "tbhours_since_start") + # ### end Alembic commands ### diff --git a/migrations/versions/c8f25f39b62f_add_tbhours_since_last_month.py b/migrations/versions/c8f25f39b62f_add_tbhours_since_last_month.py new file mode 100644 index 000000000..457852442 --- /dev/null +++ b/migrations/versions/c8f25f39b62f_add_tbhours_since_last_month.py @@ -0,0 +1,28 @@ +"""add_tbhours_since_last_month + +Revision ID: c8f25f39b62f +Revises: d48ecb4db259 +Create Date: 2023-05-31 08:04:09.768951 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import mysql + +# revision identifiers, used by Alembic. +revision = "c8f25f39b62f" +down_revision = "aec752f1e0a5" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("reporting", sa.Column("tbhours", sa.Float(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column("reporting", "tbhours") + # ### end Alembic commands ### diff --git a/requirements.txt b/requirements.txt index fa8bafaf1..0d6fd43f9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,7 +19,7 @@ dominate==2.6.0 email-validator==1.1.3 factory-boy==3.2.1 Faker==12.0.0 -Flask==2.0.3 +Flask==2.2.5 Flask-APScheduler==1.12.3 Flask-HTTPAuth==4.5.0 Flask-Limiter==2.1.3 @@ -51,8 +51,8 @@ python-dateutil==2.8.2 pytz==2021.3 pytz-deprecation-shim==0.1.0.post0 qrcode==7.3.1 -redis==4.4.4 -requests==2.27.1 +redis==4.5.5 +requests==2.31.0 requests_cache==0.9.4 s3transfer==0.5.1 simplejson==3.17.6 diff --git a/tests/__init__.py b/tests/__init__.py index 06bd5aca7..9e8c72e75 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -123,7 +123,7 @@ def fake_web_login(self, client): def set_session_cookie(client): app = flask.current_app val = app.session_interface.get_signing_serializer(app).dumps(dict(session)) - client.set_cookie("localhost", app.session_cookie_name, val) + client.set_cookie("localhost", app.config["SESSION_COOKIE_NAME"], val) flask_login.login_user(user) set_session_cookie(client) @@ -229,5 +229,6 @@ class DDSEndpoint: MOTD_SEND = BASE_ENDPOINT + "/motd/send" USER_FIND = BASE_ENDPOINT + "/user/find" TOTP_DEACTIVATE = BASE_ENDPOINT + "/user/totp/deactivate" + STATS = BASE_ENDPOINT + "/stats" TIMEOUT = 5 diff --git a/tests/api/test_superadmin_only.py b/tests/api/test_superadmin_only.py index b5d4c1a68..ae60c97f9 100644 --- a/tests/api/test_superadmin_only.py +++ b/tests/api/test_superadmin_only.py @@ -7,16 +7,22 @@ import time import typing import unittest +from datetime import datetime, timedelta +from unittest import mock + # Installed import flask import werkzeug import flask_mail +import freezegun +import click # Own from dds_web import db from dds_web.database import models import tests +from dds_web.commands import collect_stats #################################################################################################### 
# CONFIG ################################################################################## CONFIG # @@ -821,3 +827,74 @@ def test_anyprojectsbusy_false_list(client: flask.testing.FlaskClient) -> None: assert num == 0 projects_returned: typing.Dict = response.json.get("projects") assert projects_returned is None + + +def test_statistics_no_access(client: flask.testing.FlaskClient) -> None: + """Verify that users that are not Super Admins cannot use this endpoint.""" + # Verify no access for researchers and unit users + for user in ["researcher", "unituser", "unitadmin"]: + token = tests.UserAuth(tests.USER_CREDENTIALS[user]).token(client) + response = client.get(tests.DDSEndpoint.STATS, headers=token) + assert response.status_code == http.HTTPStatus.FORBIDDEN + + +def test_statistics_return_none(client: flask.testing.FlaskClient) -> None: + """There are no rows in the Reporting table.""" + # Check that there are no rows + assert not models.Reporting.query.count() + + # Get all rows from API + token = tests.UserAuth(tests.USER_CREDENTIALS["superadmin"]).token(client) + response = client.get(tests.DDSEndpoint.STATS, headers=token) + assert response.status_code == http.HTTPStatus.OK + + # Check response + returned: typing.Dict = response.json.get("stats") + assert returned == [] + + +def test_statistics_return_rows(client: flask.testing.FlaskClient, cli_runner) -> None: + """Verify list returned when there are rows in reporting table.""" + + def add_row_to_reporting_table(time): + """Run command to add a new row to the reporting table.""" + with freezegun.freeze_time(time): + # Run scheduled job now + with mock.patch.object(flask_mail.Mail, "send") as mock_mail_send: + result: click.testing.Result = cli_runner.invoke(collect_stats) + assert not result.exception, "Raised an unwanted exception." 
+ assert mock_mail_send.call_count == 0 + + # Generate row in reporting table + time_1 = datetime(year=2022, month=12, day=10, hour=10, minute=54, second=10) + add_row_to_reporting_table(time=time_1) + + # Verify that there's a row added + assert models.Reporting.query.count() == 1 + + # Get all rows from API + token = tests.UserAuth(tests.USER_CREDENTIALS["superadmin"]).token(client) + response = client.get(tests.DDSEndpoint.STATS, headers=token) + assert response.status_code == http.HTTPStatus.OK + + # Check response + returned: typing.Dict = response.json.get("stats") + assert len(returned) == 1 + reporting_row = models.Reporting.query.first() + assert returned[0] == { + "Date": str(reporting_row.date), + "Units": reporting_row.unit_count, + "Researchers": reporting_row.researcher_count, + "Project Owners": reporting_row.project_owner_unique_count, + "Unit Personnel": reporting_row.unit_personnel_count, + "Unit Admins": reporting_row.unit_admin_count, + "Super Admins": reporting_row.superadmin_count, + "Total Users": reporting_row.total_user_count, + "Total Projects": reporting_row.total_project_count, + "Active Projects": reporting_row.active_project_count, + "Inactive Projects": reporting_row.inactive_project_count, + "Data Now (TB)": reporting_row.tb_stored_now, + "Data Uploaded (TB)": reporting_row.tb_uploaded_since_start, + "TBHours Last Month": reporting_row.tbhours, + "TBHours Total": reporting_row.tbhours_since_start, + } diff --git a/tests/docker-compose-test.yml b/tests/docker-compose-test.yml index 3609fa341..6a1a6c4c9 100644 --- a/tests/docker-compose-test.yml +++ b/tests/docker-compose-test.yml @@ -11,8 +11,8 @@ services: context: ./ target: test command: > - sh -c "COVERAGE_FILE=./coverage/.coverage pytest -s -color=yes " \ - "$DDS_PYTEST_ARGS --cov=./dds_web --cov-report=xml:coverage/report.xml" + sh -c "COVERAGE_FILE=./coverage/.coverage pytest -s -color=yes \ + $DDS_PYTEST_ARGS --cov=./dds_web --cov-report=xml:coverage/report.xml" restart: "no" 
volumes: - type: bind diff --git a/tests/test_basic_api.py b/tests/test_basic_api.py index e39783927..665fec2fc 100644 --- a/tests/test_basic_api.py +++ b/tests/test_basic_api.py @@ -273,7 +273,6 @@ def test_request_totp_activation(client): user_auth = tests.UserAuth(tests.USER_CREDENTIALS["researcher"]) token = tests.UserAuth(tests.USER_CREDENTIALS["unituser"]).token(client) - print(token) response = client.post( tests.DDSEndpoint.TOTP_ACTIVATION, headers=token, diff --git a/tests/test_commands.py b/tests/test_commands.py index ecd8a5345..ebac7aa40 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -31,6 +31,7 @@ delete_invites, quarterly_usage, collect_stats, + lost_files_s3_db, ) from dds_web.database import models from dds_web import db @@ -54,16 +55,19 @@ def mock_unit_size(): # fill_db_wrapper -def test_fill_db_wrapper_production(client, runner) -> None: +def test_fill_db_wrapper_production(client, runner, capfd) -> None: """Run init-db with the production argument.""" result: click.testing.Result = runner.invoke(fill_db_wrapper, ["production"]) - assert result.exit_code == 1 + _, err = capfd.readouterr() + assert "already exists, not creating user" in err -def test_fill_db_wrapper_devsmall(client, runner) -> None: +def test_fill_db_wrapper_devsmall(client, runner, capfd) -> None: """Run init-db with the dev-small argument.""" result: click.testing.Result = runner.invoke(fill_db_wrapper, ["dev-small"]) - assert result.exit_code == 1 + _, err = capfd.readouterr() + assert "Initializing development db" in err + assert "DB filled" not in err # DB already filled, duplicates. 
# def test_fill_db_wrapper_devbig(client, runner) -> None: @@ -212,7 +216,7 @@ def test_create_new_unit_success(client, runner) -> None: # update_uploaded_file_with_log -def test_update_uploaded_file_with_log_nonexisting_project(client, runner) -> None: +def test_update_uploaded_file_with_log_nonexisting_project(client, runner, capfd) -> None: """Add file info to non existing project.""" # Create command options command_options: typing.List = [ @@ -226,7 +230,8 @@ def test_update_uploaded_file_with_log_nonexisting_project(client, runner) -> No assert db.session.query(models.Project).all() with patch("dds_web.database.models.Project.query.filter_by", mock_no_project): result: click.testing.Result = runner.invoke(update_uploaded_file_with_log, command_options) - assert result.exit_code == 1 + _, err = capfd.readouterr() + assert "The project 'projectdoesntexist' doesn't exist." in err def test_update_uploaded_file_with_log_nonexisting_file(client, runner, fs: FakeFilesystem) -> None: @@ -245,10 +250,284 @@ def test_update_uploaded_file_with_log_nonexisting_file(client, runner, fs: Fake # Run command result: click.testing.Result = runner.invoke(update_uploaded_file_with_log, command_options) + # TODO: Add check for logging or change command to return or raise error. capfd does not work together with fs + # _, err = capfd.readouterr() + # assert "The project 'projectdoesntexist' doesn't exist." 
in result.stderr + + +# lost_files_s3_db + + +def test_lost_files_s3_db_no_command(client, cli_runner, capfd): + """Test running the flask lost-files command without any subcommand.""" + _: click.testing.Result = cli_runner.invoke(lost_files_s3_db) + _, err = capfd.readouterr() + assert not err + + +# lost_files_s3_db -- list_lost_files + + +def test_list_lost_files_no_such_project(client, cli_runner, capfd): + """flask lost-files ls: project specified, project doesnt exist.""" + # Project ID -- doesn't exist + project_id: str = "nonexistentproject" + assert not models.Project.query.filter_by(public_id=project_id).one_or_none() + + # Run command with non existent project + result: click.testing.Result = cli_runner.invoke( + lost_files_s3_db, ["ls", "--project-id", project_id] + ) + assert result.exit_code == 1 # sys.exit(1) + + # Verify output + _, err = capfd.readouterr() + assert f"Searching for lost files in project '{project_id}'." in err + assert f"No such project: '{project_id}'" in err + + +def test_list_lost_files_no_lost_files_in_project(client, cli_runner, boto3_session, capfd): + """flask lost-files ls: project specified, no lost files.""" + # Get project + project = models.Project.query.first() + assert project + + # Mock project.files -- no files + with patch("dds_web.database.models.Project.files", new_callable=PropertyMock) as mock_files: + mock_files.return_value = [] + + # Run command + result: click.testing.Result = cli_runner.invoke( + lost_files_s3_db, ["ls", "--project-id", project.public_id] + ) + assert result.exit_code == 0 + + # Verify output -- no lost files + _, err = capfd.readouterr() + assert f"Searching for lost files in project '{project.public_id}'." 
in err + assert f"No lost files in project '{project.public_id}'" in err + + +def test_list_lost_files_missing_in_s3_in_project(client, cli_runner, boto3_session, capfd): + """flask lost-files ls: project specified, lost files in s3.""" + # Get project + project = models.Project.query.first() + assert project + + # Run command + result: click.testing.Result = cli_runner.invoke( + lost_files_s3_db, ["ls", "--project-id", project.public_id] + ) + assert result.exit_code == 0 + + # Verify output + _, err = capfd.readouterr() + # All files should be in db but not in s3 + for f in project.files: + assert ( + f"Entry {f.name_in_bucket} ({project.public_id}, {project.responsible_unit}) not found in S3 (but found in db)" + in err + ) + assert ( + f"Entry {f.name_in_bucket} ({project.public_id}, {project.responsible_unit}) not found in database (but found in s3)" + not in err + ) + + assert f"Lost files in project: {project.public_id}\t\tIn DB but not S3: {len(len(project.files))}\tIn S3 but not DB: 0\n" + + +def test_list_lost_files_no_lost_files_total(client, cli_runner, boto3_session, capfd): + """flask lost-files ls: no project specified, no lost files.""" + # Mock project.files -- no files + with patch("dds_web.database.models.Project.files", new_callable=PropertyMock) as mock_files: + mock_files.return_value = [] + + # Run command + result: click.testing.Result = cli_runner.invoke(lost_files_s3_db, ["ls"]) + assert result.exit_code == 0 + + # Verify output -- no lost files + _, err = capfd.readouterr() + assert "Searching for lost files in project" not in err + assert "No project specified, searching for lost files in all units." 
in err + for u in models.Unit.query.all(): + assert f"Listing lost files in unit: {u.public_id}" in err + assert f"No lost files for unit '{u.public_id}'" in err + + +def test_list_lost_files_missing_in_s3_in_project(client, cli_runner, boto3_session, capfd): + """flask lost-files ls: project specified, lost files in s3.""" + # Run command + result: click.testing.Result = cli_runner.invoke(lost_files_s3_db, ["ls"]) + assert result.exit_code == 0 + + # Verify output + _, err = capfd.readouterr() + # All files should be in db but not in s3 + for u in models.Unit.query.all(): + num_files: int = 0 + for p in u.projects: + num_files += len(p.files) + for f in p.files: + assert ( + f"Entry {f.name_in_bucket} ({p.public_id}, {u}) not found in S3 (but found in db)" + in err + ) + assert ( + f"Entry {f.name_in_bucket} ({p.public_id}, {u}) not found in database (but found in s3)" + not in err + ) + assert f"Lost files for unit: {u.public_id}\t\tIn DB but not S3: {num_files}\tIn S3 but not DB: 0\tProject errors: 0\n" + + +# lost_files_s3_db -- add_missing_bucket + + +def test_add_missing_bucket_no_project(client, cli_runner): + """flask lost-files add-missing-bucket: no project specified (required).""" + # Run command + result: click.testing.Result = cli_runner.invoke(lost_files_s3_db, ["add-missing-bucket"]) + + # Get output from result and verify that help message printed + assert result.exit_code == 2 + assert "Missing option '--project-id' / '-p'." 
in result.stdout + + +def test_add_missing_bucket_project_nonexistent(client, cli_runner, capfd): + """flask lost-files add-missing-bucket: no such project --> print out error.""" + # Project -- doesn't exist + project_id: str = "nonexistentproject" + assert not models.Project.query.filter_by(public_id=project_id).one_or_none() + + # Run command + result: click.testing.Result = cli_runner.invoke( + lost_files_s3_db, ["add-missing-bucket", "--project-id", project_id] + ) + assert result.exit_code == 1 + + # Verify output + _, err = capfd.readouterr() + assert f"No such project: '{project_id}'" in err + + +def test_add_missing_bucket_project_inactive(client, cli_runner, capfd): + """flask lost-files add-missing-bucket: project specified, but inactive --> error message.""" + # Get project + project: models.Project = models.Project.query.first() + assert project + + # Set project as inactive + project.is_active = False + db.session.commit() + assert not project.is_active + + # Run command + result: click.testing.Result = cli_runner.invoke( + lost_files_s3_db, ["add-missing-bucket", "--project-id", project.public_id] + ) + assert result.exit_code == 1 + + # Verify output + _, err = capfd.readouterr() + assert f"Project '{project.public_id}' is not an active project." in err + + +def test_add_missing_bucket_not_missing(client, cli_runner, boto3_session, capfd): + """flask lost-files add-missing-bucket: project specified, not missing --> ok.""" + from tests.test_utils import mock_nosuchbucket + + # Get project + project: models.Project = models.Project.query.first() + assert project + + # Run command + result: click.testing.Result = cli_runner.invoke( + lost_files_s3_db, ["add-missing-bucket", "--project-id", project.public_id] + ) + assert result.exit_code == 0 + + # Verify output + _, err = capfd.readouterr() + assert ( + f"Bucket for project '{project.public_id}' found; Bucket not missing. Will not create bucket." 
+ in err + ) + + +# lost_files_s3_db -- delete_lost_files + + +def test_delete_lost_files_no_project(client, cli_runner): + """flask lost-files delete: no project specified (required).""" + # Run command + result: click.testing.Result = cli_runner.invoke(lost_files_s3_db, ["delete"]) + + # Get output from result and verify that help message printed + assert result.exit_code == 2 + assert "Missing option '--project-id' / '-p'." in result.stdout + + +def test_delete_lost_files_project_nonexistent(client, cli_runner, capfd): + """flask lost-files delete: no such project --> print out error.""" + # Project -- doesn't exist + project_id: str = "nonexistentproject" + assert not models.Project.query.filter_by(public_id=project_id).one_or_none() + + # Run command + result: click.testing.Result = cli_runner.invoke( + lost_files_s3_db, ["delete", "--project-id", project_id] + ) assert result.exit_code == 1 + # Verify output + _, err = capfd.readouterr() + assert f"No such project: '{project_id}'" in err + + +def test_delete_lost_files_deleted(client, cli_runner, boto3_session, capfd): + """flask lost-files delete: project specified and exists --> deleted files ok.""" + # Get project + project: models.Project = models.Project.query.first() + assert project + num_project_files = len(project.files) + assert num_project_files > 0 + + # Run command + result: click.testing.Result = cli_runner.invoke( + lost_files_s3_db, ["delete", "--project-id", project.public_id] + ) + assert result.exit_code == 0 + + # Verify output - files deleted + _, err = capfd.readouterr() + assert f"Files deleted from S3: 0" in err + assert f"Files deleted from DB: {num_project_files}" in err + + +def test_delete_lost_files_sqlalchemyerror(client, cli_runner, boto3_session, capfd): + """flask lost-files delete: sqlalchemyerror during deletion.""" + # Imports + from tests.api.test_project import mock_sqlalchemyerror + + # Get project + project: models.Project = models.Project.query.first() + assert project 
+ num_project_files = len(project.files) + assert num_project_files > 0 + + # Mock commit --> no delete + with patch("dds_web.db.session.commit", mock_sqlalchemyerror): + # Run command + result: click.testing.Result = cli_runner.invoke( + lost_files_s3_db, ["delete", "--project-id", project.public_id] + ) + assert result.exit_code == 1 -# monitor_usage + # Verify output - files deleted + _, err = capfd.readouterr() + assert "Unable to delete the database entries" in err + assert f"Files deleted from S3: 0" not in err + assert f"Files deleted from DB: 0" not in err # usage = 0 --> check log @@ -443,6 +722,7 @@ def test_collect_stats(client, cli_runner, fs: FakeFilesystem): ProjectUsers, Version, ) + from dds_web.utils import bytehours_in_last_month, page_query, calculate_bytehours import dds_web.utils def verify_reporting_row(row, time_date): @@ -485,6 +765,23 @@ def verify_reporting_row(row, time_date): / 1000000000000, 2, ) + assert row.tbhours == round( + sum(bytehours_in_last_month(version=version) for version in page_query(Version.query)) + / 1e12, + 2, + ) + assert row.tbhours_since_start == round( + sum( + calculate_bytehours( + minuend=version.time_deleted or time_date, + subtrahend=version.time_uploaded, + size_bytes=version.size_stored, + ) + for version in page_query(Version.query) + ) + / 1e12, + 2, + ) # Verify that there are no reporting rows assert Reporting.query.count() == 0 diff --git a/tests/test_login_web.py b/tests/test_login_web.py index 5da4ab349..de8dfd81d 100644 --- a/tests/test_login_web.py +++ b/tests/test_login_web.py @@ -31,7 +31,7 @@ def successful_web_login(client: flask.testing.FlaskClient, user_auth: UserAuth) headers=DEFAULT_HEADER, ) assert response.status_code == HTTPStatus.OK - assert flask.request.path == DDSEndpoint.CONFIRM_2FA + assert response.request.path == DDSEndpoint.CONFIRM_2FA form_token: str = flask.g.csrf_token @@ -48,8 +48,8 @@ def successful_web_login(client: flask.testing.FlaskClient, user_auth: UserAuth) 
headers=DEFAULT_HEADER, ) assert response.status_code == HTTPStatus.OK - assert flask.request.path == DDSEndpoint.INDEX - assert flask.request.path == flask.url_for("pages.home") + assert response.request.path == DDSEndpoint.INDEX + assert response.request.path == flask.url_for("pages.home") return form_token @@ -87,7 +87,7 @@ def test_cancel_2fa(client: flask.testing.FlaskClient): headers=DEFAULT_HEADER, ) assert response.status_code == HTTPStatus.OK - assert flask.request.path == DDSEndpoint.CONFIRM_2FA + assert response.request.path == DDSEndpoint.CONFIRM_2FA second_factor_token: str = flask.session.get("2fa_initiated_token") assert second_factor_token is not None @@ -99,7 +99,7 @@ def test_cancel_2fa(client: flask.testing.FlaskClient): ) assert response.status_code == HTTPStatus.OK - assert flask.request.path == DDSEndpoint.LOGIN + assert response.request.path == DDSEndpoint.LOGIN second_factor_token: str = flask.session.get("2fa_initiated_token") assert second_factor_token is None @@ -123,7 +123,7 @@ def test_password_reset(client: flask.testing.FlaskClient): follow_redirects=True, ) assert response.status_code == HTTPStatus.OK - assert flask.request.path == DDSEndpoint.USER_INFO + assert response.request.path == DDSEndpoint.USER_INFO form_token: str = flask.g.csrf_token @@ -131,7 +131,7 @@ def test_password_reset(client: flask.testing.FlaskClient): DDSEndpoint.LOGOUT, follow_redirects=True, headers=headers ) assert response.status_code == HTTPStatus.OK - assert flask.request.path == DDSEndpoint.INDEX + assert response.request.path == DDSEndpoint.INDEX response: werkzeug.test.WrapperTestResponse = client.post( DDSEndpoint.REQUEST_RESET_PASSWORD, @@ -144,7 +144,7 @@ def test_password_reset(client: flask.testing.FlaskClient): ) assert response.status_code == HTTPStatus.OK assert response.content_type == "text/html; charset=utf-8" - assert flask.request.path == DDSEndpoint.LOGIN + assert response.request.path == DDSEndpoint.LOGIN response: 
werkzeug.test.WrapperTestResponse = client.post( f"{DDSEndpoint.REQUEST_RESET_PASSWORD}/{token}", @@ -158,7 +158,7 @@ def test_password_reset(client: flask.testing.FlaskClient): ) assert response.status_code == HTTPStatus.OK assert response.content_type == "text/html; charset=utf-8" - assert flask.request.path == DDSEndpoint.PASSWORD_RESET_COMPLETED + assert response.request.path == DDSEndpoint.PASSWORD_RESET_COMPLETED with client.session_transaction() as session: session["reset_token"] = token @@ -170,7 +170,7 @@ def test_password_reset(client: flask.testing.FlaskClient): ) assert response.status_code == HTTPStatus.OK assert response.content_type == "text/html; charset=utf-8" - assert flask.request.path == DDSEndpoint.PASSWORD_RESET_COMPLETED + assert response.request.path == DDSEndpoint.PASSWORD_RESET_COMPLETED response: werkzeug.test.WrapperTestResponse = client.get( DDSEndpoint.USER_INFO, @@ -178,7 +178,7 @@ def test_password_reset(client: flask.testing.FlaskClient): ) assert response.status_code == HTTPStatus.UNAUTHORIZED assert response.content_type == "application/json" - assert flask.request.path == DDSEndpoint.USER_INFO + assert response.request.path == DDSEndpoint.USER_INFO assert ( response.json.get("message") == "Password reset performed after last authentication. Start a new authenticated session to proceed." 
diff --git a/tests/test_project_info.py b/tests/test_project_info.py index cac738f9a..1dc7fc3d6 100644 --- a/tests/test_project_info.py +++ b/tests/test_project_info.py @@ -47,7 +47,7 @@ def test_list_proj_info_without_project(client): def test_list_proj_info_access_granted(client): - """Researcher should be able to list project information""" + """Researcher should be able to list project information, "Created by" should be the Unit name""" token = tests.UserAuth(tests.USER_CREDENTIALS["researchuser"]).token(client) response = client.get(tests.DDSEndpoint.PROJECT_INFO, headers=token, query_string=proj_query) @@ -56,20 +56,24 @@ def test_list_proj_info_access_granted(client): project_info = response_json.get("project_info") assert "public_project_id" == project_info.get("Project ID") + # check that Researcher gets Unit name as "Created by" + assert "Display Name" == project_info.get("Created by") # check that endpoint returns dictionary and not a list assert isinstance(project_info, dict) def test_list_proj_info_unit_user(client): - """Unit user should be able to list project information""" + """Test returned project information for unituser""" - token = tests.UserAuth(tests.USER_CREDENTIALS["unitadmin"]).token(client) + token = tests.UserAuth(tests.USER_CREDENTIALS["unituser"]).token(client) response = client.get(tests.DDSEndpoint.PROJECT_INFO, headers=token, query_string=proj_query) assert response.status_code == http.HTTPStatus.OK response_json = response.json project_info = response_json.get("project_info") assert "public_project_id" == project_info.get("Project ID") + # check that Unit admin gets personal name as "Created by" + assert "Unit User" == project_info.get("Created by") assert ( "This is a test project. 
You will be able to upload to but NOT download" in project_info.get("Description") @@ -77,14 +81,30 @@ def test_list_proj_info_unit_user(client): assert "Size" in project_info.keys() and project_info["Size"] is not None -def test_list_proj_info_returned_items(client): - """Returned project information should contain certain items""" +def test_list_proj_info_returned_items_unitadmin(client): + """Test returned project information for unitadmin""" token = tests.UserAuth(tests.USER_CREDENTIALS["unitadmin"]).token(client) response = client.get(tests.DDSEndpoint.PROJECT_INFO, headers=token, query_string=proj_query) assert response.status_code == http.HTTPStatus.OK response_json = response.json project_info = response_json.get("project_info") + # check that Unit admin gets personal name as "Created by" + assert "Unit User" == project_info.get("Created by") + + assert all(item in project_info for item in proj_info_items) + + +def test_list_proj_info_returned_items_superadmin(client): + """Test returned project information for superadmin""" + + token = tests.UserAuth(tests.USER_CREDENTIALS["superadmin"]).token(client) + response = client.get(tests.DDSEndpoint.PROJECT_INFO, headers=token, query_string=proj_query) + assert response.status_code == http.HTTPStatus.OK + response_json = response.json + project_info = response_json.get("project_info") + # check that Super admin gets personal name as "Created by" + assert "Unit User" == project_info.get("Created by") assert all(item in project_info for item in proj_info_items) diff --git a/tests/test_project_listing.py b/tests/test_project_listing.py index 1c17fcfaa..b04faa4d0 100644 --- a/tests/test_project_listing.py +++ b/tests/test_project_listing.py @@ -30,7 +30,7 @@ def test_list_proj_no_token(client): def test_list_proj_access_granted_ls(client): - """Researcher should be able to list""" + """Researcher should be able to list, "Created by" should be the Unit name""" token = 
tests.UserAuth(tests.USER_CREDENTIALS["researchuser"]).token(client) response = client.get(tests.DDSEndpoint.LIST_PROJ, headers=token) @@ -38,10 +38,12 @@ def test_list_proj_access_granted_ls(client): response_json = response.json list_of_projects = response_json.get("project_info") assert "public_project_id" == list_of_projects[0].get("Project ID") + # check that Researcher gets Unit name as "Created by" + assert "Display Name" == list_of_projects[0].get("Created by") -def test_list_proj_unit_user(client): - """Unit user should be able to list projects""" +def test_list_proj_unit_admin(client): + """Unit admin should be able to list projects, "Created by" should be the creators name""" token = tests.UserAuth(tests.USER_CREDENTIALS["unitadmin"]).token(client) response = client.get( @@ -56,6 +58,48 @@ def test_list_proj_unit_user(client): assert "public_project_id" == public_project.get("Project ID") assert "Cost" in public_project.keys() and public_project["Cost"] is not None assert "Usage" in public_project.keys() and public_project["Usage"] is not None + # check that Unit admin gets personal name as "Created by" + assert "Unit User" == public_project.get("Created by") + + +def test_list_proj_unit_user(client): + """Unit user should be able to list projects, "Created by" should be the creators name""" + + token = tests.UserAuth(tests.USER_CREDENTIALS["unituser"]).token(client) + response = client.get( + tests.DDSEndpoint.LIST_PROJ, + headers=token, + json={"usage": True}, + content_type="application/json", + ) + + assert response.status_code == http.HTTPStatus.OK + public_project = response.json.get("project_info")[0] + assert "public_project_id" == public_project.get("Project ID") + assert "Cost" in public_project.keys() and public_project["Cost"] is not None + assert "Usage" in public_project.keys() and public_project["Usage"] is not None + # check that Unit user gets personal name as "Created by" + assert "Unit User" == public_project.get("Created by") + + +def 
test_list_proj_superadmin(client): + """Super admin should be able to list projects, "Created by" should be the creators name""" + + token = tests.UserAuth(tests.USER_CREDENTIALS["superadmin"]).token(client) + response = client.get( + tests.DDSEndpoint.LIST_PROJ, + headers=token, + json={"usage": True}, + content_type="application/json", + ) + + assert response.status_code == http.HTTPStatus.OK + public_project = response.json.get("project_info")[0] + assert "public_project_id" == public_project.get("Project ID") + assert "Cost" in public_project.keys() and public_project["Cost"] is not None + assert "Usage" in public_project.keys() and public_project["Usage"] is not None + # check that Super admin gets personal name as "Created by" + assert "Unit User" == public_project.get("Created by") def test_list_only_active_projects_unit_user(client): diff --git a/tests/test_user_change_password.py b/tests/test_user_change_password.py index 591f34e5b..c1b41b056 100644 --- a/tests/test_user_change_password.py +++ b/tests/test_user_change_password.py @@ -17,7 +17,7 @@ def test_get_user_change_password_without_login(client): # Because it redirects to login assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.LOGIN + assert response.request.path == tests.DDSEndpoint.LOGIN def test_unsuccessful_user_change_password_with_login_nonlatin1(client): @@ -60,7 +60,7 @@ def test_unsuccessful_user_change_password_with_login_nonlatin1(client): headers=tests.DEFAULT_HEADER, ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.CHANGE_PASSWORD + assert response.request.path == tests.DDSEndpoint.CHANGE_PASSWORD # Password should not have changed, neither should other info assert user.verify_password("password") @@ -115,7 +115,7 @@ def test_successful_user_change_password_with_login(client): headers=tests.DEFAULT_HEADER, ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == 
tests.DDSEndpoint.LOGIN + assert response.request.path == tests.DDSEndpoint.LOGIN assert not user.verify_password("password") assert user.verify_password("123$%^qweRTY") diff --git a/tests/test_user_confirm_invites_and_register.py b/tests/test_user_confirm_invites_and_register.py index 8f9868c39..0efa8ab71 100644 --- a/tests/test_user_confirm_invites_and_register.py +++ b/tests/test_user_confirm_invites_and_register.py @@ -38,7 +38,7 @@ def test_confirm_invite_invalid_token(client): assert response.status_code == http.HTTPStatus.OK # index redirects to login - assert flask.request.path == flask.url_for("pages.home") + assert response.request.path == flask.url_for("pages.home") def test_confirm_invite_expired_token(client): @@ -59,7 +59,7 @@ def test_confirm_invite_expired_token(client): ) assert response.status_code == http.HTTPStatus.OK # index redirects to login - assert flask.request.path == flask.url_for("pages.home") + assert response.request.path == flask.url_for("pages.home") def test_confirm_invite_valid_token(client): @@ -135,7 +135,7 @@ def test_register_no_token_in_session(registry_form_data, client): headers=tests.DEFAULT_HEADER, ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.INDEX + assert response.request.path == tests.DDSEndpoint.INDEX # Invite should be kept and user should not be created invite = models.Invite.query.filter_by( @@ -160,7 +160,7 @@ def test_register_weak_password(registry_form_data, client): headers=tests.DEFAULT_HEADER, ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.USER_NEW + assert response.request.path == tests.DDSEndpoint.USER_NEW # Invite should be kept and user should not be created invite = models.Invite.query.filter_by( @@ -190,7 +190,7 @@ def test_register_nonlatin1_username(registry_form_data, client): headers=tests.DEFAULT_HEADER, ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == 
tests.DDSEndpoint.USER_NEW + assert response.request.path == tests.DDSEndpoint.USER_NEW # Invite should be kept and user should not be created invite = models.Invite.query.filter_by( @@ -220,7 +220,7 @@ def test_register_nonlatin1_password(registry_form_data, client): headers=tests.DEFAULT_HEADER, ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.USER_NEW + assert response.request.path == tests.DDSEndpoint.USER_NEW # Invite should be kept and user should not be created invite = models.Invite.query.filter_by( diff --git a/tests/test_user_reset_password.py b/tests/test_user_reset_password.py index 748f98f12..b5714dadb 100644 --- a/tests/test_user_reset_password.py +++ b/tests/test_user_reset_password.py @@ -115,7 +115,7 @@ def test_reset_password_invalid_token_get(client): # Redirection status code assert response.status_code == http.HTTPStatus.OK # Incorrect token should redirect and not lead to form - assert flask.request.path == tests.DDSEndpoint.INDEX + assert response.request.path == tests.DDSEndpoint.INDEX def get_valid_reset_token(username, expires_in=3600): @@ -150,7 +150,7 @@ def test_reset_password_invalid_token_post(client): ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.RESET_PASSWORD + valid_reset_token + assert response.request.path == tests.DDSEndpoint.RESET_PASSWORD + valid_reset_token form_token = flask.g.csrf_token form_data = { @@ -170,7 +170,7 @@ def test_reset_password_invalid_token_post(client): headers=tests.DEFAULT_HEADER, ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.INDEX + assert response.request.path == tests.DDSEndpoint.INDEX # Just make sure no project user keys has been removed nr_proj_user_keys_after = models.ProjectUserKeys.query.count() @@ -192,7 +192,7 @@ def test_reset_password_expired_token_get(client): assert response.status_code == http.HTTPStatus.OK # Incorrect token 
should redirect and not lead to form - assert flask.request.path == tests.DDSEndpoint.INDEX + assert response.request.path == tests.DDSEndpoint.INDEX def test_reset_password_expired_token_post_no_password_reset_row(client): @@ -209,7 +209,7 @@ def test_reset_password_expired_token_post_no_password_reset_row(client): ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.INDEX + assert response.request.path == tests.DDSEndpoint.INDEX def test_reset_password_expired_token_post(client): @@ -233,7 +233,7 @@ def test_reset_password_expired_token_post(client): ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.RESET_PASSWORD + valid_reset_token + assert response.request.path == tests.DDSEndpoint.RESET_PASSWORD + valid_reset_token form_token = flask.g.csrf_token form_data = { @@ -252,7 +252,7 @@ def test_reset_password_expired_token_post(client): headers=tests.DEFAULT_HEADER, ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.INDEX + assert response.request.path == tests.DDSEndpoint.INDEX # Just make sure no project user keys has been removed nr_proj_user_keys_after = models.ProjectUserKeys.query.count() @@ -284,7 +284,7 @@ def test_reset_password_researchuser_no_password_reset_row(client): ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.INDEX + assert response.request.path == tests.DDSEndpoint.INDEX def test_reset_password_researchuser(client): @@ -314,7 +314,7 @@ def test_reset_password_researchuser(client): ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.RESET_PASSWORD + valid_reset_token + assert response.request.path == tests.DDSEndpoint.RESET_PASSWORD + valid_reset_token form_token = flask.g.csrf_token form_data = { @@ -331,7 +331,7 @@ def test_reset_password_researchuser(client): headers=tests.DEFAULT_HEADER, ) assert 
response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.PASSWORD_RESET_COMPLETED + assert response.request.path == tests.DDSEndpoint.PASSWORD_RESET_COMPLETED user = models.User.query.filter_by(username="researchuser").first() @@ -373,7 +373,7 @@ def test_reset_password_project_owner_no_password_reset_row(client): ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.INDEX + assert response.request.path == tests.DDSEndpoint.INDEX def test_reset_password_project_owner(client): @@ -403,7 +403,7 @@ def test_reset_password_project_owner(client): ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.RESET_PASSWORD + valid_reset_token + assert response.request.path == tests.DDSEndpoint.RESET_PASSWORD + valid_reset_token form_token = flask.g.csrf_token form_data = { @@ -420,7 +420,7 @@ def test_reset_password_project_owner(client): headers=tests.DEFAULT_HEADER, ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.PASSWORD_RESET_COMPLETED + assert response.request.path == tests.DDSEndpoint.PASSWORD_RESET_COMPLETED user = models.User.query.filter_by(username="projectowner").first() @@ -462,7 +462,7 @@ def test_reset_password_unituser_no_password_reset_row(client): ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.INDEX + assert response.request.path == tests.DDSEndpoint.INDEX def test_reset_password_unituser(client): @@ -492,7 +492,7 @@ def test_reset_password_unituser(client): ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.RESET_PASSWORD + valid_reset_token + assert response.request.path == tests.DDSEndpoint.RESET_PASSWORD + valid_reset_token form_token = flask.g.csrf_token form_data = { @@ -509,7 +509,7 @@ def test_reset_password_unituser(client): headers=tests.DEFAULT_HEADER, ) assert 
response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.PASSWORD_RESET_COMPLETED + assert response.request.path == tests.DDSEndpoint.PASSWORD_RESET_COMPLETED user = models.User.query.filter_by(username="unituser").first() @@ -558,7 +558,7 @@ def test_reset_password_unitadmin(client): ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.RESET_PASSWORD + valid_reset_token + assert response.request.path == tests.DDSEndpoint.RESET_PASSWORD + valid_reset_token form_token = flask.g.csrf_token form_data = { @@ -575,7 +575,7 @@ def test_reset_password_unitadmin(client): headers=tests.DEFAULT_HEADER, ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.PASSWORD_RESET_COMPLETED + assert response.request.path == tests.DDSEndpoint.PASSWORD_RESET_COMPLETED user = models.User.query.filter_by(username="unitadmin").first() @@ -626,7 +626,7 @@ def test_reset_password_unitadmin_nonlatin1(client): ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.RESET_PASSWORD + valid_reset_token + assert response.request.path == tests.DDSEndpoint.RESET_PASSWORD + valid_reset_token # Set new password info -- contains invalid char € form_token = flask.g.csrf_token @@ -645,7 +645,7 @@ def test_reset_password_unitadmin_nonlatin1(client): headers=tests.DEFAULT_HEADER, ) assert response.status_code == http.HTTPStatus.OK - assert flask.request.path == tests.DDSEndpoint.RESET_PASSWORD + valid_reset_token + assert response.request.path == tests.DDSEndpoint.RESET_PASSWORD + valid_reset_token # Get user user = models.User.query.filter_by(username="unitadmin").first() diff --git a/tests/test_utils.py b/tests/test_utils.py index e047ef8bf..a999285ea 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,6 +2,8 @@ from dds_web import utils import pytest from unittest.mock import patch +from unittest.mock import 
PropertyMock + from dds_web import db from dds_web.database import models from dds_web.errors import ( @@ -19,11 +21,37 @@ from flask.testing import FlaskClient import requests_mock import werkzeug +from dateutil.relativedelta import relativedelta +import boto3 +import botocore +import sqlalchemy # Variables url: str = "http://localhost" +# Mocking + + +def mock_nosuchbucket(*_, **__): + raise botocore.exceptions.ClientError( + error_response={"Error": {"Code": "NoSuchBucket"}}, operation_name="Test" + ) + + +def mock_items_in_bucket(): + class Object(object): + pass + + list_of_items = [] + for i in range(20): + obj = Object() + obj.key = f"testing{i}" + list_of_items.append(obj) + + return list_of_items + + # collect_project @@ -992,3 +1020,416 @@ def test_calculate_version_period_usage_new_version(client: flask.testing.FlaskC assert existing_version.size_stored == 10000 assert bytehours < 10000.0 assert existing_version.time_invoiced + + +# format_timestamp + + +def test_format_timestamp_no_timestamp(client: flask.testing.FlaskClient): + """No timestamp can be formatted if no timestamp is entered.""" + from dds_web.utils import format_timestamp + + timestamp = format_timestamp() + assert timestamp is None + + +def test_format_timestamp_timestamp_object(client: flask.testing.FlaskClient): + """Verify working timestamp object formatting.""" + from dds_web.utils import format_timestamp, current_time + + # 1. 
No passed in format
+    # Verify that timestamp has a microseconds part
+    now = current_time()
+    assert now.microsecond != 0
+
+    # Verify that timestamp does not have a microseconds part after formatting
+    formatted = format_timestamp(timestamp_object=now)
+    assert formatted.microsecond == 0
+
+    # Verify that the two timestamps are not equal
+    assert formatted != now
+
+    # Verify that the timestamps have equal parts
+    assert formatted.year == now.year
+    assert formatted.month == now.month
+    assert formatted.day == now.day
+    assert formatted.hour == now.hour
+    assert formatted.minute == now.minute
+    assert formatted.second == now.second
+
+    # 2. Passed in format
+    # Verify that timestamp does not have minute, second or microsecond parts after formatting
+    formatted_2 = format_timestamp(timestamp_object=now, timestamp_format="%Y-%m-%d %H")
+    assert formatted_2.minute == 0
+    assert formatted_2.second == 0
+    assert formatted_2.microsecond == 0
+
+    # The coarser format zeroed out minute/second/microsecond, so the result differs from `now`
+    # Verify that the two timestamps are not equal
+    assert formatted_2 != now
+
+    # Verify that the timestamps have equal parts
+    assert formatted_2.year == now.year
+    assert formatted_2.month == now.month
+    assert formatted_2.day == now.day
+    assert formatted_2.hour == now.hour
+
+
+def test_format_timestamp_timestamp_string(client: flask.testing.FlaskClient):
+    """Verify working timestamp string formatting."""
+    from dds_web.utils import format_timestamp, current_time
+
+    # 1. 
No passed in format + now = current_time() + now_as_string = now.strftime("%Y-%m-%d %H:%M:%S") + + # Verify that timestamp has a microseconds part + assert now.microsecond != 0 + + # # Verify that timestamp does not have a microseconds part after formatting + formatted = format_timestamp(timestamp_string=now_as_string) + assert formatted.microsecond == 0 + + # Verify that the two timestamps are not equal + assert formatted != now + + # Verify that the timestamps have equal parts + assert formatted.year == now.year + assert formatted.month == now.month + assert formatted.day == now.day + assert formatted.hour == now.hour + assert formatted.minute == now.minute + assert formatted.second == now.second + + # 2. Passed in format + # Verify that timestamp does not have minute, second or microsecond parts after formatting + with pytest.raises(ValueError) as err: + format_timestamp(timestamp_string=now_as_string, timestamp_format="%H:%M:%S") + assert ( + str(err.value) + == "Timestamp strings need to contain year, month, day, hour, minute and seconds." + ) + + +# bytehours_in_last_month + + +def run_bytehours_test(client: flask.testing.FlaskClient, size_to_test: int): + """Run checks to see that bytehours calc works.""" + # Imports + from dds_web.utils import bytehours_in_last_month, current_time, format_timestamp + + # 1. 1 byte, 1 hour, since a month, not deleted --> 1 bytehour + now = format_timestamp(timestamp_object=current_time()) + time_uploaded = now - datetime.timedelta(hours=1) + expected_bytehour = size_to_test + + # 1a. Get version and change size stored + version_to_test = models.Version.query.filter_by(time_deleted=None).first() + version_to_test.size_stored = size_to_test + version_to_test.time_uploaded = time_uploaded + version_id = version_to_test.id + db.session.commit() + + # 1b. 
Get same version + version_to_test = models.Version.query.filter_by(id=version_id).first() + assert version_to_test + assert version_to_test.size_stored == size_to_test + assert not version_to_test.time_deleted + + # 1c. Test bytehours + bytehours = bytehours_in_last_month(version=version_to_test) + + # --- + # 2. 1 byte, since 30 days, deleted 1 hour ago --> 1 bytehour + time_deleted = now - datetime.timedelta(hours=1) + time_uploaded = time_deleted - datetime.timedelta(hours=1) + + # 2a. Change time deleted to an hour ago and time uploaded to 2 + version_to_test.time_deleted = time_deleted + version_to_test.time_uploaded = time_uploaded + db.session.commit() + + # 2b. Get version again + version_to_test = models.Version.query.filter_by(id=version_id).first() + + # 2c. Test bytehours + bytehours = bytehours_in_last_month(version=version_to_test) + assert int(bytehours) == expected_bytehour + + # --- + # 3. 1 byte, before a month ago, not deleted --> 1*month + now = format_timestamp(timestamp_object=current_time()) + time_uploaded = now - relativedelta(months=1, hours=1) + time_a_month_ago = now - relativedelta(months=1) + hours_since_month = (now - time_a_month_ago).total_seconds() / (60 * 60) + expected_bytehour = size_to_test * hours_since_month + + # 3a. Change time uploaded and not deleted + version_to_test.time_uploaded = time_uploaded + version_to_test.time_deleted = None + db.session.commit() + + # 3b. Get version again + version_to_test = models.Version.query.filter_by(id=version_id).first() + + # 3c. Test bytehours + bytehours = bytehours_in_last_month(version=version_to_test) + assert bytehours == expected_bytehour + + # --- + # 4. 
1 byte, before 30 days, deleted an hour ago --> 1 hour less than a month + time_deleted = format_timestamp(timestamp_object=current_time()) - relativedelta(hours=1) + time_uploaded = now - relativedelta(months=1, hours=1) + time_a_month_ago = now - relativedelta(months=1) + hours_since_month = (time_deleted - time_a_month_ago).total_seconds() / (60 * 60) + expected_bytehour = size_to_test * hours_since_month + + # 4a. Change time deleted and uploaded + version_to_test.time_uploaded = time_uploaded + version_to_test.time_deleted = time_deleted + db.session.commit() + + # 4b. Get version again + version_to_test = models.Version.query.filter_by(id=version_id).first() + + # 4c. Test bytehours + bytehours = bytehours_in_last_month(version=version_to_test) + assert int(bytehours) == expected_bytehour + + +def test_bytehours_in_last_month_1byte(client: flask.testing.FlaskClient): + """Test that function calculates the correct number of TBHours.""" + run_bytehours_test(client=client, size_to_test=1) + + +def test_bytehours_in_last_month_1tb(client: flask.testing.FlaskClient): + """Test that function calculates the correct number of TBHours.""" + run_bytehours_test(client=client, size_to_test=1e12) + + +def test_bytehours_in_last_month_20tb(client: flask.testing.FlaskClient): + """Test that function calculates the correct number of TBHours.""" + run_bytehours_test(client=client, size_to_test=20 * 1e12) + + +# list_lost_files_in_project + + +def test_list_lost_files_in_project_nosuchbucket( + client: flask.testing.FlaskClient, boto3_session, capfd +): + """Verify that nosuchbucket error is raised and therefore message printed.""" + # Imports + from dds_web.utils import list_lost_files_in_project + + # Get project + project = models.Project.query.first() + assert project + + # Mock NoSuchBucket error + with patch("boto3.session.Session.resource.meta.client.head_bucket", mock_nosuchbucket): + # Verify that exception is raised + with 
pytest.raises(botocore.exceptions.ClientError): + in_db_but_not_in_s3, in_s3_but_not_in_db = list_lost_files_in_project( + project=project, s3_resource=boto3_session + ) + assert not in_db_but_not_in_s3 + assert not in_s3_but_not_in_db + + # Verify that correct messages is printed + _, err = capfd.readouterr() + assert f"Project '{project.public_id}' bucket is missing" in err + assert f"Expected: {not project.is_active}" in err + + +def test_list_lost_files_in_project_nothing_in_s3( + client: flask.testing.FlaskClient, boto3_session, capfd +): + """Verify that all files in db are printed since they do not exist in s3.""" + # Imports + from dds_web.utils import list_lost_files_in_project + + # Get project + project = models.Project.query.first() + assert project + + # Run listing + in_db_but_not_in_s3, in_s3_but_not_in_db = list_lost_files_in_project( + project=project, s3_resource=boto3_session + ) + + # Verify that in_s3_but_not_db is empty + assert not in_s3_but_not_in_db + + # Get logging + _, err = capfd.readouterr() + + # Verify that all files are listed + for f in project.files: + assert f.name_in_bucket in in_db_but_not_in_s3 + assert ( + f"Entry {f.name_in_bucket} ({project.public_id}, {project.responsible_unit}) not found in S3 (but found in db)" + in err + ) + assert ( + f"Entry {f.name_in_bucket} ({project.public_id}, {project.responsible_unit}) not found in database (but found in s3)" + not in err + ) + + +def test_list_lost_files_in_project_s3anddb_empty( + client: flask.testing.FlaskClient, boto3_session, capfd +): + """Verify that there are no lost files because there are no files.""" + # Imports + from dds_web.utils import list_lost_files_in_project + + # Get project + project = models.Project.query.first() + assert project + + # Mock project.files -- no files + with patch("dds_web.database.models.Project.files", new_callable=PropertyMock) as mock_files: + mock_files.return_value = [] + + # Run listing + in_db_but_not_in_s3, in_s3_but_not_in_db = 
list_lost_files_in_project( + project=project, s3_resource=boto3_session + ) + + # Verify that both are empty + assert not in_db_but_not_in_s3 + assert not in_s3_but_not_in_db + + # Get logging output + _, err = capfd.readouterr() + + # Verify no message printed out + assert not err + + +def test_list_lost_files_in_project_no_files_in_db( + client: flask.testing.FlaskClient, boto3_session, capfd +): + """Mock files in s3 and verify that only those are printed out.""" + # Imports + from dds_web.utils import list_lost_files_in_project + + # Get project + project = models.Project.query.first() + assert project + + # Mock project.files -- no files + with patch("dds_web.database.models.Project.files", new_callable=PropertyMock) as mock_files: + mock_files.return_value = [] + + # Mock files in s3 + boto3_session.Bucket(project.bucket).objects.all = mock_items_in_bucket + # Get created testfiles + fake_files_in_bucket = mock_items_in_bucket() + + # Run listing + in_db_but_not_in_s3, in_s3_but_not_in_db = list_lost_files_in_project( + project=project, s3_resource=boto3_session + ) + + # Verify that missing in database but exists in s3 + assert not in_db_but_not_in_s3 + assert in_s3_but_not_in_db + + # Get logging + _, err = capfd.readouterr() + + # Verify that all fake files are printed out + for x in fake_files_in_bucket: + assert ( + f"Entry {x.key} ({project.public_id}, {project.responsible_unit}) not found in database (but found in s3)" + in err + ) + + # Verify that no file lines are printed out + for x in project.files: + assert ( + f"Entry {x.name_in_bucket} ({project.public_id}, {project.responsible_unit}) not found in S3 (but found in db)" + not in err + ) + + +def test_list_lost_files_in_project_overlap( + client: flask.testing.FlaskClient, boto3_session, capfd +): + """Verify that only some files are printed out when some files exist in the database and s3, but not all.""" + # Imports + from dds_web.utils import list_lost_files_in_project + + # Get project + 
project = models.Project.query.first() + assert project + + # Get created testfiles + fake_files_in_bucket = mock_items_in_bucket() + + # Number of project files + original_db_files = project.files + num_proj_files = len(original_db_files) + + # Create 15 few new files + new_files = [] + for x in fake_files_in_bucket[:15]: + new_file = models.File( + name=x.key, + name_in_bucket=x.key, + subpath=".", + size_original=0, + size_stored=0, + compressed=True, + public_key="X" * 64, + salt="X" * 32, + checksum="X" * 64, + ) + new_files.append(new_file) + project.files.append(new_file) + db.session.commit() + + # Mock files in s3 + boto3_session.Bucket(project.bucket).objects.all = mock_items_in_bucket + + # Run listing + in_db_but_not_in_s3, in_s3_but_not_in_db = list_lost_files_in_project( + project=project, s3_resource=boto3_session + ) + + # Verify that both contain entries + assert in_db_but_not_in_s3 + assert in_s3_but_not_in_db + + # Get logging output + _, err = capfd.readouterr() + + # Verify that original db files are printed + assert len(project.files) == num_proj_files + 15 + for x in project.files: + if x not in new_files: + assert ( + f"Entry {x.name_in_bucket} ({project.public_id}, {project.responsible_unit}) not found in S3 (but found in db)" + in err + ) + + # Verify that s3 files are printed + for x in fake_files_in_bucket[15::]: + assert ( + f"Entry {x.key} ({project.public_id}, {project.responsible_unit}) not found in database (but found in s3)" + in err + ) + + # Verify that the rest of the files are not printed + for x in fake_files_in_bucket[:15]: + assert ( + f"Entry {x.key} ({project.public_id}, {project.responsible_unit}) not found in S3 (but found in db)" + not in err + ) + assert ( + f"Entry {x.key} ({project.public_id}, {project.responsible_unit}) not found in database (but found in s3)" + not in err + ) diff --git a/tests/test_version.py b/tests/test_version.py index 475acb1ff..2f3624c1a 100644 --- a/tests/test_version.py +++ 
b/tests/test_version.py @@ -2,4 +2,4 @@ def test_version(): - assert version.__version__ == "2.3.0" + assert version.__version__ == "2.4.0"