diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 8f2687a12..c4a77d687 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,67 +1,126 @@ -> **Before submitting the PR, please go through the sections below and fill in what you can. If there are any items that are irrelevant for the current PR, remove the row. If a relevant option is missing, please add it as an item and add a PR comment informing that the new option should be included into this template.** + -> **All _relevant_ items should be ticked before the PR is merged** +## **1. This PR contains the following changes...** -# Description +_Add a summary of the changes and the related issue._ -- [ ] Summary of the changes and the related issue: -- [ ] Motivation and context regarding why the change is needed: -- [ ] List / description of any dependencies or other changes required for this change: -- Fixes an issue in GitHub / Jira: - - [ ] Yes: _[link to GitHub issue / Jira task ID]_ - - [ ] No +## **2. The following additional changes are required for this to work** -## Type of change +_Add information on additional changes required for the PR changes to work, both locally and in the deployments._ + +> E.g. Does the deployment setup need anything for this to work? + +## **3. The PR fixes the following GitHub issue / Jira task** + + + +- [ ] GitHub issue (link): +- [ ] Jira task (ID, `DDS-xxxx`): +- [ ] The PR does not fix a specific GitHub issue or Jira task + +## **4. What _type of change(s)_ does the PR contain?** + + -- [ ] Bug fix - - [ ] Breaking: _Describe_ - - [ ] Non-breaking -- [ ] Documentation - [ ] New feature - - [ ] Breaking: _Describe_ + - [ ] Breaking: _Please describe the reason for the break and how we can fix it._ + - [ ] Non-breaking +- [ ] Database change + - [ ] Migration _included in PR_ + - [ ] Migration _not needed_ +- [ ] Bug fix + - [ ] Breaking: _Please describe the reason for the break and how we can fix it._ - [ ] Non-breaking - [ ] Security Alert fix +- [ ] Documentation - [ ] Tests **(only)** - [ ] Workflow -_"Breaking": The change will cause existing functionality to not work as expected._ - -# Checklist: - -## General - -- [ ] [Changelog](../CHANGELOG.md): New row added. Not needed when PR includes _only_ tests. -- [ ] Database schema has changed - - [ ] A new migration is included in the PR - - [ ] The change does not require a migration -- [ ] Code change - - [ ] Self-review of code done - - [ ] Comments added, particularly in hard-to-understand areas - - Documentation update - - [ ] Done - - [ ] Not needed - -## Repository / Releases - -- [ ] Blocking PRs have been merged -- [ ] Rebase / update of branch done -- [ ] PR to `master` branch (Product Owner / Scrum Master) - - [ ] The [version](../dds_web/version.py) is updated - - [ ] I am bumping the major version (e.g. 1.x.x to 2.x.x) - - [ ] I have made the corresponding changes to the CLI version - - Backward compatible - - [ ] Yes: The code works together with `dds_cli/master` branch - - [ ] No: The code **does not** entirely / at all work together with the `dds_cli/master` branch. 
-  _Please add detailed and clear information about the broken features_
-
-## Checks
-
-- [ ] CodeQL passes
-- [ ] Formatting: Black & Prettier checks pass
-- Tests
-  - [ ] I have added tests for the new code
-  - [ ] The tests pass
-- Trivy / Snyk:
-  - [ ] There are no new security alerts
-  - [ ] This PR fixes new security alerts
-  - [ ] Security alerts have been dismissed
-  - [ ] PR will be merged with new security alerts; This is why: _Please add a short description here_
+## **5. Checklist**
+
+
+### **Always**
+
+
+- [Changelog](../CHANGELOG.md)
+  - [ ] Added
+  - [ ] Not needed (E.g. PR contains _only_ tests)
+- Rebase / Update / Merge _from_ base branch (the branch from which the current is forked)
+  - [ ] Done
+  - [ ] Not needed
+- Blocking PRs
+  - [ ] Merged
+  - [ ] No blocking PRs
+- PR to `master` branch
+  - [ ] Yes: Go to the section [PR to master](#pr-to-master)
+  - [ ] No
+
+### If PR consists of **code change(s)**
+
+
+- Self review
+  - [ ] Done
+- Comments, docstrings, etc.
+  - [ ] Added / Updated
+- Documentation
+  - [ ] Updated
+  - [ ] Update not needed
+
+### If PR is to **master**
+
+
+- [ ] I have followed steps 1-5 in [the release instructions](../doc/procedures/new_release.md)
+- [ ] I am bumping the major version (e.g. 1.x.x to 2.x.x)
+- [ ] I have made the corresponding changes to the CLI version
+
+**Is this version _backward compatible?_**
+
+- [ ] Yes: The code works together with `dds_cli/master` branch
+- [ ] No: The code **does not** entirely / at all work together with the `dds_cli/master` branch. _Please add detailed and clear information about the broken features_
+
+## **6. Actions / Scans**
+
+
+- **Black**: Python code formatter. It does not reformat the code in CI; it only checks.
+  Run `black .` locally to execute formatting.
+  - [ ] Passed
+- **Prettier**: General code formatter. Our use case: mainly Markdown and YAML files.
+  Run `npx prettier --write .` locally to execute formatting.
+  - [ ] Passed
+- **Tests**: Pytest to verify that functionality works as expected.
+  - [ ] New tests added
+  - [ ] No new tests
+  - [ ] Passed
+- **CodeQL**: Scan for security vulnerabilities, bugs, and errors
+  - [ ] New alerts: _Go through them and either fix, dismiss, or ignore. Add reasoning in items below._
+  - [ ] Alerts fixed: _What?_
+  - [ ] Alerts ignored / dismissed: _Why?_
+  - [ ] Passed
+- **Trivy**: Security scanner
+  - [ ] New alerts: _Go through them and either fix, dismiss, or ignore. Add reasoning in items below._
+  - [ ] Alerts fixed: _What?_
+  - [ ] Alerts ignored / dismissed: _Why?_
+  - [ ] Passed
+- **Snyk**: Security scanner
+  - [ ] New alerts: _Go through them and either fix, dismiss, or ignore. Add reasoning in items below._
+  - [ ] Alerts fixed: _What?_
+  - [ ] Alerts ignored / dismissed: _Why?_
+  - [ ] Passed
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index fbeb0ab63..7cf5703c7 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -1,3 +1,13 @@
+# CodeQL analysis:
+# CodeQL is the analysis engine used by developers to automate security checks,
+# and by security researchers to perform variant analysis.
+# In CodeQL, code is treated like data. Security vulnerabilities, bugs,
+# and other errors are modeled as queries that can be executed against databases
+# extracted from code. You can run the standard CodeQL queries, written by GitHub
+# researchers and community contributors, or write your own to use in custom analyses.
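# (Illustrative aside, not part of this PR: the "write your own" queries mentioned above
# are typically enabled through the `queries` input of the CodeQL init step further down
# in this file, roughly like:
#
#   - name: Initialize CodeQL
#     uses: github/codeql-action/init@v2
#     with:
#       languages: ${{ matrix.language }}
#       queries: security-extended
#
# The suite name is only an example; see the CodeQL action documentation for the
# available query packs.)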
+# Queries that find potential bugs highlight the result directly in the source file.
+# https://codeql.github.com/docs/codeql-overview/about-codeql/
+# ----------------------------------------------------------------------------
 # For most projects, this workflow file will not need changing; you simply need
 # to commit it to your repository.
 #
@@ -41,11 +51,11 @@ jobs:
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       # Initializes the CodeQL tools for scanning.
       - name: Initialize CodeQL
-        uses: github/codeql-action/init@v1
+        uses: github/codeql-action/init@v2
        with:
          languages: ${{ matrix.language }}
          # If you wish to specify custom queries, you can do so here or in a config file.
@@ -56,7 +66,7 @@ jobs:
      # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
      # If this step fails, then you should remove it and run the build manually (see below)
      - name: Autobuild
-       uses: github/codeql-action/autobuild@v1
+       uses: github/codeql-action/autobuild@v2
      # ℹ️ Command-line programs to run using the OS shell.
      # 📚 https://git.io/JvXDl
@@ -70,4 +80,4 @@ jobs:
      #   make release
      - name: Perform CodeQL Analysis
-       uses: github/codeql-action/analyze@v1
+       uses: github/codeql-action/analyze@v2
diff --git a/.github/workflows/docker-compose-tests.yml b/.github/workflows/docker-compose-tests.yml
index 5f6444ac4..e5cfd590a 100644
--- a/.github/workflows/docker-compose-tests.yml
+++ b/.github/workflows/docker-compose-tests.yml
@@ -1,3 +1,7 @@
+# Pytest
+# Runs all tests in dds_web by executing the docker compose yml files for testing.
+# The actual pytest command is not in this file.
+
 name: Tests

 on:
diff --git a/.github/workflows/prettier.yml b/.github/workflows/prettier.yml
index db7dfc117..cf395e4ed 100644
--- a/.github/workflows/prettier.yml
+++ b/.github/workflows/prettier.yml
@@ -1,3 +1,7 @@
+# Prettier
+# Prettier is a code formatter. We mostly use it for the markdown files.
+# https://prettier.io/
+
 name: Lint with Prettier

 on: [push, pull_request]
diff --git a/.github/workflows/publish_and_trivyscan.yml b/.github/workflows/publish_and_trivyscan.yml
index 45c0ee773..14089531a 100644
--- a/.github/workflows/publish_and_trivyscan.yml
+++ b/.github/workflows/publish_and_trivyscan.yml
@@ -1,5 +1,18 @@
----
-name: Publish Docker Image and run Trivy Security Scan
+# GHCR:
+# GitHub Container Repository
+# Images from ghcr are used when deploying prod and dev.
+#
+# Trivy
+# Trivy is a comprehensive and versatile security scanner. Trivy has scanners that look for
+# security issues, and targets where it can find those issues.
+#
+# This action publishes a docker image and then runs the trivy security scan on that image.
+# Potential security issues will be uploaded to the security tab in the repository.
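# (Illustrative aside, not part of this PR: the publish-then-scan pattern described above
# is usually wired up with the Trivy GitHub action plus a SARIF upload step, roughly like
# the sketch below. The image tag and file names are placeholders, and the workflow's real
# steps live further down in this file, outside the hunks shown here.
#
#   - name: Run Trivy vulnerability scanner
#     uses: aquasecurity/trivy-action@master
#     with:
#       image-ref: ghcr.io/scilifelabdatacentre/dds-backend:latest
#       format: sarif
#       output: trivy-results.sarif
#   - name: Upload Trivy scan results to the Security tab
#     uses: github/codeql-action/upload-sarif@v2
#     with:
#       sarif_file: trivy-results.sarif
# )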
+# +# https://github.com/aquasecurity/trivy +# --------------------------------------- +# Publish Docker Image to GHCR and run Trivy Security Scan +name: GHCR and Trivy Scan on: pull_request: push: @@ -11,7 +24,7 @@ on: jobs: push_to_registry: if: github.repository == 'ScilifelabDataCentre/dds_web' - name: Push Docker image to Docker Hub + name: Push image runs-on: ubuntu-latest permissions: contents: read @@ -23,11 +36,6 @@ jobs: steps: - name: Check out the repo uses: actions/checkout@v3 - - name: Log in to Docker Hub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Log in to Github Container Repository uses: docker/login-action@v2 with: @@ -39,7 +47,6 @@ jobs: uses: docker/metadata-action@v4 with: images: | - scilifelabdatacentre/dds-backend ghcr.io/scilifelabdatacentre/dds-backend - name: Ensure lowercase name run: echo IMAGE_REPOSITORY=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV diff --git a/.github/workflows/python-black.yml b/.github/workflows/python-black.yml index 312940af3..74127ef6f 100644 --- a/.github/workflows/python-black.yml +++ b/.github/workflows/python-black.yml @@ -1,3 +1,8 @@ +# Lint Python - Black +# This action lints python using black - a python code formatter. +# https://github.com/psf/black +# This does not format the code, this only detects and informs on issues. +# To format with black, run `black .` locally in the repository. name: Lint Python on: push: diff --git a/.github/workflows/snyk-scan.yml b/.github/workflows/snyk-scan.yml new file mode 100644 index 000000000..db080f506 --- /dev/null +++ b/.github/workflows/snyk-scan.yml @@ -0,0 +1,45 @@ +# Snyk IaC: +# A sample workflow which checks out your Infrastructure as Code Configuration files, +# such as Kubernetes, Helm & Terraform and scans them for any security issues. +# The results are then uploaded to GitHub Security Code Scanning +# +# For more examples, including how to limit scans to only high-severity issues +# and fail PR checks, see https://github.com/snyk/actions/ +# ----------------------------------------------------------- +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. +# ------------------------------------------------------------ +name: Snyk IaC + +on: + push: + branches: ["dev", master] + pull_request: + # The branches below must be a subset of the branches above + branches: ["dev"] + schedule: + - cron: "0 7,13 * * *" + +jobs: + snyk: + permissions: + contents: read + security-events: write + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + - name: Run Snyk to check for vulnerabilities + uses: snyk/actions/python@master + continue-on-error: true # To make sure that SARIF upload gets called + env: + SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} + with: + command: code test + args: --sarif-file-output=snyk.sarif + - name: Upload result to GitHub Code Scanning + uses: github/codeql-action/upload-sarif@v2 + with: + sarif_file: snyk.sarif + category: snyk diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index 09abfd196..b1014c8e6 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -1,3 +1,8 @@ +# Trivy: +# Trivy (pronunciation) is a comprehensive and versatile security scanner. +# Trivy has scanners that look for security issues, and targets where it can find those issues. 
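# (Illustrative aside, not part of this PR: the same scans can be reproduced locally with
# the Trivy CLI, pointed either at the published image or at the repository checkout. The
# image tag below is a placeholder, and flags vary between Trivy versions.
#
#   trivy image ghcr.io/scilifelabdatacentre/dds-backend:dev
#   trivy fs .
# )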
+# https://github.com/aquasecurity/trivy +# --------------------------------- name: trivy on: schedule: diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a59ae0db..bb371ca10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -176,10 +176,24 @@ Please add a _short_ line describing the PR you make, if the PR implements a spe - Changed support email ([#1324](https://github.com/ScilifelabDataCentre/dds_web/pull/1324)) - Allow Super Admin login during maintenance ([#1333](https://github.com/ScilifelabDataCentre/dds_web/pull/1333)) -## Sprint (2022-12-09 - 2022-12-23) +## Sprint (2022-12-09 - 2023-01-09) - Longer sprint due to Christmas - Dependency: Bump `certifi` due to CVE-2022-23491 ([#1337](https://github.com/ScilifelabDataCentre/dds_web/pull/1337)) - Dependency: Bump `jwcrypto` due to CVE-2022-3102 ([#1339](https://github.com/ScilifelabDataCentre/dds_web/pull/1339)) - Cronjob: Get number of units and users for reporting ([#1324](https://github.com/ScilifelabDataCentre/dds_web/pull/1335)) - Add ability to change project information via ProjectInfo endpoint ([#1331](https://github.com/ScilifelabDataCentre/dds_web/pull/1331)) - Fix the reporting file path ([1345](https://github.com/ScilifelabDataCentre/dds_web/pull/1345)) + +## Sprint (2023-01-09 - 2023-01-20) + +- Refactoring: Move flask commands to own module `commands.py` ([#1351](https://github.com/ScilifelabDataCentre/dds_web/pull/1351)) +- Workflow: Scan with Snyk on PR and schedule ([#1349](https://github.com/ScilifelabDataCentre/dds_web/pull/1349)) +- Flask command (cronjob): Monitor unit usage and warn if above level ([#1350](https://github.com/ScilifelabDataCentre/dds_web/pull/1350)) + +## Sprint (2023-01-20 - 2023-02-03) + +- Workflow: Do not publish to DockerHub anymore ([#1357](https://github.com/ScilifelabDataCentre/dds_web/pull/1357)) +- Refactoring: move cronjobs previously handled by APScheduler to flask commands ([#1355](https://github.com/ScilifelabDataCentre/dds_web/pull/1355)) +- Bug: Fix type issue in 0c9c237cced5 (latest) migration ([#1360](https://github.com/ScilifelabDataCentre/dds_web/pull/1360)) +- Database: New `Reporting` table for saving unit / user stats every month ([#1363](https://github.com/ScilifelabDataCentre/dds_web/pull/1363)) +- Version bump: 2.2.6 ([#1375](https://github.com/ScilifelabDataCentre/dds_web/pull/1375)) diff --git a/Dockerfiles/backend.Dockerfile b/Dockerfiles/backend.Dockerfile index 9db625df5..8f0aae26f 100644 --- a/Dockerfiles/backend.Dockerfile +++ b/Dockerfiles/backend.Dockerfile @@ -45,7 +45,7 @@ RUN apk add mariadb-client ################### ## BUILD FRONTEND ################### -FROM node:16 as nodebuilder +FROM node:18 as nodebuilder COPY ./dds_web/static /build WORKDIR /build RUN npm install -g npm@latest --quiet diff --git a/Dockerfiles/nodebuilder.Dockerfile b/Dockerfiles/nodebuilder.Dockerfile index 1ea3f63cf..cd8b8e97f 100644 --- a/Dockerfiles/nodebuilder.Dockerfile +++ b/Dockerfiles/nodebuilder.Dockerfile @@ -1,4 +1,4 @@ -FROM node:16 +FROM node:18 RUN mkdir /build WORKDIR /build RUN npm install -g npm@latest --quiet diff --git a/dds_web/__init__.py b/dds_web/__init__.py index 58e720d19..a606f9042 100644 --- a/dds_web/__init__.py +++ b/dds_web/__init__.py @@ -6,15 +6,11 @@ # Standard library import logging -import datetime import pathlib import sys -import re import os -import typing # Installed -import click import flask from flask_sqlalchemy import SQLAlchemy from flask_marshmallow import Marshmallow @@ -24,7 +20,7 @@ import flask_mail import flask_login import flask_migrate 
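# (Illustrative aside, not part of this PR: the APScheduler setup removed below is replaced
# by Flask CLI commands registered further down in this file via app.cli.add_command().
# After this change the jobs have to be triggered by an external scheduler, e.g. roughly:
#
#   flask set-available-to-expired    # daily, per the command's docstring
#   flask set-expired-to-archived     # daily
#   flask delete-invites              # daily
#   flask monitor-usage               # monthly
#
# How and where these are scheduled (cron, Kubernetes CronJobs, ...) is deployment-specific,
# is an assumption here, and is not defined in this diff.)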
-from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore + # import flask_qrcode from werkzeug.middleware.proxy_fix import ProxyFix @@ -34,8 +30,6 @@ import structlog import werkzeug -from dds_web.scheduled_tasks import scheduler - #################################################################################################### # GLOBAL VARIABLES ############################################################## GLOBAL VARIABLES # #################################################################################################### @@ -265,11 +259,34 @@ def load_user(user_id): client_kwargs={"scope": "openid profile email"}, ) + # Import flask commands - all + from dds_web.commands import ( + fill_db_wrapper, + create_new_unit, + update_uploaded_file_with_log, + lost_files_s3_db, + set_available_to_expired, + set_expired_to_archived, + delete_invites, + quarterly_usage, + reporting_units_and_users, + monitor_usage, + ) + + # Add flask commands - general app.cli.add_command(fill_db_wrapper) app.cli.add_command(create_new_unit) app.cli.add_command(update_uploaded_file_with_log) app.cli.add_command(lost_files_s3_db) + # Add flask commands - cronjobs + app.cli.add_command(set_available_to_expired) + app.cli.add_command(set_expired_to_archived) + app.cli.add_command(delete_invites) + app.cli.add_command(quarterly_usage) + app.cli.add_command(reporting_units_and_users) + app.cli.add_command(monitor_usage) + # Make version available inside jinja templates: @app.template_filter("dds_version") def dds_version_filter(_): @@ -290,11 +307,6 @@ def dds_version_filter(_): app.register_blueprint(pages, url_prefix="") app.register_blueprint(auth_blueprint, url_prefix="") - # Set-up the scheduler - app.config["SCHEDULER_JOBSTORES"] = {"default": SQLAlchemyJobStore(engine=db.engine)} - scheduler.init_app(app) - scheduler.start() - ENCRYPTION_KEY_BIT_LENGTH = 256 ENCRYPTION_KEY_CHAR_LENGTH = int(ENCRYPTION_KEY_BIT_LENGTH / 8) @@ -307,324 +319,3 @@ def dds_version_filter(_): except sqlalchemy.exc.OperationalError as err: app.logger.exception("The database seems to be down.") sys.exit(1) - - -@click.command("init-db") -@click.argument("db_type", type=click.Choice(["production", "dev-small", "dev-big"])) -@flask.cli.with_appcontext -def fill_db_wrapper(db_type): - from dds_web.database import models - - if db_type == "production": - username = flask.current_app.config["SUPERADMIN_USERNAME"] - password = flask.current_app.config["SUPERADMIN_PASSWORD"] - name = flask.current_app.config["SUPERADMIN_NAME"] - existing_user = models.User.query.filter_by(username=username).one_or_none() - - email = flask.current_app.config["SUPERADMIN_EMAIL"] - existing_email = models.Email.query.filter_by(email=email).one_or_none() - - if existing_email: - flask.current_app.logger.info( - f"User with email '{email}' already exists, not creating user." - ) - elif existing_user: - if isinstance(existing_user, models.SuperAdmin): - flask.current_app.logger.info( - f"Super admin with username '{username}' already exists, not creating user." 
- ) - else: - flask.current_app.logger.info(f"Adding Super Admin: {username} ({email})") - new_super_admin = models.SuperAdmin(username=username, name=name, password=password) - new_email = models.Email(email=email, primary=True) - new_email.user = new_super_admin - db.session.add(new_email) - db.session.commit() - flask.current_app.logger.info(f"Super Admin added: {username} ({email})") - else: - flask.current_app.logger.info("Initializing development db") - assert flask.current_app.config["USE_LOCAL_DB"] - - if db_type == "dev-small": - from dds_web.development.db_init import fill_db - - fill_db() - elif db_type == "dev-big": - import dds_web.development.factories - - dds_web.development.factories.create_all() - - flask.current_app.logger.info("DB filled") - - -@click.command("create-unit") -@click.option("--name", "-n", type=str, required=True) -@click.option("--public_id", "-p", type=str, required=True) -@click.option("--external_display_name", "-e", type=str, required=True) -@click.option("--contact_email", "-c", type=str, required=True) -@click.option("--internal_ref", "-ref", type=str, required=False) -@click.option("--safespring_endpoint", "-se", type=str, required=True) -@click.option("--safespring_name", "-sn", type=str, required=True) -@click.option("--safespring_access", "-sa", type=str, required=True) -@click.option("--safespring_secret", "-ss", type=str, required=True) -@click.option("--days_in_available", "-da", type=int, required=False, default=90) -@click.option("--days_in_expired", "-de", type=int, required=False, default=30) -@flask.cli.with_appcontext -def create_new_unit( - name, - public_id, - external_display_name, - contact_email, - internal_ref, - safespring_endpoint, - safespring_name, - safespring_access, - safespring_secret, - days_in_available, - days_in_expired, -): - """Create a new unit. - - Rules for bucket names, which are affected by the public_id at the moment: - https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html - """ - from dds_web.database import models - - error_message = "" - if len(public_id) > 50: - error_message = "The 'public_id' can be a maximum of 50 characters" - elif re.findall(r"[^a-zA-Z0-9.-]", public_id): - error_message = ( - "The 'public_id' can only contain letters, numbers, dots (.) and hyphens (-)." - ) - elif public_id[0] in [".", "-"]: - error_message = "The 'public_id' must begin with a letter or number." - elif public_id.count(".") > 2: - error_message = "The 'public_id' should not contain more than two dots." - elif public_id.startswith("xn--"): - error_message = "The 'public_id' cannot begin with the 'xn--' prefix." 
- - if error_message: - flask.current_app.logger.error(error_message) - return - - new_unit = models.Unit( - name=name, - public_id=public_id, - external_display_name=external_display_name, - contact_email=contact_email, - internal_ref=internal_ref or public_id, - safespring_endpoint=safespring_endpoint, - safespring_name=safespring_name, - safespring_access=safespring_access, - safespring_secret=safespring_secret, - days_in_available=days_in_available, - days_in_expired=days_in_expired, - ) - db.session.add(new_unit) - db.session.commit() - - flask.current_app.logger.info(f"Unit '{name}' created") - - -@click.command("update-uploaded-file") -@click.option("--project", "-p", type=str, required=True) -@click.option("--path-to-log-file", "-fp", type=str, required=True) -@flask.cli.with_appcontext -def update_uploaded_file_with_log(project, path_to_log_file): - """Update file details that weren't properly uploaded to db from cli log""" - import botocore - from dds_web.database import models - from dds_web import db - from dds_web.api.api_s3_connector import ApiS3Connector - import json - - proj_in_db = models.Project.query.filter_by(public_id=project).one_or_none() - if not proj_in_db: - flask.current_app.logger.error(f"The project '{project}' doesn't exist.") - return - - if not os.path.exists(path_to_log_file): - flask.current_app.logger.error(f"The log file '{path_to_log_file}' doesn't exist.") - return - - with open(path_to_log_file, "r") as f: - log = json.load(f) - errors = {} - files_added = [] - for file, vals in log.items(): - status = vals.get("status") - if not status or not status.get("failed_op") == "add_file_db": - continue - - with ApiS3Connector(project=proj_in_db) as s3conn: - try: - _ = s3conn.resource.meta.client.head_object( - Bucket=s3conn.project.bucket, Key=vals["path_remote"] - ) - except botocore.client.ClientError as err: - if err.response["Error"]["Code"] == "404": - errors[file] = {"error": "File not found in S3", "traceback": err.__traceback__} - else: - file_object = models.File.query.filter( - sqlalchemy.and_( - models.File.name == sqlalchemy.func.binary(file), - models.File.project_id == proj_in_db.id, - ) - ).first() - if file_object: - errors[file] = {"error": "File already in database."} - else: - new_file = models.File( - name=file, - name_in_bucket=vals["path_remote"], - subpath=vals["subpath"], - project_id=proj_in_db.id, - size_original=vals["size_raw"], - size_stored=vals["size_processed"], - compressed=not vals["compressed"], - public_key=vals["public_key"], - salt=vals["salt"], - checksum=vals["checksum"], - ) - new_version = models.Version( - size_stored=new_file.size_stored, time_uploaded=datetime.datetime.utcnow() - ) - proj_in_db.file_versions.append(new_version) - proj_in_db.files.append(new_file) - new_file.versions.append(new_version) - - db.session.add(new_file) - files_added.append(new_file) - db.session.commit() - - flask.current_app.logger.info(f"Files added: {files_added}") - flask.current_app.logger.info(f"Errors while adding files: {errors}") - - -@click.command("lost-files") -@click.argument("action_type", type=click.Choice(["find", "list", "delete", "add-missing-buckets"])) -@flask.cli.with_appcontext -def lost_files_s3_db(action_type: str): - """ - Identify (and optionally delete) files that are present in S3 or in the db, but not both. 
- - Args: - action_type (str): "find", "list", or "delete" - """ - from dds_web.database import models - import boto3 - from dds_web.utils import bucket_is_valid - - # Interate through the units - for unit in models.Unit.query: - session = boto3.session.Session() - - # Connect to S3 - resource = session.resource( - service_name="s3", - endpoint_url=unit.safespring_endpoint, - aws_access_key_id=unit.safespring_access, - aws_secret_access_key=unit.safespring_secret, - ) - - # Variables - db_count = 0 # Files not found in s3 - s3_count = 0 # Files not found in db - - # Iterate through unit projects - for project in unit.projects: - # Check for objects in bucket - try: - s3_filenames = set( - entry.key for entry in resource.Bucket(project.bucket).objects.all() - ) - except resource.meta.client.exceptions.NoSuchBucket: - if project.is_active: - flask.current_app.logger.warning("Missing bucket %s", project.bucket) - # Create a missing bucket if argument chosen - if action_type == "add-missing-buckets": - valid, message = bucket_is_valid(bucket_name=project.bucket) - if not valid: - flask.current_app.logger.warning( - f"Could not create bucket '{project.bucket}' for project '{project.public_id}': {message}" - ) - else: - resource.create_bucket(Bucket=project.bucket) - flask.current_app.logger.info(f"Bucket '{project.bucket}' created.") - continue - - # Get objects in project - try: - db_filenames = set(entry.name_in_bucket for entry in project.files) - except sqlalchemy.exc.OperationalError: - flask.current_app.logger.critical("Unable to connect to db") - - # Differences - diff_db = db_filenames.difference(s3_filenames) # In db but not in S3 - diff_s3 = s3_filenames.difference(db_filenames) # In S3 but not in db - - # List all files which are missing in either db of s3 - # or delete the files from the s3 if missing in db, or db if missing in s3 - if action_type == "list": - for file_entry in diff_db: - flask.current_app.logger.info( - "Entry %s (%s, %s) not found in S3", file_entry, project, unit - ) - for file_entry in diff_s3: - flask.current_app.logger.info( - "Entry %s (%s, %s) not found in database", file_entry, project, unit - ) - elif action_type == "delete": - # s3 can only delete 1000 objects per request - batch_size = 1000 - s3_to_delete = list(diff_s3) - for i in range(0, len(s3_to_delete), batch_size): - resource.meta.client.delete_objects( - Bucket=project.bucket, - Delete={ - "Objects": [ - {"Key": entry} for entry in s3_to_delete[i : i + batch_size] - ] - }, - ) - - db_entries = models.File.query.filter( - sqlalchemy.and_( - models.File.name_in_bucket.in_(diff_db), - models.File.project_id == project.id, - ) - ) - for db_entry in db_entries: - try: - for db_entry_version in db_entry.versions: - if db_entry_version.time_deleted is None: - db_entry_version.time_deleted = datetime.datetime.utcnow() - db.session.delete(db_entry) - db.session.commit() - except (sqlalchemy.exc.SQLAlchemyError, sqlalchemy.exc.OperationalError): - db.session.rollback() - flask.current_app.logger.critical("Unable to delete the database entries") - sys.exit(1) - - # update the counters at the end of the loop to have accurate numbers for delete - s3_count += len(diff_s3) - db_count += len(diff_db) - - # Print out information about actions performed in cronjob - if s3_count or db_count: - action_word = ( - "Found" if action_type in ("find", "list", "add-missing-buckets") else "Deleted" - ) - flask.current_app.logger.info( - "%s %d entries for lost files (%d in db, %d in s3)", - action_word, - s3_count + 
db_count, - db_count, - s3_count, - ) - if action_type in ("find", "list", "add-missing-buckets"): - sys.exit(1) - - else: - flask.current_app.logger.info("Found no lost files") diff --git a/dds_web/commands.py b/dds_web/commands.py new file mode 100644 index 000000000..5dae617c8 --- /dev/null +++ b/dds_web/commands.py @@ -0,0 +1,787 @@ +"""Flask commands runable in container.""" + +# Imports + +# Standard +import os +import re +import sys +import datetime + +# Installed +import click +import flask +import flask_mail +import sqlalchemy + +# Own +from dds_web import db + + +@click.command("init-db") +@click.argument("db_type", type=click.Choice(["production", "dev-small", "dev-big"])) +@flask.cli.with_appcontext +def fill_db_wrapper(db_type): + """Add necessary information to the initial database depending on if in dev or prod.""" + from dds_web.database import models + + if db_type == "production": + username = flask.current_app.config["SUPERADMIN_USERNAME"] + password = flask.current_app.config["SUPERADMIN_PASSWORD"] + name = flask.current_app.config["SUPERADMIN_NAME"] + existing_user = models.User.query.filter_by(username=username).one_or_none() + + email = flask.current_app.config["SUPERADMIN_EMAIL"] + existing_email = models.Email.query.filter_by(email=email).one_or_none() + + if existing_email: + flask.current_app.logger.info( + f"User with email '{email}' already exists, not creating user." + ) + elif existing_user: + if isinstance(existing_user, models.SuperAdmin): + flask.current_app.logger.info( + f"Super admin with username '{username}' already exists, not creating user." + ) + else: + flask.current_app.logger.info(f"Adding Super Admin: {username} ({email})") + new_super_admin = models.SuperAdmin(username=username, name=name, password=password) + new_email = models.Email(email=email, primary=True) + new_email.user = new_super_admin + db.session.add(new_email) + db.session.commit() + flask.current_app.logger.info(f"Super Admin added: {username} ({email})") + else: + flask.current_app.logger.info("Initializing development db") + assert flask.current_app.config["USE_LOCAL_DB"] + + if db_type == "dev-small": + from dds_web.development.db_init import fill_db + + fill_db() + elif db_type == "dev-big": + import dds_web.development.factories + + dds_web.development.factories.create_all() + + flask.current_app.logger.info("DB filled") + + +@click.command("create-unit") +@click.option("--name", "-n", type=str, required=True) +@click.option("--public_id", "-p", type=str, required=True) +@click.option("--external_display_name", "-e", type=str, required=True) +@click.option("--contact_email", "-c", type=str, required=True) +@click.option("--internal_ref", "-ref", type=str, required=False) +@click.option("--safespring_endpoint", "-se", type=str, required=True) +@click.option("--safespring_name", "-sn", type=str, required=True) +@click.option("--safespring_access", "-sa", type=str, required=True) +@click.option("--safespring_secret", "-ss", type=str, required=True) +@click.option("--days_in_available", "-da", type=int, required=False, default=90) +@click.option("--days_in_expired", "-de", type=int, required=False, default=30) +@click.option("--quota", "-q", type=int, required=True) +@click.option("--warn-at", "-w", type=int, required=False, default=80) +@flask.cli.with_appcontext +def create_new_unit( + name, + public_id, + external_display_name, + contact_email, + internal_ref, + safespring_endpoint, + safespring_name, + safespring_access, + safespring_secret, + days_in_available, + 
days_in_expired, + quota, + warn_at, +): + """Create a new unit. + + Rules for bucket names, which are affected by the public_id at the moment: + https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html + """ + from dds_web.database import models + + error_message = "" + if len(public_id) > 50: + error_message = "The 'public_id' can be a maximum of 50 characters" + elif re.findall(r"[^a-zA-Z0-9.-]", public_id): + error_message = ( + "The 'public_id' can only contain letters, numbers, dots (.) and hyphens (-)." + ) + elif public_id[0] in [".", "-"]: + error_message = "The 'public_id' must begin with a letter or number." + elif public_id.count(".") > 2: + error_message = "The 'public_id' should not contain more than two dots." + elif public_id.startswith("xn--"): + error_message = "The 'public_id' cannot begin with the 'xn--' prefix." + + if error_message: + flask.current_app.logger.error(error_message) + return + + new_unit = models.Unit( + name=name, + public_id=public_id, + external_display_name=external_display_name, + contact_email=contact_email, + internal_ref=internal_ref or public_id, + safespring_endpoint=safespring_endpoint, + safespring_name=safespring_name, + safespring_access=safespring_access, + safespring_secret=safespring_secret, + days_in_available=days_in_available, + days_in_expired=days_in_expired, + quota=quota, + warning_level=warn_at, + ) + db.session.add(new_unit) + db.session.commit() + + flask.current_app.logger.info(f"Unit '{name}' created") + + +@click.command("update-uploaded-file") +@click.option("--project", "-p", type=str, required=True) +@click.option("--path-to-log-file", "-fp", type=str, required=True) +@flask.cli.with_appcontext +def update_uploaded_file_with_log(project, path_to_log_file): + """Update file details that weren't properly uploaded to db from cli log""" + import botocore + from dds_web.database import models + from dds_web import db + from dds_web.api.api_s3_connector import ApiS3Connector + import json + + proj_in_db = models.Project.query.filter_by(public_id=project).one_or_none() + if not proj_in_db: + flask.current_app.logger.error(f"The project '{project}' doesn't exist.") + return + + if not os.path.exists(path_to_log_file): + flask.current_app.logger.error(f"The log file '{path_to_log_file}' doesn't exist.") + return + + with open(path_to_log_file, "r") as f: + log = json.load(f) + errors = {} + files_added = [] + for file, vals in log.items(): + status = vals.get("status") + if not status or not status.get("failed_op") == "add_file_db": + continue + + with ApiS3Connector(project=proj_in_db) as s3conn: + try: + _ = s3conn.resource.meta.client.head_object( + Bucket=s3conn.project.bucket, Key=vals["path_remote"] + ) + except botocore.client.ClientError as err: + if err.response["Error"]["Code"] == "404": + errors[file] = {"error": "File not found in S3", "traceback": err.__traceback__} + else: + file_object = models.File.query.filter( + sqlalchemy.and_( + models.File.name == sqlalchemy.func.binary(file), + models.File.project_id == proj_in_db.id, + ) + ).first() + if file_object: + errors[file] = {"error": "File already in database."} + else: + new_file = models.File( + name=file, + name_in_bucket=vals["path_remote"], + subpath=vals["subpath"], + project_id=proj_in_db.id, + size_original=vals["size_raw"], + size_stored=vals["size_processed"], + compressed=not vals["compressed"], + public_key=vals["public_key"], + salt=vals["salt"], + checksum=vals["checksum"], + ) + new_version = models.Version( + 
size_stored=new_file.size_stored, time_uploaded=datetime.datetime.utcnow() + ) + proj_in_db.file_versions.append(new_version) + proj_in_db.files.append(new_file) + new_file.versions.append(new_version) + + db.session.add(new_file) + files_added.append(new_file) + db.session.commit() + + flask.current_app.logger.info(f"Files added: {files_added}") + flask.current_app.logger.info(f"Errors while adding files: {errors}") + + +@click.command("lost-files") +@click.argument("action_type", type=click.Choice(["find", "list", "delete", "add-missing-buckets"])) +@flask.cli.with_appcontext +def lost_files_s3_db(action_type: str): + """Identify (and optionally delete) files that are present in S3 or in the db, but not both. + + Args: + action_type (str): "find", "list", or "delete" + """ + from dds_web.database import models + import boto3 + from dds_web.utils import bucket_is_valid + + # Interate through the units + for unit in models.Unit.query: + session = boto3.session.Session() + + # Connect to S3 + resource = session.resource( + service_name="s3", + endpoint_url=unit.safespring_endpoint, + aws_access_key_id=unit.safespring_access, + aws_secret_access_key=unit.safespring_secret, + ) + + # Variables + db_count = 0 # Files not found in s3 + s3_count = 0 # Files not found in db + + # Iterate through unit projects + for project in unit.projects: + # Check for objects in bucket + try: + s3_filenames = set( + entry.key for entry in resource.Bucket(project.bucket).objects.all() + ) + except resource.meta.client.exceptions.NoSuchBucket: + if project.is_active: + flask.current_app.logger.warning("Missing bucket %s", project.bucket) + # Create a missing bucket if argument chosen + if action_type == "add-missing-buckets": + valid, message = bucket_is_valid(bucket_name=project.bucket) + if not valid: + flask.current_app.logger.warning( + f"Could not create bucket '{project.bucket}' for project '{project.public_id}': {message}" + ) + else: + resource.create_bucket(Bucket=project.bucket) + flask.current_app.logger.info(f"Bucket '{project.bucket}' created.") + continue + + # Get objects in project + try: + db_filenames = set(entry.name_in_bucket for entry in project.files) + except sqlalchemy.exc.OperationalError: + flask.current_app.logger.critical("Unable to connect to db") + + # Differences + diff_db = db_filenames.difference(s3_filenames) # In db but not in S3 + diff_s3 = s3_filenames.difference(db_filenames) # In S3 but not in db + + # List all files which are missing in either db of s3 + # or delete the files from the s3 if missing in db, or db if missing in s3 + if action_type == "list": + for file_entry in diff_db: + flask.current_app.logger.info( + "Entry %s (%s, %s) not found in S3", file_entry, project, unit + ) + for file_entry in diff_s3: + flask.current_app.logger.info( + "Entry %s (%s, %s) not found in database", file_entry, project, unit + ) + elif action_type == "delete": + # s3 can only delete 1000 objects per request + batch_size = 1000 + s3_to_delete = list(diff_s3) + for i in range(0, len(s3_to_delete), batch_size): + resource.meta.client.delete_objects( + Bucket=project.bucket, + Delete={ + "Objects": [ + {"Key": entry} for entry in s3_to_delete[i : i + batch_size] + ] + }, + ) + + db_entries = models.File.query.filter( + sqlalchemy.and_( + models.File.name_in_bucket.in_(diff_db), + models.File.project_id == project.id, + ) + ) + for db_entry in db_entries: + try: + for db_entry_version in db_entry.versions: + if db_entry_version.time_deleted is None: + db_entry_version.time_deleted = 
datetime.datetime.utcnow() + db.session.delete(db_entry) + db.session.commit() + except (sqlalchemy.exc.SQLAlchemyError, sqlalchemy.exc.OperationalError): + db.session.rollback() + flask.current_app.logger.critical("Unable to delete the database entries") + sys.exit(1) + + # update the counters at the end of the loop to have accurate numbers for delete + s3_count += len(diff_s3) + db_count += len(diff_db) + + # Print out information about actions performed in cronjob + if s3_count or db_count: + action_word = ( + "Found" if action_type in ("find", "list", "add-missing-buckets") else "Deleted" + ) + flask.current_app.logger.info( + "%s %d entries for lost files (%d in db, %d in s3)", + action_word, + s3_count + db_count, + db_count, + s3_count, + ) + if action_type in ("find", "list", "add-missing-buckets"): + sys.exit(1) + + else: + flask.current_app.logger.info("Found no lost files") + + +@click.command("set-available-to-expired") +@flask.cli.with_appcontext +def set_available_to_expired(): + """ + Search for available projects whose deadlines are past and expire them. + Should be run every day at around 00:01. + """ + + flask.current_app.logger.info("Task: Checking for Expiring projects.") + + # Imports + # Installed + import sqlalchemy + + # Own + from dds_web import db + from dds_web.database import models + from dds_web.errors import DatabaseError + from dds_web.api.project import ProjectStatus + from dds_web.utils import current_time, page_query + + expire = ProjectStatus() + + errors = {} + + try: + for unit in db.session.query(models.Unit).with_for_update().all(): + errors[unit.name] = {} + + days_in_expired = unit.days_in_expired + + for project in page_query( + db.session.query(models.Project) + .filter( + sqlalchemy.and_( + models.Project.is_active == 1, models.Project.unit_id == unit.id + ) + ) + .with_for_update() + ): + + if ( + project.current_status == "Available" + and project.current_deadline <= current_time() + ): + flask.current_app.logger.debug("Handling expiring project") + flask.current_app.logger.debug( + "Project: %s has status %s and expires on: %s", + project.public_id, + project.current_status, + project.current_deadline, + ) + new_status_row = expire.expire_project( + project=project, + current_time=current_time(), + deadline_in=days_in_expired, + ) + + project.project_statuses.append(new_status_row) + + try: + db.session.commit() + flask.current_app.logger.debug( + "Project: %s has status Expired now!", project.public_id + ) + except ( + sqlalchemy.exc.OperationalError, + sqlalchemy.exc.SQLAlchemyError, + ) as err: + flask.current_app.logger.exception(err) + db.session.rollback() + errors[unit.name][project.public_id] = str(err) + continue + else: + flask.current_app.logger.debug( + "Nothing to do for Project: %s", project.public_id + ) + except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.SQLAlchemyError) as err: + flask.current_app.logger.exception(err) + db.session.rollback() + raise + + for unit, projects in errors.items(): + if projects: + flask.current_app.logger.error( + f"Following projects of Unit '{unit}' encountered issues during expiration process:" + ) + for proj in errors[unit].keys(): + flask.current_app.logger.error(f"Error for project '{proj}': {errors[unit][proj]} ") + + +@click.command("set-expired-to-archived") +@flask.cli.with_appcontext +def set_expired_to_archived(): + """ + Search for expired projects whose deadlines are past and archive them. + Should be run every day at around 01:01. 
+ """ + + flask.current_app.logger.debug("Task: Checking for projects to archive.") + + # Imports + # Installed + import sqlalchemy + + # Own + from dds_web import db + from dds_web.database import models + from dds_web.errors import DatabaseError + from dds_web.utils import current_time, page_query + from dds_web.api.project import ProjectStatus + + archive = ProjectStatus() + errors = {} + + try: + for unit in db.session.query(models.Unit).with_for_update().all(): + errors[unit.name] = {} + + for project in page_query( + db.session.query(models.Project) + .filter( + sqlalchemy.and_( + models.Project.is_active == 1, models.Project.unit_id == unit.id + ) + ) + .with_for_update() + ): + + if ( + project.current_status == "Expired" + and project.current_deadline <= current_time() + ): + flask.current_app.logger.debug("Handling project to archive") + flask.current_app.logger.debug( + "Project: %s has status %s and expired on: %s", + project.public_id, + project.current_status, + project.current_deadline, + ) + new_status_row, delete_message = archive.archive_project( + project=project, + current_time=current_time(), + ) + flask.current_app.logger.debug(delete_message.strip()) + project.project_statuses.append(new_status_row) + + try: + db.session.commit() + flask.current_app.logger.debug( + "Project: %s has status Archived now!", project.public_id + ) + except ( + sqlalchemy.exc.OperationalError, + sqlalchemy.exc.SQLAlchemyError, + ) as err: + flask.current_app.logger.exception(err) + db.session.rollback() + errors[unit.name][project.public_id] = str(err) + continue + else: + flask.current_app.logger.debug( + "Nothing to do for Project: %s", project.public_id + ) + except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.SQLAlchemyError) as err: + flask.current_app.logger.exception(err) + db.session.rollback() + raise + + for unit, projects in errors.items(): + if projects: + flask.current_app.logger.error( + f"Following projects of Unit '{unit}' encountered issues during archival process:" + ) + for proj in errors[unit].keys(): + flask.current_app.logger.error(f"Error for project '{proj}': {errors[unit][proj]} ") + + +@click.command("delete-invites") +@flask.cli.with_appcontext +def delete_invites(): + """ + Delete invites older than a week. + Should be run evry day at around 00:01. + """ + + flask.current_app.logger.debug("Task: Checking for invites to delete.") + + # Imports + # Installed + from datetime import datetime, timedelta + from sqlalchemy.exc import OperationalError, SQLAlchemyError + + # Own + from dds_web import db + from dds_web.database import models + from dds_web.errors import DatabaseError + from dds_web.utils import current_time + + expiration: datetime.datetime = current_time() + errors: Dict = {} + + try: + invites: list = db.session.query(models.Invite).all() + for invite in invites: + invalid_invite = invite.created_at == "0000-00-00 00:00:00" + if invalid_invite or (invite.created_at + timedelta(weeks=1)) < expiration: + try: + db.session.delete(invite) + db.session.commit() + if invalid_invite: + flask.current_app.logger.warning( + "Invite with created_at = 0000-00-00 00:00:00 deleted." 
+ ) + else: + flask.current_app.logger.debug("Invite deleted.") + except (OperationalError, SQLAlchemyError) as err: + errors[invite] = str(err) + flask.current_app.logger.exception(err) + db.session.rollback() + continue + except (OperationalError, SQLAlchemyError) as err: + flask.current_app.logger.exception(err) + raise + + for invite, error in errors.items(): + flask.current_app.logger.error(f"{invite} not deleted: {error}") + + +@click.command("quartely-usage") +@flask.cli.with_appcontext +def quarterly_usage(): + """ + Get the monthly usage for the units + Should be run on the 1st of Jan,Apr,Jul,Oct at around 00:01. + """ + + flask.current_app.logger.debug("Task: Collecting usage information from database.") + + # Imports + # Installed + import sqlalchemy + + # Own + from dds_web import db + from dds_web.database import models + from dds_web.utils import ( + current_time, + page_query, + # calculate_period_usage, + calculate_version_period_usage, + ) + + try: + # 1. Get projects where is_active = False + # .. a. Check if the versions are all time_deleted == time_invoiced + # .. b. Yes --> Set new column to True ("done") + flask.current_app.logger.info("Marking projects as 'done'....") + for unit, project in page_query( + db.session.query(models.Unit, models.Project) + .join(models.Project) + .filter(models.Project.is_active == False) + ): + # Get number of versions in project that have been fully included in usage calcs + num_done = ( + db.session.query(models.Project, models.Version) + .join(models.Version) + .filter( + sqlalchemy.and_( + models.Project.id == project.id, + models.Version.time_deleted == models.Version.time_invoiced, + ) + ) + .count() + ) + + # Check if there are any versions that are not fully included + # If not, project is done and should not be included in any more usage calculations in billing + if num_done == len(project.file_versions): + project.done = True + + db.session.commit() + + # 2. Get project where done = False + for unit, project in page_query( + db.session.query(models.Unit, models.Project) + .join(models.Project) + .filter(models.Project.done == False) + ): + project_byte_hours: int = 0 + for version in project.file_versions: + # Skipp deleted and already invoiced versions + if version.time_deleted == version.time_invoiced and [ + version.time_deleted, + version.time_invoiced, + ] != [None, None]: + continue + version_bhours = calculate_version_period_usage(version=version) + project_byte_hours += version_bhours + flask.current_app.logger.info( + f"Project {project.public_id} byte hours: {project_byte_hours}" + ) + + # Create a record in usage table + new_record = models.Usage( + project_id=project.id, + usage=project_byte_hours, + cost=0, + time_collected=current_time(), + ) + db.session.add(new_record) + db.session.commit() + + except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.SQLAlchemyError) as err: + flask.current_app.logger.exception(err) + db.session.rollback() + raise + + +@click.command("reporting-units-and-users") +@flask.cli.with_appcontext +def reporting_units_and_users(): + """ + At the start of every month, get number of units and users. + Should be run on the 1st of each month, at around 00:01. 
+ """ + # Imports + # Installed + import flask_mail + + # Own + import dds_web.utils + from dds_web.database.models import Unit, UnitUser, ResearchUser, SuperAdmin, User, Reporting + + # Get current time + current_time = dds_web.utils.timestamp(ts_format="%Y-%m-%d") + + # Get email address + recipient: str = flask.current_app.config.get("MAIL_DDS") + error_subject: str = "[CRONJOB] Error during collection of DDS unit- and user statistics." + error_body: str = ( + f"The cronjob 'reporting' experienced issues. Please see logs. Time: {current_time}." + ) + + # New reporting row - numbers are automatically set + try: + unit_count = Unit.query.count() + researchuser_count = ResearchUser.query.count() + unituser_count = UnitUser.query.count() + superadmin_count = SuperAdmin.query.count() + total_user_count = User.query.count() + new_reporting_row = Reporting( + unit_count=unit_count, + researchuser_count=researchuser_count, + unituser_count=unituser_count, + superadmin_count=superadmin_count, + total_user_count=total_user_count, + ) + db.session.add(new_reporting_row) + db.session.commit() + except BaseException as err: # We want to know if there's any error + flask.current_app.logger.warning( + f"Exception raised during reporting cronjob. Preparing email. Error: {err}" + ) + # Send email about error + file_error_msg: flask_mail.Message = flask_mail.Message( + subject=error_subject, + recipients=[recipient], + body=error_body, + ) + dds_web.utils.send_email_with_retry(msg=file_error_msg) + raise + else: + flask.current_app.logger.info( + f"Unit- and user statistis collected successfully: {current_time}" + ) + + +@click.command("monitor-usage") +@flask.cli.with_appcontext +def monitor_usage(): + """ + Check the units storage usage and compare with chosen quota. + Should be run on the 1st of each month, at around 00:01. + """ + flask.current_app.logger.info("Starting: Checking unit quotas and usage...") + + # Imports + # Own + from dds_web.database import models + import dds_web.utils + + # Email settings + recipient: str = flask.current_app.config.get("MAIL_DDS") + default_subject: str = "DDS: Usage quota warning!" + + # Run task + for unit in models.Unit.query: + flask.current_app.logger.info(f"Checking quotas and usage for: {unit.name}") + + # Get info from database + quota: int = unit.quota + warn_after: int = unit.warning_level + current_usage: int = unit.size + + # Check if 0 and then skip the next steps + if not current_usage: + flask.current_app.logger.info( + f"{unit.name} usage: {current_usage} bytes. Skipping percentage calculation." 
+ ) + continue + + # Calculate percentage of quota + perc_used = round((current_usage / quota) * 100, 3) + + # Information to log and potentially send + info_string: str = ( + f"- Quota:{quota} bytes\n" + f"- Warning level: {warn_after*quota} bytes ({warn_after}%)\n" + f"- Current usage: {current_usage} bytes ({perc_used}%)\n" + ) + flask.current_app.logger.debug( + f"Monitoring the usage for unit '{unit.name}' showed the following:\n" + info_string + ) + + # Email if the unit is using more + if perc_used > warn_after: + # Email settings + message: str = ( + "A SciLifeLab Unit is approaching the allocated data quota.\n" + f"Affected unit: {unit.name}\n" + f"{info_string}" + ) + flask.current_app.logger.info(message) + msg: flask_mail.Message = flask_mail.Message( + subject=default_subject, + recipients=[recipient], + body=message, + ) + dds_web.utils.send_email_with_retry(msg=msg) diff --git a/dds_web/database/models.py b/dds_web/database/models.py index 15a7ccc1f..87de85168 100644 --- a/dds_web/database/models.py +++ b/dds_web/database/models.py @@ -197,6 +197,8 @@ class Unit(db.Model): days_in_available = db.Column(db.Integer, unique=False, nullable=False, default=90) counter = db.Column(db.Integer, unique=False, nullable=True) days_in_expired = db.Column(db.Integer, unique=False, nullable=False, default=30) + quota = db.Column(db.BigInteger, unique=False, nullable=False) + warning_level = db.Column(db.Float, unique=False, nullable=False, default=0.8) # Relationships users = db.relationship("UnitUser", back_populates="unit") @@ -1049,3 +1051,20 @@ class Maintenance(db.Model): # Columns id = db.Column(db.Integer, primary_key=True, autoincrement=True) active = db.Column(db.Boolean, nullable=False, default=True) + + +class Reporting(db.Model): + """Keep track of number of users and units.""" + + # Table setup + __tablename__ = "reporting" + __table_args__ = {"extend_existing": True} + + # Columns + id = db.Column(db.Integer, primary_key=True, autoincrement=True) + date = db.Column(db.DateTime(), unique=True, nullable=False, default=datetime.date.today) + unit_count = db.Column(db.Integer, unique=False, nullable=False) + researchuser_count = db.Column(db.Integer, unique=False, nullable=False) + unituser_count = db.Column(db.Integer, unique=False, nullable=False) + superadmin_count = db.Column(db.Integer, unique=False, nullable=False) + total_user_count = db.Column(db.Integer, unique=False, nullable=False) diff --git a/dds_web/development/db_init.py b/dds_web/development/db_init.py index 3595f935a..c04937ccf 100644 --- a/dds_web/development/db_init.py +++ b/dds_web/development/db_init.py @@ -90,6 +90,7 @@ def fill_db(): external_display_name="Unit 1 external", contact_email="support@example.com", internal_ref="someunit", + quota=10**9, # 1 GB safespring_endpoint=current_app.config.get("SAFESPRING_URL"), safespring_name=current_app.config.get("DDS_SAFESPRING_PROJECT"), safespring_access=current_app.config.get("DDS_SAFESPRING_ACCESS"), diff --git a/dds_web/scheduled_tasks.py b/dds_web/scheduled_tasks.py deleted file mode 100644 index 02c24ac55..000000000 --- a/dds_web/scheduled_tasks.py +++ /dev/null @@ -1,389 +0,0 @@ -from datetime import datetime, timedelta - -import flask_apscheduler -import flask - -from typing import Dict - -## Apscheduler -scheduler = flask_apscheduler.APScheduler() - - -@scheduler.task("cron", id="available_to_expired", hour=0, minute=1, misfire_grace_time=3600) -# @scheduler.task("interval", id="available_to_expired", seconds=15, misfire_grace_time=1) -def 
set_available_to_expired(): - scheduler.app.logger.debug("Task: Checking for Expiring projects.") - import sqlalchemy - - from dds_web import db - from dds_web.database import models - from dds_web.errors import DatabaseError - from dds_web.api.project import ProjectStatus - from dds_web.utils import current_time, page_query - - with scheduler.app.app_context(): - expire = ProjectStatus() - - errors = {} - - try: - for unit in db.session.query(models.Unit).with_for_update().all(): - errors[unit.name] = {} - - days_in_expired = unit.days_in_expired - - for project in page_query( - db.session.query(models.Project) - .filter( - sqlalchemy.and_( - models.Project.is_active == 1, models.Project.unit_id == unit.id - ) - ) - .with_for_update() - ): - - if ( - project.current_status == "Available" - and project.current_deadline <= current_time() - ): - scheduler.app.logger.debug("Handling expiring project") - scheduler.app.logger.debug( - "Project: %s has status %s and expires on: %s", - project.public_id, - project.current_status, - project.current_deadline, - ) - new_status_row = expire.expire_project( - project=project, - current_time=current_time(), - deadline_in=days_in_expired, - ) - - project.project_statuses.append(new_status_row) - - try: - db.session.commit() - scheduler.app.logger.debug( - "Project: %s has status Expired now!", project.public_id - ) - except ( - sqlalchemy.exc.OperationalError, - sqlalchemy.exc.SQLAlchemyError, - ) as err: - flask.current_app.logger.exception(err) - db.session.rollback() - errors[unit.name][project.public_id] = str(err) - continue - else: - scheduler.app.logger.debug( - "Nothing to do for Project: %s", project.public_id - ) - except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.SQLAlchemyError) as err: - flask.current_app.logger.exception(err) - db.session.rollback() - raise - - for unit, projects in errors.items(): - if projects: - scheduler.app.logger.error( - f"Following projects of Unit '{unit}' encountered issues during expiration process:" - ) - for proj in errors[unit].keys(): - scheduler.app.logger.error(f"Error for project '{proj}': {errors[unit][proj]} ") - - -@scheduler.task("cron", id="expired_to_archived", hour=1, minute=1, misfire_grace_time=3600) -# @scheduler.task("interval", id="expired_to_archived", seconds=15, misfire_grace_time=1) -def set_expired_to_archived(): - """Search for expired projects whose deadlines are past and archive them""" - - scheduler.app.logger.debug("Task: Checking for projects to archive.") - - import sqlalchemy - from dds_web import db - from dds_web.database import models - from dds_web.errors import DatabaseError - from dds_web.utils import current_time, page_query - from dds_web.api.project import ProjectStatus - - with scheduler.app.app_context(): - - archive = ProjectStatus() - errors = {} - - try: - for unit in db.session.query(models.Unit).with_for_update().all(): - errors[unit.name] = {} - - for project in page_query( - db.session.query(models.Project) - .filter( - sqlalchemy.and_( - models.Project.is_active == 1, models.Project.unit_id == unit.id - ) - ) - .with_for_update() - ): - - if ( - project.current_status == "Expired" - and project.current_deadline <= current_time() - ): - scheduler.app.logger.debug("Handling project to archive") - scheduler.app.logger.debug( - "Project: %s has status %s and expired on: %s", - project.public_id, - project.current_status, - project.current_deadline, - ) - new_status_row, delete_message = archive.archive_project( - project=project, - current_time=current_time(), 
- ) - scheduler.app.logger.debug(delete_message.strip()) - project.project_statuses.append(new_status_row) - - try: - db.session.commit() - scheduler.app.logger.debug( - "Project: %s has status Archived now!", project.public_id - ) - except ( - sqlalchemy.exc.OperationalError, - sqlalchemy.exc.SQLAlchemyError, - ) as err: - scheduler.app.logger.exception(err) - db.session.rollback() - errors[unit.name][project.public_id] = str(err) - continue - else: - scheduler.app.logger.debug( - "Nothing to do for Project: %s", project.public_id - ) - except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.SQLAlchemyError) as err: - scheduler.app.logger.exception(err) - db.session.rollback() - raise - - for unit, projects in errors.items(): - if projects: - scheduler.app.logger.error( - f"Following projects of Unit '{unit}' encountered issues during archival process:" - ) - for proj in errors[unit].keys(): - scheduler.app.logger.error(f"Error for project '{proj}': {errors[unit][proj]} ") - - -@scheduler.task("cron", id="delete_invite", hour=0, minute=1, misfire_grace_time=3600) -# @scheduler.task("interval", id="delete_invite", seconds=15, misfire_grace_time=1) - - -def delete_invite(): - """Delete invite older than a week""" - - scheduler.app.logger.debug("Task: Checking for invites to delete.") - - from sqlalchemy.exc import OperationalError, SQLAlchemyError - from dds_web import db - from dds_web.database import models - from dds_web.errors import DatabaseError - from dds_web.utils import current_time - - with scheduler.app.app_context(): - expiration: datetime.datetime = current_time() - errors: Dict = {} - - try: - invites: list = db.session.query(models.Invite).all() - for invite in invites: - invalid_invite = invite.created_at == "0000-00-00 00:00:00" - if invalid_invite or (invite.created_at + timedelta(weeks=1)) < expiration: - try: - db.session.delete(invite) - db.session.commit() - if invalid_invite: - scheduler.app.logger.warning( - "Invite with created_at = 0000-00-00 00:00:00 deleted." - ) - else: - scheduler.app.logger.debug("Invite deleted.") - except (OperationalError, SQLAlchemyError) as err: - errors[invite] = str(err) - scheduler.app.logger.exception(err) - db.session.rollback() - continue - except (OperationalError, SQLAlchemyError) as err: - scheduler.app.logger.exception(err) - raise - - for invite, error in errors.items(): - scheduler.app.logger.error(f"{invite} not deleted: {error}") - - -@scheduler.task( - "cron", id="get_quarterly_usage", month="Jan,Apr,Jul,Oct", day="1", hour=0, minute=1 -) -# @scheduler.task("interval", id="monthly_usage", seconds=60, misfire_grace_time=1) -def quarterly_usage(): - """Get the monthly usage for the units""" - - scheduler.app.logger.debug("Task: Collecting usage information from database.") - import sqlalchemy - - from dds_web import db - from dds_web.database import models - from dds_web.utils import ( - current_time, - page_query, - # calculate_period_usage, - calculate_version_period_usage, - ) - - with scheduler.app.app_context(): - try: - # 1. Get projects where is_active = False - # .. a. Check if the versions are all time_deleted == time_invoiced - # .. b. 
Yes --> Set new column to True ("done") - scheduler.app.logger.info("Marking projects as 'done'....") - for unit, project in page_query( - db.session.query(models.Unit, models.Project) - .join(models.Project) - .filter(models.Project.is_active == False) - ): - # Get number of versions in project that have been fully included in usage calcs - num_done = ( - db.session.query(models.Project, models.Version) - .join(models.Version) - .filter( - sqlalchemy.and_( - models.Project.id == project.id, - models.Version.time_deleted == models.Version.time_invoiced, - ) - ) - .count() - ) - - # Check if there are any versions that are not fully included - # If not, project is done and should not be included in any more usage calculations in billing - if num_done == len(project.file_versions): - project.done = True - - db.session.commit() - - # 2. Get project where done = False - for unit, project in page_query( - db.session.query(models.Unit, models.Project) - .join(models.Project) - .filter(models.Project.done == False) - ): - project_byte_hours: int = 0 - for version in project.file_versions: - # Skipp deleted and already invoiced versions - if version.time_deleted == version.time_invoiced and [ - version.time_deleted, - version.time_invoiced, - ] != [None, None]: - continue - version_bhours = calculate_version_period_usage(version=version) - project_byte_hours += version_bhours - scheduler.app.logger.info( - f"Project {project.public_id} byte hours: {project_byte_hours}" - ) - - # Create a record in usage table - new_record = models.Usage( - project_id=project.id, - usage=project_byte_hours, - cost=0, - time_collected=current_time(), - ) - db.session.add(new_record) - db.session.commit() - - except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.SQLAlchemyError) as err: - flask.current_app.logger.exception(err) - db.session.rollback() - raise - - -# @scheduler.task("interval", id="reporting", seconds=30, misfire_grace_time=1) -@scheduler.task("cron", id="reporting", day="1", hour=0, minute=1) -def reporting_units_and_users(): - """At the start of every month, get number of units and users.""" - # Imports - import csv - import flask_mail - import flask_sqlalchemy - import pathlib - from dds_web import errors, utils - from dds_web.database.models import User, Unit - - # Get current date - current_date: str = utils.timestamp(ts_format="%Y-%m-%d") - - # Location of reporting file - reporting_file: pathlib.Path = pathlib.Path("/code/doc/reporting/dds-reporting.csv") - - # Error default - error: str = None - - # App context required - with scheduler.app.app_context(): - # Get email address - recipient: str = scheduler.app.config.get("MAIL_DDS") - default_subject: str = "DDS Unit / User report" - default_body: str = f"This email contains the DDS unit- and user statistics. The data was collected on: {current_date}." 
- error_subject: str = f"Error in {default_subject}" - error_body: str = "The cronjob 'reporting' experienced issues" - - # Get units and count them - units: flask_sqlalchemy.BaseQuery = Unit.query - num_units: int = units.count() - - # Count users - users: flask_sqlalchemy.BaseQuery = User.query - num_users_total: int = users.count() # All users - num_superadmins: int = users.filter_by(type="superadmin").count() # Super Admins - num_unit_users: int = users.filter_by(type="unituser").count() # Unit Admins / Personnel - num_researchers: int = users.filter_by(type="researchuser").count() # Researchers - num_users_excl_superadmins: int = num_users_total - num_superadmins - - # Verify that sum is correct - if sum([num_superadmins, num_unit_users, num_researchers]) != num_users_total: - error: str = "Sum of number of users incorrect." - # Define csv file and verify that it exists - elif not reporting_file.exists(): - error: str = "Could not find the csv file." - - if error: - # Send email about error - file_error_msg: flask_mail.Message = flask_mail.Message( - subject=error_subject, - recipients=[recipient], - body=f"{error_body}: {error}", - ) - utils.send_email_with_retry(msg=file_error_msg) - raise Exception(error) - - # Add row with new info - with reporting_file.open(mode="a") as repfile: - writer = csv.writer(repfile) - writer.writerow( - [ - current_date, - num_units, - num_researchers, - num_unit_users, - num_users_excl_superadmins, - ] - ) - - # Create email - msg: flask_mail.Message = flask_mail.Message( - subject=default_subject, - recipients=[recipient], - body=default_body, - ) - with reporting_file.open(mode="r") as file: # Attach file - msg.attach(filename=reporting_file.name, content_type="text/csv", data=file.read()) - utils.send_email_with_retry(msg=msg) # Send diff --git a/dds_web/templates/troubleshooting.html b/dds_web/templates/troubleshooting.html index cbac0bfde..97a2ded13 100644 --- a/dds_web/templates/troubleshooting.html +++ b/dds_web/templates/troubleshooting.html @@ -11,7 +11,7 @@

Experiencing issues with the DDS?

Did the troubleshooting document not help?

Please go through the following steps:

-   1. If the issue is related to the CLI, verify that you have the latest version installed: Open a terminal window, run dds --version and compare the displayed version to the latest one. The latest version can be found at PyPi.
+   1. If the issue is related to the CLI, verify that you have the latest version installed: Open a terminal window, run dds --version and compare the displayed version to the latest one. The latest version can be found at PyPI.
    2. Look at the CLI documentation here.
    3. Read through the sections in the technical overview relevant to your issue and check the topics below for possible solutions.
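For step 1 above, a quick way to compare the installed version with the latest one on PyPI could look something like the sketch below (assuming a pip-based installation and that `curl` and `jq` are available):

```bash
# Rough sketch of the check in step 1; assumes pip-based install, curl and jq available
dds --version                                                       # currently installed version
curl -s https://pypi.org/pypi/dds-cli/json | jq -r .info.version    # latest version published on PyPI
```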
diff --git a/dds_web/version.py b/dds_web/version.py
index 62fa04d70..a7ecb802b 100644
--- a/dds_web/version.py
+++ b/dds_web/version.py
@@ -1 +1 @@
-__version__ = "2.2.4"
+__version__ = "2.2.6"
diff --git a/doc/architecture/decisions/0019-define-and-run-cronjobs-via-kubernetes.md b/doc/architecture/decisions/0019-define-and-run-cronjobs-via-kubernetes.md
new file mode 100644
index 000000000..41c4e969a
--- /dev/null
+++ b/doc/architecture/decisions/0019-define-and-run-cronjobs-via-kubernetes.md
@@ -0,0 +1,19 @@
+# 19. Define and run cronjobs via Kubernetes
+
+Date: 2023-01-19
+
+## Status
+
+Accepted
+
+## Context
+
+Prior to this decision, the DDS defined its cronjobs in a module within the app, and they were run via Flask-APScheduler. This worked without any issues for a while, but when the deployment setup on the cluster was scaled up to 2 replicas, we started getting errors during, for example, the project status transitions, which are run every day at midnight. After some investigation we noticed that the issues are due to there being two replicas with two worker processes each, giving a total of four cronjobs which run simultaneously. In order to avoid these issues now and in the future, we are now defining the current cronjobs as Flask commands and setting up the cronjobs in k8s.
+
+## Decision
+
+Change the current cronjobs to Flask commands. Set up k8s cronjobs to run the Flask commands. All future cronjobs should be configured in this way; There should be no cronjobs defined within the DDS app.
+
+## Consequences
+
+The cronjobs defined in k8s will start up a new pod at a certain time, run the specific command, and then shut down the pod. Therefore, there will be more than 2 pods whenever a cronjob is run, and each cronjob will only be executed once, avoiding the deadlocks and duplicate errors that we are currently experiencing.
diff --git a/doc/architecture/decisions/0020-improve-pypi-package-security-with-2fa.md b/doc/architecture/decisions/0020-improve-pypi-package-security-with-2fa.md
new file mode 100644
index 000000000..171f4bdf2
--- /dev/null
+++ b/doc/architecture/decisions/0020-improve-pypi-package-security-with-2fa.md
@@ -0,0 +1,110 @@
+# 20. Improve PyPI package security with 2FA
+
+Date: 2023-01-23
+
+## Status
+
+Accepted
+
+## Context
+
+During the threat- and risk assessment of the DDS, the following possible threat was discovered regarding the CLI:
+
+| Threat                                      | Cause                      | Consequence                     |
+| ------------------------------------------- | -------------------------- | ------------------------------- |
+| An unauthorized (by us) change in the code  | The change is not detected | Sensitive data is not encrypted |
+
+In order to mitigate this threat, we decided to investigate the following possible improvements:
+
+- Provide a checksum for the package published on PyPI and recommend that the users installing `dds-cli` also verify the integrity of the package
+- Cryptographically sign the package published on PyPI and allow users installing `dds-cli` to verify the package's origin
+
+Both generating the checksum and signing the package need to occur prior to publishing the CLI to PyPI.
+
+### Providing a checksum
+
+- Hashes are automatically generated and uploaded to PyPI upon publishing packages.
These hashes are available on the ["Download files" page](https://pypi.org/project/dds-cli/#files) + + - You can view the hashes by clicking "view hashes", either in the _Source Distribution_ or _Built Distribution_ section + - The hashes are also available via the PyPI API + +- Installing `dds-cli` with `pip install dds-cli` (as the documentation currently states) followed by generating the hash with e.g. `pip hash ` does not generate a checksum matching the hash displayed on PyPI. + + ```bash + # SHA256 hash for dds_cli-2.2.5-py3-none-any.whl: 8ba6495b73d759e96c35652273cf4e4158acba02f1cf64f012cc67cf2e346cae + + # 1. Install dds-cli + pip install dds-cli + + # 2. Get path to dds command + which dds + # /home//dds-venv/bin/dds + + # 3. Generate checksum for dds + pip hash /home//dds-venv/bin/dds + # /home//dds-venv/bin/dds: + # --hash=sha256:88dd1285dacb2c2bcf314aec2c940a774c801167a26e5f93f90c649fbed2e9a0 + ``` + +- Downloading `dds-cli` with `pip download dds-cli` downloads the `whl` distribution files for _all_ the `dds-cli` requirements. An additional `--dest` option to specify the destination of all files would be needed in the command. The checksum verification is performed on the whl distribution file. An installation command after the download is required in order for the package to be usable: `pip install .`. + + ```bash + # SHA256 hash for dds_cli-2.2.5-py3-none-any.whl: 8ba6495b73d759e96c35652273cf4e4158acba02f1cf64f012cc67cf2e346cae + + # 1. Download dds-cli + pip download dds-cli --dest dds-downloaded + + # 2. Generate checksum for dds whl + pip hash dds-downloaded/dds_cli-2.2.5-py3-none-any.whl + # dds_cli-2.2.5-py3-none-any.whl: + # --hash=sha256:8ba6495b73d759e96c35652273cf4e4158acba02f1cf64f012cc67cf2e346cae + + # 3. Verify that the hashes match + if [ "" = "" ]; then echo "Package integrity verified"; else echo "Package compromised!"; fi + # Package integrity verified + + # 4. Install the dds-cli + pip install dds-downloaded/dds_cli-2.2.5-py3-none-any.whl + ``` + + - Downloading the package (step 1) via the browser is possible, the following steps (2-4) are only possible via the terminal. + +- The main principal of adding the hashes is that we should not blindly trust third party software. + + - Regarding PyPI: We do not ourselves generate the hashes available on PyPI - PyPI does. Therefore, verifying the hashes cannot guarantee that what we intend to publish is installed by the users. + - Regarding dependencies: If the CLI is installed via `pip install dds-cli`, there's a file with hashes for each package. If it's installed by first downloading with `pip download dds-cli`, all dependency `whl` distribution files are also downloaded. These package checksums could be verified by creating- and running a custom shell- or python script. + - It's possible to require matching hashes from all dependencies. This blocks the installation if a package either does not have a hash, or the hash does not match. + +- Many users are not familiar with (or happy about) using the terminal and a high priority item in our backlog is to implement a UI (e.g. web interface) allowing the users to collect their data without needing to execute shell commands. Recommending additional commands prior to running `dds` will not only be unpopular but also lead to an increase in support tickets to both us and the units. + +### Cryptographically sign package + +- The development version of `dds-cli` is automatically published to TestPyPI when a PR is created or when there's a change in the `dev` branch. 
The production version is (also automatically) published to PyPI when a new release is made.
+- The publishing of the package is handled by the GitHub Action [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish). This runs `twine`, which performs the publication to PyPI / TestPyPI. Twine itself has the `--sign` option, which tells twine to sign the package prior to publishing it. At this time, it appears that the GitHub Action used does not support this, likely because the functionality is discouraged and barely usable: https://github.com/pypa/gh-action-pypi-publish/discussions/67.
+- Despite being discouraged, it's technically possible to sign the package; You can generate the signature prior to the upload and place it next to the distribution. This will upload both the package and the signature. See the link above.
+- If one were to sign the package, the signature would not be available on PyPI via the browser. It is, however, possible to collect signatures via the PyPI API.
+- There is no proper tool for working with signatures for PyPI, and during the investigation to find a solution for this we have not found an example, a clear suggestion, or instructions on how to do it. We would therefore need to create our own solution.
+- Signatures on PyPI appear to be very rare, likely due to the points above. We have asked within Data Centre, NBIS and systems developers in general, and also done extensive research; No one seems to have experience with this, or to know how to do it.
+
+## Decision
+
+- We will _not_ change the recommended installation procedure from `pip install dds-cli` to `pip download dds-cli`, followed by hash verification.
+- We _will_ add instructions to the documentation showing how users can verify the package integrity, _if they want to_.
+- We will _not_ require hashes for all package dependencies; Not all packages provide hashes, and those that do have stored them in varying files and formats, so not all hashes will be found or recognized.
+- We will _not_ spend time on signing the `dds-cli` package prior to publishing it to PyPI; We should not implement functionality that is barely used, that PyPI itself does not support well, that has no proper tooling available, that is marked as discouraged, and that will possibly be phased out.
+- We _will_ secure our PyPI account by activating 2FA.
+
+## Consequences
+
+- Users will have the option to choose which installation method they want to use and whether they want to verify the package integrity prior to running it.
+- 2FA will reduce the risk of a breach and thereby prevent an unauthorized entity from creating a new API key on our account. A new API key would allow the unauthorized entity to impersonate the SciLifeLab Data Centre and publish a new, fake, and possibly harmful version of the CLI.
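As an illustration of the kind of verification instructions referred to in the decision above, a possible sketch is shown below; the version and wheel file name are examples, and `curl` / `jq` are assumed to be available:

```bash
# Sketch only: compare the hash published on PyPI with a locally computed one.
# The version and wheel file name are examples.
pip download dds-cli==2.2.5 --no-deps --dest dds-downloaded

# Hash published on PyPI, fetched via the JSON API
curl -s https://pypi.org/pypi/dds-cli/2.2.5/json \
  | jq -r '.urls[] | select(.filename == "dds_cli-2.2.5-py3-none-any.whl") | .digests.sha256'

# Hash of the downloaded wheel, computed locally
sha256sum dds-downloaded/dds_cli-2.2.5-py3-none-any.whl
```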
+ +## Relevant links + +- https://pypi.org/project/dds-cli/#files +- https://peps.python.org/pep-0541/ +- https://github.com/pypa/gh-action-pypi-publish/discussions/67 +- https://security.stackexchange.com/questions/79326/which-security-measures-does-pypi-and-similar-third-party-software-repositories +- https://pip.pypa.io/en/stable/topics/secure-installs/ +- https://security.stackexchange.com/questions/175425/pip-verify-packet-integrity +- https://github.com/pypi/warehouse/issues/3356 diff --git a/doc/procedures/new_release.md b/doc/procedures/new_release.md new file mode 100644 index 000000000..5be8fc310 --- /dev/null +++ b/doc/procedures/new_release.md @@ -0,0 +1,88 @@ +# How to create a new release + +**Is the release planned for the next cluster maintenance?** Please read point 8 first. + +1. Confirm that the development instance works + + 1. _In general_, e.g. that it's up and running + 2. _Specific feature has been added or changed:_ Confirm that it also works in the development instance + 3. _The change is in the API:_ Confirm that the development instance works together with the CLI + +2. Fork a new branch from `dev` +3. Update the version in [`version.py`](../../dds_web/version.py) + + - _Minor changes, e.g. bug fix_: Minor version upgrade, e.g. `1.0.1 --> 1.0.2` + - _Small changes, e.g. new feature_: Mid version upgrade, e.g. `1.1.0 --> 1.2.0` + - _Breaking changes or large new feature(s)_: Major version upgrade, e.g. `1.0.0 --> 2.0.0` + + > Will break if CLI version not bumped as well + +4. Push version change to branch +5. Create a new PR from `` to `dev` + + Wait for approval and merge by Product Owner or admin. + +6. Create a PR from `dev` to `master` + + - _Backward compatibility:_ Check whether or not the dds_cli master branch works with the code in the PR. There’s an item in the PR comment regarding this; note if the dds_web changes work with the previous version of the dds_cli. If something might break - give detailed information about what. **This information should also be included in the MOTD.** + - All changes should be approved in the PRs to dev so reviewing the changes a second time in this PR is not necessary. Instead, the team should look through the code just to see if something looks weird. + - All sections and checks in the PR template should be filled in and checked. Follow the instruction in the PR description field. + - There should be at least one approval of the PR. + - _Everything looks ok and there's at least one approval?_ Merge it. + +7. [Draft a new release](https://github.com/ScilifelabDataCentre/dds_web/releases) + + 1. `Choose a tag` → `Find or create a new tag` → Fill in the new version, e.g. if the new version is `1.0.0`, you should fill in `v1.0.0`. + 2. `Target` should be set to `master` + 3. `Release title` field should be set to the same as the tag, e.g. `v1.0.0` + 4. `Write` → `Generate release notes`. + + You can also fill in something to describe what has been changed in this release, if you feel that the auto-generated release notes are missing something etc. + + 5. `Publish release`. + + An image of the web / api will be published to the [GitHub Container Registry](https://codefresh.io/csdp-docs/docs/integrations/container-registries/github-cr/) + +8. Perform redeployment + + The method for this _depends on the situation_ / size of and reason for the upgrade. + + - **Bug**, affecting the DDS functionality - Fix ASAP + + 1. Add a new _Message of the Day_ (MOTD) informing the users of an ongoing / immediate update - (see CLI) + 2. 
Send the MOTD via email (see CLI) + 3. Send a message in the `dds-status` slack channel to inform the units + 4. Ask for a redeployment + + 1. Go to the [sysadmin repository](https://github.com/ScilifelabDataCentre/sysadmin/issues) + 2. Create a new issue and fill in the following information + + `Title` + + DDS: Redeploy the production instance (`dds`) + + `Leave a comment` + + Please redeploy the production instance of the DDS. + Image: + + Fill in the [manual log](https://scilifelab.atlassian.net/wiki/spaces/deliveryportal/pages/2318565390/Production) on Confluence. + + - **New feature** or bug that does not need an immediate fix + + Cluster maintenance is performed the first Wednesday (that is a work day) of every month. The DDS is redeployed during this as well. However, we still need to inform the users of the upcoming upgrade, and the sysadmins of which image they should deploy. + + 1. Go to the [sysadmin repository](https://github.com/ScilifelabDataCentre/sysadmin/issues) + 2. Create a new issue and fill in the following information + + `Title` + + DDS: Schedule redeployment of production instance (`dds`) for next cluster maintenance window + + `Leave a comment` + + During the next cluster maintenance (``), please redeploy the production instance of the DDS. + + Image: + + Fill in the [manual log](https://scilifelab.atlassian.net/wiki/spaces/deliveryportal/pages/2318565390/Production) on Confluence. diff --git a/doc/procedures/update_dev_instance.md b/doc/procedures/update_dev_instance.md new file mode 100644 index 000000000..f179f2e6d --- /dev/null +++ b/doc/procedures/update_dev_instance.md @@ -0,0 +1,24 @@ +# How to update the development instance + +A new feature should always be tested on the development instance of the DDS **before** it's released in production. When a new feature is added and pushed to the `dev` branch, the development instance (`dds-dev`) should be redeployed. + +Redeployments of the development instance can be done at any time. No planning required. + +1. Merge change into `dev`. A GitHub action publishes the image to GHCR. +2. Perform redeployment of dev instance + + - A member of the team has access to the development deployment: They can do the redeployment themselves. This is only OK in the development instance. + - No team members have access to the deployment / they are not available: + + 1. Go to the [sysadmin repository](https://github.com/ScilifelabDataCentre/sysadmin/issues) + 2. Create a new issue and fill in the following information + + `Title` + + DDS: Redeploy the development instance (`dds-dev`) + + `Leave a comment` + + Please redeploy the dev instance of the DDS. + + Fill in the [manual log](https://scilifelab.atlassian.net/wiki/spaces/deliveryportal/pages/2318729217/Development+Test) on Confluence. 
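For steps 2-4 of the release procedure described above, a terminal session could look roughly like the following; the branch name and version number are examples only:

```bash
# Illustration of release steps 2-4; branch name and version are examples only
git checkout dev && git pull
git checkout -b new-version_2.2.6
# Edit dds_web/version.py so that: __version__ = "2.2.6"
git commit -am "Bump version to 2.2.6"
git push --set-upstream origin new-version_2.2.6
```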
diff --git a/doc/reporting/dds-reporting.csv b/doc/reporting/dds-reporting.csv index 0f9c928b2..051c72e66 100644 --- a/doc/reporting/dds-reporting.csv +++ b/doc/reporting/dds-reporting.csv @@ -1,3 +1,3 @@ Date,Units using DDS in production,Researchers,Unit users,Total number of users 2022-11-30,2,108,11,119 -2023-01-02,2,126,11,137 \ No newline at end of file +2023-01-02,2,126,11,137 diff --git a/migrations/versions/0c9c237cced5_unit_quota_and_warning.py b/migrations/versions/0c9c237cced5_unit_quota_and_warning.py new file mode 100644 index 000000000..f144bafaa --- /dev/null +++ b/migrations/versions/0c9c237cced5_unit_quota_and_warning.py @@ -0,0 +1,38 @@ +"""unit_quota_and_warning + +Revision ID: 0c9c237cced5 +Revises: eb395af90e18 +Create Date: 2023-01-10 14:30:57.089391 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import mysql + +# revision identifiers, used by Alembic. +revision = "0c9c237cced5" +down_revision = "eb395af90e18" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + # Add new columns + op.add_column("units", sa.Column("quota", sa.BigInteger(), nullable=False)) + op.add_column("units", sa.Column("warning_level", sa.Float(), nullable=False, default=0.80)) + + # Update existing columns + # 1. Load table - need to load columns in order to use + unit_table = sa.sql.table("units", sa.sql.column("quota", mysql.BIGINT)) + # 2. Update column value - set value to 100 TB + op.execute(unit_table.update().values(quota=100 * (10**12))) + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column("units", "warning_level") + op.drop_column("units", "quota") + # ### end Alembic commands ### diff --git a/migrations/versions/b976f6cda95c_add_reporting_table.py b/migrations/versions/b976f6cda95c_add_reporting_table.py new file mode 100644 index 000000000..b6f7dffd6 --- /dev/null +++ b/migrations/versions/b976f6cda95c_add_reporting_table.py @@ -0,0 +1,39 @@ +"""add-reporting-table + +Revision ID: b976f6cda95c +Revises: 0c9c237cced5 +Create Date: 2023-01-26 10:02:37.535695 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import mysql + +# revision identifiers, used by Alembic. +revision = "b976f6cda95c" +down_revision = "0c9c237cced5" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "reporting", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("date", sa.DateTime(), nullable=False), + sa.Column("unit_count", sa.Integer(), nullable=False), + sa.Column("researchuser_count", sa.Integer(), nullable=False), + sa.Column("unituser_count", sa.Integer(), nullable=False), + sa.Column("superadmin_count", sa.Integer(), nullable=False), + sa.Column("total_user_count", sa.Integer(), nullable=False), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("date"), + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_table("reporting") + # ### end Alembic commands ### diff --git a/requirements.txt b/requirements.txt index 0b7cdaa9a..9881a6561 100644 --- a/requirements.txt +++ b/requirements.txt @@ -65,4 +65,4 @@ urllib3==1.26.8 visitor==0.1.3 Werkzeug==2.0.3 wrapt==1.13.3 -WTForms==3.0.1 +WTForms==3.0.1 \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 550868c5c..927a84866 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,6 +13,7 @@ import boto3 from requests_mock.mocker import Mocker import requests_cache +import click # Own from dds_web.database.models import ( @@ -119,6 +120,7 @@ def demo_data(): external_display_name="Display Name", contact_email="support@example.com", internal_ref="someunit", + quota=10**9, safespring_endpoint="endpoint", safespring_name="dds.example.com", safespring_access="access", @@ -130,6 +132,7 @@ def demo_data(): external_display_name="Retraction guaranteed", contact_email="tloteg@mailtrap.io", internal_ref="Unit to test user deletion", + quota=10**9, safespring_endpoint="endpoint", safespring_name="dds.example.com", safespring_access="access", @@ -538,3 +541,22 @@ def disable_requests_cache(): """ with unittest.mock.patch("requests_cache.CachedSession", requests.Session): yield + + +@pytest.fixture +def runner() -> click.testing.CliRunner: + return click.testing.CliRunner() + + +@pytest.fixture() +def cli_test_app(): + from dds_web import create_app + from tests import conftest + + cli_test_app = create_app(testing=True, database_uri=DATABASE_URI) + yield cli_test_app + + +@pytest.fixture() +def cli_runner(cli_test_app): + return cli_test_app.test_cli_runner() diff --git a/tests/docker-compose-test.yml b/tests/docker-compose-test.yml index 1394c1026..edb9cc1bf 100644 --- a/tests/docker-compose-test.yml +++ b/tests/docker-compose-test.yml @@ -9,7 +9,7 @@ services: dockerfile: Dockerfiles/backend.Dockerfile context: ./ target: test - command: sh -c "COVERAGE_FILE=./coverage/.coverage pytest --color=yes $DDS_PYTEST_ARGS --cov=./dds_web --cov-report=xml:coverage/report.xml" + command: sh -c "COVERAGE_FILE=./coverage/.coverage pytest -s -color=yes $DDS_PYTEST_ARGS --cov=./dds_web --cov-report=xml:coverage/report.xml" restart: "no" volumes: - type: bind diff --git a/tests/requirements-test.txt b/tests/requirements-test.txt index 612258222..4453da88d 100644 --- a/tests/requirements-test.txt +++ b/tests/requirements-test.txt @@ -5,4 +5,5 @@ requests-mock==1.9.3 sqlalchemy-utils==0.38.2 pytest-mock==3.7.0 pyfakefs==4.5.5 -requests_cache==0.9.4 \ No newline at end of file +requests_cache==0.9.4 +testfixtures==7.0.4 \ No newline at end of file diff --git a/tests/test_commands.py b/tests/test_commands.py new file mode 100644 index 000000000..1fb4e74fa --- /dev/null +++ b/tests/test_commands.py @@ -0,0 +1,467 @@ +# Imports + +# Standard +import typing +from unittest import mock +from unittest.mock import patch +from unittest.mock import PropertyMock +from unittest.mock import MagicMock +import os +import pytest +from _pytest.logging import LogCaptureFixture +import logging +from datetime import datetime, timedelta +import pathlib +import csv + +# Installed +import click +from pyfakefs.fake_filesystem import FakeFilesystem +import flask_mail +import freezegun + +# Own +from dds_web.commands import ( + fill_db_wrapper, + create_new_unit, + update_uploaded_file_with_log, + monitor_usage, + set_available_to_expired, + set_expired_to_archived, + delete_invites, + quarterly_usage, + reporting_units_and_users, +) +from 
dds_web.database import models +from dds_web import db +from dds_web.utils import current_time + +# Tools + + +def mock_commit(): + return + + +def mock_no_project(): + return None + + +def mock_unit_size(): + return 100 + + +# fill_db_wrapper + + +def test_fill_db_wrapper_production(client, runner) -> None: + """Run init-db with the production argument.""" + result: click.testing.Result = runner.invoke(fill_db_wrapper, ["production"]) + assert result.exit_code == 1 + + +def test_fill_db_wrapper_devsmall(client, runner) -> None: + """Run init-db with the dev-small argument.""" + result: click.testing.Result = runner.invoke(fill_db_wrapper, ["dev-small"]) + assert result.exit_code == 1 + + +# def test_fill_db_wrapper_devbig(client, runner) -> None: +# """Run init-db with the dev-big argument.""" +# result: click.testing.Result = runner.invoke(fill_db_wrapper, ["dev-big"]) +# assert result.exit_code == 1 + +# create_new_unit + + +def create_command_options_from_dict(options: typing.Dict) -> typing.List: + """Create a list with options and values from a dict.""" + # Create command options + command_options: typing.List = [] + for key, val in options.items(): + command_options.append(f"--{key}") + command_options.append(val) + + return command_options + + +correct_unit: typing.Dict = { + "name": "newname", + "public_id": "newpublicid", + "external_display_name": "newexternaldisplay", + "contact_email": "newcontact@mail.com", + "internal_ref": "newinternalref", + "safespring_endpoint": "newsafespringendpoint", + "safespring_name": "newsafespringname", + "safespring_access": "newsafespringaccess", + "safespring_secret": "newsafespringsecret", + "days_in_available": 45, + "days_in_expired": 15, +} + + +def test_create_new_unit_public_id_too_long(client, runner) -> None: + """Create new unit, public_id too long.""" + # Change public_id + incorrect_unit: typing.Dict = correct_unit.copy() + incorrect_unit["public_id"] = "public" * 10 + + # Get command options + command_options = create_command_options_from_dict(options=incorrect_unit) + + # Run command + result: click.testing.Result = runner.invoke(create_new_unit, command_options) + # assert "The 'public_id' can be a maximum of 50 characters" in result.output + assert ( + not db.session.query(models.Unit).filter(models.Unit.name == incorrect_unit["name"]).all() + ) + + +def test_create_new_unit_public_id_incorrect_characters(client, runner) -> None: + """Create new unit, public_id has invalid characters (here _).""" + # Change public_id + incorrect_unit: typing.Dict = correct_unit.copy() + incorrect_unit["public_id"] = "new_public_id" + + # Get command options + command_options = create_command_options_from_dict(options=incorrect_unit) + + # Run command + result: click.testing.Result = runner.invoke(create_new_unit, command_options) + # assert "The 'public_id' can only contain letters, numbers, dots (.) and hyphens (-)." in result.output + assert ( + not db.session.query(models.Unit).filter(models.Unit.name == incorrect_unit["name"]).all() + ) + + +def test_create_new_unit_public_id_starts_with_dot(client, runner) -> None: + """Create new unit, public_id starts with invalid character (. 
or -).""" + # Change public_id + incorrect_unit: typing.Dict = correct_unit.copy() + incorrect_unit["public_id"] = ".newpublicid" + + # Get command options + command_options = create_command_options_from_dict(options=incorrect_unit) + + # Run command + result: click.testing.Result = runner.invoke(create_new_unit, command_options) + # assert "The 'public_id' must begin with a letter or number." in result.output + assert ( + not db.session.query(models.Unit).filter(models.Unit.name == incorrect_unit["name"]).all() + ) + + # Change public_id again + incorrect_unit["public_id"] = "-newpublicid" + + # Get command options + command_options = create_command_options_from_dict(options=incorrect_unit) + + # Run command + result: click.testing.Result = runner.invoke(create_new_unit, command_options) + # assert "The 'public_id' must begin with a letter or number." in result.output + assert ( + not db.session.query(models.Unit).filter(models.Unit.name == incorrect_unit["name"]).all() + ) + + +def test_create_new_unit_public_id_too_many_dots(client, runner) -> None: + """Create new unit, public_id has invalid number of dots.""" + # Change public_id + incorrect_unit: typing.Dict = correct_unit.copy() + incorrect_unit["public_id"] = "new.public..id" + + # Get command options + command_options = create_command_options_from_dict(options=incorrect_unit) + + # Run command + result: click.testing.Result = runner.invoke(create_new_unit, command_options) + # assert "The 'public_id' should not contain more than two dots." in result.output + assert ( + not db.session.query(models.Unit).filter(models.Unit.name == incorrect_unit["name"]).all() + ) + + +def test_create_new_unit_public_id_invalid_start(client, runner) -> None: + """Create new unit, public_id starts with prefix.""" + # Change public_id + incorrect_unit: typing.Dict = correct_unit.copy() + incorrect_unit["public_id"] = "xn--newpublicid" + + # Get command options + command_options = create_command_options_from_dict(options=incorrect_unit) + + # Run command + result: click.testing.Result = runner.invoke(create_new_unit, command_options) + # assert "The 'public_id' cannot begin with the 'xn--' prefix." 
in result.output + assert ( + not db.session.query(models.Unit).filter(models.Unit.name == incorrect_unit["name"]).all() + ) + + +def test_create_new_unit_success(client, runner) -> None: + """Create new unit, public_id starts with prefix.""" + # Get command options + command_options = create_command_options_from_dict(options=correct_unit) + + with patch("dds_web.db.session.commit", mock_commit): + # Run command + result: click.testing.Result = runner.invoke(create_new_unit, command_options) + # assert f"Unit '{correct_unit['name']}' created" in result.output + + +# update_uploaded_file_with_log + + +def test_update_uploaded_file_with_log_nonexisting_project(client, runner) -> None: + """Add file info to non existing project.""" + # Create command options + command_options: typing.List = [ + "--project", + "projectdoesntexist", + "--path-to-log-file", + "somefile", + ] + + # Run command + assert db.session.query(models.Project).all() + with patch("dds_web.database.models.Project.query.filter_by", mock_no_project): + result: click.testing.Result = runner.invoke(update_uploaded_file_with_log, command_options) + assert result.exit_code == 1 + + +def test_update_uploaded_file_with_log_nonexisting_file(client, runner, fs: FakeFilesystem) -> None: + """Attempt to read file which does not exist.""" + # Verify that fake file does not exist + non_existent_log_file: str = "this_is_not_a_file.json" + assert not os.path.exists(non_existent_log_file) + + # Create command options + command_options: typing.List = [ + "--project", + "projectdoesntexist", + "--path-to-log-file", + non_existent_log_file, + ] + + # Run command + result: click.testing.Result = runner.invoke(update_uploaded_file_with_log, command_options) + assert result.exit_code == 1 + + +# monitor_usage + +# usage = 0 --> check log +def test_monitor_usage_no_usage(client, cli_runner, capfd): + """If a unit has no uploaded data, there's no need to do the calculations or send email warning.""" + # Mock the size property of the Unit table + with patch("dds_web.database.models.Unit.size", new_callable=PropertyMock) as mock_size: + mock_size.return_value = 0 # Test size = 0 + # Mock emails - only check if function call + with patch.object(flask_mail.Mail, "send") as mock_mail_send: + # Run command + _: click.testing.Result = cli_runner.invoke(monitor_usage) + # Verify no email has been sent and stoud contains logging info + assert mock_mail_send.call_count == 0 + # Logging ends up in stderr + _, err = capfd.readouterr() + for unit in models.Unit.query.all(): + assert f"{unit.name} usage: 0 bytes. Skipping percentage calculation." 
in err + + +# percentage below warning level --> check log + no email +def test_monitor_usage_no_email(client, cli_runner, capfd): + """No email should be sent if the usage is below the warning level.""" + # Mock the size property of the Unit table + with patch("dds_web.database.models.Unit.size", new_callable=PropertyMock) as mock_size: + mock_size.return_value = 1 + # Mock emails - only check if function call + with patch.object(flask_mail.Mail, "send") as mock_mail_send: + # Run command + _: click.testing.Result = cli_runner.invoke(monitor_usage) + # Verify no email has been sent and stoud contains logging info + assert mock_mail_send.call_count == 0 + # Logging ends up in stderr + _, err = capfd.readouterr() + for unit in models.Unit.query.all(): + assert f"Monitoring the usage for unit '{unit.name}' showed the following:\n" in err + + +# percentage above warning level --> check log + email sent +def test_monitor_usage_warning_sent(client, cli_runner, capfd): + """An email should be sent if the usage is above the warning level.""" + for unit in models.Unit.query.all(): + unit_quota: int = unit.quota + unit_warning_level: float = unit.warning_level + max_level: float = unit_quota * unit_warning_level + + with patch("dds_web.database.models.Unit.size", new_callable=PropertyMock) as mock_size: + mock_size.return_value = max_level + 100 + with patch.object(flask_mail.Mail, "send") as mock_mail_send: + result: click.testing.Result = cli_runner.invoke(monitor_usage) + assert mock_mail_send.call_count == 2 # 2 because client and cli_runner both run + _, err = capfd.readouterr() + assert ( + f"A SciLifeLab Unit is approaching the allocated data quota.\nAffected unit: {unit.name}\n" + in err + ) + + +# set_available_to_expired + + +def test_set_available_to_expired(client, cli_runner): + units: List = db.session.query(models.Unit).all() + # Set project statuses to Available + # and deadline to now to be able to test cronjob functionality + for unit in units: + for project in unit.projects: + for status in project.project_statuses: + status.deadline = current_time() - timedelta(weeks=1) + status.status = "Available" + + i: int = 0 + for unit in units: + i += len( + [ + project + for project in unit.projects + if project.current_status == "Available" + and project.current_deadline <= current_time() + ] + ) + assert i == 6 + + cli_runner.invoke(set_available_to_expired) + + units: List = db.session.query(models.Unit).all() + + i: int = 0 + j: int = 0 + for unit in units: + i += len([project for project in unit.projects if project.current_status == "Available"]) + j += len([project for project in unit.projects if project.current_status == "Expired"]) + + assert i == 0 + assert j == 6 + + +# set_expired_to_archived + + +@mock.patch("boto3.session.Session") +def test_set_expired_to_archived(_: MagicMock, client, cli_runner): + units: List = db.session.query(models.Unit).all() + + for unit in units: + for project in unit.projects: + for status in project.project_statuses: + status.deadline = current_time() - timedelta(weeks=1) + status.status = "Expired" + + i: int = 0 + for unit in units: + i += len([project for project in unit.projects if project.current_status == "Expired"]) + assert i == 6 + + cli_runner.invoke(set_expired_to_archived) + + units: List = db.session.query(models.Unit).all() + + i: int = 0 + j: int = 0 + for unit in units: + i += len([project for project in unit.projects if project.current_status == "Expired"]) + j += len([project for project in unit.projects if 
project.current_status == "Archived"]) + + assert i == 0 + assert j == 6 + + +# delete invites + + +def test_delete_invite(client, cli_runner): + assert len(db.session.query(models.Invite).all()) == 2 + cli_runner.invoke(delete_invites) + assert len(db.session.query(models.Invite).all()) == 1 + + +def test_delete_invite_timestamp_issue(client, cli_runner): + """Test that the delete_invite cronjob deletes invites with '0000-00-00 00:00:00' timestamp.""" + assert len(db.session.query(models.Invite).all()) == 2 + invites = db.session.query(models.Invite).all() + for invite in invites: + invite.created_at = "0000-00-00 00:00:00" + db.session.commit() + cli_runner.invoke(delete_invites) + assert len(db.session.query(models.Invite).all()) == 0 + + +# quarterly usage + + +def test_quarterly_usage(client, cli_runner): + """Test the quarterly_usage cron job.""" + cli_runner.invoke(quarterly_usage) + + +# reporting units and users + + +def test_reporting_units_and_users(client, cli_runner, fs: FakeFilesystem): + """Test that the reporting is giving correct values.""" + from dds_web.database.models import Unit, UnitUser, ResearchUser, SuperAdmin, User, Reporting + + def verify_reporting_row(row, time_date): + """Verify correct values in reporting row.""" + assert row.date.date() == datetime.date(time_date) + assert row.unit_count == Unit.query.count() + assert row.researchuser_count == ResearchUser.query.count() + assert row.unituser_count == UnitUser.query.count() + assert row.superadmin_count == SuperAdmin.query.count() + assert row.total_user_count == User.query.count() + assert row.total_user_count == sum( + [row.researchuser_count, row.unituser_count, row.superadmin_count] + ) + + # Verify that there are no reporting rows + assert Reporting.query.count() == 0 + + # Run successful command - new row should be created + first_time = datetime(year=2022, month=12, day=10, hour=10, minute=54, second=10) + with freezegun.freeze_time(first_time): + # Run scheduled job now + with mock.patch.object(flask_mail.Mail, "send") as mock_mail_send: + result: click.testing.Result = cli_runner.invoke(reporting_units_and_users) + assert not result.exception, "Raised an unwanted exception." + assert mock_mail_send.call_count == 0 + + # Verify that there's now a reporting row + assert Reporting.query.count() == 1 + row = Reporting.query.first() + verify_reporting_row(row=row, time_date=first_time) + + # Check that an exception is raised if the command is run on the same day + with freezegun.freeze_time(first_time): + # Run scheduled job now + with mock.patch.object(flask_mail.Mail, "send") as mock_mail_send: + # with pytest.raises(Exception) as err: + result: click.testing.Result = cli_runner.invoke(reporting_units_and_users) + assert result.exception, "Did not raise exception." + assert "Duplicate entry" in str(result.exception) + assert mock_mail_send.call_count == 1 + + # Verify that the next day works + second_time = datetime(year=2022, month=12, day=11, hour=10, minute=54, second=10) + with freezegun.freeze_time(second_time): + # Run scheduled job now + with mock.patch.object(flask_mail.Mail, "send") as mock_mail_send: + result: click.testing.Result = cli_runner.invoke(reporting_units_and_users) + assert not result.exception, "Raised an unwanted exception." 
+ assert mock_mail_send.call_count == 0 + + # Verify that there's now a reporting row + assert Reporting.query.count() == 2 + reporting_rows = Reporting.query.all() + for row in reporting_rows: + verify_reporting_row(row=row, time_date=first_time if row.id == 1 else second_time) diff --git a/tests/test_init.py b/tests/test_init.py index 7cbb11471..b6ff8ae69 100644 --- a/tests/test_init.py +++ b/tests/test_init.py @@ -1,14 +1,8 @@ -from dds_web import fill_db_wrapper, create_new_unit, update_uploaded_file_with_log import click.testing import pytest from dds_web import db -import dds_web -import dds_web.api.api_s3_connector from dds_web.database import models from unittest.mock import patch -import typing -from pyfakefs.fake_filesystem import FakeFilesystem -import os from tests import DDSEndpoint, DEFAULT_HEADER, UserAuth, USER_CREDENTIALS import http import werkzeug @@ -16,217 +10,6 @@ import flask_mail -@pytest.fixture -def runner() -> click.testing.CliRunner: - return click.testing.CliRunner() - - -def mock_commit(): - return - - -# fill_db_wrapper - - -def test_fill_db_wrapper_production(client, runner) -> None: - """Run init-db with the production argument.""" - result: click.testing.Result = runner.invoke(fill_db_wrapper, ["production"]) - assert result.exit_code == 1 - - -def test_fill_db_wrapper_devsmall(client, runner) -> None: - """Run init-db with the dev-small argument.""" - result: click.testing.Result = runner.invoke(fill_db_wrapper, ["dev-small"]) - assert result.exit_code == 1 - - -# def test_fill_db_wrapper_devbig(client, runner) -> None: -# """Run init-db with the dev-big argument.""" -# result: click.testing.Result = runner.invoke(fill_db_wrapper, ["dev-big"]) -# assert result.exit_code == 1 - - -# create_new_unit - - -def create_command_options_from_dict(options: typing.Dict) -> typing.List: - """Create a list with options and values from a dict.""" - # Create command options - command_options: typing.List = [] - for key, val in options.items(): - command_options.append(f"--{key}") - command_options.append(val) - - return command_options - - -correct_unit: typing.Dict = { - "name": "newname", - "public_id": "newpublicid", - "external_display_name": "newexternaldisplay", - "contact_email": "newcontact@mail.com", - "internal_ref": "newinternalref", - "safespring_endpoint": "newsafespringendpoint", - "safespring_name": "newsafespringname", - "safespring_access": "newsafespringaccess", - "safespring_secret": "newsafespringsecret", - "days_in_available": 45, - "days_in_expired": 15, -} - - -def test_create_new_unit_public_id_too_long(client, runner) -> None: - """Create new unit, public_id too long.""" - # Change public_id - incorrect_unit: typing.Dict = correct_unit.copy() - incorrect_unit["public_id"] = "public" * 10 - - # Get command options - command_options = create_command_options_from_dict(options=incorrect_unit) - - # Run command - result: click.testing.Result = runner.invoke(create_new_unit, command_options) - # assert "The 'public_id' can be a maximum of 50 characters" in result.output - assert ( - not db.session.query(models.Unit).filter(models.Unit.name == incorrect_unit["name"]).all() - ) - - -def test_create_new_unit_public_id_incorrect_characters(client, runner) -> None: - """Create new unit, public_id has invalid characters (here _).""" - # Change public_id - incorrect_unit: typing.Dict = correct_unit.copy() - incorrect_unit["public_id"] = "new_public_id" - - # Get command options - command_options = create_command_options_from_dict(options=incorrect_unit) - - 
# Run command - result: click.testing.Result = runner.invoke(create_new_unit, command_options) - # assert "The 'public_id' can only contain letters, numbers, dots (.) and hyphens (-)." in result.output - assert ( - not db.session.query(models.Unit).filter(models.Unit.name == incorrect_unit["name"]).all() - ) - - -def test_create_new_unit_public_id_starts_with_dot(client, runner) -> None: - """Create new unit, public_id starts with invalid character (. or -).""" - # Change public_id - incorrect_unit: typing.Dict = correct_unit.copy() - incorrect_unit["public_id"] = ".newpublicid" - - # Get command options - command_options = create_command_options_from_dict(options=incorrect_unit) - - # Run command - result: click.testing.Result = runner.invoke(create_new_unit, command_options) - # assert "The 'public_id' must begin with a letter or number." in result.output - assert ( - not db.session.query(models.Unit).filter(models.Unit.name == incorrect_unit["name"]).all() - ) - - # Change public_id again - incorrect_unit["public_id"] = "-newpublicid" - - # Get command options - command_options = create_command_options_from_dict(options=incorrect_unit) - - # Run command - result: click.testing.Result = runner.invoke(create_new_unit, command_options) - # assert "The 'public_id' must begin with a letter or number." in result.output - assert ( - not db.session.query(models.Unit).filter(models.Unit.name == incorrect_unit["name"]).all() - ) - - -def test_create_new_unit_public_id_too_many_dots(client, runner) -> None: - """Create new unit, public_id has invalid number of dots.""" - # Change public_id - incorrect_unit: typing.Dict = correct_unit.copy() - incorrect_unit["public_id"] = "new.public..id" - - # Get command options - command_options = create_command_options_from_dict(options=incorrect_unit) - - # Run command - result: click.testing.Result = runner.invoke(create_new_unit, command_options) - # assert "The 'public_id' should not contain more than two dots." in result.output - assert ( - not db.session.query(models.Unit).filter(models.Unit.name == incorrect_unit["name"]).all() - ) - - -def test_create_new_unit_public_id_invalid_start(client, runner) -> None: - """Create new unit, public_id starts with prefix.""" - # Change public_id - incorrect_unit: typing.Dict = correct_unit.copy() - incorrect_unit["public_id"] = "xn--newpublicid" - - # Get command options - command_options = create_command_options_from_dict(options=incorrect_unit) - - # Run command - result: click.testing.Result = runner.invoke(create_new_unit, command_options) - # assert "The 'public_id' cannot begin with the 'xn--' prefix." 
in result.output - assert ( - not db.session.query(models.Unit).filter(models.Unit.name == incorrect_unit["name"]).all() - ) - - -def test_create_new_unit_success(client, runner) -> None: - """Create new unit, public_id starts with prefix.""" - # Get command options - command_options = create_command_options_from_dict(options=correct_unit) - - with patch("dds_web.db.session.commit", mock_commit): - # Run command - result: click.testing.Result = runner.invoke(create_new_unit, command_options) - # assert f"Unit '{correct_unit['name']}' created" in result.output - - -# Update uploaded file with log - - -def mock_no_project(): - return None - - -def test_update_uploaded_file_with_log_nonexisting_project(client, runner) -> None: - """Add file info to non existing project.""" - # Create command options - command_options: typing.List = [ - "--project", - "projectdoesntexist", - "--path-to-log-file", - "somefile", - ] - - # Run command - assert db.session.query(models.Project).all() - with patch("dds_web.database.models.Project.query.filter_by", mock_no_project): - result: click.testing.Result = runner.invoke(update_uploaded_file_with_log, command_options) - assert result.exit_code == 1 - - -def test_update_uploaded_file_with_log_nonexisting_file(client, runner, fs: FakeFilesystem) -> None: - """Attempt to read file which does not exist.""" - # Verify that fake file does not exist - non_existent_log_file: str = "this_is_not_a_file.json" - assert not os.path.exists(non_existent_log_file) - - # Create command options - command_options: typing.List = [ - "--project", - "projectdoesntexist", - "--path-to-log-file", - non_existent_log_file, - ] - - # Run command - result: click.testing.Result = runner.invoke(update_uploaded_file_with_log, command_options) - assert result.exit_code == 1 - - # block_if_maintenance - should be blocked in init by before_request diff --git a/tests/test_scheduled_tasks.py b/tests/test_scheduled_tasks.py deleted file mode 100644 index 5b2a7f8b3..000000000 --- a/tests/test_scheduled_tasks.py +++ /dev/null @@ -1,208 +0,0 @@ -from datetime import timedelta - -import flask -import flask_mail -import unittest -import pathlib -import csv -from datetime import datetime -import typing -import pytest - -from unittest import mock -from unittest.mock import MagicMock -from pyfakefs.fake_filesystem import FakeFilesystem -import freezegun - -from dds_web import db -from dds_web.database import models -from dds_web.utils import current_time - -from dds_web.scheduled_tasks import ( - set_available_to_expired, - set_expired_to_archived, - delete_invite, - quarterly_usage, - reporting_units_and_users, -) - -from typing import List - -# set_available_to_expired - - -def test_set_available_to_expired(client: flask.testing.FlaskClient) -> None: - units: List = db.session.query(models.Unit).all() - # Set project statuses to Available - # and deadline to now to be able to test cronjob functionality - for unit in units: - for project in unit.projects: - for status in project.project_statuses: - status.deadline = current_time() - timedelta(weeks=1) - status.status = "Available" - - i: int = 0 - for unit in units: - i += len( - [ - project - for project in unit.projects - if project.current_status == "Available" - and project.current_deadline <= current_time() - ] - ) - assert i == 6 - - set_available_to_expired() - - units: List = db.session.query(models.Unit).all() - - i: int = 0 - j: int = 0 - for unit in units: - i += len([project for project in unit.projects if project.current_status == 
"Available"]) - j += len([project for project in unit.projects if project.current_status == "Expired"]) - - assert i == 0 - assert j == 6 - - -# set_expired_to_archived - - -@mock.patch("boto3.session.Session") -def test_set_expired_to_archived(_: MagicMock, client: flask.testing.FlaskClient) -> None: - units: List = db.session.query(models.Unit).all() - - for unit in units: - for project in unit.projects: - for status in project.project_statuses: - status.deadline = current_time() - timedelta(weeks=1) - status.status = "Expired" - - i: int = 0 - for unit in units: - i += len([project for project in unit.projects if project.current_status == "Expired"]) - assert i == 6 - - set_expired_to_archived() - - units: List = db.session.query(models.Unit).all() - - i: int = 0 - j: int = 0 - for unit in units: - i += len([project for project in unit.projects if project.current_status == "Expired"]) - j += len([project for project in unit.projects if project.current_status == "Archived"]) - - assert i == 0 - assert j == 6 - - -def test_delete_invite(client: flask.testing.FlaskClient) -> None: - assert len(db.session.query(models.Invite).all()) == 2 - delete_invite() - assert len(db.session.query(models.Invite).all()) == 1 - - -def test_delete_invite_timestamp_issue(client: flask.testing.FlaskClient) -> None: - """Test that the delete_invite cronjob deletes invites with '0000-00-00 00:00:00' timestamp.""" - assert len(db.session.query(models.Invite).all()) == 2 - invites = db.session.query(models.Invite).all() - for invite in invites: - invite.created_at = "0000-00-00 00:00:00" - db.session.commit() - delete_invite() - assert len(db.session.query(models.Invite).all()) == 0 - - -def test_quarterly_usage(client: flask.testing.FlaskClient) -> None: - """Test the quarterly_usage cron job.""" - quarterly_usage() - - -def test_reporting_units_and_users(client: flask.testing.FlaskClient, fs: FakeFilesystem) -> None: - """Test that the reporting is giving correct values.""" - # Create reporting file - reporting_file: pathlib.Path = pathlib.Path("/code/doc/reporting/dds-reporting.csv") - assert not fs.exists(reporting_file) - fs.create_file(reporting_file) - assert fs.exists(reporting_file) - - # Rows for csv - header: typing.List = [ - "Date", - "Units using DDS in production", - "Researchers", - "Unit users", - "Total number of users", - ] - first_row: typing.List = [f"2022-12-10", 2, 108, 11, 119] - - # Fill reporting file with headers and one row - with reporting_file.open(mode="a") as csv_file: - writer = csv.writer(csv_file) - writer.writerow(header) # Header - Columns - writer.writerow(first_row) # First row - - time_now = datetime(year=2022, month=12, day=10, hour=10, minute=54, second=10) - with freezegun.freeze_time(time_now): - # Run scheduled job now - with unittest.mock.patch.object(flask_mail.Mail, "send") as mock_mail_send: - reporting_units_and_users() - assert mock_mail_send.call_count == 1 - - # Check correct numbers - num_units: int = models.Unit.query.count() - num_users_total: int = models.User.query.count() - num_unit_users: int = models.UnitUser.query.count() - num_researchers: int = models.ResearchUser.query.count() - num_superadmins: int = models.SuperAdmin.query.count() - num_users_excl_superadmins: int = num_users_total - num_superadmins - - # Expected new row: - new_row: typing.List = [ - f"{time_now.year}-{time_now.month}-{time_now.day}", - num_units, - num_researchers, - num_unit_users, - num_users_excl_superadmins, - ] - - # Check csv file contents - with 
reporting_file.open(mode="r") as result: - reader = csv.reader(result) - line: int = 0 - for row in reader: - if line == 0: - assert row == header - elif line == 1: - assert row == [str(x) for x in first_row] - elif line == 2: - assert row == [str(x) for x in new_row] - line += 1 - - # Delete file to test error - fs.remove(reporting_file) - assert not fs.exists(reporting_file) - - # Test no file found - with freezegun.freeze_time(time_now): - # Run scheduled job now - with unittest.mock.patch.object(flask_mail.Mail, "send") as mock_mail_send: - with pytest.raises(Exception) as err: - reporting_units_and_users() - assert mock_mail_send.call_count == 1 - assert str(err.value) == "Could not find the csv file." - - # Change total number of users to test error - with unittest.mock.patch("dds_web.scheduled_tasks.sum") as mocker: - mocker.return_value = num_users_total + 1 - # Test incorrect number of users - with freezegun.freeze_time(time_now): - # Run scheduled job now - with unittest.mock.patch.object(flask_mail.Mail, "send") as mock_mail_send: - with pytest.raises(Exception) as err: - reporting_units_and_users() - assert mock_mail_send.call_count == 1 - assert str(err.value) == "Sum of number of users incorrect." diff --git a/tests/test_version.py b/tests/test_version.py index 848a422a6..5894783db 100644 --- a/tests/test_version.py +++ b/tests/test_version.py @@ -2,4 +2,4 @@ def test_version(): - assert version.__version__ == "2.2.4" + assert version.__version__ == "2.2.6"