diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 8f2687a12..c4a77d687 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,67 +1,126 @@ -> **Before submitting the PR, please go through the sections below and fill in what you can. If there are any items that are irrelevant for the current PR, remove the row. If a relevant option is missing, please add it as an item and add a PR comment informing that the new option should be included into this template.** + -> **All _relevant_ items should be ticked before the PR is merged** +## **1. This PR contains the following changes...** -# Description +_Add a summary of the changes and the related issue._ -- [ ] Summary of the changes and the related issue: -- [ ] Motivation and context regarding why the change is needed: -- [ ] List / description of any dependencies or other changes required for this change: -- Fixes an issue in GitHub / Jira: - - [ ] Yes: _[link to GitHub issue / Jira task ID]_ - - [ ] No +## **2. The following additional changes are required for this to work** -## Type of change +_Add information on additional changes required for the PR changes to work, both locally and in the deployments._ + +> E.g. Does the deployment setup need anything for this to work? + +## **3. The PR fixes the following GitHub issue / Jira task** + + + +- [ ] GitHub issue (link): +- [ ] Jira task (ID, `DDS-xxxx`): +- [ ] The PR does not fix a specific GitHub issue or Jira task + +## **4. What _type of change(s)_ does the PR contain?** + + -- [ ] Bug fix - - [ ] Breaking: _Describe_ - - [ ] Non-breaking -- [ ] Documentation - [ ] New feature - - [ ] Breaking: _Describe_ + - [ ] Breaking: _Please describe the reason for the break and how we can fix it._ + - [ ] Non-breaking +- [ ] Database change + - [ ] Migration _included in PR_ + - [ ] Migration _not needed_ +- [ ] Bug fix + - [ ] Breaking: _Please describe the reason for the break and how we can fix it._ - [ ] Non-breaking - [ ] Security Alert fix +- [ ] Documentation - [ ] Tests **(only)** - [ ] Workflow -_"Breaking": The change will cause existing functionality to not work as expected._ - -# Checklist: - -## General - -- [ ] [Changelog](../CHANGELOG.md): New row added. Not needed when PR includes _only_ tests. -- [ ] Database schema has changed - - [ ] A new migration is included in the PR - - [ ] The change does not require a migration -- [ ] Code change - - [ ] Self-review of code done - - [ ] Comments added, particularly in hard-to-understand areas - - Documentation update - - [ ] Done - - [ ] Not needed - -## Repository / Releases - -- [ ] Blocking PRs have been merged -- [ ] Rebase / update of branch done -- [ ] PR to `master` branch (Product Owner / Scrum Master) - - [ ] The [version](../dds_web/version.py) is updated - - [ ] I am bumping the major version (e.g. 1.x.x to 2.x.x) - - [ ] I have made the corresponding changes to the CLI version - - Backward compatible - - [ ] Yes: The code works together with `dds_cli/master` branch - - [ ] No: The code **does not** entirely / at all work together with the `dds_cli/master` branch. 
_Please add detailed and clear information about the broken features_
-
-## Checks
-
-- [ ] CodeQL passes
-- [ ] Formatting: Black & Prettier checks pass
-- Tests
-  - [ ] I have added tests for the new code
-  - [ ] The tests pass
-- Trivy / Snyk:
-  - [ ] There are no new security alerts
-  - [ ] This PR fixes new security alerts
-  - [ ] Security alerts have been dismissed
-  - [ ] PR will be merged with new security alerts; This is why: _Please add a short description here_
+## **5. Checklist**
+
+
+
+### **Always**
+
+
+
+- [Changelog](../CHANGELOG.md)
+  - [ ] Added
+  - [ ] Not needed (e.g. PR contains _only_ tests)
+- Rebase / Update / Merge _from_ base branch (the branch from which the current branch is forked)
+  - [ ] Done
+  - [ ] Not needed
+- Blocking PRs
+  - [ ] Merged
+  - [ ] No blocking PRs
+- PR to `master` branch
+  - [ ] Yes: Go to the section [PR to master](#pr-to-master)
+  - [ ] No
+
+### If PR consists of **code change(s)**
+
+
+
+- Self review
+  - [ ] Done
+- Comments, docstrings, etc.
+  - [ ] Added / Updated
+- Documentation
+  - [ ] Updated
+  - [ ] Update not needed
+
+### If PR is to **master**
+
+
+
+- [ ] I have followed steps 1-5 in [the release instructions](../doc/procedures/new_release.md)
+- [ ] I am bumping the major version (e.g. 1.x.x to 2.x.x)
+- [ ] I have made the corresponding changes to the CLI version
+
+**Is this version _backward compatible?_**
+
+- [ ] Yes: The code works together with the `dds_cli/master` branch
+- [ ] No: The code **does not** entirely / at all work together with the `dds_cli/master` branch. _Please add detailed and clear information about the broken features._
+
+## **6. Actions / Scans**
+
+
+
+- **Black**: Python code formatter. The CI check does not change any code; it only reports issues.
+  Run `black .` locally to apply the formatting.
+  - [ ] Passed
+- **Prettier**: General code formatter; we use it mainly for Markdown and YAML.
+  Run `npx prettier --write .` locally to apply the formatting.
+  - [ ] Passed
+- **Tests**: Pytest to verify that functionality works as expected.
+  - [ ] New tests added
+  - [ ] No new tests
+  - [ ] Passed
+- **CodeQL**: Scans for security vulnerabilities, bugs, and errors.
+  - [ ] New alerts: _Go through them and either fix, dismiss, or ignore them. Add the reasoning in the items below._
+  - [ ] Alerts fixed: _What?_
+  - [ ] Alerts ignored / dismissed: _Why?_
+  - [ ] Passed
+- **Trivy**: Security scanner.
+  - [ ] New alerts: _Go through them and either fix, dismiss, or ignore them. Add the reasoning in the items below._
+  - [ ] Alerts fixed: _What?_
+  - [ ] Alerts ignored / dismissed: _Why?_
+  - [ ] Passed
+- **Snyk**: Security scanner.
+  - [ ] New alerts: _Go through them and either fix, dismiss, or ignore them. Add the reasoning in the items below._
+  - [ ] Alerts fixed: _What?_
+  - [ ] Alerts ignored / dismissed: _Why?_
+  - [ ] Passed
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index fbeb0ab63..7cf5703c7 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -1,3 +1,13 @@
+# CodeQL analysis:
+# CodeQL is the analysis engine used by developers to automate security checks,
+# and by security researchers to perform variant analysis.
+# In CodeQL, code is treated like data. Security vulnerabilities, bugs,
+# and other errors are modeled as queries that can be executed against databases
+# extracted from code. You can run the standard CodeQL queries, written by GitHub
+# researchers and community contributors, or write your own to use in custom analyses.
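+# A query is a small QL program; an illustrative (hypothetical, not used by this
+# workflow) example for Python, finding calls to eval():
+#
+#   import python
+#   from Call call, Name name
+#   where call.getFunc() = name and name.getId() = "eval"
+#   select call, "Call to eval()."
+#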
+# Queries that find potential bugs highlight the result directly in the source file.
+# https://codeql.github.com/docs/codeql-overview/about-codeql/
+# ----------------------------------------------------------------------------
 # For most projects, this workflow file will not need changing; you simply need
 # to commit it to your repository.
 #
@@ -41,11 +51,11 @@ jobs:
 
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       # Initializes the CodeQL tools for scanning.
       - name: Initialize CodeQL
-        uses: github/codeql-action/init@v1
+        uses: github/codeql-action/init@v2
         with:
           languages: ${{ matrix.language }}
           # If you wish to specify custom queries, you can do so here or in a config file.
@@ -56,7 +66,7 @@
       # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
       # If this step fails, then you should remove it and run the build manually (see below)
       - name: Autobuild
-        uses: github/codeql-action/autobuild@v1
+        uses: github/codeql-action/autobuild@v2
 
       # ℹ️ Command-line programs to run using the OS shell.
       # 📚 https://git.io/JvXDl
@@ -70,4 +80,4 @@
       #   make release
 
       - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@v1
+        uses: github/codeql-action/analyze@v2
diff --git a/.github/workflows/docker-compose-tests.yml b/.github/workflows/docker-compose-tests.yml
index 5f6444ac4..e5cfd590a 100644
--- a/.github/workflows/docker-compose-tests.yml
+++ b/.github/workflows/docker-compose-tests.yml
@@ -1,3 +1,7 @@
+# Pytest
+# Runs all tests in dds_web by executing the docker compose yml files for testing.
+# The actual pytest command lives in those compose files, not here.
+
 name: Tests
 
 on:
diff --git a/.github/workflows/prettier.yml b/.github/workflows/prettier.yml
index db7dfc117..cf395e4ed 100644
--- a/.github/workflows/prettier.yml
+++ b/.github/workflows/prettier.yml
@@ -1,3 +1,7 @@
+# Prettier
+# Prettier is a code formatter. We mostly use it for the markdown files.
+# https://prettier.io/
+
 name: Lint with Prettier
 
 on: [push, pull_request]
diff --git a/.github/workflows/publish_and_trivyscan.yml b/.github/workflows/publish_and_trivyscan.yml
index 45c0ee773..14089531a 100644
--- a/.github/workflows/publish_and_trivyscan.yml
+++ b/.github/workflows/publish_and_trivyscan.yml
@@ -1,5 +1,18 @@
----
-name: Publish Docker Image and run Trivy Security Scan
+# GHCR:
+# GitHub Container Registry
+# Images from GHCR are used when deploying prod and dev.
+#
+# Trivy
+# Trivy is a comprehensive and versatile security scanner. Trivy has scanners that look for
+# security issues, and targets where it can find those issues.
+#
+# This action publishes a docker image and then runs the Trivy security scan on that image.
+# Potential security issues will be uploaded to the security tab in the repository.
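+#
+# To reproduce the scan locally (assumes trivy is installed; the tag below is
+# illustrative, not necessarily one that exists):
+#   docker pull ghcr.io/scilifelabdatacentre/dds-backend:dev
+#   trivy image ghcr.io/scilifelabdatacentre/dds-backend:dev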
+#
+# https://github.com/aquasecurity/trivy
+# ---------------------------------------
+# Publish Docker Image to GHCR and run Trivy Security Scan
+name: GHCR and Trivy Scan
 on:
   pull_request:
   push:
@@ -11,7 +24,7 @@ on:
 jobs:
   push_to_registry:
     if: github.repository == 'ScilifelabDataCentre/dds_web'
-    name: Push Docker image to Docker Hub
+    name: Push image
     runs-on: ubuntu-latest
     permissions:
       contents: read
@@ -23,11 +36,6 @@ jobs:
     steps:
      - name: Check out the repo
        uses: actions/checkout@v3
-      - name: Log in to Docker Hub
-        uses: docker/login-action@v2
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_TOKEN }}
      - name: Log in to Github Container Repository
        uses: docker/login-action@v2
        with:
@@ -39,7 +47,6 @@
        uses: docker/metadata-action@v4
        with:
          images: |
-            scilifelabdatacentre/dds-backend
            ghcr.io/scilifelabdatacentre/dds-backend
      - name: Ensure lowercase name
        run: echo IMAGE_REPOSITORY=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
diff --git a/.github/workflows/python-black.yml b/.github/workflows/python-black.yml
index 312940af3..74127ef6f 100644
--- a/.github/workflows/python-black.yml
+++ b/.github/workflows/python-black.yml
@@ -1,3 +1,8 @@
+# Lint Python - Black
+# This action lints Python using Black, a Python code formatter.
+# https://github.com/psf/black
+# This does not format the code; it only detects and reports issues.
+# To format with black, run `black .` locally in the repository.
 name: Lint Python
 on:
   push:
diff --git a/.github/workflows/snyk-scan.yml b/.github/workflows/snyk-scan.yml
new file mode 100644
index 000000000..db080f506
--- /dev/null
+++ b/.github/workflows/snyk-scan.yml
@@ -0,0 +1,45 @@
+# Snyk scan:
+# This workflow is based on Snyk's sample workflow for Infrastructure as Code
+# configuration files (Kubernetes, Helm & Terraform); note, however, that the
+# job below runs the Snyk Code test (`command: code test`) against the repository.
+# The results are then uploaded to GitHub Security Code Scanning.
+#
+# For more examples, including how to limit scans to only high-severity issues
+# and fail PR checks, see https://github.com/snyk/actions/
+# -----------------------------------------------------------
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+# ------------------------------------------------------------
+name: Snyk IaC
+
+on:
+  push:
+    branches: ["dev", "master"]
+  pull_request:
+    # The branches below must be a subset of the branches above
+    branches: ["dev"]
+  schedule:
+    - cron: "0 7,13 * * *"
+
+jobs:
+  snyk:
+    permissions:
+      contents: read
+      security-events: write
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@master
+      - name: Run Snyk to check for vulnerabilities
+        uses: snyk/actions/python@master
+        continue-on-error: true # To make sure that SARIF upload gets called
+        env:
+          SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }}
+        with:
+          command: code test
+          args: --sarif-file-output=snyk.sarif
+      - name: Upload result to GitHub Code Scanning
+        uses: github/codeql-action/upload-sarif@v2
+        with:
+          sarif_file: snyk.sarif
+          category: snyk
diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml
index 09abfd196..b1014c8e6 100644
--- a/.github/workflows/trivy.yml
+++ b/.github/workflows/trivy.yml
@@ -1,3 +1,8 @@
+# Trivy:
+# Trivy is a comprehensive and versatile security scanner.
+# Trivy has scanners that look for security issues, and targets where it can find those issues.
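+# Typical targets are container images, filesystems and git repositories; the
+# scanners cover known vulnerabilities (CVEs), misconfigurations and exposed
+# secrets.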
+# https://github.com/aquasecurity/trivy +# --------------------------------- name: trivy on: schedule: diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a59ae0db..bb371ca10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -176,10 +176,24 @@ Please add a _short_ line describing the PR you make, if the PR implements a spe - Changed support email ([#1324](https://github.com/ScilifelabDataCentre/dds_web/pull/1324)) - Allow Super Admin login during maintenance ([#1333](https://github.com/ScilifelabDataCentre/dds_web/pull/1333)) -## Sprint (2022-12-09 - 2022-12-23) +## Sprint (2022-12-09 - 2023-01-09) - Longer sprint due to Christmas - Dependency: Bump `certifi` due to CVE-2022-23491 ([#1337](https://github.com/ScilifelabDataCentre/dds_web/pull/1337)) - Dependency: Bump `jwcrypto` due to CVE-2022-3102 ([#1339](https://github.com/ScilifelabDataCentre/dds_web/pull/1339)) - Cronjob: Get number of units and users for reporting ([#1324](https://github.com/ScilifelabDataCentre/dds_web/pull/1335)) - Add ability to change project information via ProjectInfo endpoint ([#1331](https://github.com/ScilifelabDataCentre/dds_web/pull/1331)) - Fix the reporting file path ([1345](https://github.com/ScilifelabDataCentre/dds_web/pull/1345)) + +## Sprint (2023-01-09 - 2023-01-20) + +- Refactoring: Move flask commands to own module `commands.py` ([#1351](https://github.com/ScilifelabDataCentre/dds_web/pull/1351)) +- Workflow: Scan with Snyk on PR and schedule ([#1349](https://github.com/ScilifelabDataCentre/dds_web/pull/1349)) +- Flask command (cronjob): Monitor unit usage and warn if above level ([#1350](https://github.com/ScilifelabDataCentre/dds_web/pull/1350)) + +## Sprint (2023-01-20 - 2023-02-03) + +- Workflow: Do not publish to DockerHub anymore ([#1357](https://github.com/ScilifelabDataCentre/dds_web/pull/1357)) +- Refactoring: move cronjobs previously handled by APScheduler to flask commands ([#1355](https://github.com/ScilifelabDataCentre/dds_web/pull/1355)) +- Bug: Fix type issue in 0c9c237cced5 (latest) migration ([#1360](https://github.com/ScilifelabDataCentre/dds_web/pull/1360)) +- Database: New `Reporting` table for saving unit / user stats every month ([#1363](https://github.com/ScilifelabDataCentre/dds_web/pull/1363)) +- Version bump: 2.2.6 ([#1375](https://github.com/ScilifelabDataCentre/dds_web/pull/1375)) diff --git a/Dockerfiles/backend.Dockerfile b/Dockerfiles/backend.Dockerfile index 9db625df5..8f0aae26f 100644 --- a/Dockerfiles/backend.Dockerfile +++ b/Dockerfiles/backend.Dockerfile @@ -45,7 +45,7 @@ RUN apk add mariadb-client ################### ## BUILD FRONTEND ################### -FROM node:16 as nodebuilder +FROM node:18 as nodebuilder COPY ./dds_web/static /build WORKDIR /build RUN npm install -g npm@latest --quiet diff --git a/Dockerfiles/nodebuilder.Dockerfile b/Dockerfiles/nodebuilder.Dockerfile index 1ea3f63cf..cd8b8e97f 100644 --- a/Dockerfiles/nodebuilder.Dockerfile +++ b/Dockerfiles/nodebuilder.Dockerfile @@ -1,4 +1,4 @@ -FROM node:16 +FROM node:18 RUN mkdir /build WORKDIR /build RUN npm install -g npm@latest --quiet diff --git a/dds_web/__init__.py b/dds_web/__init__.py index 58e720d19..a606f9042 100644 --- a/dds_web/__init__.py +++ b/dds_web/__init__.py @@ -6,15 +6,11 @@ # Standard library import logging -import datetime import pathlib import sys -import re import os -import typing # Installed -import click import flask from flask_sqlalchemy import SQLAlchemy from flask_marshmallow import Marshmallow @@ -24,7 +20,7 @@ import flask_mail import flask_login import flask_migrate 
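+# APScheduler has been removed: the scheduled jobs now live as flask commands in
+# dds_web/commands.py and are triggered by the deployment (e.g. as cronjobs)
+# rather than by an in-process scheduler.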
-from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore + # import flask_qrcode from werkzeug.middleware.proxy_fix import ProxyFix @@ -34,8 +30,6 @@ import structlog import werkzeug -from dds_web.scheduled_tasks import scheduler - #################################################################################################### # GLOBAL VARIABLES ############################################################## GLOBAL VARIABLES # #################################################################################################### @@ -265,11 +259,34 @@ def load_user(user_id): client_kwargs={"scope": "openid profile email"}, ) + # Import flask commands - all + from dds_web.commands import ( + fill_db_wrapper, + create_new_unit, + update_uploaded_file_with_log, + lost_files_s3_db, + set_available_to_expired, + set_expired_to_archived, + delete_invites, + quarterly_usage, + reporting_units_and_users, + monitor_usage, + ) + + # Add flask commands - general app.cli.add_command(fill_db_wrapper) app.cli.add_command(create_new_unit) app.cli.add_command(update_uploaded_file_with_log) app.cli.add_command(lost_files_s3_db) + # Add flask commands - cronjobs + app.cli.add_command(set_available_to_expired) + app.cli.add_command(set_expired_to_archived) + app.cli.add_command(delete_invites) + app.cli.add_command(quarterly_usage) + app.cli.add_command(reporting_units_and_users) + app.cli.add_command(monitor_usage) + # Make version available inside jinja templates: @app.template_filter("dds_version") def dds_version_filter(_): @@ -290,11 +307,6 @@ def dds_version_filter(_): app.register_blueprint(pages, url_prefix="") app.register_blueprint(auth_blueprint, url_prefix="") - # Set-up the scheduler - app.config["SCHEDULER_JOBSTORES"] = {"default": SQLAlchemyJobStore(engine=db.engine)} - scheduler.init_app(app) - scheduler.start() - ENCRYPTION_KEY_BIT_LENGTH = 256 ENCRYPTION_KEY_CHAR_LENGTH = int(ENCRYPTION_KEY_BIT_LENGTH / 8) @@ -307,324 +319,3 @@ def dds_version_filter(_): except sqlalchemy.exc.OperationalError as err: app.logger.exception("The database seems to be down.") sys.exit(1) - - -@click.command("init-db") -@click.argument("db_type", type=click.Choice(["production", "dev-small", "dev-big"])) -@flask.cli.with_appcontext -def fill_db_wrapper(db_type): - from dds_web.database import models - - if db_type == "production": - username = flask.current_app.config["SUPERADMIN_USERNAME"] - password = flask.current_app.config["SUPERADMIN_PASSWORD"] - name = flask.current_app.config["SUPERADMIN_NAME"] - existing_user = models.User.query.filter_by(username=username).one_or_none() - - email = flask.current_app.config["SUPERADMIN_EMAIL"] - existing_email = models.Email.query.filter_by(email=email).one_or_none() - - if existing_email: - flask.current_app.logger.info( - f"User with email '{email}' already exists, not creating user." - ) - elif existing_user: - if isinstance(existing_user, models.SuperAdmin): - flask.current_app.logger.info( - f"Super admin with username '{username}' already exists, not creating user." 
- ) - else: - flask.current_app.logger.info(f"Adding Super Admin: {username} ({email})") - new_super_admin = models.SuperAdmin(username=username, name=name, password=password) - new_email = models.Email(email=email, primary=True) - new_email.user = new_super_admin - db.session.add(new_email) - db.session.commit() - flask.current_app.logger.info(f"Super Admin added: {username} ({email})") - else: - flask.current_app.logger.info("Initializing development db") - assert flask.current_app.config["USE_LOCAL_DB"] - - if db_type == "dev-small": - from dds_web.development.db_init import fill_db - - fill_db() - elif db_type == "dev-big": - import dds_web.development.factories - - dds_web.development.factories.create_all() - - flask.current_app.logger.info("DB filled") - - -@click.command("create-unit") -@click.option("--name", "-n", type=str, required=True) -@click.option("--public_id", "-p", type=str, required=True) -@click.option("--external_display_name", "-e", type=str, required=True) -@click.option("--contact_email", "-c", type=str, required=True) -@click.option("--internal_ref", "-ref", type=str, required=False) -@click.option("--safespring_endpoint", "-se", type=str, required=True) -@click.option("--safespring_name", "-sn", type=str, required=True) -@click.option("--safespring_access", "-sa", type=str, required=True) -@click.option("--safespring_secret", "-ss", type=str, required=True) -@click.option("--days_in_available", "-da", type=int, required=False, default=90) -@click.option("--days_in_expired", "-de", type=int, required=False, default=30) -@flask.cli.with_appcontext -def create_new_unit( - name, - public_id, - external_display_name, - contact_email, - internal_ref, - safespring_endpoint, - safespring_name, - safespring_access, - safespring_secret, - days_in_available, - days_in_expired, -): - """Create a new unit. - - Rules for bucket names, which are affected by the public_id at the moment: - https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html - """ - from dds_web.database import models - - error_message = "" - if len(public_id) > 50: - error_message = "The 'public_id' can be a maximum of 50 characters" - elif re.findall(r"[^a-zA-Z0-9.-]", public_id): - error_message = ( - "The 'public_id' can only contain letters, numbers, dots (.) and hyphens (-)." - ) - elif public_id[0] in [".", "-"]: - error_message = "The 'public_id' must begin with a letter or number." - elif public_id.count(".") > 2: - error_message = "The 'public_id' should not contain more than two dots." - elif public_id.startswith("xn--"): - error_message = "The 'public_id' cannot begin with the 'xn--' prefix." 
- - if error_message: - flask.current_app.logger.error(error_message) - return - - new_unit = models.Unit( - name=name, - public_id=public_id, - external_display_name=external_display_name, - contact_email=contact_email, - internal_ref=internal_ref or public_id, - safespring_endpoint=safespring_endpoint, - safespring_name=safespring_name, - safespring_access=safespring_access, - safespring_secret=safespring_secret, - days_in_available=days_in_available, - days_in_expired=days_in_expired, - ) - db.session.add(new_unit) - db.session.commit() - - flask.current_app.logger.info(f"Unit '{name}' created") - - -@click.command("update-uploaded-file") -@click.option("--project", "-p", type=str, required=True) -@click.option("--path-to-log-file", "-fp", type=str, required=True) -@flask.cli.with_appcontext -def update_uploaded_file_with_log(project, path_to_log_file): - """Update file details that weren't properly uploaded to db from cli log""" - import botocore - from dds_web.database import models - from dds_web import db - from dds_web.api.api_s3_connector import ApiS3Connector - import json - - proj_in_db = models.Project.query.filter_by(public_id=project).one_or_none() - if not proj_in_db: - flask.current_app.logger.error(f"The project '{project}' doesn't exist.") - return - - if not os.path.exists(path_to_log_file): - flask.current_app.logger.error(f"The log file '{path_to_log_file}' doesn't exist.") - return - - with open(path_to_log_file, "r") as f: - log = json.load(f) - errors = {} - files_added = [] - for file, vals in log.items(): - status = vals.get("status") - if not status or not status.get("failed_op") == "add_file_db": - continue - - with ApiS3Connector(project=proj_in_db) as s3conn: - try: - _ = s3conn.resource.meta.client.head_object( - Bucket=s3conn.project.bucket, Key=vals["path_remote"] - ) - except botocore.client.ClientError as err: - if err.response["Error"]["Code"] == "404": - errors[file] = {"error": "File not found in S3", "traceback": err.__traceback__} - else: - file_object = models.File.query.filter( - sqlalchemy.and_( - models.File.name == sqlalchemy.func.binary(file), - models.File.project_id == proj_in_db.id, - ) - ).first() - if file_object: - errors[file] = {"error": "File already in database."} - else: - new_file = models.File( - name=file, - name_in_bucket=vals["path_remote"], - subpath=vals["subpath"], - project_id=proj_in_db.id, - size_original=vals["size_raw"], - size_stored=vals["size_processed"], - compressed=not vals["compressed"], - public_key=vals["public_key"], - salt=vals["salt"], - checksum=vals["checksum"], - ) - new_version = models.Version( - size_stored=new_file.size_stored, time_uploaded=datetime.datetime.utcnow() - ) - proj_in_db.file_versions.append(new_version) - proj_in_db.files.append(new_file) - new_file.versions.append(new_version) - - db.session.add(new_file) - files_added.append(new_file) - db.session.commit() - - flask.current_app.logger.info(f"Files added: {files_added}") - flask.current_app.logger.info(f"Errors while adding files: {errors}") - - -@click.command("lost-files") -@click.argument("action_type", type=click.Choice(["find", "list", "delete", "add-missing-buckets"])) -@flask.cli.with_appcontext -def lost_files_s3_db(action_type: str): - """ - Identify (and optionally delete) files that are present in S3 or in the db, but not both. 
- - Args: - action_type (str): "find", "list", or "delete" - """ - from dds_web.database import models - import boto3 - from dds_web.utils import bucket_is_valid - - # Interate through the units - for unit in models.Unit.query: - session = boto3.session.Session() - - # Connect to S3 - resource = session.resource( - service_name="s3", - endpoint_url=unit.safespring_endpoint, - aws_access_key_id=unit.safespring_access, - aws_secret_access_key=unit.safespring_secret, - ) - - # Variables - db_count = 0 # Files not found in s3 - s3_count = 0 # Files not found in db - - # Iterate through unit projects - for project in unit.projects: - # Check for objects in bucket - try: - s3_filenames = set( - entry.key for entry in resource.Bucket(project.bucket).objects.all() - ) - except resource.meta.client.exceptions.NoSuchBucket: - if project.is_active: - flask.current_app.logger.warning("Missing bucket %s", project.bucket) - # Create a missing bucket if argument chosen - if action_type == "add-missing-buckets": - valid, message = bucket_is_valid(bucket_name=project.bucket) - if not valid: - flask.current_app.logger.warning( - f"Could not create bucket '{project.bucket}' for project '{project.public_id}': {message}" - ) - else: - resource.create_bucket(Bucket=project.bucket) - flask.current_app.logger.info(f"Bucket '{project.bucket}' created.") - continue - - # Get objects in project - try: - db_filenames = set(entry.name_in_bucket for entry in project.files) - except sqlalchemy.exc.OperationalError: - flask.current_app.logger.critical("Unable to connect to db") - - # Differences - diff_db = db_filenames.difference(s3_filenames) # In db but not in S3 - diff_s3 = s3_filenames.difference(db_filenames) # In S3 but not in db - - # List all files which are missing in either db of s3 - # or delete the files from the s3 if missing in db, or db if missing in s3 - if action_type == "list": - for file_entry in diff_db: - flask.current_app.logger.info( - "Entry %s (%s, %s) not found in S3", file_entry, project, unit - ) - for file_entry in diff_s3: - flask.current_app.logger.info( - "Entry %s (%s, %s) not found in database", file_entry, project, unit - ) - elif action_type == "delete": - # s3 can only delete 1000 objects per request - batch_size = 1000 - s3_to_delete = list(diff_s3) - for i in range(0, len(s3_to_delete), batch_size): - resource.meta.client.delete_objects( - Bucket=project.bucket, - Delete={ - "Objects": [ - {"Key": entry} for entry in s3_to_delete[i : i + batch_size] - ] - }, - ) - - db_entries = models.File.query.filter( - sqlalchemy.and_( - models.File.name_in_bucket.in_(diff_db), - models.File.project_id == project.id, - ) - ) - for db_entry in db_entries: - try: - for db_entry_version in db_entry.versions: - if db_entry_version.time_deleted is None: - db_entry_version.time_deleted = datetime.datetime.utcnow() - db.session.delete(db_entry) - db.session.commit() - except (sqlalchemy.exc.SQLAlchemyError, sqlalchemy.exc.OperationalError): - db.session.rollback() - flask.current_app.logger.critical("Unable to delete the database entries") - sys.exit(1) - - # update the counters at the end of the loop to have accurate numbers for delete - s3_count += len(diff_s3) - db_count += len(diff_db) - - # Print out information about actions performed in cronjob - if s3_count or db_count: - action_word = ( - "Found" if action_type in ("find", "list", "add-missing-buckets") else "Deleted" - ) - flask.current_app.logger.info( - "%s %d entries for lost files (%d in db, %d in s3)", - action_word, - s3_count + 
db_count,
-            db_count,
-            s3_count,
-        )
-        if action_type in ("find", "list", "add-missing-buckets"):
-            sys.exit(1)
-
-    else:
-        flask.current_app.logger.info("Found no lost files")
diff --git a/dds_web/commands.py b/dds_web/commands.py
new file mode 100644
index 000000000..5dae617c8
--- /dev/null
+++ b/dds_web/commands.py
@@ -0,0 +1,787 @@
+"""Flask commands runnable in the container."""
+
+# Imports
+
+# Standard
+import os
+import re
+import sys
+import datetime
+
+# Installed
+import click
+import flask
+import flask_mail
+import sqlalchemy
+
+# Own
+from dds_web import db
+
+
+@click.command("init-db")
+@click.argument("db_type", type=click.Choice(["production", "dev-small", "dev-big"]))
+@flask.cli.with_appcontext
+def fill_db_wrapper(db_type):
+    """Add the necessary information to the initial database, depending on whether we are in dev or prod."""
+    from dds_web.database import models
+
+    if db_type == "production":
+        username = flask.current_app.config["SUPERADMIN_USERNAME"]
+        password = flask.current_app.config["SUPERADMIN_PASSWORD"]
+        name = flask.current_app.config["SUPERADMIN_NAME"]
+        existing_user = models.User.query.filter_by(username=username).one_or_none()
+
+        email = flask.current_app.config["SUPERADMIN_EMAIL"]
+        existing_email = models.Email.query.filter_by(email=email).one_or_none()
+
+        if existing_email:
+            flask.current_app.logger.info(
+                f"User with email '{email}' already exists, not creating user."
+            )
+        elif existing_user:
+            if isinstance(existing_user, models.SuperAdmin):
+                flask.current_app.logger.info(
+                    f"Super admin with username '{username}' already exists, not creating user."
+                )
+        else:
+            flask.current_app.logger.info(f"Adding Super Admin: {username} ({email})")
+            new_super_admin = models.SuperAdmin(username=username, name=name, password=password)
+            new_email = models.Email(email=email, primary=True)
+            new_email.user = new_super_admin
+            db.session.add(new_email)
+            db.session.commit()
+            flask.current_app.logger.info(f"Super Admin added: {username} ({email})")
+    else:
+        flask.current_app.logger.info("Initializing development db")
+        assert flask.current_app.config["USE_LOCAL_DB"]
+
+        if db_type == "dev-small":
+            from dds_web.development.db_init import fill_db
+
+            fill_db()
+        elif db_type == "dev-big":
+            import dds_web.development.factories
+
+            dds_web.development.factories.create_all()
+
+        flask.current_app.logger.info("DB filled")
+
+
+@click.command("create-unit")
+@click.option("--name", "-n", type=str, required=True)
+@click.option("--public_id", "-p", type=str, required=True)
+@click.option("--external_display_name", "-e", type=str, required=True)
+@click.option("--contact_email", "-c", type=str, required=True)
+@click.option("--internal_ref", "-ref", type=str, required=False)
+@click.option("--safespring_endpoint", "-se", type=str, required=True)
+@click.option("--safespring_name", "-sn", type=str, required=True)
+@click.option("--safespring_access", "-sa", type=str, required=True)
+@click.option("--safespring_secret", "-ss", type=str, required=True)
+@click.option("--days_in_available", "-da", type=int, required=False, default=90)
+@click.option("--days_in_expired", "-de", type=int, required=False, default=30)
+@click.option("--quota", "-q", type=int, required=True)
+# warning_level is stored as a fraction of the quota (models.Unit default: 0.8)
+@click.option("--warn-at", "-w", type=float, required=False, default=0.8)
+@flask.cli.with_appcontext
+def create_new_unit(
+    name,
+    public_id,
+    external_display_name,
+    contact_email,
+    internal_ref,
+    safespring_endpoint,
+    safespring_name,
+    safespring_access,
+    safespring_secret,
+    days_in_available,
+
days_in_expired, + quota, + warn_at, +): + """Create a new unit. + + Rules for bucket names, which are affected by the public_id at the moment: + https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html + """ + from dds_web.database import models + + error_message = "" + if len(public_id) > 50: + error_message = "The 'public_id' can be a maximum of 50 characters" + elif re.findall(r"[^a-zA-Z0-9.-]", public_id): + error_message = ( + "The 'public_id' can only contain letters, numbers, dots (.) and hyphens (-)." + ) + elif public_id[0] in [".", "-"]: + error_message = "The 'public_id' must begin with a letter or number." + elif public_id.count(".") > 2: + error_message = "The 'public_id' should not contain more than two dots." + elif public_id.startswith("xn--"): + error_message = "The 'public_id' cannot begin with the 'xn--' prefix." + + if error_message: + flask.current_app.logger.error(error_message) + return + + new_unit = models.Unit( + name=name, + public_id=public_id, + external_display_name=external_display_name, + contact_email=contact_email, + internal_ref=internal_ref or public_id, + safespring_endpoint=safespring_endpoint, + safespring_name=safespring_name, + safespring_access=safespring_access, + safespring_secret=safespring_secret, + days_in_available=days_in_available, + days_in_expired=days_in_expired, + quota=quota, + warning_level=warn_at, + ) + db.session.add(new_unit) + db.session.commit() + + flask.current_app.logger.info(f"Unit '{name}' created") + + +@click.command("update-uploaded-file") +@click.option("--project", "-p", type=str, required=True) +@click.option("--path-to-log-file", "-fp", type=str, required=True) +@flask.cli.with_appcontext +def update_uploaded_file_with_log(project, path_to_log_file): + """Update file details that weren't properly uploaded to db from cli log""" + import botocore + from dds_web.database import models + from dds_web import db + from dds_web.api.api_s3_connector import ApiS3Connector + import json + + proj_in_db = models.Project.query.filter_by(public_id=project).one_or_none() + if not proj_in_db: + flask.current_app.logger.error(f"The project '{project}' doesn't exist.") + return + + if not os.path.exists(path_to_log_file): + flask.current_app.logger.error(f"The log file '{path_to_log_file}' doesn't exist.") + return + + with open(path_to_log_file, "r") as f: + log = json.load(f) + errors = {} + files_added = [] + for file, vals in log.items(): + status = vals.get("status") + if not status or not status.get("failed_op") == "add_file_db": + continue + + with ApiS3Connector(project=proj_in_db) as s3conn: + try: + _ = s3conn.resource.meta.client.head_object( + Bucket=s3conn.project.bucket, Key=vals["path_remote"] + ) + except botocore.client.ClientError as err: + if err.response["Error"]["Code"] == "404": + errors[file] = {"error": "File not found in S3", "traceback": err.__traceback__} + else: + file_object = models.File.query.filter( + sqlalchemy.and_( + models.File.name == sqlalchemy.func.binary(file), + models.File.project_id == proj_in_db.id, + ) + ).first() + if file_object: + errors[file] = {"error": "File already in database."} + else: + new_file = models.File( + name=file, + name_in_bucket=vals["path_remote"], + subpath=vals["subpath"], + project_id=proj_in_db.id, + size_original=vals["size_raw"], + size_stored=vals["size_processed"], + compressed=not vals["compressed"], + public_key=vals["public_key"], + salt=vals["salt"], + checksum=vals["checksum"], + ) + new_version = models.Version( + 
size_stored=new_file.size_stored, time_uploaded=datetime.datetime.utcnow()
+                    )
+                    proj_in_db.file_versions.append(new_version)
+                    proj_in_db.files.append(new_file)
+                    new_file.versions.append(new_version)
+
+                    db.session.add(new_file)
+                    files_added.append(new_file)
+                db.session.commit()
+
+        flask.current_app.logger.info(f"Files added: {files_added}")
+        flask.current_app.logger.info(f"Errors while adding files: {errors}")
+
+
+@click.command("lost-files")
+@click.argument("action_type", type=click.Choice(["find", "list", "delete", "add-missing-buckets"]))
+@flask.cli.with_appcontext
+def lost_files_s3_db(action_type: str):
+    """Identify (and optionally delete) files that are present in S3 or in the db, but not both.
+
+    Args:
+        action_type (str): "find", "list", or "delete"
+    """
+    from dds_web.database import models
+    import boto3
+    from dds_web.utils import bucket_is_valid
+
+    # Iterate through the units
+    for unit in models.Unit.query:
+        session = boto3.session.Session()
+
+        # Connect to S3
+        resource = session.resource(
+            service_name="s3",
+            endpoint_url=unit.safespring_endpoint,
+            aws_access_key_id=unit.safespring_access,
+            aws_secret_access_key=unit.safespring_secret,
+        )
+
+        # Variables
+        db_count = 0  # Files not found in s3
+        s3_count = 0  # Files not found in db
+
+        # Iterate through unit projects
+        for project in unit.projects:
+            # Check for objects in bucket
+            try:
+                s3_filenames = set(
+                    entry.key for entry in resource.Bucket(project.bucket).objects.all()
+                )
+            except resource.meta.client.exceptions.NoSuchBucket:
+                if project.is_active:
+                    flask.current_app.logger.warning("Missing bucket %s", project.bucket)
+                    # Create a missing bucket if argument chosen
+                    if action_type == "add-missing-buckets":
+                        valid, message = bucket_is_valid(bucket_name=project.bucket)
+                        if not valid:
+                            flask.current_app.logger.warning(
+                                f"Could not create bucket '{project.bucket}' for project '{project.public_id}': {message}"
+                            )
+                        else:
+                            resource.create_bucket(Bucket=project.bucket)
+                            flask.current_app.logger.info(f"Bucket '{project.bucket}' created.")
+                continue
+
+            # Get objects in project
+            try:
+                db_filenames = set(entry.name_in_bucket for entry in project.files)
+            except sqlalchemy.exc.OperationalError:
+                flask.current_app.logger.critical("Unable to connect to db")
+                # Skip this project: db_filenames is undefined when the db is unreachable
+                continue
+
+            # Differences
+            diff_db = db_filenames.difference(s3_filenames)  # In db but not in S3
+            diff_s3 = s3_filenames.difference(db_filenames)  # In S3 but not in db
+
+            # List all files which are missing in either db or s3
+            # or delete the files from the s3 if missing in db, or db if missing in s3
+            if action_type == "list":
+                for file_entry in diff_db:
+                    flask.current_app.logger.info(
+                        "Entry %s (%s, %s) not found in S3", file_entry, project, unit
+                    )
+                for file_entry in diff_s3:
+                    flask.current_app.logger.info(
+                        "Entry %s (%s, %s) not found in database", file_entry, project, unit
+                    )
+            elif action_type == "delete":
+                # s3 can only delete 1000 objects per request
+                batch_size = 1000
+                s3_to_delete = list(diff_s3)
+                for i in range(0, len(s3_to_delete), batch_size):
+                    resource.meta.client.delete_objects(
+                        Bucket=project.bucket,
+                        Delete={
+                            "Objects": [
+                                {"Key": entry} for entry in s3_to_delete[i : i + batch_size]
+                            ]
+                        },
+                    )
+
+                db_entries = models.File.query.filter(
+                    sqlalchemy.and_(
+                        models.File.name_in_bucket.in_(diff_db),
+                        models.File.project_id == project.id,
+                    )
+                )
+                for db_entry in db_entries:
+                    try:
+                        for db_entry_version in db_entry.versions:
+                            if db_entry_version.time_deleted is None:
+                                db_entry_version.time_deleted =
datetime.datetime.utcnow() + db.session.delete(db_entry) + db.session.commit() + except (sqlalchemy.exc.SQLAlchemyError, sqlalchemy.exc.OperationalError): + db.session.rollback() + flask.current_app.logger.critical("Unable to delete the database entries") + sys.exit(1) + + # update the counters at the end of the loop to have accurate numbers for delete + s3_count += len(diff_s3) + db_count += len(diff_db) + + # Print out information about actions performed in cronjob + if s3_count or db_count: + action_word = ( + "Found" if action_type in ("find", "list", "add-missing-buckets") else "Deleted" + ) + flask.current_app.logger.info( + "%s %d entries for lost files (%d in db, %d in s3)", + action_word, + s3_count + db_count, + db_count, + s3_count, + ) + if action_type in ("find", "list", "add-missing-buckets"): + sys.exit(1) + + else: + flask.current_app.logger.info("Found no lost files") + + +@click.command("set-available-to-expired") +@flask.cli.with_appcontext +def set_available_to_expired(): + """ + Search for available projects whose deadlines are past and expire them. + Should be run every day at around 00:01. + """ + + flask.current_app.logger.info("Task: Checking for Expiring projects.") + + # Imports + # Installed + import sqlalchemy + + # Own + from dds_web import db + from dds_web.database import models + from dds_web.errors import DatabaseError + from dds_web.api.project import ProjectStatus + from dds_web.utils import current_time, page_query + + expire = ProjectStatus() + + errors = {} + + try: + for unit in db.session.query(models.Unit).with_for_update().all(): + errors[unit.name] = {} + + days_in_expired = unit.days_in_expired + + for project in page_query( + db.session.query(models.Project) + .filter( + sqlalchemy.and_( + models.Project.is_active == 1, models.Project.unit_id == unit.id + ) + ) + .with_for_update() + ): + + if ( + project.current_status == "Available" + and project.current_deadline <= current_time() + ): + flask.current_app.logger.debug("Handling expiring project") + flask.current_app.logger.debug( + "Project: %s has status %s and expires on: %s", + project.public_id, + project.current_status, + project.current_deadline, + ) + new_status_row = expire.expire_project( + project=project, + current_time=current_time(), + deadline_in=days_in_expired, + ) + + project.project_statuses.append(new_status_row) + + try: + db.session.commit() + flask.current_app.logger.debug( + "Project: %s has status Expired now!", project.public_id + ) + except ( + sqlalchemy.exc.OperationalError, + sqlalchemy.exc.SQLAlchemyError, + ) as err: + flask.current_app.logger.exception(err) + db.session.rollback() + errors[unit.name][project.public_id] = str(err) + continue + else: + flask.current_app.logger.debug( + "Nothing to do for Project: %s", project.public_id + ) + except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.SQLAlchemyError) as err: + flask.current_app.logger.exception(err) + db.session.rollback() + raise + + for unit, projects in errors.items(): + if projects: + flask.current_app.logger.error( + f"Following projects of Unit '{unit}' encountered issues during expiration process:" + ) + for proj in errors[unit].keys(): + flask.current_app.logger.error(f"Error for project '{proj}': {errors[unit][proj]} ") + + +@click.command("set-expired-to-archived") +@flask.cli.with_appcontext +def set_expired_to_archived(): + """ + Search for expired projects whose deadlines are past and archive them. + Should be run every day at around 01:01. 
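+
+    An illustrative schedule (an assumption for clarity; the actual scheduling
+    is done by the deployment, e.g. as a cronjob):
+    1 1 * * *  flask set-expired-to-archived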
+    """
+
+    flask.current_app.logger.debug("Task: Checking for projects to archive.")
+
+    # Imports
+    # Installed
+    import sqlalchemy
+
+    # Own
+    from dds_web import db
+    from dds_web.database import models
+    from dds_web.errors import DatabaseError
+    from dds_web.utils import current_time, page_query
+    from dds_web.api.project import ProjectStatus
+
+    archive = ProjectStatus()
+    errors = {}
+
+    try:
+        for unit in db.session.query(models.Unit).with_for_update().all():
+            errors[unit.name] = {}
+
+            for project in page_query(
+                db.session.query(models.Project)
+                .filter(
+                    sqlalchemy.and_(
+                        models.Project.is_active == 1, models.Project.unit_id == unit.id
+                    )
+                )
+                .with_for_update()
+            ):
+
+                if (
+                    project.current_status == "Expired"
+                    and project.current_deadline <= current_time()
+                ):
+                    flask.current_app.logger.debug("Handling project to archive")
+                    flask.current_app.logger.debug(
+                        "Project: %s has status %s and expired on: %s",
+                        project.public_id,
+                        project.current_status,
+                        project.current_deadline,
+                    )
+                    new_status_row, delete_message = archive.archive_project(
+                        project=project,
+                        current_time=current_time(),
+                    )
+                    flask.current_app.logger.debug(delete_message.strip())
+                    project.project_statuses.append(new_status_row)
+
+                    try:
+                        db.session.commit()
+                        flask.current_app.logger.debug(
+                            "Project: %s has status Archived now!", project.public_id
+                        )
+                    except (
+                        sqlalchemy.exc.OperationalError,
+                        sqlalchemy.exc.SQLAlchemyError,
+                    ) as err:
+                        flask.current_app.logger.exception(err)
+                        db.session.rollback()
+                        errors[unit.name][project.public_id] = str(err)
+                        continue
+                else:
+                    flask.current_app.logger.debug(
+                        "Nothing to do for Project: %s", project.public_id
+                    )
+    except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.SQLAlchemyError) as err:
+        flask.current_app.logger.exception(err)
+        db.session.rollback()
+        raise
+
+    for unit, projects in errors.items():
+        if projects:
+            flask.current_app.logger.error(
+                f"Following projects of Unit '{unit}' encountered issues during archival process:"
+            )
+            for proj in errors[unit].keys():
+                flask.current_app.logger.error(f"Error for project '{proj}': {errors[unit][proj]} ")
+
+
+@click.command("delete-invites")
+@flask.cli.with_appcontext
+def delete_invites():
+    """
+    Delete invites older than a week.
+    Should be run every day at around 00:01.
+    """
+
+    flask.current_app.logger.debug("Task: Checking for invites to delete.")
+
+    # Imports
+    # Installed
+    from datetime import datetime, timedelta
+    from sqlalchemy.exc import OperationalError, SQLAlchemyError
+
+    # Own
+    from dds_web import db
+    from dds_web.database import models
+    from dds_web.errors import DatabaseError
+    from dds_web.utils import current_time
+
+    expiration: datetime = current_time()
+    errors: dict = {}
+
+    try:
+        invites: list = db.session.query(models.Invite).all()
+        for invite in invites:
+            invalid_invite = invite.created_at == "0000-00-00 00:00:00"
+            if invalid_invite or (invite.created_at + timedelta(weeks=1)) < expiration:
+                try:
+                    db.session.delete(invite)
+                    db.session.commit()
+                    if invalid_invite:
+                        flask.current_app.logger.warning(
+                            "Invite with created_at = 0000-00-00 00:00:00 deleted."
+                        )
+                    else:
+                        flask.current_app.logger.debug("Invite deleted.")
+                except (OperationalError, SQLAlchemyError) as err:
+                    errors[invite] = str(err)
+                    flask.current_app.logger.exception(err)
+                    db.session.rollback()
+                    continue
+    except (OperationalError, SQLAlchemyError) as err:
+        flask.current_app.logger.exception(err)
+        raise
+
+    for invite, error in errors.items():
+        flask.current_app.logger.error(f"{invite} not deleted: {error}")
+
+
+@click.command("quarterly-usage")
+@flask.cli.with_appcontext
+def quarterly_usage():
+    """
+    Collect the quarterly usage for the units.
+    Should be run on the 1st of Jan, Apr, Jul and Oct, at around 00:01.
+    """
+
+    flask.current_app.logger.debug("Task: Collecting usage information from database.")
+
+    # Imports
+    # Installed
+    import sqlalchemy
+
+    # Own
+    from dds_web import db
+    from dds_web.database import models
+    from dds_web.utils import (
+        current_time,
+        page_query,
+        # calculate_period_usage,
+        calculate_version_period_usage,
+    )
+
+    try:
+        # 1. Get projects where is_active = False
+        # .. a. Check if the versions are all time_deleted == time_invoiced
+        # .. b. Yes --> Set new column to True ("done")
+        flask.current_app.logger.info("Marking projects as 'done'....")
+        for unit, project in page_query(
+            db.session.query(models.Unit, models.Project)
+            .join(models.Project)
+            .filter(models.Project.is_active == False)
+        ):
+            # Get number of versions in project that have been fully included in usage calcs
+            num_done = (
+                db.session.query(models.Project, models.Version)
+                .join(models.Version)
+                .filter(
+                    sqlalchemy.and_(
+                        models.Project.id == project.id,
+                        models.Version.time_deleted == models.Version.time_invoiced,
+                    )
+                )
+                .count()
+            )
+
+            # Check if there are any versions that are not fully included
+            # If not, project is done and should not be included in any more usage calculations in billing
+            if num_done == len(project.file_versions):
+                project.done = True
+
+        db.session.commit()
+
+        # 2. Get projects where done = False
+        for unit, project in page_query(
+            db.session.query(models.Unit, models.Project)
+            .join(models.Project)
+            .filter(models.Project.done == False)
+        ):
+            project_byte_hours: int = 0
+            for version in project.file_versions:
+                # Skip deleted and already invoiced versions
+                if version.time_deleted == version.time_invoiced and [
+                    version.time_deleted,
+                    version.time_invoiced,
+                ] != [None, None]:
+                    continue
+                version_bhours = calculate_version_period_usage(version=version)
+                project_byte_hours += version_bhours
+            flask.current_app.logger.info(
+                f"Project {project.public_id} byte hours: {project_byte_hours}"
+            )
+
+            # Create a record in usage table
+            new_record = models.Usage(
+                project_id=project.id,
+                usage=project_byte_hours,
+                cost=0,
+                time_collected=current_time(),
+            )
+            db.session.add(new_record)
+            db.session.commit()
+
+    except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.SQLAlchemyError) as err:
+        flask.current_app.logger.exception(err)
+        db.session.rollback()
+        raise
+
+
+@click.command("reporting-units-and-users")
+@flask.cli.with_appcontext
+def reporting_units_and_users():
+    """
+    At the start of every month, get the number of units and users.
+    Should be run on the 1st of each month, at around 00:01.
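+
+    Illustrative cron schedule (an assumption; the deployment handles the
+    actual scheduling):
+    1 0 1 * *  flask reporting-units-and-users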
+    """
+    # Imports
+    # Installed
+    import flask_mail
+
+    # Own
+    import dds_web.utils
+    from dds_web.database.models import Unit, UnitUser, ResearchUser, SuperAdmin, User, Reporting
+
+    # Get current date as a string
+    current_time = dds_web.utils.timestamp(ts_format="%Y-%m-%d")
+
+    # Get email address
+    recipient: str = flask.current_app.config.get("MAIL_DDS")
+    error_subject: str = "[CRONJOB] Error during collection of DDS unit- and user statistics."
+    error_body: str = (
+        f"The cronjob 'reporting' experienced issues. Please see logs. Time: {current_time}."
+    )
+
+    # New reporting row - the current counts are queried and saved
+    try:
+        unit_count = Unit.query.count()
+        researchuser_count = ResearchUser.query.count()
+        unituser_count = UnitUser.query.count()
+        superadmin_count = SuperAdmin.query.count()
+        total_user_count = User.query.count()
+        new_reporting_row = Reporting(
+            unit_count=unit_count,
+            researchuser_count=researchuser_count,
+            unituser_count=unituser_count,
+            superadmin_count=superadmin_count,
+            total_user_count=total_user_count,
+        )
+        db.session.add(new_reporting_row)
+        db.session.commit()
+    except BaseException as err:  # We want to know if there's any error
+        flask.current_app.logger.warning(
+            f"Exception raised during reporting cronjob. Preparing email. Error: {err}"
+        )
+        # Send email about error
+        file_error_msg: flask_mail.Message = flask_mail.Message(
+            subject=error_subject,
+            recipients=[recipient],
+            body=error_body,
+        )
+        dds_web.utils.send_email_with_retry(msg=file_error_msg)
+        raise
+    else:
+        flask.current_app.logger.info(
+            f"Unit- and user statistics collected successfully: {current_time}"
+        )
+
+
+@click.command("monitor-usage")
+@flask.cli.with_appcontext
+def monitor_usage():
+    """
+    Check each unit's storage usage and compare it with the chosen quota.
+    Should be run on the 1st of each month, at around 00:01.
+    """
+    flask.current_app.logger.info("Starting: Checking unit quotas and usage...")
+
+    # Imports
+    # Own
+    from dds_web.database import models
+    import dds_web.utils
+
+    # Email settings
+    recipient: str = flask.current_app.config.get("MAIL_DDS")
+    default_subject: str = "DDS: Usage quota warning!"
+
+    # Run task
+    for unit in models.Unit.query:
+        flask.current_app.logger.info(f"Checking quotas and usage for: {unit.name}")
+
+        # Get info from database
+        quota: int = unit.quota
+        warn_after: float = unit.warning_level
+        current_usage: int = unit.size
+
+        # Check if 0 and then skip the next steps
+        if not current_usage:
+            flask.current_app.logger.info(
+                f"{unit.name} usage: {current_usage} bytes. Skipping percentage calculation."
+            )
+            continue
+
+        # Calculate percentage of quota
+        perc_used = round((current_usage / quota) * 100, 3)
+
+        # Information to log and potentially send
+        # warning_level is stored as a fraction of the quota (e.g. 0.8 = 80%)
+        info_string: str = (
+            f"- Quota: {quota} bytes\n"
+            f"- Warning level: {warn_after * quota} bytes ({warn_after * 100}%)\n"
+            f"- Current usage: {current_usage} bytes ({perc_used}%)\n"
+        )
+        flask.current_app.logger.debug(
+            f"Monitoring the usage for unit '{unit.name}' showed the following:\n" + info_string
+        )
+
+        # Email if the unit is using more than the warning level allows
+        if perc_used > warn_after * 100:
+            # Email settings
+            message: str = (
+                "A SciLifeLab Unit is approaching the allocated data quota.\n"
+                f"Affected unit: {unit.name}\n"
+                f"{info_string}"
+            )
+            flask.current_app.logger.info(message)
+            msg: flask_mail.Message = flask_mail.Message(
+                subject=default_subject,
+                recipients=[recipient],
+                body=message,
+            )
+            dds_web.utils.send_email_with_retry(msg=msg)
diff --git a/dds_web/database/models.py b/dds_web/database/models.py
index 15a7ccc1f..87de85168 100644
--- a/dds_web/database/models.py
+++ b/dds_web/database/models.py
@@ -197,6 +197,8 @@ class Unit(db.Model):
     days_in_available = db.Column(db.Integer, unique=False, nullable=False, default=90)
     counter = db.Column(db.Integer, unique=False, nullable=True)
     days_in_expired = db.Column(db.Integer, unique=False, nullable=False, default=30)
+    quota = db.Column(db.BigInteger, unique=False, nullable=False)
+    warning_level = db.Column(db.Float, unique=False, nullable=False, default=0.8)
 
     # Relationships
     users = db.relationship("UnitUser", back_populates="unit")
@@ -1049,3 +1051,20 @@ class Maintenance(db.Model):
     # Columns
     id = db.Column(db.Integer, primary_key=True, autoincrement=True)
     active = db.Column(db.Boolean, nullable=False, default=True)
+
+
+class Reporting(db.Model):
+    """Keep track of number of users and units."""
+
+    # Table setup
+    __tablename__ = "reporting"
+    __table_args__ = {"extend_existing": True}
+
+    # Columns
+    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
+    date = db.Column(db.DateTime(), unique=True, nullable=False, default=datetime.date.today)
+    unit_count = db.Column(db.Integer, unique=False, nullable=False)
+    researchuser_count = db.Column(db.Integer, unique=False, nullable=False)
+    unituser_count = db.Column(db.Integer, unique=False, nullable=False)
+    superadmin_count = db.Column(db.Integer, unique=False, nullable=False)
+    total_user_count = db.Column(db.Integer, unique=False, nullable=False)
diff --git a/dds_web/development/db_init.py b/dds_web/development/db_init.py
index 3595f935a..c04937ccf 100644
--- a/dds_web/development/db_init.py
+++ b/dds_web/development/db_init.py
@@ -90,6 +90,7 @@ def fill_db():
         external_display_name="Unit 1 external",
         contact_email="support@example.com",
         internal_ref="someunit",
+        quota=10**9,  # 1 GB
         safespring_endpoint=current_app.config.get("SAFESPRING_URL"),
         safespring_name=current_app.config.get("DDS_SAFESPRING_PROJECT"),
         safespring_access=current_app.config.get("DDS_SAFESPRING_ACCESS"),
diff --git a/dds_web/scheduled_tasks.py b/dds_web/scheduled_tasks.py
deleted file mode 100644
index 02c24ac55..000000000
--- a/dds_web/scheduled_tasks.py
+++ /dev/null
@@ -1,389 +0,0 @@
-from datetime import datetime, timedelta
-
-import flask_apscheduler
-import flask
-
-from typing import Dict
-
-## Apscheduler
-scheduler = flask_apscheduler.APScheduler()
-
-
-@scheduler.task("cron", id="available_to_expired", hour=0, minute=1, misfire_grace_time=3600)
-# @scheduler.task("interval", id="available_to_expired", seconds=15, misfire_grace_time=1)
-def
set_available_to_expired(): - scheduler.app.logger.debug("Task: Checking for Expiring projects.") - import sqlalchemy - - from dds_web import db - from dds_web.database import models - from dds_web.errors import DatabaseError - from dds_web.api.project import ProjectStatus - from dds_web.utils import current_time, page_query - - with scheduler.app.app_context(): - expire = ProjectStatus() - - errors = {} - - try: - for unit in db.session.query(models.Unit).with_for_update().all(): - errors[unit.name] = {} - - days_in_expired = unit.days_in_expired - - for project in page_query( - db.session.query(models.Project) - .filter( - sqlalchemy.and_( - models.Project.is_active == 1, models.Project.unit_id == unit.id - ) - ) - .with_for_update() - ): - - if ( - project.current_status == "Available" - and project.current_deadline <= current_time() - ): - scheduler.app.logger.debug("Handling expiring project") - scheduler.app.logger.debug( - "Project: %s has status %s and expires on: %s", - project.public_id, - project.current_status, - project.current_deadline, - ) - new_status_row = expire.expire_project( - project=project, - current_time=current_time(), - deadline_in=days_in_expired, - ) - - project.project_statuses.append(new_status_row) - - try: - db.session.commit() - scheduler.app.logger.debug( - "Project: %s has status Expired now!", project.public_id - ) - except ( - sqlalchemy.exc.OperationalError, - sqlalchemy.exc.SQLAlchemyError, - ) as err: - flask.current_app.logger.exception(err) - db.session.rollback() - errors[unit.name][project.public_id] = str(err) - continue - else: - scheduler.app.logger.debug( - "Nothing to do for Project: %s", project.public_id - ) - except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.SQLAlchemyError) as err: - flask.current_app.logger.exception(err) - db.session.rollback() - raise - - for unit, projects in errors.items(): - if projects: - scheduler.app.logger.error( - f"Following projects of Unit '{unit}' encountered issues during expiration process:" - ) - for proj in errors[unit].keys(): - scheduler.app.logger.error(f"Error for project '{proj}': {errors[unit][proj]} ") - - -@scheduler.task("cron", id="expired_to_archived", hour=1, minute=1, misfire_grace_time=3600) -# @scheduler.task("interval", id="expired_to_archived", seconds=15, misfire_grace_time=1) -def set_expired_to_archived(): - """Search for expired projects whose deadlines are past and archive them""" - - scheduler.app.logger.debug("Task: Checking for projects to archive.") - - import sqlalchemy - from dds_web import db - from dds_web.database import models - from dds_web.errors import DatabaseError - from dds_web.utils import current_time, page_query - from dds_web.api.project import ProjectStatus - - with scheduler.app.app_context(): - - archive = ProjectStatus() - errors = {} - - try: - for unit in db.session.query(models.Unit).with_for_update().all(): - errors[unit.name] = {} - - for project in page_query( - db.session.query(models.Project) - .filter( - sqlalchemy.and_( - models.Project.is_active == 1, models.Project.unit_id == unit.id - ) - ) - .with_for_update() - ): - - if ( - project.current_status == "Expired" - and project.current_deadline <= current_time() - ): - scheduler.app.logger.debug("Handling project to archive") - scheduler.app.logger.debug( - "Project: %s has status %s and expired on: %s", - project.public_id, - project.current_status, - project.current_deadline, - ) - new_status_row, delete_message = archive.archive_project( - project=project, - current_time=current_time(), 
-                        )
-                        scheduler.app.logger.debug(delete_message.strip())
-                        project.project_statuses.append(new_status_row)
-
-                        try:
-                            db.session.commit()
-                            scheduler.app.logger.debug(
-                                "Project: %s has status Archived now!", project.public_id
-                            )
-                        except (
-                            sqlalchemy.exc.OperationalError,
-                            sqlalchemy.exc.SQLAlchemyError,
-                        ) as err:
-                            scheduler.app.logger.exception(err)
-                            db.session.rollback()
-                            errors[unit.name][project.public_id] = str(err)
-                            continue
-                    else:
-                        scheduler.app.logger.debug(
-                            "Nothing to do for Project: %s", project.public_id
-                        )
-        except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.SQLAlchemyError) as err:
-            scheduler.app.logger.exception(err)
-            db.session.rollback()
-            raise
-
-        for unit, projects in errors.items():
-            if projects:
-                scheduler.app.logger.error(
-                    f"Following projects of Unit '{unit}' encountered issues during archival process:"
-                )
-                for proj in errors[unit].keys():
-                    scheduler.app.logger.error(f"Error for project '{proj}': {errors[unit][proj]} ")
-
-
-@scheduler.task("cron", id="delete_invite", hour=0, minute=1, misfire_grace_time=3600)
-# @scheduler.task("interval", id="delete_invite", seconds=15, misfire_grace_time=1)
-
-
-def delete_invite():
-    """Delete invite older than a week"""
-
-    scheduler.app.logger.debug("Task: Checking for invites to delete.")
-
-    from sqlalchemy.exc import OperationalError, SQLAlchemyError
-    from dds_web import db
-    from dds_web.database import models
-    from dds_web.errors import DatabaseError
-    from dds_web.utils import current_time
-
-    with scheduler.app.app_context():
-        expiration: datetime.datetime = current_time()
-        errors: Dict = {}
-
-        try:
-            invites: list = db.session.query(models.Invite).all()
-            for invite in invites:
-                invalid_invite = invite.created_at == "0000-00-00 00:00:00"
-                if invalid_invite or (invite.created_at + timedelta(weeks=1)) < expiration:
-                    try:
-                        db.session.delete(invite)
-                        db.session.commit()
-                        if invalid_invite:
-                            scheduler.app.logger.warning(
-                                "Invite with created_at = 0000-00-00 00:00:00 deleted."
-                            )
-                        else:
-                            scheduler.app.logger.debug("Invite deleted.")
-                    except (OperationalError, SQLAlchemyError) as err:
-                        errors[invite] = str(err)
-                        scheduler.app.logger.exception(err)
-                        db.session.rollback()
-                        continue
-        except (OperationalError, SQLAlchemyError) as err:
-            scheduler.app.logger.exception(err)
-            raise
-
-        for invite, error in errors.items():
-            scheduler.app.logger.error(f"{invite} not deleted: {error}")
-
-
-@scheduler.task(
-    "cron", id="get_quarterly_usage", month="Jan,Apr,Jul,Oct", day="1", hour=0, minute=1
-)
-# @scheduler.task("interval", id="monthly_usage", seconds=60, misfire_grace_time=1)
-def quarterly_usage():
-    """Get the monthly usage for the units"""
-
-    scheduler.app.logger.debug("Task: Collecting usage information from database.")
-    import sqlalchemy
-
-    from dds_web import db
-    from dds_web.database import models
-    from dds_web.utils import (
-        current_time,
-        page_query,
-        # calculate_period_usage,
-        calculate_version_period_usage,
-    )
-
-    with scheduler.app.app_context():
-        try:
-            # 1. Get projects where is_active = False
-            # .. a. Check if the versions are all time_deleted == time_invoiced
-            # .. b. Yes --> Set new column to True ("done")
-            scheduler.app.logger.info("Marking projects as 'done'....")
-            for unit, project in page_query(
-                db.session.query(models.Unit, models.Project)
-                .join(models.Project)
-                .filter(models.Project.is_active == False)
-            ):
-                # Get number of versions in project that have been fully included in usage calcs
-                num_done = (
-                    db.session.query(models.Project, models.Version)
-                    .join(models.Version)
-                    .filter(
-                        sqlalchemy.and_(
-                            models.Project.id == project.id,
-                            models.Version.time_deleted == models.Version.time_invoiced,
-                        )
-                    )
-                    .count()
-                )
-
-                # Check if there are any versions that are not fully included
-                # If not, project is done and should not be included in any more usage calculations in billing
-                if num_done == len(project.file_versions):
-                    project.done = True
-
-                db.session.commit()
-
-            # 2. Get project where done = False
-            for unit, project in page_query(
-                db.session.query(models.Unit, models.Project)
-                .join(models.Project)
-                .filter(models.Project.done == False)
-            ):
-                project_byte_hours: int = 0
-                for version in project.file_versions:
-                    # Skipp deleted and already invoiced versions
-                    if version.time_deleted == version.time_invoiced and [
-                        version.time_deleted,
-                        version.time_invoiced,
-                    ] != [None, None]:
-                        continue
-                    version_bhours = calculate_version_period_usage(version=version)
-                    project_byte_hours += version_bhours
-                scheduler.app.logger.info(
-                    f"Project {project.public_id} byte hours: {project_byte_hours}"
-                )
-
-                # Create a record in usage table
-                new_record = models.Usage(
-                    project_id=project.id,
-                    usage=project_byte_hours,
-                    cost=0,
-                    time_collected=current_time(),
-                )
-                db.session.add(new_record)
-                db.session.commit()
-
-        except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.SQLAlchemyError) as err:
-            flask.current_app.logger.exception(err)
-            db.session.rollback()
-            raise
-
-
-# @scheduler.task("interval", id="reporting", seconds=30, misfire_grace_time=1)
-@scheduler.task("cron", id="reporting", day="1", hour=0, minute=1)
-def reporting_units_and_users():
-    """At the start of every month, get number of units and users."""
-    # Imports
-    import csv
-    import flask_mail
-    import flask_sqlalchemy
-    import pathlib
-    from dds_web import errors, utils
-    from dds_web.database.models import User, Unit
-
-    # Get current date
-    current_date: str = utils.timestamp(ts_format="%Y-%m-%d")
-
-    # Location of reporting file
-    reporting_file: pathlib.Path = pathlib.Path("/code/doc/reporting/dds-reporting.csv")
-
-    # Error default
-    error: str = None
-
-    # App context required
-    with scheduler.app.app_context():
-        # Get email address
-        recipient: str = scheduler.app.config.get("MAIL_DDS")
-        default_subject: str = "DDS Unit / User report"
-        default_body: str = f"This email contains the DDS unit- and user statistics. The data was collected on: {current_date}."
-        error_subject: str = f"Error in {default_subject}"
-        error_body: str = "The cronjob 'reporting' experienced issues"
-
-        # Get units and count them
-        units: flask_sqlalchemy.BaseQuery = Unit.query
-        num_units: int = units.count()
-
-        # Count users
-        users: flask_sqlalchemy.BaseQuery = User.query
-        num_users_total: int = users.count()  # All users
-        num_superadmins: int = users.filter_by(type="superadmin").count()  # Super Admins
-        num_unit_users: int = users.filter_by(type="unituser").count()  # Unit Admins / Personnel
-        num_researchers: int = users.filter_by(type="researchuser").count()  # Researchers
-        num_users_excl_superadmins: int = num_users_total - num_superadmins
-
-        # Verify that sum is correct
-        if sum([num_superadmins, num_unit_users, num_researchers]) != num_users_total:
-            error: str = "Sum of number of users incorrect."
-        # Define csv file and verify that it exists
-        elif not reporting_file.exists():
-            error: str = "Could not find the csv file."
-
-        if error:
-            # Send email about error
-            file_error_msg: flask_mail.Message = flask_mail.Message(
-                subject=error_subject,
-                recipients=[recipient],
-                body=f"{error_body}: {error}",
-            )
-            utils.send_email_with_retry(msg=file_error_msg)
-            raise Exception(error)
-
-        # Add row with new info
-        with reporting_file.open(mode="a") as repfile:
-            writer = csv.writer(repfile)
-            writer.writerow(
-                [
-                    current_date,
-                    num_units,
-                    num_researchers,
-                    num_unit_users,
-                    num_users_excl_superadmins,
-                ]
-            )
-
-        # Create email
-        msg: flask_mail.Message = flask_mail.Message(
-            subject=default_subject,
-            recipients=[recipient],
-            body=default_body,
-        )
-        with reporting_file.open(mode="r") as file:  # Attach file
-            msg.attach(filename=reporting_file.name, content_type="text/csv", data=file.read())
-        utils.send_email_with_retry(msg=msg)  # Send
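
Editor's note: the cron jobs removed above all follow the same shape: a flask-apscheduler task that opens an app context, pages through rows locked with with_for_update(), and commits per row so a single database error does not abort the whole run. For readers following the refactor, the sketch below distills that pattern. It is built only from names that appear in the removed code (scheduler, page_query, models, current_time); it is illustrative and is not part of this diff.

# Minimal sketch of the removed cron-task pattern (not part of the PR).
@scheduler.task("cron", id="example_task", hour=1, minute=1, misfire_grace_time=3600)
def example_task():
    """Walk all active projects and apply some per-project transition."""
    import sqlalchemy
    from dds_web import db
    from dds_web.database import models
    from dds_web.utils import current_time, page_query

    with scheduler.app.app_context():
        for project in page_query(
            db.session.query(models.Project)
            .filter(models.Project.is_active == 1)
            .with_for_update()  # lock rows so concurrent runs cannot race
        ):
            if project.current_deadline <= current_time():
                try:
                    # ... apply the transition and persist it ...
                    db.session.commit()
                except sqlalchemy.exc.SQLAlchemyError as err:
                    scheduler.app.logger.exception(err)
                    db.session.rollback()  # one bad project must not abort the run
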
diff --git a/dds_web/templates/troubleshooting.html b/dds_web/templates/troubleshooting.html
index cbac0bfde..97a2ded13 100644
--- a/dds_web/templates/troubleshooting.html
+++ b/dds_web/templates/troubleshooting.html
@@ -11,7 +11,7 @@
 Please go through the following steps:
-dds --version and compare the displayed version to the latest one. The latest version can be found at PyPi.
+dds --version and compare the displayed version to the latest one. The latest version can be found at PyPI.
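
Editor's note: the troubleshooting page now tells users to compare the output of dds --version with the latest release on PyPI. If you want to script that comparison, a rough sketch follows. It assumes the CLI is published on PyPI under the name dds-cli (an assumption, inferred from the dds_cli repository referenced elsewhere in this PR) and uses PyPI's public JSON API.

# Sketch only: fetch the latest released version of the CLI from PyPI
# for comparison with the locally installed one.
# Assumption: the package is published as "dds-cli".
import json
import urllib.request

with urllib.request.urlopen("https://pypi.org/pypi/dds-cli/json") as response:
    latest = json.load(response)["info"]["version"]

print(f"Latest version on PyPI: {latest}")
# Compare this with the version printed by `dds --version`.
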