Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add client support for NLPRP tabular schema #180

Draft
wants to merge 18 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crate_anon/anonymise/anonregex.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,7 @@ def get_uk_postcode_regex_elements(
See:

- https://stackoverflow.com/questions/164979/regex-for-matching-uk-postcodes
""" # noqa
""" # noqa: E501
# -------------------------------------------------------------------------
# Old
# -------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion crate_anon/anonymise/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ def get_word_alternatives(filenames: List[str]) -> List[List[str]]:
Returns:
a list of lists of equivalent words

""" # noqa
""" # noqa: E501
alternatives = [] # type: List[List[str]]
all_words_seen = set() # type: Set[str]
for filename in filenames:
Expand Down
4 changes: 2 additions & 2 deletions crate_anon/anonymise/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ class SrcFlag(StrEnum):
# MySQL: 64 -- http://dev.mysql.com/doc/refman/5.7/en/identifiers.html
SQLSERVER_MAX_IDENTIFIER_LENGTH = 128
# Microsoft SQL Server: 128 --
# https://docs.microsoft.com/en-us/sql/relational-databases/databases/database-identifiers # noqa
# https://docs.microsoft.com/en-us/sql/relational-databases/databases/database-identifiers # noqa: E501


# =============================================================================
Expand Down Expand Up @@ -862,7 +862,7 @@ class HashConfigKeys:
{_SK.DDGEN_FILENAME_TO_TEXT_FIELDS} =
{_SK.DDGEN_BINARY_TO_TEXT_FIELD_PAIRS} =

""" # noqa
""" # noqa: E501

# For the style:
# [source_databases]
Expand Down
2 changes: 1 addition & 1 deletion crate_anon/anonymise/dd.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ def tidy_draft(self) -> None:
CREATE TABLE junk (intthing INT PRIMARY KEY, text1 LONGTEXT, text2 LONGTEXT);
ALTER TABLE junk ADD FULLTEXT INDEX ftidx1 (text1);
ALTER TABLE junk ADD FULLTEXT INDEX ftidx2 (text2); -- OK
""" # noqa
""" # noqa: E501
log.info("Tidying/correcting draft data dictionary")

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
2 changes: 1 addition & 1 deletion crate_anon/anonymise/ddr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1045,7 +1045,7 @@ def dest_sqla_column(self) -> Column:
kwargs = {
"doc": comment, # Python side
"comment": comment, # SQL side; supported from SQLAlchemy 1.2:
# https://docs.sqlalchemy.org/en/14/core/metadata.html#sqlalchemy.schema.Column.params.comment # noqa
# https://docs.sqlalchemy.org/en/14/core/metadata.html#sqlalchemy.schema.Column.params.comment # noqa: E501
}
if self.pk:
kwargs["primary_key"] = True
Expand Down
2 changes: 1 addition & 1 deletion crate_anon/anonymise/launch_multiprocess_anonymiser.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
- https://stackoverflow.com/questions/23611396/python-execute-cat-subprocess-in-parallel
- https://stackoverflow.com/questions/320232/ensuring-subprocesses-are-dead-on-exiting-python-program
- https://stackoverflow.com/questions/641420/how-should-i-log-while-using-multiprocessing-in-python
""" # noqa
""" # noqa: E501

import argparse
import logging
Expand Down
2 changes: 1 addition & 1 deletion crate_anon/anonymise/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
To create a SQLAlchemy ORM programmatically:

- https://stackoverflow.com/questions/2574105/sqlalchemy-dynamic-mapping/2575016#2575016
""" # noqa
""" # noqa: E501

import logging
import random
Expand Down
10 changes: 5 additions & 5 deletions crate_anon/common/bugfix_flashtext.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ def replace_keywords(self, a_sentence: str) -> str:
if not self.case_sensitive:
sentence = a_sentence.lower()
# by Ihor Bobak:
# some letters can expand in size when lower() is called, therefore we will preprocess # noqa
# a_sentense to find those letters which lower()-ed to 2 or more symbols. # noqa
# So, imagine that X is lowered as yz, the rest are lowered as is: A->a, B->b, C->c # noqa
# some letters can expand in size when lower() is called, therefore we will preprocess # noqa: E501
# a_sentence to find those letters which lower()-ed to 2 or more symbols. # noqa: E501
# So, imagine that X is lowered as yz, the rest are lowered as is: A->a, B->b, C->c # noqa: E501
# then for the string ABCXABC we want to get
# ['A', 'B', 'C', 'X', '', 'A', 'B', 'C'] which corresponds to
# ['a', 'b', 'c', 'y', 'z', 'a', 'b', 'c'] because when the code below will run by the indexes # noqa
# of the lowered string, it will "glue" the original string also by THE SAME indexes # noqa
# ['a', 'b', 'c', 'y', 'z', 'a', 'b', 'c'] because when the code below will run by the indexes # noqa: E501
# of the lowered string, it will "glue" the original string also by THE SAME indexes # noqa: E501
orig_sentence = []
for i in range(0, len(a_sentence)):
char = a_sentence[i]
Expand Down
4 changes: 3 additions & 1 deletion crate_anon/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@
# Is this program running on readthedocs.org?
ON_READTHEDOCS = os.environ.get("READTHEDOCS") == "True"

NoneType = type(None) # for isinstance, sometimes


# =============================================================================
# Constant creation
Expand Down Expand Up @@ -111,7 +113,7 @@ class DockerConstants:

HOST = "0.0.0.0"
# ... not "localhost" or "127.0.0.1"; see
# https://nickjanetakis.com/blog/docker-tip-54-fixing-connection-reset-by-peer-or-similar-errors # noqa
# https://nickjanetakis.com/blog/docker-tip-54-fixing-connection-reset-by-peer-or-similar-errors # noqa: E501


# =============================================================================
Expand Down
7 changes: 6 additions & 1 deletion crate_anon/common/extendedconfigparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import ast
import configparser
import logging
import os.path
from typing import (
Any,
Dict,
Expand Down Expand Up @@ -148,7 +149,7 @@ def __init__(self, *args, case_sensitive: bool = False, **kwargs) -> None:
# 'converters': Python 3.5 and up
super().__init__(*args, **kwargs)
if case_sensitive:
# https://stackoverflow.com/questions/1611799/preserve-case-in-configparser # noqa
# https://stackoverflow.com/questions/1611799/preserve-case-in-configparser # noqa: E501
self.optionxform = str

# Use the underlying ConfigParser class for e.g.
Expand Down Expand Up @@ -520,6 +521,10 @@ def __init__(
self.parser = ExtendedConfigParser(case_sensitive=case_sensitive)
if filename:
log.info(f"Reading config file: {filename}")
if not os.path.isfile(filename):
raise RuntimeError(
f"Config file {filename} does not exist"
)
self.parser.read(filename, encoding=encoding)
else:
self.parser.read_file(fileobj)
Expand Down
4 changes: 2 additions & 2 deletions crate_anon/common/memsize.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

From https://stackoverflow.com/questions/449560/how-do-i-determine-the-size-of-an-object-in-python

""" # noqa
""" # noqa: E501

from gc import get_referents
from sys import getsizeof
Expand All @@ -52,7 +52,7 @@ def getsize(obj: Any, assume_none_denied: bool = False) -> int:
Skip checks for classes/modules/functions. Assume that all objects
should be checked (typically, meaning that the caller guarantees
not to pass stuff that doesn't need checking).
""" # noqa
""" # noqa: E501
if not assume_none_denied:
if isinstance(obj, DENYLIST):
raise TypeError(
Expand Down
2 changes: 1 addition & 1 deletion crate_anon/common/regex_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@

_NOT_EMPTY_WORD_ONLY_REGEX = regex.compile(r"^\w+$")
_NOT_EMPTY_ALPHABETICAL_ONLY_REGEX = regex.compile("^[a-zA-Z]+$")
# cf. https://stackoverflow.com/questions/336210/regular-expression-for-alphanumeric-and-underscores # noqa
# cf. https://stackoverflow.com/questions/336210/regular-expression-for-alphanumeric-and-underscores # noqa: E501


# =============================================================================
Expand Down
2 changes: 1 addition & 1 deletion crate_anon/common/spreadsheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ def write_spreadsheet(
ext = filetype or os.path.splitext(filename)[1]
if filename == "-" or ext == SpreadsheetFileExtensions.TSV.value:
first_key = next(iter(data))
# https://stackoverflow.com/questions/30362391/how-do-you-find-the-first-key-in-a-dictionary # noqa
# https://stackoverflow.com/questions/30362391/how-do-you-find-the-first-key-in-a-dictionary # noqa: E501
first_sheet = data[first_key]
write_tsv(filename, first_sheet)
elif ext == SpreadsheetFileExtensions.CSV.value:
Expand Down
36 changes: 31 additions & 5 deletions crate_anon/common/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,9 @@

MSSQL_COLTYPE_TO_LEN = {
# The "N" prefix means Unicode.
# https://docs.microsoft.com/en-us/sql/t-sql/data-types/char-and-varchar-transact-sql?view=sql-server-ver15 # noqa
# https://docs.microsoft.com/en-us/sql/t-sql/data-types/nchar-and-nvarchar-transact-sql?view=sql-server-ver15 # noqa
# https://docs.microsoft.com/en-us/sql/t-sql/data-types/ntext-text-and-image-transact-sql?view=sql-server-ver15 # noqa
# https://docs.microsoft.com/en-us/sql/t-sql/data-types/char-and-varchar-transact-sql?view=sql-server-ver15 # noqa: E501
# https://docs.microsoft.com/en-us/sql/t-sql/data-types/nchar-and-nvarchar-transact-sql?view=sql-server-ver15 # noqa: E501
# https://docs.microsoft.com/en-us/sql/t-sql/data-types/ntext-text-and-image-transact-sql?view=sql-server-ver15 # noqa: E501
"NVARCHAR_MAX": 2**30 - 1,
# Can specify NVARCHAR(1) to NVARCHAR(4000), or NVARCHAR(MAX) for 2^30 - 1.
"VARCHAR_MAX": 2**31 - 1,
Expand Down Expand Up @@ -222,7 +222,7 @@ class SchemaId:

- https://stackoverflow.com/questions/11618277/difference-between-schema-database-in-mysql

""" # noqa
""" # noqa: E501

def __init__(self, db: str = "", schema: str = "") -> None:
"""
Expand Down Expand Up @@ -2452,7 +2452,7 @@ def translate_sql_qmark_to_percent(sql: str) -> str:
:class:`cardinal_pythonlib.sql.sql_grammar.SqlGrammar` objects, so that
the visual appearance matches what they expect from their database.

""" # noqa
""" # noqa: E501
# 1. Escape % characters
sql = escape_percent_for_python_dbapi(sql)
# 2. Replace ? characters that are not within quotes with %s.
Expand All @@ -2466,3 +2466,29 @@ def translate_sql_qmark_to_percent(sql: str) -> str:
else:
newsql += c
return newsql


def decorate_index_name(
    idxname: str, tablename: str = None, engine: Engine = None
) -> str:
    """
    Return a database index name, made table-specific where the backend
    requires it.

    SQLite (which we won't use much, but do use for testing) won't accept two
    indexes with the same name applying to different tables, so for that
    dialect the table name is appended to the index name. For any other
    dialect, or when the table name or engine is not supplied, the index name
    is returned unchanged.

    Args:
        idxname:
            The original index name.
        tablename:
            The name of the table the index belongs to.
        engine:
            The SQLAlchemy engine, whose dialect name is inspected.

    Returns:
        ``{idxname}_{tablename}`` for SQLite; otherwise ``idxname`` as given.
    """
    # Only decorate when we know both the table and the engine, and the
    # engine's dialect is SQLite.
    is_sqlite_with_table = (
        bool(tablename) and bool(engine) and engine.dialect.name == "sqlite"
    )
    return f"{idxname}_{tablename}" if is_sqlite_with_table else idxname
30 changes: 15 additions & 15 deletions crate_anon/crateweb/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
UrlNames,
)

# https://stackoverflow.com/questions/2636536/how-to-make-django-work-with-unsupported-mysql-drivers-such-as-gevent-mysql-or-c # noqa
# https://stackoverflow.com/questions/2636536/how-to-make-django-work-with-unsupported-mysql-drivers-such-as-gevent-mysql-or-c # noqa: E501
try:
import pymysql

Expand Down Expand Up @@ -115,10 +115,10 @@
# 'kombu.transport.django', # for Celery with Django database as broker
# 'template_profiler_panel',
# 'silk',
"crate_anon.crateweb.config.apps.UserProfileAppConfig", # for user-specific settings # noqa
"crate_anon.crateweb.config.apps.ResearchAppConfig", # the research database query app # noqa
"crate_anon.crateweb.config.apps.ConsentAppConfig", # the consent-to-contact app # noqa
"crate_anon.crateweb.config.apps.CoreAppConfig", # for e.g. the runcpserver command # noqa
"crate_anon.crateweb.config.apps.UserProfileAppConfig", # for user-specific settings # noqa: E501
"crate_anon.crateweb.config.apps.ResearchAppConfig", # the research database query app # noqa: E501
"crate_anon.crateweb.config.apps.ConsentAppConfig", # the consent-to-contact app # noqa: E501
"crate_anon.crateweb.config.apps.CoreAppConfig", # for e.g. the runcpserver command # noqa: E501
"crate_anon.crateweb.config.apps.ApiConfig", # for the anonymisation API
)

Expand All @@ -130,7 +130,7 @@
# ... reinstated here 2017-01-30 (django-debug-toolbar==1.6)
# ... "as early as possible... but after any other middleware that encodes the
# response's content, such as GZipMiddleware"
# ... http://django-debug-toolbar.readthedocs.io/en/1.0/installation.html#explicit-setup # noqa
# ... http://django-debug-toolbar.readthedocs.io/en/1.0/installation.html#explicit-setup # noqa: E501
"django.contrib.sessions.middleware.SessionMiddleware",
"django.middleware.common.CommonMiddleware",
"django.middleware.csrf.CsrfViewMiddleware",
Expand All @@ -140,11 +140,11 @@
"django.middleware.clickjacking.XFrameOptionsMiddleware",
"django.middleware.security.SecurityMiddleware",
# Additional:
"cardinal_pythonlib.django.middleware.UserBasedExceptionMiddleware", # provide debugging details to superusers # noqa
"cardinal_pythonlib.django.middleware.LoginRequiredMiddleware", # prohibit all pages except login pages if not logged in # noqa
# 'cardinal_pythonlib.django.middleware.DisableClientSideCachingMiddleware', # no client-side caching # noqa
"crate_anon.crateweb.core.middleware.RestrictAdminMiddleware", # non-developers can't access the devadmin site # noqa
# 'cardinal_pythonlib.django.request_cache.RequestCacheMiddleware', # per-request cache, UNTESTED # noqa
"cardinal_pythonlib.django.middleware.UserBasedExceptionMiddleware", # provide debugging details to superusers # noqa: E501
"cardinal_pythonlib.django.middleware.LoginRequiredMiddleware", # prohibit all pages except login pages if not logged in # noqa: E501
# 'cardinal_pythonlib.django.middleware.DisableClientSideCachingMiddleware', # no client-side caching # noqa: E501
"crate_anon.crateweb.core.middleware.RestrictAdminMiddleware", # non-developers can't access the devadmin site # noqa: E501
# 'cardinal_pythonlib.django.request_cache.RequestCacheMiddleware', # per-request cache, UNTESTED # noqa: E501
)

LOGIN_URL = "/login/" # for LoginRequiredMiddleware
Expand All @@ -166,7 +166,7 @@
"django.template.context_processors.request",
"django.contrib.auth.context_processors.auth",
"django.contrib.messages.context_processors.messages",
"crate_anon.crateweb.core.context_processors.common_context", # noqa
"crate_anon.crateweb.core.context_processors.common_context",
],
"loaders": [
# https://docs.djangoproject.com/en/1.9/ref/templates/api/
Expand Down Expand Up @@ -206,9 +206,9 @@
"debug_toolbar.panels.logging.LoggingPanel",
"debug_toolbar.panels.redirects.RedirectsPanel",
# Built in but not enabled as standard:
# 'debug_toolbar.panels.profiling.ProfilingPanel', # EXTREME DANGER! Breaks middleware inc. LoginRequiredMiddleware! # noqa
# 'debug_toolbar.panels.profiling.ProfilingPanel', # EXTREME DANGER! Breaks middleware inc. LoginRequiredMiddleware! # noqa: E501
# Extra:
# 'template_profiler_panel.panels.template.TemplateProfilerPanel', # removed 2017-01-31; division by zero error # noqa
# 'template_profiler_panel.panels.template.TemplateProfilerPanel', # removed 2017-01-31; division by zero error # noqa: E501
]


Expand All @@ -223,7 +223,7 @@
CELERY_TASK_SERIALIZER = "json"
# Results are OPTIONAL. The CRATE web service doesn't use them.
# But may be helpful for Celery testing.
# See http://docs.celeryproject.org/en/latest/configuration.html#std:setting-CELERY_RESULT_BACKEND # noqa
# See http://docs.celeryproject.org/en/latest/configuration.html#std:setting-CELERY_RESULT_BACKEND # noqa: E501
CELERY_RESULT_BACKEND = "rpc://" # uses AMQP
CELERY_RESULT_PERSISTENT = False

Expand Down
4 changes: 2 additions & 2 deletions crate_anon/crateweb/consent/lookup_rio.py
Original file line number Diff line number Diff line change
Expand Up @@ -977,7 +977,7 @@ def get_latest_consent_mode_from_rio_generic(
po.NHSNumber = %s -- string comparison
ORDER BY
cr.AssessmentDate DESC
""" # noqa
""" # noqa: E501
# BEWARE "%s" IN SQL COMMENTS! The database backend will crash because
# the number of substituted parameters will be wrong.
# New as of 2018-06-28:
Expand All @@ -999,7 +999,7 @@ def get_latest_consent_mode_from_rio_generic(
cr.NHSNumber = %s -- string comparison
ORDER BY
cr.AssessmentDate DESC
""" # noqa
""" # noqa: E501
else:
assert False, "Internal bug" # makes type checker happy

Expand Down
4 changes: 2 additions & 2 deletions crate_anon/crateweb/consent/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@

Requires Django 1.9. As of 2015-11-21, that means 1.9rc1

""" # noqa
""" # noqa: E501

import logging
from celery import shared_task
Expand Down Expand Up @@ -195,7 +195,7 @@ def finalize_clinician_response(clinician_response_id: int) -> None:

Args:
clinician_response_id: PK of the clinician response
""" # noqa
""" # noqa: E501
from crate_anon.crateweb.consent.models import (
ClinicianResponse,
) # delayed import
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@

./manage.py runcpserver --port 8080 --ssl_certificate /etc/ssl/certs/ssl-cert-snakeoil.pem --ssl_private_key /etc/ssl/private/ssl-cert-snakeoil.key

""" # noqa
""" # noqa: E501

from argparse import ArgumentParser, Namespace
import logging
Expand Down
4 changes: 2 additions & 2 deletions crate_anon/crateweb/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def site_absolute_url(path: str) -> str:

But that does at least mean we can use the same method for static and
Django URLs.
""" # noqa
""" # noqa: E501
url = settings.DJANGO_SITE_ROOT_ABSOLUTE_URL + path
log.debug(f"site_absolute_url: {path} -> {url}")
return url
Expand Down Expand Up @@ -467,7 +467,7 @@ class JavascriptTree(JavascriptTreeNode):
print(t.js_str_html())
print(t.js_data())

""" # noqa
""" # noqa: E501

def __init__(
self,
Expand Down
2 changes: 1 addition & 1 deletion crate_anon/crateweb/research/html_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def visibility_button(
<img class="plusminus_image" id="collapse_img_{tag}" alt="" src="{img}">
{title_html}
</{eltype}>
""" # noqa
""" # noqa: E501


def visibility_contentdiv(
Expand Down
Loading
Loading