diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8c64f69..caafe8c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -24,9 +24,11 @@ jobs:
SKIP: no-commit-to-branch
test:
env:
- MARKLOGIC_HOST: ""
- MARKLOGIC_USER: ""
- MARKLOGIC_PASSWORD: ""
+ MARKLOGIC_HOST: ml-host
+ MARKLOGIC_USER: ml-user
+ MARKLOGIC_PASSWORD: ml-password
+ MARKLOGIC_USE_HTTPS: 0
+ AWS_BUCKET_NAME: judgments-original-versions
name: Run unit tests
runs-on: ubuntu-24.04
steps:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f7f19bb..a95fecc 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -19,12 +19,22 @@ repos:
rev: v0.8.4
hooks:
- id: ruff-format
+ - id: ruff
+ args:
+ - --fix
+ - --exit-non-zero-on-fix
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.13.0
hooks:
- id: mypy
files: ^ds-caselaw-ingester/
+ additional_dependencies:
+ - types-requests
+ - types-python-dateutil
+ - types-pytz
+ - boto3-stubs[s3,sns]
+ - ds-caselaw-marklogic-api-client~=29.0.0
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v4.0.0-alpha.8
diff --git a/ds-caselaw-ingester/content_sqid.py b/ds-caselaw-ingester/content_sqid.py
deleted file mode 100644
index ce79310..0000000
--- a/ds-caselaw-ingester/content_sqid.py
+++ /dev/null
@@ -1,22 +0,0 @@
-from sqids import Sqids
-
-# HASH_SUBSTRING_LENGTH must be strictly less than 16;
-# for 16, hashes starting with 8 have a number too large to be turned into sqid.
-# (under the hood, sqids are numbers less than the hex value
-# 8000 0000 0000 0000 and do not exist for numbers higher than that)
-HASH_SUBSTRING_LENGTH = 12
-
-# SQID_ALPHABET contains no vowels, including y
-SQID_ALPHABET = "bcdfghjklmnpqrstvwxz"
-SQID_MIN_LENGTH = 8
-
-sqids = Sqids(alphabet=SQID_ALPHABET, min_length=SQID_MIN_LENGTH)
-
-
-def _hex_digest_to_int(digest_string: str) -> int:
- return int(digest_string.encode("utf-8")[:HASH_SUBSTRING_LENGTH], 16)
-
-
-def hex_digest_to_sqid(digest_string: str) -> str:
- num = _hex_digest_to_int(digest_string)
- return sqids.encode([num])
diff --git a/ds-caselaw-ingester/lambda_function.py b/ds-caselaw-ingester/lambda_function.py
index 1ff02ef..7367ae6 100644
--- a/ds-caselaw-ingester/lambda_function.py
+++ b/ds-caselaw-ingester/lambda_function.py
@@ -1,15 +1,16 @@
import json
+import logging
import os
import tarfile
import xml.etree.ElementTree as ET
+from contextlib import suppress
from urllib.parse import unquote_plus
+from uuid import uuid4
from xml.sax.saxutils import escape
-from caselawclient.models.identifiers.neutral_citation import NeutralCitationNumber
-from caselawclient.models.documents import DocumentURIString
import boto3
+import boto3.s3
import rollbar
-from boto3.session import Session
from botocore.exceptions import NoCredentialsError
from caselawclient.Client import (
DEFAULT_USER_AGENT,
@@ -17,22 +18,31 @@
MarklogicResourceNotFoundError,
)
from caselawclient.client_helpers import VersionAnnotation, VersionType
+from caselawclient.models.documents import DocumentURIString
+from caselawclient.models.identifiers.neutral_citation import NeutralCitationNumber
from dotenv import load_dotenv
+from mypy_boto3_s3.client import S3Client
from notifications_python_client.notifications import NotificationsAPIClient
-import logging
-from caselawclient.models.documents import Document
logger = logging.getLogger("ingester")
logger.setLevel(logging.DEBUG)
load_dotenv()
+
rollbar.init(os.getenv("ROLLBAR_TOKEN"), environment=os.getenv("ROLLBAR_ENV"))
+MARKLOGIC_HOST: str = os.environ["MARKLOGIC_HOST"]
+MARKLOGIC_USER: str = os.environ["MARKLOGIC_USER"]
+MARKLOGIC_PASSWORD: str = os.environ["MARKLOGIC_PASSWORD"]
+MARKLOGIC_USE_HTTPS: bool = os.environ["MARKLOGIC_USE_HTTPS"] not in ("0", "", "false", "False")
+
+AWS_BUCKET_NAME: str = os.environ["AWS_BUCKET_NAME"]
+
api_client = MarklogicApiClient(
- host=os.getenv("MARKLOGIC_HOST", default=None),
- username=os.getenv("MARKLOGIC_USER", default=None),
- password=os.getenv("MARKLOGIC_PASSWORD", default=None),
- use_https=os.getenv("MARKLOGIC_USE_HTTPS", default=False),
+ host=MARKLOGIC_HOST,
+ username=MARKLOGIC_USER,
+ password=MARKLOGIC_PASSWORD,
+ use_https=MARKLOGIC_USE_HTTPS,
user_agent=f"ds-caselaw-ingester/unknown {DEFAULT_USER_AGENT}",
)
@@ -44,7 +54,7 @@ def __init__(self, metadata):
@property
def is_tdr(self) -> bool:
- return "TDR" in self.parameters.keys()
+ return "TDR" in self.parameters
@property
def force_publish(self):
@@ -63,7 +73,7 @@ def from_event(cls, event):
def from_message(cls, message: dict):
if message.get("Records", [{}])[0].get("eventSource") == "aws:s3":
return S3Message(message["Records"][0])
- elif "parameters" in message.keys():
+ elif "parameters" in message:
return V2Message(message)
else:
raise InvalidMessageException(f"Did not recognise message type. {message}")
@@ -229,18 +239,6 @@ def extract_metadata(tar: tarfile.TarFile, consignment_reference: str):
return decoder.decode(te_metadata_file.read().decode("utf-8"))
-def extract_uri(metadata: dict, consignment_reference: str) -> str:
- uri = metadata["parameters"]["PARSER"].get("uri", "")
-
- if uri:
- uri = uri.replace("https://caselaw.nationalarchives.gov.uk/id/", "")
-
- if not uri:
- uri = f"failures/{consignment_reference}"
-
- return uri
-
-
# called by tests
def get_consignment_reference(message):
return Message.from_message(message).get_consignment_reference()
@@ -263,10 +261,10 @@ def extract_lambda_versions(versions: list[dict[str, str]]) -> list[tuple[str, s
return version_tuples
-def store_file(file, folder, filename, s3_client: Session.client):
+def store_file(file, folder, filename, s3_client: S3Client):
pathname = f"{folder}/{filename}"
try:
- s3_client.upload_fileobj(file, os.getenv("AWS_BUCKET_NAME"), pathname)
+ s3_client.upload_fileobj(file, AWS_BUCKET_NAME, pathname)
print(f"Upload Successful {pathname}")
except FileNotFoundError:
print(f"The file {pathname} was not found")
@@ -290,7 +288,7 @@ def personalise_email(uri: str, metadata: dict) -> dict:
}
-def copy_file(tarfile, input_filename, output_filename, uri, s3_client: Session.client):
+def copy_file(tarfile, input_filename, output_filename, uri, s3_client: S3Client):
try:
file = tarfile.extractfile(input_filename)
store_file(file, uri, output_filename, s3_client)
@@ -395,14 +393,14 @@ def __init__(self, message: Message):
print(f"Ingester Start: Consignment reference {self.consignment_reference}")
print(f"Received Message: {self.message.message}")
self.local_tar_filename = self.save_tar_file_in_s3()
- self.tar = tarfile.open(self.local_tar_filename, mode="r")
- self.metadata = extract_metadata(self.tar, self.consignment_reference)
- self.message.update_consignment_reference(self.metadata["parameters"]["TRE"]["reference"])
self.consignment_reference = self.message.get_consignment_reference()
- self.xml_file_name = self.metadata["parameters"]["TRE"]["payload"]["xml"]
- self.uri = extract_uri(self.metadata, self.consignment_reference)
+ self.uri = DocumentURIString("d-" + str(uuid4()))
+ with tarfile.open(self.local_tar_filename, mode="r") as tar:
+ self.metadata = extract_metadata(tar, self.consignment_reference)
+ self.message.update_consignment_reference(self.metadata["parameters"]["TRE"]["reference"])
+ self.xml_file_name = self.metadata["parameters"]["TRE"]["payload"]["xml"]
+ self.xml = get_best_xml(self.uri, tar, self.xml_file_name, self.consignment_reference)
print(f"Ingesting document {self.uri}")
- self.xml = get_best_xml(self.uri, self.tar, self.xml_file_name, self.consignment_reference)
def save_tar_file_in_s3(self):
"""This should be mocked out for testing -- get the tar file from S3 and
@@ -459,7 +457,7 @@ def set_document_identifiers(self) -> None:
doc.save_identifiers()
logger.info(f"Ingested document had NCN {ncn}")
else:
- logger.info(f"Ingested document had NCN (NOT FOUND)")
+ logger.info("Ingested document had NCN (NOT FOUND)")
def send_updated_judgment_notification(self) -> None:
personalisation = personalise_email(self.uri, self.metadata)
@@ -476,10 +474,7 @@ def send_updated_judgment_notification(self) -> None:
print(f'Sent update notification to {os.getenv("NOTIFY_EDITORIAL_ADDRESS")} (Message ID: {response["id"]})')
def send_new_judgment_notification(self) -> None:
- if "/press-summary/" in self.uri:
- doctype = "Press Summary"
- else:
- doctype = "Judgment"
+ doctype = "Press Summary" if "/press-summary/" in self.uri else "Judgment"
personalisation = personalise_email(self.uri, self.metadata)
personalisation["doctype"] = doctype
@@ -535,48 +530,49 @@ def save_files_to_s3(self) -> None:
modified_targz_filename = (
self.local_tar_filename if docx_filename else modify_filename(self.local_tar_filename, "_nodocx")
)
- store_file(
- open(self.local_tar_filename, mode="rb"),
- self.uri,
- os.path.basename(modified_targz_filename),
- s3_client,
- )
+ with open(self.local_tar_filename, mode="rb") as local_tar:
+ store_file(
+ local_tar,
+ self.uri,
+ os.path.basename(modified_targz_filename),
+ s3_client,
+ )
print(f"saved tar.gz as {modified_targz_filename!r}")
# Store docx and rename
# The docx_filename is None for files which have been reparsed.
if docx_filename is not None:
- copy_file(
- self.tar,
- f"{self.consignment_reference}/{docx_filename}",
- f'{self.uri.replace("/", "_")}.docx',
- self.uri,
- s3_client,
- )
+ with tarfile.open(self.local_tar_filename, mode="r") as tar:
+ copy_file(
+ tar,
+ f"{self.consignment_reference}/{docx_filename}",
+ f'{self.uri.replace("/", "_")}.docx',
+ self.uri,
+ s3_client,
+ )
# Store parser log
- try:
+ with suppress(FileNotFoundException), tarfile.open(self.local_tar_filename, mode="r") as tar:
copy_file(
- self.tar,
+ tar,
f"{self.consignment_reference}/parser.log",
"parser.log",
self.uri,
s3_client,
)
- except FileNotFoundException:
- pass
# Store images
image_list = self.metadata["parameters"]["TRE"]["payload"]["images"]
if image_list:
for image_filename in image_list:
- copy_file(
- self.tar,
- f"{self.consignment_reference}/{image_filename}",
- image_filename,
- self.uri,
- s3_client,
- )
+ with tarfile.open(self.local_tar_filename, mode="r") as tar:
+ copy_file(
+ tar,
+ f"{self.consignment_reference}/{image_filename}",
+ image_filename,
+ self.uri,
+ s3_client,
+ )
@property
def metadata_object(self) -> Metadata:
@@ -611,9 +607,6 @@ def send_email(self) -> None:
raise RuntimeError(f"Didn't recognise originator {originator!r}")
- def close_tar(self) -> None:
- self.tar.close()
-
def upload_xml(self) -> None:
self.updated = self.update_document_xml()
self.inserted = False if self.updated else self.insert_document_xml()
@@ -641,7 +634,7 @@ def process_message(message):
ingest.send_email()
# Store metadata in Marklogic
- has_TDR_data = "TDR" in ingest.metadata["parameters"].keys()
+ has_TDR_data = "TDR" in ingest.metadata["parameters"]
if has_TDR_data:
ingest.store_metadata()
@@ -655,8 +648,6 @@ def process_message(message):
else:
ingest.unpublish_updated_judgment()
- ingest.close_tar()
-
print("Ingestion complete")
return message.message
diff --git a/ds-caselaw-ingester/test_sqid.py b/ds-caselaw-ingester/test_sqid.py
deleted file mode 100644
index bca5e7c..0000000
--- a/ds-caselaw-ingester/test_sqid.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import content_sqid
-import pytest
-from content_sqid import _hex_digest_to_int, hex_digest_to_sqid
-
-
-@pytest.fixture()
-def no_hash_limit():
- """Remove the limitation on the length of the contenthash that is consumed temporarily"""
- old = content_sqid.HASH_SUBSTRING_LENGTH
- content_sqid.HASH_SUBSTRING_LENGTH = 999
- yield None
- content_sqid.HASH_SUBSTRING_LENGTH = old
-
-
-def test_hex_to_int():
- """
- These values shouldn't change -- if they do, it means our hashes aren't stable.
- Changing the alphabet will change them.
- """
- assert _hex_digest_to_int("deadbeef") == 3735928559
- assert hex_digest_to_sqid("deadbeef") == "hdgcqtcnm"
-
-
-def test_min_length():
- """Low-value hashes are an acceptable length"""
- assert hex_digest_to_sqid("0") == "xcsrdnmp"
-
-
-def test_max_value():
- """This should be the largest value we can ever get"""
- assert hex_digest_to_sqid("ffffffffffffffffffffffffffffff") == "tspwbpshvpklr"
-
-
-def test_hex_truncation():
- """A large hex value works and is the same value as the truncated version"""
- assert _hex_digest_to_int("2597c39e63c20d69dc0cb189a88a8ab127c335cdcbf1d9ee43de3f711002de52") == _hex_digest_to_int(
- "2597c39e63c2"
- )
-
-
-def test_demo_limit_of_truncation(no_hash_limit):
- """Demonstrate that without a limit to the length of a hash, a 16-character hash can fail"""
- assert hex_digest_to_sqid("7fffffffffffffff")
- with pytest.raises(ValueError):
- assert hex_digest_to_sqid("8000000000000000")
diff --git a/ds-caselaw-ingester/tests.py b/ds-caselaw-ingester/tests.py
index 93f6abe..d2a2a24 100644
--- a/ds-caselaw-ingester/tests.py
+++ b/ds-caselaw-ingester/tests.py
@@ -92,14 +92,18 @@ def assert_log_sensible(log):
"lambda_function.Ingest.save_tar_file_in_s3",
return_value="/tmp/TDR-2022-DNWR.tar.gz",
)
-def v2_ingest(fake_s3):
+@patch("lambda_function.uuid4")
+def v2_ingest(mock_uuid4, fake_s3):
+ mock_uuid4.return_value = "v2-a1b2-c3d4"
create_fake_tdr_file()
return lambda_function.Ingest.from_message_dict(v2_message)
@pytest.fixture
@patch("lambda_function.Ingest.save_tar_file_in_s3", return_value="/tmp/BULK-0.tar.gz")
-def s3_ingest(fake_s3):
+@patch("lambda_function.uuid4")
+def s3_ingest(mock_uuid4, fake_s3):
+ mock_uuid4.return_value = "s3-a1b2-c3d4"
create_fake_bulk_file()
return lambda_function.Ingest.from_message_dict(s3_message)
@@ -120,8 +124,9 @@ def fcl_ingest(fake_s3):
class TestHandler:
def test_fixture_works(self, v2_ingest, s3_ingest):
"""We get the XML of the data and extract the URI from it successfully using the fixtures"""
- assert v2_ingest.uri == "ewca/civ/2022/111"
- assert s3_ingest.uri == "ukut/iac/2012/82"
+
+ assert v2_ingest.uri == "d-v2-a1b2-c3d4"
+ assert s3_ingest.uri == "d-s3-a1b2-c3d4"
@patch("lambda_function.api_client", autospec=True)
@patch("lambda_function.boto3.session.Session")
@@ -172,8 +177,10 @@ def test_handler_messages_v2(
@patch("lambda_function.Ingest.send_updated_judgment_notification")
@patch("lambda_function.VersionAnnotation")
@patch("lambda_function.modify_filename")
+ @patch("lambda_function.uuid4")
def test_handler_messages_s3(
self,
+ mock_uuid4,
modify_filename,
annotation,
notify_new,
@@ -186,6 +193,7 @@ def test_handler_messages_s3(
boto_session.return_value.client.return_value.download_file = create_fake_bulk_file
doc = apiclient.get_document_by_uri.return_value
doc.neutral_citation = "[2012] UKUT 82 (IAC)"
+ mock_uuid4.return_value = "a1b2-c3d4"
message = s3_message_raw
event = {"Records": [{"Sns": {"Message": message}}, {"Sns": {"Message": message}}]}
@@ -201,7 +209,7 @@ def test_handler_messages_s3(
assert "publishing" in log
assert "Invalid XML file" not in log
assert "No XML file found" not in log
- apiclient.set_published.assert_called_with("ukut/iac/2012/82", True)
+ apiclient.set_published.assert_called_with("d-a1b2-c3d4", True)
assert apiclient.set_published.call_count == 2
notify_new.assert_not_called()
notify_updated.assert_not_called()
@@ -235,66 +243,50 @@ class TestLambda:
)
def test_extract_xml_file_success_tdr(self):
- filename = "TDR-2022-DNWR.xml"
- tar = tarfile.open(
+ with tarfile.open(
self.TDR_TARBALL_PATH,
mode="r",
- )
- result = lambda_function.extract_xml_file(tar, filename)
- xml = ET.XML(result.read())
- assert xml.tag == "{http://docs.oasis-open.org/legaldocml/ns/akn/3.0}akomaNtoso"
+ ) as tar:
+ filename = "TDR-2022-DNWR.xml"
+ result = lambda_function.extract_xml_file(tar, filename)
+ xml = ET.XML(result.read())
+ assert xml.tag == "{http://docs.oasis-open.org/legaldocml/ns/akn/3.0}akomaNtoso"
def test_extract_xml_file_not_found_tdr(self):
- filename = "unknown.xml"
- tar = tarfile.open(
+ with tarfile.open(
self.TDR_TARBALL_PATH,
mode="r",
- )
- result = lambda_function.extract_xml_file(tar, filename)
- assert result is None
+ ) as tar:
+ filename = "unknown.xml"
+ result = lambda_function.extract_xml_file(tar, filename)
+ assert result is None
def test_extract_xml_file_name_empty(self):
- filename = ""
- tar = tarfile.open(
+ with tarfile.open(
self.TDR_TARBALL_PATH,
mode="r",
- )
- result = lambda_function.extract_xml_file(tar, filename)
- assert result is None
+ ) as tar:
+ filename = ""
+ result = lambda_function.extract_xml_file(tar, filename)
+ assert result is None
def test_extract_metadata_success_tdr(self):
- consignment_reference = "TDR-2022-DNWR"
- tar = tarfile.open(
+ with tarfile.open(
self.TDR_TARBALL_PATH,
mode="r",
- )
- result = lambda_function.extract_metadata(tar, consignment_reference)
- assert result["parameters"]["TRE"]["payload"] is not None
+ ) as tar:
+ consignment_reference = "TDR-2022-DNWR"
+ result = lambda_function.extract_metadata(tar, consignment_reference)
+ assert result["parameters"]["TRE"]["payload"] is not None
def test_extract_metadata_not_found_tdr(self):
- consignment_reference = "unknown_consignment_reference"
- tar = tarfile.open(
+ with tarfile.open(
self.TARBALL_MISSING_METADATA_PATH,
mode="r",
- )
- with pytest.raises(lambda_function.FileNotFoundException, match="Consignment Ref:"):
- lambda_function.extract_metadata(tar, consignment_reference)
-
- def test_extract_uri_success(self):
- metadata = {"parameters": {"PARSER": {"uri": "https://caselaw.nationalarchives.gov.uk/id/ewca/civ/2022/111"}}}
- assert lambda_function.extract_uri(metadata, "anything") == "ewca/civ/2022/111"
-
- def test_extract_uri_incompete(self):
- metadata = {"parameters": {"PARSER": {"uri": "https://caselaw.nationalarchives.gov.uk/id/"}}}
- assert lambda_function.extract_uri(metadata, "anything") == "failures/anything"
-
- def test_extract_uri_missing_key(self):
- metadata = {"parameters": {"PARSER": {}}}
- assert lambda_function.extract_uri(metadata, "anything") == "failures/anything"
-
- def test_extract_uri_none(self):
- metadata = {"parameters": {"PARSER": {"uri": None}}}
- assert lambda_function.extract_uri(metadata, "anything") == "failures/anything"
+ ) as tar:
+ consignment_reference = "unknown_consignment_reference"
+ with pytest.raises(lambda_function.FileNotFoundException, match="Consignment Ref:"):
+ lambda_function.extract_metadata(tar, consignment_reference)
def test_extract_docx_filename_success(self):
metadata = {"parameters": {"TRE": {"payload": {"filename": "judgment.docx"}}}}
@@ -504,43 +496,43 @@ def test_send_updated_judgment_notification_with_no_tdr_section(self, mock_print
@patch.object(lambda_function, "store_file")
def test_copy_file_success(self, mock_store_file):
- tar = tarfile.open(
+ with tarfile.open(
self.TDR_TARBALL_PATH,
mode="r",
- )
- filename = "TDR-2022-DNWR/TDR-2022-DNWR.xml"
- session = boto3.Session
- lambda_function.store_file = MagicMock()
- lambda_function.copy_file(tar, filename, "new_filename", "uri", session)
- lambda_function.store_file.assert_called_with(ANY, ANY, ANY, ANY)
+ ) as tar:
+ filename = "TDR-2022-DNWR/TDR-2022-DNWR.xml"
+ session = boto3.Session
+ lambda_function.store_file = MagicMock()
+ lambda_function.copy_file(tar, filename, "new_filename", "uri", session)
+ lambda_function.store_file.assert_called_with(ANY, ANY, ANY, ANY)
def test_copy_file_not_found(self):
- tar = tarfile.open(
+ with tarfile.open(
self.TDR_TARBALL_PATH,
mode="r",
- )
- filename = "does_not_exist.txt"
- session = boto3.Session
- with pytest.raises(lambda_function.FileNotFoundException):
- lambda_function.copy_file(tar, filename, "new_filename", "uri", session)
+ ) as tar:
+ filename = "does_not_exist.txt"
+ session = boto3.Session
+ with pytest.raises(lambda_function.FileNotFoundException):
+ lambda_function.copy_file(tar, filename, "new_filename", "uri", session)
def test_create_xml_contents_success(self):
- tar = tarfile.open(
+ with tarfile.open(
self.TDR_TARBALL_PATH,
mode="r",
- )
- result = lambda_function.create_parser_log_xml(tar)
- assert result == "This is the parser error log."
+ ) as tar:
+ result = lambda_function.create_parser_log_xml(tar)
+ assert result == "This is the parser error log."
@patch.object(tarfile, "open")
def test_create_xml_contents_failure(self, mock_open_tarfile):
- tar = tarfile.open(
+ with tarfile.open(
self.TDR_TARBALL_PATH,
mode="r",
- )
- tar.extractfile = MagicMock(side_effect=KeyError)
- result = lambda_function.create_parser_log_xml(tar)
- assert result == "parser.log not found"
+ ) as tar:
+ tar.extractfile = MagicMock(side_effect=KeyError)
+ result = lambda_function.create_parser_log_xml(tar)
+ assert result == "parser.log not found"
@patch.dict(
os.environ,
diff --git a/pyproject.toml b/pyproject.toml
index 00e2477..a8adaf3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,12 +2,16 @@
line-length = 120
[tool.ruff.lint]
-ignore = ["E501", "G004", "PLR2004", "RUF005", "RUF012", "UP040"] # long lines, fstrings in logs, magic values, consider not concat, mutable classbits, type instead of TypeAlias
-extend-select = ["W", "B", "Q", "C90", "I", "UP", "YTT", "ASYNC", "S", "BLE", "A", "COM", "C4", "DTZ", "T10", "DJ", "EM", "EXE", "FA",
- "ISC", "ICN", "G", "INP", "PIE", "T20", "PYI", "PT", "Q", "RSE", "RET", "SLF", "SLOT", "SIM", "TID", "TCH", "INT", "PTH",
- "FIX", "PGH", "PL", "TRY", "FLY", "PERF", "RUF"]
+ignore = ["E501", "G004", "PLR2004", "RUF005", "RUF012", "UP040"] # long lines, fstrings in logs, magic values, consider not concat, mutable classbits, type instead of TypeAlias
+extend-select = ["W", "I", "SLF", "SIM"]
+# extend-select = [ "B", "Q", "C90", "I", "UP", "YTT", "ASYNC", "S", "BLE", "A", "COM", "C4", "DTZ", "T10", "DJ", "EM", "EXE", "FA",
+# "ISC", "ICN", "G", "INP", "PIE", "T20", "PYI", "PT", "Q", "RSE", "RET", "SLOT", "TID", "TCH", "INT", "PTH",
+# "FIX", "PGH", "PL", "TRY", "FLY", "PERF", "RUF"]
unfixable = ["ERA"]
+[tool.ruff.lint.extend-per-file-ignores]
+"tests.py" = ["S101"] # `assert` is fine in tests
+
# things skipped:
# N: naming, possibly good
# D: docstrings missing throughout
diff --git a/requirements/base.txt b/requirements/base.txt
index 2ff94e4..e7e3b61 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -9,4 +9,3 @@ notifications-python-client~=10.0
mypy-boto3-s3
mypy-boto3-sns
python-dotenv
-sqids