Skip to content

Commit

Permalink
Add pre-commit config
Browse files Browse the repository at this point in the history
  • Loading branch information
dragon-dxw committed Nov 27, 2023
1 parent e3cf4ab commit a93566e
Show file tree
Hide file tree
Showing 16 changed files with 449 additions and 330 deletions.
52 changes: 52 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
exclude: "^docs/|/migrations/"
default_install_hook_types: [pre-commit, pre-push]

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml

- repo: https://github.com/psf/black
rev: 23.11.0
hooks:
- id: black

- repo: https://github.com/Riverside-Healthcare/djLint
rev: v1.34.0
hooks:
- id: djlint-django
name: djLint Reformatting for Django
entry: djlint --profile=django --reformat
types_or: [html]
language: python

- repo: https://github.com/Riverside-Healthcare/djLint
rev: v1.34.0
hooks:
- id: djlint-django
name: djLint Checks for Django
entry: djlint --profile=django
types_or: [html]
language: python

- repo: https://github.com/pre-commit/mirrors-prettier
rev: v3.1.0
hooks:
- id: prettier
types_or: [scss, yaml, markdown, javascript, xml]

- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.1.6
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]

# configures pre-commit.ci (via the `ci:` key below) to keep pre-commit hook versions up to date
ci:
autoupdate_schedule: weekly
skip: []
submodules: false
9 changes: 4 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ The file layout is explained in the [project layout documentation](https://githu
1. Install `gradle`. On MacOS, you can use `brew install gradle`.

2. If you're running against anything other than development, copy `gradle-development.properties`
to `gradle-{environment}.properties` and set the credentials and hostname for your Marklogic server.
to `gradle-{environment}.properties` and set the credentials and hostname for your Marklogic server.

## Deployment

Expand Down Expand Up @@ -93,10 +93,9 @@ Two gradle tasks are available for bulk management of documents in a database us
necessary to use, but are provided in order to automate some development tasks and provide
examples for future data migrations.


* `gradle manageAllDocuments`: Enables version management for all documents
* `gradle publishAllDocuments`: Sets the `published` flag for all documents
* `gradle addAllDocumentsToJudgmentsCollection`: Adds all documents to the 'judgments' collection.
- `gradle manageAllDocuments`: Enables version management for all documents
- `gradle publishAllDocuments`: Sets the `published` flag for all documents
- `gradle addAllDocumentsToJudgmentsCollection`: Adds all documents to the 'judgments' collection.

### Loading data from a backup on S3 (deprecated)

Expand Down
3 changes: 2 additions & 1 deletion development_scripts/populate_from_caselaw.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
print("got xml")

response = requests.put(
f"http://admin:admin@localhost:8011/LATEST/documents?uri={ml_url}", data=xml
f"http://admin:admin@localhost:8011/LATEST/documents?uri={ml_url}",
data=xml,
)
response.raise_for_status()
print("added to local Marklogic db")
36 changes: 25 additions & 11 deletions development_scripts/populate_top_judgments_and_neighbours.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@
- Run the script.
"""

import requests
import re
from urllib.parse import quote
from xml.etree import ElementTree as ET

import requests
from bs4 import BeautifulSoup
import re

urls = [
"ewca/civ/2022/1146",
Expand All @@ -27,35 +28,49 @@
"ewca/civ/2022/1047",
"ewfc/2023/46",
"ewfc/2022/95",
"ewhc/admin/2006/815"
"ewhc/admin/2006/815",
]


def get_judgment_xml(url):
    """Download the XML of a single judgment from the public caselaw site.

    Args:
        url: Judgment path relative to the site root, e.g. ``"ewfc/2023/46"``.

    Returns:
        The raw body (bytes) of ``https://caselaw.nationalarchives.gov.uk/<url>/data.xml``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    print("Getting judgment: %s" % url)
    # Without an explicit timeout requests waits forever on a stalled
    # connection, which would hang the whole import script.
    response = requests.get(
        f"https://caselaw.nationalarchives.gov.uk/{url}/data.xml",
        timeout=30,
    )
    response.raise_for_status()
    return response.content


def save_judgment_xml(url, xml):
    """Store a judgment's XML in the local Marklogic database.

    The document is PUT to the REST endpoint on ``localhost:8011`` under the
    document URI ``/<url>.xml``.

    Args:
        url: Judgment path without leading slash, e.g. ``"ewfc/2023/46"``.
        xml: The document body to upload, passed to requests as ``data``.

    Raises:
        requests.HTTPError: If Marklogic answers with an error status.
        requests.Timeout: If Marklogic does not respond within the timeout.
    """
    print("Saving judgment: %s" % url)
    ml_url = f"/{url}.xml"
    response = requests.put(
        f"http://admin:admin@localhost:8011/LATEST/documents?uri={ml_url}",
        data=xml,
        # Fail fast instead of hanging if the local server is not responding.
        timeout=30,
    )
    response.raise_for_status()


def get_neighbours_for_judgment(xml):
    """Find judgments whose titles closely match the given judgment.

    Reads the judgment's title from its Akoma Ntoso metadata (the ``value``
    attribute of ``FRBRWork/FRBRname``), searches the public caselaw site for
    that title, and collects the links of the listed results.

    Args:
        xml: The judgment document as returned by the site's ``data.xml``.

    Returns:
        A list of judgment paths with the leading ``/`` removed, taken from
        the ``href`` of each ``.judgment-listing__title a`` element on the
        search results page.

    Raises:
        AttributeError: If the document has no ``FRBRname`` element.
        requests.HTTPError: If the search request returns an error status.
        requests.Timeout: If the search does not respond within the timeout.
    """
    # NOTE(review): ElementTree is not hardened against malicious XML (ruff
    # S314); acceptable only while the input comes from the trusted caselaw
    # site. Revisit if this is ever pointed at another source.
    tree = ET.fromstring(xml)
    ns = {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0"}
    title = tree.find(
        "./akn:judgment/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname",
        ns,
    ).attrib["value"]
    print("Getting neighbours for judgment title: %s" % title)
    search_url = (
        "https://caselaw.nationalarchives.gov.uk/judgments/results?query="
        + quote(title)
    )
    search_results = requests.get(search_url, timeout=30)
    # Match the other helpers: surface HTTP errors rather than silently
    # parsing an error page and reporting zero neighbours.
    search_results.raise_for_status()
    search_soup = BeautifulSoup(search_results.content, "html.parser")
    neighbours = [
        re.sub(r"^\/", "", a["href"])
        for a in search_soup.select(".judgment-listing__title a")
    ]
    print("... found %s" % len(neighbours))
    return neighbours


found = set()
for url in urls:
xml = get_judgment_xml(url)
Expand All @@ -70,4 +85,3 @@ def get_neighbours_for_judgment(xml):
print("Skipping already imported judgment %s" % url2)
print(f"**** {url} and close title matches added to local Marklogic db ****")
print("DONE. Imported %s judgments." % len(found))

2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: '3.1'
version: "3.1"

services:
marklogic:
Expand Down
54 changes: 54 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
[tool.poetry]
name = "ds-caselaw-marklogic"
version = "0.1.0"
description = ""
authors = ["David McKee <[email protected]>"]
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.9"
saxonche = "^12.3.0"


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"


[tool.ruff]
ignore = ["E501", "G004", "PLR2004", "RUF005", "RUF012", "UP040"] # long lines, fstrings in logs, magic values, consider not concat, mutable classbits, type instead of TypeAlias
extend-select = ["W", "B", "Q", "C90", "I", "UP", "YTT", "ASYNC", "S", "BLE", "A", "COM", "C4", "DTZ", "T10", "DJ", "EM", "EXE", "FA",
"ISC", "ICN", "G", "INP", "PIE", "T20", "PYI", "PT", "Q", "RSE", "RET", "SLF", "SLOT", "SIM", "TID", "TCH", "INT", "PTH",
"FIX", "PGH", "PL", "TRY", "FLY", "PERF", "RUF"]
unfixable = ["ERA"]

# things skipped:
# N: naming, possibly good
# D: docstrings missing throughout
# ANN: annotations missing throughout
# FBT: not convinced boolean trap worth auto-banning.
# CPY: copyright at top of each file
# G: logging warnings -- enabled in extend-select above; only G004 (f-strings in logs) is ignored
# ARG: sometimes you need to accept arguments.
# TD: somewhat finicky details about formatting TODOs
# FIX: flags todos -- note: currently enabled in extend-select above despite being listed here
# ERA: lots of false positives, not a good autofix
# PD, NPY, AIR: ignored, panda / numpy / airflow specific
# FURB: not yet out of preview



[tool.ruff.extend-per-file-ignores]
"*" = ["RET505", # disagree with if X: return Y else: return Z being wrong
"T201", # print
"S113", # requests no timeout : TODO
]
"tests/*" = ["S101"] # assert fine in tests
"development_scripts/populate_top_judgments_and_neighbours.py" = ["S314", "C400"] # TODO



[tool.ruff.isort]
known-first-party = ["ds-caselaw-editor-ui", "config"]

[tool.ruff.pycodestyle]
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
"role-name" : "caselaw-nobody",
"description" : "Unauthenticated user",
"role" : [ ]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@
"action" : "http://marklogic.com/xdmp/privileges/xdbc-invoke",
"kind" : "execute"
} ]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@
"action" : "http://marklogic.com/xdmp/privileges/unprotected-collections",
"kind" : "execute"
} ]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@
"action" : "http://marklogic.com/xdmp/privileges/xdmp-eval-in",
"kind" : "execute"
} ]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@
"action": "https://caselaw.nationalarchives.gov.uk/custom/privileges/can-view-unpublished-documents"
}
]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
"role-name" : "priv-api-writer",
"description" : "Can view documents, including unpublished documents, and edit",
"role" : [ "caselaw-writer", "caselaw-unpublished-reader" ]
}
}
Loading

0 comments on commit a93566e

Please sign in to comment.