Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

style: use ruff for formatting/linting #293

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions .flake8

This file was deleted.

14 changes: 14 additions & 0 deletions .github/workflows/checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,17 @@ jobs:
run: |
python -m pip install pipenv
pipenv install --skip-lock # this is what Elastic beanstalk uses
lint:
name: lint
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3

- name: Set up Python
uses: actions/setup-python@v4

- name: Install dependencies
run: python3 -m pip install '.[dev]'

- name: Check style
run: python3 -m ruff check . && ruff format --check .
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,11 @@ analysis/graph/*.ipynb

# Build files
Pipfile.lock
pyproject.toml

# DynamoDB
dynamodb_local_latest/

# Zip
*.zip

notebooks
notebooks
18 changes: 11 additions & 7 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v1.4.0
hooks:
- id: flake8
additional_dependencies: [flake8-docstrings]
- id: check-added-large-files
args: ['--maxkb=1024']
exclude: ^tests/data
- id: detect-private-key
- id: check-added-large-files
args: ['--maxkb=1024']
exclude: ^tests/data
- id: detect-private-key
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.14
hooks:
- id: ruff-format
- id: ruff
args: [ --fix, --exit-non-zero-on-fix ]
59 changes: 33 additions & 26 deletions analysis/civic/examples/harvester/civic_harvester_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,35 @@
def create_evidence_examples(data):
"""Create five CIViC evidence examples."""
evidence_items = list()
for i in range(len(data['evidence'])):
if data['evidence'][i]['assertions']:
evidence_items.append(data['evidence'][i])
for i in range(len(data["evidence"])):
if data["evidence"][i]["assertions"]:
evidence_items.append(data["evidence"][i])
if len(evidence_items) == 6:
break

for evidence_item in evidence_items:
variant_id = evidence_item['variant_id']
gene_id = evidence_item['gene_id']
assertions = evidence_item['assertions']
variant_id = evidence_item["variant_id"]
gene_id = evidence_item["gene_id"]
assertions = evidence_item["assertions"]

for v in data['variants']:
if v['id'] == variant_id:
for v in data["variants"]:
if v["id"] == variant_id:
variant = v

for g in data['genes']:
if g['id'] == gene_id:
for g in data["genes"]:
if g["id"] == gene_id:
gene = g

with open(f"{PROJECT_ROOT}/analysis/civic/examples/harvester/"
f"{evidence_item['name']}.json", 'w+') as f:
with open(
f"{PROJECT_ROOT}/analysis/civic/examples/harvester/"
f"{evidence_item['name']}.json",
"w+",
) as f:
example = {
'EVIDENCE': evidence_item,
'GENE': gene,
'VARIANT': variant,
'ASSERTIONS': assertions
"EVIDENCE": evidence_item,
"GENE": gene,
"VARIANT": variant,
"ASSERTIONS": assertions,
}

json.dump(example, f, indent=4)
Expand All @@ -45,26 +48,30 @@ def create_variant_examples(data):
"""
variants_ids = [12, 1, 221, 190]
variants = list()
for i in range(len(data['variants'])):
if data['variants'][i]['id'] in variants_ids:
variants.append(data['variants'][i])
for i in range(len(data["variants"])):
if data["variants"][i]["id"] in variants_ids:
variants.append(data["variants"][i])

for variant in variants:
with open(f"{PROJECT_ROOT}/analysis/civic/examples/harvester/"
f"{variant['name'].lower()}.json", 'w+') as f:
variant['evidence_items'] = variant['evidence_items'][0]
with open(
f"{PROJECT_ROOT}/analysis/civic/examples/harvester/"
f"{variant['name'].lower()}.json",
"w+",
) as f:
variant["evidence_items"] = variant["evidence_items"][0]
f.write(json.dumps(variant, indent=4))


if __name__ == '__main__':
if __name__ == "__main__":
c = CIViCHarvester()
c.harvest()
latest = sorted((APP_ROOT / "data" / "civic" / "harvester").glob("civic_harvester_*.json"))[-1] # noqa: E501
latest = sorted(
(APP_ROOT / "data" / "civic" / "harvester").glob("civic_harvester_*.json")
)[-1]
with open(latest, "r") as f:
civic_data = json.load(f)

civic_ex_dir =\
PROJECT_ROOT / 'analysis' / 'civic' / 'examples' / 'harvester'
civic_ex_dir = PROJECT_ROOT / "analysis" / "civic" / "examples" / "harvester"
civic_ex_dir.mkdir(exist_ok=True, parents=True)

create_evidence_examples(civic_data)
Expand Down
111 changes: 61 additions & 50 deletions analysis/civic/examples/transform/civic_transform_example.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,29 @@
"""Create an example json file for CIViC Transform."""
import json

from metakb import PROJECT_ROOT, APP_ROOT
from metakb import APP_ROOT, PROJECT_ROOT
from metakb.transform import CIViCTransform


def create_civic_example(civic_data):
"""Create CIViC transform examples from list of evidence items."""
ex = {
'statements': [],
'propositions': [],
'variation_descriptors': [],
'gene_descriptors': [],
'therapy_descriptors': [],
'disease_descriptors': [],
'methods': [],
'documents': []
"statements": [],
"propositions": [],
"variation_descriptors": [],
"gene_descriptors": [],
"therapy_descriptors": [],
"disease_descriptors": [],
"methods": [],
"documents": [],
}
supported_by_statement_ids = set()
for s in civic_data['statements']:
if s['id'] == 'civic.aid:6':
supported_by_statement_ids = \
{s for s in s['supported_by'] if s.startswith('civic.eid')}
supported_by_statement_ids.add(s['id'])
for s in civic_data["statements"]:
if s["id"] == "civic.aid:6":
supported_by_statement_ids = {
s for s in s["supported_by"] if s.startswith("civic.eid")
}
supported_by_statement_ids.add(s["id"])
break

proposition_ids = set()
Expand All @@ -32,56 +33,66 @@ def create_civic_example(civic_data):
gids = set()
methods = set()
documents = set()
for s in civic_data['statements']:
if s['id'] in supported_by_statement_ids:
ex['statements'].append(s)
proposition_ids.add(s['proposition'])
vids.add(s['variation_descriptor'])
tids.add(s['therapy_descriptor'])
dids.add(s['disease_descriptor'])
methods.add(s['method'])
documents.update({d for d in s['supported_by'] if
not d.startswith('civic.eid')})
for s in civic_data["statements"]:
if s["id"] in supported_by_statement_ids:
ex["statements"].append(s)
proposition_ids.add(s["proposition"])
vids.add(s["variation_descriptor"])
tids.add(s["therapy_descriptor"])
dids.add(s["disease_descriptor"])
methods.add(s["method"])
documents.update(
{d for d in s["supported_by"] if not d.startswith("civic.eid")}
)

for p in civic_data['propositions']:
if p['id'] in proposition_ids:
ex['propositions'].append(p)
for p in civic_data["propositions"]:
if p["id"] in proposition_ids:
ex["propositions"].append(p)

for v in civic_data['variation_descriptors']:
if v['id'] in vids:
ex['variation_descriptors'].append(v)
gids.add(v['gene_context'])
for v in civic_data["variation_descriptors"]:
if v["id"] in vids:
ex["variation_descriptors"].append(v)
gids.add(v["gene_context"])

for t in civic_data['therapy_descriptors']:
if t['id'] in tids:
ex['therapy_descriptors'].append(t)
for t in civic_data["therapy_descriptors"]:
if t["id"] in tids:
ex["therapy_descriptors"].append(t)

for d in civic_data['disease_descriptors']:
if d['id'] in dids:
ex['disease_descriptors'].append(d)
for d in civic_data["disease_descriptors"]:
if d["id"] in dids:
ex["disease_descriptors"].append(d)

for g in civic_data['gene_descriptors']:
if g['id'] in gids:
ex['gene_descriptors'].append(g)
for g in civic_data["gene_descriptors"]:
if g["id"] in gids:
ex["gene_descriptors"].append(g)

for m in civic_data['methods']:
if m['id'] in methods:
ex['methods'].append(m)
for m in civic_data["methods"]:
if m["id"] in methods:
ex["methods"].append(m)

for d in civic_data['documents']:
if d['id'] in documents:
ex['documents'].append(d)
for d in civic_data["documents"]:
if d["id"] in documents:
ex["documents"].append(d)

with open(PROJECT_ROOT / "analysis" / "civic" / "examples" / # noqa: W504
"transform" / "civic_cdm_example.json", 'w+') as f2:
with open(
PROJECT_ROOT
/ "analysis"
/ "civic"
/ "examples"
/ "transform"
/ "civic_cdm_example.json",
"w+",
) as f2:
json.dump(ex, f2, indent=4)


if __name__ == '__main__':
if __name__ == "__main__":
civic = CIViCTransform()
civic.transform()
civic.create_json()
latest = sorted((APP_ROOT / "data" / "civic" / "transform").glob("civic_cdm_*.json"))[-1] # noqa: E501
latest = sorted(
(APP_ROOT / "data" / "civic" / "transform").glob("civic_cdm_*.json")
)[-1]
with open(latest, "r") as f:
civic_data = json.load(f)
create_civic_example(civic_data)
10 changes: 5 additions & 5 deletions analysis/graph/db_helper.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
"""Utility function to load/reload graph for development."""
from metakb.database import Graph
from metakb import APP_ROOT
import json

from metakb import APP_ROOT
from metakb.database import Graph

g = Graph(uri="bolt://localhost:7687", credentials=("neo4j", "admin"))
g.clear()

fpath = APP_ROOT / 'data' / 'civic' / 'transform' / 'civic_cdm.json'
with open(fpath, 'r') as f:
fpath = APP_ROOT / "data" / "civic" / "transform" / "civic_cdm.json"
with open(fpath, "r") as f:
items = json.load(f)

count = 0
for item in items:
if 'assertion' in item.keys():
if "assertion" in item.keys():
continue
else:
g.add_transformed_data(item)
Expand Down
2 changes: 2 additions & 0 deletions analysis/graph/missing_diseases_counts.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
TALL and T-Cell Acute Lymphoid Leukemia, 6
T-Cell Acute Lymphoid Leukemia, 6
Loading
Loading