Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ci updates #7

Merged
merged 23 commits into from
Feb 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
11c579e
remove test env file to prepare for dynamic test env generation
philippguevorguian Feb 21, 2024
ae19952
remove breaking flash attention and conda-pack deps
philippguevorguian Feb 21, 2024
aa64b67
Merge branch 'main' into CI_updates
philippguevorguian Feb 21, 2024
5bc7dbb
print test information
philippguevorguian Feb 21, 2024
634a94c
add linting for github actions to pre commit
philippguevorguian Feb 23, 2024
6badeb7
run unit tests in CI via github actions
philippguevorguian Feb 23, 2024
a3dc04b
add error handling to json line loading
philippguevorguian Feb 23, 2024
6933f21
expand example unit test coverage
philippguevorguian Feb 23, 2024
8a36b75
set output directory to correspond to experiment hash
philippguevorguian Feb 23, 2024
b8494ba
remove redundant requirements file
philippguevorguian Feb 26, 2024
5f823b4
add test status
philippguevorguian Feb 26, 2024
3dbdc39
Merge branch 'main' into CI_updates
philippguevorguian Feb 26, 2024
c519710
add test status
philippguevorguian Feb 26, 2024
8677627
reintroduce separate test environment file
philippguevorguian Feb 26, 2024
cc3f3f8
add test status
philippguevorguian Feb 26, 2024
0720d06
specify project name for pip install
philippguevorguian Feb 26, 2024
a64289f
add test status
philippguevorguian Feb 26, 2024
ebe98ea
update test environment name
philippguevorguian Feb 26, 2024
a03d3a5
add test status
philippguevorguian Feb 26, 2024
d079a83
explicitly activate conda env before steps which require it
philippguevorguian Feb 26, 2024
6d7e416
add test status
philippguevorguian Feb 26, 2024
86eb89a
add default shell
philippguevorguian Feb 26, 2024
d18fb6c
add test status
philippguevorguian Feb 26, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ jobs:
test:
name: Test
runs-on: ubuntu-latest
defaults:
run:
shell: bash -el {0}

steps:
- uses: actions/checkout@v4
Expand All @@ -21,12 +24,33 @@ jobs:
with:
python-version: '3.10'

# - name: Remove flash-attn dependency
# run: |
# sed -i '/flash-attn/d' environment.yml

# - name: Remove conda-pack dependency
# run: |
# sed -i '/conda-pack/d' environment.yml

# - name: Remove chemlactica dependency
# run: |
# sed -i '/chemlactica/d' environment.yml

- name: Set up Conda
uses: conda-incubator/setup-miniconda@v2
with:
auto-update-conda: true
environment-file: test_environment.yml
activate-environment: testenv
auto-update-conda: true

- name: Install local chemlactica package within Conda environment
run: |
pip install . # Install dependencies within the Conda environment

- name: Run unit tests
run: |
python3 confirm_tests.py --run unit

- name: list commits on PR
run: |
response=$(curl --request GET \
Expand Down
5 changes: 5 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,8 @@ repos:
rev: 6.0.0
hooks:
- id: flake8

- repo: https://github.com/rhysd/actionlint
rev: v1.6.26
hooks:
- id: actionlint
1 change: 1 addition & 0 deletions chemlactica/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ def train(

broadcast_object_list(experiment_hash_list)
print(f"Process {accelerator.process_index} aim hash: {experiment_hash_list[0]}")
experiment_hash = experiment_hash_list[0]

if not valid_batch_size:
valid_batch_size = train_batch_size
Expand Down
14 changes: 8 additions & 6 deletions chemlactica/utils/dataset_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@


def load_jsonl_line(jsonl_line):
    """Parse a single JSONL line into a dictionary.

    Handles both plain JSON objects and double-encoded records (a JSON
    string whose contents are themselves a serialized JSON object).

    Args:
        jsonl_line: One line of JSONL text.

    Returns:
        The decoded dictionary.

    Raises:
        ValueError: If the line is not valid JSON (at either encoding level).
    """
    try:
        _maybe_compound_dict = json.loads(jsonl_line)
        if isinstance(_maybe_compound_dict, dict):
            return _maybe_compound_dict
        # Some writers double-encode records: the outer value decodes to a
        # JSON string containing the real object, so decode once more.
        return json.loads(_maybe_compound_dict)
    except json.JSONDecodeError as e:
        # Chain the original decode error so the root cause stays visible.
        raise ValueError(f"Error decoding JSON: {e}") from e


def generate_assay_docs(examples, train_config):
Expand Down
24 changes: 16 additions & 8 deletions confirm_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,19 @@ class TestType(enum.Enum):
INTEGRATION = "integration"


def print_test_details(unit_test_result):
    """Print a summary of a unittest result object.

    Emits run/failure/error/skip counts and overall success, then — when
    anything failed or errored — a per-test detail section.
    """
    result = unit_test_result
    summary_lines = (
        f"\nTotal Tests: {result.testsRun}",
        f"Failures: {len(result.failures)}",
        f"Errors: {len(result.errors)}",
        f"Skipped: {len(result.skipped)}",
        f"Successful: {result.wasSuccessful()}",
    )
    for line in summary_lines:
        print(line)

    problems = result.failures + result.errors
    if problems:
        print("\nDetails about failures and errors:")
        for test_case, details in problems:
            print(f"\nTest: {test_case}")
            print(f"Details: {details}")


def write_test_status(
git_commit_hash: str, status: str = "FAIL", file_name: str = "test_status"
):
Expand All @@ -31,16 +44,11 @@ def run_unit_tests():
loader = unittest.TestLoader()
# Discover and load unit tests
unit_test_suite = loader.discover("unit_tests", pattern="*test*")
for test in unit_test_suite:
print(test)

# Run the unit tests
runner = unittest.TextTestRunner()
runner = unittest.TextTestRunner(failfast=False, verbosity=2)
result = runner.run(unit_test_suite)
if result.wasSuccessful():
print("All tests passed!")
else:
print("Some tests failed.")
print_test_details(result)


if __name__ == "__main__":
Expand Down Expand Up @@ -87,7 +95,7 @@ def run_unit_tests():
confirm = args.confirm
gpus = args.gpus
if run is not None:
match run:
match (run):
case TestType.UNIT:
run_unit_tests()
case TestType.INTEGRATION:
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@ description = "Language modelling for chemistry by YerevanN"
readme = "README.md"
requires-python = ">=3.8"
license = {text = "MIT"}

[tool.setuptools]
packages = ["chemlactica"]
112 changes: 0 additions & 112 deletions requirements.txt

This file was deleted.

2 changes: 1 addition & 1 deletion test_environment.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: test_cl11.8_t_4.37
name: testenv
channels:
- pytorch
- nvidia
Expand Down
2 changes: 1 addition & 1 deletion test_status.yaml
Original file line number Diff line number Diff line change
@@ -1 +1 @@
fe40dade26e27de4bd050161752291203ce9d39a: PASS
86eb89a6651607c56835456eb9d2f0ae6cd222cc: PASS
50 changes: 49 additions & 1 deletion unit_tests/test_something.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,55 @@
import unittest
import torch
from chemlactica.utils.dataset_utils import load_jsonl_line
from chemlactica.utils.dataset_utils import group_texts
from unittest.mock import Mock


class TestDataProcessing(unittest.TestCase):
    """Trivial smoke test confirming the unittest runner is wired up."""

    def test_positive(self):
        # Diff residue in the scrape stacked the old `test_something` def on
        # the new one; this is the merged (post-PR) state of the class.
        result = 2 + 2
        self.assertEqual(result, 4, "Expected result: 4")


class TestLoadJsonlLine(unittest.TestCase):
    """Unit tests for chemlactica.utils.dataset_utils.load_jsonl_line."""

    # A valid JSONL line should decode to the matching dictionary.
    def test_load_valid_jsonl_line_as_dict(self):
        jsonl_line = """{"key": "value"}"""
        # Fixed: the original asserted the function's result against a second
        # call of itself, which could never fail. Pin the expected dict.
        self.assertEqual(load_jsonl_line(jsonl_line), {"key": "value"})

    # An empty string is not valid JSON, so ValueError is raised (the old
    # comment claiming "Returns None" was wrong — the body expects a raise).
    def test_raise_value_error_empty_line(self):
        jsonl_line = ""
        with self.assertRaises(ValueError):
            load_jsonl_line(jsonl_line)


class TestGroupTexts(unittest.TestCase):
    """Tests for chemlactica.utils.dataset_utils.group_texts."""

    def test_empty_attention_mask(self):
        # Build a stand-in tokenizer object.
        # NOTE(review): this Mock is never injected into group_texts (no
        # patch of the tokenizer lookup), so the real tokenizer path is
        # presumably still used — confirm whether patching was intended.
        mocker = Mock()
        mocker.eos_token_id = 0
        mocker.return_value = mocker

        # Create example input tensors: input_ids present, attention_mask empty.
        examples = {"input_ids": [torch.tensor([1, 2, 3])], "attention_mask": []}

        # Set train_config
        train_config = {"tokenizer_path": "path/to/tokenizer", "block_size": 3}

        # Call the group_texts function; the empty attention_mask is expected
        # to make it raise (exact exception type not pinned down here).
        with self.assertRaises(Exception):
            group_texts(examples, train_config)

    # Commented-out draft test — note it references an undefined `result`
    # and never calls group_texts, so it would fail if re-enabled as-is.
    # def test_splits_into_correct_size_chunks(self):
    #     mocker = Mock()
    #     mocker.eos_token_id = 0
    #     mocker.return_value = mocker
    #     train_config = {"block_size": 2037}
    #     examples = {"input_ids": [torch.tensor([1, 2, 3])], "attention_mask": []}

    #     self.assertTrue(all(len(ids) ==
    #     train_config["block_size"] for ids in result["input_ids"]))
    #     self.assertTrue(all(len(mask) ==
    #     train_config["block_size"] for mask in result["attention_mask"]))
Loading