Merge pull request #18 from FR-DC/0.0.5
0.0.5
Eve-ning authored Nov 23, 2023
2 parents 5fc20cd + 301b4f5 commit 177cbfe
Showing 15 changed files with 683 additions and 342 deletions.
63 changes: 63 additions & 0 deletions .github/workflows/model.yml
@@ -0,0 +1,63 @@
name: Model Training

on:
pull_request:

jobs:
build:

runs-on: self-hosted
container:
image: docker://ghcr.io/iterative/cml:0-dvc2-base1-gpu
volumes:
- /home/runner/work/frdc-ml/_github_home:/root
env:
AGENT_TOOLSDIRECTORY: "/root/venv"

steps:
- uses: actions/checkout@v3

- name: Force change owner
run: |
chown -R root: ~
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: "3.11"

- name: Install via exported requirements.txt
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest poetry
poetry export --with dev --without-hashes -o requirements.txt
pip3 install -r requirements.txt
pip3 install torch torchvision torchaudio
- name: Set up gcloud
id: 'auth'
uses: 'google-github-actions/auth@v1'
with:
credentials_json: '${{ secrets.FRDC_DOWNLOAD_KEY }}'

- name: Set up Cloud SDK
uses: 'google-github-actions/setup-gcloud@v1'

- name: Set up WandB
run: |
echo "WANDB_API_KEY=${{ secrets.WANDB_API_KEY }}" >> $GITHUB_ENV
- name: Add src as PYTHONPATH
run: |
echo "PYTHONPATH=src" >> $GITHUB_ENV
- name: Run Model Training
run: |
python3 -m tests.model_tests.chestnut_dec_may.main
- name: Comment results via CML
run: |
cml comment update \
--target=pr \
--token ${{ secrets.GITHUB_TOKEN }} \
tests/model_tests/chestnut_dec_may/report.md
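
The "Set up WandB" step above only exports WANDB_API_KEY into the job environment; the training process then authenticates implicitly when wandb.init() runs. A minimal sketch of the explicit equivalent, assuming the variable is set:

    import os
    import wandb

    # wandb picks WANDB_API_KEY up from the environment on its own; an
    # explicit login just makes the dependency on the secret visible.
    wandb.login(key=os.environ["WANDB_API_KEY"])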
7 changes: 1 addition & 6 deletions .github/workflows/python-package.yml
@@ -1,6 +1,3 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Python CI

on:
@@ -34,7 +31,7 @@ jobs:
- name: 'Set up Cloud SDK'
uses: 'google-github-actions/setup-gcloud@v1'

# We don't necessarily need the CUDA build of torch, so we'll make do with the CPU build
- name: Install dependencies
run: |
python -m pip install --upgrade pip
@@ -52,5 +49,3 @@ jobs:
- name: Test with pytest
run: |
pytest
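
This job deliberately installs the CPU-only torch wheel, so the tests must not assume CUDA. A hypothetical pytest fixture (not part of this commit) that keeps that assumption explicit:

    import pytest
    import torch

    @pytest.fixture(scope="session")
    def device() -> torch.device:
        # CI installs the CPU wheel; fall back to CPU when CUDA is absent.
        return torch.device("cuda" if torch.cuda.is_available() else "cpu")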
Empty file.
881 changes: 565 additions & 316 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -34,6 +34,7 @@ pytest = "^7.4.2"
pre-commit = "^3.5.0"
black = "^23.10.0"
flake8 = "^6.1.0"
wandb = "^0.16.0"


[tool.poetry.group.glcm.dependencies]
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -7,8 +7,8 @@

from frdc.train import FRDCDataModule
from frdc.train import FRDCModule
-from pipeline.model_tests.chestnut_dec_may.preprocess import preprocess
-from pipeline.model_tests.utils import get_dataset
+from .preprocess import preprocess
+from tests.model_tests.utils import get_dataset

# Get our Test
# TODO: Ideally, we should have a separate dataset for testing.
File renamed without changes.
@@ -3,6 +3,7 @@
This test is done by training a model on the 20201218 dataset, then testing on
the 20210510 dataset.
"""
from pathlib import Path

import lightning as pl
import numpy as np
@@ -16,19 +17,30 @@

from frdc.models import FaceNet
from frdc.train import FRDCDataModule, FRDCModule
-from pipeline.model_tests.chestnut_dec_may.augmentation import augmentation
-from pipeline.model_tests.chestnut_dec_may.preprocess import preprocess
-from pipeline.model_tests.utils import get_dataset
+from tests.model_tests.chestnut_dec_may.augmentation import augmentation
+from tests.model_tests.chestnut_dec_may.preprocess import preprocess
+from tests.model_tests.utils import get_dataset
from lightning.pytorch.loggers import WandbLogger
import wandb

assert wandb.run is None

-def train_val_test_split(x: TensorDataset) -> list[Dataset, Dataset, Dataset]:
wandb.setup(wandb.Settings(program=__name__, program_relpath=__name__))
run = wandb.init()
logger = WandbLogger(name="chestnut_dec_may", project="frdc")


+def train_val_test_split(
+    x: TensorDataset,
+) -> list[Dataset, Dataset, Dataset]:
# Defines how to split the dataset into train, val, test subsets.
# TODO: Quite ugly as it uses the global variables segments_0 and
# segments_1. Will need to refactor this.
return [
Subset(x, list(range(len(segments_0)))),
Subset(
-            x, list(range(len(segments_0), len(segments_0) + len(segments_1)))
+            x,
+            list(range(len(segments_0), len(segments_0) + len(segments_1))),
),
[],
]
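
To make the index arithmetic above concrete, a small sketch with hypothetical sizes (say 9 December segments and 4 May segments): the first dataset's rows form the train subset, the second's the validation subset, and the test subset is empty.

    # Hypothetical sizes, purely illustrative:
    n0, n1 = 9, 4  # stand-ins for len(segments_0), len(segments_1)
    train_idx = list(range(n0))          # [0, ..., 8]
    val_idx = list(range(n0, n0 + n1))   # [9, ..., 12]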
@@ -40,12 +52,13 @@ def train_val_test_split(x: TensorDataset) -> list[Dataset, Dataset, Dataset]:
"chestnut_nature_park", "20210510", "90deg43m85pct255deg/map"
)


# Concatenate the datasets
segments = [*segments_0, *segments_1]
labels = [*labels_0, *labels_1]

BATCH_SIZE = 5
-EPOCHS = 100
+EPOCHS = 50
LR = 1e-3

# Prepare the datamodule and trainer
@@ -66,9 +79,12 @@

trainer = pl.Trainer(
max_epochs=EPOCHS,
-    # fast_dev_run=True,
# Set the seed for reproducibility
# TODO: Though this is set, the results are still not reproducible.
deterministic=True,
+    # fast_dev_run=True,
accelerator="cpu",
log_every_n_steps=4,
callbacks=[
# Stop training if the validation loss doesn't improve for 4 epochs
@@ -78,11 +94,13 @@
# Save the best model
ModelCheckpoint(monitor="val_loss", mode="min", save_top_k=1),
],
logger=logger,
)

m = FRDCModule(
# Our model is the "FaceNet" model
-    # TODO: It's not really the FaceNet model, but a modified version of it.
+    # TODO: It's not really the FaceNet model,
+    # but a modified version of it.
model_cls=FaceNet,
model_kwargs=dict(n_out_classes=len(set(labels))),
# We use the Adam optimizer
@@ -94,3 +112,15 @@
trainer.fit(m, datamodule=dm)
# TODO: Quite hacky, but we need to save the label encoder for prediction.
np.save("le.npy", dm.le.classes_)

report = f"""
# Chestnut Nature Park (Dec 2020 vs May 2021)
[WandB Report]({run.get_url()})
TODO: Authentication for researchers
"""

with open(Path(__file__).parent / "report.md", "w") as f:
f.write(report)


wandb.finish()
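
Since prediction needs the same label encoding, the le.npy saved above has to be restored on the other side. A sketch of the reload, assuming dm.le is a scikit-learn LabelEncoder (the indices below are made up):

    import numpy as np
    from sklearn.preprocessing import LabelEncoder

    le = LabelEncoder()
    le.classes_ = np.load("le.npy", allow_pickle=True)
    # Map predicted class indices back to species labels:
    print(le.inverse_transform([0, 2, 1]))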
@@ -1,10 +1,12 @@
import numpy as np
import torch
-from glcm_cupy import Features
+
+# from glcm_cupy import Features
from torchvision.transforms.v2 import Resize

from frdc.models import FaceNet
-from frdc.preprocess.glcm_padded import append_glcm_padded_cached
+
+# from frdc.preprocess.glcm_padded import append_glcm_padded_cached
from frdc.preprocess.scale import scale_normal_per_band, scale_0_1_per_band


@@ -24,16 +26,17 @@ def segment_preprocess(ar: np.ndarray) -> torch.Tensor:

# Add a small epsilon to avoid upper bound of 1.0
ar = scale_0_1_per_band(ar, epsilon=0.001)
-    ar = append_glcm_padded_cached(
-        ar,
-        step_size=7,
-        bin_from=1,
-        bin_to=128,
-        radius=3,
-        features=(Features.MEAN,),
-    )
-    # We can then scale normal for better neural network convergence
+    # ar = append_glcm_padded_cached(
+    #     ar,
+    #     step_size=7,
+    #     bin_from=1,
+    #     bin_to=128,
+    #     radius=3,
+    #     features=(Features.MEAN,),
+    # )
+    # # We can then scale normal for better neural network convergence
ar = scale_normal_per_band(ar)
ar = np.rollaxis(ar, axis=2)

# TODO: Doesn't seem like we have any channel preprocessing here.
# ar = np.stack([
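
With GLCM disabled, the two per-band scaling helpers do the heavy lifting here. A hedged sketch of what they could look like — the real implementations live in frdc.preprocess.scale, so treat these as assumptions:

    import numpy as np

    def scale_0_1_per_band(ar: np.ndarray, epsilon: float = 0.0) -> np.ndarray:
        # Min-max scale each band (last axis) independently; epsilon keeps
        # the result strictly below 1.0.
        mn = np.nanmin(ar, axis=(0, 1), keepdims=True)
        mx = np.nanmax(ar, axis=(0, 1), keepdims=True)
        return (ar - mn) / (mx - mn + epsilon)

    def scale_normal_per_band(ar: np.ndarray) -> np.ndarray:
        # Standardize each band to zero mean and unit variance.
        mu = np.nanmean(ar, axis=(0, 1), keepdims=True)
        sd = np.nanstd(ar, axis=(0, 1), keepdims=True)
        return (ar - mu) / sd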
File renamed without changes.
