From 8f7d06a70d995a5f7223d4bd432fcf38c2312166 Mon Sep 17 00:00:00 2001
From: Eve-ning <johnchangqi@hotmail.com>
Date: Fri, 24 Nov 2023 12:39:28 +0800
Subject: [PATCH 1/7] Ignore Shell files

---
 .gitignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 2c949384..03b064f5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -165,4 +165,5 @@ cython_debug/
 rsc/**/*.tif
 
 **/*/lightning_logs
-*.zip
\ No newline at end of file
+*.zip
+*.sh
\ No newline at end of file

From 6cdee0c29e5d4042dc0b98fce8f0a61f9b46a130 Mon Sep 17 00:00:00 2001
From: Eve-ning <johnchangqi@hotmail.com>
Date: Fri, 24 Nov 2023 12:43:42 +0800
Subject: [PATCH 2/7] Add check for GPU availability

---
 .github/workflows/model.yml | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/model.yml b/.github/workflows/model.yml
index 902848da..df2bfa28 100644
--- a/.github/workflows/model.yml
+++ b/.github/workflows/model.yml
@@ -12,6 +12,7 @@ jobs:
       volumes:
         - /home/runner/work/frdc-ml/_github_home:/root
       env:
+        # This is where setup-python will install and cache the venv
         AGENT_TOOLSDIRECTORY: "/root/venv"
 
     steps:
@@ -34,7 +35,11 @@ jobs:
           pip3 install -r requirements.txt
           pip3 install torch torchvision torchaudio
 
-      - name: Set up gcloud
+      - name: Check torch.cuda.is_available
+        run: |
+          python3 -c 'import torch; torch.cuda.is_available()' && exit 0 || exit 1
+
+      - name: Auth gcloud
         id: 'auth'
         uses: 'google-github-actions/auth@v1'
         with:
@@ -47,10 +52,12 @@ jobs:
         run: |
           echo "WANDB_API_KEY=${{ secrets.WANDB_API_KEY }}" >> $GITHUB_ENV
 
+      # Our project has src as a source path, explicitly add that in.
       - name: Add src as PYTHONPATH
         run: |
           echo "PYTHONPATH=src" >> $GITHUB_ENV
 
+      # Do not do cd as it'll break PYTHONPATH.
       - name: Run Model Training
         run: |
           python3 -m tests.model_tests.chestnut_dec_may.main

From 7e368686ccfbae438566168606e00435cf8400f8 Mon Sep 17 00:00:00 2001
From: Eve-ning <johnchangqi@hotmail.com>
Date: Fri, 24 Nov 2023 12:55:44 +0800
Subject: [PATCH 3/7] Re-cache venv/, add git safe.directory, and force Lightning to use GPU

---
 .github/workflows/model.yml                | 1 +
 tests/model_tests/chestnut_dec_may/main.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/model.yml b/.github/workflows/model.yml
index df2bfa28..1805699c 100644
--- a/.github/workflows/model.yml
+++ b/.github/workflows/model.yml
@@ -60,6 +60,7 @@ jobs:
       # Do not do cd as it'll break PYTHONPATH.
       - name: Run Model Training
         run: |
+          git config --global --add safe.directory /__w/FRDC-ML/FRDC-ML'
           python3 -m tests.model_tests.chestnut_dec_may.main
 
       - name: Comment results via CML
diff --git a/tests/model_tests/chestnut_dec_may/main.py b/tests/model_tests/chestnut_dec_may/main.py
index 3e2fda79..f33e408f 100644
--- a/tests/model_tests/chestnut_dec_may/main.py
+++ b/tests/model_tests/chestnut_dec_may/main.py
@@ -84,7 +84,7 @@ def train_val_test_split(
     # TODO: Though this is set, the results are still not reproducible.
     deterministic=True,
     # fast_dev_run=True,
-    accelerator="cpu",
+    accelerator="gpu",
     log_every_n_steps=4,
     callbacks=[
         # Stop training if the validation loss doesn't improve for 4 epochs

From a8fb690b475a5a7657785cef3099097d7bbf6793 Mon Sep 17 00:00:00 2001
From: Eve-ning <johnchangqi@hotmail.com>
Date: Fri, 24 Nov 2023 13:03:24 +0800
Subject: [PATCH 4/7] Remove malformed git safe.directory shell command

---
 .github/workflows/model.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/model.yml b/.github/workflows/model.yml
index 1805699c..df2bfa28 100644
--- a/.github/workflows/model.yml
+++ b/.github/workflows/model.yml
@@ -60,7 +60,6 @@ jobs:
       # Do not do cd as it'll break PYTHONPATH.
       - name: Run Model Training
         run: |
-          git config --global --add safe.directory /__w/FRDC-ML/FRDC-ML'
           python3 -m tests.model_tests.chestnut_dec_may.main
 
       - name: Comment results via CML

From 36836e8a512a5e1ebc0c2d9265cfb7d6c9e615ea Mon Sep 17 00:00:00 2001
From: Eve-ning <johnchangqi@hotmail.com>
Date: Fri, 24 Nov 2023 13:05:51 +0800
Subject: [PATCH 5/7] Add sleep for debugging

---
 .github/workflows/model.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/model.yml b/.github/workflows/model.yml
index df2bfa28..53bba981 100644
--- a/.github/workflows/model.yml
+++ b/.github/workflows/model.yml
@@ -60,6 +60,7 @@ jobs:
       # Do not do cd as it'll break PYTHONPATH.
       - name: Run Model Training
         run: |
+          sleep 1000
           python3 -m tests.model_tests.chestnut_dec_may.main
 
       - name: Comment results via CML

From 64c987720e01683b02feb557f0e4171e074ea78c Mon Sep 17 00:00:00 2001
From: Eve-ning <johnchangqi@hotmail.com>
Date: Fri, 24 Nov 2023 13:17:28 +0800
Subject: [PATCH 6/7] Add tmate debug session to workflow

---
 .github/workflows/model.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/model.yml b/.github/workflows/model.yml
index 53bba981..fe43ad1a 100644
--- a/.github/workflows/model.yml
+++ b/.github/workflows/model.yml
@@ -57,6 +57,9 @@ jobs:
         run: |
           echo "PYTHONPATH=src" >> $GITHUB_ENV
 
+      - name: Setup tmate session
+        uses: mxschmitt/action-tmate@v3
+
       # Do not do cd as it'll break PYTHONPATH.
       - name: Run Model Training
         run: |

From 9aa4f78fe192c4164badbb0a736b148c3039d2d0 Mon Sep 17 00:00:00 2001
From: Eve-ning <johnchangqi@hotmail.com>
Date: Fri, 24 Nov 2023 13:24:37 +0800
Subject: [PATCH 7/7] Fix GPU not mounted by adding --gpus all option

---
 .github/workflows/model.yml | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/model.yml b/.github/workflows/model.yml
index fe43ad1a..95d8b953 100644
--- a/.github/workflows/model.yml
+++ b/.github/workflows/model.yml
@@ -14,6 +14,7 @@ jobs:
       env:
         # This is where setup-python will install and cache the venv
         AGENT_TOOLSDIRECTORY: "/root/venv"
+      options: --gpus all
 
     steps:
       - uses: actions/checkout@v3
@@ -35,9 +36,8 @@ jobs:
           pip3 install -r requirements.txt
           pip3 install torch torchvision torchaudio
 
-      - name: Check torch.cuda.is_available
-        run: |
-          python3 -c 'import torch; torch.cuda.is_available()' && exit 0 || exit 1
+      - name: Check CUDA is available
+        run: nvidia-smi
 
       - name: Auth gcloud
         id: 'auth'
@@ -57,13 +57,9 @@ jobs:
         run: |
           echo "PYTHONPATH=src" >> $GITHUB_ENV
 
-      - name: Setup tmate session
-        uses: mxschmitt/action-tmate@v3
-
       # Do not do cd as it'll break PYTHONPATH.
       - name: Run Model Training
         run: |
-          sleep 1000
           python3 -m tests.model_tests.chestnut_dec_may.main
 
       - name: Comment results via CML