Merge pull request #16 from FR-DC/0.0.4

0.0.4
FR-DC · Nov 6, 2023 · 5fc20cd · 5fc20cd
2 parents 094b645 + 271d93d
commit 5fc20cd
Show file tree

Hide file tree

Showing 39 changed files with 1,766 additions and 229 deletions.
diff --git a/.gitignore b/.gitignore
@@ -164,9 +164,5 @@ cython_debug/
 # Ignores the raw .tif files
 rsc/**/*.tif
 
-# Ignore any secrets files
-.secrets/
-# REMOVE ONLY IF THE SECRET FILES ARE IN .secrets
-*.json
-
-**/*/lightning_logs
+**/*/lightning_logs
+*.zip
diff --git a/Writerside/c.list b/Writerside/c.list
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE categories
+        SYSTEM "https://resources.jetbrains.com/writerside/1.0/categories.dtd">
+<categories>
+    <category id="wrs" name="Writerside documentation" order="1"/>
+</categories>
diff --git a/Writerside/d.tree b/Writerside/d.tree
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE instance-profile
+        SYSTEM
+        "https://resources.jetbrains.com/writerside/1.0/product-profile.dtd">
+
+<instance-profile id="d"
+                  name="Documentation"
+                  start-page="Overview.md">
+
+    <toc-element topic="Overview.md"/>
+    <toc-element topic="Getting-Started.md"/>
+    <toc-element toc-title="Tutorials">
+        <toc-element topic="Retrieve-our-Datasets.md"/>
+    </toc-element>
+    <toc-element toc-title="Model Tests">
+        <toc-element topic="Model-Test-Chestnut-May-Dec.md"/>
+    </toc-element>
+    <toc-element toc-title="API">
+        <toc-element topic="load.dataset.md"/>
+        <toc-element topic="preprocessing.scale.md"/>
+        <toc-element topic="preprocessing.extract_segments.md"/>
+        <toc-element topic="preprocessing.morphology.md"/>
+        <toc-element topic="preprocessing.glcm_padded.md"/>
+        <toc-element topic="train.frdc_lightning.md"/>
+    </toc-element>
+</instance-profile>
diff --git a/Writerside/images/cm-chestnut-maydec.png b/Writerside/images/cm-chestnut-maydec.png
diff --git a/Writerside/images/graph-chestnut-maydec.png b/Writerside/images/graph-chestnut-maydec.png
diff --git a/Writerside/topics/Getting-Started.md b/Writerside/topics/Getting-Started.md
@@ -0,0 +1,191 @@
+# Getting Started
+
+<procedure title="Installing the Dev. Environment" id="install">
+    <step>Ensure that you have the right version of Python.
+        The required Python version can be seen in <code>pyproject.toml</code>
+        <code-block lang="ini">
+            [tool.poetry.dependencies]
+            python = "..."
+        </code-block>
+    </step>
+    <step>Start by cloning our repository.
+        <code-block lang="shell">
+          git clone https://github.com/Forest-Recovery-Digital-Companion/FRDC-ML.git
+        </code-block>
+    </step>
+    <step>Then, create a Python Virtual Env <code>pyvenv</code>
+        <tabs>
+        <tab title="Windows">
+          <code-block lang="shell">python -m venv venv/</code-block>
+        </tab>
+        <tab title="Linux">
+          <code-block lang="shell">python3 -m venv venv/</code-block>
+        </tab>
+        </tabs> 
+    </step>
+    <step>
+        <a href="https://python-poetry.org/docs/">Install Poetry</a>.
+        Then check if it's installed with
+        <code-block lang="shell">poetry --version</code-block>
+        <warning>
+        If <code>poetry</code> is not found, it's likely not in the user PATH.
+        </warning>
+    </step>
+    <step>Activate the virtual environment
+        <tabs>
+        <tab title="Windows">
+        <code-block lang="shell">
+            cd venv/Scripts
+            activate
+            cd ../..
+        </code-block>
+        </tab>
+        <tab title="Linux">
+        <code-block lang="shell">
+            source venv/bin/activate
+        </code-block>
+        </tab>
+        </tabs> 
+    </step>
+    <step>Install the dependencies. You should be in the same directory as
+        <code>pyproject.toml</code>
+        <code-block lang="shell">
+            poetry install --with dev
+        </code-block>
+    </step>
+    <step>Install Pre-Commit Hooks
+        <code-block lang="shell">
+            pre-commit install
+        </code-block>
+    </step>
+</procedure>
+
+<procedure title="Setting Up Google Cloud" id="gcloud">
+    <step>
+        We use Google Cloud to store our datasets. To set up Google Cloud,
+        <a href="https://cloud.google.com/sdk/docs/install">
+          install the Google Cloud CLI
+        </a>
+    </step>
+    <step>
+        Then,
+        <a href="https://cloud.google.com/sdk/docs/initializing">
+          authenticate your account
+        </a>.
+        <code-block lang="shell">gcloud auth login</code-block>
+    </step>
+    <step>
+        Finally, 
+        <a href="https://cloud.google.com/docs/authentication/provide-credentials-adc">
+          set up Application Default Credentials (ADC)
+        </a>.
+        <code-block lang="shell">gcloud auth application-default login</code-block>
+    </step>
+    <step>
+        To make sure everything is working, <a anchor="tests">run the tests</a>.
+    </step>
+</procedure>
+
+<procedure title="Pre-commit Hooks" collapsible="true">
+    <note>This is optional but recommended.
+    Pre-commit hooks are a way to ensure that your code is formatted correctly.
+    This is done by running a series of checks before you commit your code.
+    </note>
+    <step>
+        <code-block lang="shell">
+            pre-commit install
+        </code-block>
+    </step>
+</procedure>
+
+<procedure title="Running the Tests" collapsible="true" id="tests">
+    <step>
+        Run the tests to make sure everything is working
+        <code-block lang="shell">
+            pytest
+        </code-block>
+    </step>
+<step>
+    In case of errors:
+    <deflist>
+        <def title="google.auth.exceptions.DefaultCredentialsError">
+            If you get this error, it means that you haven't authenticated your
+            Google Cloud account.
+            See <a anchor="gcloud">Setting Up Google Cloud</a>
+        </def>
+        <def title="ModuleNotFoundError" collapsible="true">
+            If you get this error, it means that you haven't installed the
+            dependencies.
+            See <a anchor="install">Installing the Dev. Environment</a>
+        </def>
+    </deflist>
+</step>
+</procedure>
+
+
+## Our Repository Structure
+
+Before starting development, take a look at our repository structure. This will
+help you understand where to put your code.
+
+```mermaid
+graph LR
+    FRDC -- " Core Dependencies " --> src/frdc/
+    FRDC -- " Resources " --> rsc/
+    FRDC -- " Pipeline " --> pipeline/
+    FRDC -- " Tests " --> tests/
+    FRDC -- " Repo Dependencies " --> pyproject.toml,poetry.lock
+    src/frdc/ -- " Dataset Loaders " --> ./load/
+    src/frdc/ -- " Preprocessing Fn. " --> ./preprocess/
+    src/frdc/ -- " Train Deps " --> ./train/
+    src/frdc/ -- " Model Architectures " --> ./models/
+    rsc/ -- " Datasets ... " --> ./dataset_name/
+    pipeline/ -- " Model Training Pipeline " --> ./model_tests/
+```
+
+src/frdc/
+: Source Code for our package. These are the unit components of our pipeline.
+
+rsc/
+: Resources. These are usually cached datasets.
+
+pipeline/
+: Pipeline code. These are the full ML tests of our pipeline.
+
+tests/
+: PyTest tests. These are unit tests & integration tests.
+
+### Unit, Integration, and Pipeline Tests
+
+We have 3 types of tests:
+
+- Unit Tests are usually small, single function tests.
+- Integration Tests are larger tests that test a mock pipeline.
+- Pipeline Tests are the true production pipeline tests that will generate a
+  model.
+
+### Where Should I contribute?
+
+<deflist>
+<def title="Changing a small component">
+If you're changing a small component, such as an argument for preprocessing,
+a new model architecture, or a new configuration for a dataset, take a look
+at the <code>src/frdc/</code> directory.
+</def>
+<def title="Adding a test">
+When adding a new component, you'll need to add a new test. Take a look at the
+<code>tests/</code> directory.
+</def>
+<def title="Changing the pipeline">
+If you're an ML Researcher, you'll probably be changing the pipeline. Take a
+look at the <code>pipeline/</code> directory.
+</def>
+<def title="Adding a dependency">
+If you're adding a new dependency, use <code>poetry add PACKAGE</code> and
+commit the changes to <code>pyproject.toml</code> and <code>poetry.lock</code>.
+<note>
+    E.g. Adding <code>numpy</code> is the same as 
+    <code>poetry add numpy</code>
+</note>
+</def>
+</deflist>
diff --git a/Writerside/topics/Model-Test-Chestnut-May-Dec.md b/Writerside/topics/Model-Test-Chestnut-May-Dec.md
@@ -0,0 +1,113 @@
+# Model Test Chestnut May-Dec
+
+This test is used to evaluate the model performance on the Chestnut Nature Park
+May & December dataset.
+
+See this script in <code>pipeline/model_tests/chestnut_dec_may/main.py</code>.
+
+## Motivation
+
+The usage of this model will be to classify trees in unseen datasets under
+different conditions. In this test, we'll evaluate it under a different season.
+
+A caveat is that it'll be evaluated on the same set of trees, so it's not
+representative of a field test. However, given the difficulty of obtaining
+datasets, this still gives us a good preliminary idea of how the model will
+perform in different conditions.
+
+## Methodology
+
+We simply train on the December dataset, and test on the May dataset.
+
+```mermaid
+graph LR
+    Model -- Train --> DecDataset
+    Model -- Test --> MayDataset
+```
+
+> The inverse of this test is also plausible.
+
+> Ideally, we should have a Validation set to tune the hyperparameters, but
+> given the limitations of the dataset, we'll skip this step.
+> {style='warning'}
+
+## Model
+
+The current Model used is a simple InceptionV3 Transfer Learning model, with
+the last layer replaced with one or more fully connected layers.
+
+```mermaid
+graph LR
+    Input --> InceptionV3
+    InceptionV3[InceptionV3 Frozen] --> FC["FC Layer(s)"]
+    FC --> Softmax
+    Softmax --> Output
+    Input -- Cross Entropy Loss --> Output
+```
+
+> We didn't find significant evidence of improvement from using a more complex
+> FC layer, so either a single or multiple FC layers are feasible.
+
+## Preprocessing
+
+We perform the following steps:
+
+```mermaid
+graph v
+    Segment --> Scale[Scale Values to 0-1]
+    Scale --> GLCM[GLCM Step 7, Rad 3, Bin 128, Mean Feature]
+    GLCM --> ScaleNorm[Scale Values to 0 Mean 1 Var]
+    ScaleNorm --> Resize[Resize to 299x299]
+```
+
+> We need to scale to 0-1 before GLCM, so that GLCM can bin the values
+> correctly.
+
+### Augmentation
+
+The following augmentations are used:
+
+```mermaid
+graph >
+    Segment --> HFLip[Horizontal Flip 50%]
+    HFLip --> VFLip[Vertical Flip 50%]
+```
+
+> This only operates on training data.
+
+## Hyperparameters
+
+The following hyperparameters are used:
+
+- Optimizer: Adam
+- Learning Rate: 1e-3
+- Batch Size: 5
+- Epochs: 100
+- Early Stopping: 4
+
+## Results
+
+We achieve around 40% accuracy on the test set, compared to around 65% for the
+training set. Raising the training accuracy with a more complex model may
+improve the test accuracy; however, due to the instability of our test
+results, we can't be sure of this.
+
+### Result Images {collapsible="true"}
+
+<tabs>
+<tab title="Training Graph">
+<img src="graph-chestnut-maydec.png" alt="graph-chestnut-maydec.png" />
+</tab>
+<tab title="Confusion Matrix">
+<img src="cm-chestnut-maydec.png" alt="cm-chestnut-maydec.png" />
+</tab>
+</tabs>
+
+### Caveats
+
+- The test set is very small, so the results are not very representative.
+- The test set is the same set of trees, so it's not a true test of the model
+  performance in different conditions.
+- There are many classes with 1 sample, so the model may not be able to learn
+  the features of these classes well.
+
diff --git a/Writerside/topics/Overview.md b/Writerside/topics/Overview.md
@@ -0,0 +1,17 @@
+# Overview
+
+Forest Recovery Digital Companion (FRDC) is an ML-assisted companion for
+ecologists to automatically classify surveyed trees via an Unmanned Aerial
+Vehicle (UAV).
+
+This package, FRDC-ML, is the Machine Learning backbone of this project,
+a centralized repository of tools and model architectures to be used in the
+FRDC pipeline.
+
+[**Get started here**](Getting-Started.md)
+
+## Other Projects
+
+FRDC-UI
+: [The User Interface Repository](https://github.com/Forest-Recovery-Digital-Companion/FRDC-UI/) 
+for FRDC, a WebApp GUI for ecologists to adjust annotations.