From 8eb93b6c28dfa5b7c0539a14d29947870f99e1e3 Mon Sep 17 00:00:00 2001 From: Evening Date: Thu, 26 Oct 2023 15:07:44 +0800 Subject: [PATCH] Update docs --- docs/HelpTOC.json | 2 +- docs/Map.jhm | 2 +- docs/getting-started.html | 18 +++++----- docs/load-dataset.html | 8 ++--- docs/model-test-chestnut-may-dec.html | 2 +- docs/overview.html | 2 +- docs/preprocessing-extract-segments.html | 16 ++++----- docs/preprocessing-glcm-padded.html | 4 +-- docs/preprocessing-morphology.html | 6 ++-- docs/preprocessing-scale.html | 4 +-- docs/retrieve-our-datasets.html | 42 ++++++++++++++++++++++++ docs/train-frdc-lightning.html | 4 +-- 12 files changed, 76 insertions(+), 34 deletions(-) create mode 100644 docs/retrieve-our-datasets.html diff --git a/docs/HelpTOC.json b/docs/HelpTOC.json index 9596ec5b..322920fe 100644 --- a/docs/HelpTOC.json +++ b/docs/HelpTOC.json @@ -1 +1 @@ -{"entities":{"pages":{"Overview":{"id":"Overview","title":"Overview","url":"overview.html","level":0,"tabIndex":0},"Getting-Started":{"id":"Getting-Started","title":"Getting Started","url":"getting-started.html","level":0,"tabIndex":1},"Model-Test-Chestnut-May-Dec":{"id":"Model-Test-Chestnut-May-Dec","title":"Model Test Chestnut May-Dec","url":"model-test-chestnut-may-dec.html","level":0,"tabIndex":2},"load.dataset":{"id":"load.dataset","title":"load.dataset","url":"load-dataset.html","level":0,"tabIndex":3},"preprocessing.scale":{"id":"preprocessing.scale","title":"preprocessing.scale","url":"preprocessing-scale.html","level":0,"tabIndex":4},"preprocessing.extract_segments":{"id":"preprocessing.extract_segments","title":"preprocessing.extract_segments","url":"preprocessing-extract-segments.html","level":0,"tabIndex":5},"preprocessing.morphology":{"id":"preprocessing.morphology","title":"preprocessing.morphology","url":"preprocessing-morphology.html","level":0,"tabIndex":6},"preprocessing.glcm_padded":{"id":"preprocessing.glcm_padded","title":"preprocessing.glcm_padded","url":"preprocessing-glcm-padded.html","level":0,"tabIndex":7},"train.frdc_lightning":{"id":"train.frdc_lightning","title":"train.frdc_datamodule \u0026 frdc_module","url":"train-frdc-lightning.html","level":0,"tabIndex":8}}},"topLevelIds":["Overview","Getting-Started","Model-Test-Chestnut-May-Dec","load.dataset","preprocessing.scale","preprocessing.extract_segments","preprocessing.morphology","preprocessing.glcm_padded","train.frdc_lightning"]} \ No newline at end of file +{"entities":{"pages":{"Overview":{"id":"Overview","title":"Overview","url":"overview.html","level":0,"tabIndex":0},"Getting-Started":{"id":"Getting-Started","title":"Getting Started","url":"getting-started.html","level":0,"tabIndex":1},"87c6272d_78682":{"id":"87c6272d_78682","title":"Tutorials","level":0,"pages":["Retrieve-our-Datasets"],"tabIndex":2},"Retrieve-our-Datasets":{"id":"Retrieve-our-Datasets","title":"Retrieve our Datasets","url":"retrieve-our-datasets.html","level":1,"parentId":"87c6272d_78682","tabIndex":0},"87c6272d_78684":{"id":"87c6272d_78684","title":"Model Tests","level":0,"pages":["Model-Test-Chestnut-May-Dec"],"tabIndex":3},"Model-Test-Chestnut-May-Dec":{"id":"Model-Test-Chestnut-May-Dec","title":"Model Test Chestnut 
May-Dec","url":"model-test-chestnut-may-dec.html","level":1,"parentId":"87c6272d_78684","tabIndex":0},"87c6272d_78686":{"id":"87c6272d_78686","title":"API","level":0,"pages":["load.dataset","preprocessing.scale","preprocessing.extract_segments","preprocessing.morphology","preprocessing.glcm_padded","train.frdc_lightning"],"tabIndex":4},"load.dataset":{"id":"load.dataset","title":"load.dataset","url":"load-dataset.html","level":1,"parentId":"87c6272d_78686","tabIndex":0},"preprocessing.scale":{"id":"preprocessing.scale","title":"preprocessing.scale","url":"preprocessing-scale.html","level":1,"parentId":"87c6272d_78686","tabIndex":1},"preprocessing.extract_segments":{"id":"preprocessing.extract_segments","title":"preprocessing.extract_segments","url":"preprocessing-extract-segments.html","level":1,"parentId":"87c6272d_78686","tabIndex":2},"preprocessing.morphology":{"id":"preprocessing.morphology","title":"preprocessing.morphology","url":"preprocessing-morphology.html","level":1,"parentId":"87c6272d_78686","tabIndex":3},"preprocessing.glcm_padded":{"id":"preprocessing.glcm_padded","title":"preprocessing.glcm_padded","url":"preprocessing-glcm-padded.html","level":1,"parentId":"87c6272d_78686","tabIndex":4},"train.frdc_lightning":{"id":"train.frdc_lightning","title":"train.frdc_datamodule \u0026 frdc_module","url":"train-frdc-lightning.html","level":1,"parentId":"87c6272d_78686","tabIndex":5}}},"topLevelIds":["Overview","Getting-Started","87c6272d_78682","87c6272d_78684","87c6272d_78686"]} \ No newline at end of file diff --git a/docs/Map.jhm b/docs/Map.jhm index 3785e5e0..2de2b668 100644 --- a/docs/Map.jhm +++ b/docs/Map.jhm @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/docs/getting-started.html b/docs/getting-started.html index 64c56e7f..ddf5b5e8 100644 --- a/docs/getting-started.html +++ b/docs/getting-started.html @@ -1,20 +1,20 @@ - Getting Started | Documentation

Documentation 0.0.4 Help

Getting Started

Installing the Dev. Environment

  1. Ensure that you have the right version of Python. The required Python version can be seen in pyproject.toml

    + Getting Started | Documentation

    Documentation 0.0.4 Help

    Getting Started

    Installing the Dev. Environment

    1. Ensure that you have the right version of Python. The required Python version can be seen in pyproject.toml

      [tool.poetry.dependencies] python = "..." -
    2. Start by cloning our repository.

      +
    3. Start by cloning our repository.

      git clone https://github.com/Forest-Recovery-Digital-Companion/FRDC-ML.git -
    4. Then, create a Python Virtual Env pyvenv

      python -m venv venv/
      python3 -m venv venv/
    5. Install Poetry. Then check if it's installed with

      poetry --version
    6. Activate the virtual environment

      +
    7. Then, create a Python Virtual Env pyvenv

      python -m venv venv/
      python3 -m venv venv/
    8. Install Poetry. Then check if it's installed with

      poetry --version
    9. Activate the virtual environment

      cd venv/Scripts activate cd ../.. -
      +
      source venv/bin/activate -
    10. Install the dependencies. You should be in the same directory as pyproject.toml

      +
  2. Install the dependencies. You should be in the same directory as pyproject.toml

    poetry install --with dev -
  3. Install Pre-Commit Hooks

    +
  4. Install Pre-Commit Hooks

    pre-commit install -

Setting Up Google Cloud

  1. We use Google Cloud to store our datasets. To set up Google Cloud, install the Google Cloud CLI

  2. Then, authenticate your account.

    gcloud auth login
  3. Finally, set up Application Default Credentials (ADC).

    gcloud auth application-default login
  4. To make sure everything is working, run the tests.

Pre-commit Hooks

  • +

Setting Up Google Cloud

  1. We use Google Cloud to store our datasets. To set up Google Cloud, install the Google Cloud CLI

  2. Then, authenticate your account.

    gcloud auth login
  3. Finally, set up Application Default Credentials (ADC).

    gcloud auth application-default login
  4. To make sure everything is working, run the tests.

Pre-commit Hooks

  • pre-commit install -

Running the Tests

  1. Run the tests to make sure everything is working

    +

Running the Tests

  1. Run the tests to make sure everything is working

    pytest -
  2. In case of errors:

    google.auth.exceptions.DefaultCredentialsError

    If you get this error, it means that you haven't authenticated your Google Cloud account. See Setting Up Google Cloud

    ModuleNotFoundError

    If you get this error, it means that you haven't installed the dependencies. See Installing the Dev. Environment

Our Repository Structure

Before starting development, take a look at our repository structure. This will help you understand where to put your code.

Core Dependencies
Resources
Pipeline
Tests
Repo Dependencies
Dataset Loaders
Preprocessing Fn.
Train Deps
Model Architectures
Datasets ...
Model Training Pipeline
FRDC
src/frdc/
rsc/
pipeline/
tests/
pyproject.toml,poetry.lock
./load/
./preprocess/
./train/
./models/
./dataset_name/
./model_tests/
src/frdc/

Source Code for our package. These are the unit components of our pipeline.

rsc/

Resources. These are usually cached datasets.

pipeline/

Pipeline code. These are the full ML tests of our pipeline.

tests/

PyTest tests. These are unit tests & integration tests.

Unit, Integration, and Pipeline Tests

We have 3 types of tests:

  • Unit Tests are usually small, single function tests.

  • Integration Tests are larger tests that test a mock pipeline.

  • Pipeline Tests are the true production pipeline tests that will generate a model.

Where Should I contribute?

Changing a small component

If you're changing a small component, such as an argument for preprocessing, a new model architecture, or a new configuration for a dataset, take a look at the src/frdc/ directory.

Adding a test

When adding a new component, you'll need to add a new test. Take a look at the tests/ directory.

Changing the pipeline

If you're an ML Researcher, you'll probably be changing the pipeline. Take a look at the pipeline/ directory.

Adding a dependency

If you're adding a new dependency, use poetry add PACKAGE and commit the changes to pyproject.toml and poetry.lock.

Last modified: 26 October 2023
\ No newline at end of file +
  • In case of errors:

    google.auth.exceptions.DefaultCredentialsError

    If you get this error, it means that you haven't authenticated your Google Cloud account. See Setting Up Google Cloud

    ModuleNotFoundError

    If you get this error, it means that you haven't installed the dependencies. See Installing the Dev. Environment

  • Our Repository Structure

    Before starting development, take a look at our repository structure. This will help you understand where to put your code.

    Core Dependencies
    Resources
    Pipeline
    Tests
    Repo Dependencies
    Dataset Loaders
    Preprocessing Fn.
    Train Deps
    Model Architectures
    Datasets ...
    Model Training Pipeline
    FRDC
    src/frdc/
    rsc/
    pipeline/
    tests/
    pyproject.toml,poetry.lock
    ./load/
    ./preprocess/
    ./train/
    ./models/
    ./dataset_name/
    ./model_tests/
    src/frdc/

    Source Code for our package. These are the unit components of our pipeline.

    rsc/

    Resources. These are usually cached datasets.

    pipeline/

    Pipeline code. These are the full ML tests of our pipeline.

    tests/

    PyTest tests. These are unit tests & integration tests.

    Unit, Integration, and Pipeline Tests

    We have 3 types of tests:

    • Unit Tests are usually small, single function tests.

    • Integration Tests are larger tests that test a mock pipeline.

    • Pipeline Tests are the true production pipeline tests that will generate a model.

    Where Should I contribute?

    Changing a small component

    If you're changing a small component, such as an argument for preprocessing, a new model architecture, or a new configuration for a dataset, take a look at the src/frdc/ directory.

    Adding a test

    When adding a new component, you'll need to add a new test. Take a look at the tests/ directory.

    Changing the pipeline

    If you're an ML Researcher, you'll probably be changing the pipeline. Take a look at the pipeline/ directory.

    Adding a dependency

    If you're adding a new dependency, use poetry add PACKAGE and commit the changes to pyproject.toml and poetry.lock.

    Last modified: 26 October 2023
    \ No newline at end of file diff --git a/docs/load-dataset.html b/docs/load-dataset.html index 7d1aedfb..e82495dc 100644 --- a/docs/load-dataset.html +++ b/docs/load-dataset.html @@ -1,4 +1,4 @@ - load.dataset | Documentation

    Documentation 0.0.4 Help

    load.dataset

    Classes

    FRDCDownloader

    This facilitates authentication and downloading from GCS.

    FRDCDataset

    This uses the Downloader to download and load the dataset. It also implements useful helper functions to load FRDC-specific datasets, such as loading our images and labels.

    Usage

    An example loading our Chestnut Nature Park dataset. We retrieve the

    • hyperspectral bands

    • order of the bands

    • bounding boxes

    • labels

    + load.dataset | Documentation

    Documentation 0.0.4 Help

    load.dataset

    Classes

    FRDCDownloader

    This facilitates authentication and downloading from GCS.

    FRDCDataset

    This uses the Downloader to download and load the dataset. It also implements useful helper functions to load FRDC-specific datasets, such as loading our images and labels.

    Usage

    An example loading our Chestnut Nature Park dataset. We retrieve the

    • hyperspectral bands

    • order of the bands

    • bounding boxes

    • labels

    from frdc.load import FRDCDataset ds = FRDCDataset(site='chestnut_nature_park', @@ -6,7 +6,7 @@ version=None, ) ar, order = ds.get_ar_bands() bounds, labels = ds.get_bounds_and_labels() -

    Custom Authentication & Downloads

    If you need granular control over

    • where the files are downloaded

    • the credentials used

    • the project used

    • the bucket used

    Then pass in a FRDCDownloader object to FRDCDataset.

    +

    Custom Authentication & Downloads

    If you need granular control over

    • where the files are downloaded

    • the credentials used

    • the project used

    • the bucket used

    Then pass in a FRDCDownloader object to FRDCDataset.

    from frdc.load import FRDCDownloader, FRDCDataset dl = FRDCDownloader(credentials=..., @@ -19,7 +19,7 @@ dl=dl) ar, order = ds.get_ar_bands() bounds, labels = ds.get_bounds_and_labels() -

    If you have a file not easily downloadable by FRDCDataset, you can use FRDCDownloader to download it.

    +

    If you have a file not easily downloadable by FRDCDataset, you can use FRDCDownloader to download it.

    from frdc.load import FRDCDownloader dl = FRDCDownloader(credentials=..., @@ -28,4 +28,4 @@ bucket_name=...) dl.download_file(path_glob="path/to/gcs/file") -

    API

    FRDCDataset

    FRDCDataset(site, date, version, dl)

    Initializes the dataset downloader.


    This doesn't immediately download the dataset, but only when you call the get_* functions.


    The site, date, version must match the dataset path on GCS. For example if the dataset is at gs://frdc-scan/my-site/20201218/90deg/map,

    • site='my-site'

    • date='20201218'

    • version='90deg/map'

    If the dataset doesn't have a "version", for example: gs://frdc-scan/my-site/20201218, then you can pass in version=None.


    get_ar_bands()

    Gets the NDArray bands (H x W x C) and channel order as tuple[np.ndarray, list[str]].


    This downloads (if missing) and retrieves the stacked NDArray bands. This wraps around get_ar_bands_as_dict(), thus if you want more control over how the bands are loaded, use that instead.

    get_ar_bands_as_dict()

    Gets the NDArray bands (H x W) as a dict[str, np.ndarray].


    This downloads (if missing) and retrieves the individual NDArray bands as a dictionary. The keys are the band names, and the values are the NDArray bands.

    get_bounds_and_labels()

    Gets the bounding boxes and labels as tuple[list[Rect], list[str]].


    This downloads (if missing) and retrieves the bounding boxes and labels as a tuple. The first element is a list of bounding boxes, and the second element is a list of labels.


    FRDCDownloader

    list_gcs_datasets(anchor)

    Lists all GCS datasets in the bucket as a DataFrame.


    This works by checking which folders have a specific file, which we call the anchor.

    download_file(path_glob, local_exists_ok)

    Downloads a file from GCS.


    This takes in a path glob, a string containing wildcards, and downloads exactly 1 file. If it matches 0 or more than 1 file, it will raise an error.


    If local_exists_ok is True, it will not download the file if it already exists locally. However, if it's False, it will download the file only if the hashes don't match.

    Last modified: 26 October 2023
    \ No newline at end of file +

    API

    FRDCDataset

    FRDCDataset(site, date, version, dl)

    Initializes the dataset downloader.


    This doesn't immediately download the dataset, but only when you call the get_* functions.


    The site, date, version must match the dataset path on GCS. For example if the dataset is at gs://frdc-scan/my-site/20201218/90deg/map,

    • site='my-site'

    • date='20201218'

    • version='90deg/map'

    If the dataset doesn't have a "version", for example: gs://frdc-scan/my-site/20201218, then you can pass in version=None.
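    For instance, the example paths above map to these constructor calls (a minimal sketch, using the import shown in the Usage section above):

    from frdc.load import FRDCDataset

    # Dataset at gs://frdc-scan/my-site/20201218/90deg/map
    ds = FRDCDataset(site='my-site', date='20201218', version='90deg/map')

    # Dataset at gs://frdc-scan/my-site/20201218 (no version)
    ds_no_version = FRDCDataset(site='my-site', date='20201218', version=None)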


    get_ar_bands()

    Gets the NDArray bands (H x W x C) and channel order as tuple[np.ndarray, list[str]].


    This downloads (if missing) and retrieves the stacked NDArray bands. This wraps around get_ar_bands_as_dict(), thus if you want more control over how the bands are loaded, use that instead.

    get_ar_bands_as_dict()

    Gets the NDArray bands (H x W) as a dict[str, np.ndarray].


    This downloads (if missing) and retrieves the individual NDArray bands as a dictionary. The keys are the band names, and the values are the NDArray bands.

    get_bounds_and_labels()

    Gets the bounding boxes and labels as tuple[list[Rect], list[str]].


    This downloads (if missing) and retrieves the bounding boxes and labels as a tuple. The first element is a list of bounding boxes, and the second element is a list of labels.


    FRDCDownloader

    list_gcs_datasets(anchor)

    Lists all GCS datasets in the bucket as a DataFrame.


    This works by checking which folders have a specific file, which we call the anchor.

    download_file(path_glob, local_exists_ok)

    Downloads a file from GCS.


    This takes in a path glob, a string containing wildcards, and downloads exactly 1 file. If it matches 0 or more than 1 file, it will raise an error.


    If local_exists_ok is True, it will not download the file if it already exists locally. However, if it's False, it will download the file only if the hashes don't match.
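    For instance, reusing the dl object from the usage example above (the path is a placeholder):

    # Skip re-downloading when a matching file already exists locally.
    dl.download_file(path_glob="path/to/gcs/file", local_exists_ok=True)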

    Last modified: 26 October 2023
    \ No newline at end of file diff --git a/docs/model-test-chestnut-may-dec.html b/docs/model-test-chestnut-may-dec.html index 6c0777ab..7a6a2e70 100644 --- a/docs/model-test-chestnut-may-dec.html +++ b/docs/model-test-chestnut-may-dec.html @@ -1 +1 @@ - Model Test Chestnut May-Dec | Documentation

    Documentation 0.0.4 Help

    Model Test Chestnut May-Dec

    This test is used to evaluate the model performance on the Chestnut Nature Park May & December dataset.

    See this script in pipeline/model_tests/chestnut_dec_may/main.py.

    Motivation

    The usage of this model will be to classify trees in unseen datasets under different conditions. In this test, we'll evaluate it under a different season.

    A caveat is that it'll be evaluated on the same set of trees, so it's not representative of a field test. However, given the difficulty of collecting new datasets, this still gives us a good preliminary idea of how the model will perform under different conditions.

    Methodology

    We simply train on the December dataset, and test on the May dataset.

    Train
    Test
    Model
    DecDataset
    MayDataset

    Model

    The current model is a simple InceptionV3 transfer learning model, with the last layer replaced by fully connected layer(s).

    Cross Entropy Loss
    Input
    InceptionV3 Frozen
    FC Layer(s)
    Softmax
    Output

    Preprocessing

    We perform the following steps:

    Segment
    Scale Values to 0-1
    GLCM Step 7, Rad 3, Bin 128, Mean Feature
    Scale Values to 0 Mean 1 Var
    Resize to 299x299

    Augmentation

    The following augmentations are used:

    Segment
    Horizontal Flip 50%
    Vertical Flip 50%

    Hyperparameters

    The following hyperparameters are used:

    • Optimizer: Adam

    • Learning Rate: 1e-3

    • Batch Size: 5

    • Epochs: 100

    • Early Stopping: 4

    Results

    We yield around 40% accuracy on the test set, compared to around 65% on the training set. Raising the training accuracy with a more complex model may improve the test accuracy; however, due to the instability of our test results, we can't be sure of this.

    Result Images

    graph-chestnut-maydec.png
    cm-chestnut-maydec.png

    Caveats

    • The test set is very small, so the results are not very representative.

    • The test set uses the same set of trees, so it's not a true test of the model's performance under different conditions.

    • There are many classes with 1 sample, so the model may not be able to learn the features of these classes well.

    Last modified: 26 October 2023
    \ No newline at end of file + Model Test Chestnut May-Dec | Documentation

    Documentation 0.0.4 Help

    Model Test Chestnut May-Dec

    This test is used to evaluate the model performance on the Chestnut Nature Park May & December dataset.

    See this script in pipeline/model_tests/chestnut_dec_may/main.py.

    Motivation

    The usage of this model will be to classify trees in unseen datasets under different conditions. In this test, we'll evaluate it under a different season.

    A caveat is that it'll be evaluated on the same set of trees, so it's not representative of a field test. However, given the difficulty of collecting new datasets, this still gives us a good preliminary idea of how the model will perform under different conditions.

    Methodology

    We simply train on the December dataset, and test on the May dataset.

    Train
    Test
    Model
    DecDataset
    MayDataset

    Model

    The current model is a simple InceptionV3 transfer learning model, with the last layer replaced by fully connected layer(s).

    Cross Entropy Loss
    Input
    InceptionV3 Frozen
    FC Layer(s)
    Softmax
    Output

    Preprocessing

    We perform the following steps (a rough code sketch follows below):

    Segment
    Scale Values to 0-1
    GLCM Step 7, Rad 3, Bin 128, Mean Feature
    Scale Values to 0 Mean 1 Var
    Resize to 299x299
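    A rough, non-verbatim sketch of this chain using the documented frdc functions (the GLCM feature selection, resize step, and exact wiring here are assumptions, not the pipeline's actual code):

    import numpy as np
    from skimage.transform import resize
    from glcm_cupy import Features

    from frdc.load import FRDCDataset
    from frdc.preprocess.extract_segments import extract_segments_from_bounds
    from frdc.preprocess.glcm_padded import append_glcm_padded_cached
    from frdc.preprocess.scale import scale_0_1_per_band, scale_normal_per_band

    ds = FRDCDataset(site='chestnut_nature_park', date='20201218', version=None)
    ar, order = ds.get_ar_bands()
    bounds, labels = ds.get_bounds_and_labels()

    preprocessed: list[np.ndarray] = []
    for seg in extract_segments_from_bounds(ar, bounds):       # Segment
        seg = scale_0_1_per_band(seg)                          # Scale values to 0-1
        seg = append_glcm_padded_cached(                       # GLCM: step 7, radius 3,
            seg, bin_from=1, bin_to=128,                       # bin 128, mean feature
            radius=3, step_size=7, features=(Features.MEAN,),
        )
        seg = scale_normal_per_band(seg)                       # Scale to 0 mean, 1 var
        seg = resize(seg, (299, 299, seg.shape[-1]))           # Resize to 299x299
        preprocessed.append(seg)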

    Augmentation

    The following augmentations are used:

    Segment
    Horizontal Flip 50%
    Vertical Flip 50%

    Hyperparameters

    The following hyperparameters are used:

    • Optimizer: Adam

    • Learning Rate: 1e-3

    • Batch Size: 5

    • Epochs: 100

    • Early Stopping: 4

    Results

    We yield around 40% accuracy on the test set, compared to around 65% on the training set. Raising the training accuracy with a more complex model may improve the test accuracy; however, due to the instability of our test results, we can't be sure of this.

    Result Images

    graph-chestnut-maydec.png
    cm-chestnut-maydec.png

    Caveats

    • The test set is very small, so the results are not very representative.

    • The test set uses the same set of trees, so it's not a true test of the model's performance under different conditions.

    • There are many classes with 1 sample, so the model may not be able to learn the features of these classes well.

    Last modified: 26 October 2023
    \ No newline at end of file diff --git a/docs/overview.html b/docs/overview.html index e1030fc2..72478e87 100644 --- a/docs/overview.html +++ b/docs/overview.html @@ -1 +1 @@ - Overview | Documentation

    Documentation 0.0.4 Help

    Overview

    Forest Recovery Digital Companion (FRDC) is an ML-assisted companion for ecologists to automatically classify surveyed trees via an Unmanned Aerial Vehicle (UAV).

    This package, FRDC-ML, is the Machine Learning backbone of this project: a centralized repository of tools and model architectures to be used in the FRDC pipeline.

    Get started here

    Other Projects

    FRDC-UI

    The User Interface Repository for FRDC, a WebApp GUI for ecologists to adjust annotations.

    Last modified: 26 October 2023
    \ No newline at end of file + Overview | Documentation

    Documentation 0.0.4 Help

    Overview

    Forest Recovery Digital Companion (FRDC) is an ML-assisted companion for ecologists to automatically classify surveyed trees via an Unmanned Aerial Vehicle (UAV).

    This package, FRDC-ML, is the Machine Learning backbone of this project: a centralized repository of tools and model architectures to be used in the FRDC pipeline.

    Get started here

    Other Projects

    FRDC-UI

    The User Interface Repository for FRDC, a WebApp GUI for ecologists to adjust annotations.

    Last modified: 26 October 2023
    \ No newline at end of file diff --git a/docs/preprocessing-extract-segments.html b/docs/preprocessing-extract-segments.html index 163678d2..39000f35 100644 --- a/docs/preprocessing-extract-segments.html +++ b/docs/preprocessing-extract-segments.html @@ -1,4 +1,4 @@ - preprocessing.extract_segments | Documentation

    Documentation 0.0.4 Help

    preprocessing.extract_segments

    Functions

    extract_segments_from_labels

    Extracts segments from a label classification.

    extract_segments_from_bounds

    Extracts segments from Rect bounds.

    remove_small_segments_from_labels

    Removes small segments from a label classification.

    Extract with Boundaries

    A boundary is a Rect object that represents the minimum bounding box of a segment, with x0, y0, x1, y1 coordinates.

    It simply slices the original image to the bounding box. The origin is the top left corner of the image.

    + preprocessing.extract_segments | Documentation

    Documentation 0.0.4 Help

    preprocessing.extract_segments

    Functions

    extract_segments_from_labels

    Extracts segments from a label classification.

    extract_segments_from_bounds

    Extracts segments from Rect bounds.

    remove_small_segments_from_labels

    Removes small segments from a label classification.

    Extract with Boundaries

    A boundary is a Rect object that represents the minimum bounding box of a segment, with x0, y0, x1, y1 coordinates.

    It simply slices the original image to the bounding box. The origin is the top left corner of the image.

    +-----------------+ +-----------+ | Original | | Segmented | | Image | | Image | @@ -9,7 +9,7 @@ +-----+-----+-----+ 1, 2, 0, 2 +-----+-----+ | 7 | 8 | 9 | x0 y0 x1 y1 | 8 | 9 | +-----+-----+-----+ +-----+-----+ -
    +
    +-----------------+ +-----------------+ | Original | | Segmented | | Image | | Image | @@ -20,7 +20,7 @@ +-----+-----+-----+ 1, 2, 0, 2 +-----+-----+-----+ | 7 | 8 | 9 | x0 y0 x1 y1 | 0 | 8 | 9 | +-----+-----+-----+ +-----+-----+-----+ -

    Extract with Labels

    A label classification is a np.ndarray where each pixel is mapped to a segment. The segments are mapped to a unique integer. In our project, the 0th label is the background.

    For example, a label classification of 3 segments will look like this:

    +

    Extract with Labels

    A label classification is a np.ndarray where each pixel is mapped to a segment. The segments are mapped to a unique integer. In our project, the 0th label is the background.

    For example, a label classification of 3 segments will look like this:

    +-----------------+ +-----------------+ | Label | | Original | | Classification | | Image | @@ -31,7 +31,7 @@ +-----+-----+-----+ +-----+-----+-----+ | 1 | 1 | 0 | | 7 | 8 | 9 | +-----+-----+-----+ +-----+-----+-----+ -

    The extraction will take the minimum bounding box of each segment and return a list of segments.

    For example, the label 1 and 2 extracted images will be

    +

    The extraction will take the minimum bounding box of each segment and return a list of segments.

    For example, the label 1 and 2 extracted images will be

    +-----------+ +-----------+ | Extracted | | Extracted | | Segment 1 | | Segment 2 | @@ -42,7 +42,7 @@ +-----+-----+ +-----+-----+ | 7 | 8 | +-----+-----+ -
    +
    +-----------------+ +-----------------+ | Extracted | | Extracted | | Segment 1 | | Segment 2 | @@ -53,7 +53,7 @@ +-----+-----+-----+ +-----+-----+-----+ | 7 | 8 | 0 | | 0 | 0 | 0 | +-----+-----+-----+ +-----+-----+-----+ -
    • If cropped is False, the segments are padded with 0s to the original image size. While this can ensure shape consistency, it can consume more memory for large images.

    • If cropped is True, the segments are cropped to the minimum bounding box. This can save memory, but the shape of the segments will be inconsistent.

    Usage

    Extract from Bounds and Labels

    Extract segments from bounds and labels.

    +
    • If cropped is False, the segments are padded with 0s to the original image size. While this can ensure shape consistency, it can consume more memory for large images.

    • If cropped is True, the segments are cropped to the minimum bounding box. This can save memory, but the shape of the segments will be inconsistent.
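    As an illustration, assuming the cropped argument listed in the API below (not verbatim from the pipeline):

    import numpy as np
    from frdc.load import FRDCDataset
    from frdc.preprocess.extract_segments import extract_segments_from_bounds

    ds = FRDCDataset(site='chestnut_nature_park', date='20201218', version=None)
    ar, order = ds.get_ar_bands()
    bounds, labels = ds.get_bounds_and_labels()

    # cropped=False: every segment keeps the original H x W, zero-padded outside its bounds.
    segments_padded: list[np.ndarray] = extract_segments_from_bounds(ar, bounds, cropped=False)

    # cropped=True: each segment is cut down to its own bounding box, so shapes vary.
    segments_cropped: list[np.ndarray] = extract_segments_from_bounds(ar, bounds, cropped=True)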

    Usage

    Extract from Bounds and Labels

    Extract segments from bounds and labels.

    import numpy as np from frdc.load import FRDCDataset from frdc.preprocess.extract_segments import extract_segments_from_bounds @@ -65,7 +65,7 @@ bounds, labels = ds.get_bounds_and_labels() segments: list[np.ndarray] = extract_segments_from_bounds(ar, bounds) -

    Extract from Auto-Segmentation

    Extract segments from a label classification.

    +

    Extract from Auto-Segmentation

    Extract segments from a label classification.

    from skimage.morphology import remove_small_objects, remove_small_holes import numpy as np @@ -91,4 +91,4 @@ min_height=10, min_width=10) segments: list[np.ndarray] = extract_segments_from_labels(ar, ar_labels) -

    API

    extract_segments_from_labels(ar, ar_labels, cropped)

    Extracts segments from a label classification.


    ar_labels is a label classification as a np.ndarray

    extract_segments_from_bounds(ar, bounds, cropped)

    Extracts segments from Rect bounds.


    bounds is a list of Rect bounds.

    remove_small_segments_from_labels(ar_labels, min_height, min_width)

    Removes small segments from a label classification.


    Last modified: 26 October 2023
    \ No newline at end of file +

    API

    extract_segments_from_labels(ar, ar_labels, cropped)

    Extracts segments from a label classification.


    ar_labels is a label classification as a np.ndarray

    extract_segments_from_bounds(ar, bounds, cropped)

    Extracts segments from Rect bounds.


    bounds is a list of Rect bounds.

    remove_small_segments_from_labels(ar_labels, min_height, min_width)

    Removes small segments from a label classification.


    Last modified: 26 October 2023
    \ No newline at end of file diff --git a/docs/preprocessing-glcm-padded.html b/docs/preprocessing-glcm-padded.html index 32feea67..11159608 100644 --- a/docs/preprocessing-glcm-padded.html +++ b/docs/preprocessing-glcm-padded.html @@ -1,4 +1,4 @@ - preprocessing.glcm_padded | Documentation

    Documentation 0.0.4 Help

    preprocessing.glcm_padded

    Functions

    glcm_padded

    Computes the GLCM of the NDArray bands with padding.

    glcm_padded_cached

    Computes the GLCM of the NDArray bands with padding, and caches it.

    append_glcm_padded_cached

    Computes the GLCM of the NDArray bands with padding, and caches it and also appends it onto the original array.

    Usage

    We show a few examples of how to use the GLCM functions.

    + preprocessing.glcm_padded | Documentation

    Documentation 0.0.4 Help

    preprocessing.glcm_padded

    Functions

    glcm_padded

    Computes the GLCM of the NDArray bands with padding.

    glcm_padded_cached

    Computes the GLCM of the NDArray bands with padding, and caches it.

    append_glcm_padded_cached

    Computes the GLCM of the NDArray bands with padding, and caches it and also appends it onto the original array.

    Usage

    We show a few examples of how to use the GLCM functions.

    import numpy as np from glcm_cupy import Features @@ -23,4 +23,4 @@ ar_glcm_cached_appended = append_glcm_padded_cached(ar, bin_from=1, bin_to=4, radius=3) -
    • ar_glcm is the GLCM of the original array, with the last dimension being the GLCM features. The number of features is determined by the features parameter, which defaults to all features.

    • ar_glcm_2_features selects only 2 features, with the last dimension being the 2 GLCM features specified.

    • ar_glcm_cached caches the GLCM so that if you call it again, it will return the cached version. It stores its data at the project root dir, under .cache/.

    • ar_glcm_cached_appended is a wrapper around ar_glcm_cached; it appends the GLCM features onto the original array. It's equivalent to calling ar_glcm_cached and then np.concatenate on the final axis.

    Caching

    GLCM is an expensive operation, thus we recommend caching it if the input parameters will be the same. This is especially useful if you're experimenting with the same dataset with constant parameters.

    API

    glcm_padded(ar, bin_from, bin_to, radius, step_size, features)

    Computes the GLCM of the NDArray bands with padding.


    • ar is the input array

    • bin_from is the upper bound of the input

    • bin_to is the upper bound of the GLCM input, i.e. the resolution that GLCM operates on

    • radius is the radius of the GLCM

    • step_size is the step size of the GLCM

    • features is the list of GLCM features to compute

    The return shape is

    See glcm_cupy for the GLCM Features.

    glcm_padded_cached(ar, bin_from, bin_to, radius, step_size, features)

    Computes the GLCM of the NDArray bands with padding, and caches it.


    See glcm_padded for the parameters and output shape

    append_glcm_padded_cached(ar, bin_from, bin_to, radius, step_size, features)

    Computes the GLCM of the NDArray bands with padding, and caches it and also appends it onto the original array.


    See glcm_padded for the parameters


    The return shape is:

    The function automatically flattens the last 2 dimensions of the GLCM features, and appends it onto the original array.

    Last modified: 26 October 2023
    \ No newline at end of file +
    • ar_glcm is the GLCM of the original array, with the last dimension being the GLCM features. The number of features is determined by the features parameter, which defaults to all features.

    • ar_glcm_2_features selects only 2 features, with the last dimension being the 2 GLCM features specified.

    • ar_glcm_cached caches the GLCM so that if you call it again, it will return the cached version. It stores its data at the project root dir, under .cache/.

    • ar_glcm_cached_appended is a wrapper around ar_glcm_cached; it appends the GLCM features onto the original array. It's equivalent to calling ar_glcm_cached and then np.concatenate on the final axis.

    Caching

    GLCM is an expensive operation, thus we recommend caching it if the input parameters will be the same. This is especially useful if you're experimenting with the same dataset with constant parameters.

    API

    glcm_padded(ar, bin_from, bin_to, radius, step_size, features)

    Computes the GLCM of the NDArray bands with padding.


    • ar is the input array

    • bin_from is the upper bound of the input

    • bin_to is the upper bound of the GLCM input, i.e. the resolution that GLCM operates on

    • radius is the radius of the GLCM

    • step_size is the step size of the GLCM

    • features is the list of GLCM features to compute

    The return shape is

    See glcm_cupy for the GLCM Features.

    glcm_padded_cached(ar, bin_from, bin_to, radius, step_size, features)

    Computes the GLCM of the NDArray bands with padding, and caches it.


    See glcm_padded for the parameters and output shape

    append_glcm_padded_cached(ar, bin_from, bin_to, radius, step_size, features)

    Computes the GLCM of the NDArray bands with padding, and caches it and also appends it onto the original array.


    See glcm_padded for the parameters


    The return shape is:

    The function automatically flattens the last 2 dimensions of the GLCM features, and appends it onto the original array.
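    A small NumPy sketch of that flatten-and-append behaviour (the shapes here are illustrative assumptions):

    import numpy as np

    H, W, C, F = 8, 8, 3, 7           # F = number of GLCM features computed
    ar = np.zeros((H, W, C))          # original bands
    ar_glcm = np.zeros((H, W, C, F))  # GLCM output: one feature block per band

    # Flatten the last two dimensions (C, F) -> C * F, then append to the original bands.
    ar_appended = np.concatenate([ar, ar_glcm.reshape(H, W, C * F)], axis=-1)
    print(ar_appended.shape)          # (8, 8, 3 + 3 * 7) -> (8, 8, 24)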

    Last modified: 26 October 2023
    \ No newline at end of file diff --git a/docs/preprocessing-morphology.html b/docs/preprocessing-morphology.html index 39df4e66..db402a61 100644 --- a/docs/preprocessing-morphology.html +++ b/docs/preprocessing-morphology.html @@ -1,4 +1,4 @@ - preprocessing.morphology | Documentation

    Documentation 0.0.4 Help

    preprocessing.morphology

    Functions

    threshold_binary_mask

    Thresholds a selected NDArray band to yield a binary mask.

    binary_watershed

    Performs watershed on a binary mask to yield a mapped label classification.

    Usage

    Perform auto-segmentation on a dataset to yield a label classification.

    + preprocessing.morphology | Documentation

    Documentation 0.0.4 Help

    preprocessing.morphology

    Functions

    threshold_binary_mask

    Thresholds a selected NDArray band to yield a binary mask.

    binary_watershed

    Performs watershed on a binary mask to yield a mapped label classification.

    Usage

    Perform auto-segmentation on a dataset to yield a label classification.

    from frdc.load import FRDCDataset from frdc.preprocess.morphology import ( threshold_binary_mask, binary_watershed @@ -10,6 +10,6 @@ ar, order = ds.get_ar_bands() mask = threshold_binary_mask(ar, order.index('NIR'), 90 / 256) ar_label = binary_watershed(mask) -

    API

    threshold_binary_mask(ar, band_idx, threshold_value)

    Thresholds a selected NDArray band to yield a binary mask as an np.ndarray.


    This is equivalent to

    +

    API

    threshold_binary_mask(ar, band_idx, threshold_value)

    Thresholds a selected NDArray band to yield a binary mask as an np.ndarray.


    This is equivalent to

    ar[..., band_idx] > threshold_value -
    binary_watershed(ar_mask, peaks_footprint, watershed_compactness)

    Performs watershed on a binary mask to yield a mapped label classification as a np.ndarray


    • peaks_footprint is the footprint of skimage.feature.peak_local_max

    • watershed_compactness is the compactness of skimage.morphology.watershed

    Last modified: 26 October 2023
    \ No newline at end of file +
    binary_watershed(ar_mask, peaks_footprint, watershed_compactness)

    Performs watershed on a binary mask to yield a mapped label classification as a np.ndarray


    • peaks_footprint is the footprint of skimage.feature.peak_local_max

    • watershed_compactness is the compactness of skimage.morphology.watershed

    Last modified: 26 October 2023
    \ No newline at end of file diff --git a/docs/preprocessing-scale.html b/docs/preprocessing-scale.html index 41504a1d..28868ce7 100644 --- a/docs/preprocessing-scale.html +++ b/docs/preprocessing-scale.html @@ -1,4 +1,4 @@ - preprocessing.scale | Documentation

    Documentation 0.0.4 Help

    preprocessing.scale

    Functions

    scale_0_1_per_band

    Scales the NDArray bands to [0, 1] per band.

    scale_normal_per_band

    Scales the NDArray bands to zero mean unit variance per band.

    scale_static_per_band

    Scales the NDArray bands by a predefined configuration.

    Usage

    + preprocessing.scale | Documentation

    Documentation 0.0.4 Help

    preprocessing.scale

    Functions

    scale_0_1_per_band

    Scales the NDArray bands to [0, 1] per band.

    scale_normal_per_band

    Scales the NDArray bands to zero mean unit variance per band.

    scale_static_per_band

    Scales the NDArray bands by a predefined configuration.

    Usage

    from frdc.load import FRDCDataset from frdc.preprocess.scale import ( scale_0_1_per_band, scale_normal_per_band, scale_static_per_band @@ -12,4 +12,4 @@ ar_01 = scale_0_1_per_band(ar) ar_norm = scale_normal_per_band(ar) ar_static = scale_static_per_band(ar, order, BAND_MAX_CONFIG) -

    API

    scale_0_1_per_band(ar)

    Scales the NDArray bands to [0, 1] per band.


    scale_normal_per_band(ar)

    Scales the NDArray bands to zero mean unit variance per band.


    scale_static_per_band(ar, order, config)

    Scales the NDArray bands by a predefined configuration.


    The config is a dict[str, tuple[int, int]] where the key is the band name, and the value is a tuple of (min, max). Take a look at frdc.conf.BAND_MAX_CONFIG for an example.

    Last modified: 26 October 2023
    \ No newline at end of file +

    API

    scale_0_1_per_band(ar)

    Scales the NDArray bands to [0, 1] per band.


    scale_normal_per_band(ar)

    Scales the NDArray bands to zero mean unit variance per band.


    scale_static_per_band(ar, order, config)

    Scales the NDArray bands by a predefined configuration.


    The config is a dict[str, tuple[int, int]] where the key is the band name, and the value is a tuple of (min, max). Take a look at frdc.conf.BAND_MAX_CONFIG for an example.
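    For illustration, a config of that shape might look like this (the band names and ranges below are made up, not the real BAND_MAX_CONFIG):

    # Hypothetical dict[str, tuple[int, int]] scaling config.
    MY_BAND_CONFIG: dict[str, tuple[int, int]] = {
        "NIR": (0, 2 ** 14),   # band name -> (min, max); values are illustrative only
        "RED": (0, 2 ** 14),
    }

    ar_static = scale_static_per_band(ar, order, MY_BAND_CONFIG)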

    Last modified: 26 October 2023
    \ No newline at end of file diff --git a/docs/retrieve-our-datasets.html b/docs/retrieve-our-datasets.html new file mode 100644 index 00000000..a9e52257 --- /dev/null +++ b/docs/retrieve-our-datasets.html @@ -0,0 +1,42 @@ + Retrieve our Datasets | Documentation

    Documentation 0.0.4 Help

    Retrieve our Datasets

    In this tutorial, we'll learn how to:

    • Retrieve FRDC's Hyperspectral Image Data as np.ndarray

    • Retrieve FRDC's Ground Truth bounds and labels

    • Slice/segment the image data by the bounds

    Prerequisites

    • New here? Get Started.

    • Set up the Google Cloud authorization to download the data.

    Retrieve the Data

    To retrieve the data, use FRDCDataset.

    Here, we'll download and load our

    • ar: Hyperspectral Image Data

    • order: The order of the bands

    • bounds: The bounds of the trees (segments)

    • labels: The labels of the trees (segments)

    from frdc.load.dataset import FRDCDataset

    ds = FRDCDataset(site="chestnut_nature_park", date="20201218", version=None)
    ar, order = ds.get_ar_bands()
    bounds, labels = ds.get_bounds_and_labels()

    What Datasets are there?

    >>> from frdc.load.dataset import FRDCDownloader
    >>> df = FRDCDownloader().list_gcs_datasets()
    >>> print(df)
    # 0  DEBUG/0
    # 1  casuarina/20220418/183deg
    # 2  casuarina/20220418/93deg
    # 3  chestnut_nature_park/20201218
    # ...
    • The first part of the path is the site, and the second part is the date.

    • The version is the rest of the path; if there isn't any, use None.

    For example, these (site, date, version) combinations correspond to paths of the form ds/date/ver, ds/date/ver/01/data, and ds/date respectively:

    • site="ds", date="date", version="ver"

    • site="ds", date="date", version="ver/01/data"

    • site="ds", date="date", version=None

    Segment the Data

    To segment the data, use Extract Segments.

    Here, we'll segment the data by the bounds.

    from frdc.load.dataset import FRDCDataset
    from frdc.preprocess.extract_segments import extract_segments_from_bounds

    ds = FRDCDataset(site="chestnut_nature_park", date="20201218", version=None)
    ar, order = ds.get_ar_bands()
    bounds, labels = ds.get_bounds_and_labels()
    segments = extract_segments_from_bounds(ar, bounds)

    segments is a list of np.ndarray of shape (H, W, C), each representing a tree. The order of segments is the same as labels, so you can use labels to identify each tree.
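    For example, to pair each segment with its label:

    # The order of segments and labels is aligned, so zip pairs them up correctly.
    for segment, label in zip(segments, labels):
        print(label, segment.shape)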

    Plot the Data (Optional)

    We can then use this data to plot the first tree segment.

    import matplotlib.pyplot as plt

    from frdc.load.dataset import FRDCDataset
    from frdc.preprocess.extract_segments import extract_segments_from_bounds
    from frdc.preprocess.scale import scale_0_1_per_band

    ds = FRDCDataset(site="chestnut_nature_park", date="20201218", version=None)
    ar, order = ds.get_ar_bands()
    bounds, labels = ds.get_bounds_and_labels()
    segments = extract_segments_from_bounds(ar, bounds)
    segment_0_bgr = segments[0]
    segment_0_rgb = segment_0_bgr[..., [2, 1, 0]]
    segment_0_rgb_scaled = scale_0_1_per_band(segment_0_rgb)

    plt.imshow(segment_0_rgb_scaled)
    plt.title(f"Tree {labels[0]}")
    plt.show()

    See also: preprocessing.scale.scale_0_1_per_band

    Matplotlib cannot show the data correctly as-is, so we need to:

    • Convert the data from BGR to RGB

    • Scale the data to 0-1 per band

    Last modified: 26 October 2023
    \ No newline at end of file diff --git a/docs/train-frdc-lightning.html b/docs/train-frdc-lightning.html index 7c945754..122fb48a 100644 --- a/docs/train-frdc-lightning.html +++ b/docs/train-frdc-lightning.html @@ -1,4 +1,4 @@ - train.frdc_datamodule & frdc_module | Documentation

    Documentation 0.0.4 Help

    train.frdc_datamodule & frdc_module

    These are FRDC-specific LightningDataModule and LightningModule classes, core components of the PyTorch Lightning ecosystem that provide a simple interface to train and evaluate models.

    Classes

    FRDCDataModule

    The FRDC PyTorch Lightning DataModule.

    FRDCModule

    The FRDC PyTorch Lightning Module.

    Usage

    API

    FRDCDataModule(segments, labels, preprocess, augmentation, train_val_test_split, batch_size)

    Initializes the FRDC PyTorch Lightning DataModule.


    • segments, labels are the tree segments and their labels, retrieved from the dataset (see load.dataset).

    • preprocess is a function that takes in a segment and returns a preprocessed segment. In particular, it should accept a list of NumPy NDArrays and return a single stacked PyTorch Tensor.

    • augmentation is a function that takes in a segment and returns an augmented segment. In particular, it takes in a PyTorch Tensor and returns another.

    • train_val_test_split is a function that takes a TensorDataset and returns a list of 3 TensorDatasets, for train, val and test respectively.

    • batch_size is the batch size.

    FRDCModule(model_cls, model_kwargs, optim_cls, optim_kwargs)

    Initializes the FRDC PyTorch Lightning Module.


    • model_cls is the Class of the model.

    • model_kwargs is the kwargs to pass to the model.

    • optim_cls is the Class of the optimizer.

    • optim_kwargs is the kwargs to pass to the optimizer.

    Internally, the module will initialize the model and optimizer as follows:

    + train.frdc_datamodule & frdc_module | Documentation

    Documentation 0.0.4 Help

    train.frdc_datamodule & frdc_module

    These are FRDC-specific LightningDataModule and LightningModule classes, core components of the PyTorch Lightning ecosystem that provide a simple interface to train and evaluate models.

    Classes

    FRDCDataModule

    The FRDC PyTorch Lightning DataModule.

    FRDCModule

    The FRDC PyTorch Lightning Module.

    Usage
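    A minimal usage sketch (not verbatim from the repo; the import paths, user-defined model, and split helper below are assumptions based on the API section):

    import torch
    import pytorch_lightning as pl
    from torch.utils.data import random_split

    # Import paths assumed from the module names in this page's title.
    from frdc.train.frdc_datamodule import FRDCDataModule
    from frdc.train.frdc_module import FRDCModule

    dm = FRDCDataModule(
        segments=segments,             # e.g. from extract_segments_from_bounds
        labels=labels,                 # e.g. from FRDCDataset.get_bounds_and_labels
        preprocess=my_preprocess,      # user fn: list[np.ndarray] -> stacked torch.Tensor
        augmentation=my_augmentation,  # user fn: torch.Tensor -> torch.Tensor
        train_val_test_split=lambda tds: random_split(tds, [0.6, 0.2, 0.2]),  # illustrative split
        batch_size=5,
    )

    module = FRDCModule(
        model_cls=MyModel,             # hypothetical user-defined nn.Module class
        model_kwargs=dict(n_classes=10),
        optim_cls=torch.optim.Adam,
        optim_kwargs=dict(lr=1e-3),
    )

    trainer = pl.Trainer(max_epochs=100)
    trainer.fit(module, datamodule=dm)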

    API

    FRDCDataModule(segments, labels, preprocess, augmentation, train_val_test_split, batch_size)

    Initializes the FRDC PyTorch Lightning DataModule.


    • segments, labels are the tree segments and their labels, retrieved from the dataset (see load.dataset).

    • preprocess is a function that takes in a segment and returns a preprocessed segment. In particular, it should accept a list of NumPy NDArrays and return a single stacked PyTorch Tensor.

    • augmentation is a function that takes in a segment and returns an augmented segment. In particular, it takes in a PyTorch Tensor and returns another.

    • train_val_test_split is a function that takes a TensorDataset and returns a list of 3 TensorDatasets, for train, val and test respectively.

    • batch_size is the batch size.

    FRDCModule(model_cls, model_kwargs, optim_cls, optim_kwargs)

    Initializes the FRDC PyTorch Lightning Module.


    • model_cls is the Class of the model.

    • model_kwargs is the kwargs to pass to the model.

    • optim_cls is the Class of the optimizer.

    • optim_kwargs is the kwargs to pass to the optimizer.

    Internally, the module will initialize the model and optimizer as follows:

    model = model_cls(**model_kwargs) optim = optim_cls(model.parameters(), **optim_kwargs) -
    Last modified: 26 October 2023
    \ No newline at end of file +
    Last modified: 26 October 2023
    \ No newline at end of file