Merge pull request #59 from biigle/unknot

UnKnoT
biigle · Oct 30, 2020 · c6168e7 · c6168e7
2 parents be45a0e + fd979e3
commit c6168e7
Show file tree

Hide file tree

Showing 44 changed files with 1,934 additions and 613 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -1,12 +1,13 @@
-numpy==1.16.*
-scipy==1.1.*
-scikit-learn==0.22.*
-scikit-image==0.16.*
-Pillow==7.1.*
 cython
-matplotlib==3.1.*
-tensorflow==1.15.4
-keras==2.1.*
-opencv-python-headless>=3.4.5,<4.0
 h5py==2.10.*
 imgaug==0.3.*
+keras==2.1.*
+matplotlib==3.1.*
+numpy==1.16.*
+opencv-python-headless>=3.4.5,<4.0
+Pillow==7.1.*
+pyvips==2.1.*
+scikit-image==0.16.*
+scikit-learn==0.22.*
+scipy==1.1.*
+tensorflow==1.15.4
diff --git a/src/Http/Controllers/Api/KnowledgeTransferVolumeController.php b/src/Http/Controllers/Api/KnowledgeTransferVolumeController.php
@@ -0,0 +1,57 @@
+<?php
+
+namespace Biigle\Modules\Maia\Http\Controllers\Api;
+
+use Biigle\Http\Controllers\Api\Controller;
+use Biigle\Volume;
+use DB;
+use Illuminate\Http\Request;
+
+class KnowledgeTransferVolumeController extends Controller
+{
+    /**
+     * List all volumes that can be accessed and used for knowledge transfer.
+     *
+     * A volume is included if it is accessible by the requesting user, has at
+     * least one image with annotations and has no image whose
+     * `distance_to_ground` metadata attribute is null. Only the ID and name of
+     * each volume and of its projects are selected. The `doi`, `video_link` and
+     * `gis_link` attributes are hidden from the returned volumes.
+     *
+     * @api {get} volumes/filter/knowledge-transfer Get volumes for knowledge transfer
+     * @apiGroup Maia
+     * @apiName IndexKnowledgeTransferVolumes
+     * @apiPermission user
+     * @apiDescription These are volumes that have annotated images and where all images have `distance_to_ground` information.
+     *
+     * @apiSuccessExample {json} Success response:
+     * [
+     *     {
+     *         "id": 1,
+     *         "name": "My Volume",
+     *         "projects": [
+     *             {
+     *                 "id": 123,
+     *                 "name": "My Project"
+     *             }
+     *         ]
+     *     }
+     * ]
+     *
+     * @param Request $request
+     * @return \Illuminate\Http\Response
+     */
+    public function index(Request $request)
+    {
+        return Volume::accessibleBy($request->user())
+            ->select('id', 'name')
+            ->has('images.annotations')
+            ->whereNotExists(function ($query) {
+                $query->select(DB::raw(1))
+                    ->from('images')
+                    ->whereRaw('images.volume_id = volumes.id')
+                    ->whereNull('attrs->metadata->distance_to_ground');
+            })
+            ->with(['projects' => function ($query) {
+                $query->select('id', 'name');
+            }])
+            ->get()
+            ->each(function ($volume) {
+                $volume->setHidden(['doi', 'video_link', 'gis_link']);
+            });
+    }
+}
diff --git a/src/Http/Controllers/Api/MaiaJobController.php b/src/Http/Controllers/Api/MaiaJobController.php
@@ -23,19 +23,24 @@ class MaiaJobController extends Controller
      *
      * @apiParam {Number} id The volume ID.
      *
-     * @apiParam (Required parameters) {number} nd_clusters Number of different kinds of images to expect. Images are of the same kind if they have similar lighting conditions or show similar patterns (e.g. sea floor, habitat types). Increase this number if you expect many different kinds of images. Lower the number to 1 if you have very few images and/or the content is largely uniform.
-     * @apiParam (Required parameters) {number} nd_patch_size Size in pixels of the image patches used determine the training proposals. Increase the size if the images contain larger objects of interest, decrease the size if the objects are smaller. Larger patch sizes take longer to compute. Must be an odd number.
-     * @apiParam (Required parameters) {number} nd_threshold Percentile of pixel saliency values used to determine the saliency threshold. Lower this value to get more training proposals. The default value should be fine for most cases.
-     * @apiParam (Required parameters) {number} nd_latent_size Learning capability used to determine training proposals. Increase this number to ignore more complex objects and patterns.
-     * @apiParam (Required parameters) {number} nd_trainset_size Number of training image patches used to determine training proposals. You can increase this number for a large volume but it will take longer to compute.
-     * @apiParam (Required parameters) {number} nd_epochs Time spent on training when determining the training proposals.
-     * @apiParam (Required parameters) {number} nd_stride A higher stride increases the speed of the novelty detection but reduces the sensitivity to small regions or objects.
-     * @apiParam (Required parameters) {number} nd_ignore_radius Ignore training proposals or annotation candidates which have a radius smaller or equal than this value in pixels.
-     * @apiParam (Required parameters) {number} is_epochs_head Time spent on training only the head layers of Mask R-CNN for instance segmentation.
-     * @apiParam (Required parameters) {number} is_epochs_all Time spent on training  all layers of Mask R-CNN for instance segmentation.
-     * @apiParam (Optional parameters) {booolean} use_existing Set to `true` to use existing annotations as training proposals.
-     * @apiParam (Optional parameters) {Array} restrict_labels Array of label IDs to restrict the existing annotations to, which should be used as training proposals. `use_existing` must be set if this parameter is present.
-     * @apiParam (Optional parameters) {boolean} skip_nd Set to `true` to skip the novelty detection stage and take only existing annotations as training proposals. `use_existing` must be set if this parameter is present. Also, all `nd_*` parameters are ignored and no longer required if this parameter is set.
+     * @apiParam (Required parameters) {string} training_data_method One of `novelty_detection` (to perform novelty detection to generate training data), `own_annotations` (to use existing annotations of the same volume as training data) or `knowledge_transfer` (to use knowledge transfer to get training data from another volume).
+     * @apiParam (Required parameters) {array} is_train_scheme An array containing objects with the following properties. `layers`: Either `heads` or `all`, `epochs`: Number of epochs to train this step, `learning_rate`: Learning rate to use in this step.
+     *
+     * @apiParam (Required parameters for novelty detection) {number} nd_clusters Number of different kinds of images to expect. Images are of the same kind if they have similar lighting conditions or show similar patterns (e.g. sea floor, habitat types). Increase this number if you expect many different kinds of images. Lower the number to 1 if you have very few images and/or the content is largely uniform.
+     * @apiParam (Required parameters for novelty detection) {number} nd_patch_size Size in pixels of the image patches used to determine the training proposals. Increase the size if the images contain larger objects of interest, decrease the size if the objects are smaller. Larger patch sizes take longer to compute. Must be an odd number.
+     * @apiParam (Required parameters for novelty detection) {number} nd_threshold Percentile of pixel saliency values used to determine the saliency threshold. Lower this value to get more training proposals. The default value should be fine for most cases.
+     * @apiParam (Required parameters for novelty detection) {number} nd_latent_size Learning capability used to determine training proposals. Increase this number to ignore more complex objects and patterns.
+     * @apiParam (Required parameters for novelty detection) {number} nd_trainset_size Number of training image patches used to determine training proposals. You can increase this number for a large volume but it will take longer to compute.
+     * @apiParam (Required parameters for novelty detection) {number} nd_epochs Time spent on training when determining the training proposals.
+     * @apiParam (Required parameters for novelty detection) {number} nd_stride A higher stride increases the speed of the novelty detection but reduces the sensitivity to small regions or objects.
+     * @apiParam (Required parameters for novelty detection) {number} nd_ignore_radius Ignore training proposals or annotation candidates which have a radius smaller than or equal to this value in pixels.
+     *
+     *
+     * @apiParam (Optional parameters for existing annotations) {Array} oa_restrict_labels Array of label IDs to restrict the existing annotations to, which should be used as training proposals.
+     *
+     * @apiParam (Required parameters for knowledge transfer) {number} kt_volume_id The ID of the volume from which to get the annotations for knowledge transfer.
+     *
+     * @apiParam (Optional parameters for knowledge transfer) {Array} kt_restrict_labels Array of label IDs to restrict the annotations of the other volume to, which should be used as training proposals.
      *
      * @param StoreMaiaJob $request
      * @return \Illuminate\Http\Response
@@ -45,19 +50,26 @@ public function store(StoreMaiaJob $request)
         $job = new MaiaJob;
         $job->volume_id = $request->volume->id;
         $job->user_id = $request->user()->id;
-        $job->state_id = State::noveltyDetectionId();
         $paramKeys = [
-            'use_existing',
-            'restrict_labels',
-            'skip_nd',
+            'training_data_method',
             // is_* are parameters for instance segmentation.
-            'is_epochs_head',
-            'is_epochs_all',
+            'is_train_scheme',
         ];
 
-        if (!$request->has('skip_nd')) {
+        if ($request->input('training_data_method') === MaiaJob::TRAIN_OWN_ANNOTATIONS) {
+            $job->state_id = State::instanceSegmentationId();
+            $paramKeys = array_merge($paramKeys, [
+                'oa_restrict_labels',
+            ]);
+        } else if ($request->input('training_data_method') === MaiaJob::TRAIN_KNOWLEDGE_TRANSFER) {
+            $job->state_id = State::instanceSegmentationId();
+            $paramKeys = array_merge($paramKeys, [
+                'kt_volume_id',
+                'kt_restrict_labels',
+            ]);
+        } else {
+            $job->state_id = State::noveltyDetectionId();
             $paramKeys = array_merge($paramKeys, [
-                // nd_* are parameters for novelty detection.
                 'nd_clusters',
                 'nd_patch_size',
                 'nd_threshold',

diff --git a/src/Http/Controllers/Views/MaiaJobController.php b/src/Http/Controllers/Views/MaiaJobController.php
@@ -3,6 +3,7 @@
 namespace Biigle\Modules\Maia\Http\Controllers\Views;
 
 use Biigle\Http\Controllers\Views\Controller;
+use Biigle\ImageAnnotation;
 use Biigle\LabelTree;
 use Biigle\Modules\Maia\MaiaJob;
 use Biigle\Modules\Maia\MaiaJobState as State;
@@ -52,12 +53,32 @@ public function index($id)
 
         $newestJobHasFailed = $jobs->isNotEmpty() ? $jobs[0]->hasFailed() : false;
 
+        $defaultTrainScheme = collect([
+            ['layers' => 'heads', 'epochs' => 10, 'learning_rate' => 0.001],
+            ['layers' => 'heads', 'epochs' => 10, 'learning_rate' => 0.0005],
+            ['layers' => 'heads', 'epochs' => 10, 'learning_rate' => 0.0001],
+            ['layers' => 'all', 'epochs' => 10, 'learning_rate' => 0.0001],
+            ['layers' => 'all', 'epochs' => 10, 'learning_rate' => 0.00005],
+            ['layers' => 'all', 'epochs' => 10, 'learning_rate' => 0.00001],
+        ]);
+
+        $canUseExistingAnnotations = ImageAnnotation::join('images', 'images.id', '=', 'image_annotations.image_id')
+            ->where('images.volume_id', $volume->id)
+            ->exists();
+
+        $canUseKnowledgeTransfer = !$volume->images()
+            ->whereNull('attrs->metadata->distance_to_ground')
+            ->exists();
+
         return view('maia::index', compact(
             'volume',
             'jobs',
             'hasJobsInProgress',
             'hasJobsRunning',
-            'newestJobHasFailed'
+            'newestJobHasFailed',
+            'defaultTrainScheme',
+            'canUseExistingAnnotations',
+            'canUseKnowledgeTransfer'
         ));
     }
 

diff --git a/src/Http/Requests/StoreMaiaJob.php b/src/Http/Requests/StoreMaiaJob.php
@@ -4,12 +4,16 @@
 
 use Biigle\Modules\Maia\MaiaJob;
 use Biigle\Modules\Maia\MaiaJobState as State;
+use Biigle\Modules\Maia\Rules\KnowledgeTransferVolume;
 use Biigle\Modules\Maia\Rules\OddNumber;
 use Biigle\Volume;
 use Illuminate\Foundation\Http\FormRequest;
+use Biigle\Modules\Maia\Traits\QueriesExistingAnnotations;
 
 class StoreMaiaJob extends FormRequest
 {
+    use QueriesExistingAnnotations;
+
     /**
      * The volume to create the MAIA job for.
      *
@@ -37,20 +41,28 @@ public function authorize()
     public function rules()
     {
         return [
-            'use_existing' => 'required_with:restrict_labels,skip_nd|boolean',
-            'restrict_labels' => 'array',
-            'restrict_labels.*' => 'integer|exists:labels,id',
-            'skip_nd' => 'boolean',
-            'nd_clusters' => 'required_unless:skip_nd,true|integer|min:1|max:100',
-            'nd_patch_size' => ['required_unless:skip_nd,true', 'integer', 'min:3', 'max:99', new OddNumber],
-            'nd_threshold' => 'required_unless:skip_nd,true|integer|min:0|max:99',
-            'nd_latent_size' => 'required_unless:skip_nd,true|numeric|min:0.05|max:0.75',
-            'nd_trainset_size' => 'required_unless:skip_nd,true|integer|min:1000|max:100000',
-            'nd_epochs' => 'required_unless:skip_nd,true|integer|min:50|max:1000',
-            'nd_stride' => 'required_unless:skip_nd,true|integer|min:1|max:10',
-            'nd_ignore_radius' => 'required_unless:skip_nd,true|integer|min:0',
-            'is_epochs_head' => 'required|integer|min:1',
-            'is_epochs_all' => 'required|integer|min:1',
+            // Selects which group of parameters below is required.
+            'training_data_method' => 'required|in:novelty_detection,own_annotations,knowledge_transfer',

+            // nd_* are parameters for novelty detection.
+            'nd_clusters' => 'required_if:training_data_method,novelty_detection|integer|min:1|max:100',
+            'nd_patch_size' => ['required_if:training_data_method,novelty_detection', 'integer', 'min:3', 'max:99', new OddNumber],
+            'nd_threshold' => 'required_if:training_data_method,novelty_detection|integer|min:0|max:99',
+            'nd_latent_size' => 'required_if:training_data_method,novelty_detection|numeric|min:0.05|max:0.75',
+            'nd_trainset_size' => 'required_if:training_data_method,novelty_detection|integer|min:1000|max:100000',
+            'nd_epochs' => 'required_if:training_data_method,novelty_detection|integer|min:50|max:1000',
+            'nd_stride' => 'required_if:training_data_method,novelty_detection|integer|min:1|max:10',
+            'nd_ignore_radius' => 'required_if:training_data_method,novelty_detection|integer|min:0',

+            // oa_* are parameters for using own (existing) annotations.
+            'oa_restrict_labels' => 'array',
+            'oa_restrict_labels.*' => 'integer|exists:labels,id',

+            // kt_* are parameters for knowledge transfer.
+            'kt_volume_id' => ['required_if:training_data_method,knowledge_transfer', 'integer', 'exists:volumes,id', new KnowledgeTransferVolume],
+            // Require an array (like oa_restrict_labels). Otherwise a scalar value
+            // matches no "kt_restrict_labels.*" key and passes validation unchecked.
+            'kt_restrict_labels' => 'array',
+            'kt_restrict_labels.*' => 'integer|exists:labels,id',

+            // is_* are parameters for instance segmentation.
+            'is_train_scheme' => 'required|array|min:1',
+            'is_train_scheme.*' => 'array',
+            'is_train_scheme.*.layers' => 'required|in:heads,all',
+            'is_train_scheme.*.epochs' => 'required|integer|min:1',
+            'is_train_scheme.*.learning_rate' => 'required|numeric|min:0|max:1',
         ];
     }
 
@@ -83,9 +95,41 @@ public function withValidator($validator)
                 $validator->errors()->add('volume', 'New MAIA jobs cannot be created for volumes with very large images.');
             }
 
-            if (!$this->input('skip_nd') && $this->volume->images()->count() < $this->input('nd_clusters')) {
+            if ($this->input('training_data_method') === MaiaJob::TRAIN_NOVELTY_DETECTION && $this->volume->images()->count() < $this->input('nd_clusters')) {
                 $validator->errors()->add('nd_clusters', 'The number of image clusters must not be greater than the number of images in the volume.');
             }
+
+            if ($this->input('training_data_method') === MaiaJob::TRAIN_OWN_ANNOTATIONS && $this->hasNoExistingAnnotations()) {
+                $validator->errors()->add('training_data_method', 'There are no existing annotations (with the chosen labels) in this volume.');
+            }
+
+            if ($this->input('training_data_method') === MaiaJob::TRAIN_KNOWLEDGE_TRANSFER && $this->hasNoKnowledgeTransferAnnotations()) {
+                $validator->errors()->add('training_data_method', 'There are no existing annotations (with the chosen labels) in the volume chosen for knowledge transfer.');
+            }
         });
     }
+
+    /**
+     * Determine if there are NO existing annotations that can be used as training data.
+     *
+     * The ID of the volume of this request and the `oa_restrict_labels` input (an
+     * empty array if the input is missing) are passed on to
+     * getExistingAnnotationsQuery().
+     *
+     * @return bool True if the existing annotations query matches no rows.
+     */
+    protected function hasNoExistingAnnotations()
+    {
+        $restrictLabels = $this->input('oa_restrict_labels', []);
+
+        return !$this->getExistingAnnotationsQuery($this->volume->id, $restrictLabels)->exists();
+    }
+
+    /**
+     * Determine if there are NO existing annotations in the volume chosen for knowledge transfer.
+     *
+     * The `kt_volume_id` input and the `kt_restrict_labels` input (an empty array
+     * if the input is missing) are passed on to getExistingAnnotationsQuery().
+     *
+     * @return bool True if the existing annotations query matches no rows.
+     */
+    protected function hasNoKnowledgeTransferAnnotations()
+    {
+        $restrictLabels = $this->input('kt_restrict_labels', []);
+
+        return !$this->getExistingAnnotationsQuery($this->input('kt_volume_id'), $restrictLabels)->exists();
+    }
 }
diff --git a/src/Http/routes.php b/src/Http/routes.php
@@ -40,4 +40,6 @@
 
     $router->get('maia-jobs/{id}/images/{id2}/training-proposals', 'MaiaJobImagesController@indexTrainingProposals');
     $router->get('maia-jobs/{id}/images/{id2}/annotation-candidates', 'MaiaJobImagesController@indexAnnotationCandidates');
+
+    $router->get('volumes/filter/knowledge-transfer', 'KnowledgeTransferVolumeController@index');
 });