From d01daef355c0a8d07dabbb271108438c68a69c12 Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Fri, 16 Apr 2021 10:04:15 +0200 Subject: [PATCH 01/27] added bitmask difference with test --- lightly/api/bitmask.py | 15 +++++++++++++++ tests/api/test_BitMask.py | 6 ++++++ 2 files changed, 21 insertions(+) diff --git a/lightly/api/bitmask.py b/lightly/api/bitmask.py index 1cc9c9e27..43ee288e8 100644 --- a/lightly/api/bitmask.py +++ b/lightly/api/bitmask.py @@ -67,6 +67,11 @@ def _intersection(x: int, y: int) -> int: """ return x & y +def _difference(x: int, y: int) -> int: + """Uses difference to get the intersection of the two masks. + """ + return x - y + def _get_kth_bit(x: int, k: int) -> int: """Returns the kth bit in the mask from the right. @@ -173,6 +178,16 @@ def intersection(self, other): """ self.x = _intersection(self.x, other.x) + def difference(self, other): + """Calculates the difference of two bit masks. + Examples: + >>> mask1 = BitMask.from_bin('0b0111') + >>> mask2 = BitMask.from_bin('0b1100') + >>> mask1.difference(mask2) + >>> # mask1.binstring is '0b0011' + """ + self.x = _difference(self.x, other.x) + def get_kth_bit(self, k: int) -> bool: """Returns the boolean value of the kth bit from the right. """ diff --git a/tests/api/test_BitMask.py b/tests/api/test_BitMask.py index d6997cae0..736879d9c 100644 --- a/tests/api/test_BitMask.py +++ b/tests/api/test_BitMask.py @@ -80,6 +80,12 @@ def test_intersection(self): mask_a.intersection(mask_b) self.assertEqual(mask_a.x, int("0b100", 2)) + def test_difference(self): + mask_a = BitMask.from_bin("0b101") + mask_b = BitMask.from_bin("0b001") + mask_a.difference(mask_b) + self.assertEqual(mask_a.x, int("0b100", 2)) + def test_nonzero_bits(self): mask = BitMask.from_bin("0b0") From 16c37b77faaedc740c58bb04e501c505d64bbb01 Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Fri, 16 Apr 2021 10:35:18 +0200 Subject: [PATCH 02/27] added Bitmask.from_length() constructor --- lightly/api/bitmask.py | 10 +++++++++- tests/api/test_BitMask.py | 13 ++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/lightly/api/bitmask.py b/lightly/api/bitmask.py index 43ee288e8..9ad2886b0 100644 --- a/lightly/api/bitmask.py +++ b/lightly/api/bitmask.py @@ -67,6 +67,7 @@ def _intersection(x: int, y: int) -> int: """ return x & y + def _difference(x: int, y: int) -> int: """Uses difference to get the intersection of the two masks. """ @@ -128,6 +129,13 @@ def from_bin(cls, binstring: str): """ return cls(_bin_to_int(binstring)) + @classmethod + def from_length(cls, length: int): + """Creates a all-true bitmask of a predefined length + """ + binstring = '0b' + '1' * length + return cls.from_bin(binstring) + def to_hex(self): """Creates a BitMask from a hex string. 
""" @@ -209,4 +217,4 @@ def unset_kth_bit(self, k: int): >>> mask.unset_kth_bit(2) >>> # mask.binstring is '0b1011' """ - self.x = _unset_kth_bit(self.x, k) \ No newline at end of file + self.x = _unset_kth_bit(self.x, k) diff --git a/tests/api/test_BitMask.py b/tests/api/test_BitMask.py index 736879d9c..5dc91f855 100644 --- a/tests/api/test_BitMask.py +++ b/tests/api/test_BitMask.py @@ -21,6 +21,17 @@ def test_get_and_set(self): mask.unset_kth_bit(4) self.assertFalse(mask.get_kth_bit(4)) + def test_large_bitmasks(self): + bitstring = "0b" + "1" * 5678 + mask = BitMask.from_bin(bitstring) + mask_as_bitstring = mask.to_bin() + self.assertEqual(mask_as_bitstring, bitstring) + + def test_bitmask_from_length(self): + length = 4 + mask = BitMask.from_length(length) + self.assertEqual(mask.to_bin(), "0b1111") + def test_get_and_set_outside_of_range(self): mask = BitMask.from_bin("0b11110000") @@ -99,4 +110,4 @@ def test_nonzero_bits(self): also_indices = mask.to_indices() for i, j in zip(indices, also_indices): - self.assertEqual(i, j) \ No newline at end of file + self.assertEqual(i, j) From b21e56eaf80eef953b364ee735c0ce1acf278a98 Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Fri, 16 Apr 2021 10:51:01 +0200 Subject: [PATCH 03/27] added Bitmask.subset_a_list() method --- lightly/api/bitmask.py | 12 ++++++++++++ tests/api/test_BitMask.py | 8 ++++++++ 2 files changed, 20 insertions(+) diff --git a/lightly/api/bitmask.py b/lightly/api/bitmask.py index 9ad2886b0..495ec1ff5 100644 --- a/lightly/api/bitmask.py +++ b/lightly/api/bitmask.py @@ -196,6 +196,18 @@ def difference(self, other): """ self.x = _difference(self.x, other.x) + def subset_a_list(self, list_: List): + """Returns a subset of a list depending on the bitmask + Examples: + >>> list_to_subset = [4, 7, 9, 1] + >>> mask = BitMask.from_bin("0b0101") + >>> masked_list = mask.subset_a_list(list_to_subset) + >>> # masked_list = [7, 1] + """ + bits = self.to_bin() + reversed_masked_list = [e for e, bit in zip(reversed(list_),reversed(bits)) if bit == "1"] + return list(reversed(reversed_masked_list)) + def get_kth_bit(self, k: int) -> bool: """Returns the boolean value of the kth bit from the right. 
""" diff --git a/tests/api/test_BitMask.py b/tests/api/test_BitMask.py index 5dc91f855..2c02c0f5f 100644 --- a/tests/api/test_BitMask.py +++ b/tests/api/test_BitMask.py @@ -97,6 +97,14 @@ def test_difference(self): mask_a.difference(mask_b) self.assertEqual(mask_a.x, int("0b100", 2)) + def test_subset_a_list(self): + list_ = [4, 7, 9, 1] + mask = BitMask.from_bin("0b0101") + target_masked_list = [7, 1] + masked_list = mask.subset_a_list(list_) + self.assertEqual(target_masked_list,masked_list) + + def test_nonzero_bits(self): mask = BitMask.from_bin("0b0") From ef07f20edf687dba681e86d0fa7bbf51da7429ad Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Fri, 16 Apr 2021 11:14:26 +0200 Subject: [PATCH 04/27] bitmasks: added equality and minus operator overloads --- lightly/api/bitmask.py | 6 ++++++ tests/api/test_BitMask.py | 12 ++++++++++++ 2 files changed, 18 insertions(+) diff --git a/lightly/api/bitmask.py b/lightly/api/bitmask.py index 495ec1ff5..78ca34a26 100644 --- a/lightly/api/bitmask.py +++ b/lightly/api/bitmask.py @@ -196,6 +196,12 @@ def difference(self, other): """ self.x = _difference(self.x, other.x) + def __sub__(self, other): + return BitMask(self.x - other.x) + + def __eq__(self, other): + return self.to_bin() == other.to_bin() + def subset_a_list(self, list_: List): """Returns a subset of a list depending on the bitmask Examples: diff --git a/tests/api/test_BitMask.py b/tests/api/test_BitMask.py index 2c02c0f5f..0a5d0ba9a 100644 --- a/tests/api/test_BitMask.py +++ b/tests/api/test_BitMask.py @@ -97,6 +97,18 @@ def test_difference(self): mask_a.difference(mask_b) self.assertEqual(mask_a.x, int("0b100", 2)) + def test_operator_minus(self): + mask_a = BitMask.from_bin("0b101") + mask_b = BitMask.from_bin("0b001") + mask_target = BitMask.from_bin("0b100") + self.assertEqual(mask_a-mask_b, mask_target) + + def test_equal(self): + mask_a = BitMask.from_bin("0b101") + mask_b = BitMask.from_bin("0b101") + self.assertEqual(mask_a, mask_b) + + def test_subset_a_list(self): list_ = [4, 7, 9, 1] mask = BitMask.from_bin("0b0101") From a26e5f6df14b320a2b7f6bd8fcf3fb74121b5f15 Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Fri, 16 Apr 2021 11:15:01 +0200 Subject: [PATCH 05/27] added "added set" to agent and computed the three sets more efficient with bitmasks --- lightly/active_learning/agents/agent.py | 79 ++++++++++--------- .../test_active_learning_agent.py | 7 +- 2 files changed, 45 insertions(+), 41 deletions(-) diff --git a/lightly/active_learning/agents/agent.py b/lightly/active_learning/agents/agent.py index 8b708b5c5..2eab9020d 100644 --- a/lightly/active_learning/agents/agent.py +++ b/lightly/active_learning/agents/agent.py @@ -76,28 +76,29 @@ def _set_labeled_and_unlabeled_set(self, preselected_tag_data: TagData = None): optional param, then it must not be loaded from the API """ - if self.preselected_tag_id is None: - self.labeled_set = [] - else: + self.bitmask_labeled_set = BitMask.from_hex("0x0") + self.bitmask_added_set = BitMask.from_hex("0x0") + if self.preselected_tag_id is not None: if preselected_tag_data is None: preselected_tag_data = self.api_workflow_client.tags_api.get_tag_by_tag_id( self.api_workflow_client.dataset_id, tag_id=self.preselected_tag_id) - chosen_samples_ids = BitMask.from_hex(preselected_tag_data.bit_mask_data).to_indices() - self.labeled_set = [self.api_workflow_client.filenames_on_server[i] for i in chosen_samples_ids] + new_bitmask_labeled_set = BitMask.from_hex(preselected_tag_data.bit_mask_data) + self.bitmask_added_set = new_bitmask_labeled_set 
- self.bitmask_labeled_set + self.bitmask_labeled_set = new_bitmask_labeled_set - if not hasattr(self, "unlabeled_set"): - if self.query_tag_id is None: - self.unlabeled_set = self.api_workflow_client.filenames_on_server - else: - query_tag_data = self.api_workflow_client.tags_api.get_tag_by_tag_id( - self.api_workflow_client.dataset_id, tag_id=self.query_tag_id) - chosen_samples_ids = BitMask.from_hex(query_tag_data.bit_mask_data).to_indices() - self.unlabeled_set = [self.api_workflow_client.filenames_on_server[i] for i in chosen_samples_ids] + if self.query_tag_id is None: + bitmask_query_tag = BitMask.from_length(len(self.api_workflow_client.filenames_on_server)) + else: + query_tag_data = self.api_workflow_client.tags_api.get_tag_by_tag_id( + self.api_workflow_client.dataset_id, tag_id=self.query_tag_id) + bitmask_query_tag = BitMask.from_hex(query_tag_data.bit_mask_data) + self.bitmask_unlabeled_set = bitmask_query_tag - self.bitmask_labeled_set - filenames_labeled = set(self.labeled_set) - self.unlabeled_set = [f for f in self.unlabeled_set if f not in filenames_labeled] + self.labeled_set = self.bitmask_labeled_set.subset_a_list(self.api_workflow_client.filenames_on_server) + self.added_set = self.bitmask_added_set.subset_a_list(self.api_workflow_client.filenames_on_server) + self.unlabeled_set = self.bitmask_unlabeled_set.subset_a_list(self.api_workflow_client.filenames_on_server) - def query(self, sampler_config: SamplerConfig, al_scorer: Scorer = None) -> List[str]: + def query(self, sampler_config: SamplerConfig, al_scorer: Scorer = None) -> Tuple[List[str], List[str]]: """Performs an active learning query. As part of it, the self.labeled_set and self.unlabeled_set are updated @@ -110,7 +111,7 @@ def query(self, sampler_config: SamplerConfig, al_scorer: Scorer = None) -> List An instance of a class inheriting from Scorer, e.g. a ClassificationScorer. Returns: - The filenames of the samples in the new labeled_set. + The filenames of the samples in the new labeled_set and the added filenames. 
""" # check input @@ -119,29 +120,29 @@ def query(self, sampler_config: SamplerConfig, al_scorer: Scorer = None) -> List "including the current labeled set " "(sampler_config.n_samples) " "is smaller than the number of samples in the current labeled set.") - return self.labeled_set - - # calculate scores - if al_scorer is not None: - no_unlabeled_samples = len(self.unlabeled_set) - no_samples_with_predictions = len(al_scorer.model_output) - if no_unlabeled_samples != no_samples_with_predictions: - raise ValueError(f"The scorer must have exactly as much samples as in the unlabeled set," - f"but there are {no_samples_with_predictions} predictions in the scorer," - f"but {no_unlabeled_samples} in the unlabeled set.") - scores_dict = al_scorer._calculate_scores() + else: - scores_dict = None + # calculate scores + if al_scorer is not None: + no_unlabeled_samples = len(self.unlabeled_set) + no_samples_with_predictions = len(al_scorer.model_output) + if no_unlabeled_samples != no_samples_with_predictions: + raise ValueError(f"The scorer must have exactly as much samples as in the unlabeled set," + f"but there are {no_samples_with_predictions} predictions in the scorer," + f"but {no_unlabeled_samples} in the unlabeled set.") + scores_dict = al_scorer._calculate_scores() + else: + scores_dict = None - # perform the sampling - new_tag_data = self.api_workflow_client.sampling( - sampler_config=sampler_config, - al_scores=scores_dict, - preselected_tag_id=self.preselected_tag_id, - query_tag_id=self.query_tag_id) + # perform the sampling + new_tag_data = self.api_workflow_client.sampling( + sampler_config=sampler_config, + al_scores=scores_dict, + preselected_tag_id=self.preselected_tag_id, + query_tag_id=self.query_tag_id) - # set the newly chosen tag as the new preselected_tag_id and update the sets - self.preselected_tag_id = new_tag_data.id - self._set_labeled_and_unlabeled_set(new_tag_data) + # set the newly chosen tag as the new preselected_tag_id and update the sets + self.preselected_tag_id = new_tag_data.id + self._set_labeled_and_unlabeled_set(new_tag_data) - return self.labeled_set + return self.labeled_set, self.added_set diff --git a/tests/active_learning/test_active_learning_agent.py b/tests/active_learning/test_active_learning_agent.py index a14a399a3..8212d6178 100644 --- a/tests/active_learning/test_active_learning_agent.py +++ b/tests/active_learning/test_active_learning_agent.py @@ -30,7 +30,10 @@ def test_agent(self): predictions = np.random.rand(len(agent.unlabeled_set), 10).astype(np.float32) predictions_normalized = predictions / np.sum(predictions, axis=1)[:, np.newaxis] al_scorer = ScorerClassification(predictions_normalized) - chosen_filenames = agent.query(sampler_config=sampler_config, al_scorer=al_scorer) + labeled_set, added_set = agent.query(sampler_config=sampler_config, al_scorer=al_scorer) else: sampler_config = SamplerConfig(n_samples=n_samples) - chosen_filenames = agent.query(sampler_config=sampler_config) + labeled_set, added_set = agent.query(sampler_config=sampler_config) + + assert len(added_set) <= len(labeled_set) + assert set(added_set).issubset(labeled_set) From ba192987699e6507833b2e88c8d940925193ea4b Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Fri, 16 Apr 2021 17:28:31 +0200 Subject: [PATCH 06/27] added generated tag arithmetics endpoints --- .../swagger_client/__init__.py | 3 + .../swagger_client/api/tags_api.py | 107 ++++++++ .../swagger_client/models/__init__.py | 3 + .../models/tag_arithmetics_operation.py | 102 ++++++++ 
.../models/tag_arithmetics_request.py | 230 ++++++++++++++++++ .../models/tag_bit_mask_response.py | 124 ++++++++++ 6 files changed, 569 insertions(+) create mode 100644 lightly/openapi_generated/swagger_client/models/tag_arithmetics_operation.py create mode 100644 lightly/openapi_generated/swagger_client/models/tag_arithmetics_request.py create mode 100644 lightly/openapi_generated/swagger_client/models/tag_bit_mask_response.py diff --git a/lightly/openapi_generated/swagger_client/__init__.py b/lightly/openapi_generated/swagger_client/__init__.py index b810383d4..1f3b68217 100644 --- a/lightly/openapi_generated/swagger_client/__init__.py +++ b/lightly/openapi_generated/swagger_client/__init__.py @@ -73,7 +73,10 @@ from lightly.openapi_generated.swagger_client.models.sampling_create_request import SamplingCreateRequest from lightly.openapi_generated.swagger_client.models.sampling_method import SamplingMethod from lightly.openapi_generated.swagger_client.models.tag_active_learning_scores_data import TagActiveLearningScoresData +from lightly.openapi_generated.swagger_client.models.tag_arithmetics_operation import TagArithmeticsOperation +from lightly.openapi_generated.swagger_client.models.tag_arithmetics_request import TagArithmeticsRequest from lightly.openapi_generated.swagger_client.models.tag_bit_mask_data import TagBitMaskData +from lightly.openapi_generated.swagger_client.models.tag_bit_mask_response import TagBitMaskResponse from lightly.openapi_generated.swagger_client.models.tag_change_data import TagChangeData from lightly.openapi_generated.swagger_client.models.tag_create_request import TagCreateRequest from lightly.openapi_generated.swagger_client.models.tag_creator import TagCreator diff --git a/lightly/openapi_generated/swagger_client/api/tags_api.py b/lightly/openapi_generated/swagger_client/api/tags_api.py index fa9e859a2..fca7de663 100644 --- a/lightly/openapi_generated/swagger_client/api/tags_api.py +++ b/lightly/openapi_generated/swagger_client/api/tags_api.py @@ -547,3 +547,110 @@ def get_tags_by_dataset_id_with_http_info(self, dataset_id, **kwargs): # noqa: _preload_content=params.get('_preload_content', True), _request_timeout=params.get('_request_timeout'), collection_formats=collection_formats) + + def perform_tag_arithmetics(self, body, dataset_id, **kwargs): # noqa: E501 + """perform_tag_arithmetics # noqa: E501 + + performs tag arithmetics to compute a new bitmask out of two existing and optionally create a tag for it # noqa: E501 + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.perform_tag_arithmetics(body, dataset_id, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param TagArithmeticsRequest body: (required) + :param MongoObjectID dataset_id: ObjectId of the dataset (required) + :return: TagBitMaskResponse + If the method is called asynchronously, + returns the request thread. 
+ """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.perform_tag_arithmetics_with_http_info(body, dataset_id, **kwargs) # noqa: E501 + else: + (data) = self.perform_tag_arithmetics_with_http_info(body, dataset_id, **kwargs) # noqa: E501 + return data + + def perform_tag_arithmetics_with_http_info(self, body, dataset_id, **kwargs): # noqa: E501 + """perform_tag_arithmetics # noqa: E501 + + performs tag arithmetics to compute a new bitmask out of two existing and optionally create a tag for it # noqa: E501 + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.perform_tag_arithmetics_with_http_info(body, dataset_id, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param TagArithmeticsRequest body: (required) + :param MongoObjectID dataset_id: ObjectId of the dataset (required) + :return: TagBitMaskResponse + If the method is called asynchronously, + returns the request thread. + """ + + all_params = ['body', 'dataset_id'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method perform_tag_arithmetics" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'body' is set + if self.api_client.client_side_validation and ('body' not in params or + params['body'] is None): # noqa: E501 + raise ValueError("Missing the required parameter `body` when calling `perform_tag_arithmetics`") # noqa: E501 + # verify the required parameter 'dataset_id' is set + if self.api_client.client_side_validation and ('dataset_id' not in params or + params['dataset_id'] is None): # noqa: E501 + raise ValueError("Missing the required parameter `dataset_id` when calling `perform_tag_arithmetics`") # noqa: E501 + + collection_formats = {} + + path_params = {} + if 'dataset_id' in params: + path_params['datasetId'] = params['dataset_id'] # noqa: E501 + + query_params = [] + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + if 'body' in params: + body_params = params['body'] + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # HTTP header `Content-Type` + header_params['Content-Type'] = self.api_client.select_header_content_type( # noqa: E501 + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = ['ApiKeyAuth', 'auth0Bearer'] # noqa: E501 + + return self.api_client.call_api( + '/v1/datasets/{datasetId}/tags/arithmetics', 'POST', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='TagBitMaskResponse', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) diff --git a/lightly/openapi_generated/swagger_client/models/__init__.py b/lightly/openapi_generated/swagger_client/models/__init__.py index ee9d8b864..5bc544857 100644 --- a/lightly/openapi_generated/swagger_client/models/__init__.py +++ 
b/lightly/openapi_generated/swagger_client/models/__init__.py @@ -56,7 +56,10 @@ from lightly.openapi_generated.swagger_client.models.sampling_create_request import SamplingCreateRequest from lightly.openapi_generated.swagger_client.models.sampling_method import SamplingMethod from lightly.openapi_generated.swagger_client.models.tag_active_learning_scores_data import TagActiveLearningScoresData +from lightly.openapi_generated.swagger_client.models.tag_arithmetics_operation import TagArithmeticsOperation +from lightly.openapi_generated.swagger_client.models.tag_arithmetics_request import TagArithmeticsRequest from lightly.openapi_generated.swagger_client.models.tag_bit_mask_data import TagBitMaskData +from lightly.openapi_generated.swagger_client.models.tag_bit_mask_response import TagBitMaskResponse from lightly.openapi_generated.swagger_client.models.tag_change_data import TagChangeData from lightly.openapi_generated.swagger_client.models.tag_create_request import TagCreateRequest from lightly.openapi_generated.swagger_client.models.tag_creator import TagCreator diff --git a/lightly/openapi_generated/swagger_client/models/tag_arithmetics_operation.py b/lightly/openapi_generated/swagger_client/models/tag_arithmetics_operation.py new file mode 100644 index 000000000..99bddd916 --- /dev/null +++ b/lightly/openapi_generated/swagger_client/models/tag_arithmetics_operation.py @@ -0,0 +1,102 @@ +# coding: utf-8 + +""" + Lightly API + + Lightly.ai enables you to do self-supervised learning in an easy and intuitive way. The lightly.ai OpenAPI spec defines how one can interact with our REST API to unleash the full potential of lightly.ai # noqa: E501 + + OpenAPI spec version: 1.0.0 + Contact: support@lightly.ai + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + + +import pprint +import re # noqa: F401 + +import six + +from lightly.openapi_generated.swagger_client.configuration import Configuration + + +class TagArithmeticsOperation(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + + """ + allowed enum values + """ + UNION = "UNION" + INTERSECTION = "INTERSECTION" + DIFFERENCE = "DIFFERENCE" + + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. 
+ """ + swagger_types = { + } + + attribute_map = { + } + + def __init__(self, _configuration=None): # noqa: E501 + """TagArithmeticsOperation - a model defined in Swagger""" # noqa: E501 + if _configuration is None: + _configuration = Configuration() + self._configuration = _configuration + self.discriminator = None + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + if issubclass(TagArithmeticsOperation, dict): + for key, value in self.items(): + result[key] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, TagArithmeticsOperation): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, TagArithmeticsOperation): + return True + + return self.to_dict() != other.to_dict() diff --git a/lightly/openapi_generated/swagger_client/models/tag_arithmetics_request.py b/lightly/openapi_generated/swagger_client/models/tag_arithmetics_request.py new file mode 100644 index 000000000..7ef31eb04 --- /dev/null +++ b/lightly/openapi_generated/swagger_client/models/tag_arithmetics_request.py @@ -0,0 +1,230 @@ +# coding: utf-8 + +""" + Lightly API + + Lightly.ai enables you to do self-supervised learning in an easy and intuitive way. The lightly.ai OpenAPI spec defines how one can interact with our REST API to unleash the full potential of lightly.ai # noqa: E501 + + OpenAPI spec version: 1.0.0 + Contact: support@lightly.ai + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + + +import pprint +import re # noqa: F401 + +import six + +from lightly.openapi_generated.swagger_client.configuration import Configuration + + +class TagArithmeticsRequest(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. 
+ """ + swagger_types = { + 'tag_id1': 'MongoObjectID', + 'tag_id2': 'MongoObjectID', + 'operation': 'TagArithmeticsOperation', + 'new_tag_name': 'TagName', + 'creator': 'TagCreator' + } + + attribute_map = { + 'tag_id1': 'tagId1', + 'tag_id2': 'tagId2', + 'operation': 'operation', + 'new_tag_name': 'newTagName', + 'creator': 'creator' + } + + def __init__(self, tag_id1=None, tag_id2=None, operation=None, new_tag_name=None, creator=None, _configuration=None): # noqa: E501 + """TagArithmeticsRequest - a model defined in Swagger""" # noqa: E501 + if _configuration is None: + _configuration = Configuration() + self._configuration = _configuration + + self._tag_id1 = None + self._tag_id2 = None + self._operation = None + self._new_tag_name = None + self._creator = None + self.discriminator = None + + self.tag_id1 = tag_id1 + self.tag_id2 = tag_id2 + self.operation = operation + if new_tag_name is not None: + self.new_tag_name = new_tag_name + if creator is not None: + self.creator = creator + + @property + def tag_id1(self): + """Gets the tag_id1 of this TagArithmeticsRequest. # noqa: E501 + + + :return: The tag_id1 of this TagArithmeticsRequest. # noqa: E501 + :rtype: MongoObjectID + """ + return self._tag_id1 + + @tag_id1.setter + def tag_id1(self, tag_id1): + """Sets the tag_id1 of this TagArithmeticsRequest. + + + :param tag_id1: The tag_id1 of this TagArithmeticsRequest. # noqa: E501 + :type: MongoObjectID + """ + if self._configuration.client_side_validation and tag_id1 is None: + raise ValueError("Invalid value for `tag_id1`, must not be `None`") # noqa: E501 + + self._tag_id1 = tag_id1 + + @property + def tag_id2(self): + """Gets the tag_id2 of this TagArithmeticsRequest. # noqa: E501 + + + :return: The tag_id2 of this TagArithmeticsRequest. # noqa: E501 + :rtype: MongoObjectID + """ + return self._tag_id2 + + @tag_id2.setter + def tag_id2(self, tag_id2): + """Sets the tag_id2 of this TagArithmeticsRequest. + + + :param tag_id2: The tag_id2 of this TagArithmeticsRequest. # noqa: E501 + :type: MongoObjectID + """ + if self._configuration.client_side_validation and tag_id2 is None: + raise ValueError("Invalid value for `tag_id2`, must not be `None`") # noqa: E501 + + self._tag_id2 = tag_id2 + + @property + def operation(self): + """Gets the operation of this TagArithmeticsRequest. # noqa: E501 + + + :return: The operation of this TagArithmeticsRequest. # noqa: E501 + :rtype: TagArithmeticsOperation + """ + return self._operation + + @operation.setter + def operation(self, operation): + """Sets the operation of this TagArithmeticsRequest. + + + :param operation: The operation of this TagArithmeticsRequest. # noqa: E501 + :type: TagArithmeticsOperation + """ + if self._configuration.client_side_validation and operation is None: + raise ValueError("Invalid value for `operation`, must not be `None`") # noqa: E501 + + self._operation = operation + + @property + def new_tag_name(self): + """Gets the new_tag_name of this TagArithmeticsRequest. # noqa: E501 + + + :return: The new_tag_name of this TagArithmeticsRequest. # noqa: E501 + :rtype: TagName + """ + return self._new_tag_name + + @new_tag_name.setter + def new_tag_name(self, new_tag_name): + """Sets the new_tag_name of this TagArithmeticsRequest. + + + :param new_tag_name: The new_tag_name of this TagArithmeticsRequest. # noqa: E501 + :type: TagName + """ + + self._new_tag_name = new_tag_name + + @property + def creator(self): + """Gets the creator of this TagArithmeticsRequest. 
# noqa: E501 + + + :return: The creator of this TagArithmeticsRequest. # noqa: E501 + :rtype: TagCreator + """ + return self._creator + + @creator.setter + def creator(self, creator): + """Sets the creator of this TagArithmeticsRequest. + + + :param creator: The creator of this TagArithmeticsRequest. # noqa: E501 + :type: TagCreator + """ + + self._creator = creator + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + if issubclass(TagArithmeticsRequest, dict): + for key, value in self.items(): + result[key] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, TagArithmeticsRequest): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, TagArithmeticsRequest): + return True + + return self.to_dict() != other.to_dict() diff --git a/lightly/openapi_generated/swagger_client/models/tag_bit_mask_response.py b/lightly/openapi_generated/swagger_client/models/tag_bit_mask_response.py new file mode 100644 index 000000000..ce40bfb40 --- /dev/null +++ b/lightly/openapi_generated/swagger_client/models/tag_bit_mask_response.py @@ -0,0 +1,124 @@ +# coding: utf-8 + +""" + Lightly API + + Lightly.ai enables you to do self-supervised learning in an easy and intuitive way. The lightly.ai OpenAPI spec defines how one can interact with our REST API to unleash the full potential of lightly.ai # noqa: E501 + + OpenAPI spec version: 1.0.0 + Contact: support@lightly.ai + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + + +import pprint +import re # noqa: F401 + +import six + +from lightly.openapi_generated.swagger_client.configuration import Configuration + + +class TagBitMaskResponse(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + swagger_types = { + 'bit_mask_data': 'TagBitMaskData' + } + + attribute_map = { + 'bit_mask_data': 'bitMaskData' + } + + def __init__(self, bit_mask_data=None, _configuration=None): # noqa: E501 + """TagBitMaskResponse - a model defined in Swagger""" # noqa: E501 + if _configuration is None: + _configuration = Configuration() + self._configuration = _configuration + + self._bit_mask_data = None + self.discriminator = None + + self.bit_mask_data = bit_mask_data + + @property + def bit_mask_data(self): + """Gets the bit_mask_data of this TagBitMaskResponse. # noqa: E501 + + + :return: The bit_mask_data of this TagBitMaskResponse. 
# noqa: E501 + :rtype: TagBitMaskData + """ + return self._bit_mask_data + + @bit_mask_data.setter + def bit_mask_data(self, bit_mask_data): + """Sets the bit_mask_data of this TagBitMaskResponse. + + + :param bit_mask_data: The bit_mask_data of this TagBitMaskResponse. # noqa: E501 + :type: TagBitMaskData + """ + if self._configuration.client_side_validation and bit_mask_data is None: + raise ValueError("Invalid value for `bit_mask_data`, must not be `None`") # noqa: E501 + + self._bit_mask_data = bit_mask_data + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + if issubclass(TagBitMaskResponse, dict): + for key, value in self.items(): + result[key] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, TagBitMaskResponse): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, TagBitMaskResponse): + return True + + return self.to_dict() != other.to_dict() From 8c4546760a964171cecc25a2709d3d284060af6f Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Fri, 16 Apr 2021 19:46:47 +0200 Subject: [PATCH 07/27] created tests for download_cli --- lightly/cli/__init__.py | 2 +- lightly/cli/config/config.yaml | 4 ++ .../{download_cli.py => download_cli_file.py} | 0 .../mocked_api_workflow_client.py | 18 +++-- tests/cli/test_cli_download.py | 67 +++++++++++++++++++ 5 files changed, 84 insertions(+), 7 deletions(-) rename lightly/cli/{download_cli.py => download_cli_file.py} (100%) create mode 100644 tests/cli/test_cli_download.py diff --git a/lightly/cli/__init__.py b/lightly/cli/__init__.py index e64a86144..f8ff2886b 100644 --- a/lightly/cli/__init__.py +++ b/lightly/cli/__init__.py @@ -10,4 +10,4 @@ from lightly.cli.train_cli import train_cli from lightly.cli.embed_cli import embed_cli from lightly.cli.upload_cli import upload_cli -from lightly.cli.download_cli import download_cli +from lightly.cli.download_cli_file import download_cli diff --git a/lightly/cli/config/config.yaml b/lightly/cli/config/config.yaml index 7524b406f..be8a525a2 100644 --- a/lightly/cli/config/config.yaml +++ b/lightly/cli/config/config.yaml @@ -18,6 +18,7 @@ resize: -1 # Allow resizing of the images before uploading, u embedding_name: 'default' # Name of the embedding to be used on the platform. emb_upload_bsz: 32 # Number of embeddings which are uploaded in a single batch. tag_name: 'initial-tag' # Name of the requested tag on the Lightly platform. 
+exclude_parent_tag: False # If true, only the samples in the defined tag, but without the parent tag, are taken ### training and embeddings pre_trained: True # Whether to use a pre-trained model or not @@ -129,6 +130,9 @@ hydra: Download a list of files in a given tag from the Lightly web solution > lightly-download tag_name='my-tag' dataset_id='your_dataset_id' token='your_access_token' + Download a list of files in a given tag without filenames from the parent tag from the Lightly web solution + > lightly-download tag_name='my-tag' dataset_id='your_dataset_id' token='your_access_token' exclude_parent_tag=True + Copy all files in a given tag from a source directory to a target directory > lightly-download tag_name='my-tag' dataset_id='your_dataset_id' token='your_access_token' input_dir='data/' output_dir='new_data/' diff --git a/lightly/cli/download_cli.py b/lightly/cli/download_cli_file.py similarity index 100% rename from lightly/cli/download_cli.py rename to lightly/cli/download_cli_file.py diff --git a/tests/api_workflow/mocked_api_workflow_client.py b/tests/api_workflow/mocked_api_workflow_client.py index 17659615d..0cdace1ae 100644 --- a/tests/api_workflow/mocked_api_workflow_client.py +++ b/tests/api_workflow/mocked_api_workflow_client.py @@ -16,7 +16,7 @@ from typing import * from lightly.openapi_generated.swagger_client import ScoresApi, CreateEntityResponse, SamplesApi, SampleCreateRequest, \ - InitialTagCreateRequest, ApiClient, VersioningApi, QuotaApi + InitialTagCreateRequest, ApiClient, VersioningApi, QuotaApi, TagArithmeticsRequest, TagBitMaskResponse from lightly.openapi_generated.swagger_client.api.embeddings_api import EmbeddingsApi from lightly.openapi_generated.swagger_client.api.jobs_api import JobsApi from lightly.openapi_generated.swagger_client.api.mappings_api import MappingsApi @@ -93,19 +93,25 @@ def get_tag_by_tag_id(self, dataset_id, tag_id, **kwargs): def get_tags_by_dataset_id(self, dataset_id, **kwargs): tag_1 = TagData(id='inital_tag_id', dataset_id=dataset_id, prev_tag_id=None, - bit_mask_data="0x80bda23e9", name='initial-tag', tot_size=15, + bit_mask_data="0xF", name='initial-tag', tot_size=4, created_at=1577836800, changes=dict()) tag_2 = TagData(id='query_tag_id_xyz', dataset_id=dataset_id, prev_tag_id="initial-tag", - bit_mask_data="0x80bda23e9", name='query_tag_name_xyz', tot_size=15, + bit_mask_data="0xF", name='query_tag_name_xyz', tot_size=4, created_at=1577836800, changes=dict()) tag_3 = TagData(id='preselected_tag_id_xyz', dataset_id=dataset_id, prev_tag_id="initial-tag", - bit_mask_data="0x80bda23e9", name='preselected_tag_name_xyz', tot_size=15, + bit_mask_data="0x1", name='preselected_tag_name_xyz', tot_size=4, created_at=1577836800, changes=dict()) - tags = [tag_1, tag_2, tag_3] - no_tags_to_return = getattr(self, "no_tags", 3) + tag_4 = TagData(id='sampled_tag_xyz', dataset_id=dataset_id, prev_tag_id="preselected_tag_id_xyz", + bit_mask_data="0x3", name='sampled_tag_xyz', tot_size=4, + created_at=1577836800, changes=dict()) + tags = [tag_1, tag_2, tag_3, tag_4] + no_tags_to_return = getattr(self, "no_tags", 4) tags = tags[:no_tags_to_return] return tags + def perform_tag_arithmetics(self, body: TagArithmeticsRequest, dataset_id, **kwargs): + return TagBitMaskResponse(bit_mask_data="0x2") + class MockedScoresApi(ScoresApi): def create_or_update_active_learning_score_by_tag_id(self, body, dataset_id, tag_id, **kwargs) -> \ diff --git a/tests/cli/test_cli_download.py b/tests/cli/test_cli_download.py new file mode 100644 index 
000000000..a9e3ccd2d --- /dev/null +++ b/tests/cli/test_cli_download.py @@ -0,0 +1,67 @@ +import os +import re +import tempfile + +from hydra.experimental import compose, initialize + +import lightly +from tests.api_workflow.mocked_api_workflow_client import MockedApiWorkflowSetup, MockedApiWorkflowClient + + +#in download_cli_file.py: from lightly.api.api_workflow_client import ApiWorkflowClient + +class TestCLIDownload(MockedApiWorkflowSetup): + + @classmethod + def setUpClass(cls) -> None: + lightly.cli.download_cli_file.ApiWorkflowClient = MockedApiWorkflowClient + initialize(config_path="../../lightly/cli/config", job_name="test_app") + + def setUp(self): + self.cfg = compose(config_name="config", overrides=["token='123'", "dataset_id='XYZ'"]) + + + def parse_cli_string(self, cli_words: str): + cli_words = cli_words.replace("lightly-download ", "") + cli_words = re.split("=| ", cli_words) + assert len(cli_words) % 2 == 0 + dict_keys = cli_words[0::2] + dict_values = cli_words[1::2] + for key, value in zip(dict_keys, dict_values): + value = value.strip('\"') + value = value.strip('\'') + self.cfg[key] = value + + def test_parse_cli_string(self): + cli_string = "lightly-download token='123' dataset_id='XYZ'" + self.parse_cli_string(cli_string) + assert self.cfg["token"] == '123' + assert self.cfg["dataset_id"] == 'XYZ' + + def test_download_base(self): + cli_string = "lightly-download token='123' dataset_id='XYZ'" + self.parse_cli_string(cli_string) + lightly.cli.download_cli_file.download_cli(self.cfg) + + def test_download_tag_name(self): + cli_string = "lightly-download token='123' dataset_id='XYZ' tag_name='sampled_tag_xyz'" + self.parse_cli_string(cli_string) + lightly.cli.download_cli_file.download_cli(self.cfg) + + def test_download_tag_name_nonexisting(self): + cli_string = "lightly-download token='123' dataset_id='XYZ' tag_name='nonexisting_xyz'" + self.parse_cli_string(cli_string) + lightly.cli.download_cli_file.download_cli(self.cfg) + + def test_download_tag_name_exclude_parent(self): + cli_string = "lightly-download token='123' dataset_id='XYZ' tag_name='sampled_tag_xyz' exclude_parent_tag=True" + self.parse_cli_string(cli_string) + lightly.cli.download_cli_file.download_cli(self.cfg) + + def tearDown(self) -> None: + try: + os.remove(f"{self.cfg['tag_name']}.txt") + except FileNotFoundError: + pass + + From 3518293f270cf4183c0c4c24422ff72312ddd08e Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Fri, 16 Apr 2021 19:53:37 +0200 Subject: [PATCH 08/27] bugfix: "lightly-download = lightly.cli.download_cli_file:entry" --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b9af51836..f0d9527f8 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ def load_requirements(path_dir=PATH_ROOT, filename='base.txt', comment_char='#') "lightly-embed = lightly.cli.embed_cli:entry", "lightly-magic = lightly.cli.lightly_cli:entry", "lightly-upload = lightly.cli.upload_cli:entry", - "lightly-download = lightly.cli.download_cli:entry", + "lightly-download = lightly.cli.download_cli_file:entry", "lightly-version = lightly.cli.version_cli:entry", ] } From 43dee8cc7b251794de5af09644a71c3f18421e9a Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Mon, 19 Apr 2021 00:49:17 +0200 Subject: [PATCH 09/27] Bugfix: actual change got lost in renaming --- lightly/cli/download_cli_file.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/lightly/cli/download_cli_file.py b/lightly/cli/download_cli_file.py index 
feacb2a37..57ce8367d 100644 --- a/lightly/cli/download_cli_file.py +++ b/lightly/cli/download_cli_file.py @@ -20,10 +20,11 @@ from lightly.api.utils import getenv from lightly.api.api_workflow_client import ApiWorkflowClient from lightly.api.bitmask import BitMask +from lightly.openapi_generated.swagger_client import TagData, TagArithmeticsRequest, TagArithmeticsOperation, \ + TagBitMaskResponse def _download_cli(cfg, is_cli_call=True): - tag_name = cfg['tag_name'] dataset_id = cfg['dataset_id'] token = cfg['token'] @@ -50,12 +51,24 @@ def _download_cli(cfg, is_cli_call=True): return # get tag data - tag_data = api_workflow_client.tags_api.get_tag_by_tag_id( + tag_data: TagData = api_workflow_client.tags_api.get_tag_by_tag_id( dataset_id=dataset_id, tag_id=tag_id ) - + + if cfg["exclude_parent_tag"]: + parent_tag_id = tag_data.prev_tag_id + tag_arithmetics_request = TagArithmeticsRequest( + tag_id1=tag_data.id, + tag_id2=parent_tag_id, + operation=TagArithmeticsOperation.DIFFERENCE) + bit_mask_response: TagBitMaskResponse \ + = api_workflow_client.tags_api.perform_tag_arithmetics(body=tag_arithmetics_request, dataset_id=dataset_id) + bit_mask_data = bit_mask_response.bit_mask_data + else: + bit_mask_data = tag_data.bit_mask_data + # get samples - chosen_samples_ids = BitMask.from_hex(tag_data.bit_mask_data).to_indices() + chosen_samples_ids = BitMask.from_hex(bit_mask_data).to_indices() samples = [api_workflow_client.filenames_on_server[i] for i in chosen_samples_ids] # store sample names in a .txt file From 5214e42f56286ff1749ce0dd17e2cd6a990bd7b0 Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Mon, 19 Apr 2021 08:40:32 +0200 Subject: [PATCH 10/27] upload_cli: use number of workers specified --- lightly/cli/upload_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightly/cli/upload_cli.py b/lightly/cli/upload_cli.py index a5c648d4d..d4ac0abb9 100644 --- a/lightly/cli/upload_cli.py +++ b/lightly/cli/upload_cli.py @@ -52,7 +52,7 @@ def _upload_cli(cfg, is_cli_call=True): mode = cfg['upload'] dataset = LightlyDataset(input_dir=input_dir, transform=transform) api_workflow_client.upload_dataset( - input=dataset, mode=mode + input=dataset, mode=mode, max_workers=cfg['loader']['num_workers'] ) if path_to_embeddings: From c39541f501547fa2f1708e3153488a0d0618a2f2 Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Mon, 19 Apr 2021 08:52:38 +0200 Subject: [PATCH 11/27] umocked test_api now uses upload_cli to test setting loader.num_workers --- tests/UNMOCKED_end2end_tests/test_api.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/UNMOCKED_end2end_tests/test_api.py b/tests/UNMOCKED_end2end_tests/test_api.py index 7f15b2a70..da919e31d 100644 --- a/tests/UNMOCKED_end2end_tests/test_api.py +++ b/tests/UNMOCKED_end2end_tests/test_api.py @@ -4,6 +4,9 @@ from typing import List, Tuple import numpy as np +from hydra.experimental import initialize, compose + +from lightly.cli import upload_cli from lightly.data.dataset import LightlyDataset from lightly.active_learning.scorers.classification import ScorerClassification @@ -69,7 +72,14 @@ def create_new_dataset_with_embeddings(path_to_dataset: str, api_workflow_client.create_new_dataset_with_unique_name(dataset_basename=dataset_name) # upload to the dataset - api_workflow_client.upload_dataset(input=path_to_dataset) + initialize(config_path="../../lightly/cli/config", job_name="test_app") + cfg = compose(config_name="config", overrides=[ + f"input_dir='{path_to_dataset}'", + f"token='{token}'", + 
f"dataset_id={api_workflow_client.dataset_id}", + f"loader.num_workers=9" + ]) + upload_cli(cfg) # calculate and save the embeddings path_to_embeddings_csv = f"{path_to_dataset}/embeddings.csv" From a9364cd8acd00ca023703258e65141c6730969b2 Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Mon, 19 Apr 2021 09:32:07 +0200 Subject: [PATCH 12/27] upload_dataset: handle case where max_workers=0 --- lightly/api/api_workflow_upload_dataset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lightly/api/api_workflow_upload_dataset.py b/lightly/api/api_workflow_upload_dataset.py index a64220a64..4d1795fd1 100644 --- a/lightly/api/api_workflow_upload_dataset.py +++ b/lightly/api/api_workflow_upload_dataset.py @@ -61,6 +61,7 @@ def upload_dataset(self, input: Union[str, LightlyDataset], max_workers: int = 8 # handle the case where len(dataset) < max_workers max_workers = min(len(dataset), max_workers) + max_workers = max(max_workers, 1) # upload the samples if verbose: From e53f4ff5562144065342e3792ba68a3e58ad8310 Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Mon, 19 Apr 2021 11:54:09 +0200 Subject: [PATCH 13/27] much more tests and bugfixes for bitmask and agent --- lightly/active_learning/agents/agent.py | 72 ++++++++++--------- lightly/api/bitmask.py | 17 ++--- .../test_active_learning_agent.py | 27 ++++++- tests/api/test_BitMask.py | 47 +++++++++--- 4 files changed, 107 insertions(+), 56 deletions(-) diff --git a/lightly/active_learning/agents/agent.py b/lightly/active_learning/agents/agent.py index 2eab9020d..493817c61 100644 --- a/lightly/active_learning/agents/agent.py +++ b/lightly/active_learning/agents/agent.py @@ -49,7 +49,8 @@ class ActiveLearningAgent: """ - def __init__(self, api_workflow_client: ApiWorkflowClient, query_tag_name: str = None, preselected_tag_name: str = None): + def __init__(self, api_workflow_client: ApiWorkflowClient, query_tag_name: str = None, + preselected_tag_name: str = None): self.api_workflow_client = api_workflow_client if query_tag_name is not None or preselected_tag_name is not None: @@ -76,10 +77,12 @@ def _set_labeled_and_unlabeled_set(self, preselected_tag_data: TagData = None): optional param, then it must not be loaded from the API """ - self.bitmask_labeled_set = BitMask.from_hex("0x0") - self.bitmask_added_set = BitMask.from_hex("0x0") - if self.preselected_tag_id is not None: - if preselected_tag_data is None: + + if not hasattr(self, "bitmask_labeled_set"): + self.bitmask_labeled_set = BitMask.from_hex("0x0") # empty labeled set + self.bitmask_added_set = BitMask.from_hex("0x0") # empty added set + if self.preselected_tag_id is not None: # else the default values (empty labeled and added set) are kept + if preselected_tag_data is None: # if it is not passed as argument, it must be loaded from the API preselected_tag_data = self.api_workflow_client.tags_api.get_tag_by_tag_id( self.api_workflow_client.dataset_id, tag_id=self.preselected_tag_id) new_bitmask_labeled_set = BitMask.from_hex(preselected_tag_data.bit_mask_data) @@ -111,38 +114,43 @@ def query(self, sampler_config: SamplerConfig, al_scorer: Scorer = None) -> Tupl An instance of a class inheriting from Scorer, e.g. a ClassificationScorer. Returns: - The filenames of the samples in the new labeled_set and the added filenames. + The filenames of the samples in the new labeled_set + and the filenames of the samples chosen by the sampler. + This added_set was added to the old labeled_set + to form the new labeled_set. 
""" # check input if sampler_config.n_samples < len(self.labeled_set): warnings.warn("ActiveLearningAgent.query: The number of samples which should be sampled " - "including the current labeled set " - "(sampler_config.n_samples) " - "is smaller than the number of samples in the current labeled set.") - + "including the current labeled set " + "(sampler_config.n_samples) " + "is smaller than the number of samples in the current labeled set." + "Skipping the sampling and returning the old labeled_set and" + "no ne filenames.") + return self.labeled_set, [] + + # calculate scores + if al_scorer is not None: + no_unlabeled_samples = len(self.unlabeled_set) + no_samples_with_predictions = len(al_scorer.model_output) + if no_unlabeled_samples != no_samples_with_predictions: + raise ValueError(f"The scorer must have exactly as many samples as in the unlabeled set," + f"but there are {no_samples_with_predictions} predictions in the scorer," + f"but {no_unlabeled_samples} in the unlabeled set.") + scores_dict = al_scorer._calculate_scores() else: - # calculate scores - if al_scorer is not None: - no_unlabeled_samples = len(self.unlabeled_set) - no_samples_with_predictions = len(al_scorer.model_output) - if no_unlabeled_samples != no_samples_with_predictions: - raise ValueError(f"The scorer must have exactly as much samples as in the unlabeled set," - f"but there are {no_samples_with_predictions} predictions in the scorer," - f"but {no_unlabeled_samples} in the unlabeled set.") - scores_dict = al_scorer._calculate_scores() - else: - scores_dict = None - - # perform the sampling - new_tag_data = self.api_workflow_client.sampling( - sampler_config=sampler_config, - al_scores=scores_dict, - preselected_tag_id=self.preselected_tag_id, - query_tag_id=self.query_tag_id) - - # set the newly chosen tag as the new preselected_tag_id and update the sets - self.preselected_tag_id = new_tag_data.id - self._set_labeled_and_unlabeled_set(new_tag_data) + scores_dict = None + + # perform the sampling + new_tag_data = self.api_workflow_client.sampling( + sampler_config=sampler_config, + al_scores=scores_dict, + preselected_tag_id=self.preselected_tag_id, + query_tag_id=self.query_tag_id) + + # set the newly chosen tag as the new preselected_tag_id and update the sets + self.preselected_tag_id = new_tag_data.id + self._set_labeled_and_unlabeled_set(new_tag_data) return self.labeled_set, self.added_set diff --git a/lightly/api/bitmask.py b/lightly/api/bitmask.py index 78ca34a26..0c3b1da7d 100644 --- a/lightly/api/bitmask.py +++ b/lightly/api/bitmask.py @@ -2,7 +2,7 @@ # Copyright (c) 2020. Lightly AG and its affiliates. # All Rights Reserved - +from copy import deepcopy from typing import List @@ -68,12 +68,6 @@ def _intersection(x: int, y: int) -> int: return x & y -def _difference(x: int, y: int) -> int: - """Uses difference to get the intersection of the two masks. - """ - return x - y - - def _get_kth_bit(x: int, k: int) -> int: """Returns the kth bit in the mask from the right. """ @@ -108,7 +102,7 @@ class BitMask: >>> # for a dataset with 10 images, assume the following tag >>> # 0001011001 where the 1st, 4th, 5th and 7th image are selected >>> # this tag would be stored as 0x59. 
- >>> hexstring = 0x59 # what you receive from the api + >>> hexstring = '0x59' # what you receive from the api >>> mask = BitMask.from_hex(hexstring) # create a bitmask from it >>> indices = mask.to_indices() # get list of indices which are one >>> # indices is [0, 3, 4, 6] @@ -194,10 +188,13 @@ def difference(self, other): >>> mask1.difference(mask2) >>> # mask1.binstring is '0b0011' """ - self.x = _difference(self.x, other.x) + self.union(other) + self.x = self.x - other.x def __sub__(self, other): - return BitMask(self.x - other.x) + ret = deepcopy(self) + ret.difference(other) + return ret def __eq__(self, other): return self.to_bin() == other.to_bin() diff --git a/tests/active_learning/test_active_learning_agent.py b/tests/active_learning/test_active_learning_agent.py index 8212d6178..a84d2a53d 100644 --- a/tests/active_learning/test_active_learning_agent.py +++ b/tests/active_learning/test_active_learning_agent.py @@ -20,13 +20,16 @@ def test_agent(self): for method in [SamplingMethod.CORAL, SamplingMethod.CORESET, SamplingMethod.RANDOM]: for agent in [agent_0, agent_1, agent_2, agent_3]: for batch_size in [2, 6]: + n_old_labeled = len(agent.labeled_set) + n_old_unlabeled = len(agent.unlabeled_set) + n_samples = len(agent.labeled_set) + batch_size - if method == SamplingMethod.CORAL and len(agent.labeled_set) > 0: + if method == SamplingMethod.CORAL and len(agent.labeled_set) == 0: sampler_config = SamplerConfig(n_samples=n_samples, method=SamplingMethod.CORESET) else: sampler_config = SamplerConfig(n_samples=n_samples, method=method) - if sampler_config.method == SamplingMethod.CORESET: + if sampler_config.method == SamplingMethod.CORAL: predictions = np.random.rand(len(agent.unlabeled_set), 10).astype(np.float32) predictions_normalized = predictions / np.sum(predictions, axis=1)[:, np.newaxis] al_scorer = ScorerClassification(predictions_normalized) @@ -35,5 +38,23 @@ def test_agent(self): sampler_config = SamplerConfig(n_samples=n_samples) labeled_set, added_set = agent.query(sampler_config=sampler_config) - assert len(added_set) <= len(labeled_set) + self.assertEqual(n_old_labeled + len(added_set), len(labeled_set)) assert set(added_set).issubset(labeled_set) + self.assertEqual(len(list(set(agent.labeled_set) & set(agent.unlabeled_set))), 0) + self.assertEqual(n_old_unlabeled - len(added_set), len(agent.unlabeled_set)) + + def test_agent_wrong_scores(self): + self.api_workflow_client.embedding_id = "embedding_id_xyz" + + agent = ActiveLearningAgent(self.api_workflow_client, preselected_tag_name="preselected_tag_name_xyz") + method = SamplingMethod.CORAL + n_samples = len(agent.labeled_set) + 2 + + n_predictions = len(agent.unlabeled_set) - 3 # the -3 should cause en error + predictions = np.random.rand(n_predictions, 10).astype(np.float32) + predictions_normalized = predictions / np.sum(predictions, axis=1)[:, np.newaxis] + al_scorer = ScorerClassification(predictions_normalized) + + sampler_config = SamplerConfig(n_samples=n_samples, method=method) + with self.assertRaises(ValueError): + labeled_set, added_set = agent.query(sampler_config=sampler_config, al_scorer=al_scorer) diff --git a/tests/api/test_BitMask.py b/tests/api/test_BitMask.py index 0a5d0ba9a..8e362d719 100644 --- a/tests/api/test_BitMask.py +++ b/tests/api/test_BitMask.py @@ -1,4 +1,6 @@ import unittest +from random import random, seed, randint + from lightly.api.bitmask import BitMask N = 10 @@ -91,31 +93,54 @@ def test_intersection(self): mask_a.intersection(mask_b) self.assertEqual(mask_a.x, int("0b100", 2)) - 
def test_difference(self): - mask_a = BitMask.from_bin("0b101") - mask_b = BitMask.from_bin("0b001") + def assert_difference(self, bistring_1: str, bitstring_2: str, target: str): + mask_a = BitMask.from_bin(bistring_1) + mask_b = BitMask.from_bin(bitstring_2) mask_a.difference(mask_b) - self.assertEqual(mask_a.x, int("0b100", 2)) + self.assertEqual(mask_a.x, int(target, 2)) + + def test_differences(self): + self.assert_difference("0b101", "0b001", "0b100") + self.assert_difference("0b0111", "0b1100", "0b0011") + self.assert_difference("0b10111", "0b01100", "0b10011") + + def random_bitsting(self, length: int): + bitsting = '0b' + for i in range(length): + bitsting += str(randint(0, 1)) + return bitsting + + def test_difference_random(self): + seed(42) + for rep in range(10): + for string_length in range(1, 100, 10): + bitstring_1 = self.random_bitsting(string_length) + bitstring_2 = self.random_bitsting(string_length) + target = '0b' + for bit_1, bit_2 in zip(bitstring_1[2:], bitstring_2[2:]): + if bit_1 == '1' and bit_2 == '0': + target += '1' + else: + target += '0' + self.assert_difference(bitstring_1, bitstring_2, target) def test_operator_minus(self): - mask_a = BitMask.from_bin("0b101") - mask_b = BitMask.from_bin("0b001") - mask_target = BitMask.from_bin("0b100") - self.assertEqual(mask_a-mask_b, mask_target) + mask_a = BitMask.from_bin("0b10111") + mask_b = BitMask.from_bin("0b01100") + mask_target = BitMask.from_bin("0b10011") + self.assertEqual(mask_a - mask_b, mask_target) def test_equal(self): mask_a = BitMask.from_bin("0b101") mask_b = BitMask.from_bin("0b101") self.assertEqual(mask_a, mask_b) - def test_subset_a_list(self): list_ = [4, 7, 9, 1] mask = BitMask.from_bin("0b0101") target_masked_list = [7, 1] masked_list = mask.subset_a_list(list_) - self.assertEqual(target_masked_list,masked_list) - + self.assertEqual(target_masked_list, masked_list) def test_nonzero_bits(self): From 396d5319f4f7e4465d8017e97691a7928e1dc8d0 Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Mon, 19 Apr 2021 12:02:46 +0200 Subject: [PATCH 14/27] renamed bitmask.subset_a_list to masked_select_from_list --- lightly/active_learning/agents/agent.py | 6 +++--- lightly/api/bitmask.py | 4 ++-- tests/api/test_BitMask.py | 8 ++++++-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/lightly/active_learning/agents/agent.py b/lightly/active_learning/agents/agent.py index 493817c61..550522b07 100644 --- a/lightly/active_learning/agents/agent.py +++ b/lightly/active_learning/agents/agent.py @@ -97,9 +97,9 @@ def _set_labeled_and_unlabeled_set(self, preselected_tag_data: TagData = None): bitmask_query_tag = BitMask.from_hex(query_tag_data.bit_mask_data) self.bitmask_unlabeled_set = bitmask_query_tag - self.bitmask_labeled_set - self.labeled_set = self.bitmask_labeled_set.subset_a_list(self.api_workflow_client.filenames_on_server) - self.added_set = self.bitmask_added_set.subset_a_list(self.api_workflow_client.filenames_on_server) - self.unlabeled_set = self.bitmask_unlabeled_set.subset_a_list(self.api_workflow_client.filenames_on_server) + self.labeled_set = self.bitmask_labeled_set.masked_select_from_list(self.api_workflow_client.filenames_on_server) + self.added_set = self.bitmask_added_set.masked_select_from_list(self.api_workflow_client.filenames_on_server) + self.unlabeled_set = self.bitmask_unlabeled_set.masked_select_from_list(self.api_workflow_client.filenames_on_server) def query(self, sampler_config: SamplerConfig, al_scorer: Scorer = None) -> Tuple[List[str], List[str]]: """Performs an 
active learning query. diff --git a/lightly/api/bitmask.py b/lightly/api/bitmask.py index 0c3b1da7d..d0cb0f011 100644 --- a/lightly/api/bitmask.py +++ b/lightly/api/bitmask.py @@ -199,12 +199,12 @@ def __sub__(self, other): def __eq__(self, other): return self.to_bin() == other.to_bin() - def subset_a_list(self, list_: List): + def masked_select_from_list(self, list_: List): """Returns a subset of a list depending on the bitmask Examples: >>> list_to_subset = [4, 7, 9, 1] >>> mask = BitMask.from_bin("0b0101") - >>> masked_list = mask.subset_a_list(list_to_subset) + >>> masked_list = mask.masked_select_from_list(list_to_subset) >>> # masked_list = [7, 1] """ bits = self.to_bin() diff --git a/tests/api/test_BitMask.py b/tests/api/test_BitMask.py index 8e362d719..a0eab9851 100644 --- a/tests/api/test_BitMask.py +++ b/tests/api/test_BitMask.py @@ -1,4 +1,5 @@ import unittest +from copy import deepcopy from random import random, seed, randint from lightly.api.bitmask import BitMask @@ -126,9 +127,12 @@ def test_difference_random(self): def test_operator_minus(self): mask_a = BitMask.from_bin("0b10111") + mask_a_old = deepcopy(mask_a) mask_b = BitMask.from_bin("0b01100") mask_target = BitMask.from_bin("0b10011") - self.assertEqual(mask_a - mask_b, mask_target) + diff = mask_a - mask_b + self.assertEqual(diff, mask_target) + self.assertEqual(mask_a_old, mask_a) # make sure the original mask is unchanged. def test_equal(self): mask_a = BitMask.from_bin("0b101") @@ -139,7 +143,7 @@ def test_subset_a_list(self): list_ = [4, 7, 9, 1] mask = BitMask.from_bin("0b0101") target_masked_list = [7, 1] - masked_list = mask.subset_a_list(list_) + masked_list = mask.masked_select_from_list(list_) self.assertEqual(target_masked_list, masked_list) def test_nonzero_bits(self): From b61ad0fce78da3980861cb6a83d0282412feb5e0 Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Mon, 19 Apr 2021 13:15:08 +0200 Subject: [PATCH 15/27] renamed download_cli_file back to download_cli by using sys.modules --- lightly/cli/__init__.py | 2 +- .../cli/{download_cli_file.py => download_cli.py} | 3 ++- setup.py | 2 +- tests/cli/test_cli_download.py | 14 ++++++++------ 4 files changed, 12 insertions(+), 9 deletions(-) rename lightly/cli/{download_cli_file.py => download_cli.py} (98%) diff --git a/lightly/cli/__init__.py b/lightly/cli/__init__.py index f8ff2886b..e64a86144 100644 --- a/lightly/cli/__init__.py +++ b/lightly/cli/__init__.py @@ -10,4 +10,4 @@ from lightly.cli.train_cli import train_cli from lightly.cli.embed_cli import embed_cli from lightly.cli.upload_cli import upload_cli -from lightly.cli.download_cli_file import download_cli +from lightly.cli.download_cli import download_cli diff --git a/lightly/cli/download_cli_file.py b/lightly/cli/download_cli.py similarity index 98% rename from lightly/cli/download_cli_file.py rename to lightly/cli/download_cli.py index 57ce8367d..5ee69c3f8 100644 --- a/lightly/cli/download_cli_file.py +++ b/lightly/cli/download_cli.py @@ -10,6 +10,7 @@ import os import shutil +import warnings import hydra from tqdm import tqdm @@ -47,7 +48,7 @@ def _download_cli(cfg, is_cli_call=True): tag_name_id_dict = dict([tag.name, tag.id] for tag in api_workflow_client._get_all_tags()) tag_id = tag_name_id_dict.get(tag_name, None) if tag_id is None: - print(f'The specified tag {tag_name} does not exist.') + warnings.warn(f'The specified tag {tag_name} does not exist.') return # get tag data diff --git a/setup.py b/setup.py index f0d9527f8..b9af51836 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ def 
load_requirements(path_dir=PATH_ROOT, filename='base.txt', comment_char='#') "lightly-embed = lightly.cli.embed_cli:entry", "lightly-magic = lightly.cli.lightly_cli:entry", "lightly-upload = lightly.cli.upload_cli:entry", - "lightly-download = lightly.cli.download_cli_file:entry", + "lightly-download = lightly.cli.download_cli:entry", "lightly-version = lightly.cli.version_cli:entry", ] } diff --git a/tests/cli/test_cli_download.py b/tests/cli/test_cli_download.py index a9e3ccd2d..8ad66cce1 100644 --- a/tests/cli/test_cli_download.py +++ b/tests/cli/test_cli_download.py @@ -1,5 +1,6 @@ import os import re +import sys import tempfile from hydra.experimental import compose, initialize @@ -8,13 +9,13 @@ from tests.api_workflow.mocked_api_workflow_client import MockedApiWorkflowSetup, MockedApiWorkflowClient -#in download_cli_file.py: from lightly.api.api_workflow_client import ApiWorkflowClient +#in download_cli.py: from lightly.api.api_workflow_client import ApiWorkflowClient class TestCLIDownload(MockedApiWorkflowSetup): @classmethod def setUpClass(cls) -> None: - lightly.cli.download_cli_file.ApiWorkflowClient = MockedApiWorkflowClient + sys.modules["lightly.cli.download_cli"].ApiWorkflowClient = MockedApiWorkflowClient initialize(config_path="../../lightly/cli/config", job_name="test_app") def setUp(self): @@ -41,22 +42,23 @@ def test_parse_cli_string(self): def test_download_base(self): cli_string = "lightly-download token='123' dataset_id='XYZ'" self.parse_cli_string(cli_string) - lightly.cli.download_cli_file.download_cli(self.cfg) + lightly.cli.download_cli(self.cfg) def test_download_tag_name(self): cli_string = "lightly-download token='123' dataset_id='XYZ' tag_name='sampled_tag_xyz'" self.parse_cli_string(cli_string) - lightly.cli.download_cli_file.download_cli(self.cfg) + lightly.cli.download_cli(self.cfg) def test_download_tag_name_nonexisting(self): cli_string = "lightly-download token='123' dataset_id='XYZ' tag_name='nonexisting_xyz'" self.parse_cli_string(cli_string) - lightly.cli.download_cli_file.download_cli(self.cfg) + with self.assertWarns(Warning): + lightly.cli.download_cli(self.cfg) def test_download_tag_name_exclude_parent(self): cli_string = "lightly-download token='123' dataset_id='XYZ' tag_name='sampled_tag_xyz' exclude_parent_tag=True" self.parse_cli_string(cli_string) - lightly.cli.download_cli_file.download_cli(self.cfg) + lightly.cli.download_cli(self.cfg) def tearDown(self) -> None: try: From b3e2b7866a6d52c3978bed9d147dc6909f7c4da5 Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Mon, 19 Apr 2021 13:46:19 +0200 Subject: [PATCH 16/27] Better warning string --- lightly/active_learning/agents/agent.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lightly/active_learning/agents/agent.py b/lightly/active_learning/agents/agent.py index 550522b07..a601117ca 100644 --- a/lightly/active_learning/agents/agent.py +++ b/lightly/active_learning/agents/agent.py @@ -126,8 +126,7 @@ def query(self, sampler_config: SamplerConfig, al_scorer: Scorer = None) -> Tupl "including the current labeled set " "(sampler_config.n_samples) " "is smaller than the number of samples in the current labeled set." 
- "Skipping the sampling and returning the old labeled_set and" - "no ne filenames.") + "Skipping the sampling and returning the previous labeled set.") return self.labeled_set, [] # calculate scores From c9c0aae8f781ccd2de7ba131e52368a291ebb385 Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Mon, 19 Apr 2021 15:44:08 +0200 Subject: [PATCH 17/27] renamed scorer.calculate_scores() --- docs/source/getting_started/active_learning.rst | 7 ++++--- lightly/active_learning/agents/agent.py | 2 +- lightly/active_learning/scorers/classification.py | 8 +++++++- lightly/active_learning/scorers/detection.py | 8 +++++++- lightly/active_learning/scorers/scorer.py | 10 +++++++++- tests/active_learning/test_ScorerClassification.py | 2 +- tests/active_learning/test_ScorerObjectDetection.py | 6 +++--- 7 files changed, 32 insertions(+), 11 deletions(-) diff --git a/docs/source/getting_started/active_learning.rst b/docs/source/getting_started/active_learning.rst index 5b829a3ca..58e244522 100644 --- a/docs/source/getting_started/active_learning.rst +++ b/docs/source/getting_started/active_learning.rst @@ -45,9 +45,10 @@ Lightly makes use of the following concepts for active learning: * **Scorer:** :py:class:`lightly.active_learning.scorers.scorer.Scorer` The `Scorer` takes as input the predictions of a pre-trained model on the set - of unlabeled images. It evaluates different scores based on how certain the model - is about the images and passes them to the API so the sampler can use them with - Coral. + of unlabeled images. It offers a `calculate_scores()` method, which evaluates + different scores based on how certain the model is about the images. When + performing a sampling, the scores are passed to the API so the sampler can use + them with Coral. Continue reading to see how these components interact and how active learning is diff --git a/lightly/active_learning/agents/agent.py b/lightly/active_learning/agents/agent.py index a601117ca..ba9ecdddf 100644 --- a/lightly/active_learning/agents/agent.py +++ b/lightly/active_learning/agents/agent.py @@ -137,7 +137,7 @@ def query(self, sampler_config: SamplerConfig, al_scorer: Scorer = None) -> Tupl raise ValueError(f"The scorer must have exactly as many samples as in the unlabeled set," f"but there are {no_samples_with_predictions} predictions in the scorer," f"but {no_unlabeled_samples} in the unlabeled set.") - scores_dict = al_scorer._calculate_scores() + scores_dict = al_scorer.calculate_scores() else: scores_dict = None diff --git a/lightly/active_learning/scorers/classification.py b/lightly/active_learning/scorers/classification.py index d830f6dc9..0a2f5cfd4 100644 --- a/lightly/active_learning/scorers/classification.py +++ b/lightly/active_learning/scorers/classification.py @@ -67,7 +67,13 @@ class ScorerClassification(Scorer): def __init__(self, model_output: np.ndarray): super(ScorerClassification, self).__init__(model_output) - def _calculate_scores(self) -> Dict[str, np.ndarray]: + def calculate_scores(self) -> Dict[str, np.ndarray]: + """Calculates and returns the active learning scores. + + Returns: + A dictionary mapping from the score name (as string) + to the scores (as a single-dimensional numpy array). 
+ """ scores = dict() scores["prediction-margin"] = self._get_prediction_margin_score() scores["prediction-entropy"] = self._get_prediction_entropy_score() diff --git a/lightly/active_learning/scorers/detection.py b/lightly/active_learning/scorers/detection.py index 2b91ee6e3..86fff5957 100644 --- a/lightly/active_learning/scorers/detection.py +++ b/lightly/active_learning/scorers/detection.py @@ -177,7 +177,13 @@ def _check_config(self): else: self.config = default_conf - def _calculate_scores(self) -> Dict[str, np.ndarray]: + def calculate_scores(self) -> Dict[str, np.ndarray]: + """Calculates and returns the active learning scores. + + Returns: + A dictionary mapping from the score name (as string) + to the scores (as a single-dimensional numpy array). + """ scores = dict() scores['object-frequency'] = self._get_object_frequency() scores['prediction-margin'] = self._get_prediction_margin() diff --git a/lightly/active_learning/scorers/scorer.py b/lightly/active_learning/scorers/scorer.py index 6e8a6079c..31bb868e1 100644 --- a/lightly/active_learning/scorers/scorer.py +++ b/lightly/active_learning/scorers/scorer.py @@ -8,5 +8,13 @@ class Scorer(): def __init__(self, model_output): self.model_output = model_output - def _calculate_scores(self) -> Dict[str, np.ndarray]: + def calculate_scores(self) -> Dict[str, np.ndarray]: + """Calculates and returns the active learning scores + + Which scores are calculated depends on the implementation + of this parent class by the child classes. + Returns: + A dictionary mapping from the score name (as string) + to the scores (as a single-dimensional numpy array). + """ raise NotImplementedError diff --git a/tests/active_learning/test_ScorerClassification.py b/tests/active_learning/test_ScorerClassification.py index e5c1d523d..4924faa63 100644 --- a/tests/active_learning/test_ScorerClassification.py +++ b/tests/active_learning/test_ScorerClassification.py @@ -13,7 +13,7 @@ def test_score_calculation(self): predictions_normalized = predictions / np.sum(predictions, axis=1)[:, np.newaxis] model_output = predictions_normalized scorer = ScorerClassification(model_output) - scores = scorer._calculate_scores() + scores = scorer.calculate_scores() scores_prediction_entropy = scores["prediction-entropy"] scores_prediction_margin = scores["prediction-margin"] diff --git a/tests/active_learning/test_ScorerObjectDetection.py b/tests/active_learning/test_ScorerObjectDetection.py index 473f4384f..9e024c565 100644 --- a/tests/active_learning/test_ScorerObjectDetection.py +++ b/tests/active_learning/test_ScorerObjectDetection.py @@ -69,7 +69,7 @@ def test_object_detection_scorer(self): ) scorer = ScorerObjectDetection(self.dummy_data) - scores = scorer._calculate_scores() + scores = scorer.calculate_scores() res = scores['object-frequency'] self.assertEqual(len(res), len(self.dummy_data)) @@ -101,7 +101,7 @@ def test_object_detection_scorer_config(self): # check for default config scorer = ScorerObjectDetection(self.dummy_data) - scores = scorer._calculate_scores() + scores = scorer.calculate_scores() expected_default_config = { 'frequency_penalty': 0.25, 'min_score': 0.9 @@ -114,7 +114,7 @@ def test_object_detection_scorer_config(self): 'min_score': 0.6 } scorer = ScorerObjectDetection(self.dummy_data, config=new_config) - scores = scorer._calculate_scores() + scores = scorer.calculate_scores() self.assertDictEqual(scorer.config, new_config) # check for invalid key passed From fd579f90a12d7087c882206279295a2d1148f897 Mon Sep 17 00:00:00 2001 From: MalteEbner Date: 
Mon, 19 Apr 2021 16:28:21 +0200 Subject: [PATCH 18/27] scorer.py: shorter docstring --- lightly/active_learning/scorers/scorer.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/lightly/active_learning/scorers/scorer.py b/lightly/active_learning/scorers/scorer.py index 31bb868e1..d62b9c217 100644 --- a/lightly/active_learning/scorers/scorer.py +++ b/lightly/active_learning/scorers/scorer.py @@ -9,12 +9,6 @@ def __init__(self, model_output): self.model_output = model_output def calculate_scores(self) -> Dict[str, np.ndarray]: - """Calculates and returns the active learning scores - - Which scores are calculated depends on the implementation - of this parent class by the child classes. - Returns: - A dictionary mapping from the score name (as string) - to the scores (as a single-dimensional numpy array). + """Calculates and returns active learning scores in a dictionary. """ raise NotImplementedError From 7ba5a0fbf37581feecf7271d81c50a20ac433f05 Mon Sep 17 00:00:00 2001 From: Philipp Wirth <65946090+philippmwirth@users.noreply.github.com> Date: Tue, 20 Apr 2021 07:24:51 +0200 Subject: [PATCH 19/27] 327 Add check for comma in filenames of CSV file (#302) * Add check for comma in filenames of CSV file * Add tests for better coverage --- lightly/api/api_workflow_upload_embeddings.py | 22 ++++++++++++++++--- .../test_api_workflow_upload_embeddings.py | 20 ++++++++++++++++- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/lightly/api/api_workflow_upload_embeddings.py b/lightly/api/api_workflow_upload_embeddings.py index a82fc0c9d..6595e9fbb 100644 --- a/lightly/api/api_workflow_upload_embeddings.py +++ b/lightly/api/api_workflow_upload_embeddings.py @@ -5,6 +5,18 @@ from lightly.openapi_generated.swagger_client.models.write_csv_url_data import WriteCSVUrlData + +def _is_valid_filename(filename: str): + """Returns False if the filename is misformatted. 
+ + """ + invalid_characters = [','] + for character in invalid_characters: + if character in filename: + return False + return True + + class _UploadEmbeddingsMixin: def set_embedding_id_by_name(self, embedding_name: str = None): @@ -39,6 +51,7 @@ def upload_embeddings(self, path_to_embeddings_csv: str, name: str): embeddings_on_server: List[DatasetEmbeddingData] = \ self.embeddings_api.get_embeddings_by_dataset_id(dataset_id=self.dataset_id) names_embeddings_on_server = [embedding.name for embedding in embeddings_on_server] + if name in names_embeddings_on_server: print(f"Aborting upload, embedding with name='{name}' already exists.") self.embedding_id = next(embedding for embedding in embeddings_on_server if embedding.name == name).id @@ -79,10 +92,13 @@ def _order_csv_by_filenames(self, path_to_embeddings_csv: str) -> str: filenames = [row[index_filenames] for row in rows_without_header] if len(filenames) != len(self.filenames_on_server): - raise ValueError(f"There are {len(filenames)} rows in the embedding file, but " - f"{len(self.filenames_on_server)} filenames/samples on the server.") + raise ValueError(f'There are {len(filenames)} rows in the embedding file, but ' + f'{len(self.filenames_on_server)} filenames/samples on the server.') if set(filenames) != set(self.filenames_on_server): - raise ValueError(f"The filenames in the embedding file and the filenames on the server do not align") + raise ValueError(f'The filenames in the embedding file and the filenames on the server do not align') + invalid_filenames = [f for f in filenames if not _is_valid_filename(f)] + if len(invalid_filenames) > 0: + raise ValueError(f'Invalid filename(s) in embedding file: {invalid_filenames}') rows_without_header_ordered = self._order_list_by_filenames(filenames, rows_without_header) diff --git a/tests/api_workflow/test_api_workflow_upload_embeddings.py b/tests/api_workflow/test_api_workflow_upload_embeddings.py index 943613632..9fedc15b2 100644 --- a/tests/api_workflow/test_api_workflow_upload_embeddings.py +++ b/tests/api_workflow/test_api_workflow_upload_embeddings.py @@ -9,7 +9,10 @@ class TestApiWorkflowUploadEmbeddigns(MockedApiWorkflowSetup): - def t_ester_upload_embedding(self, n_data, special_name_first_sample: bool = False): + def t_ester_upload_embedding(self, + n_data, + special_name_first_sample: bool = False, + comma_in_first_sample: bool = False): # create fake embeddings folder_path = tempfile.mkdtemp() path_to_embeddings = os.path.join( @@ -19,6 +22,8 @@ def t_ester_upload_embedding(self, n_data, special_name_first_sample: bool = Fal sample_names = [f'img_{i}.jpg' for i in range(n_data)] if special_name_first_sample: sample_names[0] = "bliblablub" + if comma_in_first_sample: + sample_names[0] = "bli,blablu" labels = [0] * len(sample_names) save_embeddings( path_to_embeddings, @@ -44,6 +49,11 @@ def test_upload_wrong_filenames(self): with self.assertRaises(ValueError): self.t_ester_upload_embedding(n_data=n_data, special_name_first_sample=True) + def test_upload_comma_filenames(self): + n_data = len(self.api_workflow_client.mappings_api.sample_names) + with self.assertRaises(ValueError): + self.t_ester_upload_embedding(n_data=n_data, comma_in_first_sample=True) + def test_set_embedding_id_success(self): embedding_name = self.api_workflow_client.embeddings_api.embeddings[0].name self.api_workflow_client.set_embedding_id_by_name(embedding_name) @@ -55,3 +65,11 @@ def test_set_embedding_id_failure(self): def test_set_embedding_id_default(self): 
         self.api_workflow_client.set_embedding_id_by_name()
+
+    def test_is_valid_filename(self):
+        filenames = [',a', ',', 'a,', 'a']
+        is_valid = [False, False, False, True]
+        result = [
+            lightly.api.api_workflow_upload_embeddings._is_valid_filename(f) for f in filenames
+        ]
+        self.assertListEqual(is_valid, result)

From 6ee6647f42f80ac1bb96bdab62fb1b205815be57 Mon Sep 17 00:00:00 2001
From: MalteEbner
Date: Tue, 20 Apr 2021 08:18:15 +0200
Subject: [PATCH 20/27] rename lighty_subset to lightly_subset

---
 lightly/data/{lighty_subset.py => lightly_subset.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename lightly/data/{lighty_subset.py => lightly_subset.py} (100%)

diff --git a/lightly/data/lighty_subset.py b/lightly/data/lightly_subset.py
similarity index 100%
rename from lightly/data/lighty_subset.py
rename to lightly/data/lightly_subset.py

From 65122d1f6d7d6857e1c54dacdeb2b7311cacbcf7 Mon Sep 17 00:00:00 2001
From: MalteEbner
Date: Tue, 20 Apr 2021 08:18:29 +0200
Subject: [PATCH 21/27] Update test_LightlySubset.py

---
 tests/data/test_LightlySubset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/data/test_LightlySubset.py b/tests/data/test_LightlySubset.py
index 53a3ec9e3..e6dd40050 100644
--- a/tests/data/test_LightlySubset.py
+++ b/tests/data/test_LightlySubset.py
@@ -6,7 +6,7 @@
 import torchvision
 
 from lightly.data.dataset import LightlyDataset
-from lightly.data.lighty_subset import LightlySubset
+from lightly.data.lightly_subset import LightlySubset
 
 from tests.data.test_LightlyDataset import TestLightlyDataset
 

From bd982c9513effd75c11d9b2671de09a4be6ce155 Mon Sep 17 00:00:00 2001
From: Philipp Wirth <65946090+philippmwirth@users.noreply.github.com>
Date: Tue, 20 Apr 2021 08:40:19 +0200
Subject: [PATCH 22/27] 304 fix import api workflow client pw (#305)

* Fix imports for api workflow client and al utils

* Add active learning utils to docs

* Add more tests for imports
---
 docs/source/lightly.active_learning.rst   |  6 ++++++
 lightly/active_learning/utils/__init__.py |  7 +++++++
 lightly/api/__init__.py                   |  1 +
 tests/imports/test_nested_imports.py      | 17 +++++++++++++----
 tests/imports/test_seminested_imports.py  |  8 ++++----
 5 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/docs/source/lightly.active_learning.rst b/docs/source/lightly.active_learning.rst
index 2c9df027c..27fe5ed79 100644
--- a/docs/source/lightly.active_learning.rst
+++ b/docs/source/lightly.active_learning.rst
@@ -22,3 +22,9 @@ lightly.active_learning
 .. automodule:: lightly.active_learning.scorers.detection
    :members:
 
+.utils
+--------
+.. automodule:: lightly.active_learning.utils.bounding_box
+   :members:
+.. automodule:: lightly.active_learning.utils.object_detection_output
+   :members:
\ No newline at end of file
diff --git a/lightly/active_learning/utils/__init__.py b/lightly/active_learning/utils/__init__.py
index e69de29bb..81ba8feb5 100644
--- a/lightly/active_learning/utils/__init__.py
+++ b/lightly/active_learning/utils/__init__.py
@@ -0,0 +1,7 @@
+""" Collection of Utils for Active Learning """
+
+# Copyright (c) 2020. Lightly AG and its affiliates.
+# All Rights Reserved
+
+from lightly.active_learning.utils.bounding_box import BoundingBox
+from lightly.active_learning.utils.object_detection_output import ObjectDetectionOutput
\ No newline at end of file
diff --git a/lightly/api/__init__.py b/lightly/api/__init__.py
index 86570f7b8..2aa24e779 100644
--- a/lightly/api/__init__.py
+++ b/lightly/api/__init__.py
@@ -3,4 +3,5 @@
 # Copyright (c) 2020.
Lightly AG and its affiliates. # All Rights Reserved +from lightly.api.api_workflow_client import ApiWorkflowClient from lightly.api import routes diff --git a/tests/imports/test_nested_imports.py b/tests/imports/test_nested_imports.py index b21207ce1..e1f854a2b 100644 --- a/tests/imports/test_nested_imports.py +++ b/tests/imports/test_nested_imports.py @@ -7,17 +7,26 @@ class TestNestedImports(unittest.TestCase): def test_nested_imports(self): - # active learning (commented out don't work) - #lightly.active_learning.agents.agent.ActiveLearningAgent - #lightly.active_learning.agents.ActiveLearningAgent + # active learning + lightly.active_learning.agents.agent.ActiveLearningAgent + lightly.active_learning.agents.ActiveLearningAgent lightly.active_learning.config.sampler_config.SamplerConfig - #lightly.active_learning.scorers.classification.ScorerClassification + lightly.active_learning.config.SamplerConfig + lightly.active_learning.scorers.classification.ScorerClassification + lightly.active_learning.scorers.ScorerClassification + lightly.active_learning.scorers.detection.ScorerObjectDetection + lightly.active_learning.scorers.ScorerObjectDetection + lightly.active_learning.utils.bounding_box.BoundingBox + lightly.active_learning.utils.BoundingBox + lightly.active_learning.utils.object_detection_output.ObjectDetectionOutput + lightly.active_learning.utils.ObjectDetectionOutput # api imports lightly.api.routes.users.docker.get_authorization lightly.api.routes.users.docker.get_soft_authorization lightly.api.routes.users.docker.post_diagnostics lightly.api.api_workflow_client.ApiWorkflowClient + lightly.api.ApiWorkflowClient lightly.api.bitmask.BitMask # data imports diff --git a/tests/imports/test_seminested_imports.py b/tests/imports/test_seminested_imports.py index 944bd1446..60f602bc4 100644 --- a/tests/imports/test_seminested_imports.py +++ b/tests/imports/test_seminested_imports.py @@ -9,10 +9,10 @@ class TestSemiNestedImports(unittest.TestCase): def test_seminested_imports(self): from lightly import active_learning # active learning (commented out don't work) - #lightly.active_learning.agents.agent.ActiveLearningAgent - #lightly.active_learning.agents.ActiveLearningAgent - active_learning.config.sampler_config.SamplerConfig - #lightly.active_learning.scorers.classification.ScorerClassification + active_learning.agents.ActiveLearningAgent + active_learning.config.SamplerConfig + active_learning.scorers.ScorerClassification + active_learning.scorers.ScorerObjectDetection # api imports from lightly import api From 0170e85026afce30334fd8401177e353fac42a8e Mon Sep 17 00:00:00 2001 From: Philipp Wirth <65946090+philippmwirth@users.noreply.github.com> Date: Wed, 21 Apr 2021 11:18:03 +0200 Subject: [PATCH 23/27] 132 Docs on embeddings upload (#303) * Fix docstrings and add api workflow clients to autodocs * Add short section on how to upload embeddings from the Python package * Add short section on how to upload images from Pytyhon * Make comments more detailed --- docs/source/getting_started/platform.rst | 33 +++++++++++++++++-- docs/source/lightly.api.rst | 14 ++++++++ lightly/api/api_workflow_client.py | 10 +++--- lightly/api/api_workflow_upload_embeddings.py | 19 ++++++++--- 4 files changed, 64 insertions(+), 12 deletions(-) diff --git a/docs/source/getting_started/platform.rst b/docs/source/getting_started/platform.rst index 5e4f0d648..5b62e5ca3 100644 --- a/docs/source/getting_started/platform.rst +++ b/docs/source/getting_started/platform.rst @@ -159,12 +159,39 @@ drag-and-drop or using 
the Python Package according to:
 
 You can upload up to 1'000 images using the frontend.
 
+Images can also be uploaded from a Python script:
+
+.. code-block:: python
+
+    from lightly.api.api_workflow_client import ApiWorkflowClient
+    client = ApiWorkflowClient(token='123', dataset_id='xyz')
+
+    # change mode to 'thumbnails' or 'meta' if you're working with sensitive data
+    client.upload_dataset('path/to/your/images/', mode='full')
+
+
 Upload Embeddings
 -------------------------
 
-Embeddings can be uploaded using the Python Package.
-You can not upload embedding through the web interface. Instead
-:ref:`ref-upload-embedding-lightly`
+Embeddings can be uploaded using the Python Package or the front-end. The simplest
+way to upload the embeddings is from the command line: :ref:`ref-upload-embedding-lightly`.
+
+If you have a numpy array of image embeddings, the filenames of the images, and categorical pseudo-labels,
+you can use the `save_embeddings` function to store them in a lightly-compatible CSV format and upload
+them from your Python code or using the CLI. The following snippet shows how to upload the embeddings from Python.
+
+.. code-block:: python
+
+    from lightly.utils import save_embeddings
+    from lightly.api.api_workflow_client import ApiWorkflowClient
+
+    # store the embeddings in a lightly compatible CSV format before uploading
+    # them to the platform
+    save_embeddings('embeddings.csv', embeddings, labels, filenames)
+
+    # upload the embeddings.csv file to the platform
+    client = ApiWorkflowClient(token='123', dataset_id='xyz')
+    client.upload_embeddings('embeddings.csv', name='my-embeddings')
 
 
 Sampling
diff --git a/docs/source/lightly.api.rst b/docs/source/lightly.api.rst
index 601d36964..18d01e78b 100644
--- a/docs/source/lightly.api.rst
+++ b/docs/source/lightly.api.rst
@@ -8,6 +8,20 @@ lightly.api
 
 .. automodule:: lightly.api.api_workflow_client
    :members:
+.. automodule:: lightly.api.api_workflow_datasets
+   :members:
+
+.. automodule:: lightly.api.api_workflow_download_dataset
+   :members:
+
+.. automodule:: lightly.api.api_workflow_sampling
+   :members:
+
+.. automodule:: lightly.api.api_workflow_upload_dataset
+   :members:
+
+.. automodule:: lightly.api.api_workflow_upload_embeddings
+   :members:
 
 .utils
 ---------------
diff --git a/lightly/api/api_workflow_client.py b/lightly/api/api_workflow_client.py
index 26f38533a..286008300 100644
--- a/lightly/api/api_workflow_client.py
+++ b/lightly/api/api_workflow_client.py
@@ -85,12 +85,11 @@ def check_version_compatibility(self):
 
     @property
     def dataset_id(self) -> str:
-        ''' Returns the dataset_id
+        '''The current dataset_id.
 
         If the dataset_id is set, it is returned.
-        If it is unset, then the dataset_id of the last modified dataset is taken.
-
-        '''
+        If it is not set, then the dataset_id of the last modified dataset is selected.
+        '''
         try:
             return self._dataset_id
         except AttributeError:
@@ -127,6 +126,9 @@ def _order_list_by_filenames(self, filenames_for_list: List[str], list_to_order:
 
     @property
     def filenames_on_server(self):
+        '''The list of the filenames in the dataset.
+
+        '''
         if not hasattr(self, "_filenames_on_server"):
             self._filenames_on_server = self.mappings_api.
\ get_sample_mappings_by_dataset_id(dataset_id=self.dataset_id, field="fileName") diff --git a/lightly/api/api_workflow_upload_embeddings.py b/lightly/api/api_workflow_upload_embeddings.py index 6595e9fbb..7bc6b918c 100644 --- a/lightly/api/api_workflow_upload_embeddings.py +++ b/lightly/api/api_workflow_upload_embeddings.py @@ -20,6 +20,15 @@ def _is_valid_filename(filename: str): class _UploadEmbeddingsMixin: def set_embedding_id_by_name(self, embedding_name: str = None): + """Sets the embedding id of the client by embedding name. + + Args: + embedding_name: + Name under which the embedding was uploaded. + + Raises: + ValueError if the embedding does not exist. + """ embeddings: List[DatasetEmbeddingData] = \ self.embeddings_api.get_embeddings_by_dataset_id(dataset_id=self.dataset_id) @@ -38,14 +47,14 @@ def upload_embeddings(self, path_to_embeddings_csv: str, name: str): First checks that the specified embedding name is not on ther server. If it is, the upload is aborted. Then creates a new csv with the embeddings in the order specified on the server. Next it uploads it to the server. The received embedding_id is saved as a property of self. + Args: - path_to_embeddings_csv: the filepath to the .csv containing the embeddings, e.g. "path/to/embeddings.csv" - name: The name of the embedding. If an embedding with such a name already exists on the server, + path_to_embeddings_csv: + The path to the .csv containing the embeddings, e.g. "path/to/embeddings.csv" + name: + The name of the embedding. If an embedding with such a name already exists on the server, the upload is aborted. - Returns: - None - """ # get the names of the current embeddings on the server: embeddings_on_server: List[DatasetEmbeddingData] = \ From 3a9b4d8a477c248c8c9d93c7464d747da707570c Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Wed, 21 Apr 2021 18:51:40 +0200 Subject: [PATCH 24/27] upload-cli: allow creation of dataset (#308) - changed one line in GitHub action to solve #313 - added parameter `new_dataset_name` to the `config.yaml` - allow to use `lightly-upload` and `lightly-magic` either with creating a new dataset in the webapp (if `new_dataset_name` is defined) or uploads to an existing dataset (if `dataset_id` is defined) - wrote test for the `upload_cli` and `lightly_cli` increasing coverage quite a lot - added docstrings to the `upload_cli` including the new parameter --- .github/workflows/test.yml | 2 +- .../getting_started/command_line_tool.rst | 10 ++- .../source/tutorials/structure_your_input.rst | 2 +- lightly/cli/config/config.yaml | 11 +-- lightly/cli/lightly_cli.py | 2 +- lightly/cli/upload_cli.py | 55 ++++++++----- tests/cli/test_cli_download.py | 5 +- tests/cli/test_cli_magic.py | 76 +++++++++++++++++ tests/cli/test_cli_upload.py | 81 +++++++++++++++++++ 9 files changed, 213 insertions(+), 31 deletions(-) create mode 100644 tests/cli/test_cli_magic.py create mode 100644 tests/cli/test_cli_upload.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0936d87e8..b9ad492b7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -27,7 +27,7 @@ jobs: run: pip install -e '.[all]' - name: Run Pytest run: | - LIGHTLY_SERVER_LOCATION="localhost:-1" + export LIGHTLY_SERVER_LOCATION="localhost:-1" pip install pytest-cov python -m pytest -s -v --runslow --cov=./lightly --cov-report=xml --ignore=./lightly/openapi_generated/ - name: Upload coverage to Codecov diff --git a/docs/source/getting_started/command_line_tool.rst 
b/docs/source/getting_started/command_line_tool.rst index 046ffb615..91f560864 100644 --- a/docs/source/getting_started/command_line_tool.rst +++ b/docs/source/getting_started/command_line_tool.rst @@ -99,7 +99,9 @@ Upload data using the CLI In this example we will upload a dataset to the Lightly Platform. First, make sure you have an account on `Lightly `_. A free account is sufficient. Log in to the app and create a new dataset. -You will get a *token* and *dataset_id* which can be used to upload your dataset +You will get a *token* and *dataset_id* which can be used to upload your dataset. +Alternatively, you can create a new dataset directly with the *token* +by providing the *new_dataset_name* instead of the *dataset_id*. .. code-block:: bash @@ -110,6 +112,9 @@ You will get a *token* and *dataset_id* which can be used to upload your dataset lightly-upload input_dir=cat embeddings=your_embedding.csv \ token=your_token dataset_id=your_dataset_id + # create a new dataset and upload to it + lightly-upload input_dir=cat token=your_token new_dataset_name=your_dataset_name + .. note:: To obtain your *token* and *dataset_id* check: :ref:`ref-authentication-token` and :ref:`ref-webapp-dataset-id`. @@ -120,6 +125,7 @@ Upload embeddings using the CLI ---------------------------------- You can upload embeddings directly to the Lightly Platform using the CLI. +Again, you can use the *dataset_id* and *new_dataset_name* interchangeably. .. code-block:: bash @@ -129,7 +135,7 @@ You can upload embeddings directly to the Lightly Platform using the CLI. # you can upload the dataset together with the embeddings lightly-upload input_dir=cat embeddings=your_embedding.csv \ - token=your_token dataset_id=your_dataset_id + token=your_token new_dataset_name=your_dataset_name Download data using the CLI diff --git a/docs/source/tutorials/structure_your_input.rst b/docs/source/tutorials/structure_your_input.rst index 271a611bf..1bc1c08d7 100644 --- a/docs/source/tutorials/structure_your_input.rst +++ b/docs/source/tutorials/structure_your_input.rst @@ -156,7 +156,7 @@ To upload the three videos from above to the platform, you can use .. code-block:: bash - lightly-upload token='123' dataset_id='XYZ' input_dir='data/' + lightly-upload token='123' new_dataset_name='my_video_dataset' input_dir='data/' All other operations (like training a self-supervised model and embedding the frames individually) also work on video data. Give it a try! diff --git a/lightly/cli/config/config.yaml b/lightly/cli/config/config.yaml index be8a525a2..1a36b16f0 100644 --- a/lightly/cli/config/config.yaml +++ b/lightly/cli/config/config.yaml @@ -7,18 +7,19 @@ embeddings: '' # Path to csv file which holds embeddings. checkpoint: '' # Path to a model checkpoint. If left empty, a pre-trained model # will be used. -### platform +### Lightly platform # The following arguments are required for requests to the # Lightly platform. -token: '' # User access token to the platform. -dataset_id: '' # Identifier of the dataset on the platform +token: '' # User access token to the Lightly platform. +dataset_id: '' # Identifier of the dataset on the Lightly platform. +new_dataset_name: '' # Name of the new dataset to be created on the Lightly platform upload: 'full' # Whether to upload full images, thumbnails only, or metadata only. # Must be one of ['full', 'thumbnails', 'none'] resize: -1 # Allow resizing of the images before uploading, usage =-1, =x, =[x,y] -embedding_name: 'default' # Name of the embedding to be used on the platform. 
+embedding_name: 'default' # Name of the embedding to be used on the Lightly platform.
 emb_upload_bsz: 32 # Number of embeddings which are uploaded in a single batch.
 tag_name: 'initial-tag' # Name of the requested tag on the Lightly platform.
-exclude_parent_tag: False # If true, only the samples in the defined tag, but without the parent tag, are taken
+exclude_parent_tag: False # If true, only the samples in the defined tag, but without the parent tag, are taken.
 
 ### training and embeddings
 pre_trained: True # Whether to use a pre-trained model or not
diff --git a/lightly/cli/lightly_cli.py b/lightly/cli/lightly_cli.py
index b5fc78c71..184d3d638 100644
--- a/lightly/cli/lightly_cli.py
+++ b/lightly/cli/lightly_cli.py
@@ -31,7 +31,7 @@ def _lightly_cli(cfg, is_cli_call=True):
     embeddings = _embed_cli(cfg, is_cli_call)
     cfg['embeddings'] = embeddings
 
-    if cfg['token'] and cfg['dataset_id']:
+    if cfg['token'] and (cfg['dataset_id'] or cfg['new_dataset_name']):
         _upload_cli(cfg)
 
 
diff --git a/lightly/cli/upload_cli.py b/lightly/cli/upload_cli.py
index d4ac0abb9..28fa49ccd 100644
--- a/lightly/cli/upload_cli.py
+++ b/lightly/cli/upload_cli.py
@@ -7,6 +7,7 @@
 # Copyright (c) 2020. Lightly AG and its affiliates.
 # All Rights Reserved
 
+import warnings
 
 import hydra
 
@@ -20,7 +21,6 @@
 
 
 def _upload_cli(cfg, is_cli_call=True):
-
     input_dir = cfg['input_dir']
     if input_dir and is_cli_call:
         input_dir = fix_input_path(input_dir)
@@ -31,6 +31,23 @@
 
     dataset_id = cfg['dataset_id']
     token = cfg['token']
+    new_dataset_name = cfg['new_dataset_name']
+
+    if not token:
+        warnings.warn('Please specify your access token. For help, try: lightly-upload --help')
+        return
+
+    dataset_id_ok = dataset_id and len(dataset_id) > 0
+    new_dataset_name_ok = new_dataset_name and len(new_dataset_name) > 0
+    if new_dataset_name_ok and not dataset_id_ok:
+        api_workflow_client = ApiWorkflowClient(token=token)
+        api_workflow_client.create_dataset(dataset_name=new_dataset_name)
+    elif dataset_id_ok and not new_dataset_name_ok:
+        api_workflow_client = ApiWorkflowClient(token=token, dataset_id=dataset_id)
+    else:
+        warnings.warn('Please specify either the dataset_id of an existing dataset or a new_dataset_name. '
+                      'For help, try: lightly-upload --help')
+        return
 
     size = cfg['resize']
     if not isinstance(size, int):
@@ -39,15 +56,6 @@
     if isinstance(size, tuple) or size > 0:
         transform = torchvision.transforms.Resize(size)
 
-    if not token or not dataset_id:
-        print('Please specify your access token and dataset id.')
-        print('For help, try: lightly-upload --help')
-        return
-
-    api_workflow_client = ApiWorkflowClient(
-        token=token, dataset_id=dataset_id
-    )
-
     if input_dir:
         mode = cfg['upload']
         dataset = LightlyDataset(input_dir=input_dir, transform=transform)
@@ -79,13 +87,16 @@ def upload_cli(cfg):
             Path to the csv file storing the embeddings generated by
             lightly.
         token:
-            User access token to the Lightly platform. If dataset_id
-            and token are specified, the images and embeddings are
-            uploaded to the platform.
+            User access token to the Lightly platform. It needs to be
+            specified to upload the images and embeddings to the platform.
         dataset_id:
-            Identifier of the dataset on the Lightly platform. If
-            dataset_id and token are specified, the images and
-            embeddings are uploaded to the platform.
+            Identifier of the dataset on the Lightly platform.
+            Either the dataset_id or the new_dataset_name need to be
+            specified.
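For reference, a sketch of the Python workflow that mirrors the new new_dataset_name behaviour, using only client calls that appear in this patch series (create_dataset, upload_dataset, upload_embeddings); the token, paths and names below are placeholders.

    from lightly.api.api_workflow_client import ApiWorkflowClient

    # no dataset_id given: register a new, empty dataset by name instead,
    # which is what _upload_cli now does when new_dataset_name is set
    client = ApiWorkflowClient(token='123')
    client.create_dataset(dataset_name='new_dataset_name_xyz')

    # afterwards, images and embeddings can be uploaded to the new dataset
    client.upload_dataset('path/to/your/images/', mode='full')
    client.upload_embeddings('path/to/embeddings.csv', name='default')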
+ new_dataset_name: + The name of the new dataset to create on the Lightly platform. + Either the dataset_id or the new_dataset_name need to be + specified. upload: String to determine whether to upload the full images, thumbnails only, or metadata only. @@ -102,11 +113,14 @@ def upload_cli(cfg): to (size * height / width, size). Examples: - >>> # upload thumbnails to the Lightly platform + >>> # create a new dataset on the Lightly platform and upload thumbnails to it + >>> lightly-upload input_dir=data/ token='123' new_dataset_name='new_dataset_name_xyz' + >>> + >>> # upload thumbnails to the Lightly platform to an existing dataset >>> lightly-upload input_dir=data/ token='123' dataset_id='XYZ' >>> - >>> # upload full images to the Lightly platform - >>> lightly-upload input_dir=data/ token='123' dataset_id='XYZ' upload='full' + >>> # create a new dataset on the Lightly platform and upload full images to it + >>> lightly-upload input_dir=data/ token='123' new_dataset_name='new_dataset_name_xyz' upload='full' >>> >>> # upload metadata to the Lightly platform >>> lightly-upload input_dir=data/ token='123' dataset_id='XYZ' upload='metadata' @@ -117,6 +131,9 @@ def upload_cli(cfg): >>> # upload both, images and embeddings in a single command >>> lightly-upload input_dir=data/ embeddings=embeddings.csv upload='full' \\ >>> token='123' dataset_id='XYZ' + >>> # create a new dataset on the Lightly platform and upload both, images and embeddings + >>> lightly-upload input_dir=data/ embeddings=embeddings.csv upload='full' \\ + >>> token='123' new_dataset_name='new_dataset_name_xyz' """ _upload_cli(cfg) diff --git a/tests/cli/test_cli_download.py b/tests/cli/test_cli_download.py index 8ad66cce1..80ac41dda 100644 --- a/tests/cli/test_cli_download.py +++ b/tests/cli/test_cli_download.py @@ -16,10 +16,11 @@ class TestCLIDownload(MockedApiWorkflowSetup): @classmethod def setUpClass(cls) -> None: sys.modules["lightly.cli.download_cli"].ApiWorkflowClient = MockedApiWorkflowClient - initialize(config_path="../../lightly/cli/config", job_name="test_app") + def setUp(self): - self.cfg = compose(config_name="config", overrides=["token='123'", "dataset_id='XYZ'"]) + with initialize(config_path="../../lightly/cli/config", job_name="test_app"): + self.cfg = compose(config_name="config", overrides=["token='123'", "dataset_id='XYZ'"]) def parse_cli_string(self, cli_words: str): diff --git a/tests/cli/test_cli_magic.py b/tests/cli/test_cli_magic.py new file mode 100644 index 000000000..e7d28df74 --- /dev/null +++ b/tests/cli/test_cli_magic.py @@ -0,0 +1,76 @@ +import os +import re +import sys +import tempfile + +import torchvision +from hydra.experimental import compose, initialize + +import lightly +from tests.api_workflow.mocked_api_workflow_client import MockedApiWorkflowSetup, MockedApiWorkflowClient + + +class TestCLIMagic(MockedApiWorkflowSetup): + + @classmethod + def setUpClass(cls) -> None: + sys.modules["lightly.cli.upload_cli"].ApiWorkflowClient = MockedApiWorkflowClient + + def setUp(self): + MockedApiWorkflowSetup.setUp(self) + self.create_fake_dataset() + with initialize(config_path="../../lightly/cli/config", job_name="test_app"): + self.cfg = compose(config_name="config", overrides=[ + "token='123'", + f"input_dir={self.folder_path}", + "trainer.max_epochs=0" + ]) + + def create_fake_dataset(self): + n_data = len(self.api_workflow_client.filenames_on_server) + self.dataset = torchvision.datasets.FakeData(size=n_data, image_size=(3, 32, 32)) + + self.folder_path = tempfile.mkdtemp() + sample_names = 
[f'img_{i}.jpg' for i in range(n_data)] + self.sample_names = sample_names + for sample_idx in range(n_data): + data = self.dataset[sample_idx] + path = os.path.join(self.folder_path, sample_names[sample_idx]) + data[0].save(path) + + def parse_cli_string(self, cli_words: str): + cli_words = cli_words.replace("lightly-magic ", "") + cli_words = re.split("=| ", cli_words) + assert len(cli_words) % 2 == 0 + dict_keys = cli_words[0::2] + dict_values = cli_words[1::2] + for key, value in zip(dict_keys, dict_values): + value = value.strip('\"') + value = value.strip('\'') + self.cfg[key] = value + + def test_parse_cli_string(self): + cli_string = "lightly-magic dataset_id='XYZ' upload='thumbnails'" + self.parse_cli_string(cli_string) + assert self.cfg["dataset_id"] == 'XYZ' + assert self.cfg["upload"] == 'thumbnails' + + def test_magic_new_dataset_name(self): + cli_string = "lightly-magic new_dataset_name='new_dataset_name_xyz'" + self.parse_cli_string(cli_string) + lightly.cli.lightly_cli(self.cfg) + + def test_magic_new_dataset_id(self): + cli_string = "lightly-magic dataset_id='xyz'" + self.parse_cli_string(cli_string) + lightly.cli.lightly_cli(self.cfg) + + def tearDown(self) -> None: + for filename in ["embeddings.csv", "embeddings_sorted.csv"]: + try: + os.remove(filename) + except FileNotFoundError: + pass + + + diff --git a/tests/cli/test_cli_upload.py b/tests/cli/test_cli_upload.py new file mode 100644 index 000000000..c71efad8a --- /dev/null +++ b/tests/cli/test_cli_upload.py @@ -0,0 +1,81 @@ +import os +import re +import sys +import tempfile + +import torchvision +from hydra.experimental import compose, initialize + +import lightly +from tests.api_workflow.mocked_api_workflow_client import MockedApiWorkflowSetup, MockedApiWorkflowClient + + +class TestCLIUpload(MockedApiWorkflowSetup): + + @classmethod + def setUpClass(cls) -> None: + sys.modules["lightly.cli.upload_cli"].ApiWorkflowClient = MockedApiWorkflowClient + + def setUp(self): + self.create_fake_dataset() + with initialize(config_path="../../lightly/cli/config", job_name="test_app"): + self.cfg = compose(config_name="config", overrides=["token='123'", f"input_dir={self.folder_path}"]) + + def create_fake_dataset(self, n_data: int=5): + self.dataset = torchvision.datasets.FakeData(size=n_data, + image_size=(3, 32, 32)) + + self.folder_path = tempfile.mkdtemp() + sample_names = [f'img_{i}.jpg' for i in range(n_data)] + self.sample_names = sample_names + for sample_idx in range(n_data): + data = self.dataset[sample_idx] + path = os.path.join(self.folder_path, sample_names[sample_idx]) + data[0].save(path) + + def parse_cli_string(self, cli_words: str): + cli_words = cli_words.replace("lightly-upload ", "") + cli_words = re.split("=| ", cli_words) + assert len(cli_words) % 2 == 0 + dict_keys = cli_words[0::2] + dict_values = cli_words[1::2] + for key, value in zip(dict_keys, dict_values): + value = value.strip('\"') + value = value.strip('\'') + self.cfg[key] = value + + def test_parse_cli_string(self): + cli_string = "lightly-upload dataset_id='XYZ' upload='thumbnails'" + self.parse_cli_string(cli_string) + assert self.cfg["dataset_id"] == 'XYZ' + assert self.cfg["upload"] == 'thumbnails' + + def test_upload_no_token(self): + self.cfg['token']='' + with self.assertWarns(UserWarning): + lightly.cli.upload_cli(self.cfg) + + def test_upload_new_dataset_name(self): + cli_string = "lightly-upload new_dataset_name='new_dataset_name_xyz'" + self.parse_cli_string(cli_string) + lightly.cli.upload_cli(self.cfg) + + def 
test_upload_new_dataset_id(self): + cli_string = "lightly-upload dataset_id='xyz'" + self.parse_cli_string(cli_string) + lightly.cli.upload_cli(self.cfg) + + def test_upload_no_dataset(self): + cli_string = "lightly-upload input_dir=data/ token='123'" + self.parse_cli_string(cli_string) + with self.assertWarns(UserWarning): + lightly.cli.upload_cli(self.cfg) + + def test_upload_both_dataset(self): + cli_string = "lightly-upload new_dataset_name='new_dataset_name_xyz' dataset_id='xyz'" + self.parse_cli_string(cli_string) + with self.assertWarns(UserWarning): + lightly.cli.upload_cli(self.cfg) + + + From 3f6203b6314e8efb8c72e25f08844cc631e9dcd5 Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Thu, 22 Apr 2021 10:32:14 +0200 Subject: [PATCH 25/27] Created 3 templates for pull requests (#312) Created 3 templates for following purposes: - minimal: Make it easy for external developers to do a PR - checklist: a medium-sized checklist with three checklist subsections: type of change, tests, docs - checklist_full: extends the checklist with an example for a manual test and more elaborate further issues --- .../PR_template_checklist.md | 21 +++++++++++++ .../PR_template_checklist_full.md | 31 +++++++++++++++++++ .../PR_template_minimal.md | 12 +++++++ 3 files changed, 64 insertions(+) create mode 100644 .github/templates/PULL_REQUEST_TEMPLATE/PR_template_checklist.md create mode 100644 .github/templates/PULL_REQUEST_TEMPLATE/PR_template_checklist_full.md create mode 100644 .github/templates/PULL_REQUEST_TEMPLATE/PR_template_minimal.md diff --git a/.github/templates/PULL_REQUEST_TEMPLATE/PR_template_checklist.md b/.github/templates/PULL_REQUEST_TEMPLATE/PR_template_checklist.md new file mode 100644 index 000000000..e9402109a --- /dev/null +++ b/.github/templates/PULL_REQUEST_TEMPLATE/PR_template_checklist.md @@ -0,0 +1,21 @@ +closes #issue_number + +## Description +- [ ] My change is breaking +Please_describe_what_you_changed_and_why___You_do_not_need_to_repeat_stuff_from_the_issue + +## Tests +- [ ] My change is covered by existing tests. +- [ ] My change needs new tests. +- [ ] I have added/adapted the tests accordingly. +- [ ] I have manually tested the change. if_yes_describe_how + +## Documentation +- [ ] I have added docstrings to all public functions/methods. +- [ ] My change requires a change to the documentation ( `.rst` files). +- [ ] I have updated the documentation accordingly. +- [ ] The autodocs update the documentation accordingly. + +## Implications / comments / further issues +- #e_g_link_to_issue_to_cover_breaking_changes + diff --git a/.github/templates/PULL_REQUEST_TEMPLATE/PR_template_checklist_full.md b/.github/templates/PULL_REQUEST_TEMPLATE/PR_template_checklist_full.md new file mode 100644 index 000000000..aaf66fe49 --- /dev/null +++ b/.github/templates/PULL_REQUEST_TEMPLATE/PR_template_checklist_full.md @@ -0,0 +1,31 @@ +closes #issue_number + +## Description +- [ ] My change is breaking +Please_describe_what_you_changed_and_why___You_do_not_need_to_repeat_stuff_from_the_issue + +## Tests +- [ ] My change is covered by existing tests +- [ ] My change needs new tests +- [ ] I have added/adapted tests accordingly. +- [ ] I have manually tested the change. 
+ +If applicable, describe the manual test procedure, e.g: +```bash +pip uninstall lightly +export BRANCH_NAME="branch_name" +pip install "git+https://github.com/lightly-ai/lightly.git@$BRANCH_NAME" +lightly-cli_do_something_command +``` + +## Documentation +- [ ] I have added docstrings to all changed/added public functions/methods. +- [ ] My change requires a change to the documentation ( `.rst` files). +- [ ] I have updated the documentation accordingly. +- [ ] The autodocs update the documentation accordingly.` + +## Improvements put into another issue: +- #issue_number + +## Issues covering the breaking change: +- #link_to_issue_in_other_repo to adapt the other side of the breaking change \ No newline at end of file diff --git a/.github/templates/PULL_REQUEST_TEMPLATE/PR_template_minimal.md b/.github/templates/PULL_REQUEST_TEMPLATE/PR_template_minimal.md new file mode 100644 index 000000000..f2ec4432c --- /dev/null +++ b/.github/templates/PULL_REQUEST_TEMPLATE/PR_template_minimal.md @@ -0,0 +1,12 @@ +closes #issue_number + +## Description +Please_describe_what_you_changed_and_why___You_do_not_need_to_repeat_stuff_from_the_issue + +## Documentation +- [ ] I have updated the documentation. +- [ ] I need help on it. + +## Tests +- [ ] I have updated the tests. +- [ ] I need help on it. \ No newline at end of file From 2e81ff1589075d4fcf962b7b2b5ca121e62bc986 Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Thu, 22 Apr 2021 13:50:01 +0200 Subject: [PATCH 26/27] Bugfix: put PR templates one folder up (#316) --- .../PULL_REQUEST_TEMPLATE/PR_template_checklist.md | 0 .../PULL_REQUEST_TEMPLATE/PR_template_checklist_full.md | 0 .../{templates => }/PULL_REQUEST_TEMPLATE/PR_template_minimal.md | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename .github/{templates => }/PULL_REQUEST_TEMPLATE/PR_template_checklist.md (100%) rename .github/{templates => }/PULL_REQUEST_TEMPLATE/PR_template_checklist_full.md (100%) rename .github/{templates => }/PULL_REQUEST_TEMPLATE/PR_template_minimal.md (100%) diff --git a/.github/templates/PULL_REQUEST_TEMPLATE/PR_template_checklist.md b/.github/PULL_REQUEST_TEMPLATE/PR_template_checklist.md similarity index 100% rename from .github/templates/PULL_REQUEST_TEMPLATE/PR_template_checklist.md rename to .github/PULL_REQUEST_TEMPLATE/PR_template_checklist.md diff --git a/.github/templates/PULL_REQUEST_TEMPLATE/PR_template_checklist_full.md b/.github/PULL_REQUEST_TEMPLATE/PR_template_checklist_full.md similarity index 100% rename from .github/templates/PULL_REQUEST_TEMPLATE/PR_template_checklist_full.md rename to .github/PULL_REQUEST_TEMPLATE/PR_template_checklist_full.md diff --git a/.github/templates/PULL_REQUEST_TEMPLATE/PR_template_minimal.md b/.github/PULL_REQUEST_TEMPLATE/PR_template_minimal.md similarity index 100% rename from .github/templates/PULL_REQUEST_TEMPLATE/PR_template_minimal.md rename to .github/PULL_REQUEST_TEMPLATE/PR_template_minimal.md From 7314e93077d11fc905c4a5152ad2ab4663122b22 Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Thu, 22 Apr 2021 14:51:41 +0200 Subject: [PATCH 27/27] bump version 1.1.6 (#319) --- lightly/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightly/__init__.py b/lightly/__init__.py index c4463f560..e25608181 100644 --- a/lightly/__init__.py +++ b/lightly/__init__.py @@ -74,7 +74,7 @@ # All Rights Reserved __name__ = 'lightly' -__version__ = '1.1.5' +__version__ = '1.1.6' try:
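As a closing illustration of the calculate_scores() rename from patch 17, a small usage sketch based on the classification scorer tests above; the array sizes are placeholders.

    import numpy as np
    from lightly.active_learning.scorers import ScorerClassification

    # rows: unlabeled samples, columns: per-class probabilities summing to 1
    predictions = np.random.rand(100, 10).astype(np.float32)
    predictions_normalized = predictions / np.sum(predictions, axis=1)[:, np.newaxis]

    scorer = ScorerClassification(predictions_normalized)
    scores = scorer.calculate_scores()              # public method after the rename
    uncertainty = scores['prediction-entropy']      # one score per unlabeled sample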