
Improving annotation upload for source composites #408


Merged
merged 9 commits into from Mar 26, 2025
6 changes: 6 additions & 0 deletions HISTORY.rst
@@ -3,6 +3,12 @@
History
-------

9.8.2 (2025-03-21)
------------------

- Retrying annotation updates to avoid temporary concurrency issues when
  updating source composites.

9.8.1 (2025-01-14)
------------------

42 changes: 37 additions & 5 deletions bigml/api_handlers/sourcehandler.py
@@ -24,6 +24,8 @@
import sys
import os
import numbers
import time
import logging

from urllib import parse

@@ -67,9 +69,14 @@
from bigml.api_handlers.resourcehandler import ResourceHandlerMixin, LOGGER
from bigml.fields import Fields

LOG_FORMAT = '%(asctime)-15s: %(message)s'
LOGGER = logging.getLogger('BigML')
CONSOLE = logging.StreamHandler()
CONSOLE.setLevel(logging.WARNING)
LOGGER.addHandler(CONSOLE)

MAX_CHANGES = 500

MAX_CHANGES = 5
MAX_RETRIES = 5

def compact_regions(regions):
"""Returns the list of regions in the compact value used for updates """
@@ -508,6 +515,8 @@ def update_composite_annotations(self, source, images_file,
try:
_ = file_list.index(filename)
except ValueError:
LOGGER.error("WARNING: Could not find annotated file (%s)"
" in the composite's sources list", filename)
continue
for key in annotation.keys():
if key == "file":
@@ -539,9 +548,12 @@
"components": source_ids})
elif optype == "regions":
for value, source_id in values:
if isinstance(value, dict):
# dictionary should contain the bigml-coco format
value = compact_regions(value)
changes.append(
{"field": field,
"value": compact_regions(value),
"value": value,
"components": [source_id]})
else:
for value, source_id in values:
@@ -550,16 +562,36 @@
"value": value,
"components": [source_id]})
except Exception:
LOGGER.error("WARNING: Problem adding annotation to %s (%s)",
field, values)
pass

# we need to limit the amount of changes per update
batches_number = int(len(changes) / MAX_CHANGES)
for offset in range(0, batches_number + 1):
new_batch = changes[offset * MAX_CHANGES: (offset + 1) * MAX_CHANGES]
new_batch = changes[
offset * MAX_CHANGES: (offset + 1) * MAX_CHANGES]
if new_batch:
source = self.update_source(source,
{"row_values": new_batch})
self.ok(source)
counter = 0
while source["error"] is not None and counter < MAX_RETRIES:
# retrying in case update is temporarily unavailable
counter += 1
time.sleep(counter)
source = self.get_source(source)
self.ok(source)
source = self.update_source(source,
{"row_values": new_batch})
if source["error"] is not None:
err_str = json.dumps(source["error"])
v_str = json.dumps(new_batch)
LOGGER.error("WARNING: Some annotations were not updated "
f" (error: {err_str}, values: {v_str})")
if not self.ok(source):
raise Exception(
f"Failed to update {len(new_batch)} annotations.")
time.sleep(0.1)

return source

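The heart of this change is the batching-plus-retry loop above: pending changes are sent in slices of MAX_CHANGES, and each batch is retried with a growing pause while the source still reports an error. Below is a minimal, self-contained sketch of that pattern only, not the library code itself: update_fn and get_fn are hypothetical stand-ins for update_source and get_source, and the error handling is simplified.

import time

MAX_CHANGES = 5   # changes sent per update call
MAX_RETRIES = 5   # attempts per batch before giving up

def apply_changes(changes, update_fn, get_fn):
    """Send changes in small batches, retrying each batch while the
    server reports a (possibly transient) concurrency error."""
    result = None
    for offset in range(0, len(changes), MAX_CHANGES):
        batch = changes[offset: offset + MAX_CHANGES]
        result = update_fn(batch)
        counter = 0
        while result.get("error") is not None and counter < MAX_RETRIES:
            counter += 1
            time.sleep(counter)       # linear backoff: 1s, 2s, ...
            result = get_fn()         # refresh the resource state first
            result = update_fn(batch)
        if result.get("error") is not None:
            raise RuntimeError("Failed to update %s changes." % len(batch))
        time.sleep(0.1)               # brief pause between batches
    return result
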
8 changes: 4 additions & 4 deletions bigml/bigmlconnection.py
@@ -406,7 +406,7 @@ def _create(self, url, body, verify=None, organization=None):
error = json_load(response.content)
LOGGER.error(self.error_message(error, method='create'))
elif code != HTTP_ACCEPTED:
LOGGER.error("Unexpected error (%s)", code)
LOGGER.error("CREATE Unexpected error (%s)", code)
code = HTTP_INTERNAL_SERVER_ERROR
except ValueError as exc:
LOGGER.error("Malformed response: %s", str(exc))
@@ -489,7 +489,7 @@ def _get(self, url, query_string='',
LOGGER.error(self.error_message(error, method='get',
resource_id=resource_id))
else:
LOGGER.error("Unexpected error (%s)", code)
LOGGER.error("GET Unexpected error (%s)", code)
code = HTTP_INTERNAL_SERVER_ERROR

except ValueError as exc:
@@ -582,7 +582,7 @@ def _list(self, url, query_string='', organization=None):
HTTP_TOO_MANY_REQUESTS]:
error = json_load(response.content)
else:
LOGGER.error("Unexpected error (%s)", code)
LOGGER.error("LIST Unexpected error (%s)", code)
code = HTTP_INTERNAL_SERVER_ERROR
except ValueError as exc:
LOGGER.error("Malformed response: %s", str(exc))
@@ -662,7 +662,7 @@ def _update(self, url, body, organization=None, resource_id=None):
LOGGER.error(self.error_message(error, method='update',
resource_id=resource_id))
else:
LOGGER.error("Unexpected error (%s)", code)
LOGGER.error("UPDATE Unexpected error (%s)", code)
code = HTTP_INTERNAL_SERVER_ERROR
except ValueError:
LOGGER.error("Malformed response")
13 changes: 13 additions & 0 deletions bigml/tests/create_dataset_steps.py
@@ -222,3 +222,16 @@ def clone_dataset(step, dataset):
def the_cloned_dataset_is(step, dataset):
"""Checking the dataset is a clone"""
eq_(world.dataset["origin"], dataset)


def check_annotations(step, annotations_field, annotations_num):
"""Checking the dataset contains a number of annotations"""
annotations_num = int(annotations_num)
field = world.dataset["fields"][annotations_field]
if field["optype"] == "regions":
count = field["summary"]["regions"]["sum"]
else:
count = 0
for _, num in field["summary"]["categories"]:
count += num
eq_(count, annotations_num)
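
check_annotations reads its counts from the dataset's field summaries: a regions field carries a precomputed sum, while a categorical field lists per-category counts that are added up. A hypothetical pair of field entries, shaped the way the helper expects (names and numbers invented for illustration):

# Hypothetical field entries, as found under dataset["fields"]:
regions_field = {"optype": "regions",
                 "summary": {"regions": {"sum": 12}}}
labels_field = {"optype": "categorical",
                "summary": {"categories": [["label1", 2], ["label2", 1]]}}
# regions count -> 12; categorical count -> 2 + 1 = 3
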
2 changes: 2 additions & 0 deletions bigml/tests/read_resource_steps.py
@@ -14,6 +14,7 @@
# License for the specific language governing permissions and limitations
# under the License.

import time

from datetime import datetime

@@ -46,6 +47,7 @@ def wait_until_status_code_is(code1, code2, secs, resource_info):
if status['code'] == int(code2):
world.errors.append(resource_info)
eq_(status['code'], int(code1))
time.sleep(0.1)  # added to avoid mongo sync issues
return i_get_the_resource(resource_info)


36 changes: 36 additions & 0 deletions bigml/tests/test_22_source_args.py
@@ -25,6 +25,7 @@
from .world import world, setup_module, teardown_module, show_doc, \
show_method
from . import create_source_steps as source_create
from . import create_dataset_steps as dataset_create


class TestUploadSource:
@@ -125,3 +126,38 @@ def test_scenario3(self):
source_create.the_source_is_finished(
self, example["source_wait"])
source_create.the_cloned_source_origin_is(self, source)

def test_scenario4(self):
"""
Scenario: Successfully adding annotations to a composite source:
Given I create an annotated images data source uploading a "<data>" file
And I wait until the source is ready less than <source_wait> secs
And I create a dataset
And I wait until the dataset is ready less than <dataset_wait> secs
Then the new dataset has <annotations_num> annotations in the <annotations_field> field
"""
headers = ["data", "source_wait", "dataset_wait", "annotations_num",
"annotations_field"]
examples = [
['data/images/metadata.json', '500', '500', '12',
'100002'],
['data/images/metadata_compact.json', '500', '500', '3',
'100003']]
show_doc(self.test_scenario4)
for example in examples:
example = dict(zip(headers, example))
show_method(self, self.bigml["method"], example)
source_create.i_create_annotated_source(
self,
example["data"],
args={"image_analysis": {"enabled": False,
"extracted_features": []}})
source_create.the_source_is_finished(
self, example["source_wait"])
dataset_create.i_create_a_dataset(self)
dataset_create.the_dataset_is_finished_in_less_than(
self, example["dataset_wait"])
dataset_create.check_annotations(self,
example["annotations_field"],
example["annotations_num"])
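
The scenario above exercises the same path through the public API. As a rough usage sketch (assuming credentials are set in the environment; create_annotated_source is the bindings' entry point for annotated image sources, but the exact call shape here is otherwise an assumption):

from bigml.api import BigML

api = BigML()  # reads BIGML_USERNAME / BIGML_API_KEY from the environment
source = api.create_annotated_source(
    "data/images/metadata_compact.json",
    args={"image_analysis": {"enabled": False,
                             "extracted_features": []}})
api.ok(source)  # waits for the composite source and its annotations
dataset = api.create_dataset(source)
api.ok(dataset)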

2 changes: 1 addition & 1 deletion bigml/version.py
@@ -1 +1 @@
__version__ = '9.8.1'
__version__ = '9.8.2'
2 changes: 2 additions & 0 deletions data/images/annotations_compact.json
@@ -0,0 +1,2 @@
[{"file": "f1/fruits1f.png", "my_regions": "[[\"region1\" 0.2 0.2 0.4 0.4]]"},
{"file": "f1/fruits1.png", "my_regions": "[[\"region2\" 0.3 0.3 0.5 0.5] [\"region1\" 0.6 0.6 0.8 0.8]]"}]
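
Each compact region is a bracketed group holding a quoted label and four relative coordinates, and the annotation value is that list serialized as a string. As a sketch only, a helper producing such strings from (label, xmin, ymin, xmax, ymax) tuples could look like this; the library's actual compact_regions helper and the precise coordinate semantics are assumptions here:

def to_compact_regions(regions):
    """Serialize (label, xmin, ymin, xmax, ymax) tuples into a compact
    regions string such as '[["region1" 0.2 0.2 0.4 0.4]]'."""
    parts = ['["%s" %g %g %g %g]' % region for region in regions]
    return "[%s]" % " ".join(parts)

# to_compact_regions([("region1", 0.2, 0.2, 0.4, 0.4)])
# -> '[["region1" 0.2 0.2 0.4 0.4]]'
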
5 changes: 5 additions & 0 deletions data/images/metadata_compact.json
@@ -0,0 +1,5 @@
{"description": "Fruit images to test colour distributions with regions",
"images_file": "./fruits_hist.zip",
"new_fields": [{"name": "my_regions", "optype": "regions"}],
"source_id": null,
"annotations": "./annotations_compact.json"}
3 changes: 2 additions & 1 deletion setup.py
@@ -50,7 +50,8 @@
download_url="https://github.com/bigmlcom/python",
license="http://www.apache.org/licenses/LICENSE-2.0",
setup_requires = ['pytest'],
install_requires = ["setuptools==69.0.0", "unidecode", "bigml-chronos>=0.4.3", "requests",
install_requires = ["setuptools==70.0.0", "unidecode",
"bigml-chronos>=0.4.3", "requests",
"requests-toolbelt", "msgpack", "numpy>=1.22", "scipy",
"javascript"],
extras_require={"images": IMAGES_DEPENDENCIES,