diff --git a/HISTORY.rst b/HISTORY.rst index e4bd9d7d..63308907 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,12 @@ History ------- +9.8.2 (2025-03-21) +------------------ + +- Retrying annotations update to avoid temporary concurrency issues in + source composites updates. + 9.8.1 (2025-01-14) ------------------ diff --git a/bigml/api_handlers/sourcehandler.py b/bigml/api_handlers/sourcehandler.py index 8f3568ea..71863b34 100644 --- a/bigml/api_handlers/sourcehandler.py +++ b/bigml/api_handlers/sourcehandler.py @@ -24,6 +24,8 @@ import sys import os import numbers +import time +import logging from urllib import parse @@ -67,9 +69,14 @@ from bigml.api_handlers.resourcehandler import ResourceHandlerMixin, LOGGER from bigml.fields import Fields +LOG_FORMAT = '%(asctime)-15s: %(message)s' +LOGGER = logging.getLogger('BigML') +CONSOLE = logging.StreamHandler() +CONSOLE.setLevel(logging.WARNING) +LOGGER.addHandler(CONSOLE) -MAX_CHANGES = 500 - +MAX_CHANGES = 5 +MAX_RETRIES = 5 def compact_regions(regions): """Returns the list of regions in the compact value used for updates """ @@ -508,6 +515,8 @@ def update_composite_annotations(self, source, images_file, try: _ = file_list.index(filename) except ValueError: + LOGGER.error("WARNING: Could not find annotated file (%s)" + " in the composite's sources list", filename) continue for key in annotation.keys(): if key == "file": @@ -539,9 +548,12 @@ def update_composite_annotations(self, source, images_file, "components": source_ids}) elif optype == "regions": for value, source_id in values: + if isinstance(value, dict): + # dictionary should contain the bigml-coco format + value = compact_regions(value) changes.append( {"field": field, - "value": compact_regions(value), + "value": value, "components": [source_id]}) else: for value, source_id in values: @@ -550,16 +562,36 @@ def update_composite_annotations(self, source, images_file, "value": value, "components": [source_id]}) except Exception: + LOGGER.error("WARNING: Problem adding annotation to %s (%s)", + field, values) pass # we need to limit the amount of changes per update batches_number = int(len(changes) / MAX_CHANGES) for offset in range(0, batches_number + 1): - new_batch = changes[offset * MAX_CHANGES: (offset + 1) * MAX_CHANGES] + new_batch = changes[ + offset * MAX_CHANGES: (offset + 1) * MAX_CHANGES] if new_batch: source = self.update_source(source, {"row_values": new_batch}) - self.ok(source) + counter = 0 + while source["error"] is not None and counter < MAX_RETRIES: + # retrying in case update is temporarily unavailable + counter += 1 + time.sleep(counter) + source = self.get_source(source) + self.ok(source) + source = self.update_source(source, + {"row_values": new_batch}) + if source["error"] is not None: + err_str = json.dumps(source["error"]) + v_str = json.dumps(new_batch) + LOGGER.error("WARNING: Some annotations were not updated " + f" (error: {err_str}, values: {v_str})") + if not self.ok(source): + raise Exception( + f"Failed to update {len(new_batch)} annotations.") + time.sleep(0.1) return source diff --git a/bigml/bigmlconnection.py b/bigml/bigmlconnection.py index a5a796c5..c1b337f0 100644 --- a/bigml/bigmlconnection.py +++ b/bigml/bigmlconnection.py @@ -406,7 +406,7 @@ def _create(self, url, body, verify=None, organization=None): error = json_load(response.content) LOGGER.error(self.error_message(error, method='create')) elif code != HTTP_ACCEPTED: - LOGGER.error("Unexpected error (%s)", code) + LOGGER.error("CREATE Unexpected error (%s)", code) code = HTTP_INTERNAL_SERVER_ERROR except ValueError as exc: LOGGER.error("Malformed response: %s", str(exc)) @@ -489,7 +489,7 @@ def _get(self, url, query_string='', LOGGER.error(self.error_message(error, method='get', resource_id=resource_id)) else: - LOGGER.error("Unexpected error (%s)", code) + LOGGER.error("GET Unexpected error (%s)", code) code = HTTP_INTERNAL_SERVER_ERROR except ValueError as exc: @@ -582,7 +582,7 @@ def _list(self, url, query_string='', organization=None): HTTP_TOO_MANY_REQUESTS]: error = json_load(response.content) else: - LOGGER.error("Unexpected error (%s)", code) + LOGGER.error("LIST Unexpected error (%s)", code) code = HTTP_INTERNAL_SERVER_ERROR except ValueError as exc: LOGGER.error("Malformed response: %s", str(exc)) @@ -662,7 +662,7 @@ def _update(self, url, body, organization=None, resource_id=None): LOGGER.error(self.error_message(error, method='update', resource_id=resource_id)) else: - LOGGER.error("Unexpected error (%s)", code) + LOGGER.error("UPDATE Unexpected error (%s)", code) code = HTTP_INTERNAL_SERVER_ERROR except ValueError: LOGGER.error("Malformed response") diff --git a/bigml/tests/create_dataset_steps.py b/bigml/tests/create_dataset_steps.py index a04d854a..b341ba51 100644 --- a/bigml/tests/create_dataset_steps.py +++ b/bigml/tests/create_dataset_steps.py @@ -222,3 +222,16 @@ def clone_dataset(step, dataset): def the_cloned_dataset_is(step, dataset): """Checking the dataset is a clone""" eq_(world.dataset["origin"], dataset) + + +def check_annotations(step, annotations_field, annotations_num): + """Checking the dataset contains a number of annotations""" + annotations_num = int(annotations_num) + field = world.dataset["fields"][annotations_field] + if field["optype"] == "regions": + count = field["summary"]["regions"]["sum"] + else: + count = 0 + for _, num in field["summary"]["categories"]: + count += num + eq_(count, annotations_num) diff --git a/bigml/tests/read_resource_steps.py b/bigml/tests/read_resource_steps.py index d406b8d6..bf702e04 100644 --- a/bigml/tests/read_resource_steps.py +++ b/bigml/tests/read_resource_steps.py @@ -14,6 +14,7 @@ # License for the specific language governing permissions and limitations # under the License. +import time from datetime import datetime @@ -46,6 +47,7 @@ def wait_until_status_code_is(code1, code2, secs, resource_info): if status['code'] == int(code2): world.errors.append(resource_info) eq_(status['code'], int(code1)) + time.sleep(0.1) # added to avoid synch mongo issues return i_get_the_resource(resource_info) diff --git a/bigml/tests/test_22_source_args.py b/bigml/tests/test_22_source_args.py index 478a0959..8fe3567e 100644 --- a/bigml/tests/test_22_source_args.py +++ b/bigml/tests/test_22_source_args.py @@ -25,6 +25,7 @@ from .world import world, setup_module, teardown_module, show_doc, \ show_method from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create class TestUploadSource: @@ -125,3 +126,38 @@ def test_scenario3(self): source_create.the_source_is_finished( self, example["source_wait"]) source_create.the_cloned_source_origin_is(self, source) + + def test_scenario4(self): + """ + Scenario: Successfully adding annotatations to composite source: + Given I create an annotated images data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + Then the new dataset has annotations in the field + """ + headers = ["data", "source_wait", "dataset_wait", "annotations_num", + "annotations_field"] + examples = [ + ['data/images/metadata.json', '500', '500', '12', + '100002'], + ['data/images/metadata_compact.json', '500', '500', '3', + '100003']] + show_doc(self.test_scenario4) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_create_annotated_source( + self, + example["data"], + args={"image_analysis": {"enabled": False, + "extracted_features": []}}) + source_create.the_source_is_finished( + self, example["source_wait"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.check_annotations(self, + example["annotations_field"], + example["annotations_num"]) + diff --git a/bigml/version.py b/bigml/version.py index d33e46a3..977225b8 100644 --- a/bigml/version.py +++ b/bigml/version.py @@ -1 +1 @@ -__version__ = '9.8.1' +__version__ = '9.8.2' diff --git a/data/images/annotations_compact.json b/data/images/annotations_compact.json new file mode 100644 index 00000000..294de440 --- /dev/null +++ b/data/images/annotations_compact.json @@ -0,0 +1,2 @@ +[{"file": "f1/fruits1f.png", "my_regions": "[[\"region1\" 0.2 0.2 0.4 0.4]]"}, + {"file": "f1/fruits1.png", "my_regions": "[[\"region2\" 0.3 0.3 0.5 0.5] [\"region1\" 0.6 0.6 0.8 0.8]]"}] diff --git a/data/images/metadata_compact.json b/data/images/metadata_compact.json new file mode 100644 index 00000000..45db412f --- /dev/null +++ b/data/images/metadata_compact.json @@ -0,0 +1,5 @@ +{"description": "Fruit images to test colour distributions with regions", + "images_file": "./fruits_hist.zip", + "new_fields": [{"name": "my_regions", "optype": "regions"}], + "source_id": null, + "annotations": "./annotations_compact.json"} diff --git a/setup.py b/setup.py index adcd3b08..c7858b6c 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,8 @@ download_url="https://github.com/bigmlcom/python", license="http://www.apache.org/licenses/LICENSE-2.0", setup_requires = ['pytest'], - install_requires = ["setuptools==69.0.0", "unidecode", "bigml-chronos>=0.4.3", "requests", + install_requires = ["setuptools==70.0.0", "unidecode", + "bigml-chronos>=0.4.3", "requests", "requests-toolbelt", "msgpack", "numpy>=1.22", "scipy", "javascript"], extras_require={"images": IMAGES_DEPENDENCIES,