
Improving annotation upload for source composites #408


Merged
merged 9 commits into from Mar 26, 2025
6 changes: 6 additions & 0 deletions HISTORY.rst
@@ -3,6 +3,12 @@
History
-------

9.8.2 (2025-03-21)
------------------

- Retrying annotation updates to avoid temporary concurrency issues when
  updating source composites.

9.8.1 (2025-01-14)
------------------

42 changes: 37 additions & 5 deletions bigml/api_handlers/sourcehandler.py
@@ -24,6 +24,8 @@
import sys
import os
import numbers
import time
import logging

from urllib import parse

@@ -67,9 +69,14 @@
from bigml.api_handlers.resourcehandler import ResourceHandlerMixin, LOGGER
from bigml.fields import Fields

LOG_FORMAT = '%(asctime)-15s: %(message)s'
LOGGER = logging.getLogger('BigML')
CONSOLE = logging.StreamHandler()
CONSOLE.setLevel(logging.WARNING)
LOGGER.addHandler(CONSOLE)

MAX_CHANGES = 500

MAX_CHANGES = 5
MAX_RETRIES = 5

def compact_regions(regions):
"""Returns the list of regions in the compact value used for updates """
@@ -508,6 +515,8 @@ def update_composite_annotations(self, source, images_file,
try:
_ = file_list.index(filename)
except ValueError:
LOGGER.error("WARNING: Could not find annotated file (%s)"
" in the composite's sources list", filename)
continue
for key in annotation.keys():
if key == "file":
@@ -539,9 +548,12 @@
"components": source_ids})
elif optype == "regions":
for value, source_id in values:
if isinstance(value, dict):
# dictionary should contain the bigml-coco format
value = compact_regions(value)
changes.append(
{"field": field,
"value": compact_regions(value),
"value": value,
"components": [source_id]})
else:
for value, source_id in values:
@@ -550,16 +562,36 @@
"value": value,
"components": [source_id]})
except Exception:
LOGGER.error("WARNING: Problem adding annotation to %s (%s)",
field, values)
pass

# we need to limit the amount of changes per update
batches_number = int(len(changes) / MAX_CHANGES)
for offset in range(0, batches_number + 1):
new_batch = changes[offset * MAX_CHANGES: (offset + 1) * MAX_CHANGES]
new_batch = changes[
offset * MAX_CHANGES: (offset + 1) * MAX_CHANGES]
if new_batch:
source = self.update_source(source,
{"row_values": new_batch})
self.ok(source)
counter = 0
while source["error"] is not None and counter < MAX_RETRIES:
# retrying in case update is temporarily unavailable
counter += 1
time.sleep(counter)
source = self.get_source(source)
self.ok(source)
source = self.update_source(source,
{"row_values": new_batch})
if source["error"] is not None:
err_str = json.dumps(source["error"])
v_str = json.dumps(new_batch)
LOGGER.error("WARNING: Some annotations were not updated "
f" (error: {err_str}, values: {v_str})")
if not self.ok(source):
raise Exception(
f"Failed to update {len(new_batch)} annotations.")
time.sleep(0.1)

return source

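The heart of this change is the batching-plus-retry loop above: pending changes are sent in slices of MAX_CHANGES, and each batch is retried with a growing pause while the source still reports an error. Below is a minimal, self-contained sketch of that pattern only, not the library code itself: update_fn and get_fn are hypothetical stand-ins for update_source and get_source, and the error handling is simplified.

import time

MAX_CHANGES = 5   # changes sent per update call
MAX_RETRIES = 5   # attempts per batch before giving up

def apply_changes(changes, update_fn, get_fn):
    """Send changes in small batches, retrying each batch while the
    server reports a (possibly transient) concurrency error."""
    result = None
    for offset in range(0, len(changes), MAX_CHANGES):
        batch = changes[offset: offset + MAX_CHANGES]
        result = update_fn(batch)
        counter = 0
        while result.get("error") is not None and counter < MAX_RETRIES:
            counter += 1
            time.sleep(counter)       # linear backoff: 1s, 2s, ...
            result = get_fn()         # refresh the resource state first
            result = update_fn(batch)
        if result.get("error") is not None:
            raise RuntimeError("Failed to update %s changes." % len(batch))
        time.sleep(0.1)               # brief pause between batches
    return result
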
8 changes: 4 additions & 4 deletions bigml/bigmlconnection.py
@@ -406,7 +406,7 @@ def _create(self, url, body, verify=None, organization=None):
error = json_load(response.content)
LOGGER.error(self.error_message(error, method='create'))
elif code != HTTP_ACCEPTED:
LOGGER.error("Unexpected error (%s)", code)
LOGGER.error("CREATE Unexpected error (%s)", code)
code = HTTP_INTERNAL_SERVER_ERROR
except ValueError as exc:
LOGGER.error("Malformed response: %s", str(exc))
@@ -489,7 +489,7 @@ def _get(self, url, query_string='',
LOGGER.error(self.error_message(error, method='get',
resource_id=resource_id))
else:
LOGGER.error("Unexpected error (%s)", code)
LOGGER.error("GET Unexpected error (%s)", code)
code = HTTP_INTERNAL_SERVER_ERROR

except ValueError as exc:
@@ -582,7 +582,7 @@ def _list(self, url, query_string='', organization=None):
HTTP_TOO_MANY_REQUESTS]:
error = json_load(response.content)
else:
LOGGER.error("Unexpected error (%s)", code)
LOGGER.error("LIST Unexpected error (%s)", code)
code = HTTP_INTERNAL_SERVER_ERROR
except ValueError as exc:
LOGGER.error("Malformed response: %s", str(exc))
@@ -662,7 +662,7 @@ def _update(self, url, body, organization=None, resource_id=None):
LOGGER.error(self.error_message(error, method='update',
resource_id=resource_id))
else:
LOGGER.error("Unexpected error (%s)", code)
LOGGER.error("UPDATE Unexpected error (%s)", code)
code = HTTP_INTERNAL_SERVER_ERROR
except ValueError:
LOGGER.error("Malformed response")
13 changes: 13 additions & 0 deletions bigml/tests/create_dataset_steps.py
@@ -222,3 +222,16 @@ def clone_dataset(step, dataset):
def the_cloned_dataset_is(step, dataset):
"""Checking the dataset is a clone"""
eq_(world.dataset["origin"], dataset)


def check_annotations(step, annotations_field, annotations_num):
"""Checking the dataset contains a number of annotations"""
annotations_num = int(annotations_num)
field = world.dataset["fields"][annotations_field]
if field["optype"] == "regions":
count = field["summary"]["regions"]["sum"]
else:
count = 0
for _, num in field["summary"]["categories"]:
count += num
eq_(count, annotations_num)
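
check_annotations reads its counts from the dataset's field summaries: a regions field carries a precomputed sum, while a categorical field lists per-category counts that are added up. A hypothetical pair of field entries, shaped the way the helper expects (names and numbers invented for illustration):

# Hypothetical field entries, as found under dataset["fields"]:
regions_field = {"optype": "regions",
                 "summary": {"regions": {"sum": 12}}}
labels_field = {"optype": "categorical",
                "summary": {"categories": [["label1", 2], ["label2", 1]]}}
# regions count -> 12; categorical count -> 2 + 1 = 3
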
2 changes: 2 additions & 0 deletions bigml/tests/read_resource_steps.py
@@ -14,6 +14,7 @@
# License for the specific language governing permissions and limitations
# under the License.

import time

from datetime import datetime

@@ -46,6 +47,7 @@ def wait_until_status_code_is(code1, code2, secs, resource_info):
if status['code'] == int(code2):
world.errors.append(resource_info)
eq_(status['code'], int(code1))
time.sleep(0.1)  # added to avoid mongo sync issues
return i_get_the_resource(resource_info)


36 changes: 36 additions & 0 deletions bigml/tests/test_22_source_args.py
@@ -25,6 +25,7 @@
from .world import world, setup_module, teardown_module, show_doc, \
show_method
from . import create_source_steps as source_create
from . import create_dataset_steps as dataset_create


class TestUploadSource:
@@ -125,3 +126,38 @@ def test_scenario3(self):
source_create.the_source_is_finished(
self, example["source_wait"])
source_create.the_cloned_source_origin_is(self, source)

def test_scenario4(self):
"""
Scenario: Successfully adding annotations to a composite source:
Given I create an annotated images data source uploading a "<data>" file
And I wait until the source is ready less than <source_wait> secs
And I create a dataset
And I wait until the dataset is ready less than <dataset_wait> secs
Then the new dataset has <annotations_num> annotations in the <annotations_field> field
"""
headers = ["data", "source_wait", "dataset_wait", "annotations_num",
"annotations_field"]
examples = [
['data/images/metadata.json', '500', '500', '12',
'100002'],
['data/images/metadata_compact.json', '500', '500', '3',
'100003']]
show_doc(self.test_scenario4)
for example in examples:
example = dict(zip(headers, example))
show_method(self, self.bigml["method"], example)
source_create.i_create_annotated_source(
self,
example["data"],
args={"image_analysis": {"enabled": False,
"extracted_features": []}})
source_create.the_source_is_finished(
self, example["source_wait"])
dataset_create.i_create_a_dataset(self)
dataset_create.the_dataset_is_finished_in_less_than(
self, example["dataset_wait"])
dataset_create.check_annotations(self,
example["annotations_field"],
example["annotations_num"])
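
The scenario above exercises the same path through the public API. As a rough usage sketch (assuming credentials are set in the environment; create_annotated_source is the bindings' entry point for annotated image sources, but the exact call shape here is otherwise an assumption):

from bigml.api import BigML

api = BigML()  # reads BIGML_USERNAME / BIGML_API_KEY from the environment
source = api.create_annotated_source(
    "data/images/metadata_compact.json",
    args={"image_analysis": {"enabled": False,
                             "extracted_features": []}})
api.ok(source)  # waits for the composite source and its annotations
dataset = api.create_dataset(source)
api.ok(dataset)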

2 changes: 1 addition & 1 deletion bigml/version.py
@@ -1 +1 @@
__version__ = '9.8.1'
__version__ = '9.8.2'
2 changes: 2 additions & 0 deletions data/images/annotations_compact.json
@@ -0,0 +1,2 @@
[{"file": "f1/fruits1f.png", "my_regions": "[[\"region1\" 0.2 0.2 0.4 0.4]]"},
{"file": "f1/fruits1.png", "my_regions": "[[\"region2\" 0.3 0.3 0.5 0.5] [\"region1\" 0.6 0.6 0.8 0.8]]"}]
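
Each compact region is a bracketed group holding a quoted label and four relative coordinates, and the annotation value is that list serialized as a string. As a sketch only, a helper producing such strings from (label, xmin, ymin, xmax, ymax) tuples could look like this; the library's actual compact_regions helper and the precise coordinate semantics are assumptions here:

def to_compact_regions(regions):
    """Serialize (label, xmin, ymin, xmax, ymax) tuples into a compact
    regions string such as '[["region1" 0.2 0.2 0.4 0.4]]'."""
    parts = ['["%s" %g %g %g %g]' % region for region in regions]
    return "[%s]" % " ".join(parts)

# to_compact_regions([("region1", 0.2, 0.2, 0.4, 0.4)])
# -> '[["region1" 0.2 0.2 0.4 0.4]]'
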
5 changes: 5 additions & 0 deletions data/images/metadata_compact.json
@@ -0,0 +1,5 @@
{"description": "Fruit images to test colour distributions with regions",
"images_file": "./fruits_hist.zip",
"new_fields": [{"name": "my_regions", "optype": "regions"}],
"source_id": null,
"annotations": "./annotations_compact.json"}
3 changes: 2 additions & 1 deletion setup.py
@@ -50,7 +50,8 @@
download_url="https://github.com/bigmlcom/python",
license="http://www.apache.org/licenses/LICENSE-2.0",
setup_requires = ['pytest'],
install_requires = ["setuptools==69.0.0", "unidecode", "bigml-chronos>=0.4.3", "requests",
install_requires = ["setuptools==70.0.0", "unidecode",
"bigml-chronos>=0.4.3", "requests",
"requests-toolbelt", "msgpack", "numpy>=1.22", "scipy",
"javascript"],
extras_require={"images": IMAGES_DEPENDENCIES,