From 464680f191059e1551d9b185b5b79d9145f5d248 Mon Sep 17 00:00:00 2001 From: William Moore Date: Thu, 16 Dec 2021 14:54:00 +0000 Subject: [PATCH 1/6] Support images_by_name for Screen and Plate --- src/omero_metadata/populate.py | 78 ++++++++++++++++++++++------------ 1 file changed, 52 insertions(+), 26 deletions(-) diff --git a/src/omero_metadata/populate.py b/src/omero_metadata/populate.py index b6b68a39..ab5ed502 100644 --- a/src/omero_metadata/populate.py +++ b/src/omero_metadata/populate.py @@ -27,7 +27,7 @@ from builtins import chr from builtins import str from builtins import range -from future.utils import native_str +from future.utils import isidentifier, native_str from past.builtins import basestring from builtins import object import logging @@ -225,11 +225,6 @@ def create_columns(self): def columns_sanity_check(self, columns): column_types = [column.__class__ for column in columns] column_names = [column.name for column in columns] - if WellColumn in column_types and ImageColumn in column_types: - log.debug(column_types) - raise MetadataError( - ('Well Column and Image Column cannot be resolved at ' - 'the same time. Pick one.')) if RoiColumn in column_types and ROI_NAME_COLUMN in column_names: log.debug('Found both ROI names and IDs. Not appending either.') return False @@ -382,8 +377,8 @@ def get_well_name(self, well_id, plate=None): row = self.AS_ALPHA[row] return '%s%d' % (row, col + 1) - def get_image_id_by_name(self, iname, dname=None): - return self.wrapper.get_image_id_by_name(iname, dname) + def get_image_id_by_name(self, iname, did=None): + return self.wrapper.get_image_id_by_name(iname, did) def get_image_name_by_id(self, iid, pid=None): return self.wrapper.get_image_name_by_id(iid, pid) @@ -568,7 +563,7 @@ def get_image_name_by_id(self, iid, pid=None): raise Exception("Cannot resolve image to plate") return self.images_by_id[pid][iid].name.val - def parse_plate(self, plate, wells_by_location, wells_by_id, images_by_id): + def parse_plate(self, plate, wells_by_location, wells_by_id, images_by_id, images_by_name): """ Accepts PlateData instances """ @@ -588,6 +583,7 @@ def parse_plate(self, plate, wells_by_location, wells_by_id, images_by_id): for well_sample in well.well_samples: image = well_sample.image images_by_id[image.id.val] = image + images_by_name[image.name.val] = image log.debug('Completed parsing plate: %s' % plate.name.val) for row in wells_by_location: log.debug('%s: %r' % (row, list(wells_by_location[row].keys()))) @@ -632,6 +628,9 @@ def __init__(self, value_resolver): super(ScreenWrapper, self).__init__(value_resolver) self._load() + def get_image_id_by_name(self, iname, pid=None): + return self.images_by_name[pid][iname].id.val + def get_plate_name_by_id(self, plate): plate = self.plates_by_id[plate] return plate.name.val @@ -660,6 +659,7 @@ def _load(self): if self.target_object is None: raise MetadataError('Could not find target object!') self.target_name = unwrap(self.target_object.getName()) + self.images_by_name = dict() self.images_by_id = dict() self.wells_by_location = dict() self.wells_by_id = dict() @@ -683,8 +683,10 @@ def _load(self): wells_by_id = dict() self.wells_by_location[plate.name.val] = wells_by_location self.wells_by_id[plate.id.val] = wells_by_id + images_by_name = dict() + self.images_by_name[plate.id.val] = images_by_name self.parse_plate( - plate, wells_by_location, wells_by_id, images_by_id + plate, wells_by_location, wells_by_id, images_by_id, images_by_name ) @@ -736,18 +738,22 @@ def _load(self): self.images_by_id = dict() images_by_id = dict() + self.images_by_name = dict() + images_by_name = dict() + self.images_by_name[self.target_object.id.val] = images_by_name + self.wells_by_location[self.target_object.name.val] = wells_by_location self.wells_by_id[self.target_object.id.val] = wells_by_id self.images_by_id[self.target_object.id.val] = images_by_id self.parse_plate( PlateData(self.target_object), - wells_by_location, wells_by_id, images_by_id + wells_by_location, wells_by_id, images_by_id, images_by_name ) class PDIWrapper(ValueWrapper): - def get_image_id_by_name(self, iname, dname=None): + def get_image_id_by_name(self, iname, did=None): raise Exception("to be implemented by subclasses") @@ -759,7 +765,7 @@ def __init__(self, value_resolver): self.images_by_name = dict() self._load() - def get_image_id_by_name(self, iname, dname=None): + def get_image_id_by_name(self, iname, did=None): return self.images_by_name[iname].id.val def get_image_name_by_id(self, iid, did): @@ -817,8 +823,8 @@ def __init__(self, value_resolver): self.datasets_by_name = dict() self._load() - def get_image_id_by_name(self, iname, dname=None): - return self.images_by_name[dname][iname].id.val + def get_image_id_by_name(self, iname, did=None): + return self.images_by_name[did][iname].id.val def get_image_name_by_id(self, iid, did=None): return self.images_by_id[did][iid].name.val @@ -1129,12 +1135,7 @@ def parse(self): return self.parse_from_handle_stream(f2) def preprocess_data(self, reader): - # Get count of data columns - e.g. NOT Well Name - column_count = 0 - for column in self.columns: - if column.name not in ADDED_COLUMN_NAMES: - column_count += 1 - for i, row in enumerate(reader): + for row in reader: row = [(self.columns[i], value) for i, value in enumerate(row)] for column, original_value in row: log.debug('Original value %s, %s', @@ -1355,11 +1356,11 @@ def post_process(self): DatasetI is target_class or ProjectI is target_class) and \ resolve_image_ids and not resolve_image_names: + # PDI - need to know Image IDs from Names iid = -1 + log.debug(image_column) + iname = image_name_column.values[i] try: - log.debug(image_column) - iname = image_name_column.values[i] - did = self.target_object.id.val if "dataset name" in columns_by_name \ and target_class is not DatasetI: dname = columns_by_name["dataset name"].values[i] @@ -1372,12 +1373,14 @@ def post_process(self): iname, did) except KeyError: log.warn( - "%d not found in image ids" % iid) + "%s not found in image names" % iname) assert i == len(image_column.values) image_column.values.append(iid) elif image_name_column is not None and ( ScreenI is target_class or - PlateI is target_class): + PlateI is target_class) and \ + resolve_image_names and not resolve_image_ids: + # HCS - we need to know image Names from IDs iid = image_column.values[i] log.info("Checking image %s", iid) pid = None @@ -1388,6 +1391,29 @@ def post_process(self): image_name_column.size = max( image_name_column.size, len(iname) ) + elif image_name_column is not None and ( + ScreenI is target_class or + PlateI is target_class) and \ + resolve_image_ids and not resolve_image_names: + # HCS - we need to know image IDs from Names + log.debug(image_column) + iname = image_name_column.values[i] + iid = -1 + try: + if "plate" in columns_by_name: + pid = int(columns_by_name["plate"].values[i]) + elif "plate name" in columns_by_name \ + and target_class is not PlateI: + pname = columns_by_name["plate name"].values[i] + pid = self.value_resolver.wrapper.plates_by_name[ + pname].id.val + log.debug("Using Plate:%d" % pid) + iid = self.value_resolver.get_image_id_by_name( + iname, pid) + except KeyError: + log.warn( + "%s not found in image names" % iname) + image_column.values.append(iid) elif target_class is not ImageI: log.info('Missing image name column, skipping.') From 1bb59af54fc25c314df8f23509d46d62e510994a Mon Sep 17 00:00:00 2001 From: William Moore Date: Fri, 17 Dec 2021 11:10:54 +0000 Subject: [PATCH 2/6] Handle images_by_name when target_object is Plate --- src/omero_metadata/populate.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/omero_metadata/populate.py b/src/omero_metadata/populate.py index ab5ed502..90d44776 100644 --- a/src/omero_metadata/populate.py +++ b/src/omero_metadata/populate.py @@ -701,6 +701,10 @@ def get_well_by_id(self, well_id, plate=None): wells = self.wells_by_id[plate] return wells[well_id] + def get_image_id_by_name(self, iname, pid=None): + plate = self.target_object.id.val + return self.images_by_name[plate][iname].id.val + def subselect(self, rows, names): """ If we're processing a plate but the bulk-annotations file contains @@ -1400,13 +1404,11 @@ def post_process(self): iname = image_name_column.values[i] iid = -1 try: + # If target class is Screen, a plate column should exist if "plate" in columns_by_name: pid = int(columns_by_name["plate"].values[i]) - elif "plate name" in columns_by_name \ - and target_class is not PlateI: - pname = columns_by_name["plate name"].values[i] - pid = self.value_resolver.wrapper.plates_by_name[ - pname].id.val + elif target_class is PlateI: + pid = self.target_object.id.val log.debug("Using Plate:%d" % pid) iid = self.value_resolver.get_image_id_by_name( iname, pid) From 33a95544f814c2f1fd025fa7eb04e2bab6deeffe Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 20 Dec 2021 10:57:31 +0000 Subject: [PATCH 3/6] add testing of Image Name to ID in Plate --- src/omero_metadata/populate.py | 15 +++--- test/integration/metadata/test_populate.py | 61 ++++++++++++++++------ 2 files changed, 53 insertions(+), 23 deletions(-) diff --git a/src/omero_metadata/populate.py b/src/omero_metadata/populate.py index 90d44776..e5e38d56 100644 --- a/src/omero_metadata/populate.py +++ b/src/omero_metadata/populate.py @@ -27,7 +27,7 @@ from builtins import chr from builtins import str from builtins import range -from future.utils import isidentifier, native_str +from future.utils import native_str from past.builtins import basestring from builtins import object import logging @@ -563,7 +563,8 @@ def get_image_name_by_id(self, iid, pid=None): raise Exception("Cannot resolve image to plate") return self.images_by_id[pid][iid].name.val - def parse_plate(self, plate, wells_by_location, wells_by_id, images_by_id, images_by_name): + def parse_plate(self, plate, wells_by_location, wells_by_id, + images_by_id, images_by_name): """ Accepts PlateData instances """ @@ -686,7 +687,8 @@ def _load(self): images_by_name = dict() self.images_by_name[plate.id.val] = images_by_name self.parse_plate( - plate, wells_by_location, wells_by_id, images_by_id, images_by_name + plate, wells_by_location, wells_by_id, + images_by_id, images_by_name ) @@ -1139,7 +1141,7 @@ def parse(self): return self.parse_from_handle_stream(f2) def preprocess_data(self, reader): - for row in reader: + for i, row in enumerate(reader): row = [(self.columns[i], value) for i, value in enumerate(row)] for column, original_value in row: log.debug('Original value %s, %s', @@ -1362,9 +1364,10 @@ def post_process(self): resolve_image_ids and not resolve_image_names: # PDI - need to know Image IDs from Names iid = -1 - log.debug(image_column) - iname = image_name_column.values[i] try: + log.debug(image_column) + iname = image_name_column.values[i] + did = self.target_object.id.val if "dataset name" in columns_by_name \ and target_class is not DatasetI: dname = columns_by_name["dataset name"].values[i] diff --git a/test/integration/metadata/test_populate.py b/test/integration/metadata/test_populate.py index 62a366fd..0e5c79bd 100644 --- a/test/integration/metadata/test_populate.py +++ b/test/integration/metadata/test_populate.py @@ -37,6 +37,7 @@ import shutil from omero.api import RoiOptions +from omero.gateway import BlitzGateway from omero.grid import ImageColumn from omero.grid import RoiColumn from omero.grid import StringColumn @@ -143,12 +144,26 @@ def create_screen(self, row_count, col_count): plate_cols=col_count)[0] plate2 = self.test.import_plates(plate_rows=row_count, plate_cols=col_count)[0] + # Rename Images like "A1_Field-0" to match names in csv + conn = BlitzGateway(client_obj=self.test.client) + update = conn.getUpdateService() + images_by_id = {} + for pid in [plate1.id.val, plate2.id.val]: + plate = conn.getObject("Plate", pid) + for well in plate.listChildren(): + for field_index, ws in enumerate(well.listChildren()): + img = ws.getImage()._obj + img.name = rstring(f'{ well.getWellPos() }_Field-{field_index}') + img = update.saveAndReturnObject(img) + images_by_id[img.id.val] = img plate1 = self.set_name(plate1, "P001") plate2 = self.set_name(plate2, "P002") screen = ScreenI() screen.name = rstring("Screen") screen.linkPlate(plate1.proxy()) screen.linkPlate(plate2.proxy()) + # cache images_by_id for checking result + self.images_by_id = images_by_id return self.test.client.sf.getUpdateService().\ saveAndReturnObject(screen) @@ -174,6 +189,20 @@ def assert_columns(self, columns): col_names = "Well,Well Type,Concentration,Well Name" assert col_names == ",".join([c.name for c in columns]) + def assert_table_row(self, row_values, row_index): + # Check rows, based on self.create_csv() + # Unsure where the lower-casing is happening + if "A1" in row_values or "a1" in row_values: + assert "Control" in row_values + elif "A2" in row_values or "a2" in row_values: + assert "Treatment" in row_values + elif "roi1" in row_values: + assert 0.5 in row_values + assert 100 in row_values + elif "roi2" in row_values: + assert 'nan' in [str(value) for value in row_values] + assert 200 in row_values + def assert_child_annotations(self, oas): for ma, wid, wr, wc in oas: assert isinstance(ma, MapAnnotationI) @@ -200,14 +229,14 @@ def get_all_map_annotations(self): class Screen2Plates(Fixture): def __init__(self): - self.count = 6 + self.count = 8 self.ann_count = 4 self.row_count = 1 self.col_count = 2 self.csv = self.create_csv( - col_names="Plate,Well,Well Type,Concentration", - row_data=("P001,A1,Control,0", "P001,A2,Treatment,10", - "P002,A1,Control,0", "P002,A2,Treatment,10")) + col_names="Plate,Well,Image Name,Well Type,Concentration", + row_data=("P001,A1,A1_Field-0,Control,0", "P001,A2,A2_Field-0,Treatment,10", + "P002,A1,A1_Field-0,Control,0", "P002,A2,A2_Field-0,Treatment,10")) self.screen = None def assert_row_count(self, rows): @@ -217,10 +246,18 @@ def assert_row_count(self, rows): assert rows == 2 * self.row_count * self.col_count def assert_columns(self, columns): - # Adds Plate Name,Well Name columns - col_names = "Plate,Well,Well Type,Concentration,Plate Name,Well Name" + # Adds Plate Name,Well Name, Image columns + col_names = "Plate,Well,Image Name,Well Type,Concentration,Plate Name,Well Name,Image" assert col_names == ",".join([c.name for c in columns]) + def assert_table_row(self, row_values, row_index): + super(Screen2Plates, self).assert_table_row(row_values, row_index) + # last column should contain valid Image ID + image_id = row_values[-1] + image_name = row_values[2] + assert image_id in self.images_by_id + assert self.images_by_id[image_id].name.val == image_name + def get_target(self): if not self.screen: self.screen = self.create_screen(self.row_count, self.col_count) @@ -1082,17 +1119,7 @@ def _assert_parsing_context_values(self, t, fixture): row_values = [col.values[0] for col in t.read( list(range(len(cols))), hit, hit+1).columns] assert len(row_values) == fixture.count - # Unsure where the lower-casing is happening - if "A1" in row_values or "a1" in row_values: - assert "Control" in row_values - elif "A2" in row_values or "a2" in row_values: - assert "Treatment" in row_values - elif "roi1" in row_values: - assert 0.5 in row_values - assert 100 in row_values - elif "roi2" in row_values: - assert 'nan' in [str(value) for value in row_values] - assert 200 in row_values + fixture.assert_table_row(row_values, hit) def _test_bulk_to_map_annotation_context(self, fixture, batch_size): # self._testPopulateMetadataPlate() From 803f97c555ea9e5cfd6fc66e49b8aa79131785e8 Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 20 Dec 2021 11:04:55 +0000 Subject: [PATCH 4/6] flake8 fixes --- test/integration/metadata/test_populate.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/test/integration/metadata/test_populate.py b/test/integration/metadata/test_populate.py index 0e5c79bd..a70b1ead 100644 --- a/test/integration/metadata/test_populate.py +++ b/test/integration/metadata/test_populate.py @@ -153,7 +153,8 @@ def create_screen(self, row_count, col_count): for well in plate.listChildren(): for field_index, ws in enumerate(well.listChildren()): img = ws.getImage()._obj - img.name = rstring(f'{ well.getWellPos() }_Field-{field_index}') + img.name = rstring( + f'{ well.getWellPos() }_Field-{field_index}') img = update.saveAndReturnObject(img) images_by_id[img.id.val] = img plate1 = self.set_name(plate1, "P001") @@ -235,8 +236,10 @@ def __init__(self): self.col_count = 2 self.csv = self.create_csv( col_names="Plate,Well,Image Name,Well Type,Concentration", - row_data=("P001,A1,A1_Field-0,Control,0", "P001,A2,A2_Field-0,Treatment,10", - "P002,A1,A1_Field-0,Control,0", "P002,A2,A2_Field-0,Treatment,10")) + row_data=("P001,A1,A1_Field-0,Control,0", + "P001,A2,A2_Field-0,Treatment,10", + "P002,A1,A1_Field-0,Control,0", + "P002,A2,A2_Field-0,Treatment,10")) self.screen = None def assert_row_count(self, rows): @@ -247,7 +250,8 @@ def assert_row_count(self, rows): def assert_columns(self, columns): # Adds Plate Name,Well Name, Image columns - col_names = "Plate,Well,Image Name,Well Type,Concentration,Plate Name,Well Name,Image" + col_names = ("Plate,Well,Image Name,Well Type," + "Concentration,Plate Name,Well Name,Image") assert col_names == ",".join([c.name for c in columns]) def assert_table_row(self, row_values, row_index): From edc08533ee1d81be076605acde00837bb46baaa1 Mon Sep 17 00:00:00 2001 From: William Moore Date: Wed, 8 Feb 2023 14:50:24 +0000 Subject: [PATCH 5/6] Add comments to try and help understanding --- src/omero_metadata/populate.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/omero_metadata/populate.py b/src/omero_metadata/populate.py index 94e4e0f5..999d9ace 100644 --- a/src/omero_metadata/populate.py +++ b/src/omero_metadata/populate.py @@ -1231,6 +1231,9 @@ def parse(self): def preprocess_data(self, reader): for i, row in enumerate(reader): + # For each row in the table, + # add a single value to the columns.values + # then call post.process() to resolve ID -> name or name -> ID row = [(self.columns[i], value) for i, value in enumerate(row)] for column, original_value in row: log.debug('Original value %s, %s', @@ -1315,8 +1318,10 @@ def populate_from_reader(self, self.populate_row(row) row_count = row_count + 1 if row_count >= batch_size: + # Call post_process() for this batch self.post_process() table.addData(self.columns) + # clear row data ready for next batch for column in self.columns: column.values = [] row_count = 0 @@ -1325,6 +1330,7 @@ def populate_from_reader(self, if row_count != 0: log.debug("DATA TO ADD") log.debug(self.columns) + # Call post_process for final remaining rows (less than batch_size) self.post_process() table.addData(self.columns) @@ -1357,6 +1363,11 @@ def populate(self, rows): log.warning('Skip empty row %d', r + 1) def post_process(self): + # post_process is called at 2 points in the populate workflow... + # First called during preprocess_data() on each row at a time (when + # each column.values list has a single value) + # then again during populate_from_reader(), when all rows are processed + # in batches. target_class = self.target_object.__class__ columns_by_name = dict() well_column = None @@ -1436,6 +1447,7 @@ def post_process(self): DatasetI is target_class or ProjectI is target_class) and \ resolve_image_names and not resolve_image_ids: + # PDI - need to know Image Names from Image IDs iname = "" try: log.debug(image_name_column) From 78e55539b68e351ed5ef202854718b49a37729a7 Mon Sep 17 00:00:00 2001 From: William Moore Date: Wed, 8 Feb 2023 14:52:20 +0000 Subject: [PATCH 6/6] Use log.debug() instead of warn() or info() The script is working fine, so it looks like these are invalid warnings but it is hard to understand the working of the script --- src/omero_metadata/populate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/omero_metadata/populate.py b/src/omero_metadata/populate.py index 999d9ace..e09f619c 100644 --- a/src/omero_metadata/populate.py +++ b/src/omero_metadata/populate.py @@ -1384,7 +1384,7 @@ def post_process(self): for column in self.columns: columns_by_name[column.name.lower()] = column if column.__class__ is PlateColumn: - log.warn("PlateColumn is unimplemented") + log.debug("PlateColumn is unimplemented") elif column.__class__ is WellColumn: well_column = column elif column.name == WELL_NAME_COLUMN: @@ -1531,7 +1531,7 @@ def post_process(self): "%s not found in image names" % iname) image_column.values.append(iid) elif target_class is not ImageI: - log.info('Missing image name column, skipping.') + log.debug('Missing image name column, skipping.') if plate_name_column is not None: plate = columns_by_name['plate'].values[i] # FIXME