Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support images_by_name for Screen and Plate #64

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
89 changes: 66 additions & 23 deletions src/omero_metadata/populate.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,11 +225,6 @@ def create_columns(self):
def columns_sanity_check(self, columns):
column_types = [column.__class__ for column in columns]
column_names = [column.name for column in columns]
if WellColumn in column_types and ImageColumn in column_types:
log.debug(column_types)
raise MetadataError(
('Well Column and Image Column cannot be resolved at '
'the same time. Pick one.'))
if RoiColumn in column_types and ROI_NAME_COLUMN in column_names:
log.debug('Found both ROI names and IDs. Not appending either.')
return False
Expand Down Expand Up @@ -390,8 +385,8 @@ def get_well_name(self, well_id, plate=None):
row = self.AS_ALPHA[row]
return '%s%d' % (row, col + 1)

def get_image_id_by_name(self, iname, dname=None):
return self.wrapper.get_image_id_by_name(iname, dname)
def get_image_id_by_name(self, iname, did=None):
return self.wrapper.get_image_id_by_name(iname, did)

def get_image_name_by_id(self, iid, pid=None):
return self.wrapper.get_image_name_by_id(iid, pid)
Expand Down Expand Up @@ -574,7 +569,8 @@ def get_image_name_by_id(self, iid, pid=None):
raise Exception("Cannot resolve image to plate")
return self.images_by_id[pid][iid].name.val

def parse_plate(self, plate, wells_by_location, wells_by_id, images_by_id):
def parse_plate(self, plate, wells_by_location, wells_by_id,
images_by_id, images_by_name):
"""
Accepts PlateData instances
"""
Expand All @@ -594,6 +590,7 @@ def parse_plate(self, plate, wells_by_location, wells_by_id, images_by_id):
for well_sample in well.well_samples:
image = well_sample.image
images_by_id[image.id.val] = image
images_by_name[image.name.val] = image
log.debug('Completed parsing plate: %s' % plate.name.val)
for row in wells_by_location:
log.debug('%s: %r' % (row, list(wells_by_location[row].keys())))
Expand Down Expand Up @@ -638,6 +635,9 @@ def __init__(self, value_resolver):
super(ScreenWrapper, self).__init__(value_resolver)
self._load()

def get_image_id_by_name(self, iname, pid=None):
    """
    Return the ID of the image named ``iname`` on the plate ``pid``.

    ``self.images_by_name`` is indexed first by plate ID and then by
    image name. A missing plate ID (including the default ``None``) or
    a missing image name propagates the lookup error to the caller.
    """
    images_on_plate = self.images_by_name[pid]
    image = images_on_plate[iname]
    return image.id.val

def get_plate_name_by_id(self, plate):
plate = self.plates_by_id[plate]
return plate.name.val
Expand Down Expand Up @@ -666,6 +666,7 @@ def _load(self):
if self.target_object is None:
raise MetadataError('Could not find target object!')
self.target_name = unwrap(self.target_object.getName())
self.images_by_name = dict()
self.images_by_id = dict()
self.wells_by_location = dict()
self.wells_by_id = dict()
Expand All @@ -689,8 +690,11 @@ def _load(self):
wells_by_id = dict()
self.wells_by_location[plate.name.val] = wells_by_location
self.wells_by_id[plate.id.val] = wells_by_id
images_by_name = dict()
self.images_by_name[plate.id.val] = images_by_name
self.parse_plate(
plate, wells_by_location, wells_by_id, images_by_id
plate, wells_by_location, wells_by_id,
images_by_id, images_by_name
)


Expand All @@ -705,6 +709,10 @@ def get_well_by_id(self, well_id, plate=None):
wells = self.wells_by_id[plate]
return wells[well_id]

def get_image_id_by_name(self, iname, pid=None):
    """
    Return the ID of the image named ``iname`` on the target plate.

    ``pid`` is ignored: the lookup always uses the ID of
    ``self.target_object`` as the key into ``self.images_by_name``.
    """
    target_plate_id = self.target_object.id.val
    images_on_plate = self.images_by_name[target_plate_id]
    return images_on_plate[iname].id.val

def subselect(self, rows, names):
"""
If we're processing a plate but the bulk-annotations file contains
Expand Down Expand Up @@ -742,18 +750,22 @@ def _load(self):
self.images_by_id = dict()
images_by_id = dict()

self.images_by_name = dict()
images_by_name = dict()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the goal of this line?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That creates a dictionary that is assigned to `self.images_by_name[self.target_object.id.val]` and is then passed to `parse_plate()` below, where it is populated. This just follows the same pattern as `images_by_id = dict()` in the lines above.

self.images_by_name[self.target_object.id.val] = images_by_name

self.wells_by_location[self.target_object.name.val] = wells_by_location
self.wells_by_id[self.target_object.id.val] = wells_by_id
self.images_by_id[self.target_object.id.val] = images_by_id
self.parse_plate(
PlateData(self.target_object),
wells_by_location, wells_by_id, images_by_id
wells_by_location, wells_by_id, images_by_id, images_by_name
)


class PDIWrapper(ValueWrapper):

def get_image_id_by_name(self, iname, dname=None):
def get_image_id_by_name(self, iname, did=None):
raise Exception("to be implemented by subclasses")


Expand Down Expand Up @@ -793,7 +805,7 @@ def resolve_shape(self, value):
log.warn('Wrong input type for Shape ID: %s' % value)
return -1

def get_image_id_by_name(self, iname, dname=None):
def get_image_id_by_name(self, iname, did=None):
return self.images_by_name[iname].id.val

def get_image_name_by_id(self, iid, did):
Expand Down Expand Up @@ -887,8 +899,8 @@ def __init__(self, value_resolver):
self.datasets_by_name = dict()
self._load()

def get_image_id_by_name(self, iname, dname=None):
return self.images_by_name[dname][iname].id.val
def get_image_id_by_name(self, iname, did=None):
return self.images_by_name[did][iname].id.val

def get_image_name_by_id(self, iid, did=None):
return self.images_by_id[did][iid].name.val
Expand Down Expand Up @@ -1218,12 +1230,10 @@ def parse(self):
return self.parse_from_handle_stream(f2)

def preprocess_data(self, reader):
# Get count of data columns - e.g. NOT Well Name
column_count = 0
for column in self.columns:
if column.name not in ADDED_COLUMN_NAMES:
column_count += 1
sbesson marked this conversation as resolved.
Show resolved Hide resolved
for i, row in enumerate(reader):
# For each row in the table,
# add a single value to the columns.values
# then call post.process() to resolve ID -> name or name -> ID
row = [(self.columns[i], value) for i, value in enumerate(row)]
for column, original_value in row:
log.debug('Original value %s, %s',
Expand Down Expand Up @@ -1308,8 +1318,10 @@ def populate_from_reader(self,
self.populate_row(row)
row_count = row_count + 1
if row_count >= batch_size:
# Call post_process() for this batch
self.post_process()
table.addData(self.columns)
# clear row data ready for next batch
for column in self.columns:
column.values = []
row_count = 0
Expand All @@ -1318,6 +1330,7 @@ def populate_from_reader(self,
if row_count != 0:
log.debug("DATA TO ADD")
log.debug(self.columns)
# Call post_process for final remaining rows (less than batch_size)
self.post_process()
table.addData(self.columns)

Expand Down Expand Up @@ -1350,6 +1363,11 @@ def populate(self, rows):
log.warning('Skip empty row %d', r + 1)

def post_process(self):
# post_process is called at 2 points in the populate workflow...
# First called during preprocess_data() on each row at a time (when
# each column.values list has a single value)
# then again during populate_from_reader(), when all rows are processed
# in batches.
target_class = self.target_object.__class__
columns_by_name = dict()
well_column = None
Expand All @@ -1366,7 +1384,7 @@ def post_process(self):
for column in self.columns:
columns_by_name[column.name.lower()] = column
if column.__class__ is PlateColumn:
log.warn("PlateColumn is unimplemented")
log.debug("PlateColumn is unimplemented")
elif column.__class__ is WellColumn:
well_column = column
elif column.name == WELL_NAME_COLUMN:
Expand Down Expand Up @@ -1429,6 +1447,7 @@ def post_process(self):
DatasetI is target_class or
ProjectI is target_class) and \
resolve_image_names and not resolve_image_ids:
# PDI - need to know Image Names from Image IDs
iname = ""
try:
log.debug(image_name_column)
Expand All @@ -1454,6 +1473,7 @@ def post_process(self):
DatasetI is target_class or
ProjectI is target_class) and \
resolve_image_ids and not resolve_image_names:
# PDI - need to know Image IDs from Names
iid = -1
try:
log.debug(image_column)
Expand All @@ -1471,12 +1491,14 @@ def post_process(self):
iname, did)
except KeyError:
log.warn(
"%d not found in image ids" % iid)
"%s not found in image names" % iname)
assert i == len(image_column.values)
image_column.values.append(iid)
elif image_name_column is not None and (
ScreenI is target_class or
PlateI is target_class):
PlateI is target_class) and \
resolve_image_names and not resolve_image_ids:
# HCS - we need to know image Names from IDs
iid = image_column.values[i]
log.info("Checking image %s", iid)
pid = None
Expand All @@ -1487,8 +1509,29 @@ def post_process(self):
image_name_column.size = max(
image_name_column.size, len(iname)
)
elif image_name_column is not None and (
ScreenI is target_class or
PlateI is target_class) and \
resolve_image_ids and not resolve_image_names:
# HCS - we need to know image IDs from Names
log.debug(image_column)
iname = image_name_column.values[i]
iid = -1
try:
# If target class is Screen, a plate column should exist
if "plate" in columns_by_name:
pid = int(columns_by_name["plate"].values[i])
elif target_class is PlateI:
pid = self.target_object.id.val
log.debug("Using Plate:%d" % pid)
iid = self.value_resolver.get_image_id_by_name(
iname, pid)
except KeyError:
log.warn(
"%s not found in image names" % iname)
image_column.values.append(iid)
elif target_class is not ImageI:
log.info('Missing image name column, skipping.')
log.debug('Missing image name column, skipping.')

if plate_name_column is not None:
plate = columns_by_name['plate'].values[i] # FIXME
Expand Down
45 changes: 39 additions & 6 deletions test/integration/metadata/test_populate.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,27 @@ def create_screen(self, row_count, col_count):
plate_cols=col_count)[0]
plate2 = self.test.import_plates(plate_rows=row_count,
plate_cols=col_count)[0]
# Rename Images like "A1_Field-0" to match names in csv
conn = BlitzGateway(client_obj=self.test.client)
update = conn.getUpdateService()
images_by_id = {}
for pid in [plate1.id.val, plate2.id.val]:
plate = conn.getObject("Plate", pid)
for well in plate.listChildren():
for field_index, ws in enumerate(well.listChildren()):
img = ws.getImage()._obj
img.name = rstring(
f'{ well.getWellPos() }_Field-{field_index}')
img = update.saveAndReturnObject(img)
images_by_id[img.id.val] = img
plate1 = self.set_name(plate1, "P001")
plate2 = self.set_name(plate2, "P002")
screen = ScreenI()
screen.name = rstring("Screen")
screen.linkPlate(plate1.proxy())
screen.linkPlate(plate2.proxy())
# cache images_by_id for checking result
self.images_by_id = images_by_id
return self.test.client.sf.getUpdateService().\
saveAndReturnObject(screen)

Expand All @@ -176,11 +191,18 @@ def assert_columns(self, columns):
assert col_names == ",".join([c.name for c in columns])

def assert_values(self, row_values):
    """
    Check one table row against the values written by self.create_csv().

    The row is identified by a marker value it contains (a well label or
    an ROI name) and the matching expected values are asserted. Rows
    containing none of the known markers are accepted without checks.
    """
    def contains_any(*markers):
        return any(marker in row_values for marker in markers)

    # Well labels are matched in both cases: it is unclear where the
    # lower-casing happens.
    if contains_any("A1", "a1"):
        assert "Control" in row_values
        return
    if contains_any("A2", "a2"):
        assert "Treatment" in row_values
        return
    if "roi1" in row_values:
        assert 0.5 in row_values
        assert 100 in row_values
        return
    if "roi2" in row_values:
        # NaN never compares equal to itself, so match its string form.
        as_text = [str(value) for value in row_values]
        assert 'nan' in as_text
        assert 200 in row_values

def assert_child_annotations(self, oas):
for ma, wid, wr, wc in oas:
Expand Down Expand Up @@ -208,14 +230,16 @@ def get_all_map_annotations(self):
class Screen2Plates(Fixture):

def __init__(self):
self.count = 6
self.count = 8
self.ann_count = 4
self.row_count = 1
self.col_count = 2
self.csv = self.create_csv(
col_names="Plate,Well,Well Type,Concentration",
row_data=("P001,A1,Control,0", "P001,A2,Treatment,10",
"P002,A1,Control,0", "P002,A2,Treatment,10"))
col_names="Plate,Well,Image Name,Well Type,Concentration",
row_data=("P001,A1,A1_Field-0,Control,0",
"P001,A2,A2_Field-0,Treatment,10",
"P002,A1,A1_Field-0,Control,0",
"P002,A2,A2_Field-0,Treatment,10"))
self.screen = None

def assert_row_count(self, rows):
Expand All @@ -225,10 +249,19 @@ def assert_row_count(self, rows):
assert rows == 2 * self.row_count * self.col_count

def assert_columns(self, columns):
# Adds Plate Name,Well Name columns
col_names = "Plate,Well,Well Type,Concentration,Plate Name,Well Name"
# Adds Plate Name,Well Name, Image columns
col_names = ("Plate,Well,Image Name,Well Type,"
"Concentration,Plate Name,Well Name,Image")
assert col_names == ",".join([c.name for c in columns])

def assert_values(self, row_values):
    """
    Run the base-class row checks, then verify the resolved Image ID.

    The last value in the row must be a key of ``self.images_by_id``,
    and the image stored under it must carry the name found in the
    third value of the row (the "Image Name" column of this fixture's
    csv).
    """
    super(Screen2Plates, self).assert_values(row_values)
    # Last column holds the Image ID; index 2 holds the Image Name.
    image_id, image_name = row_values[-1], row_values[2]
    assert image_id in self.images_by_id
    matched_image = self.images_by_id[image_id]
    assert matched_image.name.val == image_name

def get_target(self):
if not self.screen:
self.screen = self.create_screen(self.row_count, self.col_count)
Expand Down