diff --git a/.bumpversion.cfg b/.bumpversion.cfg index c38ecdaf..ee1374ac 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.9.1.dev0 +current_version = 0.10.1.dev0 commit = True tag = True sign_tags = True diff --git a/CHANGES.rst b/CHANGES.rst index 73333acb..df2fc3bd 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,11 @@ CHANGES ======= +0.10.0 +------ + +* Populate metadata supports ROIs and Shapes when target is a Dataset + 0.9.0 ----- diff --git a/README.rst b/README.rst index 480540f4..1e568745 100644 --- a/README.rst +++ b/README.rst @@ -65,8 +65,8 @@ populate This command creates an ``OMERO.table`` (bulk annotation) from a ``CSV`` file and links the table as a ``File Annotation`` to a parent container such as Screen, Plate, Project -or Dataset. It also attempts to convert Image or Well names from the ``CSV`` into -Image or Well IDs in the ``OMERO.table``. +or Dataset, or to an Image. It also attempts to convert Image, Well or ROI names from the ``CSV`` into +object IDs in the ``OMERO.table``. The ``CSV`` file must be provided as local file with ``--file path/to/file.csv``. @@ -86,10 +86,10 @@ The ``# header`` row is optional. Default column type is ``String``. NB: Column names should not contain spaces if you want to be able to query by these columns. 
-Examples: +**Project / Dataset** To add a table to a Project, the ``CSV`` file needs to specify ``Dataset Name`` -and ``Image Name``:: +and ``Image Name`` or ``Image ID``:: $ omero metadata populate Project:1 --file path/to/project.csv @@ -102,7 +102,8 @@ project.csv:: img-03.png,dataset01,0.093,3,TRITC img-04.png,dataset01,0.429,4,Cy5 -This will create an OMERO.table linked to the Project like this: +This will create an OMERO.table linked to the Project like this, with +a new ``Image`` column containing the Image IDs: ========== ============ ======== ============= ============ ===== Image Name Dataset Name ROI_Area Channel_Index Channel_Name Image @@ -115,6 +116,9 @@ img-04.png dataset01 0.429 4 Cy5 36641 If the target is a Dataset instead of a Project, the ``Dataset Name`` column is not needed. + +**Screen / Plate** + To add a table to a Screen, the ``CSV`` file needs to specify ``Plate`` name and ``Well``. If a ``# header`` is specified, column types must be ``well`` and ``plate``. @@ -142,36 +146,45 @@ Well Plate Drug Concentration Cell_Count Percent_Mitotic Well Name Plat If the target is a Plate instead of a Screen, the ``Plate`` column is not needed. -If the target is an Image, a csv with ROI-level and object-level data can be used to create an -``OMERO.table`` (bulk annotation) as a ``File Annotation`` on an Image. -The ROI identifying column can be an ``roi`` type column containing ROI ID, and ``Roi Name`` -column will be appended automatically (see example below). Alternatively, the input column can be +**ROIs** + +If the target is an Image or a Dataset, a ``CSV`` with ROI-level or Shape-level data can be used to create an +``OMERO.table`` (bulk annotation) as a ``File Annotation`` linked to the target object. +If there is an ``roi`` column (header type ``roi``) containing ROI IDs and the target is an Image, an ``Roi Name`` +column will be appended automatically (see example below). 
If a column of Shape IDs named ``shape`` +of type ``l`` is included, the Shape IDs will be validated (and set to -1 if invalid). +Also if an ``image`` column of Image IDs is included, an ``Image Name`` column will be added. +NB: Columns of type ``shape`` aren't yet supported on the OMERO.server. + +Alternatively, if the target is an Image, the ROI input column can be ``Roi Name`` (with type ``s``), and an ``roi`` type column will be appended containing ROI IDs. In this case, it is required that ROIs on the Image in OMERO have the ``Name`` attribute set. image.csv:: - # header roi,l,d,l - Roi,object,probability,area - 501,1,0.8,250 - 502,1,0.9,500 - 503,1,0.2,25 - 503,2,0.8,400 - 503,3,0.5,200 + # header roi,l,l,d,l + Roi,shape,object,probability,area + 501,1066,1,0.8,250 + 502,1067,2,0.9,500 + 503,1068,3,0.2,25 + 503,1069,4,0.8,400 + 503,1070,5,0.5,200 This will create an OMERO.table linked to the Image like this: -=== ====== =========== ==== ======== -Roi object probability area Roi Name -=== ====== =========== ==== ======== -501 1 0.8 250 Sample1 -502 1 0.9 500 Sample2 -503 1 0.2 25 Sample3 -503 2 0.8 400 Sample3 -503 3 0.5 200 Sample3 -=== ====== =========== ==== ======== - -Note that the ROI-level ``OMERO.table`` is not visible in the OMERO.web UI right-hand panel, but can be visualized by clicking the "eye" on the bulk annotation attachment on the Image. +=== ===== ====== =========== ==== ======== +Roi shape object probability area Roi Name +=== ===== ====== =========== ==== ======== +501 1066 1 0.8 250 Sample1 +502 1067 2 0.9 500 Sample2 +503 1068 3 0.2 25 Sample3 +503 1069 4 0.8 400 Sample3 +503 1070 5 0.5 200 Sample3 +=== ===== ====== =========== ==== ======== + +Note that the ROI-level data from an ``OMERO.table`` is not visible +in the OMERO.web UI right-hand panel under the ``Tables`` tab, +but the table can be visualized by clicking the "eye" on the bulk annotation attachment on the Image. 
Developer install ================= diff --git a/setup.py b/setup.py index cdc649f8..745aee7d 100644 --- a/setup.py +++ b/setup.py @@ -92,7 +92,7 @@ def read(fname): return open(os.path.join(os.path.dirname(__file__), fname)).read() -version = '0.9.1.dev0' +version = '0.10.1.dev0' url = "https://github.com/ome/omero-metadata/" setup( diff --git a/src/omero_metadata/populate.py b/src/omero_metadata/populate.py index e5e38d56..a4f9013b 100644 --- a/src/omero_metadata/populate.py +++ b/src/omero_metadata/populate.py @@ -248,6 +248,7 @@ def create_columns_image(self): return self._create_columns("image") def _create_columns(self, klass): + target_class = self.target_object.__class__ if self.types is not None and len(self.types) != len(self.headers): message = "Number of columns and column types not equal." raise MetadataError(message) @@ -303,7 +304,7 @@ def _create_columns(self, klass): self.DEFAULT_COLUMN_SIZE, list())) # Ensure ImageColumn is named "Image" column.name = "Image" - if column.__class__ is RoiColumn: + if column.__class__ is RoiColumn and target_class != DatasetI: append.append(StringColumn(ROI_NAME_COLUMN, '', self.DEFAULT_COLUMN_SIZE, list())) # Ensure RoiColumn is named 'Roi' @@ -441,7 +442,7 @@ def resolve(self, column, value, row): try: return images_by_id[int(value)].id.val except KeyError: - log.debug('Image Id: %i not found!' % (value)) + log.debug('Image Id: %s not found!' 
% (value)) return -1 return if WellColumn is column_class: @@ -453,6 +454,10 @@ def resolve(self, column, value, row): return self.wrapper.resolve_dataset(column, row, value) if RoiColumn is column_class: return self.wrapper.resolve_roi(column, row, value) + # Only Long 'shape' columns hold Shape IDs; wrapper support varies + if column_as_lower == 'shape' and column_class is LongColumn \ + and hasattr(self.wrapper, 'resolve_shape'): + return self.wrapper.resolve_shape(value) if column_as_lower in ('row', 'column') \ and column_class is LongColumn: try: @@ -769,8 +772,36 @@ def __init__(self, value_resolver): super(DatasetWrapper, self).__init__(value_resolver) self.images_by_id = dict() self.images_by_name = dict() + self.rois_by_id = None + self.shapes_by_id = None self._load() + def resolve_roi(self, column, row, value): + # Support Dataset table with known ROI IDs + if self.rois_by_id is None: + self._load_rois() + try: + return self.rois_by_id[int(value)].id.val + except KeyError: + log.warn('Dataset is missing ROI: %s' % value) + return -1 + except ValueError: + log.warn('Wrong input type for ROI ID: %s' % value) + return -1 + + def resolve_shape(self, value): + # Support Dataset table with known Shape IDs + if self.rois_by_id is None: + self._load_rois() + try: + return self.shapes_by_id[int(value)].id.val + except KeyError: + log.warn('Dataset is missing Shape: %s' % value) + return -1 + except ValueError: + log.warn('Wrong input type for Shape ID: %s' % value) + return -1 + def get_image_id_by_name(self, iname, did=None): return self.images_by_name[iname].id.val @@ -812,12 +843,48 @@ def _load(self): images_by_id[iid] = image if iname in self.images_by_name: raise Exception("Image named %s(id=%d) present. 
(id=%s)" % ( - iname, self.images_by_name[iname], iid + iname, self.images_by_name[iname].id.val, iid )) self.images_by_name[iname] = image self.images_by_id[self.target_object.id.val] = images_by_id log.debug('Completed parsing dataset: %s' % self.target_name) + def _load_rois(self): + log.debug('Loading ROIs in Dataset:%d' % self.target_object.id.val) + self.rois_by_id = {} + self.shapes_by_id = {} + query_service = self.client.getSession().getQueryService() + parameters = omero.sys.ParametersI() + parameters.addId(self.target_object.id.val) + data = list() + while True: + parameters.page(len(data), 1000) + rv = unwrap(query_service.projection(( + 'select distinct i, r, s ' + 'from Shape s ' + 'join s.roi as r ' + 'join r.image as i ' + 'join i.datasetLinks as dil ' + 'join dil.parent as d ' + 'where d.id = :id order by s.id desc'), + parameters, {'omero.group': '-1'})) + if len(rv) == 0: + break + else: + data.extend(rv) + if not data: + raise MetadataError("No ROIs on images in target Dataset") + + for image, roi, shape in data: + # we only care about *IDs* of ROIs and Shapes in the Dataset + rid = roi.id.val + sid = shape.id.val + self.rois_by_id[rid] = roi + self.shapes_by_id[sid] = shape + + log.debug('Completed loading ROIs and Shapes in Dataset: %s' + % self.target_object.id.val) + class ProjectWrapper(PDIWrapper): @@ -906,6 +973,7 @@ class ImageWrapper(ValueWrapper): def __init__(self, value_resolver): super(ImageWrapper, self).__init__(value_resolver) self.rois_by_id = dict() + self.shapes_by_id = dict() self.rois_by_name = dict() self.ambiguous_naming = False self._load() @@ -916,15 +984,25 @@ def get_roi_id_by_name(self, rname): def get_roi_name_by_id(self, rid): return unwrap(self.rois_by_id[rid].name) + def resolve_shape(self, value): + try: + return self.shapes_by_id[int(value)].id.val + except KeyError: + log.warn('Image is missing Shape: %s' % value) + return -1 + except ValueError: + log.warn('Wrong input type for Shape ID: %s' % value) + return 
-1 + def resolve_roi(self, column, row, value): try: return self.rois_by_id[int(value)].id.val except KeyError: log.warn('Image is missing ROI: %s' % value) - return Skip() + return -1 except ValueError: log.warn('Wrong input type for ROI ID: %s' % value) - return Skip() + return -1 def _load(self): query_service = self.client.getSession().getQueryService() @@ -942,9 +1020,10 @@ def _load(self): while True: parameters.page(len(data), 1000) rv = query_service.findAllByQuery(( - 'select distinct r from Image as i ' - 'join i.rois as r ' - 'where i.id = :id order by r.id desc'), + 'select distinct s from Shape as s ' + 'join s.roi as r ' + 'join r.image as i ' + 'where i.id = :id order by s.id desc'), parameters, {'omero.group': '-1'}) if len(rv) == 0: break @@ -955,15 +1034,20 @@ def _load(self): rois_by_id = dict() rois_by_name = dict() - for roi in data: + shapes_by_id = dict() + for shape in data: + roi = shape.roi rid = roi.id.val rois_by_id[rid] = roi + shapes_by_id[shape.id.val] = shape - if unwrap(roi.name) in rois_by_name.keys(): + # One row per Shape: only a different ROI with this name conflicts + if rois_by_name.get(unwrap(roi.name), roi).id.val != rid: log.warn('Conflicting ROI names.') self.ambiguous_naming = True rois_by_name[unwrap(roi.name)] = roi self.rois_by_id = rois_by_id self.rois_by_name = rois_by_name + self.shapes_by_id = shapes_by_id log.debug('Completed parsing image: %s' % self.target_name) @@ -1155,8 +1238,8 @@ def preprocess_data(self, reader): if isinstance(value, basestring): column.size = max( column.size, len(value.encode('utf-8'))) - # The following are needed for - # getting post process column sizes + # The following IDs are needed for + # post_process() to get column sizes for names if column.__class__ is WellColumn: column.values.append(value) elif column.__class__ is ImageColumn: @@ -1171,6 +1254,8 @@ def preprocess_data(self, reader): log.error('Original value "%s" now "%s" of bad type!' 
% ( original_value, value)) raise + # we call post_process on each single (mostly empty) row + # to get ids -> names self.post_process() for column in self.columns: column.values = [] diff --git a/test/integration/metadata/test_populate.py b/test/integration/metadata/test_populate.py index a70b1ead..efcb998d 100644 --- a/test/integration/metadata/test_populate.py +++ b/test/integration/metadata/test_populate.py @@ -796,7 +796,7 @@ def __init__(self): ) self.image = None self.rois = None - self.names = ("roi1", "roi2") + self.roi_names = ("roi1", "roi2") self.table_name = None def assert_columns(self, columns): @@ -805,8 +805,7 @@ def assert_columns(self, columns): assert col_names == ",".join([c.name for c in columns]) def assert_row_count(self, rows): - # Hard-coded in createCsv's arguments - assert rows == 2 + assert rows == len(self.roi_names) def get_target(self): if not self.image: @@ -821,7 +820,7 @@ def create_rois(self): if not self.image: return [] rois = [] - for roi_name in self.names: + for roi_name in self.roi_names: roi = RoiI() roi.name = rstring(roi_name) roi.setImage(ImageI(self.image.id.val, False)) @@ -851,6 +850,142 @@ def assert_child_annotations(self, oas): assert len(oas) == 0 +class RoiIdsInImage(Image2Rois): + + def __init__(self): + self.count = 6 + self.ann_count = 0 + self.image = None + self.rois = None + self.roi_names = ("nucleus", "ER", "nucleolus") + self.table_name = None + # csv is created on demand, after ROIs created so we know IDs + self.csv = None + + def get_csv(self): + if self.csv is None: + # need ROI IDs... 
+ self.get_target() + row_data = [] + row_idx = 0 + for roi in self.rois: + for shape in roi.copyShapes(): + ids = [roi.id.val, shape.id.val] + row_data.append("%s,%s,Cell,0.5,100" % tuple(ids)) + # test handling of invalid IDs + # set either shape or roi ID to be invalid + ids[row_idx % 2] = 1 + row_data.append("%s,%s,Cell,0.5,100" % tuple(ids)) + row_idx += 1 + self.csv = self.create_csv( + # shape columns identified by name not type + col_names="Roi,shape,Feature,RoiArea,Count", + row_data=row_data, + header="# header roi,l,s,d,l" + ) + return self.csv + + def assert_columns(self, columns): + # Adds a new 'Roi Name' column + col_names = "Roi,shape,Feature,RoiArea,Count,Roi Name" + assert col_names == ",".join([c.name for c in columns]) + + def assert_child_annotations(self, oas): + assert len(oas) == 0 + + def assert_row_count(self, rows): + # we have 2 csv rows per ROI (one row is invalid) + assert rows == len(self.roi_names) * 2 + + +class RoiIdsInDataset(RoiIdsInImage): + """Tests roi column with ROI IDs in a Dataset""" + + def __init__(self): + self.count = 7 + self.shapes_per_roi = 3 + self.ann_count = 0 + self.dataset = None + self.rois = None + self.roi_names = ("nucleus", "ER", "nucleolus") + self.table_name = None + # csv is created on demand, after ROIs created so we know IDs + self.csv = None + + def get_target(self): + if not self.dataset: + dataset = self.create_dataset(names=["ImageOne", "ImageTwo"]) + self.set_name(dataset, "DatasetWithROIs") + # reload dataset to avoid unloaded exceptions etc. + self.dataset = self.test.client.sf.getQueryService().get( + 'Dataset', dataset.id.val) + self.rois = self.create_rois() + return self.dataset + + def get_csv(self): + if self.csv is None: + # need ROI IDs... 
+ self.get_target() + row_data = [] + row_idx = 0 + for roi in self.rois: + for shape in roi.copyShapes(): + ids = [roi.id.val, shape.id.val, roi.image.id.val] + row_data.append("%s,%s,%s,Cell,0.5,100" % tuple(ids)) + # test handling of invalid IDs + # set either shape, roi or image ID to be invalid + ids[row_idx % 3] = 1 + row_data.append("%s,%s,%s,Cell,0.5,100" % tuple(ids)) + row_idx += 1 + self.csv = self.create_csv( + col_names="Roi,shape,Image,Feature,RoiArea,Count", + row_data=row_data, + header="# header roi,l,image,s,d,l" + ) + return self.csv + + def create_rois(self): + if not self.dataset: + return [] + rois = [] + conn = BlitzGateway(client_obj=self.test.client) + ds = conn.getObject("Dataset", self.dataset.id) + for image in ds.listChildren(): + for roi_name in self.roi_names: + roi = RoiI() + roi.name = rstring(roi_name) + roi.setImage(ImageI(image.id, False)) + for count in range(self.shapes_per_roi): + point = PointI() + point.x = rdouble(count * 10) + point.y = rdouble(10) + roi.addShape(point) + rois.append(roi) + us = self.test.client.sf.getUpdateService() + return us.saveAndReturnArray(rois) + + def assert_columns(self, columns): + # Adds a new 'Image Name' column as we have an 'image' ID column + # but NOT 'Roi Name' as above for Image + # see https://github.com/ome/omero-metadata/issues/65 + col_names = "Roi,shape,Image,Feature,RoiArea,Count,Image Name" + assert col_names == ",".join([c.name for c in columns]) + + def assert_row_count(self, rows): + # we have 2 csv rows per Shape (one row is invalid) + assert rows == len(self.rois) * self.shapes_per_roi * 2 + + def get_annotations(self): + query = """select d from Dataset d + left outer join fetch d.annotationLinks links + left outer join fetch links.child + where d.id=%s""" % self.dataset.id.val + qs = self.test.client.sf.getQueryService() + ds = qs.findByQuery(query, None) + anns = ds.linkedAnnotationList() + return anns + + class Image2RoisNoNan(Image2Rois): """ Tests that creating 
LongColumn or DoubleColumn with empty value @@ -1189,6 +1324,8 @@ def teardown_method(self, method): class TestPopulateMetadata(TestPopulateMetadataHelper): METADATA_FIXTURES = ( + RoiIdsInDataset(), + RoiIdsInImage(), Screen2Plates(), Plate2Wells(), Dataset2Images(),