-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Single source for resource and column names #15
Changes from all commits
7f9a8c6
a3ce9be
21ef58b
74c8b93
05a6047
91841d9
0a69a1d
fb27964
5f6e393
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,10 @@ | ||
# import needed libraries | ||
import collections | ||
import csv | ||
import io | ||
import os | ||
from collections import defaultdict | ||
|
||
import datapackage as dp | ||
import jsontableschema | ||
from jsontableschema import infer | ||
import dpkg.names as names | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Use relative imports within the package? |
||
|
||
|
||
def create_dpkg(top_level_dict, dict_, directory, joint_id): | ||
|
@@ -29,25 +26,22 @@ def create_dpkg(top_level_dict, dict_, directory, joint_id): | |
|
||
# the objects block # | ||
key = 'objects' | ||
objects_table = dict_.get(key) | ||
path = key + '.csv' | ||
with io.open(directory + os.sep + key + '.csv') as stream: | ||
headers = stream.readline().rstrip('\n').split(',') | ||
values = csv.reader(stream) | ||
schema = infer(headers, values, row_limit=50, | ||
primary_key=joint_id) | ||
referenced_resource = key + 'Table' | ||
|
||
myDP.descriptor['resources'].append( | ||
{"name": key + 'Table', | ||
{"name": names.OBJECTS_TABLE_NAME, | ||
"path": path, | ||
"schema": schema, | ||
} | ||
) | ||
|
||
# the links block # | ||
key = 'links' | ||
links_table = dict_.get(key) | ||
path = key + '.csv' | ||
with io.open(directory + os.sep + key + '.csv') as stream: | ||
headers = stream.readline().rstrip('\n').split(',') | ||
|
@@ -57,13 +51,13 @@ def create_dpkg(top_level_dict, dict_, directory, joint_id): | |
"fields": joint_id, | ||
"reference": { | ||
"datapackage": "", | ||
"resource": referenced_resource, | ||
"resource": names.OBJECTS_TABLE_NAME, | ||
"fields": joint_id | ||
} | ||
}] | ||
|
||
myDP.descriptor['resources'].append( | ||
{"name": key + 'Table', | ||
{"name": names.LINKS_TABLE_NAME, | ||
"path": path, | ||
"schema": schema, | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
OBJECTS_TABLE_NAME = "objects_table" | ||
LINKS_TABLE_NAME = "links_table" | ||
TRACKS_TABLE_NAME = "tracks_table" | ||
|
||
X_COORD_NAME = "x_coord_cmso" | ||
Y_COORD_NAME = "y_coord_cmso" | ||
Z_COORD_NAME = "z_coord_cmso" | ||
FRAME_NAME = "frame_cmso" | ||
OBJECT_NAME = "object_id_cmso" | ||
LINK_NAME = "link_id_cmso" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @simleo - the fields in this […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This file (does not necessarily have to be called […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. +1 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,8 @@ | |
import xlrd | ||
from xlrd import XLRDError | ||
|
||
import dpkg.names as names | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Use relative imports within the package? |
||
|
||
|
||
def xls_to_csv(xls_file): | ||
"""Utility function to read Excel files.""" | ||
|
@@ -248,23 +250,25 @@ def read_cellprofiler(cp_file, track_dict): | |
cp_df = pd.read_csv(cp_file) | ||
# dictionary for the objects | ||
objects_dict = {} | ||
x = track_dict.get('x_coord_cmso') | ||
y = track_dict.get('y_coord_cmso') | ||
x = track_dict.get(names.X_COORD_NAME) | ||
y = track_dict.get(names.Y_COORD_NAME) | ||
frame = track_dict.get(names.FRAME_NAME) | ||
obj_id = track_dict.get(names.OBJECT_NAME) | ||
# parse the digits used for the tracking settings (e.g. 15) | ||
digits = x.split('_')[2] | ||
# sort the dataframe by [track_id, ImageNumber] | ||
# sort the dataframe by [track_id, frame] | ||
track_id = 'TrackObjects_Label_' + digits | ||
cp_df = cp_df.sort_values([track_id, 'ImageNumber']) | ||
cp_df = cp_df.sort_values([track_id, frame]) | ||
|
||
parent_obj_id = 'TrackObjects_ParentObjectNumber_' + digits | ||
parent_img_id = 'TrackObjects_ParentImageNumber_' + digits | ||
# create new Object identifiers | ||
cp_df.reset_index(inplace = True) | ||
for index, row in cp_df.iterrows(): | ||
objects_dict[index] = [row.ImageNumber, row[x], row[y]] | ||
objects_dict[index] = [row[frame], row[x], row[y]] | ||
|
||
objects_df = pd.DataFrame([[key, value[0], value[1], value[2]] for key, value in objects_dict.items()], columns= | ||
["ObjectID", "ImageNumber", x, y]) | ||
[obj_id, frame, x, y]) | ||
|
||
# dictionary for the links | ||
links_dict = {} | ||
|
@@ -284,7 +288,7 @@ def read_cellprofiler(cp_file, track_dict): | |
parentObject = row[parent_obj_id] | ||
|
||
for j, r in tmp.iterrows(): | ||
if (r.ObjectNumber == parentObject) and (r.ImageNumber == parentImage): | ||
if (r.ObjectNumber == parentObject) and (r[frame] == parentImage): | ||
unique_parent_object = j | ||
break | ||
|
||
|
@@ -306,7 +310,7 @@ def read_cellprofiler(cp_file, track_dict): | |
for key, value in links_dict.items(): | ||
for object_ in value: | ||
links_df = links_df.append([[key, object_]]) | ||
links_df.columns = ['LINK_ID', 'ObjectID'] | ||
links_df.columns = [track_dict.get(names.LINK_NAME), obj_id] | ||
|
||
return (objects_df, links_df) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,81 @@ | ||
{"resources": [{"path": "objects.csv", "name": "objectsTable", "schema": {"primaryKey": "ObjectID", "fields": [{"format": "default", "constraints": {"unique": true}, "description": "", "title": "", "name": "ObjectID", "type": "integer"}, {"name": "ImageNumber", "format": "default", "description": "", "title": "", "type": "number"}, {"name": "TrackObjects_TrajectoryX_15", "format": "default", "description": "", "title": "", "type": "number"}, {"name": "TrackObjects_TrajectoryY_15", "format": "default", "description": "", "title": "", "type": "number"}]}}, {"path": "links.csv", "name": "linksTable", "schema": {"foreignKeys": [{"reference": {"datapackage": "", "resource": "objectsTable", "fields": "ObjectID"}, "fields": "ObjectID"}], "fields": [{"name": "LINK_ID", "format": "default", "description": "", "title": "", "type": "integer"}, {"name": "ObjectID", "format": "default", "description": "", "title": "", "type": "integer"}]}}], "author_email": "[email protected]", "title": "A CMSO data package representation of cell tracking data", "author_institute": "VIB", "author": "paola masuzzo", "name": "CMSO_tracks"} | ||
{ | ||
"author": "paola masuzzo", | ||
"author_email": "[email protected]", | ||
"author_institute": "VIB", | ||
"name": "CMSO_tracks", | ||
"resources": [ | ||
{ | ||
"name": "objects_table", | ||
"path": "objects.csv", | ||
"schema": { | ||
"fields": [ | ||
{ | ||
"constraints": { | ||
"unique": true | ||
}, | ||
"description": "", | ||
"format": "default", | ||
"name": "ObjectID", | ||
"title": "", | ||
"type": "integer" | ||
}, | ||
{ | ||
"description": "", | ||
"format": "default", | ||
"name": "ImageNumber", | ||
"title": "", | ||
"type": "number" | ||
}, | ||
{ | ||
"description": "", | ||
"format": "default", | ||
"name": "TrackObjects_TrajectoryX_15", | ||
"title": "", | ||
"type": "number" | ||
}, | ||
{ | ||
"description": "", | ||
"format": "default", | ||
"name": "TrackObjects_TrajectoryY_15", | ||
"title": "", | ||
"type": "number" | ||
} | ||
], | ||
"primaryKey": "ObjectID" | ||
} | ||
}, | ||
{ | ||
"name": "links_table", | ||
"path": "links.csv", | ||
"schema": { | ||
"fields": [ | ||
{ | ||
"description": "", | ||
"format": "default", | ||
"name": "LINK_ID", | ||
"title": "", | ||
"type": "integer" | ||
}, | ||
{ | ||
"description": "", | ||
"format": "default", | ||
"name": "ObjectID", | ||
"title": "", | ||
"type": "integer" | ||
} | ||
], | ||
"foreignKeys": [ | ||
{ | ||
"fields": "ObjectID", | ||
"reference": { | ||
"datapackage": "", | ||
"fields": "ObjectID", | ||
"resource": "objects_table" | ||
} | ||
} | ||
] | ||
} | ||
} | ||
], | ||
"title": "A CMSO data package representation of cell tracking data" | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Going towards spec compliance, nice!