From 7f9a8c66f3fdf8299ffded1beb65d3d10bcde795 Mon Sep 17 00:00:00 2001 From: simleo Date: Fri, 21 Apr 2017 15:08:27 +0100 Subject: [PATCH 1/9] createdp: removed unused stuff --- dpkg/createdp.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/dpkg/createdp.py b/dpkg/createdp.py index 699b41a..1860fc9 100644 --- a/dpkg/createdp.py +++ b/dpkg/createdp.py @@ -1,12 +1,8 @@ -# import needed libraries -import collections import csv import io import os -from collections import defaultdict import datapackage as dp -import jsontableschema from jsontableschema import infer @@ -29,7 +25,6 @@ def create_dpkg(top_level_dict, dict_, directory, joint_id): # the objects block # key = 'objects' - objects_table = dict_.get(key) path = key + '.csv' with io.open(directory + os.sep + key + '.csv') as stream: headers = stream.readline().rstrip('\n').split(',') @@ -47,7 +42,6 @@ def create_dpkg(top_level_dict, dict_, directory, joint_id): # the links block # key = 'links' - links_table = dict_.get(key) path = key + '.csv' with io.open(directory + os.sep + key + '.csv') as stream: headers = stream.readline().rstrip('\n').split(',') From a3ce9be4931bcf2db8aba3e81ba837bef7fd5994 Mon Sep 17 00:00:00 2001 From: simleo Date: Mon, 24 Apr 2017 15:10:59 +0100 Subject: [PATCH 2/9] pretty-print json files to ease future diffs --- .../CellProfiler/example_1/output/dp/dp.json | 82 ++++++++++++++++- .../CellProfiler/example_2/output/dp/dp.json | 82 ++++++++++++++++- examples/ICY/example_1/dp/dp.json | 89 ++++++++++++++++++- examples/ICY/example_2/dp/dp.json | 89 ++++++++++++++++++- examples/TrackMate/example_1/dp/dp.json | 82 ++++++++++++++++- examples/TrackMate/example_2/dp/dp.json | 82 ++++++++++++++++- 6 files changed, 500 insertions(+), 6 deletions(-) diff --git a/examples/CellProfiler/example_1/output/dp/dp.json b/examples/CellProfiler/example_1/output/dp/dp.json index 9b8ab35..ebc35fb 100644 --- a/examples/CellProfiler/example_1/output/dp/dp.json +++ b/examples/CellProfiler/example_1/output/dp/dp.json @@ -1 +1,81 @@ -{"resources": [{"path": "objects.csv", "name": "objectsTable", "schema": {"primaryKey": "ObjectID", "fields": [{"format": "default", "constraints": {"unique": true}, "description": "", "title": "", "name": "ObjectID", "type": "integer"}, {"name": "ImageNumber", "format": "default", "description": "", "title": "", "type": "number"}, {"name": "TrackObjects_TrajectoryX_15", "format": "default", "description": "", "title": "", "type": "number"}, {"name": "TrackObjects_TrajectoryY_15", "format": "default", "description": "", "title": "", "type": "number"}]}}, {"path": "links.csv", "name": "linksTable", "schema": {"foreignKeys": [{"reference": {"datapackage": "", "resource": "objectsTable", "fields": "ObjectID"}, "fields": "ObjectID"}], "fields": [{"name": "LINK_ID", "format": "default", "description": "", "title": "", "type": "integer"}, {"name": "ObjectID", "format": "default", "description": "", "title": "", "type": "integer"}]}}], "author_email": "paola.masuzzo@email.com", "title": "A CMSO data package representation of cell tracking data", "author_institute": "VIB", "author": "paola masuzzo", "name": "CMSO_tracks"} \ No newline at end of file +{ + "author": "paola masuzzo", + "author_email": "paola.masuzzo@email.com", + "author_institute": "VIB", + "name": "CMSO_tracks", + "resources": [ + { + "name": "objectsTable", + "path": "objects.csv", + "schema": { + "fields": [ + { + "constraints": { + "unique": true + }, + "description": "", + "format": "default", + "name": "ObjectID", + "title": "", + "type": "integer" + }, + { + "description": "", + "format": "default", + "name": "ImageNumber", + "title": "", + "type": "number" + }, + { + "description": "", + "format": "default", + "name": "TrackObjects_TrajectoryX_15", + "title": "", + "type": "number" + }, + { + "description": "", + "format": "default", + "name": "TrackObjects_TrajectoryY_15", + "title": "", + "type": "number" + } + ], + "primaryKey": "ObjectID" + } + }, + { + "name": "linksTable", + "path": "links.csv", + "schema": { + "fields": [ + { + "description": "", + "format": "default", + "name": "LINK_ID", + "title": "", + "type": "integer" + }, + { + "description": "", + "format": "default", + "name": "ObjectID", + "title": "", + "type": "integer" + } + ], + "foreignKeys": [ + { + "fields": "ObjectID", + "reference": { + "datapackage": "", + "fields": "ObjectID", + "resource": "objectsTable" + } + } + ] + } + } + ], + "title": "A CMSO data package representation of cell tracking data" +} diff --git a/examples/CellProfiler/example_2/output/dp/dp.json b/examples/CellProfiler/example_2/output/dp/dp.json index beec101..ebc35fb 100644 --- a/examples/CellProfiler/example_2/output/dp/dp.json +++ b/examples/CellProfiler/example_2/output/dp/dp.json @@ -1 +1,81 @@ -{"name": "CMSO_tracks", "author_institute": "VIB", "resources": [{"schema": {"primaryKey": "ObjectID", "fields": [{"description": "", "name": "ObjectID", "type": "integer", "constraints": {"unique": true}, "title": "", "format": "default"}, {"description": "", "format": "default", "title": "", "name": "ImageNumber", "type": "number"}, {"description": "", "format": "default", "title": "", "name": "TrackObjects_TrajectoryX_15", "type": "number"}, {"description": "", "format": "default", "title": "", "name": "TrackObjects_TrajectoryY_15", "type": "number"}]}, "path": "objects.csv", "name": "objectsTable"}, {"schema": {"foreignKeys": [{"reference": {"resource": "objectsTable", "fields": "ObjectID", "datapackage": ""}, "fields": "ObjectID"}], "fields": [{"description": "", "format": "default", "title": "", "name": "LINK_ID", "type": "integer"}, {"description": "", "format": "default", "title": "", "name": "ObjectID", "type": "integer"}]}, "path": "links.csv", "name": "linksTable"}], "title": "A CMSO data package representation of cell tracking data", "author_email": "paola.masuzzo@email.com", "author": "paola masuzzo"} \ No newline at end of file +{ + "author": "paola masuzzo", + "author_email": "paola.masuzzo@email.com", + "author_institute": "VIB", + "name": "CMSO_tracks", + "resources": [ + { + "name": "objectsTable", + "path": "objects.csv", + "schema": { + "fields": [ + { + "constraints": { + "unique": true + }, + "description": "", + "format": "default", + "name": "ObjectID", + "title": "", + "type": "integer" + }, + { + "description": "", + "format": "default", + "name": "ImageNumber", + "title": "", + "type": "number" + }, + { + "description": "", + "format": "default", + "name": "TrackObjects_TrajectoryX_15", + "title": "", + "type": "number" + }, + { + "description": "", + "format": "default", + "name": "TrackObjects_TrajectoryY_15", + "title": "", + "type": "number" + } + ], + "primaryKey": "ObjectID" + } + }, + { + "name": "linksTable", + "path": "links.csv", + "schema": { + "fields": [ + { + "description": "", + "format": "default", + "name": "LINK_ID", + "title": "", + "type": "integer" + }, + { + "description": "", + "format": "default", + "name": "ObjectID", + "title": "", + "type": "integer" + } + ], + "foreignKeys": [ + { + "fields": "ObjectID", + "reference": { + "datapackage": "", + "fields": "ObjectID", + "resource": "objectsTable" + } + } + ] + } + } + ], + "title": "A CMSO data package representation of cell tracking data" +} diff --git a/examples/ICY/example_1/dp/dp.json b/examples/ICY/example_1/dp/dp.json index 40c57ec..5807070 100644 --- a/examples/ICY/example_1/dp/dp.json +++ b/examples/ICY/example_1/dp/dp.json @@ -1 +1,88 @@ -{"author": "paola masuzzo", "author_institute": "VIB", "resources": [{"name": "objectsTable", "path": "objects.csv", "schema": {"primaryKey": "OBJECT_ID", "fields": [{"constraints": {"unique": true}, "description": "", "title": "", "name": "OBJECT_ID", "type": "integer", "format": "default"}, {"name": "t", "type": "number", "format": "default", "description": "", "title": ""}, {"name": "x", "type": "number", "format": "default", "description": "", "title": ""}, {"name": "y", "type": "number", "format": "default", "description": "", "title": ""}, {"name": "z", "type": "number", "format": "default", "description": "", "title": ""}]}}, {"name": "linksTable", "path": "links.csv", "schema": {"foreignKeys": [{"reference": {"datapackage": "", "resource": "objectsTable", "fields": "OBJECT_ID"}, "fields": "OBJECT_ID"}], "fields": [{"name": "LINK_ID", "type": "integer", "format": "default", "description": "", "title": ""}, {"name": "OBJECT_ID", "type": "integer", "format": "default", "description": "", "title": ""}]}}], "title": "A CMSO data package representation of cell tracking data", "name": "CMSO_tracks", "author_email": "paola.masuzzo@email.com"} \ No newline at end of file +{ + "author": "paola masuzzo", + "author_email": "paola.masuzzo@email.com", + "author_institute": "VIB", + "name": "CMSO_tracks", + "resources": [ + { + "name": "objectsTable", + "path": "objects.csv", + "schema": { + "fields": [ + { + "constraints": { + "unique": true + }, + "description": "", + "format": "default", + "name": "OBJECT_ID", + "title": "", + "type": "integer" + }, + { + "description": "", + "format": "default", + "name": "t", + "title": "", + "type": "number" + }, + { + "description": "", + "format": "default", + "name": "x", + "title": "", + "type": "number" + }, + { + "description": "", + "format": "default", + "name": "y", + "title": "", + "type": "number" + }, + { + "description": "", + "format": "default", + "name": "z", + "title": "", + "type": "number" + } + ], + "primaryKey": "OBJECT_ID" + } + }, + { + "name": "linksTable", + "path": "links.csv", + "schema": { + "fields": [ + { + "description": "", + "format": "default", + "name": "LINK_ID", + "title": "", + "type": "integer" + }, + { + "description": "", + "format": "default", + "name": "OBJECT_ID", + "title": "", + "type": "integer" + } + ], + "foreignKeys": [ + { + "fields": "OBJECT_ID", + "reference": { + "datapackage": "", + "fields": "OBJECT_ID", + "resource": "objectsTable" + } + } + ] + } + } + ], + "title": "A CMSO data package representation of cell tracking data" +} diff --git a/examples/ICY/example_2/dp/dp.json b/examples/ICY/example_2/dp/dp.json index 675f50e..5807070 100644 --- a/examples/ICY/example_2/dp/dp.json +++ b/examples/ICY/example_2/dp/dp.json @@ -1 +1,88 @@ -{"author_email": "paola.masuzzo@email.com", "title": "A CMSO data package representation of cell tracking data", "resources": [{"path": "objects.csv", "name": "objectsTable", "schema": {"fields": [{"type": "integer", "title": "", "description": "", "format": "default", "name": "OBJECT_ID", "constraints": {"unique": true}}, {"description": "", "type": "number", "format": "default", "name": "t", "title": ""}, {"description": "", "type": "number", "format": "default", "name": "x", "title": ""}, {"description": "", "type": "number", "format": "default", "name": "y", "title": ""}, {"description": "", "type": "number", "format": "default", "name": "z", "title": ""}], "primaryKey": "OBJECT_ID"}}, {"path": "links.csv", "name": "linksTable", "schema": {"fields": [{"description": "", "type": "integer", "format": "default", "name": "LINK_ID", "title": ""}, {"description": "", "type": "integer", "format": "default", "name": "OBJECT_ID", "title": ""}], "foreignKeys": [{"fields": "OBJECT_ID", "reference": {"resource": "objectsTable", "fields": "OBJECT_ID", "datapackage": ""}}]}}], "name": "CMSO_tracks", "author_institute": "VIB", "author": "paola masuzzo"} \ No newline at end of file +{ + "author": "paola masuzzo", + "author_email": "paola.masuzzo@email.com", + "author_institute": "VIB", + "name": "CMSO_tracks", + "resources": [ + { + "name": "objectsTable", + "path": "objects.csv", + "schema": { + "fields": [ + { + "constraints": { + "unique": true + }, + "description": "", + "format": "default", + "name": "OBJECT_ID", + "title": "", + "type": "integer" + }, + { + "description": "", + "format": "default", + "name": "t", + "title": "", + "type": "number" + }, + { + "description": "", + "format": "default", + "name": "x", + "title": "", + "type": "number" + }, + { + "description": "", + "format": "default", + "name": "y", + "title": "", + "type": "number" + }, + { + "description": "", + "format": "default", + "name": "z", + "title": "", + "type": "number" + } + ], + "primaryKey": "OBJECT_ID" + } + }, + { + "name": "linksTable", + "path": "links.csv", + "schema": { + "fields": [ + { + "description": "", + "format": "default", + "name": "LINK_ID", + "title": "", + "type": "integer" + }, + { + "description": "", + "format": "default", + "name": "OBJECT_ID", + "title": "", + "type": "integer" + } + ], + "foreignKeys": [ + { + "fields": "OBJECT_ID", + "reference": { + "datapackage": "", + "fields": "OBJECT_ID", + "resource": "objectsTable" + } + } + ] + } + } + ], + "title": "A CMSO data package representation of cell tracking data" +} diff --git a/examples/TrackMate/example_1/dp/dp.json b/examples/TrackMate/example_1/dp/dp.json index 40ade44..0f29f45 100644 --- a/examples/TrackMate/example_1/dp/dp.json +++ b/examples/TrackMate/example_1/dp/dp.json @@ -1 +1,81 @@ -{"author": "paola masuzzo", "title": "A CMSO data package representation of cell tracking data", "author_email": "paola.masuzzo@email.com", "name": "CMSO_tracks", "author_institute": "VIB", "resources": [{"name": "objectsTable", "path": "objects.csv", "schema": {"fields": [{"format": "default", "constraints": {"unique": true}, "type": "integer", "title": "", "name": "SPOT_ID", "description": ""}, {"name": "FRAME", "format": "default", "description": "", "type": "integer", "title": ""}, {"name": "POSITION_X", "format": "default", "description": "", "type": "number", "title": ""}, {"name": "POSITION_Y", "format": "default", "description": "", "type": "number", "title": ""}], "primaryKey": "SPOT_ID"}}, {"name": "linksTable", "path": "links.csv", "schema": {"fields": [{"name": "LINK_ID", "format": "default", "description": "", "type": "integer", "title": ""}, {"name": "SPOT_ID", "format": "default", "description": "", "type": "integer", "title": ""}], "foreignKeys": [{"fields": "SPOT_ID", "reference": {"fields": "SPOT_ID", "datapackage": "", "resource": "objectsTable"}}]}}]} \ No newline at end of file +{ + "author": "paola masuzzo", + "author_email": "paola.masuzzo@email.com", + "author_institute": "VIB", + "name": "CMSO_tracks", + "resources": [ + { + "name": "objectsTable", + "path": "objects.csv", + "schema": { + "fields": [ + { + "constraints": { + "unique": true + }, + "description": "", + "format": "default", + "name": "SPOT_ID", + "title": "", + "type": "integer" + }, + { + "description": "", + "format": "default", + "name": "FRAME", + "title": "", + "type": "integer" + }, + { + "description": "", + "format": "default", + "name": "POSITION_X", + "title": "", + "type": "number" + }, + { + "description": "", + "format": "default", + "name": "POSITION_Y", + "title": "", + "type": "number" + } + ], + "primaryKey": "SPOT_ID" + } + }, + { + "name": "linksTable", + "path": "links.csv", + "schema": { + "fields": [ + { + "description": "", + "format": "default", + "name": "LINK_ID", + "title": "", + "type": "integer" + }, + { + "description": "", + "format": "default", + "name": "SPOT_ID", + "title": "", + "type": "integer" + } + ], + "foreignKeys": [ + { + "fields": "SPOT_ID", + "reference": { + "datapackage": "", + "fields": "SPOT_ID", + "resource": "objectsTable" + } + } + ] + } + } + ], + "title": "A CMSO data package representation of cell tracking data" +} diff --git a/examples/TrackMate/example_2/dp/dp.json b/examples/TrackMate/example_2/dp/dp.json index efa29b3..0f29f45 100644 --- a/examples/TrackMate/example_2/dp/dp.json +++ b/examples/TrackMate/example_2/dp/dp.json @@ -1 +1,81 @@ -{"resources": [{"schema": {"fields": [{"format": "default", "type": "integer", "constraints": {"unique": true}, "name": "SPOT_ID", "description": "", "title": ""}, {"format": "default", "title": "", "type": "integer", "description": "", "name": "FRAME"}, {"format": "default", "title": "", "type": "number", "description": "", "name": "POSITION_X"}, {"format": "default", "title": "", "type": "number", "description": "", "name": "POSITION_Y"}], "primaryKey": "SPOT_ID"}, "path": "objects.csv", "name": "objectsTable"}, {"schema": {"fields": [{"format": "default", "title": "", "type": "integer", "description": "", "name": "LINK_ID"}, {"format": "default", "title": "", "type": "integer", "description": "", "name": "SPOT_ID"}], "foreignKeys": [{"fields": "SPOT_ID", "reference": {"resource": "objectsTable", "fields": "SPOT_ID", "datapackage": ""}}]}, "path": "links.csv", "name": "linksTable"}], "author_institute": "VIB", "author": "paola masuzzo", "author_email": "paola.masuzzo@email.com", "name": "CMSO_tracks", "title": "A CMSO data package representation of cell tracking data"} \ No newline at end of file +{ + "author": "paola masuzzo", + "author_email": "paola.masuzzo@email.com", + "author_institute": "VIB", + "name": "CMSO_tracks", + "resources": [ + { + "name": "objectsTable", + "path": "objects.csv", + "schema": { + "fields": [ + { + "constraints": { + "unique": true + }, + "description": "", + "format": "default", + "name": "SPOT_ID", + "title": "", + "type": "integer" + }, + { + "description": "", + "format": "default", + "name": "FRAME", + "title": "", + "type": "integer" + }, + { + "description": "", + "format": "default", + "name": "POSITION_X", + "title": "", + "type": "number" + }, + { + "description": "", + "format": "default", + "name": "POSITION_Y", + "title": "", + "type": "number" + } + ], + "primaryKey": "SPOT_ID" + } + }, + { + "name": "linksTable", + "path": "links.csv", + "schema": { + "fields": [ + { + "description": "", + "format": "default", + "name": "LINK_ID", + "title": "", + "type": "integer" + }, + { + "description": "", + "format": "default", + "name": "SPOT_ID", + "title": "", + "type": "integer" + } + ], + "foreignKeys": [ + { + "fields": "SPOT_ID", + "reference": { + "datapackage": "", + "fields": "SPOT_ID", + "resource": "objectsTable" + } + } + ] + } + } + ], + "title": "A CMSO data package representation of cell tracking data" +} From 21ef58b374b1125a7bd34d4576764686af04d7ac Mon Sep 17 00:00:00 2001 From: simleo Date: Mon, 24 Apr 2017 15:49:34 +0100 Subject: [PATCH 3/9] define resource names in a module; pretty print json --- README.rst | 6 +++--- dpkg/createdp.py | 8 ++++---- dpkg/dpkg.py | 8 +++++++- dpkg/names.py | 3 +++ dpkg/pushtopandas.py | 5 +++-- examples/CellProfiler/example_1/output/dp/dp.json | 6 +++--- examples/CellProfiler/example_2/output/dp/dp.json | 6 +++--- examples/ICY/example_1/dp/dp.json | 6 +++--- examples/ICY/example_2/dp/dp.json | 6 +++--- examples/TrackMate/example_1/dp/dp.json | 6 +++--- examples/TrackMate/example_2/dp/dp.json | 6 +++--- 11 files changed, 38 insertions(+), 28 deletions(-) create mode 100644 dpkg/names.py diff --git a/README.rst b/README.rst index fcc711d..0b20edc 100644 --- a/README.rst +++ b/README.rst @@ -52,7 +52,7 @@ This last file will look something like this: { "resources": [{ - "name": "objectsTable", + "name": "objects_table", "schema": { "primaryKey": "SPOT_ID", "fields": [{ @@ -86,12 +86,12 @@ This last file will look something like this: }, "path": "objects.csv" }, { - "name": "linksTable", + "name": "links_table", "schema": { "foreignKeys": [{ "fields": "SPOT_ID", "reference": { - "resource": "objectsTable", + "resource": "objects_table", "fields": "SPOT_ID", "datapackage": "" } diff --git a/dpkg/createdp.py b/dpkg/createdp.py index 1860fc9..1d41d81 100644 --- a/dpkg/createdp.py +++ b/dpkg/createdp.py @@ -4,6 +4,7 @@ import datapackage as dp from jsontableschema import infer +import names def create_dpkg(top_level_dict, dict_, directory, joint_id): @@ -31,10 +32,9 @@ def create_dpkg(top_level_dict, dict_, directory, joint_id): values = csv.reader(stream) schema = infer(headers, values, row_limit=50, primary_key=joint_id) - referenced_resource = key + 'Table' myDP.descriptor['resources'].append( - {"name": key + 'Table', + {"name": names.OBJECTS_TABLE_NAME, "path": path, "schema": schema, } @@ -51,13 +51,13 @@ def create_dpkg(top_level_dict, dict_, directory, joint_id): "fields": joint_id, "reference": { "datapackage": "", - "resource": referenced_resource, + "resource": names.OBJECTS_TABLE_NAME, "fields": joint_id } }] myDP.descriptor['resources'].append( - {"name": key + 'Table', + {"name": names.LINKS_TABLE_NAME, "path": path, "schema": schema, } diff --git a/dpkg/dpkg.py b/dpkg/dpkg.py index ef94319..595bcbf 100644 --- a/dpkg/dpkg.py +++ b/dpkg/dpkg.py @@ -3,6 +3,7 @@ import math import os import sys +import json import numpy as np import pandas as pd @@ -17,6 +18,11 @@ # global variable - file name from the command line f = sys.argv[1] + +def to_json(dp): + return json.dumps(dp.to_dict(), indent=4, sort_keys=True) + + def lookAndReadConfigFile(): """Looks for configuration file and tries to read it. """ @@ -54,7 +60,7 @@ def lookAndReadConfigFile(): # write the dp.json to file with open(directory + os.sep + 'dp.json', 'w') as f_json: - f_json.write(dp.to_json()) + f_json.write(to_json(dp) + '\n') print(">>> json file written to directory") # push to pandas diff --git a/dpkg/names.py b/dpkg/names.py new file mode 100644 index 0000000..fff1ed6 --- /dev/null +++ b/dpkg/names.py @@ -0,0 +1,3 @@ +OBJECTS_TABLE_NAME = "objects_table" +LINKS_TABLE_NAME = "links_table" +TRACKS_TABLE_NAME = "tracks_table" diff --git a/dpkg/pushtopandas.py b/dpkg/pushtopandas.py index 0d4082b..e00886a 100644 --- a/dpkg/pushtopandas.py +++ b/dpkg/pushtopandas.py @@ -14,8 +14,9 @@ def push_to_pandas(directory, object_id_cmso): storage = dp.push_datapackage(descriptor=descr, backend='pandas') print(storage.buckets) - objects = storage['objects___objectstable'] - links = storage['links___linkstable'] + # FIXME: the following is hardwired + objects = storage['objects___objects_table'] + links = storage['links___links_table'] objects.reset_index(inplace=True) print(objects.head()), print(links.head()) diff --git a/examples/CellProfiler/example_1/output/dp/dp.json b/examples/CellProfiler/example_1/output/dp/dp.json index ebc35fb..a0ee785 100644 --- a/examples/CellProfiler/example_1/output/dp/dp.json +++ b/examples/CellProfiler/example_1/output/dp/dp.json @@ -5,7 +5,7 @@ "name": "CMSO_tracks", "resources": [ { - "name": "objectsTable", + "name": "objects_table", "path": "objects.csv", "schema": { "fields": [ @@ -45,7 +45,7 @@ } }, { - "name": "linksTable", + "name": "links_table", "path": "links.csv", "schema": { "fields": [ @@ -70,7 +70,7 @@ "reference": { "datapackage": "", "fields": "ObjectID", - "resource": "objectsTable" + "resource": "objects_table" } } ] diff --git a/examples/CellProfiler/example_2/output/dp/dp.json b/examples/CellProfiler/example_2/output/dp/dp.json index ebc35fb..a0ee785 100644 --- a/examples/CellProfiler/example_2/output/dp/dp.json +++ b/examples/CellProfiler/example_2/output/dp/dp.json @@ -5,7 +5,7 @@ "name": "CMSO_tracks", "resources": [ { - "name": "objectsTable", + "name": "objects_table", "path": "objects.csv", "schema": { "fields": [ @@ -45,7 +45,7 @@ } }, { - "name": "linksTable", + "name": "links_table", "path": "links.csv", "schema": { "fields": [ @@ -70,7 +70,7 @@ "reference": { "datapackage": "", "fields": "ObjectID", - "resource": "objectsTable" + "resource": "objects_table" } } ] diff --git a/examples/ICY/example_1/dp/dp.json b/examples/ICY/example_1/dp/dp.json index 5807070..5049585 100644 --- a/examples/ICY/example_1/dp/dp.json +++ b/examples/ICY/example_1/dp/dp.json @@ -5,7 +5,7 @@ "name": "CMSO_tracks", "resources": [ { - "name": "objectsTable", + "name": "objects_table", "path": "objects.csv", "schema": { "fields": [ @@ -52,7 +52,7 @@ } }, { - "name": "linksTable", + "name": "links_table", "path": "links.csv", "schema": { "fields": [ @@ -77,7 +77,7 @@ "reference": { "datapackage": "", "fields": "OBJECT_ID", - "resource": "objectsTable" + "resource": "objects_table" } } ] diff --git a/examples/ICY/example_2/dp/dp.json b/examples/ICY/example_2/dp/dp.json index 5807070..5049585 100644 --- a/examples/ICY/example_2/dp/dp.json +++ b/examples/ICY/example_2/dp/dp.json @@ -5,7 +5,7 @@ "name": "CMSO_tracks", "resources": [ { - "name": "objectsTable", + "name": "objects_table", "path": "objects.csv", "schema": { "fields": [ @@ -52,7 +52,7 @@ } }, { - "name": "linksTable", + "name": "links_table", "path": "links.csv", "schema": { "fields": [ @@ -77,7 +77,7 @@ "reference": { "datapackage": "", "fields": "OBJECT_ID", - "resource": "objectsTable" + "resource": "objects_table" } } ] diff --git a/examples/TrackMate/example_1/dp/dp.json b/examples/TrackMate/example_1/dp/dp.json index 0f29f45..15eed33 100644 --- a/examples/TrackMate/example_1/dp/dp.json +++ b/examples/TrackMate/example_1/dp/dp.json @@ -5,7 +5,7 @@ "name": "CMSO_tracks", "resources": [ { - "name": "objectsTable", + "name": "objects_table", "path": "objects.csv", "schema": { "fields": [ @@ -45,7 +45,7 @@ } }, { - "name": "linksTable", + "name": "links_table", "path": "links.csv", "schema": { "fields": [ @@ -70,7 +70,7 @@ "reference": { "datapackage": "", "fields": "SPOT_ID", - "resource": "objectsTable" + "resource": "objects_table" } } ] diff --git a/examples/TrackMate/example_2/dp/dp.json b/examples/TrackMate/example_2/dp/dp.json index 0f29f45..15eed33 100644 --- a/examples/TrackMate/example_2/dp/dp.json +++ b/examples/TrackMate/example_2/dp/dp.json @@ -5,7 +5,7 @@ "name": "CMSO_tracks", "resources": [ { - "name": "objectsTable", + "name": "objects_table", "path": "objects.csv", "schema": { "fields": [ @@ -45,7 +45,7 @@ } }, { - "name": "linksTable", + "name": "links_table", "path": "links.csv", "schema": { "fields": [ @@ -70,7 +70,7 @@ "reference": { "datapackage": "", "fields": "SPOT_ID", - "resource": "objectsTable" + "resource": "objects_table" } } ] From 74c8b939c644a2d75100c1bada47b1453fd976f8 Mon Sep 17 00:00:00 2001 From: simleo Date: Mon, 24 Apr 2017 16:24:48 +0100 Subject: [PATCH 4/9] moved dp writer script out of the python package --- dpkg/createdp.py | 2 +- dpkg/dpkg.py => scripts/create_dpkg.py | 12 +++++------- 2 files changed, 6 insertions(+), 8 deletions(-) rename dpkg/dpkg.py => scripts/create_dpkg.py (95%) diff --git a/dpkg/createdp.py b/dpkg/createdp.py index 1d41d81..857fbc0 100644 --- a/dpkg/createdp.py +++ b/dpkg/createdp.py @@ -4,7 +4,7 @@ import datapackage as dp from jsontableschema import infer -import names +import dpkg.names as names def create_dpkg(top_level_dict, dict_, directory, joint_id): diff --git a/dpkg/dpkg.py b/scripts/create_dpkg.py similarity index 95% rename from dpkg/dpkg.py rename to scripts/create_dpkg.py index 595bcbf..2bb2c47 100644 --- a/dpkg/dpkg.py +++ b/scripts/create_dpkg.py @@ -1,4 +1,3 @@ -import collections import csv import math import os @@ -8,12 +7,11 @@ import numpy as np import pandas as pd -import configuration -import createdp -import plot -import pushtopandas -import readfile -from configuration import readConfigFile +import dpkg.createdp as createdp +import dpkg.plot as plot +import dpkg.pushtopandas as pushtopandas +import dpkg.readfile as readfile +from dpkg.configuration import readConfigFile # global variable - file name from the command line f = sys.argv[1] From 05a6047668b0c71ceab79a168d94c77f31b550ef Mon Sep 17 00:00:00 2001 From: simleo Date: Mon, 24 Apr 2017 16:41:34 +0100 Subject: [PATCH 5/9] remove writeConfigFile --- README.rst | 10 +------- dpkg/configuration/writeConfigFile.py | 35 --------------------------- 2 files changed, 1 insertion(+), 44 deletions(-) delete mode 100644 dpkg/configuration/writeConfigFile.py diff --git a/README.rst b/README.rst index 0b20edc..04802e7 100644 --- a/README.rst +++ b/README.rst @@ -5,15 +5,7 @@ This Python project aims to create a simple Python package to produce data packa Steps to follow to use the package: -+ **step 1** - modify the parameters in the file *writeConfigFile.py*, go to the directory containing your tracking file and run: - -.. code-block:: python - - python writeConfigFile.py - -this will create the *.ini configuration file* - -This file should look something like this: ++ **step 1** - create a `cell_track_dpkg.ini` configuration file and place it in the same directory as your tracking file. The file must be structured as follows: .. code-block:: diff --git a/dpkg/configuration/writeConfigFile.py b/dpkg/configuration/writeConfigFile.py deleted file mode 100644 index 977319a..0000000 --- a/dpkg/configuration/writeConfigFile.py +++ /dev/null @@ -1,35 +0,0 @@ -# This writes a simple configuration file with two sections -import configparser - -#### TOP LEVEL INFO SECTION ######## -title = 'example-cell-migration-tracking-file' -name = 'tracking-file-track_mate' -author = 'paola masuzzo' -author_email = 'paola.masuzzo@ugent.be' -author_institute = 'VIB' - -#### TRACKING DATA SECTION ######## -x = 'yourX' -y = 'yourY' -z = 'yourZ' -frame = 'yourFrame' -object_id = 'yourObjectID' -link_id = 'yourLinkID' - -config = configparser.ConfigParser() -config['TOP_LEVEL_INFO'] = {'title': title, - 'name': name, - 'author': author, - 'author_email': author_email, - 'author_institute': author_institute} - -config['TRACKING_DATA'] = {'x_coord_cmso': x, - 'y_coord_cmso': y, - 'z_coord_cmso': z, - 'frame_cmso': frame, - 'object_id_cmso': object_id, - 'link_id_cmso': link_id} - - -with open('cell_track_dpkg.ini', 'w') as configfile: - config.write(configfile) From 91841d9b7fc378c767d35024fc77991eb7f919ab Mon Sep 17 00:00:00 2001 From: simleo Date: Mon, 24 Apr 2017 16:57:00 +0100 Subject: [PATCH 6/9] add column header refs to the names module --- dpkg/names.py | 7 +++++++ dpkg/readfile.py | 6 ++++-- scripts/create_dpkg.py | 11 ++++++----- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/dpkg/names.py b/dpkg/names.py index fff1ed6..fe40c9f 100644 --- a/dpkg/names.py +++ b/dpkg/names.py @@ -1,3 +1,10 @@ OBJECTS_TABLE_NAME = "objects_table" LINKS_TABLE_NAME = "links_table" TRACKS_TABLE_NAME = "tracks_table" + +X_COORD_NAME = "x_coord_cmso" +Y_COORD_NAME = "y_coord_cmso" +Z_COORD_NAME = "z_coord_cmso" +FRAME_NAME = "frame_cmso" +OBJECT_NAME = "object_id_cmso" +LINK_NAME = "link_id_cmso" diff --git a/dpkg/readfile.py b/dpkg/readfile.py index 67d592a..1268c9d 100644 --- a/dpkg/readfile.py +++ b/dpkg/readfile.py @@ -7,6 +7,8 @@ import xlrd from xlrd import XLRDError +import dpkg.names as names + def xls_to_csv(xls_file): """Utility function to read Excel files.""" @@ -248,8 +250,8 @@ def read_cellprofiler(cp_file, track_dict): cp_df = pd.read_csv(cp_file) # dictionary for the objects objects_dict = {} - x = track_dict.get('x_coord_cmso') - y = track_dict.get('y_coord_cmso') + x = track_dict.get(names.X_COORD_NAME) + y = track_dict.get(names.Y_COORD_NAME) # parse the digits used for the tracking settings (e.g. 15) digits = x.split('_')[2] # sort the dataframe by [track_id, ImageNumber] diff --git a/scripts/create_dpkg.py b/scripts/create_dpkg.py index 2bb2c47..fb774b3 100644 --- a/scripts/create_dpkg.py +++ b/scripts/create_dpkg.py @@ -11,6 +11,7 @@ import dpkg.plot as plot import dpkg.pushtopandas as pushtopandas import dpkg.readfile as readfile +import dpkg.names as names from dpkg.configuration import readConfigFile # global variable - file name from the command line @@ -35,8 +36,8 @@ def lookAndReadConfigFile(): config_dict = lookAndReadConfigFile() top_level_dict = config_dict.get('TOP_LEVEL_INFO') track_dict = config_dict.get('TRACKING_DATA') -joint_id = track_dict.get('object_id_cmso') -link_id = track_dict.get('link_id_cmso') +joint_id = track_dict.get(names.OBJECT_NAME) +link_id = track_dict.get(names.LINK_NAME) # read file - returns a dictionary with objects and links dict_ = readfile.read_file(f, track_dict) @@ -77,9 +78,9 @@ def lookAndReadConfigFile(): # aggregation of tracks as well for further analytics objects_links_tracks = pd.merge(objects_links, tracks, how='outer', on=link_id) -x = track_dict.get('x_coord_cmso') -y = track_dict.get('y_coord_cmso') -frame = track_dict.get('frame_cmso') +x = track_dict.get(names.X_COORD_NAME) +y = track_dict.get(names.Y_COORD_NAME) +frame = track_dict.get(names.FRAME_NAME) # basic visualizations try: plot.prepareforplot(objects_links_tracks, x, y, frame) From 0a69a1d58548a4d2f918d222badc5cad38de5fff Mon Sep 17 00:00:00 2001 From: simleo Date: Tue, 25 Apr 2017 12:12:26 +0100 Subject: [PATCH 7/9] read_cellprofiler: fix some hardwired values; use centralized names --- dpkg/readfile.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/dpkg/readfile.py b/dpkg/readfile.py index 1268c9d..2e69940 100644 --- a/dpkg/readfile.py +++ b/dpkg/readfile.py @@ -252,21 +252,23 @@ def read_cellprofiler(cp_file, track_dict): objects_dict = {} x = track_dict.get(names.X_COORD_NAME) y = track_dict.get(names.Y_COORD_NAME) + frame = track_dict.get(names.FRAME_NAME) + obj_id = track_dict.get(names.OBJECT_NAME) # parse the digits used for the tracking settings (e.g. 15) digits = x.split('_')[2] - # sort the dataframe by [track_id, ImageNumber] + # sort the dataframe by [track_id, frame] track_id = 'TrackObjects_Label_' + digits - cp_df = cp_df.sort_values([track_id, 'ImageNumber']) + cp_df = cp_df.sort_values([track_id, frame]) parent_obj_id = 'TrackObjects_ParentObjectNumber_' + digits parent_img_id = 'TrackObjects_ParentImageNumber_' + digits # create new Object identifiers cp_df.reset_index(inplace = True) for index, row in cp_df.iterrows(): - objects_dict[index] = [row.ImageNumber, row[x], row[y]] + objects_dict[index] = [row[frame], row[x], row[y]] objects_df = pd.DataFrame([[key, value[0], value[1], value[2]] for key, value in objects_dict.items()], columns= - ["ObjectID", "ImageNumber", x, y]) + [obj_id, frame, x, y]) # dictionary for the links links_dict = {} @@ -286,7 +288,7 @@ def read_cellprofiler(cp_file, track_dict): parentObject = row[parent_obj_id] for j, r in tmp.iterrows(): - if (r.ObjectNumber == parentObject) and (r.ImageNumber == parentImage): + if (r.ObjectNumber == parentObject) and (r[frame] == parentImage): unique_parent_object = j break @@ -308,7 +310,7 @@ def read_cellprofiler(cp_file, track_dict): for key, value in links_dict.items(): for object_ in value: links_df = links_df.append([[key, object_]]) - links_df.columns = ['LINK_ID', 'ObjectID'] + links_df.columns = [track_dict.get(names.LINK_NAME), obj_id] return (objects_df, links_df) From fb2796409f4a063ebb709075ea093d507b9f8228 Mon Sep 17 00:00:00 2001 From: simleo Date: Wed, 26 Apr 2017 17:23:38 +0100 Subject: [PATCH 8/9] pushtopandas: build bucket names from resource names/paths --- dpkg/pushtopandas.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/dpkg/pushtopandas.py b/dpkg/pushtopandas.py index e00886a..d91f2ee 100644 --- a/dpkg/pushtopandas.py +++ b/dpkg/pushtopandas.py @@ -1,8 +1,14 @@ -# import needed libraries import os import datapackage as dp +try: + from datapackage.mappers import convert_path # datapackage version 0.x +except ImportError: + from datapackage.pushpull import _convert_path as convert_path import pandas as pd +import dpkg.names as names + + def push_to_pandas(directory, object_id_cmso): """Push the datapackage to a pandas storage. @@ -14,9 +20,8 @@ def push_to_pandas(directory, object_id_cmso): storage = dp.push_datapackage(descriptor=descr, backend='pandas') print(storage.buckets) - # FIXME: the following is hardwired - objects = storage['objects___objects_table'] - links = storage['links___links_table'] + objects = storage[convert_path("objects.csv", names.OBJECTS_TABLE_NAME)] + links = storage[convert_path("links.csv", names.LINKS_TABLE_NAME)] objects.reset_index(inplace=True) print(objects.head()), print(links.head()) From 5f6e39307a14458247d1614bfd536d0f4ae31ff6 Mon Sep 17 00:00:00 2001 From: simleo Date: Thu, 27 Apr 2017 09:48:31 +0100 Subject: [PATCH 9/9] update README --- README.rst | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 04802e7..c955e14 100644 --- a/README.rst +++ b/README.rst @@ -5,7 +5,13 @@ This Python project aims to create a simple Python package to produce data packa Steps to follow to use the package: -+ **step 1** - create a `cell_track_dpkg.ini` configuration file and place it in the same directory as your tracking file. The file must be structured as follows: ++ **step 1** - Install the package (note it's Python 3 only at the moment): + +.. code-block:: + + python setup.py install + ++ **step 2** - create a ``cell_track_dpkg.ini`` configuration file and place it in the same directory as your tracking file. The file must be structured as follows: .. code-block:: @@ -25,11 +31,11 @@ Steps to follow to use the package: link_id_cmso = the link identifier -+ **step 2** - run: ++ **step 3** - move to the ``scripts`` directory and run: .. code-block:: python - python dpkg.py your_tracking_file + python create_dpkg.py your_tracking_file this will create a **dp** directory containing: