CellMigStandOrg · sbesson · Apr 27, 2017 · Apr 21, 2017 · Apr 24, 2017 · Apr 24, 2017
diff --git a/README.rst b/README.rst
@@ -5,15 +5,13 @@ This Python project aims to create a simple Python package to produce data packa
 
 Steps to follow to use the package:
 
-+ **step 1** - modify the parameters in the file *writeConfigFile.py*, go to the directory containing your tracking file and run:
++ **step 1** - install the package (note: only Python 3 is supported at the moment):
 
-.. code-block:: python
-
-  python writeConfigFile.py
+.. code-block::
 
-this will create the *.ini configuration file*
+   python setup.py install
 
-This file should look something like this:
++ **step 2** - create a ``cell_track_dpkg.ini`` configuration file and place it in the same directory as your tracking file. The file must be structured as follows:
 
 .. code-block::
 
@@ -33,11 +31,11 @@ This file should look something like this:
   link_id_cmso = the link identifier
 
 
-+  **step 2** - run:
++  **step 3** - move to the ``scripts`` directory and run:
 
 .. code-block:: python
 
-  python dpkg.py your_tracking_file
+  python create_dpkg.py your_tracking_file
 
 this will create a **dp** directory containing:
 
@@ -52,7 +50,7 @@ This last file will look something like this:
 
   {
       "resources": [{
-          "name": "objectsTable",
+          "name": "objects_table",
           "schema": {
               "primaryKey": "SPOT_ID",
               "fields": [{
@@ -86,12 +84,12 @@ This last file will look something like this:
           },
           "path": "objects.csv"
       }, {
-          "name": "linksTable",
+          "name": "links_table",
           "schema": {
               "foreignKeys": [{
                   "fields": "SPOT_ID",
                   "reference": {
-                      "resource": "objectsTable",
+                      "resource": "objects_table",
                       "fields": "SPOT_ID",
                       "datapackage": ""
                   }

diff --git a/dpkg/configuration/writeConfigFile.py b/dpkg/configuration/writeConfigFile.py
diff --git a/dpkg/createdp.py b/dpkg/createdp.py
@@ -1,13 +1,10 @@
-# import needed libraries
-import collections
 import csv
 import io
 import os
-from collections import defaultdict
 
 import datapackage as dp
-import jsontableschema
 from jsontableschema import infer
+import dpkg.names as names
 
 
 def create_dpkg(top_level_dict, dict_, directory, joint_id):
@@ -29,25 +26,22 @@ def create_dpkg(top_level_dict, dict_, directory, joint_id):
 
     # the objects block #
     key = 'objects'
-    objects_table = dict_.get(key)
     path = key + '.csv'
     with io.open(directory + os.sep + key + '.csv') as stream:
         headers = stream.readline().rstrip('\n').split(',')
         values = csv.reader(stream)
         schema = infer(headers, values, row_limit=50,
                        primary_key=joint_id)
-        referenced_resource = key + 'Table'
 
     myDP.descriptor['resources'].append(
-        {"name": key + 'Table',
+        {"name": names.OBJECTS_TABLE_NAME,
          "path": path,
          "schema": schema,
          }
     )
 
     # the links block #
     key = 'links'
-    links_table = dict_.get(key)
     path = key + '.csv'
     with io.open(directory + os.sep + key + '.csv') as stream:
         headers = stream.readline().rstrip('\n').split(',')
@@ -57,13 +51,13 @@ def create_dpkg(top_level_dict, dict_, directory, joint_id):
             "fields": joint_id,
             "reference": {
                 "datapackage": "",
-                "resource": referenced_resource,
+                "resource": names.OBJECTS_TABLE_NAME,
                 "fields": joint_id
             }
         }]
 
     myDP.descriptor['resources'].append(
-        {"name": key + 'Table',
+        {"name": names.LINKS_TABLE_NAME,
          "path": path,
          "schema": schema,
          }

diff --git a/dpkg/names.py b/dpkg/names.py
@@ -0,0 +1,10 @@
+OBJECTS_TABLE_NAME = "objects_table"
+LINKS_TABLE_NAME = "links_table"
+TRACKS_TABLE_NAME = "tracks_table"
+
+X_COORD_NAME = "x_coord_cmso"
+Y_COORD_NAME = "y_coord_cmso"
+Z_COORD_NAME = "z_coord_cmso"
+FRAME_NAME = "frame_cmso"
+OBJECT_NAME = "object_id_cmso"
+LINK_NAME = "link_id_cmso"
diff --git a/dpkg/pushtopandas.py b/dpkg/pushtopandas.py
@@ -1,8 +1,14 @@
-# import needed libraries
 import os
 import datapackage as dp
+try:
+    from datapackage.mappers import convert_path  # datapackage version 0.x
+except ImportError:
+    from datapackage.pushpull import _convert_path as convert_path
 import pandas as pd
 
+import dpkg.names as names
+
+
 def push_to_pandas(directory, object_id_cmso):
     """Push the datapackage to a pandas storage.
 
@@ -14,8 +20,8 @@ def push_to_pandas(directory, object_id_cmso):
     storage = dp.push_datapackage(descriptor=descr, backend='pandas')
     print(storage.buckets)
 
-    objects = storage['objects___objectstable']
-    links = storage['links___linkstable']
+    objects = storage[convert_path("objects.csv", names.OBJECTS_TABLE_NAME)]
+    links = storage[convert_path("links.csv", names.LINKS_TABLE_NAME)]
 
     objects.reset_index(inplace=True)
     print(objects.head()), print(links.head())

diff --git a/dpkg/readfile.py b/dpkg/readfile.py
@@ -7,6 +7,8 @@
 import xlrd
 from xlrd import XLRDError
 
+import dpkg.names as names
+
 
 def xls_to_csv(xls_file):
     """Utility function to read Excel files."""
@@ -248,23 +250,25 @@ def read_cellprofiler(cp_file, track_dict):
     cp_df = pd.read_csv(cp_file)
     # dictionary for the objects
     objects_dict = {}
-    x = track_dict.get('x_coord_cmso')
-    y = track_dict.get('y_coord_cmso')
+    x = track_dict.get(names.X_COORD_NAME)
+    y = track_dict.get(names.Y_COORD_NAME)
+    frame = track_dict.get(names.FRAME_NAME)
+    obj_id = track_dict.get(names.OBJECT_NAME)
     # parse the digits used for the tracking settings (e.g. 15)
     digits = x.split('_')[2]
-    # sort the dataframe by [track_id, ImageNumber]
+    # sort the dataframe by [track_id, frame]
     track_id = 'TrackObjects_Label_' + digits
-    cp_df = cp_df.sort_values([track_id, 'ImageNumber'])
+    cp_df = cp_df.sort_values([track_id, frame])
 
     parent_obj_id = 'TrackObjects_ParentObjectNumber_' + digits
     parent_img_id = 'TrackObjects_ParentImageNumber_' + digits
     # create new Object identifiers
     cp_df.reset_index(inplace = True)
     for index, row in cp_df.iterrows():
-        objects_dict[index] = [row.ImageNumber, row[x], row[y]]
+        objects_dict[index] = [row[frame], row[x], row[y]]
 
     objects_df = pd.DataFrame([[key, value[0], value[1], value[2]] for key, value in objects_dict.items()], columns=
-                              ["ObjectID", "ImageNumber", x, y])
+                              [obj_id, frame, x, y])
 
     # dictionary for the links
     links_dict = {}
@@ -284,7 +288,7 @@ def read_cellprofiler(cp_file, track_dict):
                 parentObject = row[parent_obj_id]
 
                 for j, r in tmp.iterrows():
-                    if (r.ObjectNumber == parentObject) and (r.ImageNumber == parentImage):
+                    if (r.ObjectNumber == parentObject) and (r[frame] == parentImage):
                         unique_parent_object = j
                         break
 
@@ -306,7 +310,7 @@ def read_cellprofiler(cp_file, track_dict):
     for key, value in links_dict.items():
         for object_ in value:
             links_df = links_df.append([[key, object_]])
-    links_df.columns = ['LINK_ID', 'ObjectID']
+    links_df.columns = [track_dict.get(names.LINK_NAME), obj_id]
 
     return (objects_df, links_df)
 

diff --git a/examples/CellProfiler/example_1/output/dp/dp.json b/examples/CellProfiler/example_1/output/dp/dp.json
@@ -1 +1,81 @@
-{"resources": [{"path": "objects.csv", "name": "objectsTable", "schema": {"primaryKey": "ObjectID", "fields": [{"format": "default", "constraints": {"unique": true}, "description": "", "title": "", "name": "ObjectID", "type": "integer"}, {"name": "ImageNumber", "format": "default", "description": "", "title": "", "type": "number"}, {"name": "TrackObjects_TrajectoryX_15", "format": "default", "description": "", "title": "", "type": "number"}, {"name": "TrackObjects_TrajectoryY_15", "format": "default", "description": "", "title": "", "type": "number"}]}}, {"path": "links.csv", "name": "linksTable", "schema": {"foreignKeys": [{"reference": {"datapackage": "", "resource": "objectsTable", "fields": "ObjectID"}, "fields": "ObjectID"}], "fields": [{"name": "LINK_ID", "format": "default", "description": "", "title": "", "type": "integer"}, {"name": "ObjectID", "format": "default", "description": "", "title": "", "type": "integer"}]}}], "author_email": "[email protected]", "title": "A CMSO data package representation of cell tracking data", "author_institute": "VIB", "author": "paola masuzzo", "name": "CMSO_tracks"}
+{
+    "author": "paola masuzzo",
+    "author_email": "[email protected]",
+    "author_institute": "VIB",
+    "name": "CMSO_tracks",
+    "resources": [
+        {
+            "name": "objects_table",
+            "path": "objects.csv",
+            "schema": {
+                "fields": [
+                    {
+                        "constraints": {
+                            "unique": true
+                        },
+                        "description": "",
+                        "format": "default",
+                        "name": "ObjectID",
+                        "title": "",
+                        "type": "integer"
+                    },
+                    {
+                        "description": "",
+                        "format": "default",
+                        "name": "ImageNumber",
+                        "title": "",
+                        "type": "number"
+                    },
+                    {
+                        "description": "",
+                        "format": "default",
+                        "name": "TrackObjects_TrajectoryX_15",
+                        "title": "",
+                        "type": "number"
+                    },
+                    {
+                        "description": "",
+                        "format": "default",
+                        "name": "TrackObjects_TrajectoryY_15",
+                        "title": "",
+                        "type": "number"
+                    }
+                ],
+                "primaryKey": "ObjectID"
+            }
+        },
+        {
+            "name": "links_table",
+            "path": "links.csv",
+            "schema": {
+                "fields": [
+                    {
+                        "description": "",
+                        "format": "default",
+                        "name": "LINK_ID",
+                        "title": "",
+                        "type": "integer"
+                    },
+                    {
+                        "description": "",
+                        "format": "default",
+                        "name": "ObjectID",
+                        "title": "",
+                        "type": "integer"
+                    }
+                ],
+                "foreignKeys": [
+                    {
+                        "fields": "ObjectID",
+                        "reference": {
+                            "datapackage": "",
+                            "fields": "ObjectID",
+                            "resource": "objects_table"
+                        }
+                    }
+                ]
+            }
+        }
+    ],
+    "title": "A CMSO data package representation of cell tracking data"
+}