Skip to content

Commit

Permalink
0.0.5
Browse files Browse the repository at this point in the history
  • Loading branch information
Keoma Brun committed Apr 27, 2018
1 parent 63bfee7 commit 9b981c4
Show file tree
Hide file tree
Showing 7 changed files with 29,938 additions and 42,782 deletions.
53 changes: 33 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,26 @@ CCBY: Cassette by Alvaro Cabrera from the Noun Project
python -m k7.k7 --check myfile.k7
```

### Fill a file with missing lines

A k7 file should contain a row for every channel of each link in each transaction; missing rows can be added automatically:

```
python -m k7.k7 --fill myfile.k7
```

This will create a file called `filled_myfile.k7` that contains the missing lines.

# k7 format

```
{"location": "grenoble", "tx_length": 100, "start_date": "2018-01-11 16:32:22", "stop_date": "2018-01-13 16:21:30", "tx_count": 100, "node_count": 44, "channel_count": 16, "transaction_count": 10, "tx_ifdur": 100}
datetime,src,dst,channels,mean_rssi,pdr,transaction_id
2018-01-11 16:33:07,05-43-32-ff-03-d9-a5-68,05-43-32-ff-02-d5-25-53,11,-53.2,1.0,0
2018-01-11 16:33:07,05-43-32-ff-03-d9-a5-68,05-43-32-ff-03-db-b2-76,11,-84.03,0.97,0
2018-01-11 16:33:07,05-43-32-ff-03-d9-a5-68,05-43-32-ff-03-d7-93-78,11,-83.88,1.0,0
2018-01-11 16:33:30,05-43-32-ff-03-d6-95-84,05-43-32-ff-03-da-a9-72,11,-67.03,1.0,0
2018-01-11 16:33:30,05-43-32-ff-03-d6-95-84,05-43-32-ff-03-da-c0-81,11,-70.0,1.0,0
{"location": "grenoble", "tx_length": 100, "start_date": "2018-01-11 16:32:22", "stop_date": "2018-01-13 16:21:30", "node_count": 44, "channel_count": 16, "transaction_count": 10, "tx_ifdur": 100}
datetime,src,dst,channels,mean_rssi,pdr,tx_count,transaction_id
2018-01-11 16:33:07,05-43-32-ff-03-d9-a5-68,05-43-32-ff-02-d5-25-53,[11],-53.2,1.0,0
2018-01-11 16:33:07,05-43-32-ff-03-d9-a5-68,05-43-32-ff-03-db-b2-76,[11],-84.03,0.97,0
2018-01-11 16:33:07,05-43-32-ff-03-d9-a5-68,05-43-32-ff-03-d7-93-78,[11],-83.88,1.0,0
2018-01-11 16:33:30,05-43-32-ff-03-d6-95-84,05-43-32-ff-03-da-a9-72,[11],-67.03,1.0,0
2018-01-11 16:33:30,05-43-32-ff-03-d6-95-84,05-43-32-ff-03-da-c0-81,[11],-70.0,1.0,0
...
```

Expand All @@ -39,23 +49,26 @@ Ex:
```

## Data
| datetime | src | dst | channels | mean_rssi | pdr | transaction_id |
|---------------------|-------------|-------------|----------|-----------|-------------|----------------|
| iso8601 string | string | string | string | float | float (0-1) | int |
| datetime | src | dst | channels | mean_rssi | pdr | tx_count | transaction_id |
|---------------------|-------------|-------------|----------|-----------|-------------|----------|----------------|
| iso8601 string | string | string | list | float | float (0-1) | int | int |

### Standard example:

| datetime | src | dst | channels | mean_rssi | pdr | transaction_id |
|---------------------|-------------|-------------|----------|-----------|------|----------------|
| 2017-12-19 21:35:41 | d7-94-75 | d7-94-79 | 11 | -74.5 | 1.0 | 1 |
| datetime | src | dst | channels | mean_rssi | pdr | tx_count | transaction_id |
|---------------------|-------------|-------------|----------|-----------|------|----------|----------------|
| 2017-12-19 21:35:41 | d7-94-75 | d7-94-79 | [11] | -74.5 | 1.0 | int | 1 |

### The source or destination can be empty (i.e when measured on all the neighbors of the src):

| datetime | src | dst | channel | mean_rssi | pdr | transaction_id |
|---------------------|-------------|-------------|----------|-----------|------|----------------|
| 2017-12-19 21:35:41 | d7-94-75 | d7-94-79 | 11 | -74.5 | 0.7 | 1 |
| datetime | src | dst | channels | mean_rssi | pdr | tx_count | transaction_id |
|---------------------|-------------|-------------|----------|-----------|------|----------|----------------|
| 2017-12-19 21:35:41 | d7-94-75 | d7-94-79 | [11] | -74.5 | 0.7 | int | 1 |

### Multiple channels:

| datetime | src | dst | channels | mean_rssi | pdr | tx_count | transaction_id |
|---------------------|-------------|-------------|----------|-----------|------|----------|----------------|
| 2017-12-19 21:35:41 | d7-94-75 | d7-94-79 | [11;12] | -79.5 | 1.0 | int | 2 |

### The channel can be a range:
| datetime | src | dst | channels | mean_rssi | pdr | transaction_id |
|---------------------|-------------|-------------|----------|-----------|------|----------------|
| 2017-12-19 21:35:41 | d7-94-75 | d7-94-79 | 11-25 | -79.5 | 1.0 | 2 |
When the channel list contains more than one element, it means that the PDR and RSSI value are calculated (averaged) over multiple channels.
138 changes: 112 additions & 26 deletions k7/k7.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,17 @@
'stop_date',
'location',
]
REQUIRED_DATA_FIELDS = [
REQUIRED_DATA_FIELDS = (
'datetime',
'src',
'dst',
'channels',
'mean_rssi',
'pdr',
'transaction_id'
]
)

__version__ = "0.0.4"
__version__ = "0.0.5"

def read(file_path):
"""
Expand All @@ -38,15 +38,15 @@ def read(file_path):
with open(file_path, 'r') as f:
header = json.loads(f.readline())
else:
raise Exception("Suported file extensions are: {0}".format(["k7.gz", "k7"]))
raise Exception("Supported file extensions are: {0}".format(["k7.gz", "k7"]))

# read data
df = pd.read_csv(file_path,
parse_dates = ['datetime'],
index_col = [0], # make datetime column as index
dtype={'channels': str},
skiprows=1,
)
df = pd.read_csv(
file_path,
parse_dates = ['datetime'],
index_col = [0], # make datetime column as index
skiprows = 1
)
return header, df

def write(output_file_path, header, data):
Expand All @@ -55,8 +55,13 @@ def write(output_file_path, header, data):
:param output_file_path:
:param dict header:
:param pandas.Dataframe data:
:return:
:return: None
"""

# convert channel list to string
#data.channels = data.channels.apply(lambda x: channel_list_to_str(x))

# write to file
with open(output_file_path, 'w') as f:
# write header
json.dump(header, f)
Expand All @@ -65,45 +70,83 @@ def write(output_file_path, header, data):
# write data
data.to_csv(f)

def match(trace, source, destination, channels=None, transaction_id=None):
    """
    Find the first row of the trace that matches the given link.

    :param pandas.DataFrame trace: k7 data, indexed by datetime
    :param str source: source node identifier
    :param str destination: destination node identifier
    :param list channels: channels the measurement must cover;
        defaults to the full IEEE 802.15.4 range (11-26)
    :param int transaction_id: defaults to the smallest transaction id
        present in the trace
    :return: None | pandas.core.series.Series
    """
    # default to all 16 channels (11..26)
    if channels is None:
        channels = [c for c in range(11, 27)]

    # default to the first (smallest) transaction id present
    if transaction_id is None:
        transaction_id = trace.transaction_id.min()

    # the channels column stores strings such as "[11;12]", so the
    # requested channel list is serialized before comparing
    rows = trace[
        (trace.src == source) &
        (trace.dst == destination) &
        (trace.channels == channel_list_to_str(channels)) &
        (trace.transaction_id == transaction_id)
    ]

    if len(rows) >= 1:
        # multiple occurrences for the same transaction id can exist;
        # only the first one is returned
        return rows.iloc[0]
    else:
        return None

def fill(file_path):
    """
    Add missing PDR/RSSI rows for each link of a k7 file.

    For every (src, dst, transaction) group, a row with pdr=0 and an empty
    RSSI is appended for each channel listed in the file header but absent
    from the group. The result is written next to the input file as
    ``filled_<basename>``. Nothing is written when no row is missing.

    :param str file_path: path to the k7 file to complete
    :return: None
    """
    import os

    # NOTE(review): assumes the header contains a 'channels' list — the
    # visible REQUIRED_HEADER_FIELDS excerpt does not show it; confirm.
    header, df = read(file_path)

    missing_rows = []
    for name, group in df.groupby(["src", "dst", "transaction_id"]):
        # timestamp the synthetic rows with the group's first measurement
        first_date = group.index.min()
        src, dst, t_id = name[0], name[1], name[2]

        # channels required by the header but never measured in this group
        missing_channels = get_missing_channels(header['channels'], group)

        for c in missing_channels:
            missing_rows.append(
                {
                    "datetime": first_date,
                    "src": src,
                    "dst": dst,
                    # keep the column dtype consistent: channels are stored
                    # as strings such as "[11]", not as Python lists
                    "channels": channel_list_to_str([c]),
                    "mean_rssi": None,
                    "pdr": 0,
                    "transaction_id": t_id,
                }
            )

    if missing_rows:
        # convert missing rows into a dataframe indexed like the trace
        df_missing = pd.DataFrame(missing_rows)
        df_missing.set_index("datetime", inplace=True)

        # merge and keep rows in chronological order
        df_result = pd.concat([df, df_missing])
        df_result.sort_index(inplace=True)

        # prefix the basename only, so "data/x.k7" -> "data/filled_x.k7"
        # (a bare string prefix would point into a nonexistent directory)
        dir_name, base_name = os.path.split(file_path)
        write(os.path.join(dir_name, "filled_" + base_name), header, df_result)

def check(file_path):
"""
Check if k7 format is respected
:return:
:return: None
"""

header, df = read(file_path)
Expand All @@ -112,6 +155,42 @@ def check(file_path):
if required_header not in header:
print "Header {0} missing".format(required_header)

max_num_links = sum([i for i in range(1, header['node_count'] + 1)])
lines_per_transaction = len(header['channels']) * max_num_links
for name, group in df.groupby(["src", "dst", "transaction_id"]):
# find missing channels in group
missing_channels = get_missing_channels(header['channels'], group)
if missing_channels:
print "Channel missing for transaction {0}: {1}"\
.format(name, missing_channels)

# ========================= helpers ===========================================

def get_missing_channels(required_channels, data):
    """ Find required channels that never appear in a dataframe.

    :param list required_channels: channels that should be present (ints)
    :param pandas.DataFrame data: rows whose ``channels`` column holds
        strings such as "[11]" or "[11;12]"
    :return: the missing channels, sorted ascending
    :rtype: list
    """
    seen = set()
    for channel_str in data.channels:
        # "[11;12]" -> {11, 12}
        seen.update(int(c) for c in channel_str.strip("[]").split(';'))
    # sort so the result is deterministic (set difference order is arbitrary)
    return sorted(set(required_channels) - seen)

def channel_list_to_str(channel_list):
    """
    Serialize a channel list into its k7 string form.

    [11,12,13] --> "[11;12;13]"
    :param list channel_list: channels as ints
    :return: bracketed, semicolon-separated representation
    :rtype: str
    """
    inner = ';'.join(str(channel) for channel in channel_list)
    return '[{0}]'.format(inner)

# ========================= main ==============================================

if __name__ == "__main__":
import argparse

Expand All @@ -122,13 +201,20 @@ def check(file_path):
type=str,
dest='file_to_check',
)
parser.add_argument("--fill",
help="add missing rows",
type=str,
dest='file_to_fill',
)
parser.add_argument('-v', '--version',
action='version',
version='%(prog)s ' + __version__)
args = parser.parse_args()

# run corresponding method
if hasattr(args, "file_to_check"):
if args.file_to_check is not None:
check(args.file_to_check)
elif args.file_to_fill is not None:
fill(args.file_to_fill)
else:
print "Command {0} does not exits."
Loading

0 comments on commit 9b981c4

Please sign in to comment.