Fix implicit ordering of columns in server base

Previously, just calling .stack() would reorder the second-level column values, and the results were then reassigned names in the order those values/tags were provided in the original multi-level column dataframe, leading to the wrong feature names being assigned to the wrong data.
equinor · Aug 2, 2019 · e49340d · e49340d
1 parent 17cdd4b
commit e49340d
Show file tree

Hide file tree

Showing 3 changed files with 22 additions and 11 deletions.
diff --git a/gordo_components/server/views/base.py b/gordo_components/server/views/base.py
@@ -320,8 +320,18 @@ def multi_lvl_column_dataframe_to_dict(df: pd.DataFrame) -> typing.List[dict]:
             # Stack the dataframe so second level column names become second level indexs
             df.stack()
             # For each column now, unstack the previous second level names (which are now the indexes of the series)
-            # back into a dataframe with those names, and convert to list
-            .apply(lambda col: col.unstack().dropna(axis=1).values.tolist())
+            # back into a dataframe with those names, and convert to list; if it's a Series we'll need to reshape it
+            .apply(
+                lambda col: col.reindex(df[col.name].columns, level=1)
+                .unstack()
+                .dropna(axis=1)
+                .values.tolist()
+                if isinstance(df[col.name], pd.DataFrame)
+                else col.unstack()
+                .rename(columns={"": col.name})[col.name]
+                .values.reshape(-1, 1)
+                .tolist()
+            )
         )
 
         results: typing.List[dict] = []

diff --git a/tests/gordo_components/client/test_client.py b/tests/gordo_components/client/test_client.py
@@ -412,21 +412,22 @@ def _endpoint_metadata(name: str, healthy: bool) -> EndpointMetadata:
     )
 
 
-def test_ml_server_dataframe_to_dict_and_back():
+@pytest.mark.parametrize("tags", [["C", "A", "B", "D"], tu.SENSORS_STR_LIST])
+def test_ml_server_dataframe_to_dict_and_back(tags: typing.List[str]):
     """
     Tests the flow of the server creating a dataframe from the model's data, putting into
-    a dict of string to lists of values, and the client being able to reconstruct it back
+    a dict of string to df. lists of values, and the client being able to reconstruct it back
     to the original dataframe (less the second level names)
     """
     # Some synthetic data
-    original_input = np.random.random((10, len(tu.SENSORTAG_LIST)))
-    model_output = np.random.random((10, len(tu.SENSORTAG_LIST)))
-    transformed_model_input = np.random.random((10, len(tu.SENSORTAG_LIST)))
-    inverse_transformed_model_output = np.random.random((10, len(tu.SENSORTAG_LIST)))
+    original_input = np.random.random((10, len(tags)))
+    model_output = np.random.random((10, len(tags)))
+    transformed_model_input = np.random.random((10, len(tags)))
+    inverse_transformed_model_output = np.random.random((10, len(tags)))
 
     # Convert this data into a dataframe with multi index columns
     df = BaseModelView.make_base_dataframe(
-        tu.SENSORTAG_LIST,
+        tags,
         original_input,
         model_output,
         transformed_model_input,

diff --git a/tests/gordo_components/server/test_anomaly_view.py b/tests/gordo_components/server/test_anomaly_view.py
@@ -49,13 +49,13 @@ def test_anomaly_prediction_endpoint(
     # start and end dates, because the server can't know what those are
     assert "start" in record
     assert (
-        len(record["start"]) == 0
+        record["start"][0] is None
         if data_to_post is not None
         else isinstance(record["start"][0], str)
     )
     assert "end" in record
     assert (
-        len(record["end"]) == 0
+        record["end"][0] is None
         if data_to_post is not None
         else isinstance(record["end"][0], str)
     )