Skip to content

Commit 6741d61

Browse files
committed
used black to reformat
1 parent 4093c50 commit 6741d61

19 files changed

+1147
-660
lines changed

diff_viz/data_loading.py

+84-53
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,25 @@
44

55
import pandas as pd
66
import numpy as np
7-
#import diff_predictor
7+
8+
# import diff_predictor
9+
810

911
def check_mpt_data(df, expected_columns):
1012
"""
1113
Checks that a pandas DataFrame has at least one row of data and contains specific columns.
12-
14+
1315
Parameters
1416
-----------
1517
df : pandas.DataFrame
1618
The DataFrame to check.
1719
expected_columns : list
1820
A list of column names that the DataFrame is expected to have.
19-
21+
2022
Returns
2123
--------
2224
columns_present, has_data: bool
23-
True if the DataFrame contains at least one row of data and all of the expected columns,
25+
True if the DataFrame contains at least one row of data and all of the expected columns,
2426
False otherwise.
2527
"""
2628
# Check that all of the expected columns are present
@@ -31,82 +33,108 @@ def check_mpt_data(df, expected_columns):
3133
# Return True if both the expected columns and data are present
3234
return columns_present and has_data
3335

34-
def clean_mpt_data(df, features_to_keep='default', target_column=None):
36+
37+
def clean_mpt_data(df, features_to_keep="default", target_column=None):
3538
"""
3639
Cleans a pandas DataFrame containing MPT data.
37-
40+
3841
Parameters
3942
-----------
4043
df : pandas.DataFrame
4144
The DataFrame to clean.
42-
45+
4346
Returns
4447
--------
4548
df: pandas.DataFrame
4649
The cleaned DataFrame.
4750
"""
4851

4952
default_feature_list = [
50-
'alpha', # Fitted anomalous diffusion alpha exponent
51-
'D_fit', # Fitted anomalous diffusion coefficient
52-
'kurtosis', # Kurtosis of track
53-
'asymmetry1', # Asymmetry of trajectory (0 for circularly symmetric, 1 for linear)
54-
'asymmetry2', # Ratio of the smaller to larger principal radius of gyration
55-
'asymmetry3', # An asymmetry feature that accounts for non-cylindrically symmetric point distributions
56-
'AR', # Aspect ratio of long and short side of trajectory's minimum bounding rectangle
57-
'elongation', # Est. of amount of extension of trajectory from centroid
58-
'boundedness', # How much a particle with Deff is restricted by a circular confinement of radius r
59-
'fractal_dim', # Measure of how complicated a self similar figure is
60-
'trappedness', # Probability that a particle with Deff is trapped in a region
61-
'efficiency', # Ratio of squared net displacement to the sum of squared step lengths
62-
'straightness', # Ratio of net displacement to the sum of step lengths
63-
'MSD_ratio', # MSD ratio of the track
64-
'Deff1', # Effective diffusion coefficient at 0.33 s
65-
'Deff2', # Effective diffusion coefficient at 3.3 s
66-
'Mean alpha',
67-
'Mean D_fit',
68-
'Mean kurtosis',
69-
'Mean asymmetry1',
70-
'Mean asymmetry2',
71-
'Mean asymmetry3',
72-
'Mean AR',
73-
'Mean elongation',
74-
'Mean boundedness',
75-
'Mean fractal_dim',
76-
'Mean trappedness',
77-
'Mean efficiency',
78-
'Mean straightness',
79-
'Mean MSD_ratio',
80-
'Mean Deff1',
81-
'Mean Deff2',
53+
"alpha", # Fitted anomalous diffusion alpha exponent
54+
"D_fit", # Fitted anomalous diffusion coefficient
55+
"kurtosis", # Kurtosis of track
56+
"asymmetry1", # Asymmetry of trajecory (0 for circular symmetric, 1 for linear)
57+
"asymmetry2", # Ratio of the smaller to larger principal radius of gyration
58+
"asymmetry3", # An asymmetric feature that accnts for non-cylindrically symmetric pt distributions
59+
"AR", # Aspect ratio of long and short side of trajectory's minimum bounding rectangle
60+
"elongation", # Est. of amount of extension of trajectory from centroid
61+
"boundedness", # How much a particle with Deff is restricted by a circular confinement of radius r
62+
"fractal_dim", # Measure of how complicated a self similar figure is
63+
"trappedness", # Probability that a particle with Deff is trapped in a region
64+
"efficiency", # Ratio of squared net displacement to the sum of squared step lengths
65+
"straightness", # Ratio of net displacement to the sum of squared step lengths
66+
"MSD_ratio", # MSD ratio of the track
67+
"Deff1", # Effective diffusion coefficient at 0.33 s
68+
"Deff2", # Effective diffusion coefficient at 3.3 s
69+
"Mean alpha",
70+
"Mean D_fit",
71+
"Mean kurtosis",
72+
"Mean asymmetry1",
73+
"Mean asymmetry2",
74+
"Mean asymmetry3",
75+
"Mean AR",
76+
"Mean elongation",
77+
"Mean boundedness",
78+
"Mean fractal_dim",
79+
"Mean trappedness",
80+
"Mean efficiency",
81+
"Mean straightness",
82+
"Mean MSD_ratio",
83+
"Mean Deff1",
84+
"Mean Deff2",
8285
]
8386

8487
if target_column is not None:
8588
assert target_column in df.columns, "Target column not in DataFrame"
8689
assert df[target_column].notna().all(), "Target column contains NaN values"
8790

88-
if features_to_keep == 'default' and target_column is None: #user wants all default features
91+
if (
92+
features_to_keep == "default" and target_column is None
93+
): # user wants all default features
8994
df = df[default_feature_list]
90-
df = df[~df[list(set(default_feature_list)-set(['Deff2', 'Mean Deff2']))].isin([np.inf, np.nan, -np.inf]).any(axis=1)]
91-
elif features_to_keep == 'default' and target_column is not None: #user wants all default features and target column
95+
df = df[
96+
~df[list(set(default_feature_list) - set(["Deff2", "Mean Deff2"]))]
97+
.isin([np.inf, np.nan, -np.inf])
98+
.any(axis=1)
99+
]
100+
elif (
101+
features_to_keep == "default" and target_column is not None
102+
): # user wants all default features and target column
92103
df = df[default_feature_list + [target_column]]
93-
df = df[~df[list(set(default_feature_list)-set(['Deff2', 'Mean Deff2']))].isin([np.inf, np.nan, -np.inf]).any(axis=1)]
94-
elif features_to_keep != 'default' and target_column is None: #user wants specific features
104+
df = df[
105+
~df[list(set(default_feature_list) - set(["Deff2", "Mean Deff2"]))]
106+
.isin([np.inf, np.nan, -np.inf])
107+
.any(axis=1)
108+
]
109+
elif (
110+
features_to_keep != "default" and target_column is None
111+
): # user wants specific features
95112
df = df[features_to_keep]
96-
df = df[~df[list(set(features_to_keep)-set(['Deff2', 'Mean Deff2']))].isin([np.inf, np.nan, -np.inf]).any(axis=1)]
113+
df = df[
114+
~df[list(set(features_to_keep) - set(["Deff2", "Mean Deff2"]))]
115+
.isin([np.inf, np.nan, -np.inf])
116+
.any(axis=1)
117+
]
97118
else:
98-
df = df[features_to_keep + [target_column]] #user wants specific features and target column
99-
df = df[~df[list(set(features_to_keep)-set(['Deff2', 'Mean Deff2']))].isin([np.inf, np.nan, -np.inf]).any(axis=1)]
119+
df = df[
120+
features_to_keep + [target_column]
121+
] # user wants specific features and target column
122+
df = df[
123+
~df[list(set(features_to_keep) - set(["Deff2", "Mean Deff2"]))]
124+
.isin([np.inf, np.nan, -np.inf])
125+
.any(axis=1)
126+
]
100127

101-
df = df.fillna(0) #setting any Deff2, Mean Deff2, to 0
128+
df = df.fillna(0) # setting any Deff2, Mean Deff2, to 0
102129
# This may also fill NA target columns with 0, which may not be desired
103-
130+
104131
return df
105132

106-
def combine_csvs(file_list, class_list, features_to_keep='default', target_column=None):
133+
134+
def combine_csvs(file_list, class_list, features_to_keep="default", target_column=None):
107135
"""
108136
Combines multiple CSV files into a single DataFrame.
109-
137+
110138
Parameters
111139
-----------
112140
file_list : list
@@ -116,7 +144,7 @@ def combine_csvs(file_list, class_list, features_to_keep='default', target_colum
116144
target_column : str
117145
The name of the target column to keep in the combined DataFrame.
118146
119-
147+
120148
Returns
121149
--------
122150
df: pandas.DataFrame
@@ -131,10 +159,13 @@ def combine_csvs(file_list, class_list, features_to_keep='default', target_colum
131159
df[target_column] = unique_class
132160
df_list.append(df)
133161
full_df = pd.concat(df_list)
134-
full_df = clean_mpt_data(full_df, features_to_keep=features_to_keep, target_column=target_column)
162+
full_df = clean_mpt_data(
163+
full_df, features_to_keep=features_to_keep, target_column=target_column
164+
)
135165

136166
return full_df
137167

168+
138169
def concatenate_csv_files(uploaded_files):
139170
dfs = []
140171
for uploaded_file in uploaded_files:

0 commit comments

Comments
 (0)