4
4
5
5
import pandas as pd
6
6
import numpy as np
7
- #import diff_predictor
7
+
8
+ # import diff_predictor
9
+
8
10
9
11
def check_mpt_data (df , expected_columns ):
10
12
"""
11
13
Checks that a pandas DataFrame has at least one row of data and contains specific columns.
12
-
14
+
13
15
Parameters
14
16
-----------
15
17
df : pandas.DataFrame
16
18
The DataFrame to check.
17
19
expected_columns : list
18
20
A list of column names that the DataFrame is expected to have.
19
-
21
+
20
22
Returns
21
23
--------
22
24
columns_present, has_data: bool
23
- True if the DataFrame contains at least one row of data and all of the expected columns,
25
+ True if the DataFrame contains at least one row of data and all of the expected columns,
24
26
False otherwise.
25
27
"""
26
28
# Check that all of the expected columns are present
@@ -31,82 +33,108 @@ def check_mpt_data(df, expected_columns):
31
33
# Return True if both the expected columns and data are present
32
34
return columns_present and has_data
33
35
34
def clean_mpt_data(df, features_to_keep="default", target_column=None):
    """
    Cleans a pandas DataFrame containing MPT data.

    Keeps only the requested feature columns (plus the target column, when one
    is given), drops every row in which a checked feature is NaN or +/-inf,
    and finally fills the remaining NaN values with 0. The Deff2 and
    Mean Deff2 columns are left out of the row-dropping check, so their
    missing values are zero-filled instead of causing the row to be dropped.
    Only NaN is filled; infinite values in those two columns are left as is.

    Parameters
    -----------
    df : pandas.DataFrame
        The DataFrame to clean.
    features_to_keep : str or list-like of str
        "default" selects the built-in feature list below. Any other string is
        treated as a single column name; any other iterable is used as the
        list of column names to keep.
    target_column : str, optional
        Name of a column to keep alongside the features. It must exist in
        ``df`` and must not contain NaN values.

    Returns
    --------
    df: pandas.DataFrame
        The cleaned DataFrame.

    Raises
    ------
    AssertionError
        If ``target_column`` is given but is missing from ``df`` or contains
        NaN values.
    """

    # NOTE(review): the names below are reproduced exactly as they appear in
    # the reviewed source, including the leading space on most of them --
    # confirm they match the real CSV headers.
    default_feature_list = [
        " alpha",  # Fitted anomalous diffusion alpha exponent
        " D_fit",  # Fitted anomalous diffusion coefficient
        " kurtosis",  # Kurtosis of track
        " asymmetry1",  # Asymmetry of trajectory (0 for circular symmetric, 1 for linear)
        " asymmetry2",  # Ratio of the smaller to larger principal radius of gyration
        " asymmetry3",  # An asymmetric feature that accounts for non-cylindrically symmetric pt distributions
        "AR",  # Aspect ratio of long and short side of trajectory's minimum bounding rectangle
        " elongation",  # Est. of amount of extension of trajectory from centroid
        " boundedness",  # How much a particle with Deff is restricted by a circular confinement of radius r
        " fractal_dim",  # Measure of how complicated a self similar figure is
        " trappedness",  # Probability that a particle with Deff is trapped in a region
        " efficiency",  # Ratio of squared net displacement to the sum of squared step lengths
        " straightness",  # Ratio of net displacement to the sum of squared step lengths
        " MSD_ratio",  # MSD ratio of the track
        " Deff1",  # Effective diffusion coefficient at 0.33 s
        " Deff2",  # Effective diffusion coefficient at 3.3 s
        " Mean alpha",
        " Mean D_fit",
        " Mean kurtosis",
        " Mean asymmetry1",
        " Mean asymmetry2",
        " Mean asymmetry3",
        " Mean AR",
        " Mean elongation",
        " Mean boundedness",
        " Mean fractal_dim",
        " Mean trappedness",
        " Mean efficiency",
        " Mean straightness",
        " Mean MSD_ratio",
        " Mean Deff1",
        " Mean Deff2",
    ]

    # Columns whose missing values are zero-filled rather than causing the row
    # to be dropped. Names are compared after stripping whitespace so that
    # both "Deff2" and " Deff2" style headers are recognised.
    zero_filled_names = {"Deff2", "Mean Deff2"}

    # Kept as asserts so callers that catch AssertionError keep working.
    if target_column is not None:
        assert target_column in df.columns, "Target column not in DataFrame"
        assert df[target_column].notna().all(), "Target column contains NaN values"

    # Normalise the feature selection to a plain list. The isinstance check
    # avoids an element-wise comparison when an array-like is passed.
    if isinstance(features_to_keep, str):
        if features_to_keep == "default":
            features = list(default_feature_list)
        else:
            features = [features_to_keep]
    else:
        features = list(features_to_keep)

    kept_columns = features if target_column is None else features + [target_column]
    df = df[kept_columns]

    # Drop rows with NaN/inf in any checked feature; the target column and the
    # zero-filled columns are not part of this check.
    checked_columns = [
        name for name in features if str(name).strip() not in zero_filled_names
    ]
    bad_rows = df[checked_columns].isin([np.inf, np.nan, -np.inf]).any(axis=1)
    df = df[~bad_rows]

    # Remaining NaNs can only be in the zero-filled columns (the target column
    # was asserted NaN-free above); set them to 0.
    df = df.fillna(0)

    return df
105
132
106
- def combine_csvs (file_list , class_list , features_to_keep = 'default' , target_column = None ):
133
+
134
+ def combine_csvs (file_list , class_list , features_to_keep = "default" , target_column = None ):
107
135
"""
108
136
Combines multiple CSV files into a single DataFrame.
109
-
137
+
110
138
Parameters
111
139
-----------
112
140
file_list : list
@@ -116,7 +144,7 @@ def combine_csvs(file_list, class_list, features_to_keep='default', target_colum
116
144
target_column : str
117
145
The name of the target column to keep in the combined DataFrame.
118
146
119
-
147
+
120
148
Returns
121
149
--------
122
150
df: pandas.DataFrame
@@ -131,10 +159,13 @@ def combine_csvs(file_list, class_list, features_to_keep='default', target_colum
131
159
df [target_column ] = unique_class
132
160
df_list .append (df )
133
161
full_df = pd .concat (df_list )
134
- full_df = clean_mpt_data (full_df , features_to_keep = features_to_keep , target_column = target_column )
162
+ full_df = clean_mpt_data (
163
+ full_df , features_to_keep = features_to_keep , target_column = target_column
164
+ )
135
165
136
166
return full_df
137
167
168
+
138
169
def concatenate_csv_files (uploaded_files ):
139
170
dfs = []
140
171
for uploaded_file in uploaded_files :
0 commit comments