|
| 1 | +from hypothesis.extra.pandas import columns, column, data_frames, range_indexes |
| 2 | +import pandas as pd |
| 3 | +import numpy as np |
| 4 | +from hypothesis import strategies as st, given, settings |
| 5 | + |
def hypothesis_features_dataframe(include_target_col=False):
    """Build a Hypothesis strategy that generates feature DataFrames.

    Every generated frame has a range index with at least 10 rows and one
    column per feature name listed below, followed by a ``'Mean <name>'``
    column for every float feature. Float columns may contain NaN but never
    infinities; ``'frames'`` holds integers in ``[0, 1000]``. All of these
    columns are declared with ``unique=True``.

    Args:
        include_target_col: When true, append an integer ``'target'`` column
            with values in ``[0, 20]`` (not required to be unique) after the
            feature columns. Defaults to ``False``, which yields the feature
            columns only.

    Returns:
        The strategy object returned by ``data_frames``; draw from it with
        ``@given`` or ``.example()`` to obtain ``pandas.DataFrame`` values.
    """
    nan_or_finite_floats = st.floats(allow_nan=True, allow_infinity=False)
    frame_counts = st.integers(min_value=0, max_value=1000)

    # NOTE(review): this mutates NumPy's global RNG state on every call.
    # Hypothesis presumably draws from its own engine rather than from
    # np.random, so this likely has no effect on the generated frames --
    # confirm before removing; it is kept so existing callers see the same
    # global side effect as before.
    np.random.seed(1234)

    # Column order matters to consumers that index positionally, so the
    # names are split around 'frames', which sits between 'MSD_ratio' and
    # 'Deff1' and has no 'Mean' counterpart.
    features_before_frames = (
        'alpha',
        'D_fit',
        'kurtosis',
        'asymmetry1',
        'asymmetry2',
        'asymmetry3',
        'AR',
        'elongation',
        'boundedness',
        'fractal_dim',
        'trappedness',
        'efficiency',
        'straightness',
        'MSD_ratio',
    )
    features_after_frames = ('Deff1', 'Deff2')
    float_features = features_before_frames + features_after_frames
    # Currently disabled features (kept for reference): 'angle_mean',
    # 'angle_mag_mean', 'angle_var', 'dist_tot', 'dist_net', 'progression'.

    def float_column(name):
        # One unique-valued float column that may contain NaN.
        return column(name, elements=nan_or_finite_floats, unique=True)

    df_columns = [float_column(name) for name in features_before_frames]
    df_columns.append(column('frames', elements=frame_counts, unique=True))
    df_columns.extend(float_column(name) for name in features_after_frames)
    df_columns.extend(float_column('Mean ' + name) for name in float_features)

    if include_target_col:
        # Mirrors the commented-out features_dataframe() draft in this file.
        df_columns.append(
            column(
                'target',
                dtype=int,
                elements=st.integers(min_value=0, max_value=20),
            )
        )

    return data_frames(index=range_indexes(min_size=10), columns=df_columns)
| 58 | + |
| 59 | +# def features_dataframe(features=categories, include_target_col=True): |
| 60 | + |
| 61 | +# data_cols = columns(names_or_number=features, dtype=float, elements=st.floats()) |
| 62 | +# position_cols = columns(names_or_number=['X', 'Y'], dtype=float, elements=st.floats(min_value=0.0, max_value=2048.0)) |
| 63 | + |
| 64 | +# df_columns = [data_cols, position_cols] |
| 65 | +# if include_target_col: |
| 66 | +# target_col = column(name='target', dtype=int, elements=st.integers(min_value=0, max_value=20)) # up to twenty-one unique targets (0-20 inclusive) |
| 67 | +# df_columns.append(target_col) |
| 68 | + |
| 69 | +# df = data_frames(columns=df_columns, index=range_indexes(min_size=10)) |
| 70 | +# return df |
| 71 | + |
# Build the feature-frame strategy once at import time (no target column)
# and expose it as the module-level name ``dfs``.
dfs = hypothesis_features_dataframe(
    include_target_col=False,
)

# Progress marker emitted once the strategy has been constructed.
print("done")
0 commit comments