Skip to content

Commit

Permalink
feature extraction added.
Browse files Browse the repository at this point in the history
  • Loading branch information
Ziaeemehr committed Dec 21, 2023
1 parent d7ec7be commit 41ede78
Show file tree
Hide file tree
Showing 10 changed files with 3,459 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/documents.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ jobs:
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
with:
python-version: 3.8
- name: Install dependencies
run: |
pip install sphinx sphinx_rtd_theme myst_parser
Expand Down
3 changes: 3 additions & 0 deletions examples/intro.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Example script: invoke the `tests` callable exposed at the top level of
# the vbi package.
from vbi import tests

tests()
1 change: 1 addition & 0 deletions vbi/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .tests.all_tests import tests
1 change: 1 addition & 0 deletions vbi/feature_extraction/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# from .tests.all_tests import tests
294 changes: 294 additions & 0 deletions vbi/feature_extraction/calc_features.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,294 @@
import os
import vbi
import sys
import tqdm
import importlib
import numpy as np
import pandas as pd
from multiprocessing import Pool
from vbi.feature_extraction.features import *
from vbi.feature_extraction.features_utils import *
from vbi.feature_extraction.features_settings import *
from vbi.feature_extraction.utility import *


def calc_features(ts,
                  fs,
                  fea_dict,
                  preprocess=None,  # preprocess function
                  preprocess_args=None,  # arguments for preprocess function
                  **kwargs):
    # window_size=None, # window size for preprocessing #!TODO
    '''
    Extract the enabled features from a single time series.

    Parameters
    ----------
    ts : nd-array
        Input from which the features are extracted.
    fs : int, float
        Sampling frequency. It replaces the value of the ``'fs'`` entry of
        every feature whose parameters contain that key.
    fea_dict : dictionary
        Features to extract, laid out as ``{domain: {name: spec}}``. Each
        spec provides ``'use'`` (the feature runs only when it equals
        ``'yes'``), ``'function'`` (a string evaluated to a callable) and
        ``'parameters'`` (a dict of keyword arguments, or None).
        An optional top-level ``'features_path'`` entry is the path of a
        Python file whose public names become available to ``'function'``.
    preprocess : callable, optional
        Called once as ``preprocess(ts, **preprocess_args)`` before any
        feature is computed.
    preprocess_args : dictionary, optional
        Keyword arguments for ``preprocess``; None means no arguments.
    **kwargs
        Accepted and ignored, so callers can forward their own options.

    Returns
    -------
    features : list
        Extracted feature values, in evaluation order.
    labels : list
        Labels of the extracted features.
    info : dictionary
        ``{name: {'index': [start, stop]}}``, the slice of ``features``
        produced by each feature.
    '''

    # Names that the 'function' strings are resolved against: this module's
    # globals, overlaid with the public names of the user module (if any).
    namespace = dict(globals())

    features_path = fea_dict.get('features_path')
    if features_path:
        module_dir = os.path.dirname(features_path)
        module_name = os.path.splitext(os.path.basename(features_path))[0]
        if module_dir not in sys.path:
            sys.path.append(module_dir)
        # Reload so edits made to the file since a previous call are used.
        module = importlib.reload(importlib.import_module(module_name))
        # Same selection rule as ``from module import *``.
        public = getattr(module, '__all__', None)
        if public is None:
            public = [name for name in vars(module)
                      if not name.startswith('_')]
        namespace.update({name: getattr(module, name) for name in public})

    # Preprocess exactly once; doing it per feature would re-apply the
    # transformation to an already transformed signal.
    if preprocess is not None:
        ts = preprocess(ts, **(preprocess_args or {}))

    labels = []
    features = []
    info = {}

    for domain, domain_feats in fea_dict.items():
        if domain == 'features_path':
            continue
        for func_name, spec in domain_feats.items():
            if spec['use'] != 'yes':
                continue

            start = len(features)
            # Work on a copy so injecting ``fs`` leaves the caller's
            # configuration untouched.
            params = dict(spec['parameters'] or {})
            if 'fs' in params:
                params['fs'] = fs

            # NOTE(security): the 'function' string is evaluated as Python
            # code; only use configurations from trusted sources.
            func = eval(spec['function'], namespace)
            val, lab = func(ts, **params)

            if isinstance(val, (np.ndarray, list)):
                labels.extend(lab)
                features.extend(val)
            else:
                labels.append(func_name)
                features.append(val)
            info[func_name] = {'index': [start, len(features)]}

    return features, labels, info


def extract_features(ts,
                     fs,
                     fea_dict,
                     output_format='list',
                     **kwargs):
    # window_size=None, # window size for preprocessing #!TODO
    '''
    Extract features from an ensemble of time series.

    Parameters
    ----------
    ts : list of np.ndarray [[n_regions x n_samples]]
        Input from which the features are extracted. It is passed through
        ``prepare_input`` and must come out three-dimensional
        (n_trials, n_regions, n_samples).
    fs : int, float
        Sampling frequency
    fea_dict : dictionary
        Dictionary of features to extract (see ``calc_features``)
    output_format : string
        Output format, either
        'list' (list of numpy arrays)
        'dataframe' (pandas dataframe)
        (default is 'list')
    **kwargs
    --------
    n_workers : int
        Number of workers for parallelization, default is 1
        Parallelization is done by ensembles (first dimension of ts)
    dtype : type
        Data type of the features extracted, default is np.float32
    verbose : boolean
        Currently unused
    preprocess : function
        Function for preprocessing the time series
    preprocess_args : dictionary
        Arguments for preprocessing function

    All keyword arguments are forwarded unchanged to ``calc_features``.
    When ``n_workers`` is not 1 they are sent to worker processes, so they
    must be picklable.

    Returns
    -------
    Data: object
        Object with the following attributes:
        values: list of numpy arrays or pandas dataframe
            extracted features
        labels: list of strings
            List of labels of the features
        info: dictionary
            Dictionary with the information of the features extracted
    '''

    n_workers = kwargs.get('n_workers', 1)
    dtype = kwargs.get('dtype', np.float32)
    # verbose = kwargs.get('verbose', False)

    ts = prepare_input(ts)
    n_trial, _, _ = ts.shape

    # Defaults keep the result well-formed when there are no trials.
    features = []
    labels = []
    info = {}

    if n_workers == 1:
        for i in tqdm.tqdm(range(n_trial)):
            fea, labels, info = calc_features(ts[i], fs, fea_dict, **kwargs)
            features.append(np.array(fea).astype(dtype))
    else:
        with Pool(processes=n_workers) as pool:
            with tqdm.tqdm(total=n_trial) as pbar:

                def update_bar(_):
                    pbar.update()

                # preprocess / preprocess_args travel inside kwargs; passing
                # them again by name would raise "multiple values" errors.
                pending = [pool.apply_async(calc_features,
                                            args=(ts[i], fs, fea_dict),
                                            kwds=dict(kwargs),
                                            callback=update_bar)
                           for i in range(n_trial)]
                # labels / info come back with every result, so no extra
                # serial pass is needed to obtain them.
                for res in pending:
                    fea, labels, info = res.get()
                    features.append(np.array(fea).astype(dtype))

    if output_format == 'dataframe':
        features = pd.DataFrame(features)
        features.columns = labels

    class Data:
        pass

    data = Data()
    data.values = features
    data.labels = labels
    data.info = info

    return data


def dataframe_feature_extractor(ts, fs, fea_dict, **kwargs):
    '''
    Run ``extract_features`` with the output format fixed to 'dataframe'.

    Parameters
    ----------
    ts : list of np.ndarray [[n_regions x n_samples]]
        Time series the features are computed from
    fs : int, float
        Sampling frequency
    fea_dict : dictionary
        Dictionary of features to extract
    **kwargs
    --------
    Forwarded unchanged to ``extract_features``, which reads:
    n_workers : int
        Number of workers for parallelization, default is 1
        Parallelization is done by ensembles (first dimension of ts)
    dtype : type
        Data type of the features extracted, default is np.float32
    preprocess : function
        Function for preprocessing the time series
    preprocess_args : dictionary
        Arguments for preprocessing function

    Returns
    -------
    Data: object
        Object with the following attributes:
        values: pandas dataframe
            extracted features, with the labels as columns
        labels: list of strings
            List of labels of the features
        info: dictionary
            Dictionary with the information of the features extracted
    '''
    return extract_features(ts, fs, fea_dict,
                            output_format='dataframe', **kwargs)


def list_feature_extractor(ts, fs, fea_dict, **kwargs):
    '''
    Run ``extract_features`` with the output format fixed to 'list'.

    Parameters
    ----------
    ts : list of np.ndarray [[n_regions x n_samples]]
        Time series the features are computed from
    fs : int, float
        Sampling frequency
    fea_dict : dictionary
        Dictionary of features to extract
    **kwargs
    --------
    Forwarded unchanged to ``extract_features``, which reads:
    n_workers : int
        Number of workers for parallelization, default is 1
        Parallelization is done by ensembles (first dimension of ts)
    dtype : type
        Data type of the features extracted, default is np.float32
    preprocess : function
        Function for preprocessing the time series
    preprocess_args : dictionary
        Arguments for preprocessing function

    Returns
    -------
    Data: object
        Object with the following attributes:
        values: list of numpy arrays
            extracted features, one array per trial
        labels: list of strings
            List of labels of the features
        info: dictionary
            Dictionary with the information of the features extracted
    '''
    return extract_features(ts, fs, fea_dict,
                            output_format='list', **kwargs)
Loading

0 comments on commit 41ede78

Please sign in to comment.