Commit 188088d

Merge pull request #60 from fact-project/fix_select_off_data

Fix select off data

maxnoe authored May 16, 2017
2 parents 7d36ce2 + 9c37eec commit 188088d
Showing 3 changed files with 66 additions and 10 deletions.
2 changes: 1 addition & 1 deletion fact/VERSION
@@ -1 +1 @@
-0.10.3
+0.10.4
3 changes: 3 additions & 0 deletions fact/analysis/__init__.py
@@ -2,6 +2,7 @@
 from .binning import ontime_binning, qla_binning, groupby_observation_blocks, bin_runs
 
 from .core import calc_run_summary_source_independent, split_on_off_source_independent
+from .core import calc_run_summary_source_dependent, split_on_off_source_dependent
 
 
 __all__ = [
@@ -11,5 +12,7 @@
     'groupby_observation_blocks',
     'bin_runs',
     'calc_run_summary_source_independent',
+    'calc_run_summary_source_dependent',
+    'split_on_off_source_dependent',
     'split_on_off_source_independent',
 ]
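
With these names exported, the new source-dependent helpers can be imported directly from the package namespace. A minimal sketch (the names come from the diff above; the grouped import style is ours):

    from fact.analysis import (
        calc_run_summary_source_dependent,
        split_on_off_source_dependent,
    )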
71 changes: 62 additions & 9 deletions fact/analysis/core.py
@@ -1,5 +1,6 @@
 import numpy as np
 import pandas as pd
+import re
 
 from .statistics import li_ma_significance
 
@@ -9,6 +10,9 @@
 )
 
 
+off_key_re = re.compile(r'([a-zA-Z0-9_]+)_Off_([0-9])')
+
+
 def calc_run_summary_source_independent(
         events, runs,
         prediction_threshold,
@@ -102,10 +106,16 @@ def split_on_off_source_independent(

     on_data = events.query('{} <= {}'.format(theta_key, theta_cut))
 
-    off_data = pd.concat([
-        events.query('{} <= {}'.format(theta_off_key, theta_cut))
-        for theta_off_key in theta_off_keys
-    ])
+    off_dfs = []
+    for region, theta_off_key in enumerate(theta_off_keys, start=1):
+        off_df = events.query('{} <= {}'.format(theta_off_key, theta_cut))
+
+        off_df['off_region'] = region
+        drop_off_columns(off_df, region, inplace=True)
+
+        off_dfs.append(off_df)
+
+    off_data = pd.concat(off_dfs)
 
     return on_data, off_data

@@ -172,6 +182,8 @@ def split_on_off_source_dependent(
 ):
     '''
     Split events dataframe into on and off region
+    For the off regions, keys are renamed to their "on" equivalents
+    and the "off" keys are dropped.
 
     Parameters
     ----------
@@ -185,11 +197,52 @@
     off_prediction_keys: list[str]
         Iterable of keys to the classifier predictions for the off regions
     '''
-    on_data = events.query('{} >= {}'.format(on_prediction_key, prediction_threshold)).copy()
+    on_data = events.query('{} >= {}'.format(
+        on_prediction_key, prediction_threshold)
+    ).copy()
 
-    off_data = pd.concat([
-        events.query('{} >= {}'.format(off_key, prediction_threshold)).copy()
-        for off_key in off_prediction_keys
-    ])
+    off_dfs = []
+    for region, off_key in enumerate(off_prediction_keys, start=1):
+        off_df = events.query('{} >= {}'.format(
+            off_key, prediction_threshold)
+        ).copy()
+
+        off_df['off_region'] = region
+
+        off_df.drop(on_prediction_key, axis=1, inplace=True)
+        off_df[on_prediction_key] = off_df[off_key]
+        off_df.drop(off_key, axis=1, inplace=True)
+
+        drop_off_columns(off_df, region, inplace=True)
+
+        off_dfs.append(off_df)
+
+    off_data = pd.concat(off_dfs)
 
     return on_data, off_data
+
+
+def drop_off_columns(df, off_region, inplace=False):
+    '''
+    Replace the "on" columns with the columns of the
+    respective off region.
+    For example for `off_region = 1`, `Theta` is replaced by
+    `Theta_Off_1` and all `Theta_Off_<N>` columns are dropped.
+    The same happens for all other columns containing the
+    pattern `_Off_<N>`.
+    '''
+    if inplace is False:
+        df = df.copy()
+
+    for col in df.columns:
+        m = off_key_re.match(col)
+        if m:
+            on_key, key_region = m.groups()
+            if int(key_region) == off_region:
+                # replace the on column with this off region's column
+                df.drop(on_key, axis=1, inplace=True)
+                df[on_key] = df[col]
+
+            df.drop(col, axis=1, inplace=True)
+
+    if inplace is False:
+        return df
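
To illustrate the new helper outside the diff: a minimal sketch, not part of the commit. It assumes drop_off_columns is reachable via fact.analysis.core and uses made-up column values:

    import pandas as pd

    from fact.analysis.core import drop_off_columns

    df = pd.DataFrame({
        'Theta': [0.5],
        'Theta_Off_1': [0.004],
        'Theta_Off_2': [0.8],
    })

    # 'Theta' is overwritten with the values of 'Theta_Off_1',
    # every '_Off_<N>' column is dropped afterwards
    out = drop_off_columns(df, off_region=1)
    print(out.columns.tolist())  # ['Theta']
    print(out['Theta'].iloc[0])  # 0.004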

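An end-to-end sketch of the fixed source-dependent split (illustrative column names and threshold, not from the commit):

    import pandas as pd

    from fact.analysis import split_on_off_source_dependent

    events = pd.DataFrame({
        'gamma_prediction': [0.9, 0.2, 0.1],
        'gamma_prediction_Off_1': [0.1, 0.95, 0.3],
        'gamma_prediction_Off_2': [0.2, 0.1, 0.97],
    })

    on_data, off_data = split_on_off_source_dependent(
        events,
        prediction_threshold=0.8,
        on_prediction_key='gamma_prediction',
        off_prediction_keys=['gamma_prediction_Off_1', 'gamma_prediction_Off_2'],
    )

    # each off event now carries its own prediction under the "on" key,
    # plus the index of the off region it came from
    print(off_data[['gamma_prediction', 'off_region']])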