Skip to content

Commit

Permalink
Added option to return all metrics in triple collocation function. Generalized confidence interval function to any percentage.
Browse files Browse the repository at this point in the history
  • Loading branch information
fcollas committed Jan 11, 2024
1 parent 86f2a68 commit 2598178
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 60 deletions.
4 changes: 2 additions & 2 deletions tests/test_triple_collocation.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def test_triple_collocation_simulated_data():
assert len(tc_result['data_sources'][ref].keys()) == 6


def test_ci_95():
def test_ci():

n = 1500
T = [np.sin(0.2 * i) + 2 for i in range(n)]
Expand Down Expand Up @@ -74,7 +74,7 @@ def test_ci_95():
dict_data = {'X': X, 'Y': Y, 'Z': Z}
ref = 'X'

ci_95 = tc.bootstrap_ci_95(dict_data)
ci_95 = tc.bootstrap_ci(dict_data)

assert isinstance(ci_95, dict)
assert set(dict_data.keys()) == set(ci_95.keys())
Expand Down
120 changes: 62 additions & 58 deletions wavy/triple_collocation.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,17 +219,17 @@ def SNR_dB(A, B, C, var_err_A):
return 10*np.log10(sensitivity_estimates(A, B, C)/var_err_A)


def triple_collocation_validate(result_dict,
metric_list=['var_est','rmse','si',
'rho','mean','std'],
def triple_collocation_validate(result_dict,
metric_list=['var_est', 'rmse', 'si',
'rho', 'mean', 'std'],
ref=None):
'''
Runs the triple collocation given a dictionary
containing three measurements, returns results
in a dictionary.
results_dict: {'name of measurement':list of values}
metric_list: List of the metrics to return, among 'var_est',
metric_list: Str "all" or List of the metrics to return, among 'var_est',
'rmse', 'si', 'rho', 'sens', 'snr', 'snr_db', 'fmse', 'mean',
'std'
ref: Name of one of the measurements, must correspond
Expand All @@ -241,23 +241,23 @@ def triple_collocation_validate(result_dict,
measure_names = list(result_dict.keys())
measures = list(result_dict.values())

if ref == None:
if ref is None:
ref = measures_names[0]
print('No reference was specified.',
ref, 'was set as the reference.')
print('No reference was specified.',
ref, 'was set as the reference.')

tc_validate = {'data_sources':{key:{} for key in measure_names},
'reference_dataset':ref}
tc_validate = {'data_sources': {key: {} for key in measure_names},
'reference_dataset': ref}

mean_ref = np.mean(result_dict[ref])

A = measures[0]
B = measures[1]
C = measures[2]

cov_ab = np.cov(A,B)
cov_bc = np.cov(B,C)
cov_ac = np.cov(A,C)
cov_ab = np.cov(A, B)
cov_bc = np.cov(B, C)
cov_ac = np.cov(A, C)

# Sensitivity
sens = [(cov_ab[0][1]*cov_ac[0][1])/cov_bc[0][1],
Expand All @@ -266,63 +266,63 @@ def triple_collocation_validate(result_dict,

# Estimate of the variance of random error
var_est = [cov_ab[0][0] - sens[0],
cov_ab[1][1] - sens[1],
cov_bc[1][1] - sens[2]]
cov_ab[1][1] - sens[1],
cov_bc[1][1] - sens[2]]

# Root Mean Square Error
rmse = [np.sqrt(var_est[0]),
np.sqrt(var_est[1]),
np.sqrt(var_est[2])]

# Scatter Index
if 'si' in metric_list:
if 'si' in metric_list or metric_list == 'all':
si = [rmse[0]/mean_ref*100,
rmse[1]/mean_ref*100,
rmse[2]/mean_ref*100]
rmse[1]/mean_ref*100,
rmse[2]/mean_ref*100]

# Signal to Noise Ratio
snr = [sens[0]/var_est[0],
sens[1]/var_est[1],
sens[2]/var_est[2]]

# Fractional Mean Squared Error
if 'fmse' in metric_list:
if 'fmse' in metric_list or metric_list == 'all':
fmse = [1/(1+snr[0]),
1/(1+snr[1]),
1/(1+snr[2])]

# Signal to Noise Ratio (dB)
if 'snr_db' in metric_list:
if 'snr_db' in metric_list or metric_list == 'all':
snr_db = [10*np.log10(snr[0]),
10*np.log10(snr[1]),
10*np.log10(snr[2])]

# Data truth correlation
if 'rho' in metric_list:
if 'rho' in metric_list or metric_list == 'all':
rho = [sens[0]/cov_ab[0][0],
sens[1]/cov_ab[1][1],
sens[2]/cov_bc[1][1]]

for i,k in enumerate(measure_names):
if 'var_est' in metric_list:
for i, k in enumerate(measure_names):
if 'var_est' in metric_list or metric_list == 'all':
tc_validate['data_sources'][k]['var_est'] = var_est[i]
if 'rmse' in metric_list:
if 'rmse' in metric_list or metric_list == 'all':
tc_validate['data_sources'][k]['RMSE'] = rmse[i]
if 'si' in metric_list:
if 'si' in metric_list or metric_list == 'all':
tc_validate['data_sources'][k]['SI'] = si[i]
if 'sens' in metric_list:
if 'sens' in metric_list or metric_list == 'all':
tc_validate['data_sources'][k]['sensitivity'] = sens[i]
if 'rho' in metric_list:
if 'rho' in metric_list or metric_list == 'all':
tc_validate['data_sources'][k]['rho'] = rho[i]
if 'snr' in metric_list:
if 'snr' in metric_list or metric_list == 'all':
tc_validate['data_sources'][k]['SNR'] = snr[i]
if 'fmse' in metric_list:
if 'fmse' in metric_list or metric_list == 'all':
tc_validate['data_sources'][k]['fMSE'] = fmse[i]
if 'snr_db' in metric_list:
if 'snr_db' in metric_list or metric_list == 'all':
tc_validate['data_sources'][k]['SNR_dB'] = snr_db[i]
if 'mean' in metric_list:
if 'mean' in metric_list or metric_list == 'all':
tc_validate['data_sources'][k]['mean'] = np.mean(measures[i])
if 'std' in metric_list:
if 'std' in metric_list or metric_list == 'all':
tc_validate['data_sources'][k]['std'] = np.std(measures[i])

return tc_validate
Expand Down Expand Up @@ -350,23 +350,24 @@ def disp_tc_validation(tc_validate, dec=3):
print(format_row.format(ds, *row))

print("\n The reference for the SI is:", ref)


def bootstrap_ci_95(result_dict,
sample_size=None,
metric='var_est',
n_bootstrap=100,
ref=None):

def bootstrap_ci(result_dict,
conf=95.,
sample_size=None,
metric='var_est',
n_bootstrap=100,
ref=None):
'''
Calculate 95% confidence intervals for a given estimate
returned by triple_collocation_validate function, using
bootstrap sampling.
bootstrap sampling.
results_dict: {'name of measurement':list of values}
sample_size: size of the bootstrap samples that will
be drawn from the data.
metric: metric for which the confidence interval is
required. Must be one of the list that can be given
be drawn from the data.
metric: metric for which the confidence interval is
required. Must be one of the list that can be given
to triple_collocation_validate metric_list argument.
n_bootstrap: number of generated bootstrap samples.
ref: Name of one of the measurements, must correspond
Expand All @@ -379,33 +380,36 @@ def bootstrap_ci_95(result_dict,
'''
measure_names = list(result_dict.keys())
measures = list(result_dict.values())
if ref == None:

if ref is None:
ref = measure_names[0]
if sample_size == None:
if sample_size is None:
sample_size = len(measures[0])

indexes = np.arange(0,len(measures[0]))

dict_res_tc = {m:[] for m in measure_names}

indexes = np.arange(0, len(measures[0]))

dict_res_tc = {m: [] for m in measure_names}

for i in range(n_bootstrap):

rand_ind_tmp = random.choices(indexes, k=sample_size)
result_dict_tmp = {key:[result_dict[key][j] for j in rand_ind_tmp] for\
result_dict_tmp = {key: [result_dict[key][j] for j in rand_ind_tmp] for
key in measure_names}
tc_validate_tmp = triple_collocation_validate(result_dict_tmp,
metric_list=[metric],
ref=ref)

for m in measure_names:
dict_res_tc[m].append(list(tc_validate_tmp['data_sources'][m].values())[0])

dict_ci = {m:{} for m in measure_names}
for m in measure_names:
dict_res_tc[m].append(
list(tc_validate_tmp['data_sources'][m].values())[0]
)

dict_ci = {m: {} for m in measure_names}
for m in measure_names:
dict_ci[m]['mean'] = np.mean(dict_res_tc[m])
percentiles = np.percentile(dict_res_tc[m], [2.5, 97.5])
percentiles = np.percentile(dict_res_tc[m],
[(100-conf)/2, conf + (100-conf)/2])
dict_ci[m]['ci_l'] = percentiles[0]
dict_ci[m]['ci_u'] = percentiles[1]

return dict_ci

0 comments on commit 2598178

Please sign in to comment.