From ba78e67c895f47984d03cb41c552efc1ae92cdfc Mon Sep 17 00:00:00 2001 From: Oliver Cliff Date: Fri, 17 Dec 2021 10:49:31 +1100 Subject: [PATCH] Changed MPI->multivariate for computing each SPI to match the bivariate signature. Also fixed the pdist signature (it was adjacency) --- demos/demo.py | 21 +++++++++++++++++++-- pyspi/base.py | 6 +++--- pyspi/calculator.py | 12 ++++++------ pyspi/statistics/basic.py | 2 +- pyspi/statistics/causal.py | 2 +- pyspi/statistics/distance.py | 2 +- pyspi/statistics/spectral.py | 8 ++++---- pyspi/statistics/wavelet.py | 4 ++-- test/test_statistics.py | 14 +++++++------- 9 files changed, 44 insertions(+), 27 deletions(-) diff --git a/demos/demo.py b/demos/demo.py index 4e9b06d..501910e 100644 --- a/demos/demo.py +++ b/demos/demo.py @@ -7,14 +7,31 @@ import seaborn as sns -calc = Calculator(dataset=load_dataset('forex')) +# Load one of our stored datasets +dataset = load_dataset('forex') + +# visualize the dataset as a heat map (also called a temporal raster plot or carpet plot) +plt.pcolormesh(dataset.to_numpy(squeeze=True),cmap='coolwarm',vmin=-2,vmax=2) +plt.show() + +# Instantiate the calculator (inputting the dataset) +calc = Calculator(dataset=dataset) + +# Compute all SPIs (this may take a while) calc.compute() + +# Now, we can access all of the matrices by calling calc.table. +# This property will be an Nx(SN) pandas dataframe, where N is the number of processes in the dataset and S is the number of SPIs +print(calc.table) + +# We can use this to compute the correlation between all of the methods on this dataset... corrmat = calc.table.stack().corr(method='spearman').abs() +# ...and plot this correlation matrix sns.set(font_scale=0.5) g = sns.clustermap( corrmat.fillna(0), mask=corrmat.isna(), center=0.0, cmap='RdYlBu_r', xticklabels=1, yticklabels=1 ) plt.setp(g.ax_heatmap.xaxis.get_majorticklabels(), rotation=45, ha='right') -plt.show() +plt.show() \ No newline at end of file diff --git a/pyspi/base.py b/pyspi/base.py index 75df47c..9be30a9 100644 --- a/pyspi/base.py +++ b/pyspi/base.py @@ -89,7 +89,7 @@ def bivariate(self,data,i=None,j=None): raise NotImplementedError("Method not yet overloaded.") @parse_multivariate - def mpi(self,data): + def multivariate(self,data): """ Compute the dependency statistics for the entire multivariate dataset """ A = np.empty((data.n_processes,data.n_processes)) @@ -135,8 +135,8 @@ def ispositive(self): return False @parse_multivariate - def mpi(self,data): - A = super(undirected,self).mpi(data) + def multivariate(self,data): + A = super(undirected,self).multivariate(data) li = np.tril_indices(data.n_processes,-1) A[li] = A.T[li] diff --git a/pyspi/calculator.py b/pyspi/calculator.py index 8def463..d213aeb 100644 --- a/pyspi/calculator.py +++ b/pyspi/calculator.py @@ -16,7 +16,7 @@ """ TODO: use the MPI class for each entry in the table """ -class MPI(): +class multivariate(): def __init__(self, procnames, S=None): if S is None: S = np.full((len(procnames),len(procnames)),np.nan) @@ -158,14 +158,14 @@ def compute(self,replication=None): replication = 0 pbar = tqdm(self.spis.keys()) - for m in pbar: - pbar.set_description(f'Processing [{self._name}: {m}]') + for spi in pbar: + pbar.set_description(f'Processing [{self._name}: {spi}]') start_time = time.time() try: - self._table[m] = self._spis[m].mpi(self.dataset) + self._table[spi] = self._spis[spi].multivariate(self.dataset) except Exception as err: - warnings.warn(f'Caught {type(err)} for SPI "{self._statnames[m]}": {err}') - self._table[m] = np.NaN + warnings.warn(f'Caught {type(err)} for SPI "{spi}": {err}') + self._table[spi] = np.NaN pbar.close() def rmmin(self): diff --git a/pyspi/statistics/basic.py b/pyspi/statistics/basic.py index 2494c7f..15f7cc4 100644 --- a/pyspi/statistics/basic.py +++ b/pyspi/statistics/basic.py @@ -42,7 +42,7 @@ def _from_cache(self,data): return mycov @parse_multivariate - def mpi(self,data): + def multivariate(self,data): mycov = self._from_cache(data) matrix = getattr(mycov,self._kind+'_') np.fill_diagonal(matrix,np.nan) diff --git a/pyspi/statistics/causal.py b/pyspi/statistics/causal.py index 4cc925d..ecbcf70 100644 --- a/pyspi/statistics/causal.py +++ b/pyspi/statistics/causal.py @@ -120,7 +120,7 @@ def _from_cache(self,data): return ccmf @parse_multivariate - def mpi(self,data): + def multivariate(self,data): ccmf = self._from_cache(data) if self._statistic == 'mean': diff --git a/pyspi/statistics/distance.py b/pyspi/statistics/distance.py index 3489147..d6abc7c 100644 --- a/pyspi/statistics/distance.py +++ b/pyspi/statistics/distance.py @@ -20,7 +20,7 @@ def __init__(self,metric='euclidean',**kwargs): self.name += f'_{metric}' @parse_multivariate - def adjacency(self,data): + def multivariate(self,data): return pairwise_distances(data.to_numpy(squeeze=True),metric=self._metric) """ TODO: include optional kernels in each method diff --git a/pyspi/statistics/spectral.py b/pyspi/statistics/spectral.py index b6bfc0b..d22e2d1 100644 --- a/pyspi/statistics/spectral.py +++ b/pyspi/statistics/spectral.py @@ -77,7 +77,7 @@ def _get_cache(self,data): return res, freq @parse_multivariate - def mpi(self, data): + def multivariate(self, data): adj_freq, freq = self._get_cache(data) freq_id = np.where((freq >= self._fmin) * (freq <= self._fmax))[0] @@ -299,7 +299,7 @@ def _get_statistic(self,C): # self.name = self.name + paramstr # @parse_multivariate -# def mpi(self,data): +# def multivariate(self,data): # # This should be changed to conditioning on all, rather than averaging all conditionals # if not hasattr(data,'pcoh'): # z = np.squeeze(data.to_numpy()) @@ -372,7 +372,7 @@ def _get_cache(self,data): return F, freq @parse_multivariate - def mpi(self,data): + def multivariate(self,data): try: F, freq = self._get_cache(data) freq_id = np.where((freq >= self._fmin) * (freq <= self._fmax))[0] @@ -399,7 +399,7 @@ def __init__(self,orth=False,log=False,absolute=False): self.name += '_abs' @parse_multivariate - def mpi(self, data): + def multivariate(self, data): z = np.moveaxis(data.to_numpy(),2,0) adj = np.squeeze(mnec.envelope_correlation(z,orthogonalize=self._orth,log=self._log,absolute=self._absolute)) np.fill_diagonal(adj,np.nan) diff --git a/pyspi/statistics/wavelet.py b/pyspi/statistics/wavelet.py index e33b25b..38b052e 100644 --- a/pyspi/statistics/wavelet.py +++ b/pyspi/statistics/wavelet.py @@ -59,7 +59,7 @@ def _get_cache(self,data): return conn, freq_id @parse_multivariate - def mpi(self, data): + def multivariate(self, data): adj_freq, freq_id = self._get_cache(data) try: adj = self._statfn(adj_freq[...,freq_id,:], axis=(2,3)) @@ -189,7 +189,7 @@ def _get_cache(self,data): return psi, freq_id @parse_multivariate - def mpi(self, data): + def multivariate(self, data): adj_freq, freq_id = self._get_cache(data) adj = self._statfn(np.real(adj_freq[...,freq_id]), axis=(2,3)) diff --git a/test/test_statistics.py b/test/test_statistics.py index 06afe8c..2c7a1f2 100644 --- a/test/test_statistics.py +++ b/test/test_statistics.py @@ -53,7 +53,7 @@ def test_yaml(): assert calc.n_statistics == len(calc._statistics), ( 'Property not equal to number of statistics') -def test_mpi(): +def test_multivariate(): # Load in all base statistics from the YAML file data = get_data() @@ -81,14 +81,14 @@ def test_mpi(): if any([m.name == e for e in excuse_stochastic]): continue - m.mpi(get_more_data()) + m.multivariate(get_more_data()) - scratch_adj = m.mpi(data.to_numpy()) - adj = m.mpi(data) + scratch_adj = m.multivariate(data.to_numpy()) + adj = m.multivariate(data) assert np.allclose(adj,scratch_adj,rtol=1e-1,atol=1e-2,equal_nan=True), ( f'{m.name} ({m.humanname}) mpi output changed between cached and strach computations: {adj} != {scratch_adj}') - recomp_adj = m.mpi(data) + recomp_adj = m.multivariate(data) assert np.allclose(adj,recomp_adj,rtol=1e-1,atol=1e-2,equal_nan=True), ( f'{m.name} ({m.humanname}) mpi output changed when recomputing.') @@ -111,7 +111,7 @@ def test_mpi(): assert t_s == pytest.approx(new_t_s,rel=1e-1,abs=1e-2), ( f'{m.name} ({m.humanname}) Bivariate output from cache mismatch results from scratch for computation ({j},{i}): {t_s} != {new_t_s}') except NotImplementedError: - a = m.mpi(p[[i,j]]) + a = m.multivariate(p[[i,j]]) s_t, t_s = a[0,1], a[1,0] if not math.isfinite(s_t): @@ -206,7 +206,7 @@ def test_group(): test_yaml() test_load() test_group() - test_mpi() + test_multivariate() # This was a bit tricky to implement so just ensuring it passes a test from the creator's website test_ccm() \ No newline at end of file