diff --git a/Webpost2.html b/Webpost2.html
index 848977a..bcd65b7 100644
--- a/Webpost2.html
+++ b/Webpost2.html
@@ -7331,7 +7331,7 @@
 if (!diagrams.length) {
     return;
 }
- const mermaid = (await import("https://cdnjs.cloudflare.com/ajax/libs/mermaid/10.3.1/mermaid.esm.min.mjs")).default;
+ const mermaid = (await import("https://cdnjs.cloudflare.com/ajax/libs/mermaid/10.6.0/mermaid.esm.min.mjs")).default;
 const parser = new DOMParser();
 mermaid.initialize({
@@ -7539,7 +7539,7 @@
The first new detail is that our simulations have noise, which modifies the least squares solution. We recall from the last notebook that the functional form of the least squares solution without noise is
$$
\begin{align}
W_{out} = (X^TX)^{-1}X^TY = X^+Y
\end{align}
$$
where $+$ denotes the pseudo-inverse. Generalizing to the noisy setting, the form becomes
$$
\begin{align}
W_{out} = \langle \overline{XX^T}\rangle^+ \langle \overline{XY^T} \rangle,
\end{align}
$$
where $\langle \cdot \rangle$ is an average over the noise and $\overline{\cdot}$ is an average over the input randomness (we have been using the uniform measure). The derivation can be found in [1]; for this note it suffices to comment that the solution now depends on the covariance of the reservoir data $X$ with itself and on the covariance between the target functions $Y$ and the reservoir data. We compute this first, wrapped in a function so that we can parallelize over GPUs. We additionally chunk all computations to prevent the GPU memory from running out.

[1] Anthony M. Polloreno, Reuben R. W. Wang, Nikolas A. Tezak: “A Note on Noisy Reservoir Computation”, 2023; arXiv:2302.10862, http://arxiv.org/abs/2302.10862.
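Before the full gpu_job implementation below, here is a minimal sketch of that computation, written with time-by-feature matrices so it matches the $(X^TX)^{-1}X^TY$ form above; the names (wout_from_noisy_samples, X_samples, Y), the chunk size, and the ridge term are our own stand-ins for the notebook's chunking and regularization, not its exact code.

import torch

def wout_from_noisy_samples(X_samples, Y, ridge=1e-6, chunk_size=8):
    # X_samples: (n_noise, n_time, n_features) reservoir states, one noise
    # realization per slice; Y: (n_time, n_targets) target functions.
    n_noise, _, n_feat = X_samples.shape
    XtX = X_samples.new_zeros(n_feat, n_feat)
    XtY = X_samples.new_zeros(n_feat, Y.shape[1])
    # Accumulate the covariances a few noise realizations at a time so the
    # GPU only ever holds one chunk of outer products.
    for start in range(0, n_noise, chunk_size):
        chunk = X_samples[start:start + chunk_size]
        XtX += torch.einsum("nti,ntj->ij", chunk, chunk)
        XtY += torch.einsum("nti,tj->ij", chunk, Y)
    XtX /= n_noise
    XtY /= n_noise
    # A small ridge term stands in for the pseudo-inverse and keeps the
    # solve numerically stable.
    eye = torch.eye(n_feat, dtype=XtX.dtype, device=XtX.device)
    return torch.linalg.solve(XtX + ridge * eye, XtY)

Normalizing both covariance blocks by the number of noise realizations cancels in the solve, but it keeps the ridge parameter on a scale that does not depend on how many realizations were drawn.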
+def gpu_job(device, res_size, noise, ridge):
-
+
# Utilities.
def compute_wout():
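# Build the least squares system in chunks (see the note on GPU memory above)
# and solve the ridge-regularized problem for Wout.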
chunk_size = 1
@@ -7645,7 +7643,7 @@ Part 2: Simulations
ridge_mat = torch.eye(a.shape[1]).to(device)
Wout = torch.linalg.lstsq(a + ridge*ridge_mat, b, rcond=None)[0]
return Wout
-
+
def compute_powerset(data):
to_reduce = [data[:, :, idx] for idx in generate_powerset(range(data.shape[2]))[1:]]
ones = torch.ones(to_reduce[0][:, :, 0].shape)
@@ -7654,13 +7652,13 @@ Part 2: Simulations
rtn.append(reduce(lambda a, b: a*b, [r[:, :, i] for i in range(r.shape[2])], ones))
rtn.append(reduce(lambda a, b: ((a*b).T/(torch.max(torch.abs(a*b), dim=1)[0])).T, [r[:, :, i] for i in range(r.shape[2])], ones))
return rtn
-
+
def set_seed(seed):
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(seed)
-
+
def generate_powerset(s):
power_set = []
# Generate all subsets
@@ -7668,7 +7666,7 @@ Part 2: Simulations
for subset in combinations(s, subset_size):
power_set.append(list(subset))
return power_set
-
+
set_seed(0)
inSize, outSize, resSize, alpha, sparsity = 1, 1, res_size, 0.7, 0.8
data, Y = fetchData(train_cycles + test_cycles + 100)
@@ -7693,8 +7691,8 @@ Part 2: Simulations
dm = torch.stack(rtn, dim=2)
dms.append(dm)
dms = torch.stack(dms)
-
-
+
+
test = res(data_test, Ws, Wins, resSize, device)
rtn = compute_powerset(test)
test = torch.stack(rtn, dim=2)
@@ -7708,7 +7706,7 @@ Part 2: Simulations
expand_test = torch.tensor(Y_test[50:])[:, None].expand(-1, dm.shape[0]).T.to(device)
NRMSEs_lstsq = torch.sqrt(torch.mean((expand_test-Yhat)**2, axis=1)/torch.var(expand_test, axis=1))
rtn = []
-
+
# remove outliers
for n in NRMSEs_lstsq:
if n <=2:
@@ -7845,7 +7843,7 @@ Speeding up simulation
plt.title("Test Performance")
plt.yscale('log')
plt.ylim(.2,1)
-
+
# Plot for second half of data
plt.subplot(2, 1, 2)
plt.plot(Y_test[index:], color='red', linewidth=5, label='Target Value')
@@ -7887,7 +7885,7 @@ Speeding up simulation
def reservoir(data, Win, Wres, inSize, resSize, alpha, batch_number, power_set):
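# Run batch_number independent reservoirs in parallel on the MPS device,
# collect their states after a 50-step washout, and optionally append
# power-set products of the readout signals.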
# Apple "metal performance shaders".
device = torch.device("mps")
-
+
# Convert Wres, Win, and data to tensor and assign device
Wres = torch.tensor(Wres, device=device, dtype=torch.float32)
Win = torch.tensor(Win, device=device, dtype=torch.float32)
@@ -7897,14 +7895,14 @@ Speeding up simulation
# We'll trim some of the data for simplicity, we can fix this later.
data = data[:batch_number*batch_size]
new_data = data.view(batch_number, batch_size)
-
+
# Replicate Wres, Win for batch_number times
Wres_copies = Wres.repeat(batch_number, 1, 1)
Win_copies = Win.repeat(batch_number, 1, 1)
-
+
R_copies = 0.1 * (torch.ones((batch_number, resSize), device=device) - 0.5)
dm = torch.zeros((batch_number, batch_size - 50, 1 + inSize + resSize), device=device)
-
+
for t in range(batch_size):
u = new_data[:, t, None]
ones = torch.hstack((torch.ones(u.shape[0], 1, device=device), u))
@@ -7917,7 +7915,7 @@ Speeding up simulation
new_data = new_data[:, 50:].flatten()
s = list(dm.T)[2:]
chosen_subsets = []
-
+
if power_set:
# Generate all subsets
ones = torch.ones(new_data.shape[0], dtype=torch.float32)
@@ -7928,7 +7926,7 @@ Speeding up simulation
power_signals = [torch.tensor(pw, dtype=torch.float32)
for i, pw in enumerate(power_signals)]
power_signals = [el.cpu() for el in list(dm.T)] + power_signals
-
+
return torch.vstack(power_signals).T, new_data
And we see rough agreement! This is a simple model, but it highlights how we might expect the exponential scaling to arise in the zero-noise limit, and the polynomial scaling in the noisy setting. In fact, while this model is simple enough that the exponential reduction in performance can be predicted analytically, understanding the behavior under more general noise is more complicated, and is the subject of the theory developed in [3].
[3] Anthony M. Polloreno: “Limits to Reservoir Learning”, 2023; arXiv:2307.14474, http://arxiv.org/abs/2307.14474.
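As a toy, self-contained illustration of the scaling discussed above (our own sketch, not the analysis of [3]): multiply k independent signals, each corrupted by i.i.d. Gaussian noise, and estimate the signal-to-noise ratio of the product, in the spirit of the power-set products formed by compute_powerset. All names and parameter values here are arbitrary choices.

import torch

torch.manual_seed(0)

def product_snr(k, sigma=0.3, n_time=4096, n_trials=256):
    # Each factor is a fixed random "signal" in [-1, 1] plus Gaussian noise of
    # standard deviation sigma, redrawn independently for every trial.
    signal = 2 * torch.rand(k, n_time) - 1
    noisy = signal + sigma * torch.randn(n_trials, k, n_time)
    prod_clean = signal.prod(dim=0)   # noiseless product of the k signals
    prod_noisy = noisy.prod(dim=1)    # (n_trials, n_time) noisy products
    noise_power = (prod_noisy - prod_clean).var()
    return (prod_clean.var() / noise_power).item()

for k in range(1, 7):
    print(k, product_snr(k))

The estimated SNR falls by roughly a constant factor with each extra factor in the product, so although the number of product features grows exponentially with the reservoir size, only the low-order products stay above the noise floor, which is the polynomial-versus-exponential distinction drawn above.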