Commit f45a1ab

Update a few examples to use compile (#420)

* update a few examples to use compile
* update mnist
* add compile to vae and rename some stuff for simplicity
* update reqs
* use state in eval
* GCN example with RNG + dropout
* add a bit of prefetching
1 parent da7adae commit f45a1ab

17 files changed, +164 −118 lines
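For context, every example touched here follows the same pattern: the gradient computation and optimizer update move into a function compiled with `mx.compile`, and the model and optimizer state are passed as implicit `inputs`/`outputs` so in-place updates are tracked across calls. A minimal sketch of that pattern on a throwaway model (the layer sizes, learning rate, and data below are illustrative, not taken from any of the examples):

from functools import partial

import mlx.core as mx
import mlx.nn as nn
import mlx.optimizers as optim

# Toy model and optimizer; sizes and learning rate are illustrative only.
model = nn.Sequential(nn.Linear(4, 32), nn.ReLU(), nn.Linear(32, 2))
mx.eval(model.parameters())
optimizer = optim.SGD(learning_rate=0.1)


def loss_fn(model, X, y):
    return nn.losses.cross_entropy(model(X), y, reduction="mean")


# Parameters and optimizer state are mutated inside `step`, so they are
# declared as implicit inputs and outputs of the compiled function.
state = [model.state, optimizer.state]


@partial(mx.compile, inputs=state, outputs=state)
def step(X, y):
    loss_and_grad_fn = nn.value_and_grad(model, loss_fn)
    loss, grads = loss_and_grad_fn(model, X, y)
    optimizer.update(model, grads)
    return loss


X = mx.random.normal((8, 4))
y = mx.array([0, 1, 0, 1, 0, 1, 0, 1])
loss = step(X, y)
mx.eval(state)  # materialize the updated parameters and optimizer state
print(loss.item())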

cifar/dataset.py (+4, −5)

@@ -1,14 +1,12 @@
-import math
-
-import mlx.core as mx
+import numpy as np
 from mlx.data.datasets import load_cifar10


 def get_cifar10(batch_size, root=None):
     tr = load_cifar10(root=root)

-    mean = mx.array([0.485, 0.456, 0.406]).reshape((1, 1, 3))
-    std = mx.array([0.229, 0.224, 0.225]).reshape((1, 1, 3))
+    mean = np.array([0.485, 0.456, 0.406]).reshape((1, 1, 3))
+    std = np.array([0.229, 0.224, 0.225]).reshape((1, 1, 3))

     def normalize(x):
         x = x.astype("float32") / 255.0
@@ -23,6 +21,7 @@ def normalize(x):
         .image_random_crop("image", 32, 32)
         .key_transform("image", normalize)
         .batch(batch_size)
+        .prefetch(4, 4)
     )

     test = load_cifar10(root=root, train=False)
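The `.prefetch(4, 4)` call keeps data loading off the critical path of the now-faster compiled step. As I read the mlx-data stream API, the first argument is how many batches to keep buffered and the second the number of worker threads; treat that reading as an assumption, it is not spelled out in this diff. A hedged sketch of a consumer of such a pipeline:

from mlx.data.datasets import load_cifar10

# Buffer batches ahead of the training loop so augmentation and batching do
# not stall the compiled training step. The (4, 4) arguments are read here as
# "4 batches buffered, 4 worker threads" -- an assumption, not from the diff.
tr = load_cifar10()
tr_iter = tr.shuffle().to_stream().batch(32).prefetch(4, 4)

for batch in tr_iter:
    images = batch["image"]  # already materialized by the prefetch workers
    break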

cifar/main.py (+12, −5)

@@ -1,5 +1,6 @@
 import argparse
 import time
+from functools import partial

 import mlx.core as mx
 import mlx.nn as nn
@@ -33,19 +34,25 @@ def train_step(model, inp, tgt):
         acc = mx.mean(mx.argmax(output, axis=1) == tgt)
         return loss, acc

-    train_step_fn = nn.value_and_grad(model, train_step)
-
     losses = []
     accs = []
     samples_per_sec = []

+    state = [model.state, optimizer.state]
+
+    @partial(mx.compile, inputs=state, outputs=state)
+    def step(inp, tgt):
+        train_step_fn = nn.value_and_grad(model, train_step)
+        (loss, acc), grads = train_step_fn(model, inp, tgt)
+        optimizer.update(model, grads)
+        return loss, acc
+
     for batch_counter, batch in enumerate(train_iter):
         x = mx.array(batch["image"])
         y = mx.array(batch["label"])
         tic = time.perf_counter()
-        (loss, acc), grads = train_step_fn(model, x, y)
-        optimizer.update(model, grads)
-        mx.eval(model.parameters(), optimizer.state)
+        loss, acc = step(x, y)
+        mx.eval(state)
         toc = time.perf_counter()
         loss = loss.item()
         acc = acc.item()

cifar/requirements.txt (+2, −2)

@@ -1,3 +1,3 @@
-mlx>=0.0.9
+mlx>=0.2
 mlx-data
-numpy
+numpy

cvae/dataset.py (+1)

@@ -23,6 +23,7 @@ def normalize(x):
         .image_resize("image", h=img_size[0], w=img_size[1])
         .key_transform("image", normalize)
         .batch(batch_size)
+        .prefetch(4, 4)
     )

     # iterator over test set

cvae/main.py (+52, −52)

@@ -2,14 +2,15 @@

 import argparse
 import time
+from functools import partial
 from pathlib import Path

 import dataset
 import mlx.core as mx
 import mlx.nn as nn
 import mlx.optimizers as optim
-import model
 import numpy as np
+import vae
 from mlx.utils import tree_flatten
 from PIL import Image

@@ -53,44 +54,6 @@ def loss_fn(model, X):
     return recon_loss + kl_div


-def train_epoch(model, data, optimizer, epoch):
-    loss_acc = 0.0
-    throughput_acc = 0.0
-    loss_and_grad_fn = nn.value_and_grad(model, loss_fn)
-
-    # Iterate over training batches
-    for batch_count, batch in enumerate(data):
-        X = mx.array(batch["image"])
-
-        throughput_tic = time.perf_counter()
-
-        # Forward pass + backward pass + update
-        loss, grads = loss_and_grad_fn(model, X)
-        optimizer.update(model, grads)
-
-        # Evaluate updated model parameters
-        mx.eval(model.parameters(), optimizer.state)
-
-        throughput_toc = time.perf_counter()
-        throughput_acc += X.shape[0] / (throughput_toc - throughput_tic)
-        loss_acc += loss.item()
-
-        if batch_count > 0 and (batch_count % 10 == 0):
-            print(
-                " | ".join(
-                    [
-                        f"Epoch {epoch:4d}",
-                        f"Loss {(loss_acc / batch_count):10.2f}",
-                        f"Throughput {(throughput_acc / batch_count):8.2f} im/s",
-                        f"Batch {batch_count:5d}",
-                    ]
-                ),
-                end="\r",
-            )
-
-    return loss_acc, throughput_acc, batch_count
-
-
 def reconstruct(model, batch, out_file):
     # Reconstruct a single batch only
     images = mx.array(batch["image"])
@@ -127,10 +90,10 @@ def main(args):
     save_dir.mkdir(parents=True, exist_ok=True)

     # Load the model
-    vae = model.CVAE(args.latent_dims, img_size, args.max_filters)
-    mx.eval(vae.parameters())
+    model = vae.CVAE(args.latent_dims, img_size, args.max_filters)
+    mx.eval(model.parameters())

-    num_params = sum(x.size for _, x in tree_flatten(vae.trainable_parameters()))
+    num_params = sum(x.size for _, x in tree_flatten(model.trainable_parameters()))
     print("Number of trainable params: {:0.04f} M".format(num_params / 1e6))

     optimizer = optim.AdamW(learning_rate=args.lr)
@@ -139,20 +102,54 @@ def main(args):
     train_batch = next(train_iter)
     test_batch = next(test_iter)

+    state = [model.state, optimizer.state]
+
+    @partial(mx.compile, inputs=state, outputs=state)
+    def step(X):
+        loss_and_grad_fn = nn.value_and_grad(model, loss_fn)
+        loss, grads = loss_and_grad_fn(model, X)
+        optimizer.update(model, grads)
+        return loss
+
     for e in range(1, args.epochs + 1):
         # Reset iterators and stats at the beginning of each epoch
         train_iter.reset()
-        vae.train()
+        model.train()

         # Train one epoch
         tic = time.perf_counter()
-        loss_acc, throughput_acc, batch_count = train_epoch(
-            vae, train_iter, optimizer, e
-        )
+        loss_acc = 0.0
+        throughput_acc = 0.0
+
+        # Iterate over training batches
+        for batch_count, batch in enumerate(train_iter):
+            X = mx.array(batch["image"])
+            throughput_tic = time.perf_counter()
+
+            # Forward pass + backward pass + update
+            loss = step(X)
+
+            # Evaluate updated model parameters
+            mx.eval(state)
+
+            throughput_toc = time.perf_counter()
+            throughput_acc += X.shape[0] / (throughput_toc - throughput_tic)
+            loss_acc += loss.item()
+
+            if batch_count > 0 and (batch_count % 10 == 0):
+                print(
+                    " | ".join(
+                        [
+                            f"Epoch {e:4d}",
+                            f"Loss {(loss_acc / batch_count):10.2f}",
+                            f"Throughput {(throughput_acc / batch_count):8.2f} im/s",
+                            f"Batch {batch_count:5d}",
+                        ]
+                    ),
+                    end="\r",
+                )
         toc = time.perf_counter()

-        vae.eval()
-
         print(
             " | ".join(
                 [
@@ -163,14 +160,17 @@ def main(args):
                 ]
             )
         )
+
+        model.eval()
+
         # Reconstruct a batch of training and test images
-        reconstruct(vae, train_batch, save_dir / f"train_{e:03d}.png")
-        reconstruct(vae, test_batch, save_dir / f"test_{e:03d}.png")
+        reconstruct(model, train_batch, save_dir / f"train_{e:03d}.png")
+        reconstruct(model, test_batch, save_dir / f"test_{e:03d}.png")

         # Generate images
-        generate(vae, save_dir / f"generated_{e:03d}.png")
+        generate(model, save_dir / f"generated_{e:03d}.png")

-        vae.save_weights(str(save_dir / "weights.npz"))
+        model.save_weights(str(save_dir / "weights.npz"))


 if __name__ == "__main__":

cvae/requirements.txt (+1, −1)

@@ -1,4 +1,4 @@
-mlx>=0.0.9
+mlx>=0.2
 mlx-data
 numpy
 Pillow

cvae/model.py → cvae/vae.py

File renamed without changes.
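The rename goes hand in hand with the changes in cvae/main.py above: the module is now imported as `vae`, which frees the name `model` for the network instance that the compiled `step` closes over. A tiny sketch of the resulting usage (the constructor arguments are illustrative placeholders, not values from the example):

import mlx.core as mx
import vae  # cvae/vae.py, formerly cvae/model.py -- the class itself is unchanged

# Illustrative arguments; in main.py these come from argparse and the dataset.
model = vae.CVAE(8, (64, 64), 64)
mx.eval(model.parameters())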

gcn/main.py (+18, −7)

@@ -1,4 +1,6 @@
+import time
 from argparse import ArgumentParser
+from functools import partial

 import mlx.core as mx
 import mlx.nn as nn
@@ -47,23 +49,31 @@ def main(args):
     mx.eval(gcn.parameters())

     optimizer = optim.Adam(learning_rate=args.lr)
-    loss_and_grad_fn = nn.value_and_grad(gcn, forward_fn)

-    best_val_loss = float("inf")
-    cnt = 0
+    state = [gcn.state, optimizer.state, mx.random.state]

-    # Training loop
-    for epoch in range(args.epochs):
-        # Loss
+    @partial(mx.compile, inputs=state, outputs=state)
+    def step():
+        loss_and_grad_fn = nn.value_and_grad(gcn, forward_fn)
         (loss, y_hat), grads = loss_and_grad_fn(
             gcn, x, adj, y, train_mask, args.weight_decay
         )
         optimizer.update(gcn, grads)
-        mx.eval(gcn.parameters(), optimizer.state)
+        return loss, y_hat
+
+    best_val_loss = float("inf")
+    cnt = 0
+
+    # Training loop
+    for epoch in range(args.epochs):
+        tic = time.time()
+        loss, y_hat = step()
+        mx.eval(state)

         # Validation
         val_loss = loss_fn(y_hat[val_mask], y[val_mask])
         val_acc = eval_fn(y_hat[val_mask], y[val_mask])
+        toc = time.time()

         # Early stopping
         if val_loss < best_val_loss:
@@ -81,6 +91,7 @@ def main(args):
                     f"Train loss: {loss.item():.3f}",
                     f"Val loss: {val_loss.item():.3f}",
                     f"Val acc: {val_acc.item():.2f}",
+                    f"Time: {1e3*(toc - tic):.3f} (ms)",
                 ]
             )
         )
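The GCN example is the one in this commit whose compiled step also captures `mx.random.state`: dropout draws random numbers inside the compiled function, so the global RNG state has to be threaded through as an implicit input and output, otherwise every call would replay the same dropout mask. A minimal sketch of that interaction, using a stand-in module rather than the example's GCN:

from functools import partial

import mlx.core as mx
import mlx.nn as nn

# Stand-in module with dropout; not the example's GCN.
net = nn.Sequential(nn.Linear(16, 16), nn.Dropout(p=0.5))
mx.eval(net.parameters())

# Dropout draws random numbers inside the compiled function, so the global
# RNG state must be an implicit input/output alongside the parameters.
state = [net.state, mx.random.state]


@partial(mx.compile, inputs=state, outputs=state)
def forward(x):
    return net(x)


x = mx.ones((2, 16))
a = forward(x)
b = forward(x)
mx.eval(a, b)  # a and b differ because the RNG state advanced between calls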

llms/mlx_lm/requirements.txt (+1, −1)

@@ -1,4 +1,4 @@
-mlx
+mlx>=0.1
 numpy
 transformers>=4.37.0
 protobuf

mnist/main.py (+15, −9)

@@ -2,6 +2,7 @@

 import argparse
 import time
+from functools import partial

 import mlx.core as mx
 import mlx.nn as nn
@@ -34,10 +35,6 @@ def loss_fn(model, X, y):
     return nn.losses.cross_entropy(model(X), y, reduction="mean")


-def eval_fn(model, X, y):
-    return mx.mean(mx.argmax(model(X), axis=1) == y)
-
-
 def batch_iterate(batch_size, X, y):
     perm = mx.array(np.random.permutation(y.size))
     for s in range(0, y.size, batch_size):
@@ -65,16 +62,25 @@ def main(args):
     model = MLP(num_layers, train_images.shape[-1], hidden_dim, num_classes)
     mx.eval(model.parameters())

-    loss_and_grad_fn = nn.value_and_grad(model, loss_fn)
     optimizer = optim.SGD(learning_rate=learning_rate)
+    loss_and_grad_fn = nn.value_and_grad(model, loss_fn)
+
+    @partial(mx.compile, inputs=model.state, outputs=model.state)
+    def step(X, y):
+        loss, grads = loss_and_grad_fn(model, X, y)
+        optimizer.update(model, grads)
+        return loss
+
+    @partial(mx.compile, inputs=model.state)
+    def eval_fn(X, y):
+        return mx.mean(mx.argmax(model(X), axis=1) == y)

     for e in range(num_epochs):
         tic = time.perf_counter()
         for X, y in batch_iterate(batch_size, train_images, train_labels):
-            loss, grads = loss_and_grad_fn(model, X, y)
-            optimizer.update(model, grads)
-            mx.eval(model.parameters(), optimizer.state)
-        accuracy = eval_fn(model, test_images, test_labels)
+            step(X, y)
+            mx.eval(model.state)
+        accuracy = eval_fn(test_images, test_labels)
         toc = time.perf_counter()
         print(
             f"Epoch {e}: Test accuracy {accuracy.item():.3f},"

mnist/requirements.txt (+2, −2)

@@ -1,2 +1,2 @@
-mlx
-numpy
+mlx>=0.2
+numpy

normalizing_flow/main.py (+15, −8)

@@ -1,5 +1,7 @@
 # Copyright © 2023-2024 Apple Inc.

+from functools import partial
+
 import matplotlib.pyplot as plt
 import mlx.core as mx
 import mlx.nn as nn
@@ -27,18 +29,23 @@ def main(args):
     def loss_fn(model, x):
         return -mx.mean(model(x))

-    loss_and_grad_fn = nn.value_and_grad(model, loss_fn)
     optimizer = optim.Adam(learning_rate=args.learning_rate)

-    with trange(args.n_steps) as steps:
-        for step in steps:
-            idx = np.random.choice(x.shape[0], replace=False, size=args.n_batch)
-            loss, grads = loss_and_grad_fn(model, mx.array(x[idx]))
+    state = [model.state, optimizer.state]

-            optimizer.update(model, grads)
-            mx.eval(model.parameters())
+    @partial(mx.compile, inputs=state, outputs=state)
+    def step(x):
+        loss_and_grad_fn = nn.value_and_grad(model, loss_fn)
+        loss, grads = loss_and_grad_fn(model, x)
+        optimizer.update(model, grads)
+        return loss

-            steps.set_postfix(val=loss)
+    with trange(args.n_steps) as steps:
+        for it in steps:
+            idx = np.random.choice(x.shape[0], replace=False, size=args.n_batch)
+            loss = step(mx.array(x[idx]))
+            mx.eval(state)
+            steps.set_postfix(val=loss.item())

     # Plot samples from trained flow
