-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Initial `DDIM` implementation * cleanup and improvements * format * downgrade python * update ci * update * bring back cache
- Loading branch information
Showing
26 changed files
with
2,269 additions
and
248 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,9 @@ | ||
# fanan | ||
logs/ | ||
models/ | ||
output/ | ||
data/ | ||
|
||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
fanan: | ||
seed: 37 | ||
log_interval: 10 | ||
|
||
|
||
mesh: | ||
n_data_parallel: 1 | ||
n_fsdp_parallel: 1 | ||
n_sequence_parallel: 1 | ||
n_tensors_parallel: 1 | ||
|
||
|
||
data: | ||
dataset_name: "oxford_flowers102" | ||
batch_size: 16 | ||
cache: False | ||
image_size: 64 | ||
num_channels: 3 | ||
|
||
|
||
arch: | ||
architecture_name: "ddim" | ||
diffusion: | ||
diffusion_steps: 80 | ||
|
||
|
||
training: | ||
total_steps: 10000 | ||
eval_every_steps: 100 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
__all__ = ["Config"] | ||
__all__ = ["Config", "_mesh_cfg"] | ||
|
||
from fanan.config.base import Config | ||
from fanan.config.base import Config, _mesh_cfg |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
import logging | ||
|
||
import jax | ||
import numpy as np | ||
import tensorflow as tf | ||
from jax.experimental import mesh_utils | ||
from tqdm import tqdm | ||
|
||
from fanan.config import Config | ||
from fanan.modeling.architectures import get_architecture | ||
|
||
|
||
class Cortex:
    """Core training driver.

    Builds the device mesh, instantiates the architecture, and runs the
    train/eval loop with TensorBoard logging.

    Args:
        config (Config): The configuration object containing the model settings.

    Attributes:
        config (Config): The configuration object containing the model settings.
        devices: Device mesh built from ``jax.devices()``.
        mesh (jax.sharding.Mesh): Distributed computation mesh over ``devices``.
        architecture: Model object obtained via ``get_architecture(config)``.
    """

    def __init__(self, config: Config) -> None:
        self.config = config
        # 4-D mesh: (data, fsdp, sequence, tensor) parallelism axes.
        self.devices = mesh_utils.create_device_mesh(
            devices=jax.devices(),
            mesh_shape=(
                self.config.mesh.n_data_parallel,
                self.config.mesh.n_fsdp_parallel,
                self.config.mesh.n_sequence_parallel,
                self.config.mesh.n_tensors_parallel,
            ),
            contiguous_submeshes=True,
        )
        logging.info(f"{self.devices=}")

        self.mesh = jax.sharding.Mesh(
            devices=self.devices,
            axis_names=self.config.mesh.mesh_axis_names,
        )
        logging.info(f"{self.mesh=}")

        self.architecture = get_architecture(self.config)
        self._writer = tf.summary.create_file_writer("./logs")

    def train(self, train_dataloader_iter, val_dataloader_iter) -> None:
        """Run the main training loop.

        Performs ``config.training.total_steps`` training steps, evaluating
        and logging generated images every ``eval_every_steps`` steps.

        Args:
            train_dataloader_iter: Iterator yielding training batches.
                Assumed infinite (e.g. a repeated tf.data pipeline) — TODO confirm.
            val_dataloader_iter: Iterator yielding validation batches.

        Returns:
            None
        """
        # Running sum instead of np.mean over an ever-growing list:
        # O(1) per step and bounded memory (the original was O(n^2) overall).
        loss_sum = 0.0
        avg_loss = float("nan")
        pbar = tqdm(range(self.config.training.total_steps))
        for step in pbar:
            batch = next(train_dataloader_iter)
            loss = self.architecture.train_step(batch=batch)
            loss_sum += float(loss)

            if step % self.config.training.eval_every_steps == 0:
                batch = next(val_dataloader_iter)
                generated_images = self.architecture.eval_step(batch=batch)
                with self._writer.as_default():
                    tf.summary.image("generated", generated_images, step=step, max_outputs=8)

            avg_loss = loss_sum / (step + 1)
            pbar.set_postfix(
                {
                    "step_loss": f"{loss:.5f}",
                    "avg_loss": f"{avg_loss:.5f}",
                }
            )

        # Guard: with total_steps == 0 the loop never runs and `step` is
        # undefined (the original raised NameError here).
        if self.config.training.total_steps > 0:
            with self._writer.as_default():
                tf.summary.scalar("loss", avg_loss, step=step)
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
import logging | ||
from typing import Any | ||
|
||
import jax | ||
import tensorflow as tf | ||
import tensorflow_datasets as tfds | ||
|
||
from fanan.config.base import Config | ||
|
||
|
||
def normalize_to_neg_one_to_one(img):
    """Map pixel values from [0, 1] to [-1, 1]."""
    return 2 * img - 1
|
||
|
||
def crop_and_resize(image: tf.Tensor, resolution: int = 64) -> tf.Tensor:
    """Center-crop an image to a square and resize it to `resolution`.

    Args:
        image: Image tensor; assumed HWC layout (indices 0/1 are height/width) —
            TODO confirm against the dataset.
        resolution: Side length of the square output, in pixels.

    Returns:
        A `(resolution, resolution, C)` uint8 tensor.
    """
    height, width = tf.shape(image)[0], tf.shape(image)[1]
    crop_size = tf.minimum(height, width)
    # Largest centered square crop.
    image = tf.image.crop_to_bounding_box(
        image=image,
        offset_height=(height - crop_size) // 2,
        offset_width=(width - crop_size) // 2,
        target_height=crop_size,
        target_width=crop_size,
    )
    image = tf.image.resize(
        image,
        size=(resolution, resolution),
        antialias=True,
        method=tf.image.ResizeMethod.BICUBIC,
    )
    # NOTE(review): bicubic interpolation can overshoot the input range; the
    # uint8 cast truncates rather than clips — confirm this is intended.
    return tf.cast(image, tf.uint8)
|
||
|
||
def get_dataset_iterator(config: Config, split: str = "train") -> Any:
    """Build a numpy batch iterator over one TFDS split, sharded per process.

    Each JAX process reads a contiguous slice of the split and yields
    repeated, shuffled, fixed-size batches of preprocessed images.

    Args:
        config: Run configuration (dataset name, batch size, image size, ...).
        split: TFDS split name, e.g. "train" or "test".

    Returns:
        An iterator over numpy image batches of shape
        (per-process batch, image_size, image_size, channels).

    Raises:
        ValueError: If the global batch size is not divisible by the
            device count.
    """
    if config.data.batch_size % jax.device_count() > 0:
        raise ValueError(
            f"batch size {config.data.batch_size} must be divisible by the number of devices {jax.device_count()}"
        )

    # Per-process (host-local) batch size.
    batch_size = config.data.batch_size // jax.process_count()

    platform = jax.local_devices()[0].platform
    input_dtype = (tf.bfloat16 if platform == "tpu" else tf.float16) if config.training.half_precision else tf.float32

    dataset_builder = tfds.builder(config.data.dataset_name)
    dataset_builder.download_and_prepare()

    def preprocess_fn(d: dict) -> Any:
        """Center-crop/resize an example's image and convert to input_dtype."""
        image = d.get("image")
        image = crop_and_resize(image=image, resolution=config.data.image_size)
        image = tf.image.convert_image_dtype(image, input_dtype)
        return image

    # Carve out this process's contiguous shard of the split.
    num_examples = dataset_builder.info.splits[split].num_examples
    logging.info(f"Total {split=} examples: {num_examples=}")
    split_size = num_examples // jax.process_count()
    logging.info(f"Split size: {split_size=}")
    start = jax.process_index() * split_size
    split = f"{split}[{start}:{start + split_size}]"

    ds = dataset_builder.as_dataset(split=split)
    options = tf.data.Options()
    options.threading.private_threadpool_size = 48
    # BUG FIX: tf.data datasets are immutable — with_options() returns a new
    # dataset. The original discarded the result, so the options never applied.
    ds = ds.with_options(options)

    ds = ds.map(preprocess_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    if config.data.cache:
        ds = ds.cache()

    ds = ds.repeat()
    ds = ds.shuffle(16 * batch_size, seed=config.fanan.seed)
    ds = ds.batch(batch_size, drop_remainder=True)
    ds = ds.prefetch(tf.data.experimental.AUTOTUNE)

    return iter(tfds.as_numpy(ds))
|
||
|
||
def get_dataset(config: Config) -> Any:
    """Return a (train, validation) pair of numpy batch iterators."""
    return (
        get_dataset_iterator(config, split="train"),
        get_dataset_iterator(config, split="test"),
    )
Oops, something went wrong.