diff --git a/.gitignore b/.gitignore new file mode 100755 index 00000000..c18dd8d8 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__/ diff --git a/Dockerfile b/Dockerfile new file mode 100755 index 00000000..d29740ee --- /dev/null +++ b/Dockerfile @@ -0,0 +1,17 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +FROM tensorflow/tensorflow:1.14.0-gpu-py3 + +RUN pip install scipy==1.3.3 +RUN pip install requests==2.22.0 +RUN pip install Pillow==6.2.1 +RUN pip install h5py==2.9.0 +RUN pip install imageio==2.9.0 +RUN pip install imageio-ffmpeg==0.4.2 +RUN pip install tqdm==4.49.0 diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100755 index 00000000..13127034 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,97 @@ +Copyright (c) 2020, NVIDIA Corporation. All rights reserved. + + +NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator Augmentation (ADA) + + +======================================================================= + +1. Definitions + +"Licensor" means any person or entity that distributes its Work. + +"Software" means the original work of authorship made available under +this License. + +"Work" means the Software and any additions to or derivative works of +the Software that are made available under this License. + +The terms "reproduce," "reproduction," "derivative works," and +"distribution" have the meaning as provided under U.S. copyright law; +provided, however, that for the purposes of this License, derivative +works shall not include works that remain separable from, or merely +link (or bind by name) to the interfaces of, the Work. + +Works, including the Software, are "made available" under this License +by including in or with the Work either (a) a copyright notice +referencing the applicability of this License to the Work, or (b) a +copy of this License. + +2. License Grants + + 2.1 Copyright Grant. Subject to the terms and conditions of this + License, each Licensor grants to you a perpetual, worldwide, + non-exclusive, royalty-free, copyright license to reproduce, + prepare derivative works of, publicly display, publicly perform, + sublicense and distribute its Work and any resulting derivative + works in any form. + +3. Limitations + + 3.1 Redistribution. You may reproduce or distribute the Work only + if (a) you do so under this License, (b) you include a complete + copy of this License with your distribution, and (c) you retain + without modification any copyright, patent, trademark, or + attribution notices that are present in the Work. + + 3.2 Derivative Works. You may specify that additional or different + terms apply to the use, reproduction, and distribution of your + derivative works of the Work ("Your Terms") only if (a) Your Terms + provide that the use limitation in Section 3.3 applies to your + derivative works, and (b) you identify the specific derivative + works that are subject to Your Terms. Notwithstanding Your Terms, + this License (including the redistribution requirements in Section + 3.1) will continue to apply to the Work itself. + + 3.3 Use Limitation. 
The Work and any derivative works thereof only + may be used or intended for use non-commercially. Notwithstanding + the foregoing, NVIDIA and its affiliates may use the Work and any + derivative works commercially. As used herein, "non-commercially" + means for research or evaluation purposes only. + + 3.4 Patent Claims. If you bring or threaten to bring a patent claim + against any Licensor (including any claim, cross-claim or + counterclaim in a lawsuit) to enforce any patents that you allege + are infringed by any Work, then your rights under this License from + such Licensor (including the grant in Section 2.1) will terminate + immediately. + + 3.5 Trademarks. This License does not grant any rights to use any + Licensor’s or its affiliates’ names, logos, or trademarks, except + as necessary to reproduce the notices described in this License. + + 3.6 Termination. If you violate any term of this License, then your + rights under this License (including the grant in Section 2.1) will + terminate immediately. + +4. Disclaimer of Warranty. + +THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR +NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER +THIS LICENSE. + +5. Limitation of Liability. + +EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL +THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE +SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, +INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF +OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK +(INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, +LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER +COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF +THE POSSIBILITY OF SUCH DAMAGES. + +======================================================================= diff --git a/README.md b/README.md new file mode 100755 index 00000000..b0e8ad61 --- /dev/null +++ b/README.md @@ -0,0 +1,376 @@ +## StyleGAN2 with adaptive discriminator augmentation (ADA)
— Official TensorFlow implementation + +![Teaser image](./docs/stylegan2-ada-teaser-1024x252.png) + +**Training Generative Adversarial Networks with Limited Data**
+Tero Karras, Miika Aittala, Janne Hellsten, Samuli Laine, Jaakko Lehtinen, Timo Aila
+https://arxiv.org/abs/2006.06676
+ +Abstract: *Training generative adversarial networks (GAN) using too little data typically leads to discriminator overfitting, causing training to diverge. We propose an adaptive discriminator augmentation mechanism that significantly stabilizes training in limited data regimes. The approach does not require changes to loss functions or network architectures, and is applicable both when training from scratch and when fine-tuning an existing GAN on another dataset. We demonstrate, on several datasets, that good results are now possible using only a few thousand training images, often matching StyleGAN2 results with an order of magnitude fewer images. We expect this to open up new application domains for GANs. We also find that the widely used CIFAR-10 is, in fact, a limited data benchmark, and improve the record FID from 5.59 to 2.42.* + +For business inquiries, please contact [researchinquiries@nvidia.com](mailto:researchinquiries@nvidia.com)
+For press and other inquiries, please contact Hector Marinez at [hmarinez@nvidia.com](mailto:hmarinez@nvidia.com)
+ +## What's new + +This repository supersedes the original [StyleGAN2](https://github.com/NVlabs/stylegan2) with the following new features: + +* **ADA**: Significantly better results for datasets with less than ~30k training images. State-of-the-art results for CIFAR-10. +* **Mixed-precision support**: ~1.6x faster training, ~1.3x faster inference, ~1.5x lower GPU memory consumption. +* **Automatic hyperparameter selection**: Reasonable out-of-the-box results for any dataset resolution and GPU count. +* **Clean codebase**: Extensive refactoring and simplification. The code should be generally easier to work with. +* **Command line tools**: Easily reproduce training runs from the paper, generate projection videos for arbitrary images, etc. +* **Network import**: Full support for network pickles produced by [StyleGAN](https://github.com/NVlabs/stylegan) and [StyleGAN2](https://github.com/NVlabs/stylegan2). Faster loading times. +* **Augmentation pipeline**: Self-contained, reusable GPU implementation of extensive high-quality image augmentations. +* **Bugfixes** + +## External data repository + +| Path | Description +| :--- | :---------- +| [stylegan2-ada](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/) | Main directory hosted on Amazon S3 +|   ├  [ada-paper.pdf](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/ada-paper.pdf) | Paper PDF +|   ├  [images](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/images/) | Curated example images produced using the pre-trained models +|   ├  [videos](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/videos/) | Curated example interpolation videos +|   └  [pretrained](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/) | Pre-trained models +|     ├  metfaces.pkl | MetFaces at 1024x1024, transfer learning from FFHQ using ADA +|     ├  brecahad.pkl | BreCaHAD at 512x512, trained from scratch using ADA +|     ├  afhqcat.pkl | AFHQ Cat at 512x512, trained from scratch using ADA +|     ├  afhqdog.pkl | AFHQ Dog at 512x512, trained from scratch using ADA +|     ├  afhqwild.pkl | AFHQ Wild at 512x512, trained from scratch using ADA +|     ├  cifar10.pkl | Class-conditional CIFAR-10 at 32x32 +|     ├  ffhq.pkl | FFHQ at 1024x1024, trained using original StyleGAN2 +|     ├  [paper-fig7c-training-set-sweeps](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/paper-fig7c-training-set-sweeps/) | All models used in Fig.7c (baseline, ADA, bCR) +|     ├  [paper-fig8a-comparison-methods](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/paper-fig8a-comparison-methods/) | All models used in Fig.8a (comparison methods) +|     ├  [paper-fig8b-discriminator-capacity](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/paper-fig8b-discriminator-capacity/) | All models used in Fig.8b (discriminator capacity) +|     ├  [paper-fig11a-small-datasets](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/paper-fig11a-small-datasets/) | All models used in Fig.11a (small datasets, transfer learning) +|     ├  [paper-fig11b-cifar10](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/paper-fig11b-cifar10/) | All models used in Fig.11b (CIFAR-10) +|     ├  [transfer-learning-source-nets](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/transfer-learning-source-nets/) | Models used as starting point for transfer learning +|     └  [metrics](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/) | Feature detectors used by the quality metrics + +## Requirements + +* Linux and Windows are supported, but we recommend Linux for 
performance and compatibility reasons. +* 64-bit Python 3.6 or 3.7. We recommend Anaconda3 with numpy 1.14.3 or newer. +* We recommend TensorFlow 1.14, which we used for all experiments in the paper, but TensorFlow 1.15 is also supported on Linux. TensorFlow 2.x is not supported. +* On Windows you need to use TensorFlow 1.14, as the standard 1.15 installation does not include necessary C++ headers. +* 1–8 high-end NVIDIA GPUs with at least 12 GB of GPU memory, NVIDIA drivers, CUDA 10.0 toolkit and cuDNN 7.5. +* Docker users: use the [provided Dockerfile](./Dockerfile) to build an image with the required library dependencies. + +The generator and discriminator networks rely heavily on custom TensorFlow ops that are compiled on the fly using NVCC. On Windows, the compilation requires Microsoft Visual Studio to be in `PATH`. We recommend installing [Visual Studio Community Edition](https://visualstudio.microsoft.com/vs/) and adding it into `PATH` using `"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"`. + +## Getting started + +Pre-trained networks are stored as `*.pkl` files that can be referenced using local filenames or URLs: + +```.bash +# Generate curated MetFaces images without truncation (Fig.10 left) +python generate.py --outdir=out --trunc=1 --seeds=85,265,297,849 \ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metfaces.pkl + +# Generate uncurated MetFaces images with truncation (Fig.12 upper left) +python generate.py --outdir=out --trunc=0.7 --seeds=600-605 \ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metfaces.pkl + +# Generate class conditional CIFAR-10 images (Fig.17 left, Car) +python generate.py --outdir=out --trunc=1 --seeds=0-35 --class=1 \ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/cifar10.pkl +``` + +Outputs from the above commands are placed under `out/*.png`. You can change the location with `--outdir`. Temporary cache files, such as CUDA build results and downloaded network pickles, will be saved under `$HOME/.cache/dnnlib`. This can be overridden using the `DNNLIB_CACHE_DIR` environment variable. + +**Docker**: You can run the above curated image example using Docker as follows: + +```.bash +docker build --tag stylegan2ada:latest . +docker run --gpus all -it --rm -v `pwd`:/scratch --user $(id -u):$(id -g) stylegan2ada:latest bash -c \ + "(cd /scratch && DNNLIB_CACHE_DIR=/scratch/.cache python3 generate.py --trunc=1 --seeds=85,265,297,849 \ + --outdir=out --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metfaces.pkl)" +``` + +## Projecting images to latent space + +To find the matching latent vector for a given image file, run: + +```.bash +python projector.py --outdir=out --target=targetimg.png \ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/ffhq.pkl +``` + +For optimal results, the target image should be cropped and aligned similar to the original FFHQ dataset. The above command saves the projection target `out/target.png`, result `out/proj.png`, latent vector `out/dlatents.npz`, and progression video `out/proj.mp4`. 
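+The saved `out/dlatents.npz` can also be loaded programmatically, e.g. to check what was written before further processing (a minimal sketch; the array name stored inside the `.npz` is not guaranteed here, so inspect `data.files` rather than hard-coding a key):
+
+```.python
+import numpy as np
+
+# Print whatever arrays projector.py stored in the latent file.
+data = np.load('out/dlatents.npz')
+for key in data.files:
+    print(key, data[key].shape, data[key].dtype)
+```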
You can render the resulting latent vector by specifying `--dlatents` for `python generate.py`: + +```.bash +python generate.py --outdir=out --dlatents=out/dlatents.npz \ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/ffhq.pkl +``` + +## Preparing datasets + +Datasets are stored as multi-resolution TFRecords, i.e., the same format used by [StyleGAN](https://github.com/NVlabs/stylegan) and [StyleGAN2](https://github.com/NVlabs/stylegan2). Each dataset consists of multiple `*.tfrecords` files stored under a common directory, e.g., `~/datasets/ffhq/ffhq-r*.tfrecords` + +**MetFaces**: Download the [MetFaces dataset](https://github.com/NVlabs/metfaces-dataset) and convert to TFRecords: + +```.bash +python dataset_tool.py create_from_images ~/datasets/metfaces ~/downloads/metfaces/images +python dataset_tool.py display ~/datasets/metfaces +``` + +**BreCaHAD**: Download the [BreCaHAD dataset](https://figshare.com/articles/BreCaHAD_A_Dataset_for_Breast_Cancer_Histopathological_Annotation_and_Diagnosis/7379186). Generate 512x512 resolution crops and convert to TFRecords: + +```.bash +python dataset_tool.py extract_brecahad_crops --cropsize=512 \ + --output_dir=/tmp/brecahad-crops --brecahad_dir=~/downloads/brecahad/images + +python dataset_tool.py create_from_images ~/datasets/brecahad /tmp/brecahad-crops +python dataset_tool.py display ~/datasets/brecahad +``` + +**AFHQ**: Download the [AFHQ dataset](https://github.com/clovaai/stargan-v2/blob/master/README.md#animal-faces-hq-dataset-afhq) and convert to TFRecords: + +```.bash +python dataset_tool.py create_from_images ~/datasets/afhqcat ~/downloads/afhq/train/cat +python dataset_tool.py create_from_images ~/datasets/afhqdog ~/downloads/afhq/train/dog +python dataset_tool.py create_from_images ~/datasets/afhqwild ~/downloads/afhq/train/wild +python dataset_tool.py display ~/datasets/afhqcat +``` + +**CIFAR-10**: Download the [CIFAR-10 python version](https://www.cs.toronto.edu/~kriz/cifar.html). Convert to two separate TFRecords for unconditional and class-conditional training: + +```.bash +python dataset_tool.py create_cifar10 --ignore_labels=1 \ + ~/datasets/cifar10u ~/downloads/cifar-10-batches-py + +python dataset_tool.py create_cifar10 --ignore_labels=0 \ + ~/datasets/cifar10c ~/downloads/cifar-10-batches-py + +python dataset_tool.py display ~/datasets/cifar10c +``` + +**FFHQ**: Download the [Flickr-Faces-HQ](https://github.com/NVlabs/ffhq-dataset) dataset as TFRecords: + +```.bash +pushd ~ +git clone https://github.com/NVlabs/ffhq-dataset.git +cd ffhq-dataset +python download_ffhq.py --tfrecords +popd +python dataset_tool.py display ~/ffhq-dataset/tfrecords/ffhq +``` + +**LSUN**: Download the desired LSUN categories in LMDB format from the [LSUN project page](https://www.yf.io/p/lsun) and convert to TFRecords: + +```.bash +python dataset_tool.py create_lsun --resolution=256 --max_images=200000 \ + ~/datasets/lsuncat200k ~/downloads/lsun/cat_lmdb + +python dataset_tool.py display ~/datasets/lsuncat200k +``` + +**Custom**: Custom datasets can be created by placing all images under a single directory. The images must be square-shaped and they must all have the same power-of-two dimensions. 
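+If your source images do not meet these requirements yet, they can be center-cropped and resized beforehand, for example with Pillow (a minimal sketch; the `raw-images` input directory and the 512x512 target size are placeholder assumptions):
+
+```.python
+import os
+import PIL.Image
+
+# 'raw-images', '~/custom-images', and 512 are placeholders; adjust for your data.
+src_dir, dst_dir, size = 'raw-images', os.path.expanduser('~/custom-images'), 512
+os.makedirs(dst_dir, exist_ok=True)
+for name in os.listdir(src_dir):
+    # Center-crop to a square, then resize to a power-of-two resolution.
+    img = PIL.Image.open(os.path.join(src_dir, name)).convert('RGB')
+    crop = min(img.size)
+    left, top = (img.width - crop) // 2, (img.height - crop) // 2
+    img = img.crop((left, top, left + crop, top + crop)).resize((size, size), PIL.Image.LANCZOS)
+    img.save(os.path.join(dst_dir, os.path.splitext(name)[0] + '.png'))
+```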
To convert the images to multi-resolution TFRecords, run: + +```.bash +python dataset_tool.py create_from_images ~/datasets/custom ~/custom-images +python dataset_tool.py display ~/datasets/custom +``` + +## Training new networks + +In its most basic form, training new networks boils down to: + +```.bash +python train.py --outdir=~/training-runs --gpus=1 --data=~/datasets/custom --dry-run +python train.py --outdir=~/training-runs --gpus=1 --data=~/datasets/custom +``` + +The first command is optional; it will validate the arguments, print out the resulting training configuration, and exit. The second command will kick off the actual training. + +In this example, the results will be saved to a newly created directory `~/training-runs/-custom-auto1` (controlled by `--outdir`). The training will export network pickles (`network-snapshot-.pkl`) and example images (`fakes.png`) at regular intervals (controlled by `--snap`). For each pickle, it will also evaluate FID by default (controlled by `--metrics`) and log the resulting scores in `metric-fid50k_full.txt`. + +The name of the output directory (e.g., `00000-custom-auto1`) reflects the hyperparameter configuration that was used. In this case, `custom` indicates the training set (`--data`) and `auto1` indicates the *base configuration* that was used to select the hyperparameters (`--cfg`): + +| Base config | Description +| :---------- | :---------- +| `auto` (default) | Automatically select reasonable defaults based on resolution and GPU count. Serves as a good starting point for new datasets, but does not necessarily lead to optimal results. +| `stylegan2` | Reproduce results for StyleGAN2 config F at 1024x1024 using 1, 2, 4, or 8 GPUs. +| `paper256` | Reproduce results for FFHQ and LSUN Cat at 256x256 using 1, 2, 4, or 8 GPUs. +| `paper512` | Reproduce results for BreCaHAD and AFHQ at 512x512 using 1, 2, 4, or 8 GPUs. +| `paper1024` | Reproduce results for MetFaces at 1024x1024 using 1, 2, 4, or 8 GPUs. +| `cifar` | Reproduce results for CIFAR-10 (tuned configuration) using 1 or 2 GPUs. +| `cifarbaseline` | Reproduce results for CIFAR-10 (baseline configuration) using 1 or 2 GPUs. + +The training configuration can be further customized with additional arguments. Common examples: + +* `--aug=noaug` disables ADA (default: enabled). +* `--mirror=1` amplifies the dataset with x-flips. Often beneficial, even with ADA. +* `--resume=ffhq1024 --snap=10` performs transfer learning from FFHQ trained at 1024x1024. +* `--resume=~/training-runs//network-snapshot-.pkl` resumes where a previous training run left off. +* `--gamma=10` overrides R1 gamma. We strongly recommend trying out at least a few different values for each new dataset. + +Augmentation fine-tuning: + +* `--aug=ada --target=0.7` adjusts ADA target value (default: 0.6). +* `--aug=adarv` selects the alternative ADA heuristic (requires a separate validation set). +* `--augpipe=blit` limits the augmentation pipeline to pixel blitting only. +* `--augpipe=bgcfnc` enables all available augmentations (blit, geom, color, filter, noise, cutout). +* `--cmethod=bcr` enables bCR with small integer translations. + +Please refer to [`python train.py --help`](./docs/train-help.txt) for the full list. + +## Expected training time + +The total training time depends heavily on the resolution, number of GPUs, desired quality, dataset, and hyperparameters. 
In general, the training time can be expected to scale linearly with respect to the resolution and inversely proportional with respect to the number of GPUs. Small datasets tend to reach their lowest achievable FID faster than larger ones, but the convergence is somewhat less predictable. Transfer learning tends to converge significantly faster than training from scratch. + +To give a rough idea of typical training times, the following figure shows several examples of FID as a function of wallclock time. Each curve corresponds to training a given dataset from scratch using `--cfg=auto` with a given number of NVIDIA Tesla V100 GPUs: + +![Training curves](./docs/stylegan2-ada-training-curves.png) + +Please note that `--cfg=auto` only serves as a reasonable first guess for the hyperparameters — it does not necessarily lead to optimal results for a given dataset. For example, `--cfg=stylegan2` yields considerably better FID for FFHQ-140k at 1024x1024 than illustrated above. We recommend trying out at least a few different values of `--gamma` for each new dataset. + +## Preparing training set sweeps + +In the paper, we perform several experiments using artificially limited/amplified versions of the training data, such as `ffhq30k`, `ffhq140k`, and `lsuncat30k`. These are constructed by first unpacking the original dataset into a temporary directory with `python dataset_tool.py unpack` and then repackaging the appropriate versions into TFRecords with `python dataset_tool.py pack`. In the following examples, the temporary directories are created under `/tmp` and can be safely deleted afterwards. + +```.bash +# Unpack FFHQ images at 256x256 resolution. +python dataset_tool.py unpack --resolution=256 \ + --tfrecord_dir=~/ffhq-dataset/tfrecords/ffhq --output_dir=/tmp/ffhq-unpacked + +# Create subset with 30k images. +python dataset_tool.py pack --num_train=30000 --num_validation=10000 --seed=123 \ + --tfrecord_dir=~/datasets/ffhq30k --unpacked_dir=/tmp/ffhq-unpacked + +# Create amplified version with 140k images. +python dataset_tool.py pack --num_train=70000 --num_validation=0 --mirror=1 --seed=123 \ + --tfrecord_dir=~/datasets/ffhq140k --unpacked_dir=/tmp/ffhq-unpacked + +# Unpack LSUN Cat images at 256x256 resolution. +python dataset_tool.py unpack --resolution=256 \ + --tfrecord_dir=~/datasets/lsuncat200k --output_dir=/tmp/lsuncat200k-unpacked + +# Create subset with 30k images. +python dataset_tool.py pack --num_train=30000 --num_validation=10000 --seed=123 \ + --tfrecord_dir=~/datasets/lsuncat30k --unpacked_dir=/tmp/lsuncat200k-unpacked +``` + +Please note that when training with artifically limited/amplified datasets, the quality metrics (e.g., `fid50k_full`) should still be evaluated against the corresponding original datasets. This can be done by specifying a separate metric dataset for `train.py` and `calc_metrics.py` using the `--metricdata` argument. For example: + +```.bash +python train.py [OTHER_OPTIONS] --data=~/datasets/ffhq30k --metricdata=~/ffhq-dataset/tfrecords/ffhq +``` + +## Reproducing training runs from the paper + +The pre-trained network pickles ([`stylegan2-ada/pretrained/paper-fig*`](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/)) reflect the training configuration the same way as the output directory names, making it straightforward to reproduce a given training run from the paper. For example: + +```.bash +# 1. 
AFHQ Dog +# paper-fig11a-small-datasets/afhqdog-mirror-paper512-ada.pkl +python train.py --outdir=~/training-runs --gpus=8 --data=~/datasets/afhqdog \ + --mirror=1 --cfg=paper512 --aug=ada + +# 2. Class-conditional CIFAR-10 +# pretrained/paper-fig11b-cifar10/cifar10c-cifar-ada-best-fid.pkl +python train.py --outdir=~/training-runs --gpus=2 --data=~/datasets/cifar10c \ + --cfg=cifar --aug=ada + +# 3. MetFaces with transfer learning from FFHQ +# paper-fig11a-small-datasets/metfaces-mirror-paper1024-ada-resumeffhq1024.pkl +python train.py --outdir=~/training-runs --gpus=8 --data=~/datasets/metfaces \ + --mirror=1 --cfg=paper1024 --aug=ada --resume=ffhq1024 --snap=10 + +# 4. 10k subset of FFHQ with ADA and bCR +# paper-fig7c-training-set-sweeps/ffhq10k-paper256-ada-bcr.pkl +python train.py --outdir=~/training-runs --gpus=8 --data=~/datasets/ffhq10k \ + --cfg=paper256 --aug=ada --cmethod=bcr --metricdata=~/ffhq-dataset/tfrecords/ffhq + +# 5. StyleGAN2 config F +# transfer-learning-source-nets/ffhq-res1024-mirror-stylegan2-noaug.pkl +python train.py --outdir=~/training-runs --gpus=8 --data=~/ffhq-dataset/tfrecords/ffhq \ + --res=1024 --mirror=1 --cfg=stylegan2 --aug=noaug --metrics=fid50k +``` + +**Notes**: +* You can use fewer GPUs than shown in the above examples. This will only increase the training time — it will not affect the quality of the results. +* Example 3 specifies `--snap=10` to export network pickles more frequently than usual. This is recommended, because transfer learning tends to yield very fast convergence. +* Example 4 specifies `--metricdata` to evaluate quality metrics against the original FFHQ dataset, not the artificially limited 10k subset used for training. +* Example 5 specifies `--metrics=fid50k` to evaluate FID the same way as in the StyleGAN2 paper (see below). + +## Quality metrics + +By default, `train.py` will automatically compute FID for each network pickle. We strongly recommend inspecting `metric-fid50k_full.txt` at regular intervals to monitor the training progress. When desired, the automatic computation can be disabled with `--metrics none` to speed up the training. + +Additional quality metrics can also be computed after the training: + +```.bash +# Previous training run: look up options automatically, save result to text file. +python calc_metrics.py --metrics=pr50k3_full \ + --network=~/training-runs/00000-ffhq10k-res64-auto1/network-snapshot-000000.pkl + +# Pretrained network pickle: specify dataset explicitly, print result to stdout. +python calc_metrics.py --metrics=fid50k_full --metricdata=~/datasets/ffhq --mirror=1 \ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/ffhq.pkl +``` + +The first example will automatically find `training_options.json` stored alongside the network pickle and perform the same operation as if `--metrics pr50k3_full` had been specified during training. The second example will download a pre-trained network pickle, in which case the values of `--mirror` and `--metricdata` have to be specified explicitly. + +Note that many of the metrics have a significant one-off cost (up to an hour or more) when they are calculated for the first time using a given dataset. Also note that the evaluation is done using a different random seed each time, so the results will vary if the same metric is computed multiple times. + +We employ the following metrics in the ADA paper. 
The expected execution times correspond to using one Tesla V100 GPU at 1024x1024 and 256x256 resolution: + +| Metric | 1024x1024 | 256x256 | Description | +| :----- | :-------: | :-----: | :---------- | +| `fid50k_full` | 15 min | 5 min | Fréchet inception distance[1] against the full dataset. +| `kid50k_full` | 15 min | 5 min | Kernel inception distance[2] against the full dataset. +| `pr50k3_full` | 20 min | 10 min | Precision and recall[3] againt the full dataset. +| `is50k` | 25 min | 5 min | Inception score[4] for CIFAR-10. + +In addition, all metrics that were used in the [StyleGAN](https://github.com/NVlabs/stylegan) and [StyleGAN2](https://github.com/NVlabs/stylegan2) papers are also supported for backwards compatibility: + +| Legacy: StyleGAN2 | 1024x1024 | Description | +| :---------------- | :-------: | :---------- | +| `fid50k` | 15 min | Fréchet inception distance against 50k real images. +| `kid50k` | 15 min | Kernel inception distance against 50k real images. +| `pr50k3` | 20 min | Precision and recall against 50k real images. +| `ppl2_wend` | 40 min | Perceptual path length[5] in W at path endpoints against full image. + +| Legacy: StyleGAN | 1024x1024 | Description | +| :--------------- | :-------: | :---------- | +| `ppl_zfull` | 40 min | Perceptual path length in Z for full paths against cropped image. +| `ppl_wfull` | 40 min | Perceptual path length in W for full paths against cropped image. +| `ppl_zend` | 40 min | Perceptual path length in Z at path endpoints against cropped image. +| `ppl_wend` | 40 min | Perceptual path length in W at path endpoints against cropped image. +| `ls` | 10 hrs | Linear separability[5] with respect to CelebA attributes. + +References: +1. [GANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash Equilibrium](https://arxiv.org/abs/1706.08500), Heusel et al. 2017 +2. [Demystifying MMD GANs](https://arxiv.org/abs/1801.01401), Bińkowski et al. 2018 +3. [Improved Precision and Recall Metric for Assessing Generative Models](https://arxiv.org/abs/1904.06991), Kynkäänniemi et al. 2019 +4. [Improved Techniques for Training GANs](https://arxiv.org/abs/1606.03498), Salimans et al. 2016 +5. [A Style-Based Generator Architecture for Generative Adversarial Networks](https://arxiv.org/abs/1812.04948), Karras et al. 2018 + +## License + +Copyright © 2020, NVIDIA Corporation. All rights reserved. + +This work is made available under the [Nvidia Source Code License](https://nvlabs.github.io/stylegan2-ada/license.html). + +## Citation + +``` +@inproceedings{Karras2020ada, + title = {Training Generative Adversarial Networks with Limited Data}, + author = {Tero Karras and Miika Aittala and Janne Hellsten and Samuli Laine and Jaakko Lehtinen and Timo Aila}, + booktitle = {Proc. NeurIPS}, + year = {2020} +} +``` + +## Development + +This is a research reference implementation and is treated as a +one-time code drop. As such, we do not accept outside code +contributions in the form of pull requests. + +## Acknowledgements + +We thank David Luebke for helpful comments; Tero Kuosmanen and Sabu Nadarajan for their support with compute infrastructure; and Edgar Schönfeld for guidance on setting up unconditional BigGAN. diff --git a/calc_metrics.py b/calc_metrics.py new file mode 100755 index 00000000..78b1e440 --- /dev/null +++ b/calc_metrics.py @@ -0,0 +1,163 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
+# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Calculate quality metrics for previous training run or pretrained network pickle.""" + +import os +import argparse +import json +import pickle +import dnnlib +import dnnlib.tflib as tflib + +from metrics import metric_defaults + +#---------------------------------------------------------------------------- + +class UserError(Exception): + pass + +#---------------------------------------------------------------------------- + +def calc_metrics(network_pkl, metric_names, metricdata, mirror, gpus): + tflib.init_tf() + + # Initialize metrics. + metrics = [] + for name in metric_names: + if name not in metric_defaults.metric_defaults: + raise UserError('\n'.join(['--metrics can only contain the following values:', 'none'] + list(metric_defaults.metric_defaults.keys()))) + metrics.append(dnnlib.util.construct_class_by_name(**metric_defaults.metric_defaults[name])) + + # Load network. + if not dnnlib.util.is_url(network_pkl, allow_file_urls=True) and not os.path.isfile(network_pkl): + raise UserError('--network must point to a file or URL') + print(f'Loading network from "{network_pkl}"...') + with dnnlib.util.open_url(network_pkl) as f: + _G, _D, Gs = pickle.load(f) + Gs.print_layers() + + # Look up training options. + run_dir = None + training_options = None + if os.path.isfile(network_pkl): + potential_run_dir = os.path.dirname(network_pkl) + potential_json_file = os.path.join(potential_run_dir, 'training_options.json') + if os.path.isfile(potential_json_file): + print(f'Looking up training options from "{potential_json_file}"...') + run_dir = potential_run_dir + with open(potential_json_file, 'rt') as f: + training_options = json.load(f, object_pairs_hook=dnnlib.EasyDict) + if training_options is None: + print('Could not look up training options; will rely on --metricdata and --mirror') + + # Choose dataset options. + dataset_options = dnnlib.EasyDict() + if training_options is not None: + dataset_options.update(training_options.metric_dataset_args) + dataset_options.resolution = Gs.output_shapes[0][-1] + dataset_options.max_label_size = Gs.input_shapes[1][-1] + if metricdata is not None: + if not os.path.isdir(metricdata): + raise UserError('--metricdata must point to a directory containing *.tfrecords') + dataset_options.path = metricdata + if mirror is not None: + dataset_options.mirror_augment = mirror + if 'path' not in dataset_options: + raise UserError('--metricdata must be specified explicitly') + + # Print dataset options. + print() + print('Dataset options:') + print(json.dumps(dataset_options, indent=2)) + + # Evaluate metrics. 
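+ # For each requested metric, point it at the dataset options and run_dir resolved above, then evaluate the loaded network pickle on the requested number of GPUs.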
+ for metric in metrics: + print() + print(f'Evaluating {metric.name}...') + metric.configure(dataset_args=dataset_options, run_dir=run_dir) + metric.run(network_pkl=network_pkl, num_gpus=gpus) + +#---------------------------------------------------------------------------- + +def _str_to_bool(v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + if v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + raise argparse.ArgumentTypeError('Boolean value expected.') + +def _parse_comma_sep(s): + if s is None or s.lower() == 'none' or s == '': + return [] + return s.split(',') + +#---------------------------------------------------------------------------- + +_cmdline_help_epilog = '''examples: + + # Previous training run: look up options automatically, save result to text file. + python %(prog)s --metrics=pr50k3_full \\ + --network=~/training-runs/00000-ffhq10k-res64-auto1/network-snapshot-000000.pkl + + # Pretrained network pickle: specify dataset explicitly, print result to stdout. + python %(prog)s --metrics=fid50k_full --metricdata=~/datasets/ffhq --mirror=1 \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/ffhq.pkl + +available metrics: + + ADA paper: + fid50k_full Frechet inception distance against the full dataset. + kid50k_full Kernel inception distance against the full dataset. + pr50k3_full Precision and recall againt the full dataset. + is50k Inception score for CIFAR-10. + + Legacy: StyleGAN2 + fid50k Frechet inception distance against 50k real images. + kid50k Kernel inception distance against 50k real images. + pr50k3 Precision and recall against 50k real images. + ppl2_wend Perceptual path length in W at path endpoints against full image. + + Legacy: StyleGAN + ppl_zfull Perceptual path length in Z for full paths against cropped image. + ppl_wfull Perceptual path length in W for full paths against cropped image. + ppl_zend Perceptual path length in Z at path endpoints against cropped image. + ppl_wend Perceptual path length in W at path endpoints against cropped image. + ls Linear separability with respect to CelebA attributes. 
+''' + +#---------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description='Calculate quality metrics for previous training run or pretrained network pickle.', + epilog=_cmdline_help_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument('--network', help='Network pickle filename or URL', dest='network_pkl', metavar='PATH') + parser.add_argument('--metrics', help='Comma-separated list or "none" (default: %(default)s)', dest='metric_names', type=_parse_comma_sep, default='fid50k_full', metavar='LIST') + parser.add_argument('--metricdata', help='Dataset to evaluate metrics against (default: look up from training options)', metavar='PATH') + parser.add_argument('--mirror', help='Whether the dataset was augmented with x-flips during training (default: look up from training options)', type=_str_to_bool, metavar='BOOL') + parser.add_argument('--gpus', help='Number of GPUs to use (default: %(default)s)', type=int, default=1, metavar='INT') + + args = parser.parse_args() + try: + calc_metrics(**vars(args)) + except UserError as err: + print(f'Error: {err}') + exit(1) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + main() + +#---------------------------------------------------------------------------- diff --git a/dataset_tool.py b/dataset_tool.py new file mode 100755 index 00000000..298f4253 --- /dev/null +++ b/dataset_tool.py @@ -0,0 +1,995 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. 
+ +"""Tool for creating multi-resolution TFRecords datasets.""" + +import os +import sys +import glob +import argparse +import threading +import six.moves.queue as Queue +import traceback +import numpy as np +import tensorflow as tf +import PIL.Image +import dnnlib.tflib as tflib +import scipy +import scipy.ndimage +import scipy.misc +import datetime +from tqdm import tqdm + +from training import dataset + +#---------------------------------------------------------------------------- + +def error(msg): + print('Error: ' + msg) + exit(1) + +#---------------------------------------------------------------------------- + +class TFRecordExporter: + def __init__(self, tfrecord_dir, expected_images, print_progress=True, progress_interval=10, tfr_prefix=None): + self.tfrecord_dir = tfrecord_dir + if tfr_prefix is None: + self.tfr_prefix = os.path.join(self.tfrecord_dir, os.path.basename(self.tfrecord_dir)) + else: + self.tfr_prefix = os.path.join(self.tfrecord_dir, tfr_prefix) + self.expected_images = expected_images + self.cur_images = 0 + self.shape = None + self.resolution_log2 = None + self.tfr_writers = [] + self.print_progress = print_progress + self.progress_interval = progress_interval + + if self.print_progress: + name = '' if tfr_prefix is None else f' ({tfr_prefix})' + print(f'Creating dataset "{tfrecord_dir}"{name}') + if not os.path.isdir(self.tfrecord_dir): + os.makedirs(self.tfrecord_dir) + assert os.path.isdir(self.tfrecord_dir) + + def close(self): + if self.print_progress: + print('%-40s\r' % 'Flushing data...', end='', flush=True) + for tfr_writer in self.tfr_writers: + tfr_writer.close() + self.tfr_writers = [] + if self.print_progress: + print('%-40s\r' % '', end='', flush=True) + print('Added %d images.' % self.cur_images) + + def choose_shuffled_order(self): # Note: Images and labels must be added in shuffled order. 
+ order = np.arange(self.expected_images) + np.random.RandomState(123).shuffle(order) + return order + + def add_image(self, img): + if self.print_progress and self.cur_images % self.progress_interval == 0: + print('%d / %d\r' % (self.cur_images, self.expected_images), end='', flush=True) + if self.shape is None: + self.shape = img.shape + self.resolution_log2 = int(np.log2(self.shape[1])) + assert self.shape[0] in [1, 3] + assert self.shape[1] == self.shape[2] + assert self.shape[1] == 2**self.resolution_log2 + tfr_opt = tf.io.TFRecordOptions(tf.compat.v1.io.TFRecordCompressionType.NONE) + for lod in range(self.resolution_log2 - 1): + tfr_file = self.tfr_prefix + '-r%02d.tfrecords' % (self.resolution_log2 - lod) + self.tfr_writers.append(tf.io.TFRecordWriter(tfr_file, tfr_opt)) + assert img.shape == self.shape + for lod, tfr_writer in enumerate(self.tfr_writers): + if lod: + img = img.astype(np.float32) + img = (img[:, 0::2, 0::2] + img[:, 0::2, 1::2] + img[:, 1::2, 0::2] + img[:, 1::2, 1::2]) * 0.25 + quant = np.rint(img).clip(0, 255).astype(np.uint8) + ex = tf.train.Example(features=tf.train.Features(feature={ + 'shape': tf.train.Feature(int64_list=tf.train.Int64List(value=quant.shape)), + 'data': tf.train.Feature(bytes_list=tf.train.BytesList(value=[quant.tostring()]))})) + tfr_writer.write(ex.SerializeToString()) + self.cur_images += 1 + + def add_labels(self, labels): + if self.print_progress: + print('%-40s\r' % 'Saving labels...', end='', flush=True) + assert labels.shape[0] == self.cur_images + with open(self.tfr_prefix + '-rxx.labels', 'wb') as f: + np.save(f, labels.astype(np.float32)) + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + +# ---------------------------------------------------------------------------- + +class HDF5Exporter: + def __init__(self, h5_filename, resolution, channels, compress=False, expected_images=0, print_progress=True, progress_interval=10): + rlog2 = int(np.floor(np.log2(resolution))) + assert resolution == 2 ** rlog2 + + self.h5_filename = h5_filename + self.resolution = resolution + self.channels = channels + self.expected_images = expected_images + self.cur_images = 0 + self.h5_file = None + self.h5_lods = [] + self.buffers = [] + self.buffer_sizes = [] + self.print_progress = print_progress + self.progress_interval = progress_interval + + if self.print_progress: + print('Creating dataset "%s"' % h5_filename) + import h5py # conda install h5py + self.h5_file = h5py.File(h5_filename, 'w') + for lod in range(rlog2, -1, -1): + r = 2 ** lod + c = channels + bytes_per_item = c * (r ** 2) + chunk_size = int(np.ceil(128.0 / bytes_per_item)) + buffer_size = int(np.ceil(512.0 * np.exp2(20) / bytes_per_item)) + compression = 'gzip' if compress else None + compression_opts = 4 if compress else None + lod = self.h5_file.create_dataset( + 'data%dx%d' % (r, r), shape=(0, c, r, r), dtype=np.uint8, + maxshape=(None, c, r, r), chunks=(chunk_size, c, r, r), + compression=compression, compression_opts=compression_opts) + self.h5_lods.append(lod) + self.buffers.append(np.zeros((buffer_size, c, r, r), dtype=np.uint8)) + self.buffer_sizes.append(0) + + def close(self): + if self.print_progress: + print('%-40s\r' % 'Flushing data...', end='', flush=True) + for lod in range(len(self.h5_lods)): + self._flush_lod(lod) + self.h5_file.close() + self.h5_file = None + self.h5_lods = None + if self.print_progress: + print('%-40s\r' % '', end='', flush=True) + print('Added %d images.' 
% self.cur_images) + + def add_image(self, img): + self.add_images(np.stack([img])) + + def add_images(self, img): + assert img.ndim == 4 and img.shape[1] == self.channels and img.shape[2] == img.shape[3] + assert img.shape[2] >= self.resolution and img.shape[2] == 2 ** int(np.floor(np.log2(img.shape[2]))) + if self.print_progress and (self.cur_images - 1) % self.progress_interval >= self.progress_interval - img.shape[0]: + print('%d / %d\r' % (self.cur_images, self.expected_images), end='', flush=True) + + for lod in range(len(self.h5_lods)): + while img.shape[2] > self.resolution // (2 ** lod): + img = img.astype(np.float32) + img = (img[:, :, 0::2, 0::2] + img[:, :, 0::2, 1::2] + img[:, :, 1::2, 0::2] + img[:, :, 1::2, 1::2]) * 0.25 + quant = np.uint8(np.clip(np.round(img), 0, 255)) + ofs = 0 + while ofs < quant.shape[0]: + num = min(quant.shape[0] - ofs, self.buffers[lod].shape[0] - self.buffer_sizes[lod]) + self.buffers[lod][self.buffer_sizes[lod]: self.buffer_sizes[lod] + num] = quant[ofs: ofs + num] + self.buffer_sizes[lod] += num + if self.buffer_sizes[lod] == self.buffers[lod].shape[0]: + self._flush_lod(lod) + ofs += num + self.cur_images += img.shape[0] + + def add_labels(self, labels): + if self.print_progress: + print('%-40s\r' % 'Saving labels...', end='', flush=True) + assert labels.shape[0] == self.cur_images + with open(os.path.splitext(self.h5_filename)[0] + '-labels.npy', 'wb') as f: + np.save(f, labels.astype(np.float32)) + + def _flush_lod(self, lod): + num = self.buffer_sizes[lod] + if num > 0: + self.h5_lods[lod].resize(self.h5_lods[lod].shape[0] + num, axis=0) + self.h5_lods[lod][-num:] = self.buffers[lod][:num] + self.buffer_sizes[lod] = 0 + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + +#---------------------------------------------------------------------------- + +class ExceptionInfo(object): + def __init__(self): + self.value = sys.exc_info()[1] + self.traceback = traceback.format_exc() + +#---------------------------------------------------------------------------- + +class WorkerThread(threading.Thread): + def __init__(self, task_queue): + threading.Thread.__init__(self) + self.task_queue = task_queue + + def run(self): + while True: + func, args, result_queue = self.task_queue.get() + if func is None: + break + try: + result = func(*args) + except: + result = ExceptionInfo() + result_queue.put((result, args)) + +#---------------------------------------------------------------------------- + +class ThreadPool(object): + def __init__(self, num_threads): + assert num_threads >= 1 + self.task_queue = Queue.Queue() + self.result_queues = dict() + self.num_threads = num_threads + for _idx in range(self.num_threads): + thread = WorkerThread(self.task_queue) + thread.daemon = True + thread.start() + + def add_task(self, func, args=()): + assert hasattr(func, '__call__') # must be a function + if func not in self.result_queues: + self.result_queues[func] = Queue.Queue() + self.task_queue.put((func, args, self.result_queues[func])) + + def get_result(self, func): # returns (result, args) + result, args = self.result_queues[func].get() + if isinstance(result, ExceptionInfo): + print('\n\nWorker thread caught an exception:\n' + result.traceback) + raise result.value + return result, args + + def finish(self): + for _idx in range(self.num_threads): + self.task_queue.put((None, (), None)) + + def __enter__(self): # for 'with' statement + return self + + def __exit__(self, *excinfo): + self.finish() + + def 
process_items_concurrently(self, item_iterator, process_func=lambda x: x, pre_func=lambda x: x, post_func=lambda x: x, max_items_in_flight=None): + if max_items_in_flight is None: max_items_in_flight = self.num_threads * 4 + assert max_items_in_flight >= 1 + results = [] + retire_idx = [0] + + def task_func(prepared, _idx): + return process_func(prepared) + + def retire_result(): + processed, (_prepared, idx) = self.get_result(task_func) + results[idx] = processed + while retire_idx[0] < len(results) and results[retire_idx[0]] is not None: + yield post_func(results[retire_idx[0]]) + results[retire_idx[0]] = None + retire_idx[0] += 1 + + for idx, item in enumerate(item_iterator): + prepared = pre_func(item) + results.append(None) + self.add_task(func=task_func, args=(prepared, idx)) + while retire_idx[0] < idx - max_items_in_flight + 2: + for res in retire_result(): yield res + while retire_idx[0] < len(results): + for res in retire_result(): yield res + +#---------------------------------------------------------------------------- + +def info(tfrecord_dir): + print() + print('%-20s%s' % ('Dataset name:', os.path.basename(tfrecord_dir))) + + bytes_total = 0 + bytes_max = 0 + num_files = 0 + for f in sorted(glob.glob(os.path.join(tfrecord_dir, '*'))): + if os.path.isfile(f): + fs = os.stat(f).st_size + bytes_total += fs + bytes_max = max(bytes_max, fs) + num_files += 1 + print('%-20s%.2f' % ('Total size GB:', bytes_total / (1 << 30))) + print('%-20s%.2f' % ('Largest file GB:', bytes_max / (1 << 30))) + print('%-20s%d' % ('Num files:', num_files)) + + tflib.init_tf() + dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle=False) + tflib.init_uninitialized_vars() + + print('%-20s%d' % ('Image width:', dset.shape[2])) + print('%-20s%d' % ('Image height:', dset.shape[1])) + print('%-20s%d' % ('Image channels:', dset.shape[0])) + print('%-20s%s' % ('Image datatype:', dset.dtype)) + print('%-20s%d' % ('Label size:', dset.label_size)) + print('%-20s%s' % ('Label datatype:', dset.label_dtype)) + + num_images = 0 + label_min = np.finfo(np.float64).max + label_max = np.finfo(np.float64).min + label_norm = 0 + lod = max(dset.resolution_log2 - 2, 0) + while True: + print('\r%-20s%d' % ('Num images:', num_images), end='', flush=True) + _images, labels = dset.get_minibatch_np(10000, lod=lod) # not accurate + if labels is None: + break + num_images += labels.shape[0] + if dset.label_size: + label_min = min(label_min, np.min(labels)) + label_max = max(label_max, np.max(labels)) + label_norm += np.sum(np.sqrt(np.sum(np.square(labels), axis=1))) + + print('\r%-20s%d' % ('Num images:', num_images)) + print('%-20s%s' % ('Label range:', '%g -- %g' % (label_min, label_max) if num_images and dset.label_size else 'n/a')) + print('%-20s%s' % ('Label L2 norm:', '%g' % (label_norm / num_images) if num_images and dset.label_size else 'n/a')) + print() + +#---------------------------------------------------------------------------- + +def display(tfrecord_dir): + print('Loading dataset "%s"' % tfrecord_dir) + tflib.init_tf() + dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle=False) + tflib.init_uninitialized_vars() + import cv2 # pip install opencv-python + + idx = 0 + while True: + images, labels = dset.get_minibatch_np(1) + if images is None: + break + if idx == 0: + print('Displaying images') + cv2.namedWindow('dataset_tool') + print('Press SPACE or ENTER to advance, ESC to exit') + print('\nidx = %-8d\nlabel = %s' % (idx, 
labels[0].tolist())) + cv2.imshow('dataset_tool', images[0].transpose(1, 2, 0)[:, :, ::-1]) # CHW => HWC, RGB => BGR + idx += 1 + if cv2.waitKey() == 27: + break + print('\nDisplayed %d images.' % idx) + +#---------------------------------------------------------------------------- + +def extract(tfrecord_dir, output_dir): + print('Loading dataset "%s"' % tfrecord_dir) + tflib.init_tf() + dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size=0, repeat=False, shuffle=False) + tflib.init_uninitialized_vars() + + print('Extracting images to "%s"' % output_dir) + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + idx = 0 + while True: + if idx % 10 == 0: + print('%d\r' % idx, end='', flush=True) + images, _labels = dset.get_minibatch_np(1) + if images is None: + break + if images.shape[1] == 1: + img = PIL.Image.fromarray(images[0][0], 'L') + else: + img = PIL.Image.fromarray(images[0].transpose(1, 2, 0), 'RGB') + img.save(os.path.join(output_dir, 'img%08d.png' % idx)) + idx += 1 + print('Extracted %d images.' % idx) + +#---------------------------------------------------------------------------- + +def compare(tfrecord_dir_a, tfrecord_dir_b, ignore_labels): + max_label_size = 0 if ignore_labels else 'full' + print('Loading dataset "%s"' % tfrecord_dir_a) + tflib.init_tf() + dset_a = dataset.TFRecordDataset(tfrecord_dir_a, max_label_size=max_label_size, repeat=False, shuffle=False) + print('Loading dataset "%s"' % tfrecord_dir_b) + dset_b = dataset.TFRecordDataset(tfrecord_dir_b, max_label_size=max_label_size, repeat=False, shuffle=False) + tflib.init_uninitialized_vars() + + print('Comparing datasets') + idx = 0 + identical_images = 0 + identical_labels = 0 + while True: + if idx % 100 == 0: + print('%d\r' % idx, end='', flush=True) + images_a, labels_a = dset_a.get_minibatch_np(1) + images_b, labels_b = dset_b.get_minibatch_np(1) + if images_a is None or images_b is None: + if images_a is not None or images_b is not None: + print('Datasets contain different number of images') + break + if images_a.shape == images_b.shape and np.all(images_a == images_b): + identical_images += 1 + else: + print('Image %d is different' % idx) + if labels_a.shape == labels_b.shape and np.all(labels_a == labels_b): + identical_labels += 1 + else: + print('Label %d is different' % idx) + idx += 1 + print('Identical images: %d / %d' % (identical_images, idx)) + if not ignore_labels: + print('Identical labels: %d / %d' % (identical_labels, idx)) + +#---------------------------------------------------------------------------- + +def create_mnist(tfrecord_dir, mnist_dir): + print('Loading MNIST from "%s"' % mnist_dir) + import gzip + with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file: + images = np.frombuffer(file.read(), np.uint8, offset=16) + with gzip.open(os.path.join(mnist_dir, 'train-labels-idx1-ubyte.gz'), 'rb') as file: + labels = np.frombuffer(file.read(), np.uint8, offset=8) + images = images.reshape(-1, 1, 28, 28) + images = np.pad(images, [(0,0), (0,0), (2,2), (2,2)], 'constant', constant_values=0) + assert images.shape == (60000, 1, 32, 32) and images.dtype == np.uint8 + assert labels.shape == (60000,) and labels.dtype == np.uint8 + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 9 + onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) + onehot[np.arange(labels.size), labels] = 1.0 + + with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: + order = tfr.choose_shuffled_order() 
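+ # Export the images in shuffled order; the one-hot labels are written afterwards using the same permutation.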
+ for idx in range(order.size): + tfr.add_image(images[order[idx]]) + tfr.add_labels(onehot[order]) + +#---------------------------------------------------------------------------- + +def create_mnistrgb(tfrecord_dir, mnist_dir, num_images=1000000, random_seed=123): + print('Loading MNIST from "%s"' % mnist_dir) + import gzip + with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file: + images = np.frombuffer(file.read(), np.uint8, offset=16) + images = images.reshape(-1, 28, 28) + images = np.pad(images, [(0,0), (2,2), (2,2)], 'constant', constant_values=0) + assert images.shape == (60000, 32, 32) and images.dtype == np.uint8 + assert np.min(images) == 0 and np.max(images) == 255 + + with TFRecordExporter(tfrecord_dir, num_images) as tfr: + rnd = np.random.RandomState(random_seed) + for _idx in range(num_images): + tfr.add_image(images[rnd.randint(images.shape[0], size=3)]) + +#---------------------------------------------------------------------------- + +def create_cifar10(tfrecord_dir, cifar10_dir, ignore_labels): + print('Loading CIFAR-10 from "%s"' % cifar10_dir) + import pickle + images = [] + labels = [] + for batch in range(1, 6): + with open(os.path.join(cifar10_dir, 'data_batch_%d' % batch), 'rb') as file: + data = pickle.load(file, encoding='latin1') + images.append(data['data'].reshape(-1, 3, 32, 32)) + labels.append(data['labels']) + images = np.concatenate(images) + labels = np.concatenate(labels) + assert ignore_labels in [0, 1] + assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8 + assert labels.shape == (50000,) and labels.dtype in [np.int32, np.int64] + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 9 + onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) + onehot[np.arange(labels.size), labels] = 1.0 + + with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + tfr.add_image(images[order[idx]]) + if not ignore_labels: + tfr.add_labels(onehot[order]) + +#---------------------------------------------------------------------------- + +def create_cifar100(tfrecord_dir, cifar100_dir): + print('Loading CIFAR-100 from "%s"' % cifar100_dir) + import pickle + with open(os.path.join(cifar100_dir, 'train'), 'rb') as file: + data = pickle.load(file, encoding='latin1') + images = data['data'].reshape(-1, 3, 32, 32) + labels = np.array(data['fine_labels']) + assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8 + assert labels.shape == (50000,) and labels.dtype == np.int32 + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 99 + onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) + onehot[np.arange(labels.size), labels] = 1.0 + + with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + tfr.add_image(images[order[idx]]) + tfr.add_labels(onehot[order]) + +#---------------------------------------------------------------------------- + +def create_svhn(tfrecord_dir, svhn_dir): + print('Loading SVHN from "%s"' % svhn_dir) + import pickle + images = [] + labels = [] + for batch in range(1, 4): + with open(os.path.join(svhn_dir, 'train_%d.pkl' % batch), 'rb') as file: + data = pickle.load(file, encoding='latin1') + images.append(data[0]) + labels.append(data[1]) + images = np.concatenate(images) + labels = np.concatenate(labels) + 
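+ # The three pickled batches together should cover the full 73,257-image SVHN training split (verified by the asserts below).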
assert images.shape == (73257, 3, 32, 32) and images.dtype == np.uint8 + assert labels.shape == (73257,) and labels.dtype == np.uint8 + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 9 + onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) + onehot[np.arange(labels.size), labels] = 1.0 + + with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + tfr.add_image(images[order[idx]]) + tfr.add_labels(onehot[order]) + +#---------------------------------------------------------------------------- + +def create_lsun(tfrecord_dir, lmdb_dir, resolution=256, max_images=None): + print('Loading LSUN dataset from "%s"' % lmdb_dir) + import lmdb # pip install lmdb # pylint: disable=import-error + import cv2 # pip install opencv-python + import io + with lmdb.open(lmdb_dir, readonly=True, lock=False).begin(write=False) as txn: + total_images = txn.stat()['entries'] + if max_images is None: + max_images = total_images + with TFRecordExporter(tfrecord_dir, max_images) as tfr: + for _idx, (_key, value) in enumerate(txn.cursor()): + try: + try: + img = cv2.imdecode(np.fromstring(value, dtype=np.uint8), 1) + if img is None: + raise IOError('cv2.imdecode failed') + img = img[:, :, ::-1] # BGR => RGB + except IOError: + img = np.asarray(PIL.Image.open(io.BytesIO(value))) + crop = np.min(img.shape[:2]) + img = img[(img.shape[0] - crop) // 2 : (img.shape[0] + crop) // 2, (img.shape[1] - crop) // 2 : (img.shape[1] + crop) // 2] + img = PIL.Image.fromarray(img, 'RGB') + img = img.resize((resolution, resolution), PIL.Image.ANTIALIAS) + img = np.asarray(img) + img = img.transpose([2, 0, 1]) # HWC => CHW + tfr.add_image(img) + except: + print(sys.exc_info()[1]) + if tfr.cur_images == max_images: + break + +#---------------------------------------------------------------------------- + +def create_lsun_wide(tfrecord_dir, lmdb_dir, width=512, height=384, max_images=None): + assert width == 2 ** int(np.round(np.log2(width))) + assert height <= width + print('Loading LSUN dataset from "%s"' % lmdb_dir) + import lmdb # pip install lmdb # pylint: disable=import-error + import cv2 # pip install opencv-python + import io + with lmdb.open(lmdb_dir, readonly=True).begin(write=False) as txn: + total_images = txn.stat()['entries'] + if max_images is None: + max_images = total_images + with TFRecordExporter(tfrecord_dir, max_images, print_progress=False) as tfr: + for idx, (_key, value) in enumerate(txn.cursor()): + try: + try: + img = cv2.imdecode(np.fromstring(value, dtype=np.uint8), 1) + if img is None: + raise IOError('cv2.imdecode failed') + img = img[:, :, ::-1] # BGR => RGB + except IOError: + img = np.asarray(PIL.Image.open(io.BytesIO(value))) + + ch = int(np.round(width * img.shape[0] / img.shape[1])) + if img.shape[1] < width or ch < height: + continue + + img = img[(img.shape[0] - ch) // 2 : (img.shape[0] + ch) // 2] + img = PIL.Image.fromarray(img, 'RGB') + img = img.resize((width, height), PIL.Image.ANTIALIAS) + img = np.asarray(img) + img = img.transpose([2, 0, 1]) # HWC => CHW + + canvas = np.zeros([3, width, width], dtype=np.uint8) + canvas[:, (width - height) // 2 : (width + height) // 2] = img + tfr.add_image(canvas) + print('\r%d / %d => %d ' % (idx + 1, total_images, tfr.cur_images), end='') + + except: + print(sys.exc_info()[1]) + if tfr.cur_images == max_images: + break + print() + +#---------------------------------------------------------------------------- + 
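+# Note: create_celeba below center-crops each 178x218 aligned CelebA PNG to
+# 128x128 around the point (cx, cy) before export.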
+def create_celeba(tfrecord_dir, celeba_dir, cx=89, cy=121): + print('Loading CelebA from "%s"' % celeba_dir) + glob_pattern = os.path.join(celeba_dir, 'img_align_celeba_png', '*.png') + image_filenames = sorted(glob.glob(glob_pattern)) + expected_images = 202599 + if len(image_filenames) != expected_images: + error('Expected to find %d images' % expected_images) + + with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + img = np.asarray(PIL.Image.open(image_filenames[order[idx]])) + assert img.shape == (218, 178, 3) + img = img[cy - 64 : cy + 64, cx - 64 : cx + 64] + img = img.transpose(2, 0, 1) # HWC => CHW + tfr.add_image(img) + +#---------------------------------------------------------------------------- + +def create_from_images(tfrecord_dir, image_dir, shuffle): + print('Loading images from "%s"' % image_dir) + image_filenames = sorted(glob.glob(os.path.join(image_dir, '*'))) + if len(image_filenames) == 0: + error('No input images found') + + img = np.asarray(PIL.Image.open(image_filenames[0])) + resolution = img.shape[0] + channels = img.shape[2] if img.ndim == 3 else 1 + if img.shape[1] != resolution: + error('Input images must have the same width and height') + if resolution != 2 ** int(np.floor(np.log2(resolution))): + error('Input image resolution must be a power-of-two') + if channels not in [1, 3]: + error('Input images must be stored as RGB or grayscale') + + with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr: + order = tfr.choose_shuffled_order() if shuffle else np.arange(len(image_filenames)) + for idx in range(order.size): + img = np.asarray(PIL.Image.open(image_filenames[order[idx]])) + if channels == 1: + img = img[np.newaxis, :, :] # HW => CHW + else: + img = img.transpose([2, 0, 1]) # HWC => CHW + tfr.add_image(img) + +#---------------------------------------------------------------------------- + +def create_from_hdf5(tfrecord_dir, hdf5_filename, shuffle): + print('Loading HDF5 archive from "%s"' % hdf5_filename) + import h5py # conda install h5py + with h5py.File(hdf5_filename, 'r') as hdf5_file: + hdf5_data = max([value for key, value in hdf5_file.items() if key.startswith('data')], key=lambda lod: lod.shape[3]) + with TFRecordExporter(tfrecord_dir, hdf5_data.shape[0]) as tfr: + order = tfr.choose_shuffled_order() if shuffle else np.arange(hdf5_data.shape[0]) + for idx in range(order.size): + tfr.add_image(hdf5_data[order[idx]]) + npy_filename = os.path.splitext(hdf5_filename)[0] + '-labels.npy' + if os.path.isfile(npy_filename): + tfr.add_labels(np.load(npy_filename)[order]) + +#---------------------------------------------------------------------------- + +def convert_to_hdf5(hdf5_filename, tfrecord_dir, compress): + print('Loading dataset "%s"' % tfrecord_dir) + tflib.init_tf() + dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle=False) + tflib.init_uninitialized_vars() + with HDF5Exporter(hdf5_filename, resolution=dset.shape[1], channels=dset.shape[0], compress=compress) as h5: + all_labels = [] + while True: + images, labels = dset.get_minibatch_np(1) + if images is None: + break + h5.add_images(images) + all_labels.append(labels) + all_labels = np.concatenate(all_labels) + if all_labels.size: + h5.add_labels(all_labels) + +#---------------------------------------------------------------------------- + +def hdf5_from_images(hdf5_filename, image_dir, compress): + print('Loading images from "%s"' % image_dir) + image_filenames 
= sorted(glob.glob(os.path.join(image_dir, '*'))) + if len(image_filenames) == 0: + error('No input images found') + + img = np.asarray(PIL.Image.open(image_filenames[0])) + resolution = img.shape[0] + channels = img.shape[2] if img.ndim == 3 else 1 + if img.shape[1] != resolution: + error('Input images must have the same width and height') + if resolution != 2 ** int(np.floor(np.log2(resolution))): + error('Input image resolution must be a power-of-two') + if channels not in [1, 3]: + error('Input images must be stored as RGB or grayscale') + + with HDF5Exporter(hdf5_filename, resolution=resolution, channels=channels, compress=compress, expected_images=len(image_filenames)) as h5: + for image_filename in image_filenames: + img = np.asarray(PIL.Image.open(image_filename)) + if channels == 1: + img = img[np.newaxis, :, :] # HW => CHW + else: + img = img.transpose([2, 0, 1]) # HWC => CHW + h5.add_image(img) + +#---------------------------------------------------------------------------- + +def make_png_path(outdir, idx): + idx_str = f'{idx:08d}' + return f'{os.path.join(outdir, idx_str[:5])}/img{idx_str}.png' + +def unpack(tfrecord_dir, output_dir, resolution=None): + print('Loading dataset "%s"' % tfrecord_dir) + tflib.init_tf() + dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle=False) + tflib.init_uninitialized_vars() + + print('Extracting images to "%s"' % output_dir) + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + idx = 0 + labels = [] + while True: + if idx % 10 == 0: + print('%d\r' % idx, end='', flush=True) + images, lbls = dset.get_minibatch_np(1) + if images is None: + break + if images.shape[1] == 1: + img = PIL.Image.fromarray(images[0][0], 'L') + else: + img = PIL.Image.fromarray(images[0].transpose(1, 2, 0), 'RGB') + if resolution is not None: + img = img.resize((resolution, resolution), PIL.Image.ANTIALIAS) + assert lbls.shape[0] == 1 + labels.append(lbls[0]) + png_fname = make_png_path(output_dir, idx) + os.makedirs(os.path.dirname(png_fname), exist_ok=True) + img.save(png_fname) + idx += 1 + np.savez(f'{output_dir}/pack_extras.npz', labels=np.array(labels, dtype=np.uint8), num_images=idx) + print('Extracted %d images.' % idx) + +#---------------------------------------------------------------------------- + +def pack(unpacked_dir, tfrecord_dir, num_train=None, num_validation=None, mirror=0, seed=None): + + def export_samples(source_idx, tfr_prefix): + if source_idx.shape[0] == 0: return + if source_idx.shape != (source_idx.shape[0], 2): + assert len(source_idx.shape) == 1 + source_idx = np.stack([np.zeros(source_idx.shape[0], dtype=np.uint8), source_idx], axis=-1) + with TFRecordExporter(tfrecord_dir, len(source_idx), tfr_prefix=tfr_prefix) as tfr: + for mirror, idx in source_idx: + img = np.asarray(PIL.Image.open(make_png_path(unpacked_dir, idx))) + img = img.transpose([2, 0, 1]) # HWC => CHW + if mirror != 0: + img = img[:, :, ::-1] + tfr.add_image(img) + tfr.add_labels(labels_onehot[source_idx[:,1]]) + + print(f'Loading an unpacked dataset from "{unpacked_dir}"') + + meta = np.load(f'{unpacked_dir}/pack_extras.npz') + num_images = int(meta['num_images']) + labels_onehot = meta['labels'] + assert (labels_onehot.shape[0] == num_images) and (len(labels_onehot.shape) == 2) + + order = np.arange(num_images) + if seed is not None: + np.random.RandomState(seed).shuffle(order) + + # Size the training and validation sets based on command line args. 
+ # + # If the training set size is not specified on the command line, use all + # except what's set aside for the validation set. + n_train = num_train if num_train is not None else num_images + n_valid = num_validation + if num_train is None: + n_train -= n_valid + assert n_train > 0 + assert (n_train + n_valid) <= num_images + + train_idx = order[0:n_train] + valid_idx = order[n_train:n_train+n_valid] + + if mirror != 0: + n = train_idx.shape[0] + train_idx = np.concatenate([ + np.stack([np.zeros(n, dtype=np.uint8), train_idx], axis=-1), + np.stack([np.ones(n, dtype=np.uint8), train_idx], axis=-1) + ]) + if seed is not None: + np.random.RandomState(seed).shuffle(train_idx) + + tfr = os.path.basename(tfrecord_dir) + export_samples(train_idx, tfr_prefix=tfr) + export_samples(valid_idx, tfr_prefix=f'validation-{tfr}') + +#---------------------------------------------------------------------------- + +def extract_brecahad_crops(brecahad_dir, output_dir, cropsize=256): + params = { + 256: { 'overlap': 0.0 }, + 512: { 'overlap': 0.5 } + } + if cropsize not in params: + print('--cropsize must be one of:', ', '.join(str(x) for x in params.keys())) + sys.exit(1) + + os.makedirs(output_dir, exist_ok=True) + + incr = int(cropsize*(1-params[cropsize]['overlap'])) + out_idx = 0 + for fname in tqdm(sorted(glob.glob(os.path.join(brecahad_dir, '*.tif')))): + src = PIL.Image.open(fname) + w, h = src.size + for x in range(0, w-cropsize+1, incr): + for y in range(0, h-cropsize+1, incr): + cropimg = src.crop((x, y, x+cropsize, y+cropsize)) + cropimg.save(os.path.join(output_dir, f'{out_idx:04d}.png')) + out_idx += 1 + print(f'Extracted {out_idx} image crops.') + +#---------------------------------------------------------------------------- + +def execute_cmdline(argv): + prog = argv[0] + parser = argparse.ArgumentParser( + prog = prog, + description = 'Tool for creating multi-resolution TFRecords datasets for StyleGAN and ProGAN.', + epilog = 'Type "%s -h" for more information.' 
% prog) + + subparsers = parser.add_subparsers(dest='command') + subparsers.required = True + def add_command(cmd, desc, example=None): + epilog = 'Example: %s %s' % (prog, example) if example is not None else None + return subparsers.add_parser(cmd, description=desc, help=desc, epilog=epilog) + + p = add_command( 'info', 'Display general info about dataset.', + 'info datasets/mnist') + p.add_argument( 'tfrecord_dir', help='Directory containing dataset') + + p = add_command( 'display', 'Display images in dataset.', + 'display datasets/mnist') + p.add_argument( 'tfrecord_dir', help='Directory containing dataset') + + p = add_command( 'extract', 'Extract images from dataset.', + 'extract datasets/mnist mnist-images') + p.add_argument( 'tfrecord_dir', help='Directory containing dataset') + p.add_argument( 'output_dir', help='Directory to extract the images into') + + p = add_command( 'compare', 'Compare two datasets.', + 'compare datasets/mydataset datasets/mnist') + p.add_argument( 'tfrecord_dir_a', help='Directory containing first dataset') + p.add_argument( 'tfrecord_dir_b', help='Directory containing second dataset') + p.add_argument( '--ignore_labels', help='Ignore labels (default: 0)', type=int, default=0) + + p = add_command( 'create_mnist', 'Create dataset for MNIST.', + 'create_mnist datasets/mnist ~/downloads/mnist') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'mnist_dir', help='Directory containing MNIST') + + p = add_command( 'create_mnistrgb', 'Create dataset for MNIST-RGB.', + 'create_mnistrgb datasets/mnistrgb ~/downloads/mnist') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'mnist_dir', help='Directory containing MNIST') + p.add_argument( '--num_images', help='Number of composite images to create (default: 1000000)', type=int, default=1000000) + p.add_argument( '--random_seed', help='Random seed (default: 123)', type=int, default=123) + + p = add_command( 'create_cifar10', 'Create dataset for CIFAR-10.', + 'create_cifar10 datasets/cifar10 ~/downloads/cifar10') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'cifar10_dir', help='Directory containing CIFAR-10') + p.add_argument( '--ignore_labels', help='Ignore labels (default: 0)', type=int, default=0) + + p = add_command( 'create_cifar100', 'Create dataset for CIFAR-100.', + 'create_cifar100 datasets/cifar100 ~/downloads/cifar100') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'cifar100_dir', help='Directory containing CIFAR-100') + + p = add_command( 'create_svhn', 'Create dataset for SVHN.', + 'create_svhn datasets/svhn ~/downloads/svhn') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'svhn_dir', help='Directory containing SVHN') + + p = add_command( 'create_lsun', 'Create dataset for single LSUN category.', + 'create_lsun datasets/lsun-car-100k ~/downloads/lsun/car_lmdb --resolution 256 --max_images 100000') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'lmdb_dir', help='Directory containing LMDB database') + p.add_argument( '--resolution', help='Output resolution (default: 256)', type=int, default=256) + p.add_argument( '--max_images', help='Maximum number of images (default: none)', type=int, default=None) + + p = add_command( 'create_lsun_wide', 'Create LSUN dataset with non-square aspect ratio.', + 'create_lsun_wide 
datasets/lsun-car-512x384 ~/downloads/lsun/car_lmdb --width 512 --height 384') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'lmdb_dir', help='Directory containing LMDB database') + p.add_argument( '--width', help='Output width (default: 512)', type=int, default=512) + p.add_argument( '--height', help='Output height (default: 384)', type=int, default=384) + p.add_argument( '--max_images', help='Maximum number of images (default: none)', type=int, default=None) + + p = add_command( 'create_celeba', 'Create dataset for CelebA.', + 'create_celeba datasets/celeba ~/downloads/celeba') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'celeba_dir', help='Directory containing CelebA') + p.add_argument( '--cx', help='Center X coordinate (default: 89)', type=int, default=89) + p.add_argument( '--cy', help='Center Y coordinate (default: 121)', type=int, default=121) + + p = add_command( 'create_from_images', 'Create dataset from a directory full of images.', + 'create_from_images datasets/mydataset myimagedir') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'image_dir', help='Directory containing the images') + p.add_argument( '--shuffle', help='Randomize image order (default: 1)', type=int, default=1) + + p = add_command( 'create_from_hdf5', 'Create dataset from legacy HDF5 archive.', + 'create_from_hdf5 datasets/celebahq ~/downloads/celeba-hq-1024x1024.h5') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'hdf5_filename', help='HDF5 archive containing the images') + p.add_argument( '--shuffle', help='Randomize image order (default: 1)', type=int, default=1) + + p = add_command( 'convert_to_hdf5', 'Convert dataset to legacy HDF5 archive.', + 'convert_to_hdf5 datasets/celebahq.h5 datasets/celebahq') + p.add_argument( 'hdf5_filename', help='HDF5 archive to be created') + p.add_argument( 'tfrecord_dir', help='Dataset directory to load the images from') + p.add_argument( '--compress', help='Compress the data (default: 0)', type=int, default=0) + + p = add_command( 'hdf5_from_images', 'Create HDF5 archive from a directory of images.', + 'hdf5_from_images datasets/mydataset.h5 myimagedir') + p.add_argument( 'hdf5_filename', help='HDF5 archive to be created') + p.add_argument( 'image_dir', help='Directory containing the images') + p.add_argument( '--compress', help='Compress the data (default: 0)', type=int, default=0) + + p = add_command( 'unpack', 'Unpack a TFRecords dataset to labels and images for later repackaging with `pack`.') + p.add_argument( '--tfrecord_dir', help='Directory containing the source dataset in TFRecords format', required=True) + p.add_argument( '--output_dir', help='Output directory where to extract the dataset as PNG files', required=True) + p.add_argument( '--resolution', help='Resize images to (resolution,resolution) (default: None = no resizing)', type=int, default=None) + + p = add_command( 'pack', 'Repackage an unpacked dataset into TFRecords.') + p.add_argument( '--unpacked_dir', help='Source directory containing an unpacked tfrecords dataset') + p.add_argument( '--tfrecord_dir', help='New dataset directory to be created') + p.add_argument( '--num_train', help='Number of images to pick for the training set (default: None = all)', type=int, default=None) + p.add_argument( '--num_validation', help='Number of images to pick for the validation set (default: 0 = no images)', type=int, 
default=0)
+    p.add_argument( '--mirror', help='Augment the training set with horizontally mirrored copies of the images (default: 0 = no mirroring)', type=int, default=0)
+    p.add_argument( '--seed', help='Shuffle random seed (default: None = do not shuffle)', type=int, default=None)
+
+    p = add_command( 'extract_brecahad_crops', 'Extract crops from the original BreCaHAD images.')
+    p.add_argument( '--brecahad_dir', help='Source directory for BreCaHAD images. Should contain .tif files.', required=True)
+    p.add_argument( '--output_dir', help='Output directory for image crops. Will contain .png files.', required=True)
+    p.add_argument( '--cropsize', help='Crop size (resolution,resolution)', type=int, default=256)
+
+    args = parser.parse_args(argv[1:] if len(argv) > 1 else ['-h'])
+    func = globals()[args.command]
+    del args.command
+    func(**vars(args))
+
+#----------------------------------------------------------------------------
+
+if __name__ == "__main__":
+    execute_cmdline(sys.argv)
+
+#----------------------------------------------------------------------------
diff --git a/dnnlib/__init__.py b/dnnlib/__init__.py
new file mode 100755
index 00000000..c73940d8
--- /dev/null
+++ b/dnnlib/__init__.py
@@ -0,0 +1,9 @@
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto. Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+from .util import EasyDict, make_cache_dir_path
diff --git a/dnnlib/tflib/__init__.py b/dnnlib/tflib/__init__.py
new file mode 100755
index 00000000..ca852844
--- /dev/null
+++ b/dnnlib/tflib/__init__.py
@@ -0,0 +1,20 @@
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto. Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+from . import autosummary
+from . import network
+from . import optimizer
+from . import tfutil
+from . import custom_ops
+
+from .tfutil import *
+from .network import Network
+
+from .optimizer import Optimizer
+
+from .custom_ops import get_plugin
diff --git a/dnnlib/tflib/autosummary.py b/dnnlib/tflib/autosummary.py
new file mode 100755
index 00000000..56dfb960
--- /dev/null
+++ b/dnnlib/tflib/autosummary.py
@@ -0,0 +1,193 @@
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto. Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+"""Helper for adding automatically tracked values to TensorBoard.
+
+Autosummary creates an identity op that internally keeps track of the input
+values and automatically shows up in TensorBoard. The reported value
+represents an average over input components. The average is accumulated
+constantly over time and flushed when save_summaries() is called.
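+
+Typical usage (a sketch; the names 'Loss/total', summary_log and cur_nimg are
+illustrative and not defined by this module):
+
+    loss = autosummary('Loss/total', loss)           # track a TF expression
+    autosummary('Progress/kimg', cur_nimg / 1000.0)  # track a python scalar
+    ...
+    save_summaries(summary_log, cur_nimg)            # flush the accumulated averages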
+ +Notes: +- The output tensor must be used as an input for something else in the + graph. Otherwise, the autosummary op will not get executed, and the average + value will not get accumulated. +- It is perfectly fine to include autosummaries with the same name in + several places throughout the graph, even if they are executed concurrently. +- It is ok to also pass in a python scalar or numpy array. In this case, it + is added to the average immediately. +""" + +from collections import OrderedDict +import numpy as np +import tensorflow as tf +from tensorboard import summary as summary_lib +from tensorboard.plugins.custom_scalar import layout_pb2 + +from . import tfutil +from .tfutil import TfExpression +from .tfutil import TfExpressionEx + +# Enable "Custom scalars" tab in TensorBoard for advanced formatting. +# Disabled by default to reduce tfevents file size. +enable_custom_scalars = False + +_dtype = tf.float64 +_vars = OrderedDict() # name => [var, ...] +_immediate = OrderedDict() # name => update_op, update_value +_finalized = False +_merge_op = None + + +def _create_var(name: str, value_expr: TfExpression) -> TfExpression: + """Internal helper for creating autosummary accumulators.""" + assert not _finalized + name_id = name.replace("/", "_") + v = tf.cast(value_expr, _dtype) + + if v.shape.is_fully_defined(): + size = np.prod(v.shape.as_list()) + size_expr = tf.constant(size, dtype=_dtype) + else: + size = None + size_expr = tf.reduce_prod(tf.cast(tf.shape(v), _dtype)) + + if size == 1: + if v.shape.ndims != 0: + v = tf.reshape(v, []) + v = [size_expr, v, tf.square(v)] + else: + v = [size_expr, tf.reduce_sum(v), tf.reduce_sum(tf.square(v))] + v = tf.cond(tf.is_finite(v[1]), lambda: tf.stack(v), lambda: tf.zeros(3, dtype=_dtype)) + + with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.control_dependencies(None): + var = tf.Variable(tf.zeros(3, dtype=_dtype), trainable=False) # [sum(1), sum(x), sum(x**2)] + update_op = tf.cond(tf.is_variable_initialized(var), lambda: tf.assign_add(var, v), lambda: tf.assign(var, v)) + + if name in _vars: + _vars[name].append(var) + else: + _vars[name] = [var] + return update_op + + +def autosummary(name: str, value: TfExpressionEx, passthru: TfExpressionEx = None, condition: TfExpressionEx = True) -> TfExpressionEx: + """Create a new autosummary. + + Args: + name: Name to use in TensorBoard + value: TensorFlow expression or python value to track + passthru: Optionally return this TF node without modifications but tack an autosummary update side-effect to this node. 
+ + Example use of the passthru mechanism: + + n = autosummary('l2loss', loss, passthru=n) + + This is a shorthand for the following code: + + with tf.control_dependencies([autosummary('l2loss', loss)]): + n = tf.identity(n) + """ + tfutil.assert_tf_initialized() + name_id = name.replace("/", "_") + + if tfutil.is_tf_expression(value): + with tf.name_scope("summary_" + name_id), tf.device(value.device): + condition = tf.convert_to_tensor(condition, name='condition') + update_op = tf.cond(condition, lambda: tf.group(_create_var(name, value)), tf.no_op) + with tf.control_dependencies([update_op]): + return tf.identity(value if passthru is None else passthru) + + else: # python scalar or numpy array + assert not tfutil.is_tf_expression(passthru) + assert not tfutil.is_tf_expression(condition) + if condition: + if name not in _immediate: + with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.device(None), tf.control_dependencies(None): + update_value = tf.placeholder(_dtype) + update_op = _create_var(name, update_value) + _immediate[name] = update_op, update_value + update_op, update_value = _immediate[name] + tfutil.run(update_op, {update_value: value}) + return value if passthru is None else passthru + + +def finalize_autosummaries() -> None: + """Create the necessary ops to include autosummaries in TensorBoard report. + Note: This should be done only once per graph. + """ + global _finalized + tfutil.assert_tf_initialized() + + if _finalized: + return None + + _finalized = True + tfutil.init_uninitialized_vars([var for vars_list in _vars.values() for var in vars_list]) + + # Create summary ops. + with tf.device(None), tf.control_dependencies(None): + for name, vars_list in _vars.items(): + name_id = name.replace("/", "_") + with tfutil.absolute_name_scope("Autosummary/" + name_id): + moments = tf.add_n(vars_list) + moments /= moments[0] + with tf.control_dependencies([moments]): # read before resetting + reset_ops = [tf.assign(var, tf.zeros(3, dtype=_dtype)) for var in vars_list] + with tf.name_scope(None), tf.control_dependencies(reset_ops): # reset before reporting + mean = moments[1] + std = tf.sqrt(moments[2] - tf.square(moments[1])) + tf.summary.scalar(name, mean) + if enable_custom_scalars: + tf.summary.scalar("xCustomScalars/" + name + "/margin_lo", mean - std) + tf.summary.scalar("xCustomScalars/" + name + "/margin_hi", mean + std) + + # Setup layout for custom scalars. 
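+    # The layout is only populated when enable_custom_scalars is True; TensorBoard
+    # then uses the margin_lo/margin_hi series to draw mean +/- one-std bands.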
+ layout = None + if enable_custom_scalars: + cat_dict = OrderedDict() + for series_name in sorted(_vars.keys()): + p = series_name.split("/") + cat = p[0] if len(p) >= 2 else "" + chart = "/".join(p[1:-1]) if len(p) >= 3 else p[-1] + if cat not in cat_dict: + cat_dict[cat] = OrderedDict() + if chart not in cat_dict[cat]: + cat_dict[cat][chart] = [] + cat_dict[cat][chart].append(series_name) + categories = [] + for cat_name, chart_dict in cat_dict.items(): + charts = [] + for chart_name, series_names in chart_dict.items(): + series = [] + for series_name in series_names: + series.append(layout_pb2.MarginChartContent.Series( + value=series_name, + lower="xCustomScalars/" + series_name + "/margin_lo", + upper="xCustomScalars/" + series_name + "/margin_hi")) + margin = layout_pb2.MarginChartContent(series=series) + charts.append(layout_pb2.Chart(title=chart_name, margin=margin)) + categories.append(layout_pb2.Category(title=cat_name, chart=charts)) + layout = summary_lib.custom_scalar_pb(layout_pb2.Layout(category=categories)) + return layout + +def save_summaries(file_writer, global_step=None): + """Call FileWriter.add_summary() with all summaries in the default graph, + automatically finalizing and merging them on the first call. + """ + global _merge_op + tfutil.assert_tf_initialized() + + if _merge_op is None: + layout = finalize_autosummaries() + if layout is not None: + file_writer.add_summary(layout) + with tf.device(None), tf.control_dependencies(None): + _merge_op = tf.summary.merge_all() + + file_writer.add_summary(_merge_op.eval(), global_step) diff --git a/dnnlib/tflib/custom_ops.py b/dnnlib/tflib/custom_ops.py new file mode 100755 index 00000000..ed31b769 --- /dev/null +++ b/dnnlib/tflib/custom_ops.py @@ -0,0 +1,181 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""TensorFlow custom ops builder. +""" + +import glob +import os +import re +import uuid +import hashlib +import tempfile +import shutil +import tensorflow as tf +from tensorflow.python.client import device_lib # pylint: disable=no-name-in-module + +from .. import util + +#---------------------------------------------------------------------------- +# Global options. + +cuda_cache_path = None +cuda_cache_version_tag = 'v1' +do_not_hash_included_headers = True # Speed up compilation by assuming that headers included by the CUDA code never change. +verbose = True # Print status messages to stdout. + +#---------------------------------------------------------------------------- +# Internal helper funcs. 
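+#
+# _find_compiler_bindir() locates a host C++ compiler (MSVC on Windows),
+# _get_cuda_gpu_arch_string() queries the compute capability of the first visible GPU,
+# and _prepare_nvcc_cli() assembles the nvcc command line used by get_plugin() below.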
+ +def _find_compiler_bindir(): + hostx64_paths = sorted(glob.glob('C:/Program Files (x86)/Microsoft Visual Studio/*/Professional/VC/Tools/MSVC/*/bin/Hostx64/x64'), reverse=True) + if hostx64_paths != []: + return hostx64_paths[0] + hostx64_paths = sorted(glob.glob('C:/Program Files (x86)/Microsoft Visual Studio/*/BuildTools/VC/Tools/MSVC/*/bin/Hostx64/x64'), reverse=True) + if hostx64_paths != []: + return hostx64_paths[0] + hostx64_paths = sorted(glob.glob('C:/Program Files (x86)/Microsoft Visual Studio/*/Community/VC/Tools/MSVC/*/bin/Hostx64/x64'), reverse=True) + if hostx64_paths != []: + return hostx64_paths[0] + vc_bin_dir = 'C:/Program Files (x86)/Microsoft Visual Studio 14.0/vc/bin' + if os.path.isdir(vc_bin_dir): + return vc_bin_dir + return None + +def _get_compute_cap(device): + caps_str = device.physical_device_desc + m = re.search('compute capability: (\\d+).(\\d+)', caps_str) + major = m.group(1) + minor = m.group(2) + return (major, minor) + +def _get_cuda_gpu_arch_string(): + gpus = [x for x in device_lib.list_local_devices() if x.device_type == 'GPU'] + if len(gpus) == 0: + raise RuntimeError('No GPU devices found') + (major, minor) = _get_compute_cap(gpus[0]) + return 'sm_%s%s' % (major, minor) + +def _run_cmd(cmd): + with os.popen(cmd) as pipe: + output = pipe.read() + status = pipe.close() + if status is not None: + raise RuntimeError('NVCC returned an error. See below for full command line and output log:\n\n%s\n\n%s' % (cmd, output)) + +def _prepare_nvcc_cli(opts): + cmd = 'nvcc ' + opts.strip() + cmd += ' --disable-warnings' + cmd += ' --include-path "%s"' % tf.sysconfig.get_include() + cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'protobuf_archive', 'src') + cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'com_google_absl') + cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'eigen_archive') + + compiler_bindir = _find_compiler_bindir() + if compiler_bindir is None: + # Require that _find_compiler_bindir succeeds on Windows. Allow + # nvcc to use whatever is the default on Linux. + if os.name == 'nt': + raise RuntimeError('Could not find MSVC/GCC/CLANG installation on this computer. Check compiler_bindir_search_path list in "%s".' % __file__) + else: + cmd += ' --compiler-bindir "%s"' % compiler_bindir + cmd += ' 2>&1' + return cmd + +#---------------------------------------------------------------------------- +# Main entry point. + +_plugin_cache = dict() + +def get_plugin(cuda_file, extra_nvcc_options=[]): + cuda_file_base = os.path.basename(cuda_file) + cuda_file_name, cuda_file_ext = os.path.splitext(cuda_file_base) + + # Already in cache? + if cuda_file in _plugin_cache: + return _plugin_cache[cuda_file] + + # Setup plugin. + if verbose: + print('Setting up TensorFlow plugin "%s": ' % cuda_file_base, end='', flush=True) + try: + # Hash CUDA source. + md5 = hashlib.md5() + with open(cuda_file, 'rb') as f: + md5.update(f.read()) + md5.update(b'\n') + + # Hash headers included by the CUDA code by running it through the preprocessor. + if not do_not_hash_included_headers: + if verbose: + print('Preprocessing... 
', end='', flush=True) + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + cuda_file_ext) + _run_cmd(_prepare_nvcc_cli('"%s" --preprocess -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir))) + with open(tmp_file, 'rb') as f: + bad_file_str = ('"' + cuda_file.replace('\\', '/') + '"').encode('utf-8') # __FILE__ in error check macros + good_file_str = ('"' + cuda_file_base + '"').encode('utf-8') + for ln in f: + if not ln.startswith(b'# ') and not ln.startswith(b'#line '): # ignore line number pragmas + ln = ln.replace(bad_file_str, good_file_str) + md5.update(ln) + md5.update(b'\n') + + # Select compiler options. + compile_opts = '' + if os.name == 'nt': + compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.lib') + elif os.name == 'posix': + compile_opts += f' --compiler-options \'-fPIC\'' + compile_opts += f' --compiler-options \'{" ".join(tf.sysconfig.get_compile_flags())}\'' + compile_opts += f' --linker-options \'{" ".join(tf.sysconfig.get_link_flags())}\'' + else: + assert False # not Windows or Linux, w00t? + compile_opts += f' --gpu-architecture={_get_cuda_gpu_arch_string()}' + compile_opts += ' --use_fast_math' + for opt in extra_nvcc_options: + compile_opts += ' ' + opt + nvcc_cmd = _prepare_nvcc_cli(compile_opts) + + # Hash build configuration. + md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n') + md5.update(('tf.VERSION: ' + tf.VERSION).encode('utf-8') + b'\n') + md5.update(('cuda_cache_version_tag: ' + cuda_cache_version_tag).encode('utf-8') + b'\n') + + # Compile if not already compiled. + cache_dir = util.make_cache_dir_path('tflib-cudacache') if cuda_cache_path is None else cuda_cache_path + bin_file_ext = '.dll' if os.name == 'nt' else '.so' + bin_file = os.path.join(cache_dir, cuda_file_name + '_' + md5.hexdigest() + bin_file_ext) + if not os.path.isfile(bin_file): + if verbose: + print('Compiling... ', end='', flush=True) + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + bin_file_ext) + _run_cmd(nvcc_cmd + ' "%s" --shared -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir)) + os.makedirs(cache_dir, exist_ok=True) + intermediate_file = os.path.join(cache_dir, cuda_file_name + '_' + uuid.uuid4().hex + '_tmp' + bin_file_ext) + shutil.copyfile(tmp_file, intermediate_file) + os.rename(intermediate_file, bin_file) # atomic + + # Load. + if verbose: + print('Loading... ', end='', flush=True) + plugin = tf.load_op_library(bin_file) + + # Add to cache. + _plugin_cache[cuda_file] = plugin + if verbose: + print('Done.', flush=True) + return plugin + + except: + if verbose: + print('Failed!', flush=True) + raise + +#---------------------------------------------------------------------------- diff --git a/dnnlib/tflib/network.py b/dnnlib/tflib/network.py new file mode 100755 index 00000000..ff0c169e --- /dev/null +++ b/dnnlib/tflib/network.py @@ -0,0 +1,781 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. 
+ +"""Helper for managing networks.""" + +import types +import inspect +import re +import uuid +import sys +import copy +import numpy as np +import tensorflow as tf + +from collections import OrderedDict +from typing import Any, List, Tuple, Union, Callable + +from . import tfutil +from .. import util + +from .tfutil import TfExpression, TfExpressionEx + +# pylint: disable=protected-access +# pylint: disable=attribute-defined-outside-init +# pylint: disable=too-many-public-methods + +_import_handlers = [] # Custom import handlers for dealing with legacy data in pickle import. +_import_module_src = dict() # Source code for temporary modules created during pickle import. + + +def import_handler(handler_func): + """Function decorator for declaring custom import handlers.""" + _import_handlers.append(handler_func) + return handler_func + + +class Network: + """Generic network abstraction. + + Acts as a convenience wrapper for a parameterized network construction + function, providing several utility methods and convenient access to + the inputs/outputs/weights. + + Network objects can be safely pickled and unpickled for long-term + archival purposes. The pickling works reliably as long as the underlying + network construction function is defined in a standalone Python module + that has no side effects or application-specific imports. + + Args: + name: Network name. Used to select TensorFlow name and variable scopes. Defaults to build func name if None. + func_name: Fully qualified name of the underlying network construction function, or a top-level function object. + static_kwargs: Keyword arguments to be passed in to the network construction function. + """ + + def __init__(self, name: str = None, func_name: Any = None, **static_kwargs): + # Locate the user-specified build function. + assert isinstance(func_name, str) or util.is_top_level_function(func_name) + if util.is_top_level_function(func_name): + func_name = util.get_top_level_function_name(func_name) + module, func_name = util.get_module_from_obj_name(func_name) + func = util.get_obj_from_module(module, func_name) + + # Dig up source code for the module containing the build function. + module_src = _import_module_src.get(module, None) + if module_src is None: + module_src = inspect.getsource(module) + + # Initialize fields. + self._init_fields(name=(name or func_name), static_kwargs=static_kwargs, build_func=func, build_func_name=func_name, build_module_src=module_src) + + def _init_fields(self, name: str, static_kwargs: dict, build_func: Callable, build_func_name: str, build_module_src: str) -> None: + tfutil.assert_tf_initialized() + assert isinstance(name, str) + assert len(name) >= 1 + assert re.fullmatch(r"[A-Za-z0-9_.\\-]*", name) + assert isinstance(static_kwargs, dict) + assert util.is_pickleable(static_kwargs) + assert callable(build_func) + assert isinstance(build_func_name, str) + assert isinstance(build_module_src, str) + + # Choose TensorFlow name scope. + with tf.name_scope(None): + scope = tf.get_default_graph().unique_name(name, mark_as_used=True) + + # Query current TensorFlow device. + with tfutil.absolute_name_scope(scope), tf.control_dependencies(None): + device = tf.no_op(name="_QueryDevice").device + + # Immutable state. + self._name = name + self._scope = scope + self._device = device + self._static_kwargs = util.EasyDict(copy.deepcopy(static_kwargs)) + self._build_func = build_func + self._build_func_name = build_func_name + self._build_module_src = build_module_src + + # State before _init_graph(). 
+ self._var_inits = dict() # var_name => initial_value, set to None by _init_graph() + self._all_inits_known = False # Do we know for sure that _var_inits covers all the variables? + self._components = None # subnet_name => Network, None if the components are not known yet + + # Initialized by _init_graph(). + self._input_templates = None + self._output_templates = None + self._own_vars = None + + # Cached values initialized the respective methods. + self._input_shapes = None + self._output_shapes = None + self._input_names = None + self._output_names = None + self._vars = None + self._trainables = None + self._var_global_to_local = None + self._run_cache = dict() + + def _init_graph(self) -> None: + assert self._var_inits is not None + assert self._input_templates is None + assert self._output_templates is None + assert self._own_vars is None + + # Initialize components. + if self._components is None: + self._components = util.EasyDict() + + # Choose build func kwargs. + build_kwargs = dict(self.static_kwargs) + build_kwargs["is_template_graph"] = True + build_kwargs["components"] = self._components + + # Override scope and device, and ignore surrounding control dependencies. + with tfutil.absolute_variable_scope(self.scope, reuse=False), tfutil.absolute_name_scope(self.scope), tf.device(self.device), tf.control_dependencies(None): + assert tf.get_variable_scope().name == self.scope + assert tf.get_default_graph().get_name_scope() == self.scope + + # Create input templates. + self._input_templates = [] + for param in inspect.signature(self._build_func).parameters.values(): + if param.kind == param.POSITIONAL_OR_KEYWORD and param.default is param.empty: + self._input_templates.append(tf.placeholder(tf.float32, name=param.name)) + + # Call build func. + out_expr = self._build_func(*self._input_templates, **build_kwargs) + + # Collect output templates and variables. + assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple) + self._output_templates = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr) + self._own_vars = OrderedDict((var.name[len(self.scope) + 1:].split(":")[0], var) for var in tf.global_variables(self.scope + "/")) + + # Check for errors. + if len(self._input_templates) == 0: + raise ValueError("Network build func did not list any inputs.") + if len(self._output_templates) == 0: + raise ValueError("Network build func did not return any outputs.") + if any(not tfutil.is_tf_expression(t) for t in self._output_templates): + raise ValueError("Network outputs must be TensorFlow expressions.") + if any(t.shape.ndims is None for t in self._input_templates): + raise ValueError("Network input shapes not defined. Please call x.set_shape() for each input.") + if any(t.shape.ndims is None for t in self._output_templates): + raise ValueError("Network output shapes not defined. Please call x.set_shape() where applicable.") + if any(not isinstance(comp, Network) for comp in self._components.values()): + raise ValueError("Components of a Network must be Networks themselves.") + if len(self._components) != len(set(comp.name for comp in self._components.values())): + raise ValueError("Components of a Network must have unique names.") + + # Initialize variables. 
+ if len(self._var_inits): + tfutil.set_vars({self._get_vars()[name]: value for name, value in self._var_inits.items() if name in self._get_vars()}) + remaining_inits = [var.initializer for name, var in self._own_vars.items() if name not in self._var_inits] + if self._all_inits_known: + assert len(remaining_inits) == 0 + else: + tfutil.run(remaining_inits) + self._var_inits = None + + @property + def name(self): + """User-specified name string.""" + return self._name + + @property + def scope(self): + """Unique TensorFlow scope containing template graph and variables, derived from the user-specified name.""" + return self._scope + + @property + def device(self): + """Name of the TensorFlow device that the weights of this network reside on. Determined by the current device at construction time.""" + return self._device + + @property + def static_kwargs(self): + """EasyDict of arguments passed to the user-supplied build func.""" + return copy.deepcopy(self._static_kwargs) + + @property + def components(self): + """EasyDict of sub-networks created by the build func.""" + return copy.copy(self._get_components()) + + def _get_components(self): + if self._components is None: + self._init_graph() + assert self._components is not None + return self._components + + @property + def input_shapes(self): + """List of input tensor shapes, including minibatch dimension.""" + if self._input_shapes is None: + self._input_shapes = [t.shape.as_list() for t in self.input_templates] + return copy.deepcopy(self._input_shapes) + + @property + def output_shapes(self): + """List of output tensor shapes, including minibatch dimension.""" + if self._output_shapes is None: + self._output_shapes = [t.shape.as_list() for t in self.output_templates] + return copy.deepcopy(self._output_shapes) + + @property + def input_shape(self): + """Short-hand for input_shapes[0].""" + return self.input_shapes[0] + + @property + def output_shape(self): + """Short-hand for output_shapes[0].""" + return self.output_shapes[0] + + @property + def num_inputs(self): + """Number of input tensors.""" + return len(self.input_shapes) + + @property + def num_outputs(self): + """Number of output tensors.""" + return len(self.output_shapes) + + @property + def input_names(self): + """Name string for each input.""" + if self._input_names is None: + self._input_names = [t.name.split("/")[-1].split(":")[0] for t in self.input_templates] + return copy.copy(self._input_names) + + @property + def output_names(self): + """Name string for each output.""" + if self._output_names is None: + self._output_names = [t.name.split("/")[-1].split(":")[0] for t in self.output_templates] + return copy.copy(self._output_names) + + @property + def input_templates(self): + """Input placeholders in the template graph.""" + if self._input_templates is None: + self._init_graph() + assert self._input_templates is not None + return copy.copy(self._input_templates) + + @property + def output_templates(self): + """Output tensors in the template graph.""" + if self._output_templates is None: + self._init_graph() + assert self._output_templates is not None + return copy.copy(self._output_templates) + + @property + def own_vars(self): + """Variables defined by this network (local_name => var), excluding sub-networks.""" + return copy.copy(self._get_own_vars()) + + def _get_own_vars(self): + if self._own_vars is None: + self._init_graph() + assert self._own_vars is not None + return self._own_vars + + @property + def vars(self): + """All variables (local_name => var).""" + 
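+        # Includes the variables of all sub-networks, keyed as "<component_name>/<local_name>".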
return copy.copy(self._get_vars()) + + def _get_vars(self): + if self._vars is None: + self._vars = OrderedDict(self._get_own_vars()) + for comp in self._get_components().values(): + self._vars.update((comp.name + "/" + name, var) for name, var in comp._get_vars().items()) + return self._vars + + @property + def trainables(self): + """All trainable variables (local_name => var).""" + return copy.copy(self._get_trainables()) + + def _get_trainables(self): + if self._trainables is None: + self._trainables = OrderedDict((name, var) for name, var in self.vars.items() if var.trainable) + return self._trainables + + @property + def var_global_to_local(self): + """Mapping from variable global names to local names.""" + return copy.copy(self._get_var_global_to_local()) + + def _get_var_global_to_local(self): + if self._var_global_to_local is None: + self._var_global_to_local = OrderedDict((var.name.split(":")[0], name) for name, var in self.vars.items()) + return self._var_global_to_local + + def reset_own_vars(self) -> None: + """Re-initialize all variables of this network, excluding sub-networks.""" + if self._var_inits is None or self._components is None: + tfutil.run([var.initializer for var in self._get_own_vars().values()]) + else: + self._var_inits.clear() + self._all_inits_known = False + + def reset_vars(self) -> None: + """Re-initialize all variables of this network, including sub-networks.""" + if self._var_inits is None: + tfutil.run([var.initializer for var in self._get_vars().values()]) + else: + self._var_inits.clear() + self._all_inits_known = False + if self._components is not None: + for comp in self._components.values(): + comp.reset_vars() + + def reset_trainables(self) -> None: + """Re-initialize all trainable variables of this network, including sub-networks.""" + tfutil.run([var.initializer for var in self._get_trainables().values()]) + + def get_output_for(self, *in_expr: TfExpression, return_as_list: bool = False, **dynamic_kwargs) -> Union[TfExpression, List[TfExpression]]: + """Construct TensorFlow expression(s) for the output(s) of this network, given the input expression(s). + The graph is placed on the current TensorFlow device.""" + assert len(in_expr) == self.num_inputs + assert not all(expr is None for expr in in_expr) + self._get_vars() # ensure that all variables have been created + + # Choose build func kwargs. + build_kwargs = dict(self.static_kwargs) + build_kwargs.update(dynamic_kwargs) + build_kwargs["is_template_graph"] = False + build_kwargs["components"] = self._components + + # Build TensorFlow graph to evaluate the network. + with tfutil.absolute_variable_scope(self.scope, reuse=True), tf.name_scope(self.name): + assert tf.get_variable_scope().name == self.scope + valid_inputs = [expr for expr in in_expr if expr is not None] + final_inputs = [] + for expr, name, shape in zip(in_expr, self.input_names, self.input_shapes): + if expr is not None: + expr = tf.identity(expr, name=name) + else: + expr = tf.zeros([tf.shape(valid_inputs[0])[0]] + shape[1:], name=name) + final_inputs.append(expr) + out_expr = self._build_func(*final_inputs, **build_kwargs) + + # Propagate input shapes back to the user-specified expressions. + for expr, final in zip(in_expr, final_inputs): + if isinstance(expr, tf.Tensor): + expr.set_shape(final.shape) + + # Express outputs in the desired format. 
+ assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple) + if return_as_list: + out_expr = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr) + return out_expr + + def get_var_local_name(self, var_or_global_name: Union[TfExpression, str]) -> str: + """Get the local name of a given variable, without any surrounding name scopes.""" + assert tfutil.is_tf_expression(var_or_global_name) or isinstance(var_or_global_name, str) + global_name = var_or_global_name if isinstance(var_or_global_name, str) else var_or_global_name.name + return self._get_var_global_to_local()[global_name] + + def find_var(self, var_or_local_name: Union[TfExpression, str]) -> TfExpression: + """Find variable by local or global name.""" + assert tfutil.is_tf_expression(var_or_local_name) or isinstance(var_or_local_name, str) + return self._get_vars()[var_or_local_name] if isinstance(var_or_local_name, str) else var_or_local_name + + def get_var(self, var_or_local_name: Union[TfExpression, str]) -> np.ndarray: + """Get the value of a given variable as NumPy array. + Note: This method is very inefficient -- prefer to use tflib.run(list_of_vars) whenever possible.""" + return self.find_var(var_or_local_name).eval() + + def set_var(self, var_or_local_name: Union[TfExpression, str], new_value: Union[int, float, np.ndarray]) -> None: + """Set the value of a given variable based on the given NumPy array. + Note: This method is very inefficient -- prefer to use tflib.set_vars() whenever possible.""" + tfutil.set_vars({self.find_var(var_or_local_name): new_value}) + + def __getstate__(self) -> dict: + """Pickle export.""" + state = dict() + state["version"] = 5 + state["name"] = self.name + state["static_kwargs"] = dict(self.static_kwargs) + state["components"] = dict(self.components) + state["build_module_src"] = self._build_module_src + state["build_func_name"] = self._build_func_name + state["variables"] = list(zip(self._get_own_vars().keys(), tfutil.run(list(self._get_own_vars().values())))) + state["input_shapes"] = self.input_shapes + state["output_shapes"] = self.output_shapes + state["input_names"] = self.input_names + state["output_names"] = self.output_names + return state + + def __setstate__(self, state: dict) -> None: + """Pickle import.""" + + # Execute custom import handlers. + for handler in _import_handlers: + state = handler(state) + + # Get basic fields. + assert state["version"] in [2, 3, 4, 5] + name = state["name"] + static_kwargs = state["static_kwargs"] + build_module_src = state["build_module_src"] + build_func_name = state["build_func_name"] + + # Create temporary module from the imported source code. + module_name = "_tflib_network_import_" + uuid.uuid4().hex + module = types.ModuleType(module_name) + sys.modules[module_name] = module + _import_module_src[module] = build_module_src + exec(build_module_src, module.__dict__) # pylint: disable=exec-used + build_func = util.get_obj_from_module(module, build_func_name) + + # Initialize fields. 
+ self._init_fields(name=name, static_kwargs=static_kwargs, build_func=build_func, build_func_name=build_func_name, build_module_src=build_module_src) + self._var_inits.update(copy.deepcopy(state["variables"])) + self._all_inits_known = True + self._components = util.EasyDict(state.get("components", {})) + self._input_shapes = copy.deepcopy(state.get("input_shapes", None)) + self._output_shapes = copy.deepcopy(state.get("output_shapes", None)) + self._input_names = copy.deepcopy(state.get("input_names", None)) + self._output_names = copy.deepcopy(state.get("output_names", None)) + + def clone(self, name: str = None, **new_static_kwargs) -> "Network": + """Create a clone of this network with its own copy of the variables.""" + static_kwargs = dict(self.static_kwargs) + static_kwargs.update(new_static_kwargs) + net = object.__new__(Network) + net._init_fields(name=(name or self.name), static_kwargs=static_kwargs, build_func=self._build_func, build_func_name=self._build_func_name, build_module_src=self._build_module_src) + net.copy_vars_from(self) + return net + + def copy_own_vars_from(self, src_net: "Network") -> None: + """Copy the values of all variables from the given network, excluding sub-networks.""" + + # Source has unknown variables or unknown components => init now. + if (src_net._var_inits is not None and not src_net._all_inits_known) or src_net._components is None: + src_net._get_vars() + + # Both networks are inited => copy directly. + if src_net._var_inits is None and self._var_inits is None: + names = [name for name in self._get_own_vars().keys() if name in src_net._get_own_vars()] + tfutil.set_vars(tfutil.run({self._get_vars()[name]: src_net._get_vars()[name] for name in names})) + return + + # Read from source. + if src_net._var_inits is None: + value_dict = tfutil.run(src_net._get_own_vars()) + else: + value_dict = src_net._var_inits + + # Write to destination. + if self._var_inits is None: + tfutil.set_vars({self._get_vars()[name]: value for name, value in value_dict.items() if name in self._get_vars()}) + else: + self._var_inits.update(value_dict) + + def copy_vars_from(self, src_net: "Network") -> None: + """Copy the values of all variables from the given network, including sub-networks.""" + + # Source has unknown variables or unknown components => init now. + if (src_net._var_inits is not None and not src_net._all_inits_known) or src_net._components is None: + src_net._get_vars() + + # Source is inited, but destination components have not been created yet => set as initial values. + if src_net._var_inits is None and self._components is None: + self._var_inits.update(tfutil.run(src_net._get_vars())) + return + + # Destination has unknown components => init now. + if self._components is None: + self._get_vars() + + # Both networks are inited => copy directly. + if src_net._var_inits is None and self._var_inits is None: + names = [name for name in self._get_vars().keys() if name in src_net._get_vars()] + tfutil.set_vars(tfutil.run({self._get_vars()[name]: src_net._get_vars()[name] for name in names})) + return + + # Copy recursively, component by component. 
+ self.copy_own_vars_from(src_net) + for name, src_comp in src_net._components.items(): + if name in self._components: + self._components[name].copy_vars_from(src_comp) + + def copy_trainables_from(self, src_net: "Network") -> None: + """Copy the values of all trainable variables from the given network, including sub-networks.""" + names = [name for name in self._get_trainables().keys() if name in src_net._get_trainables()] + tfutil.set_vars(tfutil.run({self._get_vars()[name]: src_net._get_vars()[name] for name in names})) + + def convert(self, new_func_name: str, new_name: str = None, **new_static_kwargs) -> "Network": + """Create new network with the given parameters, and copy all variables from this network.""" + if new_name is None: + new_name = self.name + static_kwargs = dict(self.static_kwargs) + static_kwargs.update(new_static_kwargs) + net = Network(name=new_name, func_name=new_func_name, **static_kwargs) + net.copy_vars_from(self) + return net + + def setup_as_moving_average_of(self, src_net: "Network", beta: TfExpressionEx = 0.99, beta_nontrainable: TfExpressionEx = 0.0) -> tf.Operation: + """Construct a TensorFlow op that updates the variables of this network + to be slightly closer to those of the given network.""" + with tfutil.absolute_name_scope(self.scope + "/_MovingAvg"): + ops = [] + for name, var in self._get_vars().items(): + if name in src_net._get_vars(): + cur_beta = beta if var.trainable else beta_nontrainable + new_value = tfutil.lerp(src_net._get_vars()[name], var, cur_beta) + ops.append(var.assign(new_value)) + return tf.group(*ops) + + def run(self, + *in_arrays: Tuple[Union[np.ndarray, None], ...], + input_transform: dict = None, + output_transform: dict = None, + return_as_list: bool = False, + print_progress: bool = False, + minibatch_size: int = None, + num_gpus: int = 1, + assume_frozen: bool = False, + **dynamic_kwargs) -> Union[np.ndarray, Tuple[np.ndarray, ...], List[np.ndarray]]: + """Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s). + + Args: + input_transform: A dict specifying a custom transformation to be applied to the input tensor(s) before evaluating the network. + The dict must contain a 'func' field that points to a top-level function. The function is called with the input + TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs. + output_transform: A dict specifying a custom transformation to be applied to the output tensor(s) after evaluating the network. + The dict must contain a 'func' field that points to a top-level function. The function is called with the output + TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs. + return_as_list: True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs. + print_progress: Print progress to the console? Useful for very large input arrays. + minibatch_size: Maximum minibatch size to use, None = disable batching. + num_gpus: Number of GPUs to use. + assume_frozen: Improve multi-GPU performance by assuming that the trainable parameters will remain changed between calls. + dynamic_kwargs: Additional keyword arguments to be passed into the network build function. 
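+            Example (an illustrative sketch): assuming `Gs` is a generator Network loaded from a
+            pickle, `latents` is a NumPy array of shape `[num, *Gs.input_shape[1:]]`, and the
+            `tflib.convert_images_to_uint8` helper accepts an `nchw_to_nhwc` argument, a typical
+            call returning uint8 images looks like:
+
+                images = Gs.run(latents, None, minibatch_size=8,
+                                output_transform=dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True))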
+ """ + assert len(in_arrays) == self.num_inputs + assert not all(arr is None for arr in in_arrays) + assert input_transform is None or util.is_top_level_function(input_transform["func"]) + assert output_transform is None or util.is_top_level_function(output_transform["func"]) + output_transform, dynamic_kwargs = _handle_legacy_output_transforms(output_transform, dynamic_kwargs) + num_items = in_arrays[0].shape[0] + if minibatch_size is None: + minibatch_size = num_items + + # Construct unique hash key from all arguments that affect the TensorFlow graph. + key = dict(input_transform=input_transform, output_transform=output_transform, num_gpus=num_gpus, assume_frozen=assume_frozen, dynamic_kwargs=dynamic_kwargs) + def unwind_key(obj): + if isinstance(obj, dict): + return [(key, unwind_key(value)) for key, value in sorted(obj.items())] + if callable(obj): + return util.get_top_level_function_name(obj) + return obj + key = repr(unwind_key(key)) + + # Build graph. + if key not in self._run_cache: + with tfutil.absolute_name_scope(self.scope + "/_Run"), tf.control_dependencies(None): + with tf.device("/cpu:0"): + in_expr = [tf.placeholder(tf.float32, name=name) for name in self.input_names] + in_split = list(zip(*[tf.split(x, num_gpus) for x in in_expr])) + + out_split = [] + for gpu in range(num_gpus): + with tf.device(self.device if num_gpus == 1 else "/gpu:%d" % gpu): + net_gpu = self.clone() if assume_frozen else self + in_gpu = in_split[gpu] + + if input_transform is not None: + in_kwargs = dict(input_transform) + in_gpu = in_kwargs.pop("func")(*in_gpu, **in_kwargs) + in_gpu = [in_gpu] if tfutil.is_tf_expression(in_gpu) else list(in_gpu) + + assert len(in_gpu) == self.num_inputs + out_gpu = net_gpu.get_output_for(*in_gpu, return_as_list=True, **dynamic_kwargs) + + if output_transform is not None: + out_kwargs = dict(output_transform) + out_gpu = out_kwargs.pop("func")(*out_gpu, **out_kwargs) + out_gpu = [out_gpu] if tfutil.is_tf_expression(out_gpu) else list(out_gpu) + + assert len(out_gpu) == self.num_outputs + out_split.append(out_gpu) + + with tf.device("/cpu:0"): + out_expr = [tf.concat(outputs, axis=0) for outputs in zip(*out_split)] + self._run_cache[key] = in_expr, out_expr + + # Run minibatches. + in_expr, out_expr = self._run_cache[key] + out_arrays = [np.empty([num_items] + expr.shape.as_list()[1:], expr.dtype.name) for expr in out_expr] + + for mb_begin in range(0, num_items, minibatch_size): + if print_progress: + print("\r%d / %d" % (mb_begin, num_items), end="") + + mb_end = min(mb_begin + minibatch_size, num_items) + mb_num = mb_end - mb_begin + mb_in = [src[mb_begin : mb_end] if src is not None else np.zeros([mb_num] + shape[1:]) for src, shape in zip(in_arrays, self.input_shapes)] + mb_out = tf.get_default_session().run(out_expr, dict(zip(in_expr, mb_in))) + + for dst, src in zip(out_arrays, mb_out): + dst[mb_begin: mb_end] = src + + # Done. 
+ if print_progress: + print("\r%d / %d" % (num_items, num_items)) + + if not return_as_list: + out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(out_arrays) + return out_arrays + + def list_ops(self) -> List[TfExpression]: + _ = self.output_templates # ensure that the template graph has been created + include_prefix = self.scope + "/" + exclude_prefix = include_prefix + "_" + ops = tf.get_default_graph().get_operations() + ops = [op for op in ops if op.name.startswith(include_prefix)] + ops = [op for op in ops if not op.name.startswith(exclude_prefix)] + return ops + + def list_layers(self) -> List[Tuple[str, TfExpression, List[TfExpression]]]: + """Returns a list of (layer_name, output_expr, trainable_vars) tuples corresponding to + individual layers of the network. Mainly intended to be used for reporting.""" + layers = [] + + def recurse(scope, parent_ops, parent_vars, level): + if len(parent_ops) == 0 and len(parent_vars) == 0: + return + + # Ignore specific patterns. + if any(p in scope for p in ["/Shape", "/strided_slice", "/Cast", "/concat", "/Assign"]): + return + + # Filter ops and vars by scope. + global_prefix = scope + "/" + local_prefix = global_prefix[len(self.scope) + 1:] + cur_ops = [op for op in parent_ops if op.name.startswith(global_prefix) or op.name == global_prefix[:-1]] + cur_vars = [(name, var) for name, var in parent_vars if name.startswith(local_prefix) or name == local_prefix[:-1]] + if not cur_ops and not cur_vars: + return + + # Filter out all ops related to variables. + for var in [op for op in cur_ops if op.type.startswith("Variable")]: + var_prefix = var.name + "/" + cur_ops = [op for op in cur_ops if not op.name.startswith(var_prefix)] + + # Scope does not contain ops as immediate children => recurse deeper. + contains_direct_ops = any("/" not in op.name[len(global_prefix):] and op.type not in ["Identity", "Cast", "Transpose"] for op in cur_ops) + if (level == 0 or not contains_direct_ops) and (len(cur_ops) != 0 or len(cur_vars) != 0): + visited = set() + for rel_name in [op.name[len(global_prefix):] for op in cur_ops] + [name[len(local_prefix):] for name, _var in cur_vars]: + token = rel_name.split("/")[0] + if token not in visited: + recurse(global_prefix + token, cur_ops, cur_vars, level + 1) + visited.add(token) + return + + # Report layer. 
+ layer_name = scope[len(self.scope) + 1:] + layer_output = cur_ops[-1].outputs[0] if cur_ops else cur_vars[-1][1] + layer_trainables = [var for _name, var in cur_vars if var.trainable] + layers.append((layer_name, layer_output, layer_trainables)) + + recurse(self.scope, self.list_ops(), list(self._get_vars().items()), 0) + return layers + + def print_layers(self, title: str = None, hide_layers_with_no_params: bool = False) -> None: + """Print a summary table of the network structure.""" + rows = [[title if title is not None else self.name, "Params", "OutputShape", "WeightShape"]] + rows += [["---"] * 4] + total_params = 0 + + for layer_name, layer_output, layer_trainables in self.list_layers(): + num_params = sum(int(np.prod(var.shape.as_list())) for var in layer_trainables) + weights = [var for var in layer_trainables if var.name.endswith("/weight:0")] + weights.sort(key=lambda x: len(x.name)) + if len(weights) == 0 and len(layer_trainables) == 1: + weights = layer_trainables + total_params += num_params + + if not hide_layers_with_no_params or num_params != 0: + num_params_str = str(num_params) if num_params > 0 else "-" + output_shape_str = str(layer_output.shape) + weight_shape_str = str(weights[0].shape) if len(weights) >= 1 else "-" + rows += [[layer_name, num_params_str, output_shape_str, weight_shape_str]] + + rows += [["---"] * 4] + rows += [["Total", str(total_params), "", ""]] + + widths = [max(len(cell) for cell in column) for column in zip(*rows)] + print() + for row in rows: + print(" ".join(cell + " " * (width - len(cell)) for cell, width in zip(row, widths))) + print() + + def setup_weight_histograms(self, title: str = None) -> None: + """Construct summary ops to include histograms of all trainable parameters in TensorBoard.""" + if title is None: + title = self.name + + with tf.name_scope(None), tf.device(None), tf.control_dependencies(None): + for local_name, var in self._get_trainables().items(): + if "/" in local_name: + p = local_name.split("/") + name = title + "_" + p[-1] + "/" + "_".join(p[:-1]) + else: + name = title + "_toplevel/" + local_name + + tf.summary.histogram(name, var) + +#---------------------------------------------------------------------------- +# Backwards-compatible emulation of legacy output transformation in Network.run(). 
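+# In user code, the deprecated keyword style and its recommended replacement look roughly as
+# follows (illustrative sketch; `net` stands for any single-input Network instance):
+#
+#   out = net.run(x, out_mul=127.5, out_add=127.5, out_dtype=np.uint8)           # legacy form
+#   out = net.run(x, output_transform=dict(func=tflib.convert_images_to_uint8))  # preferred form
+#
+# The helpers below translate the legacy keywords into an equivalent output_transform dict and
+# print a one-time deprecation warning.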
+ +_print_legacy_warning = True + +def _handle_legacy_output_transforms(output_transform, dynamic_kwargs): + global _print_legacy_warning + legacy_kwargs = ["out_mul", "out_add", "out_shrink", "out_dtype"] + if not any(kwarg in dynamic_kwargs for kwarg in legacy_kwargs): + return output_transform, dynamic_kwargs + + if _print_legacy_warning: + _print_legacy_warning = False + print() + print("WARNING: Old-style output transformations in Network.run() are deprecated.") + print("Consider using 'output_transform=dict(func=tflib.convert_images_to_uint8)'") + print("instead of 'out_mul=127.5, out_add=127.5, out_dtype=np.uint8'.") + print() + assert output_transform is None + + new_kwargs = dict(dynamic_kwargs) + new_transform = {kwarg: new_kwargs.pop(kwarg) for kwarg in legacy_kwargs if kwarg in dynamic_kwargs} + new_transform["func"] = _legacy_output_transform_func + return new_transform, new_kwargs + +def _legacy_output_transform_func(*expr, out_mul=1.0, out_add=0.0, out_shrink=1, out_dtype=None): + if out_mul != 1.0: + expr = [x * out_mul for x in expr] + + if out_add != 0.0: + expr = [x + out_add for x in expr] + + if out_shrink > 1: + ksize = [1, 1, out_shrink, out_shrink] + expr = [tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") for x in expr] + + if out_dtype is not None: + if tf.as_dtype(out_dtype).is_integer: + expr = [tf.round(x) for x in expr] + expr = [tf.saturate_cast(x, out_dtype) for x in expr] + return expr diff --git a/dnnlib/tflib/ops/__init__.py b/dnnlib/tflib/ops/__init__.py new file mode 100755 index 00000000..43cce373 --- /dev/null +++ b/dnnlib/tflib/ops/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +# empty diff --git a/dnnlib/tflib/ops/fused_bias_act.cu b/dnnlib/tflib/ops/fused_bias_act.cu new file mode 100755 index 00000000..0268f143 --- /dev/null +++ b/dnnlib/tflib/ops/fused_bias_act.cu @@ -0,0 +1,220 @@ +// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +// +// NVIDIA CORPORATION and its licensors retain all intellectual property +// and proprietary rights in and to this software, related documentation +// and any modifications thereto. Any use, reproduction, disclosure or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA CORPORATION is strictly prohibited. + +#define EIGEN_USE_GPU +#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include + +using namespace tensorflow; +using namespace tensorflow::shape_inference; + +#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false) + +//------------------------------------------------------------------------ +// CUDA kernel. 
+ +template +struct FusedBiasActKernelParams +{ + const T* x; // [sizeX] + const T* b; // [sizeB] or NULL + const T* xref; // [sizeX] or NULL + const T* yref; // [sizeX] or NULL + T* y; // [sizeX] + + int grad; + int axis; + int act; + float alpha; + float gain; + float clamp; + + int sizeX; + int sizeB; + int stepB; + int loopX; +}; + +template +static __global__ void FusedBiasActKernel(const FusedBiasActKernelParams p) +{ + const float expRange = 80.0f; + const float halfExpRange = 40.0f; + const float seluScale = 1.0507009873554804934193349852946f; + const float seluAlpha = 1.6732632423543772848170429916717f; + + // Loop over elements. + int xi = blockIdx.x * p.loopX * blockDim.x + threadIdx.x; + for (int loopIdx = 0; loopIdx < p.loopX && xi < p.sizeX; loopIdx++, xi += blockDim.x) + { + // Load and apply bias. + float x = (float)p.x[xi]; + if (p.b) + x += (float)p.b[(xi / p.stepB) % p.sizeB]; + float xref = (p.xref) ? (float)p.xref[xi] : 0.0f; + float yref = (p.yref) ? (float)p.yref[xi] : 0.0f; + float yy = (p.gain != 0.0f) ? yref / p.gain : 0.0f; + + // Evaluate activation func. + float y; + switch (p.act * 10 + p.grad) + { + // linear + default: + case 10: y = x; break; + case 11: y = x; break; + case 12: y = 0.0f; break; + + // relu + case 20: y = (x > 0.0f) ? x : 0.0f; break; + case 21: y = (yy > 0.0f) ? x : 0.0f; break; + case 22: y = 0.0f; break; + + // lrelu + case 30: y = (x > 0.0f) ? x : x * p.alpha; break; + case 31: y = (yy > 0.0f) ? x : x * p.alpha; break; + case 32: y = 0.0f; break; + + // tanh + case 40: { float c = expf(x); float d = 1.0f / c; y = (x < -expRange) ? -1.0f : (x > expRange) ? 1.0f : (c - d) / (c + d); } break; + case 41: y = x * (1.0f - yy * yy); break; + case 42: y = x * (1.0f - yy * yy) * (-2.0f * yy); break; + + // sigmoid + case 50: y = (x < -expRange) ? 0.0f : 1.0f / (expf(-x) + 1.0f); break; + case 51: y = x * yy * (1.0f - yy); break; + case 52: y = x * yy * (1.0f - yy) * (1.0f - 2.0f * yy); break; + + // elu + case 60: y = (x >= 0.0f) ? x : expf(x) - 1.0f; break; + case 61: y = (yy >= 0.0f) ? x : x * (yy + 1.0f); break; + case 62: y = (yy >= 0.0f) ? 0.0f : x * (yy + 1.0f); break; + + // selu + case 70: y = (x >= 0.0f) ? seluScale * x : (seluScale * seluAlpha) * (expf(x) - 1.0f); break; + case 71: y = (yy >= 0.0f) ? x * seluScale : x * (yy + seluScale * seluAlpha); break; + case 72: y = (yy >= 0.0f) ? 0.0f : x * (yy + seluScale * seluAlpha); break; + + // softplus + case 80: y = (x > expRange) ? x : logf(expf(x) + 1.0f); break; + case 81: y = x * (1.0f - expf(-yy)); break; + case 82: { float c = expf(-yy); y = x * c * (1.0f - c); } break; + + // swish + case 90: y = (x < -expRange) ? 0.0f : x / (expf(-x) + 1.0f); break; + case 91: + case 92: + { + float c = expf(xref); + float d = c + 1.0f; + if (p.grad == 1) + y = (xref > halfExpRange) ? x : x * c * (xref + d) / (d * d); + else + y = (xref > halfExpRange) ? 0.0f : x * c * (xref * (2.0f - d) + 2.0f * d) / (d * d * d); + yref = (xref < -expRange) ? 0.0f : xref / (expf(-xref) + 1.0f) * p.gain; + } + break; + } + + // Apply gain. + y *= p.gain; + + // Clamp. + if (p.clamp >= 0.0f) + { + if (p.grad == 0) + y = (fabsf(y) < p.clamp) ? y : (y >= 0.0f) ? p.clamp : -p.clamp; + else + y = (fabsf(yref) < p.clamp) ? y : 0.0f; + } + + // Store. + p.y[xi] = (T)y; + } +} + +//------------------------------------------------------------------------ +// TensorFlow op. 
+ +template +struct FusedBiasActOp : public OpKernel +{ + FusedBiasActKernelParams m_attribs; + + FusedBiasActOp(OpKernelConstruction* ctx) : OpKernel(ctx) + { + memset(&m_attribs, 0, sizeof(m_attribs)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("grad", &m_attribs.grad)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("axis", &m_attribs.axis)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("act", &m_attribs.act)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &m_attribs.alpha)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("gain", &m_attribs.gain)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("clamp", &m_attribs.clamp)); + OP_REQUIRES(ctx, m_attribs.grad >= 0, errors::InvalidArgument("grad must be non-negative")); + OP_REQUIRES(ctx, m_attribs.axis >= 0, errors::InvalidArgument("axis must be non-negative")); + OP_REQUIRES(ctx, m_attribs.act >= 0, errors::InvalidArgument("act must be non-negative")); + } + + void Compute(OpKernelContext* ctx) + { + FusedBiasActKernelParams p = m_attribs; + cudaStream_t stream = ctx->eigen_device().stream(); + + const Tensor& x = ctx->input(0); // [...] + const Tensor& b = ctx->input(1); // [sizeB] or [0] + const Tensor& xref = ctx->input(2); // x.shape or [0] + const Tensor& yref = ctx->input(3); // x.shape or [0] + p.x = x.flat().data(); + p.b = (b.NumElements()) ? b.flat().data() : NULL; + p.xref = (xref.NumElements()) ? xref.flat().data() : NULL; + p.yref = (yref.NumElements()) ? yref.flat().data() : NULL; + OP_REQUIRES(ctx, b.NumElements() == 0 || m_attribs.axis < x.dims(), errors::InvalidArgument("axis out of bounds")); + OP_REQUIRES(ctx, b.dims() == 1, errors::InvalidArgument("b must have rank 1")); + OP_REQUIRES(ctx, b.NumElements() == 0 || b.NumElements() == x.dim_size(m_attribs.axis), errors::InvalidArgument("b has wrong number of elements")); + OP_REQUIRES(ctx, xref.NumElements() == 0 || xref.NumElements() == x.NumElements(), errors::InvalidArgument("xref has wrong number of elements")); + OP_REQUIRES(ctx, yref.NumElements() == 0 || yref.NumElements() == x.NumElements(), errors::InvalidArgument("yref has wrong number of elements")); + OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("x is too large")); + + p.sizeX = (int)x.NumElements(); + p.sizeB = (int)b.NumElements(); + p.stepB = 1; + for (int i = m_attribs.axis + 1; i < x.dims(); i++) + p.stepB *= (int)x.dim_size(i); + + Tensor* y = NULL; // x.shape + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, x.shape(), &y)); + p.y = y->flat().data(); + + p.loopX = 4; + int blockSize = 4 * 32; + int gridSize = (p.sizeX - 1) / (p.loopX * blockSize) + 1; + void* args[] = {&p}; + OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel((void*)FusedBiasActKernel, gridSize, blockSize, args, 0, stream)); + } +}; + +REGISTER_OP("FusedBiasAct") + .Input ("x: T") + .Input ("b: T") + .Input ("xref: T") + .Input ("yref: T") + .Output ("y: T") + .Attr ("T: {float, half}") + .Attr ("grad: int = 0") + .Attr ("axis: int = 1") + .Attr ("act: int = 0") + .Attr ("alpha: float = 0.0") + .Attr ("gain: float = 1.0") + .Attr ("clamp: float = -1.0"); +REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint("T"), FusedBiasActOp); +REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint("T"), FusedBiasActOp); + +//------------------------------------------------------------------------ diff --git a/dnnlib/tflib/ops/fused_bias_act.py b/dnnlib/tflib/ops/fused_bias_act.py new file mode 100755 index 00000000..79991b04 --- /dev/null +++ b/dnnlib/tflib/ops/fused_bias_act.py @@ -0,0 +1,211 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. 
All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Custom TensorFlow ops for efficient bias and activation.""" + +import os +import numpy as np +import tensorflow as tf +from .. import custom_ops +from ...util import EasyDict + +def _get_plugin(): + return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu') + +#---------------------------------------------------------------------------- + +activation_funcs = { + 'linear': EasyDict(func=lambda x, **_: x, def_alpha=None, def_gain=1.0, cuda_idx=1, ref='y', zero_2nd_grad=True), + 'relu': EasyDict(func=lambda x, **_: tf.nn.relu(x), def_alpha=None, def_gain=np.sqrt(2), cuda_idx=2, ref='y', zero_2nd_grad=True), + 'lrelu': EasyDict(func=lambda x, alpha, **_: tf.nn.leaky_relu(x, alpha), def_alpha=0.2, def_gain=np.sqrt(2), cuda_idx=3, ref='y', zero_2nd_grad=True), + 'tanh': EasyDict(func=lambda x, **_: tf.nn.tanh(x), def_alpha=None, def_gain=1.0, cuda_idx=4, ref='y', zero_2nd_grad=False), + 'sigmoid': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x), def_alpha=None, def_gain=1.0, cuda_idx=5, ref='y', zero_2nd_grad=False), + 'elu': EasyDict(func=lambda x, **_: tf.nn.elu(x), def_alpha=None, def_gain=1.0, cuda_idx=6, ref='y', zero_2nd_grad=False), + 'selu': EasyDict(func=lambda x, **_: tf.nn.selu(x), def_alpha=None, def_gain=1.0, cuda_idx=7, ref='y', zero_2nd_grad=False), + 'softplus': EasyDict(func=lambda x, **_: tf.nn.softplus(x), def_alpha=None, def_gain=1.0, cuda_idx=8, ref='y', zero_2nd_grad=False), + 'swish': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x) * x, def_alpha=None, def_gain=np.sqrt(2), cuda_idx=9, ref='x', zero_2nd_grad=False), +} + +#---------------------------------------------------------------------------- + +def fused_bias_act(x, b=None, axis=1, act='linear', alpha=None, gain=None, clamp=None, impl='cuda'): + r"""Fused bias and activation function. + + Adds bias `b` to activation tensor `x`, evaluates activation function `act`, + and scales the result by `gain`. Each of the steps is optional. In most cases, + the fused op is considerably more efficient than performing the same calculation + using standard TensorFlow ops. It supports first and second order gradients, + but not third order gradients. + + Args: + x: Input activation tensor. Can have any shape, but if `b` is defined, the + dimension corresponding to `axis`, as well as the rank, must be known. + b: Bias vector, or `None` to disable. Must be a 1D tensor of the same type + as `x`. The shape must be known, and it must match the dimension of `x` + corresponding to `axis`. + axis: The dimension in `x` corresponding to the elements of `b`. + The value of `axis` is ignored if `b` is not specified. + act: Name of the activation function to evaluate, or `"linear"` to disable. + Can be e.g. `"relu"`, `"lrelu"`, `"tanh"`, `"sigmoid"`, `"swish"`, etc. + See `activation_funcs` for a full list. `None` is not allowed. + alpha: Shape parameter for the activation function, or `None` to use the default. + gain: Scaling factor for the output tensor, or `None` to use default. + See `activation_funcs` for the default scaling of each activation function. + If unsure, consider specifying `1.0`. 
+ clamp: Clamp the output values to `[-clamp, +clamp]`, or `None` to disable + the clamping (default). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the same shape and datatype as `x`. + """ + + impl_dict = { + 'ref': _fused_bias_act_ref, + 'cuda': _fused_bias_act_cuda, + } + return impl_dict[impl](x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain, clamp=clamp) + +#---------------------------------------------------------------------------- + +def _fused_bias_act_ref(x, b, axis, act, alpha, gain, clamp): + """Slow reference implementation of `fused_bias_act()` using standard TensorFlow ops.""" + + # Validate arguments. + x = tf.convert_to_tensor(x) + b = tf.convert_to_tensor(b) if b is not None else tf.constant([], dtype=x.dtype) + act_spec = activation_funcs[act] + assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis]) + assert b.shape[0] == 0 or 0 <= axis < x.shape.rank + if alpha is None: + alpha = act_spec.def_alpha + if gain is None: + gain = act_spec.def_gain + + # Add bias. + if b.shape[0] != 0: + x += tf.reshape(b, [-1 if i == axis else 1 for i in range(x.shape.rank)]) + + # Evaluate activation function. + x = act_spec.func(x, alpha=alpha) + + # Scale by gain. + if gain != 1: + x *= gain + + # Clamp. + if clamp is not None: + clamp = np.asarray(clamp, dtype=x.dtype.name) + assert clamp.shape == () and clamp >= 0 + x = tf.clip_by_value(x, -clamp, clamp) + return x + +#---------------------------------------------------------------------------- + +def _fused_bias_act_cuda(x, b, axis, act, alpha, gain, clamp): + """Fast CUDA implementation of `fused_bias_act()` using custom ops.""" + + # Validate arguments. + x = tf.convert_to_tensor(x) + empty_tensor = tf.constant([], dtype=x.dtype) + b = tf.convert_to_tensor(b) if b is not None else empty_tensor + act_spec = activation_funcs[act] + assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis]) + assert b.shape[0] == 0 or 0 <= axis < x.shape.rank + if alpha is None: + alpha = act_spec.def_alpha + if gain is None: + gain = act_spec.def_gain + + # Special cases. + if act == 'linear' and b is None and gain == 1.0: + return x + if act_spec.cuda_idx is None: + return _fused_bias_act_ref(x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain, clamp=clamp) + + # CUDA op. + cuda_op = _get_plugin().fused_bias_act + cuda_kwargs = dict(axis=int(axis), act=int(act_spec.cuda_idx), gain=float(gain)) + if alpha is not None: + cuda_kwargs['alpha'] = float(alpha) + if clamp is not None: + clamp = np.asarray(clamp, dtype=x.dtype.name) + assert clamp.shape == () and clamp >= 0 + cuda_kwargs['clamp'] = float(clamp.astype(np.float32)) + def ref(tensor, name): + return tensor if act_spec.ref == name else empty_tensor + + # Forward pass: y = func(x, b). 
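+    # Note on the custom op's `grad` attribute as used below: grad=0 evaluates y = act(x + b) * gain
+    # (with optional clamping), grad=1 multiplies an incoming gradient by the activation's first
+    # derivative, and grad=2 applies its second derivative. The `ref` helper above forwards either
+    # the original input x or the output y as the reference tensor, whichever the selected
+    # activation's derivative is expressed in terms of (act_spec.ref).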
+ def func_y(x, b): + y = cuda_op(x=x, b=b, xref=empty_tensor, yref=empty_tensor, grad=0, **cuda_kwargs) + y.set_shape(x.shape) + return y + + # Backward pass: dx, db = grad(dy, x, y) + def grad_dx(dy, x, y): + dx = cuda_op(x=dy, b=empty_tensor, xref=ref(x,'x'), yref=ref(y,'y'), grad=1, **cuda_kwargs) + dx.set_shape(x.shape) + return dx + def grad_db(dx): + if b.shape[0] == 0: + return empty_tensor + db = dx + if axis < x.shape.rank - 1: + db = tf.reduce_sum(db, list(range(axis + 1, x.shape.rank))) + if axis > 0: + db = tf.reduce_sum(db, list(range(axis))) + db.set_shape(b.shape) + return db + + # Second order gradients: d_dy, d_x = grad2(d_dx, d_db, x, y) + def grad2_d_dy(d_dx, d_db, x, y): + d_dy = cuda_op(x=d_dx, b=d_db, xref=ref(x,'x'), yref=ref(y,'y'), grad=1, **cuda_kwargs) + d_dy.set_shape(x.shape) + return d_dy + def grad2_d_x(d_dx, d_db, x, y): + d_x = cuda_op(x=d_dx, b=d_db, xref=ref(x,'x'), yref=ref(y,'y'), grad=2, **cuda_kwargs) + d_x.set_shape(x.shape) + return d_x + + # Fast version for piecewise-linear activation funcs. + @tf.custom_gradient + def func_zero_2nd_grad(x, b): + y = func_y(x, b) + @tf.custom_gradient + def grad(dy): + dx = grad_dx(dy, x, y) + db = grad_db(dx) + def grad2(d_dx, d_db): + d_dy = grad2_d_dy(d_dx, d_db, x, y) + return d_dy + return (dx, db), grad2 + return y, grad + + # Slow version for general activation funcs. + @tf.custom_gradient + def func_nonzero_2nd_grad(x, b): + y = func_y(x, b) + def grad_wrap(dy): + @tf.custom_gradient + def grad_impl(dy, x): + dx = grad_dx(dy, x, y) + db = grad_db(dx) + def grad2(d_dx, d_db): + d_dy = grad2_d_dy(d_dx, d_db, x, y) + d_x = grad2_d_x(d_dx, d_db, x, y) + return d_dy, d_x + return (dx, db), grad2 + return grad_impl(dy, x) + return y, grad_wrap + + # Which version to use? + if act_spec.zero_2nd_grad: + return func_zero_2nd_grad(x, b) + return func_nonzero_2nd_grad(x, b) + +#---------------------------------------------------------------------------- diff --git a/dnnlib/tflib/ops/upfirdn_2d.cu b/dnnlib/tflib/ops/upfirdn_2d.cu new file mode 100755 index 00000000..7aad60d5 --- /dev/null +++ b/dnnlib/tflib/ops/upfirdn_2d.cu @@ -0,0 +1,359 @@ +// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +// +// NVIDIA CORPORATION and its licensors retain all intellectual property +// and proprietary rights in and to this software, related documentation +// and any modifications thereto. Any use, reproduction, disclosure or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA CORPORATION is strictly prohibited. + +#define EIGEN_USE_GPU +#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include + +using namespace tensorflow; +using namespace tensorflow::shape_inference; + +//------------------------------------------------------------------------ +// Helpers. + +#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false) + +static __host__ __device__ __forceinline__ int floorDiv(int a, int b) +{ + int t = 1 - a / b; + return (a + t * b) / b - t; +} + +//------------------------------------------------------------------------ +// CUDA kernel params. 
+ +template +struct UpFirDn2DKernelParams +{ + const T* x; // [majorDim, inH, inW, minorDim] + const T* k; // [kernelH, kernelW] + T* y; // [majorDim, outH, outW, minorDim] + + int upx; + int upy; + int downx; + int downy; + int padx0; + int padx1; + int pady0; + int pady1; + + int majorDim; + int inH; + int inW; + int minorDim; + int kernelH; + int kernelW; + int outH; + int outW; + int loopMajor; + int loopX; +}; + +//------------------------------------------------------------------------ +// General CUDA implementation for large filter kernels. + +template +static __global__ void UpFirDn2DKernel_large(const UpFirDn2DKernelParams p) +{ + // Calculate thread index. + int minorIdx = blockIdx.x * blockDim.x + threadIdx.x; + int outY = minorIdx / p.minorDim; + minorIdx -= outY * p.minorDim; + int outXBase = blockIdx.y * p.loopX * blockDim.y + threadIdx.y; + int majorIdxBase = blockIdx.z * p.loopMajor; + if (outXBase >= p.outW || outY >= p.outH || majorIdxBase >= p.majorDim) + return; + + // Setup Y receptive field. + int midY = outY * p.downy + p.upy - 1 - p.pady0; + int inY = min(max(floorDiv(midY, p.upy), 0), p.inH); + int h = min(max(floorDiv(midY + p.kernelH, p.upy), 0), p.inH) - inY; + int kernelY = midY + p.kernelH - (inY + 1) * p.upy; + + // Loop over majorDim and outX. + for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor && majorIdx < p.majorDim; loopMajor++, majorIdx++) + for (int loopX = 0, outX = outXBase; loopX < p.loopX && outX < p.outW; loopX++, outX += blockDim.y) + { + // Setup X receptive field. + int midX = outX * p.downx + p.upx - 1 - p.padx0; + int inX = min(max(floorDiv(midX, p.upx), 0), p.inW); + int w = min(max(floorDiv(midX + p.kernelW, p.upx), 0), p.inW) - inX; + int kernelX = midX + p.kernelW - (inX + 1) * p.upx; + + // Initialize pointers. + const T* xp = &p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx]; + const T* kp = &p.k[kernelY * p.kernelW + kernelX]; + int xpx = p.minorDim; + int kpx = -p.upx; + int xpy = p.inW * p.minorDim; + int kpy = -p.upy * p.kernelW; + + // Inner loop. + float v = 0.0f; + for (int y = 0; y < h; y++) + { + for (int x = 0; x < w; x++) + { + v += (float)(*xp) * (float)(*kp); + xp += xpx; + kp += kpx; + } + xp += xpy - w * xpx; + kp += kpy - w * kpx; + } + + // Store result. + p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v; + } +} + +//------------------------------------------------------------------------ +// Specialized CUDA implementation for small filter kernels. + +template +static __global__ void UpFirDn2DKernel_small(const UpFirDn2DKernelParams p) +{ + //assert(kernelW % upx == 0); + //assert(kernelH % upy == 0); + const int tileInW = ((tileOutW - 1) * downx + kernelW - 1) / upx + 1; + const int tileInH = ((tileOutH - 1) * downy + kernelH - 1) / upy + 1; + __shared__ volatile float sk[kernelH][kernelW]; + __shared__ volatile float sx[tileInH][tileInW]; + + // Calculate tile index. + int minorIdx = blockIdx.x; + int tileOutY = minorIdx / p.minorDim; + minorIdx -= tileOutY * p.minorDim; + tileOutY *= tileOutH; + int tileOutXBase = blockIdx.y * p.loopX * tileOutW; + int majorIdxBase = blockIdx.z * p.loopMajor; + if (tileOutXBase >= p.outW | tileOutY >= p.outH | majorIdxBase >= p.majorDim) + return; + + // Load filter kernel (flipped). 
+ for (int tapIdx = threadIdx.x; tapIdx < kernelH * kernelW; tapIdx += blockDim.x) + { + int ky = tapIdx / kernelW; + int kx = tapIdx - ky * kernelW; + float v = 0.0f; + if (kx < p.kernelW & ky < p.kernelH) + v = (float)p.k[(p.kernelH - 1 - ky) * p.kernelW + (p.kernelW - 1 - kx)]; + sk[ky][kx] = v; + } + + // Loop over majorDim and outX. + for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor & majorIdx < p.majorDim; loopMajor++, majorIdx++) + for (int loopX = 0, tileOutX = tileOutXBase; loopX < p.loopX & tileOutX < p.outW; loopX++, tileOutX += tileOutW) + { + // Load input pixels. + int tileMidX = tileOutX * downx + upx - 1 - p.padx0; + int tileMidY = tileOutY * downy + upy - 1 - p.pady0; + int tileInX = floorDiv(tileMidX, upx); + int tileInY = floorDiv(tileMidY, upy); + __syncthreads(); + for (int inIdx = threadIdx.x; inIdx < tileInH * tileInW; inIdx += blockDim.x) + { + int relInY = inIdx / tileInW; + int relInX = inIdx - relInY * tileInW; + int inX = relInX + tileInX; + int inY = relInY + tileInY; + float v = 0.0f; + if (inX >= 0 & inY >= 0 & inX < p.inW & inY < p.inH) + v = (float)p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx]; + sx[relInY][relInX] = v; + } + + // Loop over output pixels. + __syncthreads(); + for (int outIdx = threadIdx.x; outIdx < tileOutH * tileOutW; outIdx += blockDim.x) + { + int relOutY = outIdx / tileOutW; + int relOutX = outIdx - relOutY * tileOutW; + int outX = relOutX + tileOutX; + int outY = relOutY + tileOutY; + + // Setup receptive field. + int midX = tileMidX + relOutX * downx; + int midY = tileMidY + relOutY * downy; + int inX = floorDiv(midX, upx); + int inY = floorDiv(midY, upy); + int relInX = inX - tileInX; + int relInY = inY - tileInY; + int kernelX = (inX + 1) * upx - midX - 1; // flipped + int kernelY = (inY + 1) * upy - midY - 1; // flipped + + // Inner loop. + float v = 0.0f; + #pragma unroll + for (int y = 0; y < kernelH / upy; y++) + #pragma unroll + for (int x = 0; x < kernelW / upx; x++) + v += sx[relInY + y][relInX + x] * sk[kernelY + y * upy][kernelX + x * upx]; + + // Store result. + if (outX < p.outW & outY < p.outH) + p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v; + } + } +} + +//------------------------------------------------------------------------ +// TensorFlow op. 
+ +template +struct UpFirDn2DOp : public OpKernel +{ + UpFirDn2DKernelParams m_attribs; + + UpFirDn2DOp(OpKernelConstruction* ctx) : OpKernel(ctx) + { + memset(&m_attribs, 0, sizeof(m_attribs)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("upx", &m_attribs.upx)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("upy", &m_attribs.upy)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("downx", &m_attribs.downx)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("downy", &m_attribs.downy)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("padx0", &m_attribs.padx0)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("padx1", &m_attribs.padx1)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("pady0", &m_attribs.pady0)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("pady1", &m_attribs.pady1)); + OP_REQUIRES(ctx, m_attribs.upx >= 1 && m_attribs.upy >= 1, errors::InvalidArgument("upx and upy must be at least 1x1")); + OP_REQUIRES(ctx, m_attribs.downx >= 1 && m_attribs.downy >= 1, errors::InvalidArgument("downx and downy must be at least 1x1")); + } + + void Compute(OpKernelContext* ctx) + { + UpFirDn2DKernelParams p = m_attribs; + cudaStream_t stream = ctx->eigen_device().stream(); + + const Tensor& x = ctx->input(0); // [majorDim, inH, inW, minorDim] + const Tensor& k = ctx->input(1); // [kernelH, kernelW] + p.x = x.flat().data(); + p.k = k.flat().data(); + OP_REQUIRES(ctx, x.dims() == 4, errors::InvalidArgument("input must have rank 4")); + OP_REQUIRES(ctx, k.dims() == 2, errors::InvalidArgument("kernel must have rank 2")); + OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("input too large")); + OP_REQUIRES(ctx, k.NumElements() <= kint32max, errors::InvalidArgument("kernel too large")); + + p.majorDim = (int)x.dim_size(0); + p.inH = (int)x.dim_size(1); + p.inW = (int)x.dim_size(2); + p.minorDim = (int)x.dim_size(3); + p.kernelH = (int)k.dim_size(0); + p.kernelW = (int)k.dim_size(1); + OP_REQUIRES(ctx, p.kernelW >= 1 && p.kernelH >= 1, errors::InvalidArgument("kernel must be at least 1x1")); + + p.outW = (p.inW * p.upx + p.padx0 + p.padx1 - p.kernelW + p.downx) / p.downx; + p.outH = (p.inH * p.upy + p.pady0 + p.pady1 - p.kernelH + p.downy) / p.downy; + OP_REQUIRES(ctx, p.outW >= 1 && p.outH >= 1, errors::InvalidArgument("output must be at least 1x1")); + + Tensor* y = NULL; // [majorDim, outH, outW, minorDim] + TensorShape ys; + ys.AddDim(p.majorDim); + ys.AddDim(p.outH); + ys.AddDim(p.outW); + ys.AddDim(p.minorDim); + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, ys, &y)); + p.y = y->flat().data(); + OP_REQUIRES(ctx, y->NumElements() <= kint32max, errors::InvalidArgument("output too large")); + + // Choose CUDA kernel to use. 
+ void* cudaKernel = (void*)UpFirDn2DKernel_large; + int tileOutW = -1; + int tileOutH = -1; + + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 7 && p.kernelH <= 7 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 5 && p.kernelH <= 5 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 3 && p.kernelH <= 3 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 24 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 128; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 20 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 128; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 16 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 128; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 12 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 128; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 8 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 128; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 1 && p.kernelH <= 24) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 32; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 1 && p.kernelH <= 20) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 32; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 1 && p.kernelH <= 16) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 32; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 1 && p.kernelH <= 12) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 32; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 1 && p.kernelH <= 8 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 32; } + + if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 8 && p.kernelH <= 8 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 2 && p.kernelH <= 2 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 2 && p.upy == 1 && p.downx == 1 && p.downy == 1 && 
p.kernelW <= 24 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 128; tileOutH = 8; } + if (p.upx == 2 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 20 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 128; tileOutH = 8; } + if (p.upx == 2 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 16 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 128; tileOutH = 8; } + if (p.upx == 2 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 12 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 128; tileOutH = 8; } + if (p.upx == 2 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 8 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 128; tileOutH = 8; } + if (p.upx == 1 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 1 && p.kernelH <= 24) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 32; } + if (p.upx == 1 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 1 && p.kernelH <= 20) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 32; } + if (p.upx == 1 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 1 && p.kernelH <= 16) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 32; } + if (p.upx == 1 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 1 && p.kernelH <= 12) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 32; } + if (p.upx == 1 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 1 && p.kernelH <= 8 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 32; } + + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 8 && p.kernelH <= 8 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 6 && p.kernelH <= 6 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 4 && p.kernelH <= 4 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 2 && p.kernelH <= 2 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 1 && p.kernelW <= 24 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 1 && p.kernelW <= 20 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 1 && p.kernelW <= 16 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 1 && p.kernelW <= 12 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 1 && p.kernelW <= 8 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 2 && p.kernelW <= 1 && p.kernelH <= 24) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy 
== 2 && p.kernelW <= 1 && p.kernelH <= 20) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 2 && p.kernelW <= 1 && p.kernelH <= 16) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 2 && p.kernelW <= 1 && p.kernelH <= 12) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 2 && p.kernelW <= 1 && p.kernelH <= 8 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 16; } + + // Choose launch params. + dim3 blockSize; + dim3 gridSize; + if (tileOutW > 0 && tileOutH > 0) // small + { + p.loopMajor = (p.majorDim - 1) / 16384 + 1; + p.loopX = 1; + blockSize = dim3(32 * 8, 1, 1); + gridSize = dim3(((p.outH - 1) / tileOutH + 1) * p.minorDim, (p.outW - 1) / (p.loopX * tileOutW) + 1, (p.majorDim - 1) / p.loopMajor + 1); + } + else // large + { + p.loopMajor = (p.majorDim - 1) / 16384 + 1; + p.loopX = 4; + blockSize = dim3(4, 32, 1); + gridSize = dim3((p.outH * p.minorDim - 1) / blockSize.x + 1, (p.outW - 1) / (p.loopX * blockSize.y) + 1, (p.majorDim - 1) / p.loopMajor + 1); + } + + // Launch CUDA kernel. + void* args[] = {&p}; + OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel(cudaKernel, gridSize, blockSize, args, 0, stream)); + } +}; + +REGISTER_OP("UpFirDn2D") + .Input ("x: T") + .Input ("k: T") + .Output ("y: T") + .Attr ("T: {float, half}") + .Attr ("upx: int = 1") + .Attr ("upy: int = 1") + .Attr ("downx: int = 1") + .Attr ("downy: int = 1") + .Attr ("padx0: int = 0") + .Attr ("padx1: int = 0") + .Attr ("pady0: int = 0") + .Attr ("pady1: int = 0"); +REGISTER_KERNEL_BUILDER(Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint("T"), UpFirDn2DOp); +REGISTER_KERNEL_BUILDER(Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint("T"), UpFirDn2DOp); + +//------------------------------------------------------------------------ diff --git a/dnnlib/tflib/ops/upfirdn_2d.py b/dnnlib/tflib/ops/upfirdn_2d.py new file mode 100755 index 00000000..55a31af7 --- /dev/null +++ b/dnnlib/tflib/ops/upfirdn_2d.py @@ -0,0 +1,418 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Custom TensorFlow ops for efficient resampling of 2D images.""" + +import os +import numpy as np +import tensorflow as tf +from .. import custom_ops + +def _get_plugin(): + return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu') + +#---------------------------------------------------------------------------- + +def upfirdn_2d(x, k, upx=1, upy=1, downx=1, downy=1, padx0=0, padx1=0, pady0=0, pady1=0, impl='cuda'): + r"""Pad, upsample, FIR filter, and downsample a batch of 2D images. + + Accepts a batch of 2D images of the shape `[majorDim, inH, inW, minorDim]` + and performs the following operations for each image, batched across + `majorDim` and `minorDim`: + + 1. Upsample the image by inserting the zeros after each pixel (`upx`, `upy`). + + 2. Pad the image with zeros by the specified number of pixels on each side + (`padx0`, `padx1`, `pady0`, `pady1`). 
Specifying a negative value + corresponds to cropping the image. + + 3. Convolve the image with the specified 2D FIR filter (`k`), shrinking the + image so that the footprint of all output pixels lies within the input image. + + 4. Downsample the image by throwing away pixels (`downx`, `downy`). + + This sequence of operations bears close resemblance to scipy.signal.upfirdn(). + The fused op is considerably more efficient than performing the same calculation + using standard TensorFlow ops. It supports gradients of arbitrary order. + + Args: + x: Input tensor of the shape `[majorDim, inH, inW, minorDim]`. + k: 2D FIR filter of the shape `[firH, firW]`. + upx: Integer upsampling factor along the X-axis (default: 1). + upy: Integer upsampling factor along the Y-axis (default: 1). + downx: Integer downsampling factor along the X-axis (default: 1). + downy: Integer downsampling factor along the Y-axis (default: 1). + padx0: Number of pixels to pad on the left side (default: 0). + padx1: Number of pixels to pad on the right side (default: 0). + pady0: Number of pixels to pad on the top side (default: 0). + pady1: Number of pixels to pad on the bottom side (default: 0). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[majorDim, outH, outW, minorDim]`, and same datatype as `x`. + """ + + impl_dict = { + 'ref': _upfirdn_2d_ref, + 'cuda': _upfirdn_2d_cuda, + } + return impl_dict[impl](x=x, k=k, upx=upx, upy=upy, downx=downx, downy=downy, padx0=padx0, padx1=padx1, pady0=pady0, pady1=pady1) + +#---------------------------------------------------------------------------- + +def _upfirdn_2d_ref(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1): + """Slow reference implementation of `upfirdn_2d()` using standard TensorFlow ops.""" + + x = tf.convert_to_tensor(x) + k = np.asarray(k, dtype=np.float32) + assert x.shape.rank == 4 + inH = x.shape[1].value + inW = x.shape[2].value + minorDim = _shape(x, 3) + kernelH, kernelW = k.shape + assert inW >= 1 and inH >= 1 + assert kernelW >= 1 and kernelH >= 1 + assert isinstance(upx, int) and isinstance(upy, int) + assert isinstance(downx, int) and isinstance(downy, int) + assert isinstance(padx0, int) and isinstance(padx1, int) + assert isinstance(pady0, int) and isinstance(pady1, int) + + # Upsample (insert zeros). + x = tf.reshape(x, [-1, inH, 1, inW, 1, minorDim]) + x = tf.pad(x, [[0, 0], [0, 0], [0, upy - 1], [0, 0], [0, upx - 1], [0, 0]]) + x = tf.reshape(x, [-1, inH * upy, inW * upx, minorDim]) + + # Pad (crop if negative). + x = tf.pad(x, [[0, 0], [max(pady0, 0), max(pady1, 0)], [max(padx0, 0), max(padx1, 0)], [0, 0]]) + x = x[:, max(-pady0, 0) : x.shape[1].value - max(-pady1, 0), max(-padx0, 0) : x.shape[2].value - max(-padx1, 0), :] + + # Convolve with filter. + x = tf.transpose(x, [0, 3, 1, 2]) + x = tf.reshape(x, [-1, 1, inH * upy + pady0 + pady1, inW * upx + padx0 + padx1]) + w = tf.constant(k[::-1, ::-1, np.newaxis, np.newaxis], dtype=x.dtype) + x = tf.nn.conv2d(x, w, strides=[1,1,1,1], padding='VALID', data_format='NCHW') + x = tf.reshape(x, [-1, minorDim, inH * upy + pady0 + pady1 - kernelH + 1, inW * upx + padx0 + padx1 - kernelW + 1]) + x = tf.transpose(x, [0, 2, 3, 1]) + + # Downsample (throw away pixels). 
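+    # Worked size check (illustrative): with a 4x4 input, a 4x4 filter, upx = upy = 2,
+    # padx0 = pady0 = 2, padx1 = pady1 = 1, and downx = downy = 1, zero-stuffing gives 8x8,
+    # padding gives 11x11, the VALID convolution gives 11 - 4 + 1 = 8 pixels per side, and the
+    # stride-1 slicing below keeps all 8 pixels, i.e. exact 2x upsampling, in agreement with
+    # outW = (inW * upx + padx0 + padx1 - kernelW) // downx + 1.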
+ return x[:, ::downy, ::downx, :] + +#---------------------------------------------------------------------------- + +def _upfirdn_2d_cuda(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1): + """Fast CUDA implementation of `upfirdn_2d()` using custom ops.""" + + x = tf.convert_to_tensor(x) + k = np.asarray(k, dtype=np.float32) + majorDim, inH, inW, minorDim = x.shape.as_list() + kernelH, kernelW = k.shape + assert inW >= 1 and inH >= 1 + assert kernelW >= 1 and kernelH >= 1 + assert isinstance(upx, int) and isinstance(upy, int) + assert isinstance(downx, int) and isinstance(downy, int) + assert isinstance(padx0, int) and isinstance(padx1, int) + assert isinstance(pady0, int) and isinstance(pady1, int) + + outW = (inW * upx + padx0 + padx1 - kernelW) // downx + 1 + outH = (inH * upy + pady0 + pady1 - kernelH) // downy + 1 + assert outW >= 1 and outH >= 1 + + cuda_op = _get_plugin().up_fir_dn2d + kc = tf.constant(k, dtype=x.dtype) + gkc = tf.constant(k[::-1, ::-1], dtype=x.dtype) + gpadx0 = kernelW - padx0 - 1 + gpady0 = kernelH - pady0 - 1 + gpadx1 = inW * upx - outW * downx + padx0 - upx + 1 + gpady1 = inH * upy - outH * downy + pady0 - upy + 1 + + @tf.custom_gradient + def func(x): + y = cuda_op(x=x, k=kc, upx=int(upx), upy=int(upy), downx=int(downx), downy=int(downy), padx0=int(padx0), padx1=int(padx1), pady0=int(pady0), pady1=int(pady1)) + y.set_shape([majorDim, outH, outW, minorDim]) + @tf.custom_gradient + def grad(dy): + dx = cuda_op(x=dy, k=gkc, upx=int(downx), upy=int(downy), downx=int(upx), downy=int(upy), padx0=int(gpadx0), padx1=int(gpadx1), pady0=int(gpady0), pady1=int(gpady1)) + dx.set_shape([majorDim, inH, inW, minorDim]) + return dx, func + return y, grad + return func(x) + +#---------------------------------------------------------------------------- + +def filter_2d(x, k, gain=1, padding=0, data_format='NCHW', impl='cuda'): + r"""Filter a batch of 2D images with the given FIR filter. + + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` + and filters each image with the given filter. The filter is normalized so that + if the input pixels are constant, they will be scaled by the specified `gain`. + Pixels outside the image are assumed to be zero. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + gain: Scaling factor for signal magnitude (default: 1.0). + padding: Number of pixels to pad or crop the output on each side (default: 0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the same shape and datatype as `x`. + """ + + assert isinstance(padding, int) + k = _FilterKernel(k=k, gain=gain) + assert k.w == k.h + pad0 = k.w // 2 + padding + pad1 = (k.w - 1) // 2 + padding + return _simple_upfirdn_2d(x, k, pad0=pad0, pad1=pad1, data_format=data_format, impl=impl) + +#---------------------------------------------------------------------------- + +def upsample_2d(x, k=None, factor=2, gain=1, padding=0, data_format='NCHW', impl='cuda'): + r"""Upsample a batch of 2D images with the given filter. + + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` + and upsamples each image with the given filter. The filter is normalized so that + if the input pixels are constant, they will be scaled by the specified `gain`. 
+ Pixels outside the image are assumed to be zero, and the filter is padded with + zeros so that its shape is a multiple of the upsampling factor. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + The default is `[1] * factor`, which corresponds to nearest-neighbor + upsampling. + factor: Integer upsampling factor (default: 2). + gain: Scaling factor for signal magnitude (default: 1.0). + padding: Number of pixels to pad or crop the output on each side (default: 0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[N, C, H * factor, W * factor]` or + `[N, H * factor, W * factor, C]`, and same datatype as `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + assert isinstance(padding, int) + k = _FilterKernel(k if k is not None else [1] * factor, gain * (factor ** 2)) + assert k.w == k.h + pad0 = (k.w + factor - 1) // 2 + padding + pad1 = (k.w - factor) // 2 + padding + return _simple_upfirdn_2d(x, k, up=factor, pad0=pad0, pad1=pad1, data_format=data_format, impl=impl) + +#---------------------------------------------------------------------------- + +def downsample_2d(x, k=None, factor=2, gain=1, padding=0, data_format='NCHW', impl='cuda'): + r"""Downsample a batch of 2D images with the given filter. + + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` + and downsamples each image with the given filter. The filter is normalized so that + if the input pixels are constant, they will be scaled by the specified `gain`. + Pixels outside the image are assumed to be zero, and the filter is padded with + zeros so that its shape is a multiple of the downsampling factor. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + The default is `[1] * factor`, which corresponds to average pooling. + factor: Integer downsampling factor (default: 2). + gain: Scaling factor for signal magnitude (default: 1.0). + padding: Number of pixels to pad or crop the output on each side (default: 0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[N, C, H // factor, W // factor]` or + `[N, H // factor, W // factor, C]`, and same datatype as `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + assert isinstance(padding, int) + k = _FilterKernel(k if k is not None else [1] * factor, gain) + assert k.w == k.h + pad0 = (k.w - factor + 1) // 2 + padding * factor + pad1 = (k.w - factor) // 2 + padding * factor + return _simple_upfirdn_2d(x, k, down=factor, pad0=pad0, pad1=pad1, data_format=data_format, impl=impl) + +#---------------------------------------------------------------------------- + +def upsample_conv_2d(x, w, k=None, factor=2, gain=1, padding=0, data_format='NCHW', impl='cuda'): + r"""Fused `upsample_2d()` followed by `tf.nn.conv2d()`. + + Padding is performed only once at the beginning, not between the operations. + The fused op is considerably more efficient than performing the same calculation + using standard TensorFlow ops. It supports gradients of arbitrary order. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + w: Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`. 
+ Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + The default is `[1] * factor`, which corresponds to nearest-neighbor + upsampling. + factor: Integer upsampling factor (default: 2). + gain: Scaling factor for signal magnitude (default: 1.0). + padding: Number of pixels to pad or crop the output on each side (default: 0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[N, C, H * factor, W * factor]` or + `[N, H * factor, W * factor, C]`, and same datatype as `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + assert isinstance(padding, int) + + # Check weight shape. + w = tf.convert_to_tensor(w) + ch, cw, _inC, _outC = w.shape.as_list() + inC = _shape(w, 2) + outC = _shape(w, 3) + assert cw == ch + + # Fast path for 1x1 convolution. + if cw == 1 and ch == 1: + x = tf.nn.conv2d(x, w, data_format=data_format, strides=[1,1,1,1], padding='VALID') + x = upsample_2d(x, k, factor=factor, gain=gain, padding=padding, data_format=data_format, impl=impl) + return x + + # Setup filter kernel. + k = _FilterKernel(k if k is not None else [1] * factor, gain * (factor ** 2)) + assert k.w == k.h + + # Determine data dimensions. + if data_format == 'NCHW': + stride = [1, 1, factor, factor] + output_shape = [_shape(x, 0), outC, (_shape(x, 2) - 1) * factor + ch, (_shape(x, 3) - 1) * factor + cw] + num_groups = _shape(x, 1) // inC + else: + stride = [1, factor, factor, 1] + output_shape = [_shape(x, 0), (_shape(x, 1) - 1) * factor + ch, (_shape(x, 2) - 1) * factor + cw, outC] + num_groups = _shape(x, 3) // inC + + # Transpose weights. + w = tf.reshape(w, [ch, cw, inC, num_groups, -1]) + w = tf.transpose(w[::-1, ::-1], [0, 1, 4, 3, 2]) + w = tf.reshape(w, [ch, cw, -1, num_groups * inC]) + + # Execute. + x = tf.nn.conv2d_transpose(x, w, output_shape=output_shape, strides=stride, padding='VALID', data_format=data_format) + pad0 = (k.w + factor - cw) // 2 + padding + pad1 = (k.w - factor - cw + 3) // 2 + padding + return _simple_upfirdn_2d(x, k, pad0=pad0, pad1=pad1, data_format=data_format, impl=impl) + +#---------------------------------------------------------------------------- + +def conv_downsample_2d(x, w, k=None, factor=2, gain=1, padding=0, data_format='NCHW', impl='cuda'): + r"""Fused `tf.nn.conv2d()` followed by `downsample_2d()`. + + Padding is performed only once at the beginning, not between the operations. + The fused op is considerably more efficient than performing the same calculation + using standard TensorFlow ops. It supports gradients of arbitrary order. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + w: Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`. + Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + The default is `[1] * factor`, which corresponds to average pooling. + factor: Integer downsampling factor (default: 2). + gain: Scaling factor for signal magnitude (default: 1.0). + padding: Number of pixels to pad or crop the output on each side (default: 0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). 
+ + Returns: + Tensor of the shape `[N, C, H // factor, W // factor]` or + `[N, H // factor, W // factor, C]`, and same datatype as `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + assert isinstance(padding, int) + + # Check weight shape. + w = tf.convert_to_tensor(w) + ch, cw, _inC, _outC = w.shape.as_list() + assert cw == ch + + # Fast path for 1x1 convolution. + if cw == 1 and ch == 1: + x = downsample_2d(x, k, factor=factor, gain=gain, padding=padding, data_format=data_format, impl=impl) + x = tf.nn.conv2d(x, w, data_format=data_format, strides=[1,1,1,1], padding='VALID') + return x + + # Setup filter kernel. + k = _FilterKernel(k if k is not None else [1] * factor, gain) + assert k.w == k.h + + # Determine stride. + if data_format == 'NCHW': + s = [1, 1, factor, factor] + else: + s = [1, factor, factor, 1] + + # Execute. + pad0 = (k.w - factor + cw) // 2 + padding * factor + pad1 = (k.w - factor + cw - 1) // 2 + padding * factor + x = _simple_upfirdn_2d(x, k, pad0=pad0, pad1=pad1, data_format=data_format, impl=impl) + return tf.nn.conv2d(x, w, strides=s, padding='VALID', data_format=data_format) + +#---------------------------------------------------------------------------- +# Internal helpers. + +class _FilterKernel: + def __init__(self, k, gain=1): + k = np.asarray(k, dtype=np.float32) + k /= np.sum(k) + + # Separable. + if k.ndim == 1 and k.size >= 8: + self.w = k.size + self.h = k.size + self.kx = k[np.newaxis, :] + self.ky = k[:, np.newaxis] * gain + self.kxy = None + + # Non-separable. + else: + if k.ndim == 1: + k = np.outer(k, k) + assert k.ndim == 2 + self.w = k.shape[1] + self.h = k.shape[0] + self.kx = None + self.ky = None + self.kxy = k * gain + +def _simple_upfirdn_2d(x, k, up=1, down=1, pad0=0, pad1=0, data_format='NCHW', impl='cuda'): + assert isinstance(k, _FilterKernel) + assert data_format in ['NCHW', 'NHWC'] + assert x.shape.rank == 4 + y = x + if data_format == 'NCHW': + y = tf.reshape(y, [-1, _shape(y, 2), _shape(y, 3), 1]) + if k.kx is not None: + y = upfirdn_2d(y, k.kx, upx=up, downx=down, padx0=pad0, padx1=pad1, impl=impl) + if k.ky is not None: + y = upfirdn_2d(y, k.ky, upy=up, downy=down, pady0=pad0, pady1=pad1, impl=impl) + if k.kxy is not None: + y = upfirdn_2d(y, k.kxy, upx=up, upy=up, downx=down, downy=down, padx0=pad0, padx1=pad1, pady0=pad0, pady1=pad1, impl=impl) + if data_format == 'NCHW': + y = tf.reshape(y, [-1, _shape(x, 1), _shape(y, 1), _shape(y, 2)]) + return y + +def _shape(tf_expr, dim_idx): + if tf_expr.shape.rank is not None: + dim = tf_expr.shape[dim_idx].value + if dim is not None: + return dim + return tf.shape(tf_expr)[dim_idx] + +#---------------------------------------------------------------------------- diff --git a/dnnlib/tflib/optimizer.py b/dnnlib/tflib/optimizer.py new file mode 100755 index 00000000..157caeb1 --- /dev/null +++ b/dnnlib/tflib/optimizer.py @@ -0,0 +1,372 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Helper wrapper for a Tensorflow optimizer.""" + +import platform +import numpy as np +import tensorflow as tf + +from collections import OrderedDict +from typing import List, Union + +from . 
import autosummary +from . import tfutil +from .. import util + +from .tfutil import TfExpression, TfExpressionEx + +_collective_ops_warning_printed = False +_collective_ops_group_key = 831766147 +_collective_ops_instance_key = 436340067 + +class Optimizer: + """A Wrapper for tf.train.Optimizer. + + Automatically takes care of: + - Gradient averaging for multi-GPU training. + - Gradient accumulation for arbitrarily large minibatches. + - Dynamic loss scaling and typecasts for FP16 training. + - Ignoring corrupted gradients that contain NaNs/Infs. + - Reporting statistics. + - Well-chosen default settings. + """ + + def __init__(self, + name: str = "Train", # Name string that will appear in TensorFlow graph. + tf_optimizer: str = "tf.train.AdamOptimizer", # Underlying optimizer class. + learning_rate: TfExpressionEx = 0.001, # Learning rate. Can vary over time. + minibatch_multiplier: TfExpressionEx = None, # Treat N consecutive minibatches as one by accumulating gradients. + share: "Optimizer" = None, # Share internal state with a previously created optimizer? + use_loss_scaling: bool = False, # Enable dynamic loss scaling for robust mixed-precision training? + loss_scaling_init: float = 64.0, # Log2 of initial loss scaling factor. + loss_scaling_inc: float = 0.0005, # Log2 of per-minibatch loss scaling increment when there is no overflow. + loss_scaling_dec: float = 1.0, # Log2 of per-minibatch loss scaling decrement when there is an overflow. + report_mem_usage: bool = False, # Report fine-grained memory usage statistics in TensorBoard? + **kwargs): + + # Public fields. + self.name = name + self.learning_rate = learning_rate + self.minibatch_multiplier = minibatch_multiplier + self.id = self.name.replace("/", ".") + self.scope = tf.get_default_graph().unique_name(self.id) + self.optimizer_class = util.get_obj_by_name(tf_optimizer) + self.optimizer_kwargs = dict(kwargs) + self.use_loss_scaling = use_loss_scaling + self.loss_scaling_init = loss_scaling_init + self.loss_scaling_inc = loss_scaling_inc + self.loss_scaling_dec = loss_scaling_dec + + # Private fields. + self._updates_applied = False + self._devices = OrderedDict() # device_name => EasyDict() + self._shared_optimizers = OrderedDict() # device_name => optimizer_class + self._gradient_shapes = None # [shape, ...] + self._report_mem_usage = report_mem_usage + + # Validate arguments. + assert callable(self.optimizer_class) + + # Share internal state if requested. + if share is not None: + assert isinstance(share, Optimizer) + assert self.optimizer_class is share.optimizer_class + assert self.learning_rate is share.learning_rate + assert self.optimizer_kwargs == share.optimizer_kwargs + self._shared_optimizers = share._shared_optimizers # pylint: disable=protected-access + + def _get_device(self, device_name: str): + """Get internal state for the given TensorFlow device.""" + tfutil.assert_tf_initialized() + if device_name in self._devices: + return self._devices[device_name] + + # Initialize fields. + device = util.EasyDict() + device.name = device_name + device.optimizer = None # Underlying optimizer: optimizer_class + device.loss_scaling_var = None # Log2 of loss scaling: tf.Variable + device.grad_raw = OrderedDict() # Raw gradients: var => [grad, ...] 
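+        # (grad_raw receives one entry per register_gradients() call; the remaining
+        # gradient fields below are populated lazily by apply_updates().)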
+ device.grad_clean = OrderedDict() # Clean gradients: var => grad + device.grad_acc_vars = OrderedDict() # Accumulation sums: var => tf.Variable + device.grad_acc_count = None # Accumulation counter: tf.Variable + device.grad_acc = OrderedDict() # Accumulated gradients: var => grad + + # Setup TensorFlow objects. + with tfutil.absolute_name_scope(self.scope + "/Devices"), tf.device(device_name), tf.control_dependencies(None): + if device_name not in self._shared_optimizers: + optimizer_name = self.scope.replace("/", "_") + "_opt%d" % len(self._shared_optimizers) + self._shared_optimizers[device_name] = self.optimizer_class(name=optimizer_name, learning_rate=self.learning_rate, **self.optimizer_kwargs) + device.optimizer = self._shared_optimizers[device_name] + if self.use_loss_scaling: + device.loss_scaling_var = tf.Variable(np.float32(self.loss_scaling_init), trainable=False, name="loss_scaling_var") + + # Register device. + self._devices[device_name] = device + return device + + def register_gradients(self, loss: TfExpression, trainable_vars: Union[List, dict]) -> None: + """Register the gradients of the given loss function with respect to the given variables. + Intended to be called once per GPU.""" + tfutil.assert_tf_initialized() + assert not self._updates_applied + device = self._get_device(loss.device) + + # Validate trainables. + if isinstance(trainable_vars, dict): + trainable_vars = list(trainable_vars.values()) # allow passing in Network.trainables as vars + assert isinstance(trainable_vars, list) and len(trainable_vars) >= 1 + assert all(tfutil.is_tf_expression(expr) for expr in trainable_vars + [loss]) + assert all(var.device == device.name for var in trainable_vars) + + # Validate shapes. + if self._gradient_shapes is None: + self._gradient_shapes = [var.shape.as_list() for var in trainable_vars] + assert len(trainable_vars) == len(self._gradient_shapes) + assert all(var.shape.as_list() == var_shape for var, var_shape in zip(trainable_vars, self._gradient_shapes)) + + # Report memory usage if requested. + deps = [] + if self._report_mem_usage: + self._report_mem_usage = False + try: + with tf.name_scope(self.id + '_mem'), tf.device(device.name), tf.control_dependencies([loss]): + deps.append(autosummary.autosummary(self.id + "/mem_usage_gb", tf.contrib.memory_stats.BytesInUse() / 2**30)) + except tf.errors.NotFoundError: + pass + + # Compute gradients. + with tf.name_scope(self.id + "_grad"), tf.device(device.name), tf.control_dependencies(deps): + loss = self.apply_loss_scaling(tf.cast(loss, tf.float32)) + gate = tf.train.Optimizer.GATE_NONE # disable gating to reduce memory usage + grad_list = device.optimizer.compute_gradients(loss=loss, var_list=trainable_vars, gate_gradients=gate) + + # Register gradients. + for grad, var in grad_list: + if var not in device.grad_raw: + device.grad_raw[var] = [] + device.grad_raw[var].append(grad) + + def apply_updates(self, allow_no_op: bool = False) -> tf.Operation: + """Construct training op to update the registered variables based on their gradients.""" + tfutil.assert_tf_initialized() + assert not self._updates_applied + self._updates_applied = True + all_ops = [] + + # Check for no-op. + if allow_no_op and len(self._devices) == 0: + with tfutil.absolute_name_scope(self.scope): + return tf.no_op(name='TrainingOp') + + # Clean up gradients. 
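+        # For each variable: sum the raw gradients registered for it, cast to float32,
+        # and rescale so the result averages over devices, registrations, and the
+        # minibatch multiplier, undoing dynamic loss scaling when it is enabled.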
+ for device_idx, device in enumerate(self._devices.values()): + with tfutil.absolute_name_scope(self.scope + "/Clean%d" % device_idx), tf.device(device.name): + for var, grad in device.grad_raw.items(): + + # Filter out disconnected gradients and convert to float32. + grad = [g for g in grad if g is not None] + grad = [tf.cast(g, tf.float32) for g in grad] + + # Sum within the device. + if len(grad) == 0: + grad = tf.zeros(var.shape) # No gradients => zero. + elif len(grad) == 1: + grad = grad[0] # Single gradient => use as is. + else: + grad = tf.add_n(grad) # Multiple gradients => sum. + + # Scale as needed. + scale = 1.0 / len(device.grad_raw[var]) / len(self._devices) + scale = tf.constant(scale, dtype=tf.float32, name="scale") + if self.minibatch_multiplier is not None: + scale /= tf.cast(self.minibatch_multiplier, tf.float32) + scale = self.undo_loss_scaling(scale) + device.grad_clean[var] = grad * scale + + # Sum gradients across devices. + if len(self._devices) > 1: + with tfutil.absolute_name_scope(self.scope + "/Broadcast"), tf.device(None): + if platform.system() == "Windows": # Windows => NCCL ops are not available. + self._broadcast_fallback() + elif tf.VERSION.startswith("1.15."): # TF 1.15 => NCCL ops are broken: https://github.com/tensorflow/tensorflow/issues/41539 + self._broadcast_fallback() + else: # Otherwise => NCCL ops are safe to use. + self._broadcast_nccl() + + # Apply updates separately on each device. + for device_idx, device in enumerate(self._devices.values()): + with tfutil.absolute_name_scope(self.scope + "/Apply%d" % device_idx), tf.device(device.name): + # pylint: disable=cell-var-from-loop + + # Accumulate gradients over time. + if self.minibatch_multiplier is None: + acc_ok = tf.constant(True, name='acc_ok') + device.grad_acc = OrderedDict(device.grad_clean) + else: + # Create variables. + with tf.control_dependencies(None): + for var in device.grad_clean.keys(): + device.grad_acc_vars[var] = tf.Variable(tf.zeros(var.shape), trainable=False, name="grad_acc_var") + device.grad_acc_count = tf.Variable(tf.zeros([]), trainable=False, name="grad_acc_count") + + # Track counter. + count_cur = device.grad_acc_count + 1.0 + count_inc_op = lambda: tf.assign(device.grad_acc_count, count_cur) + count_reset_op = lambda: tf.assign(device.grad_acc_count, tf.zeros([])) + acc_ok = (count_cur >= tf.cast(self.minibatch_multiplier, tf.float32)) + all_ops.append(tf.cond(acc_ok, count_reset_op, count_inc_op)) + + # Track gradients. + for var, grad in device.grad_clean.items(): + acc_var = device.grad_acc_vars[var] + acc_cur = acc_var + grad + device.grad_acc[var] = acc_cur + with tf.control_dependencies([acc_cur]): + acc_inc_op = lambda: tf.assign(acc_var, acc_cur) + acc_reset_op = lambda: tf.assign(acc_var, tf.zeros(var.shape)) + all_ops.append(tf.cond(acc_ok, acc_reset_op, acc_inc_op)) + + # No overflow => apply gradients. + all_ok = tf.reduce_all(tf.stack([acc_ok] + [tf.reduce_all(tf.is_finite(g)) for g in device.grad_acc.values()])) + apply_op = lambda: device.optimizer.apply_gradients([(tf.cast(grad, var.dtype), var) for var, grad in device.grad_acc.items()]) + all_ops.append(tf.cond(all_ok, apply_op, tf.no_op)) + + # Adjust loss scaling. 
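+                # Once an accumulation cycle completes, raise the log2 loss-scaling
+                # factor slightly after a finite update and lower it sharply on overflow.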
+ if self.use_loss_scaling: + ls_inc_op = lambda: tf.assign_add(device.loss_scaling_var, self.loss_scaling_inc) + ls_dec_op = lambda: tf.assign_sub(device.loss_scaling_var, self.loss_scaling_dec) + ls_update_op = lambda: tf.group(tf.cond(all_ok, ls_inc_op, ls_dec_op)) + all_ops.append(tf.cond(acc_ok, ls_update_op, tf.no_op)) + + # Last device => report statistics. + if device_idx == len(self._devices) - 1: + all_ops.append(autosummary.autosummary(self.id + "/learning_rate", tf.convert_to_tensor(self.learning_rate))) + all_ops.append(autosummary.autosummary(self.id + "/overflow_frequency", tf.where(all_ok, 0, 1), condition=acc_ok)) + if self.use_loss_scaling: + all_ops.append(autosummary.autosummary(self.id + "/loss_scaling_log2", device.loss_scaling_var)) + + # Initialize variables. + self.reset_optimizer_state() + if self.use_loss_scaling: + tfutil.init_uninitialized_vars([device.loss_scaling_var for device in self._devices.values()]) + if self.minibatch_multiplier is not None: + tfutil.run([var.initializer for device in self._devices.values() for var in list(device.grad_acc_vars.values()) + [device.grad_acc_count]]) + + # Group everything into a single op. + with tfutil.absolute_name_scope(self.scope): + return tf.group(*all_ops, name="TrainingOp") + + def reset_optimizer_state(self) -> None: + """Reset internal state of the underlying optimizer.""" + tfutil.assert_tf_initialized() + tfutil.run([var.initializer for device in self._devices.values() for var in device.optimizer.variables()]) + + def get_loss_scaling_var(self, device: str) -> Union[tf.Variable, None]: + """Get or create variable representing log2 of the current dynamic loss scaling factor.""" + return self._get_device(device).loss_scaling_var + + def apply_loss_scaling(self, value: TfExpression) -> TfExpression: + """Apply dynamic loss scaling for the given expression.""" + assert tfutil.is_tf_expression(value) + if not self.use_loss_scaling: + return value + return value * tfutil.exp2(self.get_loss_scaling_var(value.device)) + + def undo_loss_scaling(self, value: TfExpression) -> TfExpression: + """Undo the effect of dynamic loss scaling for the given expression.""" + assert tfutil.is_tf_expression(value) + if not self.use_loss_scaling: + return value + return value * tfutil.exp2(-self.get_loss_scaling_var(value.device)) # pylint: disable=invalid-unary-operand-type + + def _broadcast_nccl(self): + """Sum gradients across devices using NCCL ops (fast path).""" + from tensorflow.python.ops import nccl_ops # pylint: disable=no-name-in-module + for all_vars in zip(*[device.grad_clean.keys() for device in self._devices.values()]): + if any(x.shape.num_elements() > 0 for x in all_vars): + all_grads = [device.grad_clean[var] for device, var in zip(self._devices.values(), all_vars)] + all_grads = nccl_ops.all_sum(all_grads) + for device, var, grad in zip(self._devices.values(), all_vars, all_grads): + device.grad_clean[var] = grad + + def _broadcast_fallback(self): + """Sum gradients across devices using TensorFlow collective ops (slow fallback path).""" + from tensorflow.python.ops import collective_ops # pylint: disable=no-name-in-module + global _collective_ops_warning_printed, _collective_ops_group_key, _collective_ops_instance_key + if all(x.shape.num_elements() == 0 for device in self._devices.values() for x in device.grad_clean.values()): + return + if not _collective_ops_warning_printed: + print("------------------------------------------------------------------------") + print("WARNING: Using slow fallback implementation 
for inter-GPU communication.") + print("Please use TensorFlow 1.14 on Linux for optimal training performance.") + print("------------------------------------------------------------------------") + _collective_ops_warning_printed = True + for device in self._devices.values(): + with tf.device(device.name): + combo = [tf.reshape(x, [x.shape.num_elements()]) for x in device.grad_clean.values()] + combo = tf.concat(combo, axis=0) + combo = collective_ops.all_reduce(combo, merge_op='Add', final_op='Id', + group_size=len(self._devices), group_key=_collective_ops_group_key, + instance_key=_collective_ops_instance_key) + cur_ofs = 0 + for var, grad_old in device.grad_clean.items(): + grad_new = tf.reshape(combo[cur_ofs : cur_ofs + grad_old.shape.num_elements()], grad_old.shape) + cur_ofs += grad_old.shape.num_elements() + device.grad_clean[var] = grad_new + _collective_ops_instance_key += 1 + + +class SimpleAdam: + """Simplified version of tf.train.AdamOptimizer that behaves identically when used with dnnlib.tflib.Optimizer.""" + + def __init__(self, name="Adam", learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8): + self.name = name + self.learning_rate = learning_rate + self.beta1 = beta1 + self.beta2 = beta2 + self.epsilon = epsilon + self.all_state_vars = [] + + def variables(self): + return self.all_state_vars + + def compute_gradients(self, loss, var_list, gate_gradients=tf.train.Optimizer.GATE_NONE): + assert gate_gradients == tf.train.Optimizer.GATE_NONE + return list(zip(tf.gradients(loss, var_list), var_list)) + + def apply_gradients(self, grads_and_vars): + with tf.name_scope(self.name): + state_vars = [] + update_ops = [] + + # Adjust learning rate to deal with startup bias. + with tf.control_dependencies(None): + b1pow_var = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False) + b2pow_var = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False) + state_vars += [b1pow_var, b2pow_var] + b1pow_new = b1pow_var * self.beta1 + b2pow_new = b2pow_var * self.beta2 + update_ops += [tf.assign(b1pow_var, b1pow_new), tf.assign(b2pow_var, b2pow_new)] + lr_new = self.learning_rate * tf.sqrt(1 - b2pow_new) / (1 - b1pow_new) + + # Construct ops to update each variable. + for grad, var in grads_and_vars: + with tf.control_dependencies(None): + m_var = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(var), trainable=False) + v_var = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(var), trainable=False) + state_vars += [m_var, v_var] + m_new = self.beta1 * m_var + (1 - self.beta1) * grad + v_new = self.beta2 * v_var + (1 - self.beta2) * tf.square(grad) + var_delta = lr_new * m_new / (tf.sqrt(v_new) + self.epsilon) + update_ops += [tf.assign(m_var, m_new), tf.assign(v_var, v_new), tf.assign_sub(var, var_delta)] + + # Group everything together. + self.all_state_vars += state_vars + return tf.group(*update_ops) diff --git a/dnnlib/tflib/tfutil.py b/dnnlib/tflib/tfutil.py new file mode 100755 index 00000000..fe211002 --- /dev/null +++ b/dnnlib/tflib/tfutil.py @@ -0,0 +1,262 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. 
+ +"""Miscellaneous helper utils for Tensorflow.""" + +import os +import numpy as np +import tensorflow as tf + +# Silence deprecation warnings from TensorFlow 1.13 onwards +import logging +logging.getLogger('tensorflow').setLevel(logging.ERROR) +import tensorflow.contrib # requires TensorFlow 1.x! +tf.contrib = tensorflow.contrib + +from typing import Any, Iterable, List, Union + +TfExpression = Union[tf.Tensor, tf.Variable, tf.Operation] +"""A type that represents a valid Tensorflow expression.""" + +TfExpressionEx = Union[TfExpression, int, float, np.ndarray] +"""A type that can be converted to a valid Tensorflow expression.""" + + +def run(*args, **kwargs) -> Any: + """Run the specified ops in the default session.""" + assert_tf_initialized() + return tf.get_default_session().run(*args, **kwargs) + + +def is_tf_expression(x: Any) -> bool: + """Check whether the input is a valid Tensorflow expression, i.e., Tensorflow Tensor, Variable, or Operation.""" + return isinstance(x, (tf.Tensor, tf.Variable, tf.Operation)) + + +def shape_to_list(shape: Iterable[tf.Dimension]) -> List[Union[int, None]]: + """Convert a Tensorflow shape to a list of ints. Retained for backwards compatibility -- use TensorShape.as_list() in new code.""" + return [dim.value for dim in shape] + + +def flatten(x: TfExpressionEx) -> TfExpression: + """Shortcut function for flattening a tensor.""" + with tf.name_scope("Flatten"): + return tf.reshape(x, [-1]) + + +def log2(x: TfExpressionEx) -> TfExpression: + """Logarithm in base 2.""" + with tf.name_scope("Log2"): + return tf.log(x) * np.float32(1.0 / np.log(2.0)) + + +def exp2(x: TfExpressionEx) -> TfExpression: + """Exponent in base 2.""" + with tf.name_scope("Exp2"): + return tf.exp(x * np.float32(np.log(2.0))) + + +def erfinv(y: TfExpressionEx) -> TfExpression: + """Inverse of the error function.""" + # pylint: disable=no-name-in-module + from tensorflow.python.ops.distributions import special_math + return special_math.erfinv(y) + + +def lerp(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpressionEx: + """Linear interpolation.""" + with tf.name_scope("Lerp"): + return a + (b - a) * t + + +def lerp_clip(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpression: + """Linear interpolation with clip.""" + with tf.name_scope("LerpClip"): + return a + (b - a) * tf.clip_by_value(t, 0.0, 1.0) + + +def absolute_name_scope(scope: str) -> tf.name_scope: + """Forcefully enter the specified name scope, ignoring any surrounding scopes.""" + return tf.name_scope(scope + "/") + + +def absolute_variable_scope(scope: str, **kwargs) -> tf.variable_scope: + """Forcefully enter the specified variable scope, ignoring any surrounding scopes.""" + return tf.variable_scope(tf.VariableScope(name=scope, **kwargs), auxiliary_name_scope=False) + + +def _sanitize_tf_config(config_dict: dict = None) -> dict: + # Defaults. + cfg = dict() + cfg["rnd.np_random_seed"] = None # Random seed for NumPy. None = keep as is. + cfg["rnd.tf_random_seed"] = "auto" # Random seed for TensorFlow. 'auto' = derive from NumPy random state. None = keep as is. + cfg["env.TF_CPP_MIN_LOG_LEVEL"] = "1" # 0 = Print all available debug info from TensorFlow. 1 = Print warnings and errors, but disable debug info. + cfg["env.HDF5_USE_FILE_LOCKING"] = "FALSE" # Disable HDF5 file locking to avoid concurrency issues with network shares. + cfg["graph_options.place_pruned_graph"] = True # False = Check that all ops are available on the designated device. 
True = Skip the check for ops that are not used. + cfg["gpu_options.allow_growth"] = True # False = Allocate all GPU memory at the beginning. True = Allocate only as much GPU memory as needed. + + # Remove defaults for environment variables that are already set. + for key in list(cfg): + fields = key.split(".") + if fields[0] == "env": + assert len(fields) == 2 + if fields[1] in os.environ: + del cfg[key] + + # User overrides. + if config_dict is not None: + cfg.update(config_dict) + return cfg + + +def init_tf(config_dict: dict = None) -> None: + """Initialize TensorFlow session using good default settings.""" + # Skip if already initialized. + if tf.get_default_session() is not None: + return + + # Setup config dict and random seeds. + cfg = _sanitize_tf_config(config_dict) + np_random_seed = cfg["rnd.np_random_seed"] + if np_random_seed is not None: + np.random.seed(np_random_seed) + tf_random_seed = cfg["rnd.tf_random_seed"] + if tf_random_seed == "auto": + tf_random_seed = np.random.randint(1 << 31) + if tf_random_seed is not None: + tf.set_random_seed(tf_random_seed) + + # Setup environment variables. + for key, value in cfg.items(): + fields = key.split(".") + if fields[0] == "env": + assert len(fields) == 2 + os.environ[fields[1]] = str(value) + + # Create default TensorFlow session. + create_session(cfg, force_as_default=True) + + +def assert_tf_initialized(): + """Check that TensorFlow session has been initialized.""" + if tf.get_default_session() is None: + raise RuntimeError("No default TensorFlow session found. Please call dnnlib.tflib.init_tf().") + + +def create_session(config_dict: dict = None, force_as_default: bool = False) -> tf.Session: + """Create tf.Session based on config dict.""" + # Setup TensorFlow config proto. + cfg = _sanitize_tf_config(config_dict) + config_proto = tf.ConfigProto() + for key, value in cfg.items(): + fields = key.split(".") + if fields[0] not in ["rnd", "env"]: + obj = config_proto + for field in fields[:-1]: + obj = getattr(obj, field) + setattr(obj, fields[-1], value) + + # Create session. + session = tf.Session(config=config_proto) + if force_as_default: + # pylint: disable=protected-access + session._default_session = session.as_default() + session._default_session.enforce_nesting = False + session._default_session.__enter__() + return session + + +def init_uninitialized_vars(target_vars: List[tf.Variable] = None) -> None: + """Initialize all tf.Variables that have not already been initialized. + + Equivalent to the following, but more efficient and does not bloat the tf graph: + tf.variables_initializer(tf.report_uninitialized_variables()).run() + """ + assert_tf_initialized() + if target_vars is None: + target_vars = tf.global_variables() + + test_vars = [] + test_ops = [] + + with tf.control_dependencies(None): # ignore surrounding control_dependencies + for var in target_vars: + assert is_tf_expression(var) + + try: + tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/IsVariableInitialized:0")) + except KeyError: + # Op does not exist => variable may be uninitialized. + test_vars.append(var) + + with absolute_name_scope(var.name.split(":")[0]): + test_ops.append(tf.is_variable_initialized(var)) + + init_vars = [var for var, inited in zip(test_vars, run(test_ops)) if not inited] + run([var.initializer for var in init_vars]) + + +def set_vars(var_to_value_dict: dict) -> None: + """Set the values of given tf.Variables. 
+ + Equivalent to the following, but more efficient and does not bloat the tf graph: + tflib.run([tf.assign(var, value) for var, value in var_to_value_dict.items()] + """ + assert_tf_initialized() + ops = [] + feed_dict = {} + + for var, value in var_to_value_dict.items(): + assert is_tf_expression(var) + + try: + setter = tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/setter:0")) # look for existing op + except KeyError: + with absolute_name_scope(var.name.split(":")[0]): + with tf.control_dependencies(None): # ignore surrounding control_dependencies + setter = tf.assign(var, tf.placeholder(var.dtype, var.shape, "new_value"), name="setter") # create new setter + + ops.append(setter) + feed_dict[setter.op.inputs[1]] = value + + run(ops, feed_dict) + + +def create_var_with_large_initial_value(initial_value: np.ndarray, *args, **kwargs): + """Create tf.Variable with large initial value without bloating the tf graph.""" + assert_tf_initialized() + assert isinstance(initial_value, np.ndarray) + zeros = tf.zeros(initial_value.shape, initial_value.dtype) + var = tf.Variable(zeros, *args, **kwargs) + set_vars({var: initial_value}) + return var + + +def convert_images_from_uint8(images, drange=[-1,1], nhwc_to_nchw=False): + """Convert a minibatch of images from uint8 to float32 with configurable dynamic range. + Can be used as an input transformation for Network.run(). + """ + images = tf.cast(images, tf.float32) + if nhwc_to_nchw: + images = tf.transpose(images, [0, 3, 1, 2]) + return images * ((drange[1] - drange[0]) / 255) + drange[0] + + +def convert_images_to_uint8(images, drange=[-1,1], nchw_to_nhwc=False, shrink=1): + """Convert a minibatch of images from float32 to uint8 with configurable dynamic range. + Can be used as an output transformation for Network.run(). + """ + images = tf.cast(images, tf.float32) + if shrink > 1: + ksize = [1, 1, shrink, shrink] + images = tf.nn.avg_pool(images, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") + if nchw_to_nhwc: + images = tf.transpose(images, [0, 2, 3, 1]) + scale = 255 / (drange[1] - drange[0]) + images = images * scale + (0.5 - drange[0] * scale) + return tf.saturate_cast(images, tf.uint8) diff --git a/dnnlib/util.py b/dnnlib/util.py new file mode 100755 index 00000000..0c35b892 --- /dev/null +++ b/dnnlib/util.py @@ -0,0 +1,472 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. 
+ +"""Miscellaneous utility classes and functions.""" + +import ctypes +import fnmatch +import importlib +import inspect +import numpy as np +import os +import shutil +import sys +import types +import io +import pickle +import re +import requests +import html +import hashlib +import glob +import tempfile +import urllib +import urllib.request +import uuid + +from distutils.util import strtobool +from typing import Any, List, Tuple, Union + + +# Util classes +# ------------------------------------------------------------------------------------------ + + +class EasyDict(dict): + """Convenience class that behaves like a dict but allows access with the attribute syntax.""" + + def __getattr__(self, name: str) -> Any: + try: + return self[name] + except KeyError: + raise AttributeError(name) + + def __setattr__(self, name: str, value: Any) -> None: + self[name] = value + + def __delattr__(self, name: str) -> None: + del self[name] + + +class Logger(object): + """Redirect stderr to stdout, optionally print stdout to a file, and optionally force flushing on both stdout and the file.""" + + def __init__(self, file_name: str = None, file_mode: str = "w", should_flush: bool = True): + self.file = None + + if file_name is not None: + self.file = open(file_name, file_mode) + + self.should_flush = should_flush + self.stdout = sys.stdout + self.stderr = sys.stderr + + sys.stdout = self + sys.stderr = self + + def __enter__(self) -> "Logger": + return self + + def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: + self.close() + + def write(self, text: str) -> None: + """Write text to stdout (and a file) and optionally flush.""" + if len(text) == 0: # workaround for a bug in VSCode debugger: sys.stdout.write(''); sys.stdout.flush() => crash + return + + if self.file is not None: + self.file.write(text) + + self.stdout.write(text) + + if self.should_flush: + self.flush() + + def flush(self) -> None: + """Flush written text to both stdout and a file, if open.""" + if self.file is not None: + self.file.flush() + + self.stdout.flush() + + def close(self) -> None: + """Flush, close possible files, and remove stdout/stderr mirroring.""" + self.flush() + + # if using multiple loggers, prevent closing in wrong order + if sys.stdout is self: + sys.stdout = self.stdout + if sys.stderr is self: + sys.stderr = self.stderr + + if self.file is not None: + self.file.close() + + +# Cache directories +# ------------------------------------------------------------------------------------------ + +_dnnlib_cache_dir = None + +def set_cache_dir(path: str) -> None: + global _dnnlib_cache_dir + _dnnlib_cache_dir = path + +def make_cache_dir_path(*paths: str) -> str: + if _dnnlib_cache_dir is not None: + return os.path.join(_dnnlib_cache_dir, *paths) + if 'DNNLIB_CACHE_DIR' in os.environ: + return os.path.join(os.environ['DNNLIB_CACHE_DIR'], *paths) + if 'HOME' in os.environ: + return os.path.join(os.environ['HOME'], '.cache', 'dnnlib', *paths) + if 'USERPROFILE' in os.environ: + return os.path.join(os.environ['USERPROFILE'], '.cache', 'dnnlib', *paths) + return os.path.join(tempfile.gettempdir(), '.cache', 'dnnlib', *paths) + +# Small util functions +# ------------------------------------------------------------------------------------------ + + +def format_time(seconds: Union[int, float]) -> str: + """Convert the seconds to human readable string with days, hours, minutes and seconds.""" + s = int(np.rint(seconds)) + + if s < 60: + return "{0}s".format(s) + elif s < 60 * 60: + return "{0}m 
{1:02}s".format(s // 60, s % 60) + elif s < 24 * 60 * 60: + return "{0}h {1:02}m {2:02}s".format(s // (60 * 60), (s // 60) % 60, s % 60) + else: + return "{0}d {1:02}h {2:02}m".format(s // (24 * 60 * 60), (s // (60 * 60)) % 24, (s // 60) % 60) + + +def ask_yes_no(question: str) -> bool: + """Ask the user the question until the user inputs a valid answer.""" + while True: + try: + print("{0} [y/n]".format(question)) + return strtobool(input().lower()) + except ValueError: + pass + + +def tuple_product(t: Tuple) -> Any: + """Calculate the product of the tuple elements.""" + result = 1 + + for v in t: + result *= v + + return result + + +_str_to_ctype = { + "uint8": ctypes.c_ubyte, + "uint16": ctypes.c_uint16, + "uint32": ctypes.c_uint32, + "uint64": ctypes.c_uint64, + "int8": ctypes.c_byte, + "int16": ctypes.c_int16, + "int32": ctypes.c_int32, + "int64": ctypes.c_int64, + "float32": ctypes.c_float, + "float64": ctypes.c_double +} + + +def get_dtype_and_ctype(type_obj: Any) -> Tuple[np.dtype, Any]: + """Given a type name string (or an object having a __name__ attribute), return matching Numpy and ctypes types that have the same size in bytes.""" + type_str = None + + if isinstance(type_obj, str): + type_str = type_obj + elif hasattr(type_obj, "__name__"): + type_str = type_obj.__name__ + elif hasattr(type_obj, "name"): + type_str = type_obj.name + else: + raise RuntimeError("Cannot infer type name from input") + + assert type_str in _str_to_ctype.keys() + + my_dtype = np.dtype(type_str) + my_ctype = _str_to_ctype[type_str] + + assert my_dtype.itemsize == ctypes.sizeof(my_ctype) + + return my_dtype, my_ctype + + +def is_pickleable(obj: Any) -> bool: + try: + with io.BytesIO() as stream: + pickle.dump(obj, stream) + return True + except: + return False + + +# Functionality to import modules/objects by name, and call functions by name +# ------------------------------------------------------------------------------------------ + +def get_module_from_obj_name(obj_name: str) -> Tuple[types.ModuleType, str]: + """Searches for the underlying module behind the name to some python object. + Returns the module and the object name (original name with module part removed).""" + + # allow convenience shorthands, substitute them by full names + obj_name = re.sub("^np.", "numpy.", obj_name) + obj_name = re.sub("^tf.", "tensorflow.", obj_name) + + # list alternatives for (module_name, local_obj_name) + parts = obj_name.split(".") + name_pairs = [(".".join(parts[:i]), ".".join(parts[i:])) for i in range(len(parts), 0, -1)] + + # try each alternative in turn + for module_name, local_obj_name in name_pairs: + try: + module = importlib.import_module(module_name) # may raise ImportError + get_obj_from_module(module, local_obj_name) # may raise AttributeError + return module, local_obj_name + except: + pass + + # maybe some of the modules themselves contain errors? + for module_name, _local_obj_name in name_pairs: + try: + importlib.import_module(module_name) # may raise ImportError + except ImportError: + if not str(sys.exc_info()[1]).startswith("No module named '" + module_name + "'"): + raise + + # maybe the requested attribute is missing? 
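+    # (ImportError is ignored here so that a genuine AttributeError raised by
+    # get_obj_from_module() propagates with a meaningful traceback.)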
+ for module_name, local_obj_name in name_pairs: + try: + module = importlib.import_module(module_name) # may raise ImportError + get_obj_from_module(module, local_obj_name) # may raise AttributeError + except ImportError: + pass + + # we are out of luck, but we have no idea why + raise ImportError(obj_name) + + +def get_obj_from_module(module: types.ModuleType, obj_name: str) -> Any: + """Traverses the object name and returns the last (rightmost) python object.""" + if obj_name == '': + return module + obj = module + for part in obj_name.split("."): + obj = getattr(obj, part) + return obj + + +def get_obj_by_name(name: str) -> Any: + """Finds the python object with the given name.""" + module, obj_name = get_module_from_obj_name(name) + return get_obj_from_module(module, obj_name) + + +def call_func_by_name(*args, func_name: str = None, **kwargs) -> Any: + """Finds the python object with the given name and calls it as a function.""" + assert func_name is not None + func_obj = get_obj_by_name(func_name) + assert callable(func_obj) + return func_obj(*args, **kwargs) + + +def construct_class_by_name(*args, class_name: str = None, **kwargs) -> Any: + """Finds the python class with the given name and constructs it with the given arguments.""" + return call_func_by_name(*args, func_name=class_name, **kwargs) + + +def get_module_dir_by_obj_name(obj_name: str) -> str: + """Get the directory path of the module containing the given object name.""" + module, _ = get_module_from_obj_name(obj_name) + return os.path.dirname(inspect.getfile(module)) + + +def is_top_level_function(obj: Any) -> bool: + """Determine whether the given object is a top-level function, i.e., defined at module scope using 'def'.""" + return callable(obj) and obj.__name__ in sys.modules[obj.__module__].__dict__ + + +def get_top_level_function_name(obj: Any) -> str: + """Return the fully-qualified name of a top-level function.""" + assert is_top_level_function(obj) + module = obj.__module__ + if module == '__main__': + module = os.path.splitext(os.path.basename(sys.modules[module].__file__))[0] + return module + "." + obj.__name__ + + +# File system helpers +# ------------------------------------------------------------------------------------------ + +def list_dir_recursively_with_ignore(dir_path: str, ignores: List[str] = None, add_base_to_relative: bool = False) -> List[Tuple[str, str]]: + """List all files recursively in a given directory while ignoring given file and directory names. + Returns list of tuples containing both absolute and relative paths.""" + assert os.path.isdir(dir_path) + base_name = os.path.basename(os.path.normpath(dir_path)) + + if ignores is None: + ignores = [] + + result = [] + + for root, dirs, files in os.walk(dir_path, topdown=True): + for ignore_ in ignores: + dirs_to_remove = [d for d in dirs if fnmatch.fnmatch(d, ignore_)] + + # dirs need to be edited in-place + for d in dirs_to_remove: + dirs.remove(d) + + files = [f for f in files if not fnmatch.fnmatch(f, ignore_)] + + absolute_paths = [os.path.join(root, f) for f in files] + relative_paths = [os.path.relpath(p, dir_path) for p in absolute_paths] + + if add_base_to_relative: + relative_paths = [os.path.join(base_name, p) for p in relative_paths] + + assert len(absolute_paths) == len(relative_paths) + result += zip(absolute_paths, relative_paths) + + return result + + +def copy_files_and_create_dirs(files: List[Tuple[str, str]]) -> None: + """Takes in a list of tuples of (src, dst) paths and copies files. 
+ Will create all necessary directories.""" + for file in files: + target_dir_name = os.path.dirname(file[1]) + + # will create all intermediate-level directories + if not os.path.exists(target_dir_name): + os.makedirs(target_dir_name) + + shutil.copyfile(file[0], file[1]) + + +# URL helpers +# ------------------------------------------------------------------------------------------ + +def is_url(obj: Any, allow_file_urls: bool = False) -> bool: + """Determine whether the given object is a valid URL string.""" + if not isinstance(obj, str) or not "://" in obj: + return False + if allow_file_urls and obj.startswith('file://'): + return True + try: + res = requests.compat.urlparse(obj) + if not res.scheme or not res.netloc or not "." in res.netloc: + return False + res = requests.compat.urlparse(requests.compat.urljoin(obj, "/")) + if not res.scheme or not res.netloc or not "." in res.netloc: + return False + except: + return False + return True + + +def open_url(url: str, cache_dir: str = None, num_attempts: int = 10, verbose: bool = True, return_filename: bool = False, cache: bool = True) -> Any: + """Download the given URL and return a binary-mode file object to access the data.""" + assert num_attempts >= 1 + assert not (return_filename and (not cache)) + + # Doesn't look like an URL scheme so interpret it as a local filename. + if not re.match('^[a-z]+://', url): + return url if return_filename else open(url, "rb") + + # Handle file URLs. This code handles unusual file:// patterns that + # arise on Windows: + # + # file:///c:/foo.txt + # + # which would translate to a local '/c:/foo.txt' filename that's + # invalid. Drop the forward slash for such pathnames. + # + # If you touch this code path, you should test it on both Linux and + # Windows. + # + # Some internet resources suggest using urllib.request.url2pathname() but + # but that converts forward slashes to backslashes and this causes + # its own set of problems. + if url.startswith('file://'): + filename = urllib.parse.urlparse(url).path + if re.match(r'^/[a-zA-Z]:', filename): + filename = filename[1:] + return filename if return_filename else open(filename, "rb") + + assert is_url(url) + + # Lookup from cache. + if cache_dir is None: + cache_dir = make_cache_dir_path('downloads') + + url_md5 = hashlib.md5(url.encode("utf-8")).hexdigest() + if cache: + cache_files = glob.glob(os.path.join(cache_dir, url_md5 + "_*")) + if len(cache_files) == 1: + filename = cache_files[0] + return filename if return_filename else open(filename, "rb") + + # Download. + url_name = None + url_data = None + with requests.Session() as session: + if verbose: + print("Downloading %s ..." 
% url, end="", flush=True) + for attempts_left in reversed(range(num_attempts)): + try: + with session.get(url) as res: + res.raise_for_status() + if len(res.content) == 0: + raise IOError("No data received") + + if len(res.content) < 8192: + content_str = res.content.decode("utf-8") + if "download_warning" in res.headers.get("Set-Cookie", ""): + links = [html.unescape(link) for link in content_str.split('"') if "export=download" in link] + if len(links) == 1: + url = requests.compat.urljoin(url, links[0]) + raise IOError("Google Drive virus checker nag") + if "Google Drive - Quota exceeded" in content_str: + raise IOError("Google Drive download quota exceeded -- please try again later") + + match = re.search(r'filename="([^"]*)"', res.headers.get("Content-Disposition", "")) + url_name = match[1] if match else url + url_data = res.content + if verbose: + print(" done") + break + except: + if not attempts_left: + if verbose: + print(" failed") + raise + if verbose: + print(".", end="", flush=True) + + # Save to cache. + if cache: + safe_name = re.sub(r"[^0-9a-zA-Z-._]", "_", url_name) + cache_file = os.path.join(cache_dir, url_md5 + "_" + safe_name) + temp_file = os.path.join(cache_dir, "tmp_" + uuid.uuid4().hex + "_" + url_md5 + "_" + safe_name) + os.makedirs(cache_dir, exist_ok=True) + with open(temp_file, "wb") as f: + f.write(url_data) + os.replace(temp_file, cache_file) # atomic + if return_filename: + return cache_file + + # Return data as file object. + assert not return_filename + return io.BytesIO(url_data) diff --git a/docs/license.html b/docs/license.html new file mode 100755 index 00000000..ebe83a9a --- /dev/null +++ b/docs/license.html @@ -0,0 +1,153 @@ + + + + + + Nvidia Source Code License-NC + + + + + +

+ <!-- docs/license.html body: HTML rendering of the NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator Augmentation (ADA); the license text is identical to LICENSE.txt and the page markup is not reproduced here. -->
+ + + diff --git a/docs/stylegan2-ada-teaser-1024x252.png b/docs/stylegan2-ada-teaser-1024x252.png new file mode 100755 index 00000000..14eb641b Binary files /dev/null and b/docs/stylegan2-ada-teaser-1024x252.png differ diff --git a/docs/stylegan2-ada-training-curves.png b/docs/stylegan2-ada-training-curves.png new file mode 100755 index 00000000..94cbb5e5 Binary files /dev/null and b/docs/stylegan2-ada-training-curves.png differ diff --git a/docs/train-help.txt b/docs/train-help.txt new file mode 100755 index 00000000..0f1a9b2f --- /dev/null +++ b/docs/train-help.txt @@ -0,0 +1,89 @@ +usage: train.py [-h] --outdir DIR [--gpus INT] [--snap INT] [--seed INT] [-n] + --data PATH [--res INT] [--mirror BOOL] [--metrics LIST] + [--metricdata PATH] + [--cfg {auto,stylegan2,paper256,paper512,paper1024,cifar,cifarbaseline}] + [--gamma FLOAT] [--kimg INT] [--aug {noaug,ada,fixed,adarv}] + [--p FLOAT] [--target TARGET] + [--augpipe {blit,geom,color,filter,noise,cutout,bg,bgc,bgcf,bgcfn,bgcfnc}] + [--cmethod {nocmethod,bcr,zcr,pagan,wgangp,auxrot,spectralnorm,shallowmap,adropout}] + [--dcap FLOAT] [--resume RESUME] [--freezed INT] + +Train a GAN using the techniques described in the paper +"Training Generative Adversarial Networks with Limited Data". + +optional arguments: + -h, --help show this help message and exit + +general options: + --outdir DIR Where to save the results (required) + --gpus INT Number of GPUs to use (default: 1 gpu) + --snap INT Snapshot interval (default: 50 ticks) + --seed INT Random seed (default: 1000) + -n, --dry-run Print training options and exit + +training dataset: + --data PATH Training dataset path (required) + --res INT Dataset resolution (default: highest available) + --mirror BOOL Augment dataset with x-flips (default: false) + +metrics: + --metrics LIST Comma-separated list or "none" (default: fid50k_full) + --metricdata PATH Dataset to evaluate metrics against (optional) + +base config: + --cfg {auto,stylegan2,paper256,paper512,paper1024,cifar,cifarbaseline} + Base config (default: auto) + --gamma FLOAT Override R1 gamma + --kimg INT Override training duration + +discriminator augmentation: + --aug {noaug,ada,fixed,adarv} + Augmentation mode (default: ada) + --p FLOAT Specify augmentation probability for --aug=fixed + --target TARGET Override ADA target for --aug=ada and --aug=adarv + --augpipe {blit,geom,color,filter,noise,cutout,bg,bgc,bgcf,bgcfn,bgcfnc} + Augmentation pipeline (default: bgc) + +comparison methods: + --cmethod {nocmethod,bcr,zcr,pagan,wgangp,auxrot,spectralnorm,shallowmap,adropout} + Comparison method (default: nocmethod) + --dcap FLOAT Multiplier for discriminator capacity + +transfer learning: + --resume RESUME Resume from network pickle (default: noresume) + --freezed INT Freeze-D (default: 0 discriminator layers) + +examples: + + # Train custom dataset using 1 GPU. + python train.py --outdir=~/training-runs --gpus=1 --data=~/datasets/custom + + # Train class-conditional CIFAR-10 using 2 GPUs. + python train.py --outdir=~/training-runs --gpus=2 --data=~/datasets/cifar10c \ + --cfg=cifar + + # Transfer learn MetFaces from FFHQ using 4 GPUs. + python train.py --outdir=~/training-runs --gpus=4 --data=~/datasets/metfaces \ + --cfg=paper1024 --mirror=1 --resume=ffhq1024 --snap=10 + + # Reproduce original StyleGAN2 config F. 
+ python train.py --outdir=~/training-runs --gpus=8 --data=~/datasets/ffhq \ + --cfg=stylegan2 --res=1024 --mirror=1 --aug=noaug + +available base configs (--cfg): + auto Automatically select reasonable defaults based on resolution + and GPU count. Good starting point for new datasets. + stylegan2 Reproduce results for StyleGAN2 config F at 1024x1024. + paper256 Reproduce results for FFHQ and LSUN Cat at 256x256. + paper512 Reproduce results for BreCaHAD and AFHQ at 512x512. + paper1024 Reproduce results for MetFaces at 1024x1024. + cifar Reproduce results for CIFAR-10 (tuned configuration). + cifarbaseline Reproduce results for CIFAR-10 (baseline configuration). + +transfer learning source networks (--resume): + ffhq256 FFHQ trained at 256x256 resolution. + ffhq512 FFHQ trained at 512x512 resolution. + ffhq1024 FFHQ trained at 1024x1024 resolution. + celebahq256 CelebA-HQ trained at 256x256 resolution. + lsundog256 LSUN Dog trained at 256x256 resolution. + Custom network pickle. diff --git a/generate.py b/generate.py new file mode 100755 index 00000000..42210a5a --- /dev/null +++ b/generate.py @@ -0,0 +1,123 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Generate images using pretrained network pickle.""" + +import argparse +import os +import pickle +import re + +import numpy as np +import PIL.Image + +import dnnlib +import dnnlib.tflib as tflib + +#---------------------------------------------------------------------------- + +def generate_images(network_pkl, seeds, truncation_psi, outdir, class_idx, dlatents_npz): + tflib.init_tf() + print('Loading networks from "%s"...' % network_pkl) + with dnnlib.util.open_url(network_pkl) as fp: + _G, _D, Gs = pickle.load(fp) + + os.makedirs(outdir, exist_ok=True) + + # Render images for a given dlatent vector. + if dlatents_npz is not None: + print(f'Generating images from dlatents file "{dlatents_npz}"') + dlatents = np.load(dlatents_npz)['dlatents'] + assert dlatents.shape[1:] == (18, 512) # [N, 18, 512] + imgs = Gs.components.synthesis.run(dlatents, output_transform=dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)) + for i, img in enumerate(imgs): + fname = f'{outdir}/dlatent{i:02d}.png' + print (f'Saved {fname}') + PIL.Image.fromarray(img, 'RGB').save(fname) + return + + # Render images for dlatents initialized from random seeds. + Gs_kwargs = { + 'output_transform': dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True), + 'randomize_noise': False + } + if truncation_psi is not None: + Gs_kwargs['truncation_psi'] = truncation_psi + + noise_vars = [var for name, var in Gs.components.synthesis.vars.items() if name.startswith('noise')] + label = np.zeros([1] + Gs.input_shapes[1][1:]) + if class_idx is not None: + label[:, class_idx] = 1 + + for seed_idx, seed in enumerate(seeds): + print('Generating image for seed %d (%d/%d) ...' 
% (seed, seed_idx, len(seeds))) + rnd = np.random.RandomState(seed) + z = rnd.randn(1, *Gs.input_shape[1:]) # [minibatch, component] + tflib.set_vars({var: rnd.randn(*var.shape.as_list()) for var in noise_vars}) # [height, width] + images = Gs.run(z, label, **Gs_kwargs) # [minibatch, height, width, channel] + PIL.Image.fromarray(images[0], 'RGB').save(f'{outdir}/seed{seed:04d}.png') + +#---------------------------------------------------------------------------- + +def _parse_num_range(s): + '''Accept either a comma separated list of numbers 'a,b,c' or a range 'a-c' and return as a list of ints.''' + + range_re = re.compile(r'^(\d+)-(\d+)$') + m = range_re.match(s) + if m: + return list(range(int(m.group(1)), int(m.group(2))+1)) + vals = s.split(',') + return [int(x) for x in vals] + +#---------------------------------------------------------------------------- + +_examples = '''examples: + + # Generate curated MetFaces images without truncation (Fig.10 left) + python %(prog)s --outdir=out --trunc=1 --seeds=85,265,297,849 \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metfaces.pkl + + # Generate uncurated MetFaces images with truncation (Fig.12 upper left) + python %(prog)s --outdir=out --trunc=0.7 --seeds=600-605 \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metfaces.pkl + + # Generate class conditional CIFAR-10 images (Fig.17 left, Car) + python %(prog)s --outdir=out --trunc=1 --seeds=0-35 --class=1 \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/cifar10.pkl + + # Render image from projected latent vector + python %(prog)s --outdir=out --dlatents=out/dlatents.npz \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/ffhq.pkl +''' + +#---------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description='Generate images using pretrained network pickle.', + epilog=_examples, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True) + g = parser.add_mutually_exclusive_group(required=True) + g.add_argument('--seeds', type=_parse_num_range, help='List of random seeds') + g.add_argument('--dlatents', dest='dlatents_npz', help='Generate images for saved dlatents') + parser.add_argument('--trunc', dest='truncation_psi', type=float, help='Truncation psi (default: %(default)s)', default=0.5) + parser.add_argument('--class', dest='class_idx', type=int, help='Class label (default: unconditional)') + parser.add_argument('--outdir', help='Where to save the output images', required=True, metavar='DIR') + + args = parser.parse_args() + generate_images(**vars(args)) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + main() + +#---------------------------------------------------------------------------- diff --git a/metrics/__init__.py b/metrics/__init__.py new file mode 100755 index 00000000..2c61c745 --- /dev/null +++ b/metrics/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. 
Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +# empty diff --git a/metrics/frechet_inception_distance.py b/metrics/frechet_inception_distance.py new file mode 100755 index 00000000..1f6be674 --- /dev/null +++ b/metrics/frechet_inception_distance.py @@ -0,0 +1,93 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Frechet Inception Distance (FID) from the paper +"GANs trained by a two time-scale update rule converge to a local Nash equilibrium".""" + +import os +import pickle +import numpy as np +import scipy +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +from metrics import metric_base + +#---------------------------------------------------------------------------- + +class FID(metric_base.MetricBase): + def __init__(self, max_reals, num_fakes, minibatch_per_gpu, use_cached_real_stats=True, **kwargs): + super().__init__(**kwargs) + self.max_reals = max_reals + self.num_fakes = num_fakes + self.minibatch_per_gpu = minibatch_per_gpu + self.use_cached_real_stats = use_cached_real_stats + + def _evaluate(self, Gs, G_kwargs, num_gpus, **_kwargs): # pylint: disable=arguments-differ + minibatch_size = num_gpus * self.minibatch_per_gpu + with dnnlib.util.open_url('https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/inception_v3_features.pkl') as f: # identical to http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz + feature_net = pickle.load(f) + + # Calculate statistics for reals. + cache_file = self._get_cache_file_for_reals(max_reals=self.max_reals) + os.makedirs(os.path.dirname(cache_file), exist_ok=True) + if self.use_cached_real_stats and os.path.isfile(cache_file): + with open(cache_file, 'rb') as f: + mu_real, sigma_real = pickle.load(f) + else: + nfeat = feature_net.output_shape[1] + mu_real = np.zeros(nfeat) + sigma_real = np.zeros([nfeat, nfeat]) + num_real = 0 + for images, _labels, num in self._iterate_reals(minibatch_size): + if self.max_reals is not None: + num = min(num, self.max_reals - num_real) + if images.shape[1] == 1: + images = np.tile(images, [1, 3, 1, 1]) + for feat in list(feature_net.run(images, num_gpus=num_gpus, assume_frozen=True))[:num]: + mu_real += feat + sigma_real += np.outer(feat, feat) + num_real += 1 + if self.max_reals is not None and num_real >= self.max_reals: + break + mu_real /= num_real + sigma_real /= num_real + sigma_real -= np.outer(mu_real, mu_real) + with open(cache_file, 'wb') as f: + pickle.dump((mu_real, sigma_real), f) + + # Construct TensorFlow graph. 
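+        # Each GPU gets its own clone of the generator and the Inception feature
+        # network; generated images are converted to uint8 so they match the format
+        # of the real images processed above. The fake statistics collected below
+        # feed the standard FID formula:
+        #   FID = ||mu_real - mu_fake||^2 + Tr(S_real + S_fake - 2*sqrtm(S_real @ S_fake))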
+ result_expr = [] + for gpu_idx in range(num_gpus): + with tf.device('/gpu:%d' % gpu_idx): + Gs_clone = Gs.clone() + feature_net_clone = feature_net.clone() + latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:]) + labels = self._get_random_labels_tf(self.minibatch_per_gpu) + images = Gs_clone.get_output_for(latents, labels, **G_kwargs) + if images.shape[1] == 1: images = tf.tile(images, [1, 3, 1, 1]) + images = tflib.convert_images_to_uint8(images) + result_expr.append(feature_net_clone.get_output_for(images)) + + # Calculate statistics for fakes. + feat_fake = [] + for begin in range(0, self.num_fakes, minibatch_size): + self._report_progress(begin, self.num_fakes) + feat_fake += list(np.concatenate(tflib.run(result_expr), axis=0)) + feat_fake = np.stack(feat_fake[:self.num_fakes]) + mu_fake = np.mean(feat_fake, axis=0) + sigma_fake = np.cov(feat_fake, rowvar=False) + + # Calculate FID. + m = np.square(mu_fake - mu_real).sum() + s, _ = scipy.linalg.sqrtm(np.dot(sigma_fake, sigma_real), disp=False) # pylint: disable=no-member + dist = m + np.trace(sigma_fake + sigma_real - 2*s) + self._report_result(np.real(dist)) + +#---------------------------------------------------------------------------- diff --git a/metrics/inception_score.py b/metrics/inception_score.py new file mode 100755 index 00000000..c33f0893 --- /dev/null +++ b/metrics/inception_score.py @@ -0,0 +1,64 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Inception Score (IS) from the paper +"Improved techniques for training GANs".""" + +import pickle +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +from metrics import metric_base + +#---------------------------------------------------------------------------- + +class IS(metric_base.MetricBase): + def __init__(self, num_images, num_splits, minibatch_per_gpu, **kwargs): + super().__init__(**kwargs) + self.num_images = num_images + self.num_splits = num_splits + self.minibatch_per_gpu = minibatch_per_gpu + + def _evaluate(self, Gs, G_kwargs, num_gpus, **_kwargs): # pylint: disable=arguments-differ + minibatch_size = num_gpus * self.minibatch_per_gpu + with dnnlib.util.open_url('https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/inception_v3_softmax.pkl') as f: + inception = pickle.load(f) + activations = np.empty([self.num_images, inception.output_shape[1]], dtype=np.float32) + + # Construct TensorFlow graph. + result_expr = [] + for gpu_idx in range(num_gpus): + with tf.device(f'/gpu:{gpu_idx}'): + Gs_clone = Gs.clone() + inception_clone = inception.clone() + latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:]) + labels = self._get_random_labels_tf(self.minibatch_per_gpu) + images = Gs_clone.get_output_for(latents, labels, **G_kwargs) + if images.shape[1] == 1: images = tf.tile(images, [1, 3, 1, 1]) + images = tflib.convert_images_to_uint8(images) + result_expr.append(inception_clone.get_output_for(images)) + + # Calculate activations for fakes. 
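+        # The loop below accumulates Inception softmax outputs p(y|x) for
+        # num_images generated samples. The score computed afterwards is the
+        # standard Inception Score, exp(E_x[KL(p(y|x) || p(y))]), evaluated on
+        # num_splits disjoint splits and reported as mean and standard deviation.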
+ for begin in range(0, self.num_images, minibatch_size): + self._report_progress(begin, self.num_images) + end = min(begin + minibatch_size, self.num_images) + activations[begin:end] = np.concatenate(tflib.run(result_expr), axis=0)[:end-begin] + + # Calculate IS. + scores = [] + for i in range(self.num_splits): + part = activations[i * self.num_images // self.num_splits : (i + 1) * self.num_images // self.num_splits] + kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0))) + kl = np.mean(np.sum(kl, 1)) + scores.append(np.exp(kl)) + self._report_result(np.mean(scores), suffix='_mean') + self._report_result(np.std(scores), suffix='_std') + +#---------------------------------------------------------------------------- diff --git a/metrics/kernel_inception_distance.py b/metrics/kernel_inception_distance.py new file mode 100755 index 00000000..20fa8db5 --- /dev/null +++ b/metrics/kernel_inception_distance.py @@ -0,0 +1,94 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Kernel Inception Distance (KID) from the paper +"Demystifying MMD GANs".""" + +import os +import pickle +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +from metrics import metric_base + +#---------------------------------------------------------------------------- + +def compute_kid(feat_real, feat_fake, num_subsets=100, max_subset_size=1000): + n = feat_real.shape[1] + m = min(min(feat_real.shape[0], feat_fake.shape[0]), max_subset_size) + t = 0 + for _subset_idx in range(num_subsets): + x = feat_fake[np.random.choice(feat_fake.shape[0], m, replace=False)] + y = feat_real[np.random.choice(feat_real.shape[0], m, replace=False)] + a = (x @ x.T / n + 1) ** 3 + (y @ y.T / n + 1) ** 3 + b = (x @ y.T / n + 1) ** 3 + t += (a.sum() - np.diag(a).sum()) / (m - 1) - b.sum() * 2 / m + return t / num_subsets / m + +#---------------------------------------------------------------------------- + +class KID(metric_base.MetricBase): + def __init__(self, max_reals, num_fakes, minibatch_per_gpu, use_cached_real_stats=True, **kwargs): + super().__init__(**kwargs) + self.max_reals = max_reals + self.num_fakes = num_fakes + self.minibatch_per_gpu = minibatch_per_gpu + self.use_cached_real_stats = use_cached_real_stats + + def _evaluate(self, Gs, G_kwargs, num_gpus, **_kwargs): # pylint: disable=arguments-differ + minibatch_size = num_gpus * self.minibatch_per_gpu + with dnnlib.util.open_url('https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/inception_v3_features.pkl') as f: # identical to http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz + feature_net = pickle.load(f) + + # Calculate statistics for reals. 
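+        # Unlike FID, KID keeps the raw per-image feature vectors rather than just
+        # a mean and covariance, because compute_kid() above estimates a
+        # polynomial-kernel MMD from random subsets of the samples. Real-image
+        # features are cached to disk so repeated evaluations can skip this pass
+        # over the dataset.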
+ cache_file = self._get_cache_file_for_reals(max_reals=self.max_reals) + os.makedirs(os.path.dirname(cache_file), exist_ok=True) + if self.use_cached_real_stats and os.path.isfile(cache_file): + with open(cache_file, 'rb') as f: + feat_real = pickle.load(f) + else: + feat_real = [] + for images, _labels, num in self._iterate_reals(minibatch_size): + if self.max_reals is not None: + num = min(num, self.max_reals - len(feat_real)) + if images.shape[1] == 1: + images = np.tile(images, [1, 3, 1, 1]) + feat_real += list(feature_net.run(images, num_gpus=num_gpus, assume_frozen=True))[:num] + if self.max_reals is not None and len(feat_real) >= self.max_reals: + break + feat_real = np.stack(feat_real) + with open(cache_file, 'wb') as f: + pickle.dump(feat_real, f) + + # Construct TensorFlow graph. + result_expr = [] + for gpu_idx in range(num_gpus): + with tf.device('/gpu:%d' % gpu_idx): + Gs_clone = Gs.clone() + feature_net_clone = feature_net.clone() + latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:]) + labels = self._get_random_labels_tf(self.minibatch_per_gpu) + images = Gs_clone.get_output_for(latents, labels, **G_kwargs) + if images.shape[1] == 1: images = tf.tile(images, [1, 3, 1, 1]) + images = tflib.convert_images_to_uint8(images) + result_expr.append(feature_net_clone.get_output_for(images)) + + # Calculate statistics for fakes. + feat_fake = [] + for begin in range(0, self.num_fakes, minibatch_size): + self._report_progress(begin, self.num_fakes) + feat_fake += list(np.concatenate(tflib.run(result_expr), axis=0)) + feat_fake = np.stack(feat_fake[:self.num_fakes]) + + # Calculate KID. + kid = compute_kid(feat_real, feat_fake) + self._report_result(np.real(kid), fmt='%-12.8f') + +#---------------------------------------------------------------------------- diff --git a/metrics/linear_separability.py b/metrics/linear_separability.py new file mode 100755 index 00000000..d95e12b8 --- /dev/null +++ b/metrics/linear_separability.py @@ -0,0 +1,184 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. 
+ +"""Linear Separability (LS) from the paper +"A Style-Based Generator Architecture for Generative Adversarial Networks".""" + +import pickle +from collections import defaultdict +import numpy as np +import sklearn.svm +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +from metrics import metric_base + +#---------------------------------------------------------------------------- + +classifier_urls = [ + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-00-male.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-01-smiling.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-02-attractive.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-03-wavy-hair.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-04-young.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-05-5-o-clock-shadow.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-06-arched-eyebrows.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-07-bags-under-eyes.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-08-bald.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-09-bangs.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-10-big-lips.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-11-big-nose.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-12-black-hair.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-13-blond-hair.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-14-blurry.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-15-brown-hair.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-16-bushy-eyebrows.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-17-chubby.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-18-double-chin.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-19-eyeglasses.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-20-goatee.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-21-gray-hair.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-22-heavy-makeup.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-23-high-cheekbones.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-24-mouth-slightly-open.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-25-mustache.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-26-narrow-eyes.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-27-no-beard.pkl', + 
'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-28-oval-face.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-29-pale-skin.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-30-pointy-nose.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-31-receding-hairline.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-32-rosy-cheeks.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-33-sideburns.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-34-straight-hair.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-35-wearing-earrings.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-36-wearing-hat.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-37-wearing-lipstick.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-38-wearing-necklace.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-39-wearing-necktie.pkl', +] + +#---------------------------------------------------------------------------- + +def prob_normalize(p): + p = np.asarray(p).astype(np.float32) + assert len(p.shape) == 2 + return p / np.sum(p) + +def mutual_information(p): + p = prob_normalize(p) + px = np.sum(p, axis=1) + py = np.sum(p, axis=0) + result = 0.0 + for x in range(p.shape[0]): + p_x = px[x] + for y in range(p.shape[1]): + p_xy = p[x][y] + p_y = py[y] + if p_xy > 0.0: + result += p_xy * np.log2(p_xy / (p_x * p_y)) # get bits as output + return result + +def entropy(p): + p = prob_normalize(p) + result = 0.0 + for x in range(p.shape[0]): + for y in range(p.shape[1]): + p_xy = p[x][y] + if p_xy > 0.0: + result -= p_xy * np.log2(p_xy) + return result + +def conditional_entropy(p): + # H(Y|X) where X corresponds to axis 0, Y to axis 1 + # i.e., How many bits of additional information are needed to where we are on axis 1 if we know where we are on axis 0? + p = prob_normalize(p) + y = np.sum(p, axis=0, keepdims=True) # marginalize to calculate H(Y) + return max(0.0, entropy(y) - mutual_information(p)) # can slip just below 0 due to FP inaccuracies, clean those up. + +#---------------------------------------------------------------------------- + +class LS(metric_base.MetricBase): + def __init__(self, num_samples, num_keep, attrib_indices, minibatch_per_gpu, **kwargs): + assert num_keep <= num_samples + super().__init__(**kwargs) + self.num_samples = num_samples + self.num_keep = num_keep + self.attrib_indices = attrib_indices + self.minibatch_per_gpu = minibatch_per_gpu + + def _evaluate(self, Gs, G_kwargs, num_gpus, **_kwargs): # pylint: disable=arguments-differ + minibatch_size = num_gpus * self.minibatch_per_gpu + + # Construct TensorFlow graph for each GPU. + result_expr = [] + for gpu_idx in range(num_gpus): + with tf.device(f'/gpu:{gpu_idx}'): + Gs_clone = Gs.clone() + + # Generate images. 
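+                # Sample latents Z, map them to W (dlatents), and synthesize the images.
+                # Later, a linear SVM is fitted in each space to predict the attribute
+                # label assigned by the pretrained CelebA-HQ classifiers; the harder the
+                # labels are to separate linearly, the more conditional entropy remains.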
+ latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:]) + labels = self._get_random_labels_tf(self.minibatch_per_gpu) + dlatents = Gs_clone.components.mapping.get_output_for(latents, labels, **G_kwargs) + images = Gs_clone.get_output_for(latents, None, **G_kwargs) + if images.shape[1] == 1: images = tf.tile(images, [1, 3, 1, 1]) + + # Downsample to 256x256. The attribute classifiers were built for 256x256. + if images.shape[2] > 256: + factor = images.shape[2] // 256 + images = tf.reshape(images, [-1, images.shape[1], images.shape[2] // factor, factor, images.shape[3] // factor, factor]) + images = tf.reduce_mean(images, axis=[3, 5]) + + # Run classifier for each attribute. + result_dict = dict(latents=latents, dlatents=dlatents[:,-1]) + for attrib_idx in self.attrib_indices: + with dnnlib.util.open_url(classifier_urls[attrib_idx]) as f: + classifier = pickle.load(f) + logits = classifier.get_output_for(images, None) + predictions = tf.nn.softmax(tf.concat([logits, -logits], axis=1)) + result_dict[attrib_idx] = predictions + result_expr.append(result_dict) + + # Sampling loop. + results = [] + for begin in range(0, self.num_samples, minibatch_size): + self._report_progress(begin, self.num_samples) + results += tflib.run(result_expr) + results = {key: np.concatenate([value[key] for value in results], axis=0) for key in results[0].keys()} + + # Calculate conditional entropy for each attribute. + conditional_entropies = defaultdict(list) + for attrib_idx in self.attrib_indices: + # Prune the least confident samples. + pruned_indices = list(range(self.num_samples)) + pruned_indices = sorted(pruned_indices, key=lambda i: -np.max(results[attrib_idx][i])) + pruned_indices = pruned_indices[:self.num_keep] + + # Fit SVM to the remaining samples. + svm_targets = np.argmax(results[attrib_idx][pruned_indices], axis=1) + for space in ['latents', 'dlatents']: + svm_inputs = results[space][pruned_indices] + try: + svm = sklearn.svm.LinearSVC() + svm.fit(svm_inputs, svm_targets) + svm.score(svm_inputs, svm_targets) + svm_outputs = svm.predict(svm_inputs) + except: + svm_outputs = svm_targets # assume perfect prediction + + # Calculate conditional entropy. + p = [[np.mean([case == (row, col) for case in zip(svm_outputs, svm_targets)]) for col in (0, 1)] for row in (0, 1)] + conditional_entropies[space].append(conditional_entropy(p)) + + # Calculate separability scores. + scores = {key: 2**np.sum(values) for key, values in conditional_entropies.items()} + self._report_result(scores['latents'], suffix='_z') + self._report_result(scores['dlatents'], suffix='_w') + +#---------------------------------------------------------------------------- diff --git a/metrics/metric_base.py b/metrics/metric_base.py new file mode 100755 index 00000000..84fab746 --- /dev/null +++ b/metrics/metric_base.py @@ -0,0 +1,137 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. 
+ +"""Common definitions for quality metrics.""" + +import os +import time +import hashlib +import pickle +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +from training import dataset + +#---------------------------------------------------------------------------- +# Base class for metrics. + +class MetricBase: + def __init__(self, name, force_dataset_args={}, force_G_kwargs={}): + # Constructor args. + self.name = name + self.force_dataset_args = force_dataset_args + self.force_G_kwargs = force_G_kwargs + + # Configuration. + self._dataset_args = dnnlib.EasyDict() + self._run_dir = None + self._progress_fn = None + + # Internal state. + self._results = [] + self._network_name = '' + self._eval_time = 0 + self._dataset = None + + def configure(self, dataset_args={}, run_dir=None, progress_fn=None): + self._dataset_args = dnnlib.EasyDict(dataset_args) + self._dataset_args.update(self.force_dataset_args) + self._run_dir = run_dir + self._progress_fn = progress_fn + + def run(self, network_pkl, num_gpus=1, G_kwargs=dict(is_validation=True)): + self._results = [] + self._network_name = os.path.splitext(os.path.basename(network_pkl))[0] + self._eval_time = 0 + self._dataset = None + + with tf.Graph().as_default(), tflib.create_session().as_default(): # pylint: disable=not-context-manager + self._report_progress(0, 1) + time_begin = time.time() + with dnnlib.util.open_url(network_pkl) as f: + G, D, Gs = pickle.load(f) + + G_kwargs = dnnlib.EasyDict(G_kwargs) + G_kwargs.update(self.force_G_kwargs) + self._evaluate(G=G, D=D, Gs=Gs, G_kwargs=G_kwargs, num_gpus=num_gpus) + + self._eval_time = time.time() - time_begin # pylint: disable=attribute-defined-outside-init + self._report_progress(1, 1) + if self._dataset is not None: + self._dataset.close() + self._dataset = None + + result_str = self.get_result_str() + print(result_str) + if self._run_dir is not None and os.path.isdir(self._run_dir): + with open(os.path.join(self._run_dir, f'metric-{self.name}.txt'), 'at') as f: + f.write(result_str + '\n') + + def get_result_str(self): + title = self._network_name + if len(title) > 29: + title = '...' 
+ title[-26:] + result_str = f'{title:<30s} time {dnnlib.util.format_time(self._eval_time):<12s}' + for res in self._results: + result_str += f' {self.name}{res.suffix} {res.fmt % res.value}' + return result_str.strip() + + def update_autosummaries(self): + for res in self._results: + tflib.autosummary.autosummary('Metrics/' + self.name + res.suffix, res.value) + + def _evaluate(self, **_kwargs): + raise NotImplementedError # to be overridden by subclasses + + def _report_result(self, value, suffix='', fmt='%-10.4f'): + self._results += [dnnlib.EasyDict(value=value, suffix=suffix, fmt=fmt)] + + def _report_progress(self, cur, total): + if self._progress_fn is not None: + self._progress_fn(cur, total) + + def _get_cache_file_for_reals(self, extension='pkl', **kwargs): + all_args = dnnlib.EasyDict(metric_name=self.name) + all_args.update(self._dataset_args) + all_args.update(kwargs) + md5 = hashlib.md5(repr(sorted(all_args.items())).encode('utf-8')) + dataset_name = os.path.splitext(os.path.basename(self._dataset_args.path))[0] + return dnnlib.make_cache_dir_path('metrics', f'{md5.hexdigest()}-{self.name}-{dataset_name}.{extension}') + + def _get_dataset_obj(self): + if self._dataset is None: + self._dataset = dataset.load_dataset(**self._dataset_args) + return self._dataset + + def _iterate_reals(self, minibatch_size): + print(f'Calculating real image statistics for {self.name}...') + dataset_obj = self._get_dataset_obj() + while True: + images = [] + labels = [] + for _ in range(minibatch_size): + image, label = dataset_obj.get_minibatch_np(1) + if image is None: + break + images.append(image) + labels.append(label) + num = len(images) + if num == 0: + break + images = np.concatenate(images + [images[-1]] * (minibatch_size - num), axis=0) + labels = np.concatenate(labels + [labels[-1]] * (minibatch_size - num), axis=0) + yield images, labels, num + if num < minibatch_size: + break + + def _get_random_labels_tf(self, minibatch_size): + return self._get_dataset_obj().get_random_labels_tf(minibatch_size) + +#---------------------------------------------------------------------------- diff --git a/metrics/metric_defaults.py b/metrics/metric_defaults.py new file mode 100755 index 00000000..b456e9c6 --- /dev/null +++ b/metrics/metric_defaults.py @@ -0,0 +1,36 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Default metric definitions.""" + +from dnnlib import EasyDict + +#---------------------------------------------------------------------------- + +metric_defaults = EasyDict([(args.name, args) for args in [ + # ADA paper. 
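+    # Each entry maps a metric name (as accepted by train.py --metrics) to the
+    # class implementing it plus its constructor arguments; force_dataset_args and
+    # force_G_kwargs override the training-time settings during evaluation.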
+ EasyDict(name='fid50k_full', class_name='metrics.frechet_inception_distance.FID', max_reals=None, num_fakes=50000, minibatch_per_gpu=8, force_dataset_args=dict(shuffle=False, max_images=None, repeat=False, mirror_augment=False)), + EasyDict(name='kid50k_full', class_name='metrics.kernel_inception_distance.KID', max_reals=1000000, num_fakes=50000, minibatch_per_gpu=8, force_dataset_args=dict(shuffle=False, max_images=None, repeat=False, mirror_augment=False)), + EasyDict(name='pr50k3_full', class_name='metrics.precision_recall.PR', max_reals=200000, num_fakes=50000, nhood_size=3, minibatch_per_gpu=8, row_batch_size=10000, col_batch_size=10000, force_dataset_args=dict(shuffle=False, max_images=None, repeat=False, mirror_augment=False)), + EasyDict(name='is50k', class_name='metrics.inception_score.IS', num_images=50000, num_splits=10, minibatch_per_gpu=8, force_dataset_args=dict(shuffle=False, max_images=None)), + + # Legacy: StyleGAN2. + EasyDict(name='fid50k', class_name='metrics.frechet_inception_distance.FID', max_reals=50000, num_fakes=50000, minibatch_per_gpu=8, force_dataset_args=dict(shuffle=False, max_images=None)), + EasyDict(name='kid50k', class_name='metrics.kernel_inception_distance.KID', max_reals=50000, num_fakes=50000, minibatch_per_gpu=8, force_dataset_args=dict(shuffle=False, max_images=None)), + EasyDict(name='pr50k3', class_name='metrics.precision_recall.PR', max_reals=50000, num_fakes=50000, nhood_size=3, minibatch_per_gpu=8, row_batch_size=10000, col_batch_size=10000, force_dataset_args=dict(shuffle=False, max_images=None)), + EasyDict(name='ppl2_wend', class_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='w', sampling='end', crop=False, minibatch_per_gpu=2, force_dataset_args=dict(shuffle=False, max_images=None), force_G_kwargs=dict(dtype='float32', mapping_dtype='float32', num_fp16_res=0)), + + # Legacy: StyleGAN. 
+ EasyDict(name='ppl_zfull', class_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='z', sampling='full', crop=True, minibatch_per_gpu=2, force_dataset_args=dict(shuffle=False, max_images=None), force_G_kwargs=dict(dtype='float32', mapping_dtype='float32', num_fp16_res=0)), + EasyDict(name='ppl_wfull', class_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='w', sampling='full', crop=True, minibatch_per_gpu=2, force_dataset_args=dict(shuffle=False, max_images=None), force_G_kwargs=dict(dtype='float32', mapping_dtype='float32', num_fp16_res=0)), + EasyDict(name='ppl_zend', class_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='z', sampling='end', crop=True, minibatch_per_gpu=2, force_dataset_args=dict(shuffle=False, max_images=None), force_G_kwargs=dict(dtype='float32', mapping_dtype='float32', num_fp16_res=0)), + EasyDict(name='ppl_wend', class_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='w', sampling='end', crop=True, minibatch_per_gpu=2, force_dataset_args=dict(shuffle=False, max_images=None), force_G_kwargs=dict(dtype='float32', mapping_dtype='float32', num_fp16_res=0)), + EasyDict(name='ls', class_name='metrics.linear_separability.LS', num_samples=200000, num_keep=100000, attrib_indices=range(40), minibatch_per_gpu=4, force_dataset_args=dict(shuffle=False, max_images=None)), +]]) + +#---------------------------------------------------------------------------- diff --git a/metrics/perceptual_path_length.py b/metrics/perceptual_path_length.py new file mode 100755 index 00000000..15a327ba --- /dev/null +++ b/metrics/perceptual_path_length.py @@ -0,0 +1,119 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Perceptual Path Length (PPL) from the paper +"A Style-Based Generator Architecture for Generative Adversarial Networks".""" + +import pickle +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +from metrics import metric_base + +#---------------------------------------------------------------------------- + +# Normalize batch of vectors. +def normalize(v): + return v / tf.sqrt(tf.reduce_sum(tf.square(v), axis=-1, keepdims=True)) + +# Spherical interpolation of a batch of vectors. 
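+# Mathematically, slerp(a, b; t) = sin((1-t)*omega)/sin(omega) * a + sin(t*omega)/sin(omega) * b,
+# where omega = arccos(a . b) for unit vectors a and b. The implementation below
+# uses an equivalent form: c is the unit component of b orthogonal to a, and a is
+# rotated towards c by the angle t*omega.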
+def slerp(a, b, t): + a = normalize(a) + b = normalize(b) + d = tf.reduce_sum(a * b, axis=-1, keepdims=True) + p = t * tf.math.acos(d) + c = normalize(b - d * a) + d = a * tf.math.cos(p) + c * tf.math.sin(p) + return normalize(d) + +#---------------------------------------------------------------------------- + +class PPL(metric_base.MetricBase): + def __init__(self, num_samples, epsilon, space, sampling, crop, minibatch_per_gpu, **kwargs): + assert space in ['z', 'w'] + assert sampling in ['full', 'end'] + super().__init__(**kwargs) + self.num_samples = num_samples + self.epsilon = epsilon + self.space = space + self.sampling = sampling + self.crop = crop + self.minibatch_per_gpu = minibatch_per_gpu + + def _evaluate(self, Gs, G_kwargs, num_gpus, **_kwargs): # pylint: disable=arguments-differ + minibatch_size = num_gpus * self.minibatch_per_gpu + + # Construct TensorFlow graph. + distance_expr = [] + for gpu_idx in range(num_gpus): + with tf.device(f'/gpu:{gpu_idx}'): + Gs_clone = Gs.clone() + noise_vars = [var for name, var in Gs_clone.components.synthesis.vars.items() if name.startswith('noise')] + + # Generate random latents and interpolation t-values. + lat_t01 = tf.random_normal([self.minibatch_per_gpu * 2] + Gs_clone.input_shape[1:]) + lerp_t = tf.random_uniform([self.minibatch_per_gpu], 0.0, 1.0 if self.sampling == 'full' else 0.0) + labels = tf.reshape(tf.tile(self._get_random_labels_tf(self.minibatch_per_gpu), [1, 2]), [self.minibatch_per_gpu * 2, -1]) + + # Interpolate in W or Z. + if self.space == 'w': + dlat_t01 = Gs_clone.components.mapping.get_output_for(lat_t01, labels, **G_kwargs) + dlat_t01 = tf.cast(dlat_t01, tf.float32) + dlat_t0, dlat_t1 = dlat_t01[0::2], dlat_t01[1::2] + dlat_e0 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis]) + dlat_e1 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis] + self.epsilon) + dlat_e01 = tf.reshape(tf.stack([dlat_e0, dlat_e1], axis=1), dlat_t01.shape) + else: # space == 'z' + lat_t0, lat_t1 = lat_t01[0::2], lat_t01[1::2] + lat_e0 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis]) + lat_e1 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis] + self.epsilon) + lat_e01 = tf.reshape(tf.stack([lat_e0, lat_e1], axis=1), lat_t01.shape) + dlat_e01 = Gs_clone.components.mapping.get_output_for(lat_e01, labels, **G_kwargs) + + # Synthesize images. + with tf.control_dependencies([var.initializer for var in noise_vars]): # use same noise inputs for the entire minibatch + images = Gs_clone.components.synthesis.get_output_for(dlat_e01, randomize_noise=False, **G_kwargs) + images = tf.cast(images, tf.float32) + + # Crop only the face region. + if self.crop: + c = int(images.shape[2] // 8) + images = images[:, :, c*3 : c*7, c*2 : c*6] + + # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images. + factor = images.shape[2] // 256 + if factor > 1: + images = tf.reshape(images, [-1, images.shape[1], images.shape[2] // factor, factor, images.shape[3] // factor, factor]) + images = tf.reduce_mean(images, axis=[3,5]) + + # Scale dynamic range from [-1,1] to [0,255] for VGG. + images = (images + 1) * (255 / 2) + if images.shape[1] == 1: images = tf.tile(images, [1, 3, 1, 1]) + + # Evaluate perceptual distance. 
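+                # PPL is the LPIPS (VGG16) distance between images generated at
+                # interpolation positions t and t + epsilon, scaled by 1/epsilon^2 to
+                # approximate the squared norm of the perceptual path derivative;
+                # outliers are rejected below using the 1st and 99th percentiles
+                # before averaging.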
+ img_e0, img_e1 = images[0::2], images[1::2] + with dnnlib.util.open_url('https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/vgg16_zhang_perceptual.pkl') as f: + distance_measure = pickle.load(f) + distance_expr.append(distance_measure.get_output_for(img_e0, img_e1) * (1 / self.epsilon**2)) + + # Sampling loop. + all_distances = [] + for begin in range(0, self.num_samples, minibatch_size): + self._report_progress(begin, self.num_samples) + all_distances += tflib.run(distance_expr) + all_distances = np.concatenate(all_distances, axis=0) + + # Reject outliers. + lo = np.percentile(all_distances, 1, interpolation='lower') + hi = np.percentile(all_distances, 99, interpolation='higher') + filtered_distances = np.extract(np.logical_and(lo <= all_distances, all_distances <= hi), all_distances) + self._report_result(np.mean(filtered_distances)) + +#---------------------------------------------------------------------------- diff --git a/metrics/precision_recall.py b/metrics/precision_recall.py new file mode 100755 index 00000000..dab3fecc --- /dev/null +++ b/metrics/precision_recall.py @@ -0,0 +1,234 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Precision/Recall (PR) from the paper +"Improved Precision and Recall Metric for Assessing Generative Models".""" + +import os +import pickle +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +from metrics import metric_base + +#---------------------------------------------------------------------------- + +def batch_pairwise_distances(U, V): + """ Compute pairwise distances between two batches of feature vectors.""" + with tf.variable_scope('pairwise_dist_block'): + # Squared norms of each row in U and V. + norm_u = tf.reduce_sum(tf.square(U), 1) + norm_v = tf.reduce_sum(tf.square(V), 1) + + # norm_u as a row and norm_v as a column vectors. + norm_u = tf.reshape(norm_u, [-1, 1]) + norm_v = tf.reshape(norm_v, [1, -1]) + + # Pairwise squared Euclidean distances. + D = tf.maximum(norm_u - 2*tf.matmul(U, V, False, True) + norm_v, 0.0) + + return D + +#---------------------------------------------------------------------------- + +class DistanceBlock(): + """Distance block.""" + def __init__(self, num_features, num_gpus): + self.num_features = num_features + self.num_gpus = num_gpus + + # Initialize TF graph to calculate pairwise distances. 
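+        # The second feature batch is split across GPUs; each GPU computes squared
+        # Euclidean distances against the full first batch via the identity
+        # ||u - v||^2 = ||u||^2 - 2*u.v + ||v||^2 (see batch_pairwise_distances above),
+        # and the per-GPU blocks are concatenated into one distance matrix.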
+ with tf.device('/cpu:0'): + self._features_batch1 = tf.placeholder(tf.float16, shape=[None, self.num_features]) + self._features_batch2 = tf.placeholder(tf.float16, shape=[None, self.num_features]) + features_split2 = tf.split(self._features_batch2, self.num_gpus, axis=0) + distances_split = [] + for gpu_idx in range(self.num_gpus): + with tf.device(f'/gpu:{gpu_idx}'): + distances_split.append(batch_pairwise_distances(self._features_batch1, features_split2[gpu_idx])) + self._distance_block = tf.concat(distances_split, axis=1) + + def pairwise_distances(self, U, V): + """Evaluate pairwise distances between two batches of feature vectors.""" + return self._distance_block.eval(feed_dict={self._features_batch1: U, self._features_batch2: V}) + +#---------------------------------------------------------------------------- + +class ManifoldEstimator(): + """Finds an estimate for the manifold of given feature vectors.""" + def __init__(self, distance_block, features, row_batch_size, col_batch_size, nhood_sizes, clamp_to_percentile=None): + """Find an estimate of the manifold of given feature vectors.""" + num_images = features.shape[0] + self.nhood_sizes = nhood_sizes + self.num_nhoods = len(nhood_sizes) + self.row_batch_size = row_batch_size + self.col_batch_size = col_batch_size + self._ref_features = features + self._distance_block = distance_block + + # Estimate manifold of features by calculating distances to kth nearest neighbor of each sample. + self.D = np.zeros([num_images, self.num_nhoods], dtype=np.float16) + distance_batch = np.zeros([row_batch_size, num_images], dtype=np.float16) + seq = np.arange(max(self.nhood_sizes) + 1, dtype=np.int32) + + for begin1 in range(0, num_images, row_batch_size): + end1 = min(begin1 + row_batch_size, num_images) + row_batch = features[begin1:end1] + + for begin2 in range(0, num_images, col_batch_size): + end2 = min(begin2 + col_batch_size, num_images) + col_batch = features[begin2:end2] + + # Compute distances between batches. + distance_batch[0:end1-begin1, begin2:end2] = self._distance_block.pairwise_distances(row_batch, col_batch) + + # Find the kth nearest neighbor from the current batch. 
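+                # np.partition puts the smallest distances in sorted order up to index
+                # max(nhood_sizes); indexing with self.nhood_sizes then picks the distance
+                # to the k-th nearest neighbor (index 0 is the sample itself at distance 0),
+                # which becomes the hypersphere radius for that reference sample.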
+ self.D[begin1:end1, :] = np.partition(distance_batch[0:end1-begin1, :], seq, axis=1)[:, self.nhood_sizes] + + if clamp_to_percentile is not None: + max_distances = np.percentile(self.D, clamp_to_percentile, axis=0) + self.D[self.D > max_distances] = 0 #max_distances # 0 + + def evaluate(self, eval_features, return_realism=False, return_neighbors=False): + """Evaluate if new feature vectors are in the estimated manifold.""" + num_eval_images = eval_features.shape[0] + num_ref_images = self.D.shape[0] + distance_batch = np.zeros([self.row_batch_size, num_ref_images], dtype=np.float16) + batch_predictions = np.zeros([num_eval_images, self.num_nhoods], dtype=np.int32) + #max_realism_score = np.zeros([num_eval_images,], dtype=np.float32) + realism_score = np.zeros([num_eval_images,], dtype=np.float32) + nearest_indices = np.zeros([num_eval_images,], dtype=np.int32) + + for begin1 in range(0, num_eval_images, self.row_batch_size): + end1 = min(begin1 + self.row_batch_size, num_eval_images) + feature_batch = eval_features[begin1:end1] + + for begin2 in range(0, num_ref_images, self.col_batch_size): + end2 = min(begin2 + self.col_batch_size, num_ref_images) + ref_batch = self._ref_features[begin2:end2] + + distance_batch[0:end1-begin1, begin2:end2] = self._distance_block.pairwise_distances(feature_batch, ref_batch) + + # From the minibatch of new feature vectors, determine if they are in the estimated manifold. + # If a feature vector is inside a hypersphere of some reference sample, then the new sample lies on the estimated manifold. + # The radii of the hyperspheres are determined from distances of neighborhood size k. + samples_in_manifold = distance_batch[0:end1-begin1, :, None] <= self.D + batch_predictions[begin1:end1] = np.any(samples_in_manifold, axis=1).astype(np.int32) + + #max_realism_score[begin1:end1] = np.max(self.D[:, 0] / (distance_batch[0:end1-begin1, :] + 1e-18), axis=1) + #nearest_indices[begin1:end1] = np.argmax(self.D[:, 0] / (distance_batch[0:end1-begin1, :] + 1e-18), axis=1) + nearest_indices[begin1:end1] = np.argmin(distance_batch[0:end1-begin1, :], axis=1) + realism_score[begin1:end1] = self.D[nearest_indices[begin1:end1], 0] / np.min(distance_batch[0:end1-begin1, :], axis=1) + + if return_realism and return_neighbors: + return batch_predictions, realism_score, nearest_indices + elif return_realism: + return batch_predictions, realism_score + elif return_neighbors: + return batch_predictions, nearest_indices + + return batch_predictions + +#---------------------------------------------------------------------------- + +def knn_precision_recall_features(ref_features, eval_features, feature_net, nhood_sizes, + row_batch_size, col_batch_size, num_gpus): + """Calculates k-NN precision and recall for two sets of feature vectors.""" + state = dnnlib.EasyDict() + #num_images = ref_features.shape[0] + num_features = feature_net.output_shape[1] + state.ref_features = ref_features + state.eval_features = eval_features + + # Initialize DistanceBlock and ManifoldEstimators. + distance_block = DistanceBlock(num_features, num_gpus) + state.ref_manifold = ManifoldEstimator(distance_block, state.ref_features, row_batch_size, col_batch_size, nhood_sizes) + state.eval_manifold = ManifoldEstimator(distance_block, state.eval_features, row_batch_size, col_batch_size, nhood_sizes) + + # Evaluate precision and recall using k-nearest neighbors. 
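+    # Each manifold is the union of hyperspheres centered on the reference samples,
+    # with radii given by each sample's k-th nearest-neighbor distance; a query
+    # point counts as covered if it falls inside any of the spheres.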
+ #print(f'Evaluating k-NN precision and recall with {num_images} samples...') + #start = time.time() + + # Precision: How many points from eval_features are in ref_features manifold. + state.precision, state.realism_scores, state.nearest_neighbors = state.ref_manifold.evaluate(state.eval_features, return_realism=True, return_neighbors=True) + state.knn_precision = state.precision.mean(axis=0) + + # Recall: How many points from ref_features are in eval_features manifold. + state.recall = state.eval_manifold.evaluate(state.ref_features) + state.knn_recall = state.recall.mean(axis=0) + + #elapsed_time = time.time() - start + #print(f'Done evaluation in: {elapsed_time:g}s') + + return state + +#---------------------------------------------------------------------------- + +class PR(metric_base.MetricBase): + def __init__(self, max_reals, num_fakes, nhood_size, minibatch_per_gpu, row_batch_size, col_batch_size, **kwargs): + super().__init__(**kwargs) + self.max_reals = max_reals + self.num_fakes = num_fakes + self.nhood_size = nhood_size + self.minibatch_per_gpu = minibatch_per_gpu + self.row_batch_size = row_batch_size + self.col_batch_size = col_batch_size + + def _evaluate(self, Gs, G_kwargs, num_gpus, **_kwargs): # pylint: disable=arguments-differ + minibatch_size = num_gpus * self.minibatch_per_gpu + with dnnlib.util.open_url('https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/vgg16.pkl') as f: + feature_net = pickle.load(f) + + # Calculate features for reals. + cache_file = self._get_cache_file_for_reals(max_reals=self.max_reals) + os.makedirs(os.path.dirname(cache_file), exist_ok=True) + if os.path.isfile(cache_file): + with open(cache_file, 'rb') as f: + feat_real = pickle.load(f) + else: + feat_real = [] + for images, _labels, num in self._iterate_reals(minibatch_size): + if images.shape[1] == 1: images = np.tile(images, [1, 3, 1, 1]) + feat_real += list(feature_net.run(images, num_gpus=num_gpus, assume_frozen=True))[:num] + if self.max_reals is not None and len(feat_real) >= self.max_reals: + break + if self.max_reals is not None and len(feat_real) > self.max_reals: + feat_real = feat_real[:self.max_reals] + feat_real = np.stack(feat_real) + with open(cache_file, 'wb') as f: + pickle.dump(feat_real, f) + + # Construct TensorFlow graph. + result_expr = [] + for gpu_idx in range(num_gpus): + with tf.device(f'/gpu:{gpu_idx}'): + Gs_clone = Gs.clone() + feature_net_clone = feature_net.clone() + latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:]) + labels = self._get_random_labels_tf(self.minibatch_per_gpu) + images = Gs_clone.get_output_for(latents, labels, **G_kwargs) + if images.shape[1] == 1: images = tf.tile(images, [1, 3, 1, 1]) + images = tflib.convert_images_to_uint8(images) + result_expr.append(feature_net_clone.get_output_for(images)) + + # Calculate features for fakes. + feat_fake = [] + for begin in range(0, self.num_fakes, minibatch_size): + self._report_progress(begin, self.num_fakes) + feat_fake += list(np.concatenate(tflib.run(result_expr), axis=0)) + feat_fake = np.stack(feat_fake[:self.num_fakes]) + + # Calculate precision and recall. 
+ state = knn_precision_recall_features(ref_features=feat_real, eval_features=feat_fake, feature_net=feature_net, + nhood_sizes=[self.nhood_size], row_batch_size=self.row_batch_size, col_batch_size=self.row_batch_size, num_gpus=num_gpus) + self._report_result(state.knn_precision[0], suffix='_precision') + self._report_result(state.knn_recall[0], suffix='_recall') + +#---------------------------------------------------------------------------- diff --git a/projector.py b/projector.py new file mode 100755 index 00000000..8f6be7e7 --- /dev/null +++ b/projector.py @@ -0,0 +1,289 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Project given image to the latent space of pretrained network pickle.""" + +import argparse +import os +import pickle +import imageio + +import numpy as np +import PIL.Image +import tensorflow as tf +import tqdm + +import dnnlib +import dnnlib.tflib as tflib + +class Projector: + def __init__(self): + self.num_steps = 1000 + self.dlatent_avg_samples = 10000 + self.initial_learning_rate = 0.1 + self.initial_noise_factor = 0.05 + self.lr_rampdown_length = 0.25 + self.lr_rampup_length = 0.05 + self.noise_ramp_length = 0.75 + self.regularize_noise_weight = 1e5 + self.verbose = True + + self._Gs = None + self._minibatch_size = None + self._dlatent_avg = None + self._dlatent_std = None + self._noise_vars = None + self._noise_init_op = None + self._noise_normalize_op = None + self._dlatents_var = None + self._dlatent_noise_in = None + self._dlatents_expr = None + self._images_float_expr = None + self._images_uint8_expr = None + self._target_images_var = None + self._lpips = None + self._dist = None + self._loss = None + self._reg_sizes = None + self._lrate_in = None + self._opt = None + self._opt_step = None + self._cur_step = None + + def _info(self, *args): + if self.verbose: + print('Projector:', *args) + + def set_network(self, Gs, dtype='float16'): + if Gs is None: + self._Gs = None + return + self._Gs = Gs.clone(randomize_noise=False, dtype=dtype, num_fp16_res=0, fused_modconv=True) + + # Compute dlatent stats. + self._info(f'Computing W midpoint and stddev using {self.dlatent_avg_samples} samples...') + latent_samples = np.random.RandomState(123).randn(self.dlatent_avg_samples, *self._Gs.input_shapes[0][1:]) + dlatent_samples = self._Gs.components.mapping.run(latent_samples, None) # [N, L, C] + dlatent_samples = dlatent_samples[:, :1, :].astype(np.float32) # [N, 1, C] + self._dlatent_avg = np.mean(dlatent_samples, axis=0, keepdims=True) # [1, 1, C] + self._dlatent_std = (np.sum((dlatent_samples - self._dlatent_avg) ** 2) / self.dlatent_avg_samples) ** 0.5 + self._info(f'std = {self._dlatent_std:g}') + + # Setup noise inputs. 
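+        # Enumerate the synthesis network's noise buffers (G_synthesis/noise0,
+        # noise1, ...) until one is missing. Each buffer is randomized when
+        # optimization starts (self._noise_init_op) and re-normalized to zero mean /
+        # unit std after every step (self._noise_normalize_op), so the noise maps
+        # cannot absorb the target image content.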
+ self._info('Setting up noise inputs...') + self._noise_vars = [] + noise_init_ops = [] + noise_normalize_ops = [] + while True: + n = f'G_synthesis/noise{len(self._noise_vars)}' + if not n in self._Gs.vars: + break + v = self._Gs.vars[n] + self._noise_vars.append(v) + noise_init_ops.append(tf.assign(v, tf.random_normal(tf.shape(v), dtype=tf.float32))) + noise_mean = tf.reduce_mean(v) + noise_std = tf.reduce_mean((v - noise_mean)**2)**0.5 + noise_normalize_ops.append(tf.assign(v, (v - noise_mean) / noise_std)) + self._noise_init_op = tf.group(*noise_init_ops) + self._noise_normalize_op = tf.group(*noise_normalize_ops) + + # Build image output graph. + self._info('Building image output graph...') + self._minibatch_size = 1 + self._dlatents_var = tf.Variable(tf.zeros([self._minibatch_size] + list(self._dlatent_avg.shape[1:])), name='dlatents_var') + self._dlatent_noise_in = tf.placeholder(tf.float32, [], name='noise_in') + dlatents_noise = tf.random.normal(shape=self._dlatents_var.shape) * self._dlatent_noise_in + self._dlatents_expr = tf.tile(self._dlatents_var + dlatents_noise, [1, self._Gs.components.synthesis.input_shape[1], 1]) + self._images_float_expr = tf.cast(self._Gs.components.synthesis.get_output_for(self._dlatents_expr), tf.float32) + self._images_uint8_expr = tflib.convert_images_to_uint8(self._images_float_expr, nchw_to_nhwc=True) + + # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images. + proc_images_expr = (self._images_float_expr + 1) * (255 / 2) + sh = proc_images_expr.shape.as_list() + if sh[2] > 256: + factor = sh[2] // 256 + proc_images_expr = tf.reduce_mean(tf.reshape(proc_images_expr, [-1, sh[1], sh[2] // factor, factor, sh[2] // factor, factor]), axis=[3,5]) + + # Build loss graph. + self._info('Building loss graph...') + self._target_images_var = tf.Variable(tf.zeros(proc_images_expr.shape), name='target_images_var') + if self._lpips is None: + with dnnlib.util.open_url('https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/vgg16_zhang_perceptual.pkl') as f: + self._lpips = pickle.load(f) + self._dist = self._lpips.get_output_for(proc_images_expr, self._target_images_var) + self._loss = tf.reduce_sum(self._dist) + + # Build noise regularization graph. + self._info('Building noise regularization graph...') + reg_loss = 0.0 + for v in self._noise_vars: + sz = v.shape[2] + while True: + reg_loss += tf.reduce_mean(v * tf.roll(v, shift=1, axis=3))**2 + tf.reduce_mean(v * tf.roll(v, shift=1, axis=2))**2 + if sz <= 8: + break # Small enough already + v = tf.reshape(v, [1, 1, sz//2, 2, sz//2, 2]) # Downscale + v = tf.reduce_mean(v, axis=[3, 5]) + sz = sz // 2 + self._loss += reg_loss * self.regularize_noise_weight + + # Setup optimizer. + self._info('Setting up optimizer...') + self._lrate_in = tf.placeholder(tf.float32, [], name='lrate_in') + self._opt = tflib.Optimizer(learning_rate=self._lrate_in) + self._opt.register_gradients(self._loss, [self._dlatents_var] + self._noise_vars) + self._opt_step = self._opt.apply_updates() + + def start(self, target_images): + assert self._Gs is not None + + # Prepare target images. 
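+        # Target images are expected as float32 NCHW tensors in [-1, 1]; they are
+        # rescaled to [0, 255] and box-filtered down to the resolution expected by
+        # the LPIPS loss graph built in set_network() before being written into the
+        # target variable.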
+ self._info('Preparing target images...') + target_images = np.asarray(target_images, dtype='float32') + target_images = (target_images + 1) * (255 / 2) + sh = target_images.shape + assert sh[0] == self._minibatch_size + if sh[2] > self._target_images_var.shape[2]: + factor = sh[2] // self._target_images_var.shape[2] + target_images = np.reshape(target_images, [-1, sh[1], sh[2] // factor, factor, sh[3] // factor, factor]).mean((3, 5)) + + # Initialize optimization state. + self._info('Initializing optimization state...') + dlatents = np.tile(self._dlatent_avg, [self._minibatch_size, 1, 1]) + tflib.set_vars({self._target_images_var: target_images, self._dlatents_var: dlatents}) + tflib.run(self._noise_init_op) + self._opt.reset_optimizer_state() + self._cur_step = 0 + + def step(self): + assert self._cur_step is not None + if self._cur_step >= self.num_steps: + return 0, 0 + + # Choose hyperparameters. + t = self._cur_step / self.num_steps + dlatent_noise = self._dlatent_std * self.initial_noise_factor * max(0.0, 1.0 - t / self.noise_ramp_length) ** 2 + lr_ramp = min(1.0, (1.0 - t) / self.lr_rampdown_length) + lr_ramp = 0.5 - 0.5 * np.cos(lr_ramp * np.pi) + lr_ramp = lr_ramp * min(1.0, t / self.lr_rampup_length) + learning_rate = self.initial_learning_rate * lr_ramp + + # Execute optimization step. + feed_dict = {self._dlatent_noise_in: dlatent_noise, self._lrate_in: learning_rate} + _, dist_value, loss_value = tflib.run([self._opt_step, self._dist, self._loss], feed_dict) + tflib.run(self._noise_normalize_op) + self._cur_step += 1 + return dist_value, loss_value + + @property + def cur_step(self): + return self._cur_step + + @property + def dlatents(self): + return tflib.run(self._dlatents_expr, {self._dlatent_noise_in: 0}) + + @property + def noises(self): + return tflib.run(self._noise_vars) + + @property + def images_float(self): + return tflib.run(self._images_float_expr, {self._dlatent_noise_in: 0}) + + @property + def images_uint8(self): + return tflib.run(self._images_uint8_expr, {self._dlatent_noise_in: 0}) + +#---------------------------------------------------------------------------- + +def project(network_pkl: str, target_fname: str, outdir: str, save_video: bool, seed: int): + # Load networks. + tflib.init_tf({'rnd.np_random_seed': seed}) + print('Loading networks from "%s"...' % network_pkl) + with dnnlib.util.open_url(network_pkl) as fp: + _G, _D, Gs = pickle.load(fp) + + # Load target image. + target_pil = PIL.Image.open(target_fname) + w, h = target_pil.size + s = min(w, h) + target_pil = target_pil.crop(((w - s) // 2, (h - s) // 2, (w + s) // 2, (h + s) // 2)) + target_pil= target_pil.convert('RGB') + target_pil = target_pil.resize((Gs.output_shape[3], Gs.output_shape[2]), PIL.Image.ANTIALIAS) + target_uint8 = np.array(target_pil, dtype=np.uint8) + target_float = target_uint8.astype(np.float32).transpose([2, 0, 1]) * (2 / 255) - 1 + + # Initialize projector. + proj = Projector() + proj.set_network(Gs) + proj.start([target_float]) + + # Setup output directory. + os.makedirs(outdir, exist_ok=True) + target_pil.save(f'{outdir}/target.png') + writer = None + if save_video: + writer = imageio.get_writer(f'{outdir}/proj.mp4', mode='I', fps=60, codec='libx264', bitrate='16M') + + # Run projector. 
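+    # Each optimization step refines the W vector and the noise maps; when
+    # --save-video is enabled, every frame shows the target image and the current
+    # reconstruction side by side.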
+ with tqdm.trange(proj.num_steps) as t: + for step in t: + assert step == proj.cur_step + if writer is not None: + writer.append_data(np.concatenate([target_uint8, proj.images_uint8[0]], axis=1)) + dist, loss = proj.step() + t.set_postfix(dist=f'{dist[0]:.4f}', loss=f'{loss:.2f}') + + # Save results. + PIL.Image.fromarray(proj.images_uint8[0], 'RGB').save(f'{outdir}/proj.png') + np.savez(f'{outdir}/dlatents.npz', dlatents=proj.dlatents) + if writer is not None: + writer.close() + +#---------------------------------------------------------------------------- + +def _str_to_bool(v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + if v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + raise argparse.ArgumentTypeError('Boolean value expected.') + +#---------------------------------------------------------------------------- + +_examples = '''examples: + + python %(prog)s --outdir=out --target=targetimg.png \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/ffhq.pkl +''' + +#---------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description='Project given image to the latent space of pretrained network pickle.', + epilog=_examples, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True) + parser.add_argument('--target', help='Target image file to project to', dest='target_fname', required=True) + parser.add_argument('--save-video', help='Save an mp4 video of optimization progress (default: true)', type=_str_to_bool, default=True) + parser.add_argument('--seed', help='Random seed', type=int, default=303) + parser.add_argument('--outdir', help='Where to save the output images', required=True, metavar='DIR') + project(**vars(parser.parse_args())) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + main() + +#---------------------------------------------------------------------------- diff --git a/style_mixing.py b/style_mixing.py new file mode 100755 index 00000000..7d183f85 --- /dev/null +++ b/style_mixing.py @@ -0,0 +1,120 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Generate style mixing image matrix using pretrained network pickle.""" + +import argparse +import os +import pickle +import re + +import numpy as np +import PIL.Image + +import dnnlib +import dnnlib.tflib as tflib + +#---------------------------------------------------------------------------- + +def style_mixing_example(network_pkl, row_seeds, col_seeds, truncation_psi, col_styles, outdir, minibatch_size=4): + tflib.init_tf() + print('Loading networks from "%s"...' 
% network_pkl) + with dnnlib.util.open_url(network_pkl) as fp: + _G, _D, Gs = pickle.load(fp) + + w_avg = Gs.get_var('dlatent_avg') # [component] + Gs_syn_kwargs = { + 'output_transform': dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True), + 'randomize_noise': False, + 'minibatch_size': minibatch_size + } + + print('Generating W vectors...') + all_seeds = list(set(row_seeds + col_seeds)) + all_z = np.stack([np.random.RandomState(seed).randn(*Gs.input_shape[1:]) for seed in all_seeds]) # [minibatch, component] + all_w = Gs.components.mapping.run(all_z, None) # [minibatch, layer, component] + all_w = w_avg + (all_w - w_avg) * truncation_psi # [minibatch, layer, component] + w_dict = {seed: w for seed, w in zip(all_seeds, list(all_w))} # [layer, component] + + print('Generating images...') + all_images = Gs.components.synthesis.run(all_w, **Gs_syn_kwargs) # [minibatch, height, width, channel] + image_dict = {(seed, seed): image for seed, image in zip(all_seeds, list(all_images))} + + print('Generating style-mixed images...') + for row_seed in row_seeds: + for col_seed in col_seeds: + w = w_dict[row_seed].copy() + w[col_styles] = w_dict[col_seed][col_styles] + image = Gs.components.synthesis.run(w[np.newaxis], **Gs_syn_kwargs)[0] + image_dict[(row_seed, col_seed)] = image + + print('Saving images...') + os.makedirs(outdir, exist_ok=True) + for (row_seed, col_seed), image in image_dict.items(): + PIL.Image.fromarray(image, 'RGB').save(f'{outdir}/{row_seed}-{col_seed}.png') + + print('Saving image grid...') + _N, _C, H, W = Gs.output_shape + canvas = PIL.Image.new('RGB', (W * (len(col_seeds) + 1), H * (len(row_seeds) + 1)), 'black') + for row_idx, row_seed in enumerate([None] + row_seeds): + for col_idx, col_seed in enumerate([None] + col_seeds): + if row_seed is None and col_seed is None: + continue + key = (row_seed, col_seed) + if row_seed is None: + key = (col_seed, col_seed) + if col_seed is None: + key = (row_seed, row_seed) + canvas.paste(PIL.Image.fromarray(image_dict[key], 'RGB'), (W * col_idx, H * row_idx)) + canvas.save(f'{outdir}/grid.png') + +#---------------------------------------------------------------------------- + +def _parse_num_range(s): + '''Accept either a comma separated list of numbers 'a,b,c' or a range 'a-c' and return as a list of ints.''' + + range_re = re.compile(r'^(\d+)-(\d+)$') + m = range_re.match(s) + if m: + return list(range(int(m.group(1)), int(m.group(2))+1)) + vals = s.split(',') + return [int(x) for x in vals] + +#---------------------------------------------------------------------------- + +_examples = '''examples: + + python %(prog)s --outdir=out --trunc=1 --rows=85,100,75,458,1500 --cols=55,821,1789,293 \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metfaces.pkl +''' + +#---------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description='Generate style mixing image matrix using pretrained network pickle.', + epilog=_examples, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True) + parser.add_argument('--rows', dest='row_seeds', type=_parse_num_range, help='Random seeds to use for image rows', required=True) + parser.add_argument('--cols', dest='col_seeds', type=_parse_num_range, help='Random seeds to use for image columns', required=True) + parser.add_argument('--styles', dest='col_styles', type=_parse_num_range, help='Style layer 
range (default: %(default)s)', default='0-6')
+    parser.add_argument('--trunc', dest='truncation_psi', type=float, help='Truncation psi (default: %(default)s)', default=0.5)
+    parser.add_argument('--outdir', help='Where to save the output images', required=True, metavar='DIR')
+
+    args = parser.parse_args()
+    style_mixing_example(**vars(args))
+
+#----------------------------------------------------------------------------
+
+if __name__ == "__main__":
+    main()
+
+#----------------------------------------------------------------------------
diff --git a/train.py b/train.py
new file mode 100755
index 00000000..5b36d792
--- /dev/null
+++ b/train.py
@@ -0,0 +1,563 @@
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto. Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+"""Train a GAN using the techniques described in the paper
+"Training Generative Adversarial Networks with Limited Data"."""
+
+import os
+import argparse
+import json
+import re
+import tensorflow as tf
+import dnnlib
+import dnnlib.tflib as tflib
+
+from training import training_loop
+from training import dataset
+from metrics import metric_defaults
+
+#----------------------------------------------------------------------------
+
+class UserError(Exception):
+    pass
+
+#----------------------------------------------------------------------------
+
+def setup_training_options(
+    # General options (not included in desc).
+    gpus       = None, # Number of GPUs: <int>, default = 1 gpu
+    snap       = None, # Snapshot interval: <int>, default = 50 ticks
+
+    # Training dataset.
+    data       = None, # Training dataset (required): <path>
+    res        = None, # Override dataset resolution: <int>, default = highest available
+    mirror     = None, # Augment dataset with x-flips: <bool>, default = False
+
+    # Metrics (not included in desc).
+    metrics    = None, # List of metric names: [], ['fid50k_full'] (default), ...
+    metricdata = None, # Metric dataset (optional): <path>
+
+    # Base config.
+    cfg        = None, # Base config: 'auto' (default), 'stylegan2', 'paper256', 'paper512', 'paper1024', 'cifar', 'cifarbaseline'
+    gamma      = None, # Override R1 gamma: <float>, default = depends on cfg
+    kimg       = None, # Override training duration: <int>, default = depends on cfg
+
+    # Discriminator augmentation.
+    aug        = None, # Augmentation mode: 'ada' (default), 'noaug', 'fixed', 'adarv'
+    p          = None, # Specify p for 'fixed' (required): <float>
+    target     = None, # Override ADA target for 'ada' and 'adarv': <float>, default = depends on aug
+    augpipe    = None, # Augmentation pipeline: 'blit', 'geom', 'color', 'filter', 'noise', 'cutout', 'bg', 'bgc' (default), ..., 'bgcfnc'
+
+    # Comparison methods.
+    cmethod    = None, # Comparison method: 'nocmethod' (default), 'bcr', 'zcr', 'pagan', 'wgangp', 'auxrot', 'spectralnorm', 'shallowmap', 'adropout'
+    dcap       = None, # Multiplier for discriminator capacity: <float>, default = 1
+
+    # Transfer learning.
+    resume     = None, # Load previous network: 'noresume' (default), 'ffhq256', 'ffhq512', 'ffhq1024', 'celebahq256', 'lsundog256', <file>, <url>
+    freezed    = None, # Freeze-D: <int>, default = 0 discriminator layers
+):
+    # Initialize dicts.
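+    # Everything below only assembles a (desc, args) pair: `desc` becomes part of the
+    # run directory name and `args` is the EasyDict of keyword arguments that
+    # run_training() later hands to training_loop.training_loop(). Hedged sketch of the
+    # intended call pattern (the argument values here are made-up examples):
+    #
+    #   desc, args = setup_training_options(data='~/datasets/ffhq', gpus=2, cfg='auto')
+    #   args.run_dir = 'training-runs/00000-' + desc
+    #   training_loop.training_loop(**args)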
+ args = dnnlib.EasyDict() + args.G_args = dnnlib.EasyDict(func_name='training.networks.G_main') + args.D_args = dnnlib.EasyDict(func_name='training.networks.D_main') + args.G_opt_args = dnnlib.EasyDict(beta1=0.0, beta2=0.99) + args.D_opt_args = dnnlib.EasyDict(beta1=0.0, beta2=0.99) + args.loss_args = dnnlib.EasyDict(func_name='training.loss.stylegan2') + args.augment_args = dnnlib.EasyDict(class_name='training.augment.AdaptiveAugment') + + # --------------------------- + # General options: gpus, snap + # --------------------------- + + if gpus is None: + gpus = 1 + assert isinstance(gpus, int) + if not (gpus >= 1 and gpus & (gpus - 1) == 0): + raise UserError('--gpus must be a power of two') + args.num_gpus = gpus + + if snap is None: + snap = 50 + assert isinstance(snap, int) + if snap < 1: + raise UserError('--snap must be at least 1') + args.image_snapshot_ticks = snap + args.network_snapshot_ticks = snap + + # ----------------------------------- + # Training dataset: data, res, mirror + # ----------------------------------- + + assert data is not None + assert isinstance(data, str) + data_name = os.path.basename(os.path.abspath(data)) + if not os.path.isdir(data) or len(data_name) == 0: + raise UserError('--data must point to a directory containing *.tfrecords') + desc = data_name + + with tf.Graph().as_default(), tflib.create_session().as_default(): # pylint: disable=not-context-manager + args.train_dataset_args = dnnlib.EasyDict(path=data, max_label_size='full') + dataset_obj = dataset.load_dataset(**args.train_dataset_args) # try to load the data and see what comes out + args.train_dataset_args.resolution = dataset_obj.shape[-1] # be explicit about resolution + args.train_dataset_args.max_label_size = dataset_obj.label_size # be explicit about label size + validation_set_available = dataset_obj.has_validation_set + dataset_obj.close() + dataset_obj = None + + if res is None: + res = args.train_dataset_args.resolution + else: + assert isinstance(res, int) + if not (res >= 4 and res & (res - 1) == 0): + raise UserError('--res must be a power of two and at least 4') + if res > args.train_dataset_args.resolution: + raise UserError(f'--res cannot exceed maximum available resolution in the dataset ({args.train_dataset_args.resolution})') + desc += f'-res{res:d}' + args.train_dataset_args.resolution = res + + if mirror is None: + mirror = False + else: + assert isinstance(mirror, bool) + if mirror: + desc += '-mirror' + args.train_dataset_args.mirror_augment = mirror + + # ---------------------------- + # Metrics: metrics, metricdata + # ---------------------------- + + if metrics is None: + metrics = ['fid50k_full'] + assert isinstance(metrics, list) + assert all(isinstance(metric, str) for metric in metrics) + + args.metric_arg_list = [] + for metric in metrics: + if metric not in metric_defaults.metric_defaults: + raise UserError('\n'.join(['--metrics can only contain the following values:', 'none'] + list(metric_defaults.metric_defaults.keys()))) + args.metric_arg_list.append(metric_defaults.metric_defaults[metric]) + + args.metric_dataset_args = dnnlib.EasyDict(args.train_dataset_args) + if metricdata is not None: + assert isinstance(metricdata, str) + if not os.path.isdir(metricdata): + raise UserError('--metricdata must point to a directory containing *.tfrecords') + args.metric_dataset_args.path = metricdata + + # ----------------------------- + # Base config: cfg, gamma, kimg + # ----------------------------- + + if cfg is None: + cfg = 'auto' + assert isinstance(cfg, str) + 
desc += f'-{cfg}' + + cfg_specs = { + 'auto': dict(ref_gpus=-1, kimg=25000, mb=-1, mbstd=-1, fmaps=-1, lrate=-1, gamma=-1, ema=-1, ramp=0.05, map=2), # populated dynamically based on 'gpus' and 'res' + 'stylegan2': dict(ref_gpus=8, kimg=25000, mb=32, mbstd=4, fmaps=1, lrate=0.002, gamma=10, ema=10, ramp=None, map=8), # uses mixed-precision, unlike original StyleGAN2 + 'paper256': dict(ref_gpus=8, kimg=25000, mb=64, mbstd=8, fmaps=0.5, lrate=0.0025, gamma=1, ema=20, ramp=None, map=8), + 'paper512': dict(ref_gpus=8, kimg=25000, mb=64, mbstd=8, fmaps=1, lrate=0.0025, gamma=0.5, ema=20, ramp=None, map=8), + 'paper1024': dict(ref_gpus=8, kimg=25000, mb=32, mbstd=4, fmaps=1, lrate=0.002, gamma=2, ema=10, ramp=None, map=8), + 'cifar': dict(ref_gpus=2, kimg=100000, mb=64, mbstd=32, fmaps=0.5, lrate=0.0025, gamma=0.01, ema=500, ramp=0.05, map=2), + 'cifarbaseline': dict(ref_gpus=2, kimg=100000, mb=64, mbstd=32, fmaps=0.5, lrate=0.0025, gamma=0.01, ema=500, ramp=0.05, map=8), + } + + assert cfg in cfg_specs + spec = dnnlib.EasyDict(cfg_specs[cfg]) + if cfg == 'auto': + desc += f'{gpus:d}' + spec.ref_gpus = gpus + spec.mb = max(min(gpus * min(4096 // res, 32), 64), gpus) # keep gpu memory consumption at bay + spec.mbstd = min(spec.mb // gpus, 4) # other hyperparams behave more predictably if mbstd group size remains fixed + spec.fmaps = 1 if res >= 512 else 0.5 + spec.lrate = 0.002 if res >= 1024 else 0.0025 + spec.gamma = 0.0002 * (res ** 2) / spec.mb # heuristic formula + spec.ema = spec.mb * 10 / 32 + + args.total_kimg = spec.kimg + args.minibatch_size = spec.mb + args.minibatch_gpu = spec.mb // spec.ref_gpus + args.D_args.mbstd_group_size = spec.mbstd + args.G_args.fmap_base = args.D_args.fmap_base = int(spec.fmaps * 16384) + args.G_args.fmap_max = args.D_args.fmap_max = 512 + args.G_opt_args.learning_rate = args.D_opt_args.learning_rate = spec.lrate + args.loss_args.r1_gamma = spec.gamma + args.G_smoothing_kimg = spec.ema + args.G_smoothing_rampup = spec.ramp + args.G_args.mapping_layers = spec.map + args.G_args.num_fp16_res = args.D_args.num_fp16_res = 4 # enable mixed-precision training + args.G_args.conv_clamp = args.D_args.conv_clamp = 256 # clamp activations to avoid float16 overflow + + if cfg == 'cifar': + args.loss_args.pl_weight = 0 # disable path length regularization + args.G_args.style_mixing_prob = None # disable style mixing + args.D_args.architecture = 'orig' # disable residual skip connections + + if gamma is not None: + assert isinstance(gamma, float) + if not gamma >= 0: + raise UserError('--gamma must be non-negative') + desc += f'-gamma{gamma:g}' + args.loss_args.r1_gamma = gamma + + if kimg is not None: + assert isinstance(kimg, int) + if not kimg >= 1: + raise UserError('--kimg must be at least 1') + desc += f'-kimg{kimg:d}' + args.total_kimg = kimg + + # --------------------------------------------------- + # Discriminator augmentation: aug, p, target, augpipe + # --------------------------------------------------- + + if aug is None: + aug = 'ada' + else: + assert isinstance(aug, str) + desc += f'-{aug}' + + if aug == 'ada': + args.augment_args.tune_heuristic = 'rt' + args.augment_args.tune_target = 0.6 + + elif aug == 'noaug': + pass + + elif aug == 'fixed': + if p is None: + raise UserError(f'--aug={aug} requires specifying --p') + + elif aug == 'adarv': + if not validation_set_available: + raise UserError(f'--aug={aug} requires separate validation set; please see "python dataset_tool.py pack -h"') + args.augment_args.tune_heuristic = 'rv' + 
args.augment_args.tune_target = 0.5 + + else: + raise UserError(f'--aug={aug} not supported') + + if p is not None: + assert isinstance(p, float) + if aug != 'fixed': + raise UserError('--p can only be specified with --aug=fixed') + if not 0 <= p <= 1: + raise UserError('--p must be between 0 and 1') + desc += f'-p{p:g}' + args.augment_args.initial_strength = p + + if target is not None: + assert isinstance(target, float) + if aug not in ['ada', 'adarv']: + raise UserError('--target can only be specified with --aug=ada or --aug=adarv') + if not 0 <= target <= 1: + raise UserError('--target must be between 0 and 1') + desc += f'-target{target:g}' + args.augment_args.tune_target = target + + assert augpipe is None or isinstance(augpipe, str) + if augpipe is None: + augpipe = 'bgc' + else: + if aug == 'noaug': + raise UserError('--augpipe cannot be specified with --aug=noaug') + desc += f'-{augpipe}' + + augpipe_specs = { + 'blit': dict(xflip=1, rotate90=1, xint=1), + 'geom': dict(scale=1, rotate=1, aniso=1, xfrac=1), + 'color': dict(brightness=1, contrast=1, lumaflip=1, hue=1, saturation=1), + 'filter': dict(imgfilter=1), + 'noise': dict(noise=1), + 'cutout': dict(cutout=1), + 'bg': dict(xflip=1, rotate90=1, xint=1, scale=1, rotate=1, aniso=1, xfrac=1), + 'bgc': dict(xflip=1, rotate90=1, xint=1, scale=1, rotate=1, aniso=1, xfrac=1, brightness=1, contrast=1, lumaflip=1, hue=1, saturation=1), + 'bgcf': dict(xflip=1, rotate90=1, xint=1, scale=1, rotate=1, aniso=1, xfrac=1, brightness=1, contrast=1, lumaflip=1, hue=1, saturation=1, imgfilter=1), + 'bgcfn': dict(xflip=1, rotate90=1, xint=1, scale=1, rotate=1, aniso=1, xfrac=1, brightness=1, contrast=1, lumaflip=1, hue=1, saturation=1, imgfilter=1, noise=1), + 'bgcfnc': dict(xflip=1, rotate90=1, xint=1, scale=1, rotate=1, aniso=1, xfrac=1, brightness=1, contrast=1, lumaflip=1, hue=1, saturation=1, imgfilter=1, noise=1, cutout=1), + } + + assert augpipe in augpipe_specs + if aug != 'noaug': + args.augment_args.apply_func = 'training.augment.augment_pipeline' + args.augment_args.apply_args = augpipe_specs[augpipe] + + # --------------------------------- + # Comparison methods: cmethod, dcap + # --------------------------------- + + assert cmethod is None or isinstance(cmethod, str) + if cmethod is None: + cmethod = 'nocmethod' + else: + desc += f'-{cmethod}' + + if cmethod == 'nocmethod': + pass + + elif cmethod == 'bcr': + args.loss_args.func_name = 'training.loss.cmethods' + args.loss_args.bcr_real_weight = 10 + args.loss_args.bcr_fake_weight = 10 + args.loss_args.bcr_augment = dnnlib.EasyDict(func_name='training.augment.augment_pipeline', xint=1, xint_max=1/32) + + elif cmethod == 'zcr': + args.loss_args.func_name = 'training.loss.cmethods' + args.loss_args.zcr_gen_weight = 0.02 + args.loss_args.zcr_dis_weight = 0.2 + args.G_args.num_fp16_res = args.D_args.num_fp16_res = 0 # disable mixed-precision training + args.G_args.conv_clamp = args.D_args.conv_clamp = None + + elif cmethod == 'pagan': + if aug != 'noaug': + raise UserError(f'--cmethod={cmethod} is not compatible with discriminator augmentation; please specify --aug=noaug') + args.D_args.use_pagan = True + args.augment_args.tune_heuristic = 'rt' # enable ada heuristic + args.augment_args.pop('apply_func', None) # disable discriminator augmentation + args.augment_args.pop('apply_args', None) + args.augment_args.tune_target = 0.95 + + elif cmethod == 'wgangp': + if aug != 'noaug': + raise UserError(f'--cmethod={cmethod} is not compatible with discriminator augmentation; please specify 
--aug=noaug') + if gamma is not None: + raise UserError(f'--cmethod={cmethod} is not compatible with --gamma') + args.loss_args = dnnlib.EasyDict(func_name='training.loss.wgangp') + args.G_opt_args.learning_rate = args.D_opt_args.learning_rate = 0.001 + args.G_args.num_fp16_res = args.D_args.num_fp16_res = 0 # disable mixed-precision training + args.G_args.conv_clamp = args.D_args.conv_clamp = None + args.lazy_regularization = False + + elif cmethod == 'auxrot': + if args.train_dataset_args.max_label_size > 0: + raise UserError(f'--cmethod={cmethod} is not compatible with label conditioning; please specify a dataset without labels') + args.loss_args.func_name = 'training.loss.cmethods' + args.loss_args.auxrot_alpha = 10 + args.loss_args.auxrot_beta = 5 + args.D_args.score_max = 5 # prepare D to output 5 scalars per image instead of just 1 + + elif cmethod == 'spectralnorm': + args.D_args.use_spectral_norm = True + + elif cmethod == 'shallowmap': + if args.G_args.mapping_layers == 2: + raise UserError(f'--cmethod={cmethod} is a no-op for --cfg={cfg}') + args.G_args.mapping_layers = 2 + + elif cmethod == 'adropout': + if aug != 'noaug': + raise UserError(f'--cmethod={cmethod} is not compatible with discriminator augmentation; please specify --aug=noaug') + args.D_args.adaptive_dropout = 1 + args.augment_args.tune_heuristic = 'rt' # enable ada heuristic + args.augment_args.pop('apply_func', None) # disable discriminator augmentation + args.augment_args.pop('apply_args', None) + args.augment_args.tune_target = 0.6 + + else: + raise UserError(f'--cmethod={cmethod} not supported') + + if dcap is not None: + assert isinstance(dcap, float) + if not dcap > 0: + raise UserError('--dcap must be positive') + desc += f'-dcap{dcap:g}' + args.D_args.fmap_base = max(int(args.D_args.fmap_base * dcap), 1) + args.D_args.fmap_max = max(int(args.D_args.fmap_max * dcap), 1) + + # ---------------------------------- + # Transfer learning: resume, freezed + # ---------------------------------- + + resume_specs = { + 'ffhq256': 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/transfer-learning-source-nets/ffhq-res256-mirror-paper256-noaug.pkl', + 'ffhq512': 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl', + 'ffhq1024': 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/transfer-learning-source-nets/ffhq-res1024-mirror-stylegan2-noaug.pkl', + 'celebahq256': 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/transfer-learning-source-nets/celebahq-res256-mirror-paper256-kimg100000-ada-target0.5.pkl', + 'lsundog256': 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/transfer-learning-source-nets/lsundog-res256-paper256-kimg100000-noaug.pkl', + } + + assert resume is None or isinstance(resume, str) + if resume is None: + resume = 'noresume' + elif resume == 'noresume': + desc += '-noresume' + elif resume in resume_specs: + desc += f'-resume{resume}' + args.resume_pkl = resume_specs[resume] # predefined url + else: + desc += '-resumecustom' + args.resume_pkl = resume # custom path or url + + if resume != 'noresume': + args.augment_args.tune_kimg = 100 # make ADA react faster at the beginning + args.G_smoothing_rampup = None # disable EMA rampup + + if freezed is not None: + assert isinstance(freezed, int) + if not freezed >= 0: + raise UserError('--freezed must be non-negative') + desc += f'-freezed{freezed:d}' + args.D_args.freeze_layers = freezed + + return desc, args + 
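+# For intuition, the 'auto' config resolves to concrete values once `res` and `gpus`
+# are known. Worked example for res=256, gpus=1, following the formulas in the
+# cfg == 'auto' branch above (illustrative numbers, not additional defaults):
+#
+#   mb    = max(min(1 * min(4096 // 256, 32), 64), 1) = 16     # total minibatch
+#   mbstd = min(16 // 1, 4)                            = 4      # mbstd group size
+#   fmaps = 0.5 (res < 512),   lrate = 0.0025 (res < 1024)
+#   gamma = 0.0002 * 256**2 / 16                       = 0.8192
+#   ema   = 16 * 10 / 32                               = 5 kimg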
+#---------------------------------------------------------------------------- + +def run_training(outdir, seed, dry_run, **hyperparam_options): + # Setup training options. + tflib.init_tf({'rnd.np_random_seed': seed}) + run_desc, training_options = setup_training_options(**hyperparam_options) + + # Pick output directory. + prev_run_dirs = [] + if os.path.isdir(outdir): + prev_run_dirs = [x for x in os.listdir(outdir) if os.path.isdir(os.path.join(outdir, x))] + prev_run_ids = [re.match(r'^\d+', x) for x in prev_run_dirs] + prev_run_ids = [int(x.group()) for x in prev_run_ids if x is not None] + cur_run_id = max(prev_run_ids, default=-1) + 1 + training_options.run_dir = os.path.join(outdir, f'{cur_run_id:05d}-{run_desc}') + assert not os.path.exists(training_options.run_dir) + + # Print options. + print() + print('Training options:') + print(json.dumps(training_options, indent=2)) + print() + print(f'Output directory: {training_options.run_dir}') + print(f'Training data: {training_options.train_dataset_args.path}') + print(f'Training length: {training_options.total_kimg} kimg') + print(f'Resolution: {training_options.train_dataset_args.resolution}') + print(f'Number of GPUs: {training_options.num_gpus}') + print() + + # Dry run? + if dry_run: + print('Dry run; exiting.') + return + + # Kick off training. + print('Creating output directory...') + os.makedirs(training_options.run_dir) + with open(os.path.join(training_options.run_dir, 'training_options.json'), 'wt') as f: + json.dump(training_options, f, indent=2) + with dnnlib.util.Logger(os.path.join(training_options.run_dir, 'log.txt')): + training_loop.training_loop(**training_options) + +#---------------------------------------------------------------------------- + +def _str_to_bool(v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + if v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + raise argparse.ArgumentTypeError('Boolean value expected.') + +def _parse_comma_sep(s): + if s is None or s.lower() == 'none' or s == '': + return [] + return s.split(',') + +#---------------------------------------------------------------------------- + +_cmdline_help_epilog = '''examples: + + # Train custom dataset using 1 GPU. + python %(prog)s --outdir=~/training-runs --gpus=1 --data=~/datasets/custom + + # Train class-conditional CIFAR-10 using 2 GPUs. + python %(prog)s --outdir=~/training-runs --gpus=2 --data=~/datasets/cifar10c \\ + --cfg=cifar + + # Transfer learn MetFaces from FFHQ using 4 GPUs. + python %(prog)s --outdir=~/training-runs --gpus=4 --data=~/datasets/metfaces \\ + --cfg=paper1024 --mirror=1 --resume=ffhq1024 --snap=10 + + # Reproduce original StyleGAN2 config F. + python %(prog)s --outdir=~/training-runs --gpus=8 --data=~/datasets/ffhq \\ + --cfg=stylegan2 --res=1024 --mirror=1 --aug=noaug + +available base configs (--cfg): + auto Automatically select reasonable defaults based on resolution + and GPU count. Good starting point for new datasets. + stylegan2 Reproduce results for StyleGAN2 config F at 1024x1024. + paper256 Reproduce results for FFHQ and LSUN Cat at 256x256. + paper512 Reproduce results for BreCaHAD and AFHQ at 512x512. + paper1024 Reproduce results for MetFaces at 1024x1024. + cifar Reproduce results for CIFAR-10 (tuned configuration). + cifarbaseline Reproduce results for CIFAR-10 (baseline configuration). + +transfer learning source networks (--resume): + ffhq256 FFHQ trained at 256x256 resolution. + ffhq512 FFHQ trained at 512x512 resolution. 
+ ffhq1024 FFHQ trained at 1024x1024 resolution. + celebahq256 CelebA-HQ trained at 256x256 resolution. + lsundog256 LSUN Dog trained at 256x256 resolution. + Custom network pickle. +''' + +#---------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description='Train a GAN using the techniques described in the paper\n"Training Generative Adversarial Networks with Limited Data".', + epilog=_cmdline_help_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + group = parser.add_argument_group('general options') + group.add_argument('--outdir', help='Where to save the results (required)', required=True, metavar='DIR') + group.add_argument('--gpus', help='Number of GPUs to use (default: 1 gpu)', type=int, metavar='INT') + group.add_argument('--snap', help='Snapshot interval (default: 50 ticks)', type=int, metavar='INT') + group.add_argument('--seed', help='Random seed (default: %(default)s)', type=int, default=1000, metavar='INT') + group.add_argument('-n', '--dry-run', help='Print training options and exit', action='store_true', default=False) + + group = parser.add_argument_group('training dataset') + group.add_argument('--data', help='Training dataset path (required)', metavar='PATH', required=True) + group.add_argument('--res', help='Dataset resolution (default: highest available)', type=int, metavar='INT') + group.add_argument('--mirror', help='Augment dataset with x-flips (default: false)', type=_str_to_bool, metavar='BOOL') + + group = parser.add_argument_group('metrics') + group.add_argument('--metrics', help='Comma-separated list or "none" (default: fid50k_full)', type=_parse_comma_sep, metavar='LIST') + group.add_argument('--metricdata', help='Dataset to evaluate metrics against (optional)', metavar='PATH') + + group = parser.add_argument_group('base config') + group.add_argument('--cfg', help='Base config (default: auto)', choices=['auto', 'stylegan2', 'paper256', 'paper512', 'paper1024', 'cifar', 'cifarbaseline']) + group.add_argument('--gamma', help='Override R1 gamma', type=float, metavar='FLOAT') + group.add_argument('--kimg', help='Override training duration', type=int, metavar='INT') + + group = parser.add_argument_group('discriminator augmentation') + group.add_argument('--aug', help='Augmentation mode (default: ada)', choices=['noaug', 'ada', 'fixed', 'adarv']) + group.add_argument('--p', help='Specify augmentation probability for --aug=fixed', type=float, metavar='FLOAT') + group.add_argument('--target', help='Override ADA target for --aug=ada and --aug=adarv', type=float) + group.add_argument('--augpipe', help='Augmentation pipeline (default: bgc)', choices=['blit', 'geom', 'color', 'filter', 'noise', 'cutout', 'bg', 'bgc', 'bgcf', 'bgcfn', 'bgcfnc']) + + group = parser.add_argument_group('comparison methods') + group.add_argument('--cmethod', help='Comparison method (default: nocmethod)', choices=['nocmethod', 'bcr', 'zcr', 'pagan', 'wgangp', 'auxrot', 'spectralnorm', 'shallowmap', 'adropout']) + group.add_argument('--dcap', help='Multiplier for discriminator capacity', type=float, metavar='FLOAT') + + group = parser.add_argument_group('transfer learning') + group.add_argument('--resume', help='Resume from network pickle (default: noresume)') + group.add_argument('--freezed', help='Freeze-D (default: 0 discriminator layers)', type=int, metavar='INT') + + args = parser.parse_args() + try: + run_training(**vars(args)) + except UserError as err: + print(f'Error: {err}') + exit(1) + 
+#---------------------------------------------------------------------------- + +if __name__ == "__main__": + main() + +#---------------------------------------------------------------------------- diff --git a/training/__init__.py b/training/__init__.py new file mode 100755 index 00000000..2c61c745 --- /dev/null +++ b/training/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +# empty diff --git a/training/augment.py b/training/augment.py new file mode 100755 index 00000000..17296fc3 --- /dev/null +++ b/training/augment.py @@ -0,0 +1,587 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Adaptive discriminator augmentation (ADA) from the paper +"Training Generative Adversarial Networks with Limited Data".""" + +import numpy as np +import tensorflow as tf +import scipy.signal +import dnnlib +import dnnlib.tflib as tflib + +from training import loss + +#---------------------------------------------------------------------------- +# Main class for adaptive discriminator augmentation (ADA). +# - Performs adaptive tuning of augmentation strength during training. +# - Acts as a wrapper for the augmentation pipeline. +# - Keeps track of the necessary training statistics. +# - Calculates statistics for the validation set, if available. + +class AdaptiveAugment: + def __init__(self, + apply_func = None, # Function representing the augmentation pipeline. Can be a fully-qualified name, a function object, or None. + apply_args = {}, # Keyword arguments for the augmentation pipeline. + initial_strength = 0, # Augmentation strength (p) to use initially. + tune_heuristic = None, # Heuristic for tuning the augmentation strength dynamically: 'rt', 'rv', None. + tune_target = None, # Target value for the selected heuristic. + tune_kimg = 500, # Adjustment speed, measured in how many kimg it takes for the strength to increase/decrease by one unit. + stat_decay_kimg = 0, # Exponential moving average to use for training statistics, measured as the half-life in kimg. 0 = disable EMA. + ): + tune_stats = { + 'rt': {'Loss/signs/real'}, + 'rv': {'Loss/scores/fake', 'Loss/scores/real', 'Loss/scores/valid'}, + None: {}, + } + assert tune_heuristic in tune_stats + assert apply_func is None or isinstance(apply_func, str) or dnnlib.util.is_top_level_function(apply_func) + + # Configuration. + self.apply_func = dnnlib.util.get_obj_by_name(apply_func) if isinstance(apply_func, str) else apply_func + self.apply_args = apply_args + self.strength = initial_strength + self.tune_heuristic = tune_heuristic + self.tune_target = tune_target + self.tune_kimg = tune_kimg + self.stat_decay_kimg = stat_decay_kimg + + # Runtime state. 
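+        # Note on adjustment speed: tune() nudges `strength` by
+        # nimg_delta / (tune_kimg * 1000) per call, in the direction indicated by the
+        # heuristic. With the default tune_kimg=500 and a hypothetical adjustment every
+        # 4,000 images, each step moves p by at most 4000 / 500000 = 0.008, so roughly
+        # 500 kimg of consistent overfitting signal is needed to traverse [0, 1].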
+ self._tune_stats = tune_stats[tune_heuristic] + self._strength_var = None + self._acc_vars = dict() # {name: [var, ...], ...} + self._acc_decay_in = None + self._acc_decay_ops = dict() # {name: op, ...} + self._valid_images = None + self._valid_labels = None + self._valid_images_in = None + self._valid_labels_in = None + self._valid_op = None + self._valid_ofs = 0 + + def init_validation_set(self, D_gpus, training_set): + assert self._valid_images is None + images, labels = training_set.load_validation_set_np() + if images.shape[0] == 0: + return + self._valid_images = images + self._valid_labels = labels + + # Build validation graph. + with tflib.absolute_name_scope('Validation'), tf.control_dependencies(None): + with tf.device('/cpu:0'): + self._valid_images_in = tf.placeholder(training_set.dtype, name='valid_images_in', shape=[None]+training_set.shape) + self._valid_labels_in = tf.placeholder(training_set.label_dtype, name='valid_labels_in', shape=[None,training_set.label_size]) + images_in_gpus = tf.split(self._valid_images_in, len(D_gpus)) + labels_in_gpus = tf.split(self._valid_labels_in, len(D_gpus)) + ops = [] + for gpu, (D_gpu, images_in_gpu, labels_in_gpu) in enumerate(zip(D_gpus, images_in_gpus, labels_in_gpus)): + with tf.device(f'/gpu:{gpu}'): + images_expr = tf.cast(images_in_gpu, tf.float32) * (2 / 255) - 1 + D_valid = loss.eval_D(D_gpu, self, images_expr, labels_in_gpu, report='valid') + ops += [D_valid.scores] + self._valid_op = tf.group(*ops) + + def apply(self, images, labels, enable=True): + if not enable or self.apply_func is None or (self.strength == 0 and self.tune_heuristic is None): + return images, labels + with tf.name_scope('Augment'): + images, labels = self.apply_func(images, labels, strength=self.get_strength_var(), **self.apply_args) + return images, labels + + def get_strength_var(self): + if self._strength_var is None: + with tflib.absolute_name_scope('Augment'), tf.control_dependencies(None): + self._strength_var = tf.Variable(np.float32(self.strength), name='strength', trainable=False) + return self._strength_var + + def report_stat(self, name, expr): + if name in self._tune_stats: + expr = self._increment_acc(name, expr) + return expr + + def tune(self, nimg_delta): + acc = {name: self._read_and_decay_acc(name, nimg_delta) for name in self._tune_stats} + nimg_ratio = nimg_delta / (self.tune_kimg * 1000) + strength = self.strength + + if self.tune_heuristic == 'rt': + assert self.tune_target is not None + rt = acc['Loss/signs/real'] + strength += nimg_ratio * np.sign(rt - self.tune_target) + + if self.tune_heuristic == 'rv': + assert self.tune_target is not None + assert self._valid_images is not None + rv = (acc['Loss/scores/real'] - acc['Loss/scores/valid']) / max(acc['Loss/scores/real'] - acc['Loss/scores/fake'], 1e-8) + strength += nimg_ratio * np.sign(rv - self.tune_target) + + self._set_strength(strength) + + def run_validation(self, minibatch_size): + if self._valid_images is not None: + indices = [(self._valid_ofs + i) % self._valid_images.shape[0] for i in range(minibatch_size)] + tflib.run(self._valid_op, {self._valid_images_in: self._valid_images[indices], self._valid_labels_in: self._valid_labels[indices]}) + self._valid_ofs += len(indices) + + def _set_strength(self, strength): + strength = max(strength, 0) + if self._strength_var is not None and strength != self.strength: + tflib.set_vars({self._strength_var: strength}) + self.strength = strength + + def _increment_acc(self, name, expr): + with tf.name_scope('acc_' + name): + with 
tf.control_dependencies(None): + acc_var = tf.Variable(tf.zeros(2), name=name, trainable=False) # [acc_num, acc_sum] + if name not in self._acc_vars: + self._acc_vars[name] = [] + self._acc_vars[name].append(acc_var) + expr_num = tf.shape(tf.reshape(expr, [-1]))[0] + expr_sum = tf.reduce_sum(expr) + acc_op = tf.assign_add(acc_var, [expr_num, expr_sum]) + with tf.control_dependencies([acc_op]): + return tf.identity(expr) + + def _read_and_decay_acc(self, name, nimg_delta): + acc_vars = self._acc_vars[name] + acc_num, acc_sum = tuple(np.sum(tflib.run(acc_vars), axis=0)) + if nimg_delta > 0: + with tflib.absolute_name_scope('Augment'), tf.control_dependencies(None): + if self._acc_decay_in is None: + self._acc_decay_in = tf.placeholder(tf.float32, name='acc_decay_in', shape=[]) + if name not in self._acc_decay_ops: + with tf.name_scope('acc_' + name): + ops = [tf.assign(var, var * self._acc_decay_in) for var in acc_vars] + self._acc_decay_ops[name] = tf.group(*ops) + acc_decay = 0.5 ** (nimg_delta / (self.stat_decay_kimg * 1000)) if self.stat_decay_kimg > 0 else 0 + tflib.run(self._acc_decay_ops[name], {self._acc_decay_in: acc_decay}) + return acc_sum / acc_num if acc_num > 0 else 0 + +#---------------------------------------------------------------------------- +# Helper for randomly gating augmentation parameters based on the given probability. + +def gate_augment_params(probability, params, disabled_val): + shape = tf.shape(params) + cond = (tf.random_uniform(shape[:1], 0, 1) < probability) + disabled_val = tf.broadcast_to(tf.convert_to_tensor(disabled_val, dtype=params.dtype), shape) + return tf.where(cond, params, disabled_val) + +#---------------------------------------------------------------------------- +# Helpers for constructing batched transformation matrices. + +def construct_batch_of_matrices(*rows): + rows = [[tf.convert_to_tensor(x, dtype=tf.float32) for x in r] for r in rows] + batch_elems = [x for r in rows for x in r if x.shape.rank != 0] + assert all(x.shape.rank == 1 for x in batch_elems) + batch_size = tf.shape(batch_elems[0])[0] if len(batch_elems) else 1 + rows = [[tf.broadcast_to(x, [batch_size]) for x in r] for r in rows] + return tf.transpose(rows, [2, 0, 1]) + +def translate_2d(tx, ty): + return construct_batch_of_matrices( + [1, 0, tx], + [0, 1, ty], + [0, 0, 1]) + +def translate_3d(tx, ty, tz): + return construct_batch_of_matrices( + [1, 0, 0, tx], + [0, 1, 0, ty], + [0, 0, 1, tz], + [0, 0, 0, 1]) + +def scale_2d(sx, sy): + return construct_batch_of_matrices( + [sx, 0, 0], + [0, sy, 0], + [0, 0, 1]) + +def scale_3d(sx, sy, sz): + return construct_batch_of_matrices( + [sx, 0, 0, 0], + [0, sy, 0, 0], + [0, 0, sz, 0], + [0, 0, 0, 1]) + +def rotate_2d(theta): + return construct_batch_of_matrices( + [tf.cos(theta), tf.sin(-theta), 0], + [tf.sin(theta), tf.cos(theta), 0], + [0, 0, 1]) + +def rotate_3d(v, theta): + vx = v[..., 0]; vy = v[..., 1]; vz = v[..., 2] + s = tf.sin(theta); c = tf.cos(theta); cc = 1 - c + return construct_batch_of_matrices( + [vx*vx*cc+c, vx*vy*cc-vz*s, vx*vz*cc+vy*s, 0], + [vy*vx*cc+vz*s, vy*vy*cc+c, vy*vz*cc-vx*s, 0], + [vz*vx*cc-vy*s, vz*vy*cc+vx*s, vz*vz*cc+c, 0], + [0, 0, 0, 1]) + +def translate_2d_inv(tx, ty): + return translate_2d(-tx, -ty) + +def scale_2d_inv(sx, sy): + return scale_2d(1/sx, 1/sy) + +def rotate_2d_inv(theta): + return rotate_2d(-theta) + +#---------------------------------------------------------------------------- +# Coefficients of various wavelet decomposition low-pass filters. 
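+# These are the analysis (low-pass) taps of orthogonal wavelets: 'sym6' is used below
+# for anti-aliased 2x up/downsampling around the geometric warp, and 'sym2' seeds the
+# image-space filter bank. A quick sanity check one could run (illustrative only):
+#
+#   h = np.asarray(wavelets['sym6'])
+#   assert np.isclose(np.sum(h**2), 1.0)          # orthonormal: unit energy
+#   assert np.isclose(np.sum(h), np.sqrt(2.0))    # DC gain of sqrt(2)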
+ +wavelets = { + 'haar': [0.7071067811865476, 0.7071067811865476], + 'db1': [0.7071067811865476, 0.7071067811865476], + 'db2': [-0.12940952255092145, 0.22414386804185735, 0.836516303737469, 0.48296291314469025], + 'db3': [0.035226291882100656, -0.08544127388224149, -0.13501102001039084, 0.4598775021193313, 0.8068915093133388, 0.3326705529509569], + 'db4': [-0.010597401784997278, 0.032883011666982945, 0.030841381835986965, -0.18703481171888114, -0.02798376941698385, 0.6308807679295904, 0.7148465705525415, 0.23037781330885523], + 'db5': [0.003335725285001549, -0.012580751999015526, -0.006241490213011705, 0.07757149384006515, -0.03224486958502952, -0.24229488706619015, 0.13842814590110342, 0.7243085284385744, 0.6038292697974729, 0.160102397974125], + 'db6': [-0.00107730108499558, 0.004777257511010651, 0.0005538422009938016, -0.031582039318031156, 0.02752286553001629, 0.09750160558707936, -0.12976686756709563, -0.22626469396516913, 0.3152503517092432, 0.7511339080215775, 0.4946238903983854, 0.11154074335008017], + 'db7': [0.0003537138000010399, -0.0018016407039998328, 0.00042957797300470274, 0.012550998556013784, -0.01657454163101562, -0.03802993693503463, 0.0806126091510659, 0.07130921926705004, -0.22403618499416572, -0.14390600392910627, 0.4697822874053586, 0.7291320908465551, 0.39653931948230575, 0.07785205408506236], + 'db8': [-0.00011747678400228192, 0.0006754494059985568, -0.0003917403729959771, -0.00487035299301066, 0.008746094047015655, 0.013981027917015516, -0.04408825393106472, -0.01736930100202211, 0.128747426620186, 0.00047248457399797254, -0.2840155429624281, -0.015829105256023893, 0.5853546836548691, 0.6756307362980128, 0.3128715909144659, 0.05441584224308161], + 'sym2': [-0.12940952255092145, 0.22414386804185735, 0.836516303737469, 0.48296291314469025], + 'sym3': [0.035226291882100656, -0.08544127388224149, -0.13501102001039084, 0.4598775021193313, 0.8068915093133388, 0.3326705529509569], + 'sym4': [-0.07576571478927333, -0.02963552764599851, 0.49761866763201545, 0.8037387518059161, 0.29785779560527736, -0.09921954357684722, -0.012603967262037833, 0.0322231006040427], + 'sym5': [0.027333068345077982, 0.029519490925774643, -0.039134249302383094, 0.1993975339773936, 0.7234076904024206, 0.6339789634582119, 0.01660210576452232, -0.17532808990845047, -0.021101834024758855, 0.019538882735286728], + 'sym6': [0.015404109327027373, 0.0034907120842174702, -0.11799011114819057, -0.048311742585633, 0.4910559419267466, 0.787641141030194, 0.3379294217276218, -0.07263752278646252, -0.021060292512300564, 0.04472490177066578, 0.0017677118642428036, -0.007800708325034148], + 'sym7': [0.002681814568257878, -0.0010473848886829163, -0.01263630340325193, 0.03051551316596357, 0.0678926935013727, -0.049552834937127255, 0.017441255086855827, 0.5361019170917628, 0.767764317003164, 0.2886296317515146, -0.14004724044296152, -0.10780823770381774, 0.004010244871533663, 0.010268176708511255], + 'sym8': [-0.0033824159510061256, -0.0005421323317911481, 0.03169508781149298, 0.007607487324917605, -0.1432942383508097, -0.061273359067658524, 0.4813596512583722, 0.7771857517005235, 0.3644418948353314, -0.05194583810770904, -0.027219029917056003, 0.049137179673607506, 0.003808752013890615, -0.01495225833704823, -0.0003029205147213668, 0.0018899503327594609], +} + +#---------------------------------------------------------------------------- +# Versatile image augmentation pipeline from the paper +# "Training Generative Adversarial Networks with Limited Data". 
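+# (In this codebase the pipeline is not called directly: train.py picks a preset from
+# augpipe_specs, e.g. 'bgc' = blit + geometric + color, and AdaptiveAugment.apply()
+# invokes augment_pipeline() with those multipliers and the current strength tensor.)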
+# +# All augmentations are disabled by default; individual augmentations can +# be enabled by setting their probability multipliers to 1. + +def augment_pipeline( + images, # Input images: NCHW, float32, dynamic range [-1,+1]. + labels, # Input labels. + strength = 1, # Overall multiplier for augmentation probability; can be a Tensor. + debug_percentile = None, # Percentile value for visualizing parameter ranges; None = normal operation. + + # Pixel blitting. + xflip = 0, # Probability multiplier for x-flip. + rotate90 = 0, # Probability multiplier for 90 degree rotations. + xint = 0, # Probability multiplier for integer translation. + xint_max = 0.125, # Range of integer translation, relative to image dimensions. + + # General geometric transformations. + scale = 0, # Probability multiplier for isotropic scaling. + rotate = 0, # Probability multiplier for arbitrary rotation. + aniso = 0, # Probability multiplier for anisotropic scaling. + xfrac = 0, # Probability multiplier for fractional translation. + scale_std = 0.2, # Log2 standard deviation of isotropic scaling. + rotate_max = 1, # Range of arbitrary rotation, 1 = full circle. + aniso_std = 0.2, # Log2 standard deviation of anisotropic scaling. + xfrac_std = 0.125, # Standard deviation of frational translation, relative to image dimensions. + + # Color transformations. + brightness = 0, # Probability multiplier for brightness. + contrast = 0, # Probability multiplier for contrast. + lumaflip = 0, # Probability multiplier for luma flip. + hue = 0, # Probability multiplier for hue rotation. + saturation = 0, # Probability multiplier for saturation. + brightness_std = 0.2, # Standard deviation of brightness. + contrast_std = 0.5, # Log2 standard deviation of contrast. + hue_max = 1, # Range of hue rotation, 1 = full circle. + saturation_std = 1, # Log2 standard deviation of saturation. + + # Image-space filtering. + imgfilter = 0, # Probability multiplier for image-space filtering. + imgfilter_bands = [1,1,1,1], # Probability multipliers for individual frequency bands. + imgfilter_std = 1, # Log2 standard deviation of image-space filter amplification. + + # Image-space corruptions. + noise = 0, # Probability multiplier for additive RGB noise. + cutout = 0, # Probability multiplier for cutout. + noise_std = 0.1, # Standard deviation of additive RGB noise. + cutout_size = 0.5, # Size of the cutout rectangle, relative to image dimensions. +): + # Determine input shape. + batch, channels, height, width = images.shape.as_list() + if batch is None: + batch = tf.shape(images)[0] + + # ------------------------------------- + # Select parameters for pixel blitting. + # ------------------------------------- + + # Initialize inverse homogeneous 2D transform: G_inv @ pixel_out ==> pixel_in + I_3 = tf.eye(3, batch_shape=[batch]) + G_inv = I_3 + + # Apply x-flip with probability (xflip * strength). + if xflip > 0: + i = tf.floor(tf.random_uniform([batch], 0, 2)) + i = gate_augment_params(xflip * strength, i, 0) + if debug_percentile is not None: + i = tf.floor(tf.broadcast_to(debug_percentile, [batch]) * 2) + G_inv @= scale_2d_inv(1 - 2 * i, 1) + + # Apply 90 degree rotations with probability (rotate90 * strength). + if rotate90 > 0: + i = tf.floor(tf.random_uniform([batch], 0, 4)) + i = gate_augment_params(rotate90 * strength, i, 0) + if debug_percentile is not None: + i = tf.floor(tf.broadcast_to(debug_percentile, [batch]) * 4) + G_inv @= rotate_2d_inv(-np.pi / 2 * i) + + # Apply integer translation with probability (xint * strength). 
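+    # The integer-translation block below follows the same pattern as the flips and
+    # rotations above: a per-image parameter is sampled, kept with probability
+    # (multiplier * strength) via gate_augment_params() and otherwise reset to its
+    # identity value, and the *inverse* of the transform is folded into G_inv, which
+    # maps output pixel coordinates back to input pixel coordinates. For example, a
+    # kept sample t = (0.1, -0.05) shifts the image by roughly (0.1*width, -0.05*height)
+    # pixels after tf.rint() rounding (numbers illustrative only).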
+ if xint > 0: + t = tf.random_uniform([batch, 2], -xint_max, xint_max) + t = gate_augment_params(xint * strength, t, 0) + if debug_percentile is not None: + t = (tf.broadcast_to(debug_percentile, [batch, 2]) * 2 - 1) * xint_max + G_inv @= translate_2d_inv(tf.rint(t[:,0] * width), tf.rint(t[:,1] * height)) + + # -------------------------------------------------------- + # Select parameters for general geometric transformations. + # -------------------------------------------------------- + + # Apply isotropic scaling with probability (scale * strength). + if scale > 0: + s = 2 ** tf.random_normal([batch], 0, scale_std) + s = gate_augment_params(scale * strength, s, 1) + if debug_percentile is not None: + s = 2 ** (tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * scale_std) + G_inv @= scale_2d_inv(s, s) + + # Apply pre-rotation with probability p_rot. + p_rot = 1 - tf.sqrt(tf.cast(tf.maximum(1 - rotate * strength, 0), tf.float32)) # P(pre OR post) = p + if rotate > 0: + theta = tf.random_uniform([batch], -np.pi * rotate_max, np.pi * rotate_max) + theta = gate_augment_params(p_rot, theta, 0) + if debug_percentile is not None: + theta = (tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * np.pi * rotate_max + G_inv @= rotate_2d_inv(-theta) # Before anisotropic scaling. + + # Apply anisotropic scaling with probability (aniso * strength). + if aniso > 0: + s = 2 ** tf.random_normal([batch], 0, aniso_std) + s = gate_augment_params(aniso * strength, s, 1) + if debug_percentile is not None: + s = 2 ** (tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * aniso_std) + G_inv @= scale_2d_inv(s, 1 / s) + + # Apply post-rotation with probability p_rot. + if rotate > 0: + theta = tf.random_uniform([batch], -np.pi * rotate_max, np.pi * rotate_max) + theta = gate_augment_params(p_rot, theta, 0) + if debug_percentile is not None: + theta = tf.zeros([batch]) + G_inv @= rotate_2d_inv(-theta) # After anisotropic scaling. + + # Apply fractional translation with probability (xfrac * strength). + if xfrac > 0: + t = tf.random_normal([batch, 2], 0, xfrac_std) + t = gate_augment_params(xfrac * strength, t, 0) + if debug_percentile is not None: + t = tflib.erfinv(tf.broadcast_to(debug_percentile, [batch, 2]) * 2 - 1) * xfrac_std + G_inv @= translate_2d_inv(t[:,0] * width, t[:,1] * height) + + # ---------------------------------- + # Execute geometric transformations. + # ---------------------------------- + + # Execute if the transform is not identity. + if G_inv is not I_3: + + # Setup orthogonal lowpass filter. + Hz = wavelets['sym6'] + Hz = np.asarray(Hz, dtype=np.float32) + Hz = np.reshape(Hz, [-1, 1, 1]).repeat(channels, axis=1) # [tap, channel, 1] + Hz_pad = Hz.shape[0] // 4 + + # Calculate padding. + cx = (width - 1) / 2 + cy = (height - 1) / 2 + cp = np.transpose([[-cx, -cy, 1], [cx, -cy, 1], [cx, cy, 1], [-cx, cy, 1]]) # [xyz, idx] + cp = G_inv @ cp[np.newaxis] # [batch, xyz, idx] + cp = cp[:, :2, :] # [batch, xy, idx] + m_lo = tf.ceil(tf.reduce_max(-cp, axis=[0,2]) - [cx, cy] + Hz_pad * 2) + m_hi = tf.ceil(tf.reduce_max( cp, axis=[0,2]) - [cx, cy] + Hz_pad * 2) + m_lo = tf.clip_by_value(m_lo, [0, 0], [width-1, height-1]) + m_hi = tf.clip_by_value(m_hi, [0, 0], [width-1, height-1]) + + # Pad image and adjust origin. 
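+        # Execution strategy for the accumulated warp (overview of the steps below):
+        # the image is reflection-padded by margins derived from the transformed corner
+        # points (so the warp never samples outside valid data), upsampled 2x through
+        # the orthogonal 'sym6' low-pass filter, warped once with a single homography,
+        # then low-pass filtered again, downsampled 2x and cropped back to height x width.
+        # Doing the warp at 2x resolution with wavelet pre/post filtering is what keeps
+        # the geometric augmentations approximately alias-free.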
+ images = tf.transpose(images, [0, 2, 3, 1]) # NCHW => NHWC + pad = [[0, 0], [m_lo[1], m_hi[1]], [m_lo[0], m_hi[0]], [0, 0]] + images = tf.pad(tensor=images, paddings=pad, mode='REFLECT') + T_in = translate_2d(cx + m_lo[0], cy + m_lo[1]) + T_out = translate_2d_inv(cx + Hz_pad, cy + Hz_pad) + G_inv = T_in @ G_inv @ T_out + + # Upsample. + shape = [batch, tf.shape(images)[1] * 2, tf.shape(images)[2] * 2, channels] + images = tf.nn.depthwise_conv2d_backprop_input(input_sizes=shape, filter=Hz[np.newaxis, :], out_backprop=images, strides=[1,2,2,1], padding='SAME', data_format='NHWC') + images = tf.nn.depthwise_conv2d_backprop_input(input_sizes=shape, filter=Hz[:, np.newaxis], out_backprop=images, strides=[1,1,1,1], padding='SAME', data_format='NHWC') + G_inv = scale_2d(2, 2) @ G_inv @ scale_2d_inv(2, 2) # Account for the increased resolution. + + # Execute transformation. + transforms = tf.reshape(G_inv, [-1, 9])[:, :8] + shape = [(height + Hz_pad * 2) * 2, (width + Hz_pad * 2) * 2] + images = tf.contrib.image.transform(images=images, transforms=transforms, output_shape=shape, interpolation='BILINEAR') + + # Downsample and crop. + images = tf.nn.depthwise_conv2d(input=images, filter=Hz[np.newaxis,:], strides=[1,1,1,1], padding='SAME', data_format='NHWC') + images = tf.nn.depthwise_conv2d(input=images, filter=Hz[:,np.newaxis], strides=[1,2,2,1], padding='SAME', data_format='NHWC') + images = images[:, Hz_pad : height + Hz_pad, Hz_pad : width + Hz_pad, :] + images = tf.transpose(images, [0, 3, 1, 2]) # NHWC => NCHW + + # -------------------------------------------- + # Select parameters for color transformations. + # -------------------------------------------- + + # Initialize homogeneous 3D transformation matrix: C @ color_in ==> color_out + I_4 = tf.eye(4, batch_shape=[batch]) + C = I_4 + + # Apply brightness with probability (brightness * strength). + if brightness > 0: + b = tf.random_normal([batch], 0, brightness_std) + b = gate_augment_params(brightness * strength, b, 0) + if debug_percentile is not None: + b = tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * brightness_std + C = translate_3d(b, b, b) @ C + + # Apply contrast with probability (contrast * strength). + if contrast > 0: + c = 2 ** tf.random_normal([batch], 0, contrast_std) + c = gate_augment_params(contrast * strength, c, 1) + if debug_percentile is not None: + c = 2 ** (tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * contrast_std) + C = scale_3d(c, c, c) @ C + + # Apply luma flip with probability (lumaflip * strength). + v = np.array([1, 1, 1, 0]) / np.sqrt(3) # Luma axis. + if lumaflip > 0: + i = tf.floor(tf.random_uniform([batch], 0, 2)) + i = gate_augment_params(lumaflip * strength, i, 0) + if debug_percentile is not None: + i = tf.floor(tf.broadcast_to(debug_percentile, [batch]) * 2) + i = tf.reshape(i, [batch, 1, 1]) + C = (I_4 - 2 * np.outer(v, v) * i) @ C # Householder reflection. + + # Apply hue rotation with probability (hue * strength). + if hue > 0 and channels > 1: + theta = tf.random_uniform([batch], -np.pi * hue_max, np.pi * hue_max) + theta = gate_augment_params(hue * strength, theta, 0) + if debug_percentile is not None: + theta = (tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * np.pi * hue_max + C = rotate_3d(v, theta) @ C # Rotate around v. + + # Apply saturation with probability (saturation * strength). 
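+    # The saturation step below completes the homogeneous 4x4 color matrix C that the
+    # flip/brightness/contrast/hue steps above have been composing; C is applied to all
+    # pixels in a single matmul further down. For the saturation factor itself,
+    # C_sat = v v^T + (I - v v^T) * s with v the luma axis: s = 0 collapses RGB onto the
+    # grayscale axis, s = 1 is the identity, and s > 1 exaggerates color differences
+    # (the log2-normal sampling makes both directions equally likely).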
+ if saturation > 0 and channels > 1: + s = 2 ** tf.random_normal([batch], 0, saturation_std) + s = gate_augment_params(saturation * strength, s, 1) + if debug_percentile is not None: + s = 2 ** (tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * saturation_std) + s = tf.reshape(s, [batch, 1, 1]) + C = (np.outer(v, v) + (I_4 - np.outer(v, v)) * s) @ C + + # ------------------------------ + # Execute color transformations. + # ------------------------------ + + # Execute if the transform is not identity. + if C is not I_4: + images = tf.reshape(images, [batch, channels, height * width]) + if channels == 3: + images = C[:, :3, :3] @ images + C[:, :3, 3:] + elif channels == 1: + C = tf.reduce_mean(C[:, :3, :], axis=1, keepdims=True) + images = images * tf.reduce_sum(C[:, :, :3], axis=2, keepdims=True) + C[:, :, 3:] + else: + raise ValueError('Image must be RGB (3 channels) or L (1 channel)') + images = tf.reshape(images, [batch, channels, height, width]) + + # ---------------------- + # Image-space filtering. + # ---------------------- + + if imgfilter > 0: + num_bands = 4 + assert len(imgfilter_bands) == num_bands + expected_power = np.array([10, 1, 1, 1]) / 13 # Expected power spectrum (1/f). + + # Apply amplification for each band with probability (imgfilter * strength * band_strength). + g = tf.ones([batch, num_bands]) # Global gain vector (identity). + for i, band_strength in enumerate(imgfilter_bands): + t_i = 2 ** tf.random_normal([batch], 0, imgfilter_std) + t_i = gate_augment_params(imgfilter * strength * band_strength, t_i, 1) + if debug_percentile is not None: + t_i = 2 ** (tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * imgfilter_std) if band_strength > 0 else tf.ones([batch]) + t = tf.ones([batch, num_bands]) # Temporary gain vector. + t = tf.concat([t[:, :i], t_i[:, np.newaxis], t[:, i+1:]], axis=-1) # Replace i'th element. + t /= tf.sqrt(tf.reduce_sum(expected_power * tf.square(t), axis=-1, keepdims=True)) # Normalize power. + g *= t # Accumulate into global gain. + + # Construct filter bank. + Hz_lo = wavelets['sym2'] + Hz_lo = np.asarray(Hz_lo, dtype=np.float32) # H(z) + Hz_hi = Hz_lo * ((-1) ** np.arange(Hz_lo.size)) # H(-z) + Hz_lo2 = np.convolve(Hz_lo, Hz_lo[::-1]) / 2 # H(z) * H(z^-1) / 2 + Hz_hi2 = np.convolve(Hz_hi, Hz_hi[::-1]) / 2 # H(-z) * H(-z^-1) / 2 + Hz_bands = np.eye(num_bands, 1) # Bandpass(H(z), b_i) + for i in range(1, num_bands): + Hz_bands = np.dstack([Hz_bands, np.zeros_like(Hz_bands)]).reshape(num_bands, -1)[:, :-1] + Hz_bands = scipy.signal.convolve(Hz_bands, [Hz_lo2]) + Hz_bands[i, (Hz_bands.shape[1] - Hz_hi2.size) // 2 : (Hz_bands.shape[1] + Hz_hi2.size) // 2] += Hz_hi2 + + # Construct combined amplification filter. + Hz_prime = g @ Hz_bands # [batch, tap] + Hz_prime = tf.transpose(Hz_prime) # [tap, batch] + Hz_prime = tf.tile(Hz_prime[:, :, np.newaxis], [1, 1, channels]) # [tap, batch, channels] + Hz_prime = tf.reshape(Hz_prime, [-1, batch * channels, 1]) # [tap, batch * channels, 1] + + # Apply filter. 
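+        # The convolutions below apply a different 1-D kernel to every (image, channel)
+        # pair even though tf.nn.depthwise_conv2d supports only one kernel per channel:
+        # the batch is folded into the channel axis ([batch, C, H, W] -> [1, batch*C, H, W])
+        # and Hz_prime is laid out to match, so a separable depthwise convolution
+        # (horizontal pass, then vertical pass) realizes the per-image amplification filter.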
+ images = tf.reshape(images, [1, -1, height, width]) + pad = Hz_bands.shape[1] // 2 + pad = [[0,0], [0,0], [pad, pad], [pad, pad]] + images = tf.pad(tensor=images, paddings=pad, mode='REFLECT') + images = tf.nn.depthwise_conv2d(input=images, filter=Hz_prime[np.newaxis,:], strides=[1,1,1,1], padding='VALID', data_format='NCHW') + images = tf.nn.depthwise_conv2d(input=images, filter=Hz_prime[:,np.newaxis], strides=[1,1,1,1], padding='VALID', data_format='NCHW') + images = tf.reshape(images, [-1, channels, height, width]) + + # ------------------------ + # Image-space corruptions. + # ------------------------ + + # Apply additive RGB noise with probability (noise * strength). + if noise > 0: + sigma = tf.abs(tf.random_normal([batch], 0, noise_std)) + sigma = gate_augment_params(noise * strength, sigma, 0) + if debug_percentile is not None: + sigma = tflib.erfinv(tf.broadcast_to(debug_percentile, [batch])) * noise_std + sigma = tf.reshape(sigma, [-1, 1, 1, 1]) + images += tf.random_normal([batch, channels, height, width]) * sigma + + # Apply cutout with probability (cutout * strength). + if cutout > 0: + size = tf.fill([batch, 2], cutout_size) + size = gate_augment_params(cutout * strength, size, 0) + center = tf.random_uniform([batch, 2], 0, 1) + if debug_percentile is not None: + size = tf.fill([batch, 2], cutout_size) + center = tf.broadcast_to(debug_percentile, [batch, 2]) + size = tf.reshape(size, [batch, 2, 1, 1, 1]) + center = tf.reshape(center, [batch, 2, 1, 1, 1]) + coord_x = tf.reshape(tf.range(width, dtype=tf.float32), [1, 1, 1, width]) + coord_y = tf.reshape(tf.range(height, dtype=tf.float32), [1, 1, height, 1]) + mask_x = (tf.abs((coord_x + 0.5) / width - center[:, 0]) >= size[:, 0] / 2) + mask_y = (tf.abs((coord_y + 0.5) / height - center[:, 1]) >= size[:, 1] / 2) + mask = tf.cast(tf.logical_or(mask_x, mask_y), tf.float32) + images *= mask + + return images, labels + +#---------------------------------------------------------------------------- diff --git a/training/dataset.py b/training/dataset.py new file mode 100755 index 00000000..b96876ed --- /dev/null +++ b/training/dataset.py @@ -0,0 +1,233 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Streaming images and labels from dataset created with dataset_tool.py.""" + +import os +import glob +import numpy as np +import tensorflow as tf +import dnnlib.tflib as tflib + +#---------------------------------------------------------------------------- +# Dataset class that loads images from tfrecords files. + +class TFRecordDataset: + def __init__(self, + tfrecord_dir, # Directory containing a collection of tfrecords files. + resolution = None, # Dataset resolution, None = autodetect. + label_file = None, # Relative path of the labels file, None = autodetect. + max_label_size = 0, # 0 = no labels, 'full' = full labels, = N first label components. + max_images = None, # Maximum number of images to use, None = use all images. + max_validation = 10000, # Maximum size of the validation set, None = use all available images. + mirror_augment = False, # Apply mirror augment? + repeat = True, # Repeat dataset indefinitely? + shuffle = True, # Shuffle images? 
+ shuffle_mb = 4096, # Shuffle data within specified window (megabytes), 0 = disable shuffling. + prefetch_mb = 2048, # Amount of data to prefetch (megabytes), 0 = disable prefetching. + buffer_mb = 256, # Read buffer size (megabytes). + num_threads = 2, # Number of concurrent threads. + _is_validation = False, +): + self.tfrecord_dir = tfrecord_dir + self.resolution = None + self.resolution_log2 = None + self.shape = [] # [channels, height, width] + self.dtype = 'uint8' + self.label_file = label_file + self.label_size = None # components + self.label_dtype = None + self.has_validation_set = None + self.mirror_augment = mirror_augment + self.repeat = repeat + self.shuffle = shuffle + self._max_validation = max_validation + self._np_labels = None + self._tf_minibatch_in = None + self._tf_labels_var = None + self._tf_labels_dataset = None + self._tf_datasets = dict() + self._tf_iterator = None + self._tf_init_ops = dict() + self._tf_minibatch_np = None + self._cur_minibatch = -1 + self._cur_lod = -1 + + # List files in the dataset directory. + assert os.path.isdir(self.tfrecord_dir) + all_files = sorted(glob.glob(os.path.join(self.tfrecord_dir, '*'))) + self.has_validation_set = (self._max_validation > 0) and any(os.path.basename(f).startswith('validation-') for f in all_files) + all_files = [f for f in all_files if os.path.basename(f).startswith('validation-') == _is_validation] + + # Inspect tfrecords files. + tfr_files = [f for f in all_files if f.endswith('.tfrecords')] + assert len(tfr_files) >= 1 + tfr_shapes = [] + for tfr_file in tfr_files: + tfr_opt = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.NONE) + for record in tf.python_io.tf_record_iterator(tfr_file, tfr_opt): + tfr_shapes.append(self.parse_tfrecord_np(record).shape) + break + + # Autodetect label filename. + if self.label_file is None: + guess = [f for f in all_files if f.endswith('.labels')] + if len(guess): + self.label_file = guess[0] + elif not os.path.isfile(self.label_file): + guess = os.path.join(self.tfrecord_dir, self.label_file) + if os.path.isfile(guess): + self.label_file = guess + + # Determine shape and resolution. + max_shape = max(tfr_shapes, key=np.prod) + self.resolution = resolution if resolution is not None else max_shape[1] + self.resolution_log2 = int(np.log2(self.resolution)) + self.shape = [max_shape[0], self.resolution, self.resolution] + tfr_lods = [self.resolution_log2 - int(np.log2(shape[1])) for shape in tfr_shapes] + assert all(shape[0] == max_shape[0] for shape in tfr_shapes) + assert all(shape[1] == shape[2] for shape in tfr_shapes) + assert all(shape[1] == self.resolution // (2**lod) for shape, lod in zip(tfr_shapes, tfr_lods)) + assert all(lod in tfr_lods for lod in range(self.resolution_log2 - 1)) + + # Load labels. + assert max_label_size == 'full' or max_label_size >= 0 + self._np_labels = np.zeros([1<<30, 0], dtype=np.float32) + if self.label_file is not None and max_label_size != 0: + self._np_labels = np.load(self.label_file) + assert self._np_labels.ndim == 2 + if max_label_size != 'full' and self._np_labels.shape[1] > max_label_size: + self._np_labels = self._np_labels[:, :max_label_size] + if max_images is not None and self._np_labels.shape[0] > max_images: + self._np_labels = self._np_labels[:max_images] + self.label_size = self._np_labels.shape[1] + self.label_dtype = self._np_labels.dtype.name + + # Build TF expressions. 
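+ # Each tfrecords file (one per level of detail) gets its own tf.data pipeline below:
+ # parse -> zip with the label variable -> shuffle/repeat/prefetch -> batch. A single
+ # reinitializable iterator is shared between the pipelines, and configure() switches
+ # between them by running the initializer for the requested lod and minibatch size.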
+ with tf.name_scope('Dataset'), tf.device('/cpu:0'), tf.control_dependencies(None): + self._tf_minibatch_in = tf.placeholder(tf.int64, name='minibatch_in', shape=[]) + self._tf_labels_var = tflib.create_var_with_large_initial_value(self._np_labels, name='labels_var') + self._tf_labels_dataset = tf.data.Dataset.from_tensor_slices(self._tf_labels_var) + for tfr_file, tfr_shape, tfr_lod in zip(tfr_files, tfr_shapes, tfr_lods): + if tfr_lod < 0: + continue + dset = tf.data.TFRecordDataset(tfr_file, compression_type='', buffer_size=buffer_mb<<20) + if max_images is not None: + dset = dset.take(max_images) + dset = dset.map(self.parse_tfrecord_tf, num_parallel_calls=num_threads) + dset = tf.data.Dataset.zip((dset, self._tf_labels_dataset)) + bytes_per_item = np.prod(tfr_shape) * np.dtype(self.dtype).itemsize + if self.shuffle and shuffle_mb > 0: + dset = dset.shuffle(((shuffle_mb << 20) - 1) // bytes_per_item + 1) + if self.repeat: + dset = dset.repeat() + if prefetch_mb > 0: + dset = dset.prefetch(((prefetch_mb << 20) - 1) // bytes_per_item + 1) + dset = dset.batch(self._tf_minibatch_in) + self._tf_datasets[tfr_lod] = dset + self._tf_iterator = tf.data.Iterator.from_structure(self._tf_datasets[0].output_types, self._tf_datasets[0].output_shapes) + self._tf_init_ops = {lod: self._tf_iterator.make_initializer(dset) for lod, dset in self._tf_datasets.items()} + + def close(self): + pass + + # Use the given minibatch size and level-of-detail for the data returned by get_minibatch_tf(). + def configure(self, minibatch_size, lod=0): + lod = int(np.floor(lod)) + assert minibatch_size >= 1 and lod in self._tf_datasets + if self._cur_minibatch != minibatch_size or self._cur_lod != lod: + self._tf_init_ops[lod].run({self._tf_minibatch_in: minibatch_size}) + self._cur_minibatch = minibatch_size + self._cur_lod = lod + + # Get next minibatch as TensorFlow expressions. + def get_minibatch_tf(self): + images, labels = self._tf_iterator.get_next() + if self.mirror_augment: + images = tf.cast(images, tf.float32) + images = tf.where(tf.random_uniform([tf.shape(images)[0]]) < 0.5, images, tf.reverse(images, [3])) + images = tf.cast(images, self.dtype) + return images, labels + + # Get next minibatch as NumPy arrays. + def get_minibatch_np(self, minibatch_size, lod=0): # => (images, labels) or (None, None) + self.configure(minibatch_size, lod) + if self._tf_minibatch_np is None: + with tf.name_scope('Dataset'): + self._tf_minibatch_np = self.get_minibatch_tf() + try: + return tflib.run(self._tf_minibatch_np) + except tf.errors.OutOfRangeError: + return None, None + + # Get random labels as TensorFlow expression. + def get_random_labels_tf(self, minibatch_size): # => labels + with tf.name_scope('Dataset'): + if self.label_size > 0: + with tf.device('/cpu:0'): + return tf.gather(self._tf_labels_var, tf.random_uniform([minibatch_size], 0, self._np_labels.shape[0], dtype=tf.int32)) + return tf.zeros([minibatch_size, 0], self.label_dtype) + + # Get random labels as NumPy array. + def get_random_labels_np(self, minibatch_size): # => labels + if self.label_size > 0: + return self._np_labels[np.random.randint(self._np_labels.shape[0], size=[minibatch_size])] + return np.zeros([minibatch_size, 0], self.label_dtype) + + # Load validation set as NumPy array. 
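+ # The validation set is read by constructing a second TFRecordDataset over the
+ # 'validation-*' records with shuffling and repetition disabled, then pulling one
+ # image at a time until the iterator is exhausted.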
+ def load_validation_set_np(self): + images = [] + labels = [] + if self.has_validation_set: + validation_set = TFRecordDataset( + tfrecord_dir=self.tfrecord_dir, resolution=self.shape[2], max_label_size=self.label_size, + max_images=self._max_validation, repeat=False, shuffle=False, prefetch_mb=0, _is_validation=True) + validation_set.configure(1) + while True: + image, label = validation_set.get_minibatch_np(1) + if image is None: + break + images.append(image) + labels.append(label) + images = np.concatenate(images, axis=0) if len(images) else np.zeros([0] + self.shape, dtype=self.dtype) + labels = np.concatenate(labels, axis=0) if len(labels) else np.zeros([0, self.label_size], self.label_dtype) + assert list(images.shape[1:]) == self.shape + assert labels.shape[1] == self.label_size + assert images.shape[0] <= self._max_validation + return images, labels + + # Parse individual image from a tfrecords file into TensorFlow expression. + @staticmethod + def parse_tfrecord_tf(record): + features = tf.parse_single_example(record, features={ + 'shape': tf.FixedLenFeature([3], tf.int64), + 'data': tf.FixedLenFeature([], tf.string)}) + data = tf.decode_raw(features['data'], tf.uint8) + return tf.reshape(data, features['shape']) + + # Parse individual image from a tfrecords file into NumPy array. + @staticmethod + def parse_tfrecord_np(record): + ex = tf.train.Example() + ex.ParseFromString(record) + shape = ex.features.feature['shape'].int64_list.value # pylint: disable=no-member + data = ex.features.feature['data'].bytes_list.value[0] # pylint: disable=no-member + return np.fromstring(data, np.uint8).reshape(shape) + +#---------------------------------------------------------------------------- +# Construct a dataset object using the given options. + +def load_dataset(path=None, resolution=None, max_images=None, max_label_size=0, mirror_augment=False, repeat=True, shuffle=True, seed=None): + _ = seed + assert os.path.isdir(path) + return TFRecordDataset( + tfrecord_dir=path, + resolution=resolution, max_images=max_images, max_label_size=max_label_size, + mirror_augment=mirror_augment, repeat=repeat, shuffle=shuffle) + +#---------------------------------------------------------------------------- diff --git a/training/loss.py b/training/loss.py new file mode 100755 index 00000000..9d819d29 --- /dev/null +++ b/training/loss.py @@ -0,0 +1,307 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Loss functions.""" + +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib +from dnnlib.tflib.autosummary import autosummary + +#---------------------------------------------------------------------------- +# Report statistic for all interested parties (AdaptiveAugment and tfevents). + +def report_stat(aug, name, value): + if aug is not None: + value = aug.report_stat(name, value) + value = autosummary(name, value) + return value + +#---------------------------------------------------------------------------- +# Report loss terms and collect them into EasyDict. 
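+# The helper below bundles the generator and discriminator losses (plus optional lazy
+# regularization terms) into an EasyDict, routing each value through report_stat() so that
+# AdaptiveAugment and the tfevents summaries observe the same statistics.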
+ +def report_loss(aug, G_loss, D_loss, G_reg=None, D_reg=None): + assert G_loss is not None and D_loss is not None + terms = dnnlib.EasyDict(G_reg=None, D_reg=None) + terms.G_loss = report_stat(aug, 'Loss/G/loss', G_loss) + terms.D_loss = report_stat(aug, 'Loss/D/loss', D_loss) + if G_reg is not None: terms.G_reg = report_stat(aug, 'Loss/G/reg', G_reg) + if D_reg is not None: terms.D_reg = report_stat(aug, 'Loss/D/reg', D_reg) + return terms + +#---------------------------------------------------------------------------- +# Evaluate G and return results as EasyDict. + +def eval_G(G, latents, labels, return_dlatents=False): + r = dnnlib.EasyDict() + r.args = dnnlib.EasyDict() + r.args.is_training = True + if return_dlatents: + r.args.return_dlatents = True + r.images = G.get_output_for(latents, labels, **r.args) + + r.dlatents = None + if return_dlatents: + r.images, r.dlatents = r.images + return r + +#---------------------------------------------------------------------------- +# Evaluate D and return results as EasyDict. + +def eval_D(D, aug, images, labels, report=None, augment_inputs=True, return_aux=0): + r = dnnlib.EasyDict() + r.images_aug = images + r.labels_aug = labels + if augment_inputs and aug is not None: + r.images_aug, r.labels_aug = aug.apply(r.images_aug, r.labels_aug) + + r.args = dnnlib.EasyDict() + r.args.is_training = True + if aug is not None: + r.args.augment_strength = aug.get_strength_var() + if return_aux > 0: + r.args.score_size = return_aux + 1 + r.scores = D.get_output_for(r.images_aug, r.labels_aug, **r.args) + + r.aux = None + if return_aux: + r.aux = r.scores[:, 1:] + r.scores = r.scores[:, :1] + + if report is not None: + report_ops = [ + report_stat(aug, 'Loss/scores/' + report, r.scores), + report_stat(aug, 'Loss/signs/' + report, tf.sign(r.scores)), + report_stat(aug, 'Loss/squares/' + report, tf.square(r.scores)), + ] + with tf.control_dependencies(report_ops): + r.scores = tf.identity(r.scores) + return r + +#---------------------------------------------------------------------------- +# Non-saturating logistic loss with R1 and path length regularizers, used +# in the paper "Analyzing and Improving the Image Quality of StyleGAN". + +def stylegan2(G, D, aug, fake_labels, real_images, real_labels, r1_gamma=10, pl_minibatch_shrink=2, pl_decay=0.01, pl_weight=2, **_kwargs): + # Evaluate networks for the main loss. + minibatch_size = tf.shape(fake_labels)[0] + fake_latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + G_fake = eval_G(G, fake_latents, fake_labels, return_dlatents=True) + D_fake = eval_D(D, aug, G_fake.images, fake_labels, report='fake') + D_real = eval_D(D, aug, real_images, real_labels, report='real') + + # Non-saturating logistic loss from "Generative Adversarial Nets". + with tf.name_scope('Loss_main'): + G_loss = tf.nn.softplus(-D_fake.scores) # -log(sigmoid(D_fake.scores)), pylint: disable=invalid-unary-operand-type + D_loss = tf.nn.softplus(D_fake.scores) # -log(1 - sigmoid(D_fake.scores)) + D_loss += tf.nn.softplus(-D_real.scores) # -log(sigmoid(D_real.scores)), pylint: disable=invalid-unary-operand-type + G_reg = 0 + D_reg = 0 + + # R1 regularizer from "Which Training Methods for GANs do actually Converge?". 
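+ # R1 penalizes the squared gradient norm of D's score with respect to the real images,
+ # weighted by r1_gamma / 2. It is returned via D_reg so that, with lazy regularization,
+ # it only needs to be evaluated every D_reg_interval minibatches (see training_loop.py).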
+ if r1_gamma != 0: + with tf.name_scope('Loss_R1'): + r1_grads = tf.gradients(tf.reduce_sum(D_real.scores), [real_images])[0] + r1_penalty = tf.reduce_sum(tf.square(r1_grads), axis=[1,2,3]) + r1_penalty = report_stat(aug, 'Loss/r1_penalty', r1_penalty) + D_reg += r1_penalty * (r1_gamma * 0.5) + + # Path length regularizer from "Analyzing and Improving the Image Quality of StyleGAN". + if pl_weight != 0: + with tf.name_scope('Loss_PL'): + + # Evaluate the regularization term using a smaller minibatch to conserve memory. + G_pl = G_fake + if pl_minibatch_shrink > 1: + pl_minibatch_size = minibatch_size // pl_minibatch_shrink + pl_latents = fake_latents[:pl_minibatch_size] + pl_labels = fake_labels[:pl_minibatch_size] + G_pl = eval_G(G, pl_latents, pl_labels, return_dlatents=True) + + # Compute |J*y|. + pl_noise = tf.random_normal(tf.shape(G_pl.images)) / np.sqrt(np.prod(G.output_shape[2:])) + pl_grads = tf.gradients(tf.reduce_sum(G_pl.images * pl_noise), [G_pl.dlatents])[0] + pl_lengths = tf.sqrt(tf.reduce_mean(tf.reduce_sum(tf.square(pl_grads), axis=2), axis=1)) + + # Track exponential moving average of |J*y|. + with tf.control_dependencies(None): + pl_mean_var = tf.Variable(name='pl_mean', trainable=False, initial_value=0, dtype=tf.float32) + pl_mean = pl_mean_var + pl_decay * (tf.reduce_mean(pl_lengths) - pl_mean_var) + pl_update = tf.assign(pl_mean_var, pl_mean) + + # Calculate (|J*y|-a)^2. + with tf.control_dependencies([pl_update]): + pl_penalty = tf.square(pl_lengths - pl_mean) + pl_penalty = report_stat(aug, 'Loss/pl_penalty', pl_penalty) + + # Apply weight. + # + # Note: The division in pl_noise decreases the weight by num_pixels, and the reduce_mean + # in pl_lengths decreases it by num_affine_layers. The effective weight then becomes: + # + # gamma_pl = pl_weight / num_pixels / num_affine_layers + # = 2 / (r^2) / (log2(r) * 2 - 2) + # = 1 / (r^2 * (log2(r) - 1)) + # = ln(2) / (r^2 * (ln(r) - ln(2)) + # + G_reg += tf.tile(pl_penalty, [pl_minibatch_shrink]) * pl_weight + + return report_loss(aug, G_loss, D_loss, G_reg, D_reg) + +#---------------------------------------------------------------------------- +# Hybrid loss used for comparison methods used in the paper +# "Training Generative Adversarial Networks with Limited Data". + +def cmethods(G, D, aug, fake_labels, real_images, real_labels, + r1_gamma=10, r2_gamma=0, + pl_minibatch_shrink=2, pl_decay=0.01, pl_weight=2, + bcr_real_weight=0, bcr_fake_weight=0, bcr_augment=None, + zcr_gen_weight=0, zcr_dis_weight=0, zcr_noise_std=0.1, + auxrot_alpha=0, auxrot_beta=0, + **_kwargs, +): + # Evaluate networks for the main loss. + minibatch_size = tf.shape(fake_labels)[0] + fake_latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + G_fake = eval_G(G, fake_latents, fake_labels) + D_fake = eval_D(D, aug, G_fake.images, fake_labels, report='fake') + D_real = eval_D(D, aug, real_images, real_labels, report='real') + + # Non-saturating logistic loss from "Generative Adversarial Nets". + with tf.name_scope('Loss_main'): + G_loss = tf.nn.softplus(-D_fake.scores) # -log(sigmoid(D_fake.scores)), pylint: disable=invalid-unary-operand-type + D_loss = tf.nn.softplus(D_fake.scores) # -log(1 - sigmoid(D_fake.scores)) + D_loss += tf.nn.softplus(-D_real.scores) # -log(sigmoid(D_real.scores)), pylint: disable=invalid-unary-operand-type + G_reg = 0 + D_reg = 0 + + # R1 and R2 regularizers from "Which Training Methods for GANs do actually Converge?". 
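+ # R1 penalizes D's gradient with respect to the real images and R2 the gradient with
+ # respect to the generated images; each term is weighted by its gamma / 2 and added to D_reg.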
+ if r1_gamma != 0 or r2_gamma != 0: + with tf.name_scope('Loss_R1R2'): + if r1_gamma != 0: + r1_grads = tf.gradients(tf.reduce_sum(D_real.scores), [real_images])[0] + r1_penalty = tf.reduce_sum(tf.square(r1_grads), axis=[1,2,3]) + r1_penalty = report_stat(aug, 'Loss/r1_penalty', r1_penalty) + D_reg += r1_penalty * (r1_gamma * 0.5) + if r2_gamma != 0: + r2_grads = tf.gradients(tf.reduce_sum(D_fake.scores), [G_fake.images])[0] + r2_penalty = tf.reduce_sum(tf.square(r2_grads), axis=[1,2,3]) + r2_penalty = report_stat(aug, 'Loss/r2_penalty', r2_penalty) + D_reg += r2_penalty * (r2_gamma * 0.5) + + # Path length regularizer from "Analyzing and Improving the Image Quality of StyleGAN". + if pl_weight != 0: + with tf.name_scope('Loss_PL'): + pl_minibatch_size = minibatch_size // pl_minibatch_shrink + pl_latents = fake_latents[:pl_minibatch_size] + pl_labels = fake_labels[:pl_minibatch_size] + G_pl = eval_G(G, pl_latents, pl_labels, return_dlatents=True) + pl_noise = tf.random_normal(tf.shape(G_pl.images)) / np.sqrt(np.prod(G.output_shape[2:])) + pl_grads = tf.gradients(tf.reduce_sum(G_pl.images * pl_noise), [G_pl.dlatents])[0] + pl_lengths = tf.sqrt(tf.reduce_mean(tf.reduce_sum(tf.square(pl_grads), axis=2), axis=1)) + with tf.control_dependencies(None): + pl_mean_var = tf.Variable(name='pl_mean', trainable=False, initial_value=0, dtype=tf.float32) + pl_mean = pl_mean_var + pl_decay * (tf.reduce_mean(pl_lengths) - pl_mean_var) + pl_update = tf.assign(pl_mean_var, pl_mean) + with tf.control_dependencies([pl_update]): + pl_penalty = tf.square(pl_lengths - pl_mean) + pl_penalty = report_stat(aug, 'Loss/pl_penalty', pl_penalty) + G_reg += tf.tile(pl_penalty, [pl_minibatch_shrink]) * pl_weight + + # bCR regularizer from "Improved consistency regularization for GANs". + if (bcr_real_weight != 0 or bcr_fake_weight != 0) and bcr_augment is not None: + with tf.name_scope('Loss_bCR'): + if bcr_real_weight != 0: + bcr_real_images, bcr_real_labels = dnnlib.util.call_func_by_name(D_real.images_aug, D_real.labels_aug, **bcr_augment) + D_bcr_real = eval_D(D, aug, bcr_real_images, bcr_real_labels, report='real_bcr', augment_inputs=False) + bcr_real_penalty = tf.square(D_bcr_real.scores - D_real.scores) + bcr_real_penalty = report_stat(aug, 'Loss/bcr_penalty/real', bcr_real_penalty) + D_loss += bcr_real_penalty * bcr_real_weight # NOTE: Must not use lazy regularization for this term. + if bcr_fake_weight != 0: + bcr_fake_images, bcr_fake_labels = dnnlib.util.call_func_by_name(D_fake.images_aug, D_fake.labels_aug, **bcr_augment) + D_bcr_fake = eval_D(D, aug, bcr_fake_images, bcr_fake_labels, report='fake_bcr', augment_inputs=False) + bcr_fake_penalty = tf.square(D_bcr_fake.scores - D_fake.scores) + bcr_fake_penalty = report_stat(aug, 'Loss/bcr_penalty/fake', bcr_fake_penalty) + D_loss += bcr_fake_penalty * bcr_fake_weight # NOTE: Must not use lazy regularization for this term. + + # zCR regularizer from "Improved consistency regularization for GANs". 
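+ # zCR perturbs the latents with Gaussian noise of std zcr_noise_std. The generator is
+ # rewarded for producing different images from the perturbed latents (note the negative
+ # sign on zcr_gen_penalty), while the discriminator is penalized for scoring the original
+ # and perturbed fakes differently.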
+ if zcr_gen_weight != 0 or zcr_dis_weight != 0: + with tf.name_scope('Loss_zCR'): + zcr_fake_latents = fake_latents + tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) * zcr_noise_std + G_zcr = eval_G(G, zcr_fake_latents, fake_labels) + if zcr_gen_weight > 0: + zcr_gen_penalty = -tf.reduce_mean(tf.square(G_fake.images - G_zcr.images), axis=[1,2,3]) + zcr_gen_penalty = report_stat(aug, 'Loss/zcr_gen_penalty', zcr_gen_penalty) + G_loss += zcr_gen_penalty * zcr_gen_weight + if zcr_dis_weight > 0: + D_zcr = eval_D(D, aug, G_zcr.images, fake_labels, report='fake_zcr', augment_inputs=False) + zcr_dis_penalty = tf.square(D_fake.scores - D_zcr.scores) + zcr_dis_penalty = report_stat(aug, 'Loss/zcr_dis_penalty', zcr_dis_penalty) + D_loss += zcr_dis_penalty * zcr_dis_weight + + # Auxiliary rotation loss from "Self-supervised GANs via auxiliary rotation loss". + if auxrot_alpha != 0 or auxrot_beta != 0: + with tf.name_scope('Loss_AuxRot'): + idx = tf.range(minibatch_size * 4, dtype=tf.int32) // minibatch_size + b0 = tf.logical_or(tf.equal(idx, 0), tf.equal(idx, 1)) + b1 = tf.logical_or(tf.equal(idx, 0), tf.equal(idx, 3)) + b2 = tf.logical_or(tf.equal(idx, 0), tf.equal(idx, 2)) + if auxrot_alpha != 0: + auxrot_fake = tf.tile(G_fake.images, [4, 1, 1, 1]) + auxrot_fake = tf.where(b0, auxrot_fake, tf.reverse(auxrot_fake, [2])) + auxrot_fake = tf.where(b1, auxrot_fake, tf.reverse(auxrot_fake, [3])) + auxrot_fake = tf.where(b2, auxrot_fake, tf.transpose(auxrot_fake, [0, 1, 3, 2])) + D_auxrot_fake = eval_D(D, aug, auxrot_fake, fake_labels, return_aux=4) + G_loss += tf.nn.sparse_softmax_cross_entropy_with_logits(labels=idx, logits=D_auxrot_fake.aux) * auxrot_alpha + if auxrot_beta != 0: + auxrot_real = tf.tile(real_images, [4, 1, 1, 1]) + auxrot_real = tf.where(b0, auxrot_real, tf.reverse(auxrot_real, [2])) + auxrot_real = tf.where(b1, auxrot_real, tf.reverse(auxrot_real, [3])) + auxrot_real = tf.where(b2, auxrot_real, tf.transpose(auxrot_real, [0, 1, 3, 2])) + D_auxrot_real = eval_D(D, aug, auxrot_real, real_labels, return_aux=4) + D_loss += tf.nn.sparse_softmax_cross_entropy_with_logits(labels=idx, logits=D_auxrot_real.aux) * auxrot_beta + + return report_loss(aug, G_loss, D_loss, G_reg, D_reg) + +#---------------------------------------------------------------------------- +# WGAN-GP loss with epsilon penalty, used in the paper +# "Progressive Growing of GANs for Improved Quality, Stability, and Variation". + +def wgangp(G, D, aug, fake_labels, real_images, real_labels, wgan_epsilon=0.001, wgan_lambda=10, wgan_target=1, **_kwargs): + minibatch_size = tf.shape(fake_labels)[0] + fake_latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + G_fake = eval_G(G, fake_latents, fake_labels) + D_fake = eval_D(D, aug, G_fake.images, fake_labels, report='fake') + D_real = eval_D(D, aug, real_images, real_labels, report='real') + + # WGAN loss from "Wasserstein Generative Adversarial Networks". + with tf.name_scope('Loss_main'): + G_loss = -D_fake.scores # pylint: disable=invalid-unary-operand-type + D_loss = D_fake.scores - D_real.scores + + # Epsilon penalty from "Progressive Growing of GANs for Improved Quality, Stability, and Variation" + with tf.name_scope('Loss_epsilon'): + epsilon_penalty = report_stat(aug, 'Loss/epsilon_penalty', tf.square(D_real.scores)) + D_loss += epsilon_penalty * wgan_epsilon + + # Gradient penalty from "Improved Training of Wasserstein GANs". 
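+ # The penalty is evaluated on random interpolations between real and generated images;
+ # the gradient norm of D at the mixed images is driven towards wgan_target and the term
+ # is weighted by wgan_lambda / wgan_target**2 before being returned as D_reg.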
+ with tf.name_scope('Loss_GP'): + mix_factors = tf.random_uniform([minibatch_size, 1, 1, 1], 0, 1, dtype=G_fake.images.dtype) + mix_images = tflib.lerp(tf.cast(real_images, G_fake.images.dtype), G_fake.images, mix_factors) + mix_labels = real_labels # NOTE: Mixing is performed without respect to fake_labels. + D_mix = eval_D(D, aug, mix_images, mix_labels, report='mix') + mix_grads = tf.gradients(tf.reduce_sum(D_mix.scores), [mix_images])[0] + mix_norms = tf.sqrt(tf.reduce_sum(tf.square(mix_grads), axis=[1,2,3])) + mix_norms = report_stat(aug, 'Loss/mix_norms', mix_norms) + gradient_penalty = tf.square(mix_norms - wgan_target) + D_reg = gradient_penalty * (wgan_lambda / (wgan_target**2)) + + return report_loss(aug, G_loss, D_loss, None, D_reg) + +#---------------------------------------------------------------------------- diff --git a/training/networks.py b/training/networks.py new file mode 100755 index 00000000..f9bd0614 --- /dev/null +++ b/training/networks.py @@ -0,0 +1,632 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Network architectures from the paper +"Training Generative Adversarial Networks with Limited Data".""" + +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib +from dnnlib.tflib.ops.upfirdn_2d import upsample_2d, downsample_2d, upsample_conv_2d, conv_downsample_2d +from dnnlib.tflib.ops.fused_bias_act import fused_bias_act + +# NOTE: Do not import any application-specific modules here! +# Specify all network parameters as kwargs. + +#---------------------------------------------------------------------------- +# Get/create weight tensor for convolution or fully-connected layer. + +def get_weight(shape, gain=1, equalized_lr=True, lrmul=1, weight_var='weight', trainable=True, use_spectral_norm=False): + fan_in = np.prod(shape[:-1]) # [kernel, kernel, fmaps_in, fmaps_out] for conv2d, [in, out] for fully-connected. + he_std = gain / np.sqrt(fan_in) # He init. + + # Apply equalized learning rate from the paper + # "Progressive Growing of GANs for Improved Quality, Stability, and Variation". + if equalized_lr: + init_std = 1.0 / lrmul + runtime_coef = he_std * lrmul + else: + init_std = he_std / lrmul + runtime_coef = lrmul + + # Create variable. + init = tf.initializers.random_normal(0, init_std) + w = tf.get_variable(weight_var, shape=shape, initializer=init, trainable=trainable) * runtime_coef + if use_spectral_norm: + w = apply_spectral_norm(w, state_var=weight_var+'_sn') + return w + +#---------------------------------------------------------------------------- +# Bias and activation function. + +def apply_bias_act(x, act='linear', gain=None, lrmul=1, clamp=None, bias_var='bias', trainable=True): + b = tf.get_variable(bias_var, shape=[x.shape[1]], initializer=tf.initializers.zeros(), trainable=trainable) * lrmul + return fused_bias_act(x, b=tf.cast(b, x.dtype), act=act, gain=gain, clamp=clamp) + +#---------------------------------------------------------------------------- +# Fully-connected layer. 
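+# dense_layer() flattens any trailing dimensions, fetches an equalized-learning-rate weight
+# via get_weight(), and applies a plain matmul; bias and activation are added separately by
+# apply_bias_act(). For reference, a 3x3 convolution with 512 input maps has
+# fan_in = 3*3*512 = 4608, giving he_std = gain / sqrt(4608), roughly 0.015 * gain.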
+ +def dense_layer(x, fmaps, lrmul=1, weight_var='weight', trainable=True, use_spectral_norm=False): + if len(x.shape) > 2: + x = tf.reshape(x, [-1, np.prod([d.value for d in x.shape[1:]])]) + w = get_weight([x.shape[1].value, fmaps], lrmul=lrmul, weight_var=weight_var, trainable=trainable, use_spectral_norm=use_spectral_norm) + w = tf.cast(w, x.dtype) + return tf.matmul(x, w) + +#---------------------------------------------------------------------------- +# 2D convolution op with optional upsampling, downsampling, and padding. + +def conv2d(x, w, up=False, down=False, resample_kernel=None, padding=0): + assert not (up and down) + kernel = w.shape[0].value + assert w.shape[1].value == kernel + assert kernel >= 1 and kernel % 2 == 1 + + w = tf.cast(w, x.dtype) + if up: + x = upsample_conv_2d(x, w, data_format='NCHW', k=resample_kernel, padding=padding) + elif down: + x = conv_downsample_2d(x, w, data_format='NCHW', k=resample_kernel, padding=padding) + else: + padding_mode = {0: 'SAME', -(kernel // 2): 'VALID'}[padding] + x = tf.nn.conv2d(x, w, data_format='NCHW', strides=[1,1,1,1], padding=padding_mode) + return x + +#---------------------------------------------------------------------------- +# 2D convolution layer. + +def conv2d_layer(x, fmaps, kernel, up=False, down=False, resample_kernel=None, lrmul=1, trainable=True, use_spectral_norm=False): + w = get_weight([kernel, kernel, x.shape[1].value, fmaps], lrmul=lrmul, trainable=trainable, use_spectral_norm=use_spectral_norm) + return conv2d(x, tf.cast(w, x.dtype), up=up, down=down, resample_kernel=resample_kernel) + +#---------------------------------------------------------------------------- +# Modulated 2D convolution layer from the paper +# "Analyzing and Improving Image Quality of StyleGAN". + +def modulated_conv2d_layer(x, y, fmaps, kernel, up=False, down=False, demodulate=True, resample_kernel=None, lrmul=1, fused_modconv=False, trainable=True, use_spectral_norm=False): + assert not (up and down) + assert kernel >= 1 and kernel % 2 == 1 + + # Get weight. + wshape = [kernel, kernel, x.shape[1].value, fmaps] + w = get_weight(wshape, lrmul=lrmul, trainable=trainable, use_spectral_norm=use_spectral_norm) + if x.dtype.name == 'float16' and not fused_modconv and demodulate: + w *= np.sqrt(1 / np.prod(wshape[:-1])) / tf.reduce_max(tf.abs(w), axis=[0,1,2]) # Pre-normalize to avoid float16 overflow. + ww = w[np.newaxis] # [BkkIO] Introduce minibatch dimension. + + # Modulate. + s = dense_layer(y, fmaps=x.shape[1].value, weight_var='mod_weight', trainable=trainable, use_spectral_norm=use_spectral_norm) # [BI] Transform incoming W to style. + s = apply_bias_act(s, bias_var='mod_bias', trainable=trainable) + 1 # [BI] Add bias (initially 1). + if x.dtype.name == 'float16' and not fused_modconv and demodulate: + s *= 1 / tf.reduce_max(tf.abs(s)) # Pre-normalize to avoid float16 overflow. + ww *= tf.cast(s[:, np.newaxis, np.newaxis, :, np.newaxis], w.dtype) # [BkkIO] Scale input feature maps. + + # Demodulate. + if demodulate: + d = tf.rsqrt(tf.reduce_sum(tf.square(ww), axis=[1,2,3]) + 1e-8) # [BO] Scaling factor. + ww *= d[:, np.newaxis, np.newaxis, np.newaxis, :] # [BkkIO] Scale output feature maps. + + # Reshape/scale input. + if fused_modconv: + x = tf.reshape(x, [1, -1, x.shape[2], x.shape[3]]) # Fused => reshape minibatch to convolution groups. 
+ w = tf.reshape(tf.transpose(ww, [1, 2, 3, 0, 4]), [ww.shape[1], ww.shape[2], ww.shape[3], -1]) + else: + x *= tf.cast(s[:, :, np.newaxis, np.newaxis], x.dtype) # [BIhw] Not fused => scale input activations. + + # 2D convolution. + x = conv2d(x, tf.cast(w, x.dtype), up=up, down=down, resample_kernel=resample_kernel) + + # Reshape/scale output. + if fused_modconv: + x = tf.reshape(x, [-1, fmaps, x.shape[2], x.shape[3]]) # Fused => reshape convolution groups back to minibatch. + elif demodulate: + x *= tf.cast(d[:, :, np.newaxis, np.newaxis], x.dtype) # [BOhw] Not fused => scale output activations. + return x + +#---------------------------------------------------------------------------- +# Normalize 2nd raw moment of the given activation tensor along specified axes. + +def normalize_2nd_moment(x, axis=1, eps=1e-8): + return x * tf.rsqrt(tf.reduce_mean(tf.square(x), axis=axis, keepdims=True) + eps) + +#---------------------------------------------------------------------------- +# Minibatch standard deviation layer from the paper +# "Progressive Growing of GANs for Improved Quality, Stability, and Variation". + +def minibatch_stddev_layer(x, group_size=None, num_new_features=1): + if group_size is None: + group_size = tf.shape(x)[0] + else: + group_size = tf.minimum(group_size, tf.shape(x)[0]) # Minibatch must be divisible by (or smaller than) group_size. + + G = group_size + F = num_new_features + _N, C, H, W = x.shape.as_list() + c = C // F + + y = tf.cast(x, tf.float32) # [NCHW] Cast to FP32. + y = tf.reshape(y, [G, -1, F, c, H, W]) # [GnFcHW] Split minibatch N into n groups of size G, and channels C into F groups of size c. + y -= tf.reduce_mean(y, axis=0) # [GnFcHW] Subtract mean over group. + y = tf.reduce_mean(tf.square(y), axis=0) # [nFcHW] Calc variance over group. + y = tf.sqrt(y + 1e-8) # [nFcHW] Calc stddev over group. + y = tf.reduce_mean(y, axis=[2,3,4]) # [nF] Take average over channels and pixels. + y = tf.cast(y, x.dtype) # [nF] Cast back to original data type. + y = tf.reshape(y, [-1, F, 1, 1]) # [nF11] Add missing dimensions. + y = tf.tile(y, [G, 1, H, W]) # [NFHW] Replicate over group and pixels. + return tf.concat([x, y], axis=1) # [NCHW] Append to input as new channels. + +#---------------------------------------------------------------------------- +# Spectral normalization from the paper +# "Spectral Normalization for Generative Adversarial Networks". + +def apply_spectral_norm(w, state_var='sn', iterations=1, eps=1e-8): + fmaps = w.shape[-1].value + w_mat = tf.reshape(w, [-1, fmaps]) + u_var = tf.get_variable(state_var, shape=[1,fmaps], initializer=tf.initializers.random_normal(), trainable=False) + + u = u_var + for _ in range(iterations): + v = tf.matmul(u, w_mat, transpose_b=True) + v *= tf.rsqrt(tf.reduce_sum(tf.square(v)) + eps) + u = tf.matmul(v, w_mat) + sigma_inv = tf.rsqrt(tf.reduce_sum(tf.square(u)) + eps) + u *= sigma_inv + + with tf.control_dependencies([tf.assign(u_var, u)]): + return w * sigma_inv + +#---------------------------------------------------------------------------- +# Main generator network. +# Composed of two sub-networks (mapping and synthesis) that are defined below. + +def G_main( + latents_in, # First input: Latent vectors (Z) [minibatch, latent_size]. + labels_in, # Second input: Conditioning labels [minibatch, label_size]. + + # Evaluation mode. + is_training = False, # Network is under training? Enables and disables specific features. + is_validation = False, # Network is under validation? 
Chooses which value to use for truncation_psi. + return_dlatents = False, # Return dlatents (W) in addition to the images? + + # Truncation & style mixing. + truncation_psi = 0.5, # Style strength multiplier for the truncation trick. None = disable. + truncation_cutoff = None, # Number of layers for which to apply the truncation trick. None = disable. + truncation_psi_val = None, # Value for truncation_psi to use during validation. + truncation_cutoff_val = None, # Value for truncation_cutoff to use during validation. + dlatent_avg_beta = 0.995, # Decay for tracking the moving average of W during training. None = disable. + style_mixing_prob = 0.9, # Probability of mixing styles during training. None = disable. + + # Sub-networks. + components = dnnlib.EasyDict(), # Container for sub-networks. Retained between calls. + mapping_func = 'G_mapping', # Build func name for the mapping network. + synthesis_func = 'G_synthesis', # Build func name for the synthesis network. + is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation. + + **kwargs, # Arguments for sub-networks (mapping and synthesis). +): + # Validate arguments. + assert not is_training or not is_validation + assert isinstance(components, dnnlib.EasyDict) + if is_validation: + truncation_psi = truncation_psi_val + truncation_cutoff = truncation_cutoff_val + if is_training or (truncation_psi is not None and not tflib.is_tf_expression(truncation_psi) and truncation_psi == 1): + truncation_psi = None + if is_training: + truncation_cutoff = None + if not is_training or (dlatent_avg_beta is not None and not tflib.is_tf_expression(dlatent_avg_beta) and dlatent_avg_beta == 1): + dlatent_avg_beta = None + if not is_training or (style_mixing_prob is not None and not tflib.is_tf_expression(style_mixing_prob) and style_mixing_prob <= 0): + style_mixing_prob = None + + # Setup components. + if 'synthesis' not in components: + components.synthesis = tflib.Network('G_synthesis', func_name=globals()[synthesis_func], **kwargs) + num_layers = components.synthesis.input_shape[1] + dlatent_size = components.synthesis.input_shape[2] + if 'mapping' not in components: + components.mapping = tflib.Network('G_mapping', func_name=globals()[mapping_func], dlatent_broadcast=num_layers, **kwargs) + + # Evaluate mapping network. + dlatents = components.mapping.get_output_for(latents_in, labels_in, is_training=is_training, **kwargs) + dlatents = tf.cast(dlatents, tf.float32) + + # Update moving average of W. + dlatent_avg = tf.get_variable('dlatent_avg', shape=[dlatent_size], initializer=tf.initializers.zeros(), trainable=False) + if dlatent_avg_beta is not None: + with tf.variable_scope('DlatentAvg'): + batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0) + update_op = tf.assign(dlatent_avg, tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta)) + with tf.control_dependencies([update_op]): + dlatents = tf.identity(dlatents) + + # Perform style mixing regularization. 
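+ # With probability style_mixing_prob, a second latent is mapped through the mapping network
+ # and its dlatents replace all layers at or above a randomly chosen cutoff, so the coarser
+ # layers keep the original styles while the finer layers see styles from the second latent.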
+ if style_mixing_prob is not None: + with tf.variable_scope('StyleMix'): + latents2 = tf.random_normal(tf.shape(latents_in)) + dlatents2 = components.mapping.get_output_for(latents2, labels_in, is_training=is_training, **kwargs) + dlatents2 = tf.cast(dlatents2, tf.float32) + layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis] + mixing_cutoff = tf.cond( + tf.random_uniform([], 0.0, 1.0) < style_mixing_prob, + lambda: tf.random_uniform([], 1, num_layers, dtype=tf.int32), + lambda: num_layers) + dlatents = tf.where(tf.broadcast_to(layer_idx < mixing_cutoff, tf.shape(dlatents)), dlatents, dlatents2) + + # Apply truncation. + if truncation_psi is not None: + with tf.variable_scope('Truncation'): + layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis] + layer_psi = np.ones(layer_idx.shape, dtype=np.float32) + if truncation_cutoff is None: + layer_psi *= truncation_psi + else: + layer_psi = tf.where(layer_idx < truncation_cutoff, layer_psi * truncation_psi, layer_psi) + dlatents = tflib.lerp(dlatent_avg, dlatents, layer_psi) + + # Evaluate synthesis network. + images_out = components.synthesis.get_output_for(dlatents, is_training=is_training, force_clean_graph=is_template_graph, **kwargs) + images_out = tf.identity(images_out, name='images_out') + if return_dlatents: + return images_out, dlatents + return images_out + +#---------------------------------------------------------------------------- +# Generator mapping network. + +def G_mapping( + latents_in, # First input: Latent vectors (Z) [minibatch, latent_size]. + labels_in, # Second input: Conditioning labels [minibatch, label_size]. + + # Input & output dimensions. + latent_size = 512, # Latent vector (Z) dimensionality. + label_size = 0, # Label dimensionality, 0 if no labels. + dlatent_size = 512, # Disentangled latent (W) dimensionality. + dlatent_broadcast = None, # Output disentangled latent (W) as [minibatch, dlatent_size] or [minibatch, dlatent_broadcast, dlatent_size]. + + # Internal details. + mapping_layers = 8, # Number of mapping layers. + mapping_fmaps = None, # Number of activations in the mapping layers, None = same as dlatent_size. + mapping_lrmul = 0.01, # Learning rate multiplier for the mapping layers. + mapping_nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu', etc. + normalize_latents = True, # Normalize latent vectors (Z) before feeding them to the mapping layers? + label_fmaps = None, # Label embedding dimensionality, None = same as latent_size. + dtype = 'float32', # Data type to use for intermediate activations and outputs. + + **_kwargs, # Ignore unrecognized keyword args. +): + # Inputs. + latents_in.set_shape([None, latent_size]) + labels_in.set_shape([None, label_size]) + latents_in = tf.cast(latents_in, dtype) + labels_in = tf.cast(labels_in, dtype) + x = latents_in + + # Normalize latents. + if normalize_latents: + with tf.variable_scope('Normalize'): + x = normalize_2nd_moment(x) + + # Embed labels, normalize, and concatenate with latents. + if label_size > 0: + with tf.variable_scope('LabelEmbed'): + fmaps = label_fmaps if label_fmaps is not None else latent_size + y = labels_in + y = apply_bias_act(dense_layer(y, fmaps=fmaps)) + y = normalize_2nd_moment(y) + x = tf.concat([x, y], axis=1) + + # Mapping layers. 
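+ # A stack of mapping_layers fully-connected layers with the mapping_nonlinearity activation
+ # and a reduced learning-rate multiplier (mapping_lrmul). Intermediate layers use
+ # mapping_fmaps activations when specified; the last layer always outputs dlatent_size.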
+ for layer_idx in range(mapping_layers): + with tf.variable_scope(f'Dense{layer_idx}'): + fmaps = mapping_fmaps if mapping_fmaps is not None and layer_idx < mapping_layers - 1 else dlatent_size + x = apply_bias_act(dense_layer(x, fmaps=fmaps, lrmul=mapping_lrmul), act=mapping_nonlinearity, lrmul=mapping_lrmul) + + # Broadcast. + if dlatent_broadcast is not None: + with tf.variable_scope('Broadcast'): + x = tf.tile(x[:, np.newaxis], [1, dlatent_broadcast, 1]) + + # Output. + assert x.dtype == tf.as_dtype(dtype) + return tf.identity(x, name='dlatents_out') + +#---------------------------------------------------------------------------- +# Generator synthesis network. + +def G_synthesis( + dlatents_in, # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size]. + + # Input & output dimensions. + dlatent_size = 512, # Disentangled latent (W) dimensionality. + num_channels = 3, # Number of output color channels. + resolution = 1024, # Output resolution. + + # Capacity. + fmap_base = 16384, # Overall multiplier for the number of feature maps. + fmap_decay = 1, # Log2 feature map reduction when doubling the resolution. + fmap_min = 1, # Minimum number of feature maps in any layer. + fmap_max = 512, # Maximum number of feature maps in any layer. + fmap_const = None, # Number of feature maps in the constant input layer. None = default. + + # Internal details. + use_noise = True, # Enable noise inputs? + randomize_noise = True, # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables. + architecture = 'skip', # Architecture: 'orig', 'skip', 'resnet'. + nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu', etc. + dtype = 'float32', # Data type to use for intermediate activations and outputs. + num_fp16_res = 0, # Use FP16 for the N highest resolutions, regardless of dtype. + conv_clamp = None, # Clamp the output of convolution layers to [-conv_clamp, +conv_clamp], None = disable clamping. + resample_kernel = [1,3,3,1], # Low-pass filter to apply when resampling activations, None = box filter. + fused_modconv = False, # Implement modulated_conv2d_layer() using grouped convolution? + + **_kwargs, # Ignore unrecognized keyword args. +): + resolution_log2 = int(np.log2(resolution)) + assert resolution == 2**resolution_log2 and resolution >= 4 + def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max) + assert architecture in ['orig', 'skip', 'resnet'] + act = nonlinearity + num_layers = resolution_log2 * 2 - 2 + + # Disentangled latent (W). + dlatents_in.set_shape([None, num_layers, dlatent_size]) + dlatents_in = tf.cast(dlatents_in, dtype) + + # Noise inputs. + noise_inputs = [] + if use_noise: + for layer_idx in range(num_layers - 1): + res = (layer_idx + 5) // 2 + shape = [1, 1, 2**res, 2**res] + noise_inputs.append(tf.get_variable(f'noise{layer_idx}', shape=shape, initializer=tf.initializers.random_normal(), trainable=False)) + + # Single convolution layer with all the bells and whistles. 
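+ # layer() applies a modulated convolution whose style comes from dlatents_in[:, layer_idx],
+ # optionally upsamples by 2x, adds per-pixel noise scaled by a learned noise_strength scalar,
+ # and finishes with a biased, optionally clamped activation. With the default capacity
+ # settings, nf(stage) = clip(16384 // 2**stage, 1, 512): resolutions up to 64x64 use 512
+ # feature maps, and 128/256/512/1024 use 256/128/64/32 respectively.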
+ def layer(x, layer_idx, fmaps, kernel, up=False): + x = modulated_conv2d_layer(x, dlatents_in[:, layer_idx], fmaps=fmaps, kernel=kernel, up=up, resample_kernel=resample_kernel, fused_modconv=fused_modconv) + if use_noise: + if randomize_noise: + noise = tf.random_normal([tf.shape(x)[0], 1, x.shape[2], x.shape[3]], dtype=x.dtype) + else: + noise = tf.cast(noise_inputs[layer_idx], x.dtype) + noise_strength = tf.get_variable('noise_strength', shape=[], initializer=tf.initializers.zeros()) + x += noise * tf.cast(noise_strength, x.dtype) + return apply_bias_act(x, act=act, clamp=conv_clamp) + + # Main block for one resolution. + def block(x, res): # res = 3..resolution_log2 + x = tf.cast(x, 'float16' if res > resolution_log2 - num_fp16_res else dtype) + t = x + with tf.variable_scope('Conv0_up'): + x = layer(x, layer_idx=res*2-5, fmaps=nf(res-1), kernel=3, up=True) + with tf.variable_scope('Conv1'): + x = layer(x, layer_idx=res*2-4, fmaps=nf(res-1), kernel=3) + if architecture == 'resnet': + with tf.variable_scope('Skip'): + t = conv2d_layer(t, fmaps=nf(res-1), kernel=1, up=True, resample_kernel=resample_kernel) + x = (x + t) * (1 / np.sqrt(2)) + return x + + # Upsampling block. + def upsample(y): + with tf.variable_scope('Upsample'): + return upsample_2d(y, k=resample_kernel) + + # ToRGB block. + def torgb(x, y, res): # res = 2..resolution_log2 + with tf.variable_scope('ToRGB'): + t = modulated_conv2d_layer(x, dlatents_in[:, res*2-3], fmaps=num_channels, kernel=1, demodulate=False, fused_modconv=fused_modconv) + t = apply_bias_act(t, clamp=conv_clamp) + t = tf.cast(t, dtype) + if y is not None: + t += tf.cast(y, t.dtype) + return t + + # Layers for 4x4 resolution. + y = None + with tf.variable_scope('4x4'): + with tf.variable_scope('Const'): + fmaps = fmap_const if fmap_const is not None else nf(1) + x = tf.get_variable('const', shape=[1, fmaps, 4, 4], initializer=tf.initializers.random_normal()) + x = tf.tile(tf.cast(x, dtype), [tf.shape(dlatents_in)[0], 1, 1, 1]) + with tf.variable_scope('Conv'): + x = layer(x, layer_idx=0, fmaps=nf(1), kernel=3) + if architecture == 'skip': + y = torgb(x, y, 2) + + # Layers for >=8x8 resolutions. + for res in range(3, resolution_log2 + 1): + with tf.variable_scope(f'{2**res}x{2**res}'): + x = block(x, res) + if architecture == 'skip': + y = upsample(y) + if architecture == 'skip' or res == resolution_log2: + y = torgb(x, y, res) + + images_out = y + assert images_out.dtype == tf.as_dtype(dtype) + return tf.identity(images_out, name='images_out') + +#---------------------------------------------------------------------------- +# Discriminator. + +def D_main( + images_in, # First input: Images [minibatch, channel, height, width]. + labels_in, # Second input: Conditioning labels [minibatch, label_size]. + + # Input dimensions. + num_channels = 3, # Number of input color channels. Overridden based on dataset. + resolution = 1024, # Input resolution. Overridden based on dataset. + label_size = 0, # Dimensionality of the labels, 0 if no labels. Overridden based on dataset. + + # Capacity. + fmap_base = 16384, # Overall multiplier for the number of feature maps. + fmap_decay = 1, # Log2 feature map reduction when doubling the resolution. + fmap_min = 1, # Minimum number of feature maps in any layer. + fmap_max = 512, # Maximum number of feature maps in any layer. + + # Internal details. + mapping_layers = 0, # Number of additional mapping layers for the conditioning labels. + mapping_fmaps = None, # Number of activations in the mapping layers, None = default. 
+ mapping_lrmul = 0.1, # Learning rate multiplier for the mapping layers. + architecture = 'resnet', # Architecture: 'orig', 'skip', 'resnet'. + nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu', etc. + mbstd_group_size = None, # Group size for the minibatch standard deviation layer, None = entire minibatch. + mbstd_num_features = 1, # Number of features for the minibatch standard deviation layer, 0 = disable. + dtype = 'float32', # Data type to use for intermediate activations and outputs. + num_fp16_res = 0, # Use FP16 for the N highest resolutions, regardless of dtype. + conv_clamp = None, # Clamp the output of convolution layers to [-conv_clamp, +conv_clamp], None = disable clamping. + resample_kernel = [1,3,3,1], # Low-pass filter to apply when resampling activations, None = box filter. + + # Comparison methods. + augment_strength = 0, # AdaptiveAugment.get_strength_var() for pagan & adropout. + use_pagan = False, # pagan: Enable? + pagan_num = 16, # pagan: Number of active bits with augment_strength=1. + pagan_fade = 0.5, # pagan: Relative duration of fading in new bits. + score_size = 1, # auxrot: Number of scalars to output. Can vary between evaluations. + score_max = 1, # auxrot: Maximum number of scalars to output. Must be set at construction time. + use_spectral_norm = False, # spectralnorm: Enable? + adaptive_dropout = 0, # adropout: Standard deviation to use with augment_strength=1, 0 = disable. + freeze_layers = 0, # Freeze-D: Number of layers to freeze. + + **_kwargs, # Ignore unrecognized keyword args. +): + resolution_log2 = int(np.log2(resolution)) + assert resolution == 2**resolution_log2 and resolution >= 4 + def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max) + assert architecture in ['orig', 'skip', 'resnet'] + if mapping_fmaps is None: + mapping_fmaps = nf(0) + act = nonlinearity + + # Inputs. + images_in.set_shape([None, num_channels, resolution, resolution]) + labels_in.set_shape([None, label_size]) + images_in = tf.cast(images_in, dtype) + labels_in = tf.cast(labels_in, dtype) + + # Label embedding and mapping. + if label_size > 0: + y = labels_in + with tf.variable_scope('LabelEmbed'): + y = apply_bias_act(dense_layer(y, fmaps=mapping_fmaps)) + y = normalize_2nd_moment(y) + for idx in range(mapping_layers): + with tf.variable_scope(f'Mapping{idx}'): + y = apply_bias_act(dense_layer(y, fmaps=mapping_fmaps, lrmul=mapping_lrmul), act=act, lrmul=mapping_lrmul) + labels_in = y + + # Adaptive multiplicative dropout. + def adrop(x): + if adaptive_dropout != 0: + s = [tf.shape(x)[0], x.shape[1]] + [1] * (x.shape.rank - 2) + x *= tf.cast(tf.exp(tf.random_normal(s) * (augment_strength * adaptive_dropout)), x.dtype) + return x + + # Freeze-D. + cur_layer_idx = 0 + def is_next_layer_trainable(): + nonlocal cur_layer_idx + trainable = (cur_layer_idx >= freeze_layers) + cur_layer_idx += 1 + return trainable + + # Construct PA-GAN bit vector. + pagan_bits = None + pagan_signs = None + if use_pagan: + with tf.variable_scope('PAGAN'): + idx = tf.range(pagan_num, dtype=tf.float32) + active = (augment_strength * pagan_num - idx - 1) / max(pagan_fade, 1e-8) + 1 + prob = tf.clip_by_value(active[np.newaxis, :], 0, 1) * 0.5 + rnd = tf.random_uniform([tf.shape(images_in)[0], pagan_num]) + pagan_bits = tf.cast(rnd < prob, dtype=tf.float32) + pagan_signs = tf.reduce_prod(1 - pagan_bits * 2, axis=1, keepdims=True) + + # FromRGB block. 
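+ # fromrgb() projects the (possibly downsampled) input image to nf(res-1) feature maps with a
+ # 1x1 convolution, optionally adds the PA-GAN bit embedding, applies the biased activation,
+ # and, when incoming feature maps are present, adds them to the result ('skip' architecture).
+ # Trainability of each layer is decided by is_next_layer_trainable() to implement Freeze-D.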
+ def fromrgb(x, y, res): # res = 2..resolution_log2 + with tf.variable_scope('FromRGB'): + trainable = is_next_layer_trainable() + t = tf.cast(y, 'float16' if res > resolution_log2 - num_fp16_res else dtype) + t = adrop(conv2d_layer(t, fmaps=nf(res-1), kernel=1, trainable=trainable)) + if pagan_bits is not None: + with tf.variable_scope('PAGAN'): + t += dense_layer(tf.cast(pagan_bits, t.dtype), fmaps=nf(res-1), trainable=trainable)[:, :, np.newaxis, np.newaxis] + t = apply_bias_act(t, act=act, clamp=conv_clamp, trainable=trainable) + if x is not None: + t += tf.cast(x, t.dtype) + return t + + # Main block for one resolution. + def block(x, res): # res = 2..resolution_log2 + x = tf.cast(x, 'float16' if res > resolution_log2 - num_fp16_res else dtype) + t = x + with tf.variable_scope('Conv0'): + trainable = is_next_layer_trainable() + x = apply_bias_act(adrop(conv2d_layer(x, fmaps=nf(res-1), kernel=3, trainable=trainable, use_spectral_norm=use_spectral_norm)), act=act, clamp=conv_clamp, trainable=trainable) + with tf.variable_scope('Conv1_down'): + trainable = is_next_layer_trainable() + x = apply_bias_act(adrop(conv2d_layer(x, fmaps=nf(res-2), kernel=3, down=True, resample_kernel=resample_kernel, trainable=trainable, use_spectral_norm=use_spectral_norm)), act=act, clamp=conv_clamp, trainable=trainable) + if architecture == 'resnet': + with tf.variable_scope('Skip'): + trainable = is_next_layer_trainable() + t = adrop(conv2d_layer(t, fmaps=nf(res-2), kernel=1, down=True, resample_kernel=resample_kernel, trainable=trainable)) + x = (x + t) * (1 / np.sqrt(2)) + return x + + # Downsampling block. + def downsample(y): + with tf.variable_scope('Downsample'): + return downsample_2d(y, k=resample_kernel) + + # Layers for >=8x8 resolutions. + x = None + y = images_in + for res in range(resolution_log2, 2, -1): + with tf.variable_scope(f'{2**res}x{2**res}'): + if architecture == 'skip' or res == resolution_log2: + x = fromrgb(x, y, res) + x = block(x, res) + if architecture == 'skip': + y = downsample(y) + + # Layers for 4x4 resolution. + with tf.variable_scope('4x4'): + if architecture == 'skip': + x = fromrgb(x, y, 2) + x = tf.cast(x, dtype) + if mbstd_num_features > 0: + with tf.variable_scope('MinibatchStddev'): + x = minibatch_stddev_layer(x, mbstd_group_size, mbstd_num_features) + with tf.variable_scope('Conv'): + trainable = is_next_layer_trainable() + x = apply_bias_act(adrop(conv2d_layer(x, fmaps=nf(1), kernel=3, trainable=trainable, use_spectral_norm=use_spectral_norm)), act=act, clamp=conv_clamp, trainable=trainable) + with tf.variable_scope('Dense0'): + trainable = is_next_layer_trainable() + x = apply_bias_act(adrop(dense_layer(x, fmaps=nf(0), trainable=trainable)), act=act, trainable=trainable) + + # Output layer (always trainable). + with tf.variable_scope('Output'): + if label_size > 0: + assert score_max == 1 + x = apply_bias_act(dense_layer(x, fmaps=mapping_fmaps)) + x = tf.reduce_sum(x * labels_in, axis=1, keepdims=True) / np.sqrt(mapping_fmaps) + else: + x = apply_bias_act(dense_layer(x, fmaps=score_max)) + if pagan_signs is not None: + assert score_max == 1 + x *= pagan_signs + scores_out = x[:, :score_size] + + # Output. 
+ assert scores_out.dtype == tf.as_dtype(dtype) + scores_out = tf.identity(scores_out, name='scores_out') + return scores_out + +#---------------------------------------------------------------------------- diff --git a/training/training_loop.py b/training/training_loop.py new file mode 100755 index 00000000..f70c11f8 --- /dev/null +++ b/training/training_loop.py @@ -0,0 +1,326 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Main training loop.""" + +import os +import pickle +import time +import PIL.Image +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib +from dnnlib.tflib.autosummary import autosummary + +from training import dataset + +#---------------------------------------------------------------------------- +# Select size and contents of the image snapshot grids that are exported +# periodically during training. + +def setup_snapshot_image_grid(training_set): + gw = np.clip(7680 // training_set.shape[2], 7, 32) + gh = np.clip(4320 // training_set.shape[1], 4, 32) + + # Unconditional. + if training_set.label_size == 0: + reals, labels = training_set.get_minibatch_np(gw * gh) + return (gw, gh), reals, labels + + # Row per class. + cw, ch = (gw, 1) + nw = (gw - 1) // cw + 1 + nh = (gh - 1) // ch + 1 + + # Collect images. + blocks = [[] for _i in range(nw * nh)] + for _iter in range(1000000): + real, label = training_set.get_minibatch_np(1) + idx = np.argmax(label[0]) + while idx < len(blocks) and len(blocks[idx]) >= cw * ch: + idx += training_set.label_size + if idx < len(blocks): + blocks[idx].append((real, label)) + if all(len(block) >= cw * ch for block in blocks): + break + + # Layout grid. + reals = np.zeros([gw * gh] + training_set.shape, dtype=training_set.dtype) + labels = np.zeros([gw * gh, training_set.label_size], dtype=training_set.label_dtype) + for i, block in enumerate(blocks): + for j, (real, label) in enumerate(block): + x = (i % nw) * cw + j % cw + y = (i // nw) * ch + j // cw + if x < gw and y < gh: + reals[x + y * gw] = real[0] + labels[x + y * gw] = label[0] + return (gw, gh), reals, labels + +#---------------------------------------------------------------------------- + +def save_image_grid(images, filename, drange, grid_size): + lo, hi = drange + gw, gh = grid_size + images = np.asarray(images, dtype=np.float32) + images = (images - lo) * (255 / (hi - lo)) + images = np.rint(images).clip(0, 255).astype(np.uint8) + _N, C, H, W = images.shape + images = images.reshape(gh, gw, C, H, W) + images = images.transpose(0, 3, 1, 4, 2) + images = images.reshape(gh * H, gw * W, C) + PIL.Image.fromarray(images, {3: 'RGB', 1: 'L'}[C]).save(filename) + +#---------------------------------------------------------------------------- +# Main training script. + +def training_loop( + run_dir = '.', # Output directory. + G_args = {}, # Options for generator network. + D_args = {}, # Options for discriminator network. + G_opt_args = {}, # Options for generator optimizer. + D_opt_args = {}, # Options for discriminator optimizer. + loss_args = {}, # Options for loss function. + train_dataset_args = {}, # Options for dataset to train with. 
+    metric_dataset_args     = {},       # Options for dataset to evaluate metrics against.
+    augment_args            = {},       # Options for adaptive augmentations.
+    metric_arg_list         = [],       # Metrics to evaluate during training.
+    num_gpus                = 1,        # Number of GPUs to use.
+    minibatch_size          = 32,       # Global minibatch size.
+    minibatch_gpu           = 4,        # Number of samples processed at a time by one GPU.
+    G_smoothing_kimg        = 10,       # Half-life of the exponential moving average (EMA) of generator weights.
+    G_smoothing_rampup      = None,     # EMA ramp-up coefficient.
+    minibatch_repeats       = 4,        # Number of minibatches to run in the inner loop.
+    lazy_regularization     = True,     # Perform regularization as a separate training step?
+    G_reg_interval          = 4,        # How often to perform regularization for G? Ignored if lazy_regularization=False.
+    D_reg_interval          = 16,       # How often to perform regularization for D? Ignored if lazy_regularization=False.
+    total_kimg              = 25000,    # Total length of the training, measured in thousands of real images.
+    kimg_per_tick           = 4,        # Progress snapshot interval.
+    image_snapshot_ticks    = 50,       # How often to save image snapshots? None = only save 'reals.png' and 'fakes_init.png'.
+    network_snapshot_ticks  = 50,       # How often to save network snapshots? None = only save 'networks-final.pkl'.
+    resume_pkl              = None,     # Network pickle to resume training from.
+    abort_fn                = None,     # Callback function for determining whether to abort training.
+    progress_fn             = None,     # Callback function for updating training progress.
+):
+    assert minibatch_size % (num_gpus * minibatch_gpu) == 0
+    start_time = time.time()
+
+    print('Loading training set...')
+    training_set = dataset.load_dataset(**train_dataset_args)
+    print('Image shape:', np.int32(training_set.shape).tolist())
+    print('Label shape:', [training_set.label_size])
+    print()
+
+    print('Constructing networks...')
+    with tf.device('/gpu:0'):
+        G = tflib.Network('G', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **G_args)
+        D = tflib.Network('D', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **D_args)
+        Gs = G.clone('Gs')
+        if resume_pkl is not None:
+            print(f'Resuming from "{resume_pkl}"')
+            with dnnlib.util.open_url(resume_pkl) as f:
+                rG, rD, rGs = pickle.load(f)
+            G.copy_vars_from(rG)
+            D.copy_vars_from(rD)
+            Gs.copy_vars_from(rGs)
+    G.print_layers()
+    D.print_layers()
+
+    print('Exporting sample images...')
+    grid_size, grid_reals, grid_labels = setup_snapshot_image_grid(training_set)
+    save_image_grid(grid_reals, os.path.join(run_dir, 'reals.png'), drange=[0,255], grid_size=grid_size)
+    grid_latents = np.random.randn(np.prod(grid_size), *G.input_shape[1:])
+    grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=minibatch_gpu)
+    save_image_grid(grid_fakes, os.path.join(run_dir, 'fakes_init.png'), drange=[-1,1], grid_size=grid_size)
+
+    print(f'Replicating networks across {num_gpus} GPUs...')
+    G_gpus = [G]
+    D_gpus = [D]
+    for gpu in range(1, num_gpus):
+        with tf.device(f'/gpu:{gpu}'):
+            G_gpus.append(G.clone(f'{G.name}_gpu{gpu}'))
+            D_gpus.append(D.clone(f'{D.name}_gpu{gpu}'))
+
+    print('Initializing augmentations...')
+    aug = None
+    if augment_args.get('class_name', None) is not None:
+        aug = dnnlib.util.construct_class_by_name(**augment_args)
+        aug.init_validation_set(D_gpus=D_gpus, training_set=training_set)
+
+    print('Setting up optimizers...')
+    G_opt_args = dict(G_opt_args)
+    D_opt_args = dict(D_opt_args)
+    for args, reg_interval in [(G_opt_args,
G_reg_interval), (D_opt_args, D_reg_interval)]: + args['minibatch_multiplier'] = minibatch_size // num_gpus // minibatch_gpu + if lazy_regularization: + mb_ratio = reg_interval / (reg_interval + 1) + args['learning_rate'] *= mb_ratio + if 'beta1' in args: args['beta1'] **= mb_ratio + if 'beta2' in args: args['beta2'] **= mb_ratio + G_opt = tflib.Optimizer(name='TrainG', **G_opt_args) + D_opt = tflib.Optimizer(name='TrainD', **D_opt_args) + G_reg_opt = tflib.Optimizer(name='RegG', share=G_opt, **G_opt_args) + D_reg_opt = tflib.Optimizer(name='RegD', share=D_opt, **D_opt_args) + + print('Constructing training graph...') + data_fetch_ops = [] + training_set.configure(minibatch_gpu) + for gpu, (G_gpu, D_gpu) in enumerate(zip(G_gpus, D_gpus)): + with tf.name_scope(f'Train_gpu{gpu}'), tf.device(f'/gpu:{gpu}'): + + # Fetch training data via temporary variables. + with tf.name_scope('DataFetch'): + real_images_var = tf.Variable(name='images', trainable=False, initial_value=tf.zeros([minibatch_gpu] + training_set.shape)) + real_labels_var = tf.Variable(name='labels', trainable=False, initial_value=tf.zeros([minibatch_gpu, training_set.label_size])) + real_images_write, real_labels_write = training_set.get_minibatch_tf() + real_images_write = tflib.convert_images_from_uint8(real_images_write) + data_fetch_ops += [tf.assign(real_images_var, real_images_write)] + data_fetch_ops += [tf.assign(real_labels_var, real_labels_write)] + + # Evaluate loss function and register gradients. + fake_labels = training_set.get_random_labels_tf(minibatch_gpu) + terms = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, aug=aug, fake_labels=fake_labels, real_images=real_images_var, real_labels=real_labels_var, **loss_args) + if lazy_regularization: + if terms.G_reg is not None: G_reg_opt.register_gradients(tf.reduce_mean(terms.G_reg * G_reg_interval), G_gpu.trainables) + if terms.D_reg is not None: D_reg_opt.register_gradients(tf.reduce_mean(terms.D_reg * D_reg_interval), D_gpu.trainables) + else: + if terms.G_reg is not None: terms.G_loss += terms.G_reg + if terms.D_reg is not None: terms.D_loss += terms.D_reg + G_opt.register_gradients(tf.reduce_mean(terms.G_loss), G_gpu.trainables) + D_opt.register_gradients(tf.reduce_mean(terms.D_loss), D_gpu.trainables) + + print('Finalizing training ops...') + data_fetch_op = tf.group(*data_fetch_ops) + G_train_op = G_opt.apply_updates() + D_train_op = D_opt.apply_updates() + G_reg_op = G_reg_opt.apply_updates(allow_no_op=True) + D_reg_op = D_reg_opt.apply_updates(allow_no_op=True) + Gs_beta_in = tf.placeholder(tf.float32, name='Gs_beta_in', shape=[]) + Gs_update_op = Gs.setup_as_moving_average_of(G, beta=Gs_beta_in) + tflib.init_uninitialized_vars() + with tf.device('/gpu:0'): + peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse() + + print('Initializing metrics...') + summary_log = tf.summary.FileWriter(run_dir) + metrics = [] + for args in metric_arg_list: + metric = dnnlib.util.construct_class_by_name(**args) + metric.configure(dataset_args=metric_dataset_args, run_dir=run_dir) + metrics.append(metric) + + print(f'Training for {total_kimg} kimg...') + print() + if progress_fn is not None: + progress_fn(0, total_kimg) + tick_start_time = time.time() + maintenance_time = tick_start_time - start_time + cur_nimg = 0 + cur_tick = -1 + tick_start_nimg = cur_nimg + running_mb_counter = 0 + + done = False + while not done: + + # Compute EMA decay parameter. 
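+        # The half-life of the Gs moving average is G_smoothing_kimg thousand images,
+        # optionally capped at cur_nimg * G_smoothing_rampup early in training; the
+        # per-step decay is then Gs_beta = 0.5 ** (minibatch_size / half_life).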
+ Gs_nimg = G_smoothing_kimg * 1000.0 + if G_smoothing_rampup is not None: + Gs_nimg = min(Gs_nimg, cur_nimg * G_smoothing_rampup) + Gs_beta = 0.5 ** (minibatch_size / max(Gs_nimg, 1e-8)) + + # Run training ops. + for _repeat_idx in range(minibatch_repeats): + rounds = range(0, minibatch_size, minibatch_gpu * num_gpus) + run_G_reg = (lazy_regularization and running_mb_counter % G_reg_interval == 0) + run_D_reg = (lazy_regularization and running_mb_counter % D_reg_interval == 0) + cur_nimg += minibatch_size + running_mb_counter += 1 + + # Fast path without gradient accumulation. + if len(rounds) == 1: + tflib.run([G_train_op, data_fetch_op]) + if run_G_reg: + tflib.run(G_reg_op) + tflib.run([D_train_op, Gs_update_op], {Gs_beta_in: Gs_beta}) + if run_D_reg: + tflib.run(D_reg_op) + + # Slow path with gradient accumulation. + else: + for _round in rounds: + tflib.run(G_train_op) + if run_G_reg: + tflib.run(G_reg_op) + tflib.run(Gs_update_op, {Gs_beta_in: Gs_beta}) + for _round in rounds: + tflib.run(data_fetch_op) + tflib.run(D_train_op) + if run_D_reg: + tflib.run(D_reg_op) + + # Run validation. + if aug is not None: + aug.run_validation(minibatch_size=minibatch_size) + + # Tune augmentation parameters. + if aug is not None: + aug.tune(minibatch_size * minibatch_repeats) + + # Perform maintenance tasks once per tick. + done = (cur_nimg >= total_kimg * 1000) or (abort_fn is not None and abort_fn()) + if done or cur_tick < 0 or cur_nimg >= tick_start_nimg + kimg_per_tick * 1000: + cur_tick += 1 + tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0 + tick_start_nimg = cur_nimg + tick_end_time = time.time() + total_time = tick_end_time - start_time + tick_time = tick_end_time - tick_start_time + + # Report progress. + print(' '.join([ + f"tick {autosummary('Progress/tick', cur_tick):<5d}", + f"kimg {autosummary('Progress/kimg', cur_nimg / 1000.0):<8.1f}", + f"time {dnnlib.util.format_time(autosummary('Timing/total_sec', total_time)):<12s}", + f"sec/tick {autosummary('Timing/sec_per_tick', tick_time):<7.1f}", + f"sec/kimg {autosummary('Timing/sec_per_kimg', tick_time / tick_kimg):<7.2f}", + f"maintenance {autosummary('Timing/maintenance_sec', maintenance_time):<6.1f}", + f"gpumem {autosummary('Resources/peak_gpu_mem_gb', peak_gpu_mem_op.eval() / 2**30):<5.1f}", + f"augment {autosummary('Progress/augment', aug.strength if aug is not None else 0):.3f}", + ])) + autosummary('Timing/total_hours', total_time / (60.0 * 60.0)) + autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0)) + if progress_fn is not None: + progress_fn(cur_nimg // 1000, total_kimg) + + # Save snapshots. + if image_snapshot_ticks is not None and (done or cur_tick % image_snapshot_ticks == 0): + grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=minibatch_gpu) + save_image_grid(grid_fakes, os.path.join(run_dir, f'fakes{cur_nimg // 1000:06d}.png'), drange=[-1,1], grid_size=grid_size) + if network_snapshot_ticks is not None and (done or cur_tick % network_snapshot_ticks == 0): + pkl = os.path.join(run_dir, f'network-snapshot-{cur_nimg // 1000:06d}.pkl') + with open(pkl, 'wb') as f: + pickle.dump((G, D, Gs), f) + if len(metrics): + print('Evaluating metrics...') + for metric in metrics: + metric.run(pkl, num_gpus=num_gpus) + + # Update summaries. 
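+            # Fold any metric results into the autosummaries and flush the per-tick
+            # averages to the TensorBoard log under the current image count.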
+ for metric in metrics: + metric.update_autosummaries() + tflib.autosummary.save_summaries(summary_log, cur_nimg) + tick_start_time = time.time() + maintenance_time = tick_start_time - tick_end_time + + print() + print('Exiting...') + summary_log.close() + training_set.close() + +#----------------------------------------------------------------------------
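Two numeric adjustments in the loop above are easy to miss: the per-step EMA decay used for `Gs`, and the optimizer-hyperparameter rescaling applied when `lazy_regularization` is enabled. The sketch below restates them as standalone helpers for reference only; the function names `ema_beta` and `scale_opt_args_for_lazy_reg` are illustrative and do not exist in this codebase.

```python
def ema_beta(minibatch_size, cur_nimg, smoothing_kimg=10.0, rampup=None):
    # Per-minibatch decay for the Gs moving average: half-life of
    # `smoothing_kimg` thousand images, optionally capped at
    # cur_nimg * rampup while training is still ramping up.
    half_life_nimg = smoothing_kimg * 1000.0
    if rampup is not None:
        half_life_nimg = min(half_life_nimg, cur_nimg * rampup)
    return 0.5 ** (minibatch_size / max(half_life_nimg, 1e-8))

def scale_opt_args_for_lazy_reg(opt_args, reg_interval):
    # With lazy regularization the regularizer runs only every `reg_interval`
    # minibatches, so learning rate and Adam betas are rescaled by
    # mb_ratio = N / (N + 1), mirroring the adjustment in the loop above.
    opt_args = dict(opt_args)
    mb_ratio = reg_interval / (reg_interval + 1)
    opt_args['learning_rate'] *= mb_ratio
    if 'beta1' in opt_args: opt_args['beta1'] **= mb_ratio
    if 'beta2' in opt_args: opt_args['beta2'] **= mb_ratio
    return opt_args

# Example values:
print(ema_beta(minibatch_size=32, cur_nimg=20000, smoothing_kimg=10, rampup=None))  # ~0.9978
print(scale_opt_args_for_lazy_reg({'learning_rate': 0.0025, 'beta1': 0.0, 'beta2': 0.99}, reg_interval=16))
```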