diff --git a/mseg_semantic/config/train/1080_release/mseg-mgda.yaml b/mseg_semantic/config/train/1080_release/mseg-mgda.yaml new file mode 100755 index 0000000..b1210cb --- /dev/null +++ b/mseg_semantic/config/train/1080_release/mseg-mgda.yaml @@ -0,0 +1,67 @@ +# difference with normal mseg.yaml is "use_apex: False", since apex model does not support model.no_sync() +DATA: + dataset: [ + ade20k-150-relabeled, + bdd-relabeled, + cityscapes-19-relabeled, + coco-panoptic-133-relabeled, + idd-39-relabeled, + mapillary-public65-relabeled, + sunrgbd-37-relabeled] + universal: True + use_mgda: False # to be determined at argument + +TRAIN: + use_naive_taxonomy: False + arch: hrnet + network_name: + layers: + sync_bn: True # adopt sync_bn or not + train_h: 713 + train_w: 713 + scale_min: 0.5 # minimum random scale + scale_max: 2.0 # maximum random scale + short_size: 1080 + rotate_min: -10 # minimum random rotate + rotate_max: 10 # maximum random rotate + zoom_factor: 8 # zoom factor for final prediction during training, be in [1, 2, 4, 8] + ignore_label: 255 + aux_weight: 0.4 + num_examples: 1000000 + train_gpu: [0, 1, 2, 3, 4, 5, 6] + dataset_gpu_mapping: { + 'ade20k-150-relabeled': [0], + 'bdd-relabeled': [1], + 'cityscapes-19-relabeled': [2], + 'coco-panoptic-133-relabeled': [3], + 'idd-39-relabeled': [4], + 'mapillary-public65-relabeled': [5], + 'sunrgbd-37-relabeled': [6], + } + workers: 64 # data loader workers + batch_size: 35 # batch size for training + batch_size_val: 1 # batch size for validation during training, memory and speed tradeoff + base_lr: 0.01 + epochs: 10 + start_epoch: 0 + power: 0.9 + momentum: 0.9 + weight_decay: 0.0001 + manual_seed: + print_freq: 10 + save_freq: 1 + save_path: default + weight: # path to initial weight (default: none) + resume: # path to latest checkpoint (default: none) + auto_resume: None # xx + evaluate: False # evaluate on validation set, extra gpu memory needed and small batch_size_val is recommend +Distributed: + dist_url: tcp://127.0.0.1:6795 + dist_backend: 'nccl' + multiprocessing_distributed: True + world_size: 1 + rank: 0 + use_apex: False + opt_level: 'O0' + keep_batchnorm_fp32: + loss_scale: diff --git a/mseg_semantic/config/train/1080_release/mseg-naive-baseline.yaml b/mseg_semantic/config/train/1080_release/mseg-naive-baseline.yaml new file mode 100755 index 0000000..0daa916 --- /dev/null +++ b/mseg_semantic/config/train/1080_release/mseg-naive-baseline.yaml @@ -0,0 +1,66 @@ +DATA: + dataset: [ + ade20k-150, + bdd, + cityscapes-19, + coco-panoptic-133, + idd-39, + mapillary-public65, + sunrgbd-37] + universal: True + use_mgda: False # to be determined at argument + +TRAIN: + use_naive_taxonomy: True + arch: hrnet + network_name: + layers: + sync_bn: True # adopt sync_bn or not + train_h: 713 + train_w: 713 + scale_min: 0.5 # minimum random scale + scale_max: 2.0 # maximum random scale + short_size: 1080 + rotate_min: -10 # minimum random rotate + rotate_max: 10 # maximum random rotate + zoom_factor: 8 # zoom factor for final prediction during training, be in [1, 2, 4, 8] + ignore_label: 255 + aux_weight: 0.4 + num_examples: 1000000 + train_gpu: [0, 1, 2, 3, 4, 5, 6] + dataset_gpu_mapping: { + 'ade20k-150': [0], + 'bdd': [1], + 'cityscapes-19': [2], + 'coco-panoptic-133': [3], + 'idd-39': [4], + 'mapillary-public65': [5], + 'sunrgbd-37': [6], + } + workers: 64 # data loader workers + batch_size: 28 # batch size for training + batch_size_val: 1 # batch size for validation during training, memory and speed tradeoff + base_lr: 0.01 + epochs: 
10 + start_epoch: 0 + power: 0.9 + momentum: 0.9 + weight_decay: 0.0001 + manual_seed: + print_freq: 10 + save_freq: 1 + save_path: default + weight: # path to initial weight (default: none) + resume: # path to latest checkpoint (default: none) + auto_resume: None # xx + evaluate: False # evaluate on validation set, extra gpu memory needed and small batch_size_val is recommend +Distributed: + dist_url: tcp://127.0.0.1:6795 + dist_backend: 'nccl' + multiprocessing_distributed: True + world_size: 1 + rank: 0 + use_apex: True + opt_level: 'O0' + keep_batchnorm_fp32: + loss_scale: diff --git a/mseg_semantic/config/train/1080_release/mseg-relabeled-1m.yaml b/mseg_semantic/config/train/1080_release/mseg-relabeled-1m.yaml new file mode 100755 index 0000000..e01d506 --- /dev/null +++ b/mseg_semantic/config/train/1080_release/mseg-relabeled-1m.yaml @@ -0,0 +1,66 @@ +DATA: + dataset: [ + ade20k-150-relabeled, + bdd-relabeled, + cityscapes-19-relabeled, + coco-panoptic-133-relabeled, + idd-39-relabeled, + mapillary-public65-relabeled, + sunrgbd-37-relabeled] + universal: True + use_mgda: False # to be determined at argument + +TRAIN: + use_naive_taxonomy: False + arch: hrnet + network_name: + layers: + sync_bn: True # adopt sync_bn or not + train_h: 713 + train_w: 713 + scale_min: 0.5 # minimum random scale + scale_max: 2.0 # maximum random scale + short_size: 1080 # image resolution is 1080p at training + rotate_min: -10 # minimum random rotate + rotate_max: 10 # maximum random rotate + zoom_factor: 8 # zoom factor for final prediction during training, be in [1, 2, 4, 8] + ignore_label: 255 + aux_weight: 0.4 + num_examples: 1000000 # 1 Million crops per dataset is default training duration + train_gpu: [0, 1, 2, 3, 4, 5, 6] + dataset_gpu_mapping: { + 'ade20k-150-relabeled': [0], + 'bdd-relabeled': [1], + 'cityscapes-19-relabeled': [2], + 'coco-panoptic-133-relabeled': [3], + 'idd-39-relabeled': [4], + 'mapillary-public65-relabeled': [5], + 'sunrgbd-37-relabeled': [6] + } + workers: 64 # data loader workers + batch_size: 14 # batch size for training + batch_size_val: 1 # batch size for validation during training, memory and speed tradeoff + base_lr: 0.01 + epochs: 10 + start_epoch: 0 + power: 0.9 + momentum: 0.9 + weight_decay: 0.0001 + manual_seed: + print_freq: 10 + save_freq: 1 + save_path: default + weight: # path to initial weight (default: none) + resume: # path to latest checkpoint (default: none) + auto_resume: None # xx + evaluate: False # evaluate on validation set, extra gpu memory needed and small batch_size_val is recommend +Distributed: + dist_url: tcp://127.0.0.1:6795 + dist_backend: 'nccl' + multiprocessing_distributed: True + world_size: 1 + rank: 0 + use_apex: True + opt_level: 'O0' + keep_batchnorm_fp32: + loss_scale: diff --git a/mseg_semantic/config/train/1080_release/mseg-relabeled-3m.yaml b/mseg_semantic/config/train/1080_release/mseg-relabeled-3m.yaml new file mode 100755 index 0000000..0eca840 --- /dev/null +++ b/mseg_semantic/config/train/1080_release/mseg-relabeled-3m.yaml @@ -0,0 +1,66 @@ +DATA: + dataset: [ + ade20k-150-relabeled, + bdd-relabeled, + cityscapes-19-relabeled, + coco-panoptic-133-relabeled, + idd-39-relabeled, + mapillary-public65-relabeled, + sunrgbd-37-relabeled] + universal: True + use_mgda: False # to be determined at argument + +TRAIN: + use_naive_taxonomy: False + arch: hrnet + network_name: + layers: + sync_bn: True # adopt sync_bn or not + train_h: 713 + train_w: 713 + scale_min: 0.5 # minimum random scale + scale_max: 2.0 # maximum random scale + 
short_size: 1080 # image resolution is 1080p for training + rotate_min: -10 # minimum random rotate + rotate_max: 10 # maximum random rotate + zoom_factor: 8 # zoom factor for final prediction during training, be in [1, 2, 4, 8] + ignore_label: 255 + aux_weight: 0.4 + num_examples: 3000000 + train_gpu: [0, 1, 2, 3, 4, 5, 6] + dataset_gpu_mapping: { + 'ade20k-150-relabeled': [0], + 'bdd-relabeled': [1], + 'cityscapes-19-relabeled': [2], + 'coco-panoptic-133-relabeled': [3], + 'idd-39-relabeled': [4], + 'mapillary-public65-relabeled': [5], + 'sunrgbd-37-relabeled': [6], + } + workers: 64 # data loader workers + batch_size: 35 # batch size for training + batch_size_val: 1 # batch size for validation during training, memory and speed tradeoff + base_lr: 0.01 + epochs: 10 + start_epoch: 0 + power: 0.9 + momentum: 0.9 + weight_decay: 0.0001 + manual_seed: + print_freq: 10 + save_freq: 1 + save_path: default + weight: # path to initial weight (default: none) + resume: # path to latest checkpoint (default: none) + auto_resume: None # xx + evaluate: False # evaluate on validation set, extra gpu memory needed and small batch_size_val is recommend +Distributed: + dist_url: tcp://127.0.0.1:6795 + dist_backend: 'nccl' + multiprocessing_distributed: True + world_size: 1 + rank: 0 + use_apex: True + opt_level: 'O0' + keep_batchnorm_fp32: + loss_scale: diff --git a/mseg_semantic/config/train/1080_release/mseg-unrelabeled.yaml b/mseg_semantic/config/train/1080_release/mseg-unrelabeled.yaml new file mode 100755 index 0000000..652d3bb --- /dev/null +++ b/mseg_semantic/config/train/1080_release/mseg-unrelabeled.yaml @@ -0,0 +1,67 @@ +DATA: + dataset: [ + ade20k-150, + bdd, + cityscapes-19, + coco-panoptic-133, + idd-39, + mapillary-public65, + sunrgbd-37] + universal: True + use_multiple_datasets: True + use_mgda: False # to be determined at argument + +TRAIN: + use_naive_taxonomy: False + arch: hrnet + network_name: + layers: + sync_bn: True # adopt sync_bn or not + train_h: 713 + train_w: 713 + scale_min: 0.5 # minimum random scale + scale_max: 2.0 # maximum random scale + short_size: 1080 + rotate_min: -10 # minimum random rotate + rotate_max: 10 # maximum random rotate + zoom_factor: 8 # zoom factor for final prediction during training, be in [1, 2, 4, 8] + ignore_label: 255 + aux_weight: 0.4 + num_examples: 1000000 + train_gpu: [0, 1, 2, 3, 4, 5, 6] + dataset_gpu_mapping: { + 'ade20k-150': [0], + 'bdd': [1], + 'cityscapes-19': [2], + 'coco-panoptic-133': [3], + 'idd-39': [4], + 'mapillary-public65': [5], + 'sunrgbd-37': [6], + } + workers: 64 # data loader workers + batch_size: 35 # batch size for training + batch_size_val: 1 # batch size for validation during training, memory and speed tradeoff + base_lr: 0.01 + epochs: 10 + start_epoch: 0 + power: 0.9 + momentum: 0.9 + weight_decay: 0.0001 + manual_seed: + print_freq: 10 + save_freq: 1 + save_path: default + weight: # path to initial weight (default: none) + resume: # path to latest checkpoint (default: none) + auto_resume: None # xx + evaluate: False # evaluate on validation set, extra gpu memory needed and small batch_size_val is recommend +Distributed: + dist_url: tcp://127.0.0.1:6795 + dist_backend: 'nccl' + multiprocessing_distributed: True + world_size: 1 + rank: 0 + use_apex: True + opt_level: 'O0' + keep_batchnorm_fp32: + loss_scale: diff --git a/mseg_semantic/config/train/1080_release/single_oracle.yaml b/mseg_semantic/config/train/1080_release/single_oracle.yaml new file mode 100755 index 0000000..05be94f --- /dev/null +++ 
b/mseg_semantic/config/train/1080_release/single_oracle.yaml @@ -0,0 +1,53 @@ +DATA: + dataset: single + universal: False + use_mgda: False # to be determined at argument + +TRAIN: + use_naive_taxonomy: False + arch: hrnet + network_name: + layers: + sync_bn: True # adopt sync_bn or not + train_h: 713 + train_w: 713 + scale_min: 0.5 # minimum random scale + scale_max: 2.0 # maximum random scale + short_size: 1080 + rotate_min: -10 # minimum random rotate + rotate_max: 10 # maximum random rotate + zoom_factor: 8 # zoom factor for final prediction during training, be in [1, 2, 4, 8] + ignore_label: 255 + aux_weight: 0.4 + num_examples: 1000000 + train_gpu: [0, 1, 2, 3, 4, 5, 6, 7] + dataset_gpu_mapping: { + 'single': [0, 1, 2, 3, 4, 5, 6, 7], + } + workers: 32 # data loader workers + batch_size: 32 # batch size for training + batch_size_val: 1 # batch size for validation during training, memory and speed tradeoff + base_lr: 0.01 + epochs: 10 + start_epoch: 0 + power: 0.9 + momentum: 0.9 + weight_decay: 0.0001 + manual_seed: + print_freq: 10 + save_freq: 1 + save_path: default + weight: # path to initial weight (default: none) + resume: # path to latest checkpoint (default: none) + auto_resume: None + evaluate: False # evaluate on validation set, extra gpu memory needed and small batch_size_val is recommend +Distributed: + dist_url: tcp://127.0.0.1:6795 + dist_backend: 'nccl' + multiprocessing_distributed: True + world_size: 1 + rank: 0 + use_apex: True + opt_level: 'O0' + keep_batchnorm_fp32: + loss_scale: diff --git a/mseg_semantic/config/train/1080_release/single_universal.yaml b/mseg_semantic/config/train/1080_release/single_universal.yaml new file mode 100755 index 0000000..7573799 --- /dev/null +++ b/mseg_semantic/config/train/1080_release/single_universal.yaml @@ -0,0 +1,54 @@ +DATA: + dataset: single + universal: True + use_mgda: False # to be determined at argument + +TRAIN: + use_naive_taxonomy: False + arch: hrnet + network_name: + layers: + sync_bn: True # adopt sync_bn or not + train_h: 713 + train_w: 713 + scale_min: 0.5 # minimum random scale + scale_max: 2.0 # maximum random scale + short_size: 1080 + rotate_min: -10 # minimum random rotate + rotate_max: 10 # maximum random rotate + zoom_factor: 8 # zoom factor for final prediction during training, be in [1, 2, 4, 8] + ignore_label: 255 + aux_weight: 0.4 + num_examples: 1000000 + train_gpu: [0, 1, 2, 3, 4, 5, 6, 7] + dataset_gpu_mapping: { + 'single': [0, 1, 2, 3, 4, 5, 6, 7], + } + workers: 32 # data loader workers + batch_size: 32 # batch size for training + batch_size_val: 1 # batch size for validation during training, memory and speed tradeoff + base_lr: 0.01 + epochs: 10 + start_epoch: 0 + power: 0.9 + momentum: 0.9 + weight_decay: 0.0001 + manual_seed: + print_freq: 10 + save_freq: 1 + save_path: default + # path to initial weight (default: none) + init_model_path: /home/zhuangli/useful_home/john_v2/real_world_segmentation/zhuang/pretrained_models/hrnetv2_w48_imagenet_pretrained.pth + resume: # path to latest checkpoint (default: none) + auto_resume: None + evaluate: False # evaluate on validation set, extra gpu memory needed and small batch_size_val is recommend +Distributed: + dist_url: tcp://127.0.0.1:6795 + dist_backend: 'nccl' + multiprocessing_distributed: True + world_size: 1 + rank: 0 + use_apex: True + opt_level: 'O0' + keep_batchnorm_fp32: + loss_scale: diff --git a/mseg_semantic/config/train/480_release/mseg-3m.yaml b/mseg_semantic/config/train/480_release/mseg-3m.yaml new file mode 100755 index 
0000000..ec90a92 --- /dev/null +++ b/mseg_semantic/config/train/480_release/mseg-3m.yaml @@ -0,0 +1,67 @@ +DATA: + dataset: [ + ade20k-150-relabeled, + bdd-relabeled, + cityscapes-19-relabeled, + coco-panoptic-133-relabeled, + idd-39-relabeled, + mapillary-public65-relabeled, + sunrgbd-37-relabeled] + universal: True + use_multiple_datasets: True + use_mgda: False # to be determined at argument + +TRAIN: + use_naive_taxonomy: False + arch: hrnet + network_name: + layers: + sync_bn: True # adopt sync_bn or not + train_h: 473 + train_w: 473 + scale_min: 0.5 # minimum random scale + scale_max: 2.0 # maximum random scale + short_size: 480 + rotate_min: -10 # minimum random rotate + rotate_max: 10 # maximum random rotate + zoom_factor: 8 # zoom factor for final prediction during training, be in [1, 2, 4, 8] + ignore_label: 255 + aux_weight: 0.4 + num_examples: 3000000 + train_gpu: [0, 1, 2, 3, 4, 5, 6] + dataset_gpu_mapping: { + 'ade20k-150-relabeled': [0], + 'bdd-relabeled': [1], + 'cityscapes-19-relabeled': [2], + 'coco-panoptic-133-relabeled': [3], + 'idd-39-relabeled': [4], + 'mapillary-public65-relabeled': [5], + 'sunrgbd-37-relabeled': [6], + } + workers: 64 # data loader workers + batch_size: 84 # batch size for training + batch_size_val: 1 # batch size for validation during training, memory and speed tradeoff + base_lr: 0.01 + epochs: 10 + start_epoch: 0 + power: 0.9 + momentum: 0.9 + weight_decay: 0.0001 + manual_seed: + print_freq: 10 + save_freq: 1 + save_path: default + weight: # path to initial weight (default: none) + resume: # path to latest checkpoint (default: none) + auto_resume: None # xx + evaluate: False # evaluate on validation set, extra gpu memory needed and small batch_size_val is recommend +Distributed: + dist_url: tcp://127.0.0.1:6795 + dist_backend: 'nccl' + multiprocessing_distributed: True + world_size: 1 + rank: 0 + use_apex: True + opt_level: 'O0' + keep_batchnorm_fp32: + loss_scale: diff --git a/mseg_semantic/config/train/720_release/mseg-3m.yaml b/mseg_semantic/config/train/720_release/mseg-3m.yaml new file mode 100755 index 0000000..658ddf6 --- /dev/null +++ b/mseg_semantic/config/train/720_release/mseg-3m.yaml @@ -0,0 +1,67 @@ +DATA: + dataset: [ + ade20k-150-relabeled, + bdd-relabeled, + cityscapes-19-relabeled, + coco-panoptic-133-relabeled, + idd-39-relabeled, + mapillary-public65-relabeled, + sunrgbd-37-relabeled] + universal: True + use_multiple_datasets: True + use_mgda: False # to be determined at argument + +TRAIN: + use_naive_taxonomy: False + arch: hrnet + network_name: + layers: + sync_bn: True # adopt sync_bn or not + train_h: 593 + train_w: 593 + scale_min: 0.5 # minimum random scale + scale_max: 2.0 # maximum random scale + short_size: 720 + rotate_min: -10 # minimum random rotate + rotate_max: 10 # maximum random rotate + zoom_factor: 8 # zoom factor for final prediction during training, be in [1, 2, 4, 8] + ignore_label: 255 + aux_weight: 0.4 + num_examples: 3000000 + train_gpu: [0, 1, 2, 3, 4, 5, 6] + dataset_gpu_mapping: { + 'ade20k-150-relabeled': [0], + 'bdd-relabeled': [1], + 'cityscapes-19-relabeled': [2], + 'coco-panoptic-133-relabeled': [3], + 'idd-39-relabeled': [4], + 'mapillary-public65-relabeled': [5], + 'sunrgbd-37-relabeled': [6], + } + workers: 64 # data loader workers + batch_size: 49 # batch size for training + batch_size_val: 1 # batch size for validation during training, memory and speed tradeoff + base_lr: 0.01 + epochs: 10 + start_epoch: 0 + power: 0.9 + momentum: 0.9 + weight_decay: 0.0001 + manual_seed: + print_freq: 10 
+ save_freq: 1 + save_path: default + weight: # path to initial weight (default: none) + resume: # path to latest checkpoint (default: none) + auto_resume: None # xx + evaluate: False # evaluate on validation set, extra gpu memory needed and small batch_size_val is recommend +Distributed: + dist_url: tcp://127.0.0.1:6795 + dist_backend: 'nccl' + multiprocessing_distributed: True + world_size: 1 + rank: 0 + use_apex: True + opt_level: 'O0' + keep_batchnorm_fp32: + loss_scale: diff --git a/mseg_semantic/config/train/test.yaml b/mseg_semantic/config/train/test.yaml new file mode 100755 index 0000000..58e80e0 --- /dev/null +++ b/mseg_semantic/config/train/test.yaml @@ -0,0 +1,60 @@ +DATA: + dataset: [coco-panoptic-v1, mapillary, ade20k-v1, sunrgbd-37, idd-new, cityscapes, bdd] + universal: True + use_multiple_datasets: True + use_mgda: False # to be determined at argument + finetune: False + +TRAIN: + tax_version: 3.0 + arch: hrnet + network_name: + layers: + sync_bn: True # adopt sync_bn or not + train_h: 713 + train_w: 713 + scale_min: 0.5 # minimum random scale + scale_max: 2.0 # maximum random scale + short_size: 1080 + rotate_min: -10 # minimum random rotate + rotate_max: 10 # maximum random rotate + zoom_factor: 8 # zoom factor for final prediction during training, be in [1, 2, 4, 8] + ignore_label: 255 + aux_weight: 0.4 + num_examples: 2000000 + train_gpu: [0, 1, 2, 3, 4, 5, 6] + dataset_gpu_mapping: { + 'coco-panoptic-v1':[0], + 'mapillary': [1], + 'ade20k-v1': [2], + 'idd-new': [3], + 'cityscapes': [4], + 'sunrgbd-37': [5], + 'bdd': [6], + } + workers: 32 # data loader workers + batch_size: 32 # batch size for training + batch_size_val: 1 # batch size for validation during training, memory and speed tradeoff + base_lr: 0.01 + epochs: 10 + start_epoch: 0 + power: 0.9 + momentum: 0.9 + weight_decay: 0.0001 + manual_seed: + print_freq: 10 + save_freq: 1 + save_path: default + weight: # path to initial weight (default: none) + resume: # path to latest checkpoint (default: none) + evaluate: False # evaluate on validation set, extra gpu memory needed and small batch_size_val is recommend +Distributed: + dist_url: tcp://127.0.0.1:6795 + dist_backend: 'nccl' + multiprocessing_distributed: True + world_size: 1 + rank: 0 + use_apex: True + opt_level: 'O0' + keep_batchnorm_fp32: + loss_scale: diff --git a/mseg_semantic/domain_generalization/README.md b/mseg_semantic/domain_generalization/README.md new file mode 100755 index 0000000..608581d --- /dev/null +++ b/mseg_semantic/domain_generalization/README.md @@ -0,0 +1,35 @@ + +## Domain Generalization (DG) Implementation + +As discussed in the [MSeg paper](), we apply a state-of-the-art Domain Generalization (DG) algorithm [1] to MSeg which uses the Classification and Contrastive Semantic Alignment (CCSA) loss. We find that this DG technique seems to hurt performance significantly compared with our technique. + +Suppose we have a deep network h(g(X)), where g(·) is feature extractor, and h(·) is classifier. For context, CCSA ensures sure that the embedding function g(·) maps to a domain invariant space. To do so, we consider every distinct unordered pair of source domains (u, v), and impose the semantic alignment loss as well as the separation loss. + +We adapt the DG technique proposed for the image classification task in [1] to semantic segmentation as follows: +- We add no new parameters to PSPNet, but simply add a contrastive loss. +- We feed a minibatch of 128 crops X through g(·), the ResNet backbone of a PSPNet. 
We then sample N positive pairs of feature map embeddings, corresponding to an 8 × 8 pixel +region per feature map location, and 3N negative pairs. In our experiments, we set N = 1000 or N = 100. +- We choose these 4N pairs by **first** sampling uniformly randomly **from domains**, and **subsequently sampling uniformly randomly from pixel locations** available in each input crop. +- When N > 1000 with a batch size of 128, CUDA memory is insufficient to compute the Euclidean distances between embeddings, forcing us to use N = 1000. In order to determine positive or negative pairs, we downsample the ground truth label map by 8x with ‘nearest’ interpolation +and then compare the corresponding ground truth semantic class of feature map locations. In this way, we identify N pairs of embeddings that belong to the same semantic class. + +### Differences from [Original Implementation](https://github.com/samotiian/CCSA) + +Our implementation differs from [1] in the following ways: +1. We sample pairs on the fly, instead of choosing fixed pairs for each epoch in advance. +2. We first sample an image crop uniformly at random across all domains, then sample uniformly at random from the pixel locations in each image crop. Finding evenly-balanced pairs from each class would +require sampling a very large number of pairs (perhaps billions, since we observe 10^5 times higher density in the most populous MSeg class vs. the least populous class). +3. We compute classification loss over all pixel locations and the contrastive loss only +over sampled pixel locations, whereas [1] computed classification loss only over sampled pairs. +4. We use SGD with momentum, a standard optimization technique for PSPNet, rather than using Adadelta. +5. We use a ResNet backbone instead of a VGG backbone for the feature extractor. + +### Code Structure + +The implementation is found in the following files: +- `ccsa_utils.py`: Tools for sampling pairs for a contrastive loss. +- `ccsa_pspnet.py`: PSPNet model architecture with contrastive loss added before PPM. +- `ccsa_data.py`: PyTorch dataloader that forms minibatches with uniform sampling from each domain. + +### References +[1] Saeid Motiian, Marco Piccirilli, Donald A. Adjeroh, and Gianfranco Doretto. [Unified deep supervised domain adaptation and generalization.](https://arxiv.org/abs/1709.10190) In The IEEE International Conference on Computer Vision (ICCV), Oct 2017. \ No newline at end of file diff --git a/mseg_semantic/domain_generalization/ccsa_data.py b/mseg_semantic/domain_generalization/ccsa_data.py new file mode 100755 index 0000000..e10df0e --- /dev/null +++ b/mseg_semantic/domain_generalization/ccsa_data.py @@ -0,0 +1,142 @@ +#!/usr/bin/python3 + +import os +import os.path +import cv2 +import numpy as np +import pdb +from torch.utils.data import Dataset +import imageio + +from typing import Any, List, Mapping, Tuple + +from mseg_semantic.utils.dataset import ( + is_image_file, + make_dataset +) + + +""" +PyTorch dataloader class to support domain generalization. + +Each minibatch has the usual requested size, but the domains of the +examples inside the minibatch are random. +""" + + +def append_per_tuple( + dataset_2tuples: List[Tuple[str,str]], + new_val: int + ) -> List[Tuple[str,str,int]]: + """ + Given a list of 2-tuple elements, append to every 2-tuple another fixed + item, such that a list of 3-tuples is returned.
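+    For example (with hypothetical filenames): append_per_tuple([('img0.jpg', 'label0.png')], 2) returns [('img0.jpg', 'label0.png', 2)].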
+    """ + dataset_3tuples = [] + for (val0, val1) in dataset_2tuples: + dataset_3tuples += [(val0,val1,new_val)] + return dataset_3tuples + + +def pad_to_max_sz( + tuple_list: List[Tuple[Any,Any,Any]], + max_sz: int + ) -> List[Tuple[Any,Any,Any]]: + """ + Pad (duplicate) the dataset lists of less common datasets. + + Args: + - tuple_list: list of (rgb_fpath, label_fpath, domain_ID) tuples to repeat + - max_sz: desired length of the returned list + + Returns: + - repeated_data: tuple_list repeated and then truncated to exactly max_sz entries + """ + repeated_data = [] + while len(repeated_data) < max_sz: + repeated_data.extend(tuple_list) + + # clamp dataset to max dataset length + repeated_data = repeated_data[:max_sz] + assert len(repeated_data) == max_sz + return repeated_data + + +class CCSA_Data(Dataset): + """ Dataset that concatenates several domains and yields (image, label, domain_idx) triplets. """ + def __init__( + self, + split: str='train', + data_roots: Mapping[str,str]=None, + data_lists: Mapping[str,List[Any]]=None, + transform_dict: Mapping[str, Any]=None + ): + """ + Since each dataset requires its own mapping to the universal taxonomy, we + save each such transform/mapping in a dictionary. + + Args: + - split: string representing dataset split + - data_roots: Mapping from dataset name to absolute paths to dataset dirs + - data_lists: Mapping from dataset name to file paths of dataset images + in the given split + - transform_dict: Mapping from domain index to data transform object. + """ + self.split = split + + # Assign an integer ID to each of the separate "domains". + self.domain_idx_map = { + 'coco-panoptic-v1-qvga': 0, + 'mapillary_vistas_comm-qvga': 1, + 'ade20k-v1-qvga': 2 + } + MAX_DATASET_SZ = 118287 # COCO is currently single largest (by #images) + + # data_list contains paths from all domains + self.data_list = [] + for i, dname in enumerate(self.domain_idx_map.keys()): + + # has (rgb_fpath, label_fpath) + dataset_2tuples = make_dataset(split, data_roots[dname], data_lists[dname]) + # now has (rgb_fpath, label_fpath, domain_ID) + dataset_3tuples = append_per_tuple(dataset_2tuples, self.domain_idx_map[dname]) + + repeated_data = pad_to_max_sz(dataset_3tuples, MAX_DATASET_SZ) + self.data_list.extend(repeated_data) + assert len(self.data_list) == MAX_DATASET_SZ * (i+1) + + # should have: num_images = max_dataset_sz * num_domains + assert len(self.data_list) == len(self.domain_idx_map.keys()) * MAX_DATASET_SZ + self.transform_dict = transform_dict + + + def __len__(self): + return len(self.data_list) + + + def __getitem__(self, index): + image_path, label_path, domain_idx = self.data_list[index] + # if 'leftImg8bit' in image_path and ('idd' not in image_path): + # print(image_path, label_path) + # logger.info(image_path + ' ' + label_path) + image = cv2.imread(image_path, cv2.IMREAD_COLOR) # BGR 3 channel ndarray with shape H * W * 3 + + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # convert cv2 read image from BGR order to RGB order + image = np.float32(image) + + label = imageio.imread(label_path) # GRAY 1 channel ndarray with shape H * W + label = label.astype(np.int64) + + if image.shape[0] != label.shape[0] or image.shape[1] != label.shape[1]: + raise (RuntimeError("Image & label shape mismatch: " + image_path + " " + label_path + "\n")) + + # Each dataset requires its own mapping to the universal taxonomy.
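+        # For the 'test' split no ground-truth label is used, so a dummy slice of the image (channel 0) is passed through the transform in place of a label.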
+        if self.transform_dict is not None: +            if self.split != 'test': +                image, label = self.transform_dict[domain_idx](image, label) +            else: +                image, label = self.transform_dict[domain_idx](image, image[:, :, 0]) + +        return image, label, domain_idx + + diff --git a/mseg_semantic/domain_generalization/ccsa_pspnet.py b/mseg_semantic/domain_generalization/ccsa_pspnet.py new file mode 100755 index 0000000..656db4c --- /dev/null +++ b/mseg_semantic/domain_generalization/ccsa_pspnet.py @@ -0,0 +1,200 @@ +#!/usr/bin/python3 + +import math +import numpy as np +import os +import pdb +import random +import sys +import time +import torch +from torch import nn +import torch.nn.functional as F + +from mseg_semantic.model.pspnet import PPM +import mseg_semantic.model.resnet as models + +from mseg_semantic.domain_generalization.ccsa_utils import ( + paired_euclidean_distance, + contrastive_loss, + sample_pair_indices, + get_merged_pair_embeddings +) +from mseg_semantic.utils.json_utils import save_json_dict + +""" +Reimplementation of "Unified Deep Supervised Domain Adaptation and Generalization" + +Arxiv: https://arxiv.org/pdf/1709.10190.pdf +Github: https://github.com/samotiian/CCSA + +We take a PSPNet, and add a contrastive loss on its intermediate embeddings. +""" + + +class CCSA_PSPNet(nn.Module): + """ + For the embedding function g, the original authors used the convolutional + layers of the VGG-16 architecture [55] followed by 2 fully + connected layers with output size of 1024 and 128, respectively. + For the prediction function h, they used a fully connected layer with + softmax activation. + + ResNet is our embedding function. Our classifier is PPM + Conv2d layers. + The prediction function should include a softmax function inside of it; + we use a 1x1 conv instead of an fc layer. + + To create positive and negative pairs for training the network, for each + sample of a source domain the authors randomly selected 5 samples from + each remaining source domain, helping in this way to avoid overfitting. + However, to train a deeper network together with convolutional layers, the + authors state it is enough to create a large amount of positive and + negative pairs. + + We sample each minibatch uniformly from all domains, and then distribute + it among the workers. + + Since the original authors compute the CE loss only on sampled pairs, they compute + CE on A first, then CE on B, and the contrastive loss A->B both times + (with a single gradient update after both). We compute CE on all examples at once. + + Gradient steps can be taken in between the pair of losses (separate forward + and backward passes), or after aggregating both losses. + + """ + def __init__(self, layers=50, bins=(1, 2, 3, 6), dropout=0.1, classes=2, zoom_factor=8, use_ppm=True, criterion=nn.CrossEntropyLoss(ignore_index=255), BatchNorm=nn.BatchNorm2d, pretrained=True, network_name=None): + """ + + nn.CrossEntropyLoss() combines nn.LogSoftmax() and nn.NLLLoss() in one single class.
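+
+        Args:
+        - layers: ResNet backbone depth (50, 101, or 152).
+        - bins: pooling bin sizes for the PPM module.
+        - dropout: dropout probability used before the final 1x1 classifier convs.
+        - classes: number of output classes (must be > 1).
+        - zoom_factor: upsampling factor for the final prediction (1, 2, 4, or 8).
+        - use_ppm: whether to append the Pyramid Pooling Module.
+        - criterion: loss applied to the main and auxiliary classification heads.
+        - BatchNorm: batch-norm class to use (e.g. nn.BatchNorm2d or a synced variant).
+        - pretrained: whether to load ImageNet-pretrained ResNet weights.
+        - network_name: network identifier passed through from the config.
+
+        Example usage in training mode (hypothetical names and shapes; crop size must satisfy (H-1) % 8 == 0 and (W-1) % 8 == 0):
+            model = CCSA_PSPNet(layers=50, classes=num_classes, criterion=nn.CrossEntropyLoss(ignore_index=255))
+            preds, main_loss, aux_ce_loss = model(x, y, batch_domain_idxs)  # x: (N,3,713,713), y: (N,713,713)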
+ """ + super(CCSA_PSPNet, self).__init__() + assert layers in [50, 101, 152] + assert 2048 % len(bins) == 0 + assert classes > 1 + assert zoom_factor in [1, 2, 4, 8] + self.zoom_factor = zoom_factor + self.use_ppm = use_ppm + self.criterion = criterion + models.BatchNorm = BatchNorm + + if layers == 50: + resnet = models.resnet50(pretrained=pretrained) + elif layers == 101: + resnet = models.resnet101(pretrained=pretrained) + elif layers == 152: + resnet = models.resnet152(pretrained=pretrained) + + + self.layer0 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu, resnet.conv2, resnet.bn2, resnet.relu, resnet.conv3, resnet.bn3, resnet.relu, resnet.maxpool) + self.layer1, self.layer2, self.layer3, self.layer4 = resnet.layer1, resnet.layer2, resnet.layer3, resnet.layer4 + + + for n, m in self.layer3.named_modules(): + if 'conv2' in n: + m.dilation, m.padding, m.stride = (2, 2), (2, 2), (1, 1) + elif 'downsample.0' in n: + m.stride = (1, 1) + for n, m in self.layer4.named_modules(): + if 'conv2' in n: + m.dilation, m.padding, m.stride = (4, 4), (4, 4), (1, 1) + elif 'downsample.0' in n: + m.stride = (1, 1) + + fea_dim = 2048 + if use_ppm: + self.ppm = PPM(fea_dim, int(fea_dim/len(bins)), bins, BatchNorm) + fea_dim *= 2 + self.cls = nn.Sequential( + nn.Conv2d(fea_dim, 512, kernel_size=3, padding=1, bias=False), + BatchNorm(512), + nn.ReLU(inplace=True), + nn.Dropout2d(p=dropout), + nn.Conv2d(512, classes, kernel_size=1) + ) + if self.training: + self.aux = nn.Sequential( + nn.Conv2d(1024, 256, kernel_size=3, padding=1, bias=False), + BatchNorm(256), + nn.ReLU(inplace=True), + nn.Dropout2d(p=dropout), + nn.Conv2d(256, classes, kernel_size=1) + ) + + def forward( + self, + x: torch.Tensor, + y: torch.Tensor=None, + batch_domain_idxs: torch.Tensor=None, + alpha: float = 0.25, + num_pos_pairs: int=100): + """ + Forward pass. + + Args: + - x: Tensor of shape (N,C,H,W) + - y: Tensor of shape (N,H,W) + - batch_domain_idxs: Tensor of shape (N,) with domain ID + of each minibatch example. 
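+            (e.g. 0 for 'coco-panoptic-v1-qvga', 1 for 'mapillary_vistas_comm-qvga', 2 for 'ade20k-v1-qvga', following domain_idx_map in ccsa_data.py)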
+ - alpha: float acting as multiplier on contrastive loss + (convex combination) + - num_pos_pairs: number of pairs to use in contrastive loss + + Returns: + - logits + - main_loss + - aux_ce_loss + """ + x_size = x.size() + assert (x_size[2]-1) % 8 == 0 and (x_size[3]-1) % 8 == 0 + h = int((x_size[2] - 1) / 8 * self.zoom_factor + 1) + w = int((x_size[3] - 1) / 8 * self.zoom_factor + 1) + + x = self.layer0(x) # get 128 channels, 4x downsample in H/W + x = self.layer1(x) # get 256 channels, H/W constant + x = self.layer2(x) # get 512 channels, 2x additional downsample in H/W + x_tmp = self.layer3(x) # get 1024 channels, H/W constant + x = self.layer4(x_tmp) # get 2048 channels, H/W constant + + resnet_embedding = x.clone() + + if self.use_ppm: + x = self.ppm(x) # get 4096 channels from channel concat, H/W constant + x = self.cls(x) # get n_classes channels, H/W constant + if self.zoom_factor != 1: # get n_classes channels, back to input crop H/W (8x) + x = F.interpolate(x, size=(h, w), mode='bilinear', align_corners=True) + + if self.training: + aux = self.aux(x_tmp) # get n_classes channels, with 1/8 input crop H/W + if self.zoom_factor != 1: + aux = F.interpolate(aux, size=(h, w), mode='bilinear', align_corners=True) + + # ---- CCSA addition ----- + main_ce_loss = self.criterion(x, y) + aux_ce_loss = self.criterion(aux, y) + + pos_pair_info, neg_pair_info = sample_pair_indices( + y.type(torch.float32), # label map must be floats to use F.interpolate() + batch_domain_idxs, + num_pos_pairs = num_pos_pairs, + neg_to_pos_ratio = 3, + downsample_factor = 8) + + # y_c indicates if class indices are identical (examples are semantic pairs) + y_c, a_embedding, b_embedding = get_merged_pair_embeddings(pos_pair_info, neg_pair_info, resnet_embedding) + + dists = paired_euclidean_distance(a_embedding, b_embedding) + csa_loss = contrastive_loss(y_c, dists) + + # To balance the classification versus the contrastive semantic + # alignment portion of the loss (5), (7) and (8) are normalized + # and weighted by (1-alpha) and by alpha + main_loss = csa_loss * (alpha) + main_ce_loss * (1-alpha) + aux_ce_loss *= (1-alpha) + # ---- CCSA addition ----- + + + return x.max(1)[1], main_loss, aux_ce_loss + else: + return x + + diff --git a/mseg_semantic/domain_generalization/ccsa_train.py b/mseg_semantic/domain_generalization/ccsa_train.py new file mode 100755 index 0000000..96ee871 --- /dev/null +++ b/mseg_semantic/domain_generalization/ccsa_train.py @@ -0,0 +1,805 @@ +#!/usr/bin/python3 + +import time +start = time.time() +# time.sleep(2) + +import apex +# import cv2 + + + +# import math +# import numpy as np +# import os +# import pdb +# import random + +# from taxonomy.utils_flat import * + + + + + + + +# end = time.time() +# print(end - start) + + +""" +TODO: GET THE MODELS TRAINING, THEN GO BACK LATER AND WRITE THE +UNIT TESTS FOR TAXONOMY CONVERTER + +Should have fixed ratios --> then experiment with it. + +Train w/ MGDA +Train w/o MGDA. +Get results on the training set as well. + +Submit the jobs first -- for all training/test sets. + +fix the max_iters -- 1.2 Million examples + +make sure we have the right flags to evaluate on the train dataset. +""" + +""" +NVIDIA Apex has 4 optimization levels: + + O0 (FP32 training): basically a no-op. Everything is FP32 just as before. + O1 (Conservative Mixed Precision): only some whitelist ops are done in FP16. + O2 (Fast Mixed Precision): this is the standard mixed precision training. 
+ It maintains FP32 master weights and optimizer.step acts directly on the FP32 master weights. + O3 (FP16 training): full FP16. Passing keep_batchnorm_fp32=True can speed + things up as cudnn batchnorm is faster anyway. +""" + + + +class ToFlatLabel(object): + def __init__(self, tc_init, dataset): + self.dataset = dataset + self.tc = tc_init + + def __call__(self, image, label): + return image, self.tc.transform_label(label, self.dataset) + +# cv2.ocl.setUseOpenCL(False) +# cv2.setNumThreads(0) + + +def get_parser(): + import argparse + from util import config + + parser = argparse.ArgumentParser(description='PyTorch Semantic Segmentation') + parser.add_argument('--config', type=str, default='config/ade20k/ade20k_pspnet50.yaml', help='config file') + parser.add_argument('opts', help='see config/ade20k/ade20k_pspnet50.yaml for all options', default=None, nargs=argparse.REMAINDER) + args = parser.parse_args() + assert args.config is not None + cfg = config.load_cfg_from_cfg_file(args.config) + if args.opts is not None: + cfg = config.merge_cfg_from_list(cfg, args.opts) + return cfg + + +def get_logger(): + import logging + logger_name = "main-logger" + logger = logging.getLogger(logger_name) + logger.setLevel(logging.INFO) + handler = logging.StreamHandler() + fmt = "[%(asctime)s %(levelname)s %(filename)s line %(lineno)d %(process)d] %(message)s" + handler.setFormatter(logging.Formatter(fmt)) + logger.addHandler(handler) + return logger + + +def worker_init_fn(worker_id): + import random + random.seed(args.manual_seed + worker_id) + + +def main_process(): + return not args.multiprocessing_distributed or (args.multiprocessing_distributed and args.rank % args.ngpus_per_node == 0) + + +def main(): + """ + """ + # with open('test_2.txt', 'a') as f: + # f.write('test') + # f.close() + import torch, os, math + import torch.backends.cudnn as cudnn + import torch.nn as nn + import torch.nn.functional as F + import torch.nn.parallel + import torch.optim + import torch.utils.data + + import torch.multiprocessing as mp + import torch.distributed as dist +# from tensorboardX import SummaryWriter + from util.dataset_config import infos + + from util import config + from util.verification_utils import verify_architecture + from util.avg_meter import AverageMeter, SegmentationAverageMeter + from taxonomy.utils_flat import TaxonomyConverter + from taxonomy.utils_baseline import StupidTaxonomyConverter + import pickle + + + print('Using PyTorch version: ', torch.__version__) + args = get_parser() + os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(x) for x in args.train_gpu) + + + ###### FLAT-MIX CODE ####################### + print(os.environ["CUDA_VISIBLE_DEVICES"]) + + # Randomize args.dist_url too avoid conflicts on same machine + args.dist_url = args.dist_url[:-2] + str(os.getpid() % 100).zfill(2) + + from util.dataset_config import infos + + + if args.use_multiple_datasets and args.universal: # multiple datasets training, must be on universal taxononmy + if args.tax_version == 0: + args.tc = StupidTaxonomyConverter(version=args.tax_version) + else: + if args.finetune: + args.tc = TaxonomyConverter(version=args.tax_version, finetune=True, finetune_dataset=args.finetune_dataset) + else: + args.tc = TaxonomyConverter(version=args.tax_version) #, train_datasets=args.dataset, test_datasets=args.test_dataset) #, train_datasets=args.dataset, test_datasets=args.test_dataset) + + args.data_root = {dataset:infos[dataset].dataroot for dataset in args.dataset} + args.train_list = 
{dataset:infos[dataset].trainlist for dataset in args.dataset} + args.classes = args.tc.classes + # args.save_path = args.save_path.replace("{}", '-'.join([infos[dataset].shortname for dataset in args.dataset])) + + elif (not args.use_multiple_datasets) and args.universal: # single dataset on universal taxonomy training + args.tc = TaxonomyConverter(version=args.tax_version, train_datasets=[args.dataset], test_datasets=args.test_dataset) + args.data_root = infos[args.dataset].dataroot + args.train_list = infos[args.dataset].trainlist + args.classes = args.tc.classes + # args.save_path = args.save_path.replace("{}", info[args.dataset].shortname) + + elif (not args.use_multiple_datasets) and (not args.universal): # single dataset on self taxnonmy training + args.data_root = infos[args.dataset].dataroot + args.train_list = infos[args.dataset].trainlist + args.classes = infos[args.dataset].num_classes + # args.save_path = args.save_path.replace("{}", infos[args.dataset].shortname) + else: + print('wrong mode, please check') + exit() + + # verify arch after args.classes is populated + verify_architecture(args) + + if args.manual_seed is not None: + cudnn.benchmark = False + cudnn.deterministic = True + torch.manual_seed(args.manual_seed) + np.random.seed(args.manual_seed) + torch.manual_seed(args.manual_seed) + torch.cuda.manual_seed_all(args.manual_seed) + if args.dist_url == "env://" and args.world_size == -1: + args.world_size = int(os.environ["WORLD_SIZE"]) + args.distributed = args.world_size > 1 or args.multiprocessing_distributed + args.ngpus_per_node = len(args.train_gpu) + if len(args.train_gpu) == 1: + args.sync_bn = False + args.distributed = False + args.multiprocessing_distributed = False + if args.multiprocessing_distributed: + args.world_size = args.ngpus_per_node * args.world_size + mp.spawn(main_worker, nprocs=args.ngpus_per_node, args=(args.ngpus_per_node, args)) + else: + main_worker(args.train_gpu, args.ngpus_per_node, args) + + +def get_train_transform_list(args, split, dataset_name): + """ + Args: + - args: + - split + + Return: + - List of transforms + """ + from util.normalization_utils import get_imagenet_mean_std + from util import transform + + mean, std = get_imagenet_mean_std() + if split == 'train': + transform_list = [ + transform.RandScale([args.scale_min, args.scale_max]), + transform.RandRotate([args.rotate_min, args.rotate_max], padding=mean, ignore_label=args.ignore_label), + transform.RandomGaussianBlur(), + transform.RandomHorizontalFlip(), + transform.Crop([args.train_h, args.train_w], crop_type='rand', padding=mean, ignore_label=args.ignore_label), + transform.ToTensor(), + transform.Normalize(mean=mean, std=std) + ] + elif split == 'val': + transform_list = [ + transform.Crop([args.train_h, args.train_w], crop_type='center', padding=mean, ignore_label=args.ignore_label), + transform.ToTensor(), + transform.Normalize(mean=mean, std=std) + ] + else: + print('Unknown split. 
Quitting ...') + quit() + + transform_list += [ToFlatLabel(args.tc, dataset_name)] + + return transform.Compose(transform_list) + + +def load_pretrained_weights(args, model, optimizer): + """ + Args: + - args + - model: Passed by reference + + Returns: + - None + """ + import torch, os, math + + resume_iter = 0 + + if args.weight: + if os.path.isfile(args.weight): + if main_process(): + logger.info("=> loading weight '{}'".format(args.weight)) + checkpoint = torch.load(args.weight) + model.load_state_dict(checkpoint['state_dict']) + if main_process(): + logger.info("=> loaded weight '{}'".format(args.weight)) + else: + if main_process(): + logger.info("=> no weight found at '{}'".format(args.weight)) + + if args.resume: + if os.path.isfile(args.resume): + if main_process(): + logger.info("=> loading checkpoint '{}'".format(args.resume)) + # checkpoint = torch.load(args.resume) + checkpoint = torch.load(args.resume, map_location=lambda storage, loc: storage.cuda()) + # args.start_epoch = checkpoint['epoch'] + args.start_epoch = 0 # we don't really rely on this, but on resume_iter + if args.finetune: + args.start_epoch = 0 + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + resume_iter = checkpoint['current_iter'] + if main_process(): + logger.info("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) + else: + if main_process(): + logger.info("=> no checkpoint found at '{}'".format(args.resume) + ' Please check') + exit() + + return model, optimizer, resume_iter + + # optimizer = get_optimizer(args.model) + + + +def get_model(args, criterion, BatchNorm): + """ + Args: + - + + Returns: + - + """ + if args.arch == 'psp': + from ccsa.ccsa_pspnet import CCSA_PSPNet + model = CCSA_PSPNet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, criterion=criterion, BatchNorm=BatchNorm, network_name=args.network_name) + + + elif args.arch == 'psa': + model = PSANet(layers=args.layers, classes=args.classes, zoom_factor=args.zoom_factor, psa_type=args.psa_type, + compact=args.compact, shrink_factor=args.shrink_factor, mask_h=args.mask_h, mask_w=args.mask_w, + normalization_factor=args.normalization_factor, psa_softmax=args.psa_softmax, + criterion=criterion, + BatchNorm=BatchNorm) + return model + + +def get_optimizer(args, model): + """ + Create a parameter list, where first 5 entries (ResNet backbone) have low learning rate + to not clobber pre-trained weights, and later entries (PPM derivatives) have high learning rate. 
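+    args.index_split is set to 5 below; train() uses it to keep the 10x learning-rate multiplier on the newly initialized modules (ppm/cls/aux or psa/cls/aux) when the learning rate is decayed each iteration. With base_lr = 0.01, the backbone groups therefore start at lr 0.01 and the new modules at lr 0.1 (unless finetuning, in which case all groups use base_lr).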
+ + Args: + - args + - model + + Returns: + - optimizer + """ + import torch, os, math + + if args.arch == 'psp': + modules_ori = [model.layer0, model.layer1, model.layer2, model.layer3, model.layer4] + modules_new = [model.ppm, model.cls, model.aux] + elif args.arch == 'psa': + modules_ori = [model.layer0, model.layer1, model.layer2, model.layer3, model.layer4] + modules_new = [model.psa, model.cls, model.aux] + params_list = [] + for module in modules_ori: + params_list.append(dict(params=module.parameters(), lr=args.base_lr)) + + for module in modules_new: + if args.finetune: + params_list.append(dict(params=module.parameters(), lr=args.base_lr)) + else: + params_list.append(dict(params=module.parameters(), lr=args.base_lr * 10)) + args.index_split = 5 + optimizer = torch.optim.SGD(params_list, lr=args.base_lr, momentum=args.momentum, weight_decay=args.weight_decay) + return optimizer + + +def get_rank_to_dataset_map(args): + """ + Obtain a mapping from GPU rank (index) to the name of the dataset residing on this GPU. + + Args: + - args + + Returns: + - rank_to_dataset_map + """ + rank_to_dataset_map = {} + for dataset, gpu_idxs in args.dataset_gpu_mapping.items(): + for gpu_idx in gpu_idxs: + rank_to_dataset_map[gpu_idx] = dataset + print('Rank to dataset map: ', rank_to_dataset_map) + return rank_to_dataset_map + + +def main_worker(gpu, ngpus_per_node, argss): + """ + Consider if a dataset has size 18,000 and is placed on a single GPU, of 4 gpus. + Batch size 32. In this case, len(train_data) = 18,000 but len(train_loader) = 2250 + Because effective batch size is 8. + + Consider if a dataset has size 118287. If placed on 2/4 gpus with batch size 32. + In this case, len(train_data) = 118287 and len(train_loader) = 7393. + """ + + # with open('test_3.txt', 'a') as f: + # f.write('test') + # f.close() + global args + args = argss + + from ccsa.ccsa_data import CCSA_Data + from util import dataset + from taxonomy.utils_flat import TaxonomyConverter + from multiobjective_opt.dist_mgda_utils import scale_loss_and_gradients + import apex + import torch, os, math + import torch.backends.cudnn as cudnn + import torch.nn as nn + import torch.nn.functional as F + import torch.nn.parallel + import torch.optim + import torch.utils.data + + import torch.multiprocessing as mp + import torch.distributed as dist + from tensorboardX import SummaryWriter + from util.dataset_config import infos + + from util import config + from util.verification_utils import verify_architecture + from util.avg_meter import AverageMeter, SegmentationAverageMeter + from util.util import poly_learning_rate + + # with open('test_mainworker.txt', 'a') as f: + # f.write('test\t') + # f.close() +# os.sleep + # time.sleep(30) + if args.sync_bn: + if args.multiprocessing_distributed: + # BatchNorm = torch.nn.SyncBatchNorm + BatchNorm = apex.parallel.SyncBatchNorm + else: + from lib.sync_bn.modules import BatchNorm2d + BatchNorm = BatchNorm2d + else: + BatchNorm = nn.BatchNorm2d + print('Using batchnorm variant: ', BatchNorm) + if args.distributed: + if args.dist_url == "env://" and args.rank == -1: + args.rank = int(os.environ["RANK"]) + if args.multiprocessing_distributed: + args.rank = args.rank * ngpus_per_node + gpu + dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank) + + criterion = nn.CrossEntropyLoss(ignore_index=args.ignore_label) + model = get_model(args, criterion, BatchNorm) + optimizer = get_optimizer(args, model) + + if True: + global logger, 
writer + logger = get_logger() + writer = SummaryWriter(args.save_path) + args.logger = logger + + if main_process(): + logger.info(args) + logger.info("=> creating model ...") + logger.info("Classes: {}".format(args.classes)) + logger.info(model) + if args.distributed: + torch.cuda.set_device(gpu) + args.batch_size = int(args.batch_size / ngpus_per_node) + args.batch_size_val = int(args.batch_size_val / ngpus_per_node) + args.batch_size_val = max(1, args.batch_size_val) + args.workers = int(args.workers / ngpus_per_node) + if args.use_apex: + model, optimizer = apex.amp.initialize(model.cuda(), optimizer, opt_level=args.opt_level, keep_batchnorm_fp32=args.keep_batchnorm_fp32, loss_scale=args.loss_scale) + model = apex.parallel.DistributedDataParallel(model) + else: + model = torch.nn.parallel.DistributedDataParallel(model.cuda(), device_ids=[gpu]) + + else: + model = torch.nn.DataParallel(model.cuda()) + + model, optimizer, args.resume_iter = load_pretrained_weights(args, model, optimizer) + + domain_idx_map = { + 'coco-panoptic-v1-qvga': 0, + 'mapillary_vistas_comm-qvga': 1, + 'ade20k-v1-qvga': 2 + } + + train_transform_dict = {} + for dname, domain_idx in domain_idx_map.items(): + train_transform_dict[domain_idx] = get_train_transform_list(args, split='train', dataset_name=dname) + + # FLATMIX ADDITION + train_data = CCSA_Data(split='train', data_roots=args.data_root, data_lists=args.train_list, transform_dict=train_transform_dict) + + from util.txt_utils import read_txt_file + num_examples = len(train_data) + + num_examples_total = args.num_examples + args.epochs = math.ceil(num_examples_total / num_examples) + args.max_iters = math.floor(num_examples_total / (args.batch_size * args.ngpus_per_node)) + + # avoid too frequent saving to waste time, on small datasets + if args.epochs > 200: + args.save_freq = args.epochs // 100 + + logger.info(f'Train data has len {len(train_data)} on {args.rank}') + if args.distributed: + + train_sampler = torch.utils.data.distributed.DistributedSampler(train_data, num_replicas=args.ngpus_per_node, rank=args.rank) + logger.info(f"rank: {args.rank}, actual_replica: {train_sampler.num_replicas}, length of sampler, {len(train_sampler)}") + + else: + train_sampler = None + train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=(train_sampler is None), num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True) + logger.info(f'Train loader has len {len(train_loader)} on {args.rank}') + + if args.evaluate: + val_transform = get_train_transform_list(args, split='val') + # val_transform = transform.Compose(val_transform_list) + val_data = dataset.SemData(split='val', data_root=args.data_root, data_list=args.val_list, transform=val_transform) + if args.distributed: + val_sampler = torch.utils.data.distributed.DistributedSampler(val_data) + else: + val_sampler = None + val_loader = torch.utils.data.DataLoader(val_data, batch_size=args.batch_size_val, shuffle=False, num_workers=args.workers, pin_memory=True, sampler=val_sampler) + + for epoch in range(args.start_epoch, args.epochs): + epoch_log = epoch + 1 + logger.info(f'New epoch {epoch_log} starts on rank {args.rank}') + + if args.distributed: + train_sampler.set_epoch(epoch) + print(f'On training epoch {epoch} in GPU {args.rank}') + loss_train, mIoU_train, mAcc_train, allAcc_train = train(train_loader, model, optimizer, epoch) + # if main_process(): + # writer.add_scalar('loss_train', loss_train, epoch_log) + # writer.add_scalar('mIoU_train', mIoU_train, 
epoch_log) + # writer.add_scalar('mAcc_train', mAcc_train, epoch_log) + # writer.add_scalar('allAcc_train', allAcc_train, epoch_log) + + if ((epoch_log % args.save_freq == 0)) and main_process(): + filename = args.save_path + '/train_epoch_' + str(epoch_log) + '.pth' + logger.info('Saving checkpoint to: ' + filename) + torch.save({'epoch': epoch_log, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), + 'current_iter': (epoch + 1) * len(train_loader), 'max_iter': args.max_iters}, filename) + if epoch_log / args.save_freq > 2: + # if (epoch_log - 3) % 10 != 0: + if not args.finetune: + deletename = args.save_path + '/train_epoch_' + str(epoch_log - args.save_freq * 2) + '.pth' + os.remove(deletename) + + if (epoch == args.epochs - 1) and main_process(): + filename = args.save_path + '/train_epoch_final.pth' + logger.info('Saving checkpoint to: ' + filename) + torch.save({'epoch': epoch_log, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), + 'current_iter': (epoch + 1) * len(train_loader), 'max_iter': args.max_iters}, filename) + exit() + + + + # if args.evaluate: + # loss_val, mIoU_val, mAcc_val, allAcc_val = validate(val_loader, model, criterion) + # if main_process(): + # writer.add_scalar('loss_val', loss_val, epoch_log) + # writer.add_scalar('mIoU_val', mIoU_val, epoch_log) + # writer.add_scalar('mAcc_val', mAcc_val, epoch_log) + # writer.add_scalar('allAcc_val', allAcc_val, epoch_log) + + + + + + +def train(train_loader, model, optimizer, epoch): + """ + No MGDA -- whole iteration takes 0.31 sec. + 0.24 sec to run typical backward pass (with no MGDA) + + With MGDA -- whole iteration takes 1.10 sec. + 1.05 sec to run backward pass w/ MGDA subroutine -- scale_loss_and_gradients() in every iteration. + + TODO: Profile which part of Frank-Wolfe is slow + + """ + + from util.avg_meter import AverageMeter, SegmentationAverageMeter + from util.util import poly_learning_rate + + import torch.distributed as dist + from multiobjective_opt.dist_mgda_utils import scale_loss_and_gradients + + + + import torch, os, math, time + + + batch_time = AverageMeter() + data_time = AverageMeter() + main_loss_meter = AverageMeter() + aux_loss_meter = AverageMeter() + loss_meter = AverageMeter() + sam = SegmentationAverageMeter() + + model.train() + # set bn to be eval() and see the norm + # def set_bn_eval(m): + # classname = m.__class__.__name__ + # if classname.find('BatchNorm') != -1: + # m.eval() + # model.apply(set_bn_eval) + end = time.time() + max_iter = args.max_iters + + for i, (input, target, batch_domain_idxs) in enumerate(train_loader): + # pass + # if main_process(): + data_time.update(time.time() - end) + if args.zoom_factor != 8: + h = int((target.size()[1] - 1) / 8 * args.zoom_factor + 1) + w = int((target.size()[2] - 1) / 8 * args.zoom_factor + 1) + # 'nearest' mode doesn't support align_corners mode and 'bilinear' mode is fine for downsampling + target = F.interpolate(target.unsqueeze(1).float(), size=(h, w), mode='bilinear', align_corners=True).squeeze(1).long() + input = input.cuda(non_blocking=True) + target = target.cuda(non_blocking=True) + batch_domain_idxs = batch_domain_idxs.cuda(non_blocking=True) + + if args.use_mgda: + output, loss, main_loss, aux_loss, scales = forward_backward_mgda(input, target, model, optimizer, args) + else: + #print('Batch domain idxs: ', batch_domain_idxs.shape, batch_domain_idxs.device, batch_domain_idxs) + output, loss, main_loss, aux_loss = forward_backward_full_sync(input, target, model, optimizer, args, 
batch_domain_idxs) + + optimizer.step() + + n = input.size(0) + if args.multiprocessing_distributed: + main_loss, aux_loss, loss = main_loss.detach() * n, aux_loss * n, loss * n # not considering ignore pixels + count = target.new_tensor([n], dtype=torch.long) + dist.all_reduce(main_loss), dist.all_reduce(aux_loss), dist.all_reduce(loss), dist.all_reduce(count) + n = count.item() + main_loss, aux_loss, loss = main_loss / n, aux_loss / n, loss / n + + sam.update_metrics_gpu(output, target, args.classes, args.ignore_label, args.multiprocessing_distributed) + + main_loss_meter.update(main_loss.item(), n) + aux_loss_meter.update(aux_loss.item(), n) + loss_meter.update(loss.item(), n) + # if main_process(): + if i > 0: + batch_time.update(time.time() - end) + end = time.time() + + current_iter = epoch * len(train_loader) + i + 1 + args.resume_iter + current_lr = poly_learning_rate(args.base_lr, current_iter, max_iter, power=args.power) + # current_lr = 0 + for index in range(0, args.index_split): + optimizer.param_groups[index]['lr'] = current_lr + for index in range(args.index_split, len(optimizer.param_groups)): + if args.finetune: + optimizer.param_groups[index]['lr'] = current_lr + else: + optimizer.param_groups[index]['lr'] = current_lr * 10 + + remain_iter = max_iter - current_iter + remain_time = remain_iter * batch_time.avg + t_m, t_s = divmod(remain_time, 60) + t_h, t_m = divmod(t_m, 60) + remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m), int(t_s)) + + if (i + 1) % args.print_freq == 0 and main_process(): + # if True: + logger.info('Epoch: [{}/{}][{}/{}] ' + 'Data {data_time.val:.3f} ({data_time.avg:.3f}) ' + 'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) ' + 'Remain {remain_time} ' + 'MainLoss {main_loss_meter.val:.4f} ' + 'AuxLoss {aux_loss_meter.val:.4f} ' + 'Loss {loss_meter.val:.4f} ' + 'Accuracy {accuracy:.4f}.'.format(epoch+1, args.epochs, i + 1, len(train_loader), + batch_time=batch_time, + data_time=data_time, + remain_time=remain_time, + main_loss_meter=main_loss_meter, + aux_loss_meter=aux_loss_meter, + loss_meter=loss_meter, + accuracy=sam.accuracy) + f'current_iter: {current_iter}' + f' rank: {args.rank} ') + if args.use_mgda and main_process(): + # Scales identical in each process, so print out only in main process. + scales_str = [f'{d}: {scale:.2f}' for d,scale in scales.items()] + scales_str = ' , '.join(scales_str) + logger.info(f'Scales: {scales_str}') + + if main_process() and current_iter == max_iter - 5: # early exit to prevent iter number not matching between gpus + break + # if main_process(): + # writer.add_scalar('loss_train_batch', main_loss_meter.val, current_iter) + # writer.add_scalar('mIoU_train_batch', np.mean(intersection / (union + 1e-10)), current_iter) + # writer.add_scalar('mAcc_train_batch', np.mean(intersection / (target + 1e-10)), current_iter) + # writer.add_scalar('allAcc_train_batch', accuracy, current_iter) + + iou_class, accuracy_class, mIoU, mAcc, allAcc = sam.get_metrics() + # if main_process(): + logger.info('Train result at epoch [{}/{}]: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(epoch+1, args.epochs, mIoU, mAcc, allAcc)) + return main_loss_meter.avg, mIoU, mAcc, allAcc + + +def forward_backward_full_sync(input, target, model, optimizer, args, batch_domain_idxs): + """ + Args: + - input: Tensor of size (?) representing + - target: Tensor of size (?) representing + - model + - optimizer + - args + + Returns: + - output: Tensor of size (?) representing + - loss: Tensor of size (?) 
representing + - main_loss: Tensor of size (?) representing + - aux_loss: Tensor of size (?) representing + """ + output, main_loss, aux_loss = model(input, target, batch_domain_idxs) + if not args.multiprocessing_distributed: + main_loss, aux_loss = torch.mean(main_loss), torch.mean(aux_loss) + loss = main_loss + args.aux_weight * aux_loss + + optimizer.zero_grad() + if args.use_apex and args.multiprocessing_distributed: + with apex.amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + return output, loss, main_loss, aux_loss + + +def forward_backward_mgda(input, target, model, optimizer, args): + from multiobjective_opt.dist_mgda_utils import scale_loss_and_gradients + """ + We rely upon the ddp.no_sync() of gradients: + https://github.com/pytorch/pytorch/blob/master/torch/nn/parallel/distributed.py + + Args: + - input: Tensor of size (?) representing + - target: Tensor of size (?) representing + - model + - optimizer + - args + + Returns: + - output: Tensor of size (?) representing + - loss: Tensor of size (?) representing + - main_loss: Tensor of size (?) representing + - aux_loss: Tensor of size (?) representing + """ + with model.no_sync(): + output, main_loss, aux_loss = model(input, target) + loss = main_loss + args.aux_weight * aux_loss + loss, scales = scale_loss_and_gradients(loss, optimizer, model, args) + + return output, loss, main_loss, aux_loss, scales + + + + +def validate(val_loader, model, criterion): + if main_process(): + logger.info('>>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>') + batch_time = AverageMeter() + data_time = AverageMeter() + loss_meter = AverageMeter() + sam = SegmentationAverageMeter() + + model.eval() + if main_process(): + end = time.time() + for i, (input, target) in enumerate(val_loader): + if main_process(): + data_time.update(time.time() - end) + input = input.cuda(non_blocking=True) + target = target.cuda(non_blocking=True) + output = model(input) + if args.zoom_factor != 8: + output = F.interpolate(output, size=target.size()[1:], mode='bilinear', align_corners=True) + loss = criterion(output, target) + + n = input.size(0) + if args.multiprocessing_distributed: + loss = loss * n # not considering ignore pixels + count = target.new_tensor([n], dtype=torch.long) + dist.all_reduce(loss), dist.all_reduce(count) + n = count.item() + loss = loss / n + else: + loss = torch.mean(loss) + + output = output.max(1)[1] + sam.update_metrics_gpu(output, target, args.classes, args.ignore_label, args.multiprocessing_distributed) + loss_meter.update(loss.item(), input.size(0)) + if main_process(): + batch_time.update(time.time() - end) + end = time.time() + if ((i + 1) % args.print_freq == 0) and main_process(): + logger.info('Test: [{}/{}] ' + 'Data {data_time.val:.3f} ({data_time.avg:.3f}) ' + 'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) ' + 'Loss {loss_meter.val:.4f} ({loss_meter.avg:.4f}) ' + 'Accuracy {accuracy:.4f}.'.format(i + 1, len(val_loader), + data_time=data_time, + batch_time=batch_time, + loss_meter=loss_meter, + accuracy=sam.accuracy)) + + iou_class, accuracy_class, mIoU, mAcc, allAcc = sam.get_metrics() + if main_process(): + logger.info('Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(mIoU, mAcc, allAcc)) + for i in range(args.classes): + logger.info('Class_{} Result: iou/accuracy {:.4f}/{:.4f}.'.format(i, iou_class[i], accuracy_class[i])) + logger.info('<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<') + return loss_meter.avg, mIoU, mAcc, allAcc + +end = time.time() 
+print(end-start) +if __name__ == '__main__': + print('main') + + + main() \ No newline at end of file diff --git a/mseg_semantic/domain_generalization/ccsa_utils.py b/mseg_semantic/domain_generalization/ccsa_utils.py new file mode 100755 index 0000000..e619973 --- /dev/null +++ b/mseg_semantic/domain_generalization/ccsa_utils.py @@ -0,0 +1,526 @@ +#!/usr/bin/python3 + +import numpy as np +import time +import torch +import torch.nn.functional as F + +from typing import Tuple + +""" +Set of utilities for metric learning. We use extensive sampling +techniques and also a contrastive loss to learn the metric space. +""" + +def contrastive_loss( + y_c: torch.Tensor, + pred_dists: torch.Tensor, + margin: int = 1 + ) -> torch.Tensor: + """ + Compute the similarities in the separation loss (4) by + computing average pairwise similarities between points + in the embedding space. + + element-wise square, element-wise maximum of two tensors. + + Contrastive loss also defined in: + - "Dimensionality Reduction by Learning an Invariant Mapping" + by Raia Hadsell, Sumit Chopra, Yann LeCun + + Args: + - y_c: Indicates if pairs share the same semantic class label or not + - pred_dists: Distances in the embeddding space between pairs. + + Returns: + - tensor representing contrastive loss values. + """ + N = pred_dists.shape[0] + + # corresponds to "d" in the paper. If same class, pull together. + # Zero loss if all same-class examples have zero distance between them. + pull_losses = y_c * torch.pow(pred_dists, 2) + # corresponds to "k" in the paper. If different class, push apart more than margin + # if semantically different examples have distances are in [0,margin], then there WILL be loss + zero = torch.zeros(N) + device = y_c.device + zero = zero.to(device) + # if pred_dists for non-similar classes are <1, then incur loss >0. + clamped_dists = torch.max(margin - pred_dists, zero ) + push_losses = (1 - y_c) * torch.pow(clamped_dists, 2) + return torch.mean(pull_losses + push_losses) + + +def paired_euclidean_distance(X: torch.Tensor, Y: torch.Tensor) -> torch.Tensor: + """ + Compute the distance in the semantic alignment loss (3) by + computing average pairwise distances between *already paired* + points in the embedding space. + + Note this is NOT computed between all possible pairs. Rather, we + compare i'th vector of X vs. i'th vector of Y (i == j always). + + Args: + - X: Pytorch tensor of shape (N,D) representing N embeddings of dim D + - Y: Pytorch tensor of shape (N,D) representing N embeddings of dim D + + Returns: + - dists: Pytorch tensor of shape (N,) representing distances between + fixed pairs + """ + device = X.device + N, D = X.shape + assert Y.shape == X.shape + eps = 1e-08 * torch.ones((N,1)) + eps = eps.to(device) # make sure in same memory (CPU or CUDA) + # compare i'th vector of x vs. i'th vector of y (i == j always) + diff = torch.pow(X - Y, 2) + + affinities = torch.sum(diff, dim=1, keepdim=True) + # clamp the affinities to be > 1e-8 ?? Unclear why the authors do this... + affinities = torch.max(affinities, eps) + return torch.sqrt(affinities) + + +def downsample_label_map(y: torch.Tensor, d: int = 2): + """ + size (int or Tuple[int] or Tuple[int, int] or Tuple[int, int, int]) – + output spatial size. + + scale_factor (float or Tuple[float]) – multiplier for spatial size. + Has to match input size if it is a tuple. + + mode (str) – algorithm used for upsampling: + 'nearest' | 'linear' | 'bilinear' | 'bicubic' | 'trilinear' | 'area'. 
Default: 'nearest' + + align_corners (bool, optional) – Geometrically, we consider the pixels of the input + and output as squares rather than points. If set to True, the input and output + tensors are aligned by the center points of their corner pixels, preserving the + values at the corner pixels. If set to False, the input and output tensors are + aligned by the corner points of their corner pixels, and the interpolation uses + edge value padding for out-of-boundary values, making this operation independent + of input size when scale_factor is kept the same. This only has an effect when + mode is 'linear', 'bilinear', 'bicubic' or 'trilinear'. Default: False + + Args: + - Y: Pytorch tensor of shape (batch size, height, width) + - d: downsample factor + + Returns: + - dY: Pytorch tensor of shape (batch_size, height/d, width/d) + """ + b, h, w = y.shape + y = y.unsqueeze(dim=1) # add num_channels = 1 + # Size must be 2 numbers -- for height and width, only + dY = F.interpolate(y, size=(h//d, w//d), mode='nearest') + dY = torch.squeeze(dY, dim=1) + assert dY.shape == (b, h//d, w//d) + return dY + + +def sample_pair_indices( + Y: torch.Tensor, + batch_domain_idxs: torch.Tensor, + num_pos_pairs: int = 100, + neg_to_pos_ratio: int = 3, + downsample_factor: int = 2 +): + """ + In our case, positive/negative pairs can be found in almost any two images + (as long as ground truth label maps are not identical). Thus, we sample negative + positive pairs not on an *image* level, but rather on a pixel-level, as long + as both images come from different domains. + + when i get resnet embedding E1 of + shape (C,H,W) of image1 from domain 1, + and resnet embedding E2 of shape (C,H,W) of + image 2 from domain 2, my contrastive loss will + be between random feature map locations E1[:,x,y] and E2[:,x,y] + + Args: + - Y: torch.Tensor, Pytorch tensor of shape (N,H,W) representing labels + - domain_idxs: torch.Tensor, + - num_pos_pairs: int = 100, + - neg_to_pos_ratio: int = 3, + - downsample_factor: int = 2: + + Returns: + - all_pos_pair_info + - all_neg_pair_info + """ + assert Y.dtype in [torch.float32, torch.float64] # cannot upsample dtype int + INITIAL_SAMPLE_NUM = int(1e6) + # downsample the class label map to the feature map resolution + # use nearest interpolation + dY = downsample_label_map(Y, d=downsample_factor) + _, unique_domain_idxs = count_per_domain_statistics(batch_domain_idxs) + batch_sz, h, w = dY.shape + + # Indices ordered as (bi,hi,wi,bj,hj,wj) + all_pos_pair_info = torch.zeros((0,6), dtype=torch.int64) + all_neg_pair_info = torch.zeros((0,6), dtype=torch.int64) + + # keep sampling until we get enough, append to array each time we get more + dataprep_complete = False + while not dataprep_complete: + + pos_pair_info, neg_pair_info = sample_crossdomain_pos_neg_pairs(dY, batch_domain_idxs, unique_domain_idxs, + w, h, INITIAL_SAMPLE_NUM) + # add to list of positives + all_pos_pair_info = torch.cat([pos_pair_info, all_pos_pair_info]) + # add to list of negatives + all_neg_pair_info = torch.cat([neg_pair_info, all_neg_pair_info]) + + curr_num_pos = all_pos_pair_info.shape[0] + curr_num_neg = all_neg_pair_info.shape[0] + sufficient_pos = (curr_num_pos > num_pos_pairs) + sufficient_neg = (curr_num_neg > neg_to_pos_ratio * num_pos_pairs) + dataprep_complete = sufficient_pos and sufficient_neg + + # shuffle the negatives among themselves + all_pos_pair_info = shuffle_pytorch_tensor(all_pos_pair_info) + # shuffle the positives among themselves + all_neg_pair_info = 
shuffle_pytorch_tensor(all_neg_pair_info) + + # clip number of pos to num_pos_pairs + all_pos_pair_info = all_pos_pair_info[:num_pos_pairs] + # clip number of neg to 3x positive + all_neg_pair_info = all_neg_pair_info[:neg_to_pos_ratio * num_pos_pairs] + + # we won't backprop through this function + all_pos_pair_info.requires_grad = False + all_neg_pair_info.requires_grad = False + + return all_pos_pair_info, all_neg_pair_info + + +def remove_pairs_from_same_domain( + batch_domain_indices: torch.Tensor, + a_pair_info: torch.Tensor, + b_pair_info: torch.Tensor + ) -> Tuple[torch.Tensor,torch.Tensor]: + """ + In training, we want only pairs from different domains. We + enforce that their feature embeddings are similar. + + We could have 1 million sampled pairs from a minibatch of size 5. + (Number of elements in batch (batch_domain_indices) need not + agree with number of sampled pairs!) + + Args: + - batch_domain_indices: Tensor of shape (K,), for each example + in minibatch, which domain did it come from. + - a_pair_info: (M,3) array representing (bi,hi,wi) + where these represent (batch index, row index, column index) + into a NCHW tensor for samples A. + - b_pair_info: (M,3) as above, but for samples B. (a,b) are paired + + Returns: + - a_pair_info: (N,3), where N <= M (discarded same domain elements) + - b_pair_info: (N,3), where N <= M + """ + batch_dim_a_idxs = a_pair_info[:,0] + batch_dim_b_idxs = b_pair_info[:,0] + # remove locations with identical domains in pos/neg pairs + a_domain = batch_domain_indices[batch_dim_a_idxs] + b_domain = batch_domain_indices[batch_dim_b_idxs] + + is_valid_pair = (a_domain != b_domain).nonzero().squeeze() + return a_pair_info[is_valid_pair], b_pair_info[is_valid_pair] + + +def form_pair_info_tensor( + batch_dim_idxs: torch.Tensor, + px_1d_y: torch.Tensor, + px_1d_x: torch.Tensor + ): + """ Hstack 3 length-N 1d arrays into a (N,3) array + + Args: + - batch_dim_idxs: size (N,) array representing indices + of examples in a minibatch + - px_1d_y: size (N,) array representing row indices + - px_1d_x: size (N,) array representing column indices + + Returns: + - pair_info: (N,3) array + """ + # batch dim + N = batch_dim_idxs.shape[0] + assert batch_dim_idxs.shape == (N,) + assert px_1d_y.shape == (N,) + assert px_1d_x.shape == (N,) + + pair_info = torch.stack([batch_dim_idxs, px_1d_y, px_1d_x]) + return pair_info.t() # tranpose it now + + +def find_matching_pairs( + y: torch.Tensor, + a_pair_info: torch.Tensor, + b_pair_info: torch.Tensor) -> Tuple[torch.Tensor,torch.Tensor]: + """ + Given a batch of ground truth label maps, and sampled pixel + pair locations (pairs are across label maps), identify which + pairs are matching vs. non-matching and return corresponding metadata + (basically, partition them). + + Args: + - y: Tensor of size (B,H,W) representing 2-d label maps + for B examples. + - a_pair_info: + - b_pair_info: + + Returns: + - pos_pair_info: Pytorch tensor containing info about each positive pair (a,b). Contains + (a batch_idx, a row, a col, b batch_idx, b row, b col) + - neg_pair_info: Same as above, but for negative pairs. 
+ """ + batch_dim_a_idxs = a_pair_info[:,0] + px_1d_a_y = a_pair_info[:,1] + px_1d_a_x = a_pair_info[:,2] + + batch_dim_b_idxs = b_pair_info[:,0] + px_1d_b_y = b_pair_info[:,1] + px_1d_b_x = b_pair_info[:,2] + + # extract category indices + cls_vals_a = y[batch_dim_a_idxs, px_1d_a_y, px_1d_a_x] + cls_vals_b = y[batch_dim_b_idxs, px_1d_b_y, px_1d_b_x] + + # compare category indices for equality + is_same_class = (cls_vals_a == cls_vals_b).nonzero().squeeze() + is_diff_class = (cls_vals_a != cls_vals_b).nonzero().squeeze() + + a_pos_info = a_pair_info[is_same_class] + a_neg_info = a_pair_info[is_diff_class] + + b_pos_info = b_pair_info[is_same_class] + b_neg_info = b_pair_info[is_diff_class] + + pos_pair_info = torch.cat([a_pos_info, b_pos_info], dim=1) + neg_pair_info = torch.cat([a_neg_info, b_neg_info], dim=1) + + return pos_pair_info, neg_pair_info + + +def sample_crossdomain_pos_neg_pairs( + Y: torch.Tensor, + batch_domain_indices: torch.Tensor, + unique_domain_idxs: np.ndarray, + w: int, + h: int, + INITIAL_SAMPLE_NUM: int + ): + """ + Args: + - Y: Pytorch tensor of shape (N,H,W) with batch of ground truth label maps + - batch_domain_indices: which domain each example in the training batch belongs to + - unique_domain_idxs: unique domain IDs + - w: integer representing label map width + - h: integer representing label map height + - INITIAL_SAMPLE_NUM: + + Returns: + - pos_pair_info: Pytorch tensor of shape (N,6) + - neg_pair_info: Pytorch tensor of shape (N,6) + """ + cache_a = sample_px_locations_uniformly(batch_domain_indices, unique_domain_idxs, w, h, INITIAL_SAMPLE_NUM) + batch_dim_a_idxs, px_1d_a_x, px_1d_a_y = cache_a + cache_b = sample_px_locations_uniformly(batch_domain_indices, unique_domain_idxs, w, h, INITIAL_SAMPLE_NUM) + batch_dim_b_idxs, px_1d_b_x, px_1d_b_y = cache_b + + a_pair_info = form_pair_info_tensor(batch_dim_a_idxs, px_1d_a_y, px_1d_a_x) + b_pair_info = form_pair_info_tensor(batch_dim_b_idxs, px_1d_b_y, px_1d_b_x) + + # remove examples where they come from the same domain + a_pair_info, b_pair_info = remove_pairs_from_same_domain(batch_domain_indices, a_pair_info, b_pair_info) + # calculate positive and negative semantic pair assignments + pos_pair_info, neg_pair_info = find_matching_pairs(Y, a_pair_info, b_pair_info) + return pos_pair_info, neg_pair_info + + +def count_per_domain_statistics( + domain_idxs: torch.Tensor + ) -> Tuple[np.ndarray, np.ndarray]: + """ + Args: + - domain_idxs: Pytorch tensor of shape (N,) showing assignment + of each example to each particular domain + + Returns: + - examples_per_domain: Numpy array of shape (max_idx+1,) + where max_idx is the largest domain index. + Containss number of examples per each domain. + - unique_domain_idxs: Numpy array containing unique domain indices. + """ + unique_domain_idxs = torch.unique(domain_idxs).cpu().numpy() + # get the number of examples from each domain + examples_per_domain = np.bincount( domain_idxs.cpu().numpy() ) + return examples_per_domain, unique_domain_idxs + + +def sample_px_locations_uniformly( + batch_domain_indices: torch.Tensor, + unique_domain_idxs: np.ndarray, + w: int, + h: int, + initial_sample_num: int + ): + """ + We are given a list of which batch examples belong to which domains. + We first sample an array of uniformly random domain assignments for samples. + Then for each domain sample, we choose which example it could have come from + (sampling uniformly from the corresponding items in the batch). 
+ + After an example is chosen (sampling uniformly over domains), we sample + uniformly random pixel locations. + + We cannot sample uniformly over classes because of severe imbalance + in each minibatch. + + Args: + - batch_domain_indices: Integer tensor of shape (B) representing + which domain each minibatch example came from, + - unique_domain_idxs: Integer tensor of shape (D), if D domains + present in a minibatch (not necessarily consecutive integers) + - w: integer representing label map width + - h: integer representing label map height + - initial_sample_num: integer representing initial number of samples + + Returns: + - all_batch_dim_idxs: Tensor of shape (initial_sample_num,) + - px_1d_x: Tensor of shape (initial_sample_num,) representing label + map column indices + - px_1d_y: Tensor of shape (initial_sample_num,) representing label + map row indices + """ + sampled_domain_idxs = pytorch_random_choice(unique_domain_idxs, num_samples=initial_sample_num) + + # translate the sampled domains into batchh indices! + all_batch_dim_idxs = torch.ones(initial_sample_num, dtype=torch.int64) * -1 + + # need a loop here -- have to manipulate the batch indices per domain independently + for domain_idx in unique_domain_idxs: + num_samples_in_domain = int( (sampled_domain_idxs == domain_idx).sum().cpu().numpy() ) + + # generate random example/batch indices for each domain + # (drawing from those batch examples that belong to domain) + relevant_batch_idxs = (batch_domain_indices == domain_idx).nonzero().squeeze() + if len(relevant_batch_idxs.shape) == 0: # when just a scalar + relevant_batch_idxs = torch.tensor([ int(relevant_batch_idxs) ]) + domain_batch_dim_idxs = pytorch_random_choice(relevant_batch_idxs.cpu().numpy(), num_samples=num_samples_in_domain) + + relevant_sample_idxs = (sampled_domain_idxs == domain_idx).nonzero().squeeze() + # place the selected batch locations into the correct places for this domain. + all_batch_dim_idxs[relevant_sample_idxs] = domain_batch_dim_idxs + + px_1d_x = pytorch_random_choice(np.arange(w), num_samples=initial_sample_num) + px_1d_y = pytorch_random_choice(np.arange(h), num_samples=initial_sample_num) + + return all_batch_dim_idxs, px_1d_x, px_1d_y + + +def shuffle_pytorch_tensor(x: torch.Tensor) -> torch.Tensor: + """ Do not set torch.manual_seed(1) here, since we want to have + a different random result each time. + + Args: + - x: (N,M) tensor we wish to shuffle along dim=0 + + Returns: + - x: (N,M) tensor represneting shuffled version of input, along dim=0 + """ + n_examples = x.shape[0] + r = torch.randperm(n_examples) + return x[r] + + +def pytorch_random_choice(x: np.ndarray, num_samples: int) -> torch.Tensor: + """ Provide Numpy's "random.choice" functionality to Pytorch. + + Do not put a manual seed in this function, since we want a different + result each time we call it. + + Args: + - x: 1d Numpy array of shape (N,) to sample elements from + (with replacement). + - num_samples + + Returns: + - torch.Tensor of shape (num_samples,) + """ + # valid_idx = x.nonzero().view(-1) + # choice = torch.multinomial(valid_idx.float(), 1) + # return x[valid_idx[choice]] + + vals = np.random.choice(x, num_samples) + return torch.from_numpy(vals) + + +def get_merged_pair_embeddings(pos_pair_info, neg_pair_info, embedding): + """ + Given indices positive pairs (a,b) and negative pairs (a,b), + obtain paired embeddings (stacked together). 
+ + Args: + - pos_pair_info: (N,6) array representing (bi,hi,wi, bj,hj,wj) + where these represent (batch index, row index, column index) + into a NCHW tensor for paired samples A and B. + - neg_pair_info: (M,6) array, as above. + - embedding: (N,C,H,W) array representing output of a + feature extractor backbone, e.g. ResNet. + + Returns: + - y_c: (N+M) array representing binary same-class (1) vs. + different class (0) samples. + - a_embedding: (N+M,C) array + - b_embedding: (N+M,C) array + """ + device = embedding.device + + n_pos = pos_pair_info.shape[0] + n_neg = neg_pair_info.shape[0] + y_c = torch.zeros(n_pos + n_neg, dtype=torch.float32) + y_c[:n_pos] = 1.0 # 1 means the pair belongs to the same semantic class + + y_c = y_c.to(device) # make sure it lives in the same memory as the embedding (CPU or GPU) + + a_pos_embedding, b_pos_embedding = get_pair_embedding(pos_pair_info, embedding) + a_neg_embedding, b_neg_embedding = get_pair_embedding(neg_pair_info, embedding) + + a_embedding = torch.cat([a_pos_embedding, a_neg_embedding]) + b_embedding = torch.cat([b_pos_embedding, b_neg_embedding]) + + return y_c, a_embedding, b_embedding + + +def get_pair_embedding( + pair_info: torch.Tensor, + embedding: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """ + We are working with N pairs, where the k'th pair is (a_k, b_k). + + Args: + - pair_info: (N,6) array representing (bi,hi,wi, bj,hj,wj) + where these represent (batch index, row index, column index) + into a NCHW tensor for paired samples A and B. + - embedding: NCHW tensor representing a minibatch of per-pixel embeddings + + Returns: + - a_embedding: (N,C) array representing channels at pixel (i,j) + of specific minibatch examples + - b_embedding: As above. + """ + bi = pair_info[:,0] + hi = pair_info[:,1] + wi = pair_info[:,2] + + bj = pair_info[:,3] + hj = pair_info[:,4] + wj = pair_info[:,5] + + a_embedding = embedding[bi,:,hi,wi] + b_embedding = embedding[bj,:,hj,wj] + return a_embedding, b_embedding + diff --git a/mseg_semantic/multiobjective_opt/README.md b/mseg_semantic/multiobjective_opt/README.md new file mode 100755 index 0000000..e3fb7ef --- /dev/null +++ b/mseg_semantic/multiobjective_opt/README.md @@ -0,0 +1,18 @@ + +## Multi-Objective Optimization Implementation + +As discussed in the [MSeg paper](), we apply a state-of-the-art multi-task learning algorithm, MGDA [1], to MSeg. Performance on the various datasets (representing diverse domains) can be viewed as different tasks in a multi-task learning framework. Although these tasks may conflict (which would require a trade-off), a common compromise is to optimize a proxy objective that minimizes a weighted linear combination of per-task losses. + + +The main idea of the Multiple Gradient Descent Algorithm (MGDA) is that, instead of heuristically setting such weights, we solve a small subproblem at each iteration to find a Pareto-optimal weight setting. In each iteration, the loss and its gradient are evaluated independently for each dataset. A gradient-descent direction is then obtained as a convex combination of these per-dataset loss gradients. + +We make a few changes to the original implementation: +1. Since we need as many backward passes as we have tasks, we simply place each task in its own process using the Pytorch [DDP](https://pytorch.org/docs/master/notes/ddp.html) framework. +2. To prevent synchronization of gradients across DDP processes, we wrap the `loss.backward()` call in the `ddp.no_sync()` context (a condensed sketch of this pattern is shown below).
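The per-iteration pattern is condensed below. This is a minimal sketch adapted from `forward_backward_mgda` and the surrounding training loop in `train.py`; the wrapper name `mgda_train_step` is illustrative only, and `model` is assumed to already be wrapped in `DistributedDataParallel`, with `input`/`target` on the GPU and `args` populated from the config files above.

```python
import torch
from mseg_semantic.multiobjective_opt.dist_mgda_utils import scale_loss_and_gradients

def mgda_train_step(input, target, model, optimizer, args):
    """One MGDA iteration: each DDP process backprops its own dataset's loss
    without gradient averaging, then gradients are recombined with the
    Frank-Wolfe scales before a single optimizer step."""
    with model.no_sync():  # suppress DDP's gradient all-reduce for this backward pass
        output, main_loss, aux_loss = model(input, target)
        loss = main_loss + args.aux_weight * aux_loss
        # zeroes gradients, calls loss.backward(), gathers per-dataset gradients
        # across processes, solves the min-norm subproblem, and overwrites each
        # param.grad (and the loss) with the scaled convex combination
        loss, scales = scale_loss_and_gradients(loss, optimizer, model, args)
    optimizer.step()
    return output, loss, scales
```

The `scales` dictionary returned here is what the training loop logs as `Scales: ...`; it is re-solved at every iteration.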
+ +The implementation is found in the following files: +- dist_mgda_utils.py: Handles the gathering of gradients across processes and forms convex combination of per-task gradients. +- min_norm_solvers.py: Computes pareto optimal weights per iteration using Frank-Wolfe optimization. + +[1] Ozan Sener and Vladlen Koltun. [Multi-task learning as multi-objective optimization.](https://arxiv.org/abs/1810.04650) In NeurIPS. 2018 + diff --git a/mseg_semantic/multiobjective_opt/dist_mgda_utils.py b/mseg_semantic/multiobjective_opt/dist_mgda_utils.py new file mode 100755 index 0000000..cb83607 --- /dev/null +++ b/mseg_semantic/multiobjective_opt/dist_mgda_utils.py @@ -0,0 +1,214 @@ +#!/usr/bin/python3 + +from collections import defaultdict +import logging +import numpy as np +import os +import pdb +import time +import torch +import torch.distributed as dist + +from typing import List, Mapping + +from mseg_semantic.multiobjective_opt.min_norm_solvers import MinNormSolver +from mseg_semantic.multiobjective_opt.min_norm_solvers_new import MinNormSolver as MinNormSolverNew + + + +def scale_loss_and_gradients(loss: torch.Tensor, optimizer, model, args) -> torch.Tensor: + """ + MGDA --> use Frank-Wolfe iteration to compute scales. + + Find min_norm_element() often takes around 0.51 seconds. + + Args: + - loss: Pytorch tensor + - optimizer: torch.optim object + - model: Network passed by reference + - args + + Returns: + - loss: Pytorch tensor + """ + dataset_names = list(args.dataset_gpu_mapping.keys()) + loss_i_tensor_list = all_gather_create_tensor_list(tensor=loss, ngpus_per_node=args.ngpus_per_node) + dataset_loss_dict = reduce_to_dict_per_dataset(loss_i_tensor_list, args.dataset_gpu_mapping) + + optimizer.zero_grad() + # Independent: each process will only have gradients with respect to its own subset of the minibatch + + # Under ddp.no_sync() context, this is doing an independent backward op + assert not model.require_backward_grad_sync + loss.backward() + + per_dataset_per_param_dict = {} + # list of all gradients, per each dataset + dataset_allgrads = defaultdict(list) + # accumulate the gradients per each task + +######################################## print out unsynced gradients + # for p_name, param in model.named_parameters(): + # if param.grad is not None: + # # grad_i_tensor_list = all_gather_create_tensor_list(tensor=param.grad, ngpus_per_node=args.ngpus_per_node) + # #print(f'grad_i_tensor_list for {p_name}: ', grad_i_tensor_list) + # # dataset_grad_p_dict = reduce_to_dict_per_dataset(grad_i_tensor_list, args.dataset_gpu_mapping) + # # per_dataset_per_param_dict[p_name] = dataset_grad_p_dict + # for dname in dataset_names: + # dataset_allgrads[dname] += [param.grad.clone().flatten()] # TODO: remove the flatten?? 
+ # for dname in dataset_names: + # dataset_allgrads[dname] = torch.cat(dataset_allgrads[dname]) + + # for dname in dataset_names: + # norm = torch.norm(dataset_allgrads[dname]).item() + # args.logger.info(f'rank: {args.rank}, {dname}: norm {norm}') + # no need to sort these now, names are unique +########################################## + dataset_allgrads = defaultdict(list) + for p_name, param in model.named_parameters(): + if param.grad is not None: + grad_i_tensor_list = all_gather_create_tensor_list(tensor=param.grad, ngpus_per_node=args.ngpus_per_node) + #print(f'grad_i_tensor_list for {p_name}: ', grad_i_tensor_list) + dataset_grad_p_dict = reduce_to_dict_per_dataset(grad_i_tensor_list, args.dataset_gpu_mapping) + per_dataset_per_param_dict[p_name] = dataset_grad_p_dict + + for dname in dataset_names: + dataset_allgrads[dname] += [dataset_grad_p_dict[dname].clone().flatten()] # TODO: remove the flatten?? + + current_ns_time = lambda: int(round(time.time() * 1e9)) + + scales = {} + + # sol, min_norm = MinNormSolver.find_min_norm_element([dataset_allgrads[d] for d in dataset_names]) + # for i, d in enumerate(dataset_names): + # scales[d] = float(sol[i]) + # args.logger.info(f'{d}, {scales[d]}') + + for dname in dataset_names: + dataset_allgrads[dname] = torch.cat(dataset_allgrads[dname]) + + # Optionally, could normalize all gradients here. + for dname, grad_list in dataset_allgrads.items(): + _, grad_norm = normalize_tensor_list(grad_list) # dataset_allgrads[dname] + if dist.get_rank() == 0: + print(f'Gradient norms: {dname}: $ {grad_norm:.2f} $, ns = $ {current_ns_time()} $') + + # args.logger.info(dataset_names) + # args.logger.info(dataset_allgrads.keys()) + + + sol, min_norm = MinNormSolverNew.find_min_norm_element([dataset_allgrads[d] for d in dataset_names]) + for i, d in enumerate(dataset_names): + scales[d] = float(sol[i]) + + # args.logger.info(f'{scales}') + + # Scaled back-propagation, we must preserve gradients so we will not call optimizer.zero_grad() again + for p_name, param in model.named_parameters(): + if param.grad is not None: + # Instead of a second backward pass, just use the results of the original backward pass + param.grad = scaled_reduce_dict_to_tensor(per_dataset_per_param_dict[p_name], dataset_names, scales) + + # Multi-task loss -- adding each dataset's scaled loss. + loss = scaled_reduce_dict_to_tensor(dataset_loss_dict, dataset_names, scales) + return loss, scales + + +def reduce_to_dict_per_dataset(tensor_list: List[torch.Tensor], dataset_gpu_mapping: Mapping[str,int]): + """ + Reduce a list to a dictionary. Take an average of gradient values, or an average of losses. + Otherwise loss (and thus gradients) would be larger for whichever dataset gets the most GPUs. + + Args: + - tensor_list, where i'th element comes from a specific GPU + + Returns: + - dataset_tensor_dict: reduced tensors, reduced from corresponding indices i. + """ + assert len(tensor_list) > 0 + + item0 = tensor_list[0] + dataset_tensor_dict = { dataset_name: torch.zeros_like(item0) for dataset_name in dataset_gpu_mapping.keys() } + + for dname, gpu_idxs in dataset_gpu_mapping.items(): + for gpu_idx in gpu_idxs: + dataset_tensor_dict[dname] += tensor_list[gpu_idx] + dataset_tensor_dict[dname] /= (1. * len(gpu_idxs)) + + return dataset_tensor_dict + + +def scaled_reduce_dict_to_tensor(dataset_grad_p_dict: Mapping[str,torch.Tensor], dataset_names: List[str], scales=Mapping[str,float]): + """ + Reduce a dictionary to a single tensor, scaling values in linear combination. 
+ + Args: + - dataset_grad_p_dict + - dataset_names + - scales + + Returns: + - sum_tensor + """ + assert len(dataset_grad_p_dict.values()) > 0 + + item0 = list(dataset_grad_p_dict.values())[0] + sum_tensor = torch.zeros_like(item0) + for dname in dataset_names: + sum_tensor += scales[dname] * dataset_grad_p_dict[dname] + + return sum_tensor + + +def all_gather_create_tensor_list(tensor: torch.Tensor, ngpus_per_node: int) -> List[torch.Tensor]: + """ + torch.distributed.all_gather() is SYNCHRONOUS, i.e. `async_op=False` by default. + This ensures a barrier. + + Args: + - tensor + + Returns: + - tensor_list + """ + # tensor_list -> Output list. It should contain correctly-sized tensors to be used + # for output of the collective. + tensor_list = [ torch.zeros_like(tensor) for _ in range(ngpus_per_node) ] + # Gathers tensors from the whole group in a list. + # The variable `tensor` will not be affected by this operation. + dist.all_gather(tensor_list=tensor_list, tensor=tensor) + return tensor_list + + +def dump_tensor_list_to_disk(tensor_list): + """ + """ + num_tensors = len(tensor_list) + print(f'Saving {num_tensors} tensors to disk') + + +def normalize_tensor_list(tensor): + """ + Args: + - tensor_list: unnnormalized tensor + + Returns: + - tensor: normalized tensor + - norm: norm of vector representing vstacked list + """ + norm = torch.norm(tensor) + return tensor / norm, norm + + +def get_tensor_list_norm(tensor_list: List[torch.Tensor]): + """ Compute the norm of a stacked list of 1d tensors. + + Args: + - tensor_list: + + Returns: + - float representing value of norm + """ + # return torch.norm(torch.cat(tensor_list, dim=0)) + return torch.norm(tensor_list) + diff --git a/mseg_semantic/multiobjective_opt/gradient_analysis.py b/mseg_semantic/multiobjective_opt/gradient_analysis.py new file mode 100755 index 0000000..26bb4ca --- /dev/null +++ b/mseg_semantic/multiobjective_opt/gradient_analysis.py @@ -0,0 +1,419 @@ +#!/usr/bin/python3 + +from collections import defaultdict +import matplotlib.pyplot as plt +import numpy as np +import pdb + + +def read_txt_lines(fpath): + """ + """ + with open(fpath, 'r') as f: + return f.readlines() + + +def parse_norms_and_scales(fpath: str): + """ + Args: + - fpath: path to log file + + Returns: + - None + """ + norm_lists = defaultdict(list) + scales_lists = defaultdict(list) + + txt_lines = read_txt_lines(fpath) + for line in txt_lines: + if '$' in line: + norm, timestamp, dname = parse_norm_line(line) + norm_lists[dname] += [(timestamp,norm)] + if 'Scales' in line: + scales_map = parse_scales_line(line) + for k,v in scales_map.items(): + scales_lists[k] += [v] + + norm_lists = sort_tuple_lists_by_timestamp(norm_lists) + + for dname, norm_list in norm_lists.items(): + timestamps,norms = list(zip(*norm_list)) + norm_lists[dname] = norms + + plot_lists_single_plot(norm_lists, xlabel="Iteration",ylabel="Gradient Norm") + plot_lists_multiple_subplots(norm_lists, xlabel="Iteration",ylabel="Gradient Norm") + + plot_lists_single_plot(scales_lists, xlabel="Iteration",ylabel="MGDA Scale") + plot_lists_multiple_subplots(scales_lists, xlabel="Iteration",ylabel="MGDA Scale") + + +def plot_lists_single_plot(val_lists, xlabel, ylabel): + """ + Args: + - val_lists + - xlabel: + - ylabel: + + Returns: + - None + """ + # Use Shared Plots + fig= plt.figure(dpi=200, facecolor='white') + for dname, val_list in val_lists.items(): + plt.plot(range(len(val_list)), val_list, label=dname) + # plt.plot(range(len(val_list)), val_list, 0.1, marker='.', label=dname) + + 
plt.xlabel(xlabel) + plt.ylabel(ylabel) + plt.legend(loc='upper left') + fig.tight_layout(pad=4) + plt.show() #savefig('fig.pdf') + + +def plot_lists_multiple_subplots(val_lists, xlabel, ylabel): + """ + Args: + - val_lists + - xlabel + - ylabel + + Returns: + - None + """ + # Use Individual Plots + fig= plt.figure(dpi=200, facecolor='white') + subplot_counter = 1 + axes =[] + for dname, val_list in val_lists.items(): + if subplot_counter == 1: + axes += [ plt.subplot(4,1,subplot_counter) ] + else: + axes += [ plt.subplot(4,1,subplot_counter, sharex=axes[0], sharey=axes[0]) ] + plt.plot(range(len(val_list)), val_list, label=dname) + plt.xlabel(xlabel ) + plt.ylabel(ylabel) + plt.title(dname) + subplot_counter += 1 + + plt.show() + + +def parse_norm_line(line): + """ + Args: + - line + + Returns: + - norm + - timestamp + - dname + """ + def find_next(str, token='$'): + return str.find(token) + + dname = line[find_next(line, ':')+1:] + dname = dname[:find_next(dname, ':')] + + k = find_next(line) + line = line[k+1:] + norm_str = line[1:find_next(line)] + line = line[find_next(line)+1:] + line = line[find_next(line)+1:] + time_str = line[1:find_next(line)] + + norm = float(norm_str) + timestamp = float(time_str) + + return norm, timestamp, dname.strip() + +def parse_scales_line(line): + """ + Args: + - line: + + Returns: + - scales_dict + """ + def advance_past_token(str, token): + return str[str.find(token) + len(token):] + + scales_dict = {} + line = advance_past_token(line, 'Scales:') + pair_str = line.split(',') + for pair_str in pair_str: + dname, scale = pair_str.split(':') + scales_dict[dname.strip()] = float(scale) + return scales_dict + + +def test_parse_norm_line_1(): + """ + """ + line = 'Gradient norms: ade20k-v1-qvga: $ 11.55 $, ns = $ 1569682972195191808 $' + norm, timestamp, dname = parse_norm_line(line) + assert dname == 'ade20k-v1-qvga' + assert timestamp == 1569682972195191808 + assert norm == 11.55 + + +def test_parse_norm_line_2(): + """ + """ + line = 'Gradient norms: coco-panoptic-v1-qvga: $ 13.65 $, ns = $ 1569682976771436288 $[2019-09-28 08:02:56,933 INFO train.py line 543 91056] Scales: coco-panoptic-v1-qvga: 0.26 , mapillary_vistas_comm-qvga: 0.21 , ade20k-v1-qvga: 0.23 , interiornet-37cls-qvga: 0.29' + norm, timestamp, dname = parse_norm_line(line) + assert dname == 'coco-panoptic-v1-qvga' + assert timestamp == 1569682976771436288 + assert norm == 13.65 + + +def test_parse_scales_line_1(): + """ + """ + line = '[2019-09-28 08:02:58,476 INFO train.py line 543 91056] Scales: coco-panoptic-v1-qvga: 0.28 , mapillary_vistas_comm-qvga: 0.20 , ade20k-v1-qvga: 0.24 , interiornet-37cls-qvga: 0.28' + scales_dict = parse_scales_line(line) + gt_scales_dict = { + 'coco-panoptic-v1-qvga': 0.28 , + 'mapillary_vistas_comm-qvga': 0.20 , + 'ade20k-v1-qvga': 0.24 , + 'interiornet-37cls-qvga': 0.28 + } + assert_dict_equal(scales_dict, gt_scales_dict) + +def assert_dict_equal(dict1, dict2): + """ + """ + assert set(dict1.keys()) == set(dict2.keys()) + for k, v in dict1.items(): + assert v == dict2[k] + + + +def test_parse_scales_line_2(): + """ + """ + line = 'Gradient norms: coco-panoptic-v1-qvga: $ 13.65 $, ns = $ 1569682976771436288 $[2019-09-28 08:02:56,933 INFO train.py line 543 91056] Scales: coco-panoptic-v1-qvga: 0.26 , mapillary_vistas_comm-qvga: 0.21 , ade20k-v1-qvga: 0.23 , interiornet-37cls-qvga: 0.29' + scales_dict = parse_scales_line(line) + gt_scales_dict = { + 'coco-panoptic-v1-qvga': 0.26, + 'mapillary_vistas_comm-qvga': 0.21, + 'ade20k-v1-qvga': 0.23, + 
'interiornet-37cls-qvga': 0.29 + } + assert_dict_equal(scales_dict, gt_scales_dict) + + + +def sort_tuple_lists_by_timestamp(norm_lists): + """ + """ + get_timestamp = lambda pair: pair[0] + for k, norm_list in norm_lists.items(): + norm_lists[k] = sorted(norm_list, key=get_timestamp) + + + return norm_lists + + + +def test_sort_tuple_lists_by_timestamp(): + """ """ + norm_lists = { + # tuple has order (timestamp, norm) + 'a': [(1, 3.5), (3, 1.5), (2, 0.5)], + 'b': [(4,0.6), (0, 1.6), (5, 2.6)] + } + + sorted_lists = sort_tuple_lists_by_timestamp(norm_lists) + gt_sorted_lists = { + 'a': [(1, 3.5), (2, 0.5), (3, 1.5)], + 'b': [(0, 1.6), (4, 0.6), (5, 2.6)] + } + assert_dict_equal(sorted_lists, gt_sorted_lists) + + + + +def visualize_losses(): + """ + Get the train loss values from each training run (saved in SLURM output + scripts) and plot them. + """ + expname_to_fname_dict = { + 'camvid-qvga-50epochs-bs16-nomgda' : 'slurm-130924.out', + 'nyudepthv2-36-qvga-50epochs-nomgda-bs16' : 'slurm-138433.out', + 'A-C-M-mgda-10-epochs-6-gpus' : 'slurm-139445.out', + 'A-C-M-I-mgda-3epochs-bs128' : 'slurm-139759.out', # scales uniform after 10% + 'C-no-mgda-bs-32-10epochs' : 'slurm-140714.out', + 'A-C-M-I-3Iepochs-normalize_before_FW-mgda-bs128' : 'slurm-140886.out', + 'A-C-M-I-mgda-3epochs-bs128-nomgda' : 'slurm-140963.out', + 'A-C-M-I-3epochs_2gpus_each_bs128-normalizeunitbeforeFW-mgda-lr1' : 'slurm-141004.out', + 'A-C-M-I-NOMGDA-12epochs_2gpus_each_bs128' : 'slurm-141015.out', + 'A-C-M-I-6epochs_2gpus_each_bs128_crop201_no_mgda-crashed' : 'slurm-141016.out', + 'completed-A-C-M-I-NOMGDA-12epochs_2gpus_each_bs128' : 'slurm-141134.out', + 'A-C-M-I-6epochs_2gpus_each_bs128-no_mgda' : 'slurm-141135.out', + 'A-C-M-I-24epochs_2gpus_each_bs128-no_mgda' : 'slurm-141142.out', + 'A-C-M-I-3epochs_2gpus_each_bs256_no_mgda_lrpoint01' : 'slurm-141362.out', + 'A-C-M-I-3epochs_2gpus_each_bs256-no_mgda_lr1' : 'slurm-141363.out', + 'A-C-M-I-3epochs-2gpus_each_bs256_no_mgda_lrpoint1' : 'slurm-141364.out', + 'A-C-M-I-3epochs_2gpus_each_bs128_no_mgda_lrpoint1' : 'slurm-141365.out', + 'A-C-M-I-3epochs_1gpu_each_bs64_crop201_no_mgda_lrpoint01' : 'slurm-141375.out', + 'A-C-M-I-3epochs_1gpu_each_bs64_crop201_no_mgda_lrpoint001' : 'slurm-141376.out', + 'A-C-M-I-3epochs_1gpu_each_bs64_no_mgda_lrpoint001' : 'slurm-141377.out', + 'A-C-M-I-3epochs_1gpu_each_bs32_no_mgda_lrpoint01' : 'slurm-141378.out', + 'A-C-M-I-3epochs_1gpu_each_bs32_no_mgda_lrpoint001' : 'slurm-141379.out', + } + + SLURM_FILE_DIR = '/Users/johnlamb/Documents/SLURM_FILES' + + for expname, fname in expname_to_fname_dict.items(): + metrics_dict = defaultdict(list) + fpath = f'{SLURM_FILE_DIR}/{fname}' + txt_lines = read_txt_lines(fpath) + for line in txt_lines: + if 'MainLoss' not in line: + continue + MainLoss, AuxLoss, Loss, Accuracy = parse_iter_info_line(line) + metrics_dict['MainLoss'] += [MainLoss] + metrics_dict['AuxLoss'] += [AuxLoss] + metrics_dict['Loss'] += [Loss] + metrics_dict['Accuracy'] += [Accuracy] + + plot_sublots_with_metrics(expname, metrics_dict) + + +def plot_sublots_with_metrics(expname: str, metrics_dict: Mapping[str, List[float]] ): + """ + Render or save a plot of training metrics (e.g. training loss, + training accuracy). Share the x-axis, representing training iterations, + but use different y-axes for different quantities. + + Args: + - metrics_dict: Dictionary mapping the name of a metric to a list + of values. 
+ + Returns: + - None + """ + subplot_counter = 1 + fig = plt.figure(dpi=200, facecolor='white') + + axes = [] + for metric, val_list in metrics_dict.items(): + if subplot_counter == 1: + axes += [ plt.subplot(4,1,subplot_counter) ] + plt.title(expname) + else: + axes += [ plt.subplot(4,1,subplot_counter, sharex=axes[0]) ] + plt.plot(range(len(val_list)), val_list, label=metric) + xlabel = 'iter' + plt.xlabel(xlabel) + ylabel = metric + plt.ylabel(ylabel) + subplot_counter += 1 + + #plt.show() + plt.savefig(f'loss_plots/{expname}.png') + + + +def parse_iter_info_line(line: str) -> Tuple[float,float,float,float]: + """ + Args: + - line: string representing output file line + + Returns: + - MainLoss: float representing PSPNet CE primary loss value + - AuxLoss: float representing PSPNet CE auxiliary loss value + - Loss: float representing combined loss + - Accuracy: float representing pixel accuracy + """ + MainLoss = get_substr(line, start_token='MainLoss', end_token='AuxLoss') + AuxLoss = get_substr(line, start_token='AuxLoss', end_token='Loss') + Loss = get_substr(line, start_token=' Loss', end_token='Accuracy') + Accuracy = get_substr(line, start_token='Accuracy', end_token='.current_iter', alt_end_token='.\n') + return MainLoss, AuxLoss, Loss, Accuracy + + +def get_substr(line: str, start_token: str, end_token: str, alt_end_token: str = None) -> float: + """ + Search a string for a substring that will be contained between two specified tokens. + If the end token may not be always found in the string, an alternate end token can be + provided as well. + + Args: + - line: string representing line of text + - start_token: string + - end_token: string + - alt_end_token: string + + Returns: + - val: floating point number retrieved + """ + i = line.find(start_token) + j = i + len(start_token) + + # `rel_line` is relevant portion of line + rel_line = line[j:] + + if end_token not in rel_line: + rel_line += '\n' + end_token = alt_end_token + k = rel_line.find(end_token) + val = rel_line[:k] + + return float(val) + + + +def test_parse_iter_info_line(): + """ + 3 Simple test cases to make sure that we can parse file lines appropriately. + """ + line = '[2019-10-05 07:09:13,411 INFO train.py line 538 112397] Epoch: [101/101][280/281] Data 0.000 (0.072) Batch 0.812 (0.909) Remain 00:01:30 MainLoss 3.3073 AuxLoss 3.3141 Loss 4.6329 Accuracy 0.1890.current_iter: 28380' + MainLoss, AuxLoss, Loss, Accuracy = parse_iter_info_line(line) + assert MainLoss == 3.3073 + assert AuxLoss == 3.3141 + assert Loss == 4.6329 + assert Accuracy == 0.1890 + + line = '[2019-10-02 15:55:39,707 INFO train.py line 538 27363] Epoch: [2/124][2380/3696] Data 0.000 (0.010) Batch 0.775 (0.763) Remain 95:14:16 MainLoss 0.7233 AuxLoss 1.0095 Loss 1.1271 Accuracy 0.7905.current_iter: 6076' + MainLoss, AuxLoss, Loss, Accuracy = parse_iter_info_line(line) + assert MainLoss == 0.7233 + assert AuxLoss == 1.0095 + assert Loss == 1.1271 + assert Accuracy == 0.7905 + + line = '[2019-09-06 03:12:48,857 INFO train.py line 480 43364] Epoch: [49/50][10/23] Data 0.000 (2.382) Batch 0.220 (2.599) Remain 00:01:33 MainLoss 0.2480 AuxLoss 0.2670 Loss 0.3548 Accuracy 0.9102.' 
+ MainLoss, AuxLoss, Loss, Accuracy = parse_iter_info_line(line) + assert MainLoss == 0.2480 + assert AuxLoss == 0.2670 + assert Loss == 0.3548 + assert Accuracy == 0.9102 + + print('All tests passed.') + + +if __name__ == '__main__': + + # FILE BELOW WAS WHEN I NORMALIZED TO TO UNIT LENGTH + # fpath = '/Users/johnlamb/Documents/train-20190928_080102.log' + #fpath = '/Users/johnlamb/Documents/train-20190928_095930.log' # training A/C/M/I 3 I epochs w/ unit normalization + + # normalize to unit length, but increase learning rate + fpath = '/Users/johnlamb/Documents/slurm-141004.out' + + # FILE BELOW WAS WHEN I DO NOT NORMALIZE TO UNIT LENGTH + #fpath = '/Users/johnlamb/Documents/train-20190928_093558.log' + + #parse_norms_and_scales(fpath) + + # test_parse_norm_line_1() + # test_parse_norm_line_2() + # test_parse_scales_line_1() + # test_parse_scales_line_2() + # test_sort_tuple_lists_by_timestamp() + + # visualize_losses() + #test_parse_iter_info_line() + + diff --git a/mseg_semantic/multiobjective_opt/mgda_workbook.py b/mseg_semantic/multiobjective_opt/mgda_workbook.py new file mode 100755 index 0000000..2b3085f --- /dev/null +++ b/mseg_semantic/multiobjective_opt/mgda_workbook.py @@ -0,0 +1,67 @@ + + +import matplotlib.pyplot as plt +import numpy as np +import pdb + +LIGHT_BLUE = np.array([221, 237, 255]) / 255 + +def main(): + """ """ + v1 = np.array([-1,1]) + v2 = np.array([3,1]) + + # v1 = np.array([-2,2]) + # v2 = np.array([-1,2]) + + # v1 = np.array([2,2]) + # v2 = np.array([0.5,2]) + + plt.arrow(0,0, v1[0], v1[1], color="r", width=0.03, zorder=1.5) + plt.arrow(0,0, v2[0], v2[1], color="m", width=0.03, zorder=1.5) + + method = 'heuristic' # 'analytic' + + print('Gamma = 1: ', v2.dot(v1) >= v1.T.dot(v1)) + print('Gamma = 0: ', v2.dot(v1) >= v2.T.dot(v2)) + + if method == 'heuristic': + alphas = np.linspace(0,1,20) + p = np.zeros((20,2)) + for i, alpha in enumerate(alphas): + p[i,:] = alpha * v1 + (1-alpha) * v2 + + norms = np.linalg.norm(p, axis=1) + min_norm_idx = np.argmin(norms) + for i, alpha in enumerate(alphas): + if i == min_norm_idx: + color = 'g' + zorder = 2 + else: + color = LIGHT_BLUE + zorder = 1 + + dx = p[i,0] + dy = p[i,1] + plt.arrow(0,0, dx, dy, color=color, width=0.01, zorder=zorder) + elif method == 'analytic': + + num = (v2 - v1).T.dot(v2) + denom = np.linalg.norm(v1 - v2) ** 2 + alpha = num / denom + # clip to range [0,1] + alpha = max(min(alpha,1),0) + p = alpha * v1 + (1-alpha) * v2 + dx, dy = p + color = 'g' + zorder = 2 + plt.arrow(0,0, dx, dy, color=color, width=0.01, zorder=zorder) + + + plt.xlim([-2.5,3.5]) + plt.ylim([-0.5,2.5]) + plt.show() + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/mseg_semantic/multiobjective_opt/min_norm_solvers.py b/mseg_semantic/multiobjective_opt/min_norm_solvers.py new file mode 100755 index 0000000..da01cdb --- /dev/null +++ b/mseg_semantic/multiobjective_opt/min_norm_solvers.py @@ -0,0 +1,231 @@ + + +import numpy as np +import time +import torch +import torch.distributed as dist + + +class MinNormSolver: + MAX_ITER = 250 + STOP_CRIT = 1e-5 + + def _min_norm_element_from2(v1v1, v1v2, v2v2): + """ + Analytical solution for min_{c} |cx_1 + (1-c)x_2|_2^2 + d is the distance (objective) optimzed + v1v1 = + v1v2 = + v2v2 = + + Case of just 2 tasks. 
+ Algorithm 1 described in the paper: https://arxiv.org/pdf/1810.04650.pdf + + Args: + - v1v1: Tensor representing inner product (v1,v1) + - v1v2: Tensor representing inner product (v1,v2) + - v2v2: Tensor representing inner product (v2,v2) + + Returns: + - gamma: + - cost: + """ + if v1v2 >= v1v1: + # Case: Fig 1, third column + gamma = 0.999 + cost = v1v1 + return gamma, cost + if v1v2 >= v2v2: + # Case: Fig 1, first column + gamma = 0.001 + cost = v2v2 + return gamma, cost + # Case: Fig 1, second column + gamma = -1.0 * ( (v1v2 - v2v2) / (v1v1+v2v2 - 2*v1v2) ) + cost = v2v2 + gamma*(v1v2 - v2v2) + return gamma, cost + + def _min_norm_2d(vecs, dps): + """ + Find the minimum norm solution as combination of two points + This is correct only in 2D + ie. min_c |\sum c_i x_i|_2^2 st. \sum c_i = 1 , 1 >= c_1 >= 0 for all i, c_i + c_j = 1.0 for some i, j + + Accept Pytorch tensors as inputs, and return only Numpy arrays. + + Args: + - vecs: List[List[torch.Tensor]] + - dps: dictionary + + Returns: + - sol: Numpy array + - dps + """ + dmin = 1e8 + + num_tasks = len(vecs) + # loop over the tasks + for i in range(num_tasks): + num_params = len(vecs[i]) + # symmetric, so only look at upper triangle + for j in range(i+1, num_tasks): + if (i,j) not in dps: + dps[(i, j)] = 0.0 + for k in range(num_params): + dps[(i,j)] += torch.dot(vecs[i][k], vecs[j][k]).cpu().numpy() + # make symmetric: ij = ji + dps[(j, i)] = dps[(i, j)] + if (i,i) not in dps: + dps[(i, i)] = 0.0 + for k in range(num_params): + dps[(i,i)] += torch.dot(vecs[i][k], vecs[i][k]).cpu().numpy() + if (j,j) not in dps: + dps[(j, j)] = 0.0 + for k in range(num_params): + dps[(j, j)] += torch.dot(vecs[j][k], vecs[j][k]).cpu().numpy() + c,d = MinNormSolver._min_norm_element_from2(dps[(i,i)], dps[(i,j)], dps[(j,j)]) + if d < dmin: + dmin = d + sol = [(i,j),c,d] + return sol, dps + + def _projection2simplex(y): + """ + Given y, it solves argmin_z |y-z|_2 st \sum z = 1 , 1 >= z_i >= 0 for all i + """ + m = len(y) + sorted_y = np.flip(np.sort(y), axis=0) + tmpsum = 0.0 + tmax_f = (np.sum(y) - 1.0)/m + for i in range(m-1): + tmpsum+= sorted_y[i] + tmax = (tmpsum - 1)/ (i+1.0) + if tmax > sorted_y[i+1]: + tmax_f = tmax + break + return np.maximum(y - tmax_f, np.zeros(y.shape)) + + def _next_point(cur_val, grad, n): + proj_grad = grad - ( np.sum(grad) / n ) + tm1 = -1.0*cur_val[proj_grad<0]/proj_grad[proj_grad<0] + tm2 = (1.0 - cur_val[proj_grad>0])/(proj_grad[proj_grad>0]) + + skippers = np.sum(tm1<1e-7) + np.sum(tm2<1e-7) + t = 1 + if len(tm1[tm1>1e-7]) > 0: + t = np.min(tm1[tm1>1e-7]) + if len(tm2[tm2>1e-7]) > 0: + t = min(t, np.min(tm2[tm2>1e-7])) + + next_point = proj_grad*t + cur_val + next_point = MinNormSolver._projection2simplex(next_point) + return next_point + + def find_min_norm_element(vecs): + """ + Given a list of vectors (vecs), this method finds the minimum norm element in the convex hull + as min |u|_2 st. u = \sum c_i vecs[i] and \sum c_i = 1. 
+ It is quite geometric, and the main idea is the fact that if d_{ij} = min |u|_2 st u = c x_i + (1-c) x_j; the solution lies in (0, d_{i,j}) + Hence, we find the best 2-task solution, and then run the projected gradient descent until convergence + """ + # Solution lying at the combination of two points + dps = {} + init_sol, dps = MinNormSolver._min_norm_2d(vecs, dps) + + n=len(vecs) + sol_vec = np.zeros(n) + sol_vec[init_sol[0][0]] = init_sol[1] + sol_vec[init_sol[0][1]] = 1 - init_sol[1] + + if n < 3: + # This is optimal for n=2, so return the solution + return sol_vec , init_sol[2] + + iter_count = 0 + + grad_mat = np.zeros((n,n)) + for i in range(n): + for j in range(n): + grad_mat[i,j] = dps[(i, j)] + + + while iter_count < MinNormSolver.MAX_ITER: + grad_dir = -1.0*np.dot(grad_mat, sol_vec) + new_point = MinNormSolver._next_point(sol_vec, grad_dir, n) + # Re-compute the inner products for line search + v1v1 = 0.0 + v1v2 = 0.0 + v2v2 = 0.0 + for i in range(n): + for j in range(n): + v1v1 += sol_vec[i]*sol_vec[j]*dps[(i,j)] + v1v2 += sol_vec[i]*new_point[j]*dps[(i,j)] + v2v2 += new_point[i]*new_point[j]*dps[(i,j)] + nc, nd = MinNormSolver._min_norm_element_from2(v1v1, v1v2, v2v2) + new_sol_vec = nc*sol_vec + (1-nc)*new_point + change = new_sol_vec - sol_vec + if np.sum(np.abs(change)) < MinNormSolver.STOP_CRIT: + return sol_vec, nd + sol_vec = new_sol_vec + + def find_min_norm_element_FW(vecs): + """ + Given a list of vectors (vecs), this method finds the minimum norm element in the convex hull + as min |u|_2 st. u = \sum c_i vecs[i] and \sum c_i = 1. + It is quite geometric, and the main idea is the fact that if d_{ij} = min |u|_2 st u = c x_i + (1-c) x_j; the solution lies in (0, d_{i,j}) + Hence, we find the best 2-task solution, and then run the Frank Wolfe until convergence + """ + # Solution lying at the combination of two points + dps = {} + init_sol, dps = MinNormSolver._min_norm_2d(vecs, dps) + + n=len(vecs) + sol_vec = np.zeros(n) + sol_vec[init_sol[0][0]] = init_sol[1] + sol_vec[init_sol[0][1]] = 1 - init_sol[1] + + if n < 3: + # This is optimal for n=2, so return the solution + return sol_vec , init_sol[2] + + iter_count = 0 + + grad_mat = np.zeros((n,n)) + for i in range(n): + for j in range(n): + grad_mat[i,j] = dps[(i, j)] + + while iter_count < MinNormSolver.MAX_ITER: + t_iter = np.argmin(np.dot(grad_mat, sol_vec)) + + v1v1 = np.dot(sol_vec, np.dot(grad_mat, sol_vec)) + v1v2 = np.dot(sol_vec, grad_mat[:, t_iter]) + v2v2 = grad_mat[t_iter, t_iter] + + nc, nd = MinNormSolver._min_norm_element_from2(v1v1, v1v2, v2v2) + new_sol_vec = nc*sol_vec + new_sol_vec[t_iter] += 1 - nc + + change = new_sol_vec - sol_vec + if np.sum(np.abs(change)) < MinNormSolver.STOP_CRIT: + return sol_vec, nd + sol_vec = new_sol_vec + + +def gradient_normalizers(grads, losses, normalization_type): + gn = {} + if normalization_type == 'l2': + for t in grads: + gn[t] = np.sqrt(np.sum([gr.pow(2).sum().data[0] for gr in grads[t]])) + elif normalization_type == 'loss': + for t in grads: + gn[t] = losses[t] + elif normalization_type == 'loss+': + for t in grads: + gn[t] = losses[t] * np.sqrt(np.sum([gr.pow(2).sum().data[0] for gr in grads[t]])) + elif normalization_type == 'none': + for t in grads: + gn[t] = 1.0 + else: + print('ERROR: Invalid Normalization Type') + return gn \ No newline at end of file diff --git a/mseg_semantic/multiobjective_opt/min_norm_solvers_new.py b/mseg_semantic/multiobjective_opt/min_norm_solvers_new.py new file mode 100755 index 0000000..386fee5 --- /dev/null +++ 
b/mseg_semantic/multiobjective_opt/min_norm_solvers_new.py @@ -0,0 +1,227 @@ + + +import numpy as np +import time +import torch +import torch.distributed as dist + + +class MinNormSolver: + MAX_ITER = 250 + STOP_CRIT = 1e-5 + + def _min_norm_element_from2(v1v1, v1v2, v2v2): + """ + Analytical solution for min_{c} |cx_1 + (1-c)x_2|_2^2 + d is the distance (objective) optimzed + v1v1 = + v1v2 = + v2v2 = + + Case of just 2 tasks. + Algorithm 1 described in the paper: https://arxiv.org/pdf/1810.04650.pdf + + Args: + - v1v1: Tensor representing inner product (v1,v1) + - v1v2: Tensor representing inner product (v1,v2) + - v2v2: Tensor representing inner product (v2,v2) + + Returns: + - gamma: + - cost: + """ + if v1v2 >= v1v1: + # Case: Fig 1, third column + gamma = 0.999 + cost = v1v1 + return gamma, cost + if v1v2 >= v2v2: + # Case: Fig 1, first column + gamma = 0.001 + cost = v2v2 + return gamma, cost + # Case: Fig 1, second column + gamma = -1.0 * ( (v1v2 - v2v2) / (v1v1+v2v2 - 2*v1v2) ) + cost = v2v2 + gamma*(v1v2 - v2v2) + return gamma, cost + + + def _min_norm_2d(vecs, dps): + """ + Find the minimum norm solution as combination of two points + This is correct only in 2D + ie. min_c |\sum c_i x_i|_2^2 st. \sum c_i = 1 , 1 >= c_1 >= 0 for all i, c_i + c_j = 1.0 for some i, j + """ + dmin = 1e8 + for i in range(len(vecs)): + for j in range(i + 1, len(vecs)): + if (i, j) not in dps: + dps[(i, j)] = 0.0 + # for k in range(len(vecs[i])): + # dps[(i, j)] += torch.dot(vecs[i][k], vecs[j][k]).data[0] + dps[(i, j)] = torch.dot(vecs[i], vecs[j]).item() + dps[(j, i)] = dps[(i, j)] + if (i, i) not in dps: + dps[(i, i)] = 0.0 + # for k in range(len(vecs[i])): + # dps[(i, i)] += torch.dot(vecs[i][k], vecs[i][k]).data[0] + dps[(i, i)] = torch.dot(vecs[i], vecs[i]).item() + if (j, j) not in dps: + dps[(j, j)] = 0.0 + # for k in range(len(vecs[i])): + # dps[(j, j)] += torch.dot(vecs[j][k], vecs[j][k]).data[0] + dps[(j, j)] = torch.dot(vecs[j], vecs[j]).item() + c, d = MinNormSolver._min_norm_element_from2(dps[(i, i)], dps[(i, j)], dps[(j, j)]) + if d < dmin: + dmin = d + sol = [(i, j), c, d] + return sol, dps + + def _projection2simplex(y): + """ + Given y, it solves argmin_z |y-z|_2 st \sum z = 1 , 1 >= z_i >= 0 for all i + """ + m = len(y) + sorted_y = np.flip(np.sort(y), axis=0) + tmpsum = 0.0 + tmax_f = (np.sum(y) - 1.0)/m + for i in range(m-1): + tmpsum+= sorted_y[i] + tmax = (tmpsum - 1)/ (i+1.0) + if tmax > sorted_y[i+1]: + tmax_f = tmax + break + return np.maximum(y - tmax_f, np.zeros(y.shape)) + + def _next_point(cur_val, grad, n): + proj_grad = grad - ( np.sum(grad) / n ) + tm1 = -1.0*cur_val[proj_grad<0]/proj_grad[proj_grad<0] + tm2 = (1.0 - cur_val[proj_grad>0])/(proj_grad[proj_grad>0]) + + skippers = np.sum(tm1<1e-7) + np.sum(tm2<1e-7) + t = 1 + if len(tm1[tm1>1e-7]) > 0: + t = np.min(tm1[tm1>1e-7]) + if len(tm2[tm2>1e-7]) > 0: + t = min(t, np.min(tm2[tm2>1e-7])) + + next_point = proj_grad*t + cur_val + next_point = MinNormSolver._projection2simplex(next_point) + return next_point + + def find_min_norm_element(vecs): + """ + Given a list of vectors (vecs), this method finds the minimum norm element in the convex hull + as min |u|_2 st. u = \sum c_i vecs[i] and \sum c_i = 1. 
+ It is quite geometric, and the main idea is the fact that if d_{ij} = min |u|_2 st u = c x_i + (1-c) x_j; the solution lies in (0, d_{i,j}) + Hence, we find the best 2-task solution, and then run the projected gradient descent until convergence + """ + # Solution lying at the combination of two points + dps = {} + init_sol, dps = MinNormSolver._min_norm_2d(vecs, dps) + + # print('init_sol:', init_sol) + # print('dps:', dps) + + n=len(vecs) + sol_vec = np.zeros(n) + sol_vec[init_sol[0][0]] = init_sol[1] + sol_vec[init_sol[0][1]] = 1 - init_sol[1] + + sol_vec = np.ones(n) / n # uniform + # sol_vec = np.array([0.49, 0.01, 0.49, 0.01]) # give coco and ade more weights. + + # print('sol_vec:', sol_vec) + + if n < 3: + # This is optimal for n=2, so return the solution + return sol_vec , init_sol[2] + + iter_count = 0 + + grad_mat = np.zeros((n,n)) + for i in range(n): + for j in range(n): + grad_mat[i,j] = dps[(i, j)] + + + while iter_count < MinNormSolver.MAX_ITER: + grad_dir = -1.0*np.dot(grad_mat, sol_vec) + new_point = MinNormSolver._next_point(sol_vec, grad_dir, n) + # Re-compute the inner products for line search + v1v1 = 0.0 + v1v2 = 0.0 + v2v2 = 0.0 + for i in range(n): + for j in range(n): + v1v1 += sol_vec[i]*sol_vec[j]*dps[(i,j)] + v1v2 += sol_vec[i]*new_point[j]*dps[(i,j)] + v2v2 += new_point[i]*new_point[j]*dps[(i,j)] + nc, nd = MinNormSolver._min_norm_element_from2(v1v1, v1v2, v2v2) + new_sol_vec = nc*sol_vec + (1-nc)*new_point + change = new_sol_vec - sol_vec + if np.sum(np.abs(change)) < MinNormSolver.STOP_CRIT: + return sol_vec, nd + sol_vec = new_sol_vec + + def find_min_norm_element_FW(vecs): + """ + Given a list of vectors (vecs), this method finds the minimum norm element in the convex hull + as min |u|_2 st. u = \sum c_i vecs[i] and \sum c_i = 1. 
+ It is quite geometric, and the main idea is the fact that if d_{ij} = min |u|_2 st u = c x_i + (1-c) x_j; the solution lies in (0, d_{i,j}) + Hence, we find the best 2-task solution, and then run the Frank Wolfe until convergence + """ + # Solution lying at the combination of two points + dps = {} + init_sol, dps = MinNormSolver._min_norm_2d(vecs, dps) + + n=len(vecs) + sol_vec = np.zeros(n) + sol_vec[init_sol[0][0]] = init_sol[1] + sol_vec[init_sol[0][1]] = 1 - init_sol[1] + + if n < 3: + # This is optimal for n=2, so return the solution + return sol_vec , init_sol[2] + + iter_count = 0 + + grad_mat = np.zeros((n,n)) + for i in range(n): + for j in range(n): + grad_mat[i,j] = dps[(i, j)] + + while iter_count < MinNormSolver.MAX_ITER: + t_iter = np.argmin(np.dot(grad_mat, sol_vec)) + + v1v1 = np.dot(sol_vec, np.dot(grad_mat, sol_vec)) + v1v2 = np.dot(sol_vec, grad_mat[:, t_iter]) + v2v2 = grad_mat[t_iter, t_iter] + + nc, nd = MinNormSolver._min_norm_element_from2(v1v1, v1v2, v2v2) + new_sol_vec = nc*sol_vec + new_sol_vec[t_iter] += 1 - nc + + change = new_sol_vec - sol_vec + if np.sum(np.abs(change)) < MinNormSolver.STOP_CRIT: + return sol_vec, nd + sol_vec = new_sol_vec + + +def gradient_normalizers(grads, losses, normalization_type): + gn = {} + if normalization_type == 'l2': + for t in grads: + gn[t] = np.sqrt(np.sum([gr.pow(2).sum().data[0] for gr in grads[t]])) + elif normalization_type == 'loss': + for t in grads: + gn[t] = losses[t] + elif normalization_type == 'loss+': + for t in grads: + gn[t] = losses[t] * np.sqrt(np.sum([gr.pow(2).sum().data[0] for gr in grads[t]])) + elif normalization_type == 'none': + for t in grads: + gn[t] = 1.0 + else: + print('ERROR: Invalid Normalization Type') + return gn \ No newline at end of file diff --git a/mseg_semantic/multiobjective_opt/min_norm_solvers_numpy.py b/mseg_semantic/multiobjective_opt/min_norm_solvers_numpy.py new file mode 100755 index 0000000..85e6e3a --- /dev/null +++ b/mseg_semantic/multiobjective_opt/min_norm_solvers_numpy.py @@ -0,0 +1,176 @@ +import numpy as np + +class MinNormSolverNumpy: + MAX_ITER = 250 + STOP_CRIT = 1e-6 + + def _min_norm_element_from2(v1v1, v1v2, v2v2): + """ + Analytical solution for min_{c} |cx_1 + (1-c)x_2|_2^2 + d is the distance (objective) optimzed + v1v1 = + v1v2 = + v2v2 = + """ + if v1v2 >= v1v1: + # Case: Fig 1, third column + gamma = 0.999 + cost = v1v1 + return gamma, cost + if v1v2 >= v2v2: + # Case: Fig 1, first column + gamma = 0.001 + cost = v2v2 + return gamma, cost + # Case: Fig 1, second column + gamma = -1.0 * ( (v1v2 - v2v2) / (v1v1+v2v2 - 2*v1v2) ) + cost = v2v2 + gamma*(v1v2 - v2v2) + return gamma, cost + + def _min_norm_2d(vecs, dps): + """ + Find the minimum norm solution as combination of two points + This solution is correct if vectors(gradients) lie in 2D + ie. min_c |\sum c_i x_i|_2^2 st. 
\sum c_i = 1 , 1 >= c_1 >= 0 for all i, c_i + c_j = 1.0 for some i, j + """ + dmin = 1e8 + for i in range(len(vecs)): + for j in range(i+1,len(vecs)): + if (i,j) not in dps: + dps[(i, j)] = 0.0 + dps[(i,j)] = np.dot(vecs[i], vecs[j]) + dps[(j, i)] = dps[(i, j)] + if (i,i) not in dps: + dps[(i, i)] = 0.0 + dps[(i,i)] = np.dot(vecs[i], vecs[i]) + if (j,j) not in dps: + dps[(j, j)] = 0.0 + dps[(j, j)] = np.dot(vecs[j], vecs[j]) + c,d = MinNormSolver._min_norm_element_from2(dps[(i,i)], dps[(i,j)], dps[(j,j)]) + if d < dmin: + dmin = d + sol = [(i,j),c,d] + return sol, dps + + def _projection2simplex(y): + """ + Given y, it solves argmin_z |y-z|_2 st \sum z = 1 , 1 >= z_i >= 0 for all i + """ + m = len(y) + sorted_y = np.flip(np.sort(y), axis=0) + tmpsum = 0.0 + tmax_f = (np.sum(y) - 1.0)/m + for i in range(m-1): + tmpsum+= sorted_y[i] + tmax = (tmpsum - 1)/ (i+1.0) + if tmax > sorted_y[i+1]: + tmax_f = tmax + break + return np.maximum(y - tmax_f, np.zeros(y.shape)) + + def _next_point(cur_val, grad, n): + proj_grad = grad - ( np.sum(grad) / n ) + tm1 = -1.0*cur_val[proj_grad<0]/proj_grad[proj_grad<0] + tm2 = (1.0 - cur_val[proj_grad>0])/(proj_grad[proj_grad>0]) + + skippers = np.sum(tm1<1e-7) + np.sum(tm2<1e-7) + t = 1 + if len(tm1[tm1>1e-7]) > 0: + t = np.min(tm1[tm1>1e-7]) + if len(tm2[tm2>1e-7]) > 0: + t = min(t, np.min(tm2[tm2>1e-7])) + + next_point = proj_grad*t + cur_val + next_point = MinNormSolver._projection2simplex(next_point) + return next_point + + def find_min_norm_element(vecs): + """ + Given a list of vectors (vecs), this method finds the minimum norm element in the convex hull + as min |u|_2 st. u = \sum c_i vecs[i] and \sum c_i = 1. + It is quite geometric, and the main idea is the fact that if d_{ij} = min |u|_2 st u = c x_i + (1-c) x_j; the solution lies in (0, d_{i,j}) + Hence, we find the best 2-task solution, and then run the projected gradient descent until convergence + """ + # Solution lying at the combination of two points + dps = {} + init_sol, dps = MinNormSolver._min_norm_2d(vecs, dps) + + n=len(vecs) + sol_vec = np.zeros(n) + sol_vec[init_sol[0][0]] = init_sol[1] + sol_vec[init_sol[0][1]] = 1 - init_sol[1] + + if n < 3: + # This is optimal for n=2, so return the solution + return sol_vec , init_sol[2] + + iter_count = 0 + + grad_mat = np.zeros((n,n)) + for i in range(n): + for j in range(n): + grad_mat[i,j] = dps[(i, j)] + + while iter_count < MinNormSolver.MAX_ITER: + grad_dir = -1.0*np.dot(grad_mat, sol_vec) + new_point = MinNormSolver._next_point(sol_vec, grad_dir, n) + # Re-compute the inner products for line search + v1v1 = 0.0 + v1v2 = 0.0 + v2v2 = 0.0 + for i in range(n): + for j in range(n): + v1v1 += sol_vec[i]*sol_vec[j]*dps[(i,j)] + v1v2 += sol_vec[i]*new_point[j]*dps[(i,j)] + v2v2 += new_point[i]*new_point[j]*dps[(i,j)] + nc, nd = MinNormSolver._min_norm_element_from2(v1v1, v1v2, v2v2) + new_sol_vec = nc*sol_vec + (1-nc)*new_point + change = new_sol_vec - sol_vec + if np.sum(np.abs(change)) < MinNormSolver.STOP_CRIT: + return sol_vec, nd + sol_vec = new_sol_vec + return sol_vec, nd + + def find_min_norm_element_FW(vecs): + """ + Given a list of vectors (vecs), this method finds the minimum norm element in the convex hull + as min |u|_2 st. u = \sum c_i vecs[i] and \sum c_i = 1. 
+ It is quite geometric, and the main idea is the fact that if d_{ij} = min |u|_2 st u = c x_i + (1-c) x_j; the solution lies in (0, d_{i,j}) + Hence, we find the best 2-task solution, and then run the Frank Wolfe until convergence + """ + # Solution lying at the combination of two points + dps = {} + init_sol, dps = MinNormSolver._min_norm_2d(vecs, dps) + + n=len(vecs) + sol_vec = np.zeros(n) + sol_vec[init_sol[0][0]] = init_sol[1] + sol_vec[init_sol[0][1]] = 1 - init_sol[1] + + if n < 3: + # This is optimal for n=2, so return the solution + return sol_vec , init_sol[2] + + iter_count = 0 + + grad_mat = np.zeros((n,n)) + for i in range(n): + for j in range(n): + grad_mat[i,j] = dps[(i, j)] + + while iter_count < MinNormSolver.MAX_ITER: + t_iter = np.argmin(np.dot(grad_mat, sol_vec)) + + v1v1 = np.dot(sol_vec, np.dot(grad_mat, sol_vec)) + v1v2 = np.dot(sol_vec, grad_mat[:, t_iter]) + v2v2 = grad_mat[t_iter, t_iter] + + nc, nd = MinNormSolver._min_norm_element_from2(v1v1, v1v2, v2v2) + new_sol_vec = nc*sol_vec + new_sol_vec[t_iter] += 1 - nc + + change = new_sol_vec - sol_vec + if np.sum(np.abs(change)) < MinNormSolver.STOP_CRIT: + return sol_vec, nd + sol_vec = new_sol_vec + return sol_vec, nd \ No newline at end of file diff --git a/mseg_semantic/multiobjective_opt/train_multi_task.py b/mseg_semantic/multiobjective_opt/train_multi_task.py new file mode 100755 index 0000000..bebe6ea --- /dev/null +++ b/mseg_semantic/multiobjective_opt/train_multi_task.py @@ -0,0 +1,245 @@ +import sys +import torch +import click +import json +import datetime +from timeit import default_timer as timer + +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable +from torch.utils import data +import torchvision +import types + +from tqdm import tqdm +from tensorboardX import SummaryWriter + +from models.gradient_scaler import MinNormElement +import losses +import datasets +import metrics +import model_selector +from min_norm_solvers import MinNormSolver, gradient_normalizers + +NUM_EPOCHS = 100 + +@click.command() +@click.option('--param_file', default='params.json', help='JSON parameters file') +def train_multi_task(param_file): + with open('configs.json') as config_params: + configs = json.load(config_params) + + with open(param_file) as json_params: + params = json.load(json_params) + + + exp_identifier = [] + for (key, val) in params.items(): + if 'tasks' in key: + continue + exp_identifier+= ['{}={}'.format(key,val)] + + exp_identifier = '|'.join(exp_identifier) + params['exp_id'] = exp_identifier + + writer = SummaryWriter(log_dir='runs/{}_{}'.format(params['exp_id'], datetime.datetime.now().strftime("%I:%M%p on %B %d, %Y"))) + + train_loader, train_dst, val_loader, val_dst = datasets.get_dataset(params, configs) + loss_fn = losses.get_loss(params) + metric = metrics.get_metrics(params) + + model = model_selector.get_model(params) + model_params = [] + for m in model: + model_params += model[m].parameters() + + if 'RMSprop' in params['optimizer']: + optimizer = torch.optim.RMSprop(model_params, lr=params['lr']) + elif 'Adam' in params['optimizer']: + optimizer = torch.optim.Adam(model_params, lr=params['lr']) + elif 'SGD' in params['optimizer']: + optimizer = torch.optim.SGD(model_params, lr=params['lr'], momentum=0.9) + + tasks = params['tasks'] + all_tasks = configs[params['dataset']]['all_tasks'] + print('Starting training with parameters \n \t{} \n'.format(str(params))) + + if 'mgda' in params['algorithm']: + 
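+ # MGDA setup note: 'use_approximation' selects MGDA-UB, which only needs per-task gradients w.r.t. the shared representation z, rather than full MGDA, which backpropagates each task loss through all shared parameters (see the two branches below).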
approximate_norm_solution = params['use_approximation'] + if approximate_norm_solution: + print('Using approximate min-norm solver') + else: + print('Using full solver') + n_iter = 0 + loss_init = {} + for epoch in tqdm(range(NUM_EPOCHS)): + start = timer() + print('Epoch {} Started'.format(epoch)) + if (epoch+1) % 10 == 0: + # Every 10 epochs, decay the LR by a factor of 0.85 + for param_group in optimizer.param_groups: + param_group['lr'] *= 0.85 + print('Decayed the learning rate at iteration {}'.format(n_iter)) + + for m in model: + model[m].train() + + for batch in train_loader: + n_iter += 1 + # First member is always images + images = batch[0] + images = Variable(images.cuda()) + + labels = {} + # Read all targets of all tasks + for i, t in enumerate(all_tasks): + if t not in tasks: + continue + labels[t] = batch[i+1] + labels[t] = Variable(labels[t].cuda()) + + # Scaling the loss functions based on the algorithm choice + loss_data = {} + grads = {} + scale = {} + mask = None + masks = {} + if 'mgda' in params['algorithm']: + # Will use our MGDA_UB if approximate_norm_solution is True. Otherwise, will use MGDA + + if approximate_norm_solution: + optimizer.zero_grad() + # First compute representations (z) + images_volatile = Variable(images.data, volatile=True) + rep, mask = model['rep'](images_volatile, mask) + # As an approximate solution we only need gradients for input + if isinstance(rep, list): + # This is a hack to handle psp-net + rep = rep[0] + rep_variable = [Variable(rep.data.clone(), requires_grad=True)] + list_rep = True + else: + rep_variable = Variable(rep.data.clone(), requires_grad=True) + list_rep = False + + # Compute gradients of each loss function wrt z + for t in tasks: + optimizer.zero_grad() + out_t, masks[t] = model[t](rep_variable, None) + loss = loss_fn[t](out_t, labels[t]) + loss_data[t] = loss.data[0] + loss.backward() + grads[t] = [] + if list_rep: + grads[t].append(Variable(rep_variable[0].grad.data.clone(), requires_grad=False)) + rep_variable[0].grad.data.zero_() + else: + grads[t].append(Variable(rep_variable.grad.data.clone(), requires_grad=False)) + rep_variable.grad.data.zero_() + else: + # This is MGDA + for t in tasks: + # Compute gradients of each loss function wrt parameters + optimizer.zero_grad() + rep, mask = model['rep'](images, mask) + out_t, masks[t] = model[t](rep, None) + loss = loss_fn[t](out_t, labels[t]) + loss_data[t] = loss.data[0] + loss.backward() + grads[t] = [] + for param in model['rep'].parameters(): + if param.grad is not None: + grads[t].append(Variable(param.grad.data.clone(), requires_grad=False)) + + # Normalize all gradients, this is optional and not included in the paper. See the notebook for details + gn = gradient_normalizers(grads, loss_data, params['normalization_type']) + for t in tasks: + for gr_i in range(len(grads[t])): + grads[t][gr_i] = grads[t][gr_i] / gn[t] + + # Frank-Wolfe iteration to compute scales. 
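+ # find_min_norm_element returns simplex weights (one scalar per task, summing to 1) that minimize the norm of the weighted combination of the task gradients; these weights become the per-task loss scales used below.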
+ sol, min_norm = MinNormSolver.find_min_norm_element([grads[t] for t in tasks]) + for i, t in enumerate(tasks): + scale[t] = float(sol[i]) + else: + for t in tasks: + masks[t] = None + scale[t] = float(params['scales'][t]) + + # Scaled back-propagation + optimizer.zero_grad() + rep, _ = model['rep'](images, mask) + for i, t in enumerate(tasks): + out_t, _ = model[t](rep, masks[t]) + loss_t = loss_fn[t](out_t, labels[t]) + loss_data[t] = loss_t.data[0] + if i > 0: + loss = loss + scale[t]*loss_t + else: + loss = scale[t]*loss_t + loss.backward() + optimizer.step() + + writer.add_scalar('training_loss', loss.data[0], n_iter) + for t in tasks: + writer.add_scalar('training_loss_{}'.format(t), loss_data[t], n_iter) + + for m in model: + model[m].eval() + + tot_loss = {} + tot_loss['all'] = 0.0 + met = {} + for t in tasks: + tot_loss[t] = 0.0 + met[t] = 0.0 + + num_val_batches = 0 + for batch_val in val_loader: + val_images = Variable(batch_val[0].cuda(), volatile=True) + labels_val = {} + + for i, t in enumerate(all_tasks): + if t not in tasks: + continue + labels_val[t] = batch_val[i+1] + labels_val[t] = Variable(labels_val[t].cuda(), volatile=True) + + val_rep, _ = model['rep'](val_images, None) + for t in tasks: + out_t_val, _ = model[t](val_rep, None) + loss_t = loss_fn[t](out_t_val, labels_val[t]) + tot_loss['all'] += loss_t.data[0] + tot_loss[t] += loss_t.data[0] + metric[t].update(out_t_val, labels_val[t]) + num_val_batches+=1 + + for t in tasks: + writer.add_scalar('validation_loss_{}'.format(t), tot_loss[t]/num_val_batches, n_iter) + metric_results = metric[t].get_result() + for metric_key in metric_results: + writer.add_scalar('metric_{}_{}'.format(metric_key, t), metric_results[metric_key], n_iter) + metric[t].reset() + writer.add_scalar('validation_loss', tot_loss['all']/len(val_dst), n_iter) + + if epoch % 3 == 0: + # Save after every 3 epoch + state = {'epoch': epoch+1, + 'model_rep': model['rep'].state_dict(), + 'optimizer_state' : optimizer.state_dict()} + for t in tasks: + key_name = 'model_{}'.format(t) + state[key_name] = model[t].state_dict() + + torch.save(state, "saved_models/{}_{}_model.pkl".format(params['exp_id'], epoch+1)) + + end = timer() + print('Epoch ended in {}s'.format(end - start)) + + +if __name__ == '__main__': + train_multi_task() \ No newline at end of file diff --git a/mseg_semantic/multiobjective_opt/worker_reduce_demo.py b/mseg_semantic/multiobjective_opt/worker_reduce_demo.py new file mode 100755 index 0000000..9cc117c --- /dev/null +++ b/mseg_semantic/multiobjective_opt/worker_reduce_demo.py @@ -0,0 +1,344 @@ +#!/usr/bin/python3 + +import apex +import argparse +from collections import defaultdict +import numpy as np +import os +import pdb +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.multiprocessing as mp +import torch.distributed as dist + +from typing import List, Mapping + +from mseg_semantic.multiobjective_opt.dist_mgda_utils import ( + scale_loss_and_gradients, + all_gather_create_tensor_list, + reduce_to_dict_per_dataset, + scaled_reduce_dict_to_tensor +) + + + +class LinearModel(nn.Module): + + def __init__(self): + """ """ + super(LinearModel, self).__init__() + + + + #self.bn = torch.nn.BatchNorm1d(num_features) + + self.linear = nn.Linear(1, 1, bias=False) + + + def forward(self, x): + """ """ + + x = self.bn(x) + return self.linear(x) + + +class SyncBatchNormModel(nn.Module): + + def __init__(self): + """ """ + super(SyncBatchNormModel, self).__init__() + self.sync_bn = 
torch.nn.SyncBatchNorm(num_features=1) + + def forward(self, x): + """ """ + return self.sync_bn(x) + + + +class SpatialBatchNormLayer(nn.Module): + + def __init__(self): + """ """ + super(SpatialBatchNormLayer, self).__init__() + num_features = 1 + self.bn = torch.nn.BatchNorm2d(num_features) + + def forward(self, x): + """ """ + return self.bn(x) + + +def init_weights(m): + print(m) + if type(m) == nn.Linear: + m.weight.data.fill_(3.0) + print(m.weight) + + +def test_single_process(): + """ """ + x = torch.tensor([1.]) + y = torch.tensor([3.]) + net = LinearModel() + net.apply(init_weights) + + loss = (net(x) - y) ** 2 + + loss.backward() + weight_grad = net.linear.weight.grad + print('Pytorch grad: ', weight_grad) + print('Expected grad: ', 2 * (net.linear.weight * x - y) * x) + + + + +def test_multiple_processes(): + """ + + gloo for cpu, nccl for gpu + + Args: + - None + + Returns: + - None + """ + parser = argparse.ArgumentParser(description='Distributed MGDA Unit Tests') + parser.add_argument('--use_apex', action='store_true') # default=True + parser.add_argument('--multiprocessing_distributed', action='store_false') # default=True + + parser.add_argument('--train_gpu', type=List[int], default= [0,1])# [0, 1, 2, 3, 4, 5, 6]) + parser.add_argument('--ngpus_per_node', type=int, default=None) + parser.add_argument('--dist_url', type=str, default='tcp://127.0.0.1:6789') + parser.add_argument('--base_lr', type=float, default=1.) + parser.add_argument('--world_size', type=int, default=1) + parser.add_argument('--rank', type=int, default=0) + parser.add_argument('--dist_backend', type=str, default='nccl') # 'gloo') + parser.add_argument('--dataset_gpu_mapping', type=Mapping[int,str], + default = { + 'coco':[0], + 'mapillary': [1] + } + # default = { + # 'coco':[0,1,2], + # 'mapillary': [3,4], + # 'ade20k': [5,6] + # } + ) + parser.add_argument('--opt_level', type=str, default='O0') + parser.add_argument('--keep_batchnorm_fp32', default=None) + parser.add_argument('--loss_scale', default=None) + args = parser.parse_args() + + args.ngpus_per_node = len(args.train_gpu) + + os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(x) for x in args.train_gpu) + args.world_size = args.ngpus_per_node * args.world_size + + # Spawns nprocs processes that run fn with args. + # `main_worker` function is called as fn(i, *args), where i is the process index and + # args is the passed through tuple of arguments. + # nprocs denotes the number of processes to spawn. 
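+ # e.g. with the default train_gpu=[0, 1], two worker processes are spawned and each one is invoked as main_worker(process_index, ngpus_per_node, args).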
+ mp.spawn(main_worker, nprocs=args.ngpus_per_node, args=(args.ngpus_per_node, args)) + # main_worker(1, args.ngpus_per_node, args) + + +def main_worker(gpu: int, ngpus_per_node: int, argss) -> None: + """ + Args: + - gpu + - ngpus_per_node + + Returns: + - None + """ + global args + args = argss + + args.rank = args.rank * args.ngpus_per_node + gpu + # print('Args: ', args) + # print('Args rank: ', args.rank) + dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank) + + # print('rank', dist.get_rank()) + + #model = LinearModel() + #model.apply(init_weights) + #model = SpatialBatchNormLayer() + + model = SyncBatchNormModel() + + optimizer = torch.optim.SGD(model.parameters(), lr=args.base_lr) + if main_process(): + print('Creating model in main process') + + torch.cuda.set_device(gpu) + # model = model.cuda() + model = torch.nn.parallel.DistributedDataParallel(model.cuda(), device_ids=[gpu]) + print('Distributed Model: ', model) + + for name, p in model.named_parameters(): + print(f'name={name}') + + rank_to_dataset_map = {} + for dataset, gpu_idxs in args.dataset_gpu_mapping.items(): + for gpu_idx in gpu_idxs: + rank_to_dataset_map[gpu_idx] = dataset + + dataset = rank_to_dataset_map[args.rank] + + num_train_examples = 2 + x = torch.arange(num_train_examples*2).reshape(num_train_examples,2) * args.rank + x = x.float() + y = torch.ones(num_train_examples,2) * -1 + + print('X shape: ', x.shape) + print('Y shape: ', y.shape) + + torch.cuda.set_device(gpu) + train(x, y, model, optimizer, args) + + + +def main_process(): + """ + """ + return args.rank % args.ngpus_per_node == 0 + + + +def train(inputs, targets, model, optimizer, args) -> None: + """ + Note: ddp.no_sync() is only available in Pytorch >1.2.0 (not 1.1.0) + + Everything is working in terms of gathering/setting gradients + when we're fully under no_sync() for forward/backward + + SyncBatchNorm works correctly even under ddp.no_sync(). 
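+ Under no_sync(), each replica keeps only the gradient of its own per-dataset loss; those per-replica gradients are all-gathered and re-weighted per dataset further below, which is the behavior the MGDA path relies on.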
+ + Args: + - x + - y + - model + - optimizer + - args + + Returns: + - + """ + rank = dist.get_rank() + print(f'Before iters: rank={rank}, iter={iter}, Running mean: ', model.module.sync_bn.running_mean) + + num_iters = inputs.shape[0] + for i in range(num_iters): + + x = inputs[i].reshape(1,1,2,1).cuda(non_blocking=True) + y = targets[i].reshape(1,1,2,1).cuda(non_blocking=True) + # print('x and y shape: ', x.shape, y.shape) + + print(f'rank={rank}, iter={i}: x={x}') + print(f'rank={rank}, iter={i}: y={y}') + + with model.no_sync(): + model(x) + + print(f'rank={rank}, iter={i}, Running mean: ', model.module.sync_bn.running_mean) + continue + + # print(f'rank = {rank}: Loss before detach: ', loss) + + DIST_REGIME = 'all_reduce' # 'mgda' # 'all_gather' # # 'reduce' # + + with model.no_sync(): + optimizer.zero_grad() + loss = (model(x) - y) ** 2 + loss.backward() + + curr_w = model.module.linear.weight.detach().cpu().numpy() + print(f'Iter i={i}, rank={rank}, Curr model weight: ', curr_w ) + + print(f'Iter i={i}, rank={rank}, Actual grad: ', model.module.linear.weight.grad) + single_gpu_expected_grads = 2 * (x.cpu().numpy() * curr_w - y.cpu().numpy() ) * x.cpu().numpy() + print(f'Iter i={i}, rank={rank}, Expected single gpu grad: ',single_gpu_expected_grads) + + all_x = np.arange(2) + all_y = np.ones(2) * -1 + all_expected_grads = 2 * (all_x * curr_w - all_y ) * all_x + print(f'Iter i={i}, rank={rank}, Expected averaged grad: ', np.mean(all_expected_grads)) + + dataset_names = list(args.dataset_gpu_mapping.keys()) + per_dataset_per_param_dict = {} + # list of all gradients, per each dataset + dataset_allgrads = defaultdict(list) + # accumulate the gradients per each task + + # no need to sort these now, names are unique + for p_name, param in model.named_parameters(): + if param.grad is not None: + grad_i_tensor_list = all_gather_create_tensor_list(tensor=param.grad, ngpus_per_node=args.ngpus_per_node) + print(f'grad_i_tensor_list for {p_name}: ', grad_i_tensor_list) + dataset_grad_p_dict = reduce_to_dict_per_dataset(grad_i_tensor_list, args.dataset_gpu_mapping) + per_dataset_per_param_dict[p_name] = dataset_grad_p_dict + print(per_dataset_per_param_dict) + + for dname in dataset_names: + dataset_allgrads[dname] += [dataset_grad_p_dict[dname].clone().flatten()] # TODO: remove the flatten?? + + scales = {'coco': 1, 'mapillary': 3} + + # Scaled back-propagation, we must preserve gradients so we will not call optimizer.zero_grad() again + for p_name, param in model.named_parameters(): + if param.grad is not None: + # Instead of a second backward pass, just use the results of the original backward pass + param.grad = scaled_reduce_dict_to_tensor(per_dataset_per_param_dict[p_name], dataset_names, scales) + print(f'Set {p_name} param.grad to {param.grad}') + + + # if DIST_REGIME == 'all_reduce': + # # Reduces the tensor data across all machines in such a way that all get the final result. + # dist.all_reduce(tensor=loss) + # print(f'rank = {rank}: Main loss after all reduce: ', loss) + + # elif DIST_REGIME == 'reduce': + # # Reduces the tensor data across all machines. Only the process with rank dst + # # is going to receive the final result. + # dist.reduce(tensor=loss, dst=0) + # print(f'rank = {rank}: Main loss after all reduce: ', loss) + + # elif DIST_REGIME == 'all_gather': + # optimizer.zero_grad() + # loss.backward() + # pytorch_grad = model.linear.weight.grad + # expected_grad = 2 * (model.linear.weight * x - y) * x + # print(f'rank = {rank}: Pytorch grad: ', pytorch_grad, ' vs. 
expected grad: ', expected_grad) + # optimizer.step() + # main_loss = loss.detach() + # print(f'rank = {rank}: Main loss after detach: ', main_loss) + # tensor_list = all_gather_create_tensor_list(tensor=model.linear.weight.grad, ngpus_per_node=args.ngpus_per_node) + # print(f'rank = {rank}: Tensor list: ', tensor_list) + # print(f'rank = {rank}: model.linear.weight.grad: ', model.linear.weight.grad) + # dataset_grad_dict = { dataset: torch.zeros_like(model.linear.weight.grad) for dataset in args.dataset_gpu_mapping.keys()} + # for dataset, gpu_list in args.dataset_gpu_mapping.items(): + # for gpu_idx in gpu_list: + # dataset_grad_dict[dataset] += tensor_list[gpu_idx] + + # print(dataset_grad_dict) + # elif DIST_REGIME == 'mgda': + # loss = scale_loss_and_gradients(loss, optimizer, model, args) + + # # If there was NO MGDA, you would use the following two lines, and nothing would converge! + # # optimizer.zero_grad() + # # dist.all_reduce(tensor=loss) + # # loss.backward() + + print(f'rank={rank}, During Iter {i} ', model.module.linear.weight) + optimizer.step() + print(f'rank={rank}, After Iter {i} ', model.module.linear.weight) + +if __name__ == '__main__': + # test_single_process() + test_multiple_processes() + + + diff --git a/mseg_semantic/tool/launch_ccsa.sh b/mseg_semantic/tool/launch_ccsa.sh new file mode 100755 index 0000000..b2621bc --- /dev/null +++ b/mseg_semantic/tool/launch_ccsa.sh @@ -0,0 +1,20 @@ +export outf=1122 +mkdir ${outf} + +# v1 uses 1000 pairs +# sbatch -p quadro --gres=gpu:6 -c 60 -t 2-00:00:00 -o ${outf}/three-1.6-ccsa tool/train-ccsa-qvga-mix.sh three-1.6-ccsa.yaml False exp-ccsa-v1 ${WORK}/supp/three-1.6-ccsa +# may have gotten polluted + +# V2 uses 100 pairs + +# v4 has 1,000 pairs for sure + +# v5 has 10,000 pairs for sure + +# v6 has 1,000 pairs with alpha 0.5 + +# v7 has 1,000 pairs with alpha 0.1 + +# v8 alpha = 0 with 1000 pairs, should be no DG effectively + +sbatch -p quadro --gres=gpu:6 -c 60 -t 2-00:00:00 -o ${outf}/three-1.6-ccsa tool/train-ccsa-qvga-mix.sh three-1.6-ccsa.yaml False exp-ccsa-v9 ${WORK}/supp/three-1.6-ccsa-v9 diff --git a/mseg_semantic/tool/train.py b/mseg_semantic/tool/train.py new file mode 100755 index 0000000..6eed765 --- /dev/null +++ b/mseg_semantic/tool/train.py @@ -0,0 +1,855 @@ +#!/usr/bin/python3 + +import math +import time +start = time.time() +from typing import Dict, Union +# import numpy as np +# import os +# import pdb +# import random + +import apex +import torch +import torch.nn as nn +# import cv2 + +import mseg_semantic +from mseg_semantic.utils import transform + +""" +Script to train models on the MSeg dataset using Pytorch DDP. 
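+When training on multiple datasets, each GPU process is pinned to a single dataset via dataset_gpu_mapping; gradients are then either averaged across processes (flat mixing) or re-weighted per dataset with MGDA when use_mgda is set.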
+""" + +# cv2.ocl.setUseOpenCL(False) +# cv2.setNumThreads(0) + +MAX_NUM_EPOCHS = 100000 # we let epochs run forever, then exit when max number of iters is reached + +def get_parser(): + """Merge config parameters and commend line arguments into `cfg` object""" + import argparse + from mseg_semantic.utils import config + + parser = argparse.ArgumentParser(description='PyTorch Semantic Segmentation') + parser.add_argument('--config', type=str, default='config/ade20k/ade20k_pspnet50.yaml', help='config file') + parser.add_argument('opts', help='see config/ade20k/ade20k_pspnet50.yaml for all options', default=None, nargs=argparse.REMAINDER) + args = parser.parse_args() + assert args.config is not None + cfg = config.load_cfg_from_cfg_file(args.config) + if args.opts is not None: + cfg = config.merge_cfg_from_list(cfg, args.opts) + return cfg + + +def get_logger(): + """ Configure a Python logger to the logging.INFO verbosity level""" + import logging + logger_name = "main-logger" + logger = logging.getLogger(logger_name) + logger.setLevel(logging.INFO) + handler = logging.StreamHandler() + fmt = "[%(asctime)s %(levelname)s %(filename)s line %(lineno)d %(process)d] %(message)s" + handler.setFormatter(logging.Formatter(fmt)) + logger.addHandler(handler) + return logger + + +def worker_init_fn(worker_id): + import random + random.seed(args.manual_seed + worker_id) + + +def main_process() -> bool: + return not args.multiprocessing_distributed or (args.multiprocessing_distributed and args.rank % args.ngpus_per_node == 0) + + +def main() -> None: + """ + """ + import pickle + + import torch, os, math + import torch.backends.cudnn as cudnn + import torch.nn as nn + import torch.nn.functional as F + import torch.nn.parallel + import torch.optim + import torch.utils.data + import torch.multiprocessing as mp + import torch.distributed as dist + from mseg.utils.dataset_config import infos + from mseg.taxonomy.taxonomy_converter import TaxonomyConverter + from mseg.taxonomy.naive_taxonomy_converter import NaiveTaxonomyConverter + from mseg_semantic.utils import config + from mseg_semantic.utils.avg_meter import AverageMeter, SegmentationAverageMeter + from mseg_semantic.utils.verification_utils import verify_architecture + + + print('Using PyTorch version: ', torch.__version__) + args = get_parser() + assert isinstance(args.train_gpu, list) + os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(x) for x in args.train_gpu) + + ###### FLAT-MIX CODE ####################### + print("CUDA_VISIBLE_DEVICES: ", os.environ["CUDA_VISIBLE_DEVICES"]) + + # Randomize args.dist_url too avoid conflicts on same machine + args.dist_url = args.dist_url[:-2] + str(os.getpid() % 100).zfill(2) + + if isinstance(args.dataset, str): # only one dataset, i.e. 
'single' + # map to a list of GPU IDs + args.dataset_gpu_mapping = {args.dataset: args.train_gpu} + args.dataset = [args.dataset] + print("args.dataset=", args.dataset) + + # train with multiple datasets, must be in the universal taxonomy space + elif len(args.dataset) > 1 and args.universal: + args.tc = NaiveTaxonomyConverter() if args.use_naive_taxonomy else TaxonomyConverter() + + args.data_root = {dataset:infos[dataset].dataroot for dataset in args.dataset} + args.train_list = {dataset:infos[dataset].trainlist for dataset in args.dataset} + args.classes = args.tc.num_uclasses + # args.save_path = args.save_path.replace("{}", '-'.join([infos[dataset].shortname for dataset in args.dataset])) + + elif (len(args.dataset) == 1) and args.universal: # single dataset on universal taxonomy training + args.tc = TaxonomyConverter(train_datasets=args.dataset) + args.data_root = infos[args.dataset[0]].dataroot + args.train_list = infos[args.dataset[0]].trainlist + args.classes = args.tc.num_uclasses + # args.save_path = args.save_path.replace("{}", info[args.dataset].shortname) + + elif (len(args.dataset) == 1) and (not args.universal): # single dataset on self taxonomy training + args.data_root = infos[args.dataset[0]].dataroot + args.train_list = infos[args.dataset[0]].trainlist + args.classes = infos[args.dataset[0]].num_classes + # args.save_path = args.save_path.replace("{}", infos[args.dataset].shortname) + else: + raise RuntimeError('Incorrect training configuration, please verify your config params.') + + # verify arch after args.classes is populated + verify_architecture(args) + + if args.manual_seed is not None: + cudnn.benchmark = False + cudnn.deterministic = True + torch.manual_seed(args.manual_seed) + np.random.seed(args.manual_seed) + torch.manual_seed(args.manual_seed) + torch.cuda.manual_seed_all(args.manual_seed) + if args.dist_url == "env://" and args.world_size == -1: + args.world_size = int(os.environ["WORLD_SIZE"]) + args.distributed = args.world_size > 1 or args.multiprocessing_distributed + args.ngpus_per_node = len(args.train_gpu) + if len(args.train_gpu) == 1: + args.sync_bn = False + args.distributed = False + args.multiprocessing_distributed = False + if args.multiprocessing_distributed: + args.world_size = args.ngpus_per_node * args.world_size + mp.spawn(main_worker, nprocs=args.ngpus_per_node, args=(args.ngpus_per_node, args)) + else: + main_worker(args.train_gpu, args.ngpus_per_node, args) + + +def get_dataset_split_transform( + args, split: str +) -> transform.Compose: + """Return the input data transform (w/ data augmentations) + + Args: + args: experiment parameters + split: dataset split, either 'train' or 'val' + + Return: + Runtime data transformation object that is callable + """ + from mseg_semantic.utils.normalization_utils import get_imagenet_mean_std + + mean, std = get_imagenet_mean_std() + if split == "train": + transform_list = [ + transform.ResizeShort(args.short_size), + transform.RandScale([args.scale_min, args.scale_max]), + transform.RandRotate( + [args.rotate_min, args.rotate_max], + padding=mean, + ignore_label=args.ignore_label, + ), + transform.RandomGaussianBlur(), + transform.RandomHorizontalFlip(), + transform.Crop( + [args.train_h, args.train_w], + crop_type="rand", + padding=mean, + ignore_label=args.ignore_label, + ), + transform.ToTensor(), + transform.Normalize(mean=mean, std=std), + ] + elif split == "val": + transform_list = [ + transform.Crop( + [args.train_h, args.train_w], + crop_type="center", + padding=mean, + 
ignore_label=args.ignore_label, + ), + transform.ToTensor(), + transform.Normalize(mean=mean, std=std), + ] + else: + raise RuntimeError("Unknown split. Quitting ...") + + if len(args.dataset) > 1 and args.universal: + transform_list += [ + transform.ToUniversalLabel( + args.dataset_name, use_naive_taxonomy=args.use_naive_taxonomy + ) + ] + elif len(args.dataset) == 1 and args.universal: + # never run naive taxonomy baseline for training with a single dataset + transform_list += [transform.ToUniversalLabel(args.dataset[0])] + + return transform.Compose(transform_list) + + + + +def load_pretrained_weights(args, model, optimizer): + """ + Returns: model (if args.resume is a model, loads the model, + if it is a directory, find the latest model in that directory) + """ + import torch, os, math + + resume_iter = 0 + + if args.weight: + if os.path.isfile(args.weight): + if main_process(): + logger.info("=> loading weight '{}'".format(args.weight)) + checkpoint = torch.load(args.weight) + model.load_state_dict(checkpoint['state_dict']) + if main_process(): + logger.info("=> loaded weight '{}'".format(args.weight)) + else: + if main_process(): + logger.info("=> no weight found at '{}'".format(args.weight)) + + if args.resume: + if os.path.isfile(args.resume): + if main_process(): + logger.info("=> loading checkpoint '{}'".format(args.resume)) + # checkpoint = torch.load(args.resume) + checkpoint = torch.load(args.resume, map_location=lambda storage, loc: storage.cuda()) + # args.start_epoch = checkpoint['epoch'] + args.start_epoch = 0 # we don't rely on this, but on resume_iter + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + resume_iter = checkpoint['current_iter'] + if main_process(): + logger.info("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) + else: + if main_process(): + logger.info("=> no checkpoint found at '{}'".format(args.resume) + ' Please check') + exit() + + if args.auto_resume and (args.auto_resume != 'None'): + import glob + if main_process(): + logger.info("=> loading latest checkpoint from folder'{}'".format(args.auto_resume)) + + print("Auto resume training? 
", args.auto_resume) + filelist = glob.glob(args.auto_resume + '/*.pth') + print(os.getcwd()) + print(filelist) + filename = [file.split('/')[-1] for file in filelist] + filename = [file.replace('.pth', '') for file in filename] + # epochlist = [] + if 'train_epoch_final' in filename: + if main_process(): + logger.info("Training already finished, no need to resume!!") + exit() + else: + print(filename) + epochs = [file.split('_')[-1] for file in filename] + epochs = [epoch for epoch in epochs if epoch.isdigit()] + epochs = [int(epoch) for epoch in epochs] + max_epoch = max(epochs) + + filename = 'train_epoch_{}.pth'.format(max_epoch) + + model_path = os.path.join(args.auto_resume, filename) + logger.info(model_path) + print(0, max_epoch, model_path, os.path.isfile(model_path)) + + + if os.path.isfile(model_path): + if main_process(): + logger.info("=> loading checkpoint '{}'".format(model_path)) + + checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage.cuda()) + # args.start_epoch = checkpoint['epoch'] + args.start_epoch = 0 # we don't rely on this, but on resume_iter + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + resume_iter = checkpoint['current_iter'] + + args.epoch_history = checkpoint['epoch'] + + if main_process(): + logger.info("=> loaded checkpoint '{}' (epoch history: {})".format(model_path, checkpoint['epoch'])) + else: + if main_process(): + logger.info("=> no checkpoint found at '{}'".format(model_path) + ' Please check') + exit() + + return model, optimizer, resume_iter + +# optimizer = get_optimizer(args.model) + + +def get_model( + args, + criterion: nn.Module, + BatchNorm: Union[ + torch.nn.SyncBatchNorm, apex.parallel.SyncBatchNorm, nn.BatchNorm2d + ], +) -> nn.Module: + """ Build the semantic segmentation model """ + if args.arch == "psp": + from mseg_semantic.model.pspnet import PSPNet + + model = PSPNet( + layers=args.layers, + classes=args.classes, + zoom_factor=args.zoom_factor, + criterion=criterion, + BatchNorm=BatchNorm, + network_name=args.network_name, + ) + elif args.arch == "hrnet": + from mseg_semantic.model.seg_hrnet import get_configured_hrnet + + # note apex batchnorm is hardcoded + model = get_configured_hrnet(args.classes) + elif args.arch == "hrnet_ocr": + from mseg_semantic.model.seg_hrnet_ocr import get_configured_hrnet_ocr + + model = get_configured_hrnet_ocr(args.classes) + return model + + +def get_optimizer(args, model: nn.Module) -> torch.optim.Optimizer: + """ + Create an optimizer and provide model parameters to it. + + For PSPNet, the learning rate is module-specfiic; the first 5 entries (ResNet backbone) + have low learning rate to not clobber pre-trained weights, and later entries (PPM derivatives) + have high learning rate. 
+ """ + import torch, os, math + + # HRNet settings + if args.arch == "hrnet" or args.arch == "hrnet_ocr": + optimizer = torch.optim.SGD( + [ + { + "params": filter(lambda p: p.requires_grad, model.parameters()), + "lr": args.base_lr, + } + ], + lr=args.base_lr, + momentum=args.momentum, + weight_decay=args.weight_decay, + ) + return optimizer + + if args.arch != "psp": + raise RuntimeError("Unknown network architecture") + # PSPNet settings + modules_original = [ + model.layer0, + model.layer1, + model.layer2, + model.layer3, + model.layer4, + ] + modules_new = [model.ppm, model.cls, model.aux] + params_list = [] + for module in modules_original: + params_list.append(dict(params=module.parameters(), lr=args.base_lr)) + + for module in modules_new: + params_list.append(dict(params=module.parameters(), lr=args.base_lr * 10)) + NUM_PRETRAINED_RESNET_LAYERS = 5 + args.index_split = NUM_PRETRAINED_RESNET_LAYERS + optimizer = torch.optim.SGD( + params_list, + lr=args.base_lr, + momentum=args.momentum, + weight_decay=args.weight_decay, + ) + return optimizer + + +def get_rank_to_dataset_map(args) -> Dict[int, str]: + """ + Obtain a mapping from GPU rank (index) to the name of the dataset residing on this GPU. + """ + rank_to_dataset_map = {} + for dataset, gpu_idxs in args.dataset_gpu_mapping.items(): + for gpu_idx in gpu_idxs: + rank_to_dataset_map[gpu_idx] = dataset + logger.info("Rank to dataset map: ", rank_to_dataset_map) + return rank_to_dataset_map + + +def set_number_of_training_iters(args): + """ + There are two scenarios we consider to determine number of required training iters + when training on MSeg. We set a max number of training crops, and then subdivide the + work between our GPUs. + + 1. We are training with a single dataset. Suppose we want to train for 1 million + crops in total (args.num_examples). Suppose our dataset has 18k images. Then + we will train for 56 epochs. Suppose our training node has 8 GPUs. Then + with a batch size of 32, and 8 GPUs, we need ~3906 iterations to reach 1M crops. + + 2. We are mixing many datasets together. We determine which dataset this GPU + is assigned to. Each GPU runs 1 process, and multiple GPU IDs (referred to + as replicas) may be assigned to a single dataset. The computation is the same + as before, except instead of counting all of the GPUs on the node, we only + count the number of replicas counting towards this dataset. + """ + # single dataset training + if (len(args.dataset) == 1) and (not args.use_mgda): + from util.txt_utils import read_txt_file + # number of examples for 1 epoch of this dataset + num_d_examples = len(read_txt_file(infos[args.dataset[0]].trainlist)) + # number of examples to train for in total + num_examples_total = args.num_examples + + args.epochs = math.ceil(num_examples_total / num_d_examples) + args.max_iters = math.floor(num_examples_total / (args.batch_size * args.ngpus_per_node)) + + # on small datasets, avoid saving checkpoints too frequently in order to not waste time + if args.epochs > 1000: + args.save_freq = args.epochs // 100 + + # multiple dataset training + elif len(args.dataset) > 1: + rank_to_dataset_map = get_rank_to_dataset_map(args) + # # which dataset this gpu is for + args.dataset_name = rank_to_dataset_map[args.rank] + # within this dataset, its rank, i.e. 
0,1,2,3 etc gpu ID assigned to this dataset + args.dataset_rank = args.dataset_gpu_mapping[args.dataset_name].index(args.rank) + args.num_replica_per_dataset = len(args.dataset_gpu_mapping[args.dataset_name]) + + args.max_iters = math.floor(args.num_examples / (args.batch_size * args.num_replica_per_dataset)) + logger.info(f'max_iters = {args.max_iters}') + + return args + + +def main_worker(gpu: int, ngpus_per_node: int, argss) -> None: + """ Each GPU process will execute this function""" + global args + args = argss + + import apex + import torch, os, math + import torch.backends.cudnn as cudnn + import torch.nn as nn + import torch.nn.functional as F + import torch.nn.parallel + import torch.optim + import torch.utils.data + + import torch.multiprocessing as mp + import torch.distributed as dist + + from mseg.utils.dataset_config import infos + from mseg.taxonomy.taxonomy_converter import TaxonomyConverter + + from mseg_semantic.multiobjective_opt.dist_mgda_utils import scale_loss_and_gradients + from mseg_semantic.utils import config + from mseg_semantic.utils import dataset + from mseg_semantic.utils.avg_meter import AverageMeter, SegmentationAverageMeter + from mseg_semantic.utils.training_utils import poly_learning_rate + from mseg_semantic.utils.verification_utils import verify_architecture + + if args.sync_bn: + if args.multiprocessing_distributed: + # BatchNorm = torch.nn.SyncBatchNorm + BatchNorm = apex.parallel.SyncBatchNorm + else: + raise RuntimeError("Batch norm not supported for DataParallel at this time") + else: + BatchNorm = nn.BatchNorm2d + print('Using batchnorm variant: ', BatchNorm) + if args.distributed: + if args.dist_url == "env://" and args.rank == -1: + args.rank = int(os.environ["RANK"]) + if args.multiprocessing_distributed: + args.rank = args.rank * ngpus_per_node + gpu + dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank) + + criterion = nn.CrossEntropyLoss(ignore_index=args.ignore_label) + model = get_model(args, criterion, BatchNorm) + optimizer = get_optimizer(args, model) + + global logger + logger = get_logger() + args.logger = logger + + if main_process(): + logger.info(args) + logger.info("=> creating model ...") + logger.info("Classes: {}".format(args.classes)) + logger.info(model) + + if args.distributed: + torch.cuda.set_device(gpu) + args.batch_size = int(args.batch_size / ngpus_per_node) + args.batch_size_val = int(args.batch_size_val / ngpus_per_node) + args.batch_size_val = max(1, args.batch_size_val) + args.workers = int(args.workers / ngpus_per_node) + if args.use_apex: + model, optimizer = apex.amp.initialize(model.cuda(), optimizer, opt_level=args.opt_level, keep_batchnorm_fp32=args.keep_batchnorm_fp32, loss_scale=args.loss_scale) + model = apex.parallel.DistributedDataParallel(model) + else: + model = torch.nn.parallel.DistributedDataParallel(model.cuda(), device_ids=[gpu]) + + else: + model = torch.nn.DataParallel(model.cuda()) + + model, optimizer, args.resume_iter = load_pretrained_weights(args, model, optimizer) + + args = set_number_of_training_iters(args) + train_transform = get_dataset_split_transform(args, split='train') + + # Consider if a dataset has size 18,000 and is placed on a single GPU, of 4 gpus. + # Batch size 32. In this case, len(train_data) = 18,000 but len(train_loader) = 2250 + # Because effective batch size is 8. + + # Consider if a dataset has size 118287. If placed on 2/4 gpus with batch size 32. 
+ # In this case, len(train_data) = 118287 and len(train_loader) = 7393. + if len(args.dataset) > 1: + # FLATMIX ADDITION + train_data = dataset.SemData(split='train', data_root=args.data_root[args.dataset_name], data_list=args.train_list[args.dataset_name], transform=train_transform) + iters_per_epoch = math.floor((len(train_data) / (args.batch_size * args.num_replica_per_dataset))) + args.epochs = math.ceil(args.max_iters / iters_per_epoch) + print(f'''Rank: {args.rank}, Dataset: {args.dataset_name}, replicas: {args.num_replica_per_dataset}, length of dataset: {len(train_data)}, max_iter: {args.max_iters}, batch_size: {args.batch_size}, + iters_per_epoch: {iters_per_epoch}, epochs: {args.epochs}, ''') + else: + train_data = dataset.SemData(split='train', data_root=args.data_root, data_list=args.train_list, transform=train_transform) + + logger.info(f'Train data has len {len(train_data)} on {args.rank}') + if args.distributed: + if len(args.dataset) > 1: + train_sampler = torch.utils.data.distributed.DistributedSampler(train_data, num_replicas=args.num_replica_per_dataset, rank=args.dataset_rank) + logger.info(f"rank: {args.rank}, dataset_rank: {args.dataset_rank}, replica: {args.num_replica_per_dataset}, actual_replica: {train_sampler.num_replicas}, length of sampler, {len(train_sampler)}") + else: + train_sampler = torch.utils.data.distributed.DistributedSampler(train_data, num_replicas=args.ngpus_per_node, rank=args.rank) + logger.info(f"rank: {args.rank}, actual_replica: {train_sampler.num_replicas}, length of sampler, {len(train_sampler)}") + + else: + train_sampler = None + train_loader = torch.utils.data.DataLoader( + train_data, + batch_size=args.batch_size, + shuffle=(train_sampler is None), + num_workers=args.workers, + pin_memory=True, + sampler=train_sampler, + drop_last=True + ) + logger.info(f'Train loader has len {len(train_loader)} on {args.rank}') + + if args.evaluate: + val_transform = get_dataset_split_transform(args, split='val') + # val_transform = transform.Compose(val_transform_list) + val_data = dataset.SemData(split='val', data_root=args.data_root, data_list=args.val_list, transform=val_transform) + if args.distributed: + val_sampler = torch.utils.data.distributed.DistributedSampler(val_data) + else: + val_sampler = None + val_loader = torch.utils.data.DataLoader( + val_data, + batch_size=args.batch_size_val, + shuffle=False, + num_workers=args.workers, + pin_memory=True, + sampler=val_sampler + ) + + for epoch in range(args.start_epoch, args.epochs + MAX_NUM_EPOCHS): + + epoch_log = epoch + 1 + if args.auto_resume != 'None': # if it is a resumed training + epoch_log += args.epoch_history # only the main process, acting like "total_epoch" + logger.info(f'New epoch {epoch_log} starts on rank {args.rank}') + + if args.distributed: + train_sampler.set_epoch(epoch) + loss_train, mIoU_train, mAcc_train, allAcc_train = train(train_loader, model, optimizer, epoch) + + if ((epoch_log % args.save_freq == 0)) and main_process(): + filename = args.save_path + '/train_epoch_' + str(epoch_log) + '.pth' + logger.info('Saving checkpoint to: ' + filename) + torch.save({'epoch': epoch_log, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), + 'current_iter': (epoch_log) * len(train_loader), 'max_iter': args.max_iters}, filename) + # latestname = args.save_path + '/train_epoch_' + str(epoch_log) + '.pth' + if epoch_log / args.save_freq > 2: + # if (epoch_log - 3) % 10 != 0: + deletename = args.save_path + '/train_epoch_' + str(epoch_log - args.save_freq * 2) + 
'.pth' + os.remove(deletename) + + if (epoch_log == args.epochs) and main_process(): + filename = args.save_path + '/train_epoch_final.pth' + logger.info('Saving checkpoint to: ' + filename) + torch.save({'epoch': epoch_log, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), + 'current_iter': (epoch_log) * len(train_loader) + args.resume_iter, 'max_iter': args.max_iters}, filename) + exit() + + + # if args.evaluate: + # loss_val, mIoU_val, mAcc_val, allAcc_val = validate(val_loader, model, criterion) + + +def train(train_loader, model, optimizer: torch.optim.Optimizer, epoch: int): + """ Run one training epoch """ + import torch, os, math, time + import torch.distributed as dist + + from mseg_semantic.multiobjective_opt.dist_mgda_utils import scale_loss_and_gradients + from mseg_semantic.utils.avg_meter import AverageMeter, SegmentationAverageMeter + from mseg_semantic.utils.training_utils import poly_learning_rate + + batch_time = AverageMeter() + data_time = AverageMeter() + main_loss_meter = AverageMeter() + aux_loss_meter = AverageMeter() + loss_meter = AverageMeter() + sam = SegmentationAverageMeter() + + model.train() + + end = time.time() + max_iter = args.max_iters + for i, (input, target) in enumerate(train_loader): + + data_time.update(time.time() - end) + if args.zoom_factor != 8: + h = int((target.size()[1] - 1) / 8 * args.zoom_factor + 1) + w = int((target.size()[2] - 1) / 8 * args.zoom_factor + 1) + # 'nearest' mode doesn't support align_corners mode and 'bilinear' mode is fine for downsampling + target = F.interpolate(target.unsqueeze(1).float(), size=(h, w), mode='bilinear', align_corners=True).squeeze(1).long() + input = input.cuda(non_blocking=True) + target = target.cuda(non_blocking=True) + + if args.use_mgda: + output, loss, main_loss, aux_loss, scales = forward_backward_mgda(input, target, model, optimizer, args) + else: + output, loss, main_loss, aux_loss = forward_backward_full_sync(input, target, model, optimizer, args) + optimizer.step() + + n = input.size(0) + if args.multiprocessing_distributed: + main_loss, aux_loss, loss = main_loss.detach() * n, aux_loss * n, loss * n # not considering ignore pixels + count = target.new_tensor([n], dtype=torch.long) + dist.all_reduce(main_loss), dist.all_reduce(aux_loss), dist.all_reduce(loss), dist.all_reduce(count) + n = count.item() + main_loss, aux_loss, loss = main_loss / n, aux_loss / n, loss / n + + sam.update_metrics_gpu(output, target, args.classes, args.ignore_label, args.multiprocessing_distributed) + + main_loss_meter.update(main_loss.item(), n) + aux_loss_meter.update(aux_loss.item(), n) + loss_meter.update(loss.item(), n) + # if main_process(): + if i > 0: + batch_time.update(time.time() - end) + end = time.time() + + # print(len(train_loader)) + # logger.info(len(train_loader)) + + current_iter = epoch * len(train_loader) + i + 1 + args.resume_iter + current_lr = poly_learning_rate(args.base_lr, current_iter, max_iter, power=args.power) + + # logger.info(f'LR:{current_lr}, base_lr: {args.base_lr}, current_iter:{current_iter}, max_iter:{max_iter}, power:{args.power}') + + if args.arch == 'psp': + for index in range(0, args.index_split): + optimizer.param_groups[index]['lr'] = current_lr + for index in range(args.index_split, len(optimizer.param_groups)): + optimizer.param_groups[index]['lr'] = current_lr * 10 + + elif args.arch == 'hrnet' or args.arch == 'hrnet_ocr': + optimizer.param_groups[0]['lr'] = current_lr + + remain_iter = max_iter - current_iter + remain_time = remain_iter * 
batch_time.avg + t_m, t_s = divmod(remain_time, 60) + t_h, t_m = divmod(t_m, 60) + remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m), int(t_s)) + + if (current_iter) % args.print_freq == 0 and True: + logger.info('Epoch: [{}/{}][{}/{}] ' + 'Data {data_time.val:.3f} ({data_time.avg:.3f}) ' + 'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) ' + 'Remain {remain_time} ' + 'MainLoss {main_loss_meter.val:.4f} ' + 'AuxLoss {aux_loss_meter.val:.4f} ' + 'LR {current_lr:.8f} ' + 'Loss {loss_meter.val:.4f} ' + 'Accuracy {accuracy:.4f}.'.format(epoch+1, args.epochs, i + 1, len(train_loader), + batch_time=batch_time, + data_time=data_time, + remain_time=remain_time, + main_loss_meter=main_loss_meter, + aux_loss_meter=aux_loss_meter, + current_lr=current_lr, + loss_meter=loss_meter, + accuracy=sam.accuracy) + f'current_iter: {current_iter}' + f' rank: {args.rank} ') + if args.use_mgda and main_process(): + # Scales identical in each process, so print out only in main process. + scales_str = [f'{d}: {scale:.2f}' for d,scale in scales.items()] + scales_str = ' , '.join(scales_str) + logger.info(f'Scales: {scales_str}') + + if main_process() and current_iter == max_iter - 5: # early exit to prevent iter number not matching between gpus + break + + iou_class, accuracy_class, mIoU, mAcc, allAcc = sam.get_metrics() + logger.info('Train result at epoch [{}/{}]: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(epoch+1, args.epochs, mIoU, mAcc, allAcc)) + return main_loss_meter.avg, mIoU, mAcc, allAcc + + +def forward_backward_full_sync( + input: torch.Tensor, + target: torch.Tensor, + model, + optimizer: torch.optim.Optimizer, + args, +): + """ + Args: + input: Tensor of size NCHW representing + target: Tensor of size (?) representing + model + optimizer + args + + Returns: + output: Tensor of size (?) representing + loss: Tensor of size (?) representing + main_loss: Tensor of size (?) representing + aux_loss: Tensor of size (?) representing + """ + output, main_loss, aux_loss = model(input, target) + if not args.multiprocessing_distributed: + main_loss, aux_loss = torch.mean(main_loss), torch.mean(aux_loss) + loss = main_loss + args.aux_weight * aux_loss + + optimizer.zero_grad() + if args.use_apex and args.multiprocessing_distributed: + with apex.amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + return output, loss, main_loss, aux_loss + + +def forward_backward_mgda(input: torch.Tensor, target: torch.Tensor, model, optimizer, args): + """ + We rely upon the ddp.no_sync() of gradients: + https://github.com/pytorch/pytorch/blob/master/torch/nn/parallel/distributed.py + + Args: + - input: Tensor of size (?) representing + - target: Tensor of size (?) representing + - model + - optimizer + - args + + Returns: + - output: Tensor of size (?) representing + - loss: Tensor of size (?) representing + - main_loss: Tensor of size (?) representing + - aux_loss: Tensor of size (?) 
representing + """ + from mseg_semantic.multiobjective_opt.dist_mgda_utils import scale_loss_and_gradients + with model.no_sync(): + output, main_loss, aux_loss = model(input, target) + loss = main_loss + args.aux_weight * aux_loss + loss, scales = scale_loss_and_gradients(loss, optimizer, model, args) + + return output, loss, main_loss, aux_loss, scales + + +def validate(val_loader, model, criterion: nn.Module): + if main_process(): + logger.info('>>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>') + batch_time = AverageMeter() + data_time = AverageMeter() + loss_meter = AverageMeter() + sam = SegmentationAverageMeter() + + model.eval() + if main_process(): + end = time.time() + for i, (input, target) in enumerate(val_loader): + if main_process(): + data_time.update(time.time() - end) + input = input.cuda(non_blocking=True) + target = target.cuda(non_blocking=True) + output = model(input) + if args.zoom_factor != 8: + output = F.interpolate(output, size=target.size()[1:], mode='bilinear', align_corners=True) + loss = criterion(output, target) + + n = input.size(0) + if args.multiprocessing_distributed: + loss = loss * n # not considering ignore pixels + count = target.new_tensor([n], dtype=torch.long) + dist.all_reduce(loss), dist.all_reduce(count) + n = count.item() + loss = loss / n + else: + loss = torch.mean(loss) + + output = output.max(1)[1] + sam.update_metrics_gpu(output, target, args.classes, args.ignore_label, args.multiprocessing_distributed) + loss_meter.update(loss.item(), input.size(0)) + if main_process(): + batch_time.update(time.time() - end) + end = time.time() + if ((i + 1) % args.print_freq == 0) and main_process(): + logger.info('Test: [{}/{}] ' + 'Data {data_time.val:.3f} ({data_time.avg:.3f}) ' + 'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) ' + 'Loss {loss_meter.val:.4f} ({loss_meter.avg:.4f}) ' + 'Accuracy {accuracy:.4f}.'.format(i + 1, len(val_loader), + data_time=data_time, + batch_time=batch_time, + loss_meter=loss_meter, + accuracy=sam.accuracy)) + + iou_class, accuracy_class, mIoU, mAcc, allAcc = sam.get_metrics() + if main_process(): + logger.info('Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(mIoU, mAcc, allAcc)) + for i in range(args.classes): + logger.info('Class_{} Result: iou/accuracy {:.4f}/{:.4f}.'.format(i, iou_class[i], accuracy_class[i])) + logger.info('<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<') + return loss_meter.avg, mIoU, mAcc, allAcc + +end = time.time() +print(end-start) + +if __name__ == '__main__': + + main() diff --git a/mseg_semantic/tool/train_final_1080.sh b/mseg_semantic/tool/train_final_1080.sh new file mode 100755 index 0000000..cb6bcde --- /dev/null +++ b/mseg_semantic/tool/train_final_1080.sh @@ -0,0 +1,110 @@ +export outf=0327-fixedbug +mkdir ${outf} + +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/bdd tool/train-qvga-mix-copy.sh 1080/bdd.yaml False exp ${WORK}/copies/final_train/1080/bdd +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/bdd tool/train-qvga-mix-copy.sh 1080/bdd.yaml False exp ${WORK}/copies/final_train/1080/bdd + +# 6892-6894 +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/coco-panoptic-v1-sr tool/train-qvga-mix-copy.sh 1080/coco-panoptic-v1-sr.yaml False exp ${WORK}/copies/final_train/1080-1-new/coco-panoptic-v1-sr +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/bdd-sr tool/train-qvga-mix-copy.sh 1080/bdd-sr.yaml False exp ${WORK}/copies/final_train/1080-1-new/bdd-sr +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o 
${outf}/ade20k-v1-sr tool/train-qvga-mix-copy.sh 1080/ade20k-v1-sr.yaml False exp ${WORK}/copies/final_train/1080-1-new/ade20k-v1-sr + +# after john made changes to ade20k taxonomy, 7091 +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/ade20k-v1-sr tool/train-qvga-mix-copy.sh 1080/ade20k-v1-sr.yaml False exp ${WORK}/copies/final_train/1080-1-new/ade20k-v1-sr + + +# 6927 - 6929 +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/sunrgbd-37-sr tool/train-qvga-mix-copy.sh 1080/sunrgbd-37-sr.yaml False exp ${WORK}/copies/final_train/1080-1-new/sunrgbd-37-sr +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/idd-new tool/train-qvga-mix-copy.sh 1080/idd-new.yaml False exp ${WORK}/copies/final_train/1080-1-new/idd-new +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/cityscapes tool/train-qvga-mix-copy.sh 1080/cityscapes.yaml False exp ${WORK}/copies/final_train/1080-1-new/cityscapes + +# 6999- 7002 +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/mapillary tool/train-qvga-mix-copy.sh 1080/mapillary.yaml False exp ${WORK}/copies/final_train/1080-1-new/mapillary +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/voc2012 tool/train-qvga-mix-copy.sh 1080/voc2012.yaml False exp ${WORK}/copies/final_train/1080-1-new/voc2012 +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/scannet-20 tool/train-qvga-mix-copy.sh 1080/scannet-20.yaml False exp ${WORK}/copies/final_train/1080-1-new/scannet-20 +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/camvid tool/train-qvga-mix-copy.sh 1080/camvid.yaml False exp ${WORK}/copies/final_train/1080-1-new/camvid + +# 7051 +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/kitti tool/train-qvga-mix-copy.sh 1080/kitti.yaml False exp ${WORK}/copies/final_train/1080-1/kitti +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/kitti-sr tool/train-qvga-mix-copy.sh 1080/kitti-sr.yaml False exp ${WORK}/copies/final_train/1080-1/kitti-sr + + +# 7075-7077 +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/kitti-sr tool/train-qvga-mix-copy.sh 1080/kitti-sr.yaml False exp ${WORK}/copies/final_train/1080-1-new/kitti-sr +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/camvid-sr tool/train-qvga-mix-copy.sh 1080/camvid-sr.yaml False exp ${WORK}/copies/final_train/1080-1-new/camvid-sr +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/voc2012-sr tool/train-qvga-mix-copy.sh 1080/voc2012-sr.yaml False exp ${WORK}/copies/final_train/1080-1-new/voc2012-sr + + + + +# tool/train-qvga-mix-copy.sh 1080/kitti.yaml False exp ${WORK}/copies/final_train/1080-1-new/test + + +# 6920-6922 +# 7100-7102 + +# 7150-7251 now, gpu19 +# 7254-55 + + + +# 6294-6296 +# sbatch -p quadro --gres=gpu:8 -w isl-gpu3 -c 80 -t 2-00:00:00 -o ${outf}/mseg-stupid tool/train-qvga-mix-copy.sh 1080/mseg-stupid.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg-stupid +# 7252-7253, gpu18 + + +# 7256-7257 +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu18 -c 80 -t 2-00:00:00 -o ${outf}/mseg-stupid-1 tool/train-qvga-mix-cd.sh 1080/mseg-stupid.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg-stupid +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu18 -c 80 -t 2-00:00:00 -o ${outf}/mseg-stupid-2 tool/train-qvga-mix-cd.sh 1080/mseg-stupid.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg-stupid + +# 7410-12 +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu19 -c 80 -t 2-00:00:00 -o ${outf}/mseg-unrelabeled 
tool/train-qvga-mix-copy.sh 1080/mseg-unrelabeled.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg-unrelabeled +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu19 -c 80 -t 2-00:00:00 -o ${outf}/mseg-unrelabeled-1 tool/train-qvga-mix-cd.sh 1080/mseg-unrelabeled.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg-unrelabeled +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu19 -c 80 -t 2-00:00:00 -o ${outf}/mseg-unrelabeled-2 tool/train-qvga-mix-cd.sh 1080/mseg-unrelabeled.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg-unrelabeled + + +# 7419, gpu4 + + +# 7436-7453 + +# 7972-7991 + +# 8256-8269 -fixed bug +sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu24 -c 80 -t 2-00:00:00 -o ${outf}/mseg-3m tool/train-qvga-mix-copy.sh 1080/mseg-3m.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg-3m +sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu24 -c 80 -t 2-00:00:00 -o ${outf}/mseg-3m-1 tool/train-qvga-mix-cd.sh 1080/mseg-3m.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg-3m +sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu24 -c 80 -t 2-00:00:00 -o ${outf}/mseg-3m-2 tool/train-qvga-mix-cd.sh 1080/mseg-3m.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg-3m +sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu24 -c 80 -t 2-00:00:00 -o ${outf}/mseg-3m-3 tool/train-qvga-mix-cd.sh 1080/mseg-3m.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg-3m +sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu24 -c 80 -t 2-00:00:00 -o ${outf}/mseg-3m-4 tool/train-qvga-mix-cd.sh 1080/mseg-3m.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg-3m +sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu24 -c 80 -t 2-00:00:00 -o ${outf}/mseg-3m-5 tool/train-qvga-mix-cd.sh 1080/mseg-3m.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg-3m +sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu24 -c 80 -t 2-00:00:00 -o ${outf}/mseg-3m-6 tool/train-qvga-mix-cd.sh 1080/mseg-3m.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg-3m +sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu24 -c 80 -t 2-00:00:00 -o ${outf}/mseg-3m-7 tool/train-qvga-mix-cd.sh 1080/mseg-3m.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg-3m + + +sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu25 -c 80 -t 2-00:00:00 -o ${outf}/mseg tool/train-qvga-mix-copy.sh 1080/mseg.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg +sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu25 -c 80 -t 2-00:00:00 -o ${outf}/mseg-1 tool/train-qvga-mix-cd.sh 1080/mseg.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg +sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu25 -c 80 -t 2-00:00:00 -o ${outf}/mseg-2 tool/train-qvga-mix-cd.sh 1080/mseg.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg + + +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu19 -c 80 -t 2-00:00:00 -o ${outf}/mseg-unrelabeled tool/train-qvga-mix-copy.sh 1080/mseg-unrelabeled.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg-unrelabeled +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu19 -c 80 -t 2-00:00:00 -o ${outf}/mseg-unrelabeled-1 tool/train-qvga-mix-cd.sh 1080/mseg-unrelabeled.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg-unrelabeled +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu19 -c 80 -t 2-00:00:00 -o ${outf}/mseg-unrelabeled-2 tool/train-qvga-mix-cd.sh 1080/mseg-unrelabeled.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg-unrelabeled + + +sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu18 -c 
80 -t 2-00:00:00 -o ${outf}/mseg-lowres tool/train-qvga-mix-copy.sh 1080/mseg-lowres.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg-lowres +sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu18 -c 80 -t 2-00:00:00 -o ${outf}/mseg-lowres-1 tool/train-qvga-mix-cd.sh 1080/mseg-lowres.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg-lowres +sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu18 -c 80 -t 2-00:00:00 -o ${outf}/mseg-lowres-2 tool/train-qvga-mix-cd.sh 1080/mseg-lowres.yaml False exp ${WORK}/copies/final_train/1080-1-new/mseg-lowres + +# sbatch -p quadro --qos=normal --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/sunrgbd-37-sr-new tool/train-qvga-mix-copy.sh 1080/sunrgbd-37-sr.yaml False exp ${WORK}/copies/final_train/1080-1-new/sunrgbd-37-sr + + +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu4 -c 80 -t 2:20:00 -o ${outf}/mseg-lowres-test tool/train-qvga-mix-copy.sh 1080/mseg-lowres.yaml False exp ${WORK}/copies/test-new +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu4 -c 80 -t 2:20:00 -o ${outf}/mseg-lowres-1-test tool/train-qvga-mix-cd.sh 1080/mseg-lowres.yaml False exp ${WORK}/copies/test-new +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu4 -c 80 -t 2:20:00 -o ${outf}/mseg-lowres-2-test tool/train-qvga-mix-cd.sh 1080/mseg-lowres.yaml False exp ${WORK}/copies/test-new + + +sh tool/train-qvga-mix-copy.sh 1080/mseg-3m.yaml False exp ${WORK}/copies/test + + + + diff --git a/mseg_semantic/tool/train_final_1080_one.sh b/mseg_semantic/tool/train_final_1080_one.sh new file mode 100755 index 0000000..047cf6c --- /dev/null +++ b/mseg_semantic/tool/train_final_1080_one.sh @@ -0,0 +1,105 @@ +export outf=0329_halfway +mkdir ${outf} + +# 8571-8580 +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/coco-v1 tool/train-qvga-one-copy.sh 1080/single_universal.yaml False exp ${WORK}/copies/final_train/1080-halfway/coco-panoptic-v1-sr coco-panoptic-v1-sr +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/ade-v1 tool/train-qvga-one-copy.sh 1080/single_universal.yaml False exp ${WORK}/copies/final_train/1080-halfway/ade20k-v1-sr ade20k-v1-sr +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/idd-new tool/train-qvga-one-copy.sh 1080/single_universal.yaml False exp ${WORK}/copies/final_train/1080-halfway/idd-new idd-new +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/sunrgbd-37 tool/train-qvga-one-copy.sh 1080/single_universal.yaml False exp ${WORK}/copies/final_train/1080-halfway/sunrgbd-37-sr sunrgbd-37-sr +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/bdd tool/train-qvga-one-copy.sh 1080/single_universal.yaml False exp ${WORK}/copies/final_train/1080-halfway/bdd-sr bdd-sr +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/cityscapes tool/train-qvga-one-copy.sh 1080/single_universal.yaml False exp ${WORK}/copies/final_train/1080-halfway/cityscapes cityscapes +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/mapillary tool/train-qvga-one-copy.sh 1080/single_universal.yaml False exp ${WORK}/copies/final_train/1080-halfway/mapillary mapillary + +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/scannet-20 tool/train-qvga-one-copy.sh 1080/single.yaml False exp ${WORK}/copies/final_train/1080-halfway/scannet-20 scannet-20 +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/camvid tool/train-qvga-one-copy.sh 1080/single.yaml False exp ${WORK}/copies/final_train/1080-halfway/camvid camvid +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 
-o ${outf}/voc2012 tool/train-qvga-one-copy.sh 1080/single.yaml False exp ${WORK}/copies/final_train/1080-halfway/voc2012 voc2012 +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/kitti tool/train-qvga-one-copy.sh 1080/single.yaml False exp ${WORK}/copies/final_train/1080-halfway/kitti kitti +sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/pascal-context tool/train-qvga-one-copy.sh 1080/single.yaml False exp ${WORK}/copies/final_train/1080-halfway/pascal-context-60 pascal-context-60 +# 9888 + + + + +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/cityscapes-v2 tool/train-qvga-one-copy.sh 1080/single_universal.yaml False exp ${WORK}/copies/final_train/1080-halfway/cit-v2 cityscapes-v2 +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/cityscapes tool/train-qvga-one-copy.sh 1080/single_universal.yaml False exp ${WORK}/copies/final_train/1080-halfway/cityscapes cityscapes +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/kitti-sr tool/train-qvga-mix-copy.sh 1080/kitti-sr.yaml False exp ${WORK}/copies/final_train/1080-halfway-1/kitti-sr + + +# 7075-7077 +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/kitti-sr tool/train-qvga-mix-copy.sh 1080/kitti-sr.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/kitti-sr +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/camvid-sr tool/train-qvga-mix-copy.sh 1080/camvid-sr.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/camvid-sr +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/voc2012-sr tool/train-qvga-mix-copy.sh 1080/voc2012-sr.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/voc2012-sr + + + + +# tool/train-qvga-mix-copy.sh 1080/kitti.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/test + + +# 6920-6922 +# 7100-7102 + +# 7150-7251 now, gpu19 +# 7254-55 + + + +# 6294-6296 +# sbatch -p quadro --gres=gpu:8 -w isl-gpu3 -c 80 -t 2-00:00:00 -o ${outf}/mseg-stupid tool/train-qvga-mix-copy.sh 1080/mseg-stupid.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg-stupid +# 7252-7253, gpu18 + + +# 7256-7257 +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu18 -c 80 -t 2-00:00:00 -o ${outf}/mseg-stupid-1 tool/train-qvga-mix-cd.sh 1080/mseg-stupid.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg-stupid +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu18 -c 80 -t 2-00:00:00 -o ${outf}/mseg-stupid-2 tool/train-qvga-mix-cd.sh 1080/mseg-stupid.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg-stupid + +# 7410-12 +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu19 -c 80 -t 2-00:00:00 -o ${outf}/mseg-unrelabeled tool/train-qvga-mix-copy.sh 1080/mseg-unrelabeled.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg-unrelabeled +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu19 -c 80 -t 2-00:00:00 -o ${outf}/mseg-unrelabeled-1 tool/train-qvga-mix-cd.sh 1080/mseg-unrelabeled.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg-unrelabeled +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu19 -c 80 -t 2-00:00:00 -o ${outf}/mseg-unrelabeled-2 tool/train-qvga-mix-cd.sh 1080/mseg-unrelabeled.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg-unrelabeled + + +# 7419, gpu4 + + +# 7436-7453 + +# 7972-7991 + +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu25 -c 80 -t 2-00:00:00 -o ${outf}/mseg-3m tool/train-qvga-mix-copy.sh 1080/mseg-3m.yaml False exp 
${WORK}/copies/final_train/1080-halfway-1-new/mseg-3m +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu25 -c 80 -t 2-00:00:00 -o ${outf}/mseg-3m-1 tool/train-qvga-mix-cd.sh 1080/mseg-3m.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg-3m +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu25 -c 80 -t 2-00:00:00 -o ${outf}/mseg-3m-2 tool/train-qvga-mix-cd.sh 1080/mseg-3m.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg-3m +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu25 -c 80 -t 2-00:00:00 -o ${outf}/mseg-3m-3 tool/train-qvga-mix-cd.sh 1080/mseg-3m.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg-3m +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu25 -c 80 -t 2-00:00:00 -o ${outf}/mseg-3m-4 tool/train-qvga-mix-cd.sh 1080/mseg-3m.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg-3m +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu25 -c 80 -t 2-00:00:00 -o ${outf}/mseg-3m-5 tool/train-qvga-mix-cd.sh 1080/mseg-3m.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg-3m +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu25 -c 80 -t 2-00:00:00 -o ${outf}/mseg-3m-6 tool/train-qvga-mix-cd.sh 1080/mseg-3m.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg-3m +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu25 -c 80 -t 2-00:00:00 -o ${outf}/mseg-3m-7 tool/train-qvga-mix-cd.sh 1080/mseg-3m.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg-3m + + +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu24 -c 80 -t 2-00:00:00 -o ${outf}/mseg tool/train-qvga-mix-copy.sh 1080/mseg.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu24 -c 80 -t 2-00:00:00 -o ${outf}/mseg-1 tool/train-qvga-mix-cd.sh 1080/mseg.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu24 -c 80 -t 2-00:00:00 -o ${outf}/mseg-2 tool/train-qvga-mix-cd.sh 1080/mseg.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg + + +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu19 -c 80 -t 2-00:00:00 -o ${outf}/mseg-unrelabeled tool/train-qvga-mix-copy.sh 1080/mseg-unrelabeled.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg-unrelabeled +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu19 -c 80 -t 2-00:00:00 -o ${outf}/mseg-unrelabeled-1 tool/train-qvga-mix-cd.sh 1080/mseg-unrelabeled.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg-unrelabeled +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu19 -c 80 -t 2-00:00:00 -o ${outf}/mseg-unrelabeled-2 tool/train-qvga-mix-cd.sh 1080/mseg-unrelabeled.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg-unrelabeled + + +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu3 -c 80 -t 2-00:00:00 -o ${outf}/mseg-lowres tool/train-qvga-mix-copy.sh 1080/mseg-lowres.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg-lowres +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu3 -c 80 -t 2-00:00:00 -o ${outf}/mseg-lowres-1 tool/train-qvga-mix-cd.sh 1080/mseg-lowres.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg-lowres +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu3 -c 80 -t 2-00:00:00 -o ${outf}/mseg-lowres-2 tool/train-qvga-mix-cd.sh 1080/mseg-lowres.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/mseg-lowres + +# sbatch -p quadro --qos=normal --gres=gpu:8 -c 80 -t 2-00:00:00 -o 
${outf}/sunrgbd-37-sr-new tool/train-qvga-mix-copy.sh 1080/sunrgbd-37-sr.yaml False exp ${WORK}/copies/final_train/1080-halfway-1-new/sunrgbd-37-sr + + +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu4 -c 80 -t 2:20:00 -o ${outf}/mseg-lowres-test tool/train-qvga-mix-copy.sh 1080/mseg-lowres.yaml False exp ${WORK}/copies/test-new +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu4 -c 80 -t 2:20:00 -o ${outf}/mseg-lowres-1-test tool/train-qvga-mix-cd.sh 1080/mseg-lowres.yaml False exp ${WORK}/copies/test-new +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu4 -c 80 -t 2:20:00 -o ${outf}/mseg-lowres-2-test tool/train-qvga-mix-cd.sh 1080/mseg-lowres.yaml False exp ${WORK}/copies/test-new + + + + + + + diff --git a/mseg_semantic/tool/train_release_1080.sh b/mseg_semantic/tool/train_release_1080.sh new file mode 100755 index 0000000..a20ef8f --- /dev/null +++ b/mseg_semantic/tool/train_release_1080.sh @@ -0,0 +1,56 @@ +export outf=0424_release +mkdir ${outf} + +# all is so-called "lowres", 13801-13808 + +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu18 -c 80 -t 2-00:00:00 -o ${outf}/mseg-lowres-3m tool/train-qvga-mix-copy.sh 1080_release/mseg-lowres-3m.yaml False exp ${WORK}/copies/final_train/1080_release/mseg-lowres-3m +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu18 -c 80 -t 2-00:00:00 -o ${outf}/mseg-lowres-3m-1 tool/train-qvga-mix-cd.sh 1080_release/mseg-lowres-3m.yaml False exp ${WORK}/copies/final_train/1080_release/mseg-lowres-3m +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu18 -c 80 -t 2-00:00:00 -o ${outf}/mseg-lowres-3m-2 tool/train-qvga-mix-cd.sh 1080_release/mseg-lowres-3m.yaml False exp ${WORK}/copies/final_train/1080_release/mseg-lowres-3m +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu18 -c 80 -t 2-00:00:00 -o ${outf}/mseg-lowres-3m-3 tool/train-qvga-mix-cd.sh 1080_release/mseg-lowres-3m.yaml False exp ${WORK}/copies/final_train/1080_release/mseg-lowres-3m +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu18 -c 80 -t 2-00:00:00 -o ${outf}/mseg-lowres-3m-4 tool/train-qvga-mix-cd.sh 1080_release/mseg-lowres-3m.yaml False exp ${WORK}/copies/final_train/1080_release/mseg-lowres-3m +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu18 -c 80 -t 2-00:00:00 -o ${outf}/mseg-lowres-3m-5 tool/train-qvga-mix-cd.sh 1080_release/mseg-lowres-3m.yaml False exp ${WORK}/copies/final_train/1080_release/mseg-lowres-3m + + + +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu8 -c 80 -t 2-00:00:00 -o ${outf}/mseg-unrelabeled tool/train-qvga-mix-copy.sh 1080_release/mseg-unrelabeled.yaml False exp ${WORK}/copies/final_train/1080_release/mseg-unrelabeled-1 +# 14239 +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu3 -c 80 -t 2-00:00:00 -o ${outf}/mseg-unrelabeled-1 tool/train-qvga-mix-cd.sh 1080_release/mseg-unrelabeled.yaml False exp ${WORK}/copies/final_train/1080_release/mseg-unrelabeled-1 + +# 14293-14297 +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu19 -c 80 -t 2-00:00:00 -o ${outf}/mseg-720-3m tool/train-qvga-mix-copy.sh 720_release/mseg-3m.yaml False exp ${WORK}/copies/final_train/720_release/mseg-3m +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu19 -c 80 -t 2-00:00:00 -o ${outf}/mseg-720-3m-1 tool/train-qvga-mix-cd.sh 720_release/mseg-3m.yaml False exp ${WORK}/copies/final_train/720_release/mseg-3m +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu19 -c 80 -t 2-00:00:00 -o ${outf}/mseg-720-3m-2 tool/train-qvga-mix-cd.sh 720_release/mseg-3m.yaml False exp 
${WORK}/copies/final_train/720_release/mseg-3m +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu19 -c 80 -t 2-00:00:00 -o ${outf}/mseg-720-3m-3 tool/train-qvga-mix-cd.sh 720_release/mseg-3m.yaml False exp ${WORK}/copies/final_train/720_release/mseg-3m +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu19 -c 80 -t 2-00:00:00 -o ${outf}/mseg-720-3m-4 tool/train-qvga-mix-cd.sh 720_release/mseg-3m.yaml False exp ${WORK}/copies/final_train/720_release/mseg-3m + + +# 14301-14304 +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu4 -c 80 -t 2-00:00:00 -o ${outf}/mseg-480-3m tool/train-qvga-mix-copy.sh 480_release/mseg-3m.yaml False exp ${WORK}/copies/final_train/480_release/mseg-3m +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu4 -c 80 -t 2-00:00:00 -o ${outf}/mseg-480-3m-1 tool/train-qvga-mix-cd.sh 480_release/mseg-3m.yaml False exp ${WORK}/copies/final_train/480_release/mseg-3m +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu4 -c 80 -t 2-00:00:00 -o ${outf}/mseg-480-3m-2 tool/train-qvga-mix-cd.sh 480_release/mseg-3m.yaml False exp ${WORK}/copies/final_train/480_release/mseg-3m +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu4 -c 80 -t 2-00:00:00 -o ${outf}/mseg-480-3m-3 tool/train-qvga-mix-cd.sh 480_release/mseg-3m.yaml False exp ${WORK}/copies/final_train/480_release/mseg-3m + + + +# 14308-14312 +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu7 -c 80 -t 2-00:00:00 -o ${outf}/mseg-mgda tool/train-qvga-mix-copy.sh 1080_release/mseg-mgda.yaml True exp ${WORK}/copies/final_train/1080_release/mseg-mgda +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu7 -c 80 -t 2-00:00:00 -o ${outf}/mseg-mgda-1 tool/train-qvga-mix-cd.sh 1080_release/mseg-mgda.yaml True exp ${WORK}/copies/final_train/1080_release/mseg-mgda +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu7 -c 80 -t 2-00:00:00 -o ${outf}/mseg-mgda-2 tool/train-qvga-mix-cd.sh 1080_release/mseg-mgda.yaml True exp ${WORK}/copies/final_train/1080_release/mseg-mgda +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu7 -c 80 -t 2-00:00:00 -o ${outf}/mseg-mgda-3 tool/train-qvga-mix-cd.sh 1080_release/mseg-mgda.yaml True exp ${WORK}/copies/final_train/1080_release/mseg-mgda +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu7 -c 80 -t 2-00:00:00 -o ${outf}/mseg-mgda-4 tool/train-qvga-mix-cd.sh 1080_release/mseg-mgda.yaml True exp ${WORK}/copies/final_train/1080_release/mseg-mgda + + +# 14315-16 +sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu24 -c 80 -t 2-00:00:00 -o ${outf}/mseg-baseline tool/train-qvga-mix-copy.sh 1080_release/mseg-baseline.yaml False exp ${WORK}/copies/final_train/1080_release/mseg-baseline +sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu24 -c 80 -t 2-00:00:00 -o ${outf}/mseg-baseline-1 tool/train-qvga-mix-cd.sh 1080_release/mseg-baseline.yaml False exp ${WORK}/copies/final_train/1080_release/mseg-baseline + + + +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu24 -c 80 -t 2-00:00:00 -o ${outf}/mseg-lowres tool/train-qvga-mix-copy.sh 1080_release/mseg-lowres.yaml False exp ${WORK}/copies/final_train/1080_release/mseg-lowres +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu24 -c 80 -t 2-00:00:00 -o ${outf}/mseg-lowres-1 tool/train-qvga-mix-cd.sh 1080_release/mseg-lowres.yaml False exp ${WORK}/copies/final_train/1080_release/mseg-lowres +# sbatch -p quadro --qos=normal --gres=gpu:8 -w isl-gpu24 -c 80 -t 2-00:00:00 -o ${outf}/mseg-lowres-2 tool/train-qvga-mix-cd.sh 1080_release/mseg-lowres.yaml False exp 
${WORK}/copies/final_train/1080_release/mseg-lowres + + + + + diff --git a/mseg_semantic/tool/train_release_1080_one.sh b/mseg_semantic/tool/train_release_1080_one.sh new file mode 100755 index 0000000..0e45dd5 --- /dev/null +++ b/mseg_semantic/tool/train_release_1080_one.sh @@ -0,0 +1,19 @@ +export outf=0424_release/ +mkdir ${outf} + +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/scannet-20 tool/train-qvga-one-copy.sh 1080_release/single.yaml False exp ${WORK}/copies/final_train/1080_release/scannet-20 scannet-20 +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/camvid-11 tool/train-qvga-one-copy.sh 1080_release/single.yaml False exp ${WORK}/copies/final_train/1080_release/camvid-11 camvid-11 +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/voc2012 tool/train-qvga-one-copy.sh 1080_release/single.yaml False exp ${WORK}/copies/final_train/1080_release/voc2012 voc2012 +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/kitti-19 tool/train-qvga-one-copy.sh 1080_release/single.yaml False exp ${WORK}/copies/final_train/1080_release/kitti-19 kitti-19 +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/pascal-context-60 tool/train-qvga-one-copy.sh 1080_release/single.yaml False exp ${WORK}/copies/final_train/1080_release/pascal-context-60 pascal-context-60 + +# 14483-86 +sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/coco-panoptic-133 tool/train-qvga-one-copy.sh 1080_release/single_universal.yaml False exp ${WORK}/copies/final_train/1080_release/coco-panoptic-133 coco-panoptic-133 +sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/ade20k-150 tool/train-qvga-one-copy.sh 1080_release/single_universal.yaml False exp ${WORK}/copies/final_train/1080_release/ade20k-150 ade20k-150 +sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/sunrgbd-37 tool/train-qvga-one-copy.sh 1080_release/single_universal.yaml False exp ${WORK}/copies/final_train/1080_release/sunrgbd-37 sunrgbd-37 +sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/bdd tool/train-qvga-one-copy.sh 1080_release/single_universal.yaml False exp ${WORK}/copies/final_train/1080_release/bdd bdd +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/idd-39 tool/train-qvga-one-copy.sh 1080_release/single_universal.yaml False exp ${WORK}/copies/final_train/1080_release/idd-39 idd-39 +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/cityscapes-19 tool/train-qvga-one-copy.sh 1080_release/single_universal.yaml False exp ${WORK}/copies/final_train/1080_release/cityscapes-19 cityscapes-19 +# sbatch -p quadro --gres=gpu:8 -c 80 -t 2-00:00:00 -o ${outf}/mapillary-public65 tool/train-qvga-one-copy.sh 1080_release/single_universal.yaml False exp ${WORK}/copies/final_train/1080_release/mapillary-public65 mapillary-public65 + + diff --git a/mseg_semantic/tool/train_self.sh b/mseg_semantic/tool/train_self.sh new file mode 100755 index 0000000..207670e --- /dev/null +++ b/mseg_semantic/tool/train_self.sh @@ -0,0 +1,18 @@ +#!/bin/sh +export outf=0711-2 +sbatch -C turing -p gpu --gres=gpu:8 -c 80 -o ${outf}/city_18 tool/train.sh cityscapes_18 pspnet50 +sbatch -C turing -p gpu --gres=gpu:8 -c 80 -o ${outf}/nyu_36 tool/train.sh nyudepthv2_36 pspnet50 + +#7846 and 7847 + +# sbatch -C turing -p gpu --gres=gpu:8 -c 80 -o ${outf}/map-coco tool/train_flatmix.sh mix flat-map-coco +# sbatch -p gpu --gres=gpu:8 -c 80 -o ${outf}/coco-scan tool/train_flatmix.sh mix flat-coco-scan +# sbatch -p gpu --gres=gpu:8 -c 80 -o ${outf}/map-scan 
tool/train_flatmix.sh mix flat-map-scan +# sbatch -p gpu --gres=gpu:8 -c 80 -o ${outf}/map tool/train_flatmix.sh mix flat-map +# sbatch -p gpu --gres=gpu:8 -c 80 -o ${outf}/coco tool/train_flatmix.sh mix flat-coco +# sbatch -p gpu --gres=gpu:8 -c 80 -o ${outf}/scan tool/train_flatmix.sh mix flat-scan + +# 2-8 above + +# sbatch -p quadro --gres=gpu:8 -c 80 -o ${outf}/coco-scan-2 tool/train_flatmix.sh mix flat-coco-scan # 7786 +# sbatch -C turing -p gpu --gres=gpu:8 -c 80 -o ${outf}/map-scan-2 tool/train_flatmix.sh mix flat-map-scan # 7781 diff --git a/mseg_semantic/utils/training_utils.py b/mseg_semantic/utils/training_utils.py new file mode 100755 index 0000000..7dd0b36 --- /dev/null +++ b/mseg_semantic/utils/training_utils.py @@ -0,0 +1,112 @@ +#!/usr/bin/python3 + +import os +import numpy as np +from PIL import Image + +import torch +from torch import nn +import torch.nn.init as initer + + +def step_learning_rate(base_lr, epoch, step_epoch, multiplier=0.1): + """Sets the learning rate to the base LR decayed by 10 every step epochs""" + lr = base_lr * (multiplier ** (epoch // step_epoch)) + return lr + + +def poly_learning_rate(base_lr, curr_iter, max_iter, power=0.9): + """poly learning rate policy""" + lr = base_lr * (1 - float(curr_iter) / max_iter) ** power + return lr + + +def check_mkdir(dir_name): + if not os.path.exists(dir_name): + os.mkdir(dir_name) + + +def check_makedirs(dir_name): + if not os.path.exists(dir_name): + os.makedirs(dir_name) + + +def init_weights(model, conv='kaiming', batchnorm='normal', linear='kaiming', lstm='kaiming'): + """ + :param model: Pytorch Model which is nn.Module + :param conv: 'kaiming' or 'xavier' + :param batchnorm: 'normal' or 'constant' + :param linear: 'kaiming' or 'xavier' + :param lstm: 'kaiming' or 'xavier' + """ + for m in model.modules(): + if isinstance(m, (nn.modules.conv._ConvNd)): + if conv == 'kaiming': + initer.kaiming_normal_(m.weight) + elif conv == 'xavier': + initer.xavier_normal_(m.weight) + else: + raise ValueError("init type of conv error.\n") + if m.bias is not None: + initer.constant_(m.bias, 0) + + elif isinstance(m, (nn.modules.batchnorm._BatchNorm)): + if batchnorm == 'normal': + initer.normal_(m.weight, 1.0, 0.02) + elif batchnorm == 'constant': + initer.constant_(m.weight, 1.0) + else: + raise ValueError("init type of batchnorm error.\n") + initer.constant_(m.bias, 0.0) + + elif isinstance(m, nn.Linear): + if linear == 'kaiming': + initer.kaiming_normal_(m.weight) + elif linear == 'xavier': + initer.xavier_normal_(m.weight) + else: + raise ValueError("init type of linear error.\n") + if m.bias is not None: + initer.constant_(m.bias, 0) + + elif isinstance(m, nn.LSTM): + for name, param in m.named_parameters(): + if 'weight' in name: + if lstm == 'kaiming': + initer.kaiming_normal_(param) + elif lstm == 'xavier': + initer.xavier_normal_(param) + else: + raise ValueError("init type of lstm error.\n") + elif 'bias' in name: + initer.constant_(param, 0) + + +def group_weight(weight_group, module, lr): + group_decay = [] + group_no_decay = [] + for m in module.modules(): + if isinstance(m, nn.Linear): + group_decay.append(m.weight) + if m.bias is not None: + group_no_decay.append(m.bias) + elif isinstance(m, nn.modules.conv._ConvNd): + group_decay.append(m.weight) + if m.bias is not None: + group_no_decay.append(m.bias) + elif isinstance(m, nn.modules.batchnorm._BatchNorm): + if m.weight is not None: + group_no_decay.append(m.weight) + if m.bias is not None: + group_no_decay.append(m.bias) + assert 
len(list(module.parameters())) == len(group_decay) + len(group_no_decay) + weight_group.append(dict(params=group_decay, lr=lr)) + weight_group.append(dict(params=group_no_decay, weight_decay=.0, lr=lr)) + return weight_group + + +def colorize(gray, palette): + # gray: numpy array of the label and 1*3N size list palette + color = Image.fromarray(gray.astype(np.uint8)).convert('P') + color.putpalette(palette) + return color diff --git a/mseg_semantic/utils/verification_utils.py b/mseg_semantic/utils/verification_utils.py new file mode 100755 index 0000000..2f6a72d --- /dev/null +++ b/mseg_semantic/utils/verification_utils.py @@ -0,0 +1,32 @@ + + + +def verify_architecture(args) -> None: + """ + + Args: + - args + """ + assert args.classes > 1 + assert args.zoom_factor in [1, 2, 4, 8] + if args.arch == 'psp': + assert (args.train_h - 1) % 8 == 0 and (args.train_w - 1) % 8 == 0 + elif args.arch == 'psa': + if args.compact: + args.mask_h = (args.train_h - 1) // (8 * args.shrink_factor) + 1 + args.mask_w = (args.train_w - 1) // (8 * args.shrink_factor) + 1 + else: + assert (args.mask_h is None and args.mask_w is None) or ( + args.mask_h is not None and args.mask_w is not None) + if args.mask_h is None and args.mask_w is None: + args.mask_h = 2 * ((args.train_h - 1) // (8 * args.shrink_factor) + 1) - 1 + args.mask_w = 2 * ((args.train_w - 1) // (8 * args.shrink_factor) + 1) - 1 + else: + assert (args.mask_h % 2 == 1) and (args.mask_h >= 3) and ( + args.mask_h <= 2 * ((args.train_h - 1) // (8 * args.shrink_factor) + 1) - 1) + assert (args.mask_w % 2 == 1) and (args.mask_w >= 3) and ( + args.mask_w <= 2 * ((args.train_h - 1) // (8 * args.shrink_factor) + 1) - 1) + elif args.arch == 'hrnet' or args.arch == 'hrnet_ocr': + pass + else: + raise Exception('architecture not supported yet'.format(args.arch)) \ No newline at end of file diff --git a/tests/normalization_utils_tests.py b/tests/normalization_utils_tests.py new file mode 100755 index 0000000..92f6879 --- /dev/null +++ b/tests/normalization_utils_tests.py @@ -0,0 +1,91 @@ +#!/usr/bin/python3 + +import numpy as np +import pdb +import torch + +from mseg_semantic.utils.normalization_utils import normalize_img + +def test_normalize_img_test_mean_only(): + """ + Take image of shape HWC, i.e. (2 x 2 x 3) + """ + image = np.array( + [ + [ + [20,22,24], + [26,28,30] + ], + [ + [32,34,36], + [38,40,42] + ] + ] + ).astype(np.uint8) + input = torch.from_numpy(image.transpose((2, 0, 1))).float() + # tensor is now CHW, i.e. (3,2,2) + mean = [30,30,30] + + normalize_img(input, mean) + + # subtract 30 from all entries + gt_input = torch.tensor( + [ + [ + [-10,-8,-6], + [ -4,-2, 0] + ], + [ + [2,4,6], + [ 8,10,12] + ] + ]) + gt_input = gt_input.permute(2,0,1).float() + assert torch.allclose(input, gt_input) + assert isinstance(input, torch.Tensor) + +def test_normalize_img_test_mean_std_both(): + """ + Take image of shape (2 x 2 x 3) + """ + image = np.array( + [ + [ + [20,22,24], + [26,28,30] + ], + [ + [32,34,36], + [38,40,42] + ] + ] + ).astype(np.uint8) + input = torch.from_numpy(image.transpose((2, 0, 1))).float() + # tensor is now CHW, i.e. 
(3,2,2) + mean = [30,30,30] + std = [2,2,2] + + normalize_img(input, mean, std) + + # subtract 30 from all entries + gt_input = torch.tensor( + [ + [ + [-10/2, -8/2, -6/2], + [ -4/2, -2/2, 0/2] + ], + [ + [ 2/2, 4/2, 6/2], + [ 8/2, 10/2, 12/2] + ] + ]) + gt_input = gt_input.permute(2,0,1).float() + assert torch.allclose(input, gt_input) + assert isinstance(input, torch.Tensor) + +if __name__ == '__main__': + """ """ + test_normalize_img_test_mean_only() + test_normalize_img_test_mean_std_both() + + diff --git a/tests/test_ccsa_data.py b/tests/test_ccsa_data.py new file mode 100755 index 0000000..68a3b54 --- /dev/null +++ b/tests/test_ccsa_data.py @@ -0,0 +1,102 @@ +#!/usr/bin/python3 + +import argparse +import numpy as np + +# from mseg.utils.dataset_config import infos +from mseg.utils.dir_utils import check_mkdir + +from mseg_semantic.utils import transform +from mseg_semantic.utils.normalization_utils import get_imagenet_mean_std +from mseg_semantic.domain_generalization.ccsa_data import ( + append_per_tuple, + pad_to_max_sz, + CCSA_Data +) + + +def test_append_per_tuple(): + """ """ + dataset_2tuples = [ + ('/path/to/img0', '/path/to/label0'), + ('/path/to/img1', '/path/to/label1'), + ('/path/to/img2', '/path/to/label2') + ] + new_val = 'ade20k' + dataset_3tuples = append_per_tuple(dataset_2tuples, new_val) + + gt_dataset_3tuples = [ + ('/path/to/img0', '/path/to/label0', 'ade20k'), + ('/path/to/img1', '/path/to/label1', 'ade20k'), + ('/path/to/img2', '/path/to/label2', 'ade20k') + ] + assert gt_dataset_3tuples == dataset_3tuples + + +def test_pad_to_max_sz(): + """ + """ + tuple_list = [ + ('a', 1), + ('b', 2) + ] + max_sz = 3 + padded_tuple_list = pad_to_max_sz(tuple_list, max_sz) + assert len(padded_tuple_list) == 3 + gt_tuple_list = [ + ('a', 1), + ('b', 2), + ('a', 1) + ] + assert padded_tuple_list == gt_tuple_list + + +# def test_ccsa_data(): +# """ Requires valid file paths. 
+# """ +# datasets = [ +# 'ade20k-v1-qvga', +# 'coco-panoptic-v1-qvga', +# 'mapillary_vistas_comm-qvga', +# 'interiornet-37cls-qvga' +# ] + +# mean, std = get_imagenet_mean_std() + +# train_h, train_w = 201, 201 +# transform_list = [ +# transform.Crop([train_h, train_w], crop_type='rand', padding=mean, ignore_label=255), +# transform.ToTensor() +# ] +# train_transform = transform.Compose(transform_list) + +# data_roots = {dataset:infos[dataset].dataroot for dataset in datasets} +# train_lists = {dataset:infos[dataset].trainlist for dataset in datasets} + +# COCO_LEN = 118287 +# train_data = CCSA_Data( +# split='train', +# data_roots=data_roots, +# data_lists=train_lists, +# transform_dict={'ade20k-v1-qvga': train_transform} +# ) +# assert len(train_data) == COCO_LEN * 3 + +# check_mkdir('temp_files/ccsa_data') +# for i in np.random.randint(low=0,high=COCO_LEN*3,size=(1000,)): +# pytorch_img, _, domain = train_data[i] +# np_img = pytorch_img.permute(1,2,0).cpu().numpy() +# np_img = np_img.astype(np.uint8) +# cv2.imwrite(f'temp_files/ccsa_data/domain_{domain}__i_{i}.png', np_img[:,:,::-1]) + + + +if __name__ == '__main__': + """ + """ + test_append_per_tuple() + test_pad_to_max_sz() + #test_ccsa_data() + + + diff --git a/tests/test_ccsa_pspnet.py b/tests/test_ccsa_pspnet.py new file mode 100755 index 0000000..f8546bf --- /dev/null +++ b/tests/test_ccsa_pspnet.py @@ -0,0 +1,69 @@ + + +import torch +import torch.nn as nn + +from domain_generalization.ccsa_pspnet import CCSA_PSPNet + + +def test_CCSA_PSPNet_dims(): + """ """ + layers = 50 + classes = 183 + network_name = None + zoom_factor = 8 # zoom factor for final prediction during training, be in [1, 2, 4, 8] + ignore_label = 255 + criterion = nn.CrossEntropyLoss(ignore_index=ignore_label) + BatchNorm = torch.nn.BatchNorm2d # torch.nn.SyncBatchNorm + model = CCSA_PSPNet( + layers=layers, + classes=classes, + zoom_factor=zoom_factor, + criterion=criterion, + BatchNorm=BatchNorm, + network_name=network_name, + pretrained=False) # unlike actual training time. + + x = torch.randint(high=255, size=(4,3,201,201)).type(torch.float32) + y = torch.randint(high=10,size=(4,201,201)) + batch_domain_idxs = torch.tensor([0,1,2,1]) + + out_cache = model(x,y,batch_domain_idxs) + + +def test_CCSA_PSPNet_dims_cuda(): + """ """ + layers = 50 + classes = 183 + network_name = None + zoom_factor = 8 # zoom factor for final prediction during training, be in [1, 2, 4, 8] + ignore_label = 255 + criterion = nn.CrossEntropyLoss(ignore_index=ignore_label) + BatchNorm = torch.nn.BatchNorm2d # torch.nn.SyncBatchNorm + model = CCSA_PSPNet( + layers=layers, + classes=classes, + zoom_factor=zoom_factor, + criterion=criterion, + BatchNorm=BatchNorm, + network_name=network_name, + pretrained=False) # unlike actual training time. 
+ + model = model.cuda() + + x = torch.randint(high=255, size=(4,3,201,201)).type(torch.float32) + y = torch.randint(high=10,size=(4,201,201)) + batch_domain_idxs = torch.tensor([0,1,2,1]) + + x = x.cuda() + y = y.cuda() + batch_domain_idxs = batch_domain_idxs.cuda() + + out_cache = model(x,y,batch_domain_idxs) + + +if __name__ == '__main__': + """ """ + test_CCSA_PSPNet_dims() + test_CCSA_PSPNet_dims_cuda() + diff --git a/tests/test_ccsa_utils.py b/tests/test_ccsa_utils.py new file mode 100755 index 0000000..afb42ec --- /dev/null +++ b/tests/test_ccsa_utils.py @@ -0,0 +1,690 @@ +#!/usr/bin/python3 + +import math +import numpy as np +import pdb +import time +import torch + +from mseg_semantic.domain_generalization.ccsa_utils import ( + contrastive_loss, + paired_euclidean_distance, + downsample_label_map, + sample_pair_indices, + find_matching_pairs, + remove_pairs_from_same_domain, + get_merged_pair_embeddings, + pytorch_random_choice, + shuffle_pytorch_tensor, + get_pair_embedding, + count_per_domain_statistics, + sample_px_locations_uniformly, + sample_crossdomain_pos_neg_pairs, + form_pair_info_tensor +) + +""" +For sake of unit tests, pretend we have the following categories: +Let 0 = Sky + 1 = Mountain + 2 = Road + 3 = Person + 4 = Vegetation +""" + + +def test_contrastive_loss1(): + """ + Should be no loss here (zero from pull term, and zero from push term) + """ + # which pairs share the same semantic class label + y_c = torch.tensor([ 1., 0., 0., 0., 1.], dtype=torch.float32) + + # distances between pairs + pred_dists = torch.tensor([0, 1.1, 1.1, 1.1, 0], dtype=torch.float32) + + loss = contrastive_loss(y_c, pred_dists) + gt_loss = torch.tensor([0]) + + assert torch.allclose(loss, gt_loss) + + +def test_contrastive_loss2(): + """ + There should be more loss here (coming only from push term) + """ + # which pairs share the same semantic class label + y_c = torch.tensor([ 1., 0., 0., 0., 1.], dtype=torch.float32) + + # distances between pairs + pred_dists = torch.tensor([0, 0.2, 0.3, 0.1, 0], dtype=torch.float32) + + loss = contrastive_loss(y_c, pred_dists) + gt_loss = torch.tensor([0.3880]) + + assert torch.allclose(loss, gt_loss, atol=1e-3) + + +def test_contrastive_loss3(): + """ + There should be the most loss here (some from pull term, and some from push term also) + """ + # which pairs share the same semantic class label + y_c = torch.tensor([ 1., 0., 0., 0., 1.], dtype=torch.float32) + + # distances between pairs + pred_dists = torch.tensor([2.0, 0.2, 0.3, 0.1, 4.0], dtype=torch.float32) + + loss = contrastive_loss(y_c, pred_dists) + gt_loss = torch.tensor([4.3880]) + + assert torch.allclose(loss, gt_loss, atol=1e-3) + + +def test_paired_euclidean_distance(): + """ """ + X = torch.tensor( + [ + [3,0], + [4,0], + [1,1] + ], dtype=torch.float32) + Y = torch.tensor( + [ + [1,1], + [0,3], + [0,4] + ], dtype=torch.float32) + dists = paired_euclidean_distance(X, Y) + gt_dists = torch.tensor( + [ + [ math.sqrt(2*2 + 1) ], # (3,0) vs. (1,1) + [ math.sqrt(3*3 + 4*4) ], # (4,0) vs. (0,3) + [ math.sqrt(3*3 + 1) ] # (1,1) vs. 
(0,4) + ]) + torch.allclose(gt_dists.squeeze(), dists, atol=1e-3) + + +def test_downsample_label_map(): + """ + Downsample two label maps "Y" + """ + labelmap_1 = torch.tensor( + [ + [0,0,0,0,0,0,0,0], + [4,4,0,0,0,0,4,4], + [4,3,2,2,2,2,3,4], + [4,2,2,2,2,2,2,4] + ]) + + labelmap_2 = torch.tensor( + [ + [1,1,1,1,0,0,0,0], + [1,1,1,1,2,2,2,4], + [4,4,4,4,2,2,2,4], + [4,4,4,3,2,2,2,4] + ]) + Y = torch.stack([labelmap_1, labelmap_2]) + Y = Y.type(torch.float32) + assert Y.shape == (2,4,8) + + dY = downsample_label_map(Y, d=2) + assert dY.shape == (2,2,4) + gt_dY = torch.tensor( + [ + [[0., 0., 0., 0.], + [4., 2., 2., 3.]], + + [[1., 1., 0., 0.], + [4., 4., 2., 2.]] + ]) + + dY = downsample_label_map(Y, d=4) + gt_dY = torch.tensor( + [ + [[0., 0.]], + [[1., 0.]] + ]) + assert dY.shape == (2,1,2) + + + +def test_sample_pair_indices1(): + """ + Given labels for 3 images, sample corresponding pixels that + are known positives and that are known negatives. + Suppose images 0 and 2 come from Domain-0, and image 1 comes + from Domain-1. + """ + labelmap_0 = torch.tensor( + [ + [0,0,0,0,0,0,0,0], + [4,4,0,0,0,0,4,4], + [4,3,2,2,2,2,3,4], + [4,2,2,2,2,2,2,4] + ], dtype=torch.float32) + + labelmap_1 = torch.tensor( + [ + [1,1,1,1,0,0,0,0], + [1,1,1,1,2,2,2,4], + [4,4,4,4,2,2,2,4], + [4,4,4,3,2,2,2,4] + ], dtype=torch.float32) + labelmap_2 = torch.tensor( + [ + [4,4,4,4,4,4,4,4], + [4,4,4,4,4,4,4,4], + [4,4,4,4,4,4,4,4], + [4,4,4,4,4,4,4,4] + ], dtype=torch.float32) + + Y = torch.stack([labelmap_0, labelmap_1, labelmap_2]) + assert Y.shape == (3,4,8) + + batch_domain_indices = torch.tensor([0,1,0], dtype=torch.int32) + + pos_pair_info, neg_pair_info = sample_pair_indices(Y, batch_domain_indices, num_pos_pairs=30000, neg_to_pos_ratio=3, downsample_factor=1) + + for (bi, hi, wi, bj, hj, wj) in pos_pair_info: + assert Y[bi,hi,wi] == Y[bj,hj,wj] # is same class + assert batch_domain_indices[bi] != batch_domain_indices[bj] # must be cross-domain + + for (bi, hi, wi, bj, hj, wj) in neg_pair_info: + assert Y[bi,hi,wi] != Y[bj,hj,wj] # is different class + assert batch_domain_indices[bi] != batch_domain_indices[bj] # must be cross-domain + + +def test_sample_pair_indices2(): + """ + Given labels for 3 images, sample corresponding pixels that + are known positives and that are known negatives. + Suppose images 0 and 2 come from Domain-0, and image 1 comes + from Domain-1. 
+ """ + labelmap_0 = torch.tensor( + [ + [0,0,0,0,1,1,1,1], + [0,0,0,0,1,1,1,1], + [2,2,2,2,4,4,4,4], + [2,2,2,2,4,4,4,4] + ], dtype=torch.float32) + + labelmap_1 = torch.tensor( + [ + [1,1,1,1,0,0,0,0], + [1,1,1,1,0,0,0,0], + [4,4,4,4,2,2,2,2], + [4,4,4,4,2,2,2,2] + ], dtype=torch.float32) + labelmap_2 = torch.tensor( + [ + [4,4,4,4,4,4,4,4], + [4,4,4,4,4,4,4,4], + [4,4,4,4,4,4,4,4], + [4,4,4,4,4,4,4,4] + ], dtype=torch.float32) + + Y = torch.stack([labelmap_0, labelmap_1, labelmap_2]) + assert Y.shape == (3,4,8) + + batch_domain_indices = torch.tensor([0,1,0], dtype=torch.int32) + + pos_pair_info, neg_pair_info = sample_pair_indices(Y, batch_domain_indices, num_pos_pairs=3000, neg_to_pos_ratio=3, downsample_factor=2) + for (bi, hi, wi, bj, hj, wj) in pos_pair_info: + assert Y[:,::2,::2][bi,hi,wi] == Y[:,::2,::2][bj,hj,wj] # is same class + assert batch_domain_indices[bi] != batch_domain_indices[bj] # must be cross-domain + + for (bi, hi, wi, bj, hj, wj) in neg_pair_info: + assert Y[:,::2,::2][bi,hi,wi] != Y[:,::2,::2][bj,hj,wj] # is different class + assert batch_domain_indices[bi] != batch_domain_indices[bj] # must be cross-domain + + + +def test_remove_pairs_from_same_domain(): + """ + Consider a minibatch of size 5 (examples). Suppose we have sampled 4 pairs + of pixel locations. + + In training, we want only pairs from different domains. We + enforce that their feature embeddings are similar. + + We could have 1 million sampled pairs from a minibatch of size 5. + (Number of elements in batch (batch_domain_indices) need not + agree with number of sampled pairs!) + """ + # show which minibatch examples belong to which domain + batch_domain_indices = torch.tensor([0,1,2,1,0]) + # sampled pairs (a,b) are enumerated here. + a_info_ = torch.tensor( + [ + [0, 1, 2], # Belongs to domain 0 (will be removed) + [0, 1, 2], # Belongs to domain 0 + [2, 1, 2], # Belongs to domain 2 + [3, 1, 2] # Belongs to domain 1 (will be removed) + ]) + b_info_ = torch.tensor( + [ + [4, 3, 4], # Belongs to domain 0 (will be removed) + [1, 3, 4], # Belongs to domain 1 + [3, 3, 4], # Belongs to domain 1 + [1, 3, 4] # Belongs to domain 1 (will be removed) + ]) + a_pair_info, b_pair_info = remove_pairs_from_same_domain(batch_domain_indices, a_info_, b_info_) + gt_a_pair_info = torch.tensor( + [ + [0, 1, 2], + [2, 1, 2] + ]) + assert torch.allclose(gt_a_pair_info, a_pair_info) + gt_b_pair_info = torch.tensor( + [ + [1, 3, 4], + [3, 3, 4] + ]) + assert torch.allclose(gt_b_pair_info, b_pair_info) + +def test_form_pair_info_tensor(): + """ + Ensure hstacking of 3 length-N 1d arrays into a (N,3) array + is successful. + + Given batch_dim_idxs (representing indices of examples in a minibatch), + and px_1d_y (representing row indices) and px_1d_x + (representing column indices), stack them along axis-0 (row dimension). + """ + batch_dim_idxs = torch.tensor([5,6,7,8,9], dtype=torch.int32) + px_1d_y = torch.tensor([4,3,2,1,0], dtype=torch.int32) + px_1d_x = torch.tensor([0,2,4,6,8], dtype=torch.int32) + + pair_info = form_pair_info_tensor(batch_dim_idxs, px_1d_y, px_1d_x) + gt_pair_info = torch.tensor( + [ + [5,4,0], + [6,3,2], + [7,2,4], + [8,1,6], + [9,0,8] + ], dtype=torch.int32) + assert torch.allclose(pair_info, gt_pair_info) + + +def test_find_matching_pairs(): + """ + Given a batch of ground truth label maps, and sampled pixel + pair locations (pairs are across label maps), identify which + pairs are matching vs. non-matching and return corresponding metadata + (basically, partition them). 
+ + Get back pos_pair_info -- Pytorch tensor containing info about each positive pair (a,b). Contains + (a batch_idx, a row, a col, b batch_idx, b row, b col) + Also get back neg_pair_info -- same as above, but for negative pairs. + """ + labelmap_0 = torch.tensor( + [ + [0,0,0,0,0,0,0,0], + [4,4,0,0,0,0,4,4], + [4,3,2,2,2,2,3,4], + [4,2,2,2,2,2,2,4] + ]) + + labelmap_1 = torch.tensor( + [ + [1,1,1,1,0,0,0,0], + [1,1,1,1,2,2,2,4], + [4,4,4,4,2,2,2,4], + [4,4,4,3,2,2,2,4] + ]) + labelmap_2 = torch.tensor( + [ + [4,4,4,4,4,4,4,4], + [4,4,4,4,4,4,4,4], + [4,4,4,4,4,4,4,4], + [4,4,4,4,4,4,4,4] + ]) + + Y = torch.stack([labelmap_0, labelmap_1, labelmap_2]) + assert Y.shape == (3,4,8) + + a_pair_info = torch.tensor( + [ + [0,1,1], # pos + [2,1,4], # neg + [1,1,7], # pos + [0,2,2] # neg + ]) + b_pair_info = torch.tensor( + [ + [2,3,7], # pos + [0,1,4], # neg + [2,3,0], # pos + [1,3,3] # neg + ]) + pos_pair_info, neg_pair_info = find_matching_pairs(Y, a_pair_info, b_pair_info) + gt_pos_pair_info = torch.tensor( + [ + [0, 1, 1, 2, 3, 7], # pos pairs + [1, 1, 7, 2, 3, 0] + ]) + assert torch.allclose(pos_pair_info, gt_pos_pair_info) + gt_neg_pair_info = torch.tensor( + [ + [2, 1, 4, 0, 1, 4], # neg pairs + [0, 2, 2, 1, 3, 3] + ]) + assert torch.allclose(neg_pair_info, gt_neg_pair_info) + + +def test_sample_crossdomain_pos_neg_pairs(): + """ """ + labelmap_0 = torch.tensor( + [ + [0,0,0,0,0,0,0,0], + [4,4,0,0,0,0,4,4], + [4,3,2,2,2,2,3,4], + [4,2,2,2,2,2,2,4] + ]) + + labelmap_1 = torch.tensor( + [ + [1,1,1,1,0,0,0,0], + [1,1,1,1,2,2,2,4], + [4,4,4,4,2,2,2,4], + [4,4,4,3,2,2,2,4] + ]) + labelmap_2 = torch.tensor( + [ + [4,4,4,4,4,4,4,4], + [4,4,4,4,4,4,4,4], + [4,4,4,4,4,4,4,4], + [4,4,4,4,4,4,4,4] + ]) + + Y = torch.stack([labelmap_0, labelmap_1, labelmap_2]) + assert Y.shape == (3,4,8) + + # here, domain 1 would be sampled more than others + batch_domain_indices = torch.tensor([0,1,0], dtype=torch.int64) + + _, unique_domain_idxs = count_per_domain_statistics(batch_domain_indices) + b, h, w = Y.shape + INITIAL_SAMPLE_NUM = int(1e4) + + pos_pair_info, neg_pair_info = sample_crossdomain_pos_neg_pairs(Y, batch_domain_indices, unique_domain_idxs, w, h, INITIAL_SAMPLE_NUM) + for (bi, hi, wi, bj, hj, wj) in pos_pair_info: + assert Y[bi,hi,wi] == Y[bj,hj,wj] # is same class + assert batch_domain_indices[bi] != batch_domain_indices[bj] # must be cross-domain + + for (bi, hi, wi, bj, hj, wj) in neg_pair_info: + assert Y[bi,hi,wi] != Y[bj,hj,wj] # is different class + assert batch_domain_indices[bi] != batch_domain_indices[bj] # must be cross-domain + + +def test_count_per_domain_statistics(): + """ + """ + domain_idxs = torch.tensor([0,1,0,1,4]) + examples_per_domain, unique_domain_idxs = count_per_domain_statistics(domain_idxs) + gt_examples_per_domain = np.array([2., 2., 0., 0., 1.], dtype=np.int32) + gt_unique_domain_idxs = np.array([0, 1, 4]) + assert np.allclose(examples_per_domain, gt_examples_per_domain) + assert np.allclose(unique_domain_idxs, gt_unique_domain_idxs) + assert examples_per_domain.dtype == np.int64 + + +def test_sample_px_locations_uniformly(): + """ + Let 0 = Sky + 1 = Mountain + 2 = Road + 3 = Person + 4 = Vegetation + + In expectation, minibatch examples from less common domains should be + sampled more often, if domains sampled uniformly. 
+ """ + labelmap_1 = torch.tensor( + [ + [0,0,0,0,0,0,0,0], + [4,4,0,0,0,0,4,4], + [4,3,2,2,2,2,3,4], + [4,2,2,2,2,2,2,4] + ]) + + labelmap_2 = torch.tensor( + [ + [1,1,1,1,0,0,0,0], + [1,1,1,1,2,2,2,4], + [4,4,4,4,2,2,2,4], + [4,4,4,3,2,2,2,4] + ]) + labelmap_3 = torch.tensor( + [ + [4,4,4,4,4,4,4,4], + [4,4,4,4,4,4,4,4], + [4,4,4,4,4,4,4,4], + [4,4,4,4,4,4,4,4] + ]) + + Y = torch.stack([labelmap_1, labelmap_2, labelmap_3]) + assert Y.shape == (3,4,8) + + # here, domain 1 would be sampled more than others (sampled twice as often) + domain_indices = torch.tensor([0,1,0], dtype=torch.int64) + + # unique domain indices would be [0,1] + _, unique_domain_idxs = count_per_domain_statistics(domain_indices) + b, h, w = Y.shape + INITIAL_SAMPLE_NUM = int(1e6) + + b_idxs, w_idxs, h_idxs = sample_px_locations_uniformly( + domain_indices, + unique_domain_idxs, + w, + h, + INITIAL_SAMPLE_NUM + ) + # Verify expected value vs. empirical. Allow for some margin of error. + # Less common domain (minibatch example 1) should be sampled roughly + # 2x as often, since it appears less often. + assert 245000 < (b_idxs == 0).sum() and (b_idxs == 0).sum() < 255000 + assert 495000 < (b_idxs == 1).sum() and (b_idxs == 1).sum() < 505000 + assert 245000 < (b_idxs == 2).sum() and (b_idxs == 2).sum() < 255000 + + # Sample minibatch indices should lie in [0,b) + assert (b_idxs >= 0).sum() == INITIAL_SAMPLE_NUM + assert (b_idxs < b).sum() == INITIAL_SAMPLE_NUM + + # Sampled pixel rows should lie in [0,h) + assert (h_idxs >= 0).sum() == INITIAL_SAMPLE_NUM + assert (h_idxs < h).sum() == INITIAL_SAMPLE_NUM + + # Sampled pixel columns should lie in [0,w) + assert (w_idxs >= 0).sum() == INITIAL_SAMPLE_NUM + assert (w_idxs < w).sum() == INITIAL_SAMPLE_NUM + + +def test_shuffle_pytorch_tensor(): + """ + Given all possible permutations, ensure that the shuffling that was + executed corresponds to any valid permutation. + """ + t = torch.tensor( + [ + [1,2], + [3,4], + [5,6] + ]) + + shuffled = shuffle_pytorch_tensor(t) + + gt_permutations = torch.tensor( + [ + [[1,2], + [3,4], + [5,6]], + + [[1,2], + [5,6], + [3,4]], + + [[3,4], + [5,6], + [1,2]], + + [[5,6], + [3,4], + [1,2]], + + [[3,4], + [1,2], + [5,6]], + + [[5,6], + [1,2], + [3,4]] + ]) + assert any([torch.allclose(gt_permutations[i], shuffled) for i in range(6)]) + + + +def test_pytorch_random_choice(): + """ + Ensure that sampling with replacement returns values that are found + in original array, and of correct shape. 
+ """ + x = np.array([0,2,4,5,6]) + vals = pytorch_random_choice(x, num_samples=10) + for val in list(torch.unique(vals).cpu().numpy()): + assert val in list(x) + assert vals.shape == (10,) + + x = np.array([0,2,4,5,6]) + vals = pytorch_random_choice(x, num_samples=3) + for val in list(torch.unique(vals).cpu().numpy()): + assert val in list(x) + assert vals.shape == (3,) + + x = np.array([0,2]) + vals = pytorch_random_choice(x, num_samples=10) + for val in list(torch.unique(vals).cpu().numpy()): + assert val in list(x) + assert vals.shape == (10,) + + +def test_get_merged_pair_embeddings(): + """ + """ + pos_pair_info = torch.tensor( + [ + [0,1,1,1,2,2], + [1,3,4,2,0,0] + ]) + neg_pair_info = torch.tensor( + [ + [0,1,1,1,2,2], + [1,3,4,2,0,0] + ]) + resnet_embedding = torch.arange(2*3*4*5).reshape(3,2,4,5) + + y_c, a_embedding, b_embedding = get_merged_pair_embeddings( + pos_pair_info, + neg_pair_info, + resnet_embedding + ) + gt_y_c = torch.tensor([1,1,0,0], dtype=torch.float32) + gt_a_embedding = torch.tensor( + [ + [ 6, 26], + [59, 79], + [ 6, 26], + [59, 79] + ]) + gt_b_embedding = torch.tensor( + [ + [ 52, 72], + [ 80, 100], + [ 52, 72], + [ 80, 100] + ]) + assert torch.allclose(a_embedding, gt_a_embedding) + assert torch.allclose(b_embedding, gt_b_embedding) + assert torch.allclose(y_c, gt_y_c) + +def test_get_pair_embedding(): + """ + """ + pair_info = torch.tensor( + [ + # (bi,hi,wi,bj,hj,wj) + [0, 1, 1, 1, 2, 2], + [1, 3, 4, 2, 0, 0] + ]) + embedding = torch.arange(2*3*4*5).reshape(3,2,4,5) + a_embedding, b_embedding = get_pair_embedding(pair_info, embedding) + + gt_a_embedding = torch.tensor( + [ + [ 6, 26], + [59, 79] + ]) + gt_b_embedding = torch.tensor( + [ + [ 52, 72], + [ 80, 100] + ]) + + assert torch.allclose(a_embedding, gt_a_embedding) + assert torch.allclose(b_embedding, gt_b_embedding) + + +def time_sample_pair_indices(): + """ + Count how long it takes to sample pairs. + Suppose we have a batch size of 128 images, and 194 possible + classes. Suppose the 128 minibatch examples come from 7 different + domains. + + Takes around 0.5 sec on Macbook Pro to sample pair indices each time. 
+ """ + for _ in range(10): + batch_domain_idxs = torch.randint(low=0, high=7, size=(128,)) + Y = torch.randint(low=0, high=194, size=(128,201,201)) + + start = time.time() + out = sample_pair_indices( + Y.type(torch.float32), + batch_domain_idxs, + num_pos_pairs=int(1e3), + neg_to_pos_ratio=3, + downsample_factor=8 + ) + end = time.time() + duration = end - start + print(f'Duration was {duration}') + + +if __name__ == '__main__': + """ """ + test_contrastive_loss1() + test_contrastive_loss2() + test_contrastive_loss3() + test_paired_euclidean_distance() + test_downsample_label_map() + + test_shuffle_pytorch_tensor() + test_pytorch_random_choice() + test_count_per_domain_statistics() + test_sample_px_locations_uniformly() + + test_form_pair_info_tensor() + test_remove_pairs_from_same_domain() + + test_find_matching_pairs() + test_sample_crossdomain_pos_neg_pairs() + test_sample_pair_indices1() + test_sample_pair_indices2() + + test_get_pair_embedding() + test_get_merged_pair_embeddings() + time_sample_pair_indices() diff --git a/tests/test_dist_mgda_utils.py b/tests/test_dist_mgda_utils.py new file mode 100755 index 0000000..70d3bef --- /dev/null +++ b/tests/test_dist_mgda_utils.py @@ -0,0 +1,100 @@ +#!/usr/bin/python3 + +import numpy as np +import pdb +import torch + +from multiobjective_opt.dist_mgda_utils import ( + reduce_to_dict_per_dataset, + scaled_reduce_dict_to_tensor, + normalize_tensor_list +) + +def test_all_gather_create_tensor_list(): + """ + NOT EASY TO TEST SINCE MUST BE ON SEPARATE cpus/GPUS FOR IT TO WORK + """ + pass + + +def test_scaled_reduce_dict_to_tensor(): + """ + """ + dataset_grad_p_dict = { + 'coco': torch.tensor([1.,2.]), + 'ade20k': torch.tensor([3.,4.]), + 'mapillary': torch.tensor([5.,6.]) + } + dataset_names = ['coco', 'ade20k', 'mapillary'] + scales = {'coco': 1., 'ade20k': 5., 'mapillary': 2.} + + tensor = scaled_reduce_dict_to_tensor(dataset_grad_p_dict, dataset_names, scales=scales) + gt_tensor = torch.tensor([26., 34.]) + assert torch.allclose(tensor, gt_tensor) + + +def test_reduce_to_dict_per_dataset(): + """ + """ + ngpus_per_node = 8 + tensor_list = [torch.ones(1) * i for i in range(ngpus_per_node) ] + dataset_gpu_mapping = { + 'coco':[0,1,2], + 'mapillary': [3,4,5], + 'ade20k': [6,7] + } + + dataset_loss_dict = reduce_to_dict_per_dataset(tensor_list, dataset_gpu_mapping) + gt_dataset_loss_dict = { + 'coco': torch.tensor([3./3]), # (0 + 1 + 2 ) / 3 + 'mapillary': torch.tensor([12./3.]), # (3 + 4 + 5) / 3 + 'ade20k': torch.tensor([13./2.]) # (6 + 7) / 2 + } + assert_tensor_dicts_are_equal(dataset_loss_dict, gt_dataset_loss_dict) + print(dataset_loss_dict) + + +def assert_tensor_dicts_are_equal(dict1, dict2): + """ + """ + assert set(dict1.keys()) == set(dict2.keys()) + for k, v1 in dict1.items(): + assert torch.allclose(v1, dict2[k]) + + +def test_normalize_tensor_list(): + """ + """ + tensor_list = [ + torch.arange(5).type(torch.float32), + torch.ones(3).type(torch.float32), + torch.ones(2).type(torch.float32) * 2 + ] + print('Unnormalized: ', tensor_list) + normalized_tensor_list, norm = normalize_tensor_list(tensor_list) + + gt_tensor_list = np.array([0,1,2,3,4,1,1,1,2,2.]) + gt_norm = np.linalg.norm(gt_tensor_list) + + assert np.allclose(gt_norm, 6.403, atol=1e-3) + assert torch.allclose( norm, torch.Tensor([gt_norm]) ) + + gt_tensor0 = torch.tensor([0. 
, 0.156, 0.312, 0.468, 0.624]) + gt_tensor1 = torch.tensor([0.156, 0.156, 0.156]) + gt_tensor2 = torch.tensor([0.312, 0.312]) + + assert len(normalized_tensor_list) == 3 + assert torch.allclose(normalized_tensor_list[0], gt_tensor0, atol=1e-2) + assert torch.allclose(normalized_tensor_list[1], gt_tensor1, atol=1e-2) + assert torch.allclose(normalized_tensor_list[2], gt_tensor2, atol=1e-2) + + +if __name__ == '__main__': + + # test_all_gather_create_tensor_list() + #test_scaled_reduce_dict_to_tensor() + #test_reduce_to_dict_per_dataset() + + test_normalize_tensor_list() + + diff --git a/tests/test_distributed_train.py b/tests/test_distributed_train.py new file mode 100755 index 0000000..4b4c3ac --- /dev/null +++ b/tests/test_distributed_train.py @@ -0,0 +1,89 @@ + +import os +import torch +from util import dataset, transform +import torch.multiprocessing as mp +import torch.distributed as dist + + +def main_process(): + """ """ + return args['rank'] % 8 == 0 + + +def train(train_loader): + """ """ + print(args) + + if main_process(): + print('Main process runs in ', args) + + for i, (input, target) in enumerate(train_loader): + print('hello from training with ', args) + + + +def main_worker(gpu, ngpus_per_node, argss): + """ """ + global args + print('Argss: ', argss) + args = argss + args['rank'] = gpu + rank = args['rank'] * ngpus_per_node + gpu + print(f'Rank: {rank}') + print(f'Args on {rank}: ', args) + dist.init_process_group( + backend=args['dist_backend'], + init_method=args['dist_url'], + world_size=args['world_size'], + rank=args['rank'] + ) + + train_transform = transform.Compose([ + transform.RandScale([args.scale_min, args.scale_max]) + ]) + + train_data = dataset.SemData( + split='train', + data_root=args['data_root'], + data_list=args['train_list'], + transform=train_transform + ) + train_sampler = torch.utils.data.distributed.DistributedSampler( + train_data, + num_replicas=args.num_replica_per_dataset, + rank=args.dataset_rank + ) + train_loader = torch.utils.data.DataLoader( + train_data, + batch_size=args.batch_size, + shuffle=(train_sampler is None), + num_workers=args.workers, + pin_memory=True, + sampler=train_sampler, + drop_last=True + ) + + +def main(): + """ """ + ngpus_per_node = 8 + world_size = 1 + world_size = ngpus_per_node * world_size + print(f'World size: {world_size}') + args = { + 'world_size' : world_size, + 'dist_url': 'tcp://127.0.0.1:6789', + 'dist_backend': 'nccl', + 'scale_min': 0.5, # minimum random scale + 'scale_max': 2.0 # maximum random scale + 'data_root':, + 'train_list': + } + mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args)) + + +if __name__ == '__main__': + main() + +
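The expected values in test_contrastive_loss1 through test_contrastive_loss3 are consistent with a margin-1 contrastive objective of the form mean(y_c * d^2 + (1 - y_c) * max(0, margin - d)^2). The snippet below is a minimal sketch that reproduces those expectations; it is an assumption about the behavior of mseg_semantic.domain_generalization.ccsa_utils.contrastive_loss inferred from the tests, not the library's actual implementation, and contrastive_loss_sketch is a hypothetical helper name.

    import torch

    def contrastive_loss_sketch(y_c: torch.Tensor, pred_dists: torch.Tensor, margin: float = 1.0) -> torch.Tensor:
        """Sketch of a margin-based contrastive loss (hypothetical helper).

        y_c is 1 where a pair shares the same semantic class and 0 otherwise;
        pred_dists holds the Euclidean distances between paired embeddings.
        Similar pairs are pulled together (d^2); dissimilar pairs are pushed
        apart until they are at least `margin` apart (max(0, margin - d)^2).
        """
        pull = y_c * pred_dists.pow(2)
        push = (1.0 - y_c) * torch.clamp(margin - pred_dists, min=0.0).pow(2)
        return (pull + push).mean()

    # Matches the expectation in test_contrastive_loss2 above:
    y_c = torch.tensor([1., 0., 0., 0., 1.])
    d = torch.tensor([0., 0.2, 0.3, 0.1, 0.])
    print(contrastive_loss_sketch(y_c, d))  # tensor(0.3880)

With this form, test_contrastive_loss1 yields zero loss (similar pairs at distance 0, dissimilar pairs beyond the margin), and test_contrastive_loss3 adds a pull penalty of (2^2 + 4^2) / 5 on top of the same push term, giving 4.3880.

The poly learning-rate policy defined in mseg_semantic/utils/training_utils.py pairs with the base_lr: 0.01 and power: 0.9 settings in the release configs; a quick check of how it decays over training, using the import path taken from the diff above:

    from mseg_semantic.utils.training_utils import poly_learning_rate

    max_iter = 100000
    for frac in (0.0, 0.5, 0.9):
        lr = poly_learning_rate(0.01, int(frac * max_iter), max_iter, power=0.9)
        print(f'{frac:.1f} of training -> lr {lr:.5f}')
    # 0.0 of training -> lr 0.01000
    # 0.5 of training -> lr 0.00536
    # 0.9 of training -> lr 0.00126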