diff --git a/.gitignore b/.gitignore index 2ad723b..87151a4 100644 --- a/.gitignore +++ b/.gitignore @@ -111,4 +111,8 @@ preprocess_data.py res/ adj.md tensorrt/build/* +datasets/coco/train.txt +datasets/coco/val.txt +pretrained/* +lib/coco.py diff --git a/configs/bisenetv2.py b/configs/bisenetv2.py index 94f01e3..4e8537a 100644 --- a/configs/bisenetv2.py +++ b/configs/bisenetv2.py @@ -3,7 +3,7 @@ cfg = dict( model_type='bisenetv2', num_aux_heads=4, - lr_start = 5e-2, + lr_start = 1 * 5e-3, weight_decay=5e-4, warmup_iters = 1000, max_iter = 150000, @@ -14,6 +14,6 @@ cropsize=[512, 1024], ims_per_gpu=8, use_fp16=True, - use_sync_bn=False, + use_sync_bn=True, respth='./res', ) diff --git a/datasets/cityscapes/gtFine b/datasets/cityscapes/gtFine index ae71826..a4859b7 120000 --- a/datasets/cityscapes/gtFine +++ b/datasets/cityscapes/gtFine @@ -1 +1 @@ -/data2/zzy/.datasets/cityscapes//gtFine/ \ No newline at end of file +/data/zzy/zzy/cityscapes/cityscapes/gtFine/ \ No newline at end of file diff --git a/datasets/cityscapes/leftImg8bit b/datasets/cityscapes/leftImg8bit index eed1adb..f185676 120000 --- a/datasets/cityscapes/leftImg8bit +++ b/datasets/cityscapes/leftImg8bit @@ -1 +1 @@ -/data2/zzy/.datasets/cityscapes//leftImg8bit/ \ No newline at end of file +/data/zzy/zzy/cityscapes/cityscapes/leftImg8bit/ \ No newline at end of file diff --git a/datasets/coco/images/train2017 b/datasets/coco/images/train2017 new file mode 120000 index 0000000..9d730b5 --- /dev/null +++ b/datasets/coco/images/train2017 @@ -0,0 +1 @@ +/data/zzy/zzy/coco/images/train2017/ \ No newline at end of file diff --git a/datasets/coco/images/val2017 b/datasets/coco/images/val2017 new file mode 120000 index 0000000..9d47787 --- /dev/null +++ b/datasets/coco/images/val2017 @@ -0,0 +1 @@ +/data/zzy/zzy/coco/images/val2017/ \ No newline at end of file diff --git a/datasets/coco/labels/train2017 b/datasets/coco/labels/train2017 new file mode 120000 index 0000000..725cfae --- /dev/null +++ b/datasets/coco/labels/train2017 @@ -0,0 +1 @@ +/data/zzy/zzy/coco/labels/train2017/ \ No newline at end of file diff --git a/datasets/coco/labels/val2017 b/datasets/coco/labels/val2017 new file mode 120000 index 0000000..20cbc2d --- /dev/null +++ b/datasets/coco/labels/val2017 @@ -0,0 +1 @@ +/data/zzy/zzy/coco/labels/val2017/ \ No newline at end of file diff --git a/dist_train.sh b/dist_train.sh new file mode 100644 index 0000000..fe7cefe --- /dev/null +++ b/dist_train.sh @@ -0,0 +1,6 @@ + +export CUDA_VISIBLE_DEVICES=6,7 +PORT=52330 +NGPUS=2 + +python -m torch.distributed.launch --nproc_per_node=$NGPUS tools/train_amp.py --model bisenetv2 --port $PORT diff --git a/lib/logger.py b/lib/logger.py index ba59d7c..66889b2 100644 --- a/lib/logger.py +++ b/lib/logger.py @@ -16,7 +16,10 @@ def setup_logger(name, logpth): log_level = logging.INFO if dist.is_initialized() and dist.get_rank() != 0: log_level = logging.WARNING - logging.basicConfig(level=log_level, format=FORMAT, filename=logfile) + try: + logging.basicConfig(level=log_level, format=FORMAT, filename=logfile, force=True) + except Exception: + logging.basicConfig(level=log_level, format=FORMAT, filename=logfile) logging.root.addHandler(logging.StreamHandler()) diff --git a/lib/models/bisenetv2.py b/lib/models/bisenetv2.py index a06133b..5e40a59 100644 --- a/lib/models/bisenetv2.py +++ b/lib/models/bisenetv2.py @@ -41,6 +41,7 @@ def init_weight(self): nn.init.xavier_normal_(self.proj.weight, gain=1.) + class DetailBranch(nn.Module): def __init__(self): @@ -324,6 +325,7 @@ def __init__(self, n_classes, output_aux=True): self.aux5_4 = SegmentHead(128, 128, n_classes, up_factor=32) self.init_weights() + self.load_pretrain() def forward(self, x): size = x.size()[2:] @@ -353,6 +355,33 @@ def init_weights(self): nn.init.ones_(module.weight) nn.init.zeros_(module.bias) + def load_pretrain(self): + state = torch.load('pretrained/bisenetv2_pretrain.pth', map_location='cpu') + state = {k:v for k,v in state.items() if not k in ('fc', 'head', 'dense_head')} + for name, child in self.named_children(): + if name in state.keys(): + child.load_state_dict(state[name]) + + + def get_params(self): + wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], [] + for name, param in self.named_parameters(): + if 'head' in name or 'aux' in name: + if param.dim() == 1: + lr_mul_nowd_params.append(param) + elif param.dim() == 4: + lr_mul_wd_params.append(param) + else: + print(name) + else: + if param.dim() == 1: + nowd_params.append(param) + elif param.dim() == 4: + wd_params.append(param) + else: + print(name) + return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params + if __name__ == "__main__": # x = torch.randn(16, 3, 1024, 2048) diff --git a/lib/sampler.py b/lib/sampler.py index e205be8..b6aae9f 100644 --- a/lib/sampler.py +++ b/lib/sampler.py @@ -24,7 +24,7 @@ class RepeatedDistSampler(Sampler): shuffle (optional): If true (default), sampler will shuffle the indices """ - def __init__(self, dataset, num_imgs, num_replicas=None, rank=None, shuffle=True): + def __init__(self, dataset, num_imgs, num_replicas=None, rank=None, shuffle=True, ba=False): if num_replicas is None: if not dist.is_available(): raise RuntimeError("Requires distributed package to be available") @@ -40,6 +40,7 @@ def __init__(self, dataset, num_imgs, num_replicas=None, rank=None, shuffle=True self.total_size = self.num_imgs_rank * self.num_replicas self.num_imgs = num_imgs self.shuffle = shuffle + self.ba = ba def __iter__(self): @@ -58,6 +59,12 @@ def __iter__(self): indices = indices[:self.total_size] assert len(indices) == self.total_size + if self.ba: + n_rep = max(4, self.num_replicas) + len_ind = len(indices) // n_rep + 1 + indices = indices[:len_ind] + indices = [ind for ind in indices for _ in range(n_rep)] + # subsample indices = indices[self.rank:self.total_size:self.num_replicas] assert len(indices) == self.num_imgs_rank diff --git a/tools/gen_coco_annos.py b/tools/gen_coco_annos.py new file mode 100644 index 0000000..d4272fd --- /dev/null +++ b/tools/gen_coco_annos.py @@ -0,0 +1,42 @@ + +import os +import os.path as osp + + +def gen_coco(): + ''' + root_path: + |- images + |- train2017 + |- val2017 + |- labels + |- train2017 + |- val2017 + ''' + root_path = '/datasets/coco' + save_path = './datasets/coco/' + for mode in ('train', 'val'): + im_root = osp.join(root_path, f'images/{mode}2017') + lb_root = osp.join(root_path, f'labels/{mode}2017') + + ims = os.listdir(im_root) + lbs = os.listdir(lb_root) + + print(len(ims)) + print(len(lbs)) + + im_names = [el.replace('.jpg', '') for el in ims] + lb_names = [el.replace('.png', '') for el in lbs] + common_names = list(set(im_names) & set(lb_names)) + + lines = [ + f'images/{mode}2017/{name}.jpg,labels/{mode}2017/{name}.png' + for name in common_names + ] + + with open(f'{save_path}/{mode}.txt', 'w') as fw: + fw.write('\n'.join(lines)) + + + +gen_coco() diff --git a/tools/train_amp.py b/tools/train_amp.py index 04e9721..5ce76af 100644 --- a/tools/train_amp.py +++ b/tools/train_amp.py @@ -29,11 +29,11 @@ ## fix all random seeds -torch.manual_seed(123) -torch.cuda.manual_seed(123) -np.random.seed(123) -random.seed(123) -torch.backends.cudnn.deterministic = True +# torch.manual_seed(123) +# torch.cuda.manual_seed(123) +# np.random.seed(123) +# random.seed(123) +# torch.backends.cudnn.deterministic = True # torch.backends.cudnn.benchmark = True # torch.multiprocessing.set_sharing_strategy('file_system') @@ -68,11 +68,13 @@ def set_model(): def set_optimizer(model): if hasattr(model, 'get_params'): wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = model.get_params() + # wd_val = cfg.weight_decay + wd_val = 0 params_list = [ {'params': wd_params, }, - {'params': nowd_params, 'weight_decay': 0}, + {'params': nowd_params, 'weight_decay': wd_val}, {'params': lr_mul_wd_params, 'lr': cfg.lr_start * 10}, - {'params': lr_mul_nowd_params, 'weight_decay': 0, 'lr': cfg.lr_start * 10}, + {'params': lr_mul_nowd_params, 'weight_decay': wd_val, 'lr': cfg.lr_start * 10}, ] else: wd_params, non_wd_params = [], []