
Commit 79b5f89

drop the need of cocobu_fc; use zero size tensor when use_fc or use_att is False.
ruotianluo committed Jan 29, 2020
1 parent 297e9d3 commit 79b5f89
Showing 7 changed files with 10 additions and 12 deletions.
1 change: 0 additions & 1 deletion configs/a2i2.yml
@@ -1,7 +1,6 @@
# base
caption_model: att2in2
input_json: data/cocotalk.json
-input_fc_dir: data/cocobu_fc
input_att_dir: data/cocobu_att
input_label_h5: data/cocotalk_label.h5
learning_rate: 0.0005
1 change: 0 additions & 1 deletion configs/topdown.yml
@@ -1,7 +1,6 @@
# base
caption_model: topdown
input_json: data/cocotalk.json
-input_fc_dir: data/cocobu_fc
input_att_dir: data/cocobu_att
input_label_h5: data/cocotalk_label.h5
learning_rate: 0.0005
1 change: 0 additions & 1 deletion configs/transformer.yml
@@ -4,7 +4,6 @@ noamopt_warmup: 20000
label_smoothing: 0.0
input_json: data/cocotalk.json
input_label_h5: data/cocotalk_label.h5
-input_fc_dir: data/cocobu_fc
input_att_dir: data/cocobu_att
seq_per_img: 5
batch_size: 10
5 changes: 1 addition & 4 deletions data/README.md
@@ -57,15 +57,12 @@ Then:
python script/make_bu_data.py --output_dir data/cocobu
```

-This will create `data/cocobu_fc`, `data/cocobu_att` and `data/cocobu_box`. If you want to use bottom-up feature, you can just follow the following steps and replace all cocotalk with cocobu.
+This will create `data/cocobu_fc`(not necessary), `data/cocobu_att` and `data/cocobu_box`. If you want to use bottom-up feature, you can just replace all `"cocotalk"` with `"cocobu"` in the training/test scripts.

#### Download converted files

bottomup-fc: [link](https://drive.google.com/file/d/1IpjCJ5LYC4kX2krxHcPgxAIipgA8uqTU/view?usp=sharing) (The fc features here are simply the average of the attention features)

bottomup-att: [link](https://drive.google.com/file/d/1hun0tsel34aXO4CYyTRIvHJkcbZHwjrD/view?usp=sharing)


## Flickr30k.

It's similar.
10 changes: 7 additions & 3 deletions dataloader.py
@@ -255,11 +255,15 @@ def __getitem__(self, index):
                # sort the features by the size of boxes
                att_feat = np.stack(sorted(att_feat, key=lambda x:x[-1], reverse=True))
        else:
-           att_feat = np.zeros((1,1,1), dtype='float32')
+           att_feat = np.zeros((0,0), dtype='float32')
        if self.use_fc:
-           fc_feat = self.fc_loader.get(str(self.info['images'][ix]['id']))
+           try:
+               fc_feat = self.fc_loader.get(str(self.info['images'][ix]['id']))
+           except:
+               # Use average of attention when there is no fc provided (For bottomup feature)
+               fc_feat = att_feat.mean(0)
        else:
-           fc_feat = np.zeros((1), dtype='float32')
+           fc_feat = np.zeros((0), dtype='float32')
        if hasattr(self, 'h5_label_file'):
            seq = self.get_captions(ix, self.seq_per_img)
        else:
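For reference, the new placeholder and fallback behavior is easy to check in isolation; a minimal numpy sketch (the 36x2048 feature size is only illustrative, not taken from the diff):

```python
import numpy as np

# Illustrative bottom-up attention features: 36 regions x 2048 dims (example sizes only)
att_feat = np.random.rand(36, 2048).astype('float32')

# Fallback used when no fc feature file is available: average the attention features
fc_feat = att_feat.mean(0)
print(fc_feat.shape)  # (2048,)

# Placeholders when a feature type is disabled: zero-size arrays rather than 1-element dummies
att_placeholder = np.zeros((0, 0), dtype='float32')
fc_placeholder = np.zeros((0,), dtype='float32')
print(att_placeholder.shape, fc_placeholder.shape)  # (0, 0) (0,)
```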
2 changes: 1 addition & 1 deletion models/CaptionModel.py
@@ -235,7 +235,7 @@ def repeat_tensor(n, x):
        if x is not None:
            x = x.unsqueeze(1) # Bx1x...
            x = x.expand(-1, n, *([-1]*len(x.shape[2:]))) # Bxnx...
-           x = x.reshape(-1, *x.shape[2:]) # Bnx...
+           x = x.reshape(x.shape[0]*n, *x.shape[2:]) # Bnx...
        return x

    @staticmethod
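This reshape change is what lets the zero-size placeholders above pass through repeat_tensor: with zero elements, an inferred -1 dimension is ambiguous and PyTorch raises an error, while the explicit B*n size is fine. A small sketch with toy shapes:

```python
import torch

n = 3
x = torch.zeros(2, 0)                            # zero-size fc placeholder (B=2)
x = x.unsqueeze(1)                               # Bx1x...
x = x.expand(-1, n, *([-1] * len(x.shape[2:])))  # Bxnx...

# x.reshape(-1, *x.shape[2:]) would raise a RuntimeError here:
# with zero elements, the -1 dimension could take any value, so it is ambiguous.
x = x.reshape(x.shape[0] * n, *x.shape[2:])      # Bnx... -- explicit size, works
print(x.shape)                                   # torch.Size([6, 0])
```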
2 changes: 1 addition & 1 deletion models/TransformerModel.py
@@ -296,7 +296,7 @@ def _prepare_feature(self, fc_feats, att_feats, att_masks):
        att_feats, seq, att_masks, seq_mask = self._prepare_feature_forward(att_feats, att_masks)
        memory = self.model.encode(att_feats, att_masks)

-       return fc_feats[...,:1], att_feats[...,:1], memory, att_masks
+       return fc_feats[...,:0], att_feats[...,:0], memory, att_masks

    def _prepare_feature_forward(self, att_feats, att_masks=None, seq=None):
        att_feats, att_masks = self.clip_att(att_feats, att_masks)
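The `[..., :0]` slices play the same role on the model side: the returned dummies keep the batch dimension but hold no data, instead of the one-column dummy used before. A toy illustration (the 2048-d size is just an example):

```python
import torch

fc_feats = torch.randn(5, 2048)   # toy batch of 5 fc vectors (illustrative sizes)
print(fc_feats[..., :1].shape)    # torch.Size([5, 1])  -- old dummy: one leftover column
print(fc_feats[..., :0].shape)    # torch.Size([5, 0])  -- new dummy: empty, batch dim kept
```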
