From 79b5f89d3e8ec7faf7d23c7fa0593a88898e818c Mon Sep 17 00:00:00 2001
From: Ruotian Luo
Date: Tue, 14 Jan 2020 14:05:55 -0600
Subject: [PATCH] drop the need for cocobu_fc; use zero-size tensors when use_fc or use_att is False.

---
 configs/a2i2.yml           |  1 -
 configs/topdown.yml        |  1 -
 configs/transformer.yml    |  1 -
 data/README.md             |  5 +----
 dataloader.py              | 10 +++++++---
 models/CaptionModel.py     |  2 +-
 models/TransformerModel.py |  2 +-
 7 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/configs/a2i2.yml b/configs/a2i2.yml
index 808582b4..9ea0eb32 100644
--- a/configs/a2i2.yml
+++ b/configs/a2i2.yml
@@ -1,7 +1,6 @@
 # base
 caption_model: att2in2
 input_json: data/cocotalk.json
-input_fc_dir: data/cocobu_fc
 input_att_dir: data/cocobu_att
 input_label_h5: data/cocotalk_label.h5
 learning_rate: 0.0005
diff --git a/configs/topdown.yml b/configs/topdown.yml
index 324892e8..3babc24e 100644
--- a/configs/topdown.yml
+++ b/configs/topdown.yml
@@ -1,7 +1,6 @@
 # base
 caption_model: topdown
 input_json: data/cocotalk.json
-input_fc_dir: data/cocobu_fc
 input_att_dir: data/cocobu_att
 input_label_h5: data/cocotalk_label.h5
 learning_rate: 0.0005
diff --git a/configs/transformer.yml b/configs/transformer.yml
index a08ef544..b7f90607 100644
--- a/configs/transformer.yml
+++ b/configs/transformer.yml
@@ -4,7 +4,6 @@ noamopt_warmup: 20000
 label_smoothing: 0.0
 input_json: data/cocotalk.json
 input_label_h5: data/cocotalk_label.h5
-input_fc_dir: data/cocobu_fc
 input_att_dir: data/cocobu_att
 seq_per_img: 5
 batch_size: 10
diff --git a/data/README.md b/data/README.md
index 78d972f6..0dc7b2c5 100644
--- a/data/README.md
+++ b/data/README.md
@@ -57,15 +57,12 @@ Then:
 python script/make_bu_data.py --output_dir data/cocobu
 ```
 
-This will create `data/cocobu_fc`, `data/cocobu_att` and `data/cocobu_box`. If you want to use bottom-up feature, you can just follow the following steps and replace all cocotalk with cocobu.
+This will create `data/cocobu_fc` (no longer necessary), `data/cocobu_att` and `data/cocobu_box`. If you want to use bottom-up features, you can just replace all `"cocotalk"` with `"cocobu"` in the training/test scripts.
 
 #### Download converted files
 
-bottomup-fc: [link](https://drive.google.com/file/d/1IpjCJ5LYC4kX2krxHcPgxAIipgA8uqTU/view?usp=sharing) (The fc features here are simply the average of the attention features)
-
 bottomup-att: [link](https://drive.google.com/file/d/1hun0tsel34aXO4CYyTRIvHJkcbZHwjrD/view?usp=sharing)
 
-
 ## Flickr30k. It's similar.
 
 
diff --git a/dataloader.py b/dataloader.py
index 95679a3b..9ae88be8 100644
--- a/dataloader.py
+++ b/dataloader.py
@@ -255,11 +255,15 @@ def __getitem__(self, index):
                 # sort the features by the size of boxes
                 att_feat = np.stack(sorted(att_feat, key=lambda x:x[-1], reverse=True))
         else:
-            att_feat = np.zeros((1,1,1), dtype='float32')
+            att_feat = np.zeros((0,0), dtype='float32')
         if self.use_fc:
-            fc_feat = self.fc_loader.get(str(self.info['images'][ix]['id']))
+            try:
+                fc_feat = self.fc_loader.get(str(self.info['images'][ix]['id']))
+            except Exception:
+                # Use the average of the attention features when no fc feature is provided (for bottom-up features)
+                fc_feat = att_feat.mean(0)
         else:
-            fc_feat = np.zeros((1), dtype='float32')
+            fc_feat = np.zeros((0), dtype='float32')
         if hasattr(self, 'h5_label_file'):
             seq = self.get_captions(ix, self.seq_per_img)
         else:
diff --git a/models/CaptionModel.py b/models/CaptionModel.py
index bb6c4198..c12c7333 100644
--- a/models/CaptionModel.py
+++ b/models/CaptionModel.py
@@ -235,7 +235,7 @@ def repeat_tensor(n, x):
         if x is not None:
             x = x.unsqueeze(1) # Bx1x...
             x = x.expand(-1, n, *([-1]*len(x.shape[2:]))) # Bxnx...
-            x = x.reshape(-1, *x.shape[2:]) # Bnx...
+            x = x.reshape(x.shape[0]*n, *x.shape[2:]) # Bnx...
         return x
 
     @staticmethod
diff --git a/models/TransformerModel.py b/models/TransformerModel.py
index 766c2ecc..8ecb332a 100644
--- a/models/TransformerModel.py
+++ b/models/TransformerModel.py
@@ -296,7 +296,7 @@ def _prepare_feature(self, fc_feats, att_feats, att_masks):
         att_feats, seq, att_masks, seq_mask = self._prepare_feature_forward(att_feats, att_masks)
         memory = self.model.encode(att_feats, att_masks)
 
-        return fc_feats[...,:1], att_feats[...,:1], memory, att_masks
+        return fc_feats[...,:0], att_feats[...,:0], memory, att_masks
 
     def _prepare_feature_forward(self, att_feats, att_masks=None, seq=None):
         att_feats, att_masks = self.clip_att(att_feats, att_masks)
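
Not part of the patch: a minimal sketch of why `repeat_tensor` in models/CaptionModel.py switches from `reshape(-1, ...)` to `reshape(x.shape[0]*n, ...)`. Once `use_fc` or `use_att` being False yields zero-size feature tensors, `-1` can no longer be inferred (for a 0-element tensor the inferred dimension is ambiguous), while the explicit product works for both empty and non-empty tensors. The helper below mirrors the post-patch code; the example shapes are made up.

```
import torch

def repeat_tensor(n, x):
    # Mirrors models/CaptionModel.py after this patch.
    if x is not None:
        x = x.unsqueeze(1)                             # Bx1x...
        x = x.expand(-1, n, *([-1]*len(x.shape[2:])))  # Bxnx...
        x = x.reshape(x.shape[0]*n, *x.shape[2:])      # Bnx...
    return x

fc_feats = torch.zeros(5, 0)   # zero-size fc features, i.e. use_fc is False
print(repeat_tensor(3, fc_feats).shape)   # torch.Size([15, 0])

# The old form raises here: a 0-element tensor cannot infer the -1 dimension.
# torch.zeros(5, 1, 0).expand(-1, 3, -1).reshape(-1, 0)  # RuntimeError
```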
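The dataloader fallback that makes the `cocobu_fc` files redundant can be checked the same way: per the README note this patch removes, the distributed fc features were simply the average of the attention features, which is exactly what the new `except` branch computes. A two-line sketch with made-up feature sizes:

```
import torch

att_feat = torch.rand(36, 2048)  # e.g. 36 bottom-up region features
fc_feat = att_feat.mean(0)       # shape (2048,): what data/cocobu_fc stored
```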