majority voting

ajing · Jun 28, 2017 · 9cbabc0 · 9cbabc0
1 parent 8c1b928
commit 9cbabc0
Show file tree

Hide file tree

Showing 3 changed files with 14 additions and 5 deletions.
diff --git a/README.md b/README.md
@@ -34,4 +34,11 @@ converge, with a good learning rate setup, it is able to outperform Adam. (why??
 3. Used different transformations on the test dataset and averaged the ensemble results. (LB: 0.93112, Rank: it is 18th now :(  )
 
 ---------------------------------------
+
+## Update June 28
+
+Majority voting gives a small improvement: 0.93112 -> 0.93114. Now ranked 21st.
+
+----------------------------------------
+
 I have been doing some other stuff during the weekend, so no progress yet. If anyone wants to team up, please email me at [email protected]. In addition, if anyone has new ideas using my code, please discuss them on the Kaggle discussion board so that every competitor is able to learn something new! Thanks!
diff --git a/baseline_ensembles.py b/baseline_ensembles.py
@@ -232,12 +232,13 @@ def predict_test_majority():
     Majority voting method.
     """
     labels = np.empty((len(models), 61191, 17))
-    for m_idx, model in models:
+    for m_idx, model in enumerate(models):
         name = str(model).split()[1]
+        print('predicting model {}'.format(name))
         net = nn.DataParallel(model().cuda())
-        net.load_state_dict(torch.load('models/{}.pth').format(name))
+        net.load_state_dict(torch.load('models/{}.pth'.format(name)))
         net.eval()
-        preds = np.zeros(61191, 17)
+        preds = np.zeros((61191, 17))
         for t in transforms:
             test_dataloader.dataset.images = t(test_dataloader.dataset.images)
             pred = predict(net, dataloader=test_dataloader)
@@ -250,7 +251,7 @@ def predict_test_majority():
 
     # majority voting
     labels = labels.sum(axis=0)
-    labels = (labels >= len(models)//2).astype(int)
+    labels = (labels >= (len(models)//2)).astype(int)
     pred_csv(predictions=labels, name='majority_voting_ensembles')
 
 

diff --git a/util.py b/util.py
@@ -40,10 +40,11 @@ def pred_csv(predictions, name, threshold=None):
     """
     csv_name = os.path.join(KAGGLE_DATA_DIR, 'sample_submission.csv')
     submission = pd.read_csv(csv_name)
-    print(submission)
     for i, pred in enumerate(predictions):
         if threshold is not None:
             labels = (pred > threshold).astype(int)
+        else:
+            labels = pred
         labels = np.where(labels == 1)[0]
         labels = ' '.join(idx_name()[index] for index in labels)
         submission['tags'][i] = labels