-
Notifications
You must be signed in to change notification settings - Fork 2
/
ensemble_gen_oof_predicts.py
executable file
·50 lines (35 loc) · 1.1 KB
/
ensemble_gen_oof_predicts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/python3.6
import os
import re
import sys
import yaml
from glob import glob
from collections import OrderedDict
from typing import List
import numpy as np
import pandas as pd
from tqdm import tqdm
from metrics import F_score
from debug import dprint
NUM_FOLDS = 5
NUM_CLASSES = 1103


def accumulate_model_oof(result, fold_num, predicts, num_folds=NUM_FOLDS,
                         load=np.load):
    """Add one model's weighted per-fold predictions into ``result`` in place.

    For every fold index ``k`` the rows of ``result`` selected by the boolean
    mask ``fold_num == k`` are incremented by ``load(entry_k) * weight``.

    Args:
        result: float array accumulating the blend; modified in place.
        fold_num: array of fold indices used to build the row mask
            (presumably one entry per row of ``result`` -- confirm against
            whatever produces ``folds.npy``).
        predicts: mapping with a ``'weight'`` scalar and a ``'predicts'``
            list holding one entry per fold, in fold order.
        num_folds: required length of ``predicts['predicts']``.
        load: callable turning a ``'predicts'`` entry into an array; the
            default ``np.load`` treats each entry as a ``.npy`` path.

    Raises:
        ValueError: if ``predicts['predicts']`` does not contain exactly
            ``num_folds`` entries.
    """
    weight = predicts['weight']
    fold_files = predicts['predicts']

    # Explicit check instead of ``assert``: asserts are stripped under
    # ``python -O`` and a short list would then silently leave folds empty.
    if len(fold_files) != num_folds:
        raise ValueError(
            f'expected {num_folds} fold predictions, got {len(fold_files)}'
        )

    for fold, pred in enumerate(fold_files):
        # The loaded array must have as many rows as the mask selects;
        # numpy raises on a shape mismatch.
        result[fold_num == fold] += load(pred) * weight


if __name__ == '__main__':
    if len(sys.argv) != 2:
        # sys.exit(<str>) prints the message to stderr and exits with
        # status 1, so a bad invocation is not reported as success.
        sys.exit(f'usage: {sys.argv[0]} ensemble.yml')

    source_file = sys.argv[1]
    # Output is written to the current directory as "<yaml basename>_oof.npy".
    result_name = os.path.splitext(os.path.basename(source_file))[0] + '_oof.npy'

    fold_num = np.load('folds.npy')
    train_df = pd.read_csv('../input/train.csv')

    with open(source_file) as f:
        ensemble = yaml.load(f, Loader=yaml.SafeLoader)

    # An empty YAML document loads as None and an empty list would make the
    # final division produce NaNs; fail early with a clear message instead.
    if not ensemble:
        sys.exit(f'error: {source_file} does not list any models')

    result = np.zeros((train_df.shape[0], NUM_CLASSES))

    for predicts in tqdm(ensemble):
        accumulate_model_oof(result, fold_num, predicts)

    # NOTE(review): the weighted sum is divided by the number of models, not
    # by the sum of the weights, so this is a true weighted mean only when
    # the weights average to 1 -- confirm this is intended.
    result /= len(ensemble)

    dprint(result.shape)
    dprint(result)
    np.save(result_name, result)