regression.py (forked from HIPS/neural-fingerprint)
# Example regression script using neural fingerprints.
#
# Compares Morgan fingerprints to neural fingerprints.
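#
# Assumes delaney.csv (the Delaney aqueous-solubility data shipped with the
# neuralfingerprint examples) is available in the working directory.
# Run with: python regression.py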
import autograd.numpy as np
import autograd.numpy.random as npr
from neuralfingerprint import load_data
from neuralfingerprint import build_morgan_deep_net
from neuralfingerprint import build_conv_deep_net
from neuralfingerprint import normalize_array, adam
from neuralfingerprint import build_batched_grad
from neuralfingerprint.util import rmse
from autograd import grad
task_params = {'target_name' : 'measured log solubility in mols per litre',
               'data_file'   : 'delaney.csv'}
N_train = 800
N_val = 20
N_test = 20

model_params = dict(fp_length=50,    # Usually neural fps need far fewer dimensions than Morgan fps.
                    fp_depth=4,      # The depth of the network equals the fingerprint radius.
                    conv_width=20,   # Only the neural fps need this parameter.
                    h1_size=100,     # Size of hidden layer of network on top of fps.
                    L2_reg=np.exp(-2))
train_params = dict(num_iters=100,
                    batch_size=100,
                    init_scale=np.exp(-4),
                    step_size=np.exp(-6))
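# For scale: init_scale = exp(-4) is roughly 0.018, step_size = exp(-6) roughly
# 0.0025, and L2_reg = exp(-2) roughly 0.135. Writing hyperparameters as exp(k)
# keeps them positive and makes searching over them on a log scale convenient.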

# Define the architecture of the network that sits on top of the fingerprints.
vanilla_net_params = dict(
    layer_sizes = [model_params['fp_length'], model_params['h1_size']],  # One hidden layer.
    normalize=True, L2_reg = model_params['L2_reg'], nll_func = rmse)
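# In other words, the regressor on top of either fingerprint is (roughly)
# fp (length 50) -> fully connected hidden layer (size 100) -> scalar prediction,
# trained with RMSE as the loss via nll_func. This describes the intent of the
# parameters above; the exact layer wiring lives inside build_morgan_deep_net
# and build_conv_deep_net.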

def train_nn(pred_fun, loss_fun, num_weights, train_smiles, train_raw_targets, train_params, seed=0,
             validation_smiles=None, validation_raw_targets=None):
    """loss_fun has inputs (weights, smiles, targets)"""
    print "Total number of weights in the network:", num_weights
    init_weights = npr.RandomState(seed).randn(num_weights) * train_params['init_scale']

    num_print_examples = 100
    # normalize_array standardizes the training targets; undo_norm maps
    # predictions back into the original target units.
    train_targets, undo_norm = normalize_array(train_raw_targets)
    training_curve = []
    def callback(weights, iter):
        """Prints training (and optionally validation) RMSE every 10 iterations."""
        if iter % 10 == 0:
            print "max of weights", np.max(np.abs(weights))
            train_preds = undo_norm(pred_fun(weights, train_smiles[:num_print_examples]))
            cur_loss = loss_fun(weights, train_smiles[:num_print_examples], train_targets[:num_print_examples])
            training_curve.append(cur_loss)
            print "Iteration", iter, "loss", cur_loss, \
                  "train RMSE", rmse(train_preds, train_raw_targets[:num_print_examples]),
            if validation_smiles is not None:
                validation_preds = undo_norm(pred_fun(weights, validation_smiles))
                print "Validation RMSE", iter, ":", rmse(validation_preds, validation_raw_targets),

    # Build the gradient of the loss using autograd, batched over the training data.
    grad_fun = grad(loss_fun)
    grad_fun_with_data = build_batched_grad(grad_fun, train_params['batch_size'],
                                            train_smiles, train_targets)

    # Optimize the weights with Adam.
    trained_weights = adam(grad_fun_with_data, init_weights, callback=callback,
                           num_iters=train_params['num_iters'], step_size=train_params['step_size'])

    def predict_func(new_smiles):
        """Returns predictions in the original units of the raw targets."""
        return undo_norm(pred_fun(trained_weights, new_smiles))
    return predict_func, trained_weights, training_curve
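
# A minimal usage sketch of train_nn on its own (the SMILES strings and target
# values here are hypothetical placeholders, not taken from delaney.csv):
#
#   loss_fun, pred_fun, parser = build_morgan_deep_net(
#       model_params['fp_length'], model_params['fp_depth'], vanilla_net_params)
#   predict, weights, curve = train_nn(pred_fun, loss_fun, len(parser),
#                                      ['CCO', 'c1ccccc1'], np.array([-0.77, -2.0]),
#                                      train_params)
#   print predict(['CCN'])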

def main():
    print "Loading data..."
    traindata, valdata, testdata = load_data(
        task_params['data_file'], (N_train, N_val, N_test),
        input_name='smiles', target_name=task_params['target_name'])
    train_inputs, train_targets = traindata
    val_inputs, val_targets = valdata
    test_inputs, test_targets = testdata

    def print_performance(pred_func):
        train_preds = pred_func(train_inputs)
        val_preds = pred_func(val_inputs)
        print "\nPerformance (RMSE) on " + task_params['target_name'] + ":"
        print "Train:     ", rmse(train_preds, train_targets)
        print "Validation:", rmse(val_preds, val_targets)
        print "-" * 80
        return rmse(val_preds, val_targets)
    def run_morgan_experiment():
        loss_fun, pred_fun, net_parser = \
            build_morgan_deep_net(model_params['fp_length'],
                                  model_params['fp_depth'], vanilla_net_params)
        num_weights = len(net_parser)
        predict_func, trained_weights, morgan_training_curve = \
            train_nn(pred_fun, loss_fun, num_weights, train_inputs, train_targets,
                     train_params, validation_smiles=val_inputs, validation_raw_targets=val_targets)
        print_performance(predict_func)
        # Evaluate on the held-out test set so the value printed below as
        # "Morgan test RMSE" really is a test-set number.
        test_predictions = predict_func(test_inputs)
        return rmse(test_predictions, test_targets)
    def run_conv_experiment():
        conv_layer_sizes = [model_params['conv_width']] * model_params['fp_depth']
        conv_arch_params = {'num_hidden_features' : conv_layer_sizes,
                            'fp_length' : model_params['fp_length'], 'normalize' : 1}
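        # With conv_width=20 and fp_depth=4 this asks for num_hidden_features =
        # [20, 20, 20, 20]: one graph-convolution layer per bond radius, each
        # with 20 hidden features, producing a length-50 fingerprint.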
        loss_fun, pred_fun, conv_parser = \
            build_conv_deep_net(conv_arch_params, vanilla_net_params, model_params['L2_reg'])
        num_weights = len(conv_parser)
        predict_func, trained_weights, conv_training_curve = \
            train_nn(pred_fun, loss_fun, num_weights, train_inputs, train_targets,
                     train_params, validation_smiles=val_inputs, validation_raw_targets=val_targets)
        test_predictions = predict_func(test_inputs)
        return rmse(test_predictions, test_targets)
print "Task params", task_params
print
print "Starting Morgan fingerprint experiment..."
test_loss_morgan = run_morgan_experiment()
print "Starting neural fingerprint experiment..."
test_loss_neural = run_conv_experiment()
print
print "Morgan test RMSE:", test_loss_morgan, "Neural test RMSE:", test_loss_neural
if __name__ == '__main__':
main()