train_tl_kitchen.py
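# train_tl_kitchen.py: trains a task-language abstraction model on kitchen
# demonstration trajectories, pairing a RelationModel over object relations with a
# BERT sentence encoder inside LanguageAbstractionTrainer. Note that the
# demonstration path in main() is hard-coded and may need to be adjusted for your
# environment.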
import os
import sys
import argparse
import datetime
import torch
from algorithms.translation.sentence_encoder import BertEncoder
from utils.utils import setup_seeds, get_best_cuda
from algorithms.demonstration import TrajectoryDemonstration
from algorithms.generation.mlm_language_abstraction import RelationModel, LanguageAbstractionTrainer


def get_args():
    parser = argparse.ArgumentParser()
    # training config
    parser.add_argument('--cuda', action="store_true", default=True)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--epoch', type=int, default=201)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--save_interval', type=int, default=10)
    parser.add_argument('--log_interval', type=int, default=1)
    # task language config
    parser.add_argument('--tl_model', type=str, default="relation")
    parser.add_argument('--latent', type=int, default=16)
    # relation model config
    parser.add_argument('--num_obj', type=int, default=6)
    parser.add_argument('--num_pu', type=int, default=1)
    parser.add_argument('--num_pred', type=int, default=2)
    parser.add_argument('--num_variable', type=int, default=2)
    parser.add_argument('--reparameterize', action="store_false", default=True)
    parser.add_argument('--hidden_dim', type=int, default=128)
    parser.add_argument('--n_layer', type=int, default=3)
    # training parameters
    parser.add_argument('--n_depth', type=int, default=1)
    parser.add_argument('--n_head', type=int, default=6)
    parser.add_argument('--lr', type=float, default=3e-4)
    parser.add_argument('--sample_middle_state', default=True, action="store_false")
    parser.add_argument('--language_model', default="bert", type=str)
    parser.add_argument('--using_middle_embed', default=True, action="store_false")
    parser.add_argument('--bert_dim', default=32, type=int)
    parser.add_argument('--num_grammar', default=40, type=int)
    parser.add_argument('--test_ratio', default=0.0512, type=float)
    parser.add_argument('--debug', default=False, action="store_true")
    parser.add_argument('--sample_from_predefined_set', default=False, action="store_true")
    args = parser.parse_args()
    return args
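

# Note on flag semantics: arguments declared with action="store_false" and
# default=True (--reparameterize, --sample_middle_state, --using_middle_embed) are
# enabled by default, and passing the flag on the command line turns the
# corresponding feature off. Likewise, --cuda uses action="store_true" with
# default=True, so CUDA is selected whether or not the flag is passed.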


def main():
    args = get_args()
    setup_seeds(args.seed)
    log_dir = None

    # load dataset
    demonstration_path = "/home/yangxy/workspace/language_rl/dataset/kitchen_20230426_with_goals.npy"
    print(f"Load demonstration from {demonstration_path}")
    demonstration = TrajectoryDemonstration()
    demonstration.load(demonstration_path)
    obs_size = demonstration.fields_attrs["observations"]["shape"][-1]

    device = torch.device(f"cuda:{get_best_cuda()}") if args.cuda else torch.device("cpu")

    # build the task-language (policy language) model
    print(f"Choose task model: {args.tl_model}")
    if args.tl_model == "relation":
        policy_language_model = RelationModel(
            input_dim=obs_size,
            num_obj=args.num_obj,
            num_variable=args.num_variable,
            num_pu=args.num_pu,
            num_pred=args.num_pred,
            hidden_dim=args.hidden_dim,
            reparameterize=args.reparameterize,
            device=device
        )
    else:
        raise NotImplementedError

    # build the sentence encoder
    if args.language_model == "bert":
        language_model = BertEncoder(
            output_dim=args.bert_dim, device=device,
            hidden_dim=args.hidden_dim
        )
    else:
        raise NotImplementedError

    # assemble the trainer and start training
    trainer = LanguageAbstractionTrainer(
        demonstration=demonstration,
        policy_language_model=policy_language_model,
        language_model=language_model,
        obs_dim=obs_size,
        device=device,
        log_dir=log_dir,
        log_interval=args.log_interval,
        save_interval=args.save_interval,
        lr=args.lr,
        num_epoch=args.epoch,
        batch_size=args.batch_size,
        sample_middle_state=args.sample_middle_state,
        using_middle_embed=args.using_middle_embed,
        demonstration_path=demonstration_path,
        num_grammar=args.num_grammar,
        test_set_ratio=args.test_ratio,
        sample_from_predefined_set=args.sample_from_predefined_set,
        auto_regressive=True,
        n_layer=args.n_layer
    )
    trainer.train()


if __name__ == "__main__":
    main()
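

# Example invocations (sketches; the defaults already select the relation model and
# the BERT encoder, so flags are only needed to deviate from them):
#   python train_tl_kitchen.py
#   python train_tl_kitchen.py --seed 1 --batch_size 64 --epoch 101
#   python train_tl_kitchen.py --reparameterize    # store_false flag: disables reparameterization
#   python train_tl_kitchen.py --debug --sample_from_predefined_set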