-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path main.py
41 lines (24 loc) · 948 Bytes
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
from datasetloader.datasetloader import DatasetLoader
from models.modelbuilder import LLMModelBuilder
from models.tokenbuilder import TokenizerBuilder
from models.training import StartTraining
from tunings.finetune import FineTune
def train_llm():
    """Run the LLM fine-tuning pipeline: load the dataset, build the base
    model and tokenizer, tokenize the data, then apply PEFT fine-tuning."""
    # Fetch the training dataset through the project's loader.
    loader = DatasetLoader()
    raw_dataset = loader.get_dataset()

    # Build the base FLAN model and its tokenizer wrapper.
    model_builder = LLMModelBuilder()
    tokenizer_builder = TokenizerBuilder()
    base_model = model_builder.get_flan_model()
    # NOTE(review): the tokenizer object itself is not used below —
    # presumably get_tokenizer() has needed side effects; confirm.
    tokenizer = tokenizer_builder.get_tokenizer()

    # Tokenize the dataset for training.
    tokenized = tokenizer_builder.get_tokenized_inputs(dataset=raw_dataset)
    print(f'after tokenized datasets \n: {tokenized}')

    # Alternative: full fine-tuning (disabled in favor of PEFT below).
    # trainer = StartTraining(tokenized, base_model)
    # trainer.go()

    # PEFT (parameter-efficient) fine-tuning path.
    tuner = FineTune(model=base_model, tokenized_datasets=tokenized)
    tuner.tune()
# Script entry point: kick off the fine-tuning pipeline when run directly.
if __name__ == '__main__':
    train_llm()