-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun.sh
71 lines (62 loc) · 2.23 KB
/
run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/bin/bash
# Pipeline driver. Each numbered step below runs only when its index lies in
# the inclusive range [start_step, end_step]; edit these two values to choose
# which steps to execute.
start_step=0
end_step=0

# Raw inputs and the root directory under which all artifacts are written.
output_dir='../output'
raw_news_dir='../data/CnNewsReport'
raw_price_dir='../data/CnStockPrice'
mkdir -p "$output_dir"

# Step 0: process news corpus and prices.
# Every path below is derived from output_dir and handed to data_utils.py;
# several of them are reused by later steps.
segment_dir="$output_dir/segmented"
news_dir="$output_dir/token_ids"
vocab_file="$output_dir/vocab"
price_dir="$output_dir/prices"
embedding_train_file="$output_dir/embedding_train_corpus"
number_info_file="$output_dir/number_info"
fastText_dir="$output_dir/fastText"
if [[ "$start_step" -le 0 && "$end_step" -ge 0 ]]; then
  mkdir -p "$segment_dir" "$news_dir" "$price_dir" "$fastText_dir"
  # The first argument used to be $news_dir, which left raw_news_dir unused
  # and passed the freshly created token_ids directory twice.
  # NOTE(review): assumes data_utils.py expects the raw news directory first,
  # mirroring the raw_price_dir/price_dir pair - confirm against data_utils.py.
  python data_utils.py \
    "$raw_news_dir" \
    "$segment_dir" \
    "$vocab_file" \
    "$news_dir" \
    "$embedding_train_file" \
    "$raw_price_dir" \
    "$price_dir" \
    "$fastText_dir" \
    "$number_info_file"
fi
# Step 1: get P and N set.
# Runs pmi.py with the vocab file and news directory from step 0, the seed
# file, K, and the "optimal" file path.
# NOTE(review): which of the polar_* files are inputs vs. outputs is decided
# inside pmi.py - confirm there.
K=100
polar_seed_file="${output_dir}/polar_seed"
polar_optimal_file="${output_dir}/polar_optimal"
if [[ "$start_step" -le 1 && "$end_step" -ge 1 ]]; then
  python pmi.py \
    "${vocab_file}" \
    "${news_dir}" \
    "${polar_seed_file}" \
    "${K}" \
    "${polar_optimal_file}"
fi
# Step 2: train GloVe word embedding.
# train_glove.sh receives the embedding training corpus, the output directory
# and the name "vectors".
# NOTE(review): glove_embedding points at vectors.txt while the name passed is
# "vectors" - presumably train_glove.sh appends the extension; confirm.
glove_output_dir="${output_dir}/glove"
glove_embedding="${glove_output_dir}/vectors.txt"
if [[ "$start_step" -le 2 && "$end_step" -ge 2 ]]; then
  sh train_glove.sh "${embedding_train_file}" "${glove_output_dir}" vectors
fi
# Step 3: train word2vec word embedding.
# train_word2vec.sh receives the embedding training corpus, the output
# directory and the file name "vectors.txt".
word2vec_output_dir="${output_dir}/word2vec"
word2vec_embedding="${word2vec_output_dir}/vectors.txt"
if [[ "$start_step" -le 3 && "$end_step" -ge 3 ]]; then
  sh train_word2vec.sh \
    "${embedding_train_file}" \
    "${word2vec_output_dir}" \
    vectors.txt
fi
# Step 4: train fastText graph embedding.
# The train/test file paths live inside fastText_dir, the directory that
# step 0 creates and passes to data_utils.py; they are only defined when
# this step actually runs.
fastText_embedding="${fastText_dir}/vectors.txt"
if [[ "$start_step" -le 4 && "$end_step" -ge 4 ]]; then
  train_file="${fastText_dir}/train_file"
  test_file="${fastText_dir}/test_file"
  sh train_fastText.sh \
    "${train_file}" \
    "${test_file}" \
    "${fastText_dir}" \
    vectors.txt
fi
# Step 5: train model.
# Creates the training directory if needed and passes it to model.py via
# --train_dir.
train_dir="${output_dir}/train_data"
if [[ "$start_step" -le 5 && "$end_step" -ge 5 ]]; then
  mkdir -p "${train_dir}"
  python model.py "--train_dir=${train_dir}"
fi
# Step 6: test model.
# Runs model.py in --test mode against train_dir, then passes the test output
# to proc_result.py together with number_info_file and the two accuracy paths.
test_output="$output_dir/test_result"
test_accuracy="$output_dir/accuracy"
test_acc_news="$output_dir/accuracy_sort_news"
if [[ "$start_step" -le 6 && "$end_step" -ge 6 ]]; then
  # Only post-process when the test run succeeded; otherwise proc_result.py
  # would read a missing or stale test_output file.
  if python model.py --test \
    "--train_dir=$train_dir" \
    "--test_output=$test_output" \
    --batch_size=128; then
    python proc_result.py \
      "$test_output" \
      "$number_info_file" \
      "$test_accuracy" \
      "$test_acc_news"
  else
    echo "model.py --test failed; skipping proc_result.py" >&2
  fi
fi