forked from SCANL/scanl_tagger
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
71 lines (66 loc) · 3.06 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Base image: python:3.10-slim (Python 3.10, "slim" variant).
FROM python:3.10-slim
# Install OS tools and the Python requirements in a single layer.
#   - curl is used by the start-up command (CMD) to fetch data files.
#   - git is presumably needed by pip for VCS-hosted requirements
#     (NOTE(review): confirm against requirements.txt).
#   - ca-certificates is listed explicitly because --no-install-recommends
#     stops apt from pulling it in as a recommendation of curl/git.
# --no-install-recommends and --no-cache-dir keep the layer small; the apt
# lists are removed in the same layer so they never reach the image.
# requirements.txt is referenced by absolute path so the step does not
# depend on the current working directory.
COPY requirements.txt /requirements.txt
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git && \
    pip install --no-cache-dir -r /requirements.txt && \
    rm -rf /var/lib/apt/lists/*
# NLTK data downloads (POS tagger model and the universal tagset mapping).
# nltk.download() signals failure through its return value rather than an
# exception, so the results are checked and turned into a non-zero exit
# status: a failed download now fails the build instead of producing an
# image with missing NLTK data.
RUN python3 -c "import sys, nltk; sys.exit(0 if all(nltk.download(pkg) for pkg in ('averaged_perceptron_tagger', 'universal_tagset')) else 1)"
# Application files.
# The Python sources, serve.json and the `main` launcher (invoked by CMD as
# /main) are placed in the image root; the GradientBoostingClassifier model
# file goes to /models and the contents of input/ go to /input.
COPY classifier_multiclass.py \
     create_models.py \
     download_code2vec_vectors.py \
     feature_generator.py \
     main \
     print_utility_functions.py \
     serve.json \
     tag_identifier.py \
     /.
COPY models/model_GradientBoostingClassifier.pkl /models/.
COPY input/* /input/.
# Container start-up sequence (shell form, executed by /bin/sh -c):
#   1. Refresh data files from the data server. For each file the server's
#      Last-Modified header is read with a HEAD request; the file is
#      downloaded when it is missing locally or the remote copy is newer than
#      the local one. The whole refresh is skipped when the Last-Modified
#      header of target_vecs.txt or token_vecs.txt cannot be retrieved.
#        /code2vec/target_vecs.txt, /code2vec/token_vecs.txt
#        /words/abbreviationList.csv, /words/en.txt
#   2. Train: /main -t
#   3. Run:   /main -r --words words/abbreviationList.csv
# Notes:
#   - The server address lives in one variable (base_url) so every request
#     uses the same, correctly spelled URL scheme.
#   - mkdir -p guarantees the target directories exist before curl writes
#     into them; it is a no-op when they are already present (e.g. mounted).
#   - The final command is exec'd so it replaces the shell and receives
#     signals sent to the container directly.
CMD date; \
    echo "Download..."; \
    base_url="http://131.123.42.41"; \
    mkdir -p /code2vec /words; \
    remote_target_date=$(curl -sI "$base_url/target_vecs.txt" | grep -i "Last-Modified" | cut -d' ' -f2-); \
    remote_token_date=$(curl -sI "$base_url/token_vecs.txt" | grep -i "Last-Modified" | cut -d' ' -f2-); \
    remote_words_date=$(curl -sI "$base_url/abbreviationList.csv" | grep -i "Last-Modified" | cut -d' ' -f2-); \
    remote_dictionary_date=$(curl -sI "$base_url/en.txt" | grep -i "Last-Modified" | cut -d' ' -f2-); \
    if [ -n "$remote_target_date" ] && [ -n "$remote_token_date" ]; then \
        remote_target_timestamp=$(date -d "$remote_target_date" +%s); \
        remote_token_timestamp=$(date -d "$remote_token_date" +%s); \
        remote_words_timestamp=$(date -d "$remote_words_date" +%s); \
        remote_dictionary_timestamp=$(date -d "$remote_dictionary_date" +%s); \
        if [ ! -f /code2vec/target_vecs.txt ] || [ $remote_target_timestamp -gt $(date -r /code2vec/target_vecs.txt +%s) ]; then \
            curl -s -o /code2vec/target_vecs.txt "$base_url/target_vecs.txt"; \
            echo "target_vecs.txt updated"; \
        else \
            echo "target_vecs.txt not updated"; \
        fi; \
        if [ ! -f /code2vec/token_vecs.txt ] || [ $remote_token_timestamp -gt $(date -r /code2vec/token_vecs.txt +%s) ]; then \
            curl -s -o /code2vec/token_vecs.txt "$base_url/token_vecs.txt"; \
            echo "token_vecs.txt updated"; \
        else \
            echo "token_vecs.txt not updated"; \
        fi; \
        if [ ! -f /words/abbreviationList.csv ] || [ $remote_words_timestamp -gt $(date -r /words/abbreviationList.csv +%s) ]; then \
            curl -s -o /words/abbreviationList.csv "$base_url/abbreviationList.csv"; \
            echo "abbreviationList.csv updated"; \
        else \
            echo "abbreviationList.csv not updated"; \
        fi; \
        if [ ! -f /words/en.txt ] || [ $remote_dictionary_timestamp -gt $(date -r /words/en.txt +%s) ]; then \
            curl -s -o /words/en.txt "$base_url/en.txt"; \
            echo "en.txt updated"; \
        else \
            echo "en.txt not updated"; \
        fi; \
    else \
        echo "Failed to retrieve Last-Modified headers"; \
    fi; \
    date; \
    echo "Training..."; \
    /main -t; \
    date; \
    echo "Running..."; \
    exec /main -r --words words/abbreviationList.csv
# Container time zone, used by the `date` calls in the start-up command.
# America/Detroit is the canonical tz database name for the zone that the
# legacy alias US/Michigan links to; the canonical name also resolves on
# systems where the legacy ("backward") aliases are not installed.
ENV TZ=America/Detroit