from key_extraction import keywordExtractor
from transformers import ElectraModel, ElectraTokenizerFast
import numpy as np
import pandas as pd
from typing import Union, Tuple, List, Dict
from itertools import chain, islice
import torch
import openai
from gensim.models import keyedvectors
import pickle
# load model and tokenizer
name = "monologg/koelectra-base-v3-discriminator"
model = ElectraModel.from_pretrained(name)
tokenizer = ElectraTokenizerFast.from_pretrained(name)
# load keywordExtractor
key = keywordExtractor(model, tokenizer, dir='data/preprocess/eng_han.csv')

# load food data
scraping_result = pd.read_csv('data/food_data.csv', encoding='cp949')
# scraping_result = pd.read_csv('data/food_data2.csv')

##################################### Custom helper functions #####################################

API_KEY = 'sk-'  ####### API key (placeholder)

# Call the ChatGPT API and return the assistant's reply
def callChatGPT(prompt, API_KEY=API_KEY):
    messages = []
    # set the API key
    openai.api_key = API_KEY
    messages.append({"role": "user", "content": prompt})
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages
    )
    chat_response = completion.choices[0].message.content
    messages.append({"role": "assistant", "content": chat_response})
    return messages[1]["content"]
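
# NOTE: openai.ChatCompletion is the legacy (pre-1.0) openai SDK interface.
# On openai>=1.0 the equivalent call would look roughly like this sketch
# (not used by this script):
#   from openai import OpenAI
#   client = OpenAI(api_key=API_KEY)
#   completion = client.chat.completions.create(
#       model="gpt-3.5-turbo",
#       messages=messages,
#   )
#   chat_response = completion.choices[0].message.content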

# Get the data we need about a menu item from ChatGPT
def obtain_data(menu_name):
    # chat_res_cat = callChatGPT(menu_name + " 는 [밥], [국], [면], [분식] 중에 뭐야")  # ask for the category: rice / soup / noodles / snack food
    # chat_res_cat = chat_res_cat[chat_res_cat.find('[')+1:chat_res_cat.find(']')]  # GPT answer: menu category
    chat_res_des = callChatGPT("음식 " + menu_name + "에 대한 간단한 설명")  # GPT answer: a short description of the food
    print(chat_res_des)
    # Offline test stub (canned Korean answers for "라면" / ramen):
    # menu_name = "라면"
    # chat_res_cat = "면"
    # chat_res_des = '라면은 아시아에서 유명한 인스턴트 면 요리로, 면과 스프로 구성됩니다. 면은 탄력이 있고 쫄깃하며, 다양한 모양과 두께로 제작됩니다. 스프는 라면의 맛을 결정짓는 중요한 재료로, 다양한 맛과 종류가 있습니다. 라면은 추가 재료로 고기, 해산물, 채소, 계란 등을 넣어 풍부하고 맛있게 즐길 수 있습니다. 라면은 전 세계적으로 인기 있는 음식으로, 맛과 편리함으로 알려져 있습니다.'
    # menu_str = menu_name + " " + chat_res_cat + " " + chat_res_des
    menu_str = menu_name + " " + chat_res_des
    menu_list = menu_str.split()
    return menu_list
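
# Illustrative return shape (hypothetical; the description text comes back
# from ChatGPT at run time, so the actual tokens vary):
#   obtain_data("라면")
#   -> ["라면", "라면은", "아시아에서", ...]  # menu name + whitespace-split description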

# New menu name -> menu name, category, description -> keyword list
def get_keyword_list(menu_name):
    min_count = 2
    min_length = 1
    raw_data = obtain_data(menu_name)
    keyword_list = key._extract_keywords(raw_data)
    translated_keyword_list = key._map_english_to_korean(keyword_list)
    refined_keyword_list = key._eliminate_min_count_words(translated_keyword_list, min_count)
    result = list(filter(lambda x: len(x) >= min_length, refined_keyword_list))
    return result
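
# Usage sketch (hypothetical output; keywords depend on the ChatGPT response):
#   get_keyword_list("참치김밥")
#   -> ["참치", "김밥", ...]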

# Index number -> keyword list of an existing menu item (currently unused)
def get_keyword_idx(num):
    min_count = 2
    min_length = 1
    doc = scraping_result.iloc[num]
    raw_data = _convert_series_to_list_in_main(doc)
    keyword_list = key._extract_keywords(raw_data)
    translated_keyword_list = key._map_english_to_korean(keyword_list)
    refined_keyword_list = key._eliminate_min_count_words(translated_keyword_list, min_count)
    result = list(filter(lambda x: len(x) >= min_length, refined_keyword_list))
    return result

# Initialize the food_name list and the keyword list extracted from each existing menu
def init_function():
    food_name = []
    food_keyword = []
    for i in range(len(scraping_result)):
        docs_keywords = extract_keyword_in_main(scraping_result.iloc[[i]])
        food_name.append(docs_keywords["food_name"][0])
        food_keyword.append(docs_keywords["keywords"][0])
    return [food_name, food_keyword]
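
# init_function is never called below; the pickles loaded in the pipeline were
# presumably produced by a one-off run along these lines (an assumption, since
# that step is not shown in this file):
#   food_name_list, food_keyword_list = init_function()
#   with open("data/food_name_data.pickle", "wb") as fw:
#       pickle.dump(food_name_list, fw)
#   with open("data/food_keyword_data.pickle", "wb") as fw:
#       pickle.dump(food_keyword_list, fw)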

# Search for menu items related to the given menu name
def search_menu(menu_name, food_name_list, food_keyword_list):
    search = get_keyword_list(menu_name)  # extract keywords from the input menu
    # Keyword expansion with Word2Vec (currently disabled):
    # w2v_model = keyedvectors.load_word2vec_format('data/w2v2')
    # recommand_keyword = w2v_model.most_similar(positive=search, topn=15)
    # np_recommand_keyword = np.array(list(map(lambda x: x[0], recommand_keyword)))
    # print('Keyword expansion with W2V:', np_recommand_keyword)
    # print('')
    # score menus whose keyword lists contain the search keywords
    user_point = [0] * len(food_name_list)
    for search_key in search:
        for i in range(len(food_name_list)):
            if search_key in food_keyword_list[i]:
                user_point[i] += 1
    # Combined scoring with the expanded keywords (currently disabled):
    # recommand_point = [0] * len(food_name_list)
    # for search_key in np_recommand_keyword:
    #     for i in range(len(food_name_list)):
    #         if search_key in food_keyword_list[i]:
    #             recommand_point[i] += 1
    # total_point = [0] * len(user_point)
    # for i in range(len(user_point)):
    #     total_point[i] = (user_point[i] * 3) + recommand_point[i]
    total_point = user_point
    top_k_idx = np.argsort(total_point)[::-1][:20]
    # map each menu name to its relevance score
    food_name_list = np.array(food_name_list)
    total_point = np.array(total_point)
    result = dict(zip(food_name_list[top_k_idx], total_point[top_k_idx]))
    # pull the food info and sort the matches by score
    food_info = pd.read_csv('data/food_data.csv', encoding='cp949')
    IDX = food_info.food_name.isin(list(result.keys()))
    food_recommandation_result = food_info[["food_name", "food_category"]][IDX].sort_values(
        by="food_name", key=lambda x: x.map(result), ascending=False
    ).reset_index(drop=True)
    return list(food_recommandation_result.food_name)
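
# Usage sketch (hypothetical; the ranking depends on the scraped data):
#   candidates = search_menu("참치김밥", food_name_list, food_keyword_list)
#   print(candidates[:5])  # top-scoring menu names, best match first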

##################################### Modified versions of the original keywordExtractor functions #####################################

def extract_keyword_list_in_main(doc: pd.Series, min_count: int = 2, min_length: int = 2) -> List:
    raw_data = _convert_series_to_list_in_main(doc)
    keyword_list = key._extract_keywords(raw_data)
    translated_keyword_list = key._map_english_to_korean(keyword_list)
    refined_keyword_list = key._eliminate_min_count_words(translated_keyword_list, min_count)
    return list(filter(lambda x: len(x) >= min_length, refined_keyword_list))

def _convert_series_to_list_in_main(series: pd.Series) -> List[str]:
    # flatten every cell of the row into a single token list
    raw_data = list(series.values)
    return list(chain(*map(lambda x: x.split(), raw_data)))
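
# Example of the flattening behavior (hypothetical row values):
#   _convert_series_to_list_in_main(pd.Series(["참치 김밥", "분식"]))
#   -> ["참치", "김밥", "분식"]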

def create_keyword_embedding_in_main(doc: pd.Series) -> torch.Tensor:
    keyword_list = extract_keyword_list_in_main(doc)
    tokenized_keyword = key.tokenize_keyword(keyword_list)
    return key._create_keyword_embedding(tokenized_keyword)

def create_doc_embedding_in_main(doc: pd.Series) -> torch.Tensor:
    stringified_doc = _convert_series_to_str_in_main(doc)
    tokenized_doc = key.tokenize_keyword(stringified_doc)
    return key._create_doc_embedding(tokenized_doc)

def _convert_series_to_str_in_main(series: pd.Series) -> str:
    return " ".join(list(series.values))

def extract_keyword_in_main(docs: pd.DataFrame) -> Dict:
    keyword_embedding = map(lambda x: create_keyword_embedding_in_main(x[1]), docs.iterrows())
    doc_embedding = map(lambda x: create_doc_embedding_in_main(x[1]), docs.iterrows())
    keyword_list = map(lambda x: extract_keyword_list_in_main(x[1]), docs.iterrows())
    co_sim_score = map(
        lambda x: key._calc_cosine_similarity(*x).flatten(),
        zip(doc_embedding, keyword_embedding),
    )
    top_n_keyword = list(
        map(lambda x: key._filter_top_n_keyword(*x), zip(keyword_list, co_sim_score))
    )
    return dict(food_name=docs["food_name"].tolist(), keywords=top_n_keyword)
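
# Shape of the returned dict (illustrative; actual values depend on the data):
#   {"food_name": ["참치김밥", ...],
#    "keywords": [["참치", "김밥", ...], ...]}  # one keyword list per row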

##################################### Full pipeline #####################################

menu_name = "참치김밥"  ## input (tuna gimbap)
lst = []
with open("data/food_name_data.pickle", "rb") as fr:
    food_name_list = pickle.load(fr)
with open("data/food_keyword_data.pickle", "rb") as fr:
    food_keyword_list = pickle.load(fr)
# print('\n\n\nTop 20 food recommendations by keyword\n')
# print(search_menu(menu_name, food_name_list, food_keyword_list))
if menu_name in food_name_list:
    print("일치하는 메뉴가 있습니다.")  # "There is a matching menu."
    lst.append(menu_name)
else:
    # extend (not append) so that an empty search result leaves lst empty
    lst.extend(search_menu(menu_name, food_name_list, food_keyword_list))
if len(lst) == 0:
    print("해당 메뉴가 없습니다.")  # "No such menu."
else:
    print(lst)