-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapriori.py
123 lines (97 loc) · 4.75 KB
/
apriori.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# İŞ PROBLEMİ : Sepet aşamasındaki kullanıcılara ürün önerisinde bulunmak
# Veri Seti: Online Retail II
# https://archive.ics.uci.edu/ml/datasets/Online+Retail+II
############################################
# ASSOCIATION RULE LEARNING (BİRLİKTELİK KURALI ÖĞRENİMİ)
############################################
############################################
# Veri Ön İşleme
############################################
# !pip install mlxtend
import pandas as pd
# Console display settings: show every column, use a wide (500 character)
# output line and never wrap a frame's repr across several blocks.
# (display.max_rows is deliberately left at its default; set it to None to
# print every row as well.)
pd.options.display.max_columns = None
pd.options.display.width = 500
pd.options.display.expand_frame_repr = False
from mlxtend.frequent_patterns import apriori, association_rules
# Raw data: the "Year 2010-2011" sheet of the Online Retail II workbook.
# Kept in df_ so that later steps can work on a copy without re-reading the file.
df_ = pd.read_excel(
    io="datasets/online_retail_II.xlsx",
    sheet_name="Year 2010-2011",
)
def outlier_thresholds(dataframe, variable, q1=0.01, q3=0.99):
    """Return the lower and upper outlier limits for one column.

    The limits follow the IQR rule (``quantile -/+ 1.5 * range``), but the
    range is measured between the ``q1`` and ``q3`` quantiles. The defaults
    (1% and 99%) are much wider than real quartiles, so only extreme values
    fall outside the limits.

    Args:
        dataframe: Frame that contains ``variable``.
        variable: Name of the numeric column to analyse.
        q1: Lower quantile, between 0 and 1.
        q3: Upper quantile, between 0 and 1 and greater than ``q1``.

    Returns:
        A ``(low_limit, up_limit)`` tuple.

    Raises:
        ValueError: If the quantiles are not ordered within ``[0, 1]``.
    """
    if not 0 <= q1 < q3 <= 1:
        raise ValueError(f"expected 0 <= q1 < q3 <= 1, got q1={q1}, q3={q3}")

    column = dataframe[variable]
    lower_quantile = column.quantile(q1)
    upper_quantile = column.quantile(q3)
    quantile_range = upper_quantile - lower_quantile

    up_limit = upper_quantile + 1.5 * quantile_range
    low_limit = lower_quantile - 1.5 * quantile_range
    return low_limit, up_limit
def replace_with_thresholds(dataframe, variable):
    """Cap the values of ``variable`` at its outlier limits, in place.

    Values below the lower limit are set to the lower limit and values above
    the upper limit are set to the upper limit. The limits come from
    ``outlier_thresholds``. ``dataframe`` is modified directly and nothing is
    returned.
    """
    lower_bound, upper_bound = outlier_thresholds(dataframe, variable)

    too_small = dataframe[variable] < lower_bound
    dataframe.loc[too_small, variable] = lower_bound

    too_large = dataframe[variable] > upper_bound
    dataframe.loc[too_large, variable] = upper_bound
def retail_data_prep(dataframe):
    """Clean the raw Online Retail II transactions and return a new frame.

    Steps:
      1. Drop rows that contain any missing value.
      2. Drop rows whose invoice number contains "C".
      3. Keep only rows with a positive ``Quantity`` and a positive ``Price``.
      4. Cap ``Quantity`` and ``Price`` at their outlier limits.

    The input frame is not modified; callers must use the returned frame.

    Args:
        dataframe: Frame with at least the columns ``Invoice``, ``Quantity``
            and ``Price``.

    Returns:
        The cleaned frame (an independent copy).
    """
    # dropna() without inplace=True so the caller's frame is left untouched.
    cleaned = dataframe.dropna()

    # Invoice may hold a mix of ints and strings (or only ints); casting to
    # str keeps the .str accessor valid in every case.
    has_c = cleaned["Invoice"].astype(str).str.contains("C", na=False)
    keep = ~has_c & (cleaned["Quantity"] > 0) & (cleaned["Price"] > 0)

    # Explicit copy: the capping below assigns into the frame, which must not
    # be a view/slice of the original (avoids SettingWithCopyWarning).
    cleaned = cleaned[keep].copy()

    replace_with_thresholds(cleaned, "Quantity")
    replace_with_thresholds(cleaned, "Price")
    return cleaned
def create_invoice_product_df(dataframe, id=False):
    """Build the invoice x product basket matrix with 0/1 entries.

    Each row is an invoice and each column a product. A cell is 1 when the
    summed ``Quantity`` of that product on that invoice is greater than zero,
    otherwise 0 (including products that do not appear on the invoice).

    Args:
        dataframe: Frame with the columns ``Invoice``, ``Quantity`` and either
            ``StockCode`` or ``Description``.
        id: When true, the columns are ``StockCode`` values; otherwise they
            are ``Description`` values.

    Returns:
        An integer DataFrame indexed by ``Invoice``.
    """
    product_column = "StockCode" if id else "Description"
    quantities = (
        dataframe.groupby(["Invoice", product_column])["Quantity"].sum().unstack()
    )
    # Missing invoice/product pairs are NaN after unstack(); NaN > 0 is False,
    # so they become 0 without a separate fillna(). The vectorised comparison
    # replaces the deprecated element-wise applymap().
    return quantities.gt(0).astype(int)
# Work on a copy so the raw frame loaded from Excel stays available as df_.
df = df_.copy()
df = retail_data_prep(df)

# Quick sanity checks on the cleaned data (results are only displayed when
# the script is run interactively).
df.head()
df.describe().T
df.isnull().sum()
df.shape  # shape is a tuple attribute; calling it raised TypeError
############################################
# Apriori Veri Yapısı
############################################
# We want invoices on the rows and products on the columns, i.e. a matrix like the one below:
#
# Description   NINE DRAWER OFFICE TIDY   SET 2 TEA TOWELS I LOVE LONDON   SPACEBOY BABY GIFT SET
# Invoice
# 536370                              0                                1                        0
# 536852                              1                                0                        1
# 536974                              0                                0                        0
# 537065                              1                                0                        0
# 537463                              0                                0                        1
#
# Each row represents one basket (invoice).
def create_rules(dataframe, id=True, country="France", min_support=0.01):
    """Mine association rules from the transactions of one country.

    Args:
        dataframe: Cleaned transactions with a ``Country`` column plus the
            columns required by ``create_invoice_product_df``.
        id: Passed to ``create_invoice_product_df``; when true, products are
            identified by ``StockCode`` instead of ``Description``.
        country: Only rows whose ``Country`` equals this value are used.
        min_support: Minimum support for the frequent itemsets and for the
            rules derived from them.

    Returns:
        The rules DataFrame produced by mlxtend's ``association_rules``.
    """
    country_rows = dataframe[dataframe["Country"] == country]
    basket = create_invoice_product_df(country_rows, id)

    # mlxtend warns about non-bool input; the 0/1 matrix converts losslessly.
    frequent_itemsets = apriori(
        basket.astype(bool), min_support=min_support, use_colnames=True
    )
    rules = association_rules(
        frequent_itemsets, metric="support", min_threshold=min_support
    )
    return rules
# Mine the rules using create_rules' default arguments.
rules = create_rules(df)

# Look at the strongest rules: support > 0.05, confidence > 0.1 and lift > 5,
# ordered from the most to the least confident. The result is not stored.
rules.loc[
    rules["support"].gt(0.05) & rules["confidence"].gt(0.1) & rules["lift"].gt(5)
].sort_values(by="confidence", ascending=False)
############################################
# Sepet Aşamasındaki Kullanıcılara Ürün Önerisinde Bulunmak
############################################
# In practice, the answer to "which product should be recommended when a given product
# is added to the basket?" is kept in a lookup table, and recommendations are served from it.
# Example:
# Sample product id in the user's basket: 22492
def check_id(dataframe, stock_code):
    """Print the distinct descriptions recorded for ``stock_code``.

    NOTE(review): the original script called check_id without defining or
    importing it anywhere in this file; this helper fills that gap.
    """
    matches = dataframe.loc[dataframe["StockCode"] == stock_code, "Description"]
    print(matches.unique().tolist())


# Example product that the user has put into the basket.
product_id = 22492
check_id(df, product_id)

# What do we recommend for this product? Walk the rules from the highest lift
# downwards and, for every rule whose antecedents contain the product, take
# one item of its consequents.
sorted_rules = rules.sort_values("lift", ascending=False)
recommendation_list = [
    list(consequents)[0]
    for antecedents, consequents in zip(
        sorted_rules["antecedents"], sorted_rules["consequents"]
    )
    if product_id in antecedents
]
recommendation_list[0:2]
def arl_recommender(rules_df, product_id, rec_count=1):
    """Recommend products for ``product_id`` from association rules.

    Rules are visited from the highest to the lowest ``lift``. For every rule
    whose antecedents contain ``product_id``, one item of its consequents is
    taken as a candidate. Each product is recommended at most once, keeping
    its first (highest-lift) position.

    Args:
        rules_df: Rules frame with ``antecedents``, ``consequents`` and
            ``lift`` columns.
        product_id: Product that is already in the basket.
        rec_count: Maximum number of recommendations to return.

    Returns:
        A list with up to ``rec_count`` distinct recommended products; empty
        when no rule contains ``product_id`` in its antecedents.
    """
    sorted_rules = rules_df.sort_values("lift", ascending=False)
    candidates = (
        next(iter(consequents))
        for antecedents, consequents in zip(
            sorted_rules["antecedents"], sorted_rules["consequents"]
        )
        if product_id in antecedents
    )
    # dict.fromkeys drops repeated candidates while preserving lift order.
    return list(dict.fromkeys(candidates))[0:rec_count]
# Sample calls: ask for one, two and three recommendations for product 22492.
arl_recommender(rules, product_id=22492, rec_count=1)
arl_recommender(rules, product_id=22492, rec_count=2)
arl_recommender(rules, product_id=22492, rec_count=3)