forked from unpackpy/unpack-py101-webscrapping
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
125 lines (99 loc) · 3.43 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from process_data import get_exchange_rates, get_lego_df, get_lego_image_url
# Basic page setup: wide layout, shopping-cart icon, course branding.
st.set_page_config(page_title="WebScrapping - PY101", page_icon="🛒", layout="wide")
st.image("https://unpackai.github.io/unpackai_logo.svg")
st.title("WebScrapping of Lego Prices around the world 🌏")
st.write("*by <name>*")
# TODO: Update your name here (and potentially the title if you want)

# Sidebar button that wipes the Streamlit cache so the next rerun re-scrapes.
# NOTE(review): st.legacy_caching is a deprecated API — confirm the pinned
# Streamlit version still exposes it.
st.sidebar.button("Refresh Data", on_click=st.legacy_caching.clear_cache)

# Fetched once per script run; reused below when building the Lego DataFrames.
EXCHANGE_RATES = get_exchange_rates(["USD", "EUR", "GBP"])

st.sidebar.header("Exchange Rates:")
rates_table = pd.DataFrame.from_records(
    list(EXCHANGE_RATES.items()), columns=["currency", "rate (in CNY)"]
)
st.sidebar.table(rates_table)
# Lego sets to scrape: display name -> official Lego set number (the number
# is passed to the process_data helpers to build product URLs / image lookups).
LIST_LEGOS = {
    "Apollo Saturn V": 21309,
    "Safari Tree House": 31116,
    "NASA Women": 21312,
    "Jeep Wrangler": 42122,
    "HP Hogwarts": 71043,
    "City Town": 60097,
    "Millennium Falcon": 75257,
    "Frozen Ice Palace": 43172,
    "Classic Box": 10698,
    "Infinity Gauntlet": 76191,
}
# TODO: You can consider editing this list of Lego Sets
def get_df():
    """Build one DataFrame with the scraped prices of every configured Lego set.

    Each entry of ``LIST_LEGOS`` is scraped through ``get_lego_df`` (prices
    converted with ``EXCHANGE_RATES``); the per-set frames are then stacked
    into a single DataFrame with a fresh integer index.
    """
    frames = []
    for set_name, set_id in LIST_LEGOS.items():
        frames.append(get_lego_df(set_name, set_id, EXCHANGE_RATES))
    return pd.concat(frames, ignore_index=True)
# Fetch everything once; `df` is then progressively narrowed by the sidebar
# filters, while `df_all` keeps the complete data (needed for image lookups).
df_all = get_df()
df = df_all.copy()

st.sidebar.header("Selected Legos:")
SELECTED_LEGOS = []
for name, set_id in LIST_LEGOS.items():
    if st.sidebar.checkbox(f"{name} ({set_id})", value=True):
        SELECTED_LEGOS.append(name)
df = df[df.legoName.isin(SELECTED_LEGOS)]
st.sidebar.header("Selected Websites:")
# Websites present in the (lego-filtered) data; one checkbox per site so the
# user can exclude sources, mirroring the Lego-set filter above.
# (Previously `list_websites` was computed but never used — this implements
# the TODO that asked for website filtering.)
list_websites = df.source.unique()
SELECTED_WEBSITES = [
    site for site in list_websites if st.sidebar.checkbox(site, value=True)
]
df = df[df.source.isin(SELECTED_WEBSITES)]
st.sidebar.header("Filtering by price:")
# Guard against an empty selection: `max()` of an empty column is NaN, which
# would make the slider bounds invalid and crash the app.
current_max_price = float(df.price.max()) if not df.empty else 0.0
max_price = st.sidebar.slider(
    "Maximum Price", min_value=0.0, max_value=current_max_price, value=current_max_price
)
# Minimum-price filter (implements the TODO below the original slider);
# defaults to 0.0 so the out-of-the-box behavior is unchanged.
min_price = st.sidebar.slider(
    "Minimum Price", min_value=0.0, max_value=current_max_price, value=0.0
)
df = df[(df.price >= min_price) & (df.price <= max_price)]
# From the filtered data, compute the average price per (website, lego set).
# Aggregate only the `price` column: calling .mean() on the whole frame
# raises TypeError on the non-numeric columns (e.g. the image URL) with
# pandas >= 2.0, and `price` is the only aggregated column the plot uses.
df_average = df.groupby(["source", "legoName"], as_index=False)["price"].mean()

sns.set_theme(style="darkgrid", rc={"figure.figsize": (16, 8)})

st.header("Average Prices:")
fig_avg, ax_avg = plt.subplots()
# One diamond marker per (set, website) average, color-coded by website.
sns.stripplot(
    x="legoName",
    y="price",
    hue="source",
    data=df_average,
    size=16,
    marker="D",
    alpha=0.75,
    ax=ax_avg,
)
st.pyplot(fig_avg)
st.header("Details:")
with st.expander("Distribution of prices:", expanded=True):
    # One point per individual scraped price (not averaged), colored by website.
    fig_all, ax_all = plt.subplots()
    sns.stripplot(
        data=df,
        x="legoName",
        y="price",
        hue="source",
        alpha=0.50,
        size=10,
        ax=ax_all,
    )
    st.pyplot(fig_all)
@st.cache
def convert_df(df):
    """Serialize *df* to UTF-8 encoded CSV bytes.

    Cached so the conversion is not recomputed on every Streamlit rerun.
    """
    csv_text = df.to_csv()
    return csv_text.encode("utf-8")
with st.expander("All prices:"):
    # Offer the filtered data as a CSV download, then show it on screen.
    csv_bytes = convert_df(df)
    st.download_button(
        label="Download Lego Prices",
        data=csv_bytes,
        file_name='lego_prices.csv',
        mime='text/csv',
    )
    # The raw image-URL column is noise in a table, so it is hidden here.
    st.table(df.drop(columns=["image"]))
st.header("Pictures of Lego:")
# One collapsible section per configured set, showing its product image
# (looked up in the unfiltered data) when a usable URL is available.
for set_name, set_id in LIST_LEGOS.items():
    with st.expander(set_name):
        image_url = get_lego_image_url(set_id, df_all)
        if isinstance(image_url, str) and image_url:
            st.image(image_url)
        else:
            st.write("No image available")