-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathusage.py
59 lines (42 loc) · 1.39 KB
/
usage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import seaborn as sns
from matplotlib import pyplot as plt
from rich import pretty, print
import utils
# Enable rich's pretty-printing for interactive output.
pretty.install()

# Other datasets, currently not loaded:
# genres = utils.load("data/genres.csv")
# echonest = utils.load("data/echonest.csv")
# artists = utils.load("data/raw_artists.csv")
tracks = utils.load_tracks(
    buckets="continuous", dummies=True, fill=True, outliers=False
)

# Assumes `tracks` is a pandas DataFrame (it is used with .isnull() and
# .describe() below). DataFrame.info() writes its summary to stdout itself
# and returns None, so wrapping it in print() only emitted an extra "None".
tracks.info()
# Columns whose fraction of null values is strictly above this are reported.
threshold = 0.9

# Assumes `tracks` is a pandas DataFrame: isnull().mean() then yields, for
# every column, the fraction of rows that are null. Filtering that Series
# keeps the column labels in their original order.
null_fraction = tracks.isnull().mean()
low_coverage = null_fraction[null_fraction > threshold].index.tolist()
print(low_coverage)
print(tracks[("track", "duration")].describe())

# Column whose value frequencies are plotted. The title is derived from it so
# the two cannot drift apart.
# NOTE(review): the title used to read "Frequency of duration" while the plot
# counted ("track", "interest"); if duration was the intended column, change
# `plotted_column` instead.
plotted_column = ("track", "interest")

# plt.subplots() returns a (Figure, Axes) pair; unpack it and draw on that one
# Axes rather than binding the tuple to `fig` and stacking a second Axes on
# top with subplot2grid.
fig, ax = plt.subplots(figsize=(100, 20))
sns.countplot(x=plotted_column, data=tracks, palette="hls", ax=ax)
ax.set_title(f"Frequency of {plotted_column[1]}")
# `ax` is still the current Axes here, so this rotates its x tick labels.
plt.xticks(rotation=90)
plt.show()
# error checking
# errors = utils.check_rules(tracks, "data/rules.txt")
# print(errors)
# print("Here are informations on tracks")
# print(tracks.info())
# my_df = tracks.query(f"not ('album', 'listens') < 0")
# print(my_df)
# errors = utils.check_rules(tracks, Path("data/rules.txt"))
# print(errors)
# print(tracks["track", "comments"].describe())
# print(tracks["track", "comments"].head(30))
# print(tracks[("track", "dummy_lyricist")].value_counts())
# df_decisiontree = tracks[[blablabla]]
# df_knn = tracks[[blablaslsadasdldsaldsa]]
# Show the distinct values present in the ("album", "type") column.
album_types = tracks[("album", "type")].unique()
print(album_types)