-
Notifications
You must be signed in to change notification settings - Fork 0
/
1
76 lines (64 loc) · 2.34 KB
/
1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Read the nyt1.csv dataset over HTTP.
data1 <- read.csv(
  url("http://stat.columbia.edu/~rachel/datasets/nyt1.csv")
)

# Peek at the first rows.
head(data1)

# Bin Age into right-closed brackets; (-Inf,0] collects ages of 0 or below.
data1[["agecat"]] <- cut(
  data1[["Age"]],
  breaks = c(-Inf, 0, 18, 24, 34, 44, 54, 64, Inf)
)

# Column-wise overview, including the new agecat factor.
summary(data1)
#brackets
# Install doBy only when it is missing, so re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("doBy", quietly = TRUE)) {
  install.packages("doBy")
}
library("doBy")
# Four-number summary of a vector: count, minimum, mean, maximum (in that
# order), returned as an unnamed vector.
siterange <- function(x) {
  n_obs <- length(x)
  lowest <- min(x)
  average <- mean(x)
  highest <- max(x)
  c(n_obs, lowest, average, highest)
}
# Count, min, mean and max of Age inside each age bracket.
summaryBy(Age ~ agecat, data = data1, FUN = siterange)

# only signed-in users have ages and genders
# Per-bracket summary of the remaining columns, using summaryBy()'s default
# statistic (no FUN is passed).
summaryBy(
  Gender + Signed_In + Impressions + Clicks ~ agecat,
  data = data1
)
# plot
# Install ggplot2 only when it is missing, so re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Histogram of Impressions, stacked by age bracket. The x aesthetic must be
# the real column name "Impressions"; the previous spelling "Impressios" made
# the plot fail because no such column exists in data1.
ggplot(data1, aes(x = Impressions, fill = agecat)) +
  geom_histogram(binwidth = 1)

# Distribution of Impressions within each age bracket.
ggplot(data1, aes(x = agecat, y = Impressions, fill = agecat)) +
  geom_boxplot()
# Click-through rate (Clicks / Impressions)
# Clicks are not interesting when there is no impression, so flag the rows:
# (-Inf,0] = no impressions, (0, Inf] = at least one impression.
data1[["hasimps"]] <- cut(data1[["Impressions"]], breaks = c(-Inf, 0, Inf))
summaryBy(Clicks ~ hasimps, data = data1, FUN = siterange)

# Density of Clicks / Impressions per age bracket, rows with impressions only.
ggplot(
  subset(data1, Impressions > 0),
  aes(x = Clicks / Impressions, colour = agecat)
) +
  geom_density()

# Same density, restricted to rows with at least one click.
ggplot(
  subset(data1, Clicks > 0),
  aes(x = Clicks / Impressions, colour = agecat)
) +
  geom_density()

# Click counts per age bracket, rows with at least one click.
ggplot(
  subset(data1, Clicks > 0),
  aes(x = agecat, y = Clicks, fill = agecat)
) +
  geom_boxplot()

# Density of raw click counts per age bracket, rows with at least one click.
ggplot(
  subset(data1, Clicks > 0),
  aes(x = Clicks, colour = agecat)
) +
  geom_density()
# create categories
# Tag each row as "NoImps" (zero impressions), "Imps" (impressions) or
# "Clicks" (at least one click). The assignments run in this order and later
# ones overwrite earlier ones, so any row with Clicks > 0 ends up as "Clicks".
data1$scode <- local({
  stage <- rep(NA_character_, nrow(data1))
  stage[data1$Impressions == 0] <- "NoImps"
  stage[data1$Impressions > 0] <- "Imps"
  stage[data1$Clicks > 0] <- "Clicks"
  # store the labels as a factor
  factor(stage)
})
head(data1)
#>   Age Gender Impressions Clicks Signed_In agecat hasimps scode
#> 1 36 0 3 0 1 (34,44] (0, Inf] Imps
#> 2 73 1 3 0 1 (64, Inf] (0, Inf] Imps
#> 3 30 0 3 0 1 (24,34] (0, Inf] Imps
#> 4 49 1 3 0 1 (44,54] (0, Inf] Imps
#> 5 47 1 11 0 1 (44,54] (0, Inf] Imps
#> 6 47 0 11 1 1 (44,54] (0, Inf] Clicks
#look at levels
# Counts the elements of x. The function name is kept as-is because
# summaryBy() uses it as the suffix of the output column (Impressions.clen).
clen <- function(x) {
  length(x)
}
# Number of rows for every scode x Gender x agecat combination.
etable <- summaryBy(
  Impressions ~ scode + Gender + agecat,
  data = data1,
  FUN = clen
)
head(etable)
#>   scode Gender agecat Impressions.clen
#> 1 Clicks 0 (-Inf,0] 17776
#> 2 Clicks 0 (0,18] 846
#> 3 Clicks 0 (18,24] 779
#> 4 Clicks 0 (24,34] 1361
#> 5 Clicks 0 (34,44] 1675
#> 6 Clicks 0 (44,54] 1494
# summaryBy() needs a formula as its first argument, so the earlier call
# summaryBy(etable) always stopped with:
#   Error in .get_variables(formula, data, id, debug.info) :
#   'formula' must be a formula or a list
# Base summary() gives the intended per-column overview of the count table.
summary(etable)
#> scode Gender agecat Impressions.clen
#> Clicks:15 Min. :0.0000 (0,18] :6 Min. : 43
#> Imps :15 1st Qu.:0.0000 (18,24]:6 1st Qu.: 219
#> NoImps:15 Median :0.0000 (24,34]:6 Median : 1525
#> Mean :0.4667 (34,44]:6 Mean : 10188
#> 3rd Qu.:1.0000 (44,54]:6 3rd Qu.: 15585
#> Max. :1.0000 (54,64]:6 Max. :118401
#> (Other):9