-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgroupBy.R
51 lines (37 loc) · 1.33 KB
/
groupBy.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
########################################################
#Stack Overflow question:
#http://stackoverflow.com/questions/40700126/manipulation-of-data-in-r-using-data-table-or-dplyr-with-groupby-and-subsetting
#
#
#
#########################################################
# For every distinct Name in `train`, summarise its (up to) three most recent
# rows by Date: the number of days spanned by those rows and the mean of X
# and Y over them. The result is stored in `outputFrame`, one row per Name.
#
# Reads the columns Name, Date, X and Y of an existing data frame `train`.

# Convert the Date column in place; text values are parsed as month/day/year.
train$Date <- as.Date(train$Date, format = "%m/%d/%Y")

# Distinct names in order of first appearance (as.character() also unwraps
# factor levels so the comparison and the stored label are plain strings).
groupNames <- as.character(unique(train$Name))

# Preallocate one typed row per name instead of growing the frame in the loop.
outputFrame <- data.frame(
  Names = rep(NA_character_, length(groupNames)),
  daysBetween = rep(NA_real_, length(groupNames)),
  avgX = rep(NA_real_, length(groupNames)),
  avgY = rep(NA_real_, length(groupNames)),
  stringsAsFactors = FALSE
)

for (i in seq_along(groupNames)) {
  name <- groupNames[i]

  # Rows belonging to this name; which() drops NA comparison results.
  dfSubset <- train[which(train$Name == name), ]

  # Sort ascending by date so the most recent rows are at the bottom.
  dfSubset <- dfSubset[order(dfSubset$Date), ]

  # Keep the last three rows. tail() returns every row when the group has
  # fewer than three, whereas the index range (nrow - 2):nrow mixes negative
  # and positive subscripts for a single-row group and raises an error.
  dfSubset <- utils::tail(dfSubset, 3)

  outputFrame$Names[i] <- name
  # Span in days between the oldest and newest of the retained rows.
  outputFrame$daysBetween[i] <-
    as.numeric(max(dfSubset$Date) - min(dfSubset$Date))
  outputFrame$avgX[i] <- mean(dfSubset$X)
  outputFrame$avgY[i] <- mean(dfSubset$Y)
}