-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_analysis.R
52 lines (40 loc) · 1.71 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
library(dplyr)
library(data.table)
# Lookup tables shipped with the data set. Each file is read with fread()
# and its two columns are then given explicit names.
# features: one row per measurement column (numeric id + measurement name).
features <- fread("features.txt")
setnames(features, c("id", "measure"))
# activities: maps the numeric activity id to its activity label.
activities <- fread("activity_labels.txt")
setnames(activities, c("id", "activity"))
## Load the raw test and train measurements with LaF, which is very fast
## (read.table would also work). Both files are opened as fixed-width text:
## 561 columns, each 16 characters wide, all parsed as doubles.
library(LaF)
fwf_widths <- rep.int(16, 561)
fwf_types <- rep("double", 561)
laf <- laf_open_fwf(
  "X_test.txt",
  column_widths = fwf_widths,
  column_types = fwf_types
)
tests <- laf[, ] ## empty row/column index: read the whole file into memory
laf <- laf_open_fwf(
  "X_train.txt",
  column_widths = fwf_widths,
  column_types = fwf_types
)
trains <- laf[, ] ## same for the training measurements
# Find the positions of the features whose name contains "mean" or "std".
# NOTE(review): "mean" is a plain, case-sensitive substring match, so it also
# selects names such as meanFreq() if features.txt contains them -- confirm
# that this is the intended set.
mean_idx <- grep("mean", features$measure)
std_idx <- grep("std", features$measure)
cols <- sort(c(mean_idx, std_idx))
# Keep only the matching V<n> columns in each data set.
tests <- tests %>% select(num_range("V", cols))
trains <- trains %>% select(num_range("V", cols))
# Replace the generic V<n> column names with the descriptive feature names,
# taken from the same positions in the features table.
kept_names <- features$measure[cols]
colnames(tests) <- kept_names
colnames(trains) <- kept_names
# Attach a readable activity label to every measurement row.
#
# add_activity_labels() reads one activity id per row from `label_file`,
# binds those ids to `measurements` by row position, looks each id up in
# `activities`, and returns the result with `activity` as the first column
# and the numeric `id` column dropped.
#
# measurements: table of feature columns, one row per observation.
# label_file:   path to the y_*.txt file holding the activity ids; assumed to
#               list rows in the same order as `measurements` -- TODO confirm.
# activities:   lookup table with columns `id` and `activity`.
add_activity_labels <- function(measurements, label_file, activities) {
  ids <- fread(label_file)
  setnames(ids, "id")
  # cbind() pairs rows purely by position, so make sure the two inputs
  # describe the same number of observations before binding them.
  stopifnot(nrow(ids) == nrow(measurements))
  labelled <- cbind(ids, measurements)
  # Name the join key explicitly instead of relying on whichever column
  # names the two tables happen to share.
  labelled <- left_join(labelled, activities, by = "id")
  # Put activity first, keep the feature columns in order, drop the id.
  select(labelled, activity, everything(), -id)
}

tests <- add_activity_labels(tests, "y_test.txt", activities)
trains <- add_activity_labels(trains, "y_train.txt", activities)
## Stack the labelled test and train rows into a single data set.
tot <- rbind(tests, trains)
## Average every measurement column within each activity. across() applies
## mean to all non-grouping columns and keeps their original names; the
## result has one row per activity.
totAvg <- tot %>%
  group_by(activity) %>%
  summarise(across(everything(), mean))
## Write the per-activity averages to avgs.txt without row names.
write.table(totAvg, file = "avgs.txt", row.names = FALSE)