-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_analysis.R
129 lines (102 loc) · 5.85 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# Function to read the different files in the UCI HAR Dataset and merge them
# together into a single tidy dataset
getTidyDataset <- function(directory = "UCI HAR Dataset"){
## 1. Check all files
### 1.1 Set path names for all required files
pathHeaders <- paste(directory, "features.txt", sep = "")
pathActivityNames <- paste(directory, "activity_labels.txt", sep = "")
pathTest <- paste(directory, "test/X_test.txt", sep = "")
pathTestActivity <- paste(directory, "test/y_test.txt", sep = "")
pathTestSubjects <- paste(directory, "test/subject_test.txt", sep = "")
pathTrain <- paste(directory, "train/X_train.txt", sep = "")
pathTrainActivity <- paste(directory, "train/y_train.txt", sep = "")
pathTrainSubjects <- paste(directory, "train/subject_train.txt", sep = "")
### 1.2 Check if all required files can be found, error if not
if(!all(file.exists(c(pathHeaders, pathActivityNames, pathTest,
pathTestActivity, pathTestSubjects, pathTrain,
pathTrainActivity, pathTrainSubjects))))
stop(paste ("One or more files could not be found in ",
directory, ", please check directory", sep = ""))
## 2. Read the required data
### 2.1 Headers for descriptive variable names
headers <- read.table(pathHeaders, na.strings = "",
header = FALSE, stringsAsFactors = FALSE)
### 2.2 ActivityNames to convert activity codes into names
activityNames <- read.table(pathActivityNames, na.strings = "",
header = FALSE, stringsAsFactors = FALSE)
### 2.3 Test dataset
testset <- read.table(pathTest, na.strings = "",
header = FALSE, stringsAsFactors = FALSE)
### 2.4 Activities for each row of the test dataset
testsetActivity <- read.table(pathTestActivity, na.strings = "",
header = FALSE, stringsAsFactors = FALSE)
### 2.5 Subjects for each row of the test dataset
testsetSubjects <- read.table(pathTestSubjects, na.strings = "",
header = FALSE, stringsAsFactors = FALSE)
### 2.6 Train dataset
trainset <- read.table(pathTrain, na.strings = "",
header = FALSE, stringsAsFactors = FALSE)
### 2.7 Activities for each row of the train dataset
trainsetActivity <- read.table(pathTrainActivity, na.strings = "",
header = FALSE, stringsAsFactors = FALSE)
### 2.8 Subjects for each row of the train dataset
trainsetSubjects <- read.table(pathTrainSubjects, na.strings = "",
header = FALSE, stringsAsFactors = FALSE)
## 3. Add subjects and activity to each row of the two datasets
testset <- bind_cols(testsetSubjects, testsetActivity, testset)
trainset <- bind_cols(trainsetSubjects, trainsetActivity, trainset)
## 4. Prepare headers and activityNames
headers.unique <- c("Subject","activitycode", make.unique(headers[,2]))
colnames(activityNames) <- c("activitycode", "Activity")
## 5. Give the datasets descriptive headers
colnames(testset) <- headers.unique
colnames(trainset) <- headers.unique
## 6. Format the dataset
dataset <-
### 6.1 Merge the datasets
bind_rows(testset,trainset) %>%
### 6.2 Select appropriate columns
select(contains("Subject"),
contains("activitycode"),
contains("mean"),
-contains("Freq"), #exclude the meanFreq-columns
-contains("angle"), #exclude angle([...]Mean) columns
contains("std")) %>%
### 6.3 Give activities descriptive names
left_join(activityNames, by = "activitycode") %>%
### 6.4 Remove the old activity codes
select(-activitycode)
## 7. Kind message to the user and return dataset
message("Dataset successfully tidied")
dataset
}
# Function to create the summary of the UCI HAR Dataset with the mean of each
# measurement per subject per activity
createSummary <- function(dataset, filename = "summary.txt"){
## 1. Group the dataset by subject and activity
group_by(dataset, Subject, Activity) %>%
## 2. Generate mean for each column per Subject/Activity group
summarise_each(funs(mean)) %>%
## 3. write this summary to a file in the working directory
write.table(filename, row.name = FALSE)
## 4. Kind message to the user
message(paste("Summary saved in", filename))
}
# Little helper function to read the summary data back into R
readSummary <- function(filename){
## 1. Read the summary file from specified filename
read.table(filename, header = TRUE)
}
# Function to run the entire project with (my) default settings and file
# locations
runProject <- function(datafolder = "UCI HAR Dataset/",
summaryfile = "summary.txt"){
## Check if dplyr is installed and then load
if(!"dplyr" %in% rownames(installed.packages()))
install.packages(package)
library(dplyr)
dataset <- getTidyDataset(normalizePath(datafolder)) %>%
createSummary
}
# Single call to run the Getting and Cleaning Data Project
runProject()