-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_analysis.R
91 lines (52 loc) · 3.09 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#============Getting and Cleaning Data Course Project===========================
# Prerequisite: download and unzip the data archive into your working directory.
# The script expects the extracted folder to still be named "UCI HAR Dataset".
#==============Step 1: Merging the training and test sets=======================
# Load the training and the test measurement tables.
train.set <- read.table("UCI HAR Dataset//train//X_train.txt")
test.set <- read.table("UCI HAR Dataset//test//X_test.txt")
# Give both tables descriptive column names, taken from column V2 of
# features.txt.
# NOTE: This is really step 4, but it is done here because the data are easier
# to work with once the descriptive labels are in place.
labels <- read.table("UCI HAR Dataset//features.txt")
train.set <- setNames(train.set, labels$V2)
test.set <- setNames(test.set, labels$V2)
# Stack the training rows on top of the test rows.
all.data <- rbind(train.set, test.set)
#=============Step 2: Extract the mean and sd for each measurement==============
# Keep only the measurements whose names contain the literal text "-mean()" or
# "-std()". fixed = TRUE makes grep() treat the pattern as plain text rather
# than a regular expression, so the parentheses are matched literally and a
# name such as "...-meanFreq()" is not picked up.
# The mean columns come first, followed by the std columns; within each group
# the original column order is kept.
mean.cols <- grep("-mean()", names(all.data), fixed = TRUE)
std.cols <- grep("-std()", names(all.data), fixed = TRUE)
# drop = FALSE keeps the result a data frame even if only one column matches.
Mean.Sd.Only <- all.data[, c(mean.cols, std.cols), drop = FALSE]
#===================Step 3:Descriptive Activity Names===========================
# Read the activity code recorded for every training and test observation.
train.codes <- read.table("UCI HAR Dataset//train//y_train.txt")
test.codes <- read.table("UCI HAR Dataset//test//y_test.txt")
# Combine the two code vectors, training rows first and test rows second (the
# same order in which the measurement rows were stacked).
activity.codes <- c(train.codes$V1, test.codes$V1)
# Convert the codes to a factor with descriptive labels. The mapping is spelled
# out explicitly (code 1 -> first label, ..., code 6 -> last label) instead of
# relabelling whatever levels happen to be present, so a code that is absent
# from the data cannot shift the labels of the remaining codes.
# NOTE(review): assumes the codes are the integers 1-6 in this order, as listed
# in the dataset's activity_labels.txt -- confirm against that file.
activity.names <- c("Walking", "Walking_Upstairs", "Walking_Downstairs",
                    "Sitting", "Standing", "Lying")
activity.codes <- factor(activity.codes,
                         levels = seq_along(activity.names),
                         labels = activity.names)
# Fail loudly on an unexpected code or on a row-count mismatch rather than
# silently producing NA labels or recycling the codes.
stopifnot(
  !any(is.na(activity.codes)),
  length(activity.codes) == nrow(Mean.Sd.Only)
)
# Add the activity column in front of the selected measurements.
Mean.Sd.Activity <- cbind(activity.codes = activity.codes, Mean.Sd.Only)
#=================Step 4: Descriptive Variable Names============================
#This has already been done in step 1 above. I found it easier to extract the
# desired columns when they were already descriptively labelled.
#===Step 5: New Data Set - Avg of each var for each activity for each subject====
# Begin by adding the subject codes to the data (training rows first, then
# test rows, matching the order of the measurement rows).
subject.train <- read.table("UCI HAR Dataset//train//subject_train.txt")
subject.test <- read.table("UCI HAR Dataset//test//subject_test.txt")
all.subject <- c(subject.train$V1, subject.test$V1)
# Guard against silent recycling if the subject file and the data disagree.
stopifnot(length(all.subject) == nrow(Mean.Sd.Activity))
Mean.Sd.Activity.Subj <- cbind(Subject = all.subject, Mean.Sd.Activity)

# Split-apply-combine: average every measurement within each
# activity/subject combination.
# The grouping factor is built once, outside the per-column work. Its levels
# are "<activity>.<subject>" with the activity varying fastest, which is the
# same grouping and naming that split() produces for a list of two factors.
group.id <- interaction(Mean.Sd.Activity.Subj$activity.codes,
                        Mean.Sd.Activity.Subj$Subject)
# Every column except the two key columns is a measurement to average, so the
# number of measurements and the number of groups are derived from the data
# instead of being hard-coded.
is.measure <- !(names(Mean.Sd.Activity.Subj) %in%
                  c("Subject", "activity.codes"))
group.means <- lapply(
  Mean.Sd.Activity.Subj[is.measure],
  function(values) {
    # vapply() guarantees exactly one numeric mean per group; USE.NAMES = FALSE
    # keeps the group labels out of the row names of the result.
    vapply(split(values, group.id), mean, numeric(1), USE.NAMES = FALSE)
  }
)
# One row per group: the group label followed by the averaged measurements.
# check.names = FALSE preserves names such as "tBodyAcc-mean()-X" unchanged.
Final.Data <- data.frame(Group = levels(group.id), group.means,
                         check.names = FALSE)
#=====================Write table for upload====================================
# Save the per-group averages to FinalData.txt, leaving out the row names.
write.table(
  x = Final.Data,
  file = "FinalData.txt",
  row.names = FALSE
)