-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_analysis.R
84 lines (70 loc) · 3.37 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# 1. Merges the training and the test sets to create one data set.
# 2. Extracts only the measurements on the mean and standard deviation for each measurement.
# 3. Uses descriptive activity names to name the activities in the data set
# 4. Appropriately labels the data set with descriptive variable names.
# 5. Creates a second, independent tidy data set with the average of each variable for each activity and each subject.
# 1. Read the raw inputs from the "UCI HAR Dataset" directory.

# Lookup tables, read with every column as character:
# activity_labels.txt and features.txt.
activity <- read.table(
  "UCI HAR Dataset/activity_labels.txt",
  header = FALSE, colClasses = "character"
)
features <- read.table(
  "UCI HAR Dataset/features.txt",
  header = FALSE, colClasses = "character"
)

# One logical flag per row of features.txt: TRUE when the name in column V2
# contains "mean()" or "std()".
mean.std.vector <- grepl('mean\\(\\)|std\\(\\)', features[, "V2"])

# Training partition: measurements (X), activity codes (y) and subject ids.
train_subject <- read.table(
  "UCI HAR Dataset/train/subject_train.txt", header = FALSE
)
train_Y <- read.table("UCI HAR Dataset/train/y_train.txt", header = FALSE)
train_X <- read.table("UCI HAR Dataset/train/X_train.txt", header = FALSE)

# Test partition: same three files.
test_subject <- read.table(
  "UCI HAR Dataset/test/subject_test.txt", header = FALSE
)
test_Y <- read.table("UCI HAR Dataset/test/y_test.txt", header = FALSE)
test_X <- read.table("UCI HAR Dataset/test/X_test.txt", header = FALSE)
# 2. Build one data frame per partition. Column order matters downstream:
#    measurement columns first, then the subject data, then the activity data.
train_data <- data.frame(
  train_X, train_subject, train_Y,
  stringsAsFactors = FALSE
)
test_data <- data.frame(
  test_X, test_subject, test_Y,
  stringsAsFactors = FALSE
)
# 3. Merge the two partitions into one data set (test rows first, then the
#    training rows).
combined_data <- rbind(test_data, train_data)
# 4. Remove unwanted columns, keeping the subject, the activity and the
#    mean()/std() measurements.
# combined_data is laid out as: measurement columns, then subject, then
# activity. The last two columns are therefore located relative to the width
# of the data frame rather than by hard-coded position.
size <- ncol(combined_data)
# Every measurement column needs exactly one flag in mean.std.vector;
# otherwise the selection below would pick the wrong columns.
stopifnot(length(mean.std.vector) == size - 2)
# 5. Prepare column names: positions of the flagged measurements and their
#    names from features.txt, in the same order.
keep_cols <- which(mean.std.vector)
vecColNames <- features[keep_cols, "V2"]
# 6. Extract only the measurements on the mean and standard deviation for
#    each measurement. All selected columns are taken in one step instead of
#    growing the data frame with one cbind() per column.
tidyDataSet <- data.frame(
  subject = combined_data[, size - 1],
  activity = combined_data[, size],
  combined_data[, keep_cols, drop = FALSE]
)
# 7. Appropriately label the data set with descriptive variable names.
# 8. Assign the column names. colnames<- is used (rather than names inside
#    data.frame()) so names such as "…-mean()-X" are kept verbatim.
colnames(tidyDataSet) <- c("subject", "activity", vecColNames)
# 9. Use descriptive activity names to name the activities in the data set.
# Each activity code is looked up in the code column (V1) of the activity
# table and replaced by the label in V2. The whole column is translated in
# one vectorised step; a row-by-row loop over the data frame is far slower
# and also breaks on a zero-row data set.
label_row <- match(as.character(tidyDataSet[["activity"]]), activity[["V1"]])
if (anyNA(label_row)) {
  # Fail loudly instead of silently writing NA activity names.
  stop("activity codes without a matching entry in activity_labels.txt",
       call. = FALSE)
}
tidyDataSet[["activity"]] <- activity[label_row, "V2"]
# 10. Export the first tidy data set to the file system (tab separated).
write.table(tidyDataSet, "summTidyDataSet1.txt", sep="\t")
# 11. Create a second, independent tidy data set with the average of each
#     variable for each activity and each subject.
# Measurement columns are everything after the leading subject/activity
# pair. All of them are averaged, including the last two.
feature_cols <- setdiff(seq_len(ncol(tidyDataSet)), 1:2)
# One output row per subject/activity pair. expand.grid varies its first
# argument fastest, so rows are ordered by subject and, within a subject, by
# activity (both in order of first appearance in tidyDataSet).
groups <- expand.grid(
  activity = unique(tidyDataSet[["activity"]]),
  subject = unique(tidyDataSet[["subject"]]),
  stringsAsFactors = FALSE
)
# Preallocated result: one row per pair, one column per measurement.
group_means <- matrix(
  NA_real_,
  nrow = nrow(groups),
  ncol = length(feature_cols),
  dimnames = list(NULL, colnames(tidyDataSet)[feature_cols])
)
for (i in seq_len(nrow(groups))) {
  in_group <- tidyDataSet[["subject"]] == groups[["subject"]][i] &
    tidyDataSet[["activity"]] == groups[["activity"]][i]
  # A pair with no observations yields NaN for every measurement.
  group_means[i, ] <- vapply(
    tidyDataSet[in_group, feature_cols, drop = FALSE],
    mean,
    numeric(1)
  )
}
# check.names = FALSE keeps the measurement names exactly as in tidyDataSet.
summTidyDataSet <- data.frame(
  subject = groups[["subject"]],
  activity = groups[["activity"]],
  group_means,
  check.names = FALSE,
  stringsAsFactors = FALSE
)
# Export the second tidy data set (average of each variable for each activity
# and each subject) to the file system, tab separated.
write.table(summTidyDataSet, "summTidyDataSet2-SumAvg.txt", sep="\t")