-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_analysis.R
74 lines (59 loc) · 2.49 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
## Getting and Cleaning Data
## Project
## Ryan Summe
# Make sure all files exist
#
# Checks that the eight UCI HAR Dataset files listed below are present,
# with paths resolved relative to the current working directory.
#
# Returns: the string "Files OK." (which is also printed) when every file
#          is found.
# Errors:  stops with a message naming each missing path when one or more
#          files are absent.
checkFiles <- function() {
  # Required files
  files <- c(
    "UCI HAR Dataset/test/X_test.txt",
    "UCI HAR Dataset/test/y_test.txt",
    "UCI HAR Dataset/test/subject_test.txt",
    "UCI HAR Dataset/train/X_train.txt",
    "UCI HAR Dataset/train/y_train.txt",
    "UCI HAR Dataset/train/subject_train.txt",
    "UCI HAR Dataset/features.txt",
    "UCI HAR Dataset/activity_labels.txt"
  )

  # file.exists() is vectorised, so it can be applied to the whole vector
  # at once; keep only the paths that were not found.
  missingFiles <- files[!file.exists(files)]

  # Name the missing paths so the user does not have to hunt for them
  if (length(missingFiles) > 0) {
    stop(
      "File(s) missing, consult README. Missing: ",
      paste(missingFiles, collapse = ", "),
      call. = FALSE
    )
  }

  print("Files OK.")
}
# Loads "test" or "train" data set
#
# Reads the measurement, activity-code and subject files for one data set
# from "UCI HAR Dataset/<set>/", binds them column-wise and attaches the
# activity label for each activity code.
#
# Args:
#   set: name of the sub-directory and file suffix to load, e.g. "test"
#        reads X_test.txt, y_test.txt and subject_test.txt.
#
# Uses two objects that are not arguments and must be defined before the
# function is called:
#   features       - column names for the measurement file
#   activityLabels - data frame with columns "Activity.Factor" and "Activity"
#
# Returns: a data frame with columns Subject, Activity, then one column per
#          measurement. Rows come out in the order produced by merge(), which
#          may differ from the order in the input files.
loadFiles <- function(set) {
  # Read files
  setDir <- file.path("UCI HAR Dataset", set)
  x <- read.table(file.path(setDir, paste0("X_", set, ".txt")),
                  col.names = features)
  y <- read.table(file.path(setDir, paste0("y_", set, ".txt")),
                  col.names = "Activity.Factor")
  subjects <- read.table(file.path(setDir, paste0("subject_", set, ".txt")),
                         col.names = "Subject")

  # Bind files together and replace activity codes with labels
  data <- cbind(subjects, y, x)
  data <- merge(data, activityLabels, by = "Activity.Factor")

  print(paste(set, "data set loaded."))

  # Reorder by column name rather than fixed positions, so the result does
  # not depend on the measurement file having exactly 561 columns.
  data[, c("Subject", "Activity", names(x)), drop = FALSE]
}
# Check that all files exist (stops with an error if any is missing)
checkFiles()

# Load required packages
# reshape2 supplies melt() and dcast(); attach it only if it is not already
# on the search path.
if (!("package:reshape2" %in% search())) {
  print("Loading package:reshape2.")
  library(reshape2)
}

# Load variable names (remove punctuation) and activities
features <- read.table("UCI HAR Dataset/features.txt")[, 2]
features <- gsub("[[:punct:]]", "", features)
activityLabels <- read.table("UCI HAR Dataset/activity_labels.txt",
                             col.names = c("Activity.Factor", "Activity"))

# Load test and train data and stack them into one data frame
testData <- loadFiles("test")
trainData <- loadFiles("train")
fullData <- rbind(testData, trainData)

# Extract mean and standard deviation columns
# The identifier columns (Subject, Activity) are kept by the same pattern.
# The mask is built from fullData itself, the frame being subset.
idCols <- c("Subject", "Activity")
keepCols <- grepl("subject|activity|mean|std", names(fullData),
                  ignore.case = TRUE)
msData <- fullData[, keepCols, drop = FALSE]

# Melt and cast data for result, taking the mean for each subject's activity
# Measure columns are every kept column that is not an identifier, so the
# code does not rely on a fixed column count.
measureCols <- setdiff(names(msData), idCols)
dataMelt <- melt(msData, id.vars = idCols, measure.vars = measureCols)
results <- dcast(dataMelt, Subject + Activity ~ variable,
                 fun.aggregate = mean)

# Write result to file
write.table(results, "results.txt", row.names = FALSE)
print("Result file exported as results.txt")

# Clean up workspace (only `results` is left behind)
rm(features, activityLabels, loadFiles, checkFiles, testData,
   trainData, fullData, msData, dataMelt, idCols, keepCols, measureCols)
print("Workspace cleaned up.")