-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_analysis.R
132 lines (61 loc) · 3.67 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
### Step 1: Reading the files
# The train and test measurements are whitespace-separated text files, so they
# are read with the readr package.
library(readr)
# Extract the archive so that the UCI_HAR_Dataset folder sits in the current
# working directory, and run the script from there. The working directory is
# deliberately not changed by the script (no setwd() on a machine-specific
# absolute path); instead the script stops early with a clear message when the
# dataset folder cannot be found.
if (!dir.exists('./UCI_HAR_Dataset')) {
  stop(
    "Folder 'UCI_HAR_Dataset' not found in '", getwd(), "'. ",
    "Run this script from the directory where the dataset was extracted.",
    call. = FALSE
  )
}
# Training measurements: UCI_HAR_Dataset -> train -> X_train.txt.
uci_train <- read_table('./UCI_HAR_Dataset/train/X_train.txt', col_names = FALSE)
# Test measurements: UCI_HAR_Dataset -> test -> X_test.txt.
uci_test <- read_table('./UCI_HAR_Dataset/test/X_test.txt', col_names = FALSE)
# Stack the training rows on top of the test rows. Every later step that adds
# labels or subjects must combine train and test in this same order.
combined <- rbind(uci_train, uci_test)
## Step 2: Reading features and extracting mean and std columns
# Read the feature list with one whole line per row, stored as text in X1.
# read_tsv() is used instead of read_table() because read_table() splits on
# whitespace in readr >= 2.0, which would leave only the numeric index in X1
# and make the name matching below select nothing.
# NOTE(review): assumes features.txt contains no tab characters, so each line
# ends up in a single column -- confirm against the data file.
uci_feat <- read_tsv(
  './UCI_HAR_Dataset/features.txt',
  col_names = FALSE,
  col_types = cols(.default = col_character()),
  quote = ''
)
# Row positions of the features whose entry contains the literal text "mean()".
# fixed = TRUE is needed because "()" in a regular expression is an empty
# group, not a pair of parentheses.
ext_mean <- grep('mean()', uci_feat$X1, fixed = TRUE)
# Row positions of the features whose entry contains the literal text "std()".
ext_std <- grep('std()', uci_feat$X1, fixed = TRUE)
# All mean() positions first, followed by all std() positions.
ext_features <- c(ext_mean, ext_std)
# An empty selection would silently produce a dataset with no measurement
# columns, so stop here instead.
if (length(ext_features) == 0) {
  stop("No mean()/std() features found in features.txt.", call. = FALSE)
}
# Keep only the selected measurement columns of the combined dataset; row i of
# the feature list is taken to describe column i of `combined`.
combined_ext_features <- combined[ext_features]
### Step 3: Reading the activity labels and attaching them to the dataset
# join() from plyr is used rather than merge() because merge() shuffles the
# rows, whereas join() keeps them in their original order.
library(plyr)

# Lookup table holding the descriptive label (X2) for each activity code (X1).
label <- read_table(
  "./UCI_HAR_Dataset/activity_labels.txt",
  col_names = FALSE
)

# Activity codes of the training rows and of the test rows.
uci_train_label <- read_table(
  "./UCI_HAR_Dataset/train/y_train.txt",
  col_names = FALSE
)
uci_test_label <- read_table(
  "./UCI_HAR_Dataset/test/y_test.txt",
  col_names = FALSE
)

# Stack the codes: training rows first, then test rows.
uci_label <- rbind(uci_train_label, uci_test_label)

# Look up the descriptive label for every code; the join key is the column
# the two tables have in common.
uci_label_descrip <- join(x = uci_label, y = label)

# Append the descriptive label as one more column of the dataset.
combined_ext_features_labels <- cbind(
  combined_ext_features,
  uci_label_descrip$X2
)
### Step 4: Labelling the dataset
# Feature entries for the selected mean()/std() columns, in the same order as
# the columns of the dataset.
ext_uci_feat <- uci_feat[ext_features, ]
# Strip only the leading numeric index from each entry. The pattern is
# anchored so that digits inside a feature name are never removed.
ext_uci_feat_des <- sub('^[0-9]+', '', as.character(ext_uci_feat$X1))
# Spell out the domain abbreviations: a leading "t" becomes
# "time domain signal" and a leading "f" becomes "frequency domain signal".
# The patterns start with a space because the separator that followed the
# index is still in front of the name at this point.
ext_uci_feat_des <- sub('^ t', 'time domain signal', ext_uci_feat_des)
ext_uci_feat_des <- sub('^ f', 'frequency domain signal', ext_uci_feat_des)
# The last column of the dataset holds the activity labels.
ext_uci_feat_des <- c(ext_uci_feat_des, 'labels')
# Assigning a names vector of the wrong length would silently leave some
# columns named NA, so check the length first.
if (length(ext_uci_feat_des) != ncol(combined_ext_features_labels)) {
  stop(
    "Expected ", ncol(combined_ext_features_labels), " column names but built ",
    length(ext_uci_feat_des), ".",
    call. = FALSE
  )
}
# Use the descriptive names as the column names of the dataset.
names(combined_ext_features_labels) <- ext_uci_feat_des
# Step 5: Average of each variable grouped by activity and subject
# Read the subject identifiers and stack them: training rows first, then test
# rows, matching the row order of the dataset.
sub_train <- read_table('./UCI_HAR_Dataset/train/subject_train.txt', col_names = FALSE)
sub_test <- read_table('./UCI_HAR_Dataset/test/subject_test.txt', col_names = FALSE)
sub <- rbind(sub_train, sub_test)
# Append the subject identifiers to the labelled dataset of step 4; the new
# column arrives with the name X1.
combined_sub <- cbind(combined_ext_features_labels, sub)
library(dplyr)
# Call dplyr::rename explicitly: plyr is attached as well and has its own
# rename() with a different calling convention, so the bare name would depend
# on the order in which the packages were attached.
combined_sub <- dplyr::rename(combined_sub, subject = X1)
# Every column except the two grouping columns is a measurement. Deriving the
# set by name keeps this step correct if the number of selected features
# changes.
measure_cols <- setdiff(names(combined_sub), c('labels', 'subject'))
# Mean of every measurement for each (activity, subject) pair. Because the
# grouping list is unnamed, aggregate() names the grouping columns Group.1
# (activity label) and Group.2 (subject).
combined_grouped <- aggregate(
  combined_sub[, measure_cols, drop = FALSE],
  list(combined_sub$labels, combined_sub$subject),
  mean
)