forked from SUSM/getting-and-cleaning-data
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Run_analysis.R
71 lines (42 loc) · 2.7 KB
/
Run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
## Step 1: read the test and train partitions and stack them row-wise.
## NOTE(review): hard-coded, machine-specific working directory. Every
## relative path in this script (inputs and the output file) is resolved
## against it, so adjust this line when running elsewhere.
setwd("C:/Users/SUSM/Documents/assignmentdata/UCI HAR Dataset")
list.files("./test")   ## sanity check: show what the test directory contains
TestX <- read.table("./test/X_test.txt")
TestY <- read.table("./test/y_test.txt")
Testsubject <- read.table("./test/subject_test.txt")
list.files("./train")  ## sanity check: show what the train directory contains
## Read from "./train" (lowercase), the same spelling used by list.files()
## above; a differently-cased "./Train" is not found on case-sensitive
## file systems.
TrainX <- read.table("./train/X_train.txt")
TrainY <- read.table("./train/y_train.txt")
Trainsubject <- read.table("./train/subject_train.txt")
## Row-bind test first, then train, in the same order for all three tables so
## that row i of mergedX, mergedY and mergedsubject stays aligned.
mergedX <- rbind(TestX, TrainX)
mergedY <- rbind(TestY, TrainY)
mergedsubject <- rbind(Testsubject, Trainsubject)
## Step 2: keep only the mean() and std() measurements.
## features.txt is read into two columns: V1 is used below as the column
## position in mergedX, V2 as the feature name.
features <- read.table("features.txt")
## "mean\\>" requires "mean" to end a word, so names where "mean" is followed
## by further letters (e.g. a "meanFreq" suffix) are not selected; "std" is
## matched anywhere in the name.
features_meanstd <- features[grep("mean\\>|std", features$V2), ]
## Step 3: build descriptive variable names.
## Expand the leading domain prefix only: "tB..."/"tG..." -> "timeB..."/
## "timeG..." and a leading "f" -> "Freq". The patterns are anchored with "^"
## so a "tB", "tG" or "f" occurring later in a name is never rewritten.
features_meanstd[, 2] <- sub("^t([BG])", "time\\1", features_meanstd[, 2])
features_meanstd[, 2] <- sub("^f", "Freq", features_meanstd[, 2])
## Make the names syntactically convenient: drop the parentheses and turn
## every "-" into ".".
features_meanstd[, 2] <- gsub("[()]", "", features_meanstd[, 2])
features_meanstd[, 2] <- gsub("-", ".", features_meanstd[, 2])
## Select the matching measurement columns by position and apply the
## cleaned-up names to them.
mergedXmeanstd <- mergedX[, features_meanstd[, 1]]
names(mergedXmeanstd) <- features_meanstd[, 2]
## Step 4: replace numeric activity codes with descriptive activity names.
names(mergedY) <- "Activity"        ## single column holding the activity code
names(mergedsubject) <- "Subject"   ## single column holding the subject id
## activity_labels.txt is read into two columns: column 1 is used as the
## activity code, column 2 as the label attached to it.
activity_labels <- read.table("activity_labels.txt")
activitylabels <- activity_labels[, 2]
## Fail loudly if the data holds a code with no label, instead of letting
## factor() turn it into NA.
stopifnot(all(mergedY$Activity %in% activity_labels[, 1]))
## Map each code to its label explicitly via levels/labels, so the result does
## not depend on the row order of activity_labels.txt or on every code
## appearing in the data.
mergedY$Activity <- factor(
  mergedY$Activity,
  levels = activity_labels[, 1],
  labels = activitylabels
)
## Column-bind subject, activity and the mean/std measurements into one data
## set; the three tables are assumed to be row-aligned.
mergeddata <- cbind(mergedsubject, mergedY, mergedXmeanstd)
## Step 5: build the tidy data set holding the average of every selected
## measurement for each Subject/Activity pair.
library(reshape2)
## Long format: one row per (Subject, Activity, variable, value).
melteddata <- melt(
  mergeddata,
  id.vars = c("Subject", "Activity"),
  measure.vars = c(features_meanstd[, 2])
)
## Wide format again, aggregating each variable with mean() within every
## Subject/Activity combination.
casteddata <- dcast(melteddata, Subject + Activity ~ variable, mean)
## Write the aggregated table (casteddata) as tab-separated text without row
## names; the file lands in the current working directory.
write.table(casteddata, file = "tidydata.txt", sep = "\t", row.names = FALSE)