-
Notifications
You must be signed in to change notification settings - Fork 0
/
gender_model.R
49 lines (40 loc) · 1.42 KB
/
gender_model.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Import the training set: train
train_url <- "http://s3.amazonaws.com/assets.datacamp.com/course/Kaggle/train.csv"
train <- read.csv(train_url)
# Import the testing set: test
test_url <- "http://s3.amazonaws.com/assets.datacamp.com/course/Kaggle/test.csv"
test <- read.csv(test_url)
write.csv(train, file="training_set.csv" , row.names=FALSE)
write.csv(test, file="test_set.csv" , row.names=FALSE)
# Print train and test to the console
train
test
# Your train and test set are still loaded
str(train)
str(test)
# Passengers that survived vs passengers that passed away
table(train$Survived)
# As proportions
prop.table(table(train$Survived))
# Males & females that survived vs males & females that passed away
table(train$Sex, train$Survived)
# As row-wise proportions(margin=2 is column wise proportions)
prop.table(table(train$Sex, train$Survived), margin=1)
# Your train and test set are still loaded in
str(train)
str(test)
#Does Age play a role?
# Create the column child, and indicate whether child or no child
train$Child <- NA
train$Child[train$Age < 18] <- 1
train$Child[train$Age >= 18] <- 0
# Two-way comparison
prop.table(table(train$Child, train$Survived), 1)
# Create a copy of test: test_one
test_one <- test
# Initialize a Survived column to 0
test_one$Survived <- 0
str(test_one)
# Set Survived to 1 if Sex equals "female"
test_one$Survived[test_one$Sex == "female"] <- 1
write.csv(test_one, file="test_set.csv" , row.names=FALSE)