# Read the CSV file at ./DATA/all/test.csv into the TestData data frame.
TestData <- read.csv(file = "./DATA/all/test.csv")
使用 split & apply(strsplit 搭配 sapply)從 Name 欄位分割出姓氏與稱謂
心得…
# Pull the Name column out of TestData so it can be split below.
Names <- TestData$Name
# Split a passenger name such as "Surname, Title. Given names" into its parts.
#
# Args:
#   x: A single character string (the caller coerces with as.character()).
#
# Returns:
#   A character vector holding the pieces found between commas and periods,
#   with surrounding whitespace removed, e.g.
#   c("Surname", "Title", "Given names"). A name that contains additional
#   commas or periods yields additional pieces.
splitTitle <- function(x) {
  # strsplit() keeps the blank that follows each delimiter (" Mr", " Owen"),
  # so trim every piece; otherwise comparisons such as Title == "Mr" or
  # Title %in% c("Mlle", "Ms") silently never match.
  pieces <- strsplit(x, split = "[,.]")[[1]]
  trimws(pieces)
}
# Apply splitTitle() to every name. sapply() simplifies the per-name results
# into a matrix with one column per name; this relies on every name yielding
# the same number of pieces -- TODO confirm for other data sets.
Temp <- sapply(X = as.character(Names), FUN = splitTitle)
# Row 1 holds the first piece of each name, row 2 the second piece.
TestData$Surname <- Temp[1L, ]
TestData$Title <- Temp[2L, ]
library(knitr)
## Warning: package 'knitr' was built under R version 3.5.1
# Cross-tabulate Sex against the extracted Title and render the counts with
# knitr's kable().
kable(x = table(TestData$Sex, TestData$Title))
|        |   |   |   |    |    |     |    |   |   |
|:-------|--:|--:|--:|---:|---:|----:|---:|--:|--:|
| female | 0 | 1 | 0 |  0 | 78 |   0 | 72 | 1 | 0 |
| male   | 2 | 0 | 1 | 21 |  0 | 240 |  0 | 0 | 2 |
# Consolidate the Title column of `all` (a data frame defined outside this
# section) into a small set of categories.

# "Mlle" and "Ms" are recoded to "Miss"; "Mme" is recoded to "Mrs".
all$Title[all$Title %in% c("Mlle", "Ms")] <- "Miss"
all$Title[all$Title %in% "Mme"] <- "Mrs"

# Every title other than the four kept below is recoded to "Rare Title".
all$Title[
  !(all$Title %in% c("Master", "Miss", "Mr", "Mrs"))
] <- "Rare Title"

# Convert to a factor and show the Sex-by-Title counts.
all$Title <- as.factor(all$Title)
kable(x = table(all$Sex, all$Title))