Data from https://www.kaggle.com/erikbruin/titanic-2nd-degree-families-and-majority-voting/data

# Load the Titanic test split from the local data directory.
TestData <- read.csv(file = './DATA/all/test.csv')

使用 strsplit 搭配 apply 系列函式，從 Name 欄位分割出姓氏 (Surname) 與稱謂 (Title)

心得…

# Derive Surname and Title columns from the passenger names, which are
# split on commas and periods (first piece = surname, second = title).
Names <- TestData$Name

# Split one name on "," and "." and strip the whitespace around each piece.
# Without the trim the second piece keeps the blank that follows the comma
# (" Mr" instead of "Mr"), which breaks exact comparisons on the title.
#
# x: a single character string.
# Returns a character vector with one element per piece.
splitTitle <- function(x) {
  trimws(strsplit(x, split = "[,.]")[[1]])
}

# Keep only the first two pieces of every name so each one contributes
# exactly two values; names containing extra periods would otherwise give
# ragged results. vapply() checks that shape and always yields a 2-row
# character matrix (row 1 = surname, row 2 = title). A name with fewer than
# two pieces produces NA for the missing part.
Temp <- vapply(
  as.character(Names),
  function(full_name) splitTitle(full_name)[1:2],
  character(2),
  USE.NAMES = FALSE
)
TestData$Surname <- Temp[1, ]
TestData$Title <- Temp[2, ]
library(knitr)
## Warning: package 'knitr' was built under R version 3.5.1
# Render the sex-by-title contingency table.
kable(table(TestData[["Sex"]], TestData[["Title"]]))
|       | Col| Dona| Dr| Master| Miss|  Mr| Mrs| Ms| Rev|
|:------|---:|----:|--:|------:|----:|---:|---:|--:|---:|
|female |   0|    1|  0|      0|   78|   0|  72|  1|   0|
|male   |   2|    0|  1|     21|    0| 240|   0|  0|   2|
# Collapse the extracted titles into a small set of categories and tabulate
# them against sex.
#
# The original statements addressed a data frame called `all`, which is not
# defined in this file (the name resolves to base::all, a function, so
# `all$Title` fails). They are applied to TestData, the data frame that
# carries the Title column here.

# Titles obtained by splitting names on "," / "." may carry surrounding
# whitespace; normalise so the exact comparisons below can match.
TestData$Title <- trimws(as.character(TestData$Title))

# Fold alternate spellings into their common equivalents.
TestData$Title[TestData$Title %in% c("Mlle", "Ms")] <- "Miss"
TestData$Title[TestData$Title %in% "Mme"] <- "Mrs"

# Everything outside the four common titles is pooled into a single level.
common_titles <- c("Master", "Miss", "Mr", "Mrs")
TestData$Title[!(TestData$Title %in% common_titles)] <- "Rare Title"

TestData$Title <- as.factor(TestData$Title)
kable(table(TestData$Sex, TestData$Title))