library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Load the Titanic passenger table from a comma-separated file whose first
# line holds the column names.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file exists.
data <- read.csv(
  "C:/Users/User/Desktop/R HW/titanic.csv",
  header = TRUE,  # spelled out: `T` is an ordinary variable and can be reassigned
  sep = ","
)

分析票價和生存率的關係,以100元為分界點

# Passengers whose fare is at least 100.
# NOTE(review): the filter is inclusive, so a fare of exactly 100 is also
# counted in the separate "fare <= 100" group.
# NOTE(review): 9999 is treated as a missing-value code for `fare` elsewhere
# in this file; such rows satisfy fare >= 100 and are counted here -- confirm
# whether they should be excluded.
temp1 <- data %>%
  select(fare, survival) %>%
  filter(fare >= 100)

# Rows of temp1 that are not survivors (survival != 1).
# `%in%` replaces `-which(...)`: when nobody survived, `-which()` is an empty
# index and would drop every row instead of keeping them all.
asd <- temp1[!(temp1$survival %in% 1), ]

# Death rate = non-survivors / all passengers in this fare group.
qwe <- nrow(asd) / nrow(temp1)
print(qwe)
## [1] 0.4234234
# Passengers whose fare is at most 100.
# NOTE(review): the filter is inclusive, so a fare of exactly 100 is also
# counted in the separate "fare >= 100" group.
temp2 <- data %>%
  select(fare, survival) %>%
  filter(fare <= 100)

# Rows of temp2 that are not survivors (survival != 1).
# `%in%` replaces `-which(...)`: when nobody survived, `-which()` is an empty
# index and would drop every row instead of keeping them all.
zxc <- temp2[!(temp2$survival %in% 1), ]

# Death rate = non-survivors / all passengers in this fare group.
fgh <- nrow(zxc) / nrow(temp2)
print(fgh)
## [1] 0.647351

票價大於等於100元者的死亡率約為0.423,而票價小於等於100元者的死亡率約為0.647,可以推測在事故發生時,票價較高者享有優先逃生的權利。

分析傭人的搭船地點、生存率和class等級

# First-class passengers with job "Personal Maid" and survival == 1, split by
# gender code: temp3 holds gender == 1, temp4 holds gender == 0 (temp4 also
# keeps the `age` column).
# NOTE(review): both tables are restricted to survival == 1, so they list
# survivors only and cannot by themselves give a survival rate.
temp3 <- data %>%
  select(survival, gender, joined, class, job, fare) %>%
  filter(survival == 1, class == 1, job == 'Personal Maid', gender == 1)

temp4 <- data %>%
  select(survival, gender, joined, class, job, age, fare) %>%
  filter(survival == 1, class == 1, job == 'Personal Maid', gender == 0)

# Display the gender == 1 table grouped by `joined`; the grouped table is
# auto-printed and not stored.
temp3 %>% group_by(joined)
## # A tibble: 18 x 6
## # Groups:   joined [2]
##    survival gender      joined  class           job  fare
##       <int>  <int>      <fctr> <fctr>        <fctr> <int>
##  1        1      1 Southampton      1 Personal Maid    78
##  2        1      1   Cherbourg      1 Personal Maid    76
##  3        1      1   Cherbourg      1 Personal Maid    63
##  4        1      1   Cherbourg      1 Personal Maid   247
##  5        1      1 Southampton      1 Personal Maid   221
##  6        1      1   Cherbourg      1 Personal Maid   262
##  7        1      1 Southampton      1 Personal Maid   151
##  8        1      1   Cherbourg      1 Personal Maid   110
##  9        1      1 Southampton      1 Personal Maid   211
## 10        1      1 Southampton      1 Personal Maid    80
## 11        1      1 Southampton      1 Personal Maid   211
## 12        1      1   Cherbourg      1 Personal Maid   106
## 13        1      1   Cherbourg      1 Personal Maid   146
## 14        1      1 Southampton      1 Personal Maid    86
## 15        1      1 Southampton      1 Personal Maid    93
## 16        1      1 Southampton      1 Personal Maid    31
## 17        1      1   Cherbourg      1 Personal Maid   512
## 18        1      1   Cherbourg      1 Personal Maid   134

從分析結果我們可以發現傭人多來自於Southampton和Cherbourg,可推測這2個地區較為富裕。有趣的一點是,表中傭人的生存率看似高達100%;但需注意上述篩選條件已限定survival==1,表中只會出現生還者,因此還需與未以survival篩選的傭人總數比較,才能確認實際生存率。若生存率確實偏高,原因大概是傭人跟著雇主,所以同樣享有優先逃生的權利。

class等級最高和最低存活率的比較

# Keep only class 1 and class 3 passengers whose fare and age are known.
# 9999 is the code this script treats as "missing" for both columns.
missing_code <- 9999

# One explicit logical mask instead of chained `df[!df$x == 9999, ]` steps:
# an NA in fare/age would make that index NA and insert an all-NA phantom row,
# so NA values are screened out here as well. Base subsetting keeps the
# original row names.
keep_row <- data$class %in% c(1, 3) &
  !is.na(data$fare) & data$fare != missing_code &
  !is.na(data$age) & data$age != missing_code
passenger <- data[keep_row, ]

# Preview the first rows of the filtered table.
head(passenger)
##                                  name gender age class fare group
## 1        ALLEN, Miss Elisabeth Walton      1  29     1  211      
## 2 ALLISON, Mr Hudson Joshua Creighton      0  30     1  151      
## 3           ALLISON, Mrs Bessie Waldo      1  25     1  151      
## 4         ALLISON, Miss Helen Loraine      1   2     1  151      
## 5       ALLISON, Master Hudson Trevor      0   1     1  151      
## 6                  ANDERSON, Mr Harry      0  47     1   26      
##        joined         job boat survival
## 1 Southampton                2        1
## 2 Southampton Businessman             0
## 3 Southampton                         0
## 4 Southampton                         0
## 5 Southampton               11        1
## 6 Southampton Stockbroker    3        1
# Survival rate of class 1 passengers in `passenger`.
Class1 <- passenger %>%
  select(survival, class) %>%
  filter(class == 1)

Class1total <- nrow(Class1)               # all class 1 rows
Class1alive <- sum(Class1$survival == 1)  # rows coded survival == 1
Class1dead <- sum(Class1$survival == 0)   # rows coded survival == 0

# Share of class 1 rows with survival == 1.
Class1survivalrate <- Class1alive / Class1total
print(Class1survivalrate)
## [1] 0.6453674
# Survival rate of class 3 passengers in `passenger`.
Class3 <- passenger %>%
  select(survival, class) %>%
  filter(class == 3)

Class3total <- nrow(Class3)               # all class 3 rows
Class3alive <- sum(Class3$survival == 1)  # rows coded survival == 1
Class3dead <- sum(Class3$survival == 0)   # rows coded survival == 0

# Share of class 3 rows with survival == 1.
Class3survivalrate <- Class3alive / Class3total
print(Class3survivalrate)
## [1] 0.2453638
# Compare the two survival rates side by side.
# One row per class, holding the rate computed for that class.
CLASS <- data.frame(
  Class = c("Class1", "Class3"),
  survivalrate = c(Class1survivalrate, Class3survivalrate)
)

# Bar chart whose bar heights are the stored rates; geom_col() draws the
# values as given, like geom_bar(stat = "identity").
ggplot(CLASS, aes(x = Class, y = survivalrate, fill = Class)) +
  geom_col()

由結果推論等級越高,存活率越高:Class1的存活率(約0.645)約為Class3(約0.245)的2.6倍。