library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(reshape2)

台灣死因分析

原始資料

# Load the yearly death-count table. Judging by the printed rows below, it
# holds one row per year (1986-2010) with the columns Year, Death, Cancer,
# Cerebrovascular, Accident, HeartDisease and Diabetes.
# NOTE(review): absolute, machine-specific path -- the script only runs where
# the file exists at exactly this location.
Death <- read.csv(file = "C:/Users/angel/Downloads/Death.csv", header = TRUE)
##    Year  Death Cancer Cerebrovascular Accident HeartDisease Diabetes
## 1  2010 145804  41046           10134     6669        15675     8211
## 2  2009 143513  39918           10383     7358        15094     8230
## 3  2008 143594  38913           10663     7077        15726     8036
## 4  2007 140371  40306           12875     7130        13003    10231
## 5  2006 136371  37998           12596     8011        12283     9690
## 6  2005 139779  37222           13139     8365        12970    10501
## 7  2004 134765  36357           12339     8452        12861     9191
## 8  2003 131229  35201           12404     8191        11785    10013
## 9  2002 128357  34342           12009     8489        11441     8818
## 10 2001 127892  32993           13141     9513        11003     9113
## 11 2000 126016  31554           13332    10515        10552     9450
## 12 1999 126654  29784           12631    12960        11299     9023
## 13 1998 123489  29260           12705    10973        11030     7532
## 14 1997 121014  29011           12885    11297        10754     7500
## 15 1996 121933  27961           13944    12422        11273     7525
## 16 1995 119693  25841           14132    12983        11256     7225
## 17 1994 113896  23318           13658    13219        12005     6094
## 18 1993 111289  22319           13680    13270        12506     5367
## 19 1992 110413  20959           14325    13152        12993     4887
## 20 1991 105979  19630           14137    13636        12026     4210
## 21 1990 105658  18536           14174    13927        11505     3960
## 22 1989 103653  18878           14461    14047        10699     3868
## 23 1988 102312  18233           15067    13730        10836     3883
## 24 1987  96650  17342           14476    13024        11209     3286
## 25 1986  95064  16559           14862    12187         9953     2970

Q1. 2010 年與 1986 年的死因比較

先新增一個欄位,記錄五大死因以外的其他死亡人數

# Add an `Others` column: what remains of the yearly Death count after
# subtracting the five listed causes. Inside mutate(), `Death` resolves to the
# column of that name, not to the data frame itself.
d1 <- mutate(
  Death,
  Others = Death - Cancer - Cerebrovascular - Accident - HeartDisease - Diabetes
)

再把2010和1986年的資料挑出來

# Reshape to long form, keeping Year and Death as identifier columns; every
# other column becomes a (variable, value) pair. Then keep only the rows for
# the two years being compared.
d2 <- d1 %>%
  melt(id.vars = c("Year", "Death")) %>%
  filter(Year %in% c(2010, 1986))

畫成圓餅圖

# Convert Year to a factor before plotting.
d2[["Year"]] <- factor(d2[["Year"]])

# One pie per year: a single stacked bar whose segments are rescaled to
# proportions (position = "fill"), faceted by Year and drawn in polar
# coordinates with the y aesthetic mapped to the angle. Axis labels, title and
# tick marks are blanked out.
pie <- ggplot(
  data = d2,
  mapping = aes(x = "Death", y = value, fill = variable)
) +
  geom_bar(stat = "identity", position = "fill", width = 1) +
  facet_wrap(~Year) +
  coord_polar(theta = "y") +
  labs(x = "", y = "", title = "") +
  theme(axis.ticks = element_blank())

由圖可知,經過24年後,癌症死亡的比例大幅上升,腦血管疾病和意外的死亡比例則相對減少

Q2. 歷年來的意外致死率趨勢

# Per-year ratio of accident deaths to the Death column (Accident / Death).
# The printed data shows each year exactly once, so every Year group
# contributes a single row.
Q2 <- summarise(group_by(Death, Year), AccidentRate = Accident / Death)

# Line chart with point markers of that ratio over the years.
Q2plot <- ggplot(data = Q2, mapping = aes(x = Year, y = AccidentRate)) +
  geom_line() +
  geom_point()

由折線圖可以看出,意外致死率大致呈現逐年下降的趨勢,惟1999年突然上升,可能是因九二一大地震的緣故

Q3. 歷年來的疾病致死率趨勢

# Per-year ratio of deaths from the four listed diseases (Cancer,
# Cerebrovascular, HeartDisease, Diabetes) to the Death column.
# The numerator is parenthesised so that the whole sum is divided by Death;
# without the parentheses only Diabetes is divided, because `/` binds tighter
# than `+`, and the result is essentially a raw count rather than a rate.
Q3 <- Death %>%
  group_by(Year) %>%
  summarise(
    DiseaseRate = (Cancer + Cerebrovascular + HeartDisease + Diabetes) / Death
  )

# Line chart with point markers of that ratio over the years.
Q3plot <- ggplot(Q3, aes(x = Year, y = DiseaseRate)) +
  geom_line() +
  geom_point()

相較於意外致死率逐年下降,四大主要疾病(癌症、腦血管疾病、心臟疾病、糖尿病)的致死率則大致呈現上升的趨勢