# Load the HIV and AIDS case tables and replace the original (Chinese)
# column headers with English names in a single step per table.
H <- setNames(
  read.csv("HIV.csv", header = TRUE),
  c("country", "diagnostic_year", "diagnostic_month", "gender", "age", "case")
)
A <- setNames(
  read.csv("AIDS.csv", header = TRUE),
  c("diagnostic_year", "diagnostic_month", "gender", "age", "case", "country")
)

此資料中的個案多數為男性,因此想了解:歷年來(至2016年)住在台北市、25-34歲的男性中,診斷出HIV感染的人數與AIDS發病人數的趨勢分別是如何?

library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.3.3
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Inspect the structure of both data frames first (recorded output follows
# each call).
str(H)
## 'data.frame':    11062 obs. of  6 variables:
##  $ country         : Factor w/ 23 levels "台中市","台北市",..: 13 2 2 2 22 2 2 7 12 2 ...
##  $ diagnostic_year : int  1984 1984 1984 1984 1984 1984 1984 1984 1984 1985 ...
##  $ diagnostic_month: int  1 10 10 10 10 11 12 12 12 1 ...
##  $ gender          : Factor w/ 2 levels "女","男": 2 2 2 2 2 2 2 2 2 2 ...
##  $ age             : Factor w/ 6 levels "15-24","25-34",..: 1 1 2 3 2 2 2 2 1 1 ...
##  $ case            : int  1 1 1 1 1 1 1 1 1 1 ...
str(A)
## 'data.frame':    7282 obs. of  6 variables:
##  $ diagnostic_year : int  1986 1987 1988 1988 1989 1989 1989 1989 1989 1989 ...
##  $ diagnostic_month: int  2 9 5 10 3 4 8 8 10 10 ...
##  $ gender          : Factor w/ 2 levels "女","男": 2 2 2 2 2 2 2 2 2 2 ...
##  $ age             : Factor w/ 6 levels "15-24","25-34",..: 2 3 2 3 2 4 2 3 2 5 ...
##  $ case            : int  1 1 1 1 2 1 1 1 1 1 ...
##  $ country         : Factor w/ 22 levels "台中市","台北市",..: 2 1 4 16 16 16 11 11 16 16 ...

# Keep only the first 11009 HIV rows and the first 7240 AIDS rows.
# NOTE(review): the reason for these hard-coded cut-offs is not visible here
# (presumably the trailing rows are unwanted) -- confirm against the CSVs.
H <- H[seq_len(11009), ]
A <- A[seq_len(7240), ]
# Use dplyr's select/filter to keep the HIV rows where country is "台北市",
# age is "25-34" and gender is "男".
nh <- H %>%
  select(country, diagnostic_year, diagnostic_month, case, age, gender) %>%
  filter(country == "台北市", age == "25-34", gender == "男")
## Warning: package 'bindrcpp' was built under R version 3.3.3

# Total cases per diagnosis year, as columns `year` and `total_case`.
nh1 <- nh %>%
  group_by(diagnostic_year) %>%
  summarise(total_case = sum(case)) %>%
  rename(year = diagnostic_year)

# Line chart (blue) with red point markers of the yearly HIV totals.
w1 <- ggplot(data = nh1, aes(x = year, y = total_case, group = 1)) +
  geom_line(colour = "blue") +
  geom_point(colour = "red")
w1

# Same subset as for the HIV table, applied to the AIDS table: rows where
# country is "台北市", age is "25-34" and gender is "男".
na <- A %>%
  select(country, diagnostic_year, case, age, gender) %>%
  filter(country == "台北市", age == "25-34", gender == "男")

# Total cases per diagnosis year, as columns `year` and `total_case`.
na1 <- na %>%
  group_by(diagnostic_year) %>%
  summarise(total_case = sum(case)) %>%
  rename(year = diagnostic_year)

# Plot: green line with orange point markers of the yearly AIDS totals.
w2 <- ggplot(data = na1, aes(x = year, y = total_case, group = 1)) +
  geom_line(colour = "green") +
  geom_point(colour = "orange")
w2

# Show the two plots stacked in two rows so they can be compared.
p <- grid.arrange(w1, w2, nrow = 2)

分析結果發現HIV的走勢圖與AIDS相當相似,幾乎是確診有HIV感染,之後就會有AIDS發病,所以當發現HIV病毒感染後就應該立刻投藥治療,以便追蹤後續的病況。此外,兩張圖都顯示,2005年之後病例數有逐漸升高的趨勢。

先對有HIV的醫療院所做資料的處理,再把AIDS資料與有AIDS門診服務的醫療院所的資料做整併,利用ggmap分別看2010年與2016年的情況

# Load the list of medical facilities and give its columns English names.
hos <- read.csv("HIV_HOS.csv", header = TRUE)
names(hos) <- c("level", "num", "name", "city_name", "address", "gpsy", "gpsx")

# Work on a copy; the city key is the first three characters of the address.
hoss <- hos
hoss$city <- substr(hoss$address, 1, 3)

# Keep columns 6 to 8 only. With the seven columns named above plus the new
# `city` column these are gpsy, gpsx and city.
# NOTE(review): assumes HIV_HOS.csv has exactly seven columns -- confirm.
hoss <- hoss[, 6:8]
names(hoss) <- c("gpsy", "gpsx", "city")

# One row per city holding the mean of the facility coordinates.
nhoss <- hoss %>%
  group_by(city) %>%
  summarise(gpsx = mean(gpsx), gpsy = mean(gpsy))


# Distribution in 2010: AIDS rows diagnosed in 2010.
nac1 <- A %>%
  select(diagnostic_year, case, country) %>%
  filter(diagnostic_year == 2010)

# Total cases per city; the key is converted to character before the join.
aa <- nac1 %>%
  group_by(country) %>%
  summarise(case = sum(case)) %>%
  rename(city = country)
aa$city <- as.character(aa$city)

# Attach the per-city mean facility coordinates and drop cities that have no
# match (gpsx is NA after the left join).
nn1 <- left_join(aa, nhoss, by = "city")
nn1 <- nn1[!is.na(nn1$gpsx), ]
nn1$case <- as.numeric(nn1$case)

# Base map requested for "Taiwan" at zoom 8, with one pink point per city
# sized by its case total.
map <- get_map(location = "Taiwan", zoom = 8)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Taiwan&zoom=8&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Taiwan&sensor=false
ggmap(map) +
  geom_point(aes(x = gpsx, y = gpsy, size = case), color = "pink", data = nn1)

# Distribution in 2016: AIDS rows diagnosed in 2016.
nac <- A %>%
  select(diagnostic_year, case, country) %>%
  filter(diagnostic_year == 2016)

# Total cases per city; the key is converted to character before the join.
bb <- nac %>%
  group_by(country) %>%
  summarise(case = sum(case)) %>%
  rename(city = country)
bb$city <- as.character(bb$city)

# Attach the per-city mean facility coordinates and drop cities that have no
# match (gpsx is NA after the left join).
nn <- left_join(bb, nhoss, by = "city")
nn <- nn[!is.na(nn$gpsx), ]
nn$case <- as.numeric(nn$case)

# Base map requested for "Taiwan" at zoom 8, with one pink point per city
# sized by its case total.
map <- get_map(location = "Taiwan", zoom = 8)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Taiwan&zoom=8&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Taiwan&sensor=false
ggmap(map) +
  geom_point(aes(x = gpsx, y = gpsy, size = case), color = "pink", data = nn)
## Warning: Removed 1 rows containing missing values (geom_point).

這些愛滋病分布的地區,均有愛滋病治療的醫療院所或藥局(圖中僅呈現能對應到醫療院所座標的縣市),2016年相較於2010年的病例數均有增加

但是比例的分布情況是如何呢? 哪一個縣/市是最需要注意AIDS疾病?

此資料為愛滋病死亡的人數統計

# Load DIE.csv and replace its Chinese column headers with English names.
D <- setNames(
  read.csv("DIE.csv", header = TRUE),
  c("death_year", "death_month", "gender", "age", "case", "country")
)


library(gridExtra)



# Per-city population table read from the spreadsheet.
people <- read_xlsx("各縣市人數.xlsx")

# Rows 3637 to 7240 of the AIDS table.
# NOTE(review): presumably these rows are the 2010-2016 diagnoses, matching
# the year columns named below -- confirm; a filter on diagnostic_year would
# be less fragile than a fixed row range.
A1 <- A[seq(3637, 7240), ]
A1$country <- as.character(A1$country)
# (disabled in the original) cbind(people,D1,bycol=Ture)

# Total AIDS cases per city over the selected rows. The original note here
# mentions a comparison with the death statistics; no death data is used in
# this block.
nd <- A1 %>%
  select(country, diagnostic_year, case, age, gender)
nd1 <- nd %>%
  group_by(country) %>%
  summarise(sumcase = sum(case))

# Join the population table and rename every column positionally.
# NOTE(review): assumes the joined table has exactly these 11 columns in this
# order -- confirm against the spreadsheet layout.
dp <- left_join(nd1, people, by = "country")
names(dp) <- c(
  "country", "sumcase",
  "2010", "2011", "2012", "2013", "2014", "2015", "2016",
  "sum", "mean"
)

# Ratio of the case total to the `mean` column (a ratio, not a percentage).
dp$newpercent <- dp[["sumcase"]] / dp[["mean"]]

# Plot the ratio per city: blue line with red point markers.
d1 <- ggplot(data = dp, aes(x = country, y = newpercent, group = 1)) +
  geom_line(colour = "blue") +
  geom_point(colour = "red")
d1

這是2010~2016年愛滋病在各縣市所佔的比例,是利用(七年病例個數的總和)/(七年人口的平均)計算而得,發現此期間新增病例比例最高的是高雄市,其次是新北市,有可能與人口移入都市有關,但也可能與觀念的改變有關