Q.觀察臉書高雄市長陳菊 從2017/10/25~10/31對高雄地區的活動或政策有哪些?

rm(list=ls(all.names=TRUE))
#先把需要的package執行看存不存在
library(httr)
library(rjson)
library(httpuv)
library(Rfacebook)
## 
## Attaching package: 'Rfacebook'
## The following object is masked from 'package:methods':
## 
##     getGroup
library(plyr)
library(NLP)
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:httr':
## 
##     content
library(tm)
library(rvest)
## Loading required package: xml2
library(xml2)
Sys.setenv(JAVA_HOME="/Users/liuqieru/Desktop/jdk-9.0.1/")
library(rJava)
library(SnowballC)
library(slam)
library(Matrix)
prefex = "https://graph.facebook.com/v2.10/"
#會變得
token  = "EAACEdEose0cBAB1GyZBf34ef0xSIZCVshJCbJe4mrSFR4YPGc7mUVwHqSOLRh28o6Go3wTMHvQMuFZC1k7cpwdVnW1HpOM8rRxVucx1SHYndu2dSJvZB06JZCnJEUTFlN3QCok3JpJAG1Iql3bilxuUCn4cAnGWKgSpQxSej1cpnxMZA1MeqhXvu4LXf5McrHbZAuuVgSlLfAZDZD"
number=1
#把正確的fb網址接起來
attrs  = paste0("232716627404/posts?limit=",number,"&until=2017-10-31&since=2017-10-25&access_token=")
url    = paste0(prefex, attrs, token)
res    = httr::GET(url)
#GET去湊出URL
#強制跟content說他是屬於httr這個library
data <-  httr::content(res)
#把data先解開他list的結構,再用matrix方式存下來
groups= matrix(unlist(data$data))

#存成檔案(因為要分梯次存,所以藉由count這個變數來存取每一篇文章)
filename = paste0(1, ".txt")
write.table(groups,filename)
#要跳到下一頁
after  = data$paging$cursors$after
nextflg= data$paging[2]

count=1
while(nextflg!= "NULL"){
  
  count=count+1
  attrs  = paste0("232716627404/posts?limit=1&until=2017-10-31&since=2017-10-25&after=",after,"&access_token=")
  #nexturl= paste0(prefex,attrs,"&after=",after)
  
  url = paste0(prefex,attrs,token)
  
  nextres= httr::GET(url)
  ndata  = httr::content(nextres)
  ngroups= matrix(unlist(ndata$data))
  #p1=ndata[["data"]][[1]]$message
  #p1=ndata$data[[1]]$message
  ##可用try_catch來測試,while loop停在哪一段 可以記錄走到哪一段停止
  
  after  = ndata$paging$cursors$after
  nextflg = ndata$paging[3]
  
  filename = paste0(count, ".txt")
  
  write.table(ngroups,filename)
}
#要做文字雲 抓一些需要用的套件
library(NLP)
library(tm)
library(jiebaRD)
library(jiebaR)
library(RColorBrewer)
library(wordcloud)
#進行文本清理
par(family='STKaiti')#讓文字顯示成中文
filenames <- list.files(getwd(), pattern="*.txt")
files <- lapply(filenames, readLines)
docs <- Corpus(VectorSource(files))

#要清洗掉的東西

toSpace <- content_transformer(function(x, pattern) {
  return (gsub(pattern, " ", x))
}
#定義清洗:清洗就是把你找到的符號用空白取代
)
docs <- tm_map(docs,toSpace,"V1")
docs <- tm_map(docs,toSpace,"\n")
docs <- tm_map(docs,toSpace, "1")
docs <- tm_map(docs,toSpace, "的")
docs <- tm_map(docs,toSpace, "及")
docs <- tm_map(docs,toSpace, "為")
docs <- tm_map(docs,toSpace, "是")
docs <- tm_map(docs,toSpace, "在")
docs <- tm_map(docs,toSpace, "[A-Za-z0-9]")
#移除標點符號 (punctuation)
#移除數字 (digits)、空白 (white space)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, stripWhitespace)

mixseg = worker()
segment <- c("陳菊","布里斯本","高雄","重劃區","合作會","後勁溪")
new_user_word(mixseg,segment)
## [1] TRUE
#有詞頻之後就可以去畫文字雲
jieba_tokenizer=function(d){
  unlist(segment(d[[1]],mixseg))
}
seg = lapply(docs, jieba_tokenizer)
freqFrame = as.data.frame(table(unlist(seg)))
#畫出文字雲
wordcloud(freqFrame$Var1,freqFrame$Freq,
          min.freq=3,
          random.order=TRUE,random.color=TRUE, 
          rot.per=.1, colors=rainbow(length(row.names(freqFrame))),
          ordered.colors=FALSE,use.r.layout=FALSE,
          fixed.asp=TRUE)

結語:發現市長非常重視高雄,因為在詞頻統計下,得到最高的數字,再由文字雲看出,在他目前對高雄的規劃下,相當重視環境、水質、開發與國際接軌的關係…等