台北捷運美食通文字雲

使用套件

library(Rfacebook)
## Loading required package: httr
## Loading required package: rjson
## Loading required package: httpuv
## 
## Attaching package: 'Rfacebook'
## The following object is masked from 'package:methods':
## 
##     getGroup
library(NLP)
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:httr':
## 
##     content
library(tm)
library(jiebaRD)
library(jiebaR)
library(RColorBrewer)
library(wordcloud)

抓取臉書資料

# ID of the Facebook page handed to getPage().
page.id <- "890212461087699"
# Access token for the request. A non-empty FB_ACCESS_TOKEN environment
# variable takes precedence, so the credential can be supplied without
# editing this script.
# NOTE(review): the fallback literal below is a credential committed to
# source; rotate it and remove the fallback once the environment variable
# is set wherever this script runs.
token <- Sys.getenv("FB_ACCESS_TOKEN")
if (!nzchar(token)) {
  token <- "123646328319174|BXykPz_PGpwMR7909eybOoJkktU"
}
# Request 300 posts from the page; the text-cleaning step reads page$message.
page <- getPage(page.id, token, n = 300)
## 25 posts 50 posts 75 posts 100 posts 125 posts 150 posts 175 posts 200 posts 225 posts 250 posts 275 posts 300 posts

文本清理

# Build a tm corpus from the post messages.
docs <- Corpus(VectorSource(page$message))

# Transformer that replaces every match of `pattern` with a single space.
toSpace <- content_transformer(function(x, pattern) {
  gsub(pattern, " ", x)
})

# Single characters to blank out before segmentation. They are applied one
# at a time, in this order, through the same tm_map(docs, toSpace, <char>)
# call each time.
stop_chars <- c(
  "的", "啊", "站", "有", "了", "是", "很", "都", "在",
  "和", "這", "讓", "跟", "就", "真", "上", "但", "又"
)
for (ch in stop_chars) {
  docs <- tm_map(docs, toSpace, ch)
}

# Remove punctuation and digits, then collapse repeated whitespace.
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, stripWhitespace)

製作文字雲

# jiebaR segmentation worker created with its default settings.
mixseg <- worker()

# Segment the first element of `d` with `mixseg` and return the result
# flattened by unlist().
jieba_tokenizer <- function(d) {
  unlist(segment(d[[1]], mixseg))
}

# Tokenize every document, then tabulate how often each token occurs.
seg <- lapply(docs, jieba_tokenizer)
freqFrame <- as.data.frame(table(unlist(seg)))

# Drop the first 34 rows of the frequency table.
# NOTE(review): the offset 34 is hard-coded; presumably it skips unwanted
# tokens that sort first in this particular data set -- confirm before
# reusing with other input.
freqFrame <- freqFrame[-seq_len(34), ]

# Draw the word cloud from the remaining tokens (columns Var1 and Freq).
wordcloud(
  words = freqFrame$Var1,
  freq = freqFrame$Freq,
  scale = c(5, 0.5),
  min.freq = 10,
  max.words = 50,
  random.order = FALSE,
  random.color = TRUE,
  rot.per = 0,
  colors = brewer.pal(8, "Dark2"),
  ordered.colors = FALSE,
  use.r.layout = FALSE,
  fixed.asp = TRUE
)

文字雲中最常出現的字就是“吃美食”這三個字，這很合理，因為這是介紹美食的粉專。另外，由於這是台北捷運美食通，所以“台北”這兩個字也蠻常出現。其他地名如“忠孝”、“板橋”、“新莊”、“中山”就是比較熱門的美食景點。“起司”、“吐司”、“甜點”、“咖啡”則是比較熱門的美食。