匯入套件

library(httr)
library(rjson)
library(jiebaRD)
library(jiebaR)
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(stringr)
library(RColorBrewer)
library(wordcloud)

從 Facebook 取得習近平在十九大對黨的報告資料,進行分析

# Graph API access token. It is read from the FB_ACCESS_TOKEN environment
# variable so that the credential is not stored in the analysis itself.
token <- Sys.getenv("FB_ACCESS_TOKEN")
if (!nzchar(token)) {
  stop(
    "Set the FB_ACCESS_TOKEN environment variable to a valid Graph API ",
    "access token before running this analysis.",
    call. = FALSE
  )
}

# Field requested for every post.
query <- "message"

# Build the Graph API (v2.10) request URL for one post id, using the
# `query` and `token` defined above.
build_post_url <- function(post_id) {
  sprintf(
    "https://graph.facebook.com/v2.10/%s?fields=%s&access_token=%s",
    post_id, query, token
  )
}

# Download one Graph API URL and return the parsed JSON as a data frame.
# Stops with the HTTP status and response body when the request fails, so an
# API error payload is never mistaken for post data further down.
fetch_post <- function(url) {
  response <- GET(url)
  # Decode explicitly as UTF-8 instead of letting httr guess the encoding.
  body <- content(response, as = "text", encoding = "UTF-8")
  if (http_error(response)) {
    stop(
      sprintf(
        "Graph API request failed (HTTP %d): %s",
        status_code(response), body
      ),
      call. = FALSE
    )
  }
  as.data.frame(fromJSON(body))
}

# The five posts analysed below, fetched one by one.
id <- "243993392310512_1563534403689731"
url1 <- build_post_url(id)
page1 <- fetch_post(url1)

id <- "243993392310512_1563534820356356"
url2 <- build_post_url(id)
page2 <- fetch_post(url2)

id <- "243993392310512_1563535517022953"
url3 <- build_post_url(id)
page3 <- fetch_post(url3)

id <- "243993392310512_1563536387022866"
url4 <- build_post_url(id)
page4 <- fetch_post(url4)

id <- "243993392310512_1563537567022748"
url5 <- build_post_url(id)
page5 <- fetch_post(url5)

進行斷詞(將文字切分為詞語),並統計各詞出現次數

# jiebaR segmentation engine with default settings, shared by all passages.
slice <- worker()

# Count how often each token occurs and return the counts as a data frame.
# `column` is the name given to the token column; the counts go in `Freq`.
tabulate_tokens <- function(tokens, column) {
  as.data.frame(table(tokens, dnn = column))
}

# For each post: take its message text, segment it into words with the shared
# worker, then build the word-frequency table.
a1 <- as.character(page1$message)
slice1 <- segment(a1, slice)
word1 <- tabulate_tokens(slice1, "slice1")

a2 <- as.character(page2$message)
slice2 <- segment(a2, slice)
word2 <- tabulate_tokens(slice2, "slice2")

a3 <- as.character(page3$message)
slice3 <- segment(a3, slice)
word3 <- tabulate_tokens(slice3, "slice3")

a4 <- as.character(page4$message)
slice4 <- segment(a4, slice)
word4 <- tabulate_tokens(slice4, "slice4")

a5 <- as.character(page5$message)
slice5 <- segment(a5, slice)
word5 <- tabulate_tokens(slice5, "slice5")

刪除無意義用詞,如常見虛詞、單字詞,以及含英文字母或數字的詞(亂碼、數字等)

# Frequent words that carry no meaning for this analysis.
noise_terms <- c('必須', '各位', '因為', '所有')

# Clean one word-frequency table and sort it by descending frequency.
# The first column holds the tokens and `Freq` holds their counts.
# A row is kept only when its token
#   * is not one of `noise_terms`,
#   * is at least two characters long, and
#   * contains no ASCII letter or digit.
clean_word_table <- function(word_table) {
  term <- as.character(word_table[[1L]])
  keep <- !(term %in% noise_terms) &
    nchar(term) >= 2 &
    !str_detect(term, "[a-zA-Z0-9]+")
  word_table %>%
    filter(keep) %>%
    arrange(desc(Freq))
}

word1 <- clean_word_table(word1)
word2 <- clean_word_table(word2)
word3 <- clean_word_table(word3)
word4 <- clean_word_table(word4)
word5 <- clean_word_table(word5)

製作文字雲

# Draw a word cloud from the first `top_n` rows of a word-frequency table.
#
# `word_table` has the tokens in its first column and their counts in `Freq`,
# and is expected to be sorted by descending `Freq` already, so the first
# `top_n` rows are the most frequent words.
#
# Words and frequencies are taken from the same rows. Passing a truncated
# word vector together with the full frequency vector would misalign the two
# and introduce NA words.
plot_top_words <- function(word_table, top_n = 60) {
  top <- head(word_table, top_n)
  if (nrow(top) == 0) {
    warning("No words left to plot; skipping word cloud.", call. = FALSE)
    return(invisible(NULL))
  }
  wordcloud(
    # Select the token column by position and pass plain strings rather than
    # a factor.
    words = as.character(top[[1L]]),
    freq = top$Freq,
    min.freq = 3,
    random.order = FALSE
  )
  invisible(NULL)
}

plot_top_words(word1)

plot_top_words(word2)

plot_top_words(word3)

plot_top_words(word4)

plot_top_words(word5)

從五段話的文字雲可看出習近平不斷提到中國特色、社會主義、實現、經濟、發展、建設、改革等,而也見到幾個與以往不同的詞,如:夢想、生態等,整體可看出習近平的雄心,同時也印證了中國近年來的快速發展,他要把中國的實力展現在世界舞台,習近平提出的中國夢,更是希望凝聚中國各個民族,把中國推向更好的未來。