泛科學網站爬蟲

#徐晣彧-大數據分析-隨堂測驗
# Install packages
# String literals must use plain ASCII double quotes; typographic quotes
# (as produced by a blog/word-processor) are not valid R syntax.
# Only packages that are not yet installed are downloaded, so re-running the
# script does not reinstall everything each time.
required_pkgs <- c(
  "httr", "xml2", "rvest", "stringr", "dplyr", "ggplot2", "purrr"
)
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

#載入
library(httr)
library(xml2)
library(rvest)
library(stringr)
library(dplyr)
library(ggplot2)

# Fetch the web page (the /hots/month listing of pansci.asia)
pan <- GET("http://pansci.asia/hots/month")
# Stop with a clear HTTP error instead of silently parsing an error page.
stop_for_status(pan)
pan

# Show the raw response body as text.
content(pan, "text")

# Parse the response into an HTML document
pan_html <- read_html(pan)
pan_html

# Extract the article titles and view counts

# Titles: text content of every node matching the ".title" CSS selector.
pan_title <- html_text(html_nodes(pan_html, ".title"))
# Keep only elements 3..32 (30 entries).
# NOTE(review): the offset presumably skips non-article ".title" nodes at the
# top of the page; it depends on the page layout -- confirm if the site changes.
pan_title <- pan_title[3:32]
View(pan_title)

# View counts: text content of every node matching the ".last" CSS selector.
pan_view <- html_text(html_nodes(pan_html, ".last"))
# Keep only elements 5..34 (30 entries), so they line up with the 30 titles.
# NOTE(review): same layout-dependent offset caveat as above.
pan_view <- pan_view[5:34]
View(pan_view)

# Assemble the scraped vectors into one data frame: start from the titles,
# then attach the view counts converted from text to numbers.
pan_df <- data.frame(pan_title = pan_title)
pan_df$pan_view <- as.numeric(pan_view)
View(pan_df)

# Ranking of the 30 articles
# Rank by view count in descending order (most viewed = rank 1).
# rank(-x) is used instead of rev(rank(x)): rev() merely reverses the order of
# the elements, which equals the descending rank only when the rows are
# already sorted by view count from highest to lowest.
pan_df$pan_rank <- rank(-pan_df$pan_view)
# Level "1" for articles with more than 33000 views, otherwise "2".
pan_df$pan_level <- ifelse(pan_df$pan_view > 33000, "1", "2")
# Number of characters in each title.
pan_df$pan_titlenum <- nchar(as.character(pan_df$pan_title))
View(pan_df)

# Install wordcloud2 only when it is missing, then load it.
if (!requireNamespace("wordcloud2", quietly = TRUE)) {
  install.packages("wordcloud2")
}
library(wordcloud2)

# Draw the word cloud from pan_df.
# NOTE(review): wordcloud2 presumably reads the word and its frequency from the
# first two columns (pan_title and pan_view here) -- confirm against its docs.
wordcloud2(
  pan_df,
  size = 0.14,
  minSize = 1,
  gridSize = 2,
  color = "random-light",
  backgroundColor = "black"
)

# Print the author's identification: a number (presumably the student ID)
# followed by the author's name.
print("05154152")
print("徐晣彧")

R語言-網站爬蟲與文字雲
>>將內容用自己的手機、平板分享到.....
標籤:    

One thought on “R語言-網站爬蟲與文字雲”

發佈留言