# Load the trip and station tables from CSV files in the working directory.
Hubway_Trips <- read.csv("hubway_trips.csv")
Hubway_Stations <- read.csv("hubway_stations.csv")

# Treat identifier-like columns as categorical rather than numeric data:
# hubway_id and subsc_type become factors, while the start/end station
# identifiers become character strings.
Hubway_Trips[c("hubway_id", "subsc_type")] <-
  lapply(Hubway_Trips[c("hubway_id", "subsc_type")], as.factor)
Hubway_Trips[c("strt_statn", "end_statn")] <-
  lapply(Hubway_Trips[c("strt_statn", "end_statn")], as.character)
Hubway現行收費規定註明,年度用戶使用超過90分鐘,每30分鐘即加收6美金。本題希望透過鼓勵定點借還有效減少補車作業,以中短期定點借還使用時間的平均值為基準,提供優惠費率給定點借還時間低於平均值的用戶。
## [1] "C"
# Compute the duration of every trip: parse the checkout and return
# timestamps (month/day/year hour:minute:second) and store the difference,
# in minutes, in the new column start_end.
Hubway_Trips$start_date <- as.POSIXct(Hubway_Trips$start_date,
                                      format = "%m/%d/%Y %H:%M:%S")
Hubway_Trips$end_date <- as.POSIXct(Hubway_Trips$end_date,
                                    format = "%m/%d/%Y %H:%M:%S")
Hubway_Trips$start_end <- difftime(Hubway_Trips$end_date,
                                   Hubway_Trips$start_date,
                                   units = "mins")

# Remove meaningless records, i.e. trips whose duration is exactly zero.
# which() keeps only positions where the comparison is TRUE, so a trip whose
# timestamps failed to parse (NA duration) is dropped as well; a plain logical
# index would instead turn each such trip into a row made entirely of NA.
Hubway_Trips <- Hubway_Trips[which(Hubway_Trips$start_end != 0), ]
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Medium/short-term same-station trips: the bike is returned to the station it
# was taken from and the trip lasts more than 90 minutes but less than 24
# hours. The result keeps only the id, duration and station columns, is grouped
# by hubway_id and start station, and is sorted by descending duration.
ShortTerm <- Hubway_Trips %>%
  filter(
    strt_statn == end_statn,
    start_end > 90,
    start_end < 24 * 60
  ) %>%
  select(hubway_id, start_end, strt_statn, end_statn) %>%
  group_by(hubway_id, strt_statn) %>%
  arrange(desc(start_end))
## Warning: package 'bindrcpp' was built under R version 3.3.3
# Average duration of the same-station trips collected in ShortTerm
# (missing durations are ignored).
ShortTerm_mean <- mean(ShortTerm[["start_end"]], na.rm = TRUE)

# Keep the same-station trips that are shorter than that average and regroup
# them by hubway_id.
ShortTerm_users <- group_by(
  filter(ShortTerm, start_end < ShortTerm_mean),
  hubway_id
)
以Hubway現行收費規定,使用24小時高達288美金,但仍有不少用戶占用車輛超過一天。本題希望找出這些長期借用的用戶,進一步了解包車的原因,並提供方案避免此現象再發生。
# Long-term trips: every trip lasting more than 24 hours. The result keeps only
# the id, duration and station columns, is grouped by hubway_id and start
# station, and is sorted by descending duration.
LongTerm <- Hubway_Trips %>%
  filter(start_end > 24 * 60) %>%
  select(hubway_id, start_end, strt_statn, end_statn) %>%
  group_by(hubway_id, strt_statn) %>%
  arrange(desc(start_end))
假設Casual用戶為短期來訪的觀光客,分析用戶還車地是否鄰近Boston的著名觀光景點。
# Trips made by casual subscribers, reduced to the id and return-station
# columns and grouped by return station.
Casual_users <- group_by(
  select(
    filter(Hubway_Trips, subsc_type == "Casual"),
    hubway_id, end_statn
  ),
  end_statn
)

# Count the casual trips ending at each station, attach the station details
# (merge keeps only return stations whose id appears in Hubway_Stations), and
# order the stations from most to fewest returns.
Casual_users_dest <- Casual_users %>%
  summarise(users_destination = n()) %>%
  merge(Hubway_Stations, by.x = "end_statn", by.y = "id") %>%
  arrange(desc(users_destination))
羅列十大casual users的還車地。
# Print the ten return stations with the most casual-user trips
# (Casual_users_dest is already sorted by descending trip count).
head(Casual_users_dest, n = 10L)
## end_statn users_destination terminal
## 1 36 9499 D32005
## 2 42 8861 D32007
## 3 60 6101 D32016
## 4 33 5786 B32010
## 5 53 5785 B32016
## 6 52 5740 B32000
## 7 58 5671 D32017
## 8 38 5578 D32003
## 9 22 5328 A32010
## 10 20 5199 B32004
## station municipal lat
## 1 Boston Public Library - 700 Boylston St. Boston 42.34967
## 2 Boylston St. at Arlington St. Boston 42.35210
## 3 Charles Circle - Charles St. at Cambridge St. Boston 42.36088
## 4 Kenmore Sq / Comm Ave Boston 42.34905
## 5 Beacon St / Mass Ave Boston 42.35085
## 6 Newbury St / Hereford St Boston 42.34872
## 7 The Esplanade - Beacon St. at Arlington St. Boston 42.35560
## 8 TD Garden - Legends Way Boston 42.36623
## 9 South Station - 700 Atlantic Ave. Boston 42.35218
## 10 Aquarium Station - 200 Atlantic Ave. Boston 42.35977
## lng status
## 1 -71.07730 Existing
## 2 -71.07038 Existing
## 3 -71.07131 Removed
## 4 -71.09683 Existing
## 5 -71.08989 Existing
## 6 -71.08595 Existing
## 7 -71.07278 Existing
## 8 -71.06087 Removed
## 9 -71.05555 Existing
## 10 -71.05160 Existing
以圖呈現十大casual users的還車地。
library(leaflet)
## Warning: package 'leaflet' was built under R version 3.3.3
# Coordinates and names of the ten most popular casual-user return stations.
# Columns are selected by name rather than by position so the code keeps
# working if the column order of the merged table changes, and head() is used
# instead of 1:10 so a table with fewer than ten stations does not yield NA
# coordinates.
tenlat <- head(Casual_users_dest[["lat"]], 10)
tenlng <- head(Casual_users_dest[["lng"]], 10)
tenname <- head(Casual_users_dest[["station"]], 10)

# Interactive map: default base tiles plus one marker per station; clicking a
# marker shows the station name.
map <- leaflet() %>%
  addTiles() %>%
  addMarkers(
    lat = tenlat,
    lng = tenlng,
    popup = tenname
  )
map
以圖呈現Boston的八大觀光景點。
library(leaflet)

# Load the list of tourist attractions from the working directory.
Tourist_Attraction <- read.csv("tourist_attraction.csv")

# First eight attractions. Columns are taken by position: column 1 is used as
# the marker popup text, column 2 as latitude and column 3 as longitude.
# NOTE(review): the column names of tourist_attraction.csv are not visible
# here -- confirm this layout against the file.
eightname <- Tourist_Attraction[[1]][1:8]
eightlat <- Tourist_Attraction[[2]][1:8]
eightlng <- Tourist_Attraction[[3]][1:8]

# Interactive map: default base tiles plus one marker per attraction.
map <- addMarkers(
  addTiles(leaflet()),
  lat = eightlat,
  lng = eightlng,
  popup = eightname
)
map