資料來源

Hubway Data — 波士頓共享單車使用數據

讀取資料

# Load the two Hubway tables from CSV files in the working directory:
# one table of stations and one table of trips.
Hubway_Stations <- read.csv(file = "hubway_stations.csv")
Hubway_Trips <- read.csv(file = "hubway_trips.csv")

資料定義

# Store categorical trip columns with explicit types:
# the user id and subscription type become factors, while the start/end
# station ids become character strings.
factor_columns <- c("hubway_id", "subsc_type")
Hubway_Trips[factor_columns] <- lapply(Hubway_Trips[factor_columns], as.factor)
rm(factor_columns)
Hubway_Trips[c("strt_statn", "end_statn")] <- lapply(
  Hubway_Trips[c("strt_statn", "end_statn")],
  as.character
)

題目一:計算中短期定點借還使用時間(介於90分鐘到24小時)的平均值

Hubway現行收費規定註明,年度用戶使用超過90分鐘,每30分鐘即加收6美金。本題希望透過鼓勵定點借還有效減少補車作業,以中短期定點借還使用時間的平均值為基準,提供優惠費率給定點借還時間低於平均值的用戶。

資料整理

## [1] "C"
# Compute the rental duration (in minutes) of every trip.
# Timestamps are parsed as month/day/year hour:minute:second; values that do
# not match this format become NA.
# NOTE(review): no `tz` is supplied, so parsing uses the session time zone —
# confirm this is intended for this data set.
Hubway_Trips$start_date <- as.POSIXct(Hubway_Trips$start_date,
                                      format = "%m/%d/%Y %H:%M:%S")
Hubway_Trips$end_date <- as.POSIXct(Hubway_Trips$end_date,
                                    format = "%m/%d/%Y %H:%M:%S")
Hubway_Trips$start_end <- difftime(Hubway_Trips$end_date,
                                   Hubway_Trips$start_date,
                                   units = "mins")

# Remove trips with a meaningless duration (zero minutes, or unknown because
# a timestamp failed to parse). which() is used instead of a bare logical
# index because an NA in a logical row index would insert an all-NA row
# rather than drop the trip.
Hubway_Trips <- Hubway_Trips[which(Hubway_Trips$start_end != 0), ]

資料擷取

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Extract short/medium-term round trips: trips returned to the station they
# started from, lasting more than 90 minutes and less than 24 hours.
# The result is grouped by user id and start station and listed from the
# longest to the shortest duration.
ShortTerm <- Hubway_Trips %>%
  filter(
    strt_statn == end_statn,
    start_end > 90,
    start_end < 24 * 60
  ) %>%
  select(hubway_id, start_end, strt_statn, end_statn) %>%
  group_by(hubway_id, strt_statn) %>%
  arrange(desc(start_end))
## Warning: package 'bindrcpp' was built under R version 3.3.3
# Average duration of the selected round trips, ignoring missing values
ShortTerm_mean <- mean(ShortTerm[["start_end"]], na.rm = TRUE)

資料呈現

# Keep the round trips whose duration is below the average, then group the
# remaining rows by user id.
ShortTerm_users <- filter(ShortTerm, start_end < ShortTerm_mean)
ShortTerm_users <- group_by(ShortTerm_users, hubway_id)

題目二:找出長期借還(使用時間超過24小時)的用戶,提供方案避免包車現象

依Hubway現行收費規定,使用24小時的費用高達288美金,但仍有不少用戶占用車輛超過一天。本題希望找出這些長期借用的用戶,進一步了解包車的原因,並提供方案避免此現象再發生。

資料擷取

# Extract long-term rentals (duration above 24 hours), grouped by user id
# and start station and listed from the longest to the shortest duration.
LongTerm <- Hubway_Trips %>%
  filter(start_end > 24 * 60) %>%
  select(hubway_id, start_end, strt_statn, end_statn) %>%
  group_by(hubway_id, strt_statn) %>%
  arrange(desc(start_end))

題目三:分析Casual用戶的還車地和觀光景點分布的關係

假設Casual用戶為短期來訪的觀光客,分析用戶還車地是否鄰近Boston的著名觀光景點。

資料擷取

# Trips made by "Casual" subscribers, reduced to the user id and the end
# station and grouped by end station.
Casual_users <- filter(Hubway_Trips, subsc_type == "Casual")
Casual_users <- select(Casual_users, hubway_id, end_statn)
Casual_users <- group_by(Casual_users, end_statn)

資料彙整

# Count the casual trips that ended at each end station
# (one row per end station; the count is stored in `users_destination`).
Casual_users_dest <- Casual_users %>%
  summarise(users_destination = n())

資料合併

# Attach the station details to each end-station count (end station id
# matched against the station table's `id`), then order the stations from
# the most to the fewest casual drop-offs.
Casual_users_dest <- Casual_users_dest %>%
  merge(Hubway_Stations, by.x = "end_statn", by.y = "id") %>%
  arrange(desc(users_destination))

資料呈現

羅列十大casual users的還車地。

# Print the ten end stations with the most casual drop-offs
head(Casual_users_dest, n = 10)
##    end_statn users_destination terminal
## 1         36              9499   D32005
## 2         42              8861   D32007
## 3         60              6101   D32016
## 4         33              5786   B32010
## 5         53              5785   B32016
## 6         52              5740   B32000
## 7         58              5671   D32017
## 8         38              5578   D32003
## 9         22              5328   A32010
## 10        20              5199   B32004
##                                          station municipal      lat
## 1       Boston Public Library - 700 Boylston St.    Boston 42.34967
## 2                  Boylston St. at Arlington St.    Boston 42.35210
## 3  Charles Circle - Charles St. at Cambridge St.    Boston 42.36088
## 4                          Kenmore Sq / Comm Ave    Boston 42.34905
## 5                           Beacon St / Mass Ave    Boston 42.35085
## 6                       Newbury St / Hereford St    Boston 42.34872
## 7    The Esplanade - Beacon St. at Arlington St.    Boston 42.35560
## 8                        TD Garden - Legends Way    Boston 42.36623
## 9              South Station - 700 Atlantic Ave.    Boston 42.35218
## 10          Aquarium Station - 200 Atlantic Ave.    Boston 42.35977
##          lng   status
## 1  -71.07730 Existing
## 2  -71.07038 Existing
## 3  -71.07131  Removed
## 4  -71.09683 Existing
## 5  -71.08989 Existing
## 6  -71.08595 Existing
## 7  -71.07278 Existing
## 8  -71.06087  Removed
## 9  -71.05555 Existing
## 10 -71.05160 Existing

繪圖

以圖呈現十大casual users的還車地。

library(leaflet)
## Warning: package 'leaflet' was built under R version 3.3.3
# Coordinates and names of the ten busiest casual end stations.
# Columns are selected by name rather than by position so the code keeps
# working if the merged table's column order changes, and head() is used so
# that a table with fewer than ten stations does not yield NA entries.
tenlat <- head(Casual_users_dest[["lat"]], 10)
tenlng <- head(Casual_users_dest[["lng"]], 10)
tenname <- head(Casual_users_dest[["station"]], 10)

# Draw one marker per station on a tiled base map; clicking a marker shows
# the station name.
map <- leaflet() %>%
  addTiles() %>%
  addMarkers(
    lat = tenlat,
    lng = tenlng,
    popup = tenname
  )
map

以圖呈現Boston的八大觀光景點。

# Load the tourist attraction table from the working directory
Tourist_Attraction <- read.csv(file = "tourist_attraction.csv")

library(leaflet)
# First eight rows of the table, addressed by column position:
# column 2 is passed as latitude, column 3 as longitude, column 1 as popup.
# NOTE(review): column meanings are inferred from how they are used below —
# confirm against the CSV header.
eightlat <- Tourist_Attraction[[2]][1:8]
eightlng <- Tourist_Attraction[[3]][1:8]
eightname <- Tourist_Attraction[[1]][1:8]

# Tiled base map with one marker per attraction
map <- addMarkers(
  addTiles(leaflet()),
  lat = eightlat,
  lng = eightlng,
  popup = eightname
)

map