通常飛機延誤的原因包括下列幾點:

library(nycflights13)
library(knitr)
library(lubridate)
library(dplyr)
library(hflights)
library(ggplot2)

使用之資料如下:

飛機航班資訊

# Preview the first rows of the flight records as a formatted table.
hflights %>%
  head() %>%
  kable()
Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier FlightNum TailNum ActualElapsedTime AirTime ArrDelay DepDelay Origin Dest Distance TaxiIn TaxiOut Cancelled CancellationCode Diverted
5424 2011 1 1 6 1400 1500 AA 428 N576AA 60 40 -10 0 IAH DFW 224 7 13 0 0
5425 2011 1 2 7 1401 1501 AA 428 N557AA 60 45 -9 1 IAH DFW 224 6 9 0 0
5426 2011 1 3 1 1352 1502 AA 428 N541AA 70 48 -8 -8 IAH DFW 224 5 17 0 0
5427 2011 1 4 2 1403 1513 AA 428 N403AA 70 39 3 3 IAH DFW 224 9 22 0 0
5428 2011 1 5 3 1405 1507 AA 428 N492AA 62 44 -3 5 IAH DFW 224 9 9 0 0
5429 2011 1 6 4 1359 1503 AA 428 N262AA 64 45 -7 -1 IAH DFW 224 6 13 0 0

機場位置資訊

# Preview the first rows of the airport location table.
airports %>%
  head() %>%
  kable()
faa name lat lon alt tz dst tzone
04G Lansdowne Airport 41.13047 -80.61958 1044 -5 A America/New_York
06A Moton Field Municipal Airport 32.46057 -85.68003 264 -6 A America/Chicago
06C Schaumburg Regional 41.98934 -88.10124 801 -6 A America/Chicago
06N Randall Airport 41.43191 -74.39156 523 -5 A America/New_York
09J Jekyll Island Airport 31.07447 -81.42778 11 -5 A America/New_York
0A9 Elizabethton Municipal Airport 36.37122 -82.17342 1593 -5 A America/New_York

飛機製造資訊

# Preview the first rows of the aircraft metadata table.
planes %>%
  head() %>%
  kable()
tailnum year type manufacturer model engines seats speed engine
N10156 2004 Fixed wing multi engine EMBRAER EMB-145XR 2 55 NA Turbo-fan
N102UW 1998 Fixed wing multi engine AIRBUS INDUSTRIE A320-214 2 182 NA Turbo-fan
N103US 1999 Fixed wing multi engine AIRBUS INDUSTRIE A320-214 2 182 NA Turbo-fan
N104UW 1999 Fixed wing multi engine AIRBUS INDUSTRIE A320-214 2 182 NA Turbo-fan
N10575 2002 Fixed wing multi engine EMBRAER EMB-145LR 2 55 NA Turbo-fan
N105UW 1999 Fixed wing multi engine AIRBUS INDUSTRIE A320-214 2 182 NA Turbo-fan

飛機延誤抵達機場與飛機機齡(使用年數)關聯分析

# Mean arrival delay per aircraft, paired with the aircraft's age.
# Flights are matched to aircraft metadata on the tail number; the inner join
# drops flights whose tail number has no entry in `planes`.
# `age` is the flight year minus the manufacture year, so it is NA whenever
# the manufacture year is missing.
planes_info = hflights %>%
  inner_join(planes, by = c("TailNum" = "tailnum")) %>%
  select(TailNum, ArrDelay, Year, manufacturer, produce_year = year) %>%
  mutate(age = Year - produce_year) %>%
  group_by(TailNum, manufacturer, age) %>%
  summarise(avg_delay = mean(ArrDelay, na.rm = TRUE)) %>%
  # summarise() only peels off the last grouping level; drop the remaining
  # groups so later verbs operate on the whole table rather than per group.
  ungroup()

# Scatter plot of aircraft age against mean arrival delay, restricted to
# aircraft whose mean arrival delay is positive.
ggplot(
  data = filter(planes_info, avg_delay > 0),
  mapping = aes(x = age, y = avg_delay)
) +
  geom_point(alpha = 0.5)

飛機延誤抵達機場與機場所在地關聯分析

# Ten destinations with the highest mean arrival delay, with the airport
# table's columns (including lon/lat) attached for mapping.
# The join runs before the top-10 cut: inner_join() drops destinations that
# have no row in `airports`, so cutting first could leave fewer than ten rows.
avg_dest_delay_top10 = hflights %>%
  group_by(Dest) %>%
  summarise(delay = mean(ArrDelay, na.rm = TRUE)) %>%
  inner_join(airports, by = c("Dest" = "faa")) %>%
  arrange(desc(delay)) %>%
  head(10)

# Plot the selected destination airports by longitude/latitude on top of the
# "state" boundary outlines, with point colour encoding the mean arrival delay.
ggplot(data = avg_dest_delay_top10, mapping = aes(x = lon, y = lat)) +
  borders(database = "state") +
  geom_point(mapping = aes(colour = delay)) +
  coord_quickmap()

Q&A

# Tally how many weather observations share each recorded wind speed,
# then render the first rows of that tally as a table.
wind_speed_dist = count(weather, wind_speed)
wind_speed_dist %>%
  head() %>%
  kable()
wind_speed n
0.00000 1330
3.45234 1252
4.60312 1682
5.75390 2001
6.90468 2306
8.05546 2287