1. Self-Reported Heights
Density plots and empirical cumulative distribution plots for the two groups:
library(patchwork)
data(heights, package = "dslabs")
thm <- theme_minimal() + theme(text = element_text(size = 16))
p1 <- ggplot(heights, aes(x = height, fill = sex)) +
geom_density(alpha = 0.6) +
thm + theme(legend.position = "top")
p2 <- ggplot(heights, aes(x = height, color = sex)) +
stat_ecdf() +
thm + theme(legend.position = "top")
p1 | p2
The density plots make it easier to identify general characteristics of the distribution: close to symmetric, not too far from bell-shaped.
Reading off percentiles, such as medians of quartiles, is easier in the eCDF plots.
2. Top Fleet Highway Gas Mileages
The following table shows the manufacturers with top five fleet average highway gas mileage values and the corresponging average city gas mileages:
library(readr)
if (! file.exists("vehicles.csv.zip"))
download.file("http://www.stat.uiowa.edu/~luke/data/vehicles.csv.zip",
"vehicles.csv.zip")
newmpg <- read_csv("vehicles.csv.zip", guess_max = 100000)
tbl <- filter(newmpg, year == 2023) |>
group_by(make) |>
summarize(avg_hwy = mean(highway08),
avg_cty = mean(city08)) |>
ungroup() |>
slice_max(avg_hwy, n = 5) |>
arrange(desc(avg_hwy))
names(tbl) <- c("Make", "Highway", "City")
kbl <- knitr::kable(tbl, format = "html", digits = 1)
kableExtra::kable_styling(kbl, full_width = FALSE)
Make
|
Highway
|
City
|
Lucid
|
125.2
|
125.2
|
Tesla
|
109.1
|
118.9
|
Polestar
|
95.8
|
105.2
|
Fisker
|
84.0
|
99.0
|
Vinfast
|
81.5
|
85.5
|
3. Top Destinations
The top four destinations are shown in the following table:
top_nyc_dest <- count(flights, dest) |> slice_max(n, n = 4)
kbl <- knitr::kable(top_nyc_dest, format = "html")
kableExtra::kable_styling(kbl, full_width = FALSE)
dest
|
n
|
ORD
|
17283
|
ATL
|
17215
|
LAX
|
16174
|
BOS
|
15508
|
The flights to these locations can be selected using semi_join
. After using count
to compute the number of flights to each origin/destination the counts for each destination can be converted to proportion with a grouped mutate
. The resulting faceted bar chart:
top_nyc_dest <- count(flights, dest) |> slice_max(n, n = 4)
semi_join(flights, top_nyc_dest, "dest") |>
count(origin, dest) |>
group_by(dest) |>
mutate(prop = n / sum(n)) |>
ungroup() |>
ggplot() +
geom_col(aes(x = origin, y = prop), fill = "deepskyblue3") +
facet_wrap(~ dest, ncol = 1) +
thm +
labs(x = "Origin", y = "Proportion")
There are no flights from LGA to LAX and few from JFK to either ATL or ORD.
4. Summer-Only Destinations
fs <- filter(flights, month %in% 6 : 8) |> select(dest) |> unique()
fn <- filter(flights, ! (month %in% 6 : 8)) |> select(dest) |> unique()
summer_only <- anti_join(fs, fn, "dest")
summer_dest <- semi_join(airports, summer_only, c(faa = "dest")) |>
select(faa, name)
flight_counts <- count(flights, dest)
summer_dest <- left_join(summer_dest, flight_counts, c("faa" = "dest"))
There were 3 destinations with flights only in June through August:
knitr::kable(summer_dest, format = "html") |>
kableExtra::kable_styling(full_width = FALSE)
faa
|
name
|
n
|
ANC
|
Ted Stevens Anchorage Intl
|
8
|
LGA
|
La Guardia
|
1
|
TVC
|
Cherry Capital Airport
|
101
|
Apparently there was a flight on US Airways from EWR to LGA on July 27.
5. High Altitude Destinations
The destinations at altitudes above 5,000 feet and the number of flights in 2013 from New York City to these destinations were:
airports1 <- select(airports, faa, name, alt)
left_join(flights, airports1, c("dest" = "faa")) |>
filter(alt > 5000) |>
count(dest, name, alt) |>
select(FAA = dest, Name = name, Altitude = alt, Flights = n) |>
knitr::kable(format = "html") |>
kableExtra::kable_styling(full_width = FALSE)
FAA
|
Name
|
Altitude
|
Flights
|
ABQ
|
Albuquerque International Sunport
|
5355
|
254
|
DEN
|
Denver Intl
|
5431
|
7266
|
EGE
|
Eagle Co Rgnl
|
6540
|
213
|
HDN
|
Yampa Valley
|
6602
|
15
|
JAC
|
Jackson Hole Airport
|
6451
|
25
|
MTJ
|
Montrose Regional Airport
|
5759
|
15
|
LS0tCnRpdGxlOiAiQXNzaWdubWVudCA2IE5vdGVzIgpvdXRwdXQ6CiAgaHRtbF9kb2N1bWVudDoKICAgIHRvYzogeWVzCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlCiAgICBjb2RlX2ZvbGRpbmc6ICJoaWRlIgotLS0KCmBgYHtyIGdsb2JhbF9vcHRpb25zLCBpbmNsdWRlID0gRkFMU0V9CmtuaXRyOjpvcHRzX2NodW5rJHNldChjb2xsYXBzZSA9IFRSVUUpCmBgYAoKYGBge3IsIGluY2x1ZGUgPSBGQUxTRX0KbGlicmFyeShkcGx5cikKbGlicmFyeShueWNmbGlnaHRzMTMpCmxpYnJhcnkoZ2dwbG90MikKCmBgYAoKCiMjIDEuIFNlbGYtUmVwb3J0ZWQgSGVpZ2h0cwoKRGVuc2l0eSBwbG90cyBhbmQgZW1waXJpY2FsIGN1bXVsYXRpdmUgZGlzdHJpYnV0aW9uIHBsb3RzIGZvciB0aGUgdHdvIGdyb3VwczoKCmBgYHtyLCBmaWcud2lkdGggPSA5LCBmaWcuaGVpZ2h0ID0gNX0KbGlicmFyeShwYXRjaHdvcmspCmRhdGEoaGVpZ2h0cywgcGFja2FnZSA9ICJkc2xhYnMiKQp0aG0gPC0gdGhlbWVfbWluaW1hbCgpICsgdGhlbWUodGV4dCA9IGVsZW1lbnRfdGV4dChzaXplID0gMTYpKQpwMSA8LSBnZ3Bsb3QoaGVpZ2h0cywgYWVzKHggPSBoZWlnaHQsIGZpbGwgPSBzZXgpKSArCiAgICBnZW9tX2RlbnNpdHkoYWxwaGEgPSAwLjYpICsKICAgIHRobSArIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICJ0b3AiKQpwMiA8LSBnZ3Bsb3QoaGVpZ2h0cywgYWVzKHggPSBoZWlnaHQsIGNvbG9yID0gc2V4KSkgKwogICAgc3RhdF9lY2RmKCkgKwogICAgdGhtICsgdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gInRvcCIpCnAxIHwgcDIKYGBgCgpUaGUgZGVuc2l0eSBwbG90cyBtYWtlIGl0IGVhc2llciB0byBpZGVudGlmeSBnZW5lcmFsIGNoYXJhY3RlcmlzdGljcwpvZiB0aGUgZGlzdHJpYnV0aW9uOiBjbG9zZSB0byBzeW1tZXRyaWMsIG5vdCB0b28gZmFyIGZyb20gYmVsbC1zaGFwZWQuCgpSZWFkaW5nIG9mZiBwZXJjZW50aWxlcywgc3VjaCBhcyBtZWRpYW5zIG9mIHF1YXJ0aWxlcywgaXMgZWFzaWVyIGluCnRoZSBlQ0RGIHBsb3RzLgoKCiMjIDIuIFRvcCBGbGVldCBIaWdod2F5IEdhcyBNaWxlYWdlcwoKVGhlIGZvbGxvd2luZyB0YWJsZSBzaG93cyB0aGUgbWFudWZhY3R1cmVycyB3aXRoIHRvcCBmaXZlIGZsZWV0CmF2ZXJhZ2UgaGlnaHdheSBnYXMgbWlsZWFnZSB2YWx1ZXMgYW5kIHRoZSBjb3JyZXNwb25naW5nIGF2ZXJhZ2UgY2l0eQpnYXMgbWlsZWFnZXM6CgpgYGB7ciwgbWVzc2FnZSA9IEZBTFNFfQpsaWJyYXJ5KHJlYWRyKQppZiAoISBmaWxlLmV4aXN0cygidmVoaWNsZXMuY3N2LnppcCIpKQogICAgZG93bmxvYWQuZmlsZSgiaHR0cDovL3d3dy5zdGF0LnVpb3dhLmVkdS9+bHVrZS9kYXRhL3ZlaGljbGVzLmNzdi56aXAiLAogICAgICAgICAgICAgICAgICAidmVoaWNsZXMuY3N2LnppcCIpCm5ld21wZyA8LSByZWFkX2NzdigidmVoaWNsZXMuY3N2LnppcCIsIGd1ZXNzX21heCA9IDEwMDAwMCkKCnRibCA8LSBmaWx0ZXIobmV3bXBnLCB5ZWFyID09IDIwMjMpIHw+CiAgICBncm91cF9ieShtYWtlKSB8PgogICAgc3VtbWFyaXplKGF2Z19od3kgPSBtZWFuKGhpZ2h3YXkwOCksCiAgICAgICAgICAgICAgYXZnX2N0eSA9IG1lYW4oY2l0eTA4KSkgfD4KICAgIHVuZ3JvdXAoKSB8PgogICAgc2xpY2VfbWF4KGF2Z19od3ksIG4gPSA1KSB8PgogICAgYXJyYW5nZShkZXNjKGF2Z19od3kpKQpuYW1lcyh0YmwpIDwtIGMoIk1ha2UiLCAiSGlnaHdheSIsICJDaXR5IikKa2JsIDwtIGtuaXRyOjprYWJsZSh0YmwsIGZvcm1hdCA9ICJodG1sIiwgZGlnaXRzID0gMSkKa2FibGVFeHRyYTo6a2FibGVfc3R5bGluZyhrYmwsIGZ1bGxfd2lkdGggPSBGQUxTRSkKYGBgCgojIyAzLiBUb3AgRGVzdGluYXRpb25zCgpUaGUgdG9wIGZvdXIgZGVzdGluYXRpb25zIGFyZSBzaG93biBpbiB0aGUgZm9sbG93aW5nIHRhYmxlOgoKYGBge3J9CnRvcF9ueWNfZGVzdCA8LSBjb3VudChmbGlnaHRzLCBkZXN0KSB8PiBzbGljZV9tYXgobiwgbiA9IDQpCmtibCA8LSBrbml0cjo6a2FibGUodG9wX255Y19kZXN0LCBmb3JtYXQgPSAiaHRtbCIpCmthYmxlRXh0cmE6OmthYmxlX3N0eWxpbmcoa2JsLCBmdWxsX3dpZHRoID0gRkFMU0UpCmBgYAoKVGhlIGZsaWdodHMgdG8gdGhlc2UgbG9jYXRpb25zIGNhbiBiZSBzZWxlY3RlZCB1c2luZyBgc2VtaV9qb2luYC4KQWZ0ZXIgdXNpbmcgYGNvdW50YCB0byBjb21wdXRlIHRoZSBudW1iZXIgb2YgZmxpZ2h0cyB0byBlYWNoCm9yaWdpbi9kZXN0aW5hdGlvbiB0aGUgY291bnRzIGZvciBlYWNoIGRlc3RpbmF0aW9uIGNhbiBiZSBjb252ZXJ0ZWQgdG8KcHJvcG9ydGlvbiB3aXRoIGEgZ3JvdXBlZCBgbXV0YXRlYC4gVGhlIHJlc3VsdGluZyBmYWNldGVkIGJhciBjaGFydDoKCmBgYHtyfQp0b3BfbnljX2Rlc3QgPC0gY291bnQoZmxpZ2h0cywgZGVzdCkgfD4gc2xpY2VfbWF4KG4sIG4gPSA0KQoKc2VtaV9qb2luKGZsaWdodHMsIHRvcF9ueWNfZGVzdCwgImRlc3QiKSB8PgogICAgY291bnQob3JpZ2luLCBkZXN0KSB8PgogICAgZ3JvdXBfYnkoZGVzdCkgfD4KICAgIG11dGF0ZShwcm9wID0gbiAvIHN1bShuKSkgfD4KICAgIHVuZ3JvdXAoKSB8PgogICAgZ2dwbG90KCkgKwogICAgZ2VvbV9jb2woYWVzKHggPSBvcmlnaW4sIHkgPSBwcm9wKSwgZmlsbCA9ICJkZWVwc2t5Ymx1ZTMiKSArCiAgICBmYWNldF93cmFwKH4gZGVzdCwgbmNvbCA9IDEpICsKICAgIHRobSArCiAgICBsYWJzKHggPSAiT3JpZ2luIiwgeSA9ICJQcm9wb3J0aW9uIikKYGBgCgpUaGVyZSBhcmUgbm8gZmxpZ2h0cyBmcm9tIExHQSB0byBMQVggYW5kIGZldyBmcm9tIEpGSyB0byBlaXRoZXIgQVRMIG9yCk9SRC4KCgojIyA0LiBTdW1tZXItT25seSBEZXN0aW5hdGlvbnMKCmBgYHtyfQpmcyA8LSBmaWx0ZXIoZmxpZ2h0cywgbW9udGggJWluJSA2IDogOCkgfD4gc2VsZWN0KGRlc3QpIHw+IHVuaXF1ZSgpCmZuIDwtIGZpbHRlcihmbGlnaHRzLCAhIChtb250aCAlaW4lIDYgOiA4KSkgfD4gc2VsZWN0KGRlc3QpIHw+IHVuaXF1ZSgpCnN1bW1lcl9vbmx5IDwtIGFudGlfam9pbihmcywgZm4sICJkZXN0IikKc3VtbWVyX2Rlc3QgPC0gc2VtaV9qb2luKGFpcnBvcnRzLCBzdW1tZXJfb25seSwgYyhmYWEgPSAiZGVzdCIpKSB8PgogICBzZWxlY3QoZmFhLCBuYW1lKQpmbGlnaHRfY291bnRzIDwtIGNvdW50KGZsaWdodHMsIGRlc3QpCnN1bW1lcl9kZXN0IDwtIGxlZnRfam9pbihzdW1tZXJfZGVzdCwgZmxpZ2h0X2NvdW50cywgYygiZmFhIiA9ICJkZXN0IikpCmBgYAoKYGBge3IsIGVjaG8gPSBGQUxTRX0Kc2QxIDwtIGdyb3VwX2J5KGZsaWdodHMsIGRlc3QpIHw+CiAgICBzdW1tYXJpemUobiA9IG4oKSwKCSAgICAgICAgICBzdW1tZXJfb25seSA9IGFueShtb250aCAlaW4lIDY6OCkgJgoJCQkgICAgICAgICAgICAgICAgISBhbnkobW9udGggPD0gNSkgJgoJCQkJCQkJISBhbnkobW9udGggPj0gOSkpIHw+Cgl1bmdyb3VwKCkgfD4KCWZpbHRlcihzdW1tZXJfb25seSkgfD4KCWxlZnRfam9pbihzZWxlY3QoYWlycG9ydHMsIGZhYSwgbmFtZSksIGMoImRlc3QiID0gImZhYSIpKSB8PgogICAgc2VsZWN0KGZhYSA9IGRlc3QsIG5hbWUsIG4pCnJlbWF0dHIgPC0gZnVuY3Rpb24oeCkgeyBhdHRyaWJ1dGVzKHgpIDwtIE5VTEw7IHggfQpzdG9waWZub3QoaWRlbnRpY2FsKHJlbWF0dHIoc3VtbWVyX2Rlc3QpLCByZW1hdHRyKHNkMSkpKQpgYGAKClRoZXJlIHdlcmUgYHIgbnJvdyhzdW1tZXJfZGVzdClgIGRlc3RpbmF0aW9ucyB3aXRoIGZsaWdodHMgb25seSBpbgpKdW5lIHRocm91Z2ggQXVndXN0OgoKYGBge3J9CmtuaXRyOjprYWJsZShzdW1tZXJfZGVzdCwgZm9ybWF0ID0gImh0bWwiKSB8PgogICAga2FibGVFeHRyYTo6a2FibGVfc3R5bGluZyhmdWxsX3dpZHRoID0gRkFMU0UpCmBgYAoKQXBwYXJlbnRseSB0aGVyZSB3YXMgYSBmbGlnaHQgb24gVVMgQWlyd2F5cyBmcm9tIEVXUiB0byBMR0Egb24gSnVseSAyNy4KCgojIyA1LiBIaWdoIEFsdGl0dWRlIERlc3RpbmF0aW9ucwoKVGhlIGRlc3RpbmF0aW9ucyBhdCBhbHRpdHVkZXMgYWJvdmUgNSwwMDAgZmVldCBhbmQgdGhlIG51bWJlciBvZgpmbGlnaHRzIGluIDIwMTMgZnJvbSBOZXcgWW9yayBDaXR5IHRvIHRoZXNlIGRlc3RpbmF0aW9ucyB3ZXJlOgoKYGBge3J9CmFpcnBvcnRzMSA8LSBzZWxlY3QoYWlycG9ydHMsIGZhYSwgbmFtZSwgYWx0KQpsZWZ0X2pvaW4oZmxpZ2h0cywgYWlycG9ydHMxLCBjKCJkZXN0IiA9ICJmYWEiKSkgfD4KICAgIGZpbHRlcihhbHQgPiA1MDAwKSB8PgogICAgY291bnQoZGVzdCwgbmFtZSwgYWx0KSB8PgogICAgc2VsZWN0KEZBQSA9IGRlc3QsIE5hbWUgPSBuYW1lLCBBbHRpdHVkZSA9IGFsdCwgRmxpZ2h0cyA9IG4pIHw+CiAgICBrbml0cjo6a2FibGUoZm9ybWF0ID0gImh0bWwiKSB8PgogICAga2FibGVFeHRyYTo6a2FibGVfc3R5bGluZyhmdWxsX3dpZHRoID0gRkFMU0UpCmBgYAo=