library(tidyverse)
library(sf)
library(USAboundaries)
library(readxl)
library(rmapshaper)
library(knitr)
library(leaflet)
library(mapview)
library(gridExtra)
library(gghighlight)
library(units)
library(kableExtra)
library(leafpop)

Question 1:

counties = us_counties() %>% 
  filter(!state_name %in% c("Alaska", "Hawaii", "Puerto Rico")) %>% 
  st_transform(5070)
county_cent = counties %>% 
  st_centroid(counties) %>% 
  st_combine()
v_grid = county_cent %>% 
  st_voronoi() %>% 
  st_cast() %>% 
  st_as_sf() %>% 
  mutate(id = 1:n())

t_grid = county_cent %>% 
  st_triangulate() %>% 
  st_cast() %>% 
  st_as_sf() %>% 
  mutate(id = 1:n())

s_grid = county_cent %>%
  st_make_grid(n = 70) %>% 
  st_cast() %>% 
  st_as_sf() %>% 
  mutate(id = 1:n())

h_grid = county_cent %>% 
  st_make_grid(square = FALSE, n = 70) %>% 
  st_cast() %>% 
  st_as_sf() %>% 
  mutate(id = 1:n())
conus_border = st_union(counties)

plot(conus_border)

npts(conus_border)
## [1] 3229
simp_border = ms_simplify(conus_border, keep = .1)

plot(simp_border)

npts(simp_border)
## [1] 322

I was able to remove 2907=3229-322 points. Consequences of doing this computationally is it increases the speed of computation.

v_grid = st_intersection(v_grid, simp_border)
t_grid = st_intersection(t_grid, simp_border)
tess_plot = function(data, title){
  ggplot() +
    geom_sf(data = data, fill = "white", col = "navy", size = .2) +   
    theme_void() +
    labs(title = title, caption = paste("This tesselation has:", nrow(data), "tiles" )) +
    theme(plot.title = element_text(hjust = .5, color =  "navy", face = "bold"))
}
tess_plot(counties, "Counties")

tess_plot(v_grid, "Voronoi Coverage")

tess_plot(t_grid, "Triangulation Coverage")

tess_plot(s_grid, "Square Coverage")

tess_plot(h_grid, "Hexagonal Coverage")

Question 2:

tess_cal = function(tess_type, tess_name){
  
  tess_type = tess_type %>% 
  mutate(area = st_area(tess_type), area = set_units(area, "km^2"), area = drop_units(area), total_area = sum(area), mean_area = total_area / n(), sd_area = sum(area - mean_area / n()) ^ (1/2), number = length(tess_type$id)
         )
  number_of_feature = length(tess_type$id)
  
  tess_name = tess_type %>% 
    mutate(num_feature = number_of_feature, name = tess_name) %>% 
    select(name, num_feature, mean_area, sd_area, total_area) %>% 
    st_drop_geometry() %>% 
    head(1)
  
  return(tess_name)
}
original = counties %>% 
  mutate(id = 1:n())

tess_cal(counties, "Counties")
##       name num_feature mean_area  sd_area total_area
## 1 Counties           0  2521.745 2799.118    7837583
tess_cal(v_grid, "Voronoi Coverage")
##               name num_feature mean_area  sd_area total_area
## 1 Voronoi Coverage        3107  2522.865 2799.289    7838541
tess_cal(t_grid, "Triangulation Coverage")
##                     name num_feature mean_area  sd_area total_area
## 1 Triangulation Coverage        6196  1252.506 2785.548    7760528
tess_cal(s_grid, "Square Coverage")
##              name num_feature mean_area  sd_area total_area
## 1 Square Coverage        2013  2558.283 2268.758    5149823
tess_cal(h_grid, "Hexegonal Coverage")
##                 name num_feature mean_area  sd_area total_area
## 1 Hexegonal Coverage        1639  3586.119 2423.646    5877648
tess_summary = bind_rows(tess_cal(counties, "Counties"), tess_cal(v_grid, "Voronoi Coverage"), tess_cal(t_grid, "Triangulation Coverage"), tess_cal(s_grid, "Square Coverage"), tess_cal(h_grid, "Hexegonal Coverage"))

knitr::kable(tess_summary, caption = "Summary of Tessellations", col.names = c("Tessellation Type", "Number of Features", "Mean Area", "Standard Deviation", "Total Area"), format.args = list(big.mark = ",")) %>% 
  kable_styling("striped", full_width = TRUE, font_size = 11) 
Summary of Tessellations
Tessellation Type Number of Features Mean Area Standard Deviation Total Area
Counties 0 2,521.745 2,799.118 7,837,583
Voronoi Coverage 3,107 2,522.865 2,799.289 7,838,541
Triangulation Coverage 6,196 1,252.506 2,785.548 7,760,528
Square Coverage 2,013 2,558.283 2,268.758 5,149,823
Hexegonal Coverage 1,639 3,586.119 2,423.646 5,877,648

Question 3:

NID2019_U = readxl::read_excel("~/github/geog-176A-labs/data/NID2019_U.xlsx") %>%
  filter(!is.na(LATITUDE)) %>%
  filter(!is.na(LONGITUDE))

sf_NID2019_U <- NID2019_U%>% 
  st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs = 4326) %>%
  st_transform(5070)
point_in_polygon3 = function(points, polygon, id){
  st_join(polygon, points) %>%
    st_drop_geometry() %>%
    count(.data[[id]]) %>%
    setNames(c(id, "n")) %>%
    left_join(polygon, by = id) %>%
    st_as_sf()
}
c_p = point_in_polygon3(sf_NID2019_U, counties, "geoid")
v_p = point_in_polygon3(sf_NID2019_U, v_grid, "id")
t_p = point_in_polygon3(sf_NID2019_U, t_grid, "id")
s_p = point_in_polygon3(sf_NID2019_U, s_grid, "id")
h_p = point_in_polygon3(sf_NID2019_U, h_grid, "id")
plot_pip = function(data, title){
  ggplot() +
    geom_sf(data = data, aes(fill = log(n)), col = NA, alpha = .9, size = .2) +
    scale_fill_viridis_c() +
    theme_void() +
    theme(legend.position = "none", plot.title = element_text(face = "bold", color = "darkgreen", hjust = .5, size = 20)) +
    labs(title = title, caption = paste0(sum(data$n), " damages represented"))
}
plot_pip(c_p, "Damages per Country")

plot_pip(v_p, "Damages per Voronoi")

plot_pip(t_p, "Damages per Trianglulation")

plot_pip(s_p, "Damages per Square")

plot_pip(h_p, "Damages per Hexagon")

Different tessellation types influences map visualization differently such as the desity of damages.MAUP, the modifiable areal unit problem, is a source of statistical bias that can significantly impact the results of statistical hypothesis tests. In other words, with MAUP, result could be vastly distorted. Compare to square grid tessellation, Voronoi is a lot similar to the original data. Both square grid and Hexagon dominantly lose datas. I prefer Voronoi tessellation because it stores larger range of data which benefits analysis more.

Question 4:

The damages with the purpose of recreation, flood control, water supply, and fish and wildlife are the ones I choose. The reason why I choose these 4 damages is because they are not only interest me the most, but also they have a wide range of data to be analyzed.

dam_rec = sf_NID2019_U %>% 
  filter(grepl("R", PURPOSES))

dam_flood = sf_NID2019_U %>% 
  filter(grepl("C", PURPOSES))

dam_water = sf_NID2019_U %>% 
  filter(grepl("S", PURPOSES))

dam_fish = sf_NID2019_U %>% 
  filter(grepl("F", PURPOSES))
pip_rec = point_in_polygon3(dam_rec, v_grid, "id")
pip_flood = point_in_polygon3(dam_flood, v_grid, "id")
pip_water = point_in_polygon3(dam_water, v_grid, "id")
pip_fish = point_in_polygon3(dam_fish, v_grid, "id")
plot_pip(pip_rec, "Recreation Damages Location") +
  gghighlight(n > (mean(n) + sd(n)))

plot_pip(pip_flood, "Flood Control Damages Location") +
  gghighlight(n > (mean(n) + sd(n)))

plot_pip(pip_water, "Water Supply Damages Location") +
  gghighlight(n > (mean(n) + sd(n)))

plot_pip(pip_fish, "Fish and Wildlife Damages Location") +
  gghighlight(n > (mean(n) + sd(n)))

Damage locations seem reasonable generally among all the plots. If I were to pick Hexagonal or square grid tessellation, a few data in fish and wildlife damages would be lost. Recreation damages distribute more in the east coast, as well as Colorado and Southern California where the higher population density is. A big part of flood control damages distribute around Mississippi River which totally make sense because it is the largest drainage system in the Unites States. California experiencing drought is not something new. The most populated state has the largest cattle industry on top of Mediterranean climate. It is also not surprising that fish and wildlife damages distribute more in the Midwest where is the home to them.

Extra Credit:

MS = read_sf("~/github/geog-176A-labs/data/majorrivers_0_0") %>% 
  filter(SYSTEM == "Mississippi")
dam = NID2019_U %>% 
  filter(!STATE %in% c("AK", "HI", "GU", "PR")) %>% 
  filter(HAZARD == "H") %>% 
  filter(PURPOSES == "C") %>% 
  group_by(STATE) %>% 
  slice_max(NID_STORAGE, n = 1)
max_storage = dam %>% 
  select(DAM_NAME, PURPOSES, NID_STORAGE, YEAR_COMPLETED)
radius = dam %>% 
  mutate(radius = NID_STORAGE / 1500000) %>% 
  select(radius)
leaflet() %>% 
  addProviderTiles(providers$CartoDB) %>% 
  addCircleMarkers(data = dam, color = "purple", fillOpacity = 1, radius = as.vector(radius$radius), stroke = FALSE, popup = popupTable(max_storage, feature.id = FALSE, row.numbers = FALSE)) %>% 
  addPolylines(data = MS, color = "yellow")