Libraries

library(tidyverse)
library(sf)
library(USAboundaries)
library(rmapshaper)
library(kableExtra)
library(units)
library(plotrix)
library(knitr)
library(ggplot2)
library(readxl)
library(dplyr)
library(gghighlight)

1.1 - Get CONUS

# County polygons for the lower 48 states (plus DC), i.e. every county whose
# state_name is not Puerto Rico, Alaska or Hawaii, reprojected to EPSG:5070.
conus <- sf::st_transform(
  dplyr::filter(
    USAboundaries::us_counties(),
    !(state_name %in% c("Puerto Rico", "Alaska", "Hawaii"))
  ),
  crs = 5070
)

1.2-1.5 - Data and mapping

# Simplified copy of the county polygons in `conus`; `keep = 0.05` is passed
# straight to rmapshaper::ms_simplify() to thin the geometry before the
# centroid and tessellation steps.
#
# A state-level pipeline used to be assigned to `conus_ms` just before this
# line. It was overwritten here without ever being read, and it passed
# `coords =` and `crs = 5326` (not the usual 4326) to st_as_sf() on an object
# that was already sf, so it has been removed as dead code.
conus_ms <- ms_simplify(conus, keep = 0.05)

# One centroid per simplified county, combined into a single MULTIPOINT
# geometry. This is the seed set for every tessellation built from it.
county_centroid <- st_cast(
  st_combine(st_centroid(conus_ms)),
  to = "MULTIPOINT"
)

# Unioned version of the same point set.
county_centroid_u <- st_union(county_centroid)

# Four tessellations seeded from the county centroids. Each one is converted
# to an sf data frame and given a sequential integer `id` per cell, which is
# the key later used for point-in-polygon counting.
#
# `seq_len(n())` is used instead of `1:n()` so an empty tessellation gets an
# empty id column rather than the length-2 vector c(1, 0).
#
# NOTE(review): none of these grids is clipped to the CONUS outline here; if
# clipping is wanted it has to happen in a later step -- confirm.

# Voronoi polygons around the centroids.
v_grid <- st_voronoi(county_centroid) %>%
  st_cast() %>%
  st_as_sf() %>%
  mutate(id = seq_len(n()))

# Triangulation of the centroids.
t_grid <- st_triangulate(county_centroid) %>%
  st_cast() %>%
  st_as_sf() %>%
  mutate(id = seq_len(n()))

# Square grid over the centroids' extent, requested as n = c(70, 50).
sq_grid <- st_make_grid(county_centroid, n = c(70, 50)) %>%
  st_as_sf() %>%
  mutate(id = seq_len(n()))

# Hexagonal grid over the same extent (square = FALSE), same n.
hex_grid <- st_make_grid(county_centroid, n = c(70, 50), square = FALSE) %>%
  st_as_sf() %>%
  mutate(id = seq_len(n()))

1.6-1.7 - Plot Tessellation

2.1-2.4 - Gather Data and Summarise each Tessellation

# Summarise the cell areas of one tessellation as a one-row table.
#
# Args:
#   data:  geometry set accepted by st_area() and nrow() (the callers in this
#          file pass sf data frames).
#   title: label stored in the `title` column of the result.
#
# Returns:
#   A one-row tibble with columns `title`, `nrow(data)`, `mean(area)`,
#   `sd(area)` and `sum(area)`. Areas are converted to "km2" and then stripped
#   of their units, so the numeric columns are plain doubles.
total_tess <- function(data, title) {
  area <- st_area(data) %>%
    units::set_units("km2") %>%
    units::drop_units()

  # tibble() replaces the deprecated data_frame(). The column names are
  # spelled out so they stay identical to the auto-generated names the old
  # call produced, which downstream output relies on.
  tibble::tibble(
    title = title,
    `nrow(data)` = nrow(data),
    `mean(area)` = mean(area),
    `sd(area)` = sd(area),
    `sum(area)` = sum(area)
  )
}
# Area summary for the county layer and each tessellation. The `##` lines are
# console output recorded from an earlier knit.
# NOTE(review): the recorded row counts (e.g. 1635 square cells) do not match
# a raw 70 x 50 grid, so the recorded run presumably summarised clipped
# tessellations -- confirm against the original document.
total_tess(data = conus_ms, title = "Raw_original")
## # A tibble: 1 x 5
##   title        `nrow(data)` `mean(area)` `sd(area)` `sum(area)`
##   <chr>               <int>        <dbl>      <dbl>       <dbl>
## 1 Raw_original         3075        2544.      3428.    7822757.
total_tess(data = v_grid, title = "Voronoi")
## # A tibble: 1 x 5
##   title   `nrow(data)` `mean(area)` `sd(area)` `sum(area)`
##   <chr>          <int>        <dbl>      <dbl>       <dbl>
## 1 Voronoi         3075        2544.      2894.    7822757.
total_tess(data = t_grid, title = "Triangulation")
## # A tibble: 1 x 5
##   title         `nrow(data)` `mean(area)` `sd(area)` `sum(area)`
##   <chr>                <int>        <dbl>      <dbl>       <dbl>
## 1 Triangulation         6130        1263.      1583.    7740190.
total_tess(data = sq_grid, title = "Square")
## # A tibble: 1 x 5
##   title  `nrow(data)` `mean(area)` `sd(area)` `sum(area)`
##   <chr>         <int>        <dbl>      <dbl>       <dbl>
## 1 Square         1635        3484.       401.    5696590.
total_tess(data = hex_grid, title = "Hexagon")
## # A tibble: 1 x 5
##   title   `nrow(data)` `mean(area)` `sd(area)` `sum(area)`
##   <chr>          <int>        <dbl>      <dbl>       <dbl>
## 1 Hexagon         1633        3501.       352.    5716720.
Types of Tessellation
Name Numbers Mean_Area Sd_Area(km2) Sum_Area
Counties 3,075 2,543.986 3,428.2080 7,822,757
Voronoi 3,075 2,543.986 2,894.3919 7,822,757
Triangulation 6,130 1,262.674 1,583.2598 7,740,190
Square 1,635 3,484.153 400.9774 5,696,590
Hexagon 1,633 3,500.747 352.0933 5,716,720

#2.5 - The different types of tessellation have differently shaped tiles, and these differences can affect calculations and analysis. The county tessellation has the most regular shapes of the tessellations, and it gives good enough information compared with the rest. The Voronoi tessellation spreads its tiles in proportion to the nearby area, so its shapes vary; it is used more in convex domains where distance to the nearest point matters. The triangulation tessellation has more regular triangular shapes than the Voronoi tessellation, but if a point lies inside the circumcircle of a triangle it easily produces a sharper angle. The square and hexagonal tessellations can have fewer edge effects, and the hexagon offers an alternative to the square.

3.1-3.4 - Dams Data

# Load the NID 2019 workbook and keep only rows that have both a longitude and
# a latitude, so every remaining record can be turned into a point.
# NOTE(review): the path is absolute and machine-specific.
nid2019 <- read_excel("/Users/xingxin/Github/geog176a-summer-2020-lab1/NID2019_U.xlsx") %>%
  filter(!(is.na(LONGITUDE) | is.na(LATITUDE)))

# Dam records as point features: built from the LONGITUDE/LATITUDE columns in
# EPSG:4326, reprojected to EPSG:5070 to match `conus`, then restricted to
# points that intersect a CONUS county.
dam2019 <- st_filter(
  st_transform(
    st_as_sf(nid2019, coords = c("LONGITUDE", "LATITUDE"), crs = 4326),
    crs = 5070
  ),
  conus
)
# Count how many points fall in each polygon of a tessellation.
#
# Args:
#   points:  sf point layer (e.g. the dams).
#   polygon: sf polygon layer containing a key column named by `id`.
#   id:      name (length-1 character) of the key column in `polygon`.
#
# Returns:
#   An sf object with one row per matching polygon row, ordered by the key,
#   whose first two columns are the key and `n` (integer number of
#   intersecting points), followed by the remaining columns of `polygon`.
#
# Polygons that contain no points get n = 0. The previous st_join()-based
# count was a left join, so every empty polygon still contributed one
# unmatched row and was reported as n = 1.
point_in_polygon <- function(points, polygon, id) {
  stopifnot(is.character(id), length(id) == 1L, id %in% names(polygon))

  # st_intersects() returns, for each polygon row, the indices of the points
  # it intersects; the length of each entry is that polygon's true count.
  hits <- lengths(sf::st_intersects(polygon, points))

  # Sum per key so rows sharing a key are pooled, as count() did before.
  counts <- tibble::tibble(key = polygon[[id]], hits = hits) %>%
    dplyr::count(key, wt = hits, name = "n") %>%
    stats::setNames(c(id, "n"))

  # Re-attach the polygon attributes and geometry, then restore the sf class.
  counts %>%
    dplyr::left_join(polygon, by = id) %>%
    sf::st_as_sf()
}
# Dam counts per cell for the county layer ("geoid" key) and for each of the
# four tessellations ("id" key).
cpip <- point_in_polygon(points = dam2019, polygon = conus_ms, id = "geoid")
vpip <- point_in_polygon(points = dam2019, polygon = v_grid, id = "id")
tpip <- point_in_polygon(points = dam2019, polygon = t_grid, id = "id")
spip <- point_in_polygon(points = dam2019, polygon = sq_grid, id = "id")
hpip <- point_in_polygon(points = dam2019, polygon = hex_grid, id = "id")
# Choropleth of per-cell counts for one tessellation.
#
# Args:
#   data:  sf object with a numeric column `n` (the count per cell).
#   title: plot title.
#
# Returns a ggplot object: cells filled by log(n) on the viridis scale, no
# cell outlines, no legend, and a caption giving the sum of `n`.
plot_tess <- function(data, title) {
  dam_total <- sum(data$n)
  title_style <- element_text(
    face = "bold",
    color = "navy",
    hjust = 0.5,
    size = 24
  )

  ggplot() +
    geom_sf(
      data = data,
      mapping = aes(fill = log(n)),
      alpha = 0.9,
      size = 0.2,
      col = NA
    ) +
    scale_fill_viridis_c() +
    theme_void() +
    theme(legend.position = "none", plot.title = title_style) +
    labs(
      title = title,
      caption = paste0(dam_total, " Number of Dams ")
    )
}

3.5 - Plot Dams Data

#3.6 - According to the lecture notes, the modifiable areal unit problem (MAUP) is a source of statistical bias that can significantly affect the results of statistical hypothesis tests. I will therefore move forward with the Voronoi tessellation: of the four results, the triangulated tessellation does not give a clear visualization of how dams are spread across the U.S., and the square and hexagonal tessellations are missing part of the data in the western United States. The Voronoi tessellation matches the county map more closely and gives a well-proportioned picture of the distribution of dams in the country.

4.1-4.2 - Choice I selected

## [1] 492

#I followed Mike's instructions to identify how many dams serve each purpose by splitting the PURPOSES values. The purposes I chose to use are Flood Control, Water Supply, Fire Protection, and Fish and Wildlife: flood control is the most important purpose with respect to natural disasters, water supply is a huge part of the hydrology industry and provides people with water, fire protection helps prevent fires, and fish and wildlife dams maintain the environmental cycle.

#4.3 - The results match the geography of the distribution of dams, because most dams are located near drainage basins, gulfs/bays and mountain ranges. It is good to see these distributions reflected in the geography of the US. Flood control dams are mostly located along the Mississippi River; water supply dams are more often located in agricultural areas and in water-short areas with large populations. Fire protection dams are more often located in areas with more forest and woodland. Fish and wildlife dams are located more in the Midwest, which is where the wildlife is distributed.