library(tidyverse)
library(sf)
library(USAboundaries)
library(rmapshaper)
library(kableExtra)
library(units)
library(plotrix)
library(knitr)
library(ggplot2)
library(readxl)
library(dplyr)
library(gghighlight)
conus = USAboundaries::us_counties() %>%
filter(!state_name %in% c("Puerto Rico", "Alaska", "Hawaii")) %>%
st_transform(5070)
conus_ms <- us_states() %>%
filter(!(state_name %in% c('Puerto Rico', 'Alaska', 'Hawaii'))) %>%
st_as_sf(coords = c("lng","lat"), crs = 5326) %>%
st_transform(5070)
conus_ms = ms_simplify(conus, keep = 0.05)
county_centroid = st_centroid(conus_ms) %>%
st_combine() %>%
st_cast("MULTIPOINT")
county_centroid_u = st_union(county_centroid)
v_grid = st_voronoi(county_centroid) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id = 1:n())
t_grid = st_triangulate(county_centroid) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id = 1:n())
sq_grid = st_make_grid(county_centroid, n = c(70, 50)) %>%
st_as_sf() %>%
mutate(id = 1:n())
hex_grid = st_make_grid(county_centroid, n = c(70, 50), square = FALSE) %>%
st_as_sf() %>%
mutate(id = 1:n())
total_tess = function(data, title){
area = st_area(data) %>%
units::set_units("km2") %>%
units::drop_units()
data_frame(title, nrow(data), mean(area), sd(area), sum(area))
}
total_tess(conus_ms, "Raw_original")
## # A tibble: 1 x 5
## title `nrow(data)` `mean(area)` `sd(area)` `sum(area)`
## <chr> <int> <dbl> <dbl> <dbl>
## 1 Raw_original 3075 2544. 3428. 7822757.
total_tess(v_grid, "Voronoi")
## # A tibble: 1 x 5
## title `nrow(data)` `mean(area)` `sd(area)` `sum(area)`
## <chr> <int> <dbl> <dbl> <dbl>
## 1 Voronoi 3075 2544. 2894. 7822757.
total_tess(t_grid, "Triangulation")
## # A tibble: 1 x 5
## title `nrow(data)` `mean(area)` `sd(area)` `sum(area)`
## <chr> <int> <dbl> <dbl> <dbl>
## 1 Triangulation 6130 1263. 1583. 7740190.
total_tess(sq_grid, "Square")
## # A tibble: 1 x 5
## title `nrow(data)` `mean(area)` `sd(area)` `sum(area)`
## <chr> <int> <dbl> <dbl> <dbl>
## 1 Square 1635 3484. 401. 5696590.
total_tess(hex_grid, "Hexagon")
## # A tibble: 1 x 5
## title `nrow(data)` `mean(area)` `sd(area)` `sum(area)`
## <chr> <int> <dbl> <dbl> <dbl>
## 1 Hexagon 1633 3501. 352. 5716720.
Name | Numbers | Mean_Area | Sd_Area(km2) | Sum_Area |
---|---|---|---|---|
Counties | 3,075 | 2,543.986 | 3,428.2080 | 7,822,757 |
Voroni | 3,075 | 2,543.986 | 2,894.3919 | 7,822,757 |
Triangulation | 6,130 | 1,262.674 | 1,583.2598 | 7,740,190 |
Square | 1,635 | 3,484.153 | 400.9774 | 5,696,590 |
Hexagon | 1,633 | 3,500.747 | 352.0933 | 5,716,720 |
#2.5 - The different types of tessellation have different type of shapes and result that might effect on calculation on analysis, the counties tessellation is the most regulate shapes on the tessellation it gives good enough information rather the rest. The voronoi tessellation is more spread by proportion of the nearby area and it shape in variety, it used more in convex domain with connect distance. - The triangulation tessellation have more on regular triangle shape than voronoi tessellation and if the point is inside the permanent circle of the triangle, it is easy to cause a sharper angle. The square and hexagon tessellation can have less edge effects and offer an alternative to square.
nid2019 <- read_excel("/Users/xingxin/Github/geog176a-summer-2020-lab1/NID2019_U.xlsx") %>%
filter(!is.na(LONGITUDE)) %>%
filter(!is.na(LATITUDE))
dam2019 <- nid2019 %>%
st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs = 4326) %>%
st_transform(5070) %>%
st_filter(conus)
point_in_polygon = function(points, polygon, id){
st_join(polygon, points) %>%
st_drop_geometry() %>%
dplyr::count(.data[[id]]) %>%
setNames(c(id, "n")) %>%
left_join(polygon, by = id) %>%
st_as_sf()
}
cpip = point_in_polygon(dam2019, conus_ms, "geoid")
vpip = point_in_polygon(dam2019, v_grid, "id")
tpip = point_in_polygon(dam2019, t_grid, "id")
spip = point_in_polygon(dam2019, sq_grid, "id")
hpip = point_in_polygon(dam2019, hex_grid, "id")
plot_tess = function(data, title)
{ggplot() +
geom_sf(data = data, aes(fill = log(n)), alpha = .9, size = .2, col = NA) +
scale_fill_viridis_c() +
theme_void() +
theme(legend.position = 'none',
plot.title = element_text(face = "bold", color = "navy", hjust = .5, size = 24)) +
labs(title = title,
caption = paste0(sum(data$n), " Number of Dams "))}
#3.6 - According to the lecture note, MAUP is a source of statistical bias that can significantly impact the results of statistical hypothesis tests. Therefore, I will choose the voronoi to moving forward because in these four results, the triangulated tessellation is out of the visualization of seeing the spread of dams in the U.S. and the square and hexagon tessellation is missing some part of the data in the western America. The voronoi tessellation is more match the counties map display and gives a good ratio visualize on the distribution of the dams in the Country.
## [1] 492
#The steps I followed on Mike instrction to indetify how many dams serve each purpose by splitting the PURPOSES values. The data what I choosing to use are Flood Control, Water Supply, Fire Protection, and Fish and Wildlife for my chosen because of the flood controls is the most important thing for natural disaster and Water Supply is huge hydrology industry provide people water usage, the fire protection is because it can avoid fire and the fish and wildlife is maintain the environment cycle.
#4.3 - In the results, it matches the geographic of the distribution of dams because most of the dams are located nearby the areas of drainage basin, gulfs/bays and the mountain range. It’s good to see all these geographic of the distribution in the US geography. The flood controls are most located in side of the Mississippi River, Water Supply Dam are more located in agricultural area and shortage water area with many population. The Fire Protection are more located in the area have more forest and wood. The Fish and Wildlife area are located more in the Mid-West which is the distribution area of wildlife.