Code
library(here)
library(stringr)
library(ggplot2)
library(ggnewscale)
library(whitebox)
library(ggpubr)
library(ggspatial)
source(str_glue("{here()}/scripts/raster-to-graph-functions.R"))
Varying resolution and cutoff times in centrality analysis
Date | Changes |
---|---|
2024-09-26 | Added an executive summary. |
2024-09-04 | Added quick analysis of timings. |
2024-08-29 | Initial post. |
The key findings are:
This notebook explores the impact of varying the resolution of the hiking network and cutoff times on computation times.
library(here)
library(stringr)
library(ggplot2)
library(ggnewscale)
library(whitebox)
library(ggpubr)
library(ggspatial)
source(str_glue("{here()}/scripts/raster-to-graph-functions.R"))
Most of this code is in the previous notebook just wrapped in a couple of loops.
Setup a bunch of folders and base data sets that are unchanged from run to run.
<- "32m" # 10m also available, but probably not critical
dem_resolution <- -999 # in the cover-costs file. hat-tip to ESRI
impassable_cost <- str_glue("{here()}/_data")
data_folder <- str_glue("{data_folder}/dry-valleys")
base_folder <- str_glue("{base_folder}/common-inputs")
input_folder <- str_glue("{base_folder}/output")
output_folder <- str_glue("dry-valleys")
basename <- "05" # use small one '05' for testing
contiguous_geology
<- str_glue("{data_folder}/dem/{dem_resolution}")
dem_folder <- str_glue("{dem_folder}/{basename}-combined-{dem_resolution}-dem-clipped.tif")
dem_file <- str_glue("{base_folder}/contiguous-geologies/{basename}-extent-{contiguous_geology}.gpkg")
extent_file <- st_read(extent_file)
extent
<- rast(dem_file) |>
terrain crop(extent |> as("SpatVector")) |>
mask(extent |> as("SpatVector"))
<- get_hillshade(terrain) |>
shade as.data.frame(xy = TRUE) # this is for plotting in ggplot
<- ggplot(shade) +
hillshade_basemap geom_raster(data = shade, aes(x = x, y = y, fill = hillshade), alpha = 0.5) +
scale_fill_distiller(palette = "Greys", direction = 1) +
guides(fill = "none") +
annotation_scale(plot_unit = "m", height = unit(0.1, "cm")) +
theme_void()
<- str_glue("{input_folder}/{basename}-cover-costs.gpkg")
landcover_file <- st_read(landcover_file) cover
<- c(100, 200)
resolutions <- c(1:5, -1)
cutoffs
<- list()
results <- 1
index for (resolution in resolutions) {
<- resolution * sqrt(2 / sqrt(3))
hex_cell_spacing # check if we have already made this dataset
<-
hexgrid_file str_glue("{base_folder}/output/{basename}-{contiguous_geology}-{dem_resolution}-hex-grid-{resolution}.gpkg")
if (file.exists(hexgrid_file)) {
<- st_read(hexgrid_file)
xy else {
} <- extent |>
xy st_make_grid(cellsize = hex_cell_spacing, what = "centers", square = FALSE) |>
st_as_sf() |>
rename(geometry = x) |> # shouldsee https://github.com/r-spatial/sf/issues/2429
st_set_crs(st_crs(extent))
<- xy |> st_coordinates()
coords <- xy |>
xy mutate(x = coords[, 1], y = coords[, 2])
|>
xy st_write(str_glue(hexgrid_file), delete_dsn = TRUE)
}<- terrain |> terra::extract(xy |> as("SpatVector"), method = "bilinear")
z <- xy |>
pts mutate(z = z[, 2]) |>
st_join(cover) |>
filter(!is.na(z), cost != impassable_cost)
<-
graph_file str_glue("{base_folder}/output/{basename}-{contiguous_geology}-{dem_resolution}-graph-hex-{resolution}.txt")
if (file.exists(graph_file)) {
<- read_graph(graph_file)
G else {
} <- graph_from_points(pts, hex_cell_spacing * 1.1)
G write_graph(G, graph_file)
}
# note... for now this uses a default Tobler hiking function
<- assign_movement_variables_to_graph(G, xyz = pts, impedances = pts$cost)
G
for (cutoff in cutoffs) {
<- system.time(base_vb <- betweenness(G, cutoff = cutoff))
vb_base_time <- system.time(cost_vb <- betweenness(G, weights = E(G)$cost, cutoff = cutoff))
vb_cost_time <- scales::rescale(base_vb, to = c(1, 100))
base_vb <- scales::rescale(cost_vb, to = c(1, 100))
cost_vb <- (cost_vb - base_vb) / (cost_vb + base_vb)
diff_vb <- cost_vb / base_vb
rel_vb <- log(rel_vb)
log_rel_vb
<- G |>
Gsp set_vertex_attr("base_vb", value = base_vb) |>
set_vertex_attr("cost_vb", value = cost_vb) |>
set_vertex_attr("diff_vb", value = diff_vb) |>
set_vertex_attr("rel_vb", value = rel_vb) |>
set_vertex_attr("log_rel_vb", value = log_rel_vb)
<- vertex.attributes(Gsp) |>
results[[index]] as.data.frame() |>
mutate(resolution = resolution, cutoff = cutoff,
vb_base_time = vb_base_time[3],
vb_cost_time = vb_cost_time[3])
<- index + 1
index
}
}<- bind_rows(results) results
<- results |>
results mutate(resolution = as.factor(resolution),
cutoff = as.factor(cutoff)) |>
mutate(cutoff = if_else(cutoff == -1, "None", cutoff))
+
hillshade_basemap geom_point(data = results |> filter(resolution == 100),
aes(x = x, y = y, colour = cost_vb), size = 0.09) +
scale_colour_viridis_c(option = "A", direction = -1) +
coord_equal() +
facet_wrap( ~ cutoff, nrow = 3, labeller = label_both) +
ggtitle("Hiking network nominal resolution: 100m")
+
hillshade_basemap geom_point(data = results |> filter(resolution == 200),
aes(x = x, y = y, colour = cost_vb), size = 0.2) +
scale_colour_viridis_c(option = "A", direction = -1) +
coord_equal() +
facet_wrap( ~ cutoff, nrow = 3, labeller = label_both) +
ggtitle("Hiking network nominal resolution: 200m")
Export these results for interactive examination in QGIS.
[NOT RUN]
for (c in cutoffs) {
for (r in resolutions) {
results |> filter(cutoff == c, resolution == r) |>
st_as_sf(coords = c("x", "y")) |>
st_set_crs(st_crs(extent)) |>
st_write(
str_glue("{output_folder}/{basename}-{contiguous_geology}-{dem_resolution}-betweenness-{r}-{c}.gpkg"),
delete_dsn = TRUE)
}
}
The timing for this small area of interest at these resolutions and cutoff times are tabulated below.
<- results |>
df select(resolution, vb_base_time, vb_cost_time, cutoff) |>
distinct() |>
pivot_longer(cols = c(vb_base_time, vb_cost_time))
ggplot(df) +
geom_col(aes(x = cutoff, y = value, group = resolution, fill = resolution), position = "dodge") +
xlab("Betweenness calculation est. hiking time cutoff") +
ylab("Time, sec") +
theme_minimal()
This confirms that the time to perform betweenness centrality estimation increases with the square of the number of vertices in the network. The finer resolution network (100m) has four times as many nodes, and analysis takes around 16 times longer (>15 seconds vs ~ 1 second in the cutoff = 5 case).
When no cutoff time is used, the betweenness calculation takes much longer. It is clear from the maps above that the no cutoff case identifies ‘arterials’ although it is unclear that this is the most relevant concern for expected environmental impacts.
Further analysis of larger networks (the networks for the largest contiguous region are about 10 times larger than this one) confirms this result, with comparable analyses taking ~100 times longer, which on the computer used (an M2 Mac Mini) translates to several hours compute time for the most extensive betweenness estimation at 100m resolution.
These results suggest that it is important to settle on a resolution and cutoff distance judged appropriate for ongoing work to avoid costly repetition of sometimes time-consuming analysis. It is almost certainly preferable to apply some cutoff estimated hiking time to the calculation in any case.