Code
library(here)
library(stringr)
library(ggplot2)
library(ggnewscale)
library(ggspatial)
library(hrbrthemes)
library(spatstat)
library(purrr)
library(igraph)
source(str_glue("{here()}/scripts/raster-to-graph-functions.R"))
Using shortest paths and minimum spanning trees
Date | Changes |
---|---|
2024-09-19 | Detailed the commentary. |
2024-09-16 | Initial post. |
This notebook explores developing the betweenness centrality networks/risk maps based on a known set of most often used origins and destinations. The idea would be to supply a list of locations with associated weightings, where for example base camp might be (say) 25, some once-visited sampling locations might be ‘1’, repeat-visit experimental sites might be 5. Betweenness is then based on all shortest paths among these sites weighted by those numbers.
As an extension it might be possible to use outputs from this to propose a low impact network of routes to use for an expedition.
library(here)
library(stringr)
library(ggplot2)
library(ggnewscale)
library(ggspatial)
library(hrbrthemes)
library(spatstat)
library(purrr)
library(igraph)
source(str_glue("{here()}/scripts/raster-to-graph-functions.R"))
This is similar to many previous notebooks.
<- "32m" # 10m also available, but probably not critical
dem_resolution <- "ice"
impassable_geology <- str_glue("{here()}/_data")
data_folder <- str_glue("{data_folder}/dry-valleys")
base_folder <- str_glue("{base_folder}/common-inputs")
input_folder <- str_glue("{base_folder}/output")
output_folder <- str_glue("dry-valleys")
basename <- "05" # use small one '05' for testing
contiguous_geology
<- str_glue("{data_folder}/dem/{dem_resolution}")
dem_folder <- str_glue("{dem_folder}/{basename}-combined-{dem_resolution}-dem-clipped.tif")
dem_file <- str_glue("{base_folder}/contiguous-geologies/{basename}-extent-{contiguous_geology}.gpkg")
extent_file <- st_read(extent_file)
extent
<- rast(dem_file) |>
terrain crop(extent |> as("SpatVector")) |>
mask(extent |> as("SpatVector"))
<- get_hillshade(terrain) |>
shade as.data.frame(xy = TRUE) # this is for plotting in ggplot
<- ggplot(shade) +
hillshade_basemap geom_raster(data = shade, aes(x = x, y = y, fill = hillshade), alpha = 0.5) +
scale_fill_distiller(palette = "Greys", direction = 1) +
guides(fill = "none") +
annotation_scale(plot_unit = "m", height = unit(0.1, "cm")) +
theme_void()
<- str_glue("{data_folder}/ata-scar-geomap-geology-v2022-08-clipped.gpkg") geologies_file
Also similar is building or retrieving the graph representation of the terrain.
<- 150
resolution
<- resolution * sqrt(2 / sqrt(3))
hex_cell_spacing # check if we have already made this dataset
<- str_glue("{base_folder}/output/{basename}-{contiguous_geology}-{dem_resolution}-hex-grid-{resolution}.gpkg")
hexgrid_file if (file.exists(hexgrid_file)) {
<- st_read(hexgrid_file)
xy else {
} <- extent |>
xy st_make_grid(cellsize = hex_cell_spacing, what = "centers", square = FALSE) |>
st_as_sf() |>
rename(geometry = x) |>
st_set_crs(st_crs(extent))
<- xy |> st_coordinates()
coords <- xy |>
xy mutate(x = coords[, 1], y = coords[, 2])
|>
xy st_write(str_glue(hexgrid_file), delete_dsn = TRUE)
}
<- terrain |>
z ::extract(xy |> as("SpatVector"), method = "bilinear")
terra
<- xy |>
pts mutate(z = z[, 2])
# remove any NAs that might result from interpolation or impassable terrain
# NAs tend to occur around the edge of study area
<- st_read(geologies_file) |>
cover ::select(POLYGTYPE) |>
dplyrrename(terrain = POLYGTYPE)
<- pts |>
pts st_join(cover) |>
filter(!is.na(z), terrain != impassable_geology)
<-
graph_file str_glue("{base_folder}/output/{basename}-{contiguous_geology}-{dem_resolution}-graph-hex-{resolution}.txt")
if (file.exists(graph_file)) {
<- read_graph(graph_file)
G else {
} <- graph_from_points(pts, hex_cell_spacing * 1.1)
G write_graph(G, graph_file)
}
<- assign_movement_variables_to_graph_2(G, xyz = pts, terrain = pts$terrain)
G V(G)$id <- 1:length(G)
<- 0.5 * resolution / 100
cost_cutoff <- delete_edges(G, E(G)[which(E(G)$cost > cost_cutoff)]) |>
G largest_component(mode = "strong")
<- V(G)$id
remaining_nodes <- pts |> slice(remaining_nodes) pts
Use spatstat
to make a set of random locations that are at least somewhat spaced apart.
<- rSSI(2000, n = 20, win = as.owin(extent)) |>
origin_pts st_as_sf() |>
st_set_crs(st_crs(extent)) |>
slice(-1) |>
::select(-label)
dplyr# get the sequence numbers of thes in the pts data
<- origin_pts |>
origin_is st_nearest_feature(pts)
# make up a point dataset of these origins with randomly assigned importance
<- pts |>
origin_vs mutate(id = row_number()) |>
slice(origin_is) |>
mutate(importance = sample(1:25, n(), replace = TRUE, prob = 25:1))
While doing so, we accumulate how often they appear on shortest paths.
There are a couple of key issues to note here:
weights
parameter referenced if using all all_shortest_paths
MUST NOT be equal across all edges. If it is, then given the uniformity of the underlying lattice there is likely to be a combinatorial explosion as all the many equal shortest paths between every pair of sites are calculated.<- data.frame(v = V(G) |>
result as.vector() |>
as.character(),
betweenness = 0)
for (i in 1:nrow(origin_vs)) {
for (j in 1:nrow(origin_vs)) {
if (i != j) {
<- all_shortest_paths(G, origin_vs$id[i], origin_vs$id[j],
ASP weights = edge_attr(G, "cost"))
<- length(ASP$vpaths)
n_shortest_paths <- result |>
result left_join(ASP$vpaths |>
lapply(as.vector) |>
reduce(c) |>
table() |>
as.data.frame() |>
mutate(v = as.character(Var1)) |>
::select(-Var1), by = "v") |>
dplyrmutate(Freq = replace_na(Freq, 0)) |>
mutate(betweenness =
+ Freq / n_shortest_paths *
betweenness $importance[i]) |>
origin_vs::select(-Freq)
dplyr
}
}
}<- G |> set_vertex_attr("sp_betweenness", value = result$betweenness) G
igraph
makes this choice). Note that unless two paths are exactly the same total cost then the much slower unused version of the code will still, in effect, pick the shorter path, when in fact ‘on the ground’ either might be just as likely to be selected. First up, we use it calculate all sites to all sites shortest paths and overlay those.# put all vertices on a shortest path to begin
<- V(G) |>
vs_on_shortest_paths as.vector()
# make a data frame we can join the results into
for (i in seq_along(origin_vs$id)) {
# one entry for each appearance on a shortest path
<- shortest_paths(G, origin_vs$id[i], origin_vs$id[-i], weights = edge_attr(G, "cost"))
SP <- SP$vpath |>
new_shortest_paths lapply(as.vector) |>
reduce(c) |>
rep(origin_vs$importance[i])
<- vs_on_shortest_paths |>
vs_on_shortest_paths c(new_shortest_paths)
}<- vs_on_shortest_paths |>
shortest_path_counts table() |>
as.vector()
<- set_vertex_attr(G, "sp_betweenness", value = shortest_path_counts - 1) G
And map it
<- G |> igraph::as_data_frame(what = "vertices")
spb +
hillshade_basemap geom_point(data = spb |> filter(sp_betweenness > 0),
aes(x = x, y = y, colour = sp_betweenness),size = 0.25) +
scale_colour_viridis_c(option = "A", direction = -1) +
geom_sf(data = origin_vs, aes(size = importance), colour = "black", pch = 1) +
theme_ipsum_rc()
Here, we calculate the distance matrix and determine the greatest minimum distance to a near site across all sites, then construct a network consisting only of connections shorter than this.
<- distances(G, origin_vs$id, origin_vs$id, weights = edge_attr(G, "cost"))
dist_m diag(dist_m) <- max(dist_m)
<- dist_m |> apply(MARGIN = 1, FUN = min) |> max()
row_max_min <- dist_m |> apply(MARGIN = 2, FUN = min) |> max()
col_max_min <- max(row_max_min, col_max_min)
cutoff
# hence the connections to be made are given by
<- which(dist_m <= cutoff, arr.ind = TRUE)
which_ijs # and proceed as before
<- V(G) |>
vs_on_shortest_paths as.vector()
# make a data frame we can join the results into
for (k in 1:nrow(which_ijs)) {
<- which_ijs[k, 1]
i <- which_ijs[k, 2]
j # one entry for each appearance on a shortest path
<- shortest_paths(G, origin_vs$id[i], origin_vs$id[j], weights = edge_attr(G, "cost"))
SP <- SP$vpath[[1]] |>
new_shortest_paths as.vector() |>
rep(origin_vs$importance[i])
<- vs_on_shortest_paths |>
vs_on_shortest_paths c(new_shortest_paths)
}<- vs_on_shortest_paths |>
shortest_path_counts table() |>
as.vector()
<- set_vertex_attr(G, "sp_betweenness", value = shortest_path_counts - 1) G
And map it
<- G |> igraph::as_data_frame(what = "vertices")
spb +
hillshade_basemap geom_point(data = spb |> filter(sp_betweenness > 0),
aes(x = x, y = y, colour = sp_betweenness),size = 0.25) +
scale_colour_viridis_c(option = "A", direction = -1) +
geom_sf(data = origin_vs, aes(size = importance), colour = "black", pch = 1) +
theme_ipsum_rc()
We can use a minimum spanning tree to make a network instead… with one reservation: the minimum spanning tree of a directed graph is not found using algorithms for MSTs on undirected graphs, such as that offered by igraph::mst
.
The directed graph equivalent to an MST is called a minimum-cost arborescence or optimal branching.1 The networkx
package in python supports the required algorithms, but igraph
does not.
Since the asymmetry in our networks is not extreme, we can approximate a solution in igraph
in this case by round tripping the network via an undirected network and back to a directed one.
First the incorrect arborescence…
<- vertex_attr(G, "x", V(G)[origin_vs$id])
xs <- vertex_attr(G, "y", V(G)[origin_vs$id])
ys <- dist_m |>
approx_min_arborescence graph_from_adjacency_matrix(mode = "directed", weighted = TRUE) |>
set_vertex_attr("x", value = xs) |>
set_vertex_attr("y", value = ys) |>
mst() # this applies algorithms appropriate to an undirected graph
plot(approx_min_arborescence,
vertex.size = 20, edge.arrow.size = .35,
xlim = range(xs), ylim = range(ys), rescale = FALSE, asp = 1)
Now round-trip it via an undirected graph. For some reason, I can’t get igraph::as.undirected()
to do this, so use matrix representation.
<- approx_min_arborescence |> as_adjacency_matrix(sparse = FALSE)
dir_m <- ((dir_m + t(dir_m)) > 0)
undir_m <- which(undir_m, arr.ind = TRUE)
which_ijs
# put all vertices on a shortest path to begin
<- V(G) |>
vs_on_shortest_paths as.vector()
# make a data frame we can join the results into
for (k in 1:nrow(which_ijs)) {
<- which_ijs[k, 1]
i <- which_ijs[k, 2]
j # one entry for each appearance on a shortest path
<- shortest_paths(G, origin_vs$id[i], origin_vs$id[j], weights = edge_attr(G, "cost"))
SP <- SP$vpath[[1]] |>
new_shortest_paths as.vector() |>
rep(origin_vs$importance[i])
<- vs_on_shortest_paths |>
vs_on_shortest_paths c(new_shortest_paths)
}<- vs_on_shortest_paths |>
shortest_path_counts table() |>
as.vector()
<- set_vertex_attr(G, "sp_betweenness", value = shortest_path_counts - 1) G
And once more… map it
<- G |> igraph::as_data_frame(what = "vertices")
spb +
hillshade_basemap geom_point(data = spb |> filter(sp_betweenness > 0),
aes(x = x, y = y, colour = sp_betweenness),size = 0.25) +
scale_colour_viridis_c(option = "A", direction = -1) +
geom_sf(data = origin_vs, aes(size = importance), colour = "black", pch = 1) +
theme_ipsum_rc()
It is interesting to note the total cost of this network and the cost of its longest connection between sites:
<- which_ijs |>
edges as.data.frame() |>
mutate(from = origin_vs$id[row], to = origin_vs$id[col]) |>
mutate(cost = 0) |>
::select(-row, -col)
dplyr<- data.frame(
vertices id = origin_vs$id,
x = vertex_attr(G, "x", origin_vs$id),
y = vertex_attr(G, "y", origin_vs$id))
for (k in 1:nrow(edges)) {
<- edges$from[k]
i <- edges$to[k]
j = distances(G, i, j, weights = edge_attr(G, "cost"))
costs $cost[k] <- costs |> as.vector()
edges
}print(str_glue("Total cost: {sum(edges$cost)}\nHighest cost: {max(edges$cost)}"))
Total cost: 53.1451751235408
Highest cost: 3.06211977831383
See this and Korte B and J Vygen. 2018. Spanning Trees and Arborescences. In Combinatorial Optimization: Theory and Algorithms, ed. B Korte and J Vygen, 133–157. Berlin, Heidelberg: Springer. doi:10.1007/978-3-662-56039-6_6.↩︎