Skip to content

Commit

Permalink
Merge pull request #555 from ropensci/handle_strings
Browse files Browse the repository at this point in the history
handle strings
  • Loading branch information
Robinlovelace authored Mar 14, 2024
2 parents d593b9f + 336c208 commit 42773e7
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 65 deletions.
32 changes: 29 additions & 3 deletions R/rnet_join.R
Original file line number Diff line number Diff line change
Expand Up @@ -224,20 +224,36 @@ line_cast <- function(x) {
#' # rnet_y = sf::read_sf("rnet_y_ed.geojson")
#' # rnet_merged = rnet_merge(rnet_x, rnet_y, dist = 9, segment_length = 20, funs = funs)
#' @return An sf object with the same geometry as `rnet_x`
#'


rnet_merge <- function(rnet_x, rnet_y, dist = 5, funs = NULL, sum_flows = TRUE, crs = geo_select_aeq(rnet_x), ...) {

# handle_strings = function(strings) {
# unique_strings = unique(strings)
# paste(unique_strings, collapse = "; ")
# }

if (is.null(funs)) {
print("funs is NULL")
funs <- list()
for (col in names(rnet_y)) {
if (is.numeric(rnet_y[[col]])) {
funs[[col]] <- sum
if (col == "geometry") {
next # Skip the current iteration
} else if (is.numeric(rnet_y[[col]])) {
funs[[col]] = sum
} else if (is.character(rnet_y[[col]])) {
funs[[col]] = handle_strings
} else if (col %in% c("gradient", "quietness")) {
funs[[col]] = mean
}
}
}

sum_cols <- sapply(funs, function(f) identical(f, sum))
sum_cols <- names(funs)[which(sum_cols)]
rnetj <- rnet_join(rnet_x, rnet_y, dist = dist, crs = crs, ...)
names(rnetj)

rnetj_df <- sf::st_drop_geometry(rnetj)
# Apply functions to columns with lapply:
res_list <- lapply(seq_along(funs), function(i) {
Expand Down Expand Up @@ -277,3 +293,13 @@ rnet_merge <- function(rnet_x, rnet_y, dist = 5, funs = NULL, sum_flows = TRUE,
}
res_sf
}

handle_strings <- function(strings) {
# Calculate the frequency of each unique string
string_freq <- table(strings)

# Find the string(s) with the highest frequency
most_frequent_string <- names(which.max(string_freq))

return(most_frequent_string)
}
2 changes: 1 addition & 1 deletion man/rnet_group.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

82 changes: 21 additions & 61 deletions vignettes/merging-route-networks.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ knitr::opts_chunk$set(
# # Uncomment to speed-up build
eval = FALSE,
comment = "#>",
echo = FALSE,
echo = TRUE,
message = FALSE,
warning = FALSE
)
Expand All @@ -22,7 +22,8 @@ sf::sf_use_s2(FALSE)
```

```{r setup}
library(stplanr)
# library(stplanr)
devtools::load_all()
library(dplyr)
library(tmap)
library(ggplot2)
Expand Down Expand Up @@ -88,6 +89,7 @@ system.time({
Let's check the results:

```{r}
names(rnet_merged)
summary(rnet_merged$value)
summary(rnet_y$value)
sum(rnet_merged$value * sf::st_length(rnet_merged), na.rm = TRUE)
Expand Down Expand Up @@ -141,6 +143,23 @@ sum(rnet_merged$value * sf::st_length(rnet_merged), na.rm = TRUE)
sum(rnet_y$value * sf::st_length(rnet_y), na.rm = TRUE)
```

It also works with charaster strings:

```{r}
rnet_y$char = paste0("road", sample(1:3, nrow(rnet_y), replace = TRUE))
most_common = function(x) {
ux = unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
funs = list(char = most_common)
system.time({
rnet_merged = rnet_merge(rnet_x, rnet_y, dist = 10, segment_length = 20, funs = funs)
})
plot(rnet_y["char"])
plot(rnet_merged["char"])
```

Now let's testing on 3km dataset

```{r}
Expand Down Expand Up @@ -188,62 +207,3 @@ summary(exmaple_3km$all_fastest_bicycle)
sum(exmaple_3km$all_fastest_bicycle * sf::st_length(exmaple_3km), na.rm = TRUE)
sum(rnet_y$all_fastest_bicycle * sf::st_length(rnet_y), na.rm = TRUE)
```

Now let's testing on large dataset

```{r}
rnet_x = sf::read_sf("https://github.com/nptscot/networkmerge/releases/download/v0.1/OS_large_route_network_example_edingurgh.geojson")
rnet_y = sf::read_sf("https://github.com/nptscot/networkmerge/releases/download/v0.1/large_route_network_example_edingurgh.geojson")
```

Read columns from rnet_y to assign functions to them
```{r}
# Extract column names from the rnet_x data frame
name_list <- names(rnet_y)
name_list
# Initialize an empty list
funs <- list()
# Loop through each name and assign it a function based on specific conditions
for (name in name_list) {
if (name == "geometry") {
next # Skip the current iteration
} else if (name %in% c("Gradient", "Quietness")) {
funs[[name]] <- mean
} else {
funs[[name]] <- sum
}
}
```

```{r, eval = FALSE}
# Take 0.1% sample:
# ...
# Buffer of the 0.1% sample
# ...
# Select OS road data that intersects
# rnet_x = rnet_x[osm_buffer, ] # or similar
brks = c(0, 100, 500, 1000, 5000,10000)
rnet_merged = rnet_merge(rnet_x, rnet_y, dist = 20, segment_length = 10, funs = funs, max_angle_diff = 20, crs = "EPSG:27700")
st_write(rnet_merged, "data-raw/large_exmaple_merged.geojson", driver = "GeoJSON")
rnet_merged <- st_make_valid(rnet_merged)
m1 = tm_shape(rnet_y) + tm_lines("all_fastest_bicycle", palette = "viridis", lwd = 5, breaks = brks)
m2 = tm_shape(rnet_merged) + tm_lines("all_fastest_bicycle", palette = "viridis", lwd = 5, breaks = brks)
tmap_arrange(m1, m2, sync = TRUE, nrow = 1)
dim(rnet_merged)
st_write(rnet_merged, "data-raw/large_exmaple_merged.geojson", driver = "GeoJSON")
```

Read large_exmaple_merged from github
```{r}
large_exmaple_merged = sf::read_sf("https://github.com/nptscot/networkmerge/releases/download/v0.1/large_exmaple_merged.geojson")
summary(rnet_y$all_fastest_bicycle)
summary(large_exmaple_merged$all_fastest_bicycle)
sum(large_exmaple_merged$all_fastest_bicycle * sf::st_length(large_exmaple_merged), na.rm = TRUE)
sum(rnet_y$all_fastest_bicycle * sf::st_length(rnet_y), na.rm = TRUE)
plot(large_exmaple_merged)
```

0 comments on commit 42773e7

Please sign in to comment.