From 57025de30f96dce617fda7c3b5e09ea71b4ebf1a Mon Sep 17 00:00:00 2001 From: Malcolm Morgan Date: Tue, 15 Aug 2023 14:29:23 +0100 Subject: [PATCH 1/7] Switch parts of overline to data.table --- R/overline.R | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/R/overline.R b/R/overline.R index 8224c4ad..08e45f82 100644 --- a/R/overline.R +++ b/R/overline.R @@ -232,12 +232,8 @@ overline2 <- sl <- cbind(c3, sl) rm(c3) - # browser() - # if(requireNamespace("data.table", quietly = TRUE)) { - # sl = data.table::data.table(sl) - # } - slg <- dplyr::group_by_at(sl, c("1", "2", "3", "4")) - sls <- dplyr::ungroup(dplyr::summarise_all(slg, .funs = fun)) + sls <- dplyr::group_by_at(sl, c("1", "2", "3", "4")) + sls <- dplyr::ungroup(dplyr::summarise_all(sls, .funs = fun)) attrib <- names(sls)[5:ncol(sls)] coords <- as.matrix(sls[, 1:4]) sl <- sls[, -c(1:4)] @@ -300,13 +296,11 @@ overline2 <- }) overlined_simple <- if (requireNamespace("pbapply", quietly = TRUE)) { pbapply::pblapply(sl, function(y) { - y <- dplyr::group_by_at(y, attrib) - y <- dplyr::summarise(y, do_union = FALSE, .groups = "drop") + ol_grp(y, attrib) }, cl = cl) } else { lapply(sl, function(y) { - y <- dplyr::group_by_at(y, attrib) - y <- dplyr::summarise(y, do_union = FALSE, .groups = "drop") + ol_grp(y, attrib) }) } @@ -315,13 +309,11 @@ overline2 <- } else { overlined_simple <- if (requireNamespace("pbapply", quietly = TRUE)) { pbapply::pblapply(sl, function(y) { - y <- dplyr::group_by_at(y, attrib) - y <- dplyr::summarise(y, do_union = FALSE, .groups = "drop") + ol_grp(y, attrib) }) } else { lapply(sl, function(y) { - y <- dplyr::group_by_at(y, attrib) - y <- dplyr::summarise(y, do_union = FALSE, .groups = "drop") + ol_grp(y, attrib) }) } } @@ -333,8 +325,8 @@ overline2 <- if (!quiet) { message(paste0(Sys.time(), " aggregating flows")) } - overlined_simple <- dplyr::group_by_at(sl, attrib) - overlined_simple <- dplyr::summarise(overlined_simple, do_union = FALSE, .groups = "drop") + + overlined_simple <- ol_grp(sl, attrib) rm(sl) } @@ -363,6 +355,11 @@ overline2 <- #' @export overline.sf <- overline2 +ol_grp <- function(sl, attrib){ + sl <- data.table::data.table(sl) + sl <- sl[, .(geometry = st_combine(geometry)), by = attrib] + sf::st_as_sf(sl) +} #' Aggregate flows so they become non-directional (by geometry - the slow way) #' From 50275c4c6cddc576e1416e1a91e66b9935768745 Mon Sep 17 00:00:00 2001 From: Malcolm Morgan Date: Tue, 15 Aug 2023 14:53:34 +0100 Subject: [PATCH 2/7] ad missing sf:: --- R/overline.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/overline.R b/R/overline.R index 08e45f82..aa62893f 100644 --- a/R/overline.R +++ b/R/overline.R @@ -357,7 +357,7 @@ overline.sf <- overline2 ol_grp <- function(sl, attrib){ sl <- data.table::data.table(sl) - sl <- sl[, .(geometry = st_combine(geometry)), by = attrib] + sl <- sl[, .(geometry = sf::st_combine(geometry)), by = attrib] sf::st_as_sf(sl) } From 5e56ee15b5ef5a4d463e3b42d036c1a904b318db Mon Sep 17 00:00:00 2001 From: Malcolm Morgan Date: Wed, 16 Aug 2023 16:22:09 +0100 Subject: [PATCH 3/7] Fix multicore support --- R/overline.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/overline.R b/R/overline.R index aa62893f..7ce2fd9b 100644 --- a/R/overline.R +++ b/R/overline.R @@ -287,7 +287,7 @@ overline2 <- cl <- parallel::makeCluster(ncores) parallel::clusterExport( cl = cl, - varlist = c("attrib"), + varlist = c("attrib","ol_grp"), envir = environment() ) parallel::clusterEvalQ(cl, { From e19390b7405b060f80466da698b247b649288732 Mon Sep 17 00:00:00 2001 From: Malcolm Morgan Date: Wed, 16 Aug 2023 16:27:19 +0100 Subject: [PATCH 4/7] Bring in #510 --- R/overline.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/overline.R b/R/overline.R index 7ce2fd9b..47271f40 100644 --- a/R/overline.R +++ b/R/overline.R @@ -263,6 +263,10 @@ overline2 <- } if (nrow(sl) > regionalise) { message(paste0("large data detected, using regionalisation, nrow = ", nrow(sl))) + + # Fix for https://github.com/ropensci/stplanr/issues/510 + sl <- sl[st_is_valid(sl),] + suppressWarnings(cents <- sf::st_centroid(sl)) # Fix for https://github.com/r-spatial/sf/issues/1777 if(sf::st_is_longlat(cents)){ From 9c9292d9561c9df1fa34595612fbe06ace1bf155 Mon Sep 17 00:00:00 2001 From: Malcolm Morgan Date: Wed, 16 Aug 2023 16:28:40 +0100 Subject: [PATCH 5/7] Drop regionalisation value --- R/overline.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/overline.R b/R/overline.R index 47271f40..91a8ad8d 100644 --- a/R/overline.R +++ b/R/overline.R @@ -166,7 +166,7 @@ overline2 <- attrib, ncores = 1, simplify = TRUE, - regionalise = 1e9, + regionalise = 1e7, quiet = ifelse(nrow(sl) < 1000, TRUE, FALSE), fun = sum) { if(as.character(unique(sf::st_geometry_type(sl))) == "MULTILINESTRING") { From afb9c74aaa6523b141dc24aa3895653678d45980 Mon Sep 17 00:00:00 2001 From: Malcolm Morgan Date: Wed, 16 Aug 2023 17:13:31 +0100 Subject: [PATCH 6/7] update docs --- DESCRIPTION | 2 +- man/overline.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e746eb75..f874f87c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -70,5 +70,5 @@ VignetteBuilder: Encoding: UTF-8 LazyData: yes Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.3 SystemRequirements: GNU make diff --git a/man/overline.Rd b/man/overline.Rd index 3bd286f1..47605a49 100644 --- a/man/overline.Rd +++ b/man/overline.Rd @@ -20,7 +20,7 @@ overline2( attrib, ncores = 1, simplify = TRUE, - regionalise = 1e+09, + regionalise = 1e+07, quiet = ifelse(nrow(sl) < 1000, TRUE, FALSE), fun = sum ) From 519891ea7383542ff9a1273b17113e2bd79dd36a Mon Sep 17 00:00:00 2001 From: robinlovelace Date: Thu, 17 Aug 2023 09:49:51 +0100 Subject: [PATCH 7/7] Add tests results --- .../ad-hoc-tests/test-overline-performance.R | 0 .../test-overline-performance.Rmd | 101 +++++++ .../ad-hoc-tests/test-overline-performance.md | 276 ++++++++++++++++++ 3 files changed, 377 insertions(+) create mode 100644 data-raw/ad-hoc-tests/test-overline-performance.R create mode 100644 data-raw/ad-hoc-tests/test-overline-performance.Rmd create mode 100644 data-raw/ad-hoc-tests/test-overline-performance.md diff --git a/data-raw/ad-hoc-tests/test-overline-performance.R b/data-raw/ad-hoc-tests/test-overline-performance.R new file mode 100644 index 00000000..e69de29b diff --git a/data-raw/ad-hoc-tests/test-overline-performance.Rmd b/data-raw/ad-hoc-tests/test-overline-performance.Rmd new file mode 100644 index 00000000..2de6ac0e --- /dev/null +++ b/data-raw/ad-hoc-tests/test-overline-performance.Rmd @@ -0,0 +1,101 @@ +--- +output: github_document +--- + +```{r} +devtools::load_all() +library(tidyverse) +``` + + +```{r} +if(!file.exists("routes.geojson")) { + routes = pct::get_pct_routes_fast("isle-of-wight") + routes = routes %>% + slice(1:1000) + sf::write_sf(routes, "routes.geojson", delete_dsn = TRUE) +} + +routes = geojsonsf::geojson_sf("routes.geojson") +nrow(routes) + +``` + +```{r} +res1 = overline_old(routes, attrib = "foot") +``` + +```{r} +res2 = overline3(routes, attrib = "foot") +``` + +```{r} +summary(res2) +summary(res2) +``` + + +# Small test + +```{r} +res = bench::mark(time_unit = "s", check = FALSE, + original = {o1 <<- overline_old(routes, attrib = "foot")}, + new = {o2 <<- overline3(routes, attrib = "foot")} +) +``` + + +The results are as follows: + +```{r} +res |> + dplyr::select(expression, median, mem_alloc) |> + mutate(routes_per_second = nrow(routes) / median) |> + knitr::kable() +``` + +# Large test + +```{r} +routes = readRDS("/tmp/uptake_commute_fastest.Rds") +r = routes |> + slice(seq(10000)) +names(r) +system.time({ +rnet = overline_old(r, + attrib = c("bicycle","bicycle_go_dutch","bicycle_ebike","quietness","gradient_smooth"), + fun = list(sum = sum, max = first), + ncores = 1, + regionalise = 1e9) +}) + +system.time({ +rnet = overline3(r, + attrib = c("bicycle","bicycle_go_dutch","bicycle_ebike","quietness","gradient_smooth"), + fun = list(sum = sum, max = first), + ncores = 1, + regionalise = 1e9) +}) +bench::mark(check = FALSE, iterations = 1, + old = {res1 <<- overline_old(r, + attrib = c("bicycle","bicycle_go_dutch","bicycle_ebike","quietness","gradient_smooth"), + fun = list(sum = sum, max = first), + ncores = 1, + regionalise = 1e9) + }, +new = {res2 <<- overline3(r, + attrib = c("bicycle","bicycle_go_dutch","bicycle_ebike","quietness","gradient_smooth"), + fun = list(sum = sum, max = first), + ncores = 1, + regionalise = 1e9) +} + ) +``` + +```{r} +summary(res1) +summary(res2) + +``` + + diff --git a/data-raw/ad-hoc-tests/test-overline-performance.md b/data-raw/ad-hoc-tests/test-overline-performance.md new file mode 100644 index 00000000..07ae3d50 --- /dev/null +++ b/data-raw/ad-hoc-tests/test-overline-performance.md @@ -0,0 +1,276 @@ + +``` r +devtools::load_all() +``` + + ## ℹ Loading stplanr + + ## Warning: Objects listed as exports, but not present in namespace: + ## • overline2 + +``` r +library(tidyverse) +``` + + ## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ── + ## ✔ dplyr 1.1.2 ✔ readr 2.1.4 + ## ✔ forcats 1.0.0 ✔ stringr 1.5.0 + ## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1 + ## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0 + ## ✔ purrr 1.0.2 + ## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── + ## ✖ readr::edition_get() masks testthat::edition_get() + ## ✖ dplyr::filter() masks stats::filter() + ## ✖ purrr::is_null() masks testthat::is_null() + ## ✖ dplyr::lag() masks stats::lag() + ## ✖ readr::local_edition() masks testthat::local_edition() + ## ✖ dplyr::matches() masks tidyr::matches(), testthat::matches() + ## ✖ readr::parse_date() masks stplanr::parse_date() + ## ℹ Use the conflicted package () to force all conflicts to become errors + +``` r +if(!file.exists("routes.geojson")) { + routes = pct::get_pct_routes_fast("isle-of-wight") + routes = routes %>% + slice(1:1000) + sf::write_sf(routes, "routes.geojson", delete_dsn = TRUE) +} + +routes = geojsonsf::geojson_sf("routes.geojson") +nrow(routes) +``` + + ## [1] 1000 + +``` r +res1 = overline_old(routes, attrib = "foot") +``` + + ## 2023-08-17 09:40:59.482411 constructing segments + + ## 2023-08-17 09:41:00.722783 building geometry + + ## 2023-08-17 09:41:01.004696 simplifying geometry + + ## 2023-08-17 09:41:01.005065 aggregating flows + + ## 2023-08-17 09:41:01.237217 rejoining segments into linestrings + +``` r +res2 = overline3(routes, attrib = "foot") +``` + + ## 2023-08-17 09:41:01.377522 constructing segments + + ## 2023-08-17 09:41:02.752467 building geometry + + ## 2023-08-17 09:41:03.045405 simplifying geometry + + ## 2023-08-17 09:41:03.045752 aggregating flows + + ## 2023-08-17 09:41:03.28658 rejoining segments into linestrings + +# Small test + +``` r +res = bench::mark(time_unit = "s", check = FALSE, + original = {overline_old(routes, attrib = "foot")}, + new = {overline3(routes, attrib = "foot")} +) +``` + + ## 2023-08-17 09:41:03.547956 constructing segments + + ## 2023-08-17 09:41:04.903392 building geometry + + ## 2023-08-17 09:41:05.311159 simplifying geometry + + ## 2023-08-17 09:41:05.311571 aggregating flows + + ## 2023-08-17 09:41:05.578866 rejoining segments into linestrings + + ## 2023-08-17 09:41:10.37143 constructing segments + + ## 2023-08-17 09:41:11.527714 building geometry + + ## 2023-08-17 09:41:11.844704 simplifying geometry + + ## 2023-08-17 09:41:11.845035 aggregating flows + + ## 2023-08-17 09:41:12.235894 rejoining segments into linestrings + + ## 2023-08-17 09:41:15.413275 constructing segments + + ## 2023-08-17 09:41:16.371987 building geometry + + ## 2023-08-17 09:41:16.663718 simplifying geometry + + ## 2023-08-17 09:41:16.66405 aggregating flows + + ## 2023-08-17 09:41:16.876137 rejoining segments into linestrings + + ## 2023-08-17 09:41:17.162153 constructing segments + + ## 2023-08-17 09:41:18.370308 building geometry + + ## 2023-08-17 09:41:18.723326 simplifying geometry + + ## 2023-08-17 09:41:18.723716 aggregating flows + + ## 2023-08-17 09:41:19.020335 rejoining segments into linestrings + + ## Warning: Some expressions had a GC in every iteration; so filtering is + ## disabled. + +The results are as follows: + +``` r +res |> + dplyr::select(expression, median, mem_alloc) |> + mutate(routes_per_second = nrow(routes) / median) |> + knitr::kable() +``` + +| expression | median | mem_alloc | routes_per_second | +|:-----------|---------:|----------:|------------------:| +| original | 1.561284 | 213MB | 640.4983 | +| new | 2.189923 | 192MB | 456.6370 | + +# Large test + +``` r +routes = readRDS("/tmp/uptake_commute_fastest.Rds") +r = routes |> + slice(seq(10000)) +names(r) +``` + + ## [1] "route_number" "name" + ## [3] "provisionName" "distances" + ## [5] "time" "quietness" + ## [7] "gradient_smooth" "geo_code1" + ## [9] "geo_code2" "car" + ## [11] "taxi" "foot" + ## [13] "bicycle" "public_transport" + ## [15] "all" "dist_euclidean" + ## [17] "dist_euclidean_jittered" "route_id" + ## [19] "splittingID" "geometry" + ## [21] "route_hilliness" "length_route" + ## [23] "pcycle_go_dutch" "pcycle_ebike" + ## [25] "bicycle_go_dutch" "bicycle_ebike" + ## [27] "mode_ratio_go_dutch" "mode_ratio_ebike" + ## [29] "car_go_dutch" "public_transport_go_dutch" + ## [31] "foot_go_dutch" "car_ebike" + ## [33] "public_transport_ebike" "foot_ebike" + +``` r +system.time({ +rnet = overline_old(r, + attrib = c("bicycle","bicycle_go_dutch","bicycle_ebike","quietness","gradient_smooth"), + fun = list(sum = sum, max = first), + ncores = 1, + regionalise = 1e9) +}) +``` + + ## 2023-08-17 09:42:23.777888 constructing segments + + ## 2023-08-17 09:42:31.65472 building geometry + + ## 2023-08-17 09:42:32.809003 simplifying geometry + + ## 2023-08-17 09:42:32.809407 aggregating flows + + ## 2023-08-17 09:42:36.679727 rejoining segments into linestrings + + ## user system elapsed + ## 12.193 1.239 13.469 + +``` r +system.time({ +rnet = overline3(r, + attrib = c("bicycle","bicycle_go_dutch","bicycle_ebike","quietness","gradient_smooth"), + fun = list(sum = sum, max = first), + ncores = 1, + regionalise = 1e9) +}) +``` + + ## 2023-08-17 09:42:39.289945 constructing segments + + ## 2023-08-17 09:42:46.551332 building geometry + + ## 2023-08-17 09:42:47.787907 simplifying geometry + + ## 2023-08-17 09:42:47.788377 aggregating flows + + ## 2023-08-17 09:42:49.824217 rejoining segments into linestrings + + ## user system elapsed + ## 11.511 0.044 11.107 + +``` r +bench::mark(check = FALSE, iterations = 1, + old = {res1 <<- overline_old(r, + attrib = c("bicycle","bicycle_go_dutch","bicycle_ebike","quietness","gradient_smooth"), + fun = list(sum = sum, max = first), + ncores = 1, + regionalise = 1e9) + }, +new = {res2 <<- overline3(r, + attrib = c("bicycle","bicycle_go_dutch","bicycle_ebike","quietness","gradient_smooth"), + fun = list(sum = sum, max = first), + ncores = 1, + regionalise = 1e9) +} + ) +``` + + ## 2023-08-17 09:42:50.545904 constructing segments + + ## 2023-08-17 09:42:57.975437 building geometry + + ## 2023-08-17 09:42:58.99299 simplifying geometry + + ## 2023-08-17 09:42:58.993323 aggregating flows + + ## 2023-08-17 09:43:02.448424 rejoining segments into linestrings + + ## 2023-08-17 09:43:29.905679 constructing segments + + ## 2023-08-17 09:43:36.984241 building geometry + + ## 2023-08-17 09:43:38.07569 simplifying geometry + + ## 2023-08-17 09:43:38.076099 aggregating flows + + ## 2023-08-17 09:43:40.249743 rejoining segments into linestrings + + ## 2023-08-17 09:43:49.839485 constructing segments + + ## 2023-08-17 09:43:56.549587 building geometry + + ## 2023-08-17 09:43:57.601956 simplifying geometry + + ## 2023-08-17 09:43:57.602298 aggregating flows + + ## 2023-08-17 09:44:02.195464 rejoining segments into linestrings + + ## 2023-08-17 09:44:02.867219 constructing segments + + ## 2023-08-17 09:44:11.384976 building geometry + + ## 2023-08-17 09:44:12.857531 simplifying geometry + + ## 2023-08-17 09:44:12.85796 aggregating flows + + ## 2023-08-17 09:44:15.095431 rejoining segments into linestrings + + ## Warning: Some expressions had a GC in every iteration; so filtering is + ## disabled. + + ## # A tibble: 2 × 6 + ## expression min median `itr/sec` mem_alloc `gc/sec` + ## + ## 1 old 13s 13s 0.0770 3.41GB 0.539 + ## 2 new 12.9s 12.9s 0.0773 223.49MB 0.386