From 691cb3bdeabb8d5379be8370afeab768b639ee87 Mon Sep 17 00:00:00 2001 From: rafapereirabr Date: Sat, 19 Aug 2023 09:14:37 -0300 Subject: [PATCH] clean polygon slivers --- data_prep/R/polygon_slivers.R | 111 +++++++++++++++++++++++----------- data_prep/R/support_fun.R | 38 +++--------- 2 files changed, 84 insertions(+), 65 deletions(-) diff --git a/data_prep/R/polygon_slivers.R b/data_prep/R/polygon_slivers.R index d485eb63..b92d52f5 100644 --- a/data_prep/R/polygon_slivers.R +++ b/data_prep/R/polygon_slivers.R @@ -1,68 +1,91 @@ library(sf) -library(data.table) library(dplyr) -library(mapview) library(lwgeom) -states1920 <- geobr::read_state(year = 1920) -states1920 <- sf::st_read('C:/Users/user/Downloads/04_limite_estadual_1920/04-limite estadual 1920.shp') -states1920 <- st_transform(states1920, crs = 4674) -states1920 <- df +# using planar geometry +sf::sf_use_s2(FALSE) -save(states1920, file = 'states1920.rda') -saveRDS(states1920, file = 'states1920.rds') +# load data +states1920 <- geobr::read_state(year = 1920, simplified = F) +plot(states1920) -library(tlocoh) # http://tlocoh.r-forge.r-project.org/ -states1920_s <- tlocoh::clean_slivers(states1920_s) -states1920_s <- as(states1920, 'Spatial') +# test summarise with original data +states1920$br <- 1 -sp.report <- clgeo_CollectionReport(states1920_s) -sp.summary <- clgeo_SummaryReport(sp.report) -sp.fixed <- clgeo_Clean(states1920_s, verbose = TRUE) +states1920 |> + group_by(br) |> + summarise() |> + plot(col='gray90') +# using make_valid and buffer +states1920 |> + sf::st_make_valid() |> + sf::st_buffer(dist = 0) |> + group_by(br) |> + summarise() |> + plot(col='gray90') -library(sf) -library(dplyr) -library(lwgeom) -library(geobr) +# using st_snap_to_grid +states1920 |> + sf::st_make_valid() |> + st_transform(crs = 32722) |> -# using planar geometry -sf::sf_use_s2(FALSE) + lwgeom::st_snap_to_grid(size = 0.01) |> + group_by(br) |> + summarise() |> + plot(col='gray90') -# load data -states1920 <- geobr::read_state(year = 1920) -plot(states1920) +t <- dissolve_polygons(mysf=states1920, group_column='name_state') +plot(t,col='gray90') -# test summarise with original data -states1920$br <- 1 +# using sfheaders states1920 |> + sf::st_make_valid() |> group_by(br) |> summarise() |> + sfheaders::sf_remove_holes() |> plot(col='gray90') - -# trying to fix with make_valid and buffer -df1 |> +# using nngeo +states1920 |> sf::st_make_valid() |> - sf::st_buffer(dist = 0) |> group_by(br) |> summarise() |> + nngeo::st_remove_holes() |> plot(col='gray90') +# using smoothr. This is the slowest solution +area_thresh <- units::set_units(800, km^2) -# trying to fix with st_snap_to_grid -states1920 %>% - lwgeom::st_snap_to_grid(size = 0.0000001) %>% +states1920 |> + sf::st_make_valid() |> + group_by(br) |> + summarise() |> + smoothr::fill_holes(threshold = area_thresh) |> + plot(col = "gray90") + + + + +# using rmapshaper. It works by simplifying geometries, which I would consider +# and unwanted side effect in this case + +states1920 |> sf::st_make_valid() |> group_by(br) |> summarise() |> - ungroup() |> + rmapshaper::ms_simplify() |> plot(col='gray90') + + + + + # mother ship dissolve_polygons <- function(temp_sf, f){ @@ -71,7 +94,7 @@ dissolve_polygons <- function(temp_sf, f){ dissolvefun <- function(grp){ # c.1) subset region - temp_region <- subset(temp_sf, nome== grp ) + temp_region <- subset(temp_sf, name_state== grp ) #temp_region <- fgeos(temp_region) temp_region <- f(temp_region) return(temp_region) @@ -79,13 +102,20 @@ dissolve_polygons <- function(temp_sf, f){ # Apply sub-function - groups_sf <- pbapply::pblapply(X = unique(temp_sf$nome), FUN = dissolvefun ) + groups_sf <- pbapply::pblapply(X = unique(temp_sf$name_state), FUN = dissolvefun ) # rbind results groups_sf <- do.call('rbind', groups_sf) return(groups_sf) } +##### sfheaders + + +sfh2 <- dissolve_polygons(temp_sf = df, f = sfheaders::sf_remove_holes) +sfh2$br <- 1 +sfh2 |> group_by(br) |> summarise() |> plot() + ##### rgeos library(rgeos) @@ -289,7 +319,16 @@ sss <- function(temp_region){ # temp_region = df[4,] return(poly) } -sss2 <- dissolve_polygons(temp_sf = df, f = sss) +sss2 <- dissolve_polygons(temp_sf = sta, f = sss) sss2$br <- 1 #sss2 <- fix_topoly(sss2) sss2 |> group_by(br) |> summarise() |> plot() + + + + +t <- dissolve_polygons(mysf = states1920, group_column = 'br') +plot(t, col='gray90') +head(t) + + diff --git a/data_prep/R/support_fun.R b/data_prep/R/support_fun.R index 6e39f577..4048dc23 100644 --- a/data_prep/R/support_fun.R +++ b/data_prep/R/support_fun.R @@ -304,7 +304,8 @@ to_multipolygon <- function(temp_sf){ fix_topoly <- function(temp_sf){ temp_sf <- sf::st_make_valid(temp_sf) - temp_sf <- temp_sf |> sf::st_buffer(0) + temp_sf <- sf::st_buffer(temp_sf, dist = 0) + return(temp_sf) } @@ -352,35 +353,13 @@ dissolve_polygons <- function(mysf, group_column){ temp_region <- subset(mysf, get(group_column, mysf)== grp ) - # c.2) create attribute with the number of points each polygon has - points_in_each_polygon = sapply(1:dim(temp_region)[1], function(i) - length(sf::st_coordinates(temp_region$geom[i]))) - - temp_region$points_in_each_polygon <- points_in_each_polygon - mypols <- subset(temp_region, points_in_each_polygon > 0) - - # d) convert to sp - sf_regiona <- mypols |> as("Spatial") - sf_regiona <- rgeos::gBuffer(sf_regiona, byid=TRUE, width=0) # correct eventual topology issues - - # c) dissolve borders to create country file - result <- maptools::unionSpatialPolygons(sf_regiona, rep(TRUE, nrow(sf_regiona@data))) # dissolve - - - # d) get rid of holes - outerRings = Filter(function(f){f@ringDir==1},result@polygons[[1]]@Polygons) - outerBounds = sp::SpatialPolygons(list(sp::Polygons(outerRings,ID=1))) + temp_region <- summarise(temp_region, .by = group_column) + # plot(temp_region) - # e) convert back to sf data - outerBounds <- st_as_sf(outerBounds) - outerBounds <- st_set_crs(outerBounds, st_crs(mysf)) - st_crs(outerBounds) <- st_crs(mysf) + temp_region <- sfheaders::sf_remove_holes(temp_region) + temp_region <- fix_topoly(temp_region) - # retrieve code_region info and reorder columns - outerBounds <- dplyr::mutate(outerBounds, group_column = grp) - outerBounds <- dplyr::select(outerBounds, group_column, geometry) - names(outerBounds)[1] <- group_column - return(outerBounds) + return(temp_region) } @@ -394,7 +373,7 @@ dissolve_polygons <- function(mysf, group_column){ # # test -# states <- geobr::read_state() +# states <- geobr::read_state(year=2000) # a <- dissolve_polygons(states, group_column='code_region') # plot(a) @@ -409,6 +388,7 @@ remove_state_repetition <- function(temp_sf){ vars <- names(temp_sf)[-length(names(temp_sf))] temp_sf <- temp_sf |> group_by_at(vars) |> summarise() temp_sf <- temp_sf |> filter(!code_state=="0") + temp_sf <- unique(temp_sf) return(temp_sf) } else { return(temp_sf) }