Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove support for non-bucket index on middle way table #2237

Merged
merged 1 commit into from
Sep 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions man/osm2pgsql.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,6 @@ mandatory for short options too.
database user. By default the schema set with `--schema` is used, or
`public` if that is not set.

\--middle-way-node-index-id-shift=SHIFT
: Set ID shift for way node bucket index in middle. Experts only. See
documentation for details.

\--middle-with-nodes
: Used together with the **new** middle database format when a flat nodes
file is used to force storing nodes with tags in the database, too.
Expand Down
15 changes: 2 additions & 13 deletions src/command-line-parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,8 @@ void parse_expire_tiles_param(char const *arg, uint32_t *expire_tiles_zoom_min,
void check_options_non_slim(CLI::App const &app)
{
std::vector<std::string> const slim_options = {
"--cache",
"--middle-schema",
"--middle-with-nodes",
"--middle-way-node-index-id-shift",
"--tablespace-slim-data",
"--tablespace-slim-index"};
"--cache", "--middle-schema", "--middle-with-nodes",
"--tablespace-slim-data", "--tablespace-slim-index"};

for (auto const &opt : slim_options) {
if (app.count(opt) > 0) {
Expand Down Expand Up @@ -575,13 +571,6 @@ options_t parse_command_line(int argc, char *argv[])
->description("Disable concurrent index creation.")
->group("Advanced options");

// --middle-way-node-index-id-shift
app.add_option("--middle-way-node-index-id-shift",
options.way_node_index_id_shift)
->description("Set ID shift for bucket index.")
->type_name("N")
->group("Advanced options");

// --number-processes
app.add_option("--number-processes", options.num_procs)
// The threads will open up database connections which will
Expand Down
85 changes: 23 additions & 62 deletions src/middle-pgsql.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,6 @@

namespace {

/**
 * Check whether the bucket index for the ways table exists in the database.
 *
 * Looks in the pg_class catalog for an index relation (relkind 'i') named
 * "<prefix>_ways_nodes_bucket_idx". The lookup is by relation name only;
 * the query does not restrict the schema.
 *
 * \param db_connection Connection on which the catalog query is run.
 * \param prefix Table name prefix from which the index name is built.
 * \returns true if the query returned at least one row, false otherwise.
 */
bool check_bucket_index(pg_conn_t const *db_connection,
                        std::string const &prefix)
{
    auto const matching_indexes = db_connection->exec(
        "SELECT relname FROM pg_class"
        " WHERE relkind='i'"
        " AND relname = '{}_ways_nodes_bucket_idx'",
        prefix);

    bool const index_exists = matching_indexes.num_tuples() > 0;
    return index_exists;
}

void send_id_list(pg_conn_t const &db_connection,
std::string const &table, idlist_t const &ids)
{
Expand Down Expand Up @@ -97,7 +86,7 @@ std::string build_sql(options_t const &options, std::string const &templ)
fmt::arg("using_tablespace", using_tablespace),
fmt::arg("data_tablespace", tablespace_clause(options.tblsslim_data)),
fmt::arg("index_tablespace", tablespace_clause(options.tblsslim_index)),
fmt::arg("way_node_index_id_shift", options.way_node_index_id_shift),
fmt::arg("way_node_index_id_shift", 5),
fmt::arg("attribute_columns_definition",
options.extra_attributes ? " created timestamp with time zone,"
" version int4,"
Expand Down Expand Up @@ -663,17 +652,13 @@ void middle_pgsql_t::get_node_parents(idlist_t const &changed_nodes,

queries.emplace_back("ANALYZE osm2pgsql_changed_nodes");

bool const has_bucket_index =
check_bucket_index(&m_db_connection, m_options->prefix);

if (has_bucket_index) {
// The query to get the parent ways of changed nodes is "hidden"
// inside a PL/pgSQL function so that the query planner only sees
// a single node id that is being queried for. If we ask for all
// nodes at the same time the query planner sometimes thinks it is
// better to do a full table scan which totally destroys performance.
// This is due to the PostgreSQL statistics on ARRAYs being way off.
queries.emplace_back(R"(
// The query to get the parent ways of changed nodes is "hidden"
// inside a PL/pgSQL function so that the query planner only sees
// a single node id that is being queried for. If we ask for all
// nodes at the same time the query planner sometimes thinks it is
// better to do a full table scan which totally destroys performance.
// This is due to the PostgreSQL statistics on ARRAYs being way off.
queries.emplace_back(R"(
CREATE OR REPLACE FUNCTION osm2pgsql_find_changed_ways() RETURNS void AS $$
DECLARE
changed_buckets RECORD;
Expand All @@ -692,16 +677,8 @@ BEGIN
END;
$$ LANGUAGE plpgsql
)");
queries.emplace_back("SELECT osm2pgsql_find_changed_ways()");
queries.emplace_back("DROP FUNCTION osm2pgsql_find_changed_ways()");
} else {
queries.emplace_back(R"(
INSERT INTO osm2pgsql_changed_ways
SELECT w.id
FROM {schema}"{prefix}_ways" w, osm2pgsql_changed_nodes n
WHERE w.nodes && ARRAY[n.id]
)");
}
queries.emplace_back("SELECT osm2pgsql_find_changed_ways()");
queries.emplace_back("DROP FUNCTION osm2pgsql_find_changed_ways()");

queries.emplace_back(R"(
INSERT INTO osm2pgsql_changed_relations
Expand Down Expand Up @@ -1176,7 +1153,7 @@ table_sql sql_for_nodes(middle_pgsql_options const &options)
return sql;
}

table_sql sql_for_ways(middle_pgsql_options const &options)
table_sql sql_for_ways()
{
table_sql sql{};

Expand All @@ -1200,23 +1177,17 @@ table_sql sql_for_ways(middle_pgsql_options const &options)
" {users_table_access}"
" WHERE o.id = ANY($1::int8[])"};

if (options.way_node_index_id_shift == 0) {
sql.create_fw_dep_indexes = {
"CREATE INDEX ON {schema}\"{prefix}_ways\" USING GIN (nodes)"
" WITH (fastupdate = off) {index_tablespace}"};
} else {
sql.create_fw_dep_indexes = {
"CREATE OR REPLACE FUNCTION"
" {schema}\"{prefix}_index_bucket\"(int8[])"
" RETURNS int8[] AS $$"
" SELECT ARRAY(SELECT DISTINCT"
" unnest($1) >> {way_node_index_id_shift})"
"$$ LANGUAGE SQL IMMUTABLE",
"CREATE INDEX \"{prefix}_ways_nodes_bucket_idx\""
" ON {schema}\"{prefix}_ways\""
" USING GIN ({schema}\"{prefix}_index_bucket\"(nodes))"
" WITH (fastupdate = off) {index_tablespace}"};
}
sql.create_fw_dep_indexes = {
"CREATE OR REPLACE FUNCTION"
" {schema}\"{prefix}_index_bucket\"(int8[])"
" RETURNS int8[] AS $$"
" SELECT ARRAY(SELECT DISTINCT"
" unnest($1) >> {way_node_index_id_shift})"
"$$ LANGUAGE SQL IMMUTABLE",
"CREATE INDEX \"{prefix}_ways_nodes_bucket_idx\""
" ON {schema}\"{prefix}_ways\""
" USING GIN ({schema}\"{prefix}_index_bucket\"(nodes))"
" WITH (fastupdate = off) {index_tablespace}"};

return sql;
}
Expand Down Expand Up @@ -1272,7 +1243,6 @@ middle_pgsql_t::middle_pgsql_t(std::shared_ptr<thread_pool_t> thread_pool,
m_db_copy(m_copy_thread), m_append(options->append)
{
m_store_options.with_attributes = options->extra_attributes;
m_store_options.way_node_index_id_shift = options->way_node_index_id_shift;

if (options->middle_with_nodes) {
m_store_options.nodes = true;
Expand All @@ -1289,15 +1259,8 @@ middle_pgsql_t::middle_pgsql_t(std::shared_ptr<thread_pool_t> thread_pool,

log_debug("Mid: pgsql, cache={}", options->cache);

bool const has_bucket_index =
check_bucket_index(&m_db_connection, options->prefix);

if (!has_bucket_index && options->append) {
log_debug("You don't have a bucket index. See manual for details.");
}

m_tables.nodes() = table_desc{*options, sql_for_nodes(m_store_options)};
m_tables.ways() = table_desc{*options, sql_for_ways(m_store_options)};
m_tables.ways() = table_desc{*options, sql_for_ways()};
m_tables.relations() = table_desc{*options, sql_for_relations()};

m_users_table = table_desc{*options, sql_for_users(m_store_options)};
Expand All @@ -1310,8 +1273,6 @@ void middle_pgsql_t::set_requirements(
log_debug(" nodes: {}", m_store_options.nodes);
log_debug(" untagged_nodes: {}", m_store_options.untagged_nodes);
log_debug(" use_flat_node_file: {}", m_store_options.use_flat_node_file);
log_debug(" way_node_index_id_shift: {}",
m_store_options.way_node_index_id_shift);
log_debug(" with_attributes: {}", m_store_options.with_attributes);
}

Expand Down
3 changes: 0 additions & 3 deletions src/middle-pgsql.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,6 @@ struct middle_pgsql_options
// Store untagged nodes also (set in addition to nodes=true).
bool untagged_nodes = false;

// Bit shift used in way node index
uint8_t way_node_index_id_shift = 5;

// Use a flat node file
bool use_flat_node_file = false;

Expand Down
8 changes: 0 additions & 8 deletions src/options.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,14 +107,6 @@ struct options_t

unsigned int num_procs = 1;

/**
* How many bits should the node id be shifted for the way node index?
* The result is a lossy index which is significantly smaller.
* See https://osm2pgsql.org/doc/manual.html#bucket-index-for-slim-mode
* Use 0 to use a classic loss-less GIN index.
*/
uint8_t way_node_index_id_shift = 5;

/**
* Middle database format:
* 0 = non-slim mode, no database middle (ram middle)
Expand Down
Loading