From 4c494ccb4430039d088302efc578a2b6e218803d Mon Sep 17 00:00:00 2001 From: Evgeny Zverev Date: Fri, 20 Sep 2024 04:30:21 +0000 Subject: [PATCH] Cs to stable 24 3 (#9268) Co-authored-by: Semyon Co-authored-by: ivanmorozov333 Co-authored-by: Ivan <5627721+abyss7@users.noreply.github.com> Co-authored-by: Artem Alekseev Co-authored-by: Alexander Avdonkin Co-authored-by: Nikita Vasilev Co-authored-by: Vladislav Gogov Conflicts: ydb/core/kqp/compute_actor/kqp_compute_actor.cpp ydb/core/kqp/compute_actor/kqp_compute_actor.h ydb/core/kqp/compute_actor/kqp_compute_actor_factory.h ydb/core/kqp/compute_actor/kqp_compute_events.h ydb/core/kqp/compute_actor/kqp_scan_compute_actor.cpp ydb/core/kqp/compute_actor/kqp_scan_compute_actor.h ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.h ydb/core/kqp/executer_actor/kqp_data_executer.cpp ydb/core/kqp/executer_actor/kqp_planner.cpp ydb/core/kqp/executer_actor/kqp_planner.h ydb/core/kqp/node_service/kqp_node_service.cpp ydb/core/kqp/ut/common/kqp_ut_common.cpp ydb/core/kqp/ut/olap/clickbench_ut.cpp ydb/core/kqp/ut/olap/kqp_olap_ut.cpp ydb/core/kqp/ut/tx/kqp_sink_common.h ydb/core/kqp/ut/tx/kqp_sink_locks_ut.cpp ydb/core/kqp/ut/tx/kqp_sink_mvcc_ut.cpp ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp ydb/core/protos/feature_flags.proto ydb/core/protos/kqp.proto ydb/core/testlib/basics/feature_flags.h ydb/core/tx/columnshard/engines/scheme/index_info.cpp ydb/core/tx/columnshard/splitter/chunks.h ydb/library/yql/sql/v1/sql_ut.cpp ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-12 ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-14 ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-15 ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-16 
ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-17 ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-30 ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-31 ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-32 ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-33 ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-34 ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-36 ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-37 ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-38 ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-41 ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-7 Fix tx tests (#8945) (#9284) Co-authored-by: Nikita Vasilev fix tests --- .github/config/muted_ya.txt | 8 +- ydb/core/base/events.h | 2 + .../run/kikimr_services_initializers.cpp | 23 + .../run/kikimr_services_initializers.h | 6 + ydb/core/driver_lib/run/run.cpp | 4 + ydb/core/driver_lib/run/service_mask.h | 1 + ydb/core/driver_lib/run/ya.make | 1 + .../arrow/accessor/abstract/accessor.cpp | 163 +++++++ .../arrow/accessor/abstract/accessor.h | 396 +++++++++++++++ .../arrow/accessor/abstract/constructor.cpp | 11 + .../arrow/accessor/abstract/constructor.h | 76 +++ .../arrow/accessor/abstract/request.cpp | 16 + .../formats/arrow/accessor/abstract/request.h | 58 +++ .../formats/arrow/accessor/abstract/ya.make | 17 + .../arrow/accessor/common/chunk_data.cpp | 5 + .../arrow/accessor/common/chunk_data.h | 23 + 
.../formats/arrow/accessor/common/const.cpp | 5 + .../formats/arrow/accessor/common/const.h | 12 + .../formats/arrow/accessor/common/ya.make | 12 + .../arrow/accessor/composite/accessor.cpp | 57 +++ .../arrow/accessor/composite/accessor.h | 69 +++ .../formats/arrow/accessor/composite/ya.make | 12 + .../accessor/composite_serial/accessor.cpp | 46 ++ .../accessor/composite_serial/accessor.h | 78 +++ .../arrow/accessor/composite_serial/ya.make | 13 + .../formats/arrow/accessor/plain/accessor.cpp | 89 ++++ .../formats/arrow/accessor/plain/accessor.h | 77 +++ .../arrow/accessor/plain/constructor.cpp | 34 ++ .../arrow/accessor/plain/constructor.h | 28 ++ .../formats/arrow/accessor/plain/request.cpp | 22 + .../formats/arrow/accessor/plain/request.h | 26 + ydb/core/formats/arrow/accessor/plain/ya.make | 14 + .../arrow/accessor/sparsed/accessor.cpp | 280 +++++++++++ .../formats/arrow/accessor/sparsed/accessor.h | 189 ++++++++ .../arrow/accessor/sparsed/constructor.cpp | 34 ++ .../arrow/accessor/sparsed/constructor.h | 28 ++ .../arrow/accessor/sparsed/request.cpp | 22 + .../formats/arrow/accessor/sparsed/request.h | 26 + .../formats/arrow/accessor/sparsed/ya.make | 14 + ydb/core/formats/arrow/accessor/ya.make | 11 + ydb/core/formats/arrow/arrow_helpers.cpp | 44 ++ ydb/core/formats/arrow/arrow_helpers.h | 6 +- ydb/core/formats/arrow/common/accessor.cpp | 135 ------ ydb/core/formats/arrow/common/accessor.h | 227 --------- ydb/core/formats/arrow/common/adapter.h | 33 +- ydb/core/formats/arrow/common/container.cpp | 58 ++- ydb/core/formats/arrow/common/container.h | 4 +- ydb/core/formats/arrow/common/validation.h | 25 +- .../formats/arrow/common/vector_operations.h | 54 +++ ydb/core/formats/arrow/common/ya.make | 4 +- ydb/core/formats/arrow/custom_registry.cpp | 7 + ydb/core/formats/arrow/modifier/schema.cpp | 7 + ydb/core/formats/arrow/modifier/schema.h | 112 ++++- ydb/core/formats/arrow/process_columns.cpp | 158 ++++-- ydb/core/formats/arrow/process_columns.h | 33 +- 
ydb/core/formats/arrow/program.cpp | 39 ++ ydb/core/formats/arrow/program.h | 14 +- ydb/core/formats/arrow/protos/accessor.proto | 30 ++ ydb/core/formats/arrow/protos/ya.make | 1 + .../formats/arrow/reader/batch_iterator.h | 3 +- ydb/core/formats/arrow/reader/merger.cpp | 9 +- ydb/core/formats/arrow/reader/merger.h | 7 +- ydb/core/formats/arrow/reader/position.cpp | 66 +-- ydb/core/formats/arrow/reader/position.h | 113 +++-- .../formats/arrow/reader/result_builder.cpp | 2 +- ydb/core/formats/arrow/save_load/loader.cpp | 68 +++ ydb/core/formats/arrow/save_load/loader.h | 56 +++ .../arrow/save_load}/saver.cpp | 13 +- .../arrow/save_load}/saver.h | 22 +- ydb/core/formats/arrow/save_load/ya.make | 17 + .../formats/arrow/serializer/abstract.cpp | 6 +- ydb/core/formats/arrow/serializer/abstract.h | 1 + ydb/core/formats/arrow/serializer/ya.make | 2 +- ydb/core/formats/arrow/simple_arrays_cache.h | 12 +- ydb/core/formats/arrow/simple_builder/array.h | 11 +- .../formats/arrow/simple_builder/filler.cpp | 12 +- .../formats/arrow/simple_builder/filler.h | 83 +++- ydb/core/formats/arrow/size_calcer.cpp | 13 +- ydb/core/formats/arrow/size_calcer.h | 30 +- ydb/core/formats/arrow/special_keys.cpp | 24 +- ydb/core/formats/arrow/special_keys.h | 5 +- .../formats/arrow/splitter/scheme_info.cpp | 13 + .../arrow}/splitter/scheme_info.h | 15 +- .../formats/arrow/splitter/similar_packer.cpp | 5 + .../arrow}/splitter/similar_packer.h | 2 +- .../arrow}/splitter/simple.cpp | 58 +-- .../arrow}/splitter/simple.h | 86 +--- .../arrow}/splitter/stats.cpp | 2 +- .../arrow}/splitter/stats.h | 2 +- ydb/core/formats/arrow/splitter/ya.make | 17 + .../formats/arrow/ssa_program_optimizer.cpp | 5 +- .../{common => validation}/validation.cpp | 0 .../formats/arrow/validation/validation.h | 26 + ydb/core/formats/arrow/validation/ya.make | 12 + ydb/core/formats/arrow/ya.make | 3 + ydb/core/grpc_services/rpc_load_rows.cpp | 9 +- .../kqp/compute_actor/kqp_compute_actor.cpp | 4 +- 
.../kqp/compute_actor/kqp_compute_actor.h | 4 +- .../compute_actor/kqp_compute_actor_factory.h | 2 +- .../kqp/compute_actor/kqp_compute_events.h | 290 +++++------ .../compute_actor/kqp_scan_compute_actor.cpp | 6 +- .../compute_actor/kqp_scan_compute_actor.h | 5 +- .../kqp_scan_compute_manager.cpp | 9 +- .../compute_actor/kqp_scan_compute_manager.h | 16 +- ydb/core/kqp/compute_actor/kqp_scan_events.h | 5 + .../compute_actor/kqp_scan_fetcher_actor.cpp | 31 +- .../compute_actor/kqp_scan_fetcher_actor.h | 7 +- .../kqp/executer_actor/kqp_data_executer.cpp | 2 +- ydb/core/kqp/executer_actor/kqp_planner.cpp | 4 +- ydb/core/kqp/executer_actor/kqp_planner.h | 4 +- .../tablestore/operations/alter_column.cpp | 15 +- .../tablestore/operations/alter_column.h | 2 + .../kqp/node_service/kqp_node_service.cpp | 4 +- .../kqp/opt/physical/kqp_opt_phy_olap_agg.cpp | 5 - ydb/core/kqp/query_data/kqp_predictor.cpp | 2 +- ydb/core/kqp/runtime/kqp_scan_data.cpp | 9 +- ydb/core/kqp/ut/common/columnshard.cpp | 57 ++- ydb/core/kqp/ut/common/columnshard.h | 7 +- ydb/core/kqp/ut/common/kqp_ut_common.cpp | 2 + ydb/core/kqp/ut/common/kqp_ut_common.h | 1 + ydb/core/kqp/ut/olap/aggregations_ut.cpp | 7 +- ydb/core/kqp/ut/olap/blobs_sharing_ut.cpp | 227 ++++++--- ydb/core/kqp/ut/olap/clickbench_ut.cpp | 11 +- ydb/core/kqp/ut/olap/datatime64_ut.cpp | 1 + ydb/core/kqp/ut/olap/decimal_ut.cpp | 198 ++++++++ ydb/core/kqp/ut/olap/helpers/aggregation.cpp | 4 +- ydb/core/kqp/ut/olap/helpers/get_value.cpp | 5 + ydb/core/kqp/ut/olap/helpers/local.h | 8 +- ydb/core/kqp/ut/olap/helpers/typed_local.cpp | 29 +- ydb/core/kqp/ut/olap/helpers/typed_local.h | 35 +- ydb/core/kqp/ut/olap/indexes_ut.cpp | 10 +- ydb/core/kqp/ut/olap/kqp_olap_stats_ut.cpp | 4 +- ydb/core/kqp/ut/olap/kqp_olap_ut.cpp | 147 +++++- ydb/core/kqp/ut/olap/sparsed_ut.cpp | 307 ++++++++++++ ydb/core/kqp/ut/olap/sys_view_ut.cpp | 2 +- ydb/core/kqp/ut/olap/tiering_ut.cpp | 161 ++++++ ydb/core/kqp/ut/olap/write_ut.cpp | 29 +- 
ydb/core/kqp/ut/olap/ya.make | 5 +- ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp | 83 ++++ ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp | 4 +- ydb/core/kqp/ut/tx/kqp_sink_common.h | 12 +- ydb/core/kqp/ut/tx/kqp_sink_locks_ut.cpp | 46 +- ydb/core/kqp/ut/tx/kqp_sink_mvcc_ut.cpp | 59 ++- ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp | 74 ++- ydb/core/protos/config.proto | 16 +- ydb/core/protos/counters_columnshard.proto | 8 +- ydb/core/protos/feature_flags.proto | 6 + ydb/core/protos/flat_scheme_op.proto | 3 + ydb/core/protos/flat_tx_scheme.proto | 2 + ydb/core/protos/tx_columnshard.proto | 26 + ydb/core/statistics/ut_common/ut_common.cpp | 4 +- ydb/core/sys_view/common/schema.h | 8 +- .../tablet/tablet_counters_aggregator.cpp | 49 +- ydb/core/testlib/basics/feature_flags.h | 3 + ydb/core/testlib/cs_helper.cpp | 30 +- ydb/core/testlib/cs_helper.h | 11 +- ydb/core/testlib/test_client.cpp | 6 + ydb/core/testlib/ya.make | 1 + .../tx/columnshard/background_controller.h | 7 + .../blobs_action/bs/blob_manager.cpp | 9 +- .../blobs_action/bs/blob_manager.h | 3 - .../tx/columnshard/blobs_action/bs/write.cpp | 7 +- .../transaction/tx_gc_insert_table.h | 4 +- .../blobs_action/transaction/tx_write.cpp | 101 ++-- .../blobs_action/transaction/tx_write.h | 2 +- .../transaction/tx_write_index.cpp | 2 +- ydb/core/tx/columnshard/columnshard.cpp | 250 +++++----- ydb/core/tx/columnshard/columnshard.h | 4 +- ydb/core/tx/columnshard/columnshard__init.cpp | 32 +- .../columnshard__notify_tx_completion.cpp | 2 +- .../tx/columnshard/columnshard__plan_step.cpp | 4 +- .../columnshard/columnshard__progress_tx.cpp | 45 +- .../columnshard__propose_transaction.cpp | 56 ++- ydb/core/tx/columnshard/columnshard__scan.cpp | 7 +- .../tx/columnshard/columnshard__write.cpp | 431 +++++++++++----- .../columnshard/columnshard__write_index.cpp | 2 +- .../tx/columnshard/columnshard_common.cpp | 18 - ydb/core/tx/columnshard/columnshard_common.h | 94 ---- ydb/core/tx/columnshard/columnshard_impl.cpp | 201 ++++---- 
ydb/core/tx/columnshard/columnshard_impl.h | 153 ++---- .../columnshard/columnshard_private_events.h | 36 +- .../tx/columnshard/columnshard_schema.cpp | 61 +-- ydb/core/tx/columnshard/columnshard_schema.h | 220 ++++++++- ydb/core/tx/columnshard/common/limits.h | 5 + ydb/core/tx/columnshard/common/snapshot.cpp | 8 + ydb/core/tx/columnshard/common/snapshot.h | 4 + .../columnshard/common/tests/shard_reader.cpp | 4 - ydb/core/tx/columnshard/common/tests/ya.make | 14 - ydb/core/tx/columnshard/common/ya.make | 2 +- .../counters/aggregation/table_stats.h | 47 ++ .../columnshard/counters/aggregation/ya.make | 10 + .../counters/background_controller.cpp | 18 + .../counters/background_controller.h | 35 ++ .../tx/columnshard/counters/column_tables.cpp | 13 + .../tx/columnshard/counters/column_tables.h | 85 ++++ .../tx/columnshard/counters/columnshard.cpp | 4 +- .../tx/columnshard/counters/columnshard.h | 23 +- .../columnshard/counters/counters_manager.cpp | 5 + .../columnshard/counters/counters_manager.h | 97 ++++ .../tx/columnshard/counters/engine_logs.h | 75 ++- .../tx/columnshard/counters/initialization.h | 56 +++ .../ev_write.cpp => counters/req_tracer.cpp} | 3 +- ydb/core/tx/columnshard/counters/req_tracer.h | 51 ++ ydb/core/tx/columnshard/counters/scan.cpp | 6 + ydb/core/tx/columnshard/counters/scan.h | 47 +- .../tx/columnshard/counters/tablet_counters.h | 132 +++++ .../tx/columnshard/counters/tx_progress.h | 110 +++++ .../tx/columnshard/counters/writes_monitor.h | 46 ++ ydb/core/tx/columnshard/counters/ya.make | 13 +- .../common/transactions/tx_extension.h | 4 +- .../engines/changes/abstract/abstract.cpp | 2 +- .../engines/changes/cleanup_portions.cpp | 30 +- .../engines/changes/cleanup_tables.cpp | 4 +- .../engines/changes/compaction.cpp | 2 +- .../changes/compaction/abstract/merger.cpp | 28 +- .../changes/compaction/abstract/merger.h | 139 +++++- .../changes/compaction/common/context.h | 42 +- .../engines/changes/compaction/merger.cpp | 75 +-- 
.../engines/changes/compaction/merger.h | 20 +- .../compaction/plain/column_cursor.cpp | 47 +- .../changes/compaction/plain/column_cursor.h | 19 +- .../compaction/plain/column_portion_chunk.cpp | 57 +-- .../compaction/plain/column_portion_chunk.h | 9 +- .../changes/compaction/plain/logic.cpp | 26 +- .../engines/changes/compaction/plain/logic.h | 11 +- .../compaction/plain/merged_column.cpp | 16 +- .../changes/compaction/plain/merged_column.h | 8 +- .../engines/changes/compaction/plain/ya.make | 2 +- .../changes/compaction/sparsed/logic.cpp | 169 +++++++ .../changes/compaction/sparsed/logic.h | 280 +++++++++++ .../changes/compaction/sparsed/ya.make | 11 + .../engines/changes/compaction/ya.make | 3 +- .../engines/changes/general_compaction.cpp | 13 +- .../engines/changes/indexation.cpp | 184 ++++--- .../columnshard/engines/changes/indexation.h | 23 +- .../engines/changes/with_appended.cpp | 16 +- .../tx/columnshard/engines/column_engine.h | 6 +- .../engines/column_engine_logs.cpp | 55 +-- .../columnshard/engines/column_engine_logs.h | 13 +- .../tx/columnshard/engines/db_wrapper.cpp | 4 +- ydb/core/tx/columnshard/engines/db_wrapper.h | 11 +- ydb/core/tx/columnshard/engines/defs.cpp | 11 + ydb/core/tx/columnshard/engines/defs.h | 27 +- .../insert_table/committed.cpp} | 2 +- .../engines/insert_table/committed.h | 161 ++++++ .../columnshard/engines/insert_table/data.h | 199 -------- .../engines/insert_table/insert_table.cpp | 116 +++-- .../engines/insert_table/insert_table.h | 61 ++- .../engines/insert_table/inserted.cpp | 12 + .../engines/insert_table/inserted.h | 35 ++ .../columnshard/engines/insert_table/meta.cpp | 23 +- .../columnshard/engines/insert_table/meta.h | 26 +- .../engines/insert_table/path_info.cpp | 10 +- .../engines/insert_table/path_info.h | 28 +- .../engines/insert_table/rt_insertion.cpp | 57 +-- .../engines/insert_table/rt_insertion.h | 59 ++- .../insert_table/{data.cpp => user_data.cpp} | 21 +- .../engines/insert_table/user_data.h | 100 ++++ 
.../columnshard/engines/insert_table/ya.make | 4 +- .../engines/portions/column_record.cpp | 5 +- .../engines/portions/column_record.h | 109 ++--- .../tx/columnshard/engines/portions/common.h | 2 + .../engines/portions/constructor.cpp | 16 - .../engines/portions/index_chunk.h | 1 - .../tx/columnshard/engines/portions/meta.cpp | 2 +- .../engines/portions/portion_info.cpp | 159 +++--- .../engines/portions/portion_info.h | 126 ++--- .../engines/portions/read_with_blobs.cpp | 4 +- .../engines/predicate/container.cpp | 32 +- .../columnshard/engines/predicate/container.h | 61 ++- .../columnshard/engines/predicate/filter.cpp | 162 ++++++- .../tx/columnshard/engines/predicate/filter.h | 37 +- .../engines/predicate/predicate.cpp | 118 ++++- .../columnshard/engines/predicate/predicate.h | 30 +- .../columnshard/engines/predicate/range.cpp | 105 ++-- .../tx/columnshard/engines/predicate/range.h | 20 +- .../engines/reader/abstract/abstract.h | 2 +- .../engines/reader/abstract/read_context.h | 33 +- .../engines/reader/abstract/read_metadata.cpp | 21 +- .../engines/reader/abstract/read_metadata.h | 88 +++- .../engines/reader/actor/actor.cpp | 13 +- .../engines/reader/common/description.h | 6 +- .../engines/reader/common/result.cpp | 15 +- .../engines/reader/common/result.h | 35 +- .../plain_reader/constructor/constructor.cpp | 23 +- .../constructor/read_metadata.cpp | 84 +++- .../plain_reader/constructor/read_metadata.h | 95 +++- .../plain_reader/constructor/resolver.h | 4 - .../reader/plain_reader/constructor/ya.make | 1 + .../plain_reader/iterator/columns_set.cpp | 4 +- .../plain_reader/iterator/columns_set.h | 153 ++++-- .../reader/plain_reader/iterator/context.cpp | 225 ++++++--- .../reader/plain_reader/iterator/context.h | 54 ++- .../plain_reader/iterator/fetched_data.cpp | 11 +- .../plain_reader/iterator/fetched_data.h | 51 +- .../reader/plain_reader/iterator/fetching.cpp | 135 +++++- .../reader/plain_reader/iterator/fetching.h | 185 +++++-- 
.../reader/plain_reader/iterator/interval.cpp | 54 ++- .../reader/plain_reader/iterator/interval.h | 25 +- .../reader/plain_reader/iterator/iterator.cpp | 10 +- .../reader/plain_reader/iterator/iterator.h | 16 +- .../reader/plain_reader/iterator/merge.cpp | 41 +- .../reader/plain_reader/iterator/merge.h | 40 +- .../plain_reader/iterator/plain_read_data.cpp | 67 +-- .../plain_reader/iterator/plain_read_data.h | 9 +- .../reader/plain_reader/iterator/scanner.cpp | 185 ++++--- .../reader/plain_reader/iterator/scanner.h | 10 +- .../reader/plain_reader/iterator/source.cpp | 70 +-- .../reader/plain_reader/iterator/source.h | 178 +++++-- .../reader/plain_reader/iterator/ya.make | 3 + .../reader/sys_view/abstract/filler.cpp | 6 +- .../reader/sys_view/abstract/iterator.h | 13 +- .../engines/reader/sys_view/chunks/chunks.cpp | 65 ++- .../engines/reader/sys_view/chunks/chunks.h | 26 + .../reader/sys_view/portions/portions.cpp | 4 +- .../reader/transaction/tx_internal_scan.cpp | 117 +++-- .../reader/transaction/tx_internal_scan.h | 13 +- .../engines/reader/transaction/tx_scan.cpp | 291 ++++------- .../engines/reader/transaction/tx_scan.h | 10 +- .../engines/scheme/abstract/index_info.h | 67 +-- .../engines/scheme/abstract/loader.cpp | 61 --- .../engines/scheme/abstract/loader.h | 47 -- .../engines/scheme/abstract/ya.make | 5 +- .../engines/scheme/column/info.cpp | 20 +- .../columnshard/engines/scheme/column/info.h | 26 +- .../engines/scheme/column_features.h | 7 +- .../columnshard/engines/scheme/index_info.cpp | 344 ++++++------- .../columnshard/engines/scheme/index_info.h | 258 ++++++---- .../scheme/indexes/abstract/program.cpp | 9 +- .../engines/scheme/tiering/tier_info.h | 19 +- .../scheme/versions/abstract_scheme.cpp | 67 ++- .../engines/scheme/versions/abstract_scheme.h | 23 +- .../scheme/versions/filtered_scheme.cpp | 64 ++- .../engines/scheme/versions/filtered_scheme.h | 17 +- .../scheme/versions/snapshot_scheme.cpp | 13 +- .../engines/scheme/versions/snapshot_scheme.h | 
9 +- .../engines/scheme/versions/versioned_index.h | 2 +- .../tx/columnshard/engines/scheme/ya.make | 1 + .../storage/actualizer/tiering/tiering.cpp | 2 +- .../engines/storage/chunks/column.cpp | 16 +- .../engines/storage/chunks/column.h | 8 +- .../engines/storage/chunks/data.cpp | 9 + .../columnshard/engines/storage/chunks/data.h | 6 +- .../engines/storage/chunks/null_column.cpp | 5 - .../engines/storage/chunks/null_column.h | 60 --- .../engines/storage/chunks/ya.make | 1 - .../engines/storage/granule/granule.cpp | 9 +- .../engines/storage/granule/granule.h | 11 +- .../storage/granule/portions_index.cpp | 21 +- .../engines/storage/granule/portions_index.h | 132 ++++- .../engines/storage/granule/storage.cpp | 2 +- .../engines/storage/indexes/max/meta.cpp | 4 +- .../engines/storage/indexes/portions/meta.cpp | 10 - .../engines/storage/indexes/portions/meta.h | 48 +- .../optimizer/lbuckets/planner/optimizer.cpp | 3 +- .../optimizer/lbuckets/planner/optimizer.h | 8 +- .../optimizer/sbuckets/common/optimizer.cpp | 5 +- .../sbuckets/constructor/constructor.cpp | 3 +- .../sbuckets/constructor/constructor.h | 5 +- .../optimizer/sbuckets/index/bucket.cpp | 2 +- .../optimizer/sbuckets/logic/slices/logic.cpp | 2 +- ydb/core/tx/columnshard/engines/ut/helper.cpp | 15 + ydb/core/tx/columnshard/engines/ut/helper.h | 1 + .../engines/ut/ut_insert_table.cpp | 38 +- .../columnshard/engines/ut/ut_logs_engine.cpp | 90 ++-- .../tx/columnshard/engines/ut/ut_program.cpp | 60 +++ .../engines/writer/buffer/actor.cpp | 3 +- .../engines/writer/buffer/events.h | 6 +- .../engines/writer/indexed_blob_constructor.h | 19 +- ydb/core/tx/columnshard/engines/ya.make | 1 + .../columnshard/hooks/abstract/abstract.cpp | 22 + .../tx/columnshard/hooks/abstract/abstract.h | 174 ++++--- .../tx/columnshard/hooks/testing/controller.h | 77 ++- .../columnshard/hooks/testing/ro_controller.h | 19 +- .../columnshard/inflight_request_tracker.cpp | 173 ++++--- .../tx/columnshard/inflight_request_tracker.h | 109 
++++- .../normalizer/abstract/abstract.h | 2 + .../normalizer/insert_table/broken_dedup.cpp | 151 ++++++ .../normalizer/insert_table/broken_dedup.h | 35 ++ .../normalizer/insert_table/ya.make | 11 + .../columnshard/normalizer/portion/chunks.cpp | 5 +- .../columnshard/normalizer/portion/chunks.h | 4 + .../normalizer/tablet/broken_txs.cpp | 31 ++ .../normalizer/tablet/broken_txs.h | 35 ++ .../normalizer/tablet/gc_counters.h | 2 +- .../tx/columnshard/normalizer/tablet/ya.make | 1 + ydb/core/tx/columnshard/normalizer/ya.make | 1 + .../operations/batch_builder/builder.cpp | 14 +- .../operations/batch_builder/builder.h | 14 +- .../operations/batch_builder/merger.cpp | 6 +- .../operations/batch_builder/restore.cpp | 18 +- .../operations/batch_builder/restore.h | 11 +- .../tx/columnshard/operations/manager.cpp | 296 +++++++++++ ydb/core/tx/columnshard/operations/manager.h | 221 +++++++++ .../operations/slice_builder/builder.cpp | 28 +- .../operations/slice_builder/builder.h | 16 +- ydb/core/tx/columnshard/operations/write.cpp | 326 ++++--------- ydb/core/tx/columnshard/operations/write.h | 142 +++--- .../tx/columnshard/operations/write_data.cpp | 2 +- ydb/core/tx/columnshard/operations/ya.make | 2 + .../resource_subscriber/counters.cpp | 3 + .../splitter/abstract/chunk_meta.cpp | 21 +- .../splitter/abstract/chunk_meta.h | 4 +- .../tx/columnshard/splitter/abstract/chunks.h | 1 - .../tx/columnshard/splitter/batch_slice.cpp | 54 +-- .../tx/columnshard/splitter/batch_slice.h | 52 +- ydb/core/tx/columnshard/splitter/chunks.h | 40 +- .../tx/columnshard/splitter/scheme_info.cpp | 13 - .../columnshard/splitter/ut/batch_slice.cpp | 65 +++ .../tx/columnshard/splitter/ut/batch_slice.h | 31 ++ .../columnshard/splitter/ut/ut_splitter.cpp | 153 +++--- ydb/core/tx/columnshard/splitter/ut/ya.make | 3 + ydb/core/tx/columnshard/splitter/ya.make | 5 +- .../test_helper/columnshard_ut_common.cpp | 34 +- .../test_helper/columnshard_ut_common.h | 42 +- 
.../tx/columnshard/test_helper/controllers.h | 13 +- .../tx/columnshard/test_helper/helper.cpp | 22 +- ydb/core/tx/columnshard/test_helper/helper.h | 8 +- .../tests => test_helper}/shard_reader.h | 4 +- ydb/core/tx/columnshard/test_helper/ya.make | 1 + .../transactions/locks/abstract.cpp | 6 + .../columnshard/transactions/locks/abstract.h | 105 ++++ .../transactions/locks/dependencies.cpp | 48 ++ .../transactions/locks/dependencies.h | 55 +++ .../transactions/locks/interaction.cpp | 26 + .../transactions/locks/interaction.h | 458 ++++++++++++++++++ .../transactions/locks/read_finished.cpp | 5 + .../transactions/locks/read_finished.h | 30 ++ .../transactions/locks/read_start.cpp | 47 ++ .../transactions/locks/read_start.h | 71 +++ .../columnshard/transactions/locks/write.cpp | 5 + .../tx/columnshard/transactions/locks/write.h | 36 ++ .../tx/columnshard/transactions/locks/ya.make | 19 + .../transactions/operators/backup.h | 3 + .../transactions/operators/ev_write.h | 77 --- .../operators/ev_write/abstract.cpp | 5 + .../operators/ev_write/abstract.h | 113 +++++ .../operators/ev_write/primary.cpp | 5 + .../transactions/operators/ev_write/primary.h | 287 +++++++++++ .../operators/ev_write/secondary.cpp | 5 + .../operators/ev_write/secondary.h | 219 +++++++++ .../operators/ev_write/simple.cpp | 5 + .../transactions/operators/ev_write/simple.h | 28 ++ .../transactions/operators/ev_write/sync.cpp | 5 + .../transactions/operators/ev_write/sync.h | 37 ++ .../transactions/operators/ev_write/ya.make | 18 + .../transactions/operators/long_tx_write.cpp | 9 +- .../transactions/operators/long_tx_write.h | 19 +- .../transactions/operators/propose_tx.cpp | 27 +- .../transactions/operators/propose_tx.h | 1 + .../transactions/operators/schema.cpp | 31 +- .../transactions/operators/schema.h | 18 +- .../transactions/operators/sharing.h | 3 + .../transactions/operators/ss_operation.cpp | 23 - .../transactions/operators/ss_operation.h | 17 - .../transactions/operators/ya.make | 2 +- 
.../transactions/protos/tx_event.proto | 19 + .../columnshard/transactions/protos/ya.make | 12 + .../transactions/tx_controller.cpp | 139 +++--- .../columnshard/transactions/tx_controller.h | 151 ++++-- ydb/core/tx/columnshard/transactions/ya.make | 1 + .../ut_rw/ut_columnshard_read_write.cpp | 123 +++-- ydb/core/tx/columnshard/ut_rw/ya.make | 2 - .../ut_schema/ut_columnshard_schema.cpp | 20 +- ydb/core/tx/columnshard/ya.make | 1 - .../tx/data_events/columnshard_splitter.cpp | 3 +- ydb/core/tx/data_events/events.h | 11 +- ydb/core/tx/data_events/shard_writer.cpp | 22 +- ydb/core/tx/data_events/shard_writer.h | 80 ++- ydb/core/tx/data_events/shards_splitter.h | 8 +- ydb/core/tx/data_events/write_data.h | 14 + .../limiter/grouped_memory/service/actor.cpp | 50 ++ .../tx/limiter/grouped_memory/service/actor.h | 59 +++ .../grouped_memory/service/allocation.cpp | 26 + .../grouped_memory/service/allocation.h | 76 +++ .../grouped_memory/service/counters.cpp | 5 + .../limiter/grouped_memory/service/counters.h | 62 +++ .../limiter/grouped_memory/service/group.cpp | 68 +++ .../tx/limiter/grouped_memory/service/group.h | 90 ++++ .../tx/limiter/grouped_memory/service/ids.cpp | 66 +++ .../tx/limiter/grouped_memory/service/ids.h | 70 +++ .../grouped_memory/service/manager.cpp | 122 +++++ .../limiter/grouped_memory/service/manager.h | 79 +++ .../grouped_memory/service/process.cpp | 5 + .../limiter/grouped_memory/service/process.h | 262 ++++++++++ .../tx/limiter/grouped_memory/service/ya.make | 20 + .../limiter/grouped_memory/usage/abstract.cpp | 79 +++ .../limiter/grouped_memory/usage/abstract.h | 223 +++++++++ .../limiter/grouped_memory/usage/config.cpp | 20 + .../tx/limiter/grouped_memory/usage/config.h | 27 ++ .../limiter/grouped_memory/usage/events.cpp | 5 + .../tx/limiter/grouped_memory/usage/events.h | 150 ++++++ .../limiter/grouped_memory/usage/service.cpp | 5 + .../tx/limiter/grouped_memory/usage/service.h | 102 ++++ .../tx/limiter/grouped_memory/usage/ya.make | 16 + 
.../limiter/grouped_memory/ut/ut_manager.cpp | 222 +++++++++ ydb/core/tx/limiter/grouped_memory/ut/ya.make | 17 + ydb/core/tx/limiter/grouped_memory/ya.make | 5 + ydb/core/tx/limiter/ya.make | 5 + ydb/core/tx/program/program.cpp | 5 +- ydb/core/tx/program/program.h | 5 - .../olap/bg_tasks/tx_chain/session.h | 5 + .../tx/schemeshard/olap/columns/update.cpp | 16 + ydb/core/tx/schemeshard/olap/columns/update.h | 9 + .../tx/schemeshard/olap/manager/manager.cpp | 4 +- .../tx/schemeshard/olap/manager/manager.h | 4 +- .../olap/operations/alter/common/update.h | 25 + .../operations/alter/standalone/update.cpp | 19 + .../olap/operations/alter_store.cpp | 43 +- .../olap/operations/alter_table.cpp | 13 + .../tx/schemeshard/olap/operations/checks.h | 19 + .../olap/operations/create_store.cpp | 10 + .../olap/operations/create_table.cpp | 12 + ydb/core/tx/schemeshard/olap/table/table.h | 5 +- ydb/core/tx/schemeshard/olap/ttl/ya.make | 1 + ydb/core/tx/schemeshard/schemeshard__init.cpp | 1 + .../schemeshard/schemeshard__table_stats.cpp | 4 +- .../tx/schemeshard/schemeshard_info_types.cpp | 33 +- .../tx/schemeshard/schemeshard_info_types.h | 10 +- .../schemeshard_path_describer.cpp | 4 +- ydb/core/tx/schemeshard/schemeshard_schema.h | 4 +- ydb/core/tx/schemeshard/schemeshard_types.cpp | 4 + ydb/core/tx/schemeshard/schemeshard_types.h | 1 + .../tx/schemeshard/ut_helpers/helpers.cpp | 5 +- .../tx/schemeshard/ut_helpers/test_env.cpp | 1 + ydb/core/tx/schemeshard/ut_helpers/test_env.h | 1 + ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp | 73 ++- .../schemeshard/ut_subdomain/ut_subdomain.cpp | 150 ++++++ ydb/core/tx/schemeshard/ut_ttl/ut_ttl.cpp | 8 + ydb/core/tx/tiering/manager.cpp | 19 +- ydb/core/tx/tiering/rule/manager.cpp | 7 + ydb/core/tx/tiering/rule/object.cpp | 8 + ydb/core/tx/tiering/rule/ss_fetcher.cpp | 2 +- ydb/core/tx/tiering/tier/manager.cpp | 4 + ydb/core/tx/tiering/ut/ut_tiers.cpp | 17 +- ydb/core/tx/tx_processing.h | 2 +- ydb/core/tx/tx_proxy/global.cpp | 5 + 
ydb/core/tx/tx_proxy/global.h | 9 + ydb/core/tx/tx_proxy/rpc_long_tx.cpp | 125 ++--- .../tx/tx_proxy/upload_rows_common_impl.cpp | 13 +- .../tx/tx_proxy/upload_rows_common_impl.h | 67 ++- ydb/core/tx/tx_proxy/ya.make | 1 + ydb/library/accessor/validator.h | 5 + .../AggregateFunctionNumRows.h | 79 +++ .../AggregateFunctions/IAggregateFunction.cpp | 3 + .../AggregateFunctions/IAggregateFunction.h | 18 +- ydb/library/arrow_kernels/func_num_rows.h | 20 + ydb/library/arrow_kernels/functions.h | 1 + ydb/library/conclusion/result.h | 5 + ydb/library/services/services.proto | 2 + ydb/library/yql/sql/v1/node.h | 1 + ydb/library/yql/sql/v1/query.cpp | 4 + ydb/library/yql/sql/v1/sql_translation.cpp | 24 +- ydb/library/yql/sql/v1/sql_translation.h | 2 + ydb/library/yql/sql/v1/sql_ut.cpp | 25 + .../queries-original-plan-column-12 | 34 +- .../queries-original-plan-column-14 | 37 +- .../queries-original-plan-column-15 | 34 +- .../queries-original-plan-column-16 | 37 +- .../queries-original-plan-column-17 | 37 +- .../queries-original-plan-column-21 | 49 +- .../queries-original-plan-column-30 | 85 +++- .../queries-original-plan-column-31 | 85 +++- .../queries-original-plan-column-32 | 85 +++- .../queries-original-plan-column-33 | 34 +- .../queries-original-plan-column-34 | 37 +- .../queries-original-plan-column-36 | 34 +- .../queries-original-plan-column-37 | 34 +- .../queries-original-plan-column-38 | 34 +- .../queries-original-plan-column-41 | 37 +- .../queries-original-plan-column-7 | 34 +- ydb/tests/functional/clickbench/ya.make | 1 + .../flat_schemeshard.schema | 8 +- ydb/tests/functional/ydb_cli/ya.make | 1 + ydb/tests/library/harness/kikimr_config.py | 2 + ydb/tests/library/harness/kikimr_runner.py | 45 +- .../harness/resources/default_yaml.yml | 2 - ydb/tests/olap/scenario/conftest.py | 3 +- .../olap/scenario/helpers/data_generators.py | 26 +- .../olap/scenario/helpers/table_helper.py | 99 ++++ .../olap/scenario/helpers/tiering_helper.py | 246 ++++++++++ 
ydb/tests/olap/scenario/test_alter_tiering.py | 152 ++++++ ydb/tests/olap/scenario/ya.make | 1 + ydb/tools/cfg/static.py | 7 + ydb/tools/olap_workload/__main__.py | 199 ++++++++ ydb/tools/olap_workload/ya.make | 12 + ydb/tools/ya.make | 1 + 587 files changed, 19209 insertions(+), 6264 deletions(-) create mode 100644 ydb/core/formats/arrow/accessor/abstract/accessor.cpp create mode 100644 ydb/core/formats/arrow/accessor/abstract/accessor.h create mode 100644 ydb/core/formats/arrow/accessor/abstract/constructor.cpp create mode 100644 ydb/core/formats/arrow/accessor/abstract/constructor.h create mode 100644 ydb/core/formats/arrow/accessor/abstract/request.cpp create mode 100644 ydb/core/formats/arrow/accessor/abstract/request.h create mode 100644 ydb/core/formats/arrow/accessor/abstract/ya.make create mode 100644 ydb/core/formats/arrow/accessor/common/chunk_data.cpp create mode 100644 ydb/core/formats/arrow/accessor/common/chunk_data.h create mode 100644 ydb/core/formats/arrow/accessor/common/const.cpp create mode 100644 ydb/core/formats/arrow/accessor/common/const.h create mode 100644 ydb/core/formats/arrow/accessor/common/ya.make create mode 100644 ydb/core/formats/arrow/accessor/composite/accessor.cpp create mode 100644 ydb/core/formats/arrow/accessor/composite/accessor.h create mode 100644 ydb/core/formats/arrow/accessor/composite/ya.make create mode 100644 ydb/core/formats/arrow/accessor/composite_serial/accessor.cpp create mode 100644 ydb/core/formats/arrow/accessor/composite_serial/accessor.h create mode 100644 ydb/core/formats/arrow/accessor/composite_serial/ya.make create mode 100644 ydb/core/formats/arrow/accessor/plain/accessor.cpp create mode 100644 ydb/core/formats/arrow/accessor/plain/accessor.h create mode 100644 ydb/core/formats/arrow/accessor/plain/constructor.cpp create mode 100644 ydb/core/formats/arrow/accessor/plain/constructor.h create mode 100644 ydb/core/formats/arrow/accessor/plain/request.cpp create mode 100644 
ydb/core/formats/arrow/accessor/plain/request.h create mode 100644 ydb/core/formats/arrow/accessor/plain/ya.make create mode 100644 ydb/core/formats/arrow/accessor/sparsed/accessor.cpp create mode 100644 ydb/core/formats/arrow/accessor/sparsed/accessor.h create mode 100644 ydb/core/formats/arrow/accessor/sparsed/constructor.cpp create mode 100644 ydb/core/formats/arrow/accessor/sparsed/constructor.h create mode 100644 ydb/core/formats/arrow/accessor/sparsed/request.cpp create mode 100644 ydb/core/formats/arrow/accessor/sparsed/request.h create mode 100644 ydb/core/formats/arrow/accessor/sparsed/ya.make create mode 100644 ydb/core/formats/arrow/accessor/ya.make delete mode 100644 ydb/core/formats/arrow/common/accessor.cpp delete mode 100644 ydb/core/formats/arrow/common/accessor.h create mode 100644 ydb/core/formats/arrow/common/vector_operations.h create mode 100644 ydb/core/formats/arrow/protos/accessor.proto create mode 100644 ydb/core/formats/arrow/save_load/loader.cpp create mode 100644 ydb/core/formats/arrow/save_load/loader.h rename ydb/core/{tx/columnshard/engines/scheme/abstract => formats/arrow/save_load}/saver.cpp (63%) rename ydb/core/{tx/columnshard/engines/scheme/abstract => formats/arrow/save_load}/saver.h (59%) create mode 100644 ydb/core/formats/arrow/save_load/ya.make create mode 100644 ydb/core/formats/arrow/splitter/scheme_info.cpp rename ydb/core/{tx/columnshard => formats/arrow}/splitter/scheme_info.h (64%) create mode 100644 ydb/core/formats/arrow/splitter/similar_packer.cpp rename ydb/core/{tx/columnshard => formats/arrow}/splitter/similar_packer.h (98%) rename ydb/core/{tx/columnshard => formats/arrow}/splitter/simple.cpp (78%) rename ydb/core/{tx/columnshard => formats/arrow}/splitter/simple.h (57%) rename ydb/core/{tx/columnshard => formats/arrow}/splitter/stats.cpp (95%) rename ydb/core/{tx/columnshard => formats/arrow}/splitter/stats.h (99%) create mode 100644 ydb/core/formats/arrow/splitter/ya.make rename ydb/core/formats/arrow/{common 
=> validation}/validation.cpp (100%) create mode 100644 ydb/core/formats/arrow/validation/validation.h create mode 100644 ydb/core/formats/arrow/validation/ya.make create mode 100644 ydb/core/kqp/ut/olap/decimal_ut.cpp create mode 100644 ydb/core/kqp/ut/olap/sparsed_ut.cpp create mode 100644 ydb/core/kqp/ut/olap/tiering_ut.cpp delete mode 100644 ydb/core/tx/columnshard/columnshard_common.cpp delete mode 100644 ydb/core/tx/columnshard/columnshard_common.h delete mode 100644 ydb/core/tx/columnshard/common/tests/shard_reader.cpp delete mode 100644 ydb/core/tx/columnshard/common/tests/ya.make create mode 100644 ydb/core/tx/columnshard/counters/aggregation/table_stats.h create mode 100644 ydb/core/tx/columnshard/counters/aggregation/ya.make create mode 100644 ydb/core/tx/columnshard/counters/background_controller.cpp create mode 100644 ydb/core/tx/columnshard/counters/background_controller.h create mode 100644 ydb/core/tx/columnshard/counters/column_tables.cpp create mode 100644 ydb/core/tx/columnshard/counters/column_tables.h create mode 100644 ydb/core/tx/columnshard/counters/counters_manager.cpp create mode 100644 ydb/core/tx/columnshard/counters/counters_manager.h create mode 100644 ydb/core/tx/columnshard/counters/initialization.h rename ydb/core/tx/columnshard/{transactions/operators/ev_write.cpp => counters/req_tracer.cpp} (59%) create mode 100644 ydb/core/tx/columnshard/counters/req_tracer.h create mode 100644 ydb/core/tx/columnshard/counters/tablet_counters.h create mode 100644 ydb/core/tx/columnshard/counters/tx_progress.h create mode 100644 ydb/core/tx/columnshard/counters/writes_monitor.h create mode 100644 ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.cpp create mode 100644 ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.h create mode 100644 ydb/core/tx/columnshard/engines/changes/compaction/sparsed/ya.make create mode 100644 ydb/core/tx/columnshard/engines/defs.cpp rename 
ydb/core/tx/columnshard/{splitter/similar_packer.cpp => engines/insert_table/committed.cpp} (52%) create mode 100644 ydb/core/tx/columnshard/engines/insert_table/committed.h delete mode 100644 ydb/core/tx/columnshard/engines/insert_table/data.h create mode 100644 ydb/core/tx/columnshard/engines/insert_table/inserted.cpp create mode 100644 ydb/core/tx/columnshard/engines/insert_table/inserted.h rename ydb/core/tx/columnshard/engines/insert_table/{data.cpp => user_data.cpp} (55%) create mode 100644 ydb/core/tx/columnshard/engines/insert_table/user_data.h delete mode 100644 ydb/core/tx/columnshard/engines/scheme/abstract/loader.cpp delete mode 100644 ydb/core/tx/columnshard/engines/scheme/abstract/loader.h delete mode 100644 ydb/core/tx/columnshard/engines/storage/chunks/null_column.cpp delete mode 100644 ydb/core/tx/columnshard/engines/storage/chunks/null_column.h create mode 100644 ydb/core/tx/columnshard/normalizer/insert_table/broken_dedup.cpp create mode 100644 ydb/core/tx/columnshard/normalizer/insert_table/broken_dedup.h create mode 100644 ydb/core/tx/columnshard/normalizer/insert_table/ya.make create mode 100644 ydb/core/tx/columnshard/normalizer/tablet/broken_txs.cpp create mode 100644 ydb/core/tx/columnshard/normalizer/tablet/broken_txs.h create mode 100644 ydb/core/tx/columnshard/operations/manager.cpp create mode 100644 ydb/core/tx/columnshard/operations/manager.h delete mode 100644 ydb/core/tx/columnshard/splitter/scheme_info.cpp create mode 100644 ydb/core/tx/columnshard/splitter/ut/batch_slice.cpp create mode 100644 ydb/core/tx/columnshard/splitter/ut/batch_slice.h rename ydb/core/tx/columnshard/{common/tests => test_helper}/shard_reader.h (99%) create mode 100644 ydb/core/tx/columnshard/transactions/locks/abstract.cpp create mode 100644 ydb/core/tx/columnshard/transactions/locks/abstract.h create mode 100644 ydb/core/tx/columnshard/transactions/locks/dependencies.cpp create mode 100644 ydb/core/tx/columnshard/transactions/locks/dependencies.h create 
mode 100644 ydb/core/tx/columnshard/transactions/locks/interaction.cpp create mode 100644 ydb/core/tx/columnshard/transactions/locks/interaction.h create mode 100644 ydb/core/tx/columnshard/transactions/locks/read_finished.cpp create mode 100644 ydb/core/tx/columnshard/transactions/locks/read_finished.h create mode 100644 ydb/core/tx/columnshard/transactions/locks/read_start.cpp create mode 100644 ydb/core/tx/columnshard/transactions/locks/read_start.h create mode 100644 ydb/core/tx/columnshard/transactions/locks/write.cpp create mode 100644 ydb/core/tx/columnshard/transactions/locks/write.h create mode 100644 ydb/core/tx/columnshard/transactions/locks/ya.make delete mode 100644 ydb/core/tx/columnshard/transactions/operators/ev_write.h create mode 100644 ydb/core/tx/columnshard/transactions/operators/ev_write/abstract.cpp create mode 100644 ydb/core/tx/columnshard/transactions/operators/ev_write/abstract.h create mode 100644 ydb/core/tx/columnshard/transactions/operators/ev_write/primary.cpp create mode 100644 ydb/core/tx/columnshard/transactions/operators/ev_write/primary.h create mode 100644 ydb/core/tx/columnshard/transactions/operators/ev_write/secondary.cpp create mode 100644 ydb/core/tx/columnshard/transactions/operators/ev_write/secondary.h create mode 100644 ydb/core/tx/columnshard/transactions/operators/ev_write/simple.cpp create mode 100644 ydb/core/tx/columnshard/transactions/operators/ev_write/simple.h create mode 100644 ydb/core/tx/columnshard/transactions/operators/ev_write/sync.cpp create mode 100644 ydb/core/tx/columnshard/transactions/operators/ev_write/sync.h create mode 100644 ydb/core/tx/columnshard/transactions/operators/ev_write/ya.make delete mode 100644 ydb/core/tx/columnshard/transactions/operators/ss_operation.cpp delete mode 100644 ydb/core/tx/columnshard/transactions/operators/ss_operation.h create mode 100644 ydb/core/tx/columnshard/transactions/protos/tx_event.proto create mode 100644 ydb/core/tx/columnshard/transactions/protos/ya.make 
create mode 100644 ydb/core/tx/limiter/grouped_memory/service/actor.cpp create mode 100644 ydb/core/tx/limiter/grouped_memory/service/actor.h create mode 100644 ydb/core/tx/limiter/grouped_memory/service/allocation.cpp create mode 100644 ydb/core/tx/limiter/grouped_memory/service/allocation.h create mode 100644 ydb/core/tx/limiter/grouped_memory/service/counters.cpp create mode 100644 ydb/core/tx/limiter/grouped_memory/service/counters.h create mode 100644 ydb/core/tx/limiter/grouped_memory/service/group.cpp create mode 100644 ydb/core/tx/limiter/grouped_memory/service/group.h create mode 100644 ydb/core/tx/limiter/grouped_memory/service/ids.cpp create mode 100644 ydb/core/tx/limiter/grouped_memory/service/ids.h create mode 100644 ydb/core/tx/limiter/grouped_memory/service/manager.cpp create mode 100644 ydb/core/tx/limiter/grouped_memory/service/manager.h create mode 100644 ydb/core/tx/limiter/grouped_memory/service/process.cpp create mode 100644 ydb/core/tx/limiter/grouped_memory/service/process.h create mode 100644 ydb/core/tx/limiter/grouped_memory/service/ya.make create mode 100644 ydb/core/tx/limiter/grouped_memory/usage/abstract.cpp create mode 100644 ydb/core/tx/limiter/grouped_memory/usage/abstract.h create mode 100644 ydb/core/tx/limiter/grouped_memory/usage/config.cpp create mode 100644 ydb/core/tx/limiter/grouped_memory/usage/config.h create mode 100644 ydb/core/tx/limiter/grouped_memory/usage/events.cpp create mode 100644 ydb/core/tx/limiter/grouped_memory/usage/events.h create mode 100644 ydb/core/tx/limiter/grouped_memory/usage/service.cpp create mode 100644 ydb/core/tx/limiter/grouped_memory/usage/service.h create mode 100644 ydb/core/tx/limiter/grouped_memory/usage/ya.make create mode 100644 ydb/core/tx/limiter/grouped_memory/ut/ut_manager.cpp create mode 100644 ydb/core/tx/limiter/grouped_memory/ut/ya.make create mode 100644 ydb/core/tx/limiter/grouped_memory/ya.make create mode 100644 ydb/core/tx/limiter/ya.make create mode 100644 
ydb/core/tx/schemeshard/olap/operations/checks.h create mode 100644 ydb/core/tx/tx_proxy/global.cpp create mode 100644 ydb/core/tx/tx_proxy/global.h create mode 100644 ydb/library/arrow_clickhouse/AggregateFunctions/AggregateFunctionNumRows.h create mode 100644 ydb/library/arrow_kernels/func_num_rows.h create mode 100644 ydb/tests/olap/scenario/helpers/tiering_helper.py create mode 100644 ydb/tests/olap/scenario/test_alter_tiering.py create mode 100644 ydb/tools/olap_workload/__main__.py create mode 100644 ydb/tools/olap_workload/ya.make diff --git a/.github/config/muted_ya.txt b/.github/config/muted_ya.txt index aa54aa1413fc..bcf3b1b6e481 100644 --- a/.github/config/muted_ya.txt +++ b/.github/config/muted_ya.txt @@ -11,8 +11,6 @@ ydb/core/kqp/provider/ut KikimrIcGateway.TestLoadBasicSecretValueFromExternalDat ydb/core/kqp/ut/join KqpJoinOrder.Chain65Nodes ydb/core/kqp/ut/olap KqpOlapBlobsSharing.* ydb/core/kqp/ut/olap KqpOlapStatistics.StatsUsageWithTTL -ydb/core/kqp/ut/olap KqpOlapAggregations.Aggregation_ResultCountAll_FilterL -ydb/core/kqp/ut/olap KqpOlapWrite.WriteDeleteCleanGC ydb/core/kqp/ut/pg KqpPg.CreateIndex ydb/core/kqp/ut/tx KqpLocksTricky.TestNoLocksIssueInteractiveTx+withSink ydb/core/kqp/ut/tx KqpLocksTricky.TestNoLocksIssue+withSink @@ -33,10 +31,6 @@ ydb/core/kqp/ut/federated_query/s3 KqpFederatedQuery.CreateTableAsSelectFromExte ydb/core/kqp/ut/federated_query/s3 KqpFederatedQuery.CreateTableAsSelectFromExternalTable ydb/core/kqp/ut/scan KqpRequestContext.TraceIdInErrorMessage ydb/core/kqp/ut/scheme KqpOlapScheme.TenThousandColumns -ydb/core/kqp/ut/scheme KqpOlap.OlapRead_GenericQuerys -ydb/core/kqp/ut/scheme KqpOlap.OlapRead_StreamGenericQuery -ydb/core/kqp/ut/scheme KqpOlap.OlapRead_UsesGenericQueryOnJoinWithDataShardTable -ydb/core/kqp/ut/scheme KqpOlapScheme.DropTable ydb/core/kqp/ut/scheme KqpScheme.AlterAsyncReplication ydb/core/kqp/ut/scheme KqpScheme.QueryWithAlter ydb/core/kqp/ut/scheme [14/50]* @@ -122,4 +116,6 @@ 
ydb/tests/functional/tenants test_storage_config.py.TestStorageConfig.* ydb/tests/functional/tenants test_tenants.py.* ydb/tests/functional/ydb_cli test_ydb_impex.py.TestImpex.test_big_dataset* ydb/tests/tools/pq_read/test test_timeout.py.TestTimeout.test_timeout +ydb/core/kqp/ut/olap KqpDecimalColumnShard.TestFilterCompare +ydb/core/kqp/ut/olap KqpDecimalColumnShard.TestAggregation ydb/tests/functional/rename [test_rename.py */10] chunk chunk diff --git a/ydb/core/base/events.h b/ydb/core/base/events.h index 14eab5de98e9..704f503215e6 100644 --- a/ydb/core/base/events.h +++ b/ydb/core/base/events.h @@ -179,6 +179,8 @@ struct TKikimrEvents : TEvents { ES_TX_BACKGROUND = 4256, ES_SS_BG_TASKS = 4257, ES_LIMITER = 4258, + //ES_MEMORY = 4259, NB. exists in main + ES_GROUPED_ALLOCATIONS_MANAGER = 4260, }; }; diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp index 4863864e8a67..c9ceb1ef12be 100644 --- a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp +++ b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp @@ -186,6 +186,9 @@ #include #include +#include +#include + #include #include @@ -2179,6 +2182,26 @@ void TKqpServiceInitializer::InitializeServices(NActors::TActorSystemSetup* setu } } +TGroupedMemoryLimiterInitializer::TGroupedMemoryLimiterInitializer(const TKikimrRunConfig& runConfig) + : IKikimrServicesInitializer(runConfig) { +} + +void TGroupedMemoryLimiterInitializer::InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) { + NOlap::NGroupedMemoryManager::TConfig serviceConfig; + Y_ABORT_UNLESS(serviceConfig.DeserializeFromProto(Config.GetGroupedMemoryLimiterConfig())); + + if (serviceConfig.IsEnabled()) { + TIntrusivePtr<::NMonitoring::TDynamicCounters> tabletGroup = GetServiceCounters(appData->Counters, "tablets"); + TIntrusivePtr<::NMonitoring::TDynamicCounters> countersGroup = tabletGroup->GetSubgroup("type", 
"TX_GROUPED_MEMORY_LIMITER"); + + auto service = NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::CreateService(serviceConfig, countersGroup); + + setup->LocalServices.push_back(std::make_pair( + NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::MakeServiceId(NodeId), + TActorSetupCmd(service, TMailboxType::HTSwap, appData->UserPoolId))); + } +} + TCompDiskLimiterInitializer::TCompDiskLimiterInitializer(const TKikimrRunConfig& runConfig) : IKikimrServicesInitializer(runConfig) { } diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.h b/ydb/core/driver_lib/run/kikimr_services_initializers.h index cd2060c52aec..87d692434081 100644 --- a/ydb/core/driver_lib/run/kikimr_services_initializers.h +++ b/ydb/core/driver_lib/run/kikimr_services_initializers.h @@ -397,6 +397,12 @@ class TCompDiskLimiterInitializer: public IKikimrServicesInitializer { void InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) override; }; +class TGroupedMemoryLimiterInitializer: public IKikimrServicesInitializer { +public: + TGroupedMemoryLimiterInitializer(const TKikimrRunConfig& runConfig); + void InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) override; +}; + class TCompConveyorInitializer: public IKikimrServicesInitializer { public: TCompConveyorInitializer(const TKikimrRunConfig& runConfig); diff --git a/ydb/core/driver_lib/run/run.cpp b/ydb/core/driver_lib/run/run.cpp index 5e9c23d634e5..bc70d1e83bd4 100644 --- a/ydb/core/driver_lib/run/run.cpp +++ b/ydb/core/driver_lib/run/run.cpp @@ -1554,6 +1554,10 @@ TIntrusivePtr TKikimrRunner::CreateServiceInitializers sil->AddServiceInitializer(new TCompDiskLimiterInitializer(runConfig)); } + if (serviceMask.EnableGroupedMemoryLimiter) { + sil->AddServiceInitializer(new TGroupedMemoryLimiterInitializer(runConfig)); + } + if (serviceMask.EnableScanConveyor) { sil->AddServiceInitializer(new TScanConveyorInitializer(runConfig)); } diff --git 
a/ydb/core/driver_lib/run/service_mask.h b/ydb/core/driver_lib/run/service_mask.h index b99356bd3cff..3b694ce5ac84 100644 --- a/ydb/core/driver_lib/run/service_mask.h +++ b/ydb/core/driver_lib/run/service_mask.h @@ -78,6 +78,7 @@ union TBasicKikimrServicesMask { bool EnableDatabaseMetadataCache:1; bool EnableGraphService:1; bool EnableCompDiskLimiter:1; + bool EnableGroupedMemoryLimiter:1; }; struct { diff --git a/ydb/core/driver_lib/run/ya.make b/ydb/core/driver_lib/run/ya.make index 0f2a0e70dd7f..987bf4772020 100644 --- a/ydb/core/driver_lib/run/ya.make +++ b/ydb/core/driver_lib/run/ya.make @@ -111,6 +111,7 @@ PEERDIR( ydb/core/tx/coordinator ydb/core/tx/conveyor/service ydb/core/tx/limiter/service + ydb/core/tx/limiter/grouped_memory/usage ydb/core/tx/datashard ydb/core/tx/long_tx_service ydb/core/tx/long_tx_service/public diff --git a/ydb/core/formats/arrow/accessor/abstract/accessor.cpp b/ydb/core/formats/arrow/accessor/abstract/accessor.cpp new file mode 100644 index 000000000000..566bbb4e7ad2 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/abstract/accessor.cpp @@ -0,0 +1,163 @@ +#include "accessor.h" + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace NKikimr::NArrow::NAccessor { + +void IChunkedArray::TReader::AppendPositionTo(arrow::ArrayBuilder& builder, const ui64 position, ui64* recordSize) const { + auto address = GetReadChunk(position); + AFL_VERIFY(NArrow::Append(builder, *address.GetArray(), address.GetPosition(), recordSize)); +} + +std::shared_ptr IChunkedArray::TReader::CopyRecord(const ui64 recordIndex) const { + auto address = GetReadChunk(recordIndex); + return NArrow::CopyRecords(address.GetArray(), { address.GetPosition() }); +} + +std::shared_ptr IChunkedArray::Slice(const ui32 offset, const ui32 count) const { + AFL_VERIFY(offset + count <= (ui64)GetRecordsCount())("offset", offset)("count", count)("length", GetRecordsCount()); + ui32 currentOffset = offset; + ui32 countLeast = count; + 
std::vector> chunks; + auto address = GetChunkSlow(offset); + while (countLeast) { + address = GetChunk(address.GetAddress(), currentOffset); + const ui64 internalPos = address.GetAddress().GetLocalIndex(currentOffset); + if (internalPos + countLeast <= (ui64)address.GetArray()->length()) { + chunks.emplace_back(address.GetArray()->Slice(internalPos, countLeast)); + break; + } else { + const ui32 deltaCount = address.GetArray()->length() - internalPos; + chunks.emplace_back(address.GetArray()->Slice(internalPos, deltaCount)); + AFL_VERIFY(countLeast >= deltaCount); + countLeast -= deltaCount; + currentOffset += deltaCount; + } + } + return std::make_shared(chunks, DataType); +} + +NKikimr::NArrow::NAccessor::IChunkedArray::TFullDataAddress IChunkedArray::GetChunk( + const std::optional& chunkCurrent, const ui64 position) const { + AFL_VERIFY(position < GetRecordsCount()); + std::optional address; + + if (IsDataOwner()) { + if (chunkCurrent) { + AFL_VERIFY(chunkCurrent->GetSize() == 1)("size", chunkCurrent->GetSize()); + } + auto localAddress = GetLocalData(address, position); + TAddressChain addressChain; + addressChain.Add(localAddress.GetAddress()); + AFL_VERIFY(addressChain.Contains(position)); + return TFullDataAddress(localAddress.GetArray(), std::move(addressChain)); + } else { + auto chunkedArrayAddress = GetArray(chunkCurrent, position, nullptr); + if (chunkCurrent) { + AFL_VERIFY(chunkCurrent->GetSize() == 1 + chunkedArrayAddress.GetAddress().GetSize())("current", chunkCurrent->GetSize())( + "chunked", chunkedArrayAddress.GetAddress().GetSize()); + } + auto localAddress = chunkedArrayAddress.GetArray()->GetLocalData(address, chunkedArrayAddress.GetAddress().GetLocalIndex(position)); + auto fullAddress = std::move(chunkedArrayAddress.MutableAddress()); + fullAddress.Add(localAddress.GetAddress()); + AFL_VERIFY(fullAddress.Contains(position)); + return TFullDataAddress(localAddress.GetArray(), std::move(fullAddress)); + } +} + 
+IChunkedArray::TFullChunkedArrayAddress IChunkedArray::GetArray( + const std::optional& chunkCurrent, const ui64 position, const std::shared_ptr& selfPtr) const { + AFL_VERIFY(position < GetRecordsCount()); + if (IsDataOwner()) { + AFL_VERIFY(selfPtr); + TAddressChain chain; + chain.Add(TCommonChunkAddress(0, GetRecordsCount(), 0)); + return IChunkedArray::TFullChunkedArrayAddress(selfPtr, std::move(chain)); + } + TAddressChain addressChain; + + auto* currentLevel = this; + ui32 currentPosition = position; + ui32 idx = 0; + std::vector> chainForTemporarySave; + while (!currentLevel->IsDataOwner()) { + std::optional currentAddress; + if (chunkCurrent) { + currentAddress = chunkCurrent->GetAddress(idx); + } + auto nextChunkedArray = currentLevel->GetLocalChunkedArray(currentAddress, currentPosition); + chainForTemporarySave.emplace_back(nextChunkedArray.GetArray()); + currentLevel = chainForTemporarySave.back().get(); + addressChain.Add(nextChunkedArray.GetAddress()); + AFL_VERIFY(nextChunkedArray.GetAddress().GetStartPosition() <= currentPosition); + currentPosition -= nextChunkedArray.GetAddress().GetStartPosition(); + ++idx; + } + AFL_VERIFY(!chunkCurrent || chunkCurrent->GetSize() - idx <= 1)("idx", idx)("size", chunkCurrent->GetSize()); + return TFullChunkedArrayAddress(chainForTemporarySave.back(), std::move(addressChain)); +} + +TString IChunkedArray::TReader::DebugString(const ui32 position) const { + auto address = GetReadChunk(position); + return NArrow::DebugString(address.GetArray(), address.GetPosition()); +} + +std::partial_ordering IChunkedArray::TReader::CompareColumns( + const std::vector& l, const ui64 lPosition, const std::vector& r, const ui64 rPosition) { + AFL_VERIFY(l.size() == r.size()); + for (ui32 i = 0; i < l.size(); ++i) { + const TAddress lAddress = l[i].GetReadChunk(lPosition); + const TAddress rAddress = r[i].GetReadChunk(rPosition); + auto cmp = lAddress.Compare(rAddress); + if (std::is_neq(cmp)) { + return cmp; + } + } + return 
std::partial_ordering::equivalent; +} + +IChunkedArray::TAddress IChunkedArray::TReader::GetReadChunk(const ui64 position) const { + AFL_VERIFY(position < ChunkedArray->GetRecordsCount()); + if (CurrentChunkAddress && CurrentChunkAddress->GetAddress().Contains(position)) { + } else { + CurrentChunkAddress = ChunkedArray->GetChunk(CurrentChunkAddress, position); + } + return IChunkedArray::TAddress(CurrentChunkAddress->GetArray(), CurrentChunkAddress->GetAddress().GetLocalIndex(position)); +} + +const std::partial_ordering IChunkedArray::TAddress::Compare(const TAddress& item) const { + return TComparator::TypedCompare(*Array, Position, *item.Array, item.Position); +} + +TChunkedArraySerialized::TChunkedArraySerialized(const std::shared_ptr& array, const TString& serializedData) + : Array(array) + , SerializedData(serializedData) { + AFL_VERIFY(serializedData); + AFL_VERIFY(Array); + AFL_VERIFY(Array->GetRecordsCount()); +} + +std::partial_ordering IChunkedArray::TFullDataAddress::Compare( + const ui64 position, const TFullDataAddress& item, const ui64 itemPosition) const { + AFL_VERIFY(Address.Contains(position))("pos", position)("start", Address.DebugString()); + AFL_VERIFY(item.Address.Contains(itemPosition))("pos", itemPosition)("start", item.Address.DebugString()); + return TComparator::TypedCompare(*Array, Address.GetLocalIndex(position), *item.Array, item.Address.GetLocalIndex(itemPosition)); +} + +std::shared_ptr IChunkedArray::TFullDataAddress::CopyRecord(const ui64 recordIndex) const { + return NArrow::CopyRecords(Array, { Address.GetLocalIndex(recordIndex) }); +} + +TString IChunkedArray::TFullDataAddress::DebugString(const ui64 position) const { + return NArrow::DebugString(Array, Address.GetLocalIndex(position)); +} + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/abstract/accessor.h b/ydb/core/formats/arrow/accessor/abstract/accessor.h new file mode 100644 index 000000000000..c8e7979618dd --- /dev/null +++ 
b/ydb/core/formats/arrow/accessor/abstract/accessor.h @@ -0,0 +1,396 @@ +#pragma once +#include +#include + +#include +#include +#include +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +class TColumnSaver; +class IChunkedArray; + +class TChunkedArraySerialized { +private: + YDB_READONLY_DEF(std::shared_ptr, Array); + YDB_READONLY_DEF(TString, SerializedData); + +public: + TChunkedArraySerialized(const std::shared_ptr& array, const TString& serializedData); +}; + +class IChunkedArray { +public: + enum class EType { + Undefined, + Array, + ChunkedArray, + SerializedChunkedArray, + SparsedArray + }; + + class TCommonChunkAddress { + private: + YDB_READONLY(ui64, StartPosition, 0); + YDB_READONLY(ui64, FinishPosition, 0); + YDB_READONLY(ui64, ChunkIndex, 0); + + public: + TString DebugString() const { + return TStringBuilder() << "start=" << StartPosition << ";" + << "chunk_index=" << ChunkIndex << ";" + << "finish=" << FinishPosition << ";" + << "size=" << FinishPosition - StartPosition << ";"; + } + + ui64 GetLength() const { + return FinishPosition - StartPosition; + } + + bool Contains(const ui64 position) const { + return position >= StartPosition && position < FinishPosition; + } + + TCommonChunkAddress(const ui64 start, const ui64 finish, const ui64 index) + : StartPosition(start) + , FinishPosition(finish) + , ChunkIndex(index) { + AFL_VERIFY(FinishPosition > StartPosition); + } + }; + + class TAddressChain { + private: + YDB_READONLY_DEF(std::deque, Addresses); + YDB_READONLY(ui32, GlobalStartPosition, 0); + YDB_READONLY(ui32, GlobalFinishPosition, 0); + + public: + TAddressChain() = default; + + ui32 GetSize() const { + return Addresses.size(); + } + + ui32 GetLocalIndex(const ui32 position) const { + AFL_VERIFY(Contains(position))("pos", position)("start", GlobalStartPosition); + return position - GlobalStartPosition; + } + + bool Contains(const ui32 position) const { + return GlobalStartPosition <= position && position < 
GlobalFinishPosition; + } + + const TCommonChunkAddress& GetAddress(const ui32 index) const { + AFL_VERIFY(index < Addresses.size()); + return Addresses[index]; + } + + void Add(const TCommonChunkAddress& address) { + if (Addresses.size()) { + AFL_VERIFY(address.GetFinishPosition() <= Addresses.back().GetLength()); + } + Addresses.emplace_back(address); + GlobalStartPosition += address.GetStartPosition(); + GlobalFinishPosition = GlobalStartPosition + address.GetLength(); + } + + const TCommonChunkAddress& GetLastAddress() const { + AFL_VERIFY(Addresses.size()); + return Addresses.back(); + } + + TString DebugString() const { + return TStringBuilder() << "start=" << GlobalStartPosition << ";finish=" << GlobalFinishPosition + << ";addresses_count=" << Addresses.size() << ";"; + } + }; + + class TFullChunkedArrayAddress { + private: + YDB_READONLY_DEF(std::shared_ptr, Array); + YDB_ACCESSOR_DEF(TAddressChain, Address); + + public: + TFullChunkedArrayAddress(const std::shared_ptr& arr, TAddressChain&& address) + : Array(arr) + , Address(std::move(address)) + { + AFL_VERIFY(Address.GetSize()); + AFL_VERIFY(Array); + AFL_VERIFY(Array->GetRecordsCount()); + } + }; + + class TLocalChunkedArrayAddress { + private: + YDB_READONLY_DEF(std::shared_ptr, Array); + TCommonChunkAddress Address; + + public: + const TCommonChunkAddress& GetAddress() const { + return Address; + } + + TLocalChunkedArrayAddress(const std::shared_ptr& arr, const TCommonChunkAddress& address) + : Array(arr) + , Address(address) { + AFL_VERIFY(arr); + AFL_VERIFY(address.GetLength() == (ui32)arr->GetRecordsCount()); + } + + TLocalChunkedArrayAddress(const std::shared_ptr& arr, const ui32 start, const ui32 chunkIdx) + : Array(arr) + , Address(TCommonChunkAddress(start, start + TValidator::CheckNotNull(arr)->GetRecordsCount(), chunkIdx)) { + } + }; + + class TFullDataAddress { + private: + YDB_READONLY_DEF(std::shared_ptr, Array); + YDB_ACCESSOR_DEF(TAddressChain, Address); + + public: + TString 
DebugString(const ui64 position) const; + + std::shared_ptr CopyRecord(const ui64 recordIndex) const; + + std::partial_ordering Compare(const ui64 position, const TFullDataAddress& item, const ui64 itemPosition) const; + + TFullDataAddress(const std::shared_ptr& arr, TAddressChain&& address) + : Array(arr) + , Address(std::move(address)) + { + AFL_VERIFY(Array); + AFL_VERIFY(Address.GetSize()); + } + }; + + class TLocalDataAddress { + private: + YDB_READONLY_DEF(std::shared_ptr, Array); + TCommonChunkAddress Address; + + public: + const TCommonChunkAddress& GetAddress() const { + return Address; + } + + TLocalDataAddress(const std::shared_ptr& arr, const ui32 start, const ui32 chunkIdx) + : Array(arr) + , Address(start, start + TValidator::CheckNotNull(arr)->length(), chunkIdx) + { + } + + TLocalDataAddress(const std::shared_ptr& arr, const TCommonChunkAddress& address) + : Array(arr) + , Address(address) { + AFL_VERIFY(address.GetLength() == (ui32)arr->length()); + } + }; + + class TAddress { + private: + YDB_READONLY_DEF(std::shared_ptr, Array); + YDB_READONLY(ui64, Position, 0); + + public: + bool NextPosition() { + if (Position + 1 < (ui32)Array->length()) { + ++Position; + return true; + } + return false; + } + + TAddress(const std::shared_ptr& arr, const ui64 position) + : Array(arr) + , Position(position) + { + AFL_VERIFY(!!Array); + AFL_VERIFY(position < (ui32)Array->length()); + } + + const std::partial_ordering Compare(const TAddress& item) const; + }; + +private: + YDB_READONLY_DEF(std::shared_ptr, DataType); + YDB_READONLY(ui64, RecordsCount, 0); + YDB_READONLY(EType, Type, EType::Undefined); + virtual std::optional DoGetRawSize() const = 0; + virtual std::shared_ptr DoGetScalar(const ui32 index) const = 0; + + virtual TLocalChunkedArrayAddress DoGetLocalChunkedArray(const std::optional& chunkCurrent, const ui64 position) const = 0; + virtual TLocalDataAddress DoGetLocalData(const std::optional& chunkCurrent, const ui64 position) const = 0; + 
+protected: + virtual std::shared_ptr DoGetChunkedArray() const = 0; + TLocalChunkedArrayAddress GetLocalChunkedArray(const std::optional& chunkCurrent, const ui64 position) const { + return DoGetLocalChunkedArray(chunkCurrent, position); + } + TLocalDataAddress GetLocalData(const std::optional& chunkCurrent, const ui64 position) const { + return DoGetLocalData(chunkCurrent, position); + } + virtual std::shared_ptr DoGetMaxScalar() const = 0; + virtual std::vector DoSplitBySizes( + const TColumnSaver& saver, const TString& fullSerializedData, const std::vector& splitSizes) = 0; + + /* Finds the chunk that contains `position` and reports it through accessor.OnArray(chunkIndex, chunkStartPosition). When chunkCurrent is set it is used as a hint: the search resumes from that chunk instead of from chunk 0. If no chunk matches, the function fails verification with diagnostics. */ template + void SelectChunk(const std::optional& chunkCurrent, const ui64 position, const TChunkAccessor& accessor) const { + if (!chunkCurrent || chunkCurrent->GetStartPosition() <= position) { + ui32 startIndex = 0; + ui64 idx = 0; + if (chunkCurrent) { + /* position still falls inside the hinted chunk: reuse it without scanning */ if (position < chunkCurrent->GetFinishPosition()) { + return accessor.OnArray( + chunkCurrent->GetChunkIndex(), chunkCurrent->GetStartPosition()); + } + AFL_VERIFY(chunkCurrent->GetChunkIndex() < accessor.GetChunksCount()); + startIndex = chunkCurrent->GetChunkIndex(); + idx = chunkCurrent->GetStartPosition(); + } + /* forward scan: idx is the running start offset, advanced by each chunk length */ for (ui32 i = startIndex; i < accessor.GetChunksCount(); ++i) { + const ui64 nextIdx = idx + accessor.GetChunkLength(i); + if (idx <= position && position < nextIdx) { + return accessor.OnArray(i, idx); + } + idx = nextIdx; + } + } else { + /* position lies before the hinted chunk: walk backwards, subtracting chunk lengths from the hinted start */ AFL_VERIFY(chunkCurrent->GetChunkIndex() > 0); + ui64 idx = chunkCurrent->GetStartPosition(); + for (i32 i = chunkCurrent->GetChunkIndex() - 1; i >= 0; --i) { + AFL_VERIFY(idx >= accessor.GetChunkLength(i))("idx", idx)("length", accessor.GetChunkLength(i)); + const ui64 nextIdx = idx - accessor.GetChunkLength(i); + if (nextIdx <= position && position < idx) { + return accessor.OnArray(i, nextIdx); + } + idx = nextIdx; + } + } + /* no chunk matched: collect the chunk lengths for the failure diagnostics */ TStringBuilder sb; + ui64 recordsCountChunks = 0; + for (ui32 i = 0; i < accessor.GetChunksCount(); ++i) { + sb << accessor.GetChunkLength(i) << ","; +
recordsCountChunks += accessor.GetChunkLength(i); + } + TStringBuilder chunkCurrentInfo; + if (chunkCurrent) { + chunkCurrentInfo << chunkCurrent->DebugString(); + } + /* first check that the chunk lengths sum to GetRecordsCount(), then fail unconditionally: reaching this point means position was not found */ AFL_VERIFY(recordsCountChunks == GetRecordsCount())("pos", position)("count", GetRecordsCount())("chunks_map", sb)( + "chunk_current", chunkCurrentInfo); + AFL_VERIFY(false)("pos", position)("count", GetRecordsCount())("chunks_map", sb)("chunk_current", chunkCurrentInfo); + } + +public: + /* Reader over a non-null chunked array (verified in the constructor) with a mutable CurrentChunkAddress member; the remaining methods are only declared here. */ class TReader { + private: + std::shared_ptr ChunkedArray; + mutable std::optional CurrentChunkAddress; + + public: + TReader(const std::shared_ptr& data) + : ChunkedArray(data) { + AFL_VERIFY(ChunkedArray); + } + + ui64 GetRecordsCount() const { + return ChunkedArray->GetRecordsCount(); + } + + TAddress GetReadChunk(const ui64 position) const; + static std::partial_ordering CompareColumns( + const std::vector& l, const ui64 lPosition, const std::vector& r, const ui64 rPosition); + void AppendPositionTo(arrow::ArrayBuilder& builder, const ui64 position, ui64* recordSize) const; + std::shared_ptr CopyRecord(const ui64 recordIndex) const; + TString DebugString(const ui32 position) const; + }; + + /* bounds-checked wrapper over DoGetScalar */ std::shared_ptr GetScalar(const ui32 index) const { + AFL_VERIFY(index < GetRecordsCount()); + return DoGetScalar(index); + } + + std::vector SplitBySizes( + const TColumnSaver& saver, const TString& fullSerializedData, const std::vector& splitSizes) { + return DoSplitBySizes(saver, fullSerializedData, splitSizes); + } + + /* requires a non-empty array */ std::shared_ptr GetMaxScalar() const { + AFL_VERIFY(GetRecordsCount()); + return DoGetMaxScalar(); + } + + std::optional GetRawSize() const { + return DoGetRawSize(); + } + + /* same as GetRawSize(), but verifies that a size is available */ ui64 GetRawSizeVerified() const { + auto result = GetRawSize(); + AFL_VERIFY(result); + return *result; + } + + std::shared_ptr GetChunkedArray() const { + return DoGetChunkedArray(); + } + virtual ~IChunkedArray() = default; + + std::shared_ptr Slice(const ui32 offset, const ui32 count) const; + + bool IsDataOwner() const { + switch
(Type) { + case EType::SparsedArray: + case EType::ChunkedArray: + case EType::Array: + return true; + case EType::Undefined: + /* Undefined is treated as a failed invariant; this case has no return of its own */ AFL_VERIFY(false); + case EType::SerializedChunkedArray: + return false; + }; + } + + TFullChunkedArrayAddress GetArray( + const std::optional& chunkCurrent, const ui64 position, const std::shared_ptr& selfPtr) const; + + TFullDataAddress GetChunk(const std::optional& chunkCurrent, const ui64 position) const { + if (chunkCurrent) { + return GetChunk(chunkCurrent->GetAddress(), position); + } else { + return GetChunk(std::optional(), position); + } + } + + /* lookup without a hint chunk */ TFullDataAddress GetChunkSlow(const ui64 position) const { + return GetChunk(std::optional(), position); + } + + TFullChunkedArrayAddress GetArray( + const std::optional& chunkCurrent, const ui64 position, const std::shared_ptr& selfPtr) const { + if (chunkCurrent) { + return GetArray(chunkCurrent->GetAddress(), position, selfPtr); + } else { + return GetArray(std::optional(), position, selfPtr); + } + } + + /* lookup without a hint chunk */ TFullChunkedArrayAddress GetArraySlow(const ui64 position, const std::shared_ptr& selfPtr) const { + return GetArray(std::optional(), position, selfPtr); + } + + TFullDataAddress GetChunk(const std::optional& chunkCurrent, const ui64 position) const; + + IChunkedArray(const ui64 recordsCount, const EType type, const std::shared_ptr& dataType) + : DataType(dataType) + , RecordsCount(recordsCount) + , Type(type) { + } +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/abstract/constructor.cpp b/ydb/core/formats/arrow/accessor/abstract/constructor.cpp new file mode 100644 index 000000000000..51c2e86cadf7 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/abstract/constructor.cpp @@ -0,0 +1,11 @@ +#include "constructor.h" +#include + +namespace NKikimr::NArrow::NAccessor { + +/* returns a function-local static instance, created on the first call */ TConstructorContainer TConstructorContainer::GetDefaultConstructor() { + static std::shared_ptr result = std::make_shared(); + return result; +} + +} diff --git
a/ydb/core/formats/arrow/accessor/abstract/constructor.h b/ydb/core/formats/arrow/accessor/abstract/constructor.h new file mode 100644 index 000000000000..7f9883402c25 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/abstract/constructor.h @@ -0,0 +1,76 @@ +#pragma once +#include "accessor.h" + +#include +#include + +#include + +#include + +namespace NKikimr::NArrow::NAccessor { + +/* Interface with private virtual Do* hooks and public non-virtual wrappers that forward to them; GetClassName() is the only public pure-virtual method. */ class IConstructor { +public: + using TFactory = NObjectFactory::TObjectFactory; + using TProto = NKikimrArrowAccessorProto::TConstructor; + +private: + virtual TConclusion> DoConstruct( + const std::shared_ptr& originalData, const TChunkConstructionData& externalInfo) const = 0; + virtual TConclusion> DoConstructDefault( + const TChunkConstructionData& externalInfo) const = 0; + virtual NKikimrArrowAccessorProto::TConstructor DoSerializeToProto() const = 0; + virtual bool DoDeserializeFromProto(const NKikimrArrowAccessorProto::TConstructor& proto) = 0; + virtual std::shared_ptr DoGetExpectedSchema(const std::shared_ptr& resultColumn) const = 0; + /* default implementation: no extra details */ virtual TString DoDebugString() const { + return ""; + } + +public: + virtual ~IConstructor() = default; + + /* formats as the class name, a colon, then DoDebugString() */ TString DebugString() const { + return TStringBuilder() << GetClassName() << ":" << DoDebugString(); + } + + TConclusion> Construct( + const std::shared_ptr& originalData, const TChunkConstructionData& externalInfo) const { + return DoConstruct(originalData, externalInfo); + } + + TConclusion> ConstructDefault(const TChunkConstructionData& externalInfo) const { + return DoConstructDefault(externalInfo); + } + + bool DeserializeFromProto(const NKikimrArrowAccessorProto::TConstructor& proto) { + return DoDeserializeFromProto(proto); + } + + NKikimrArrowAccessorProto::TConstructor SerializeToProto() const { + return DoSerializeToProto(); + } + + /* out-parameter overload: overwrites proto with the serialized value */ void SerializeToProto(NKikimrArrowAccessorProto::TConstructor& proto) const { + proto = DoSerializeToProto(); + } + + std::shared_ptr GetExpectedSchema(const std::shared_ptr& resultColumn) const
{ + /* a null result column is rejected before delegating */ AFL_VERIFY(resultColumn); + return DoGetExpectedSchema(resultColumn); + } + + virtual TString GetClassName() const = 0; +}; + +/* inherits the TInterfaceProtoContainer constructors and adds GetDefaultConstructor() */ class TConstructorContainer: public NBackgroundTasks::TInterfaceProtoContainer { +private: + using TBase = NBackgroundTasks::TInterfaceProtoContainer; + +public: + using TBase::TBase; + + static TConstructorContainer GetDefaultConstructor(); +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/abstract/request.cpp b/ydb/core/formats/arrow/accessor/abstract/request.cpp new file mode 100644 index 000000000000..08bc3ee5c87d --- /dev/null +++ b/ydb/core/formats/arrow/accessor/abstract/request.cpp @@ -0,0 +1,16 @@ +#include "request.h" + +namespace NKikimr::NArrow::NAccessor { + +/* Reads the class name from the DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME feature. An absent feature is not an error: Initialize is not called and Success is returned. An unknown class name fails; otherwise the created object parses the remaining features. */ TConclusionStatus TRequestedConstructorContainer::DeserializeFromRequest(NYql::TFeaturesExtractor& features) { + const std::optional className = features.Extract("DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME"); + if (!className) { + return TConclusionStatus::Success(); + } + if (!TBase::Initialize(*className)) { + return TConclusionStatus::Fail("don't know anything about class_name=" + *className); + } + return TBase::GetObjectPtr()->DeserializeFromRequest(features); +} + +} diff --git a/ydb/core/formats/arrow/accessor/abstract/request.h b/ydb/core/formats/arrow/accessor/abstract/request.h new file mode 100644 index 000000000000..c13105fe8e21 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/abstract/request.h @@ -0,0 +1,58 @@ +#pragma once +#include "constructor.h" + +#include + +#include +#include + +#include + +namespace NKikimr::NArrow::NAccessor { + +/* Interface with private virtual Do* hooks and public forwarding wrappers; BuildConstructor() forwards to DoBuildConstructor(). */ class IRequestedConstructor { +public: + using TFactory = NObjectFactory::TObjectFactory; + using TProto = NKikimrArrowAccessorProto::TRequestedConstructor; +private: + virtual TConclusion DoBuildConstructor() const = 0; + virtual NKikimrArrowAccessorProto::TRequestedConstructor DoSerializeToProto() const = 0; + virtual bool DoDeserializeFromProto(const
NKikimrArrowAccessorProto::TRequestedConstructor& proto) = 0; + virtual TConclusionStatus DoDeserializeFromRequest(NYql::TFeaturesExtractor& features) = 0; + +public: + virtual ~IRequestedConstructor() = default; + + NKikimrArrowAccessorProto::TRequestedConstructor SerializeToProto() const { + return DoSerializeToProto(); + } + + /* out-parameter overload: overwrites proto with the serialized value */ void SerializeToProto(NKikimrArrowAccessorProto::TRequestedConstructor& proto) const { + proto = DoSerializeToProto(); + } + + bool DeserializeFromProto(const NKikimrArrowAccessorProto::TRequestedConstructor& proto) { + return DoDeserializeFromProto(proto); + } + + TConclusionStatus DeserializeFromRequest(NYql::TFeaturesExtractor& features) { + return DoDeserializeFromRequest(features); + } + + TConclusion BuildConstructor() const { + return DoBuildConstructor(); + } + + virtual TString GetClassName() const = 0; +}; + +/* inherits the TInterfaceProtoContainer constructors and adds DeserializeFromRequest() */ class TRequestedConstructorContainer: public NBackgroundTasks::TInterfaceProtoContainer { +private: + using TBase = NBackgroundTasks::TInterfaceProtoContainer; + +public: + using TBase::TBase; + TConclusionStatus DeserializeFromRequest(NYql::TFeaturesExtractor& features); +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/abstract/ya.make b/ydb/core/formats/arrow/accessor/abstract/ya.make new file mode 100644 index 000000000000..fd68f1eeb3bf --- /dev/null +++ b/ydb/core/formats/arrow/accessor/abstract/ya.make @@ -0,0 +1,17 @@ +LIBRARY() + +PEERDIR( + ydb/core/formats/arrow/protos + ydb/core/formats/arrow/accessor/common + contrib/libs/apache/arrow + ydb/library/conclusion + ydb/services/metadata/abstract +) + +SRCS( + accessor.cpp + constructor.cpp + request.cpp +) + +END() diff --git a/ydb/core/formats/arrow/accessor/common/chunk_data.cpp b/ydb/core/formats/arrow/accessor/common/chunk_data.cpp new file mode 100644 index 000000000000..da03037ef321 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/common/chunk_data.cpp @@ -0,0 +1,5 @@ +#include "chunk_data.h" + +namespace
NKikimr::NArrow::NAccessor { + +} diff --git a/ydb/core/formats/arrow/accessor/common/chunk_data.h b/ydb/core/formats/arrow/accessor/common/chunk_data.h new file mode 100644 index 000000000000..d10d27abb85b --- /dev/null +++ b/ydb/core/formats/arrow/accessor/common/chunk_data.h @@ -0,0 +1,23 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +/* Bundle of record count, default value and column type; IConstructor receives it as the externalInfo argument. */ class TChunkConstructionData { +private: + YDB_READONLY(ui32, RecordsCount, 0); + YDB_READONLY_DEF(std::shared_ptr, DefaultValue); + YDB_READONLY_DEF(std::shared_ptr, ColumnType); + +public: + TChunkConstructionData( + const ui32 recordsCount, const std::shared_ptr& defaultValue, const std::shared_ptr& columnType) + : RecordsCount(recordsCount) + , DefaultValue(defaultValue) + , ColumnType(columnType) { + } +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/common/const.cpp b/ydb/core/formats/arrow/accessor/common/const.cpp new file mode 100644 index 000000000000..926a9ca94deb --- /dev/null +++ b/ydb/core/formats/arrow/accessor/common/const.cpp @@ -0,0 +1,5 @@ +#include "const.h" + +namespace NKikimr::NArrow::NAccessor { + +} diff --git a/ydb/core/formats/arrow/accessor/common/const.h b/ydb/core/formats/arrow/accessor/common/const.h new file mode 100644 index 000000000000..192332854478 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/common/const.h @@ -0,0 +1,12 @@ +#pragma once +#include + +namespace NKikimr::NArrow::NAccessor { + +/* class-name constants; NPlain::TConstructor returns PlainDataAccessorName from GetClassNameStatic() and registers under it */ class TGlobalConst { +public: + static const inline TString SparsedDataAccessorName = "SPARSED"; + static const inline TString PlainDataAccessorName = "PLAIN"; +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/common/ya.make b/ydb/core/formats/arrow/accessor/common/ya.make new file mode 100644 index 000000000000..28ef714226aa --- /dev/null +++ b/ydb/core/formats/arrow/accessor/common/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +PEERDIR( + contrib/libs/apache/arrow +) + +SRCS(
+ chunk_data.cpp + const.cpp +) + +END() diff --git a/ydb/core/formats/arrow/accessor/composite/accessor.cpp b/ydb/core/formats/arrow/accessor/composite/accessor.cpp new file mode 100644 index 000000000000..5660aaccc922 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/composite/accessor.cpp @@ -0,0 +1,57 @@ +#include "accessor.h" +namespace NKikimr::NArrow::NAccessor { + +namespace { +/* Adapter passed to SelectChunk: exposes the chunk count and lengths and stores the selected chunk address into the caller's optional. */ class TCompositeChunkAccessor { +private: + const std::vector>& Chunks; + std::optional* ResultArrayAddress = nullptr; + +public: + TCompositeChunkAccessor(const std::vector>& chunks, + std::optional& result) + : Chunks(chunks) + , ResultArrayAddress(&result) { + } + ui64 GetChunksCount() const { + return Chunks.size(); + } + ui64 GetChunkLength(const ui32 idx) const { + return Chunks[idx]->GetRecordsCount(); + } + /* NOTE(review): startPosition is ui32 here while SelectChunk computes ui64 offsets - confirm the narrowing is intended */ void OnArray(const ui32 chunkIdx, const ui32 startPosition) const { + if (ResultArrayAddress) { + *ResultArrayAddress = NArrow::NAccessor::IChunkedArray::TLocalChunkedArrayAddress(Chunks[chunkIdx], startPosition, chunkIdx); + } + } +}; +} // namespace + +/* not supported for the composite array: always fails verification */ IChunkedArray::TLocalDataAddress TCompositeChunkedArray::DoGetLocalData( + const std::optional& /*chunkCurrent*/, const ui64 /*position*/) const { + AFL_VERIFY(false); + return IChunkedArray::TLocalDataAddress(nullptr, 0, 0); +} + +/* selects the nested array that contains position; SelectChunk must have filled result */ IChunkedArray::TLocalChunkedArrayAddress TCompositeChunkedArray::DoGetLocalChunkedArray( + const std::optional& chunkCurrent, const ui64 position) const { + std::optional result; + TCompositeChunkAccessor accessor(Chunks, result); + SelectChunk(chunkCurrent, position, accessor); + AFL_VERIFY(result); + return *result; +} + +/* concatenates the arrow chunks of every nested array in order; each nested array must report at least one chunk */ std::shared_ptr TCompositeChunkedArray::DoGetChunkedArray() const { + std::vector> chunks; + for (auto&& i : Chunks) { + auto arr = i->GetChunkedArray(); + AFL_VERIFY(arr->num_chunks()); + for (auto&& chunk : arr->chunks()) { + chunks.emplace_back(chunk); + } + } + return std::make_shared(chunks); +} + +} // namespace NKikimr::NArrow::NAccessor diff --git
a/ydb/core/formats/arrow/accessor/composite/accessor.h b/ydb/core/formats/arrow/accessor/composite/accessor.h new file mode 100644 index 000000000000..9b253b265e84 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/composite/accessor.h @@ -0,0 +1,69 @@ +#pragma once +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +/* Chunked array made of nested chunked arrays. The constructor is in the protected section; TBuilder::Finish() is the visible way to create an instance. */ class TCompositeChunkedArray: public NArrow::NAccessor::IChunkedArray { +private: + using TBase = NArrow::NAccessor::IChunkedArray; + +private: + std::vector> Chunks; + +protected: + virtual TLocalChunkedArrayAddress DoGetLocalChunkedArray( + const std::optional& chunkCurrent, const ui64 position) const override; + + /* splitting, scalar access and max-scalar are not supported: these overrides always fail verification */ virtual std::vector DoSplitBySizes( + const TColumnSaver& /*saver*/, const TString& /*fullSerializedData*/, const std::vector& /*splitSizes*/) override { + AFL_VERIFY(false); + return {}; + } + + virtual std::shared_ptr DoGetScalar(const ui32 /*index*/) const override { + AFL_VERIFY(false)("problem", "cannot use method"); + return nullptr; + } + /* raw size is not reported: returns an empty optional */ virtual std::optional DoGetRawSize() const override { + return {}; + } + virtual std::shared_ptr DoGetMaxScalar() const override { + AFL_VERIFY(false); + return nullptr; + } + virtual TLocalDataAddress DoGetLocalData(const std::optional& chunkCurrent, const ui64 position) const override; + virtual std::shared_ptr DoGetChunkedArray() const override; + + /* the base is initialized with EType::SerializedChunkedArray */ TCompositeChunkedArray(std::vector>&& chunks, const ui32 recordsCount, + const std::shared_ptr& type) + : TBase(recordsCount, NArrow::NAccessor::IChunkedArray::EType::SerializedChunkedArray, type) + , Chunks(std::move(chunks)) { + } + +public: + /* Accumulates chunks of one arrow type and their total record count. */ class TBuilder { + private: + ui32 RecordsCount = 0; + std::vector> Chunks; + const std::shared_ptr Type; + + public: + TBuilder(const std::shared_ptr& type) + : Type(type) { + AFL_VERIFY(Type); + } + + /* every added chunk must have the builder's arrow type id */ void AddChunk(const std::shared_ptr& arr) { + AFL_VERIFY(arr->GetDataType()->id() == Type->id())("incoming", arr->GetDataType()->ToString())("main", Type->ToString()); + Chunks.emplace_back(arr);
+ RecordsCount += arr->GetRecordsCount(); + } + + /* moves the accumulated chunks into the new array */ std::shared_ptr Finish() { + return std::shared_ptr(new TCompositeChunkedArray(std::move(Chunks), RecordsCount, Type)); + } + }; +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/composite/ya.make b/ydb/core/formats/arrow/accessor/composite/ya.make new file mode 100644 index 000000000000..828c9a8e531d --- /dev/null +++ b/ydb/core/formats/arrow/accessor/composite/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +PEERDIR( + contrib/libs/apache/arrow + ydb/core/formats/arrow/common +) + +SRCS( + accessor.cpp +) + +END() diff --git a/ydb/core/formats/arrow/accessor/composite_serial/accessor.cpp b/ydb/core/formats/arrow/accessor/composite_serial/accessor.cpp new file mode 100644 index 000000000000..d804e92efdd7 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/composite_serial/accessor.cpp @@ -0,0 +1,46 @@ +#include "accessor.h" + +namespace NKikimr::NArrow::NAccessor { + +namespace { +/* Adapter passed to SelectChunk; the selected chunk is turned into an array via TChunk::GetArrayVerified only when OnArray is called. */ class TSerializedChunkAccessor { +private: + const std::vector& Chunks; + const std::shared_ptr& Loader; + std::optional& Result; + +public: + TSerializedChunkAccessor(const std::vector& chunks, const std::shared_ptr& loader, + std::optional& result) + : Chunks(chunks) + , Loader(loader) + , Result(result) { + } + ui64 GetChunksCount() const { + return Chunks.size(); + } + ui64 GetChunkLength(const ui32 idx) const { + return Chunks[idx].GetRecordsCount(); + } + /* NOTE(review): startPosition is ui32 here while SelectChunk computes ui64 offsets - confirm the narrowing is intended */ void OnArray(const ui32 chunkIdx, const ui32 startPosition) const { + Result = IChunkedArray::TLocalChunkedArrayAddress(Chunks[chunkIdx].GetArrayVerified(Loader), startPosition, chunkIdx); + } +}; +} // namespace + +/* not supported for this array: always fails verification */ IChunkedArray::TLocalDataAddress TDeserializeChunkedArray::DoGetLocalData( + const std::optional& /*chunkCurrent*/, const ui64 /*position*/) const { + AFL_VERIFY(false); + return IChunkedArray::TLocalDataAddress(nullptr, 0, 0); +} + +IChunkedArray::TLocalChunkedArrayAddress TDeserializeChunkedArray::DoGetLocalChunkedArray( + const
std::optional& chunkCurrent, const ui64 position) const { + std::optional result; + TSerializedChunkAccessor accessor(Chunks, Loader, result); + SelectChunk(chunkCurrent, position, accessor); + /* SelectChunk must have called OnArray and filled result */ AFL_VERIFY(result); + return *result; +} + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/composite_serial/accessor.h b/ydb/core/formats/arrow/accessor/composite_serial/accessor.h new file mode 100644 index 000000000000..63a4cda06bcb --- /dev/null +++ b/ydb/core/formats/arrow/accessor/composite_serial/accessor.h @@ -0,0 +1,78 @@ +#pragma once +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +class TDeserializeChunkedArray: public NArrow::NAccessor::IChunkedArray { +private: + using TBase = NArrow::NAccessor::IChunkedArray; + +public: + /* A chunk is either an already built array (PredefinedArray) or serialized Data together with its record count. */ class TChunk { + private: + YDB_READONLY(ui32, RecordsCount, 0); + std::shared_ptr PredefinedArray; + const TString Data; + + public: + TChunk(const std::shared_ptr& predefinedArray) + : PredefinedArray(predefinedArray) { + AFL_VERIFY(PredefinedArray); + RecordsCount = PredefinedArray->GetRecordsCount(); + } + + TChunk(const ui32 recordsCount, const TString& data) + : RecordsCount(recordsCount) + , Data(data) { + } + + /* returns the predefined array when present; otherwise passes Data to loader->ApplyVerified on every call, the result is not stored in the chunk */ std::shared_ptr GetArrayVerified(const std::shared_ptr& loader) const { + if (PredefinedArray) { + return PredefinedArray; + } + return loader->ApplyVerified(Data, RecordsCount); + } + }; + +private: + std::shared_ptr Loader; + std::vector Chunks; + +protected: + virtual TLocalChunkedArrayAddress DoGetLocalChunkedArray( + const std::optional& chunkCurrent, const ui64 position) const override; + virtual TLocalDataAddress DoGetLocalData(const std::optional& chunkCurrent, const ui64 position) const override; + + /* splitting, scalar access, max-scalar and chunked-array export are not supported: these overrides always fail verification */ virtual std::vector DoSplitBySizes( + const TColumnSaver& /*saver*/, const TString& /*fullSerializedData*/, const std::vector& /*splitSizes*/) override { + AFL_VERIFY(false); + return {}; + } + + virtual std::shared_ptr DoGetScalar(const ui32 /*index*/) const override
{ + AFL_VERIFY(false)("problem", "cannot use method"); + return nullptr; + } + /* raw size is not reported: returns an empty optional */ virtual std::optional DoGetRawSize() const override { + return {}; + } + virtual std::shared_ptr DoGetMaxScalar() const override { + AFL_VERIFY(false); + return nullptr; + } + virtual std::shared_ptr DoGetChunkedArray() const override { + AFL_VERIFY(false); + return nullptr; + } + +public: + /* NOTE(review): loader is dereferenced in the TBase initializer before AFL_VERIFY(Loader) in the body runs, so a null loader is not caught by the verify - confirm callers never pass null */ TDeserializeChunkedArray(const ui64 recordsCount, const std::shared_ptr& loader, std::vector&& chunks) + : TBase(recordsCount, NArrow::NAccessor::IChunkedArray::EType::SerializedChunkedArray, loader->GetField()->type()) + , Loader(loader) + , Chunks(std::move(chunks)) { + AFL_VERIFY(Loader); + } +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/composite_serial/ya.make b/ydb/core/formats/arrow/accessor/composite_serial/ya.make new file mode 100644 index 000000000000..49c2e1e41ea4 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/composite_serial/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +PEERDIR( + contrib/libs/apache/arrow + ydb/core/formats/arrow/common + ydb/core/formats/arrow/save_load +) + +SRCS( + accessor.cpp +) + +END() diff --git a/ydb/core/formats/arrow/accessor/plain/accessor.cpp b/ydb/core/formats/arrow/accessor/plain/accessor.cpp new file mode 100644 index 000000000000..c606f2e1952b --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/accessor.cpp @@ -0,0 +1,89 @@ +#include "accessor.h" + +#include +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +std::optional TTrivialArray::DoGetRawSize() const { + return NArrow::GetArrayDataSize(Array); +} + +/* wraps Array into a one-column record batch with field name "f" and hands it to TSimpleSplitter */ std::vector TTrivialArray::DoSplitBySizes( + const TColumnSaver& saver, const TString& fullSerializedData, const std::vector& splitSizes) { + auto schema = std::make_shared(arrow::FieldVector({ std::make_shared("f", GetDataType()) })); + auto chunks = NArrow::NSplitter::TSimpleSplitter(saver).SplitBySizes( + arrow::RecordBatch::Make(schema, GetRecordsCount(), { Array }),
fullSerializedData, splitSizes); + std::vector result; + for (auto&& i : chunks) { + /* each split piece must be a one-column batch; its column is paired with the serialized chunk */ AFL_VERIFY(i.GetSlicedBatch()->num_columns() == 1); + result.emplace_back(std::make_shared(i.GetSlicedBatch()->column(0)), i.GetSerializedChunk()); + } + return result; +} + +/* uses the second element of FindMinMaxPosition's result as the position of the maximum */ std::shared_ptr TTrivialArray::DoGetMaxScalar() const { + auto minMaxPos = NArrow::FindMinMaxPosition(Array); + return NArrow::TStatusValidator::GetValid(Array->GetScalar(minMaxPos.second)); +} + +namespace { +/* Adapter passed to SelectChunk; reports the selected chunk as a TLocalDataAddress. */ class TChunkAccessor { +private: + std::shared_ptr ChunkedArray; + std::optional* Result; + +public: + TChunkAccessor(const std::shared_ptr& chunkedArray, std::optional& result) + : ChunkedArray(chunkedArray) + , Result(&result) { + } + ui64 GetChunksCount() const { + return (ui64)ChunkedArray->num_chunks(); + } + ui64 GetChunkLength(const ui32 idx) const { + return (ui64)ChunkedArray->chunk(idx)->length(); + } + /* NOTE(review): startPosition is ui32 here while SelectChunk computes ui64 offsets - confirm the narrowing is intended */ void OnArray(const ui32 idx, const ui32 startPosition) const { + const auto& arr = ChunkedArray->chunk(idx); + *Result = IChunkedArray::TLocalDataAddress(arr, startPosition, idx); + } +}; + +} // namespace + +IChunkedArray::TLocalDataAddress TTrivialChunkedArray::DoGetLocalData( + const std::optional& chunkCurrent, const ui64 position) const { + std::optional result; + TChunkAccessor accessor(Array, result); + SelectChunk(chunkCurrent, position, accessor); + AFL_VERIFY(result); + return *result; +} + +/* sum of GetArrayDataSize over all chunks */ std::optional TTrivialChunkedArray::DoGetRawSize() const { + ui64 result = 0; + for (auto&& i : Array->chunks()) { + result += NArrow::GetArrayDataSize(i); + } + return result; +} + +/* maximum over the per-chunk maxima; empty chunks are skipped, so the result stays null when every chunk is empty */ std::shared_ptr TTrivialChunkedArray::DoGetMaxScalar() const { + std::shared_ptr result; + for (auto&& i : Array->chunks()) { + if (!i->length()) { + continue; + } + auto minMaxPos = NArrow::FindMinMaxPosition(i); + auto scalarCurrent = NArrow::TStatusValidator::GetValid(i->GetScalar(minMaxPos.second)); + if (!result || ScalarCompare(result, scalarCurrent) < 0) { + result = scalarCurrent; + } + } + + return result; +}
+ +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/plain/accessor.h b/ydb/core/formats/arrow/accessor/plain/accessor.h new file mode 100644 index 000000000000..323073dc0704 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/accessor.h @@ -0,0 +1,77 @@ +#pragma once +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +/* Accessor over one array: DoGetLocalData always answers with that array at start position 0 and chunk index 0. */ class TTrivialArray: public IChunkedArray { +private: + using TBase = IChunkedArray; + const std::shared_ptr Array; + +protected: + virtual std::optional DoGetRawSize() const override; + + virtual TLocalDataAddress DoGetLocalData(const std::optional& /*chunkCurrent*/, const ui64 /*position*/) const override { + return TLocalDataAddress(Array, 0, 0); + } + virtual std::shared_ptr DoGetChunkedArray() const override { + return std::make_shared(Array); + } + virtual std::shared_ptr DoGetScalar(const ui32 index) const override { + return NArrow::TStatusValidator::GetValid(Array->GetScalar(index)); + } + virtual std::shared_ptr DoGetMaxScalar() const override; + virtual std::vector DoSplitBySizes( + const TColumnSaver& saver, const TString& fullSerializedData, const std::vector& splitSizes) override; + + /* nested chunked arrays are not supported here: always fails verification */ virtual TLocalChunkedArrayAddress DoGetLocalChunkedArray( + const std::optional& /*chunkCurrent*/, const ui64 /*position*/) const override { + AFL_VERIFY(false); + return TLocalChunkedArrayAddress(nullptr, TCommonChunkAddress(0, GetRecordsCount(), 0)); + } + +public: + TTrivialArray(const std::shared_ptr& data) + : TBase(data->length(), EType::Array, data->type()) + , Array(data) { + } +}; + +/* Accessor over a multi-chunk array; it is registered with EType::ChunkedArray. */ class TTrivialChunkedArray: public IChunkedArray { +private: + using TBase = IChunkedArray; + const std::shared_ptr Array; + +protected: + virtual TLocalDataAddress DoGetLocalData(const std::optional& chunkCurrent, const ui64 position) const override; + virtual std::shared_ptr DoGetChunkedArray() const override { + return Array; + } + virtual std::optional DoGetRawSize() const override; + virtual std::shared_ptr
DoGetScalar(const ui32 index) const override { + /* locate the chunk without a hint, then translate the global index into the chunk-local one */ auto chunk = GetChunkSlow(index); + return NArrow::TStatusValidator::GetValid(chunk.GetArray()->GetScalar(chunk.GetAddress().GetLocalIndex(index))); + } + /* splitting is not supported here: always fails verification */ virtual std::vector DoSplitBySizes( + const TColumnSaver& /*saver*/, const TString& /*fullSerializedData*/, const std::vector& /*splitSizes*/) override { + AFL_VERIFY(false); + return {}; + } + + virtual std::shared_ptr DoGetMaxScalar() const override; + + /* nested chunked arrays are not supported here: always fails verification */ virtual TLocalChunkedArrayAddress DoGetLocalChunkedArray( + const std::optional& /*chunkCurrent*/, const ui64 /*position*/) const override { + AFL_VERIFY(false); + return TLocalChunkedArrayAddress(nullptr, TCommonChunkAddress(0, 0, 0)); + } + +public: + TTrivialChunkedArray(const std::shared_ptr& data) + : TBase(data->length(), EType::ChunkedArray, data->type()) + , Array(data) { + } +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/plain/constructor.cpp b/ydb/core/formats/arrow/accessor/plain/constructor.cpp new file mode 100644 index 000000000000..7e756d1f30bf --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/constructor.cpp @@ -0,0 +1,34 @@ +#include "accessor.h" +#include "constructor.h" + +#include +#include + +#include + +namespace NKikimr::NArrow::NAccessor::NPlain { + +/* the incoming batch must contain exactly one column, which becomes the accessor's data; externalInfo is unused */ TConclusion> TConstructor::DoConstruct( + const std::shared_ptr& originalData, const TChunkConstructionData& /*externalInfo*/) const { + AFL_VERIFY(originalData->num_columns() == 1)("count", originalData->num_columns())("schema", originalData->schema()->ToString()); + return std::make_shared(originalData->column(0)); +} + +/* takes the data from TThreadSimpleArraysCache using the column type, default value and record count */ TConclusion> TConstructor::DoConstructDefault(const TChunkConstructionData& externalInfo) const { + return std::make_shared( + NArrow::TThreadSimpleArraysCache::Get(externalInfo.GetColumnType(), externalInfo.GetDefaultValue(), externalInfo.GetRecordsCount())); +} + +/* the plain constructor has no settings: serialization yields an empty message */ NKikimrArrowAccessorProto::TConstructor TConstructor::DoSerializeToProto() const { + return
NKikimrArrowAccessorProto::TConstructor(); +} + +/* nothing to read: the proto is ignored */ bool TConstructor::DoDeserializeFromProto(const NKikimrArrowAccessorProto::TConstructor& /*proto*/) { + return true; +} + +/* schema consisting of the single result column */ std::shared_ptr TConstructor::DoGetExpectedSchema(const std::shared_ptr& resultColumn) const { + return std::make_shared(arrow::FieldVector({ resultColumn })); +} + +} // namespace NKikimr::NArrow::NAccessor::NPlain diff --git a/ydb/core/formats/arrow/accessor/plain/constructor.h b/ydb/core/formats/arrow/accessor/plain/constructor.h new file mode 100644 index 000000000000..cf84f5021bd7 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/constructor.h @@ -0,0 +1,28 @@ +#pragma once +#include +#include + +namespace NKikimr::NArrow::NAccessor::NPlain { + +class TConstructor: public IConstructor { +public: + static TString GetClassNameStatic() { + return TGlobalConst::PlainDataAccessorName; + } + +private: + /* registers this class in TFactory under the name returned by GetClassNameStatic() */ static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); + virtual TConclusion> DoConstruct( + const std::shared_ptr& originalData, const TChunkConstructionData& externalInfo) const override; + virtual NKikimrArrowAccessorProto::TConstructor DoSerializeToProto() const override; + virtual bool DoDeserializeFromProto(const NKikimrArrowAccessorProto::TConstructor& proto) override; + virtual std::shared_ptr DoGetExpectedSchema(const std::shared_ptr& resultColumn) const override; + virtual TConclusion> DoConstructDefault(const TChunkConstructionData& externalInfo) const override; + +public: + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; + +} // namespace NKikimr::NArrow::NAccessor::NPlain diff --git a/ydb/core/formats/arrow/accessor/plain/request.cpp b/ydb/core/formats/arrow/accessor/plain/request.cpp new file mode 100644 index 000000000000..05a6ab128165 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/request.cpp @@ -0,0 +1,22 @@ +#include "request.h" +#include "constructor.h" + +namespace
NKikimr::NArrow::NAccessor::NPlain { + +/* the plain requested constructor carries no settings: serialization yields an empty message and both deserializers accept any input */ NKikimrArrowAccessorProto::TRequestedConstructor TRequestedConstuctor::DoSerializeToProto() const { + return NKikimrArrowAccessorProto::TRequestedConstructor(); +} + +bool TRequestedConstuctor::DoDeserializeFromProto(const NKikimrArrowAccessorProto::TRequestedConstructor& /*proto*/) { + return true; +} + +NKikimr::TConclusionStatus TRequestedConstuctor::DoDeserializeFromRequest(NYql::TFeaturesExtractor& /*features*/) { + return TConclusionStatus::Success(); +} + +NKikimr::TConclusion TRequestedConstuctor::DoBuildConstructor() const { + return std::make_shared(); +} + +} diff --git a/ydb/core/formats/arrow/accessor/plain/request.h b/ydb/core/formats/arrow/accessor/plain/request.h new file mode 100644 index 000000000000..02f6cce8560a --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/request.h @@ -0,0 +1,26 @@ +#pragma once +#include +#include + +namespace NKikimr::NArrow::NAccessor::NPlain { + +/* NOTE(review): the class name is spelled "Constuctor" (missing "r"); it is a public name, so it is only flagged here */ class TRequestedConstuctor: public IRequestedConstructor { +public: + static TString GetClassNameStatic() { + return TGlobalConst::PlainDataAccessorName; + } + +private: + /* registers this class in TFactory under the name returned by GetClassNameStatic() */ static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); + virtual TConclusion DoBuildConstructor() const override; + virtual NKikimrArrowAccessorProto::TRequestedConstructor DoSerializeToProto() const override; + virtual bool DoDeserializeFromProto(const NKikimrArrowAccessorProto::TRequestedConstructor& /*proto*/) override; + virtual TConclusionStatus DoDeserializeFromRequest(NYql::TFeaturesExtractor& /*features*/) override; + +public: + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; + +} // namespace NKikimr::NArrow::NAccessor::NPlain diff --git a/ydb/core/formats/arrow/accessor/plain/ya.make b/ydb/core/formats/arrow/accessor/plain/ya.make new file mode 100644 index 000000000000..b7d600885be5 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +PEERDIR( +
ydb/core/formats/arrow/protos + ydb/core/formats/arrow/accessor/abstract +) + +SRCS( + accessor.cpp + GLOBAL constructor.cpp + GLOBAL request.cpp +) + +END() diff --git a/ydb/core/formats/arrow/accessor/sparsed/accessor.cpp b/ydb/core/formats/arrow/accessor/sparsed/accessor.cpp new file mode 100644 index 000000000000..b360ee3ea155 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/accessor.cpp @@ -0,0 +1,280 @@ +#include "accessor.h" + +#include +#include +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +/* Builds the sparse form of defaultArray: only values that differ from the default are kept, as rows of one record batch whose columns are the global position and the value. When defaultValue is set its arrow type id must match the array's. */ TSparsedArray::TSparsedArray(const IChunkedArray& defaultArray, const std::shared_ptr& defaultValue) + : TBase(defaultArray.GetRecordsCount(), EType::SparsedArray, defaultArray.GetDataType()) + , DefaultValue(defaultValue) { + if (DefaultValue) { + AFL_VERIFY(DefaultValue->type->id() == defaultArray.GetDataType()->id()); + } + std::optional current; + std::shared_ptr records; + ui32 sparsedRecordsCount = 0; + AFL_VERIFY(SwitchType(GetDataType()->id(), [&](const auto& type) { + using TWrap = std::decay_t; + using TScalar = typename arrow::TypeTraits::ScalarType; + using TArray = typename arrow::TypeTraits::ArrayType; + using TBuilder = typename arrow::TypeTraits::BuilderType; + auto builderValue = NArrow::MakeBuilder(GetDataType()); + TBuilder* builderValueImpl = (TBuilder*)builderValue.get(); + auto builderIndex = NArrow::MakeBuilder(arrow::uint32()); + arrow::UInt32Builder* builderIndexImpl = (arrow::UInt32Builder*)builderIndex.get(); + auto scalar = static_pointer_cast(DefaultValue); + /* walk the source chunk by chunk, passing the previous chunk as the lookup hint */ for (ui32 pos = 0; pos < GetRecordsCount();) { + current = defaultArray.GetChunk(current, pos); + auto typedArray = static_pointer_cast(current->GetArray()); + for (ui32 i = 0; i < typedArray->length(); ++i) { + std::optional isDefault; + /* with a default scalar the value itself is compared */ if (scalar) { + if constexpr (arrow::has_string_view()) { + isDefault = arrow::util::string_view((char*)scalar->value->data(), scalar->value->size()) == typedArray->GetView(i); + } else if constexpr (arrow::has_c_type()) {
+ isDefault = scalar->value == typedArray->Value(i); + } else { + AFL_VERIFY(false)("type", GetDataType()->ToString()); + } + } else { + /* without a default scalar, null counts as the default */ isDefault = typedArray->IsNull(i); + } + /* keep the non-default value together with its global position pos + i */ if (!*isDefault) { + if constexpr (arrow::has_string_view()) { + NArrow::TStatusValidator::Validate(builderValueImpl->Append(typedArray->GetView(i))); + NArrow::TStatusValidator::Validate(builderIndexImpl->Append(pos + i)); + ++sparsedRecordsCount; + } else if constexpr (arrow::has_c_type()) { + NArrow::TStatusValidator::Validate(builderValueImpl->Append(typedArray->Value(i))); + NArrow::TStatusValidator::Validate(builderIndexImpl->Append(pos + i)); + ++sparsedRecordsCount; + } else { + AFL_VERIFY(false)("type", GetDataType()->ToString()); + } + } + } + /* continue from the global finish position of the chunk just processed */ pos = current->GetAddress().GetGlobalFinishPosition(); + AFL_VERIFY(pos <= GetRecordsCount()); + } + std::vector> columns = { NArrow::TStatusValidator::GetValid(builderIndex->Finish()), + NArrow::TStatusValidator::GetValid(builderValue->Finish()) }; + records = arrow::RecordBatch::Make(BuildSchema(GetDataType()), sparsedRecordsCount, columns); + AFL_VERIFY_DEBUG(records->ValidateFull().ok()); + return true; + })); + AFL_VERIFY(records); + /* a single entry is stored in Records for the whole array */ Records.emplace_back(0, GetRecordsCount(), records, DefaultValue); +} + +/* Splits the single stored record batch by sizes and wraps every piece into its own sparse array with indexes rebased to the piece start. */ std::vector TSparsedArray::DoSplitBySizes( + const TColumnSaver& saver, const TString& fullSerializedData, const std::vector& splitSizes) { + AFL_VERIFY(Records.size() == 1)("size", Records.size()); + auto chunks = NArrow::NSplitter::TSimpleSplitter(saver).SplitBySizes(Records.front().GetRecords(), fullSerializedData, splitSizes); + + std::vector result; + ui32 idx = 0; + ui32 startIdx = 0; + for (auto&& i : chunks) { + AFL_VERIFY(i.GetSlicedBatch()->num_columns() == 2); + AFL_VERIFY(i.GetSlicedBatch()->column(0)->type()->id() == arrow::uint32()->id()); + auto UI32Column = static_pointer_cast(i.GetSlicedBatch()->column(0)); + /* the next piece starts right after the last index present in this one */ ui32 nextStartIdx = NArrow::NAccessor::TSparsedArray::GetLastIndex(i.GetSlicedBatch()) + 1; + if (idx + 1 ==
chunks.size()) { + /* the last piece extends to the end of the array */ nextStartIdx = GetRecordsCount(); + } + std::shared_ptr batch; + { + std::unique_ptr builder = NArrow::MakeBuilder(arrow::uint32()); + arrow::UInt32Builder* builderImpl = (arrow::UInt32Builder*)builder.get(); + /* rebase the indexes by subtracting the start of this piece */ for (ui32 rowIdx = 0; rowIdx < UI32Column->length(); ++rowIdx) { + TStatusValidator::Validate(builderImpl->Append(UI32Column->Value(rowIdx) - startIdx)); + } + auto colIndex = TStatusValidator::GetValid(builder->Finish()); + batch = arrow::RecordBatch::Make( + i.GetSlicedBatch()->schema(), i.GetSlicedBatch()->num_rows(), { colIndex, i.GetSlicedBatch()->column(1) }); + } + + ++idx; + { + TBuilder builder(DefaultValue, GetDataType()); + builder.AddChunk(nextStartIdx - startIdx, batch); + result.emplace_back(builder.Finish(), saver.Apply(batch)); + } + startIdx = nextStartIdx; + } + + return result; +} + +/* maximum over the per-entry maxima of Records; entries that report no scalar are skipped */ std::shared_ptr TSparsedArray::DoGetMaxScalar() const { + std::shared_ptr result; + for (auto&& i : Records) { + auto scalarCurrent = i.GetMaxScalar(); + if (!scalarCurrent) { + continue; + } + if (!result || ScalarCompare(result, scalarCurrent) < 0) { + result = scalarCurrent; + } + } + return result; +} + +/* returns the last value of the uint32 column named "index"; the batch must be non-null and non-empty */ ui32 TSparsedArray::GetLastIndex(const std::shared_ptr& batch) { + AFL_VERIFY(batch); + AFL_VERIFY(batch->num_rows()); + auto c = batch->GetColumnByName("index"); + AFL_VERIFY(c)("schema", batch->schema()->ToString()); + AFL_VERIFY(c->type_id() == arrow::uint32()->id())("type", c->type()->ToString()); + auto ui32Column = static_pointer_cast(c); + return ui32Column->Value(ui32Column->length() - 1); +} + +namespace { +/* per-thread cache; MakeDefaultChunk looks it up by type->ToString() */ static thread_local THashMap> SimpleBatchesCache; +} + +NKikimr::NArrow::NAccessor::TSparsedArrayChunk TSparsedArray::MakeDefaultChunk( + const std::shared_ptr& defaultValue, const std::shared_ptr& type, const ui32 recordsCount) { + auto it = SimpleBatchesCache.find(type->ToString()); + if (it == SimpleBatchesCache.end()) { + it = SimpleBatchesCache.emplace(type->ToString(), NArrow::MakeEmptyBatch(BuildSchema(type))).first; +
AFL_VERIFY(it->second->ValidateFull().ok()); + } + return TSparsedArrayChunk(0, recordsCount, it->second, defaultValue); +} + +IChunkedArray::TLocalDataAddress TSparsedArrayChunk::GetChunk( + const std::optional& /*chunkCurrent*/, const ui64 position, const ui32 chunkIdx) const { + const auto predCompare = [](const ui32 position, const TInternalChunkInfo& item) { + return position < item.GetStartExt(); + }; + auto it = std::upper_bound(RemapExternalToInternal.begin(), RemapExternalToInternal.end(), position, predCompare); + AFL_VERIFY(it != RemapExternalToInternal.begin()); + --it; + if (it->GetIsDefault()) { + return IChunkedArray::TLocalDataAddress( + NArrow::TThreadSimpleArraysCache::Get(ColValue->type(), DefaultValue, it->GetSize()), StartPosition + it->GetStartExt(), chunkIdx); + } else { + return IChunkedArray::TLocalDataAddress( + ColValue->Slice(it->GetStartInt(), it->GetSize()), StartPosition + it->GetStartExt(), chunkIdx); + } +} + +std::vector> TSparsedArrayChunk::GetChunkedArray() const { + std::vector> chunks; + for (auto&& i : RemapExternalToInternal) { + if (i.GetIsDefault()) { + chunks.emplace_back(NArrow::TThreadSimpleArraysCache::Get(ColValue->type(), DefaultValue, i.GetSize())); + } else { + chunks.emplace_back(ColValue->Slice(i.GetStartInt(), i.GetSize())); + } + } + return chunks; +} + +TSparsedArrayChunk::TSparsedArrayChunk(const ui32 posStart, const ui32 recordsCount, const std::shared_ptr& records, + const std::shared_ptr& defaultValue) + : RecordsCount(recordsCount) + , StartPosition(posStart) + , Records(records) + , DefaultValue(defaultValue) { + AFL_VERIFY(records->num_columns() == 2); + ColIndex = Records->GetColumnByName("index"); + AFL_VERIFY(ColIndex); + AFL_VERIFY(ColIndex->type_id() == arrow::uint32()->id()); + UI32ColIndex = static_pointer_cast(ColIndex); + if (UI32ColIndex->length()) { + AFL_VERIFY(UI32ColIndex->Value(UI32ColIndex->length() - 1) < recordsCount)("val", UI32ColIndex->Value(UI32ColIndex->length() - 1))( + "count", 
recordsCount); + } + NotDefaultRecordsCount = UI32ColIndex->length(); + RawValues = UI32ColIndex->raw_values(); + ColValue = Records->GetColumnByName("value"); + if (DefaultValue) { + AFL_VERIFY(DefaultValue->type->id() == ColValue->type_id()); + } + ui32 nextIndex = 0; + ui32 startIndexExt = 0; + ui32 startIndexInt = 0; + for (ui32 idx = 0; idx < UI32ColIndex->length(); ++idx) { + if (nextIndex != UI32ColIndex->Value(idx)) { + if (idx - startIndexInt) { + RemapExternalToInternal.emplace_back(startIndexExt, startIndexInt, idx - startIndexInt, false); + } + RemapExternalToInternal.emplace_back(nextIndex, 0, UI32ColIndex->Value(idx) - nextIndex, true); + startIndexExt = UI32ColIndex->Value(idx); + startIndexInt = idx; + } + nextIndex = UI32ColIndex->Value(idx) + 1; + } + if (UI32ColIndex->length() > startIndexInt) { + RemapExternalToInternal.emplace_back(startIndexExt, startIndexInt, UI32ColIndex->length() - startIndexInt, false); + } + if (nextIndex != RecordsCount) { + RemapExternalToInternal.emplace_back(nextIndex, 0, RecordsCount - nextIndex, true); + } + ui32 count = 0; + for (auto&& i : RemapExternalToInternal) { + count += i.GetSize(); + } + for (ui32 i = 0; i + 1 < RemapExternalToInternal.size(); ++i) { + AFL_VERIFY(RemapExternalToInternal[i + 1].GetStartExt() == RemapExternalToInternal[i].GetStartExt() + RemapExternalToInternal[i].GetSize()); + } + AFL_VERIFY(count == RecordsCount)("count", count)("records_count", RecordsCount); + AFL_VERIFY(ColValue); +} + +ui64 TSparsedArrayChunk::GetRawSize() const { + return std::max(NArrow::GetBatchDataSize(Records), 8); +} + +std::shared_ptr TSparsedArrayChunk::GetScalar(const ui32 index) const { + AFL_VERIFY(index < RecordsCount); + for (ui32 idx = 0; idx < UI32ColIndex->length(); ++idx) { + if (UI32ColIndex->Value(idx) == index) { + return NArrow::TStatusValidator::GetValid(ColValue->GetScalar(idx)); + } + } + return DefaultValue; +} + +ui32 TSparsedArrayChunk::GetFirstIndexNotDefault() const { + if 
(UI32ColIndex->length()) { + return StartPosition + GetUI32ColIndex()->Value(0); + } else { + return StartPosition + GetRecordsCount(); + } +} + +std::shared_ptr TSparsedArrayChunk::GetMaxScalar() const { + if (!ColValue->length()) { + return DefaultValue; + } + auto minMax = NArrow::FindMinMaxPosition(ColValue); + auto currentScalar = NArrow::TStatusValidator::GetValid(ColValue->GetScalar(minMax.second)); + if (!DefaultValue || ScalarCompare(DefaultValue, currentScalar) < 0) { + return currentScalar; + } + return DefaultValue; +} + +void TSparsedArray::TBuilder::AddChunk(const ui32 recordsCount, const std::shared_ptr& data) { + AFL_VERIFY(data); + AFL_VERIFY(recordsCount); + AFL_VERIFY(data->num_rows() <= recordsCount)("rows", data->num_rows())("count", recordsCount); + AFL_VERIFY(data->num_columns() == 2)("count", data->num_columns()); + AFL_VERIFY(data->column(0)->type_id() == arrow::uint32()->id())("type", data->column(0)->type()->ToString()); + AFL_VERIFY_DEBUG(data->schema()->field(0)->name() == "index")("name", data->schema()->field(0)->name()); + if (data->num_rows()) { + auto* arr = static_cast(data->column(0).get()); + AFL_VERIFY(arr->Value(arr->length() - 1) < recordsCount)("val", arr->Value(arr->length() - 1))("count", recordsCount); + } + Chunks.emplace_back(RecordsCount, recordsCount, data, DefaultValue); + RecordsCount += recordsCount; +} + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/sparsed/accessor.h b/ydb/core/formats/arrow/accessor/sparsed/accessor.h new file mode 100644 index 000000000000..3f531375613f --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/accessor.h @@ -0,0 +1,189 @@ +#pragma once +#include +#include + +#include + +#include +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +class TSparsedArrayChunk: public TMoveOnly { +private: + YDB_READONLY(ui32, RecordsCount, 0); + YDB_READONLY(ui32, StartPosition, 0); + YDB_READONLY_DEF(std::shared_ptr, Records); + 
std::shared_ptr DefaultValue; + + std::shared_ptr ColIndex; + const ui32* RawValues = nullptr; + ui32 NotDefaultRecordsCount = 0; + YDB_READONLY_DEF(std::shared_ptr, UI32ColIndex); + YDB_READONLY_DEF(std::shared_ptr, ColValue); + + class TInternalChunkInfo { + private: + YDB_READONLY(ui32, StartExt, 0); + YDB_READONLY(ui32, StartInt, 0); + YDB_READONLY(ui32, Size, 0); + YDB_READONLY(bool, IsDefault, false); + + public: + TInternalChunkInfo(const ui32 startExt, const ui32 startInt, const ui32 size, const bool defaultFlag) + : StartExt(startExt) + , StartInt(startInt) + , Size(size) + , IsDefault(defaultFlag) { + AFL_VERIFY(Size); + } + + bool operator<(const TInternalChunkInfo& item) const { + return StartExt < item.StartExt; + } + }; + + std::vector RemapExternalToInternal; + +public: + ui32 GetFinishPosition() const { + return StartPosition + RecordsCount; + } + + ui32 GetNotDefaultRecordsCount() const { + return NotDefaultRecordsCount; + } + + ui32 GetIndexUnsafeFast(const ui32 i) const { + return RawValues[i]; + } + + ui32 GetFirstIndexNotDefault() const; + + std::shared_ptr GetMaxScalar() const; + + std::shared_ptr GetScalar(const ui32 index) const; + + IChunkedArray::TLocalDataAddress GetChunk( + const std::optional& chunkCurrent, const ui64 position, const ui32 chunkIdx) const; + + std::vector> GetChunkedArray() const; + + TSparsedArrayChunk(const ui32 posStart, const ui32 recordsCount, const std::shared_ptr& records, + const std::shared_ptr& defaultValue); + + ui64 GetRawSize() const; +}; + +class TSparsedArray: public IChunkedArray { +private: + using TBase = IChunkedArray; + std::shared_ptr DefaultValue; + std::vector Records; + +protected: + virtual TLocalChunkedArrayAddress DoGetLocalChunkedArray( + const std::optional& /*chunkCurrent*/, const ui64 /*position*/) const override { + AFL_VERIFY(false); + return TLocalChunkedArrayAddress(nullptr, 0, 0); + } + + virtual std::shared_ptr DoGetMaxScalar() const override; + + virtual std::vector DoSplitBySizes( + 
const TColumnSaver& saver, const TString& fullSerializedData, const std::vector& splitSizes) override; + + virtual TLocalDataAddress DoGetLocalData(const std::optional& chunkCurrent, const ui64 position) const override { + ui32 currentIdx = 0; + for (ui32 i = 0; i < Records.size(); ++i) { + if (currentIdx <= position && position < currentIdx + Records[i].GetRecordsCount()) { + return Records[i].GetChunk(chunkCurrent, position - currentIdx, i); + } + currentIdx += Records[i].GetRecordsCount(); + } + AFL_VERIFY(false); + return TLocalDataAddress(nullptr, 0, 0); + } + virtual std::shared_ptr DoGetChunkedArray() const override { + std::vector> chunks; + for (auto&& i : Records) { + auto chunksLocal = i.GetChunkedArray(); + chunks.insert(chunks.end(), chunksLocal.begin(), chunksLocal.end()); + } + return std::make_shared(chunks, GetDataType()); + } + virtual std::optional DoGetRawSize() const override { + ui64 bytes = 0; + for (auto&& i : Records) { + bytes += i.GetRawSize(); + } + return bytes; + } + + TSparsedArray(std::vector&& data, const std::shared_ptr& defaultValue, + const std::shared_ptr& type, const ui32 recordsCount) + : TBase(recordsCount, EType::SparsedArray, type) + , DefaultValue(defaultValue) + , Records(std::move(data)) { + } + + static ui32 GetLastIndex(const std::shared_ptr& batch); + + static std::shared_ptr BuildSchema(const std::shared_ptr& type) { + std::vector> fields = { std::make_shared("index", arrow::uint32()), + std::make_shared("value", type) }; + return std::make_shared(fields); + } + + static TSparsedArrayChunk MakeDefaultChunk( + const std::shared_ptr& defaultValue, const std::shared_ptr& type, const ui32 recordsCount); + +public: + TSparsedArray(const IChunkedArray& defaultArray, const std::shared_ptr& defaultValue); + + TSparsedArray(const std::shared_ptr& defaultValue, const std::shared_ptr& type, const ui32 recordsCount) + : TBase(recordsCount, EType::SparsedArray, type) + , DefaultValue(defaultValue) { + 
Records.emplace_back(MakeDefaultChunk(defaultValue, type, recordsCount)); + } + + virtual std::shared_ptr DoGetScalar(const ui32 index) const override { + auto& chunk = GetSparsedChunk(index); + return chunk.GetScalar(index - chunk.GetStartPosition()); + } + + const TSparsedArrayChunk& GetSparsedChunk(const ui64 position) const { + const auto pred = [](const ui64 position, const TSparsedArrayChunk& item) { + return position < item.GetStartPosition(); + }; + auto it = std::upper_bound(Records.begin(), Records.end(), position, pred); + AFL_VERIFY(it != Records.begin()); + --it; + AFL_VERIFY(position < it->GetStartPosition() + it->GetRecordsCount()); + AFL_VERIFY(it->GetStartPosition() <= position); + return *it; + } + + class TBuilder { + private: + ui32 RecordsCount = 0; + std::vector Chunks; + std::shared_ptr DefaultValue; + std::shared_ptr Type; + + public: + TBuilder(const std::shared_ptr& defaultValue, const std::shared_ptr& type) + : DefaultValue(defaultValue) + , Type(type) { + } + + void AddChunk(const ui32 recordsCount, const std::shared_ptr& data); + + std::shared_ptr Finish() { + return std::shared_ptr(new TSparsedArray(std::move(Chunks), DefaultValue, Type, RecordsCount)); + } + }; +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/sparsed/constructor.cpp b/ydb/core/formats/arrow/accessor/sparsed/constructor.cpp new file mode 100644 index 000000000000..e3f45cd75327 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/constructor.cpp @@ -0,0 +1,34 @@ +#include "accessor.h" +#include "constructor.h" + +namespace NKikimr::NArrow::NAccessor::NSparsed { + +std::shared_ptr TConstructor::DoGetExpectedSchema(const std::shared_ptr& resultColumn) const { + arrow::FieldVector fields = { std::make_shared("index", arrow::uint32()), + std::make_shared("value", resultColumn->type()) }; + return std::make_shared(fields); +} + +TConclusion> TConstructor::DoConstructDefault(const TChunkConstructionData& externalInfo) const 
{ + return std::make_shared(externalInfo.GetDefaultValue(), externalInfo.GetColumnType(), externalInfo.GetRecordsCount()); +} + +TConclusion> TConstructor::DoConstruct( + const std::shared_ptr& originalData, const TChunkConstructionData& externalInfo) const { + AFL_VERIFY(originalData->num_columns() == 2)("count", originalData->num_columns())("schema", originalData->schema()->ToString()); + NArrow::NAccessor::TSparsedArray::TBuilder builder(externalInfo.GetDefaultValue(), externalInfo.GetColumnType()); + builder.AddChunk(externalInfo.GetRecordsCount(), originalData); + return builder.Finish(); +} + +NKikimrArrowAccessorProto::TConstructor TConstructor::DoSerializeToProto() const { + NKikimrArrowAccessorProto::TConstructor result; + *result.MutableSparsed() = {}; + return result; +} + +bool TConstructor::DoDeserializeFromProto(const NKikimrArrowAccessorProto::TConstructor& /*proto*/) { + return true; +} + +} // namespace NKikimr::NArrow::NAccessor::NSparsed diff --git a/ydb/core/formats/arrow/accessor/sparsed/constructor.h b/ydb/core/formats/arrow/accessor/sparsed/constructor.h new file mode 100644 index 000000000000..05743cb4b373 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/constructor.h @@ -0,0 +1,28 @@ +#pragma once +#include +#include + +namespace NKikimr::NArrow::NAccessor::NSparsed { + +class TConstructor: public IConstructor { +public: + static TString GetClassNameStatic() { + return TGlobalConst::SparsedDataAccessorName; + } + +private: + static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); + virtual TConclusion> DoConstruct( + const std::shared_ptr& originalData, const TChunkConstructionData& externalInfo) const override; + virtual NKikimrArrowAccessorProto::TConstructor DoSerializeToProto() const override; + virtual bool DoDeserializeFromProto(const NKikimrArrowAccessorProto::TConstructor& proto) override; + virtual std::shared_ptr DoGetExpectedSchema(const std::shared_ptr& resultColumn) const override; + virtual 
TConclusion> DoConstructDefault(const TChunkConstructionData& externalInfo) const override; + +public: + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; + +} // namespace NKikimr::NArrow::NAccessor::NSparsed diff --git a/ydb/core/formats/arrow/accessor/sparsed/request.cpp b/ydb/core/formats/arrow/accessor/sparsed/request.cpp new file mode 100644 index 000000000000..d484341a95c0 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/request.cpp @@ -0,0 +1,22 @@ +#include "request.h" +#include "constructor.h" + +namespace NKikimr::NArrow::NAccessor::NSparsed { + +NKikimrArrowAccessorProto::TRequestedConstructor TRequestedConstuctor::DoSerializeToProto() const { + return NKikimrArrowAccessorProto::TRequestedConstructor(); +} + +bool TRequestedConstuctor::DoDeserializeFromProto(const NKikimrArrowAccessorProto::TRequestedConstructor& /*proto*/) { + return true; +} + +NKikimr::TConclusionStatus TRequestedConstuctor::DoDeserializeFromRequest(NYql::TFeaturesExtractor& /*features*/) { + return TConclusionStatus::Success(); +} + +NKikimr::TConclusion TRequestedConstuctor::DoBuildConstructor() const { + return std::make_shared(); +} + +} diff --git a/ydb/core/formats/arrow/accessor/sparsed/request.h b/ydb/core/formats/arrow/accessor/sparsed/request.h new file mode 100644 index 000000000000..205949bca97a --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/request.h @@ -0,0 +1,26 @@ +#pragma once +#include +#include + +namespace NKikimr::NArrow::NAccessor::NSparsed { + +class TRequestedConstuctor: public IRequestedConstructor { +public: + static TString GetClassNameStatic() { + return TGlobalConst::SparsedDataAccessorName; + } + +private: + static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); + virtual TConclusion DoBuildConstructor() const override; + virtual NKikimrArrowAccessorProto::TRequestedConstructor DoSerializeToProto() const override; + virtual bool DoDeserializeFromProto(const 
NKikimrArrowAccessorProto::TRequestedConstructor& proto) override; + virtual TConclusionStatus DoDeserializeFromRequest(NYql::TFeaturesExtractor& features) override; + +public: + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; + +} // namespace NKikimr::NArrow::NAccessor::NSparsed diff --git a/ydb/core/formats/arrow/accessor/sparsed/ya.make b/ydb/core/formats/arrow/accessor/sparsed/ya.make new file mode 100644 index 000000000000..c4916a29c36c --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +PEERDIR( + ydb/core/formats/arrow/protos + ydb/core/formats/arrow/accessor/abstract +) + +SRCS( + GLOBAL constructor.cpp + GLOBAL request.cpp + accessor.cpp +) + +END() diff --git a/ydb/core/formats/arrow/accessor/ya.make b/ydb/core/formats/arrow/accessor/ya.make new file mode 100644 index 000000000000..8d9536da5157 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +PEERDIR( + ydb/core/formats/arrow/accessor/abstract + ydb/core/formats/arrow/accessor/plain + ydb/core/formats/arrow/accessor/composite + ydb/core/formats/arrow/accessor/composite_serial + ydb/core/formats/arrow/accessor/sparsed +) + +END() diff --git a/ydb/core/formats/arrow/arrow_helpers.cpp b/ydb/core/formats/arrow/arrow_helpers.cpp index 443bb091090c..b57af6d8ef53 100644 --- a/ydb/core/formats/arrow/arrow_helpers.cpp +++ b/ydb/core/formats/arrow/arrow_helpers.cpp @@ -589,6 +589,38 @@ bool ScalarLess(const arrow::Scalar& x, const arrow::Scalar& y) { return ScalarCompare(x, y) < 0; } +bool ColumnEqualsScalar( + const std::shared_ptr& c, const ui32 position, const std::shared_ptr& s) { + AFL_VERIFY(c); + if (!s) { + return c->IsNull(position) ; + } + AFL_VERIFY(c->type()->Equals(s->type))("s", s->type->ToString())("c", c->type()->ToString()); + + return SwitchTypeImpl(c->type()->id(), [&](const auto& type) { + using TWrap = std::decay_t; + using TScalar = typename 
arrow::TypeTraits::ScalarType; + using TArrayType = typename arrow::TypeTraits::ArrayType; + using TValue = std::decay_t(*s).value)>; + + if constexpr (arrow::has_string_view()) { + const auto& cval = static_cast(*c).GetView(position); + const auto& sval = static_cast(*s).value; + AFL_VERIFY(sval); + TStringBuf cBuf(reinterpret_cast(cval.data()), cval.size()); + TStringBuf sBuf(reinterpret_cast(sval->data()), sval->size()); + return cBuf == sBuf; + } + if constexpr (std::is_arithmetic_v) { + const auto cval = static_cast(*c).GetView(position); + const auto sval = static_cast(*s).value; + return (cval == sval); + } + Y_ABORT_UNLESS(false); // TODO: non primitive types + return false; + }); +} + int ScalarCompare(const arrow::Scalar& x, const arrow::Scalar& y) { Y_VERIFY_S(x.type->Equals(y.type), x.type->ToString() + " vs " + y.type->ToString()); @@ -875,6 +907,18 @@ std::shared_ptr ReallocateBatch(std::shared_ptrschema()); } +std::shared_ptr ReallocateBatch(const std::shared_ptr& original) { + if (!original) { + return original; + } + auto batches = NArrow::SliceToRecordBatches(original); + for (auto&& i : batches) { + i = NArrow::TStatusValidator::GetValid( + NArrow::NSerialization::TNativeSerializer().Deserialize(NArrow::NSerialization::TNativeSerializer().SerializeFull(i))); + } + return NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches(batches)); +} + std::shared_ptr MergeColumns(const std::vector>& batches) { std::vector> columns; std::vector> fields; diff --git a/ydb/core/formats/arrow/arrow_helpers.h b/ydb/core/formats/arrow/arrow_helpers.h index 584803598daf..08a9387a7f30 100644 --- a/ydb/core/formats/arrow/arrow_helpers.h +++ b/ydb/core/formats/arrow/arrow_helpers.h @@ -99,10 +99,14 @@ bool IsGoodScalar(const std::shared_ptr& x); int ScalarCompare(const arrow::Scalar& x, const arrow::Scalar& y); int ScalarCompare(const std::shared_ptr& x, const std::shared_ptr& y); int ScalarCompareNullable(const std::shared_ptr& x, const std::shared_ptr& 
y); -std::partial_ordering ColumnsCompare(const std::vector>& x, const ui32 xRow, const std::vector>& y, const ui32 yRow); +std::partial_ordering ColumnsCompare( + const std::vector>& x, const ui32 xRow, const std::vector>& y, const ui32 yRow); +bool ColumnEqualsScalar( + const std::shared_ptr& c, const ui32 position, const std::shared_ptr& s); bool ScalarLess(const std::shared_ptr& x, const std::shared_ptr& y); bool ScalarLess(const arrow::Scalar& x, const arrow::Scalar& y); std::shared_ptr ReallocateBatch(std::shared_ptr original); +std::shared_ptr ReallocateBatch(const std::shared_ptr& original); bool HasNulls(const std::shared_ptr& column); diff --git a/ydb/core/formats/arrow/common/accessor.cpp b/ydb/core/formats/arrow/common/accessor.cpp deleted file mode 100644 index 775cffa95bab..000000000000 --- a/ydb/core/formats/arrow/common/accessor.cpp +++ /dev/null @@ -1,135 +0,0 @@ -#include "accessor.h" -#include -#include -#include -#include -#include -#include - -namespace NKikimr::NArrow::NAccessor { - -void IChunkedArray::TReader::AppendPositionTo(arrow::ArrayBuilder& builder, const ui64 position, ui64* recordSize) const { - auto address = GetReadChunk(position); - AFL_VERIFY(NArrow::Append(builder, *address.GetArray(), address.GetPosition(), recordSize)); -} - -std::shared_ptr IChunkedArray::TReader::CopyRecord(const ui64 recordIndex) const { - auto address = GetReadChunk(recordIndex); - return NArrow::CopyRecords(address.GetArray(), {address.GetPosition()}); -} - -std::shared_ptr IChunkedArray::Slice(const ui32 offset, const ui32 count) const { - AFL_VERIFY(offset + count <= (ui64)GetRecordsCount())("offset", offset)("count", count)("length", GetRecordsCount()); - ui32 currentOffset = offset; - ui32 countLeast = count; - std::vector> chunks; - auto address = GetChunk({}, offset); - while (countLeast) { - address = GetChunk(address, currentOffset); - const ui64 internalPos = currentOffset - address.GetStartPosition(); - if (internalPos + countLeast <= 
(ui64)address.GetArray()->length()) { - chunks.emplace_back(address.GetArray()->Slice(internalPos, countLeast)); - break; - } else { - const ui32 deltaCount = address.GetArray()->length() - internalPos; - chunks.emplace_back(address.GetArray()->Slice(internalPos, deltaCount)); - AFL_VERIFY(countLeast >= deltaCount); - countLeast -= deltaCount; - currentOffset += deltaCount; - } - } - return std::make_shared(chunks, DataType); -} - -TString IChunkedArray::TReader::DebugString(const ui32 position) const { - auto address = GetReadChunk(position); - return NArrow::DebugString(address.GetArray(), address.GetPosition()); -} - -std::partial_ordering IChunkedArray::TReader::CompareColumns(const std::vector& l, const ui64 lPosition, const std::vector& r, const ui64 rPosition) { - AFL_VERIFY(l.size() == r.size()); - for (ui32 i = 0; i < l.size(); ++i) { - const TAddress lAddress = l[i].GetReadChunk(lPosition); - const TAddress rAddress = r[i].GetReadChunk(rPosition); - auto cmp = lAddress.Compare(rAddress); - if (std::is_neq(cmp)) { - return cmp; - } - } - return std::partial_ordering::equivalent; -} - -IChunkedArray::TAddress IChunkedArray::TReader::GetReadChunk(const ui64 position) const { - AFL_VERIFY(position < ChunkedArray->GetRecordsCount()); - if (CurrentChunkAddress && position < CurrentChunkAddress->GetStartPosition() + CurrentChunkAddress->GetArray()->length() && CurrentChunkAddress->GetStartPosition() <= position) { - } else { - CurrentChunkAddress = ChunkedArray->DoGetChunk(CurrentChunkAddress, position); - } - return IChunkedArray::TAddress(CurrentChunkAddress->GetArray(), position - CurrentChunkAddress->GetStartPosition(), CurrentChunkAddress->GetChunkIndex()); -} - -const std::partial_ordering IChunkedArray::TAddress::Compare(const TAddress& item) const { - return TComparator::TypedCompare(*Array, Position, *item.Array, item.Position); -} - -namespace { -class TChunkAccessor { -private: - std::shared_ptr ChunkedArray; -public: - TChunkAccessor(const 
std::shared_ptr& chunkedArray) - : ChunkedArray(chunkedArray) - { - - } - ui64 GetChunksCount() const { - return (ui64)ChunkedArray->num_chunks(); - } - ui64 GetChunkLength(const ui32 idx) const { - return (ui64)ChunkedArray->chunk(idx)->length(); - } - std::shared_ptr GetArray(const ui32 idx) const { - return ChunkedArray->chunk(idx); - } -}; - -} - -std::optional TTrivialArray::DoGetRawSize() const { - return NArrow::GetArrayDataSize(Array); -} - -std::partial_ordering IChunkedArray::TCurrentChunkAddress::Compare(const ui64 position, const TCurrentChunkAddress& item, const ui64 itemPosition) const { - AFL_VERIFY(StartPosition <= position); - AFL_VERIFY(position < FinishPosition); - AFL_VERIFY(item.StartPosition <= itemPosition); - AFL_VERIFY(itemPosition < item.FinishPosition); - return TComparator::TypedCompare(*Array, position - StartPosition, *item.Array, itemPosition - item.StartPosition); -} - -std::shared_ptr IChunkedArray::TCurrentChunkAddress::CopyRecord(const ui64 recordIndex) const { - AFL_VERIFY(StartPosition <= recordIndex); - AFL_VERIFY(recordIndex < FinishPosition); - return NArrow::CopyRecords(Array, { recordIndex - StartPosition }); -} - -TString IChunkedArray::TCurrentChunkAddress::DebugString(const ui64 position) const { - AFL_VERIFY(position < FinishPosition); - AFL_VERIFY(StartPosition <= position); - return NArrow::DebugString(Array, position - StartPosition); -} - -IChunkedArray::TCurrentChunkAddress TTrivialChunkedArray::DoGetChunk(const std::optional& chunkCurrent, const ui64 position) const { - TChunkAccessor accessor(Array); - return SelectChunk(chunkCurrent, position, accessor); -} - -std::optional TTrivialChunkedArray::DoGetRawSize() const { - ui64 result = 0; - for (auto&& i : Array->chunks()) { - result += NArrow::GetArrayDataSize(i); - } - return result; -} - -} diff --git a/ydb/core/formats/arrow/common/accessor.h b/ydb/core/formats/arrow/common/accessor.h deleted file mode 100644 index 6021f47f5a88..000000000000 --- 
a/ydb/core/formats/arrow/common/accessor.h +++ /dev/null @@ -1,227 +0,0 @@ -#pragma once -#include -#include - -#include -#include -#include - -namespace NKikimr::NArrow::NAccessor { - -class IChunkedArray { -public: - enum class EType { - Undefined, - Array, - ChunkedArray, - SerializedChunkedArray - }; - - class TCurrentChunkAddress { - private: - YDB_READONLY_DEF(std::shared_ptr, Array); - YDB_READONLY(ui64, StartPosition, 0); - YDB_READONLY(ui64, FinishPosition, 0); - YDB_READONLY(ui64, ChunkIndex, 0); - public: - TString DebugString(const ui64 position) const; - - ui64 GetLength() const { - return Array->length(); - } - - bool Contains(const ui64 position) const { - return position >= StartPosition && position < FinishPosition; - } - - std::shared_ptr CopyRecord(const ui64 recordIndex) const; - - std::partial_ordering Compare(const ui64 position, const TCurrentChunkAddress& item, const ui64 itemPosition) const; - - TCurrentChunkAddress(const std::shared_ptr& arr, const ui64 pos, const ui32 chunkIdx) - : Array(arr) - , StartPosition(pos) - , ChunkIndex(chunkIdx) - { - AFL_VERIFY(arr); - AFL_VERIFY(arr->length()); - FinishPosition = StartPosition + arr->length(); - } - - TString DebugString() const { - return TStringBuilder() - << "start=" << StartPosition << ";" - << "chunk_index=" << ChunkIndex << ";" - << "length=" << Array->length() << ";"; - } - }; - - class TAddress { - private: - YDB_READONLY_DEF(std::shared_ptr, Array); - YDB_READONLY(ui64, Position, 0); - YDB_READONLY(ui64, ChunkIdx, 0); - public: - bool NextPosition() { - if (Position + 1 < (ui32)Array->length()) { - ++Position; - return true; - } - return false; - } - - TAddress(const std::shared_ptr& arr, const ui64 position, const ui64 chunkIdx) - : Array(arr) - , Position(position) - , ChunkIdx(chunkIdx) - { - - } - - const std::partial_ordering Compare(const TAddress& item) const; - }; -private: - YDB_READONLY_DEF(std::shared_ptr, DataType); - YDB_READONLY(ui64, RecordsCount, 0); - 
YDB_READONLY(EType, Type, EType::Undefined); - virtual std::optional DoGetRawSize() const = 0; -protected: - virtual std::shared_ptr DoGetChunkedArray() const = 0; - virtual TCurrentChunkAddress DoGetChunk(const std::optional& chunkCurrent, const ui64 position) const = 0; - - template - TCurrentChunkAddress SelectChunk(const std::optional& chunkCurrent, const ui64 position, const TChunkAccessor& accessor) const { - if (!chunkCurrent || position >= chunkCurrent->GetStartPosition()) { - ui32 startIndex = 0; - ui64 idx = 0; - if (chunkCurrent) { - if (position < chunkCurrent->GetFinishPosition()) { - return *chunkCurrent; - } - AFL_VERIFY(chunkCurrent->GetChunkIndex() < accessor.GetChunksCount()); - startIndex = chunkCurrent->GetChunkIndex(); - idx = chunkCurrent->GetStartPosition(); - } - for (ui32 i = startIndex; i < accessor.GetChunksCount(); ++i) { - const ui64 nextIdx = idx + accessor.GetChunkLength(i); - if (idx <= position && position < nextIdx) { - return TCurrentChunkAddress(accessor.GetArray(i), idx, i); - } - idx = nextIdx; - } - } else { - AFL_VERIFY(chunkCurrent->GetChunkIndex() > 0); - ui64 idx = chunkCurrent->GetStartPosition(); - for (i32 i = chunkCurrent->GetChunkIndex() - 1; i >= 0; --i) { - AFL_VERIFY(idx >= accessor.GetChunkLength(i))("idx", idx)("length", accessor.GetChunkLength(i)); - const ui64 nextIdx = idx - accessor.GetChunkLength(i); - if (nextIdx <= position && position < idx) { - return TCurrentChunkAddress(accessor.GetArray(i), nextIdx, i); - } - idx = nextIdx; - } - } - TStringBuilder sb; - ui64 recordsCountChunks = 0; - for (ui32 i = 0; i < accessor.GetChunksCount(); ++i) { - sb << accessor.GetChunkLength(i) << ","; - recordsCountChunks += accessor.GetChunkLength(i); - } - TStringBuilder chunkCurrentInfo; - if (chunkCurrent) { - chunkCurrentInfo << chunkCurrent->DebugString(); - } - AFL_VERIFY(recordsCountChunks == GetRecordsCount())("pos", position)("count", GetRecordsCount())("chunks_map", sb)("chunk_current", chunkCurrentInfo); - 
AFL_VERIFY(false)("pos", position)("count", GetRecordsCount())("chunks_map", sb)("chunk_current", chunkCurrentInfo); - return TCurrentChunkAddress(nullptr, 0, 0); - } - -public: - - class TReader { - private: - std::shared_ptr ChunkedArray; - mutable std::optional CurrentChunkAddress; - public: - TReader(const std::shared_ptr& data) - : ChunkedArray(data) - { - AFL_VERIFY(ChunkedArray); - } - - ui64 GetRecordsCount() const { - return ChunkedArray->GetRecordsCount(); - } - - TAddress GetReadChunk(const ui64 position) const; - static std::partial_ordering CompareColumns(const std::vector& l, const ui64 lPosition, const std::vector& r, const ui64 rPosition); - void AppendPositionTo(arrow::ArrayBuilder& builder, const ui64 position, ui64* recordSize) const; - std::shared_ptr CopyRecord(const ui64 recordIndex) const; - TString DebugString(const ui32 position) const; - }; - - std::optional GetRawSize() const { - return DoGetRawSize(); - } - - std::shared_ptr GetChunkedArray() const { - return DoGetChunkedArray(); - } - virtual ~IChunkedArray() = default; - - std::shared_ptr Slice(const ui32 offset, const ui32 count) const; - - TCurrentChunkAddress GetChunk(const std::optional& chunkCurrent, const ui64 position) const { - return DoGetChunk(chunkCurrent, position); - } - - IChunkedArray(const ui64 recordsCount, const EType type, const std::shared_ptr& dataType) - : DataType(dataType) - , RecordsCount(recordsCount) - , Type(type) { - - } -}; - -class TTrivialArray: public IChunkedArray { -private: - using TBase = IChunkedArray; - const std::shared_ptr Array; -protected: - virtual std::optional DoGetRawSize() const override; - - virtual TCurrentChunkAddress DoGetChunk(const std::optional& /*chunkCurrent*/, const ui64 /*position*/) const override { - return TCurrentChunkAddress(Array, 0, 0); - } - virtual std::shared_ptr DoGetChunkedArray() const override { - return std::make_shared(Array); - } - -public: - TTrivialArray(const std::shared_ptr& data) - : TBase(data->length(), 
EType::Array, data->type()) - , Array(data) { - - } -}; - -class TTrivialChunkedArray: public IChunkedArray { -private: - using TBase = IChunkedArray; - const std::shared_ptr Array; -protected: - virtual TCurrentChunkAddress DoGetChunk(const std::optional& chunkCurrent, const ui64 position) const override; - virtual std::shared_ptr DoGetChunkedArray() const override { - return Array; - } - virtual std::optional DoGetRawSize() const override; - -public: - TTrivialChunkedArray(const std::shared_ptr& data) - : TBase(data->length(), EType::ChunkedArray, data->type()) - , Array(data) { - - } -}; - -} diff --git a/ydb/core/formats/arrow/common/adapter.h b/ydb/core/formats/arrow/common/adapter.h index 1b368e38de50..0a32dc4eb885 100644 --- a/ydb/core/formats/arrow/common/adapter.h +++ b/ydb/core/formats/arrow/common/adapter.h @@ -1,18 +1,20 @@ #pragma once #include "container.h" -#include "accessor.h" #include "validation.h" +#include +#include + #include -#include #include +#include #include +#include +#include #include #include -#include -#include -#include +#include namespace NKikimr::NArrow::NAdapter { @@ -27,7 +29,8 @@ class TDataBuilderPolicy { using TColumn = arrow::Array; using TAccessor = NAccessor::TTrivialArray; - [[nodiscard]] static std::shared_ptr AddColumn(const std::shared_ptr& batch, const std::shared_ptr& field, const std::shared_ptr& extCol) { + [[nodiscard]] static std::shared_ptr AddColumn(const std::shared_ptr& batch, + const std::shared_ptr& field, const std::shared_ptr& extCol) { return TStatusValidator::GetValid(batch->AddColumn(batch->num_columns(), field, extCol)); } @@ -37,7 +40,8 @@ class TDataBuilderPolicy { [[nodiscard]] static std::shared_ptr Build(const std::shared_ptr& schema, std::vector>&& columns, const ui32 count) { return arrow::RecordBatch::Make(schema, count, std::move(columns)); } - [[nodiscard]] static std::shared_ptr ApplyArrowFilter(const std::shared_ptr& batch, const std::shared_ptr& filter) { + [[nodiscard]] static 
std::shared_ptr ApplyArrowFilter( + const std::shared_ptr& batch, const std::shared_ptr& filter) { auto res = arrow::compute::Filter(batch, filter); Y_VERIFY_S(res.ok(), res.status().message()); Y_ABORT_UNLESS(res->kind() == arrow::Datum::RECORD_BATCH); @@ -46,7 +50,6 @@ class TDataBuilderPolicy { [[nodiscard]] static std::shared_ptr GetEmptySame(const std::shared_ptr& batch) { return batch->Slice(0, 0); } - }; template <> @@ -60,11 +63,13 @@ class TDataBuilderPolicy { [[nodiscard]] static std::shared_ptr Build(const std::shared_ptr& schema, std::vector>&& columns, const ui32 count) { return arrow::Table::Make(schema, std::move(columns), count); } - [[nodiscard]] static std::shared_ptr AddColumn(const std::shared_ptr& batch, const std::shared_ptr& field, const std::shared_ptr& extCol) { + [[nodiscard]] static std::shared_ptr AddColumn( + const std::shared_ptr& batch, const std::shared_ptr& field, const std::shared_ptr& extCol) { return TStatusValidator::GetValid(batch->AddColumn(batch->num_columns(), field, std::make_shared(extCol))); } - [[nodiscard]] static std::shared_ptr ApplyArrowFilter(const std::shared_ptr& batch, const std::shared_ptr& filter) { + [[nodiscard]] static std::shared_ptr ApplyArrowFilter( + const std::shared_ptr& batch, const std::shared_ptr& filter) { auto res = arrow::compute::Filter(batch, filter); Y_VERIFY_S(res.ok(), res.status().message()); Y_ABORT_UNLESS(res->kind() == arrow::Datum::TABLE); @@ -86,11 +91,13 @@ class TDataBuilderPolicy { } return std::make_shared(std::make_shared(std::move(fields)), std::move(columns)); } - [[nodiscard]] static std::shared_ptr AddColumn(const std::shared_ptr& batch, const std::shared_ptr& field, const std::shared_ptr& extCol) { + [[nodiscard]] static std::shared_ptr AddColumn(const std::shared_ptr& batch, + const std::shared_ptr& field, const std::shared_ptr& extCol) { batch->AddField(field, std::make_shared(extCol)).Validate(); return batch; } - [[nodiscard]] static std::shared_ptr ApplyArrowFilter(const 
std::shared_ptr& batch, const std::shared_ptr& filter) { + [[nodiscard]] static std::shared_ptr ApplyArrowFilter( + const std::shared_ptr& batch, const std::shared_ptr& filter) { auto table = batch->BuildTableVerified(); return std::make_shared(TDataBuilderPolicy::ApplyArrowFilter(table, filter)); } @@ -99,4 +106,4 @@ class TDataBuilderPolicy { } }; -} +} // namespace NKikimr::NArrow::NAdapter diff --git a/ydb/core/formats/arrow/common/container.cpp b/ydb/core/formats/arrow/common/container.cpp index 57c79ed57114..747ca6708ccf 100644 --- a/ydb/core/formats/arrow/common/container.cpp +++ b/ydb/core/formats/arrow/common/container.cpp @@ -1,9 +1,13 @@ #include "container.h" -#include + +#include +#include #include #include #include +#include + namespace NKikimr::NArrow { TConclusionStatus TGeneralContainer::MergeColumnsStrictly(const TGeneralContainer& container) { @@ -14,8 +18,9 @@ TConclusionStatus TGeneralContainer::MergeColumnsStrictly(const TGeneralContaine RecordsCount = container.RecordsCount; } if (*RecordsCount != *container.RecordsCount) { - return TConclusionStatus::Fail(TStringBuilder() << "inconsistency records count in additional container: " << - container.GetSchema()->ToString() << ". expected: " << RecordsCount << ", reality: " << container.GetRecordsCount()); + return TConclusionStatus::Fail(TStringBuilder() + << "inconsistency records count in additional container: " << container.GetSchema()->ToString() + << ". expected: " << RecordsCount << ", reality: " << container.GetRecordsCount()); } for (i32 i = 0; i < container.Schema->num_fields(); ++i) { auto addFieldResult = AddField(container.Schema->field(i), container.Columns[i]); @@ -30,11 +35,12 @@ TConclusionStatus TGeneralContainer::AddField(const std::shared_ptrGetRecordsCount() != *RecordsCount) { - return TConclusionStatus::Fail(TStringBuilder() << "inconsistency records count in new column: " << - f->name() << ". 
expected: " << RecordsCount << ", reality: " << data->GetRecordsCount()); + return TConclusionStatus::Fail(TStringBuilder() << "inconsistency records count in new column: " << f->name() + << ". expected: " << RecordsCount << ", reality: " << data->GetRecordsCount()); } if (!data->GetDataType()->Equals(f->type())) { - return TConclusionStatus::Fail("schema and data type are not equals: " + data->GetDataType()->ToString() + " vs " + f->type()->ToString()); + return TConclusionStatus::Fail( + "schema and data type are not equals: " + data->GetDataType()->ToString() + " vs " + f->type()->ToString()); } { auto conclusion = Schema->AddField(f); @@ -55,6 +61,11 @@ TConclusionStatus TGeneralContainer::AddField(const std::shared_ptr(data)); } +void TGeneralContainer::DeleteFieldsByIndex(const std::vector& idxs) { + Schema->DeleteFieldsByIndex(idxs); + NUtil::EraseItems(Columns, idxs); +} + void TGeneralContainer::Initialize() { std::optional recordsCount; AFL_VERIFY(Schema->num_fields() == (i32)Columns.size())("schema", Schema->num_fields())("columns", Columns.size()); @@ -65,7 +76,8 @@ void TGeneralContainer::Initialize() { recordsCount = Columns[i]->GetRecordsCount(); } else { AFL_VERIFY(*recordsCount == Columns[i]->GetRecordsCount()) - ("event", "inconsistency_records_count")("expect", *recordsCount)("real", Columns[i]->GetRecordsCount())("field_name", Schema->field(i)->name()); + ("event", "inconsistency_records_count")("expect", *recordsCount)("real", Columns[i]->GetRecordsCount())( + "field_name", Schema->field(i)->name()); } } AFL_VERIFY(recordsCount); @@ -73,24 +85,24 @@ void TGeneralContainer::Initialize() { RecordsCount = *recordsCount; } -TGeneralContainer::TGeneralContainer(const std::vector>& fields, std::vector>&& columns) +TGeneralContainer::TGeneralContainer( + const std::vector>& fields, std::vector>&& columns) : Schema(std::make_shared(fields)) - , Columns(std::move(columns)) -{ + , Columns(std::move(columns)) { Initialize(); } 
-TGeneralContainer::TGeneralContainer(const std::shared_ptr& schema, std::vector>&& columns) +TGeneralContainer::TGeneralContainer( + const std::shared_ptr& schema, std::vector>&& columns) : Schema(std::make_shared(schema)) - , Columns(std::move(columns)) -{ + , Columns(std::move(columns)) { Initialize(); } -TGeneralContainer::TGeneralContainer(const std::shared_ptr& schema, std::vector>&& columns) +TGeneralContainer::TGeneralContainer( + const std::shared_ptr& schema, std::vector>&& columns) : Schema(std::make_shared(schema)) - , Columns(std::move(columns)) -{ + , Columns(std::move(columns)) { Initialize(); } @@ -170,7 +182,8 @@ std::shared_ptr TGeneralContainer::GetAccessor return Columns[idx]; } -TConclusionStatus TGeneralContainer::SyncSchemaTo(const std::shared_ptr& schema, const IFieldsConstructor* defaultFieldsConstructor, const bool forceDefaults) { +TConclusionStatus TGeneralContainer::SyncSchemaTo( + const std::shared_ptr& schema, const IFieldsConstructor* defaultFieldsConstructor, const bool forceDefaults) { std::shared_ptr schemaNew = std::make_shared(); std::vector> columnsNew; if (!RecordsCount) { @@ -187,12 +200,14 @@ TConclusionStatus TGeneralContainer::SyncSchemaTo(const std::shared_ptr(NArrow::TThreadSimpleArraysCache::Get(i->type(), *defConclusion, *RecordsCount))); + columnsNew.emplace_back( + std::make_shared(NArrow::TThreadSimpleArraysCache::Get(i->type(), *defConclusion, *RecordsCount))); } } else { const auto& fOwned = Schema->GetFieldVerified(idx); if (!fOwned->type()->Equals(i->type())) { - return TConclusionStatus::Fail("different field types for '" + i->name() + "'. Have " + fOwned->type()->ToString() + ", need " + i->type()->ToString()); + return TConclusionStatus::Fail( + "different field types for '" + i->name() + "'. 
Have " + fOwned->type()->ToString() + ", need " + i->type()->ToString()); } schemaNew->AddField(fOwned).Validate(); columnsNew.emplace_back(Columns[idx]); @@ -212,7 +227,8 @@ TString TGeneralContainer::DebugString() const { return result; } -TConclusion> IFieldsConstructor::GetDefaultColumnElementValue(const std::shared_ptr& field, const bool force) const { +TConclusion> IFieldsConstructor::GetDefaultColumnElementValue( + const std::shared_ptr& field, const bool force) const { AFL_VERIFY(field); auto result = DoGetDefaultColumnElementValue(field->name()); if (result) { @@ -224,4 +240,4 @@ TConclusion> IFieldsConstructor::GetDefaultColumn return TConclusionStatus::Fail("have not default value for column " + field->name()); } -} +} // namespace NKikimr::NArrow diff --git a/ydb/core/formats/arrow/common/container.h b/ydb/core/formats/arrow/common/container.h index aa15c6740741..572c3e3c4373 100644 --- a/ydb/core/formats/arrow/common/container.h +++ b/ydb/core/formats/arrow/common/container.h @@ -1,5 +1,5 @@ #pragma once -#include "accessor.h" +#include #include @@ -74,6 +74,8 @@ class TGeneralContainer { [[nodiscard]] TConclusionStatus AddField(const std::shared_ptr& f, const std::shared_ptr& data); + void DeleteFieldsByIndex(const std::vector& idxs); + TGeneralContainer(const std::shared_ptr& table); TGeneralContainer(const std::shared_ptr& table); TGeneralContainer(const std::shared_ptr& schema, std::vector>&& columns); diff --git a/ydb/core/formats/arrow/common/validation.h b/ydb/core/formats/arrow/common/validation.h index f71f18ece59c..344128547d7c 100644 --- a/ydb/core/formats/arrow/common/validation.h +++ b/ydb/core/formats/arrow/common/validation.h @@ -1,26 +1,3 @@ #pragma once -#include -#include -#include - -namespace NKikimr::NArrow { - -class TStatusValidator { -public: - static void Validate(const arrow::Status& status); - - template - static T GetValid(const arrow::Result& result) { - Validate(result.status()); - return *result; - } - - template - static 
T GetValid(arrow::Result&& result) { - Validate(result.status()); - return std::move(*result); - } -}; - -} +#include diff --git a/ydb/core/formats/arrow/common/vector_operations.h b/ydb/core/formats/arrow/common/vector_operations.h new file mode 100644 index 000000000000..99642b8eeb7f --- /dev/null +++ b/ydb/core/formats/arrow/common/vector_operations.h @@ -0,0 +1,54 @@ +#pragma once + +#include + +#include + +#include + +namespace NKikimr::NArrow::NUtil { + +template +class TDefaultErasePolicy { +public: + void OnEraseItem(const T& /*item*/) const { + } + void OnMoveItem(const T& /*item*/, const ui64 /*new_index*/) const { + } +}; + +template > +void EraseItems(std::vector& container, const std::vector& idxsToErase, const ErasePolicy& policy = TDefaultErasePolicy()) { + if (idxsToErase.empty()) { + return; + } + AFL_VERIFY(idxsToErase.front() < container.size()); + + auto itNextEraseIdx = idxsToErase.begin(); + ui64 writeIdx = idxsToErase.front(); + ui64 readIdx = idxsToErase.front(); + while (readIdx != container.size()) { + AFL_VERIFY(itNextEraseIdx != idxsToErase.end() && readIdx == *itNextEraseIdx); + + policy.OnEraseItem(container[readIdx]); + ++readIdx; + ++itNextEraseIdx; + if (itNextEraseIdx != idxsToErase.end()) { + AFL_VERIFY(*itNextEraseIdx > *std::prev(itNextEraseIdx)); + AFL_VERIFY(*itNextEraseIdx < container.size()); + } + + const ui64 nextReadIdx = itNextEraseIdx == idxsToErase.end() ? 
container.size() : *itNextEraseIdx; + while (readIdx != nextReadIdx) { + std::swap(container[writeIdx], container[readIdx]); + policy.OnMoveItem(container[writeIdx], writeIdx); + ++writeIdx; + ++readIdx; + } + } + + container.resize(writeIdx); + AFL_VERIFY(itNextEraseIdx == idxsToErase.end()); +} + +} // namespace NKikimr::NArrow::NUtil diff --git a/ydb/core/formats/arrow/common/ya.make b/ydb/core/formats/arrow/common/ya.make index 61f742b09b76..76f8805b572f 100644 --- a/ydb/core/formats/arrow/common/ya.make +++ b/ydb/core/formats/arrow/common/ya.make @@ -5,13 +5,13 @@ PEERDIR( ydb/core/formats/arrow/switch ydb/library/actors/core ydb/library/conclusion + ydb/core/formats/arrow/splitter + ydb/core/formats/arrow/validation ) SRCS( container.cpp - validation.cpp adapter.cpp - accessor.cpp ) END() diff --git a/ydb/core/formats/arrow/custom_registry.cpp b/ydb/core/formats/arrow/custom_registry.cpp index 13e8dc6150a3..9d61c8bf6476 100644 --- a/ydb/core/formats/arrow/custom_registry.cpp +++ b/ydb/core/formats/arrow/custom_registry.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #endif namespace cp = ::arrow::compute; @@ -62,6 +63,10 @@ static void RegisterYdbCast(cp::FunctionRegistry* registry) { Y_ABORT_UNLESS(registry->AddFunction(std::make_shared()).ok()); } +static void RegisterCustomAggregates(cp::FunctionRegistry* registry) { + Y_ABORT_UNLESS(registry->AddFunction(std::make_shared(GetFunctionName(EAggregate::NumRows))).ok()); +} + static void RegisterHouseAggregates(cp::FunctionRegistry* registry) { #ifndef WIN32 try { @@ -71,6 +76,7 @@ static void RegisterHouseAggregates(cp::FunctionRegistry* registry) { Y_ABORT_UNLESS(registry->AddFunction(std::make_shared(GetHouseFunctionName(EAggregate::Max))).ok()); Y_ABORT_UNLESS(registry->AddFunction(std::make_shared(GetHouseFunctionName(EAggregate::Sum))).ok()); //Y_ABORT_UNLESS(registry->AddFunction(std::make_shared(GetHouseFunctionName(EAggregate::Avg))).ok()); + 
Y_ABORT_UNLESS(registry->AddFunction(std::make_shared(GetHouseFunctionName(EAggregate::NumRows))).ok()); Y_ABORT_UNLESS(registry->AddFunction(std::make_shared(GetHouseGroupByName())).ok()); } catch (const std::exception& /*ex*/) { @@ -88,6 +94,7 @@ static std::unique_ptr CreateCustomRegistry() { RegisterRound(registry.get()); RegisterArithmetic(registry.get()); RegisterYdbCast(registry.get()); + RegisterCustomAggregates(registry.get()); RegisterHouseAggregates(registry.get()); return registry; } diff --git a/ydb/core/formats/arrow/modifier/schema.cpp b/ydb/core/formats/arrow/modifier/schema.cpp index 4cf792614802..728eff839592 100644 --- a/ydb/core/formats/arrow/modifier/schema.cpp +++ b/ydb/core/formats/arrow/modifier/schema.cpp @@ -1,5 +1,6 @@ #include "schema.h" #include +#include #include namespace NKikimr::NArrow::NModifier { @@ -29,6 +30,12 @@ TConclusionStatus TSchema::AddField(const std::shared_ptr& f) { return TConclusionStatus::Success(); } +void TSchema::DeleteFieldsByIndex(const std::vector& idxs) { + AFL_VERIFY(Initialized); + AFL_VERIFY(!Finished); + NUtil::EraseItems(Fields, idxs, TFieldsErasePolicy(this)); +} + TString TSchema::ToString() const { TStringBuilder result; for (auto&& i : Fields) { diff --git a/ydb/core/formats/arrow/modifier/schema.h b/ydb/core/formats/arrow/modifier/schema.h index dc663bad9f6a..1d90167c0979 100644 --- a/ydb/core/formats/arrow/modifier/schema.h +++ b/ydb/core/formats/arrow/modifier/schema.h @@ -1,8 +1,95 @@ #pragma once +#include +#include #include + #include #include +namespace NKikimr::NArrow { + +class TSchemaLite { +private: + YDB_READONLY_DEF(std::vector>, Fields); + +public: + TSchemaLite() = default; + TSchemaLite(const std::shared_ptr& schema) { + AFL_VERIFY(schema); + Fields = schema->fields(); + } + + const std::shared_ptr& field(const ui32 index) const { + return GetFieldByIndexVerified(index); + } + + bool Equals(const TSchemaLite& schema, const bool withMetadata = false) const { + if (Fields.size() != 
schema.Fields.size()) { + return false; + } + for (ui32 i = 0; i < Fields.size(); ++i) { + if (!Fields[i]->Equals(schema.Fields[i], withMetadata)) { + return false; + } + } + return true; + } + + const std::vector>& fields() const { + return Fields; + } + + int num_fields() const { + return Fields.size(); + } + + std::vector field_names() const { + std::vector result; + result.reserve(Fields.size()); + for (auto&& f : Fields) { + result.emplace_back(f->name()); + } + return result; + } + + TString DebugString() const { + TStringBuilder sb; + sb << "["; + for (auto&& f : Fields) { + sb << f->ToString() << ";"; + } + sb << "]"; + + return sb; + } + + TString ToString() const { + return DebugString(); + } + + const std::shared_ptr& GetFieldByIndexVerified(const ui32 index) const { + AFL_VERIFY(index < Fields.size()); + return Fields[index]; + } + + const std::shared_ptr& GetFieldByIndexOptional(const ui32 index) const { + if (index < Fields.size()) { + return Fields[index]; + } + return Default>(); + } + + TSchemaLite(std::vector>&& fields) + : Fields(std::move(fields)) { + } + + TSchemaLite(const std::vector>& fields) + : Fields(fields) { + } +}; + +} // namespace NKikimr::NArrow + namespace NKikimr::NArrow::NModifier { class TSchema { private: @@ -12,6 +99,7 @@ class TSchema { bool Finished = false; void Initialize(const std::vector>& fields); + public: TSchema() = default; TSchema(const std::shared_ptr& schema); @@ -39,6 +127,7 @@ class TSchema { std::shared_ptr Finish(); [[nodiscard]] TConclusionStatus AddField(const std::shared_ptr& f); const std::shared_ptr& GetFieldByName(const std::string& name) const; + void DeleteFieldsByIndex(const std::vector& idxs); bool HasField(const std::string& name) const { return IndexByName.contains(name); @@ -51,5 +140,26 @@ class TSchema { const std::shared_ptr& GetFieldVerified(const ui32 index) const; const std::shared_ptr& field(const ui32 index) const; + +private: + class TFieldsErasePolicy { + private: + TSchema* const 
Owner; + + public: + TFieldsErasePolicy(TSchema* const owner) + : Owner(owner) { + } + + void OnEraseItem(const std::shared_ptr& item) const { + Owner->IndexByName.erase(item->name()); + } + + void OnMoveItem(const std::shared_ptr& item, const ui64 new_index) const { + auto* findField = Owner->IndexByName.FindPtr(item->name()); + AFL_VERIFY(findField); + *findField = new_index; + } + }; }; -} \ No newline at end of file +} // namespace NKikimr::NArrow::NModifier diff --git a/ydb/core/formats/arrow/process_columns.cpp b/ydb/core/formats/arrow/process_columns.cpp index 5fb133f90de5..846542608304 100644 --- a/ydb/core/formats/arrow/process_columns.cpp +++ b/ydb/core/formats/arrow/process_columns.cpp @@ -1,5 +1,7 @@ #include "process_columns.h" + #include "common/adapter.h" +#include "modifier/schema.h" #include "modifier/subset.h" #include @@ -7,9 +9,36 @@ namespace NKikimr::NArrow { namespace { -template -std::shared_ptr ExtractColumnsValidateImpl(const std::shared_ptr& srcBatch, - const std::vector& columnNames) { + +template +class TColumnNameAccessor { +public: + static const std::string& GetFieldName(const T& val) { + return val; + } + static TString DebugString(const std::vector& items) { + return JoinSeq(",", items); + } +}; + +template <> +class TColumnNameAccessor> { +public: + static const std::string& GetFieldName(const std::shared_ptr& val) { + return val->name(); + } + static TString DebugString(const std::vector>& items) { + TStringBuilder sb; + for (auto&& i : items) { + sb << i->name() << ","; + } + return sb; + } +}; + +template +std::shared_ptr ExtractColumnsValidateImpl( + const std::shared_ptr& srcBatch, const std::vector& columnNames) { std::vector> fields; fields.reserve(columnNames.size()); std::vector::TColumn>> columns; @@ -17,7 +46,7 @@ std::shared_ptr ExtractColumnsValidateImpl(const std::shared_ptr auto srcSchema = srcBatch->schema(); for (auto& name : columnNames) { - const int pos = srcSchema->GetFieldIndex(name); + const int pos = 
srcSchema->GetFieldIndex(TColumnNameAccessor::GetFieldName(name)); if (Y_LIKELY(pos > -1)) { fields.push_back(srcSchema->field(pos)); columns.push_back(srcBatch->column(pos)); @@ -27,9 +56,9 @@ std::shared_ptr ExtractColumnsValidateImpl(const std::shared_ptr return NAdapter::TDataBuilderPolicy::Build(std::move(fields), std::move(columns), srcBatch->num_rows()); } -template -TConclusion> AdaptColumnsImpl(const std::shared_ptr& srcBatch, - const std::shared_ptr& dstSchema, TSchemaSubset* subset) { +template +TConclusion> AdaptColumnsImpl( + const std::shared_ptr& srcBatch, const std::shared_ptr& dstSchema, TSchemaSubset* subset) { AFL_VERIFY(srcBatch); AFL_VERIFY(dstSchema); std::vector::TColumn>> columns; @@ -48,16 +77,16 @@ TConclusion> AdaptColumnsImpl(const std::shared_ fields.emplace_back(field); auto srcField = srcBatch->schema()->field(index); if (field->Equals(srcField)) { - AFL_VERIFY(columns.back()->type()->Equals(field->type()))("event", "cannot_use_incoming_batch")("reason", "invalid_column_type")("column", field->name()) - ("column_type", field->type()->ToString())("incoming_type", columns.back()->type()->ToString()); + AFL_VERIFY(columns.back()->type()->Equals(field->type()))("event", "cannot_use_incoming_batch")("reason", "invalid_column_type")( + "column", field->name())("column_type", field->type()->ToString())("incoming_type", columns.back()->type()->ToString()); } else { - AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "cannot_use_incoming_batch")("reason", "invalid_column_type")("column", field->name()) - ("column_type", field->ToString(true))("incoming_type", srcField->ToString(true)); + AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "cannot_use_incoming_batch")("reason", "invalid_column_type")( + "column", field->name())("column_type", field->ToString(true))("incoming_type", srcField->ToString(true)); return TConclusionStatus::Fail("incompatible column types"); } } else if (!subset) { - AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", 
"not_found_column")("column", field->name()) - ("column_type", field->type()->ToString())("columns", JoinSeq(",", srcBatch->schema()->field_names())); + AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "not_found_column")("column", field->name())( + "column_type", field->type()->ToString())("columns", JoinSeq(",", srcBatch->schema()->field_names())); return TConclusionStatus::Fail("not found column '" + field->name() + "'"); } ++idx; @@ -68,15 +97,16 @@ TConclusion> AdaptColumnsImpl(const std::shared_ return NAdapter::TDataBuilderPolicy::Build(std::make_shared(fields), std::move(columns), srcBatch->num_rows()); } -template +template std::shared_ptr ExtractImpl(const TColumnOperator::EExtractProblemsPolicy& policy, - const std::shared_ptr& incoming, const std::vector& columnNames) { + const std::shared_ptr& incoming, const std::vector& columnNames) { AFL_VERIFY(incoming); AFL_VERIFY(columnNames.size()); auto result = ExtractColumnsValidateImpl(incoming, columnNames); switch (policy) { case TColumnOperator::EExtractProblemsPolicy::Verify: - AFL_VERIFY((ui32)result->num_columns() == columnNames.size())("schema", incoming->schema()->ToString())("required", JoinSeq(",", columnNames)); + AFL_VERIFY((ui32)result->num_columns() == columnNames.size())("schema", incoming->schema()->ToString())( + "required", TColumnNameAccessor::DebugString(columnNames)); break; case TColumnOperator::EExtractProblemsPolicy::Null: if ((ui32)result->num_columns() != columnNames.size()) { @@ -90,7 +120,8 @@ std::shared_ptr ExtractImpl(const TColumnOperator::EExtractProbl } template -TConclusion> ReorderImpl(const std::shared_ptr& incoming, const std::vector& columnNames) { +TConclusion> ReorderImpl( + const std::shared_ptr& incoming, const std::vector& columnNames) { AFL_VERIFY(!!incoming); AFL_VERIFY(columnNames.size()); if ((ui32)incoming->num_columns() < columnNames.size()) { @@ -107,17 +138,30 @@ TConclusion> ReorderImpl(const std::shared_ptr TColumnOperator::Extract(const std::shared_ptr& 
incoming, const std::vector& columnNames) { +std::shared_ptr TColumnOperator::Extract( + const std::shared_ptr& incoming, const std::vector& columnNames) { return ExtractImpl(AbsentColumnPolicy, incoming, columnNames); } -std::shared_ptr TColumnOperator::Extract(const std::shared_ptr& incoming, const std::vector& columnNames) { +std::shared_ptr TColumnOperator::Extract( + const std::shared_ptr& incoming, const std::vector& columnNames) { return ExtractImpl(AbsentColumnPolicy, incoming, columnNames); } -std::shared_ptr TColumnOperator::Extract(const std::shared_ptr& incoming, const std::vector& columnNames) { +std::shared_ptr TColumnOperator::Extract( + const std::shared_ptr& incoming, const std::vector>& columns) { + return ExtractImpl(AbsentColumnPolicy, incoming, columns); +} + +std::shared_ptr TColumnOperator::Extract( + const std::shared_ptr& incoming, const std::vector>& columns) { + return ExtractImpl(AbsentColumnPolicy, incoming, columns); +} + +std::shared_ptr TColumnOperator::Extract( + const std::shared_ptr& incoming, const std::vector& columnNames) { return ExtractImpl(AbsentColumnPolicy, incoming, columnNames); } @@ -125,28 +169,86 @@ std::shared_ptr TColumnOperator::Extract(const std::shared_ptr> TColumnOperator::Adapt(const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset) { +NKikimr::TConclusion> TColumnOperator::Adapt( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset) { return AdaptColumnsImpl(incoming, dstSchema, subset); } -NKikimr::TConclusion> TColumnOperator::Adapt(const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset) { +NKikimr::TConclusion> TColumnOperator::Adapt( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset) { return AdaptColumnsImpl(incoming, dstSchema, subset); } -NKikimr::TConclusion> TColumnOperator::Reorder(const std::shared_ptr& incoming, const std::vector& columnNames) { 
+NKikimr::TConclusion> TColumnOperator::Adapt( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset) { + return AdaptColumnsImpl(incoming, dstSchema, subset); +} + +NKikimr::TConclusion> TColumnOperator::Adapt( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset) { + return AdaptColumnsImpl(incoming, dstSchema, subset); +} + +NKikimr::TConclusion> TColumnOperator::Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames) { return ReorderImpl(incoming, columnNames); } -NKikimr::TConclusion> TColumnOperator::Reorder(const std::shared_ptr& incoming, const std::vector& columnNames) { +NKikimr::TConclusion> TColumnOperator::Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames) { return ReorderImpl(incoming, columnNames); } -NKikimr::TConclusion> TColumnOperator::Reorder(const std::shared_ptr& incoming, const std::vector& columnNames) { +NKikimr::TConclusion> TColumnOperator::Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames) { return ReorderImpl(incoming, columnNames); } -NKikimr::TConclusion> TColumnOperator::Reorder(const std::shared_ptr& incoming, const std::vector& columnNames) { +NKikimr::TConclusion> TColumnOperator::Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames) { return ReorderImpl(incoming, columnNames); } +namespace { +template +TConclusion BuildSequentialSubsetImpl( + const std::shared_ptr& srcBatch, const std::shared_ptr& dstSchema) { + AFL_VERIFY(srcBatch); + AFL_VERIFY(dstSchema); + if (dstSchema->num_fields() < srcBatch->schema()->num_fields()) { + AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "incorrect columns set: destination must been wider than source")( + "source", srcBatch->schema()->ToString())("destination", dstSchema->ToString()); + return TConclusionStatus::Fail("incorrect columns set: destination must been wider than source"); + } + std::set fieldIdx; + auto itSrc = 
srcBatch->schema()->fields().begin(); + auto itDst = dstSchema->fields().begin(); + while (itSrc != srcBatch->schema()->fields().end() && itDst != dstSchema->fields().end()) { + if ((*itSrc)->name() != (*itDst)->name()) { + ++itDst; + } else { + fieldIdx.emplace(itDst - dstSchema->fields().begin()); + if (!(*itDst)->Equals(*itSrc)) { + AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "cannot_use_incoming_batch")("reason", "invalid_column_type")( + "column_type", (*itDst)->ToString(true))("incoming_type", (*itSrc)->ToString(true)); + return TConclusionStatus::Fail("incompatible column types"); + } + + ++itDst; + ++itSrc; + } + } + if (itDst == dstSchema->fields().end() && itSrc != srcBatch->schema()->fields().end()) { + AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "incorrect columns order in source set")("source", srcBatch->schema()->ToString())( + "destination", dstSchema->ToString()); + return TConclusionStatus::Fail("incorrect columns order in source set"); + } + return TSchemaSubset(fieldIdx, dstSchema->num_fields()); +} +} // namespace + +TConclusion TColumnOperator::BuildSequentialSubset( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema) { + return BuildSequentialSubsetImpl(incoming, dstSchema); +} -} \ No newline at end of file +} // namespace NKikimr::NArrow diff --git a/ydb/core/formats/arrow/process_columns.h b/ydb/core/formats/arrow/process_columns.h index be05e84efb14..ad57af9e6647 100644 --- a/ydb/core/formats/arrow/process_columns.h +++ b/ydb/core/formats/arrow/process_columns.h @@ -6,6 +6,7 @@ namespace NKikimr::NArrow { class TSchemaSubset; +class TSchemaLite; class TColumnOperator { public: @@ -14,6 +15,7 @@ class TColumnOperator { Verify, Skip }; + private: EExtractProblemsPolicy AbsentColumnPolicy = EExtractProblemsPolicy::Verify; @@ -33,18 +35,35 @@ class TColumnOperator { return *this; } - std::shared_ptr Extract(const std::shared_ptr& incoming, const std::vector& columnNames); + std::shared_ptr Extract( + const 
std::shared_ptr& incoming, const std::vector& columnNames); std::shared_ptr Extract(const std::shared_ptr& incoming, const std::vector& columnNames); + std::shared_ptr Extract( + const std::shared_ptr& incoming, const std::vector>& columns); + std::shared_ptr Extract( + const std::shared_ptr& incoming, const std::vector>& columns); std::shared_ptr Extract(const std::shared_ptr& incoming, const std::vector& columnNames); std::shared_ptr Extract(const std::shared_ptr& incoming, const std::vector& columnNames); - TConclusion> Adapt(const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset = nullptr); - TConclusion> Adapt(const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset = nullptr); + TConclusion BuildSequentialSubset( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema); + + TConclusion> Adapt( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset = nullptr); + TConclusion> Adapt( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset = nullptr); + TConclusion> Adapt(const std::shared_ptr& incoming, + const std::shared_ptr& dstSchema, TSchemaSubset* subset = nullptr); + TConclusion> Adapt( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset = nullptr); - TConclusion> Reorder(const std::shared_ptr& incoming, const std::vector& columnNames); - TConclusion> Reorder(const std::shared_ptr& incoming, const std::vector& columnNames); - TConclusion> Reorder(const std::shared_ptr& incoming, const std::vector& columnNames); + TConclusion> Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames); + TConclusion> Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames); + TConclusion> Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames); TConclusion> Reorder(const std::shared_ptr& incoming, const std::vector& columnNames); }; 
-} \ No newline at end of file +} // namespace NKikimr::NArrow diff --git a/ydb/core/formats/arrow/program.cpp b/ydb/core/formats/arrow/program.cpp index 60e59749bb7a..50071d8490e3 100644 --- a/ydb/core/formats/arrow/program.cpp +++ b/ydb/core/formats/arrow/program.cpp @@ -19,6 +19,18 @@ enum class AggFunctionId { AGG_MIN = 3, AGG_MAX = 4, AGG_SUM = 5, + AGG_AVG = 6, + //AGG_VAR = 7, + //AGG_COVAR = 8, + //AGG_STDDEV = 9, + //AGG_CORR = 10, + //AGG_ARG_MIN = 11, + //AGG_ARG_MAX = 12, + //AGG_COUNT_DISTINCT = 13, + //AGG_QUANTILES = 14, + //AGG_TOP_COUNT = 15, + //AGG_TOP_SUM = 16, + AGG_NUM_ROWS = 17, }; struct GroupByOptions: public arrow::compute::ScalarAggregateOptions { struct Assign { @@ -398,6 +410,8 @@ const char * GetFunctionName(EAggregate op) { return "min_max"; case EAggregate::Sum: return "sum"; + case EAggregate::NumRows: + return "num_rows"; #if 0 // TODO case EAggregate::Avg: return "mean"; @@ -424,6 +438,8 @@ const char * GetHouseFunctionName(EAggregate op) { case EAggregate::Avg: return "ch.avg"; #endif + case EAggregate::NumRows: + return "ch.num_rows"; default: break; } @@ -448,6 +464,8 @@ CH::AggFunctionId GetHouseFunction(EAggregate op) { case EAggregate::Avg: return CH::AggFunctionId::AGG_AVG; #endif + case EAggregate::NumRows: + return CH::AggFunctionId::AGG_NUM_ROWS; default: break; } @@ -678,6 +696,27 @@ IStepFunction::TPtr TAggregateAssign::GetFunction(arrow::compu return std::make_shared(ctx); } +TString TAggregateAssign::DebugString() const { + TStringBuilder sb; + sb << "{"; + if (Operation != EAggregate::Unspecified) { + sb << "op=" << GetFunctionName(Operation) << ";"; + } + if (Arguments.size()) { + sb << "arguments=["; + for (auto&& i : Arguments) { + sb << i.DebugString() << ";"; + } + sb << "];"; + } + sb << "options=" << ScalarOpts.ToString() << ";"; + if (KernelFunction) { + sb << "kernel=" << KernelFunction->name() << ";"; + } + sb << "column=" << Column.DebugString() << ";"; + sb << "}"; + return sb; +} arrow::Status 
TProgramStep::ApplyAssignes(TDatumBatch& batch, arrow::compute::ExecContext* ctx) const { if (Assignes.empty()) { diff --git a/ydb/core/formats/arrow/program.h b/ydb/core/formats/arrow/program.h index e3f9943e6c13..2b953b55e070 100644 --- a/ydb/core/formats/arrow/program.h +++ b/ydb/core/formats/arrow/program.h @@ -21,6 +21,7 @@ enum class EAggregate { Max = 4, Sum = 5, //Avg = 6, + NumRows = 7, }; } @@ -323,6 +324,7 @@ class TAggregateAssign { const arrow::compute::ScalarAggregateOptions* GetOptions() const { return &ScalarOpts; } IStepFunction::TPtr GetFunction(arrow::compute::ExecContext* ctx) const; + TString DebugString() const; private: TColumnInfo Column; @@ -372,10 +374,18 @@ class TProgramStep { sb << "];"; } if (GroupBy.size()) { - sb << "group_by_count=" << GroupBy.size() << "; "; + sb << "group_by_assignes=["; + for (auto&& i : GroupBy) { + sb << i.DebugString() << ";"; + } + sb << "];"; } if (GroupByKeys.size()) { - sb << "group_by_keys_count=" << GroupByKeys.size() << ";"; + sb << "group_by_keys=["; + for (auto&& i : GroupByKeys) { + sb << i.DebugString() << ";"; + } + sb << "];"; } sb << "projections=["; diff --git a/ydb/core/formats/arrow/protos/accessor.proto b/ydb/core/formats/arrow/protos/accessor.proto new file mode 100644 index 000000000000..015ea0b7cf89 --- /dev/null +++ b/ydb/core/formats/arrow/protos/accessor.proto @@ -0,0 +1,30 @@ +package NKikimrArrowAccessorProto; + +message TRequestedConstructor { + optional string ClassName = 1; + + message TPlain { + } + + message TSparsed { + } + + oneof Implementation { + TPlain Plain = 10; + TSparsed Sparsed = 11; + } +} + +message TConstructor { + optional string ClassName = 1; + + message TPlain { + } + + message TSparsed { + } + oneof Implementation { + TPlain Plain = 10; + TSparsed Sparsed = 11; + } +} \ No newline at end of file diff --git a/ydb/core/formats/arrow/protos/ya.make b/ydb/core/formats/arrow/protos/ya.make index 828b0aa0fb77..6fbf466a1771 100644 --- 
a/ydb/core/formats/arrow/protos/ya.make +++ b/ydb/core/formats/arrow/protos/ya.make @@ -3,6 +3,7 @@ PROTO_LIBRARY() SRCS( ssa.proto fields.proto + accessor.proto ) PEERDIR( diff --git a/ydb/core/formats/arrow/reader/batch_iterator.h b/ydb/core/formats/arrow/reader/batch_iterator.h index 48497a53c452..d3bb365d5706 100644 --- a/ydb/core/formats/arrow/reader/batch_iterator.h +++ b/ydb/core/formats/arrow/reader/batch_iterator.h @@ -44,7 +44,8 @@ class TBatchIterator { TBatchIterator(TRWSortableBatchPosition&& keyColumns) : ControlPointFlag(true) - , KeyColumns(std::move(keyColumns)) { + , KeyColumns(std::move(keyColumns)) + { } diff --git a/ydb/core/formats/arrow/reader/merger.cpp b/ydb/core/formats/arrow/reader/merger.cpp index a09983971be8..68e6fb842faa 100644 --- a/ydb/core/formats/arrow/reader/merger.cpp +++ b/ydb/core/formats/arrow/reader/merger.cpp @@ -4,12 +4,12 @@ namespace NKikimr::NArrow::NMerger { -void TMergePartialStream::PutControlPoint(const TSortableBatchPosition& point) { +void TMergePartialStream::PutControlPoint(const TSortableBatchPosition& point, const bool deepCopy) { AFL_VERIFY(point.IsSameSortingSchema(SortSchema))("point", point.DebugJson())("schema", SortSchema->ToString()); Y_ABORT_UNLESS(point.IsReverseSort() == Reverse); Y_ABORT_UNLESS(++ControlPoints == 1); - SortHeap.Push(TBatchIterator(point.BuildRWPosition())); + SortHeap.Push(TBatchIterator(point.BuildRWPosition(false, deepCopy))); } void TMergePartialStream::RemoveControlPoint() { @@ -65,7 +65,7 @@ bool TMergePartialStream::DrainToControlPoint(TRecordBatchBuilder& builder, cons } bool TMergePartialStream::DrainCurrentTo(TRecordBatchBuilder& builder, const TSortableBatchPosition& readTo, const bool includeFinish, std::optional* lastResultPosition) { - PutControlPoint(readTo); + PutControlPoint(readTo, false); return DrainToControlPoint(builder, includeFinish, lastResultPosition); } @@ -191,6 +191,9 @@ std::vector> TMergePartialStream::DrainAllPa std::vector> result; for (auto&& i : 
positions) { TRecordBatchBuilder indexesBuilder(resultFields); + if (SortHeap.Empty() || i.GetPosition().Compare(SortHeap.Current().GetKeyColumns()) == std::partial_ordering::less) { + continue; + } DrainCurrentTo(indexesBuilder, i.GetPosition(), i.IsIncludedToLeftInterval()); result.emplace_back(indexesBuilder.Finalize()); if (result.back()->num_rows() == 0) { diff --git a/ydb/core/formats/arrow/reader/merger.h b/ydb/core/formats/arrow/reader/merger.h index 972e891fe1fd..c30aba0f384f 100644 --- a/ydb/core/formats/arrow/reader/merger.h +++ b/ydb/core/formats/arrow/reader/merger.h @@ -37,6 +37,9 @@ class TMergePartialStream { void DrainCurrentPosition(TRecordBatchBuilder* builder, std::shared_ptr* resultScanData, ui64* resultPosition); void CheckSequenceInDebug(const TRWSortableBatchPosition& nextKeyColumnsPosition); + bool DrainCurrentTo(TRecordBatchBuilder& builder, const TSortableBatchPosition& readTo, const bool includeFinish, + std::optional* lastResultPosition = nullptr); + public: TMergePartialStream(std::shared_ptr sortSchema, std::shared_ptr dataSchema, const bool reverse, const std::vector& versionColumnNames) : SortSchema(sortSchema) @@ -49,6 +52,7 @@ class TMergePartialStream { Y_ABORT_UNLESS(!DataSchema || DataSchema->num_fields()); } + void PutControlPoint(const TSortableBatchPosition& point, const bool deepCopy); void SkipToLowerBound(const TSortableBatchPosition& pos, const bool include); void SetPossibleSameVersion(const bool value) { @@ -67,8 +71,6 @@ class TMergePartialStream { return TStringBuilder() << "sort_heap=" << SortHeap.DebugJson(); } - void PutControlPoint(const TSortableBatchPosition& point); - void RemoveControlPoint(); bool ControlPointEnriched() const { @@ -92,7 +94,6 @@ class TMergePartialStream { void DrainAll(TRecordBatchBuilder& builder); std::shared_ptr SingleSourceDrain(const TSortableBatchPosition& readTo, const bool includeFinish, std::optional* lastResultPosition = nullptr); - bool DrainCurrentTo(TRecordBatchBuilder& 
builder, const TSortableBatchPosition& readTo, const bool includeFinish, std::optional* lastResultPosition = nullptr); bool DrainToControlPoint(TRecordBatchBuilder& builder, const bool includeFinish, std::optional* lastResultPosition = nullptr); std::vector> DrainAllParts(const TIntervalPositions& positions, const std::vector>& resultFields); diff --git a/ydb/core/formats/arrow/reader/position.cpp b/ydb/core/formats/arrow/reader/position.cpp index 6431d180d130..b728405769d7 100644 --- a/ydb/core/formats/arrow/reader/position.cpp +++ b/ydb/core/formats/arrow/reader/position.cpp @@ -1,4 +1,7 @@ #include "position.h" + +#include + #include namespace NKikimr::NArrow::NMerger { @@ -15,11 +18,13 @@ NJson::TJsonValue TSortableBatchPosition::DebugJson() const { return result; } -std::optional TSortableBatchPosition::FindPosition(TRWSortableBatchPosition& position, const ui64 posStartExt, const ui64 posFinishExt, const TSortableBatchPosition& forFound, const bool greater) { +std::optional TSortableBatchPosition::FindPosition(TRWSortableBatchPosition& position, + const ui64 posStartExt, const ui64 posFinishExt, const TSortableBatchPosition& forFound, const bool greater) { ui64 posStart = posStartExt; ui64 posFinish = posFinishExt; + auto guard = position.CreateAsymmetricAccessGuard(); { - AFL_VERIFY(position.InitPosition(posStart)); + AFL_VERIFY(guard.InitSortingPosition(posStart)); auto cmp = position.Compare(forFound); if (cmp == std::partial_ordering::greater) { return TFoundPosition::Greater(posStart); @@ -28,7 +33,7 @@ std::optional TSortableBatchPosition::Fi } } { - AFL_VERIFY(position.InitPosition(posFinish)); + AFL_VERIFY(guard.InitSortingPosition(posFinish)); auto cmp = position.Compare(forFound); if (cmp == std::partial_ordering::less) { return TFoundPosition::Less(posFinish); @@ -37,7 +42,7 @@ std::optional TSortableBatchPosition::Fi } } while (posFinish > posStart + 1) { - Y_ABORT_UNLESS(position.InitPosition(0.5 * (posStart + posFinish))); + 
AFL_VERIFY(guard.InitSortingPosition(0.5 * (posStart + posFinish))); const auto comparision = position.Compare(forFound); if (comparision == std::partial_ordering::less) { posStart = position.Position; @@ -47,17 +52,18 @@ std::optional TSortableBatchPosition::Fi return TFoundPosition::Equal(position.Position); } } - Y_ABORT_UNLESS(posFinish != posStart); + AFL_VERIFY(posFinish != posStart); if (greater) { - Y_ABORT_UNLESS(position.InitPosition(posFinish)); + AFL_VERIFY(guard.InitSortingPosition(posFinish)); return TFoundPosition::Greater(posFinish); } else { - Y_ABORT_UNLESS(position.InitPosition(posStart)); + AFL_VERIFY(guard.InitSortingPosition(posStart)); return TFoundPosition::Less(posStart); } } -std::optional TSortableBatchPosition::FindPosition(const std::shared_ptr& batch, const TSortableBatchPosition& forFound, const bool greater, const std::optional includedStartPosition) { +std::optional TSortableBatchPosition::FindPosition(const std::shared_ptr& batch, + const TSortableBatchPosition& forFound, const bool greater, const std::optional includedStartPosition) { if (!batch || !batch->num_rows()) { return {}; } @@ -74,11 +80,14 @@ std::optional TSortableBatchPosition::Fi return FindPosition(position, posStart, posFinish, forFound, greater); } -NKikimr::NArrow::NMerger::TRWSortableBatchPosition TSortableBatchPosition::BuildRWPosition() const { - return TRWSortableBatchPosition(Position, RecordsCount, ReverseSort, Sorting->BuildCopy(Position), Data ? Data->BuildCopy(Position) : nullptr); +NKikimr::NArrow::NMerger::TRWSortableBatchPosition TSortableBatchPosition::BuildRWPosition(const bool needData, const bool deepCopy) const { + return TRWSortableBatchPosition(Position, RecordsCount, ReverseSort, + deepCopy ? Sorting->BuildCopy(Position) : Sorting, + (needData && Data) ? (deepCopy ? 
Data->BuildCopy(Position) : Data) : nullptr); } -NKikimr::NArrow::NMerger::TRWSortableBatchPosition TSortableBatchPosition::BuildRWPosition(std::shared_ptr batch, const ui32 position) const { +NKikimr::NArrow::NMerger::TRWSortableBatchPosition TSortableBatchPosition::BuildRWPosition( + std::shared_ptr batch, const ui32 position) const { std::vector dataColumns; if (Data) { dataColumns = Data->GetFieldNames(); @@ -98,7 +107,8 @@ TSortableBatchPosition::TFoundPosition TRWSortableBatchPosition::SkipToLower(con return *pos; } -TSortableScanData::TSortableScanData(const ui64 position, const std::shared_ptr& batch, const std::vector& columns) { +TSortableScanData::TSortableScanData( + const ui64 position, const std::shared_ptr& batch, const std::vector& columns) { for (auto&& i : columns) { auto c = batch->GetAccessorByNameOptional(i); AFL_VERIFY(c)("column_name", i)("columns", JoinSeq(",", columns))("batch", batch->DebugString()); @@ -110,7 +120,8 @@ TSortableScanData::TSortableScanData(const ui64 position, const std::shared_ptr< BuildPosition(position); } -TSortableScanData::TSortableScanData(const ui64 position, const std::shared_ptr& batch, const std::vector& columns) { +TSortableScanData::TSortableScanData( + const ui64 position, const std::shared_ptr& batch, const std::vector& columns) { for (auto&& i : columns) { auto c = batch->GetColumnByName(i); AFL_VERIFY(c)("column_name", i)("columns", JoinSeq(",", columns)); @@ -134,10 +145,11 @@ TSortableScanData::TSortableScanData(const ui64 position, const std::shared_ptr< BuildPosition(position); } -void TSortableScanData::AppendPositionTo(const std::vector>& builders, const ui64 position, ui64* recordSize) const { +void TSortableScanData::AppendPositionTo( + const std::vector>& builders, const ui64 position, ui64* recordSize) const { AFL_VERIFY(builders.size() == PositionAddress.size()); for (ui32 i = 0; i < PositionAddress.size(); ++i) { - AFL_VERIFY(NArrow::Append(*builders[i], *PositionAddress[i].GetArray(), position 
- PositionAddress[i].GetStartPosition(), recordSize)); + AFL_VERIFY(NArrow::Append(*builders[i], *PositionAddress[i].GetArray(), PositionAddress[i].GetAddress().GetLocalIndex(position), recordSize)); } } @@ -148,9 +160,9 @@ void TSortableScanData::BuildPosition(const ui64 position) { StartPosition = 0; LastInit = position; for (auto&& i : Columns) { - PositionAddress.emplace_back(i->GetChunk({}, position)); - StartPosition = std::max(StartPosition, PositionAddress.back().GetStartPosition()); - FinishPosition = std::min(FinishPosition, PositionAddress.back().GetFinishPosition()); + PositionAddress.emplace_back(i->GetChunkSlow(position)); + StartPosition = std::max(StartPosition, PositionAddress.back().GetAddress().GetGlobalStartPosition()); + FinishPosition = std::min(FinishPosition, PositionAddress.back().GetAddress().GetGlobalFinishPosition()); if (!recordsCount) { recordsCount = i->GetRecordsCount(); } else { @@ -166,18 +178,19 @@ void TSortableScanData::BuildPosition(const ui64 position) { bool TSortableScanData::InitPosition(const ui64 position) { AFL_VERIFY(position < RecordsCount); if (position < FinishPosition && StartPosition <= position) { - return false; + return true; } LastInit = position; ui32 idx = 0; FinishPosition = Max(); StartPosition = 0; for (auto&& i : PositionAddress) { - if (!i.Contains(position)) { - i = Columns[idx]->GetChunk(i, position); + if (!i.GetAddress().Contains(position)) { + i = Columns[idx]->GetChunk(i.GetAddress(), position); } - StartPosition = std::max(StartPosition, i.GetStartPosition()); - FinishPosition = std::min(FinishPosition, i.GetFinishPosition()); + StartPosition = std::max(StartPosition, i.GetAddress().GetGlobalStartPosition()); + FinishPosition = std::min(FinishPosition, i.GetAddress().GetGlobalFinishPosition()); + AFL_VERIFY(i.GetAddress().Contains(position)); ++idx; } AFL_VERIFY(StartPosition < FinishPosition); @@ -212,14 +225,13 @@ void TCursor::AppendPositionTo(const 
std::vectortype()->Equals(PositionAddress[i].GetArray()->type())); - AFL_VERIFY(NArrow::Append(*builders[i], *PositionAddress[i].GetArray(), Position - PositionAddress[i].GetStartPosition(), recordSize)); + AFL_VERIFY(NArrow::Append(*builders[i], *PositionAddress[i].GetArray(), PositionAddress[i].GetAddress().GetLocalIndex(Position), recordSize)); } } TCursor::TCursor(const std::shared_ptr& table, const ui64 position, const std::vector& columns) - : Position(position) -{ + : Position(position) { PositionAddress = TSortableScanData(position, table, columns).GetPositionAddress(); } -} +} // namespace NKikimr::NArrow::NMerger diff --git a/ydb/core/formats/arrow/reader/position.h b/ydb/core/formats/arrow/reader/position.h index c33f8e75ef01..ef5c0990eb0c 100644 --- a/ydb/core/formats/arrow/reader/position.h +++ b/ydb/core/formats/arrow/reader/position.h @@ -1,5 +1,5 @@ #pragma once -#include +#include #include #include #include @@ -22,12 +22,12 @@ class TSortableScanData; class TCursor { private: YDB_READONLY(ui64, Position, 0); - std::vector PositionAddress; + std::vector PositionAddress; public: TCursor() = default; TCursor(const std::shared_ptr& table, const ui64 position, const std::vector& columns); - TCursor(const ui64 position, const std::vector& addresses) + TCursor(const ui64 position, const std::vector& addresses) : Position(position) , PositionAddress(addresses) { @@ -64,7 +64,7 @@ class TCursor { class TSortableScanData { private: ui64 RecordsCount = 0; - YDB_READONLY_DEF(std::vector, PositionAddress); + YDB_READONLY_DEF(std::vector, PositionAddress); YDB_READONLY_DEF(std::vector>, Columns); YDB_READONLY_DEF(std::vector>, Fields); ui64 StartPosition = 0; @@ -87,19 +87,18 @@ class TSortableScanData { BuildPosition(position); } - const NAccessor::IChunkedArray::TCurrentChunkAddress& GetPositionAddress(const ui32 colIdx) const { + const NAccessor::IChunkedArray::TFullDataAddress& GetPositionAddress(const ui32 colIdx) const { AFL_VERIFY(colIdx < 
PositionAddress.size()); return PositionAddress[colIdx]; } ui32 GetPositionInChunk(const ui32 colIdx, const ui32 pos) const { AFL_VERIFY(colIdx < PositionAddress.size()); - AFL_VERIFY(pos >= PositionAddress[colIdx].GetStartPosition()); - return pos - PositionAddress[colIdx].GetStartPosition(); + return PositionAddress[colIdx].GetAddress().GetLocalIndex(pos); } - std::shared_ptr BuildCopy(const ui64 position) const { - return std::make_shared(position, RecordsCount, Columns, Fields); + std::shared_ptr BuildCopy(const ui64 /*position*/) const { + return std::make_shared(*this); } TCursor BuildCursor(const ui64 position) const { @@ -109,8 +108,8 @@ class TSortableScanData { auto addresses = PositionAddress; ui32 idx = 0; for (auto&& i : addresses) { - if (!i.Contains(position)) { - i = Columns[idx]->GetChunk(i, position); + if (!i.GetAddress().Contains(position)) { + i = Columns[idx]->GetChunk(i.GetAddress(), position); } ++idx; } @@ -129,15 +128,15 @@ class TSortableScanData { } else { for (ui32 idx = 0; idx < PositionAddress.size(); ++idx) { std::partial_ordering cmp = std::partial_ordering::equivalent; - const bool containsSelf = PositionAddress[idx].Contains(position); - const bool containsItem = item.PositionAddress[idx].Contains(itemPosition); + const bool containsSelf = PositionAddress[idx].GetAddress().Contains(position); + const bool containsItem = item.PositionAddress[idx].GetAddress().Contains(itemPosition); if (containsSelf && containsItem) { cmp = PositionAddress[idx].Compare(position, item.PositionAddress[idx], itemPosition); } else if (containsSelf) { - auto temporaryAddress = item.Columns[idx]->GetChunk(item.PositionAddress[idx], itemPosition); + auto temporaryAddress = item.Columns[idx]->GetChunk(item.PositionAddress[idx].GetAddress(), itemPosition); cmp = PositionAddress[idx].Compare(position, temporaryAddress, itemPosition); } else if (containsItem) { - auto temporaryAddress = Columns[idx]->GetChunk(PositionAddress[idx], position); + auto 
temporaryAddress = Columns[idx]->GetChunk(PositionAddress[idx].GetAddress(), position); cmp = temporaryAddress.Compare(position, item.PositionAddress[idx], itemPosition); } else { AFL_VERIFY(false); @@ -153,7 +152,7 @@ class TSortableScanData { void AppendPositionTo(const std::vector>& builders, const ui64 position, ui64* recordSize) const; - bool InitPosition(const ui64 position); + [[nodiscard]] bool InitPosition(const ui64 position); std::shared_ptr Slice(const ui64 offset, const ui64 count) const { std::vector> slicedArrays; @@ -210,6 +209,17 @@ class TSortableBatchPosition { bool ReverseSort = false; std::shared_ptr Sorting; std::shared_ptr Data; + + TSortableBatchPosition(const i64 position, const i64 recordsCount, const bool reverseSort, const std::shared_ptr& sorting, + const std::shared_ptr& data) + : Position(position) + , RecordsCount(recordsCount) + , ReverseSort(reverseSort) + , Sorting(sorting) + , Data(data) { + AFL_VERIFY(IsAvailablePosition(Position)); + } + public: TSortableBatchPosition() = default; @@ -221,7 +231,7 @@ class TSortableBatchPosition { return RecordsCount; } - std::shared_ptr GetSorting() const { + const std::shared_ptr& GetSorting() const { return Sorting; } @@ -240,16 +250,6 @@ class TSortableBatchPosition { return Sorting->GetFields(); } - TSortableBatchPosition(const i64 position, const i64 recordsCount, const bool reverseSort, const std::shared_ptr& sorting, const std::shared_ptr& data) - : Position(position) - , RecordsCount(recordsCount) - , ReverseSort(reverseSort) - , Sorting(sorting) - , Data(data) - { - - } - TSortableBatchPosition(const TRWSortableBatchPosition& source) = delete; TSortableBatchPosition(TRWSortableBatchPosition& source) = delete; TSortableBatchPosition(TRWSortableBatchPosition&& source) = delete; @@ -258,7 +258,7 @@ class TSortableBatchPosition { TSortableBatchPosition operator= (TRWSortableBatchPosition& source) = delete; TSortableBatchPosition operator= (TRWSortableBatchPosition&& source) = delete; - 
TRWSortableBatchPosition BuildRWPosition() const; + TRWSortableBatchPosition BuildRWPosition(const bool needData, const bool deepCopy) const; std::shared_ptr SliceData(const ui64 offset, const ui64 count) const { AFL_VERIFY(Data); @@ -316,7 +316,12 @@ class TSortableBatchPosition { } }; - static std::optional FindPosition(const std::shared_ptr& batch, const TSortableBatchPosition& forFound, const bool needGreater, const std::optional includedStartPosition); + [[nodiscard]] bool IsAvailablePosition(const i64 position) const { + return 0 <= position && position < RecordsCount; + } + + static std::optional FindPosition(const std::shared_ptr& batch, const TSortableBatchPosition& forFound, + const bool needGreater, const std::optional includedStartPosition); static std::optional FindPosition(TRWSortableBatchPosition& position, const ui64 posStart, const ui64 posFinish, const TSortableBatchPosition& forFound, const bool greater); const TSortableScanData& GetData() const { @@ -488,7 +493,7 @@ class TIntervalPositions { void AddPosition(TSortableBatchPosition&& position, const bool includePositionToLeftInterval) { TIntervalPosition intervalPosition(std::move(position), includePositionToLeftInterval); AddPosition(std::move(intervalPosition)); - } + } void AddPosition(const TSortableBatchPosition& position, const bool includePositionToLeftInterval) { TIntervalPosition intervalPosition(position, includePositionToLeftInterval); @@ -502,23 +507,53 @@ class TRWSortableBatchPosition: public TSortableBatchPosition, public TMoveOnly public: using TBase::TBase; - bool NextPosition(const i64 delta) { + [[nodiscard]] bool NextPosition(const i64 delta) { return InitPosition(Position + delta); } - bool InitPosition(const i64 position) { - if (position < RecordsCount && position >= 0) { - Sorting->InitPosition(position); - if (Data) { - Data->InitPosition(position); + [[nodiscard]] bool InitPosition(const i64 position) { + if (!IsAvailablePosition(position)) { + return false; + } + 
AFL_VERIFY(Sorting->InitPosition(position))("pos", position)("count", RecordsCount); + if (Data) { + AFL_VERIFY(Data->InitPosition(position))("pos", position)("count", RecordsCount); + } + Position = position; + return true; + } + + class TAsymmetricPositionGuard: TNonCopyable { + private: + TRWSortableBatchPosition& Owner; + public: + TAsymmetricPositionGuard(TRWSortableBatchPosition& owner) + : Owner(owner) + { + } + + [[nodiscard]] bool InitSortingPosition(const i64 position) { + if (!Owner.IsAvailablePosition(position)) { + return false; } - Position = position; + AFL_VERIFY(Owner.Sorting->InitPosition(position)); + Owner.Position = position; return true; - } else { - return false; } + ~TAsymmetricPositionGuard() { + if (Owner.IsAvailablePosition(Owner.Position)) { + if (Owner.Data) { + AFL_VERIFY(Owner.Data->InitPosition(Owner.Position)); + } + } + } + }; + + TAsymmetricPositionGuard CreateAsymmetricAccessGuard() { + return TAsymmetricPositionGuard(*this); } + TSortableBatchPosition::TFoundPosition SkipToLower(const TSortableBatchPosition& forFound); // (-inf, it1), [it1, it2), [it2, it3), ..., [itLast, +inf) diff --git a/ydb/core/formats/arrow/reader/result_builder.cpp b/ydb/core/formats/arrow/reader/result_builder.cpp index deb4fe3e1427..523539b843e1 100644 --- a/ydb/core/formats/arrow/reader/result_builder.cpp +++ b/ydb/core/formats/arrow/reader/result_builder.cpp @@ -64,7 +64,7 @@ std::shared_ptr TRecordBatchBuilder::Finalize() { for (auto&& i : Builders) { columns.emplace_back(NArrow::TStatusValidator::GetValid(i->Finish())); } - auto result = arrow::RecordBatch::Make(schema, columns.front()->length(), columns); + auto result = arrow::RecordBatch::Make(schema, columns.front()->length(), std::move(columns)); #ifndef NDEBUG NArrow::TStatusValidator::Validate(result->ValidateFull()); #endif diff --git a/ydb/core/formats/arrow/save_load/loader.cpp b/ydb/core/formats/arrow/save_load/loader.cpp new file mode 100644 index 000000000000..d6f200b35c05 --- /dev/null 
+++ b/ydb/core/formats/arrow/save_load/loader.cpp @@ -0,0 +1,68 @@ +#include "loader.h" + +#include + +namespace NKikimr::NArrow::NAccessor { + +TString TColumnLoader::DebugString() const { + TStringBuilder result; + result << "accessor_constructor:" << AccessorConstructor->DebugString() << ";"; + result << "result_field:" << ResultField->ToString() << ";"; + if (Transformer) { + result << "transformer:" << Transformer->DebugString() << ";"; + } + result << "serializer:" << Serializer->DebugString() << ";"; + return result; +} + +TColumnLoader::TColumnLoader(NTransformation::ITransformer::TPtr transformer, const NSerialization::TSerializerContainer& serializer, + const TConstructorContainer& accessorConstructor, const std::shared_ptr& resultField, + const std::shared_ptr& defaultValue, const ui32 columnId) + : Serializer(serializer) + , Transformer(transformer) + , AccessorConstructor(accessorConstructor) + , ResultField(resultField) + , DefaultValue(defaultValue) + , ColumnId(columnId) { + AFL_VERIFY(!!AccessorConstructor); + AFL_VERIFY(ResultField); + AFL_VERIFY(Serializer); +} + +const std::shared_ptr& TColumnLoader::GetField() const { + return ResultField; +} + +arrow::Result> TColumnLoader::Apply(const TString& data) const { + Y_ABORT_UNLESS(Serializer); + arrow::Result> columnArray = + Transformer ? 
Serializer->Deserialize(data) : Serializer->Deserialize(data, AccessorConstructor->GetExpectedSchema(ResultField)); + if (!columnArray.ok()) { + return columnArray; + } + if (Transformer) { + return Transformer->Transform(*columnArray); + } else { + return columnArray; + } +} + +std::shared_ptr TColumnLoader::ApplyRawVerified(const TString& data) const { + return TStatusValidator::GetValid(Apply(data)); +} + +std::shared_ptr TColumnLoader::ApplyVerified(const TString& dataStr, const ui32 recordsCount) const { + auto data = TStatusValidator::GetValid(Apply(dataStr)); + return BuildAccessor(data, TChunkConstructionData(recordsCount, DefaultValue, ResultField->type())); +} + +std::shared_ptr TColumnLoader::BuildAccessor( + const std::shared_ptr& batch, const TChunkConstructionData& chunkData) const { + return AccessorConstructor->Construct(batch, chunkData).DetachResult(); +} + +std::shared_ptr TColumnLoader::BuildDefaultAccessor(const ui32 recordsCount) const { + return AccessorConstructor->ConstructDefault(TChunkConstructionData(recordsCount, DefaultValue, ResultField->type())).DetachResult(); +} + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/save_load/loader.h b/ydb/core/formats/arrow/save_load/loader.h new file mode 100644 index 000000000000..1b42e41fc106 --- /dev/null +++ b/ydb/core/formats/arrow/save_load/loader.h @@ -0,0 +1,56 @@ +#pragma once +#include +#include +#include + +#include + +#include + +namespace NKikimr::NArrow::NAccessor { + +class TColumnLoader { +private: + NSerialization::TSerializerContainer Serializer; + NTransformation::ITransformer::TPtr Transformer; + YDB_READONLY_DEF(NAccessor::TConstructorContainer, AccessorConstructor); + YDB_READONLY_DEF(std::shared_ptr, ResultField); + YDB_READONLY_DEF(std::shared_ptr, DefaultValue); + const ui32 ColumnId; + + arrow::Result> Apply(const TString& data) const; + std::shared_ptr BuildAccessor( + const std::shared_ptr& batch, const TChunkConstructionData& chunkData) 
const; + +public: + std::shared_ptr BuildDefaultAccessor(const ui32 recordsCount) const; + + bool IsEqualTo(const TColumnLoader& item) const { + if (!!Transformer != !!item.Transformer) { + return false; + } else if (!!Transformer && !Transformer->IsEqualTo(*item.Transformer)) { + return false; + } + if (!Serializer.IsEqualTo(item.Serializer)) { + return false; + } + return true; + } + + TString DebugString() const; + + TColumnLoader(NTransformation::ITransformer::TPtr transformer, const NSerialization::TSerializerContainer& serializer, + const NAccessor::TConstructorContainer& accessorConstructor, const std::shared_ptr& resultField, + const std::shared_ptr& defaultValue, const ui32 columnId); + + ui32 GetColumnId() const { + return ColumnId; + } + + const std::shared_ptr& GetField() const; + + std::shared_ptr ApplyVerified(const TString& data, const ui32 expectedRecordsCount) const; + std::shared_ptr ApplyRawVerified(const TString& data) const; +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/saver.cpp b/ydb/core/formats/arrow/save_load/saver.cpp similarity index 63% rename from ydb/core/tx/columnshard/engines/scheme/abstract/saver.cpp rename to ydb/core/formats/arrow/save_load/saver.cpp index c15db92b8eec..95adebc76471 100644 --- a/ydb/core/tx/columnshard/engines/scheme/abstract/saver.cpp +++ b/ydb/core/formats/arrow/save_load/saver.cpp @@ -1,6 +1,6 @@ #include "saver.h" -namespace NKikimr::NOlap { +namespace NKikimr::NArrow::NAccessor { TColumnSaver::TColumnSaver(NArrow::NTransformation::ITransformer::TPtr transformer, const NArrow::NSerialization::TSerializerContainer serializer) : Transformer(transformer) @@ -21,10 +21,17 @@ TString TColumnSaver::Apply(std::shared_ptr data, std::shared_ptr< TString TColumnSaver::Apply(const std::shared_ptr& data) const { Y_ABORT_UNLESS(Serializer); + NArrow::NSerialization::TSerializerContainer serializer = Serializer; + if (SerializerBySizeUpperBorder.size()) { + 
auto it = SerializerBySizeUpperBorder.lower_bound(data->num_rows()); + if (it != SerializerBySizeUpperBorder.end()) { + serializer = it->second; + } + } if (Transformer) { - return Serializer->SerializeFull(Transformer->Transform(data)); + return serializer->SerializeFull(Transformer->Transform(data)); } else { - return Serializer->SerializePayload(data); + return serializer->SerializePayload(data); } } diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/saver.h b/ydb/core/formats/arrow/save_load/saver.h similarity index 59% rename from ydb/core/tx/columnshard/engines/scheme/abstract/saver.h rename to ydb/core/formats/arrow/save_load/saver.h index c4d10c55a359..3532a0195fa3 100644 --- a/ydb/core/tx/columnshard/engines/scheme/abstract/saver.h +++ b/ydb/core/formats/arrow/save_load/saver.h @@ -1,25 +1,30 @@ #pragma once -#include #include +#include + +#include + #include #include -namespace NKikimr::NOlap { +namespace NKikimr::NArrow::NAccessor { class TColumnSaver { private: NArrow::NTransformation::ITransformer::TPtr Transformer; - NArrow::NSerialization::TSerializerContainer Serializer; + YDB_READONLY_DEF(NArrow::NSerialization::TSerializerContainer, Serializer); + std::map SerializerBySizeUpperBorder; + public: TColumnSaver() = default; TColumnSaver(NArrow::NTransformation::ITransformer::TPtr transformer, const NArrow::NSerialization::TSerializerContainer serializer); - void ResetSerializer(const NArrow::NSerialization::TSerializerContainer& serializer) { - AFL_VERIFY(serializer); + void AddSerializerWithBorder(const ui32 upperBorder, const NArrow::NSerialization::TSerializerContainer& serializer) { if (Serializer.IsCompatibleForExchange(serializer)) { - Serializer = serializer; + AFL_VERIFY(SerializerBySizeUpperBorder.emplace(upperBorder, serializer).second); } else { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_reset_serializer")("reason", "incompatible_serializers"); + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", 
"cannot_add_serializer")("reason", "incompatible_serializers")( + "border", upperBorder); } } @@ -30,5 +35,4 @@ class TColumnSaver { TString Apply(const std::shared_ptr& data) const; }; - -} \ No newline at end of file +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/save_load/ya.make b/ydb/core/formats/arrow/save_load/ya.make new file mode 100644 index 000000000000..db2d6667519a --- /dev/null +++ b/ydb/core/formats/arrow/save_load/ya.make @@ -0,0 +1,17 @@ +LIBRARY() + +SRCS( + saver.cpp + loader.cpp +) + +PEERDIR( + ydb/library/actors/core + contrib/libs/apache/arrow + ydb/library/accessor + ydb/library/conclusion + ydb/core/formats/arrow/transformer + ydb/core/formats/arrow/serializer +) + +END() diff --git a/ydb/core/formats/arrow/serializer/abstract.cpp b/ydb/core/formats/arrow/serializer/abstract.cpp index 6347cd765b9b..8010197ae409 100644 --- a/ydb/core/formats/arrow/serializer/abstract.cpp +++ b/ydb/core/formats/arrow/serializer/abstract.cpp @@ -21,8 +21,12 @@ NKikimr::TConclusionStatus TSerializerContainer::DeserializeFromRequest(NYql::TF return TBase::GetObjectPtr()->DeserializeFromRequest(features); } -std::shared_ptr TSerializerContainer::GetDefaultSerializer() { +std::shared_ptr TSerializerContainer::GetDefaultSerializer() { return std::make_shared(); } +std::shared_ptr TSerializerContainer::GetFastestSerializer() { + return std::make_shared(arrow::Compression::UNCOMPRESSED); +} + } diff --git a/ydb/core/formats/arrow/serializer/abstract.h b/ydb/core/formats/arrow/serializer/abstract.h index 1c8d9963dd7e..db60152224ea 100644 --- a/ydb/core/formats/arrow/serializer/abstract.h +++ b/ydb/core/formats/arrow/serializer/abstract.h @@ -146,6 +146,7 @@ class TSerializerContainer: public NBackgroundTasks::TInterfaceProtoContainer GetDefaultSerializer(); + static std::shared_ptr GetFastestSerializer(); TConclusionStatus DeserializeFromProto(const NKikimrSchemeOp::TCompressionOptions& proto); diff --git 
a/ydb/core/formats/arrow/serializer/ya.make b/ydb/core/formats/arrow/serializer/ya.make index bf7e091ab4bf..79a3ae1a3ddf 100644 --- a/ydb/core/formats/arrow/serializer/ya.make +++ b/ydb/core/formats/arrow/serializer/ya.make @@ -2,7 +2,7 @@ LIBRARY() PEERDIR( contrib/libs/apache/arrow - ydb/core/formats/arrow/common + ydb/core/formats/arrow/validation ydb/services/metadata/abstract ydb/library/actors/core ydb/core/protos diff --git a/ydb/core/formats/arrow/simple_arrays_cache.h b/ydb/core/formats/arrow/simple_arrays_cache.h index e527e44a0b08..2d307171658f 100644 --- a/ydb/core/formats/arrow/simple_arrays_cache.h +++ b/ydb/core/formats/arrow/simple_arrays_cache.h @@ -36,10 +36,14 @@ class TThreadSimpleArraysCache { } std::shared_ptr GetNullImpl(const std::shared_ptr& type, const ui32 recordsCount); - std::shared_ptr GetConstImpl(const std::shared_ptr& type, const std::shared_ptr& scalar, const ui32 recordsCount); + std::shared_ptr GetConstImpl( + const std::shared_ptr& type, const std::shared_ptr& scalar, const ui32 recordsCount); + public: static std::shared_ptr GetNull(const std::shared_ptr& type, const ui32 recordsCount); - static std::shared_ptr GetConst(const std::shared_ptr& type, const std::shared_ptr& scalar, const ui32 recordsCount); - static std::shared_ptr Get(const std::shared_ptr& type, const std::shared_ptr& scalar, const ui32 recordsCount); + static std::shared_ptr GetConst( + const std::shared_ptr& type, const std::shared_ptr& scalar, const ui32 recordsCount); + static std::shared_ptr Get( + const std::shared_ptr& type, const std::shared_ptr& scalar, const ui32 recordsCount); }; -} +} // namespace NKikimr::NArrow diff --git a/ydb/core/formats/arrow/simple_builder/array.h b/ydb/core/formats/arrow/simple_builder/array.h index ec7680bc73b1..6803ff92e499 100644 --- a/ydb/core/formats/arrow/simple_builder/array.h +++ b/ydb/core/formats/arrow/simple_builder/array.h @@ -52,10 +52,12 @@ class TSimpleArrayConstructor: public IArrayBuilder { using TSelf = 
TSimpleArrayConstructor; using TBuilder = typename arrow::TypeTraits::BuilderType; const TFiller Filler; + ui32 ShiftValue = 0; - TSimpleArrayConstructor(const TString& fieldName, bool nullable, const TFiller& filler) + TSimpleArrayConstructor(const TString& fieldName, bool nullable, const TFiller& filler, ui32 shiftValue = 0) : TBase(fieldName, nullable) , Filler(filler) + , ShiftValue(shiftValue) { } protected: @@ -63,16 +65,17 @@ class TSimpleArrayConstructor: public IArrayBuilder { TBuilder fBuilder = TFillerBuilderConstructor::Construct(); Y_ABORT_UNLESS(fBuilder.Reserve(recordsCount).ok()); for (ui32 i = 0; i < recordsCount; ++i) { - Y_ABORT_UNLESS(fBuilder.Append(Filler.GetValue(i)).ok()); + Y_ABORT_UNLESS(fBuilder.Append(Filler.GetValue(i + ShiftValue)).ok()); } return *fBuilder.Finish(); } - + public: - TSimpleArrayConstructor(const TString& fieldName, const TFiller& filler = TFiller()) + TSimpleArrayConstructor(const TString& fieldName, const TFiller& filler = TFiller(), ui32 shiftValue = 0) : TBase(fieldName) , Filler(filler) + , ShiftValue(shiftValue) { } diff --git a/ydb/core/formats/arrow/simple_builder/filler.cpp b/ydb/core/formats/arrow/simple_builder/filler.cpp index f6168701ddbe..337941bebe42 100644 --- a/ydb/core/formats/arrow/simple_builder/filler.cpp +++ b/ydb/core/formats/arrow/simple_builder/filler.cpp @@ -1,11 +1,17 @@ #include "filler.h" + #include +#include namespace NKikimr::NArrow::NConstruction { -TStringPoolFiller::TStringPoolFiller(const ui32 poolSize, const ui32 strLen) { +TStringPoolFiller::TStringPoolFiller(const ui32 poolSize, const ui32 strLen, const TString& defaultValue, const double defaultValueFrq) { for (ui32 i = 0; i < poolSize; ++i) { - Data.emplace_back(NUnitTest::RandomString(strLen, i)); + if (RandomNumber() < defaultValueFrq) { + Data.emplace_back(defaultValue); + } else { + Data.emplace_back(NUnitTest::RandomString(strLen, i)); + } } } @@ -14,4 +20,4 @@ arrow::util::string_view TStringPoolFiller::GetValue(const ui32 
idx) const { return arrow::util::string_view(str.data(), str.size()); } -} +} // namespace NKikimr::NArrow::NConstruction diff --git a/ydb/core/formats/arrow/simple_builder/filler.h b/ydb/core/formats/arrow/simple_builder/filler.h index e86e7a6c2139..c9c115933358 100644 --- a/ydb/core/formats/arrow/simple_builder/filler.h +++ b/ydb/core/formats/arrow/simple_builder/filler.h @@ -1,9 +1,13 @@ #pragma once #include -#include #include -#include +#include + +#include + #include +#include +#include namespace NKikimr::NArrow::NConstruction { @@ -11,16 +15,76 @@ template class TIntSeqFiller { public: using TValue = TArrowInt; + private: using CType = typename TArrowInt::c_type; const CType Delta; + public: CType GetValue(const CType idx) const { return Delta + idx; } TIntSeqFiller(const CType delta = 0) : Delta(delta) { + } +}; +class TStringType : public arrow::StringType { +public: + using c_type = TString; +}; + +template +class TPoolFiller { +private: + using CType = typename TArrowType::c_type; + +private: + std::vector Data; + +public: + using TValue = std::conditional_t, arrow::StringType, TArrowType>; + using ValueType = std::conditional_t, arrow::util::string_view, CType>; + + static CType GetRandomNumberNotEqDef(CType defaultValue) { + CType result; + do { + result = RandomNumber() * std::numeric_limits::max(); + } while (result == defaultValue); + return result; + } + + TPoolFiller(const ui32 poolSize, const CType defaultValue, const double defaultValueFrq) { + for (ui32 i = 0; i < poolSize; ++i) { + if (RandomNumber() < defaultValueFrq) { + Data.emplace_back(defaultValue); + } else { + Data.emplace_back(GetRandomNumberNotEqDef(defaultValue)); + } + } + } + + TPoolFiller(const ui32 poolSize, const ui32 strLen, const TString& defaultValue, const double defaultValueFrq) { + for (ui32 i = 0; i < poolSize; ++i) { + if (RandomNumber() < defaultValueFrq) { + Data.emplace_back(defaultValue); + } else { + Data.emplace_back(NUnitTest::RandomString(strLen, i)); + } + } + 
} + + template + const ValueType Convert(const Type& v) const { + return v; + } + + const ValueType Convert(const TString& str) const { + return arrow::util::string_view(str.data(), str.size()); + } + + ValueType GetValue(const ui32 idx) const { + return Convert(Data[(2 + 7 * idx) % Data.size()]); } }; @@ -28,27 +92,29 @@ template class TIntConstFiller { public: using TValue = TArrowInt; + private: using CType = typename TArrowInt::c_type; const CType Value; + public: CType GetValue(const CType /*idx*/) const { return Value; } TIntConstFiller(const CType value) : Value(value) { - } }; class TStringPoolFiller { private: std::vector Data; + public: using TValue = arrow::StringType; arrow::util::string_view GetValue(const ui32 idx) const; - TStringPoolFiller(const ui32 poolSize, const ui32 strLen); + TStringPoolFiller(const ui32 poolSize, const ui32 strLen, const TString& defaultValue = "", const double defaultValueFrq = 0); }; template @@ -56,6 +122,7 @@ class TLinearArrayAccessor { private: using TArray = typename arrow::TypeTraits::ArrayType; const TArray& Data; + public: using TValue = TValueExt; auto GetValue(const ui32 idx) const { @@ -72,6 +139,7 @@ class TBinaryArrayAccessor { private: using TArray = typename arrow::TypeTraits::ArrayType; const TArray& Data; + public: using TValue = TValueExt; const char* GetValueView(const ui32 idx) const { @@ -89,6 +157,7 @@ class TDictionaryArrayAccessor { using TDictionary = typename arrow::TypeTraits::ArrayType; const TDictionary& Dictionary; const TIndices& Indices; + public: using TValue = TDictionaryValue; auto GetValue(const ui32 idx) const { @@ -108,6 +177,7 @@ class TBinaryDictionaryArrayAccessor { const TDictionary& Dictionary; const TIndices& Indices; std::vector DictionaryStrings; + public: using TValue = TDictionaryValue; const char* GetValueView(const ui32 idx) const { @@ -116,8 +186,7 @@ class TBinaryDictionaryArrayAccessor { TBinaryDictionaryArrayAccessor(const TDictionary& dictionary, const TIndices& 
indices) : Dictionary(dictionary) - , Indices(indices) - { + , Indices(indices) { DictionaryStrings.reserve(Dictionary.length()); for (i64 idx = 0; idx < Dictionary.length(); ++idx) { auto sView = Dictionary.Value(idx); @@ -126,4 +195,4 @@ class TBinaryDictionaryArrayAccessor { } }; -} +} // namespace NKikimr::NArrow::NConstruction diff --git a/ydb/core/formats/arrow/size_calcer.cpp b/ydb/core/formats/arrow/size_calcer.cpp index c718b7807410..283d0ff2d3c1 100644 --- a/ydb/core/formats/arrow/size_calcer.cpp +++ b/ydb/core/formats/arrow/size_calcer.cpp @@ -242,12 +242,15 @@ ui64 GetArrayDataSize(const std::shared_ptr& column) { } NKikimr::NArrow::TSerializedBatch TSerializedBatch::Build(std::shared_ptr batch, const TBatchSplitttingContext& context) { - std::optional specialKeys; + std::optional specialKeysPayload; + std::optional specialKeysFull; if (context.GetFieldsForSpecialKeys().size()) { - specialKeys = TFirstLastSpecialKeys(batch, context.GetFieldsForSpecialKeys()).SerializeToString(); + TFirstLastSpecialKeys specialKeys(batch, context.GetFieldsForSpecialKeys()); + specialKeysPayload = specialKeys.SerializePayloadToString(); + specialKeysFull = specialKeys.SerializeFullToString(); } - return TSerializedBatch(NArrow::SerializeSchema(*batch->schema()), NArrow::SerializeBatchNoCompression(batch), batch->num_rows(), - NArrow::GetBatchDataSize(batch), specialKeys); + return TSerializedBatch(NArrow::SerializeBatchNoCompression(batch), batch->num_rows(), + NArrow::GetBatchDataSize(batch), specialKeysPayload, specialKeysFull); } TConclusionStatus TSerializedBatch::BuildWithLimit(std::shared_ptr batch, const TBatchSplitttingContext& context, std::optional& sbL, std::optional& sbR) { @@ -291,7 +294,7 @@ TConclusion> TSerializedBatch::BuildWithLimit(std: } TString TSerializedBatch::DebugString() const { - return TStringBuilder() << "(data_size=" << Data.size() << ";schema_data_size=" << SchemaData.size() << ";rows_count=" << RowsCount << ";raw_bytes=" << RawBytes << 
";)"; + return TStringBuilder() << "(data_size=" << Data.size() << ";rows_count=" << RowsCount << ";raw_bytes=" << RawBytes << ";)"; } } diff --git a/ydb/core/formats/arrow/size_calcer.h b/ydb/core/formats/arrow/size_calcer.h index d260427ade3a..3ae1c212405e 100644 --- a/ydb/core/formats/arrow/size_calcer.h +++ b/ydb/core/formats/arrow/size_calcer.h @@ -70,23 +70,29 @@ class TBatchSplitttingContext { class TSerializedBatch { private: - YDB_READONLY_DEF(TString, SchemaData); YDB_READONLY_DEF(TString, Data); YDB_READONLY(ui32, RowsCount, 0); YDB_READONLY(ui32, RawBytes, 0); - std::optional SpecialKeys; + std::optional SpecialKeysFull; + std::optional SpecialKeysPayload; + public: size_t GetSize() const { return Data.size(); } - const TString& GetSpecialKeysSafe() const { - AFL_VERIFY(SpecialKeys); - return *SpecialKeys; + const TString& GetSpecialKeysPayloadSafe() const { + AFL_VERIFY(SpecialKeysPayload); + return *SpecialKeysPayload; + } + + const TString& GetSpecialKeysFullSafe() const { + AFL_VERIFY(SpecialKeysFull); + return *SpecialKeysFull; } bool HasSpecialKeys() const { - return !!SpecialKeys; + return !!SpecialKeysFull; } TString DebugString() const; @@ -95,14 +101,14 @@ class TSerializedBatch { static TConclusionStatus BuildWithLimit(std::shared_ptr batch, const TBatchSplitttingContext& context, std::optional& sbL, std::optional& sbR); static TSerializedBatch Build(std::shared_ptr batch, const TBatchSplitttingContext& context); - TSerializedBatch(TString&& schemaData, TString&& data, const ui32 rowsCount, const ui32 rawBytes, const std::optional& specialKeys) - : SchemaData(schemaData) - , Data(data) + TSerializedBatch(TString&& data, const ui32 rowsCount, const ui32 rawBytes, + const std::optional& specialKeysPayload, const std::optional& specialKeysFull) + : Data(data) , RowsCount(rowsCount) , RawBytes(rawBytes) - , SpecialKeys(specialKeys) - { - + , SpecialKeysFull(specialKeysFull) + , SpecialKeysPayload(specialKeysPayload) { + 
AFL_VERIFY(!!SpecialKeysPayload == !!SpecialKeysFull); } }; diff --git a/ydb/core/formats/arrow/special_keys.cpp b/ydb/core/formats/arrow/special_keys.cpp index 0b97fb3f25ed..0745fad0e559 100644 --- a/ydb/core/formats/arrow/special_keys.cpp +++ b/ydb/core/formats/arrow/special_keys.cpp @@ -27,12 +27,12 @@ NKikimr::NArrow::TReplaceKey TSpecialKeys::GetKeyByIndex(const ui32 position, co } } -TString TSpecialKeys::SerializeToString() const { - return NArrow::NSerialization::TSerializerContainer::GetDefaultSerializer()->SerializeFull(Data); +TString TSpecialKeys::SerializePayloadToString() const { + return NArrow::NSerialization::TSerializerContainer::GetFastestSerializer()->SerializePayload(Data); } -TString TSpecialKeys::SerializeToStringDataOnlyNoCompression() const { - return NArrow::SerializeBatchNoCompression(Data); +TString TSpecialKeys::SerializeFullToString() const { + return NArrow::NSerialization::TSerializerContainer::GetFastestSerializer()->SerializeFull(Data); } ui64 TSpecialKeys::GetMemoryBytes() const { @@ -50,13 +50,17 @@ TFirstLastSpecialKeys::TFirstLastSpecialKeys(const std::shared_ptr indexes = {0}; - if (batch->num_rows() > 1) { - indexes.emplace_back(batch->num_rows() - 1); - } + if (keyBatch->num_rows() <= 2) { + Data = keyBatch; + } else { + std::vector indexes = { 0 }; + if (batch->num_rows() > 1) { + indexes.emplace_back(batch->num_rows() - 1); + } - Data = NArrow::CopyRecords(keyBatch, indexes); - Y_ABORT_UNLESS(Data->num_rows() == 1 || Data->num_rows() == 2); + Data = NArrow::CopyRecords(keyBatch, indexes); + Y_ABORT_UNLESS(Data->num_rows() == 1 || Data->num_rows() == 2); + } } TMinMaxSpecialKeys::TMinMaxSpecialKeys(std::shared_ptr batch, const std::shared_ptr& schema) { diff --git a/ydb/core/formats/arrow/special_keys.h b/ydb/core/formats/arrow/special_keys.h index d56e658fbb68..e7ac96eaf9f9 100644 --- a/ydb/core/formats/arrow/special_keys.h +++ b/ydb/core/formats/arrow/special_keys.h @@ -22,8 +22,6 @@ class TSpecialKeys { public: ui64 
GetMemoryBytes() const; - TString SerializeToStringDataOnlyNoCompression() const; - TSpecialKeys(const TString& data, const std::shared_ptr& schema) { Data = NArrow::DeserializeBatch(data, schema); Y_ABORT_UNLESS(Data); @@ -34,7 +32,8 @@ class TSpecialKeys { Y_ABORT_UNLESS(DeserializeFromString(data)); } - TString SerializeToString() const; + TString SerializePayloadToString() const; + TString SerializeFullToString() const; ui64 GetMemorySize() const; }; diff --git a/ydb/core/formats/arrow/splitter/scheme_info.cpp b/ydb/core/formats/arrow/splitter/scheme_info.cpp new file mode 100644 index 000000000000..35a8fcc5c035 --- /dev/null +++ b/ydb/core/formats/arrow/splitter/scheme_info.cpp @@ -0,0 +1,13 @@ +#include "scheme_info.h" + +namespace NKikimr::NArrow::NSplitter { + +NAccessor::TColumnSaver ISchemaDetailInfo::GetColumnSaver(const ui32 columnId) const { + auto saver = DoGetColumnSaver(columnId); + if (OverrideSerializer) { + saver.AddSerializerWithBorder(Max(), *OverrideSerializer); + } + return saver; +} + +} diff --git a/ydb/core/tx/columnshard/splitter/scheme_info.h b/ydb/core/formats/arrow/splitter/scheme_info.h similarity index 64% rename from ydb/core/tx/columnshard/splitter/scheme_info.h rename to ydb/core/formats/arrow/splitter/scheme_info.h index 1e72e63e9d35..400bdfcc7862 100644 --- a/ydb/core/tx/columnshard/splitter/scheme_info.h +++ b/ydb/core/formats/arrow/splitter/scheme_info.h @@ -1,26 +1,29 @@ #pragma once #include "stats.h" -#include +#include +#include #include -namespace NKikimr::NOlap { +namespace NKikimr::NArrow::NSplitter { class ISchemaDetailInfo { private: - YDB_ACCESSOR_DEF(std::optional, OverrideSerializer); + YDB_ACCESSOR_DEF(std::optional, OverrideSerializer); + protected: - virtual TColumnSaver DoGetColumnSaver(const ui32 columnId) const = 0; + virtual NAccessor::TColumnSaver DoGetColumnSaver(const ui32 columnId) const = 0; + public: using TPtr = std::shared_ptr; virtual ~ISchemaDetailInfo() = default; virtual ui32 GetColumnId(const 
std::string& fieldName) const = 0; - TColumnSaver GetColumnSaver(const ui32 columnId) const; + NAccessor::TColumnSaver GetColumnSaver(const ui32 columnId) const; virtual std::shared_ptr GetField(const ui32 columnId) const = 0; virtual std::optional GetColumnSerializationStats(const ui32 columnId) const = 0; virtual bool NeedMinMaxForColumn(const ui32 columnId) const = 0; virtual bool IsSortedColumn(const ui32 columnId) const = 0; virtual std::optional GetBatchSerializationStats(const std::shared_ptr& rb) const = 0; }; -} +} // namespace NKikimr::NArrow::NSplitter diff --git a/ydb/core/formats/arrow/splitter/similar_packer.cpp b/ydb/core/formats/arrow/splitter/similar_packer.cpp new file mode 100644 index 000000000000..94395e18c3ee --- /dev/null +++ b/ydb/core/formats/arrow/splitter/similar_packer.cpp @@ -0,0 +1,5 @@ +#include "similar_packer.h" + +namespace NKikimr::NArrow::NSplitter { + +} diff --git a/ydb/core/tx/columnshard/splitter/similar_packer.h b/ydb/core/formats/arrow/splitter/similar_packer.h similarity index 98% rename from ydb/core/tx/columnshard/splitter/similar_packer.h rename to ydb/core/formats/arrow/splitter/similar_packer.h index 54abde2640fb..1fdfdf3e7a83 100644 --- a/ydb/core/tx/columnshard/splitter/similar_packer.h +++ b/ydb/core/formats/arrow/splitter/similar_packer.h @@ -3,7 +3,7 @@ #include -namespace NKikimr::NOlap { +namespace NKikimr::NArrow::NSplitter { template class TArrayView { diff --git a/ydb/core/tx/columnshard/splitter/simple.cpp b/ydb/core/formats/arrow/splitter/simple.cpp similarity index 78% rename from ydb/core/tx/columnshard/splitter/simple.cpp rename to ydb/core/formats/arrow/splitter/simple.cpp index 0af14f0ff9b4..8af1ca704212 100644 --- a/ydb/core/tx/columnshard/splitter/simple.cpp +++ b/ydb/core/formats/arrow/splitter/simple.cpp @@ -1,27 +1,11 @@ #include "simple.h" -#include + #include #include -#include - -namespace NKikimr::NOlap { - -std::vector> TSplittedColumnChunk::DoInternalSplitImpl(const TColumnSaver& saver, 
const std::shared_ptr& counters, const std::vector& splitSizes) const { - auto chunks = TSimpleSplitter(saver, counters).SplitBySizes(Data.GetSlicedBatch(), Data.GetSerializedChunk(), splitSizes); - std::vector> newChunks; - for (auto&& i : chunks) { - newChunks.emplace_back(std::make_shared(GetColumnId(), i, SchemaInfo)); - } - return newChunks; -} -TString TSplittedColumnChunk::DoDebugString() const { - return TStringBuilder() << "records_count=" << GetRecordsCount() << ";data=" << NArrow::DebugJson(Data.GetSlicedBatch(), 3, 3) << ";"; -} +#include -ui64 TSplittedColumnChunk::DoGetRawBytesImpl() const { - return NArrow::GetBatchDataSize(Data.GetSlicedBatch()); -} +namespace NKikimr::NArrow::NSplitter { std::vector TSimpleSplitter::Split(const std::shared_ptr& data, const std::shared_ptr& field, const ui32 maxBlobSize) const { AFL_VERIFY(data); @@ -38,27 +22,27 @@ class TSplitChunk { ui32 SplitFactor = 0; ui32 Iterations = 0; ui32 MaxBlobSize = 8 * 1024 * 1024; - TColumnSaver ColumnSaver; - std::shared_ptr Counters; + NAccessor::TColumnSaver ColumnSaver; + public: - TSplitChunk(const ui32 baseSplitFactor, const ui32 maxBlobSize, const std::shared_ptr& data, const TColumnSaver& columnSaver, const std::shared_ptr& counters) + TSplitChunk(const ui32 baseSplitFactor, const ui32 maxBlobSize, const std::shared_ptr& data, + const NAccessor::TColumnSaver& columnSaver) : Data(data) , SplitFactor(baseSplitFactor) , MaxBlobSize(maxBlobSize) , ColumnSaver(columnSaver) - , Counters(counters) { AFL_VERIFY(Data && Data->num_rows()); AFL_VERIFY(SplitFactor); } - TSplitChunk(const ui32 baseSplitFactor, const ui32 maxBlobSize, const std::shared_ptr& data, TString&& serializedData, const TColumnSaver& columnSaver, const std::shared_ptr& counters) + TSplitChunk(const ui32 baseSplitFactor, const ui32 maxBlobSize, const std::shared_ptr& data, TString&& serializedData, + const NAccessor::TColumnSaver& columnSaver) : Data(data) , Result(TSaverSplittedChunk(data, 
std::move(serializedData))) , SplitFactor(baseSplitFactor) , MaxBlobSize(maxBlobSize) , ColumnSaver(columnSaver) - , Counters(counters) { AFL_VERIFY(Data && Data->num_rows()); AFL_VERIFY(SplitFactor); @@ -68,18 +52,17 @@ class TSplitChunk { while (true) { AFL_VERIFY(!Result); AFL_VERIFY(++Iterations < 100); - AFL_VERIFY(SplitFactor <= Data->num_rows())("factor", SplitFactor)("records", Data->num_rows())("iteration", Iterations)("size", NArrow::GetBatchDataSize(Data)); + AFL_VERIFY(SplitFactor <= Data->num_rows())("factor", SplitFactor)("records", Data->num_rows())("iteration", Iterations)( + "size", NArrow::GetBatchDataSize(Data)); bool found = false; std::vector result; if (SplitFactor == 1) { TString blob = ColumnSaver.Apply(Data); if (blob.size() < MaxBlobSize) { - Counters->SimpleSplitter.OnCorrectSerialized(blob.size()); Result = TSaverSplittedChunk(Data, std::move(blob)); found = true; result.emplace_back(*this); } else { - Counters->SimpleSplitter.OnTrashSerialized(blob.size()); TBatchSerializationStat stats(blob.size(), Data->num_rows(), NArrow::GetBatchDataSize(Data)); SplitFactor = stats.PredictOptimalSplitFactor(Data->num_rows(), MaxBlobSize).value_or(1); if (SplitFactor == 1) { @@ -98,7 +81,6 @@ class TSplitChunk { auto slice = Data->Slice(it.GetPosition(), it.GetCurrentPackSize()); TString blob = ColumnSaver.Apply(slice); if (blob.size() >= MaxBlobSize) { - Counters->SimpleSplitter.OnTrashSerialized(blob.size()); if (!badStartPosition) { badStartPosition = it.GetPosition(); } @@ -107,25 +89,24 @@ class TSplitChunk { ++badBatchCount; Y_ABORT_UNLESS(!linearSplitting.IsMinimalGranularity()); } else { - Counters->SimpleSplitter.OnCorrectSerialized(blob.size()); if (badStartPosition) { AFL_VERIFY(badBatchRecordsCount && badBatchCount)("count", badBatchCount)("records", badBatchRecordsCount); auto badSlice = Data->Slice(*badStartPosition, badBatchRecordsCount); TBatchSerializationStat stats(badBatchSerializedSize, badBatchRecordsCount, Max()); - 
result.emplace_back(std::max(stats.PredictOptimalSplitFactor(badBatchRecordsCount, MaxBlobSize).value_or(1), badBatchCount) + 1, MaxBlobSize, badSlice, ColumnSaver, Counters); + result.emplace_back(std::max(stats.PredictOptimalSplitFactor(badBatchRecordsCount, MaxBlobSize).value_or(1), badBatchCount) + 1, MaxBlobSize, badSlice, ColumnSaver); badStartPosition = {}; badBatchRecordsCount = 0; badBatchCount = 0; badBatchSerializedSize = 0; } found = true; - result.emplace_back(1, MaxBlobSize, slice, std::move(blob), ColumnSaver, Counters); + result.emplace_back(1, MaxBlobSize, slice, std::move(blob), ColumnSaver); } } if (badStartPosition) { auto badSlice = Data->Slice(*badStartPosition, badBatchRecordsCount); TBatchSerializationStat stats(badBatchSerializedSize, badBatchRecordsCount, Max()); - result.emplace_back(std::max(stats.PredictOptimalSplitFactor(badBatchRecordsCount, MaxBlobSize).value_or(1), badBatchCount) + 1, MaxBlobSize, badSlice, ColumnSaver, Counters); + result.emplace_back(std::max(stats.PredictOptimalSplitFactor(badBatchRecordsCount, MaxBlobSize).value_or(1), badBatchCount) + 1, MaxBlobSize, badSlice, ColumnSaver); } ++SplitFactor; } @@ -139,9 +120,8 @@ class TSplitChunk { }; std::vector TSimpleSplitter::Split(const std::shared_ptr& data, const ui32 maxBlobSize) const { - AFL_VERIFY(data->num_columns() == 1); AFL_VERIFY(data->num_rows()); - TSplitChunk baseChunk(Stats ? Stats->PredictOptimalSplitFactor(data->num_rows(), maxBlobSize).value_or(1) : 1, maxBlobSize, data, ColumnSaver, Counters); + TSplitChunk baseChunk(Stats ? 
Stats->PredictOptimalSplitFactor(data->num_rows(), maxBlobSize).value_or(1) : 1, maxBlobSize, data, ColumnSaver); std::vector chunks = {baseChunk}; for (auto it = chunks.begin(); it != chunks.end(); ) { AFL_VERIFY(chunks.size() < 100); @@ -217,12 +197,4 @@ std::vector TSimpleSplitter::SplitBySizes(std::shared_ptr TSaverSplittedChunk::GetFirstScalar() const { - return NArrow::TStatusValidator::GetValid(SlicedBatch->column(0)->GetScalar(0)); -} - -std::shared_ptr TSaverSplittedChunk::GetLastScalar() const { - return NArrow::TStatusValidator::GetValid(SlicedBatch->column(0)->GetScalar(GetRecordsCount() - 1)); -} - } diff --git a/ydb/core/tx/columnshard/splitter/simple.h b/ydb/core/formats/arrow/splitter/simple.h similarity index 57% rename from ydb/core/tx/columnshard/splitter/simple.h rename to ydb/core/formats/arrow/splitter/simple.h index 01467190373d..5be5c0b9b5d2 100644 --- a/ydb/core/tx/columnshard/splitter/simple.h +++ b/ydb/core/formats/arrow/splitter/simple.h @@ -1,51 +1,25 @@ #pragma once #include -#include -#include #include "stats.h" -#include "chunks.h" #include "scheme_info.h" -namespace NKikimr::NOlap { +namespace NKikimr::NArrow::NSplitter { class TSaverSplittedChunk { private: YDB_READONLY_DEF(std::shared_ptr, SlicedBatch); YDB_READONLY_DEF(TString, SerializedChunk); public: - std::shared_ptr GetColumn() const { - return SlicedBatch->column(0); - } - ui32 GetRecordsCount() const { return SlicedBatch->num_rows(); } - std::shared_ptr GetFirstScalar() const; - std::shared_ptr GetLastScalar() const; - TSaverSplittedChunk(const std::shared_ptr& batch, TString&& serializedChunk) : SlicedBatch(batch) , SerializedChunk(std::move(serializedChunk)) { Y_ABORT_UNLESS(SlicedBatch); - Y_ABORT_UNLESS(SlicedBatch->num_columns() == 1); Y_ABORT_UNLESS(SlicedBatch->num_rows()); } - - bool IsCompatibleColumn(const std::shared_ptr& f) const { - if (!SlicedBatch) { - return false; - } - if (SlicedBatch->num_columns() != 1) { - 
AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "unexpected columns count")("expectation", 1)("actual", SlicedBatch->num_columns()); - return false; - } - if (!SlicedBatch->schema()->fields().front()->Equals(f)) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "unexpected column type")("expectation", f->ToString())("actual", SlicedBatch->schema()->fields().front()->ToString()); - return false; - } - return true; - } }; class TLinearSplitInfo { @@ -109,13 +83,11 @@ class TLinearSplitInfo { class TSimpleSplitter { private: - TColumnSaver ColumnSaver; + NAccessor::TColumnSaver ColumnSaver; YDB_ACCESSOR_DEF(std::optional, Stats); - std::shared_ptr Counters; public: - explicit TSimpleSplitter(const TColumnSaver& columnSaver, std::shared_ptr counters) + explicit TSimpleSplitter(const NAccessor::TColumnSaver& columnSaver) : ColumnSaver(columnSaver) - , Counters(counters) { } @@ -146,56 +118,4 @@ class TSimpleSplitter { std::vector SplitBySizes(std::shared_ptr data, const TString& dataSerialization, const std::vector& splitPartSizesExt) const; }; -class TSplittedColumnChunk: public IPortionColumnChunk { -private: - using TBase = IPortionColumnChunk; - TSaverSplittedChunk Data; - ISchemaDetailInfo::TPtr SchemaInfo; -protected: - virtual std::vector> DoInternalSplitImpl(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const override; - virtual const TString& DoGetData() const override { - return Data.GetSerializedChunk(); - } - virtual ui64 DoGetRawBytesImpl() const override; - - virtual ui32 DoGetRecordsCountImpl() const override { - return Data.GetRecordsCount(); - } - - virtual TString DoDebugString() const override; - - virtual TSimpleChunkMeta DoBuildSimpleChunkMeta() const override { - return TSimpleChunkMeta(Data.GetColumn(), SchemaInfo->NeedMinMaxForColumn(GetColumnId()), SchemaInfo->IsSortedColumn(GetColumnId())); - } - - virtual std::shared_ptr DoGetFirstScalar() const override { - return Data.GetFirstScalar(); - 
} - virtual std::shared_ptr DoGetLastScalar() const override { - return Data.GetLastScalar(); - } - virtual std::shared_ptr DoCopyWithAnotherBlob(TString&& /*data*/, const TSimpleColumnInfo& /*columnInfo*/) const override { - AFL_VERIFY(false); - return nullptr; - } - -public: - i64 GetSize() const { - return Data.GetSerializedChunk().size(); - } - - const TSaverSplittedChunk& GetData() const { - return Data; - } - - TSplittedColumnChunk() = default; - - TSplittedColumnChunk(const ui32 columnId, const TSaverSplittedChunk& data, ISchemaDetailInfo::TPtr schemaInfo) - : TBase(columnId) - , Data(data) - , SchemaInfo(schemaInfo) { - - } -}; - } diff --git a/ydb/core/tx/columnshard/splitter/stats.cpp b/ydb/core/formats/arrow/splitter/stats.cpp similarity index 95% rename from ydb/core/tx/columnshard/splitter/stats.cpp rename to ydb/core/formats/arrow/splitter/stats.cpp index 3cb468bec5ad..c815485ada8e 100644 --- a/ydb/core/tx/columnshard/splitter/stats.cpp +++ b/ydb/core/formats/arrow/splitter/stats.cpp @@ -1,7 +1,7 @@ #include "stats.h" #include -namespace NKikimr::NOlap { +namespace NKikimr::NArrow::NSplitter { std::optional TSerializationStats::GetStatsForRecordBatch(const std::shared_ptr& schema) const { std::optional result; diff --git a/ydb/core/tx/columnshard/splitter/stats.h b/ydb/core/formats/arrow/splitter/stats.h similarity index 99% rename from ydb/core/tx/columnshard/splitter/stats.h rename to ydb/core/formats/arrow/splitter/stats.h index 695a7ab32dd7..447e59b68ca0 100644 --- a/ydb/core/tx/columnshard/splitter/stats.h +++ b/ydb/core/formats/arrow/splitter/stats.h @@ -11,7 +11,7 @@ #include #include -namespace NKikimr::NOlap { +namespace NKikimr::NArrow::NSplitter { class TSimpleSerializationStat { protected: diff --git a/ydb/core/formats/arrow/splitter/ya.make b/ydb/core/formats/arrow/splitter/ya.make new file mode 100644 index 000000000000..078d7ea83737 --- /dev/null +++ b/ydb/core/formats/arrow/splitter/ya.make @@ -0,0 +1,17 @@ +LIBRARY() + +SRCS( + 
stats.cpp + simple.cpp + scheme_info.cpp + similar_packer.cpp +) + +PEERDIR( + contrib/libs/apache/arrow + ydb/library/actors/core + ydb/library/conclusion + ydb/core/formats/arrow/serializer +) + +END() diff --git a/ydb/core/formats/arrow/ssa_program_optimizer.cpp b/ydb/core/formats/arrow/ssa_program_optimizer.cpp index f55f63110f7b..ff1e5a5cb38c 100644 --- a/ydb/core/formats/arrow/ssa_program_optimizer.cpp +++ b/ydb/core/formats/arrow/ssa_program_optimizer.cpp @@ -1,5 +1,7 @@ #include "ssa_program_optimizer.h" +#include + namespace NKikimr::NSsa { namespace { @@ -11,7 +13,8 @@ void ReplaceCountAll(TProgram& program) { Y_ABORT_UNLESS(step); for (auto& groupBy : step->MutableGroupBy()) { - if (groupBy.GetOperation() == EAggregate::Count && groupBy.GetArguments().empty()) { + if (groupBy.GetOperation() == EAggregate::NumRows) { + AFL_VERIFY(groupBy.GetArguments().empty()); if (step->GetGroupByKeys().size()) { groupBy.MutableArguments().push_back(step->GetGroupByKeys()[0]); } else { diff --git a/ydb/core/formats/arrow/common/validation.cpp b/ydb/core/formats/arrow/validation/validation.cpp similarity index 100% rename from ydb/core/formats/arrow/common/validation.cpp rename to ydb/core/formats/arrow/validation/validation.cpp diff --git a/ydb/core/formats/arrow/validation/validation.h b/ydb/core/formats/arrow/validation/validation.h new file mode 100644 index 000000000000..f71f18ece59c --- /dev/null +++ b/ydb/core/formats/arrow/validation/validation.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include +#include + +namespace NKikimr::NArrow { + +class TStatusValidator { +public: + static void Validate(const arrow::Status& status); + + template + static T GetValid(const arrow::Result& result) { + Validate(result.status()); + return *result; + } + + template + static T GetValid(arrow::Result&& result) { + Validate(result.status()); + return std::move(*result); + } +}; + +} diff --git a/ydb/core/formats/arrow/validation/ya.make 
b/ydb/core/formats/arrow/validation/ya.make new file mode 100644 index 000000000000..e060fae10d8e --- /dev/null +++ b/ydb/core/formats/arrow/validation/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +PEERDIR( + contrib/libs/apache/arrow + ydb/library/actors/core +) + +SRCS( + validation.cpp +) + +END() diff --git a/ydb/core/formats/arrow/ya.make b/ydb/core/formats/arrow/ya.make index 49938a884154..8bb86947266e 100644 --- a/ydb/core/formats/arrow/ya.make +++ b/ydb/core/formats/arrow/ya.make @@ -7,11 +7,14 @@ LIBRARY() PEERDIR( contrib/libs/apache/arrow ydb/core/scheme + ydb/core/formats/arrow/accessor ydb/core/formats/arrow/serializer ydb/core/formats/arrow/simple_builder ydb/core/formats/arrow/dictionary ydb/core/formats/arrow/transformer ydb/core/formats/arrow/reader + ydb/core/formats/arrow/save_load + ydb/core/formats/arrow/splitter ydb/core/formats/arrow/modifier ydb/core/formats/arrow/scalar ydb/core/formats/arrow/hash diff --git a/ydb/core/grpc_services/rpc_load_rows.cpp b/ydb/core/grpc_services/rpc_load_rows.cpp index e780de7eea05..4b5cab1c5b1c 100644 --- a/ydb/core/grpc_services/rpc_load_rows.cpp +++ b/ydb/core/grpc_services/rpc_load_rows.cpp @@ -75,9 +75,12 @@ bool ConvertArrowToYdbPrimitive(const arrow::DataType& type, Ydb::Type& toType) case arrow::Type::DURATION: toType.set_type_id(Ydb::Type::INTERVAL); return true; - case arrow::Type::DECIMAL: - // TODO - return false; + case arrow::Type::DECIMAL: { + Ydb::DecimalType* decimalType = toType.mutable_decimal_type(); + decimalType->set_precision(22); + decimalType->set_scale(9); + return true; + } case arrow::Type::NA: case arrow::Type::HALF_FLOAT: case arrow::Type::FIXED_SIZE_BINARY: diff --git a/ydb/core/kqp/compute_actor/kqp_compute_actor.cpp b/ydb/core/kqp/compute_actor/kqp_compute_actor.cpp index 17db4f9fe87e..3fd8fd44ae56 100644 --- a/ydb/core/kqp/compute_actor/kqp_compute_actor.cpp +++ b/ydb/core/kqp/compute_actor/kqp_compute_actor.cpp @@ -131,7 +131,7 @@ namespace NKikimr::NKqp { using namespace NYql::NDq; 
using namespace NYql::NDqProto; -IActor* CreateKqpScanComputeActor(const TActorId& executerId, ui64 txId, ui64 lockTxId, ui32 lockNodeId, +IActor* CreateKqpScanComputeActor(const TActorId& executerId, ui64 txId, TMaybe lockTxId, ui32 lockNodeId, TDqTask* task, IDqAsyncIoFactory::TPtr asyncIoFactory, const NYql::NDq::TComputeRuntimeSettings& settings, const TComputeMemoryLimits& memoryLimits, NWilson::TTraceId traceId, TIntrusivePtr arena, TComputeActorSchedulingOptions schedulingOptions) { @@ -141,7 +141,7 @@ IActor* CreateKqpScanComputeActor(const TActorId& executerId, ui64 txId, ui64 lo IActor* CreateKqpScanFetcher(const NKikimrKqp::TKqpSnapshot& snapshot, std::vector&& computeActors, const NKikimrTxDataShard::TKqpTransaction::TScanTaskMeta& meta, const NYql::NDq::TComputeRuntimeSettings& settings, - const ui64 txId, ui64 lockTxId, ui32 lockNodeId, const TShardsScanningPolicy& shardsScanningPolicy, TIntrusivePtr counters, NWilson::TTraceId traceId) { + const ui64 txId, TMaybe lockTxId, ui32 lockNodeId, const TShardsScanningPolicy& shardsScanningPolicy, TIntrusivePtr counters, NWilson::TTraceId traceId) { return new NScanPrivate::TKqpScanFetcherActor(snapshot, settings, std::move(computeActors), txId, lockTxId, lockNodeId, meta, shardsScanningPolicy, counters, std::move(traceId)); } diff --git a/ydb/core/kqp/compute_actor/kqp_compute_actor.h b/ydb/core/kqp/compute_actor/kqp_compute_actor.h index 3cf258b3e64e..1a775d5807d7 100644 --- a/ydb/core/kqp/compute_actor/kqp_compute_actor.h +++ b/ydb/core/kqp/compute_actor/kqp_compute_actor.h @@ -51,14 +51,14 @@ IActor* CreateKqpComputeActor(const TActorId& executerId, ui64 txId, NYql::NDqPr TIntrusivePtr arena, const std::optional& federatedQuerySetup, const TGUCSettings::TPtr& GUCSettings, TComputeActorSchedulingOptions); -IActor* CreateKqpScanComputeActor(const TActorId& executerId, ui64 txId, ui64 lockTxId, ui32 lockNodeId, +IActor* CreateKqpScanComputeActor(const TActorId& executerId, ui64 txId, TMaybe lockTxId, ui32 
lockNodeId, NYql::NDqProto::TDqTask* task, NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, const NYql::NDq::TComputeRuntimeSettings& settings, const NYql::NDq::TComputeMemoryLimits& memoryLimits, NWilson::TTraceId traceId, TIntrusivePtr arena, TComputeActorSchedulingOptions); IActor* CreateKqpScanFetcher(const NKikimrKqp::TKqpSnapshot& snapshot, std::vector&& computeActors, const NKikimrTxDataShard::TKqpTransaction::TScanTaskMeta& meta, const NYql::NDq::TComputeRuntimeSettings& settings, - const ui64 txId, ui64 lockTxId, ui32 lockNodeId, const TShardsScanningPolicy& shardsScanningPolicy, TIntrusivePtr counters, NWilson::TTraceId traceId); + const ui64 txId, TMaybe lockTxId, ui32 lockNodeId, const TShardsScanningPolicy& shardsScanningPolicy, TIntrusivePtr counters, NWilson::TTraceId traceId); NYql::NDq::IDqAsyncIoFactory::TPtr CreateKqpAsyncIoFactory( TIntrusivePtr counters, diff --git a/ydb/core/kqp/compute_actor/kqp_compute_actor_factory.h b/ydb/core/kqp/compute_actor/kqp_compute_actor_factory.h index 3922b0fdaa76..9dcb44a35966 100644 --- a/ydb/core/kqp/compute_actor/kqp_compute_actor_factory.h +++ b/ydb/core/kqp/compute_actor/kqp_compute_actor_factory.h @@ -108,7 +108,7 @@ struct IKqpNodeComputeActorFactory { struct TCreateArgs { const NActors::TActorId& ExecuterId; const ui64 TxId; - const ui64 LockTxId; + const TMaybe LockTxId; const ui32 LockNodeId; NYql::NDqProto::TDqTask* Task; TIntrusivePtr TxInfo; diff --git a/ydb/core/kqp/compute_actor/kqp_compute_events.h b/ydb/core/kqp/compute_actor/kqp_compute_events.h index d8d42b76e69f..0adb559f132f 100644 --- a/ydb/core/kqp/compute_actor/kqp_compute_events.h +++ b/ydb/core/kqp/compute_actor/kqp_compute_events.h @@ -16,165 +16,176 @@ struct TLocksInfo { TVector BrokenLocks; }; -struct TEvKqpCompute { - struct TEvRemoteScanData : public TEventPB {}; +namespace NInternalImplementation { +struct TEvRemoteScanData: public TEventPB { +}; - class IShardScanStats { - public: - virtual ~IShardScanStats() = default; - 
virtual THashMap GetMetrics() const = 0; - }; +class IShardScanStats { +public: + virtual ~IShardScanStats() = default; + virtual THashMap GetMetrics() const = 0; +}; - /* - * Scan communications. - * - * TEvScanData is intentionally preserved as a local event for performance reasons: leaf compute - * actors are communicating with shard scans using this message, so big amount of unfiltered data - * is expected. However, it is possible that after query planning datashard would migrate to other - * node. To support scans in this case we provide serialization routines. For now such remote scan - * is considered as rare event and not worth of some fast serialization, so we just use protobuf. - * - * TEvScanDataAck follows the same pattern mostly for symmetry reasons. - */ - struct TEvScanData : public NActors::TEventLocal { - TEvScanData(const ui32 scanId, const ui32 generation = 0) - : ScanId(scanId) - , Generation(generation) - , Finished(false) {} - - std::optional AvailablePacks; - ui32 ScanId; - ui32 Generation; - TVector Rows; - std::shared_ptr ArrowBatch; - std::vector> SplittedBatches; - - TOwnedCellVec LastKey; - TDuration CpuTime; - TDuration WaitTime; - ui32 PageFaults = 0; // number of page faults occurred when filling in this message - bool RequestedBytesLimitReached = false; - bool Finished = false; - bool PageFault = false; // page fault was the reason for sending this message - mutable THolder Remote; - std::shared_ptr StatsOnFinished; - TLocksInfo LocksInfo; - - template - const T& GetStatsAs() const { - Y_ABORT_UNLESS(!!StatsOnFinished); - return VerifyDynamicCast(*StatsOnFinished); +/* + * Scan communications. + * + * TEvScanData is intentionally preserved as a local event for performance reasons: leaf compute + * actors are communicating with shard scans using this message, so big amount of unfiltered data + * is expected. However, it is possible that after query planning datashard would migrate to other + * node. 
To support scans in this case we provide serialization routines. For now such remote scan + * is considered as rare event and not worth of some fast serialization, so we just use protobuf. + * + * TEvScanDataAck follows the same pattern mostly for symmetry reasons. + */ +struct TEvScanData: public NActors::TEventLocal { + TEvScanData(const ui32 scanId, const ui32 generation = 0) + : ScanId(scanId) + , Generation(generation) + , Finished(false) { + } + + std::optional AvailablePacks; + ui32 ScanId; + ui32 Generation; + TVector Rows; + std::shared_ptr ArrowBatch; + std::vector> SplittedBatches; + + TOwnedCellVec LastKey; + TDuration CpuTime; + TDuration WaitTime; + ui32 PageFaults = 0; // number of page faults occurred when filling in this message + bool RequestedBytesLimitReached = false; + bool Finished = false; + bool PageFault = false; // page fault was the reason for sending this message + mutable THolder Remote; + std::shared_ptr StatsOnFinished; + TLocksInfo LocksInfo; + + template + const T& GetStatsAs() const { + Y_ABORT_UNLESS(!!StatsOnFinished); + return VerifyDynamicCast(*StatsOnFinished); + } + + template + bool CheckStatsIs() const { + auto p = dynamic_cast(StatsOnFinished.get()); + return p; + } + + ui32 GetRowsCount() const { + if (ArrowBatch) { + return ArrowBatch->num_rows(); + } else { + return Rows.size(); } + } - template - bool CheckStatsIs() const { - auto p = dynamic_cast(StatsOnFinished.get()); - return p; - } + bool IsEmpty() const { + return GetRowsCount() == 0; + } - ui32 GetRowsCount() const { - if (ArrowBatch) { - return ArrowBatch->num_rows(); - } else { - return Rows.size(); - } - } + bool IsSerializable() const override { + return true; + } - bool IsEmpty() const { - return GetRowsCount() == 0; - } + ui32 CalculateSerializedSize() const override { + InitRemote(); + return Remote->CalculateSerializedSizeCached(); + } - bool IsSerializable() const override { - return true; - } + bool SerializeToArcadiaStream(NActors::TChunkSerializer* 
chunker) const override { + InitRemote(); + return Remote->SerializeToArcadiaStream(chunker); + } - ui32 CalculateSerializedSize() const override { - InitRemote(); - return Remote->CalculateSerializedSizeCached(); + NKikimrDataEvents::EDataFormat GetDataFormat() const { + if (ArrowBatch != nullptr || SplittedBatches.size()) { + return NKikimrDataEvents::FORMAT_ARROW; } - - bool SerializeToArcadiaStream(NActors::TChunkSerializer* chunker) const override { - InitRemote(); - return Remote->SerializeToArcadiaStream(chunker); + return NKikimrDataEvents::FORMAT_CELLVEC; + } + + + static NActors::IEventBase* Load(TEventSerializedData* data) { + auto pbEv = THolder(static_cast(TEvRemoteScanData::Load(data))); + auto ev = MakeHolder(pbEv->Record.GetScanId(), pbEv->Record.GetGeneration()); + + ev->CpuTime = TDuration::MicroSeconds(pbEv->Record.GetCpuTimeUs()); + ev->WaitTime = TDuration::MilliSeconds(pbEv->Record.GetWaitTimeMs()); + ev->PageFault = pbEv->Record.GetPageFault(); + ev->PageFaults = pbEv->Record.GetPageFaults(); + ev->Finished = pbEv->Record.GetFinished(); + ev->RequestedBytesLimitReached = pbEv->Record.GetRequestedBytesLimitReached(); + ev->LastKey = TOwnedCellVec(TSerializedCellVec(pbEv->Record.GetLastKey()).GetCells()); + if (pbEv->Record.HasAvailablePacks()) { + ev->AvailablePacks = pbEv->Record.GetAvailablePacks(); } - NKikimrDataEvents::EDataFormat GetDataFormat() const { - if (ArrowBatch != nullptr || SplittedBatches.size()) { - return NKikimrDataEvents::FORMAT_ARROW; - } - return NKikimrDataEvents::FORMAT_CELLVEC; + auto rows = pbEv->Record.GetRows(); + ev->Rows.reserve(rows.size()); + for (const auto& row : rows) { + ev->Rows.emplace_back(TSerializedCellVec(row).GetCells()); } - - static NActors::IEventBase* Load(TEventSerializedData* data) { - auto pbEv = THolder(static_cast(TEvRemoteScanData::Load(data))); - auto ev = MakeHolder(pbEv->Record.GetScanId(), pbEv->Record.GetGeneration()); - - ev->CpuTime = 
TDuration::MicroSeconds(pbEv->Record.GetCpuTimeUs()); - ev->WaitTime = TDuration::MilliSeconds(pbEv->Record.GetWaitTimeMs()); - ev->PageFault = pbEv->Record.GetPageFault(); - ev->PageFaults = pbEv->Record.GetPageFaults(); - ev->Finished = pbEv->Record.GetFinished(); - ev->RequestedBytesLimitReached = pbEv->Record.GetRequestedBytesLimitReached(); - ev->LastKey = TOwnedCellVec(TSerializedCellVec(pbEv->Record.GetLastKey()).GetCells()); - if (pbEv->Record.HasAvailablePacks()) { - ev->AvailablePacks = pbEv->Record.GetAvailablePacks(); - } - - auto rows = pbEv->Record.GetRows(); - ev->Rows.reserve(rows.size()); - for (const auto& row: rows) { - ev->Rows.emplace_back(TSerializedCellVec(row).GetCells()); + if (pbEv->Record.HasArrowBatch()) { + auto batch = pbEv->Record.GetArrowBatch(); + auto schema = NArrow::DeserializeSchema(batch.GetSchema()); + ev->ArrowBatch = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({ NArrow::DeserializeBatch(batch.GetBatch(), schema) })); + } + return ev.Release(); + } + +private: + void InitRemote() const { + if (!Remote) { + Remote = MakeHolder(); + + Remote->Record.SetScanId(ScanId); + Remote->Record.SetGeneration(Generation); + Remote->Record.SetCpuTimeUs(CpuTime.MicroSeconds()); + Remote->Record.SetWaitTimeMs(WaitTime.MilliSeconds()); + Remote->Record.SetPageFaults(PageFaults); + Remote->Record.SetFinished(Finished); + Remote->Record.SetRequestedBytesLimitReached(RequestedBytesLimitReached); + Remote->Record.SetPageFaults(PageFaults); + Remote->Record.SetPageFault(PageFault); + Remote->Record.SetLastKey(TSerializedCellVec::Serialize(LastKey)); + if (AvailablePacks) { + Remote->Record.SetAvailablePacks(*AvailablePacks); } - if (pbEv->Record.HasArrowBatch()) { - auto batch = pbEv->Record.GetArrowBatch(); - auto schema = NArrow::DeserializeSchema(batch.GetSchema()); - ev->ArrowBatch = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({NArrow::DeserializeBatch(batch.GetBatch(), schema)})); + switch 
(GetDataFormat()) { + case NKikimrDataEvents::FORMAT_UNSPECIFIED: + case NKikimrDataEvents::FORMAT_CELLVEC: { + Remote->Record.MutableRows()->Reserve(Rows.size()); + for (const auto& row : Rows) { + Remote->Record.AddRows(TSerializedCellVec::Serialize(row)); + } + break; + } + case NKikimrDataEvents::FORMAT_ARROW: { + Y_DEBUG_ABORT_UNLESS(ArrowBatch != nullptr); + auto* protoArrowBatch = Remote->Record.MutableArrowBatch(); + protoArrowBatch->SetSchema(NArrow::SerializeSchema(*ArrowBatch->schema())); + protoArrowBatch->SetBatch(NArrow::SerializeBatchNoCompression(NArrow::ToBatch(ArrowBatch, true))); + break; + } } - return ev.Release(); } + } +}; - private: - void InitRemote() const { - if (!Remote) { - Remote = MakeHolder(); +} - Remote->Record.SetScanId(ScanId); - Remote->Record.SetGeneration(Generation); - Remote->Record.SetCpuTimeUs(CpuTime.MicroSeconds()); - Remote->Record.SetWaitTimeMs(WaitTime.MilliSeconds()); - Remote->Record.SetPageFaults(PageFaults); - Remote->Record.SetFinished(Finished); - Remote->Record.SetRequestedBytesLimitReached(RequestedBytesLimitReached); - Remote->Record.SetPageFaults(PageFaults); - Remote->Record.SetPageFault(PageFault); - Remote->Record.SetLastKey(TSerializedCellVec::Serialize(LastKey)); - if (AvailablePacks) { - Remote->Record.SetAvailablePacks(*AvailablePacks); - } +struct TEvKqpCompute { + using TEvRemoteScanData = NInternalImplementation::TEvRemoteScanData; - switch (GetDataFormat()) { - case NKikimrDataEvents::FORMAT_UNSPECIFIED: - case NKikimrDataEvents::FORMAT_CELLVEC: { - Remote->Record.MutableRows()->Reserve(Rows.size()); - for (const auto& row: Rows) { - Remote->Record.AddRows(TSerializedCellVec::Serialize(row)); - } - break; - } - case NKikimrDataEvents::FORMAT_ARROW: { - Y_DEBUG_ABORT_UNLESS(ArrowBatch != nullptr); - auto* protoArrowBatch = Remote->Record.MutableArrowBatch(); - protoArrowBatch->SetSchema(NArrow::SerializeSchema(*ArrowBatch->schema())); - 
protoArrowBatch->SetBatch(NArrow::SerializeBatchNoCompression(NArrow::ToBatch(ArrowBatch, true))); - break; - } - } - } - } - }; + using IShardScanStats = NInternalImplementation::IShardScanStats; + + using TEvScanData = NInternalImplementation::TEvScanData; struct TEvRemoteScanDataAck: public NActors::TEventPB { @@ -253,5 +264,4 @@ struct TEvKqpCompute { TKqpComputeEvents::EvKillScanTablet> {}; }; - } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.cpp b/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.cpp index 72d0e2af10a7..07c3ab88ed02 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.cpp +++ b/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.cpp @@ -23,7 +23,7 @@ static constexpr TDuration RL_MAX_BATCH_DELAY = TDuration::Seconds(50); } // anonymous namespace -TKqpScanComputeActor::TKqpScanComputeActor(TComputeActorSchedulingOptions cpuOptions, const TActorId& executerId, ui64 txId, ui64 lockTxId, ui32 lockNodeId, +TKqpScanComputeActor::TKqpScanComputeActor(TComputeActorSchedulingOptions cpuOptions, const TActorId& executerId, ui64 txId, TMaybe lockTxId, ui32 lockNodeId, NDqProto::TDqTask* task, IDqAsyncIoFactory::TPtr asyncIoFactory, const TComputeRuntimeSettings& settings, const TComputeMemoryLimits& memoryLimits, NWilson::TTraceId traceId, TIntrusivePtr arena) @@ -149,14 +149,14 @@ void TKqpScanComputeActor::Handle(TEvScanExchange::TEvSendData::TPtr& ev) { for (const auto& lock : msg.GetLocksInfo().Locks) { Locks.insert(lock); } - for (const auto& lock : msg.GetLocksInfo().Locks) { + for (const auto& lock : msg.GetLocksInfo().BrokenLocks) { BrokenLocks.insert(lock); } auto guard = TaskRunner->BindAllocator(); if (!!msg.GetArrowBatch()) { ScanData->AddData(NMiniKQL::TBatchDataAccessor(msg.GetArrowBatch(), std::move(msg.MutableDataIndexes())), msg.GetTabletId(), TaskRunner->GetHolderFactory()); - } else { + } else if (!msg.GetRows().empty()) { ScanData->AddData(std::move(msg.MutableRows()), 
msg.GetTabletId(), TaskRunner->GetHolderFactory()); } if (IsQuotingEnabled()) { diff --git a/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.h b/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.h index 7dbb9f4f8252..b89d6aee71a4 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.h +++ b/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.h @@ -11,7 +11,6 @@ namespace NKikimr::NKqp::NScanPrivate { class TKqpScanComputeActor: public TSchedulableComputeActorBase { private: using TBase = TSchedulableComputeActorBase; - NMiniKQL::TKqpScanComputeContext ComputeCtx; NKikimrTxDataShard::TKqpTransaction::TScanTaskMeta Meta; using TBase::TaskRunner; @@ -23,7 +22,7 @@ class TKqpScanComputeActor: public TSchedulableComputeActorBase Fetchers; NMiniKQL::TKqpScanComputeContext::TScanData* ScanData = nullptr; - const ui64 LockTxId; + const TMaybe LockTxId; const ui32 LockNodeId; struct TLockHash { @@ -65,7 +64,7 @@ class TKqpScanComputeActor: public TSchedulableComputeActorBase lockTxId, ui32 lockNodeId, NYql::NDqProto::TDqTask* task, NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, const NYql::NDq::TComputeRuntimeSettings& settings, const NYql::NDq::TComputeMemoryLimits& memoryLimits, NWilson::TTraceId traceId, TIntrusivePtr arena); diff --git a/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.cpp b/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.cpp index 0a9aaf6dbe31..7eab5ef78196 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.cpp +++ b/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.cpp @@ -20,14 +20,13 @@ std::vector> TShardScannerInfo::OnReceiveData( } else { Finished = true; } - if (data.IsEmpty()) { - AFL_ENSURE(data.Finished); - return {}; - } AFL_ENSURE(ActorId); AFL_ENSURE(!DataChunksInFlightCount)("data_chunks_in_flightCount", DataChunksInFlightCount); std::vector> result; - if (data.SplittedBatches.size() > 1) { + if (data.IsEmpty()) { + AFL_ENSURE(data.Finished); + result.emplace_back(std::make_unique(selfPtr, 
std::make_unique(TabletId, data.LocksInfo))); + } else if (data.SplittedBatches.size() > 1) { ui32 idx = 0; AFL_ENSURE(data.ArrowBatch); for (auto&& i : data.SplittedBatches) { diff --git a/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h b/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h index 1e4d9ac58937..2d684d2f6b09 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h +++ b/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h @@ -23,6 +23,7 @@ class TComputeTaskData; class TShardScannerInfo { private: std::optional ActorId; + const ui64 ScanId; const ui64 TabletId; const ui64 Generation; i64 DataChunksInFlightCount = 0; @@ -51,15 +52,16 @@ class TShardScannerInfo { } } public: - TShardScannerInfo(TShardState& state, const IExternalObjectsProvider& externalObjectsProvider) - : TabletId(state.TabletId) + TShardScannerInfo(const ui64 scanId, TShardState& state, const IExternalObjectsProvider& externalObjectsProvider) + : ScanId(scanId) + , TabletId(state.TabletId) , Generation(++state.Generation) { const bool subscribed = std::exchange(state.SubscribedOnTablet, true); const auto& keyColumnTypes = externalObjectsProvider.GetKeyColumnTypes(); auto ranges = state.GetScanRanges(keyColumnTypes); - auto ev = externalObjectsProvider.BuildEvKqpScan(0, Generation, ranges); + auto ev = externalObjectsProvider.BuildEvKqpScan(ScanId, Generation, ranges); AFL_DEBUG(NKikimrServices::KQP_COMPUTE)("event", "start_scanner")("tablet_id", TabletId)("generation", Generation) ("info", state.ToString(keyColumnTypes))("range", DebugPrintRanges(keyColumnTypes, ranges, *AppData()->TypeRegistry)) @@ -250,6 +252,7 @@ class TInFlightShards: public NComputeActor::TScanShardsStatistics { THashMap ShardsByActorId; bool IsActiveFlag = true; THashMap> ShardScanners; + const ui64 ScanId; const IExternalObjectsProvider& ExternalObjectsProvider; public: @@ -313,7 +316,7 @@ class TInFlightShards: public NComputeActor::TScanShardsStatistics { AFL_ENSURE(state.TabletId); 
AFL_ENSURE(!state.ActorId)("actor_id", state.ActorId); state.State = NComputeActor::EShardState::Starting; - auto newScanner = std::make_shared(state, ExternalObjectsProvider); + auto newScanner = std::make_shared(ScanId, state, ExternalObjectsProvider); AFL_ENSURE(ShardScanners.emplace(state.TabletId, newScanner).second); } @@ -356,8 +359,9 @@ class TInFlightShards: public NComputeActor::TScanShardsStatistics { return nullptr; } - TInFlightShards(const IExternalObjectsProvider& externalObjectsProvider) - : ExternalObjectsProvider(externalObjectsProvider) + TInFlightShards(const ui64 scanId, const IExternalObjectsProvider& externalObjectsProvider) + : ScanId(scanId) + , ExternalObjectsProvider(externalObjectsProvider) { } bool IsActive() const { diff --git a/ydb/core/kqp/compute_actor/kqp_scan_events.h b/ydb/core/kqp/compute_actor/kqp_scan_events.h index cd91f7f4dae8..08ac0e535a88 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_events.h +++ b/ydb/core/kqp/compute_actor/kqp_scan_events.h @@ -74,6 +74,11 @@ struct TEvScanExchange { , LocksInfo(locksInfo) { Y_ABORT_UNLESS(Rows.size()); } + + TEvSendData(const ui64 tabletId, const TLocksInfo& locksInfo) + : TabletId(tabletId) + , LocksInfo(locksInfo) { + } }; class TEvAckData: public NActors::TEventLocal { diff --git a/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp b/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp index 4aea48ad6dbb..73f4c86398b9 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp +++ b/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp @@ -23,7 +23,7 @@ static constexpr ui64 MAX_SHARD_RESOLVES = 3; TKqpScanFetcherActor::TKqpScanFetcherActor(const NKikimrKqp::TKqpSnapshot& snapshot, - const TComputeRuntimeSettings& settings, std::vector&& computeActors, const ui64 txId, const ui64 lockTxId, const ui32 lockNodeId, + const TComputeRuntimeSettings& settings, std::vector&& computeActors, const ui64 txId, const TMaybe lockTxId, const ui32 lockNodeId, const 
NKikimrTxDataShard::TKqpTransaction_TScanTaskMeta& meta, const TShardsScanningPolicy& shardsScanningPolicy, TIntrusivePtr counters, NWilson::TTraceId traceId) : Meta(meta) @@ -36,7 +36,7 @@ TKqpScanFetcherActor::TKqpScanFetcherActor(const NKikimrKqp::TKqpSnapshot& snaps , Snapshot(snapshot) , ShardsScanningPolicy(shardsScanningPolicy) , Counters(counters) - , InFlightShards(*this) + , InFlightShards(ScanId, *this) , InFlightComputes(ComputeActorIds) { Y_UNUSED(traceId); @@ -88,7 +88,11 @@ void TKqpScanFetcherActor::Bootstrap() { void TKqpScanFetcherActor::HandleExecute(TEvScanExchange::TEvAckData::TPtr& ev) { Y_ABORT_UNLESS(ev->Get()->GetFreeSpace()); - ALS_DEBUG(NKikimrServices::KQP_COMPUTE) << "EvAckData (" << SelfId() << "): " << ev->Sender; + AFL_DEBUG(NKikimrServices::KQP_COMPUTE)("event", "AckDataFromCompute")("self_id", SelfId())("scan_id", ScanId) + ("packs_to_send", InFlightComputes.GetPacksToSendCount()) + ("from", ev->Sender)("shards remain", PendingShards.size()) + ("in flight scans", InFlightShards.GetScansCount()) + ("in flight shards", InFlightShards.GetShardsCount()); InFlightComputes.OnComputeAck(ev->Sender, ev->Get()->GetFreeSpace()); CheckFinish(); } @@ -432,7 +436,9 @@ std::unique_ptr TKqpScanFetcherActor::BuildEv ev->Record.SetStatsMode(RuntimeSettings.StatsMode); ev->Record.SetScanId(scanId); ev->Record.SetTxId(std::get(TxId)); - ev->Record.SetLockTxId(LockTxId); + if (LockTxId) { + ev->Record.SetLockTxId(*LockTxId); + } ev->Record.SetLockNodeId(LockNodeId); ev->Record.SetTablePath(ScanDataMeta.TablePath); ev->Record.SetSchemaVersion(ScanDataMeta.TableId.SchemaVersion); @@ -481,12 +487,17 @@ void TKqpScanFetcherActor::ProcessPendingScanDataItem(TEvKqpCompute::TEvScanData state->LastKey = std::move(msg.LastKey); const ui64 rowsCount = msg.GetRowsCount(); - CA_LOG_D("action=got EvScanData;rows=" << rowsCount << ";finished=" << msg.Finished << ";exceeded=" << msg.RequestedBytesLimitReached - << ";from=" << ev->Sender << ";shards remain=" << 
PendingShards.size() - << ";in flight scans=" << InFlightShards.GetScansCount() - << ";in flight shards=" << InFlightShards.GetShardsCount() - << ";delayed_for=" << latency.SecondsFloat() << " seconds by ratelimiter" - << ";tablet_id=" << state->TabletId); + AFL_ENSURE(!LockTxId || !msg.LocksInfo.Locks.empty() || !msg.LocksInfo.BrokenLocks.empty()); + AFL_ENSURE(LockTxId || (msg.LocksInfo.Locks.empty() && msg.LocksInfo.BrokenLocks.empty())); + AFL_DEBUG(NKikimrServices::KQP_COMPUTE)("action","got EvScanData")("rows", rowsCount)("finished", msg.Finished)("exceeded", msg.RequestedBytesLimitReached) + ("scan", ScanId)("packs_to_send", InFlightComputes.GetPacksToSendCount()) + ("from", ev->Sender)("shards remain", PendingShards.size()) + ("in flight scans", InFlightShards.GetScansCount()) + ("in flight shards", InFlightShards.GetShardsCount()) + ("delayed_for_seconds_by_ratelimiter", latency.SecondsFloat()) + ("tablet_id", state->TabletId) + ("locks", msg.LocksInfo.Locks.size()) + ("broken locks", msg.LocksInfo.BrokenLocks.size()); auto shardScanner = InFlightShards.GetShardScannerVerified(state->TabletId); auto tasksForCompute = shardScanner->OnReceiveData(msg, shardScanner); AFL_ENSURE(tasksForCompute.size() == 1 || tasksForCompute.size() == 0 || tasksForCompute.size() == ComputeActorIds.size())("size", tasksForCompute.size())("compute_size", ComputeActorIds.size()); diff --git a/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.h b/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.h index 2731d5d438aa..0bd2bfc1d58b 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.h +++ b/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.h @@ -50,7 +50,7 @@ class TKqpScanFetcherActor: public NActors::TActorBootstrapped LockTxId; const ui32 LockNodeId; public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { @@ -58,7 +58,7 @@ class TKqpScanFetcherActor: public NActors::TActorBootstrapped&& computeActors, const ui64 txId, const ui64 lockTxId, const 
ui32 lockNodeId, + std::vector&& computeActors, const ui64 txId, const TMaybe lockTxId, const ui32 lockNodeId, const NKikimrTxDataShard::TKqpTransaction_TScanTaskMeta& meta, const TShardsScanningPolicy& shardsScanningPolicy, TIntrusivePtr counters, NWilson::TTraceId traceId); @@ -170,6 +170,9 @@ class TKqpScanFetcherActor: public NActors::TActorBootstrapped PendingShards; std::deque PendingResolveShards; + static inline TAtomicCounter ScanIdCounter = 0; + const ui64 ScanId = ScanIdCounter.Inc(); + TInFlightShards InFlightShards; TInFlightComputes InFlightComputes; ui32 TotalRetries = 0; diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 07771776059e..7a8d76f830f6 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -2543,7 +2543,7 @@ class TKqpDataExecuter : public TKqpExecuterBase TKqpPlanner::SerializeReque auto result = std::make_unique(TasksGraph.GetMeta().GetArenaIntrusivePtr()); auto& request = result->Record; request.SetTxId(TxId); - request.SetLockTxId(LockTxId); + if (LockTxId) { + request.SetLockTxId(*LockTxId); + } request.SetLockNodeId(LockNodeId); ActorIdToProto(ExecuterId, request.MutableExecuterActorId()); diff --git a/ydb/core/kqp/executer_actor/kqp_planner.h b/ydb/core/kqp/executer_actor/kqp_planner.h index 29facb855f07..746cc531cd43 100644 --- a/ydb/core/kqp/executer_actor/kqp_planner.h +++ b/ydb/core/kqp/executer_actor/kqp_planner.h @@ -43,7 +43,7 @@ class TKqpPlanner { struct TArgs { TKqpTasksGraph& TasksGraph; const ui64 TxId; - const ui64 LockTxId; + const TMaybe LockTxId; const ui32 LockNodeId; const TActorId& Executer; const IKqpGateway::TKqpSnapshot& Snapshot; @@ -103,7 +103,7 @@ class TKqpPlanner { private: const ui64 TxId; - const ui64 LockTxId; + const TMaybe LockTxId; const ui32 LockNodeId; const TActorId ExecuterId; TVector ComputeTasks; diff --git 
a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.cpp b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.cpp index 1467a61bc38e..b000a2fd94a4 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.cpp +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.cpp @@ -16,15 +16,21 @@ TConclusionStatus TAlterColumnOperation::DoDeserialize(NYql::TObjectSettingsImpl if (StorageId && !*StorageId) { return TConclusionStatus::Fail("STORAGE_ID cannot be empty string"); } + { + auto status = AccessorConstructor.DeserializeFromRequest(features); + if (status.IsFail()) { + return status; + } + } { auto result = DictionaryEncodingDiff.DeserializeFromRequestFeatures(features); - if (!result) { - return TConclusionStatus::Fail(result.GetErrorMessage()); + if (result.IsFail()) { + return result; } } { auto status = Serializer.DeserializeFromRequest(features); - if (!status) { + if (status.IsFail()) { return status; } } @@ -40,6 +46,9 @@ void TAlterColumnOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTable if (!!Serializer) { Serializer.SerializeToProto(*column->MutableSerializer()); } + if (!!AccessorConstructor) { + *column->MutableDataAccessorConstructor() = AccessorConstructor.SerializeToProto(); + } *column->MutableDictionaryEncoding() = DictionaryEncodingDiff.SerializeToProto(); if (DefaultValue) { column->SetDefaultValue(*DefaultValue); diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.h index ee51b47bb8df..23d1aef28abb 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.h +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.h @@ -1,4 +1,5 @@ #include "abstract.h" +#include #include #include @@ -18,6 +19,7 @@ class TAlterColumnOperation : public ITableStoreOperation { NArrow::NSerialization::TSerializerContainer Serializer; 
NArrow::NDictionary::TEncodingDiff DictionaryEncodingDiff; std::optional DefaultValue; + NArrow::NAccessor::TRequestedConstructorContainer AccessorConstructor; public: TConclusionStatus DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) override; diff --git a/ydb/core/kqp/node_service/kqp_node_service.cpp b/ydb/core/kqp/node_service/kqp_node_service.cpp index dd996b3249d4..fc90556eb574 100644 --- a/ydb/core/kqp/node_service/kqp_node_service.cpp +++ b/ydb/core/kqp/node_service/kqp_node_service.cpp @@ -149,7 +149,9 @@ class TKqpNodeService : public TActorBootstrapped { auto requester = ev->Sender; ui64 txId = msg.GetTxId(); - ui64 lockTxId = msg.GetLockTxId(); + TMaybe lockTxId = msg.HasLockTxId() + ? TMaybe(msg.GetLockTxId()) + : Nothing(); ui32 lockNodeId = msg.GetLockNodeId(); YQL_ENSURE(msg.GetStartAllOrFail()); // todo: support partial start diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_agg.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_agg.cpp index 5092bbeec38d..ffe2ffae4c53 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_agg.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_agg.cpp @@ -299,11 +299,6 @@ TExprBase KqpPushOlapAggregate(TExprBase node, TExprContext& ctx, const TKqpOpti return node; } - // temporary for keys grouping push down not useful - if (!aggCombine.Keys().Empty()) { - return node; - } - auto read = maybeRead.Cast(); auto aggs = Build(ctx, node.Pos()); diff --git a/ydb/core/kqp/query_data/kqp_predictor.cpp b/ydb/core/kqp/query_data/kqp_predictor.cpp index 9c0ce87d73e4..3cf0b3b97187 100644 --- a/ydb/core/kqp/query_data/kqp_predictor.cpp +++ b/ydb/core/kqp/query_data/kqp_predictor.cpp @@ -131,7 +131,7 @@ ui32 TStagePredictor::GetUsableThreads() { userPoolSize = TlsActivationContext->ActorSystem()->GetPoolThreadsCount(AppData()->UserPoolId); } if (!userPoolSize) { - ALS_ERROR(NKikimrServices::KQP_EXECUTER) << "user pool is undefined for executer tasks construction"; + 
ALS_INFO(NKikimrServices::KQP_EXECUTER) << "user pool is undefined for executer tasks construction"; userPoolSize = NSystemInfo::NumberOfCpus(); } return Max(1, *userPoolSize); diff --git a/ydb/core/kqp/runtime/kqp_scan_data.cpp b/ydb/core/kqp/runtime/kqp_scan_data.cpp index 2e8b430681e9..96c8f6dbbca3 100644 --- a/ydb/core/kqp/runtime/kqp_scan_data.cpp +++ b/ydb/core/kqp/runtime/kqp_scan_data.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include @@ -296,26 +296,27 @@ TBytesStatistics WriteColumnValuesFromArrowSpecImpl(TAccessor editAccessor, auto trivialChunkedArray = std::make_shared(chunkedArrayExt); NArrow::NAccessor::IChunkedArray::TReader reader(trivialChunkedArray); - std::optional chunkIdx; std::optional currentIdxFrom; std::optional address; const typename TElementAccessor::TArrayType* currentArray = nullptr; const auto applyToIndex = [&](const ui32 rowIndexFrom, const ui32 rowIndexTo) { + bool changed = false; if (!currentIdxFrom) { address = reader.GetReadChunk(rowIndexFrom); AFL_ENSURE(rowIndexFrom == 0)("real", rowIndexFrom); + changed = true; } else { AFL_ENSURE(rowIndexFrom == *currentIdxFrom + 1)("next", rowIndexFrom)("current", *currentIdxFrom); if (!address->NextPosition()) { address = reader.GetReadChunk(rowIndexFrom); + changed = true; } } currentIdxFrom = rowIndexFrom; - if (!chunkIdx || *chunkIdx != address->GetChunkIdx()) { + if (changed) { currentArray = static_cast(address->GetArray().get()); TElementAccessor::Validate(*currentArray); - chunkIdx = address->GetChunkIdx(); } auto& rowItem = editAccessor(rowIndexTo, columnIndex); diff --git a/ydb/core/kqp/ut/common/columnshard.cpp b/ydb/core/kqp/ut/common/columnshard.cpp index 90e3992b7176..6318a8f1e7ea 100644 --- a/ydb/core/kqp/ut/common/columnshard.cpp +++ b/ydb/core/kqp/ut/common/columnshard.cpp @@ -22,7 +22,7 @@ namespace NKqp { } SecretableSecretKey: { Value: { - Data: "secretSecretKey" + Data: "fakeSecret" } } } @@ -31,32 +31,37 @@ namespace NKqp { using 
namespace NYdb; - TTestHelper::TTestHelper(const TKikimrSettings& settings) - : Kikimr(settings) - , TableClient(Kikimr.GetTableClient()) - , Session(TableClient.CreateSession().GetValueSync().GetSession()) - {} + TTestHelper::TTestHelper(const TKikimrSettings& settings) { + TKikimrSettings kikimrSettings(settings); + if (!kikimrSettings.FeatureFlags.HasEnableTieringInColumnShard()) { + kikimrSettings.SetEnableTieringInColumnShard(true); + } + + Kikimr = std::make_unique(kikimrSettings); + TableClient = std::make_unique(Kikimr->GetTableClient()); + Session = std::make_unique(TableClient->CreateSession().GetValueSync().GetSession()); + } NKikimr::NKqp::TKikimrRunner& TTestHelper::GetKikimr() { - return Kikimr; + return *Kikimr; } TTestActorRuntime& TTestHelper::GetRuntime() { - return *Kikimr.GetTestServer().GetRuntime(); + return *Kikimr->GetTestServer().GetRuntime(); } NYdb::NTable::TSession& TTestHelper::GetSession() { - return Session; + return *Session; } void TTestHelper::CreateTable(const TColumnTableBase& table, const EStatus expectedStatus) { std::cerr << (table.BuildQuery()) << std::endl; - auto result = Session.ExecuteSchemeQuery(table.BuildQuery()).GetValueSync(); + auto result = GetSession().ExecuteSchemeQuery(table.BuildQuery()).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), expectedStatus, result.GetIssues().ToString()); } void TTestHelper::CreateTier(const TString& tierName) { - auto result = Session.ExecuteSchemeQuery("CREATE OBJECT " + tierName + " (TYPE TIER) WITH tierConfig = `" + GetConfigProtoWithName(tierName) + "`").GetValueSync(); + auto result = GetSession().ExecuteSchemeQuery("CREATE OBJECT " + tierName + " (TYPE TIER) WITH tierConfig = `" + GetConfigProtoWithName(tierName) + "`").GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); } @@ -70,43 +75,43 @@ namespace NKqp { } ] })"; - auto result = Session.ExecuteSchemeQuery("CREATE OBJECT IF NOT EXISTS " + ruleName + 
" (TYPE TIERING_RULE) WITH (defaultColumn = " + columnName + ", description = `" + configTieringStr + "`)").GetValueSync(); + auto result = GetSession().ExecuteSchemeQuery("CREATE OBJECT IF NOT EXISTS " + ruleName + " (TYPE TIERING_RULE) WITH (defaultColumn = " + columnName + ", description = `" + configTieringStr + "`)").GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); return ruleName; } void TTestHelper::SetTiering(const TString& tableName, const TString& ruleName) { auto alterQuery = TStringBuilder() << "ALTER TABLE `" << tableName << "` SET (TIERING = '" << ruleName << "')"; - auto result = Session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + auto result = GetSession().ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); } void TTestHelper::ResetTiering(const TString& tableName) { auto alterQuery = TStringBuilder() << "ALTER TABLE `" << tableName << "` RESET (TIERING)"; - auto result = Session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + auto result = GetSession().ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); } void TTestHelper::DropTable(const TString& tableName) { - auto result = Session.DropTable(tableName).GetValueSync(); + auto result = GetSession().DropTable(tableName).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); } void TTestHelper::BulkUpsert(const TColumnTable& table, TTestHelper::TUpdatesBuilder& updates, const Ydb::StatusIds_StatusCode& opStatus /*= Ydb::StatusIds::SUCCESS*/) { Y_UNUSED(opStatus); - NKikimr::Tests::NCS::THelper helper(Kikimr.GetTestServer()); + NKikimr::Tests::NCS::THelper helper(GetKikimr().GetTestServer()); auto batch = updates.BuildArrow(); helper.SendDataViaActorSystem(table.GetName(), batch, opStatus); } 
void TTestHelper::BulkUpsert(const TColumnTable& table, std::shared_ptr batch, const Ydb::StatusIds_StatusCode& opStatus /*= Ydb::StatusIds::SUCCESS*/) { Y_UNUSED(opStatus); - NKikimr::Tests::NCS::THelper helper(Kikimr.GetTestServer()); + NKikimr::Tests::NCS::THelper helper(GetKikimr().GetTestServer()); helper.SendDataViaActorSystem(table.GetName(), batch, opStatus); } void TTestHelper::ReadData(const TString& query, const TString& expected, const EStatus opStatus /*= EStatus::SUCCESS*/) { - auto it = TableClient.StreamExecuteScanQuery(query).GetValueSync(); + auto it = TableClient->StreamExecuteScanQuery(query).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); // Means stream successfully get TString result = StreamResultToYson(it, false, opStatus); if (opStatus == EStatus::SUCCESS) { @@ -115,21 +120,29 @@ namespace NKqp { } void TTestHelper::RebootTablets(const TString& tableName) { - auto runtime = Kikimr.GetTestServer().GetRuntime(); + auto runtime = GetKikimr().GetTestServer().GetRuntime(); TActorId sender = runtime->AllocateEdgeActor(); TVector shards; { - auto describeResult = DescribeTable(&Kikimr.GetTestServer(), sender, tableName); + auto describeResult = DescribeTable(&GetKikimr().GetTestServer(), sender, tableName); for (auto shard : describeResult.GetPathDescription().GetColumnTableDescription().GetSharding().GetColumnShards()) { shards.push_back(shard); } } for (auto shard : shards) { - Kikimr.GetTestServer().GetRuntime()->Send(MakePipePerNodeCacheID(false), NActors::TActorId(), new TEvPipeCache::TEvForward( + GetKikimr().GetTestServer().GetRuntime()->Send(MakePipePerNodeCacheID(false), NActors::TActorId(), new TEvPipeCache::TEvForward( new TEvents::TEvPoisonPill(), shard, false)); } } + void TTestHelper::WaitTabletDeletionInHive(ui64 tabletId, TDuration duration) { + auto deadline = TInstant::Now() + duration; + while (GetKikimr().GetTestClient().TabletExistsInHive(&GetRuntime(), tabletId) && 
TInstant::Now() <= deadline) { + Cerr << "WaitTabletDeletionInHive: wait until " << tabletId << " is deleted" << Endl; + Sleep(TDuration::Seconds(1)); + } + } + TString TTestHelper::TColumnSchema::BuildQuery() const { TStringBuilder str; str << Name << ' '; @@ -233,6 +246,8 @@ namespace NKqp { return arrow::field(name, arrow::int64(), nullable); case NScheme::NTypeIds::JsonDocument: return arrow::field(name, arrow::binary(), nullable); + case NScheme::NTypeIds::Decimal: + return arrow::field(name, arrow::decimal(22, 9)); case NScheme::NTypeIds::Pg: switch (NPg::PgTypeIdFromTypeDesc(typeDesc)) { case INT2OID: diff --git a/ydb/core/kqp/ut/common/columnshard.h b/ydb/core/kqp/ut/common/columnshard.h index a938e91b4d4a..201b44ad8ea5 100644 --- a/ydb/core/kqp/ut/common/columnshard.h +++ b/ydb/core/kqp/ut/common/columnshard.h @@ -63,9 +63,9 @@ namespace NKqp { }; private: - TKikimrRunner Kikimr; - NYdb::NTable::TTableClient TableClient; - NYdb::NTable::TSession Session; + std::unique_ptr Kikimr; + std::unique_ptr TableClient; + std::unique_ptr Session; public: TTestHelper(const TKikimrSettings& settings); @@ -82,6 +82,7 @@ namespace NKqp { void BulkUpsert(const TColumnTable& table, std::shared_ptr batch, const Ydb::StatusIds_StatusCode& opStatus = Ydb::StatusIds::SUCCESS); void ReadData(const TString& query, const TString& expected, const NYdb::EStatus opStatus = NYdb::EStatus::SUCCESS); void RebootTablets(const TString& tableName); + void WaitTabletDeletionInHive(ui64 tabletId, TDuration duration); }; } diff --git a/ydb/core/kqp/ut/common/kqp_ut_common.cpp b/ydb/core/kqp/ut/common/kqp_ut_common.cpp index 75c7850c73fc..1cfd1d07c294 100644 --- a/ydb/core/kqp/ut/common/kqp_ut_common.cpp +++ b/ydb/core/kqp/ut/common/kqp_ut_common.cpp @@ -132,6 +132,8 @@ TKikimrRunner::TKikimrRunner(const TKikimrSettings& settings) { ServerSettings->SetEnableUniqConstraint(true); ServerSettings->SetUseRealThreads(settings.UseRealThreads); ServerSettings->SetEnableTablePgTypes(true); + 
ServerSettings->SetEnablePgSyntax(true); + ServerSettings->SetEnableOlapCompression(true); ServerSettings->S3ActorsFactory = settings.S3ActorsFactory; if (settings.Storage) { diff --git a/ydb/core/kqp/ut/common/kqp_ut_common.h b/ydb/core/kqp/ut/common/kqp_ut_common.h index 23e34b91183d..a02aeafc2b38 100644 --- a/ydb/core/kqp/ut/common/kqp_ut_common.h +++ b/ydb/core/kqp/ut/common/kqp_ut_common.h @@ -97,6 +97,7 @@ struct TKikimrSettings: public TTestFeatureFlagsHolder { exchangerSettings->SetStartDelayMs(10); exchangerSettings->SetMaxDelayMs(10); AppConfig.MutableColumnShardConfig()->SetDisabledOnSchemeShard(false); + FeatureFlags.SetEnableSparsedColumns(true); } TKikimrSettings& SetAppConfig(const NKikimrConfig::TAppConfig& value) { AppConfig = value; return *this; } diff --git a/ydb/core/kqp/ut/olap/aggregations_ut.cpp b/ydb/core/kqp/ut/olap/aggregations_ut.cpp index 71cb222e1de5..28dcf8d19069 100644 --- a/ydb/core/kqp/ut/olap/aggregations_ut.cpp +++ b/ydb/core/kqp/ut/olap/aggregations_ut.cpp @@ -163,7 +163,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { // Check plan #if SSA_RUNTIME_VERSION >= 2U - CheckPlanForAggregatePushdown(query, tableClient, { "WideCombiner" }, "Aggregate-TableFullScan"); + CheckPlanForAggregatePushdown(query, tableClient, { "WideCombiner" }, "TableFullScan"); // CheckPlanForAggregatePushdown(query, tableClient, { "TKqpOlapAgg" }, "TableFullScan"); #else CheckPlanForAggregatePushdown(query, tableClient, { "CombineCore" }, ""); @@ -395,7 +395,8 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { .AddExpectedPlanOptions("KqpOlapFilter") #if SSA_RUNTIME_VERSION >= 2U .AddExpectedPlanOptions("TKqpOlapAgg") - .MutableLimitChecker().SetExpectedResultCount(1) + // See https://github.com/ydb-platform/ydb/issues/7299 for explanation, why resultCount = 3 + .MutableLimitChecker().SetExpectedResultCount(3) #else .AddExpectedPlanOptions("CombineCore") #endif @@ -1023,7 +1024,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { ORDER BY c, resource_id DESC LIMIT 3 )") 
.SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") - .SetExpectedReadNodeType("Aggregate-TableFullScan"); + .SetExpectedReadNodeType("TableFullScan"); testCase.FillExpectedAggregationGroupByPlanOptions(); TestAggregations({ testCase }); } diff --git a/ydb/core/kqp/ut/olap/blobs_sharing_ut.cpp b/ydb/core/kqp/ut/olap/blobs_sharing_ut.cpp index 436b1ba0286d..ea97c44484f3 100644 --- a/ydb/core/kqp/ut/olap/blobs_sharing_ut.cpp +++ b/ydb/core/kqp/ut/olap/blobs_sharing_ut.cpp @@ -15,7 +15,6 @@ namespace NKikimr::NKqp { Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { - namespace { class TTransferStatus { private: @@ -60,7 +59,6 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { return TConclusionStatus::Success(); } virtual void DoSerializeToProto(NKikimrColumnShardDataSharingProto::TInitiator::TController& /*proto*/) const override { - } virtual TString GetClassName() const override { @@ -68,10 +66,16 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } }; + TKikimrSettings GetKikimrSettings() { + NKikimrConfig::TFeatureFlags featureFlags; + featureFlags.SetEnableAlterShardingInColumnShard(true); + return TKikimrSettings().SetWithSampleTables(false).SetFeatureFlags(featureFlags); + } + class TSharingDataTestCase { private: const ui32 ShardsCount; - TKikimrRunner& Kikimr; + TKikimrRunner Kikimr; TTypedLocalHelper Helper; NYDBTest::TControllers::TGuard Controller; std::vector ShardIds; @@ -86,15 +90,15 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { Helper.FillPKOnly(kff, recordsCount); } - TSharingDataTestCase(const ui32 shardsCount, TKikimrRunner& kikimr) + TSharingDataTestCase(const ui32 shardsCount) : ShardsCount(shardsCount) - , Kikimr(kikimr) + , Kikimr(GetKikimrSettings()) , Helper("", Kikimr, "olapTable", "olapStore12") , Controller(NYDBTest::TControllers::RegisterCSControllerGuard()) { Controller->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Disable); Controller->SetExpectedShardsCount(ShardsCount); - 
Controller->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); - Controller->SetReadTimeoutClean(TDuration::Seconds(1)); + Controller->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + Controller->SetOverrideReadTimeoutClean(TDuration::Seconds(1)); Tests::NCommon::TLoggerInit(Kikimr).SetComponents({ NKikimrServices::TX_COLUMNSHARD }, "CS").Initialize(); @@ -111,7 +115,7 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } void WaitNormalization() { - Controller->SetReadTimeoutClean(TDuration::Seconds(1)); + Controller->SetOverrideReadTimeoutClean(TDuration::Seconds(1)); Controller->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Force); const auto start = TInstant::Now(); while (!Controller->IsTrivialLinks() && TInstant::Now() - start < TDuration::Seconds(30)) { @@ -120,11 +124,11 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } AFL_VERIFY(Controller->IsTrivialLinks()); Controller->CheckInvariants(); - Controller->SetReadTimeoutClean(TDuration::Minutes(5)); + Controller->SetOverrideReadTimeoutClean(TDuration::Minutes(5)); } void Execute(const ui64 destinationIdx, const std::vector& sourceIdxs, const bool move, const NOlap::TSnapshot& snapshot, const std::set& pathIdxs) { - Controller->SetReadTimeoutClean(TDuration::Seconds(1)); + Controller->SetOverrideReadTimeoutClean(TDuration::Seconds(1)); AFL_VERIFY(destinationIdx < ShardIds.size()); const ui64 destination = ShardIds[destinationIdx]; std::vector sources; @@ -192,22 +196,18 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { CSTransferStatus->Reset(); AFL_VERIFY(!Controller->IsTrivialLinks()); Controller->CheckInvariants(); - Controller->SetReadTimeoutClean(TDuration::Minutes(5)); + Controller->SetOverrideReadTimeoutClean(TDuration::Minutes(5)); } }; Y_UNIT_TEST(BlobsSharingSplit1_1) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - TSharingDataTestCase tester(4, kikimr); + TSharingDataTestCase tester(4); tester.AddRecords(800000); 
Sleep(TDuration::Seconds(1)); tester.Execute(0, { 1 }, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), { 0 }); } Y_UNIT_TEST(BlobsSharingSplit1_1_clean) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - TSharingDataTestCase tester(2, kikimr); + TSharingDataTestCase tester(2); tester.AddRecords(80000); CompareYson(tester.GetHelper().GetQueryResult("SELECT COUNT(*) FROM `/Root/olapStore12/olapTable`"), R"([[80000u;]])"); Sleep(TDuration::Seconds(1)); @@ -219,9 +219,7 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } Y_UNIT_TEST(BlobsSharingSplit1_1_clean_with_restarts) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - TSharingDataTestCase tester(2, kikimr); + TSharingDataTestCase tester(2); tester.SetRebootTablet(true); tester.AddRecords(80000); CompareYson(tester.GetHelper().GetQueryResult("SELECT COUNT(*) FROM `/Root/olapStore12/olapTable`"), R"([[80000u;]])"); @@ -234,18 +232,14 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } Y_UNIT_TEST(BlobsSharingSplit3_1) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - TSharingDataTestCase tester(4, kikimr); + TSharingDataTestCase tester(4); tester.AddRecords(800000); Sleep(TDuration::Seconds(1)); tester.Execute(0, { 1, 2, 3 }, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), { 0 }); } Y_UNIT_TEST(BlobsSharingSplit1_3_1) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - TSharingDataTestCase tester(4, kikimr); + TSharingDataTestCase tester(4); tester.AddRecords(800000); Sleep(TDuration::Seconds(1)); tester.Execute(1, { 0 }, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), { 0 }); @@ -255,9 +249,7 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } Y_UNIT_TEST(BlobsSharingSplit1_3_2_1_clean) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner 
kikimr(settings); - TSharingDataTestCase tester(4, kikimr); + TSharingDataTestCase tester(4); tester.AddRecords(800000); Sleep(TDuration::Seconds(1)); tester.Execute(1, { 0 }, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), { 0 }); @@ -271,8 +263,15 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } class TReshardingTest { - private: - YDB_ACCESSOR(TString, ShardingType, "HASH_FUNCTION_CONSISTENCY_64"); + public: + TReshardingTest() + : Kikimr(GetKikimrSettings()) + , CSController(NYDBTest::TControllers::RegisterCSControllerGuard()) + , TableClient(Kikimr.GetTableClient()) { + CSController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + CSController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); + CSController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); + } void WaitResharding(const TString& hint = "") { const TInstant start = TInstant::Now(); @@ -308,28 +307,22 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { CompareYson(result, "[[" + ::ToString(expectation) + "u;]]"); } + protected: TKikimrRunner Kikimr; - public: + NKikimr::NYDBTest::TControllers::TGuard CSController; + NYdb::NTable::TTableClient TableClient; + }; - TReshardingTest() - : Kikimr(TKikimrSettings().SetWithSampleTables(false)) { + class TShardingTypeTest: public TReshardingTest { + YDB_ACCESSOR(TString, ShardingType, "HASH_FUNCTION_CONSISTENCY_64"); - } + public: + using TReshardingTest::TReshardingTest; void Execute() { - auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); - csController->SetLagForCompactionBeforeTierings(TDuration::Seconds(1)); - csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); - TLocalHelper(Kikimr).SetShardingMethod(ShardingType).CreateTestOlapTable("olapTable", "olapStore", 24, 4); - auto tableClient = Kikimr.GetTableClient(); - Tests::NCommon::TLoggerInit(Kikimr).SetComponents({ NKikimrServices::TX_COLUMNSHARD }, 
"CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); - - std::vector uids; - std::vector resourceIds; - std::vector levels; + Tests::NCommon::TLoggerInit(Kikimr).SetComponents({ NKikimrServices::TX_COLUMNSHARD, NKikimrServices::TX_COLUMNSHARD_SCAN }, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); { WriteTestData(Kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 10000); @@ -339,83 +332,161 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { WriteTestData(Kikimr, "/Root/olapStore/olapTable", 1400000, 300400000, 10000); WriteTestData(Kikimr, "/Root/olapStore/olapTable", 2000000, 200000000, 70000); WriteTestData(Kikimr, "/Root/olapStore/olapTable", 3000000, 100000000, 110000); - - const auto filler = [&](const ui32 startRes, const ui32 startUid, const ui32 count) { - for (ui32 i = 0; i < count; ++i) { - uids.emplace_back("uid_" + ::ToString(startUid + i)); - resourceIds.emplace_back(::ToString(startRes + i)); - levels.emplace_back(i % 5); - } - }; - - filler(1000000, 300000000, 10000); - filler(1100000, 300100000, 10000); - filler(1200000, 300200000, 10000); - filler(1300000, 300300000, 10000); - filler(1400000, 300400000, 10000); - filler(2000000, 200000000, 70000); - filler(3000000, 100000000, 110000); - } CheckCount(230000); for (ui32 i = 0; i < 2; ++i) { auto alterQuery = TStringBuilder() << R"(ALTER OBJECT `/Root/olapStore/olapTable` (TYPE TABLESTORE) SET (ACTION=ALTER_SHARDING, MODIFICATION=SPLIT);)"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto session = TableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); WaitResharding("SPLIT:" + ::ToString(i)); } { auto alterQuery = TStringBuilder() << R"(ALTER OBJECT `/Root/olapStore/olapTable` (TYPE TABLESTORE) SET (ACTION=ALTER_SHARDING, MODIFICATION=SPLIT);)"; - auto session = 
tableClient.CreateSession().GetValueSync().GetSession(); + auto session = TableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_UNEQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } - AFL_VERIFY(csController->GetShardingFiltersCount().Val() == 0); + AFL_VERIFY(CSController->GetShardingFiltersCount().Val() == 0); CheckCount(230000); - i64 count = csController->GetShardingFiltersCount().Val(); + i64 count = CSController->GetShardingFiltersCount().Val(); AFL_VERIFY(count >= 16)("count", count); + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + CSController->WaitIndexation(TDuration::Seconds(3)); + CSController->WaitCompactions(TDuration::Seconds(3)); WriteTestData(Kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 10000); - csController->WaitIndexation(TDuration::Seconds(5)); - csController->WaitCompactions(TDuration::Seconds(5)); + CheckCount(230000); + CSController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + CSController->WaitIndexation(TDuration::Seconds(5)); + CheckCount(230000); + CSController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + CSController->WaitCompactions(TDuration::Seconds(5)); + count = CSController->GetShardingFiltersCount().Val(); + CheckCount(230000); - csController->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Disable); + CSController->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Disable); CheckCount(230000); - AFL_VERIFY(count == csController->GetShardingFiltersCount().Val())("count", count)("val", csController->GetShardingFiltersCount().Val()); + AFL_VERIFY(count == CSController->GetShardingFiltersCount().Val())("count", count)( + "val", 
CSController->GetShardingFiltersCount().Val()); const ui32 portionsCount = 16; for (ui32 i = 0; i < 4; ++i) { { auto alterQuery = TStringBuilder() << R"(ALTER OBJECT `/Root/olapStore/olapTable` (TYPE TABLESTORE) SET (ACTION=ALTER_SHARDING, MODIFICATION=MERGE);)"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto session = TableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } WaitResharding("MERGE:" + ::ToString(i)); - // csController->WaitCleaning(TDuration::Seconds(5)); + // CSController->WaitCleaning(TDuration::Seconds(5)); CheckCount(230000); - AFL_VERIFY(count + portionsCount == csController->GetShardingFiltersCount().Val())("count", count)("val", csController->GetShardingFiltersCount().Val()); + AFL_VERIFY(count + portionsCount == CSController->GetShardingFiltersCount().Val())("count", count)( + "val", CSController->GetShardingFiltersCount().Val()); count += portionsCount; } { auto alterQuery = TStringBuilder() << R"(ALTER OBJECT `/Root/olapStore/olapTable` (TYPE TABLESTORE) SET (ACTION=ALTER_SHARDING, MODIFICATION=MERGE);)"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto session = TableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_UNEQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } - csController->CheckInvariants(); + CSController->CheckInvariants(); } }; Y_UNIT_TEST(TableReshardingConsistency64) { - TReshardingTest().SetShardingType("HASH_FUNCTION_CONSISTENCY_64").Execute(); + TShardingTypeTest().SetShardingType("HASH_FUNCTION_CONSISTENCY_64").Execute(); } Y_UNIT_TEST(TableReshardingModuloN) { - 
TReshardingTest().SetShardingType("HASH_FUNCTION_MODULO_N").Execute(); + TShardingTypeTest().SetShardingType("HASH_FUNCTION_CONSISTENCY_64").Execute(); + } + + class TAsyncReshardingTest: public TReshardingTest { + YDB_ACCESSOR(TString, ShardingType, "HASH_FUNCTION_CONSISTENCY_64"); + + public: + TAsyncReshardingTest() { + TLocalHelper(Kikimr).CreateTestOlapTable("olapTable", "olapStore", 24, 4); + } + + void AddBatch(int numRows) { + WriteTestData(Kikimr, "/Root/olapStore/olapTable", LastPathId, LastTs, numRows); + LastPathId += numRows * 10; + LastTs += numRows * 10; + NumRows += numRows; + } + + void StartResharding(TString modification) { + auto alterQuery = + TStringBuilder() << R"(ALTER OBJECT `/Root/olapStore/olapTable` (TYPE TABLESTORE) SET (ACTION=ALTER_SHARDING, MODIFICATION=)" + << modification << ");"; + auto session = TableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + + void CheckCount() { + TReshardingTest::CheckCount(NumRows); + } + + void ChangeSchema() { + auto alterQuery = + "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=level, " + "`SERIALIZER.CLASS_NAME`=`ARROW_SERIALIZER`, " + "`COMPRESSION.TYPE`=`zstd`);"; + auto session = TableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + + void DisableCompaction() { + CSController->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Disable); + } + + private: + ui64 LastPathId = 1000000; + ui64 LastTs = 300000000; + ui64 NumRows = 0; + }; + + Y_UNIT_TEST(UpsertWhileSplitTest) { + TAsyncReshardingTest tester; + + tester.AddBatch(10000); + + 
tester.CheckCount(); + + for (int i = 0; i < 4; i++) { + tester.StartResharding("SPLIT"); + + tester.CheckCount(); + tester.AddBatch(10000); + tester.CheckCount(); + tester.WaitResharding(); + } + tester.AddBatch(10000); + tester.CheckCount(); + } + + Y_UNIT_TEST(ChangeSchemaAndSplit) { + TAsyncReshardingTest tester; + tester.DisableCompaction(); + + tester.AddBatch(10000); + tester.ChangeSchema(); + tester.AddBatch(10000); + + tester.StartResharding("SPLIT"); + tester.WaitResharding(); + + tester.CheckCount(); } } } diff --git a/ydb/core/kqp/ut/olap/clickbench_ut.cpp b/ydb/core/kqp/ut/olap/clickbench_ut.cpp index e9f09d450e0c..43ca55f438aa 100644 --- a/ydb/core/kqp/ut/olap/clickbench_ut.cpp +++ b/ydb/core/kqp/ut/olap/clickbench_ut.cpp @@ -155,7 +155,6 @@ Y_UNIT_TEST_SUITE(KqpOlapClickbench) { //.SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") // Should be fixed in https://st.yandex-team.ru/KIKIMR-17009 // .SetExpectedReadNodeType("TableFullScan"); - // .SetExpectedReadNodeType("TableFullScan"); q7.FillExpectedAggregationGroupByPlanOptions(); TAggregationTestCase q9; @@ -166,10 +165,9 @@ Y_UNIT_TEST_SUITE(KqpOlapClickbench) { GROUP BY RegionID ORDER BY c DESC LIMIT 10 - )") + )"); //.SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") - .SetExpectedReadNodeType("TableFullScan"); - // .SetExpectedReadNodeType("Aggregate-TableFullScan"); + //.SetExpectedReadNodeType("Filter-TableFullScan"); q9.FillExpectedAggregationGroupByPlanOptions(); TAggregationTestCase q12; @@ -185,7 +183,6 @@ Y_UNIT_TEST_SUITE(KqpOlapClickbench) { //.SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") // Should be fixed in https://st.yandex-team.ru/KIKIMR-17009 // .SetExpectedReadNodeType("TableFullScan"); - // .SetExpectedReadNodeType("TableFullScan"); q12.FillExpectedAggregationGroupByPlanOptions(); TAggregationTestCase q14; @@ -201,7 +198,6 @@ Y_UNIT_TEST_SUITE(KqpOlapClickbench) { 
//.SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") // Should be fixed in https://st.yandex-team.ru/KIKIMR-17009 // .SetExpectedReadNodeType("TableFullScan"); - // .SetExpectedReadNodeType("TableFullScan"); q14.FillExpectedAggregationGroupByPlanOptions(); TAggregationTestCase q22; @@ -213,7 +209,8 @@ Y_UNIT_TEST_SUITE(KqpOlapClickbench) { GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; - )"); + )") + .AddExpectedPlanOptions("KqpOlapFilter"); q22.FillExpectedAggregationGroupByPlanOptions(); TAggregationTestCase q39; diff --git a/ydb/core/kqp/ut/olap/datatime64_ut.cpp b/ydb/core/kqp/ut/olap/datatime64_ut.cpp index b4ea097eafe8..97e2a2f2fd28 100644 --- a/ydb/core/kqp/ut/olap/datatime64_ut.cpp +++ b/ydb/core/kqp/ut/olap/datatime64_ut.cpp @@ -170,6 +170,7 @@ Y_UNIT_TEST_SUITE(KqpDatetime64ColumnShard) { runnerSettings.WithSampleTables = false; TTestHelper testHelper(runnerSettings); + Tests::NCommon::TLoggerInit(testHelper.GetKikimr()).SetComponents({ NKikimrServices::GROUPED_MEMORY_LIMITER }, "CS").Initialize(); TVector schema = { TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int64).SetNullable(false), diff --git a/ydb/core/kqp/ut/olap/decimal_ut.cpp b/ydb/core/kqp/ut/olap/decimal_ut.cpp new file mode 100644 index 000000000000..151226bd01d6 --- /dev/null +++ b/ydb/core/kqp/ut/olap/decimal_ut.cpp @@ -0,0 +1,198 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace NKikimr { +namespace NKqp { + +using namespace NYdb; +using namespace NYdb::NTable; + +Y_UNIT_TEST_SUITE(KqpDecimalColumnShard) { + class TDecimalTestCase { + public: + TDecimalTestCase() + : TestHelper(TKikimrSettings().SetWithSampleTables(false)) { + } + + TTestHelper::TUpdatesBuilder Inserter() { + return TTestHelper::TUpdatesBuilder(TestTable.GetArrowSchema(Schema)); + } + + void Upsert(TTestHelper::TUpdatesBuilder& 
inserter) { + TestHelper.BulkUpsert(TestTable, inserter); + } + + void CheckQuery(const TString& query, const TString& expected) { + TestHelper.ReadData(query, expected); + } + + void PrepareTable1() { + Schema = { + TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int32).SetNullable(false), + TTestHelper::TColumnSchema().SetName("int").SetType(NScheme::NTypeIds::Int64), + TTestHelper::TColumnSchema().SetName("dec").SetType(NScheme::NTypeIds::Decimal), + }; + TestTable.SetName("/Root/Table1").SetPrimaryKey({ "id" }).SetSharding({ "id" }).SetSchema(Schema); + TestHelper.CreateTable(TestTable); + + { + TTestHelper::TUpdatesBuilder inserter = Inserter(); + inserter.AddRow().Add(1).Add(4).Add(TDecimalValue("3.14")); + inserter.AddRow().Add(2).Add(3).Add(TDecimalValue("8.16")); + Upsert(inserter); + } + { + TTestHelper::TUpdatesBuilder inserter = Inserter(); + inserter.AddRow().Add(4).Add(1).Add(TDecimalValue("12.46")); + inserter.AddRow().Add(3).Add(2).Add(TDecimalValue("8.492")); + + Upsert(inserter); + } + } + + void PrepareTable2() { + Schema = { + TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int32).SetNullable(false), + TTestHelper::TColumnSchema().SetName("table1_id").SetType(NScheme::NTypeIds::Int64), + TTestHelper::TColumnSchema().SetName("dec").SetType(NScheme::NTypeIds::Decimal), + }; + TestTable.SetName("/Root/Table2").SetPrimaryKey({ "id" }).SetSharding({ "id" }).SetSchema(Schema); + TestHelper.CreateTable(TestTable); + + { + TTestHelper::TUpdatesBuilder inserter = Inserter(); + inserter.AddRow().Add(1).Add(1).Add(TDecimalValue("12.46")); + inserter.AddRow().Add(2).Add(1).Add(TDecimalValue("8.16")); + inserter.AddRow().Add(3).Add(2).Add(TDecimalValue("12.46")); + inserter.AddRow().Add(4).Add(2).Add(TDecimalValue("8.16")); + Upsert(inserter); + } + } + + private: + TTestHelper TestHelper; + + TVector Schema; + TTestHelper::TColumnTable TestTable; + }; + + Y_UNIT_TEST(TestSimpleQueries) { + TDecimalTestCase tester; 
+ tester.PrepareTable1(); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE id=1", "[[[\"3.14\"];1;[4]]]"); + tester.CheckQuery( + "SELECT * FROM `/Root/Table1` order by id", "[[[\"3.14\"];1;[4]];[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + } + + Y_UNIT_TEST(TestFilterEqual) { + TDecimalTestCase tester; + tester.PrepareTable1(); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec == cast(\"3.14\" as decimal(22,9))", "[[[\"3.14\"];1;[4]]]"); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec != cast(\"3.14\" as decimal(22,9)) order by id", + "[[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + } + + Y_UNIT_TEST(TestFilterNulls) { + TDecimalTestCase tester; + tester.PrepareTable1(); + + TTestHelper::TUpdatesBuilder inserter = tester.Inserter(); + inserter.AddRow().Add(5).Add(5).AddNull(); + inserter.AddRow().Add(6).Add(6).AddNull(); + tester.Upsert(inserter); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec is NULL order by id", "[[#;5;[5]];[#;6;[6]]]"); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec is not NULL order by id", + "[[[\"3.14\"];1;[4]];[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + } + + Y_UNIT_TEST(TestFilterCompare) { + TDecimalTestCase tester; + tester.PrepareTable1(); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec < cast(\"12.46\" as decimal(22,9)) order by id", + "[[[\"3.14\"];1;[4]];[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]]]"); + + tester.CheckQuery( + "SELECT * FROM `/Root/Table1` WHERE dec > cast(\"8.16\" as decimal(22,9)) order by id", "[[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec <= cast(\"12.46\" as decimal(22,9)) order by id", + "[[[\"3.14\"];1;[4]];[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec >= cast(\"8.16\" as decimal(22,9)) order by id", /* inclusive bound: the 8.16 row itself must match */ +
"[[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + } + + Y_UNIT_TEST(TestOrderByDecimal) { + TDecimalTestCase tester; + tester.PrepareTable1(); + + tester.CheckQuery( + "SELECT * FROM `/Root/Table1` order by dec", "[[[\"3.14\"];1;[4]];[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + } + + Y_UNIT_TEST(TestGroupByDecimal) { + TDecimalTestCase tester; + tester.PrepareTable1(); + + TTestHelper::TUpdatesBuilder inserter = tester.Inserter(); + inserter.AddRow().Add(5).Add(12).Add(TDecimalValue("8.492")); + inserter.AddRow().Add(6).Add(30).Add(TDecimalValue("12.46")); + tester.Upsert(inserter); + + tester.CheckQuery("SELECT dec, count(*) FROM `/Root/Table1` group by dec order by dec", + "[[[\"3.14\"];1u];[[\"8.16\"];1u];[[\"8.492\"];2u];[[\"12.46\"];2u]]"); + } + + Y_UNIT_TEST(TestAggregation) { + TDecimalTestCase tester; + tester.PrepareTable1(); + tester.CheckQuery("SELECT min(dec) FROM `/Root/Table1`", "[[[\"3.14\"]]]"); + tester.CheckQuery("SELECT max(dec) FROM `/Root/Table1`", "[[[\"12.46\"]]]"); + tester.CheckQuery("SELECT sum(dec) FROM `/Root/Table1`", "[[[\"32.252\"]]]"); + } + + Y_UNIT_TEST(TestJoinById) { + TDecimalTestCase tester; + tester.PrepareTable1(); + tester.PrepareTable2(); + + tester.CheckQuery( + "SELECT t1.id, t1.dec, t2.dec FROM `/Root/Table1` as t1 join `/Root/Table2` as t2 on t1.id = t2.table1_id order by t1.id, t1.dec, " + "t2.dec", + R"([[1;["3.14"];["8.16"]];[1;["3.14"];["12.46"]];[2;["8.16"];["8.16"]];[2;["8.16"];["12.46"]]])"); + } + + Y_UNIT_TEST(TestJoinByDecimal) { + TDecimalTestCase tester; + tester.PrepareTable1(); + tester.PrepareTable2(); + + tester.CheckQuery( + "SELECT t1.id, t2.id, t1.dec FROM `/Root/Table1` as t1 join `/Root/Table2` as t2 on t1.dec = t2.dec order by t1.id, t2.id, t1.dec", + R"([[2;2;["8.16"]];[2;4;["8.16"]];[4;1;["12.46"]];[4;3;["12.46"]]])"); + } +} + +} // namespace NKqp +} // namespace NKikimr diff --git a/ydb/core/kqp/ut/olap/helpers/aggregation.cpp
b/ydb/core/kqp/ut/olap/helpers/aggregation.cpp index 7bed6f4a4aa4..1c03f6be9ffa 100644 --- a/ydb/core/kqp/ut/olap/helpers/aggregation.cpp +++ b/ydb/core/kqp/ut/olap/helpers/aggregation.cpp @@ -13,6 +13,7 @@ void TestAggregationsBase(const std::vector& cases) { TLocalHelper(kikimr).CreateTestOlapTable(); auto tableClient = kikimr.GetTableClient(); + Tests::NCommon::TLoggerInit(kikimr).SetComponents({ NKikimrServices::GROUPED_MEMORY_LIMITER, NKikimrServices::TX_COLUMNSHARD_SCAN }, "CS").Initialize(); { WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); @@ -49,10 +50,11 @@ void TestAggregationsInternal(const std::vector& cases) { Tests::TServer::TPtr server = new Tests::TServer(settings); auto runtime = server->GetRuntime(); + Tests::NCommon::TLoggerInit(runtime).Initialize(); + Tests::NCommon::TLoggerInit(runtime).SetComponents({ NKikimrServices::GROUPED_MEMORY_LIMITER }, "CS").Initialize(); auto sender = runtime->AllocateEdgeActor(); InitRoot(server, sender); - Tests::NCommon::TLoggerInit(runtime).Initialize(); ui32 numShards = 1; ui32 numIterations = 10; diff --git a/ydb/core/kqp/ut/olap/helpers/get_value.cpp b/ydb/core/kqp/ut/olap/helpers/get_value.cpp index 208e5e40e02e..67fe905d8e44 100644 --- a/ydb/core/kqp/ut/olap/helpers/get_value.cpp +++ b/ydb/core/kqp/ut/olap/helpers/get_value.cpp @@ -36,6 +36,11 @@ void PrintValue(IOutputStream& out, const NYdb::TValue& v) { out << value.GetInt64(); break; } + case NYdb::EPrimitiveType::Uint8: + { + out << value.GetUint8(); + break; + } case NYdb::EPrimitiveType::Utf8: { out << value.GetUtf8(); diff --git a/ydb/core/kqp/ut/olap/helpers/local.h b/ydb/core/kqp/ut/olap/helpers/local.h index dc957f98220e..9511ad1828ef 100644 --- a/ydb/core/kqp/ut/olap/helpers/local.h +++ b/ydb/core/kqp/ut/olap/helpers/local.h @@ -28,8 +28,14 @@ class TLocalHelper: public Tests::NCS::THelper { void CreateTestOlapTable(TString tableName = "olapTable", TString storeName = "olapStore", ui32 storeShardsCount = 4, ui32 
tableShardsCount = 3) { - CreateOlapTableWithStore(tableName, storeName, storeShardsCount, tableShardsCount); + CreateOlapTablesWithStore({tableName}, storeName, storeShardsCount, tableShardsCount); } + + void CreateTestOlapTables(TVector tableNames = {"olapTable0", "olapTable1"}, TString storeName = "olapStore", + ui32 storeShardsCount = 4, ui32 tableShardsCount = 3) { + CreateOlapTablesWithStore(tableNames, storeName, storeShardsCount, tableShardsCount); + } + using TBase::TBase; TLocalHelper(TKikimrRunner& runner) diff --git a/ydb/core/kqp/ut/olap/helpers/typed_local.cpp b/ydb/core/kqp/ut/olap/helpers/typed_local.cpp index a37600897d2b..32c08c2c8925 100644 --- a/ydb/core/kqp/ut/olap/helpers/typed_local.cpp +++ b/ydb/core/kqp/ut/olap/helpers/typed_local.cpp @@ -18,6 +18,31 @@ TString TTypedLocalHelper::GetTestTableSchema() const { return result; } +TString TTypedLocalHelper::GetMultiColumnTestTableSchema(ui32 reps) const { + TString result; + result += R"( + Columns { Name: "pk_int" Type: "Int64" NotNull: true } + Columns { Name: "ts" Type: "Timestamp" } + )"; + for (ui32 i = 0; i < reps; i++) { + TString strNum = ToString(i); + result += "Columns {Name: \"field_utf" + strNum + "\" Type: \"Utf8\"}\n"; + result += "Columns {Name: \"field_int" + strNum + "\" Type: \"Int64\"}\n"; + result += "Columns {Name: \"field_uint" + strNum + "\" Type: \"Uint8\"}\n"; + result += "Columns {Name: \"field_float" + strNum + "\" Type: \"Float\"}\n"; + result += "Columns {Name: \"field_double" + strNum + "\" Type: \"Double\"}\n"; + } + result += R"( + KeyColumnNames: "pk_int" + Engine: COLUMN_ENGINE_REPLACING_TIMESERIES + )"; + return result; +} + +void TTypedLocalHelper::CreateMultiColumnOlapTableWithStore(ui32 reps, ui32 storeShardsCount, ui32 tableShardsCount) { + CreateSchemaOlapTablesWithStore(GetMultiColumnTestTableSchema(reps), {TableName}, "olapStore", storeShardsCount, tableShardsCount); +} + void TTypedLocalHelper::ExecuteSchemeQuery(const TString& alterQuery, const 
NYdb::EStatus expectedStatus /*= EStatus::SUCCESS*/) const { auto session = KikimrRunner.GetTableClient().CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); @@ -79,7 +104,7 @@ NKikimr::NKqp::TTypedLocalHelper::TDistribution TTypedLocalHelper::GetDistributi } void TTypedLocalHelper::GetVolumes(ui64& rawBytes, ui64& bytes, const bool verbose /*= false*/, const std::vector columnNames /*= {}*/) { - TString selectQuery = "SELECT * FROM `" + TablePath + "/.sys/primary_index_stats` WHERE Activity = true"; + TString selectQuery = "SELECT * FROM `" + TablePath + "/.sys/primary_index_stats` WHERE Activity == 1"; if (columnNames.size()) { selectQuery += " AND EntityName IN ('" + JoinSeq("','", columnNames) + "')"; } @@ -144,7 +169,7 @@ void TTypedLocalHelper::FillPKOnly(const double pkKff /*= 0*/, const ui32 numRow } void TTypedLocalHelper::GetStats(std::vector& stats, const bool verbose /*= false*/) { - TString selectQuery = "SELECT * FROM `" + TablePath + "/.sys/primary_index_portion_stats` WHERE Activity = true"; + TString selectQuery = "SELECT * FROM `" + TablePath + "/.sys/primary_index_portion_stats` WHERE Activity == 1"; auto tableClient = KikimrRunner.GetTableClient(); auto rows = ExecuteScanQuery(tableClient, selectQuery, verbose); for (auto&& r : rows) { diff --git a/ydb/core/kqp/ut/olap/helpers/typed_local.h b/ydb/core/kqp/ut/olap/helpers/typed_local.h index a72cef64e33e..285c7d062b94 100644 --- a/ydb/core/kqp/ut/olap/helpers/typed_local.h +++ b/ydb/core/kqp/ut/olap/helpers/typed_local.h @@ -7,6 +7,8 @@ #include +#include + #include namespace NKikimr::NKqp { @@ -29,7 +31,7 @@ class TTypedLocalHelper: public Tests::NCS::THelper { : TBase(kikimrRunner.GetTestServer()) , TypeName(typeName) , KikimrRunner(kikimrRunner) - , TablePath("/Root/" + storeName + "/" + tableName) + , TablePath(storeName.empty() ? 
"/Root/" + tableName : "/Root/" + storeName + "/" + tableName) , TableName(tableName) , StoreName(storeName) { SetShardingMethod("HASH_FUNCTION_CONSISTENCY_64"); @@ -83,11 +85,38 @@ class TTypedLocalHelper: public Tests::NCS::THelper { TBase::SendDataViaActorSystem(TablePath, batch); } + void FillMultiColumnTable(ui32 repCount, const double pkKff = 0, const ui32 numRows = 800000) const { + const double frq = 0.9; + NArrow::NConstruction::TPoolFiller int64Pool(1000, 0, frq); + NArrow::NConstruction::TPoolFiller uint8Pool(1000, 0, frq); + NArrow::NConstruction::TPoolFiller floatPool(1000, 0, frq); + NArrow::NConstruction::TPoolFiller doublePool(1000, 0, frq); + NArrow::NConstruction::TPoolFiller utfPool(1000, 52, "abcde", frq); + + std::vector builders; + builders.emplace_back(NArrow::NConstruction::TSimpleArrayConstructor>::BuildNotNullable("pk_int", numRows * pkKff)); + for (ui32 i = 0; i < repCount; i++) { + TString repStr = ToString(i); + builders.emplace_back(std::make_shared>>("field_utf" + repStr, utfPool, i)); + builders.emplace_back(std::make_shared>>("field_int" + repStr, int64Pool, i)); + builders.emplace_back(std::make_shared>>("field_uint" + repStr, uint8Pool, i)); + builders.emplace_back(std::make_shared>>("field_float" + repStr, floatPool, i)); + builders.emplace_back(std::make_shared>>("field_double" + repStr, doublePool, i)); + } + NArrow::NConstruction::TRecordBatchConstructor batchBuilder(builders); + std::shared_ptr batch = batchBuilder.BuildBatch(numRows); + TBase::SendDataViaActorSystem(TablePath, batch); + } + + void FillPKOnly(const double pkKff = 0, const ui32 numRows = 800000) const; void CreateTestOlapTable(ui32 storeShardsCount = 4, ui32 tableShardsCount = 3) { - CreateOlapTableWithStore(TableName, StoreName, storeShardsCount, tableShardsCount); + CreateOlapTablesWithStore({TableName}, StoreName, storeShardsCount, tableShardsCount); } + + TString GetMultiColumnTestTableSchema(ui32 reps) const; + void 
CreateMultiColumnOlapTableWithStore(ui32 reps, ui32 storeShardsCount = 4, ui32 tableShardsCount = 3); }; -} \ No newline at end of file +} diff --git a/ydb/core/kqp/ut/olap/indexes_ut.cpp b/ydb/core/kqp/ut/olap/indexes_ut.cpp index 331d54dd32de..13e98b57e9b6 100644 --- a/ydb/core/kqp/ut/olap/indexes_ut.cpp +++ b/ydb/core/kqp/ut/olap/indexes_ut.cpp @@ -19,8 +19,8 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { TKikimrRunner kikimr(settings); auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); - csController->SetLagForCompactionBeforeTierings(TDuration::Seconds(1)); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); TLocalHelper(kikimr).CreateTestOlapTable(); @@ -113,8 +113,8 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { TKikimrRunner kikimr(settings); auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); - csController->SetLagForCompactionBeforeTierings(TDuration::Seconds(1)); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); TLocalHelper(kikimr).CreateTestOlapTable(); @@ -236,7 +236,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { TKikimrRunner kikimr(settings); auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); TLocalHelper(kikimr).CreateTestOlapTable(); auto tableClient = kikimr.GetTableClient(); diff --git a/ydb/core/kqp/ut/olap/kqp_olap_stats_ut.cpp 
b/ydb/core/kqp/ut/olap/kqp_olap_stats_ut.cpp index 4f6c90056e89..c1fcab4be0fd 100644 --- a/ydb/core/kqp/ut/olap/kqp_olap_stats_ut.cpp +++ b/ydb/core/kqp/ut/olap/kqp_olap_stats_ut.cpp @@ -21,10 +21,10 @@ Y_UNIT_TEST_SUITE(KqpOlapStats) { class TOlapStatsController : public NYDBTest::NColumnShard::TController { public: - TDuration GetPeriodicWakeupActivationPeriod(const TDuration /*defaultValue*/) const override { + TDuration DoGetPeriodicWakeupActivationPeriod(const TDuration /*defaultValue*/) const override { return TDuration::MilliSeconds(10); } - TDuration GetStatsReportInterval(const TDuration /*defaultValue*/) const override { + TDuration DoGetStatsReportInterval(const TDuration /*defaultValue*/) const override { return TDuration::MilliSeconds(10); } }; diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp index ca25c070056a..390d28b931ca 100644 --- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp +++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp @@ -51,9 +51,10 @@ Y_UNIT_TEST_SUITE(KqpOlap) { PARTITION BY HASH(timestamp) WITH ( STORE = COLUMN, - AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = %d + PARTITION_COUNT = %d ) - )", storeName.data(), tableName.data(), shardsCount); + )", + storeName.data(), tableName.data(), shardsCount); auto result = session.ExecuteSchemeQuery(query).GetValueSync(); if (result.GetStatus() != EStatus::SUCCESS) { Cerr << result.GetIssues().ToOneLineString() << Endl; @@ -1843,8 +1844,8 @@ Y_UNIT_TEST_SUITE(KqpOlap) { PARTITION BY HASH(WatchID) WITH ( STORE = COLUMN, - AUTO_PARTITIONING_MIN_PARTITIONS_COUNT =)" << numShards - << ")"; + PARTITION_COUNT =)" << numShards + << ")"; auto result = session.ExecuteSchemeQuery(query).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); @@ -1931,10 +1932,9 @@ Y_UNIT_TEST_SUITE(KqpOlap) { WITH ( STORE = COLUMN, AUTO_PARTITIONING_BY_SIZE = ENABLED, - AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 1 + PARTITION_COUNT = 1 ); - )" - ); 
+ )"); lHelper.StartDataRequest( R"( @@ -1986,10 +1986,9 @@ Y_UNIT_TEST_SUITE(KqpOlap) { WITH ( STORE = COLUMN, AUTO_PARTITIONING_BY_SIZE = ENABLED, - AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 1 + PARTITION_COUNT = 1 ); - )" - ); + )"); lHelper.StartDataRequest( R"( @@ -1998,7 +1997,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { ); } -/* + /* Y_UNIT_TEST(OlapDeletePlanned) { TPortManager pm; @@ -2038,7 +2037,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { WITH ( STORE = COLUMN, AUTO_PARTITIONING_BY_SIZE = ENABLED, - AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 8 + PARTITION_COUNT = 8 ); )" ); @@ -2509,7 +2508,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { PRIMARY KEY (a) ) PARTITION BY HASH(a) - WITH (STORE = COLUMN, AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 4); + WITH (STORE = COLUMN, PARTITION_COUNT = 4); )"; auto result = session.ExecuteSchemeQuery(query).GetValueSync(); @@ -2551,7 +2550,8 @@ Y_UNIT_TEST_SUITE(KqpOlap) { UNIT_ASSERT_EQUAL_C(plan.QueryStats->Getquery_ast().find("WideFromBlocks"), plan.QueryStats->Getquery_ast().rfind("WideFromBlocks"), plan.QueryStats->Getquery_ast()); break; case NKikimrConfig::TTableServiceConfig_EBlockChannelsMode_BLOCK_CHANNELS_FORCE: - UNIT_ASSERT_C(plan.QueryStats->Getquery_ast().Contains("return (FromFlow (WideMap"), plan.QueryStats->Getquery_ast()); + UNIT_ASSERT_C(plan.QueryStats->Getquery_ast().Contains("(FromFlow (WideSortBlocks"), plan.QueryStats->Getquery_ast()); + UNIT_ASSERT_C(plan.QueryStats->Getquery_ast().Contains("(FromFlow (NarrowMap (WideFromBlocks"), plan.QueryStats->Getquery_ast()); break; } } @@ -2724,8 +2724,8 @@ Y_UNIT_TEST_SUITE(KqpOlap) { TLocalHelper testHelper(kikimr); auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); - csController->SetLagForCompactionBeforeTierings(TDuration::Seconds(1)); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + 
csController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); @@ -2790,6 +2790,121 @@ Y_UNIT_TEST_SUITE(KqpOlap) { } } + Y_UNIT_TEST(CountWhereColumnIsNull) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::TX_COLUMNSHARD_SCAN, NActors::NLog::PRI_DEBUG); + + TLocalHelper(kikimr).CreateTestOlapTable(); + + WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000, 300, true); + + auto client = kikimr.GetTableClient(); + + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + + { + auto it = client.StreamExecuteScanQuery(R"( + --!syntax_v1 + + SELECT COUNT(*), COUNT(level) + FROM `/Root/olapStore/olapTable` + WHERE level IS NULL + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson("[[100u;0u]]", result); + } + + { + auto it = client.StreamExecuteScanQuery(R"( + --!syntax_v1 + + SELECT COUNT(*), COUNT(level) + FROM `/Root/olapStore/olapTable` + WHERE level IS NULL AND uid IS NOT NULL + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson("[[100u;0u]]", result); + } + + { + auto it = client.StreamExecuteScanQuery(R"( + --!syntax_v1 + + SELECT COUNT(*), COUNT(level) + FROM `/Root/olapStore/olapTable` + WHERE level IS NULL + GROUP BY level + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson("[[100u;0u]]", result); + } + } + + Y_UNIT_TEST(SimpleCount) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner 
kikimr(settings); + kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::TX_COLUMNSHARD_SCAN, NActors::NLog::PRI_DEBUG); + + TLocalHelper(kikimr).CreateTestOlapTable(); + + WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000, 300, true); + + auto client = kikimr.GetTableClient(); + + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + + { + auto it = client.StreamExecuteScanQuery(R"( + --!syntax_v1 + + SELECT COUNT(level) + FROM `/Root/olapStore/olapTable` + WHERE StartsWith(uid, "uid_") + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson("[[200u]]", result); + } + } + + Y_UNIT_TEST(TableSinkWithOlapStore) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); + auto settings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + + TLocalHelper(kikimr).CreateTestOlapTables(); + + WriteTestData(kikimr, "/Root/olapStore/olapTable0", 0, 1000000, 3, true); + + auto client = kikimr.GetQueryClient(); + { + auto result = client.ExecuteQuery(R"( + SELECT * FROM `/Root/olapStore/olapTable0` ORDER BY timestamp; + INSERT INTO `/Root/olapStore/olapTable1` SELECT * FROM `/Root/olapStore/olapTable0`; + REPLACE INTO `/Root/olapStore/olapTable0` SELECT * FROM `/Root/olapStore/olapTable1`; + SELECT * FROM `/Root/olapStore/olapTable1` ORDER BY timestamp; + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + } } } diff --git a/ydb/core/kqp/ut/olap/sparsed_ut.cpp b/ydb/core/kqp/ut/olap/sparsed_ut.cpp new file mode 100644 index 000000000000..73b75f2cc53f --- /dev/null +++ b/ydb/core/kqp/ut/olap/sparsed_ut.cpp @@ -0,0 +1,307 @@ +#include "helpers/local.h" +#include "helpers/writer.h" +#include 
"helpers/typed_local.h" +#include "helpers/query_executor.h" +#include "helpers/get_value.h" + +#include +#include +#include +#include + +namespace NKikimr::NKqp { + +Y_UNIT_TEST_SUITE(KqpOlapSparsed) { + + class TSparsedDataTest { + private: + const TKikimrSettings Settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner Kikimr; + NKikimr::NYDBTest::TControllers::TGuard CSController; + const TString StoreName; + ui32 MultiColumnRepCount = 100; + static const ui32 SKIP_GROUPS = 7; + const TVector FIELD_NAMES{"utf", "int", "uint", "float", "double"}; + public: + TSparsedDataTest(const TString& storeName) + : Kikimr(Settings) + , CSController(NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard()) + , StoreName(storeName) + { + + } + + ui32 GetCount() const { + auto selectQuery = TString(R"( + SELECT + count(*) as count, + FROM `/Root/)") + (StoreName.empty() ? "" : StoreName + "/") + "olapTable`"; + + auto tableClient = Kikimr.GetTableClient(); + auto rows = ExecuteScanQuery(tableClient, selectQuery); + return GetUint64(rows[0].at("count")); + } + + ui32 GetDefaultsCount(const TString& fieldName, const TString& defValueStr) const { + auto selectQueryTmpl = TString(R"( + SELECT + count(*) as count, + FROM `/Root/)") + (StoreName.empty() ? "" : StoreName + "/") + R"(olapTable` + WHERE %s == %s + )"; + + auto tableClient = Kikimr.GetTableClient(); + auto rows = ExecuteScanQuery(tableClient, Sprintf(selectQueryTmpl.c_str(), fieldName.c_str(), defValueStr.c_str())); + return GetUint64(rows[0].at("count")); + } + + void GetAllDefaultsCount(ui64* counts, ui32 skipCount) { + TString query = "SELECT"; + ui32 groupsCount = 0; + for (ui32 i = 0; i < MultiColumnRepCount; i += skipCount) { + query += Sprintf("%s field_utf%u == 'abcde' AS def_utf%u, field_uint%u == 0 AS def_uint%u, field_int%u == 0 AS def_int%u, field_float%u == 0 AS def_float%u, field_double%u == 0 AS def_double%u", i == 0 ? 
"" : ",", i, i, i, i, i, i, i, i, i, i); + groupsCount++; + } + query += " FROM `/Root/olapStore/olapTable`"; + auto tableClient = Kikimr.GetTableClient(); + + auto start = TInstant::Now().Seconds(); + + auto printTime = [&](const char* prefix) { + auto finish = TInstant::Now().Seconds(); + fprintf(stderr, "Timing: %s took %lu seconds\n", prefix, finish - start); + start = finish; + }; + + auto rows = ExecuteScanQuery(tableClient, query, false); + + printTime("Executing query"); + + Fill(&counts[0], &counts[FIELD_NAMES.size() * groupsCount], 0); + + for (auto& row: rows) { + auto incCounts = [&](ui32 i, const TString& column) { + if (*NYdb::TValueParser(row.at(column)).GetOptionalBool()) { + counts[i]++; + } + }; + ui32 ind = 0; + for (ui32 i = 0; i < MultiColumnRepCount; i += skipCount) { + TString grStr = ToString(i); + incCounts(ind++, "def_utf" + grStr); + incCounts(ind++, "def_uint" + grStr); + incCounts(ind++, "def_int" + grStr); + incCounts(ind++, "def_float" + grStr); + incCounts(ind++, "def_double" + grStr); + } + } + } + + void CheckAllFieldsTable(bool firstCall, ui32 countExpectation, ui32* defCountStart) { + ui32 grCount = (MultiColumnRepCount + SKIP_GROUPS - 1) / SKIP_GROUPS; + ui64 defCounts[FIELD_NAMES.size() * grCount]; + const ui32 count = GetCount(); + GetAllDefaultsCount(defCounts, SKIP_GROUPS); + for (ui32 i = 0; i < FIELD_NAMES.size() * grCount; i++) { + if (firstCall) { + defCountStart[i] = defCounts[i]; + } else { + AFL_VERIFY(defCountStart[i] == defCounts[i]); + } + AFL_VERIFY(count == countExpectation)("expect", countExpectation)("count", count); + AFL_VERIFY(1.0 * defCounts[i] / count < 0.95)("def", defCounts[i])("count", count); + AFL_VERIFY(1.0 * defCounts[i] / count > 0.85)("def", defCounts[i])("count", count); + } + } + + void CheckTable(const TString& fieldName, const TString& defValueStr, bool firstCall, ui32 countExpectation, ui32& defCountStart) { + const ui32 defCount = GetDefaultsCount(fieldName, defValueStr); + if (firstCall) { 
+ defCountStart = defCount; + } else { + AFL_VERIFY(defCountStart == defCount); + } + const ui32 count = GetCount(); + AFL_VERIFY(count == countExpectation)("expect", countExpectation)("count", count); + AFL_VERIFY(1.0 * defCount / count < 0.95)("def", defCount)("count", count); + AFL_VERIFY(1.0 * defCount / count > 0.85)("def", defCount)("count", count); + } + + template + void FillCircleImpl(TFillTable&& fillTable, TCheckTable&& checkTable) { + auto start = TInstant::Now().Seconds(); + + auto printTime = [&](const char* prefix) { + auto finish = TInstant::Now().Seconds(); + fprintf(stderr, "Timing: %s took %lu seconds\n", prefix, finish - start); + start = finish; + }; + + fillTable(); + printTime("fillTable"); + checkTable(true); + printTime("checkTable"); + + CSController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + CSController->WaitIndexation(TDuration::Seconds(5)); + printTime("wait"); + + checkTable(false); + printTime("checkTable"); + + CSController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + CSController->WaitCompactions(TDuration::Seconds(5)); + printTime("wait"); + + checkTable(false); + printTime("checkTable"); + + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + printTime("wait"); + } + + void FillCircle(const double shiftKff, const ui32 countExpectation) { + ui32 defCountStart = (ui32)-1; + FillCircleImpl([&]() { + TTypedLocalHelper helper("Utf8", Kikimr, "olapTable", StoreName); + const double frq = 0.9; + NArrow::NConstruction::TStringPoolFiller sPool(1000, 52, "abcde", frq); + helper.FillTable(sPool, shiftKff, 10000); + }, + [&](bool firstCall) { + CheckTable("field", "'abcde'", firstCall, countExpectation, defCountStart); + }); + } + + void FillMultiColumnCircle(const double shiftKff, const ui32 countExpectation) { + ui32 grCount = 
(MultiColumnRepCount + SKIP_GROUPS - 1) / SKIP_GROUPS; + ui32 defCountStart[FIELD_NAMES.size() * grCount]; + FillCircleImpl([&]() { + TTypedLocalHelper helper("Utf8", Kikimr); + helper.FillMultiColumnTable(MultiColumnRepCount, shiftKff, 10000); + }, + [&](bool firstCall) { + CheckAllFieldsTable(firstCall, countExpectation, defCountStart); + }); + } + + void Execute() { + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + CSController->SetOverridePeriodicWakeupActivationPeriod(TDuration::MilliSeconds(100)); + + Tests::NCommon::TLoggerInit(Kikimr).Initialize(); + TTypedLocalHelper helper("Utf8", Kikimr, "olapTable", StoreName); + if (!StoreName.empty()) { + helper.CreateTestOlapTable(); + } else { + auto tableClient = Kikimr.GetTableClient(); + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + + auto query = TStringBuilder() << R"( + --!syntax_v1 + CREATE TABLE `/Root/olapTable` + ( + pk_int int64 NOT NULL, + field )" << "Utf8" << R"(, + ts TimeStamp, + PRIMARY KEY (pk_int) + ) + PARTITION BY HASH(pk_int) + WITH ( + STORE = COLUMN + ))"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + } + + TString type = StoreName.empty() ? "TABLE" : "TABLESTORE"; + TString name = StoreName.empty() ? 
"olapTable" : "olapStore"; + + FillCircle(0, 10000); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/" + name + "`(TYPE " + type + ") SET (ACTION=ALTER_COLUMN, NAME=field, `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`SPARSED`, `DEFAULT_VALUE`=`abcde`);"); + FillCircle(0.1, 11000); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/" + name + "`(TYPE " + type + ") SET (ACTION=ALTER_COLUMN, NAME=field, `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`PLAIN`);"); + FillCircle(0.2, 12000); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/" + name + "`(TYPE " + type + ") SET (ACTION=ALTER_COLUMN, NAME=field, `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`SPARSED`);"); + FillCircle(0.3, 13000); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/" + name + "`(TYPE " + type + ") SET (ACTION=ALTER_COLUMN, NAME=field, `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`PLAIN`);"); + FillCircle(0.4, 14000); + } + + void ExecuteMultiColumn() { + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + CSController->SetOverridePeriodicWakeupActivationPeriod(TDuration::MilliSeconds(100)); + + Tests::NCommon::TLoggerInit(Kikimr).Initialize(); + TTypedLocalHelper helper("Utf8", Kikimr); + helper.CreateMultiColumnOlapTableWithStore(MultiColumnRepCount); + + auto start = TInstant::Now().Seconds(); + + auto printTime = [&](const char* prefix) { + auto finish = TInstant::Now().Seconds(); + fprintf(stderr, "Timing: %s took %lu seconds\n", prefix, finish - start); + start = finish; + }; + + FillMultiColumnCircle(0, 10000); + printTime("Fill"); + for (ui32 i = 0; i < MultiColumnRepCount; i += SKIP_GROUPS) { + TString grStr = ToString(i); + for (ui32 f = 0; f < FIELD_NAMES.size(); f++) { + helper.ExecuteSchemeQuery(TString("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field_") + FIELD_NAMES[f] + grStr + ", `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`SPARSED`, 
`DEFAULT_VALUE`=" + (f == 0 ? "`abcde`" : "`0`") + ");"); + } + } + printTime("Alter"); + FillMultiColumnCircle(0.1, 11000); + printTime("Fill"); + for (ui32 i = 0; i < MultiColumnRepCount; i += SKIP_GROUPS) { + TString grStr = ToString(i); + for (ui32 f = 0; f < FIELD_NAMES.size(); f++) { + helper.ExecuteSchemeQuery(TString("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field_") + FIELD_NAMES[f] + grStr + ", `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`PLAIN`);"); + } + } + printTime("Alter"); + FillMultiColumnCircle(0.2, 12000); + printTime("Fill"); + for (ui32 i = 0; i < MultiColumnRepCount; i += SKIP_GROUPS) { + TString grStr = ToString(i); + for (ui32 f = 0; f < FIELD_NAMES.size(); f++) { + helper.ExecuteSchemeQuery(TString("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field_") + FIELD_NAMES[f] + grStr + ", `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`SPARSED`);"); + } + } + printTime("Alter"); + FillMultiColumnCircle(0.3, 13000); + printTime("Fill"); + for (ui32 i = 0; i < MultiColumnRepCount; i += SKIP_GROUPS) { + TString grStr = ToString(i); + for (ui32 f = 0; f < FIELD_NAMES.size(); f++) { + helper.ExecuteSchemeQuery(TString("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field_") + FIELD_NAMES[f] + grStr + ", `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`PLAIN`);"); + } + } + printTime("Alter"); + FillMultiColumnCircle(0.4, 14000); + printTime("Fill"); + } + }; + + Y_UNIT_TEST(Switching) { + TSparsedDataTest test("olapStore"); + test.Execute(); + } + + Y_UNIT_TEST(SwitchingMultiColumn) { + TSparsedDataTest test("olapStore"); + test.ExecuteMultiColumn(); + } + + Y_UNIT_TEST(SwitchingStandalone) { + TSparsedDataTest test(""); + test.Execute(); + } +} + +} // namespace diff --git a/ydb/core/kqp/ut/olap/sys_view_ut.cpp b/ydb/core/kqp/ut/olap/sys_view_ut.cpp index 8583967214ca..27820452542e 100644 --- a/ydb/core/kqp/ut/olap/sys_view_ut.cpp +++ 
b/ydb/core/kqp/ut/olap/sys_view_ut.cpp @@ -453,7 +453,7 @@ Y_UNIT_TEST_SUITE(KqpOlapSysView) { SELECT PathId, Kind, TabletId FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - PathId == UInt64("3") AND Activity = true + PathId == UInt64("3") AND Activity == 1 GROUP BY TabletId, PathId, Kind ORDER BY TabletId, Kind )"); diff --git a/ydb/core/kqp/ut/olap/tiering_ut.cpp b/ydb/core/kqp/ut/olap/tiering_ut.cpp new file mode 100644 index 000000000000..b9cceba93738 --- /dev/null +++ b/ydb/core/kqp/ut/olap/tiering_ut.cpp @@ -0,0 +1,161 @@ +#include "helpers/get_value.h" +#include "helpers/local.h" +#include "helpers/query_executor.h" +#include "helpers/typed_local.h" +#include "helpers/writer.h" + +#include +#include +#include +#include +#include + +namespace NKikimr::NKqp { + +Y_UNIT_TEST_SUITE(KqpOlapTiering) { + Y_UNIT_TEST(Eviction) { + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + + TKikimrSettings runnerSettings; + runnerSettings.WithSampleTables = false; + TTestHelper testHelper(runnerSettings); + TLocalHelper localHelper(testHelper.GetKikimr()); + NYdb::NTable::TTableClient tableClient = testHelper.GetKikimr().GetTableClient(); + Tests::NCommon::TLoggerInit(testHelper.GetKikimr()).Initialize(); + Singleton()->SetSecretKey("fakeSecret"); + + localHelper.CreateTestOlapTable(); + testHelper.CreateTier("tier1"); + const TString tieringRule = testHelper.CreateTieringRule("tier1", "timestamp"); + + for (ui64 i = 0; i < 100; ++i) { + WriteTestData(testHelper.GetKikimr(), "/Root/olapStore/olapTable", 0, i * 10000, 1000); + } + + csController->WaitActualization(TDuration::Seconds(5)); + + ui64 columnRawBytes = 0; + { + auto selectQuery = TString(R"( + SELECT + TierName, SUM(ColumnRawBytes) As RawBytes + FROM `/Root/olapStore/olapTable/.sys/primary_index_portion_stats` + WHERE Activity == 1 + GROUP BY TierName + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 1); + 
UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("TierName")), "__DEFAULT"); + + columnRawBytes = GetUint64(rows[0].at("RawBytes")); + UNIT_ASSERT_GT(columnRawBytes, 0); + } + + testHelper.SetTiering("/Root/olapStore/olapTable", tieringRule); + csController->WaitActualization(TDuration::Seconds(5)); + + { + auto selectQuery = TString(R"( + SELECT + TierName, SUM(ColumnRawBytes) As RawBytes + FROM `/Root/olapStore/olapTable/.sys/primary_index_portion_stats` + WHERE Activity == 1 + GROUP BY TierName + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("TierName")), "tier1"); + UNIT_ASSERT_VALUES_EQUAL_C(GetUint64(rows[0].at("RawBytes")), columnRawBytes, + TStringBuilder() << "RawBytes changed after eviction: before=" << columnRawBytes + << " after=" << GetUint64(rows[0].at("RawBytes"))); + } + + testHelper.ResetTiering("/Root/olapStore/olapTable"); + csController->WaitCompactions(TDuration::Seconds(5)); + + { + auto selectQuery = TString(R"( + SELECT + TierName, SUM(ColumnRawBytes) As RawBytes + FROM `/Root/olapStore/olapTable/.sys/primary_index_portion_stats` + WHERE Activity == 1 + GROUP BY TierName + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("TierName")), "__DEFAULT"); + UNIT_ASSERT_VALUES_EQUAL_C(GetUint64(rows[0].at("RawBytes")), columnRawBytes, + TStringBuilder() << "RawBytes changed after resetting tiering: before=" << columnRawBytes + << " after=" << GetUint64(rows[0].at("RawBytes"))); + } + } + + Y_UNIT_TEST(TieringRuleValidation) { + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + + TKikimrSettings runnerSettings; + runnerSettings.WithSampleTables = false; + TTestHelper testHelper(runnerSettings); + TLocalHelper localHelper(testHelper.GetKikimr()); + NYdb::NTable::TTableClient tableClient = 
testHelper.GetKikimr().GetTableClient(); + Tests::NCommon::TLoggerInit(testHelper.GetKikimr()).Initialize(); + Singleton()->SetSecretKey("fakeSecret"); + + localHelper.CreateTestOlapTable(); + testHelper.CreateTier("tier1"); + + { + const TString query = R"( + CREATE OBJECT IF NOT EXISTS empty_tiering_rule (TYPE TIERING_RULE) + WITH (defaultColumn = timestamp, description = `{"rules": []}`))"; + auto result = testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + + { + const TString query = R"( + CREATE OBJECT IF NOT EXISTS empty_default_column (TYPE TIERING_RULE) + WITH (defaultColumn = ``, description = `{"rules": [{ "tierName" : "tier1", "durationForEvict" : "10d" }]}`))"; + auto result = testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + + { + const TString query = R"( + CREATE OBJECT IF NOT EXISTS no_default_column (TYPE TIERING_RULE) + WITH (description = `{"rules": [{ "tierName" : "tier1", "durationForEvict" : "10d" }]}`))"; + auto result = testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + + const TString correctTieringRule = testHelper.CreateTieringRule("tier1", "timestamp"); + { + const TString query = "ALTER OBJECT " + correctTieringRule + R"( (TYPE TIERING_RULE) SET description `{"rules": []}`)"; + auto result = testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + + { + const TString query = "ALTER OBJECT " + correctTieringRule + R"( (TYPE TIERING_RULE) SET description `{"rules": []}`)"; + auto result = testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + + { + const TString query = "ALTER 
OBJECT " + correctTieringRule + R"( (TYPE TIERING_RULE) SET defaultColumn ``)"; + auto result = testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + + { + const TString query = "ALTER OBJECT " + correctTieringRule + R"( (TYPE TIERING_RULE) RESET defaultColumn)"; + auto result = testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + } +} + +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/ut/olap/write_ut.cpp b/ydb/core/kqp/ut/olap/write_ut.cpp index ac63da37bc72..8d9751f28193 100644 --- a/ydb/core/kqp/ut/olap/write_ut.cpp +++ b/ydb/core/kqp/ut/olap/write_ut.cpp @@ -15,7 +15,7 @@ Y_UNIT_TEST_SUITE(KqpOlapWrite) { Y_UNIT_TEST(TierDraftsGC) { auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); csController->SetIndexWriteControllerEnabled(false); - csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); Singleton()->ResetWriteCounters(); auto settings = TKikimrSettings() @@ -47,10 +47,31 @@ Y_UNIT_TEST_SUITE(KqpOlapWrite) { AFL_VERIFY(!Singleton()->GetSize()); } + Y_UNIT_TEST(TestRemoveTableBeforeIndexation) { + auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetIndexWriteControllerEnabled(false); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + + auto settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + TLocalHelper(kikimr).CreateTestOlapTable(); + Tests::NCommon::TLoggerInit(kikimr).SetComponents({ NKikimrServices::TX_COLUMNSHARD }, 
"CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); + auto tableClient = kikimr.GetTableClient(); + + WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); + TTypedLocalHelper("Utf8", kikimr).ExecuteSchemeQuery("DROP TABLE `/Root/olapStore/olapTable`;"); + csController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + csController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + csController->WaitIndexation(TDuration::Seconds(5)); + csController->WaitCompactions(TDuration::Seconds(5)); + } + Y_UNIT_TEST(TierDraftsGCWithRestart) { auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); csController->SetIndexWriteControllerEnabled(false); - csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1000)); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1000)); csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::GC); Singleton()->ResetWriteCounters(); @@ -133,7 +154,7 @@ Y_UNIT_TEST_SUITE(KqpOlapWrite) { Y_UNIT_TEST(WriteDeleteCleanGC) { auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); - csController->SetPeriodicWakeupActivationPeriod(TDuration::MilliSeconds(100)); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::MilliSeconds(100)); csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::GC); Singleton()->ResetWriteCounters(); @@ -176,7 +197,7 @@ Y_UNIT_TEST_SUITE(KqpOlapWrite) { )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); } - csController->SetReadTimeoutClean(TDuration::Zero()); + csController->SetOverrideReadTimeoutClean(TDuration::Zero()); csController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::GC); { const TInstant start = TInstant::Now(); diff --git a/ydb/core/kqp/ut/olap/ya.make 
b/ydb/core/kqp/ut/olap/ya.make index d9b7a06ef77b..e324116597eb 100644 --- a/ydb/core/kqp/ut/olap/ya.make +++ b/ydb/core/kqp/ut/olap/ya.make @@ -1,7 +1,7 @@ UNITTEST_FOR(ydb/core/kqp) FORK_SUBTESTS() -SPLIT_FACTOR(100) +SPLIT_FACTOR(200) IF (WITH_VALGRIND) TIMEOUT(3600) @@ -23,6 +23,9 @@ SRCS( clickbench_ut.cpp aggregations_ut.cpp write_ut.cpp + sparsed_ut.cpp + tiering_ut.cpp + decimal_ut.cpp ) PEERDIR( diff --git a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp index 7e607880cf0a..e65e5e34a38b 100644 --- a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp +++ b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -7070,6 +7071,7 @@ Y_UNIT_TEST_SUITE(KqpOlapScheme) { } testHelper.DropTable("/Root/ColumnTableTest"); for (auto tablet: tabletIds) { + testHelper.WaitTabletDeletionInHive(tablet, TDuration::Seconds(5)); UNIT_ASSERT_C(!testHelper.GetKikimr().GetTestClient().TabletExistsInHive(&testHelper.GetRuntime(), tablet), ToString(tablet) + " is alive"); } } @@ -7778,6 +7780,87 @@ Y_UNIT_TEST_SUITE(KqpOlapScheme) { testHelper.ReadData("SELECT * FROM `/Root/ColumnTableTest` WHERE id=1", "[[1;#;[\"test_res_1\"]]]"); } + void TestDropThenAddColumn(bool enableIndexation, bool enableCompaction) { + if (enableCompaction) { + Y_ABORT_UNLESS(enableIndexation); + } + + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->DisableBackground(NYDBTest::ICSController::EBackground::Indexation); + csController->DisableBackground(NYDBTest::ICSController::EBackground::Compaction); + + TKikimrSettings runnerSettings; + runnerSettings.WithSampleTables = false; + TTestHelper testHelper(runnerSettings); + + TVector schema = { + TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int32).SetNullable(false), + TTestHelper::TColumnSchema().SetName("value").SetType(NScheme::NTypeIds::Utf8), + }; + + TTestHelper::TColumnTable testTable; + 
testTable.SetName("/Root/ColumnTableTest").SetPrimaryKey({ "id" }).SetSharding({ "id" }).SetSchema(schema); + testHelper.CreateTable(testTable); + + { + TTestHelper::TUpdatesBuilder tableInserter(testTable.GetArrowSchema(schema)); + tableInserter.AddRow().Add(1).Add("test_res_1"); + tableInserter.AddRow().Add(2).Add("test_res_2"); + testHelper.BulkUpsert(testTable, tableInserter); + } + + if (enableCompaction) { + csController->EnableBackground(NYDBTest::ICSController::EBackground::Indexation); + csController->EnableBackground(NYDBTest::ICSController::EBackground::Compaction); + csController->WaitIndexation(TDuration::Seconds(5)); + csController->WaitCompactions(TDuration::Seconds(5)); + csController->DisableBackground(NYDBTest::ICSController::EBackground::Indexation); + csController->DisableBackground(NYDBTest::ICSController::EBackground::Compaction); + } + + { + auto alterQuery = TStringBuilder() << "ALTER TABLE `" << testTable.GetName() << "` DROP COLUMN value;"; + auto alterResult = testHelper.GetSession().ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + { + auto alterQuery = TStringBuilder() << "ALTER TABLE `" << testTable.GetName() << "` ADD COLUMN value Uint64;"; + auto alterResult = testHelper.GetSession().ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + schema.back().SetType(NScheme::NTypeIds::Uint64); + + { + TTestHelper::TUpdatesBuilder tableInserter(testTable.GetArrowSchema(schema)); + tableInserter.AddRow().Add(3).Add(42); + tableInserter.AddRow().Add(4).Add(43); + testHelper.BulkUpsert(testTable, tableInserter); + } + + if (enableIndexation) { + csController->EnableBackground(NYDBTest::ICSController::EBackground::Indexation); + csController->WaitIndexation(TDuration::Seconds(5)); + } + if (enableCompaction) { + 
csController->EnableBackground(NYDBTest::ICSController::EBackground::Compaction); + csController->WaitCompactions(TDuration::Seconds(5)); + } + + testHelper.ReadData("SELECT value FROM `/Root/ColumnTableTest`", "[[#];[#];[[42u]];[[43u]]]"); + } + + Y_UNIT_TEST(DropThenAddColumn) { + TestDropThenAddColumn(false, false); + } + + Y_UNIT_TEST(DropThenAddColumnIndexation) { + TestDropThenAddColumn(true, true); + } + + Y_UNIT_TEST(DropThenAddColumnCompaction) { + TestDropThenAddColumn(true, true); + } + Y_UNIT_TEST(DropTtlColumn) { TKikimrSettings runnerSettings; runnerSettings.WithSampleTables = false; diff --git a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp index c875fe47cd87..ddab286a219d 100644 --- a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp +++ b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp @@ -3235,8 +3235,8 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto session = Kikimr->GetTableClient().CreateSession().GetValueSync().GetSession(); auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); - csController->SetLagForCompactionBeforeTierings(TDuration::Seconds(1)); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); const TString query = Sprintf(R"( diff --git a/ydb/core/kqp/ut/tx/kqp_sink_common.h b/ydb/core/kqp/ut/tx/kqp_sink_common.h index 2d4a7e48e9cb..80dae769b18f 100644 --- a/ydb/core/kqp/ut/tx/kqp_sink_common.h +++ b/ydb/core/kqp/ut/tx/kqp_sink_common.h @@ -37,16 +37,16 @@ class TTableDataModificationTester { auto client = Kikimr->GetQueryClient(); auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); - 
csController->SetLagForCompactionBeforeTierings(TDuration::Seconds(1)); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); { auto type = IsOlap ? "COLUMN" : "ROW"; auto result = client.ExecuteQuery(Sprintf(R"( CREATE TABLE `/Root/Test` ( - Group Uint32, - Name String, + Group Uint32 not null, + Name String not null, Amount Uint64, Comment String, PRIMARY KEY (Group, Name) @@ -56,7 +56,7 @@ class TTableDataModificationTester { ); CREATE TABLE `/Root/KV` ( - Key Uint32, + Key Uint32 not null, Value String, PRIMARY KEY (Key) ) WITH ( @@ -68,7 +68,7 @@ class TTableDataModificationTester { ); CREATE TABLE `/Root/KV2` ( - Key Uint32, + Key Uint32 not null, Value String, PRIMARY KEY (Key) ) WITH ( diff --git a/ydb/core/kqp/ut/tx/kqp_sink_locks_ut.cpp b/ydb/core/kqp/ut/tx/kqp_sink_locks_ut.cpp index ec02a3211882..4d2361114b15 100644 --- a/ydb/core/kqp/ut/tx/kqp_sink_locks_ut.cpp +++ b/ydb/core/kqp/ut/tx/kqp_sink_locks_ut.cpp @@ -52,7 +52,7 @@ Y_UNIT_TEST_SUITE(KqpSinkLocks) { SELECT * FROM `/Root/Test` WHERE Name == "Paul" ORDER BY Group, Name; )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - CompareYson(R"([[[300u];["Changed"];[1u];["Paul"]]])", FormatResultSetYson(result.GetResultSet(0))); + CompareYson(R"([[[300u];["Changed"];1u;"Paul"]])", FormatResultSetYson(result.GetResultSet(0))); } }; @@ -62,6 +62,12 @@ Y_UNIT_TEST_SUITE(KqpSinkLocks) { tester.Execute(); } + Y_UNIT_TEST(TInvalidateOlap) { + TInvalidate tester; + tester.SetIsOlap(true); + tester.Execute(); + } + class TInvalidateOnCommit : public TTableDataModificationTester { protected: void DoExecute() override { @@ -99,7 +105,7 @@ Y_UNIT_TEST_SUITE(KqpSinkLocks) { SELECT * 
FROM `/Root/Test` WHERE Name == "Paul" ORDER BY Group, Name; )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - CompareYson(R"([[[300u];["Changed"];[1u];["Paul"]]])", FormatResultSetYson(result.GetResultSet(0))); + CompareYson(R"([[[300u];["Changed"];1u;"Paul"]])", FormatResultSetYson(result.GetResultSet(0))); } }; @@ -109,6 +115,11 @@ Y_UNIT_TEST_SUITE(KqpSinkLocks) { tester.Execute(); } + Y_UNIT_TEST(InvalidateOlapOnCommit) { + TInvalidateOnCommit tester; + tester.SetIsOlap(true); + tester.Execute(); + } class TDifferentKeyUpdate : public TTableDataModificationTester { protected: @@ -145,6 +156,12 @@ Y_UNIT_TEST_SUITE(KqpSinkLocks) { tester.Execute(); } + Y_UNIT_TEST(DifferentKeyUpdateOlap) { + TDifferentKeyUpdate tester; + tester.SetIsOlap(true); + tester.Execute(); + } + class TEmptyRange : public TTableDataModificationTester { protected: void DoExecute() override { @@ -164,7 +181,6 @@ Y_UNIT_TEST_SUITE(KqpSinkLocks) { result = session2.ExecuteQuery(Q1_(R"( SELECT * FROM Test WHERE Group = 11; - UPSERT INTO Test (Group, Name, Amount) VALUES (11, "Session2", 2); )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); @@ -186,7 +202,7 @@ Y_UNIT_TEST_SUITE(KqpSinkLocks) { SELECT * FROM Test WHERE Group = 11; )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - CompareYson(R"([[[2u];#;[11u];["Session2"]]])", FormatResultSetYson(result.GetResultSet(0))); + CompareYson(R"([[[2u];#;11u;"Session2"]])", FormatResultSetYson(result.GetResultSet(0))); } }; @@ -196,6 +212,12 @@ Y_UNIT_TEST_SUITE(KqpSinkLocks) { tester.Execute(); } + Y_UNIT_TEST(EmptyRangeOlap) { + TEmptyRange tester; + tester.SetIsOlap(true); + tester.Execute(); + } + class TEmptyRangeAlreadyBroken : public TTableDataModificationTester { protected: void DoExecute() 
override { @@ -239,7 +261,7 @@ Y_UNIT_TEST_SUITE(KqpSinkLocks) { SELECT * FROM Test WHERE Group = 11; )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - CompareYson(R"([[[2u];#;[11u];["Session2"]]])", FormatResultSetYson(result.GetResultSet(0))); + CompareYson(R"([[[2u];#;11u;"Session2"]])", FormatResultSetYson(result.GetResultSet(0))); } }; @@ -249,6 +271,12 @@ Y_UNIT_TEST_SUITE(KqpSinkLocks) { tester.Execute(); } + Y_UNIT_TEST(EmptyRangeAlreadyBrokenOlap) { + TEmptyRangeAlreadyBroken tester; + tester.SetIsOlap(true); + tester.Execute(); + } + class TUncommittedRead : public TTableDataModificationTester { protected: void DoExecute() override { @@ -279,7 +307,7 @@ Y_UNIT_TEST_SUITE(KqpSinkLocks) { SELECT * FROM Test WHERE Group = 11; )"), TTxControl::Tx(tx1->GetId())).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - CompareYson(R"([[[2u];#;[11u];["TEST"]]])", FormatResultSetYson(result.GetResultSet(0))); + CompareYson(R"([[[2u];#;11u;"TEST"]])", FormatResultSetYson(result.GetResultSet(0))); } } }; @@ -289,6 +317,12 @@ Y_UNIT_TEST_SUITE(KqpSinkLocks) { tester.SetIsOlap(false); tester.Execute(); } + + Y_UNIT_TEST(OlapUncommittedRead) { + TUncommittedRead tester; + tester.SetIsOlap(true); + tester.Execute(); + } } } // namespace NKqp diff --git a/ydb/core/kqp/ut/tx/kqp_sink_mvcc_ut.cpp b/ydb/core/kqp/ut/tx/kqp_sink_mvcc_ut.cpp index e50a9652b303..83a62f070faf 100644 --- a/ydb/core/kqp/ut/tx/kqp_sink_mvcc_ut.cpp +++ b/ydb/core/kqp/ut/tx/kqp_sink_mvcc_ut.cpp @@ -26,8 +26,8 @@ Y_UNIT_TEST_SUITE(KqpSinkMvcc) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); CompareYson(R"([ - [[1u];["One"]]; - [[4000000001u];["BigOne"]] + [1u;["One"]]; + [4000000001u;["BigOne"]] ])", FormatResultSetYson(result.GetResultSet(0))); auto tx = result.GetTransaction(); @@ 
-69,6 +69,13 @@ Y_UNIT_TEST_SUITE(KqpSinkMvcc) { tester.Execute(); } +// Y_UNIT_TEST(OlapSnapshotExpiration) { +// TSnapshotExpiration tester; +// tester.SetFastSnapshotExpiration(true); +// tester.SetIsOlap(true); +// tester.Execute(); +// } + class TReadOnlyTxCommitsOnConcurrentWrite : public TTableDataModificationTester { protected: void DoExecute() override { @@ -85,8 +92,8 @@ Y_UNIT_TEST_SUITE(KqpSinkMvcc) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); CompareYson(R"([ - [[1u];["One"]]; - [[4000000001u];["BigOne"]] + [1u;["One"]]; + [4000000001u;["BigOne"]] ])", FormatResultSetYson(result.GetResultSet(0))); result = session2.ExecuteQuery(Q_(R"( @@ -101,7 +108,7 @@ Y_UNIT_TEST_SUITE(KqpSinkMvcc) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); CompareYson(R"([ - [[1u];["ChangedOne"]]; + [1u;["ChangedOne"]]; ])", FormatResultSetYson(result.GetResultSet(0))); result = session1.ExecuteQuery(Q_(R"( @@ -110,7 +117,7 @@ Y_UNIT_TEST_SUITE(KqpSinkMvcc) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); CompareYson(R"([ - [[1u];["One"]]; + [1u;["One"]]; ])", FormatResultSetYson(result.GetResultSet(0))); result = session1.ExecuteQuery(Q_(R"( @@ -119,8 +126,8 @@ Y_UNIT_TEST_SUITE(KqpSinkMvcc) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); CompareYson(R"([ - [[2u];["Two"]]; - [[4000000002u];["BigTwo"]] + [2u;["Two"]]; + [4000000002u;["BigTwo"]] ])", FormatResultSetYson(result.GetResultSet(0))); } }; @@ -130,7 +137,13 @@ Y_UNIT_TEST_SUITE(KqpSinkMvcc) { tester.SetIsOlap(false); tester.Execute(); } - + + Y_UNIT_TEST(OlapReadOnlyTxCommitsOnConcurrentWrite) { + TReadOnlyTxCommitsOnConcurrentWrite tester; + tester.SetIsOlap(true); + tester.Execute(); + } + class TReadWriteTxFailsOnConcurrentWrite1 : public TTableDataModificationTester { protected: void DoExecute() override { @@ 
-147,8 +160,8 @@ Y_UNIT_TEST_SUITE(KqpSinkMvcc) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); CompareYson(R"([ - [[1u];["One"]]; - [[4000000001u];["BigOne"]] + [1u;["One"]]; + [4000000001u;["BigOne"]] ])", FormatResultSetYson(result.GetResultSet(0))); result = session2.ExecuteQuery(Q_(R"( @@ -172,6 +185,12 @@ Y_UNIT_TEST_SUITE(KqpSinkMvcc) { tester.Execute(); } + Y_UNIT_TEST(OlapReadWriteTxFailsOnConcurrentWrite1) { + TReadWriteTxFailsOnConcurrentWrite1 tester; + tester.SetIsOlap(true); + tester.Execute(); + } + class TReadWriteTxFailsOnConcurrentWrite2 : public TTableDataModificationTester { protected: void DoExecute() override { @@ -188,8 +207,8 @@ Y_UNIT_TEST_SUITE(KqpSinkMvcc) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); CompareYson(R"([ - [[1u];["One"]]; - [[4000000001u];["BigOne"]] + [1u;["One"]]; + [4000000001u;["BigOne"]] ])", FormatResultSetYson(result.GetResultSet(0))); // We need to sleep before the upsert below, otherwise writes @@ -218,6 +237,12 @@ Y_UNIT_TEST_SUITE(KqpSinkMvcc) { tester.Execute(); } + Y_UNIT_TEST(OlapReadWriteTxFailsOnConcurrentWrite2) { + TReadWriteTxFailsOnConcurrentWrite2 tester; + tester.SetIsOlap(true); + tester.Execute(); + } + class TReadWriteTxFailsOnConcurrentWrite3 : public TTableDataModificationTester { protected: void DoExecute() override { @@ -234,8 +259,8 @@ Y_UNIT_TEST_SUITE(KqpSinkMvcc) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); CompareYson(R"([ - [[1u];["One"]]; - [[4000000001u];["BigOne"]] + [1u;["One"]]; + [4000000001u;["BigOne"]] ])", FormatResultSetYson(result.GetResultSet(0))); result = session2.ExecuteQuery(Q_(R"( @@ -250,8 +275,8 @@ Y_UNIT_TEST_SUITE(KqpSinkMvcc) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); CompareYson(R"([ - [[2u];["Two"]]; - [[4000000002u];["BigTwo"]] + [2u;["Two"]]; + 
[4000000002u;["BigTwo"]] ])", FormatResultSetYson(result.GetResultSet(0))); result = session1.ExecuteQuery(Q_(R"( diff --git a/ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp b/ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp index e29a8a032162..4959cf240e05 100644 --- a/ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp +++ b/ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp @@ -28,25 +28,25 @@ Y_UNIT_TEST_SUITE(KqpSinkTx) { auto tx = result.GetTransaction(); result = session.ExecuteQuery(Q_(R"( - SELECT * FROM `/Root/Test` WHERE Group = 1; + SELECT * FROM `/Root/Test` WHERE Group = 1 ORDER BY Name; )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); CompareYson(R"([ - [[3500u];["None"];[1u];["Anna"]]; - [[300u];["None"];[1u];["Paul"]] + [[3500u];["None"];1u;"Anna"]; + [[300u];["None"];1u;"Paul"] ])", FormatResultSetYson(result.GetResultSet(0))); auto commitResult = tx->Commit().ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::SUCCESS, commitResult.GetIssues().ToString()); result = session.ExecuteQuery(Q_(R"( - SELECT * FROM `/Root/Test` WHERE Group = 1; + SELECT * FROM `/Root/Test` WHERE Group = 1 ORDER BY Name; )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); CompareYson(R"([ - [[3500u];["None"];[1u];["Anna"]]; - [[300u];["None"];[1u];["Paul"]]; - [#;#;[1u];["Sergey"]] + [[3500u];["None"];1u;"Anna"]; + [[300u];["None"];1u;"Paul"]; + [#;#;1u;"Sergey"] ])", FormatResultSetYson(result.GetResultSet(0))); } }; @@ -57,6 +57,12 @@ Y_UNIT_TEST_SUITE(KqpSinkTx) { tester.Execute(); } + Y_UNIT_TEST(OlapDeferredEffects) { + TDeferredEffects tester; + tester.SetIsOlap(true); + tester.Execute(); + } + class TExplicitTcl : public TTableDataModificationTester { protected: void DoExecute() override { @@ -86,7 +92,7 @@ 
Y_UNIT_TEST_SUITE(KqpSinkTx) { SELECT * FROM `/Root/KV` WHERE Value = "New"; )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); UNIT_ASSERT(result.IsSuccess()); - CompareYson(R"([[[10u];["New"]]])", FormatResultSetYson(result.GetResultSet(0))); + CompareYson(R"([[10u;["New"]]])", FormatResultSetYson(result.GetResultSet(0))); commitResult = tx.Commit().ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::NOT_FOUND, commitResult.GetIssues().ToString()); @@ -100,6 +106,12 @@ Y_UNIT_TEST_SUITE(KqpSinkTx) { tester.Execute(); } + Y_UNIT_TEST(OlapExplicitTcl) { + TExplicitTcl tester; + tester.SetIsOlap(true); + tester.Execute(); + } + class TLocksAbortOnCommit : public TTableDataModificationTester { protected: void DoExecute() override { @@ -156,6 +168,12 @@ Y_UNIT_TEST_SUITE(KqpSinkTx) { tester.Execute(); } + Y_UNIT_TEST(OlapLocksAbortOnCommit) { + TLocksAbortOnCommit tester; + tester.SetIsOlap(true); + tester.Execute(); + } + class TInvalidateOnError : public TTableDataModificationTester { protected: void DoExecute() override { @@ -187,6 +205,12 @@ Y_UNIT_TEST_SUITE(KqpSinkTx) { tester.Execute(); } + Y_UNIT_TEST(OlapInvalidateOnError) { + TInvalidateOnError tester; + tester.SetIsOlap(true); + tester.Execute(); + } + class TInteractive : public TTableDataModificationTester { protected: void DoExecute() override { @@ -209,12 +233,12 @@ Y_UNIT_TEST_SUITE(KqpSinkTx) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); result = session.ExecuteQuery(R"( - SELECT * FROM `/Root/KV` WHERE Key < 3 + SELECT * FROM `/Root/KV` WHERE Key < 3 ORDER BY Key )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); CompareYson(R"([ - [[1u];["New"]]; - [[2u];["Two"]] + [1u;["New"]]; + [2u;["Two"]] ])", FormatResultSetYson(result.GetResultSet(0))); } }; @@ -225,6 +249,12 @@ 
Y_UNIT_TEST_SUITE(KqpSinkTx) { tester.Execute(); } + Y_UNIT_TEST(OlapInteractive) { + TInteractive tester; + tester.SetIsOlap(true); + tester.Execute(); + } + class TSnapshotRO : public TTableDataModificationTester { protected: void DoExecute() override { @@ -236,7 +266,7 @@ Y_UNIT_TEST_SUITE(KqpSinkTx) { SELECT * FROM KV WHERE Key = 2; )"), TTxControl::BeginTx(TTxSettings::SnapshotRO()).CommitTx()).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - CompareYson(R"([[[2u];["Two"]]])", FormatResultSetYson(result.GetResultSet(0))); + CompareYson(R"([[2u;["Two"]]])", FormatResultSetYson(result.GetResultSet(0))); // Read Distributed result = session.ExecuteQuery(Q1_(R"( @@ -263,6 +293,12 @@ Y_UNIT_TEST_SUITE(KqpSinkTx) { tester.Execute(); } + Y_UNIT_TEST(OlapSnapshotRO) { + TSnapshotRO tester; + tester.SetIsOlap(true); + tester.Execute(); + } + class TSnapshotROInteractive1 : public TTableDataModificationTester { protected: void DoExecute() override { @@ -274,7 +310,7 @@ Y_UNIT_TEST_SUITE(KqpSinkTx) { )"); auto readResult = R"([ - [[1u];["One"]] + [1u;["One"]] ])"; auto result = session.ExecuteQuery(readQuery, @@ -305,6 +341,12 @@ Y_UNIT_TEST_SUITE(KqpSinkTx) { tester.Execute(); } + Y_UNIT_TEST(OlapSnapshotROInteractive1) { + TSnapshotROInteractive1 tester; + tester.SetIsOlap(true); + tester.Execute(); + } + class TSnapshotROInteractive2 : public TTableDataModificationTester { protected: void DoExecute() override { @@ -350,6 +392,12 @@ Y_UNIT_TEST_SUITE(KqpSinkTx) { tester.SetIsOlap(false); tester.Execute(); } + + Y_UNIT_TEST(OlapSnapshotROInteractive2) { + TSnapshotROInteractive2 tester; + tester.SetIsOlap(true); + tester.Execute(); + } } } // namespace NKqp diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto index a44ea1acb36b..b65dc98df68a 100644 --- a/ydb/core/protos/config.proto +++ b/ydb/core/protos/config.proto @@ -596,6 +596,11 @@ message TLimiterConfig { optional uint64 
PeriodMilliSeconds = 3 [default = 1000]; } +message TGroupedMemoryLimiterConfig { + optional bool Enabled = 1 [default = true]; + optional uint64 MemoryLimit = 2; +} + message TExternalIndexConfig { optional bool Enabled = 1 [default = true]; optional TInternalRequestConfig RequestConfig = 2; @@ -1520,7 +1525,6 @@ message TColumnShardConfig { optional TIndexMetadataMemoryLimit IndexMetadataMemoryLimit = 12; optional bool CleanupEnabled = 13 [default = true]; - optional uint32 RemovedPortionLivetimeSeconds = 14 [default = 600]; message TRepairInfo { optional string ClassName = 1; @@ -1529,7 +1533,13 @@ message TColumnShardConfig { repeated TRepairInfo Repairs = 15; optional uint32 MaxInFlightIntervalsOnRequest = 16; - optional uint32 MaxInFlightMemoryOnRequest = 17; + optional uint32 MaxReadStaleness_ms = 18 [default = 300000]; + optional uint32 GCIntervalMs = 19 [default = 30000]; + optional uint32 CompactionActualizationLagMs = 20 [default = 1000]; + optional uint32 ActualizationTasksLagMs = 21 [default = 1000]; + optional uint32 LagForCompactionBeforeTieringsMs = 22 [default = 3600000]; + optional uint32 OptimizerFreshnessCheckDurationMs = 23 [default = 300000]; + optional uint32 SmallPortionDetectSizeLimit = 24 [default = 1048576]; // 1 << 20 } message TSchemeShardConfig { @@ -1907,6 +1917,8 @@ message TAppConfig { optional TBlobCacheConfig BlobCacheConfig = 78; optional TLimiterConfig CompDiskLimiterConfig = 79; optional TMetadataCacheConfig MetadataCacheConfig = 80; + //optional TMemoryControllerConfig MemoryControllerConfig = 81; NB. 
exist in main + optional TGroupedMemoryLimiterConfig GroupedMemoryLimiterConfig = 82; optional NKikimrReplication.TReplicationDefaults ReplicationConfig = 83; repeated TNamedConfig NamedConfigs = 100; diff --git a/ydb/core/protos/counters_columnshard.proto b/ydb/core/protos/counters_columnshard.proto index cb1ecf21424a..898dac98aad6 100644 --- a/ydb/core/protos/counters_columnshard.proto +++ b/ydb/core/protos/counters_columnshard.proto @@ -65,8 +65,8 @@ enum ECumulativeCounters { COUNTER_PLAN_STEP_ACCEPTED = 9 [(CounterOpts) = {Name: "PlanStepAccepted"}]; COUNTER_SCANNED_ROWS = 10 [(CounterOpts) = {Name: "ScannedRows"}]; COUNTER_SCANNED_BYTES = 11 [(CounterOpts) = {Name: "ScannedBytes"}]; - COUNTER_UPSERT_BLOBS_WRITTEN = 12 [(CounterOpts) = {Name: "UpsertBlobsWritten"}]; - COUNTER_UPSERT_BYTES_WRITTEN = 13 [(CounterOpts) = {Name: "UpsertBytesWritten"}]; + COUNTER_OPERATIONS_BLOBS_WRITTEN = 12 [(CounterOpts) = {Name: "OperationsBlobsWritten"}]; + COUNTER_OPERATIONS_BYTES_WRITTEN = 13 [(CounterOpts) = {Name: "OperationsBytesWritten"}]; COUNTER_INDEXING_BLOBS_WRITTEN = 14 [(CounterOpts) = {Name: "IndexingBlobsWritten"}]; COUNTER_INDEXING_BYTES_WRITTEN = 15 [(CounterOpts) = {Name: "IndexingBytesWritten"}]; COUNTER_COMPACTION_BLOBS_WRITTEN = 16 [(CounterOpts) = {Name: "CompactionBlobsWritten"}]; @@ -135,6 +135,10 @@ enum ECumulativeCounters { COUNTER_READING_EXPORTED_BLOBS = 79 [(CounterOpts) = {Name: "ReadingExportedBlobs"}]; COUNTER_READING_EXPORTED_BYTES = 80 [(CounterOpts) = {Name: "ReadingExportedBytes"}]; COUNTER_READING_EXPORTED_RANGES = 81 [(CounterOpts) = {Name: "ReadingExportedRanges"}]; + COUNTER_PLANNED_TX_COMPLETED = 82 [(CounterOpts) = {Name: "PlannedTxCompleted"}]; + COUNTER_IMMEDIATE_TX_COMPLETED = 83 [(CounterOpts) = {Name: "ImmediateTxCompleted"}]; + COUNTER_ROWS_ERASED = 84 [(CounterOpts) = {Name: "RowsErased"}]; + COUNTER_OPERATIONS_ROWS_WRITTEN = 85 [(CounterOpts) = {Name: "OperationsRowsWritten"}]; } enum EPercentileCounters { diff --git 
a/ydb/core/protos/feature_flags.proto b/ydb/core/protos/feature_flags.proto index 12663dd6fb07..af68aa5f57d0 100644 --- a/ydb/core/protos/feature_flags.proto +++ b/ydb/core/protos/feature_flags.proto @@ -148,6 +148,12 @@ message TFeatureFlags { optional bool EnableChangefeedsOnIndexTables = 134 [default = false]; optional bool EnableResourcePoolsCounters = 135 [default = false]; optional bool EnableOptionalColumnsInColumnShard = 136 [default = false]; + //optional bool EnableGranularTimecast = 137 [default = true]; NB. exists in main + optional bool EnableAlterShardingInColumnShard = 138 [default = false]; + optional bool EnablePgSyntax = 139 [default = false]; + optional bool EnableTieringInColumnShard = 140 [default = false]; optional bool EnableMetadataObjectsOnServerless = 141 [default = true]; + optional bool EnableOlapCompression = 142 [default = false]; optional bool EnableExternalDataSourcesOnServerless = 143 [default = true]; + optional bool EnableSparsedColumns = 144 [default = false]; } diff --git a/ydb/core/protos/flat_scheme_op.proto b/ydb/core/protos/flat_scheme_op.proto index 22e35f6c12ec..4eccca7110f1 100644 --- a/ydb/core/protos/flat_scheme_op.proto +++ b/ydb/core/protos/flat_scheme_op.proto @@ -19,6 +19,7 @@ import "ydb/library/mkql_proto/protos/minikql.proto"; import "ydb/core/protos/index_builder.proto"; import "ydb/core/tx/columnshard/engines/scheme/defaults/protos/data.proto"; import "ydb/core/tx/columnshard/common/protos/snapshot.proto"; +import "ydb/core/formats/arrow/protos/accessor.proto"; import "google/protobuf/empty.proto"; @@ -422,6 +423,7 @@ message TOlapColumnDiff { optional TOlapColumn.TSerializer Serializer = 5; optional string StorageId = 6; optional string DefaultValue = 7; + optional NKikimrArrowAccessorProto.TRequestedConstructor DataAccessorConstructor = 8; } message TOlapColumnDescription { @@ -441,6 +443,7 @@ message TOlapColumnDescription { optional TOlapColumn.TSerializer Serializer = 10; optional string StorageId = 11; 
optional NKikimrColumnShardColumnDefaults.TColumnDefault DefaultValue = 12; + optional NKikimrArrowAccessorProto.TConstructor DataAccessorConstructor = 13; } message TRequestedBloomFilter { diff --git a/ydb/core/protos/flat_tx_scheme.proto b/ydb/core/protos/flat_tx_scheme.proto index dd537b4c43a1..721786432812 100644 --- a/ydb/core/protos/flat_tx_scheme.proto +++ b/ydb/core/protos/flat_tx_scheme.proto @@ -189,6 +189,8 @@ message TSchemeLimits { optional uint64 MaxExports = 16; optional uint64 MaxImports = 17; + + optional uint64 MaxColumnTableColumns = 18; } message TEvInitTenantSchemeShard { diff --git a/ydb/core/protos/tx_columnshard.proto b/ydb/core/protos/tx_columnshard.proto index b0339d5aa9fb..207a01d0835e 100644 --- a/ydb/core/protos/tx_columnshard.proto +++ b/ydb/core/protos/tx_columnshard.proto @@ -90,6 +90,7 @@ message TLogicalMetadata { optional string SpecialKeysRawData = 6; optional TEvWrite.EModificationType ModificationType = 7; optional NKikimrArrowSchema.TSchemaSubset SchemaSubset = 8; + optional string SpecialKeysPayloadData = 9; } message TEvWriteResult { @@ -144,6 +145,8 @@ enum ETransactionKind { TX_KIND_COMMIT_WRITE = 5; TX_KIND_BACKUP = 6; TX_KIND_SHARING = 7; + TX_KIND_COMMIT_WRITE_PRIMARY = 8; + TX_KIND_COMMIT_WRITE_SECONDARY = 9; } enum ETransactionFlag { @@ -204,6 +207,29 @@ message TBackupTxBody { message TCommitWriteTxBody { optional uint64 LockId = 1; + + message TPrimary { + repeated uint64 SendingShards = 1; + repeated uint64 ReceivingShards = 2; + repeated uint64 WaitShardsBrokenFlags = 3; + repeated uint64 WaitShardsResultAck = 4; + optional bool TxBroken = 5; + } + + message TSecondary { + optional uint64 ArbiterTabletId = 1; + optional bool NeedReceiveBroken = 2; + optional bool ReceiveAck = 3; + optional bool SelfBroken = 4; + optional bool TxBroken = 5; + } + + oneof Implementation { + TPrimary PrimaryTabletData = 5; + TSecondary SecondaryTabletData = 6; + } + + optional bool Broken = 4 [default = false]; } message 
TSchemaPresetVersionInfo { diff --git a/ydb/core/statistics/ut_common/ut_common.cpp b/ydb/core/statistics/ut_common/ut_common.cpp index 39e67132b3e9..2737cb68a5ff 100644 --- a/ydb/core/statistics/ut_common/ut_common.cpp +++ b/ydb/core/statistics/ut_common/ut_common.cpp @@ -80,8 +80,8 @@ TTestEnv::TTestEnv(ui32 staticNodes, ui32 dynamicNodes, ui32 storagePools, bool DriverConfig = NYdb::TDriverConfig().SetEndpoint(Endpoint); Driver = MakeHolder(DriverConfig); - CSController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); - CSController->SetLagForCompactionBeforeTierings(TDuration::Seconds(1)); + CSController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + CSController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); CSController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); Server->GetRuntime()->SetLogPriority(NKikimrServices::STATISTICS, NActors::NLog::PRI_DEBUG); diff --git a/ydb/core/sys_view/common/schema.h b/ydb/core/sys_view/common/schema.h index c11266041cf1..81542a645969 100644 --- a/ydb/core/sys_view/common/schema.h +++ b/ydb/core/sys_view/common/schema.h @@ -415,7 +415,7 @@ struct Schema : NIceDb::Schema { struct BlobId : Column<10, NScheme::NTypeIds::Utf8> {}; struct BlobRangeOffset : Column<11, NScheme::NTypeIds::Uint64> {}; struct BlobRangeSize : Column<12, NScheme::NTypeIds::Uint64> {}; - struct Activity : Column<13, NScheme::NTypeIds::Bool> {}; + struct Activity : Column<13, NScheme::NTypeIds::Uint8> {}; struct TierName: Column<14, NScheme::NTypeIds::Utf8> {}; struct EntityType: Column<15, NScheme::NTypeIds::Utf8> {}; @@ -525,9 +525,10 @@ struct Schema : NIceDb::Schema { struct ColumnBlobBytes: Column<7, NScheme::NTypeIds::Uint64> {}; struct IndexBlobBytes: Column<8, NScheme::NTypeIds::Uint64> {}; struct PortionId: Column<9, NScheme::NTypeIds::Uint64> {}; - struct Activity: Column<10, NScheme::NTypeIds::Bool> {}; + struct Activity: Column<10, NScheme::NTypeIds::Uint8> {}; struct TierName: 
Column<11, NScheme::NTypeIds::Utf8> {}; struct Stats: Column<12, NScheme::NTypeIds::Utf8> {}; + struct Optimized: Column<13, NScheme::NTypeIds::Uint8> {}; using TKey = TableKey; using TColumns = TableColumns< @@ -542,7 +543,8 @@ struct Schema : NIceDb::Schema { PortionId, Activity, TierName, - Stats + Stats, + Optimized >; }; diff --git a/ydb/core/tablet/tablet_counters_aggregator.cpp b/ydb/core/tablet/tablet_counters_aggregator.cpp index b94e31c175fb..28e7497d2529 100644 --- a/ydb/core/tablet/tablet_counters_aggregator.cpp +++ b/ydb/core/tablet/tablet_counters_aggregator.cpp @@ -765,10 +765,16 @@ class TTabletMon { TCounterPtr DatashardSizeBytes; TCounterPtr DatashardCacheHitBytes; TCounterPtr DatashardCacheMissBytes; + TCounterPtr ColumnShardReadRows_; + TCounterPtr ColumnShardReadBytes_; TCounterPtr ColumnShardScanRows_; TCounterPtr ColumnShardScanBytes_; + TCounterPtr ColumnShardWriteRows_; + TCounterPtr ColumnShardWriteBytes_; TCounterPtr ColumnShardBulkUpsertRows_; TCounterPtr ColumnShardBulkUpsertBytes_; + TCounterPtr ColumnShardEraseRows_; + TCounterPtr ColumnShardEraseBytes_; TCounterPtr ResourcesStorageUsedBytes; TCounterPtr ResourcesStorageUsedBytesOnSsd; TCounterPtr ResourcesStorageUsedBytesOnHdd; @@ -787,6 +793,7 @@ class TTabletMon { TCounterPtr ResourcesStreamReservedStorageLimit; THistogramPtr ShardCpuUtilization; + THistogramPtr ColumnShardCpuUtilization; TCounterPtr RowUpdates; TCounterPtr RowUpdateBytes; @@ -808,8 +815,11 @@ class TTabletMon { TCounterPtr ColumnShardScannedBytes_; TCounterPtr ColumnShardScannedRows_; - TCounterPtr ColumnShardUpsertBlobsWritten_; - TCounterPtr ColumnShardUpsertBytesWritten_; + TCounterPtr ColumnShardOperationsRowsWritten_; + TCounterPtr ColumnShardOperationsBytesWritten_; + TCounterPtr ColumnShardErasedBytes_; + TCounterPtr ColumnShardErasedRows_; + THistogramPtr ColumnShardConsumedCpuHistogram; TCounterPtr DiskSpaceTablesTotalBytes; TCounterPtr DiskSpaceTablesTotalBytesOnSsd; @@ -859,14 +869,26 @@ class 
TTabletMon { DatashardCacheMissBytes = ydbGroup->GetNamedCounter("name", "table.datashard.cache_miss.bytes", true); + ColumnShardReadRows_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.read.rows", true); + ColumnShardReadBytes_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.read.bytes", true); ColumnShardScanRows_ = ydbGroup->GetNamedCounter("name", "table.columnshard.scan.rows", true); ColumnShardScanBytes_ = ydbGroup->GetNamedCounter("name", "table.columnshard.scan.bytes", true); + ColumnShardWriteRows_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.write.rows", true); + ColumnShardWriteBytes_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.write.bytes", true); ColumnShardBulkUpsertRows_ = ydbGroup->GetNamedCounter("name", "table.columnshard.bulk_upsert.rows", true); ColumnShardBulkUpsertBytes_ = ydbGroup->GetNamedCounter("name", "table.columnshard.bulk_upsert.bytes", true); + ColumnShardEraseRows_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.erase.rows", true); + ColumnShardEraseBytes_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.erase.bytes", true); ResourcesStorageUsedBytes = ydbGroup->GetNamedCounter("name", "resources.storage.used_bytes", false); @@ -908,6 +930,8 @@ class TTabletMon { ShardCpuUtilization = ydbGroup->GetNamedHistogram("name", "table.datashard.used_core_percents", NMonitoring::LinearHistogram(12, 0, 10), false); + ColumnShardCpuUtilization = ydbGroup->GetNamedHistogram("name", + "table.columnshard.used_core_percents", NMonitoring::LinearHistogram(12, 0, 10), false); }; void Initialize(::NMonitoring::TDynamicCounterPtr counters, bool hasDatashard, bool hasSchemeshard, bool hasColumnShard) { @@ -943,8 +967,11 @@ class TTabletMon { ColumnShardScannedBytes_ = appGroup->GetCounter("ColumnShard/ScannedBytes"); ColumnShardScannedRows_ = appGroup->GetCounter("ColumnShard/ScannedRows"); - ColumnShardUpsertBlobsWritten_ = appGroup->GetCounter("ColumnShard/UpsertBlobsWritten"); - 
ColumnShardUpsertBytesWritten_ = appGroup->GetCounter("ColumnShard/UpsertBytesWritten"); + ColumnShardOperationsRowsWritten_ = appGroup->GetCounter("ColumnShard/OperationsRowsWritten"); + ColumnShardOperationsBytesWritten_ = appGroup->GetCounter("ColumnShard/OperationsBytesWritten"); + ColumnShardErasedBytes_ = appGroup->GetCounter("ColumnShard/BytesErased"); + ColumnShardErasedRows_ = appGroup->GetCounter("ColumnShard/RowsErased"); + ColumnShardConsumedCpuHistogram = appGroup->FindHistogram("HIST(ConsumedCPU)"); } if (hasSchemeshard && !DiskSpaceTablesTotalBytes) { @@ -990,10 +1017,20 @@ class TTabletMon { } if (ColumnShardScannedBytes_) { + ColumnShardReadRows_->Set(0); + ColumnShardReadBytes_->Set(0); ColumnShardScanRows_->Set(ColumnShardScannedRows_->Val()); ColumnShardScanBytes_->Set(ColumnShardScannedBytes_->Val()); - ColumnShardBulkUpsertRows_->Set(ColumnShardUpsertBlobsWritten_->Val()); - ColumnShardBulkUpsertBytes_->Set(ColumnShardUpsertBytesWritten_->Val()); + ColumnShardWriteRows_->Set(ColumnShardOperationsRowsWritten_->Val()); + ColumnShardWriteBytes_->Set(ColumnShardOperationsBytesWritten_->Val()); + ColumnShardBulkUpsertRows_->Set(ColumnShardOperationsRowsWritten_->Val()); + ColumnShardBulkUpsertBytes_->Set(ColumnShardOperationsBytesWritten_->Val()); + ColumnShardEraseRows_->Set(ColumnShardErasedRows_->Val()); + ColumnShardEraseBytes_->Set(ColumnShardErasedBytes_->Val()); + + if (ColumnShardConsumedCpuHistogram) { + TransferBuckets(ColumnShardCpuUtilization, ColumnShardConsumedCpuHistogram); + } } if (DiskSpaceTablesTotalBytes) { diff --git a/ydb/core/testlib/basics/feature_flags.h b/ydb/core/testlib/basics/feature_flags.h index fff759eb7361..70b240bd829f 100644 --- a/ydb/core/testlib/basics/feature_flags.h +++ b/ydb/core/testlib/basics/feature_flags.h @@ -62,7 +62,10 @@ class TTestFeatureFlagsHolder { FEATURE_FLAG_SETTER(EnableTableDatetime64) FEATURE_FLAG_SETTER(EnableResourcePools) FEATURE_FLAG_SETTER(EnableChangefeedsOnIndexTables) + 
FEATURE_FLAG_SETTER(EnablePgSyntax) + FEATURE_FLAG_SETTER(EnableTieringInColumnShard) FEATURE_FLAG_SETTER(EnableMetadataObjectsOnServerless) + FEATURE_FLAG_SETTER(EnableOlapCompression) #undef FEATURE_FLAG_SETTER }; diff --git a/ydb/core/testlib/cs_helper.cpp b/ydb/core/testlib/cs_helper.cpp index c6f05ec8c86f..dd26da35fa74 100644 --- a/ydb/core/testlib/cs_helper.cpp +++ b/ydb/core/testlib/cs_helper.cpp @@ -183,7 +183,7 @@ std::shared_ptr THelper::TestArrowBatch(ui64 pathIdBegin, ui TString THelper::GetTestTableSchema() const { TStringBuilder sb; sb << R"(Columns{ Name: "timestamp" Type : "Timestamp" NotNull : true })"; - sb << R"(Columns{ Name: "resource_id" Type : "Utf8" })"; + sb << R"(Columns{ Name: "resource_id" Type : "Utf8" DataAccessorConstructor{ ClassName: "SPARSED" } })"; sb << "Columns{ Name: \"uid\" Type : \"Utf8\" NotNull : true StorageId : \"" + OptionalStorageId + "\" }"; sb << R"(Columns{ Name: "level" Type : "Int32" })"; sb << "Columns{ Name: \"message\" Type : \"Utf8\" StorageId : \"" + OptionalStorageId + "\" }"; @@ -198,7 +198,7 @@ TString THelper::GetTestTableSchema() const { return sb; } -void THelper::CreateOlapTableWithStore(TString tableName /*= "olapTable"*/, TString storeName /*= "olapStore"*/, ui32 storeShardsCount /*= 4*/, ui32 tableShardsCount /*= 3*/) { +void THelper::CreateSchemaOlapTablesWithStore(const TString tableSchema, TVector tableNames /*= "olapTable"*/, TString storeName /*= "olapStore"*/, ui32 storeShardsCount /*= 4*/, ui32 tableShardsCount /*= 3*/) { TActorId sender = Server.GetRuntime()->AllocateEdgeActor(); CreateTestOlapStore(sender, Sprintf(R"( Name: "%s" @@ -209,19 +209,25 @@ void THelper::CreateOlapTableWithStore(TString tableName /*= "olapTable"*/, TStr %s } } - )", storeName.c_str(), storeShardsCount, GetTestTableSchema().data())); + )", storeName.c_str(), storeShardsCount, tableSchema.data())); const TString shardingColumns = "[\"" + JoinSeq("\",\"", GetShardingColumns()) + "\"]"; - 
TBase::CreateTestOlapTable(sender, storeName, Sprintf(R"( - Name: "%s" - ColumnShardCount: %d - Sharding { - HashSharding { - Function: %s - Columns: %s - } - })", tableName.c_str(), tableShardsCount, ShardingMethod.data(), shardingColumns.c_str())); + for (const TString& tableName : tableNames) { + TBase::CreateTestOlapTable(sender, storeName, Sprintf(R"( + Name: "%s" + ColumnShardCount: %d + Sharding { + HashSharding { + Function: %s + Columns: %s + } + })", tableName.c_str(), tableShardsCount, ShardingMethod.data(), shardingColumns.c_str())); + } +} + +void THelper::CreateOlapTablesWithStore(TVector tableNames /*= {"olapTable"}*/, TString storeName /*= "olapStore"*/, ui32 storeShardsCount /*= 4*/, ui32 tableShardsCount /*= 3*/) { + CreateSchemaOlapTablesWithStore(GetTestTableSchema(), tableNames, storeName, storeShardsCount, tableShardsCount); } // Clickbench table diff --git a/ydb/core/testlib/cs_helper.h b/ydb/core/testlib/cs_helper.h index 7a9e3dad1bf3..95c8877b6ba6 100644 --- a/ydb/core/testlib/cs_helper.h +++ b/ydb/core/testlib/cs_helper.h @@ -27,11 +27,16 @@ class THelper: public THelperSchemaless { std::shared_ptr GetArrowSchema() const; YDB_FLAG_ACCESSOR(WithJsonDocument, false); YDB_ACCESSOR(TString, OptionalStorageId, "__MEMORY"); +protected: TString ShardingMethod = "HASH_FUNCTION_CONSISTENCY_64"; +private: bool WithSomeNulls_ = false; protected: - void CreateOlapTableWithStore(TString tableName = "olapTable", TString storeName = "olapStore", + void CreateSchemaOlapTablesWithStore(const TString tableSchema, TVector tableName = {"olapTable"}, TString storeName = "olapStore", + ui32 storeShardsCount = 4, ui32 tableShardsCount = 3); + void CreateOlapTablesWithStore(TVector tableName = {"olapTable"}, TString storeName = "olapStore", ui32 storeShardsCount = 4, ui32 tableShardsCount = 3); + public: using TBase::TBase; @@ -43,9 +48,9 @@ class THelper: public THelperSchemaless { static constexpr const char * PROTO_SCHEMA = R"( Columns { Name: "timestamp" 
Type: "Timestamp" NotNull: true } - Columns { Name: "resource_id" Type: "Utf8" } + Columns { Name: "resource_id" Type: "Utf8" DataAccessorConstructor{ ClassName: "SPARSED" }} Columns { Name: "uid" Type: "Utf8" } - Columns { Name: "level" Type: "Int32" } + Columns { Name: "level" Type: "Int32" DataAccessorConstructor{ ClassName: "SPARSED" }} Columns { Name: "message" Type: "Utf8" } KeyColumnNames: "timestamp" Engine: COLUMN_ENGINE_REPLACING_TIMESERIES diff --git a/ydb/core/testlib/test_client.cpp b/ydb/core/testlib/test_client.cpp index 296e2c02b840..94512331f5c1 100644 --- a/ydb/core/testlib/test_client.cpp +++ b/ydb/core/testlib/test_client.cpp @@ -113,6 +113,7 @@ #include #include #include +#include #include #include @@ -763,6 +764,11 @@ namespace Tests { const auto aid = Runtime->Register(actor, nodeIdx, appData.SystemPoolId, TMailboxType::Revolving, 0); Runtime->RegisterService(NCSIndex::MakeServiceId(Runtime->GetNodeId(nodeIdx)), aid, nodeIdx); } + { + auto* actor = NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::CreateService(NOlap::NGroupedMemoryManager::TConfig(), new ::NMonitoring::TDynamicCounters()); + const auto aid = Runtime->Register(actor, nodeIdx, appData.UserPoolId, TMailboxType::Revolving, 0); + Runtime->RegisterService(NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::MakeServiceId(Runtime->GetNodeId(nodeIdx)), aid, nodeIdx); + } { auto* actor = NConveyor::TScanServiceOperator::CreateService(NConveyor::TConfig(), new ::NMonitoring::TDynamicCounters()); const auto aid = Runtime->Register(actor, nodeIdx, appData.UserPoolId, TMailboxType::Revolving, 0); diff --git a/ydb/core/testlib/ya.make b/ydb/core/testlib/ya.make index 58502dca6227..5a63f36a639f 100644 --- a/ydb/core/testlib/ya.make +++ b/ydb/core/testlib/ya.make @@ -102,6 +102,7 @@ PEERDIR( ydb/services/ext_index/service ydb/services/ymq ydb/core/tx/conveyor/service + ydb/core/tx/limiter/grouped_memory/usage ydb/services/fq ydb/services/kesus 
ydb/services/persqueue_cluster_discovery diff --git a/ydb/core/tx/columnshard/background_controller.h b/ydb/core/tx/columnshard/background_controller.h index bb38f2744061..b57a29d5b072 100644 --- a/ydb/core/tx/columnshard/background_controller.h +++ b/ydb/core/tx/columnshard/background_controller.h @@ -1,6 +1,7 @@ #pragma once #include "engines/changes/abstract/compaction_info.h" #include "engines/portions/meta.h" +#include namespace NKikimr::NOlap { class TColumnEngineChanges; @@ -15,11 +16,16 @@ class TBackgroundController { using TCurrentCompaction = THashMap; TCurrentCompaction ActiveCompactionInfo; + std::shared_ptr Counters; bool ActiveCleanupPortions = false; bool ActiveCleanupTables = false; bool ActiveCleanupInsertTable = false; YDB_READONLY(TMonotonic, LastIndexationInstant, TMonotonic::Zero()); public: + TBackgroundController(std::shared_ptr counters) + : Counters(std::move(counters)) { + } + THashSet GetConflictTTLPortions() const; THashSet GetConflictCompactionPortions() const; @@ -29,6 +35,7 @@ class TBackgroundController { bool StartCompaction(const NOlap::TPlanCompactionInfo& info); void FinishCompaction(const NOlap::TPlanCompactionInfo& info) { Y_ABORT_UNLESS(ActiveCompactionInfo.erase(info.GetPathId())); + Counters->OnCompactionFinish(info.GetPathId()); } const TCurrentCompaction& GetActiveCompaction() const { return ActiveCompactionInfo; diff --git a/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.cpp b/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.cpp index cda7186419f6..5008e3e57c7f 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.cpp +++ b/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.cpp @@ -140,14 +140,9 @@ bool TBlobManager::LoadState(IBlobManagerDb& db, const TTabletId selfTabletId) { if (!db.LoadLastGcBarrier(LastCollectedGenStep)) { return false; } - //https://github.com/ydb-platform/ydb/issues/7468 - TGenStep storedGCBarrierPreparation; - if (!db.LoadGCBarrierPreparation(storedGCBarrierPreparation)) { 
+ if (!db.LoadGCBarrierPreparation(GCBarrierPreparation)) { return false; } - if (storedGCBarrierPreparation < LastCollectedGenStep) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("mem_genstep", GCBarrierPreparation)("last_genstep", LastCollectedGenStep)("db_genstep", storedGCBarrierPreparation); - } AFL_VERIFY(!GCBarrierPreparation.Generation() || LastCollectedGenStep <= GCBarrierPreparation)("prepared", GCBarrierPreparation)("last", LastCollectedGenStep); // Load the keep and delete queues @@ -314,7 +309,7 @@ std::shared_ptr TBlobManager::BuildGCTas return nullptr; } - if (AppData()->TimeProvider->Now() - PreviousGCTime < NYDBTest::TControllers::GetColumnShardController()->GetOverridenGCPeriod(TDuration::Seconds(GC_INTERVAL_SECONDS))) { + if (AppData()->TimeProvider->Now() - PreviousGCTime < NYDBTest::TControllers::GetColumnShardController()->GetOverridenGCPeriod()) { ACFL_DEBUG("event", "TBlobManager::BuildGCTask skip")("current_gen", CurrentGen)("current_step", CurrentStep)("reason", "too_often"); BlobsManagerCounters.GCCounters.SkipCollectionThrottling->Add(1); return nullptr; diff --git a/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h b/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h index 90094e62b7a3..52e0f573eb60 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h +++ b/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h @@ -133,9 +133,6 @@ struct TBlobManagerCounters { // The implementation of BlobManager that hides all GC-related details class TBlobManager : public IBlobManager, public TCommonBlobsTracker { -private: - static constexpr ui64 GC_INTERVAL_SECONDS = 30; - private: using TBlobAddress = NBlobOperations::NBlobStorage::TBlobAddress; class TGCContext; diff --git a/ydb/core/tx/columnshard/blobs_action/bs/write.cpp b/ydb/core/tx/columnshard/blobs_action/bs/write.cpp index 021abc972a0d..06ebc64eccf3 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/write.cpp +++ 
b/ydb/core/tx/columnshard/blobs_action/bs/write.cpp @@ -15,13 +15,10 @@ void TWriteAction::DoOnCompleteTxAfterWrite(NColumnShard::TColumnShard& self, co ui64 blobsWritten = BlobBatch.GetBlobCount(); ui64 bytesWritten = BlobBatch.GetTotalSize(); if (blobsWroteSuccessfully) { - self.IncCounter(NColumnShard::COUNTER_UPSERT_BLOBS_WRITTEN, blobsWritten); - self.IncCounter(NColumnShard::COUNTER_UPSERT_BYTES_WRITTEN, bytesWritten); - // self.IncCounter(NColumnShard::COUNTER_RAW_BYTES_UPSERTED, insertedBytes); - self.IncCounter(NColumnShard::COUNTER_WRITE_SUCCESS); + self.Counters.GetTabletCounters()->OnWriteSuccess(blobsWritten, bytesWritten); Manager->SaveBlobBatchOnComplete(std::move(BlobBatch)); } else { - self.IncCounter(NColumnShard::COUNTER_WRITE_FAIL); + self.Counters.GetTabletCounters()->OnWriteFailure(); } } diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.h b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.h index 5ca66fe90a34..96d8f09e5e6d 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.h +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.h @@ -5,10 +5,10 @@ namespace NKikimr::NColumnShard { class TTxInsertTableCleanup: public TTransactionBase { private: - THashSet WriteIdsToAbort; + THashSet WriteIdsToAbort; std::shared_ptr BlobsAction; public: - TTxInsertTableCleanup(TColumnShard* self, THashSet&& writeIdsToAbort) + TTxInsertTableCleanup(TColumnShard* self, THashSet&& writeIdsToAbort) : TBase(self) , WriteIdsToAbort(std::move(writeIdsToAbort)) { Y_ABORT_UNLESS(WriteIdsToAbort.size() || self->InsertTable->GetAborted().size()); diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp index 8e12cf1b8a63..96a5cf794190 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp @@ -1,13 +1,17 @@ 
#include "tx_write.h" +#include +#include + namespace NKikimr::NColumnShard { -bool TTxWrite::InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSerializedBatch& batch, const TWriteId writeId) { +bool TTxWrite::InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSerializedBatch& batch, const TInsertWriteId writeId) { NKikimrTxColumnShard::TLogicalMetadata meta; meta.SetNumRows(batch->GetRowsCount()); meta.SetRawBytes(batch->GetRawBytes()); meta.SetDirtyWriteTimeSeconds(batch.GetStartInstant().Seconds()); - meta.SetSpecialKeysRawData(batch->GetSpecialKeysSafe()); + meta.SetSpecialKeysRawData(batch->GetSpecialKeysFullSafe()); + meta.SetSpecialKeysPayloadData(batch->GetSpecialKeysPayloadSafe()); const auto& blobRange = batch.GetRange(); Y_ABORT_UNLESS(blobRange.GetBlobId().IsValid()); @@ -22,9 +26,8 @@ bool TTxWrite::InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSeriali auto schemeVersion = batch.GetAggregation().GetSchemaVersion(); auto tableSchema = Self->TablesManager.GetPrimaryIndex()->GetVersionedIndex().GetSchemaVerified(schemeVersion); - NOlap::TInsertedData insertData((ui64)writeId, writeMeta.GetTableId(), writeMeta.GetDedupId(), blobRange, - meta, tableSchema->GetVersion(), - batch->GetData()); + auto userData = std::make_shared(writeMeta.GetTableId(), blobRange, meta, tableSchema->GetVersion(), batch->GetData()); + NOlap::TInsertedData insertData(writeId, userData); bool ok = Self->InsertTable->Insert(dbTable, std::move(insertData)); if (ok) { Self->UpdateInsertTableCounters(); @@ -35,7 +38,8 @@ bool TTxWrite::InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSeriali bool TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) { TMemoryProfileGuard mpg("TTxWrite::Execute"); - NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_BLOBS)("tablet_id", Self->TabletID())("tx_state", "execute"); + NActors::TLogContextGuard logGuard = + 
NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_BLOBS)("tablet_id", Self->TabletID())("tx_state", "execute"); ACFL_DEBUG("event", "start_execute"); const NOlap::TWritingBuffer& buffer = PutBlobResult->Get()->MutableWritesBuffer(); for (auto&& aggr : buffer.GetAggregations()) { @@ -44,33 +48,27 @@ bool TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) { txc.DB.NoMoreReadsForTx(); TWriteOperation::TPtr operation; if (writeMeta.HasLongTxId()) { + NIceDb::TNiceDb db(txc.DB); + const TInsertWriteId insertWriteId = + Self->GetLongTxWrite(db, writeMeta.GetLongTxIdUnsafe(), writeMeta.GetWritePartId(), writeMeta.GetGranuleShardingVersion()); + aggr->AddInsertWriteId(insertWriteId); if (writeMeta.IsGuaranteeWriter()) { AFL_VERIFY(aggr->GetSplittedBlobs().size() == 1)("count", aggr->GetSplittedBlobs().size()); } else { AFL_VERIFY(aggr->GetSplittedBlobs().size() <= 1)("count", aggr->GetSplittedBlobs().size()); } + if (aggr->GetSplittedBlobs().size() == 1) { + AFL_VERIFY(InsertOneBlob(txc, aggr->GetSplittedBlobs().front(), insertWriteId))("write_id", writeMeta.GetWriteId())( + "insert_write_id", insertWriteId); + } } else { - operation = Self->OperationsManager->GetOperation((TWriteId)writeMeta.GetWriteId()); - Y_ABORT_UNLESS(operation); + operation = Self->OperationsManager->GetOperationVerified((TOperationWriteId)writeMeta.GetWriteId()); Y_ABORT_UNLESS(operation->GetStatus() == EOperationStatus::Started); - } - - auto writeId = TWriteId(writeMeta.GetWriteId()); - if (!operation) { - NIceDb::TNiceDb db(txc.DB); - writeId = Self->GetLongTxWrite(db, writeMeta.GetLongTxIdUnsafe(), writeMeta.GetWritePartId(), writeMeta.GetGranuleShardingVersion()); - aggr->AddWriteId(writeId); - } - - for (auto&& i : aggr->GetSplittedBlobs()) { - if (operation) { - writeId = Self->BuildNextWriteId(txc); - aggr->AddWriteId(writeId); - } - - if (!InsertOneBlob(txc, i, writeId)) { - LOG_S_DEBUG(TxPrefix() << "duplicate writeId " << (ui64)writeId << TxSuffix()); - 
Self->IncCounter(COUNTER_WRITE_DUPLICATE); + for (auto&& i : aggr->GetSplittedBlobs()) { + const TInsertWriteId insertWriteId = Self->InsertTable->BuildNextWriteId(txc); + aggr->AddInsertWriteId(insertWriteId); + AFL_VERIFY(InsertOneBlob(txc, i, insertWriteId))("write_id", writeMeta.GetWriteId())("insert_write_id", insertWriteId)( + "size", aggr->GetSplittedBlobs().size()); } } } @@ -87,32 +85,41 @@ bool TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) { for (auto&& aggr : buffer.GetAggregations()) { const auto& writeMeta = aggr->GetWriteMeta(); if (!writeMeta.HasLongTxId()) { - auto operation = Self->OperationsManager->GetOperation((TWriteId)writeMeta.GetWriteId()); - Y_ABORT_UNLESS(operation); + auto operation = Self->OperationsManager->GetOperationVerified((TOperationWriteId)writeMeta.GetWriteId()); Y_ABORT_UNLESS(operation->GetStatus() == EOperationStatus::Started); - operation->OnWriteFinish(txc, aggr->GetWriteIds()); - if (operation->GetBehaviour() == EOperationBehaviour::InTxWrite) { + operation->OnWriteFinish(txc, aggr->GetInsertWriteIds(), operation->GetBehaviour() == EOperationBehaviour::NoTxWrite); + Self->OperationsManager->LinkInsertWriteIdToOperationWriteId(aggr->GetInsertWriteIds(), operation->GetWriteId()); + if (operation->GetBehaviour() == EOperationBehaviour::NoTxWrite) { + auto ev = NEvents::TDataEvents::TEvWriteResult::BuildCompleted(Self->TabletID()); + Results.emplace_back(std::move(ev), writeMeta.GetSource(), operation->GetCookie()); + Self->OperationsManager->AddTemporaryTxLink(operation->GetLockId()); + Self->OperationsManager->CommitTransactionOnExecute(*Self, operation->GetLockId(), txc, Self->GetLastTxSnapshot()); + } else if (operation->GetBehaviour() == EOperationBehaviour::InTxWrite) { NKikimrTxColumnShard::TCommitWriteTxBody proto; proto.SetLockId(operation->GetLockId()); TString txBody; Y_ABORT_UNLESS(proto.SerializeToString(&txBody)); auto op = Self->GetProgressTxController().StartProposeOnExecute( - 
TTxController::TTxInfo(NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE, operation->GetLockId(), writeMeta.GetSource(), operation->GetCookie(), {}), txBody, - txc); + TTxController::TTxInfo( + NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE, operation->GetLockId(), writeMeta.GetSource(), operation->GetCookie(), {}), + txBody, txc); AFL_VERIFY(!op->IsFail()); ResultOperators.emplace_back(op); } else { + auto& info = Self->OperationsManager->GetLockVerified(operation->GetLockId()); NKikimrDataEvents::TLock lock; lock.SetLockId(operation->GetLockId()); lock.SetDataShard(Self->TabletID()); - lock.SetGeneration(1); - lock.SetCounter(1); + lock.SetGeneration(info.GetGeneration()); + lock.SetCounter(info.GetInternalGenerationCounter()); + lock.SetPathId(writeMeta.GetTableId()); auto ev = NEvents::TDataEvents::TEvWriteResult::BuildCompleted(Self->TabletID(), operation->GetLockId(), lock); Results.emplace_back(std::move(ev), writeMeta.GetSource(), operation->GetCookie()); } } else { - Y_ABORT_UNLESS(aggr->GetWriteIds().size() == 1); - auto ev = std::make_unique(Self->TabletID(), writeMeta, (ui64)aggr->GetWriteIds().front(), NKikimrTxColumnShard::EResultStatus::SUCCESS); + Y_ABORT_UNLESS(aggr->GetInsertWriteIds().size() == 1); + auto ev = std::make_unique( + Self->TabletID(), writeMeta, (ui64)aggr->GetInsertWriteIds().front(), NKikimrTxColumnShard::EResultStatus::SUCCESS); Results.emplace_back(std::move(ev), writeMeta.GetSource(), 0); } } @@ -121,7 +128,8 @@ bool TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) { void TTxWrite::Complete(const TActorContext& ctx) { TMemoryProfileGuard mpg("TTxWrite::Complete"); - NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_BLOBS)("tablet_id", Self->TabletID())("tx_state", "complete"); + NActors::TLogContextGuard logGuard = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_BLOBS)("tablet_id", Self->TabletID())("tx_state", "complete"); const auto now = 
TMonotonic::Now(); const NOlap::TWritingBuffer& buffer = PutBlobResult->Get()->MutableWritesBuffer(); for (auto&& i : buffer.GetAddActions()) { @@ -140,10 +148,21 @@ void TTxWrite::Complete(const TActorContext& ctx) { } for (ui32 i = 0; i < buffer.GetAggregations().size(); ++i) { const auto& writeMeta = buffer.GetAggregations()[i]->GetWriteMeta(); - Self->CSCounters.OnWriteTxComplete(now - writeMeta.GetWriteStartInstant()); - Self->CSCounters.OnSuccessWriteResponse(); + if (!writeMeta.HasLongTxId()) { + auto op = Self->GetOperationsManager().GetOperationVerified((TOperationWriteId)writeMeta.GetWriteId()); + if (op->GetBehaviour() == EOperationBehaviour::WriteWithLock || op->GetBehaviour() == EOperationBehaviour::NoTxWrite) { + auto evWrite = std::make_shared(writeMeta.GetTableId(), + buffer.GetAggregations()[i]->GetRecordBatch(), Self->GetIndexOptional()->GetVersionedIndex().GetPrimaryKey()); + Self->GetOperationsManager().AddEventForLock(*Self, op->GetLockId(), evWrite); + } + if (op->GetBehaviour() == EOperationBehaviour::NoTxWrite) { + Self->OperationsManager->CommitTransactionOnComplete(*Self, op->GetLockId(), Self->GetLastTxSnapshot()); + } + } + Self->Counters.GetCSCounters().OnWriteTxComplete(now - writeMeta.GetWriteStartInstant()); + Self->Counters.GetCSCounters().OnSuccessWriteResponse(); } - + Self->Counters.GetTabletCounters()->IncCounter(COUNTER_IMMEDIATE_TX_COMPLETED); } -} +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h index 98de301e5e16..84ffbe7a9005 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h @@ -43,7 +43,7 @@ class TTxWrite : public NTabletFlatExecutor::TTransactionBase { std::vector> ResultOperators; - bool InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSerializedBatch& batch, const TWriteId writeId); + bool 
InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSerializedBatch& batch, const TInsertWriteId writeId); TStringBuilder TxPrefix() const { return TStringBuilder() << "TxWrite[" << ToString(TabletTxNo) << "] "; diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp index a04c6fb2abef..57a1eee50146 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp @@ -43,7 +43,7 @@ bool TTxWriteIndex::Execute(TTransactionContext& txc, const TActorContext& ctx) LOG_S_ERROR(TxPrefix() << " (" << changes->TypeString() << ") cannot write index blobs" << TxSuffix()); } - Self->EnqueueProgressTx(ctx); + Self->EnqueueProgressTx(ctx, std::nullopt); return true; } diff --git a/ydb/core/tx/columnshard/columnshard.cpp b/ydb/core/tx/columnshard/columnshard.cpp index c4da7260562e..f3a6b9e99db9 100644 --- a/ydb/core/tx/columnshard/columnshard.cpp +++ b/ydb/core/tx/columnshard/columnshard.cpp @@ -1,14 +1,17 @@ #include "columnshard_impl.h" + +#include "bg_tasks/manager/manager.h" #include "blobs_reader/actor.h" +#include "counters/aggregation/table_stats.h" +#include "engines/column_engine_logs.h" +#include "engines/writer/buffer/actor.h" #include "hooks/abstract/abstract.h" #include "resource_subscriber/actor.h" -#include "engines/writer/buffer/actor.h" -#include "engines/column_engine_logs.h" -#include "bg_tasks/manager/manager.h" +#include "transactions/locks/read_finished.h" -#include -#include #include +#include +#include namespace NKikimr { @@ -16,7 +19,7 @@ IActor* CreateColumnShard(const TActorId& tablet, TTabletStorageInfo* info) { return new NColumnShard::TColumnShard(info, tablet); } -} +} // namespace NKikimr namespace NKikimr::NColumnShard { @@ -26,6 +29,9 @@ void TColumnShard::CleanupActors(const TActorContext& ctx) { } ctx.Send(ResourceSubscribeActor, new 
TEvents::TEvPoisonPill); ctx.Send(BufferizationWriteActorId, new TEvents::TEvPoisonPill); + for (auto&& i : ActorsToStop) { + ctx.Send(i, new TEvents::TEvPoisonPill); + } StoragesManager->Stop(); DataLocksManager->Stop(); @@ -43,7 +49,8 @@ void TColumnShard::BecomeBroken(const TActorContext& ctx) { void TColumnShard::SwitchToWork(const TActorContext& ctx) { { - const TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("self_id", SelfId()); + const TLogContextGuard gLogging = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("self_id", SelfId()); AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "initialize_shard")("step", "SwitchToWork"); for (auto&& i : TablesManager.GetTables()) { @@ -54,24 +61,30 @@ void TColumnShard::SwitchToWork(const TActorContext& ctx) { SignalTabletActive(ctx); AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "initialize_shard")("step", "SignalTabletActive"); TryRegisterMediatorTimeCast(); - EnqueueProgressTx(ctx); + EnqueueProgressTx(ctx, std::nullopt); } - CSCounters.OnIndexMetadataLimit(NOlap::IColumnEngine::GetMetadataLimit()); + Counters.GetCSCounters().OnIndexMetadataLimit(NOlap::IColumnEngine::GetMetadataLimit()); EnqueueBackgroundActivities(); BackgroundSessionsManager->Start(); + ctx.Send(SelfId(), new NActors::TEvents::TEvWakeup()); ctx.Send(SelfId(), new TEvPrivate::TEvPeriodicWakeup()); + ctx.Send(SelfId(), new TEvPrivate::TEvPingSnapshotsUsage()); NYDBTest::TControllers::GetColumnShardController()->OnSwitchToWork(TabletID()); + AFL_VERIFY(!!StartInstant); + Counters.GetCSCounters().Initialization.OnSwitchToWork(TMonotonic::Now() - *StartInstant, TMonotonic::Now() - CreateInstant); } void TColumnShard::OnActivateExecutor(const TActorContext& ctx) { - const TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("self_id", SelfId()); + StartInstant = 
TMonotonic::Now(); + Counters.GetCSCounters().Initialization.OnActivateExecutor(TMonotonic::Now() - CreateInstant); + const TLogContextGuard gLogging = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("self_id", SelfId()); AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "initialize_shard")("step", "OnActivateExecutor"); - Executor()->RegisterExternalTabletCounters(TabletCountersPtr.release()); + Executor()->RegisterExternalTabletCounters(TabletCountersHolder.release()); const auto selfActorId = SelfId(); StoragesManager->Initialize(Executor()->Generation()); - Tiers = std::make_shared(TabletID(), SelfId(), - [selfActorId](const TActorContext& ctx) { + Tiers = std::make_shared(TabletID(), SelfId(), [selfActorId](const TActorContext& ctx) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "tiering_new_event"); ctx.Send(selfActorId, new TEvPrivate::TEvTieringModified); }); @@ -79,7 +92,8 @@ void TColumnShard::OnActivateExecutor(const TActorContext& ctx) { if (!NMetadata::NProvider::TServiceOperator::IsEnabled()) { Tiers->TakeConfigs(NYDBTest::TControllers::GetColumnShardController()->GetFallbackTiersSnapshot(), nullptr); } - BackgroundSessionsManager = std::make_shared(std::make_shared(selfActorId, (NOlap::TTabletId)TabletID(), *this)); + BackgroundSessionsManager = std::make_shared( + std::make_shared(selfActorId, (NOlap::TTabletId)TabletID(), *this)); AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "initialize_shard")("step", "initialize_tiring_finished"); auto& icb = *AppData(ctx)->Icb; @@ -141,14 +155,14 @@ void TColumnShard::Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev, const LOG_S_DEBUG("Server pipe reset at tablet " << TabletID()); } -void TColumnShard::Handle(TEvPrivate::TEvScanStats::TPtr& ev, const TActorContext &ctx) { +void TColumnShard::Handle(TEvPrivate::TEvScanStats::TPtr& ev, const TActorContext& ctx) { Y_UNUSED(ctx); - IncCounter(COUNTER_SCANNED_ROWS, ev->Get()->Rows); - 
IncCounter(COUNTER_SCANNED_BYTES, ev->Get()->Bytes); + Counters.GetTabletCounters()->IncCounter(COUNTER_SCANNED_ROWS, ev->Get()->Rows); + Counters.GetTabletCounters()->IncCounter(COUNTER_SCANNED_BYTES, ev->Get()->Bytes); } -void TColumnShard::Handle(TEvPrivate::TEvReadFinished::TPtr& ev, const TActorContext &ctx) { +void TColumnShard::Handle(TEvPrivate::TEvReadFinished::TPtr& ev, const TActorContext& ctx) { Y_UNUSED(ctx); ui64 readCookie = ev->Get()->RequestCookie; LOG_S_DEBUG("Finished read cookie: " << readCookie << " at tablet " << TabletID()); @@ -156,17 +170,28 @@ void TColumnShard::Handle(TEvPrivate::TEvReadFinished::TPtr& ev, const TActorCon if (HasIndex()) { index = &GetIndexAs().GetVersionedIndex(); } - InFlightReadsTracker.RemoveInFlightRequest(ev->Get()->RequestCookie, index); + + auto readMetaBase = InFlightReadsTracker.ExtractInFlightRequest(ev->Get()->RequestCookie, index, TInstant::Now()); + readMetaBase->OnReadFinished(*this); ui64 txId = ev->Get()->TxId; if (ScanTxInFlight.contains(txId)) { TDuration duration = TAppData::TimeProvider->Now() - ScanTxInFlight[txId]; - IncCounter(COUNTER_SCAN_LATENCY, duration); + Counters.GetTabletCounters()->IncCounter(COUNTER_SCAN_LATENCY, duration); ScanTxInFlight.erase(txId); - SetCounter(COUNTER_SCAN_IN_FLY, ScanTxInFlight.size()); + Counters.GetTabletCounters()->SetCounter(COUNTER_SCAN_IN_FLY, ScanTxInFlight.size()); + Counters.GetTabletCounters()->IncCounter(COUNTER_IMMEDIATE_TX_COMPLETED); } } +void TColumnShard::Handle(TEvPrivate::TEvPingSnapshotsUsage::TPtr& /*ev*/, const TActorContext& ctx) { + if (auto writeTx = + InFlightReadsTracker.Ping(this, NYDBTest::TControllers::GetColumnShardController()->GetPingCheckPeriod(), TInstant::Now())) { + Execute(writeTx.release(), ctx); + } + ctx.Schedule(0.3 * GetMaxReadStaleness(), new TEvPrivate::TEvPingSnapshotsUsage()); +} + void TColumnShard::Handle(TEvPrivate::TEvPeriodicWakeup::TPtr& ev, const TActorContext& ctx) { if (ev->Get()->Manual) { 
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "TEvPrivate::TEvPeriodicWakeup::MANUAL")("tablet_id", TabletID()); @@ -176,10 +201,20 @@ void TColumnShard::Handle(TEvPrivate::TEvPeriodicWakeup::TPtr& ev, const TActorC SendWaitPlanStep(GetOutdatedStep()); SendPeriodicStats(); + EnqueueBackgroundActivities(); ctx.Schedule(PeriodicWakeupActivationPeriod, new TEvPrivate::TEvPeriodicWakeup()); } } +void TColumnShard::Handle(NActors::TEvents::TEvWakeup::TPtr& ev, const TActorContext& ctx) { + if (ev->Get()->Tag == 0) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "TEvPrivate::TEvPeriodicWakeup::MANUAL")("tablet_id", TabletID()); + const TMonotonic now = TMonotonic::Now(); + GetProgressTxController().PingTimeouts(now); + ctx.Schedule(TDuration::Seconds(1), new NActors::TEvents::TEvWakeup(0)); + } +} + void TColumnShard::Handle(TEvMediatorTimecast::TEvRegisterTabletResult::TPtr& ev, const TActorContext&) { const auto* msg = ev->Get(); Y_ABORT_UNLESS(msg->TabletId == TabletID()); @@ -213,14 +248,13 @@ void TColumnShard::UpdateInsertTableCounters() { auto& prepared = InsertTable->GetCountersPrepared(); auto& committed = InsertTable->GetCountersCommitted(); - SetCounter(COUNTER_PREPARED_RECORDS, prepared.Rows); - SetCounter(COUNTER_PREPARED_BYTES, prepared.Bytes); - SetCounter(COUNTER_COMMITTED_RECORDS, committed.Rows); - SetCounter(COUNTER_COMMITTED_BYTES, committed.Bytes); + Counters.GetTabletCounters()->SetCounter(COUNTER_PREPARED_RECORDS, prepared.Rows); + Counters.GetTabletCounters()->SetCounter(COUNTER_PREPARED_BYTES, prepared.Bytes); + Counters.GetTabletCounters()->SetCounter(COUNTER_COMMITTED_RECORDS, committed.Rows); + Counters.GetTabletCounters()->SetCounter(COUNTER_COMMITTED_BYTES, committed.Bytes); - LOG_S_TRACE("InsertTable. Prepared: " << prepared.Bytes << " in " << prepared.Rows - << " records, committed: " << committed.Bytes << " in " << committed.Rows - << " records at tablet " << TabletID()); + LOG_S_TRACE("InsertTable. 
Prepared: " << prepared.Bytes << " in " << prepared.Rows << " records, committed: " << committed.Bytes << " in " + << committed.Rows << " records at tablet " << TabletID()); } void TColumnShard::UpdateIndexCounters() { @@ -229,70 +263,63 @@ void TColumnShard::UpdateIndexCounters() { } auto& stats = TablesManager.MutablePrimaryIndex().GetTotalStats(); - SetCounter(COUNTER_INDEX_TABLES, stats.Tables); - SetCounter(COUNTER_INDEX_COLUMN_RECORDS, stats.ColumnRecords); - SetCounter(COUNTER_INSERTED_PORTIONS, stats.GetInsertedStats().Portions); - SetCounter(COUNTER_INSERTED_BLOBS, stats.GetInsertedStats().Blobs); - SetCounter(COUNTER_INSERTED_ROWS, stats.GetInsertedStats().Rows); - SetCounter(COUNTER_INSERTED_BYTES, stats.GetInsertedStats().Bytes); - SetCounter(COUNTER_INSERTED_RAW_BYTES, stats.GetInsertedStats().RawBytes); - SetCounter(COUNTER_COMPACTED_PORTIONS, stats.GetCompactedStats().Portions); - SetCounter(COUNTER_COMPACTED_BLOBS, stats.GetCompactedStats().Blobs); - SetCounter(COUNTER_COMPACTED_ROWS, stats.GetCompactedStats().Rows); - SetCounter(COUNTER_COMPACTED_BYTES, stats.GetCompactedStats().Bytes); - SetCounter(COUNTER_COMPACTED_RAW_BYTES, stats.GetCompactedStats().RawBytes); - SetCounter(COUNTER_SPLIT_COMPACTED_PORTIONS, stats.GetSplitCompactedStats().Portions); - SetCounter(COUNTER_SPLIT_COMPACTED_BLOBS, stats.GetSplitCompactedStats().Blobs); - SetCounter(COUNTER_SPLIT_COMPACTED_ROWS, stats.GetSplitCompactedStats().Rows); - SetCounter(COUNTER_SPLIT_COMPACTED_BYTES, stats.GetSplitCompactedStats().Bytes); - SetCounter(COUNTER_SPLIT_COMPACTED_RAW_BYTES, stats.GetSplitCompactedStats().RawBytes); - SetCounter(COUNTER_INACTIVE_PORTIONS, stats.GetInactiveStats().Portions); - SetCounter(COUNTER_INACTIVE_BLOBS, stats.GetInactiveStats().Blobs); - SetCounter(COUNTER_INACTIVE_ROWS, stats.GetInactiveStats().Rows); - SetCounter(COUNTER_INACTIVE_BYTES, stats.GetInactiveStats().Bytes); - SetCounter(COUNTER_INACTIVE_RAW_BYTES, stats.GetInactiveStats().RawBytes); - 
SetCounter(COUNTER_EVICTED_PORTIONS, stats.GetEvictedStats().Portions); - SetCounter(COUNTER_EVICTED_BLOBS, stats.GetEvictedStats().Blobs); - SetCounter(COUNTER_EVICTED_ROWS, stats.GetEvictedStats().Rows); - SetCounter(COUNTER_EVICTED_BYTES, stats.GetEvictedStats().Bytes); - SetCounter(COUNTER_EVICTED_RAW_BYTES, stats.GetEvictedStats().RawBytes); - - LOG_S_DEBUG("Index: tables " << stats.Tables - << " inserted " << stats.GetInsertedStats().DebugString() - << " compacted " << stats.GetCompactedStats().DebugString() - << " s-compacted " << stats.GetSplitCompactedStats().DebugString() - << " inactive " << stats.GetInactiveStats().DebugString() - << " evicted " << stats.GetEvictedStats().DebugString() - << " column records " << stats.ColumnRecords - << " at tablet " << TabletID()); + const std::shared_ptr& counters = Counters.GetTabletCounters(); + counters->SetCounter(COUNTER_INDEX_TABLES, stats.Tables); + counters->SetCounter(COUNTER_INDEX_COLUMN_RECORDS, stats.ColumnRecords); + counters->SetCounter(COUNTER_INSERTED_PORTIONS, stats.GetInsertedStats().Portions); + counters->SetCounter(COUNTER_INSERTED_BLOBS, stats.GetInsertedStats().Blobs); + counters->SetCounter(COUNTER_INSERTED_ROWS, stats.GetInsertedStats().Rows); + counters->SetCounter(COUNTER_INSERTED_BYTES, stats.GetInsertedStats().Bytes); + counters->SetCounter(COUNTER_INSERTED_RAW_BYTES, stats.GetInsertedStats().RawBytes); + counters->SetCounter(COUNTER_COMPACTED_PORTIONS, stats.GetCompactedStats().Portions); + counters->SetCounter(COUNTER_COMPACTED_BLOBS, stats.GetCompactedStats().Blobs); + counters->SetCounter(COUNTER_COMPACTED_ROWS, stats.GetCompactedStats().Rows); + counters->SetCounter(COUNTER_COMPACTED_BYTES, stats.GetCompactedStats().Bytes); + counters->SetCounter(COUNTER_COMPACTED_RAW_BYTES, stats.GetCompactedStats().RawBytes); + counters->SetCounter(COUNTER_SPLIT_COMPACTED_PORTIONS, stats.GetSplitCompactedStats().Portions); + counters->SetCounter(COUNTER_SPLIT_COMPACTED_BLOBS, 
stats.GetSplitCompactedStats().Blobs); + counters->SetCounter(COUNTER_SPLIT_COMPACTED_ROWS, stats.GetSplitCompactedStats().Rows); + counters->SetCounter(COUNTER_SPLIT_COMPACTED_BYTES, stats.GetSplitCompactedStats().Bytes); + counters->SetCounter(COUNTER_SPLIT_COMPACTED_RAW_BYTES, stats.GetSplitCompactedStats().RawBytes); + counters->SetCounter(COUNTER_INACTIVE_PORTIONS, stats.GetInactiveStats().Portions); + counters->SetCounter(COUNTER_INACTIVE_BLOBS, stats.GetInactiveStats().Blobs); + counters->SetCounter(COUNTER_INACTIVE_ROWS, stats.GetInactiveStats().Rows); + counters->SetCounter(COUNTER_INACTIVE_BYTES, stats.GetInactiveStats().Bytes); + counters->SetCounter(COUNTER_INACTIVE_RAW_BYTES, stats.GetInactiveStats().RawBytes); + counters->SetCounter(COUNTER_EVICTED_PORTIONS, stats.GetEvictedStats().Portions); + counters->SetCounter(COUNTER_EVICTED_BLOBS, stats.GetEvictedStats().Blobs); + counters->SetCounter(COUNTER_EVICTED_ROWS, stats.GetEvictedStats().Rows); + counters->SetCounter(COUNTER_EVICTED_BYTES, stats.GetEvictedStats().Bytes); + counters->SetCounter(COUNTER_EVICTED_RAW_BYTES, stats.GetEvictedStats().RawBytes); + + LOG_S_DEBUG("Index: tables " << stats.Tables << " inserted " << stats.GetInsertedStats().DebugString() << " compacted " + << stats.GetCompactedStats().DebugString() << " s-compacted " << stats.GetSplitCompactedStats().DebugString() + << " inactive " << stats.GetInactiveStats().DebugString() << " evicted " + << stats.GetEvictedStats().DebugString() << " column records " << stats.ColumnRecords << " at tablet " + << TabletID()); } ui64 TColumnShard::MemoryUsage() const { - ui64 memory = - ProgressTxController->GetMemoryUsage() + - ScanTxInFlight.size() * (sizeof(ui64) + sizeof(TInstant)) + - LongTxWrites.size() * (sizeof(TWriteId) + sizeof(TLongTxWriteInfo)) + - LongTxWritesByUniqueId.size() * (sizeof(TULID) + sizeof(void*)) + - (WaitingScans.size()) * (sizeof(NOlap::TSnapshot) + sizeof(void*)) + - 
TabletCounters->Simple()[COUNTER_PREPARED_RECORDS].Get() * sizeof(NOlap::TInsertedData) + - TabletCounters->Simple()[COUNTER_COMMITTED_RECORDS].Get() * sizeof(NOlap::TInsertedData); + ui64 memory = ProgressTxController->GetMemoryUsage() + ScanTxInFlight.size() * (sizeof(ui64) + sizeof(TInstant)) + + LongTxWrites.size() * (sizeof(TInsertWriteId) + sizeof(TLongTxWriteInfo)) + + LongTxWritesByUniqueId.size() * (sizeof(TULID) + sizeof(void*)) + + (WaitingScans.size()) * (sizeof(NOlap::TSnapshot) + sizeof(void*)) + + Counters.GetTabletCounters()->GetValue(COUNTER_PREPARED_RECORDS) * sizeof(NOlap::TInsertedData) + + Counters.GetTabletCounters()->GetValue(COUNTER_COMMITTED_RECORDS) * sizeof(NOlap::TInsertedData); memory += TablesManager.GetMemoryUsage(); return memory; } void TColumnShard::UpdateResourceMetrics(const TActorContext& ctx, const TUsage& usage) { - auto * metrics = Executor()->GetResourceMetrics(); + auto* metrics = Executor()->GetResourceMetrics(); if (!metrics) { return; } ui64 storageBytes = - TabletCounters->Simple()[COUNTER_PREPARED_BYTES].Get() + - TabletCounters->Simple()[COUNTER_COMMITTED_BYTES].Get() + - TabletCounters->Simple()[COUNTER_INSERTED_BYTES].Get() + - TabletCounters->Simple()[COUNTER_COMPACTED_BYTES].Get() + - TabletCounters->Simple()[COUNTER_SPLIT_COMPACTED_BYTES].Get() + - TabletCounters->Simple()[COUNTER_INACTIVE_BYTES].Get(); + Counters.GetTabletCounters()->GetValue(COUNTER_PREPARED_BYTES) + Counters.GetTabletCounters()->GetValue(COUNTER_COMMITTED_BYTES) + + Counters.GetTabletCounters()->GetValue(COUNTER_INSERTED_BYTES) + Counters.GetTabletCounters()->GetValue(COUNTER_COMPACTED_BYTES) + + Counters.GetTabletCounters()->GetValue(COUNTER_SPLIT_COMPACTED_BYTES) + Counters.GetTabletCounters()->GetValue(COUNTER_INACTIVE_BYTES); ui64 memory = MemoryUsage(); @@ -308,35 +335,6 @@ void TColumnShard::UpdateResourceMetrics(const TActorContext& ctx, const TUsage& metrics->TryUpdate(ctx); } -void TColumnShard::ConfigureStats(const 
NOlap::TColumnEngineStats& indexStats, - ::NKikimrTableStats::TTableStats* tabletStats) { - NOlap::TSnapshot lastIndexUpdate = TablesManager.GetPrimaryIndexSafe().LastUpdate(); - auto activeIndexStats = indexStats.Active(); // data stats excluding inactive and evicted - - if (activeIndexStats.Rows < 0 || activeIndexStats.Bytes < 0) { - LOG_S_WARN("Negative stats counter. Rows: " << activeIndexStats.Rows << " Bytes: " << activeIndexStats.Bytes - << TabletID()); - - activeIndexStats.Rows = (activeIndexStats.Rows < 0) ? 0 : activeIndexStats.Rows; - activeIndexStats.Bytes = (activeIndexStats.Bytes < 0) ? 0 : activeIndexStats.Bytes; - } - - tabletStats->SetRowCount(activeIndexStats.Rows); - tabletStats->SetDataSize(activeIndexStats.Bytes + TabletCounters->Simple()[COUNTER_COMMITTED_BYTES].Get()); - - // TODO: we need row/dataSize counters for evicted data (managed by tablet but stored outside) - // tabletStats->SetIndexSize(); // TODO: calc size of internal tables - - tabletStats->SetLastAccessTime(LastAccessTime.MilliSeconds()); - tabletStats->SetLastUpdateTime(lastIndexUpdate.GetPlanStep()); -} - -void TColumnShard::FillTxTableStats(::NKikimrTableStats::TTableStats* tableStats) const { - tableStats->SetTxRejectedByOverload(TabletCounters->Cumulative()[COUNTER_WRITE_OVERLOAD].Get()); - tableStats->SetTxRejectedBySpace(TabletCounters->Cumulative()[COUNTER_OUT_OF_SPACE].Get()); - tableStats->SetInFlightTxCount(Executor()->GetStats().TxInFly); -} - void TColumnShard::FillOlapStats(const TActorContext& ctx, std::unique_ptr& ev) { ev->Record.SetShardState(2); // NKikimrTxDataShard.EDatashardState.Ready ev->Record.SetGeneration(Executor()->Generation()); @@ -346,30 +344,24 @@ void TColumnShard::FillOlapStats(const TActorContext& ctx, std::unique_ptrGetResourceMetrics()) { resourceMetrics->Fill(*ev->Record.MutableTabletMetrics()); } - auto* tabletStats = ev->Record.MutableTableStats(); - FillTxTableStats(tabletStats); + if (TablesManager.HasPrimaryIndex()) { - const auto& 
indexStats = TablesManager.MutablePrimaryIndex().GetTotalStats(); - ConfigureStats(indexStats, tabletStats); + TTableStatsBuilder statsBuilder(Counters, Executor(), TablesManager.MutablePrimaryIndex()); + statsBuilder.FillTotalTableStats(*ev->Record.MutableTableStats()); } } -void TColumnShard::FillColumnTableStats(const TActorContext& ctx, - std::unique_ptr& ev) { - if (!TablesManager.HasPrimaryIndex()) { - return; - } - const auto& tablesIndexStats = TablesManager.MutablePrimaryIndex().GetStats(); - LOG_S_DEBUG("There are stats for " << tablesIndexStats.size() << " tables"); - for (const auto& [tableLocalID, columnStats] : tablesIndexStats) { - if (!columnStats) { - LOG_S_ERROR("SendPeriodicStats: empty stats"); - continue; - } +void TColumnShard::FillColumnTableStats(const TActorContext& ctx, std::unique_ptr& ev) { + auto tables = TablesManager.GetTables(); + std::optional tableStatsBuilder = + TablesManager.HasPrimaryIndex() ? std::make_optional(Counters, Executor(), TablesManager.MutablePrimaryIndex()) + : std::nullopt; + LOG_S_DEBUG("There are stats for " << tables.size() << " tables"); + for (const auto& [pathId, _] : tables) { auto* periodicTableStats = ev->Record.AddTables(); periodicTableStats->SetDatashardId(TabletID()); - periodicTableStats->SetTableLocalId(tableLocalID); + periodicTableStats->SetTableLocalId(pathId); periodicTableStats->SetShardState(2); // NKikimrTxDataShard.EDatashardState.Ready periodicTableStats->SetGeneration(Executor()->Generation()); @@ -381,11 +373,11 @@ void TColumnShard::FillColumnTableStats(const TActorContext& ctx, resourceMetrics->Fill(*periodicTableStats->MutableTabletMetrics()); } - auto* tableStats = periodicTableStats->MutableTableStats(); - FillTxTableStats(tableStats); - ConfigureStats(*columnStats, tableStats); + if (tableStatsBuilder) { + tableStatsBuilder->FillTableStats(pathId, *(periodicTableStats->MutableTableStats())); + } - LOG_S_TRACE("Add stats for table, tableLocalID=" << tableLocalID); + LOG_S_TRACE("Add 
stats for table, tableLocalID=" << pathId); } } diff --git a/ydb/core/tx/columnshard/columnshard.h b/ydb/core/tx/columnshard/columnshard.h index 90ae1fd8c6b7..186d665153af 100644 --- a/ydb/core/tx/columnshard/columnshard.h +++ b/ydb/core/tx/columnshard/columnshard.h @@ -98,6 +98,7 @@ struct TEvColumnShard { struct TEvInternalScan: public TEventLocal { private: YDB_READONLY(ui64, PathId, 0); + YDB_READONLY_DEF(std::optional, LockId); YDB_ACCESSOR(bool, Reverse, false); YDB_ACCESSOR(ui32, ItemsLimit, 0); YDB_READONLY_DEF(std::vector, ColumnIds); @@ -116,8 +117,9 @@ struct TEvColumnShard { ColumnNames.emplace_back(columnName); } - TEvInternalScan(const ui64 pathId) + TEvInternalScan(const ui64 pathId, const std::optional lockId) : PathId(pathId) + , LockId(lockId) { } diff --git a/ydb/core/tx/columnshard/columnshard__init.cpp b/ydb/core/tx/columnshard/columnshard__init.cpp index aadee209f6dd..09cf1f4ef71f 100644 --- a/ydb/core/tx/columnshard/columnshard__init.cpp +++ b/ydb/core/tx/columnshard/columnshard__init.cpp @@ -19,6 +19,9 @@ namespace NKikimr::NColumnShard { using namespace NTabletFlatExecutor; class TTxInit : public TTransactionBase { +private: + const TMonotonic StartInstant = TMonotonic::Now(); + public: TTxInit(TColumnShard* self) : TBase(self) @@ -38,7 +41,6 @@ void TTxInit::SetDefaults() { Self->CurrentSchemeShardId = 0; Self->LastSchemaSeqNo = { }; Self->ProcessingParams.reset(); - Self->LastWriteId = TWriteId{0}; Self->LastPlannedStep = 0; Self->LastPlannedTxId = 0; Self->LastCompletedTx = NOlap::TSnapshot::Zero(); @@ -70,7 +72,6 @@ bool TTxInit::Precharge(TTransactionContext& txc) { ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastSchemaSeqNoGeneration, Self->LastSchemaSeqNo.Generation); ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastSchemaSeqNoRound, Self->LastSchemaSeqNo.Round); ready = ready && Schema::GetSpecialProtoValue(db, Schema::EValueIds::ProcessingParams, Self->ProcessingParams); - ready = ready 
&& Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastWriteId, Self->LastWriteId); ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastPlannedStep, Self->LastPlannedStep); ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastPlannedTxId, Self->LastPlannedTxId); ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastExportNumber, Self->LastExportNo); @@ -104,7 +105,7 @@ bool TTxInit::ReadEverything(TTransactionContext& txc, const TActorContext& ctx) ACFL_DEBUG("step", "TInsertTable::Load_Start"); TMemoryProfileGuard g("TTxInit/InsertTable"); auto localInsertTable = std::make_unique(); - if (!localInsertTable->Load(dbTable, TAppData::TimeProvider->Now())) { + if (!localInsertTable->Load(db, dbTable, TAppData::TimeProvider->Now())) { ACFL_ERROR("step", "TInsertTable::Load_Fails"); return false; } @@ -165,9 +166,9 @@ bool TTxInit::ReadEverything(TTransactionContext& txc, const TActorContext& ctx) } Self->TablesManager = std::move(tManagerLocal); - Self->SetCounter(COUNTER_TABLES, Self->TablesManager.GetTables().size()); - Self->SetCounter(COUNTER_TABLE_PRESETS, Self->TablesManager.GetSchemaPresets().size()); - Self->SetCounter(COUNTER_TABLE_TTLS, Self->TablesManager.GetTtl().PathsCount()); + Self->Counters.GetTabletCounters()->SetCounter(COUNTER_TABLES, Self->TablesManager.GetTables().size()); + Self->Counters.GetTabletCounters()->SetCounter(COUNTER_TABLE_PRESETS, Self->TablesManager.GetSchemaPresets().size()); + Self->Counters.GetTabletCounters()->SetCounter(COUNTER_TABLE_TTLS, Self->TablesManager.GetTtl().PathsCount()); ACFL_DEBUG("step", "TTablesManager::Load_Finish"); } @@ -179,7 +180,7 @@ bool TTxInit::ReadEverything(TTransactionContext& txc, const TActorContext& ctx) } while (!rowset.EndOfSet()) { - const TWriteId writeId = TWriteId{ rowset.GetValue() }; + const TInsertWriteId writeId = (TInsertWriteId)rowset.GetValue(); const ui32 writePartId = rowset.GetValue(); NKikimrLongTxService::TLongTxId proto; 
Y_ABORT_UNLESS(proto.ParseFromString(rowset.GetValue())); @@ -222,6 +223,14 @@ bool TTxInit::ReadEverything(TTransactionContext& txc, const TActorContext& ctx) } Self->SharingSessionsManager = local; } + { + TMemoryProfileGuard g("TTxInit/TInFlightReadsTracker"); + TInFlightReadsTracker local(Self->StoragesManager, Self->Counters.GetRequestsTracingCounters()); + if (!local.LoadFromDatabase(txc.DB)) { + return false; + } + Self->InFlightReadsTracker = std::move(local); + } Self->UpdateInsertTableCounters(); Self->UpdateIndexCounters(); @@ -250,7 +259,7 @@ bool TTxInit::Execute(TTransactionContext& txc, const TActorContext& ctx) { } void TTxInit::Complete(const TActorContext& ctx) { - Self->ProgressTxController->StartOperators(); + Self->Counters.GetCSCounters().Initialization.OnTxInitFinished(TMonotonic::Now() - StartInstant); Self->ProgressTxController->OnTabletInit(); Self->SwitchToWork(ctx); NYDBTest::TControllers::GetColumnShardController()->OnTabletInitCompleted(*Self); @@ -258,6 +267,8 @@ void TTxInit::Complete(const TActorContext& ctx) { class TTxUpdateSchema : public TTransactionBase { std::vector NormalizerTasks; + const TMonotonic StartInstant = TMonotonic::Now(); + public: TTxUpdateSchema(TColumnShard* self) : TBase(self) @@ -296,6 +307,7 @@ bool TTxUpdateSchema::Execute(TTransactionContext& txc, const TActorContext&) { void TTxUpdateSchema::Complete(const TActorContext& ctx) { AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("step", "TTxUpdateSchema.Complete"); + Self->Counters.GetCSCounters().Initialization.OnTxUpdateSchemaFinished(TMonotonic::Now() - StartInstant); if (NormalizerTasks.empty()) { AFL_VERIFY(Self->NormalizerController.IsNormalizationFinished())("details", Self->NormalizerController.DebugString()); Self->Execute(new TTxInit(Self), ctx); @@ -361,6 +373,9 @@ void TTxApplyNormalizer::Complete(const TActorContext& ctx) { /// Create local database on tablet start if none class TTxInitSchema : public TTransactionBase { +private: + const TMonotonic 
StartInstant = TMonotonic::Now(); + public: TTxInitSchema(TColumnShard* self) : TBase(self) @@ -423,6 +438,7 @@ bool TTxInitSchema::Execute(TTransactionContext& txc, const TActorContext&) { } void TTxInitSchema::Complete(const TActorContext& ctx) { + Self->Counters.GetCSCounters().Initialization.OnTxInitSchemaFinished(TMonotonic::Now() - StartInstant); LOG_S_DEBUG("TxInitSchema.Complete at tablet " << Self->TabletID();); Self->Execute(new TTxUpdateSchema(Self), ctx); } diff --git a/ydb/core/tx/columnshard/columnshard__notify_tx_completion.cpp b/ydb/core/tx/columnshard/columnshard__notify_tx_completion.cpp index cc498125c326..883844cc0357 100644 --- a/ydb/core/tx/columnshard/columnshard__notify_tx_completion.cpp +++ b/ydb/core/tx/columnshard/columnshard__notify_tx_completion.cpp @@ -14,7 +14,7 @@ class TTxNotifyTxCompletion : public TTransactionBase { LOG_S_DEBUG("TTxNotifyTxCompletion.Execute at tablet " << Self->TabletID()); const ui64 txId = Ev->Get()->Record.GetTxId(); - auto txOperator = Self->ProgressTxController->GetTxOperator(txId); + auto txOperator = Self->ProgressTxController->GetTxOperatorOptional(txId); if (txOperator) { txOperator->RegisterSubscriber(Ev->Sender); return true; diff --git a/ydb/core/tx/columnshard/columnshard__plan_step.cpp b/ydb/core/tx/columnshard/columnshard__plan_step.cpp index 80dda8112e0b..33420df5905c 100644 --- a/ydb/core/tx/columnshard/columnshard__plan_step.cpp +++ b/ydb/core/tx/columnshard/columnshard__plan_step.cpp @@ -102,10 +102,10 @@ bool TTxPlanStep::Execute(TTransactionContext& txc, const TActorContext& ctx) { Result = std::make_unique(Self->TabletID(), step); - Self->IncCounter(COUNTER_PLAN_STEP_ACCEPTED); + Self->Counters.GetTabletCounters()->IncCounter(COUNTER_PLAN_STEP_ACCEPTED); if (plannedCount > 0 || Self->ProgressTxController->HaveOutdatedTxs()) { - Self->EnqueueProgressTx(ctx); + Self->EnqueueProgressTx(ctx, std::nullopt); } return true; } diff --git a/ydb/core/tx/columnshard/columnshard__progress_tx.cpp 
b/ydb/core/tx/columnshard/columnshard__progress_tx.cpp index e7d90c111148..73a4a0200d97 100644 --- a/ydb/core/tx/columnshard/columnshard__progress_tx.cpp +++ b/ydb/core/tx/columnshard/columnshard__progress_tx.cpp @@ -14,6 +14,8 @@ class TColumnShard::TTxProgressTx: public TTransactionBase { const ui32 TabletTxNo; std::optional LastCompletedTx; std::optional PlannedQueueItem; + std::optional StartExecution; + const TMonotonic ConstructionInstant = TMonotonic::Now(); public: TTxProgressTx(TColumnShard* self) @@ -29,7 +31,7 @@ class TColumnShard::TTxProgressTx: public TTransactionBase { NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())("tx_state", "execute"); Y_ABORT_UNLESS(Self->ProgressTxInFlight); - Self->TabletCounters->Simple()[COUNTER_TX_COMPLETE_LAG].Set(Self->GetTxCompleteLag().MilliSeconds()); + Self->Counters.GetTabletCounters()->SetCounter(COUNTER_TX_COMPLETE_LAG, Self->GetTxCompleteLag().MilliSeconds()); const size_t removedCount = Self->ProgressTxController->CleanExpiredTxs(txc); if (removedCount > 0) { @@ -40,11 +42,22 @@ class TColumnShard::TTxProgressTx: public TTransactionBase { } // Process a single transaction at the front of the queue - auto plannedItem = Self->ProgressTxController->StartPlannedTx(); + const auto plannedItem = Self->ProgressTxController->GetFirstPlannedTx(); if (!!plannedItem) { PlannedQueueItem.emplace(plannedItem->PlanStep, plannedItem->TxId); ui64 step = plannedItem->PlanStep; ui64 txId = plannedItem->TxId; + TxOperator = Self->ProgressTxController->GetTxOperatorVerified(txId); + if (auto txPrepare = TxOperator->BuildTxPrepareForProgress(Self)) { + AbortedThroughRemoveExpired = true; + Self->ProgressTxInFlight = txId; + Self->Execute(txPrepare.release(), ctx); + return true; + } else { + Self->ProgressTxController->PopFirstPlannedTx(); + } + StartExecution = TMonotonic::Now(); + LastCompletedTx = NOlap::TSnapshot(step, txId); if 
(LastCompletedTx > Self->LastCompletedTx) { NIceDb::TNiceDb db(txc.DB); @@ -52,9 +65,13 @@ class TColumnShard::TTxProgressTx: public TTransactionBase { Schema::SaveSpecialValue(db, Schema::EValueIds::LastCompletedTxId, LastCompletedTx->GetTxId()); } - TxOperator = Self->ProgressTxController->GetVerifiedTxOperator(txId); AFL_VERIFY(TxOperator->ProgressOnExecute(*Self, NOlap::TSnapshot(step, txId), txc)); - Self->ProgressTxController->FinishPlannedTx(txId, txc); + Self->ProgressTxController->ProgressOnExecute(txId, txc); + Self->Counters.GetTabletCounters()->IncCounter(COUNTER_PLANNED_TX_COMPLETED); + } + Self->ProgressTxInFlight = std::nullopt; + if (!!Self->ProgressTxController->GetPlannedTx()) { + Self->EnqueueProgressTx(ctx, std::nullopt); } return true; } @@ -70,23 +87,29 @@ class TColumnShard::TTxProgressTx: public TTransactionBase { Self->RescheduleWaitingReads(); } if (PlannedQueueItem) { - Self->GetProgressTxController().CompleteRunningTx(*PlannedQueueItem); + AFL_VERIFY(TxOperator); + Self->GetProgressTxController().GetCounters().OnTxProgressLag( + TxOperator->GetOpType(), TMonotonic::Now() - TMonotonic::MilliSeconds(PlannedQueueItem->Step)); + Self->GetProgressTxController().ProgressOnComplete(*PlannedQueueItem); } if (LastCompletedTx) { Self->LastCompletedTx = std::max(*LastCompletedTx, Self->LastCompletedTx); } - Self->ProgressTxInFlight = false; - if (!!Self->ProgressTxController->GetPlannedTx()) { - Self->EnqueueProgressTx(ctx); + if (StartExecution) { + Self->GetProgressTxController().GetCounters().OnTxExecuteDuration(TxOperator->GetOpType(), TMonotonic::Now() - *StartExecution); + Self->GetProgressTxController().GetCounters().OnTxLiveDuration(TxOperator->GetOpType(), TMonotonic::Now() - ConstructionInstant); } Self->SetupIndexation(); } }; -void TColumnShard::EnqueueProgressTx(const TActorContext& ctx) { +void TColumnShard::EnqueueProgressTx(const TActorContext& ctx, const std::optional continueTxId) { 
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "EnqueueProgressTx")("tablet_id", TabletID()); - if (!ProgressTxInFlight) { - ProgressTxInFlight = true; + if (continueTxId) { + AFL_VERIFY(!ProgressTxInFlight || ProgressTxInFlight == continueTxId)("current", ProgressTxInFlight)("expected", continueTxId); + } + if (!ProgressTxInFlight || ProgressTxInFlight == continueTxId) { + ProgressTxInFlight = continueTxId.value_or(0); Execute(new TTxProgressTx(this), ctx); } } diff --git a/ydb/core/tx/columnshard/columnshard__propose_transaction.cpp b/ydb/core/tx/columnshard/columnshard__propose_transaction.cpp index d42ad4fc8d4f..d4ded82be3d8 100644 --- a/ydb/core/tx/columnshard/columnshard__propose_transaction.cpp +++ b/ydb/core/tx/columnshard/columnshard__propose_transaction.cpp @@ -25,13 +25,14 @@ class TTxProposeTransaction: public NTabletFlatExecutor::TTransactionBaseIncCounter(COUNTER_PREPARE_REQUEST); + Self->Counters.GetTabletCounters()->IncCounter(COUNTER_PREPARE_REQUEST); auto& record = Proto(Ev->Get()); const auto txKind = record.GetTxKind(); const ui64 txId = record.GetTxId(); const auto& txBody = record.GetTxBody(); - NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("tablet_id", Self->TabletID())("tx_id", txId)("this", (ui64)this); + NActors::TLogContextGuard lGuard = + NActors::TLogContextBuilder::Build()("tablet_id", Self->TabletID())("tx_id", txId)("this", (ui64)this); if (txKind == NKikimrTxColumnShard::TX_KIND_TTL) { auto proposeResult = ProposeTtlDeprecated(txBody); @@ -51,7 +52,7 @@ class TTxProposeTransaction: public NTabletFlatExecutor::TTransactionBaseCurrentSchemeShardId = record.GetSchemeShardId(); Schema::SaveSpecialValue(db, Schema::EValueIds::CurrentSchemeShardId, Self->CurrentSchemeShardId); } else { - Y_ABORT_UNLESS(Self->CurrentSchemeShardId == record.GetSchemeShardId()); + AFL_VERIFY(Self->CurrentSchemeShardId == record.GetSchemeShardId()); } } std::optional msgSeqNo; @@ -79,28 +80,34 @@ class TTxProposeTransaction: 
public NTabletFlatExecutor::TTransactionBaseTabletID())("request_tx", TxInfo->DebugString())( - "this", (ui64)this)("op_tx", TxOperator->GetTxInfo().DebugString()); + NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("tablet_id", Self->TabletID())( + "request_tx", TxInfo->DebugString())("this", (ui64)this)("op_tx", TxOperator->GetTxInfo().DebugString()); + + Self->TryRegisterMediatorTimeCast(); if (TxOperator->IsFail()) { TxOperator->SendReply(*Self, ctx); + return; + } + auto internalOp = Self->GetProgressTxController().GetTxOperatorOptional(txId); + if (!internalOp) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "removed tx operator"); + return; + } + NActors::TLogContextGuard lGuardTx = + NActors::TLogContextBuilder::Build()("int_op_tx", internalOp->GetTxInfo().DebugString())("int_this", (ui64)internalOp.get()); + if (!internalOp->CheckTxInfoForReply(*TxInfo)) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "deprecated tx operator"); + return; + } + + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "actual tx operator"); + if (internalOp->IsAsync()) { + Self->GetProgressTxController().StartProposeOnComplete(*internalOp, ctx); } else { - auto internalOp = Self->GetProgressTxController().GetVerifiedTxOperator(TxOperator->GetTxId()); - NActors::TLogContextGuard lGuardTx = NActors::TLogContextBuilder::Build()("int_op_tx", internalOp->GetTxInfo().DebugString()); - if (!TxOperator->CheckTxInfoForReply(*TxInfo)) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "deprecated tx operator"); - return; - } else { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "actual tx operator"); - } - if (TxOperator->IsAsync()) { - Self->GetProgressTxController().StartProposeOnComplete(txId, ctx); - } else { - Self->GetProgressTxController().FinishProposeOnComplete(txId, ctx); - } + Self->GetProgressTxController().FinishProposeOnComplete(*internalOp, ctx); } - Self->TryRegisterMediatorTimeCast(); } TTxType GetTxType() const override { 
@@ -138,11 +145,14 @@ class TTxProposeTransaction: public NTabletFlatExecutor::TTransactionBaseTablesManager.GetPrimaryIndexSafe().GetVersionedIndex().GetLastSchema()->GetSchema(); - auto ttlColumn = schema->GetFieldByName(columnName); - if (!ttlColumn) { - return TTxController::TProposeResult(NKikimrTxColumnShard::EResultStatus::SCHEMA_ERROR, "TTL tx wrong TTL column '" + columnName + "'"); + auto schemaSnapshot = Self->TablesManager.GetPrimaryIndexSafe().GetVersionedIndex().GetLastSchema(); + auto schema = schemaSnapshot->GetSchema(); + auto index = schemaSnapshot->GetColumnIdOptional(columnName); + if (!index) { + return TTxController::TProposeResult( + NKikimrTxColumnShard::EResultStatus::SCHEMA_ERROR, "TTL tx wrong TTL column '" + columnName + "'"); } + auto ttlColumn = schemaSnapshot->GetFieldByColumnIdVerified(*index); const TInstant now = TlsActivationContext ? AppData()->TimeProvider->Now() : TInstant::Now(); for (ui64 pathId : ttlBody.GetPathIds()) { diff --git a/ydb/core/tx/columnshard/columnshard__scan.cpp b/ydb/core/tx/columnshard/columnshard__scan.cpp index a749e3c4f571..dd60f823139e 100644 --- a/ydb/core/tx/columnshard/columnshard__scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__scan.cpp @@ -5,6 +5,7 @@ #include "engines/reader/transaction/tx_internal_scan.h" #include +#include namespace NKikimr::NColumnShard { @@ -29,9 +30,9 @@ void TColumnShard::Handle(TEvColumnShard::TEvScan::TPtr& ev, const TActorContext return; } - LastAccessTime = TAppData::TimeProvider->Now(); - ScanTxInFlight.insert({txId, LastAccessTime}); - SetCounter(COUNTER_SCAN_IN_FLY, ScanTxInFlight.size()); + Counters.GetColumnTablesCounters()->GetPathIdCounter(record.GetLocalPathId())->OnReadEvent(); + ScanTxInFlight.insert({txId, TAppData::TimeProvider->Now()}); + Counters.GetTabletCounters()->SetCounter(COUNTER_SCAN_IN_FLY, ScanTxInFlight.size()); Execute(new NOlap::NReader::TTxScan(this, ev), ctx); } diff --git a/ydb/core/tx/columnshard/columnshard__write.cpp 
b/ydb/core/tx/columnshard/columnshard__write.cpp index c7a59351c639..58c521e31dae 100644 --- a/ydb/core/tx/columnshard/columnshard__write.cpp +++ b/ydb/core/tx/columnshard/columnshard__write.cpp @@ -1,9 +1,16 @@ #include "columnshard_impl.h" -#include "blobs_action/transaction/tx_write.h" + #include "blobs_action/transaction/tx_draft.h" +#include "blobs_action/transaction/tx_write.h" +#include "common/limits.h" #include "counters/columnshard.h" +#include "engines/column_engine_logs.h" #include "operations/batch_builder/builder.h" +#include "operations/manager.h" #include "operations/write_data.h" +#include "transactions/operators/ev_write/primary.h" +#include "transactions/operators/ev_write/secondary.h" +#include "transactions/operators/ev_write/sync.h" #include #include @@ -12,40 +19,36 @@ namespace NKikimr::NColumnShard { using namespace NTabletFlatExecutor; -void TColumnShard::OverloadWriteFail(const EOverloadStatus overloadReason, const NEvWrite::TWriteData& writeData, const ui64 cookie, std::unique_ptr&& event, const TActorContext& ctx) { - IncCounter(COUNTER_WRITE_FAIL); +void TColumnShard::OverloadWriteFail(const EOverloadStatus overloadReason, const NEvWrite::TWriteMeta& writeMeta, const ui64 writeSize, const ui64 cookie, + std::unique_ptr&& event, const TActorContext& ctx) { + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); switch (overloadReason) { case EOverloadStatus::Disk: - IncCounter(COUNTER_OUT_OF_SPACE); + Counters.OnWriteOverloadDisk(); break; case EOverloadStatus::InsertTable: - IncCounter(COUNTER_WRITE_OVERLOAD); - CSCounters.OnOverloadInsertTable(writeData.GetSize()); + Counters.OnWriteOverloadInsertTable(writeSize); break; case EOverloadStatus::OverloadMetadata: - IncCounter(COUNTER_WRITE_OVERLOAD); - CSCounters.OnOverloadMetadata(writeData.GetSize()); + Counters.OnWriteOverloadMetadata(writeSize); break; case EOverloadStatus::ShardTxInFly: - IncCounter(COUNTER_WRITE_OVERLOAD); - 
CSCounters.OnOverloadShardTx(writeData.GetSize()); + Counters.OnWriteOverloadShardTx(writeSize); break; case EOverloadStatus::ShardWritesInFly: - IncCounter(COUNTER_WRITE_OVERLOAD); - CSCounters.OnOverloadShardWrites(writeData.GetSize()); + Counters.OnWriteOverloadShardWrites(writeSize); break; case EOverloadStatus::ShardWritesSizeInFly: - IncCounter(COUNTER_WRITE_OVERLOAD); - CSCounters.OnOverloadShardWritesSize(writeData.GetSize()); + Counters.OnWriteOverloadShardWritesSize(writeSize); break; case EOverloadStatus::None: Y_ABORT("invalid function usage"); } - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "write_overload")("size", writeData.GetSize()) - ("path_id", writeData.GetWriteMeta().GetTableId())("reason", overloadReason); + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "write_overload")("size", writeSize)("path_id", writeMeta.GetTableId())( + "reason", overloadReason); - ctx.Send(writeData.GetWriteMeta().GetSource(), event.release(), 0, cookie); + ctx.Send(writeMeta.GetSource(), event.release(), 0, cookie); } TColumnShard::EOverloadStatus TColumnShard::CheckOverloaded(const ui64 tableId) const { @@ -57,7 +60,7 @@ TColumnShard::EOverloadStatus TColumnShard::CheckOverloaded(const ui64 tableId) return EOverloadStatus::InsertTable; } - CSCounters.OnIndexMetadataLimit(NOlap::IColumnEngine::GetMetadataLimit()); + Counters.GetCSCounters().OnIndexMetadataLimit(NOlap::IColumnEngine::GetMetadataLimit()); if (TablesManager.GetPrimaryIndex() && TablesManager.GetPrimaryIndex()->IsOverloadedByMetadata(NOlap::IColumnEngine::GetMetadataLimit())) { return EOverloadStatus::OverloadMetadata; } @@ -66,22 +69,26 @@ TColumnShard::EOverloadStatus TColumnShard::CheckOverloaded(const ui64 tableId) ui64 writesLimit = Settings.OverloadWritesInFlight; ui64 writesSizeLimit = Settings.OverloadWritesSizeInFlight; if (txLimit && Executor()->GetStats().TxInFly > txLimit) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "tx_in_fly")("sum", 
Executor()->GetStats().TxInFly)("limit", txLimit); + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "tx_in_fly")("sum", Executor()->GetStats().TxInFly)( + "limit", txLimit); return EOverloadStatus::ShardTxInFly; } - if (writesLimit && WritesMonitor.GetWritesInFlight() > writesLimit) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "writes_in_fly")("sum", WritesMonitor.GetWritesInFlight())("limit", writesLimit); + if (writesLimit && Counters.GetWritesMonitor()->GetWritesInFlight() > writesLimit) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "writes_in_fly")( + "sum", Counters.GetWritesMonitor()->GetWritesInFlight())("limit", writesLimit); return EOverloadStatus::ShardWritesInFly; } - if (writesSizeLimit && WritesMonitor.GetWritesSizeInFlight() > writesSizeLimit) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "writes_size_in_fly")("sum", WritesMonitor.GetWritesSizeInFlight())("limit", writesSizeLimit); + if (writesSizeLimit && Counters.GetWritesMonitor()->GetWritesSizeInFlight() > writesSizeLimit) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "writes_size_in_fly")( + "sum", Counters.GetWritesMonitor()->GetWritesSizeInFlight())("limit", writesSizeLimit); return EOverloadStatus::ShardWritesSizeInFly; } return EOverloadStatus::None; } void TColumnShard::Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActorContext& ctx) { - NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("event", "TEvWriteBlobsResult"); + NActors::TLogContextGuard gLogging = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("event", "TEvWriteBlobsResult"); auto& putResult = ev->Get()->GetPutResult(); OnYellowChannels(putResult); @@ -89,25 +96,25 @@ void 
TColumnShard::Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActo auto baseAggregations = wBuffer.GetAggregations(); wBuffer.InitReplyReceived(TMonotonic::Now()); - auto wg = WritesMonitor.FinishWrite(wBuffer.GetSumSize(), wBuffer.GetAggregations().size()); + Counters.GetWritesMonitor()->OnFinishWrite(wBuffer.GetSumSize(), wBuffer.GetAggregations().size()); for (auto&& aggr : baseAggregations) { const auto& writeMeta = aggr->GetWriteMeta(); if (!TablesManager.IsReadyForWrite(writeMeta.GetTableId())) { ACFL_ERROR("event", "absent_pathId")("path_id", writeMeta.GetTableId())("has_index", TablesManager.HasPrimaryIndex()); - IncCounter(COUNTER_WRITE_FAIL); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); auto result = std::make_unique(TabletID(), writeMeta, NKikimrTxColumnShard::EResultStatus::ERROR); ctx.Send(writeMeta.GetSource(), result.release()); - CSCounters.OnFailedWriteResponse(EWriteFailReason::NoTable); + Counters.GetCSCounters().OnFailedWriteResponse(EWriteFailReason::NoTable); wBuffer.RemoveData(aggr, StoragesManager->GetInsertOperator()); continue; } if (putResult.GetPutStatus() != NKikimrProto::OK) { - CSCounters.OnWritePutBlobsFail(TMonotonic::Now() - writeMeta.GetWriteStartInstant()); - IncCounter(COUNTER_WRITE_FAIL); + Counters.GetCSCounters().OnWritePutBlobsFail(TMonotonic::Now() - writeMeta.GetWriteStartInstant()); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); auto errCode = NKikimrTxColumnShard::EResultStatus::STORAGE_ERROR; if (putResult.GetPutStatus() == NKikimrProto::TIMEOUT || putResult.GetPutStatus() == NKikimrProto::DEADLINE) { @@ -122,26 +129,26 @@ void TColumnShard::Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActo auto result = std::make_unique(TabletID(), writeMeta, errCode); ctx.Send(writeMeta.GetSource(), result.release()); } else { - auto operation = OperationsManager->GetOperation((TWriteId)writeMeta.GetWriteId()); + auto operation = 
OperationsManager->GetOperation((TOperationWriteId)writeMeta.GetWriteId()); Y_ABORT_UNLESS(operation); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), operation->GetLockId(), NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, - ev->Get()->GetErrorMessage() ? ev->Get()->GetErrorMessage() : "put data fails"); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), operation->GetLockId(), + ev->Get()->GetWriteResultStatus(), ev->Get()->GetErrorMessage() ? ev->Get()->GetErrorMessage() : "put data fails"); ctx.Send(writeMeta.GetSource(), result.release(), 0, operation->GetCookie()); } - CSCounters.OnFailedWriteResponse(EWriteFailReason::PutBlob); + Counters.GetCSCounters().OnFailedWriteResponse(EWriteFailReason::PutBlob); wBuffer.RemoveData(aggr, StoragesManager->GetInsertOperator()); } else { const TMonotonic now = TMonotonic::Now(); - CSCounters.OnWritePutBlobsSuccess(now - writeMeta.GetWriteStartInstant()); - CSCounters.OnWriteMiddle1PutBlobsSuccess(now - writeMeta.GetWriteMiddle1StartInstant()); - CSCounters.OnWriteMiddle2PutBlobsSuccess(now - writeMeta.GetWriteMiddle2StartInstant()); - CSCounters.OnWriteMiddle3PutBlobsSuccess(now - writeMeta.GetWriteMiddle3StartInstant()); - CSCounters.OnWriteMiddle4PutBlobsSuccess(now - writeMeta.GetWriteMiddle4StartInstant()); - CSCounters.OnWriteMiddle5PutBlobsSuccess(now - writeMeta.GetWriteMiddle5StartInstant()); - CSCounters.OnWriteMiddle6PutBlobsSuccess(now - writeMeta.GetWriteMiddle6StartInstant()); + Counters.OnWritePutBlobsSuccess(now - writeMeta.GetWriteStartInstant(), aggr->GetRows()); + Counters.GetCSCounters().OnWriteMiddle1PutBlobsSuccess(now - writeMeta.GetWriteMiddle1StartInstant()); + Counters.GetCSCounters().OnWriteMiddle2PutBlobsSuccess(now - writeMeta.GetWriteMiddle2StartInstant()); + Counters.GetCSCounters().OnWriteMiddle3PutBlobsSuccess(now - writeMeta.GetWriteMiddle3StartInstant()); + Counters.GetCSCounters().OnWriteMiddle4PutBlobsSuccess(now - 
writeMeta.GetWriteMiddle4StartInstant()); + Counters.GetCSCounters().OnWriteMiddle5PutBlobsSuccess(now - writeMeta.GetWriteMiddle5StartInstant()); + Counters.GetCSCounters().OnWriteMiddle6PutBlobsSuccess(now - writeMeta.GetWriteMiddle6StartInstant()); LOG_S_DEBUG("Write (record) into pathId " << writeMeta.GetTableId() - << (writeMeta.GetWriteId() ? (" writeId " + ToString(writeMeta.GetWriteId())).c_str() : "") << " at tablet " << TabletID()); - + << (writeMeta.GetWriteId() ? (" writeId " + ToString(writeMeta.GetWriteId())).c_str() : "") + << " at tablet " << TabletID()); } } Execute(new TTxWrite(this, ev), ctx); @@ -152,8 +159,7 @@ void TColumnShard::Handle(TEvPrivate::TEvWriteDraft::TPtr& ev, const TActorConte } void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContext& ctx) { - CSCounters.OnStartWriteRequest(); - LastAccessTime = TAppData::TimeProvider->Now(); + Counters.GetCSCounters().OnStartWriteRequest(); const auto& record = Proto(ev->Get()); const ui64 tableId = record.GetTableId(); @@ -162,6 +168,8 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex const TString dedupId = record.GetDedupId(); const auto source = ev->Sender; + Counters.GetColumnTablesCounters()->GetPathIdCounter(tableId)->OnWriteEvent(); + std::optional granuleShardingVersion; if (record.HasGranuleShardingVersion()) { granuleShardingVersion = record.GetGranuleShardingVersion(); @@ -176,181 +184,336 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex writeMeta.SetLongTxId(NLongTxService::TLongTxId::FromProto(record.GetLongTxId())); writeMeta.SetWritePartId(record.GetWritePartId()); - const auto returnFail = [&](const NColumnShard::ECumulativeCounters signalIndex) { - IncCounter(signalIndex); + const auto returnFail = [&](const NColumnShard::ECumulativeCounters signalIndex, const EWriteFailReason reason) { + Counters.GetTabletCounters()->IncCounter(signalIndex); ctx.Send(source, 
std::make_unique(TabletID(), writeMeta, NKikimrTxColumnShard::EResultStatus::ERROR)); + Counters.GetCSCounters().OnFailedWriteResponse(reason); return; }; if (!AppDataVerified().ColumnShardConfig.GetWritingEnabled()) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_writing")("reason", "disabled"); - CSCounters.OnFailedWriteResponse(EWriteFailReason::Disabled); - return returnFail(COUNTER_WRITE_FAIL); + return returnFail(COUNTER_WRITE_FAIL, EWriteFailReason::Disabled); } if (!TablesManager.IsReadyForWrite(tableId)) { - LOG_S_NOTICE("Write (fail) into pathId:" << writeMeta.GetTableId() << (TablesManager.HasPrimaryIndex()? "": " no index") - << " at tablet " << TabletID()); + LOG_S_NOTICE("Write (fail) into pathId:" << writeMeta.GetTableId() << (TablesManager.HasPrimaryIndex() ? "" : " no index") + << " at tablet " << TabletID()); + + return returnFail(COUNTER_WRITE_FAIL, EWriteFailReason::NoTable); + } + + { + auto& portionsIndex = + TablesManager.GetPrimaryIndexAsVerified().GetGranuleVerified(writeMeta.GetTableId()).GetPortionsIndex(); + { + const ui64 minMemoryRead = portionsIndex.GetMinRawMemoryRead(); + if (NOlap::TGlobalLimits::DefaultReduceMemoryIntervalLimit < minMemoryRead) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "overlimit")("reason", "read_raw_memory")("current", minMemoryRead)( + "limit", NOlap::TGlobalLimits::DefaultReduceMemoryIntervalLimit)("table_id", writeMeta.GetTableId()); + return returnFail(COUNTER_WRITE_FAIL, EWriteFailReason::OverlimitReadRawMemory); + } + } - CSCounters.OnFailedWriteResponse(EWriteFailReason::NoTable); - return returnFail(COUNTER_WRITE_FAIL); + { + const ui64 minMemoryRead = portionsIndex.GetMinBlobMemoryRead(); + if (NOlap::TGlobalLimits::DefaultBlobsMemoryIntervalLimit < minMemoryRead) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "overlimit")("reason", "read_blob_memory")("current", minMemoryRead)( + "limit", NOlap::TGlobalLimits::DefaultBlobsMemoryIntervalLimit)("table_id", 
writeMeta.GetTableId()); + return returnFail(COUNTER_WRITE_FAIL, EWriteFailReason::OverlimitReadBlobMemory); + } + } } const auto& snapshotSchema = TablesManager.GetPrimaryIndex()->GetVersionedIndex().GetLastSchema(); auto arrowData = std::make_shared(snapshotSchema); if (!arrowData->ParseFromProto(record)) { - LOG_S_ERROR("Write (fail) " << record.GetData().size() << " bytes into pathId " << writeMeta.GetTableId() - << " at tablet " << TabletID()); - CSCounters.OnFailedWriteResponse(EWriteFailReason::IncorrectSchema); - return returnFail(COUNTER_WRITE_FAIL); + LOG_S_ERROR( + "Write (fail) " << record.GetData().size() << " bytes into pathId " << writeMeta.GetTableId() << " at tablet " << TabletID()); + return returnFail(COUNTER_WRITE_FAIL, EWriteFailReason::IncorrectSchema); } NEvWrite::TWriteData writeData(writeMeta, arrowData, snapshotSchema->GetIndexInfo().GetReplaceKey(), StoragesManager->GetInsertOperator()->StartWritingAction(NOlap::NBlobOperations::EConsumer::WRITING)); auto overloadStatus = CheckOverloaded(tableId); if (overloadStatus != EOverloadStatus::None) { - std::unique_ptr result = std::make_unique(TabletID(), writeData.GetWriteMeta(), NKikimrTxColumnShard::EResultStatus::OVERLOADED); - OverloadWriteFail(overloadStatus, writeData, cookie, std::move(result), ctx); - CSCounters.OnFailedWriteResponse(EWriteFailReason::Overload); + std::unique_ptr result = std::make_unique( + TabletID(), writeData.GetWriteMeta(), NKikimrTxColumnShard::EResultStatus::OVERLOADED); + OverloadWriteFail(overloadStatus, writeData.GetWriteMeta(), writeData.GetSize(), cookie, std::move(result), ctx); + Counters.GetCSCounters().OnFailedWriteResponse(EWriteFailReason::Overload); } else { if (ui64 writeId = (ui64)HasLongTxWrite(writeMeta.GetLongTxIdUnsafe(), writeMeta.GetWritePartId())) { - LOG_S_DEBUG("Write (duplicate) into pathId " << writeMeta.GetTableId() - << " longTx " << writeMeta.GetLongTxIdUnsafe().ToString() - << " at tablet " << TabletID()); + LOG_S_DEBUG("Write 
(duplicate) into pathId " << writeMeta.GetTableId() << " longTx " << writeMeta.GetLongTxIdUnsafe().ToString() + << " at tablet " << TabletID()); - IncCounter(COUNTER_WRITE_DUPLICATE); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_DUPLICATE); - auto result = std::make_unique( - TabletID(), writeMeta, writeId, NKikimrTxColumnShard::EResultStatus::SUCCESS); + auto result = + std::make_unique(TabletID(), writeMeta, writeId, NKikimrTxColumnShard::EResultStatus::SUCCESS); ctx.Send(writeMeta.GetSource(), result.release()); - CSCounters.OnFailedWriteResponse(EWriteFailReason::LongTxDuplication); + Counters.GetCSCounters().OnFailedWriteResponse(EWriteFailReason::LongTxDuplication); return; } - WritesMonitor.RegisterWrite(writeData.GetSize()); + Counters.GetWritesMonitor()->OnStartWrite(writeData.GetSize()); LOG_S_DEBUG("Write (blob) " << writeData.GetSize() << " bytes into pathId " << writeMeta.GetTableId() - << (writeMeta.GetWriteId()? (" writeId " + ToString(writeMeta.GetWriteId())).c_str() : " ") - << WritesMonitor.DebugString() - << " at tablet " << TabletID()); + << (writeMeta.GetWriteId() ? 
(" writeId " + ToString(writeMeta.GetWriteId())).c_str() : " ") + << Counters.GetWritesMonitor()->DebugString() << " at tablet " << TabletID()); writeData.MutableWriteMeta().SetWriteMiddle1StartInstant(TMonotonic::Now()); - std::shared_ptr task = std::make_shared(TabletID(), SelfId(), BufferizationWriteActorId, std::move(writeData), - snapshotSchema, GetLastTxSnapshot()); + std::shared_ptr task = std::make_shared( + TabletID(), SelfId(), BufferizationWriteActorId, std::move(writeData), snapshotSchema, GetLastTxSnapshot()); NConveyor::TInsertServiceOperator::AsyncTaskToExecute(task); } } class TCommitOperation { +private: + const ui64 TabletId; + public: using TPtr = std::shared_ptr; - bool Parse(const NEvents::TDataEvents::TEvWrite& evWrite) { - if (evWrite.Record.GetLocks().GetLocks().size() != 1) { - return false; + bool NeedSyncLocks() const { + return SendingShards.size() && ReceivingShards.size(); + } + + bool IsPrimary() const { + AFL_VERIFY(NeedSyncLocks()); + return TabletId == *ReceivingShards.begin(); + } + + TCommitOperation(const ui64 tabletId) + : TabletId(tabletId) { + } + + TConclusionStatus Parse(const NEvents::TDataEvents::TEvWrite& evWrite) { + AFL_VERIFY(evWrite.Record.GetLocks().GetLocks().size() >= 1); + auto& locks = evWrite.Record.GetLocks(); + auto& lock = evWrite.Record.GetLocks().GetLocks()[0]; + SendingShards = std::set(locks.GetSendingShards().begin(), locks.GetSendingShards().end()); + if ((ui32)locks.GetSendingShards().size() != SendingShards.size()) { + return TConclusionStatus::Fail("duplications in SendingShards proto field"); + } + ReceivingShards = std::set(locks.GetReceivingShards().begin(), locks.GetReceivingShards().end()); + if ((ui32)locks.GetReceivingShards().size() != ReceivingShards.size()) { + return TConclusionStatus::Fail("duplications in ReceivingShards proto field"); } - LockId = evWrite.Record.GetLocks().GetLocks()[0].GetLockId(); TxId = evWrite.Record.GetTxId(); - KqpLocks = evWrite.Record.GetLocks(); - return 
!!LockId && !!TxId && KqpLocks.GetOp() == NKikimrDataEvents::TKqpLocks::Commit; + LockId = lock.GetLockId(); + Generation = lock.GetGeneration(); + InternalGenerationCounter = lock.GetCounter(); + if (!GetLockId()) { + return TConclusionStatus::Fail("not initialized lock info in commit message"); + } + if (!TxId) { + return TConclusionStatus::Fail("not initialized TxId for commit event"); + } + if (evWrite.Record.GetLocks().GetOp() != NKikimrDataEvents::TKqpLocks::Commit) { + return TConclusionStatus::Fail("incorrect message type"); + } + if (!ReceivingShards.size() || !SendingShards.size()) { + ReceivingShards.clear(); + SendingShards.clear(); + } else { + if (!ReceivingShards.contains(TabletId) && !SendingShards.contains(TabletId)) { + return TConclusionStatus::Fail("shard is incorrect for sending/receiving lists"); + } + } + return TConclusionStatus::Success(); + } + + std::unique_ptr CreateTxOperator( + const NKikimrTxColumnShard::ETransactionKind kind) const { + AFL_VERIFY(ReceivingShards.size()); + if (IsPrimary()) { + return std::make_unique( + TFullTxInfo::BuildFake(kind), LockId, ReceivingShards, SendingShards); + } else { + return std::make_unique( + TFullTxInfo::BuildFake(kind), LockId, *ReceivingShards.begin(), ReceivingShards.contains(TabletId)); + } } private: - NKikimrDataEvents::TKqpLocks KqpLocks; YDB_READONLY(ui64, LockId, 0); + YDB_READONLY(ui64, Generation, 0); + YDB_READONLY(ui64, InternalGenerationCounter, 0); YDB_READONLY(ui64, TxId, 0); + YDB_READONLY_DEF(std::set, SendingShards); + YDB_READONLY_DEF(std::set, ReceivingShards); }; -class TProposeWriteTransaction : public NTabletFlatExecutor::TTransactionBase { + +class TProposeWriteTransaction: public NTabletFlatExecutor::TTransactionBase { private: using TBase = NTabletFlatExecutor::TTransactionBase; + public: TProposeWriteTransaction(TColumnShard* self, TCommitOperation::TPtr op, const TActorId source, const ui64 cookie) : TBase(self) , WriteCommit(op) , Source(source) - , Cookie(cookie) - 
{} + , Cookie(cookie) { + } + + virtual bool Execute(TTransactionContext& txc, const TActorContext&) override { + NKikimrTxColumnShard::TCommitWriteTxBody proto; + NKikimrTxColumnShard::ETransactionKind kind; + if (WriteCommit->NeedSyncLocks()) { + if (WriteCommit->IsPrimary()) { + kind = NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE_PRIMARY; + } else { + kind = NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE_SECONDARY; + } + proto = WriteCommit->CreateTxOperator(kind)->SerializeToProto(); + } else { + kind = NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE; + } + proto.SetLockId(WriteCommit->GetLockId()); + TxOperator = Self->GetProgressTxController().StartProposeOnExecute( + TTxController::TTxInfo(kind, WriteCommit->GetTxId(), Source, Cookie, {}), proto.SerializeAsString(), txc); + return true; + } - bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; - void Complete(const TActorContext& ctx) override; - TTxType GetTxType() const override { return TXTYPE_PROPOSE; } + virtual void Complete(const TActorContext& ctx) override { + Self->GetProgressTxController().FinishProposeOnComplete(WriteCommit->GetTxId(), ctx); + } + TTxType GetTxType() const override { + return TXTYPE_PROPOSE; + } private: TCommitOperation::TPtr WriteCommit; TActorId Source; ui64 Cookie; + std::shared_ptr TxOperator; }; -bool TProposeWriteTransaction::Execute(TTransactionContext& txc, const TActorContext&) { - NKikimrTxColumnShard::TCommitWriteTxBody proto; - proto.SetLockId(WriteCommit->GetLockId()); - TString txBody; - Y_ABORT_UNLESS(proto.SerializeToString(&txBody)); - Y_UNUSED(Self->GetProgressTxController().StartProposeOnExecute( - TTxController::TTxInfo(NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE, WriteCommit->GetTxId(), Source, Cookie, {}), txBody, txc)); - return true; -} +class TAbortWriteTransaction: public NTabletFlatExecutor::TTransactionBase { +private: + using TBase = NTabletFlatExecutor::TTransactionBase; -void TProposeWriteTransaction::Complete(const TActorContext& ctx) { 
- Self->GetProgressTxController().FinishProposeOnComplete(WriteCommit->GetTxId(), ctx); -} +public: + TAbortWriteTransaction(TColumnShard* self, const ui64 txId, const TActorId source, const ui64 cookie) + : TBase(self) + , TxId(txId) + , Source(source) + , Cookie(cookie) { + } + + virtual bool Execute(TTransactionContext& txc, const TActorContext&) override { + Self->GetOperationsManager().AbortTransactionOnExecute(*Self, TxId, txc); + return true; + } + + virtual void Complete(const TActorContext& ctx) override { + Self->GetOperationsManager().AbortTransactionOnComplete(*Self, TxId); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildCompleted(Self->TabletID(), TxId); + ctx.Send(Source, result.release(), 0, Cookie); + } + TTxType GetTxType() const override { + return TXTYPE_PROPOSE; + } + +private: + ui64 TxId; + TActorId Source; + ui64 Cookie; +}; void TColumnShard::Handle(NEvents::TDataEvents::TEvWrite::TPtr& ev, const TActorContext& ctx) { - NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("event", "TEvWrite"); + NActors::TLogContextGuard gLogging = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("event", "TEvWrite"); const auto& record = ev->Get()->Record; const auto source = ev->Sender; const auto cookie = ev->Cookie; - const auto behaviour = TOperationsManager::GetBehaviour(*ev->Get()); - - if (behaviour == EOperationBehaviour::Undefined) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "invalid write event"); + const auto behaviourConclusion = TOperationsManager::GetBehaviour(*ev->Get()); + // AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("ev_write", record.DebugString()); + if (behaviourConclusion.IsFail()) { + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = 
NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, + "invalid write event: " + behaviourConclusion.GetErrorMessage()); ctx.Send(source, result.release(), 0, cookie); return; } + auto behaviour = *behaviourConclusion; + + if (behaviour == EOperationBehaviour::AbortWriteLock) { + Execute(new TAbortWriteTransaction(this, record.GetLocks().GetLocks()[0].GetLockId(), source, cookie), ctx); + return; + } if (behaviour == EOperationBehaviour::CommitWriteLock) { - auto commitOperation = std::make_shared(); - if (!commitOperation->Parse(*ev->Get())) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "invalid commit event"); + auto commitOperation = std::make_shared(TabletID()); + const auto sendError = [&](const TString& message, const NKikimrDataEvents::TEvWriteResult::EStatus status) { + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, status, message); ctx.Send(source, result.release(), 0, cookie); + }; + auto conclusionParse = commitOperation->Parse(*ev->Get()); + if (conclusionParse.IsFail()) { + sendError(conclusionParse.GetErrorMessage(), NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST); + } else { + if (commitOperation->NeedSyncLocks()) { + auto* lockInfo = OperationsManager->GetLockOptional(commitOperation->GetLockId()); + if (!lockInfo) { + sendError("haven't lock for commit: " + ::ToString(commitOperation->GetLockId()), + NKikimrDataEvents::TEvWriteResult::STATUS_ABORTED); + } else { + if (lockInfo->GetGeneration() != commitOperation->GetGeneration()) { + sendError("tablet lock have another generation: " + ::ToString(lockInfo->GetGeneration()) + + " != " + ::ToString(commitOperation->GetGeneration()), + NKikimrDataEvents::TEvWriteResult::STATUS_LOCKS_BROKEN); + } else if 
(lockInfo->GetInternalGenerationCounter() != commitOperation->GetInternalGenerationCounter()) { + sendError( + "tablet lock have another internal generation counter: " + ::ToString(lockInfo->GetInternalGenerationCounter()) + + " != " + ::ToString(commitOperation->GetInternalGenerationCounter()), + NKikimrDataEvents::TEvWriteResult::STATUS_LOCKS_BROKEN); + } else { + Execute(new TProposeWriteTransaction(this, commitOperation, source, cookie), ctx); + } + } + } else { + Execute(new TProposeWriteTransaction(this, commitOperation, source, cookie), ctx); + } } - Execute(new TProposeWriteTransaction(this, commitOperation, source, cookie), ctx); return; } - const ui64 lockId = (behaviour == EOperationBehaviour::InTxWrite) ? record.GetTxId() : record.GetLockTxId(); - if (record.GetOperations().size() != 1) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "only single operation is supported"); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError( + TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "only single operation is supported"); ctx.Send(source, result.release(), 0, cookie); return; } const auto& operation = record.GetOperations()[0]; - const std::optional mType = TEnumOperator::DeserializeFromProto(operation.GetType()); + const std::optional mType = + TEnumOperator::DeserializeFromProto(operation.GetType()); if (!mType) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "operation " + 
NKikimrDataEvents::TEvWrite::TOperation::EOperationType_Name(operation.GetType()) + " is not supported"); ctx.Send(source, result.release(), 0, cookie); return; } if (!operation.GetTableId().HasSchemaVersion()) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "schema version not set"); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError( + TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "schema version not set"); ctx.Send(source, result.release(), 0, cookie); return; } auto schema = TablesManager.GetPrimaryIndex()->GetVersionedIndex().GetSchema(operation.GetTableId().GetSchemaVersion()); if (!schema) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "unknown schema version"); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError( + TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "unknown schema version"); ctx.Send(source, result.release(), 0, cookie); return; } @@ -358,38 +521,50 @@ void TColumnShard::Handle(NEvents::TDataEvents::TEvWrite::TPtr& ev, const TActor const auto tableId = operation.GetTableId().GetTableId(); if (!TablesManager.IsReadyForWrite(tableId)) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, "table not writable"); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError( + TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, "table not writable"); ctx.Send(source, result.release(), 0, cookie); return; 
} auto arrowData = std::make_shared(schema); if (!arrowData->Parse(operation, NEvWrite::TPayloadReader(*ev->Get()))) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "parsing data error"); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError( + TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "parsing data error"); ctx.Send(source, result.release(), 0, cookie); } auto overloadStatus = CheckOverloaded(tableId); if (overloadStatus != EOverloadStatus::None) { - NEvWrite::TWriteData writeData(NEvWrite::TWriteMeta(0, tableId, source, {}), arrowData, nullptr, nullptr); - std::unique_ptr result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_OVERLOADED, "overload data error"); - OverloadWriteFail(overloadStatus, writeData, cookie, std::move(result), ctx); + std::unique_ptr result = NEvents::TDataEvents::TEvWriteResult::BuildError( + TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_OVERLOADED, "overload data error"); + OverloadWriteFail(overloadStatus, NEvWrite::TWriteMeta(0, tableId, source, {}), arrowData->GetSize(), cookie, std::move(result), ctx); return; } - auto wg = WritesMonitor.RegisterWrite(arrowData->GetSize()); + Counters.GetWritesMonitor()->OnStartWrite(arrowData->GetSize()); std::optional granuleShardingVersionId; if (record.HasGranuleShardingVersionId()) { granuleShardingVersionId = record.GetGranuleShardingVersionId(); } + ui64 lockId = 0; + if (behaviour == EOperationBehaviour::NoTxWrite) { + lockId = BuildEphemeralTxId(); + } else if (behaviour == EOperationBehaviour::InTxWrite) { + lockId = record.GetTxId(); + } else { + lockId = record.GetLockTxId(); + } + + OperationsManager->RegisterLock(lockId, Generation()); auto writeOperation = 
OperationsManager->RegisterOperation(lockId, cookie, granuleShardingVersionId, *mType); Y_ABORT_UNLESS(writeOperation); writeOperation->SetBehaviour(behaviour); writeOperation->Start(*this, tableId, arrowData, source, schema, ctx); } -} +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/columnshard__write_index.cpp b/ydb/core/tx/columnshard/columnshard__write_index.cpp index 69d54a68d1bd..27497b4e8fc6 100644 --- a/ydb/core/tx/columnshard/columnshard__write_index.cpp +++ b/ydb/core/tx/columnshard/columnshard__write_index.cpp @@ -41,7 +41,7 @@ void TColumnShard::Handle(TEvPrivate::TEvWriteIndex::TPtr& ev, const TActorConte if (IsAnyChannelYellowStop()) { ACFL_ERROR("event", "TEvWriteIndex failed")("reason", "channel yellow stop"); - IncCounter(COUNTER_OUT_OF_SPACE); + Counters.GetTabletCounters()->IncCounter(COUNTER_OUT_OF_SPACE); ev->Get()->SetPutStatus(NKikimrProto::TRYLATER); NOlap::TChangesFinishContext context("out of disk space"); ev->Get()->IndexChanges->Abort(*this, context); diff --git a/ydb/core/tx/columnshard/columnshard_common.cpp b/ydb/core/tx/columnshard/columnshard_common.cpp deleted file mode 100644 index d9f0dcc5e79b..000000000000 --- a/ydb/core/tx/columnshard/columnshard_common.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include "columnshard_common.h" -#include - -namespace NKikimr::NColumnShard { - -namespace { - -using EOperation = NArrow::EOperation; -using EAggregate = NArrow::EAggregate; -using TAssign = NSsa::TAssign; -using TAggregateAssign = NSsa::TAggregateAssign; - -} - -using EOperation = NArrow::EOperation; -using TPredicate = NOlap::TPredicate; - -} diff --git a/ydb/core/tx/columnshard/columnshard_common.h b/ydb/core/tx/columnshard/columnshard_common.h deleted file mode 100644 index 455f39a512cc..000000000000 --- a/ydb/core/tx/columnshard/columnshard_common.h +++ /dev/null @@ -1,94 +0,0 @@ -#pragma once -#include "engines/reader/common/description.h" -#include "engines/predicate/predicate.h" - -#include - -namespace 
NKikimr::NOlap { - struct TIndexInfo; -} - -namespace NKikimr::NColumnShard { - -using TReadDescription = NOlap::NReader::TReadDescription; -using IColumnResolver = NOlap::IColumnResolver; -using NOlap::TWriteId; - -class TBatchCache { -public: - using TUnifiedBlobId = NOlap::TUnifiedBlobId; - using TInsertedBatch = std::pair>; - - static constexpr ui32 MAX_COMMITTED_COUNT = 2 * TLimits::MIN_SMALL_BLOBS_TO_INSERT; - static constexpr ui32 MAX_INSERTED_COUNT = 2 * TLimits::MIN_SMALL_BLOBS_TO_INSERT; - static constexpr ui64 MAX_TOTAL_SIZE = 2 * TLimits::MIN_BYTES_TO_INSERT; - - TBatchCache() - : Inserted(MAX_INSERTED_COUNT) - , Committed(MAX_COMMITTED_COUNT) - {} - - void Insert(TWriteId writeId, const TUnifiedBlobId& blobId, const std::shared_ptr& batch) { - if (Bytes() + blobId.BlobSize() > MAX_TOTAL_SIZE) { - return; - } - InsertedBytes += blobId.BlobSize(); - Inserted.Insert(writeId, {blobId, batch}); - } - - void Commit(TWriteId writeId) { - auto it = Inserted.FindWithoutPromote(writeId); - if (it != Inserted.End()) { - auto& blobId = it->first; - InsertedBytes -= blobId.BlobSize(); - CommittedBytes += blobId.BlobSize(); - - Committed.Insert(blobId, it->second); - Inserted.Erase(it); - } - } - - void EraseInserted(TWriteId writeId) { - auto it = Inserted.FindWithoutPromote(writeId); - if (it != Inserted.End()) { - InsertedBytes -= (*it).first.BlobSize(); - Inserted.Erase(it); - } - } - - void EraseCommitted(const TUnifiedBlobId& blobId) { - auto it = Committed.FindWithoutPromote(blobId); - if (it != Committed.End()) { - CommittedBytes -= blobId.BlobSize(); - Committed.Erase(it); - } - } - - TInsertedBatch GetInserted(TWriteId writeId) const { - auto it = Inserted.Find(writeId); - if (it != Inserted.End()) { - return *it; - } - return {}; - } - - std::shared_ptr Get(const TUnifiedBlobId& blobId) const { - auto it = Committed.Find(blobId); - if (it != Committed.End()) { - return *it; - } - return {}; - } - - ui64 Bytes() const { - return InsertedBytes + 
CommittedBytes; - } - -private: - mutable TLRUCache Inserted; - mutable TLRUCache> Committed; - ui64 InsertedBytes{0}; - ui64 CommittedBytes{0}; -}; - -} diff --git a/ydb/core/tx/columnshard/columnshard_impl.cpp b/ydb/core/tx/columnshard/columnshard_impl.cpp index 2bc73b8ee418..9f1c7a10859d 100644 --- a/ydb/core/tx/columnshard/columnshard_impl.cpp +++ b/ydb/core/tx/columnshard/columnshard_impl.cpp @@ -28,6 +28,7 @@ #include "engines/changes/ttl.h" #include "resource_subscriber/counters.h" +#include "transactions/operators/ev_write/sync.h" #include "bg_tasks/adapter/adapter.h" #include "bg_tasks/manager/manager.h" @@ -41,6 +42,7 @@ #include #include #include +#include namespace NKikimr::NColumnShard { @@ -64,33 +66,26 @@ NTabletPipe::TClientConfig GetPipeClientConfig() { TColumnShard::TColumnShard(TTabletStorageInfo* info, const TActorId& tablet) : TActor(&TThis::StateInit) - , TTabletExecutedFlat(info, tablet, nullptr) + , TTabletExecutedFlat(info, tablet, new NMiniKQL::TMiniKQLFactory) + , TabletCountersHolder(new TProtobufTabletCounters()) + , Counters(*TabletCountersHolder) , ProgressTxController(std::make_unique(*this)) , StoragesManager(std::make_shared(*this)) , DataLocksManager(std::make_shared()) - , PeriodicWakeupActivationPeriod(NYDBTest::TControllers::GetColumnShardController()->GetPeriodicWakeupActivationPeriod(TSettings::DefaultPeriodicWakeupActivationPeriod)) - , StatsReportInterval(NYDBTest::TControllers::GetColumnShardController()->GetStatsReportInterval(TSettings::DefaultStatsReportInterval)) - , InFlightReadsTracker(StoragesManager) + , PeriodicWakeupActivationPeriod(NYDBTest::TControllers::GetColumnShardController()->GetPeriodicWakeupActivationPeriod()) + , StatsReportInterval(NYDBTest::TControllers::GetColumnShardController()->GetStatsReportInterval()) + , InFlightReadsTracker(StoragesManager, Counters.GetRequestsTracingCounters()) , TablesManager(StoragesManager, info->TabletID) , Subscribers(std::make_shared(*this)) , 
PipeClientCache(NTabletPipe::CreateBoundedClientCache(new NTabletPipe::TBoundedClientCacheConfig(), GetPipeClientConfig())) , InsertTable(std::make_unique()) - , SubscribeCounters(std::make_shared()) - , InsertTaskSubscription(NOlap::TInsertColumnEngineChanges::StaticTypeName(), SubscribeCounters) - , CompactTaskSubscription(NOlap::TCompactColumnEngineChanges::StaticTypeName(), SubscribeCounters) - , TTLTaskSubscription(NOlap::TTTLColumnEngineChanges::StaticTypeName(), SubscribeCounters) - , ScanCounters("Scan") - , WritesMonitor(*this) - , NormalizerController(StoragesManager, SubscribeCounters) - , SysLocks(this) -{ - TabletCountersPtr.reset(new TProtobufTabletCounters< - ESimpleCounters_descriptor, - ECumulativeCounters_descriptor, - EPercentileCounters_descriptor, - ETxTypes_descriptor - >()); - TabletCounters = TabletCountersPtr.get(); + , InsertTaskSubscription(NOlap::TInsertColumnEngineChanges::StaticTypeName(), Counters.GetSubscribeCounters()) + , CompactTaskSubscription(NOlap::TCompactColumnEngineChanges::StaticTypeName(), Counters.GetSubscribeCounters()) + , TTLTaskSubscription(NOlap::TTTLColumnEngineChanges::StaticTypeName(), Counters.GetSubscribeCounters()) + , BackgroundController(Counters.GetBackgroundControllerCounters()) + , NormalizerController(StoragesManager, Counters.GetSubscribeCounters()) + , SysLocks(this) { } void TColumnShard::OnDetach(const TActorContext& ctx) { @@ -191,74 +186,71 @@ ui64 TColumnShard::GetOutdatedStep() const { return step; } -ui64 TColumnShard::GetMinReadStep() const { - const TDuration maxReadStaleness = NYDBTest::TControllers::GetColumnShardController()->GetReadTimeoutClean(TDuration::Minutes(5)); - ui64 delayMillisec = maxReadStaleness.MilliSeconds(); +NOlap::TSnapshot TColumnShard::GetMinReadSnapshot() const { + ui64 delayMillisec = GetMaxReadStaleness().MilliSeconds(); ui64 passedStep = GetOutdatedStep(); ui64 minReadStep = (passedStep > delayMillisec ? 
passedStep - delayMillisec : 0); - return minReadStep; + + if (auto ssClean = InFlightReadsTracker.GetSnapshotToClean()) { + if (ssClean->GetPlanStep() < minReadStep) { + Counters.GetRequestsTracingCounters()->OnDefaultMinSnapshotInstant(TInstant::MilliSeconds(ssClean->GetPlanStep())); + return *ssClean; + } + } + Counters.GetRequestsTracingCounters()->OnDefaultMinSnapshotInstant(TInstant::MilliSeconds(minReadStep)); + return NOlap::TSnapshot::MaxForPlanStep(minReadStep); } -TWriteId TColumnShard::HasLongTxWrite(const NLongTxService::TLongTxId& longTxId, const ui32 partId) const { +TInsertWriteId TColumnShard::HasLongTxWrite(const NLongTxService::TLongTxId& longTxId, const ui32 partId) const { auto it = LongTxWritesByUniqueId.find(longTxId.UniqueId); if (it != LongTxWritesByUniqueId.end()) { auto itPart = it->second.find(partId); if (itPart != it->second.end()) { - return (TWriteId)itPart->second->WriteId; + return itPart->second->InsertWriteId; } } - return (TWriteId)0; + return (TInsertWriteId)0; } -TWriteId TColumnShard::GetLongTxWrite(NIceDb::TNiceDb& db, const NLongTxService::TLongTxId& longTxId, const ui32 partId, const std::optional granuleShardingVersionId) { +TInsertWriteId TColumnShard::GetLongTxWrite(NIceDb::TNiceDb& db, const NLongTxService::TLongTxId& longTxId, const ui32 partId, const std::optional granuleShardingVersionId) { auto it = LongTxWritesByUniqueId.find(longTxId.UniqueId); if (it != LongTxWritesByUniqueId.end()) { auto itPart = it->second.find(partId); if (itPart != it->second.end()) { - return (TWriteId)itPart->second->WriteId; + return itPart->second->InsertWriteId; } } else { it = LongTxWritesByUniqueId.emplace(longTxId.UniqueId, TPartsForLTXShard()).first; } - TWriteId writeId = BuildNextWriteId(db); - auto& lw = LongTxWrites[writeId]; - lw.WriteId = (ui64)writeId; + TInsertWriteId insertWriteId = InsertTable->BuildNextWriteId(db); + auto& lw = LongTxWrites[insertWriteId]; + lw.InsertWriteId = insertWriteId; lw.WritePartId = partId; 
lw.LongTxId = longTxId; lw.GranuleShardingVersionId = granuleShardingVersionId; it->second[partId] = &lw; - Schema::SaveLongTxWrite(db, writeId, partId, longTxId, granuleShardingVersionId); - return writeId; -} - -TWriteId TColumnShard::BuildNextWriteId(NTabletFlatExecutor::TTransactionContext& txc) { - NIceDb::TNiceDb db(txc.DB); - return BuildNextWriteId(db); + Schema::SaveLongTxWrite(db, insertWriteId, partId, longTxId, granuleShardingVersionId); + return insertWriteId; } -TWriteId TColumnShard::BuildNextWriteId(NIceDb::TNiceDb& db) { - TWriteId writeId = ++LastWriteId; - Schema::SaveSpecialValue(db, Schema::EValueIds::LastWriteId, (ui64)writeId); - return writeId; +void TColumnShard::AddLongTxWrite(const TInsertWriteId writeId, ui64 txId) { + auto it = LongTxWrites.find(writeId); + AFL_VERIFY(it != LongTxWrites.end()); + it->second.PreparedTxId = txId; } -void TColumnShard::AddLongTxWrite(TWriteId writeId, ui64 txId) { - auto& lw = LongTxWrites.at(writeId); - lw.PreparedTxId = txId; -} - -void TColumnShard::LoadLongTxWrite(TWriteId writeId, const ui32 writePartId, const NLongTxService::TLongTxId& longTxId, const std::optional granuleShardingVersion) { +void TColumnShard::LoadLongTxWrite(const TInsertWriteId writeId, const ui32 writePartId, const NLongTxService::TLongTxId& longTxId, const std::optional granuleShardingVersion) { auto& lw = LongTxWrites[writeId]; lw.WritePartId = writePartId; - lw.WriteId = (ui64)writeId; + lw.InsertWriteId = writeId; lw.LongTxId = longTxId; lw.GranuleShardingVersionId = granuleShardingVersion; LongTxWritesByUniqueId[longTxId.UniqueId][writePartId] = &lw; } -bool TColumnShard::RemoveLongTxWrite(NIceDb::TNiceDb& db, const TWriteId writeId, const ui64 txId) { +bool TColumnShard::RemoveLongTxWrite(NIceDb::TNiceDb& db, const TInsertWriteId writeId, const ui64 txId) { if (auto* lw = LongTxWrites.FindPtr(writeId)) { ui64 prepared = lw->PreparedTxId; if (!prepared || txId == prepared) { @@ -272,15 +264,16 @@ bool 
TColumnShard::RemoveLongTxWrite(NIceDb::TNiceDb& db, const TWriteId writeId return true; } else { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_remove_prepared_tx_insertion")("write_id", (ui64)writeId)("tx_id", txId); + return false; } } else { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_remove_removed_tx_insertion")("write_id", (ui64)writeId)("tx_id", txId); + return true; } - return false; } -void TColumnShard::TryAbortWrites(NIceDb::TNiceDb& db, NOlap::TDbWrapper& dbTable, THashSet&& writesToAbort) { - std::vector failedAborts; +void TColumnShard::TryAbortWrites(NIceDb::TNiceDb& db, NOlap::TDbWrapper& dbTable, THashSet&& writesToAbort) { + std::vector failedAborts; for (auto& writeId : writesToAbort) { if (!RemoveLongTxWrite(db, writeId, 0)) { failedAborts.push_back(writeId); @@ -426,9 +419,9 @@ void TColumnShard::RunEnsureTable(const NKikimrTxColumnShard::TCreateTable& tabl TablesManager.AddTableVersion(pathId, version, tableVerProto, db, Tiers); - SetCounter(COUNTER_TABLES, TablesManager.GetTables().size()); - SetCounter(COUNTER_TABLE_PRESETS, TablesManager.GetSchemaPresets().size()); - SetCounter(COUNTER_TABLE_TTLS, TablesManager.GetTtl().PathsCount()); + Counters.GetTabletCounters()->SetCounter(COUNTER_TABLES, TablesManager.GetTables().size()); + Counters.GetTabletCounters()->SetCounter(COUNTER_TABLE_PRESETS, TablesManager.GetSchemaPresets().size()); + Counters.GetTabletCounters()->SetCounter(COUNTER_TABLE_TTLS, TablesManager.GetTtl().PathsCount()); } void TColumnShard::RunAlterTable(const NKikimrTxColumnShard::TAlterTable& alterProto, const NOlap::TSnapshot& version, @@ -476,13 +469,6 @@ void TColumnShard::RunDropTable(const NKikimrTxColumnShard::TDropTable& dropProt LOG_S_DEBUG("DropTable for pathId: " << pathId << " at tablet " << TabletID()); TablesManager.DropTable(pathId, version, db); - - // TODO: Allow to read old snapshots after DROP - TBlobGroupSelector dsGroupSelector(Info()); - NOlap::TDbWrapper dbTable(txc.DB, 
&dsGroupSelector); - THashSet writesToAbort = InsertTable->DropPath(dbTable, pathId); - - TryAbortWrites(db, dbTable, std::move(writesToAbort)); } void TColumnShard::RunAlterStore(const NKikimrTxColumnShard::TAlterStore& proto, const NOlap::TSnapshot& version, @@ -514,7 +500,7 @@ void TColumnShard::EnqueueBackgroundActivities(const bool periodic) { ACFL_DEBUG("event", "EnqueueBackgroundActivities")("periodic", periodic); StoragesManager->GetOperatorVerified(NOlap::IStoragesManager::DefaultStorageId); StoragesManager->GetSharedBlobsManager()->GetStorageManagerVerified(NOlap::IStoragesManager::DefaultStorageId); - CSCounters.OnStartBackground(); + Counters.GetCSCounters().OnStartBackground(); if (!TablesManager.HasPrimaryIndex()) { AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("problem", "Background activities cannot be started: no index at tablet"); @@ -635,13 +621,16 @@ class TTTLChangesReadTask: public TChangesReadTask, public TMonitoringObjectsCou using TBase::TBase; }; -void TColumnShard::StartIndexTask(std::vector&& dataToIndex, const i64 bytesToIndex) { - CSCounters.IndexationInput(bytesToIndex); +void TColumnShard::StartIndexTask(std::vector&& dataToIndex, const i64 bytesToIndex) { + Counters.GetCSCounters().IndexationInput(bytesToIndex); - std::vector data; + std::vector data; data.reserve(dataToIndex.size()); for (auto& ptr : dataToIndex) { data.push_back(*ptr); + if (!TablesManager.HasTable(data.back().GetPathId())) { + data.back().SetRemove(); + } } Y_ABORT_UNLESS(data.size()); @@ -658,7 +647,8 @@ void TColumnShard::StartIndexTask(std::vector&& dat NOlap::NResourceBroker::NSubscribe::ITask::StartResourceSubscription( ResourceSubscribeActor, std::make_shared( - std::make_shared(std::move(ev), SelfId(), TabletID(), IndexationCounters, GetLastCompletedTx()), 0, indexChanges->CalcMemoryForUsage(), externalTaskId, InsertTaskSubscription)); + std::make_shared(std::move(ev), SelfId(), TabletID(), Counters.GetIndexationCounters(), GetLastCompletedTx()), + 0, 
indexChanges->CalcMemoryForUsage(), externalTaskId, InsertTaskSubscription)); } void TColumnShard::SetupIndexation() { @@ -678,8 +668,8 @@ void TColumnShard::SetupIndexation() { if (InsertTable->GetPathPriorities().size() && InsertTable->GetPathPriorities().rbegin()->first.GetCategory() == NOlap::TPathInfoIndexPriority::EIndexationPriority::PreventOverload) { force = true; } - const ui64 bytesLimit = NYDBTest::TControllers::GetColumnShardController()->GetGuaranteeIndexationStartBytesLimit(TSettings::GuaranteeIndexationStartBytesLimit); - const TDuration durationLimit = NYDBTest::TControllers::GetColumnShardController()->GetGuaranteeIndexationInterval(TSettings::GuaranteeIndexationInterval); + const ui64 bytesLimit = NYDBTest::TControllers::GetColumnShardController()->GetGuaranteeIndexationStartBytesLimit(); + const TDuration durationLimit = NYDBTest::TControllers::GetColumnShardController()->GetGuaranteeIndexationInterval(); if (!force && InsertTable->GetCountersCommitted().Bytes < bytesLimit && TMonotonic::Now() < BackgroundController.GetLastIndexationInstant() + durationLimit) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "skip_indexation")("reason", "not_enough_data_and_too_frequency") @@ -688,10 +678,10 @@ void TColumnShard::SetupIndexation() { } AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start_indexation_tasks")("insert_overload_size", InsertTable->GetCountersCommitted().Bytes); - CSCounters.OnSetupIndexation(); + Counters.GetCSCounters().OnSetupIndexation(); ui64 bytesToIndex = 0; ui64 txBytesWrite = 0; - std::vector dataToIndex; + std::vector dataToIndex; dataToIndex.reserve(TLimits::MIN_SMALL_BLOBS_TO_INSERT); for (auto it = InsertTable->GetPathPriorities().rbegin(); it != InsertTable->GetPathPriorities().rend(); ++it) { for (auto* pathInfo : it->second) { @@ -719,7 +709,7 @@ void TColumnShard::SetupCompaction() { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_compaction")("reason", "disabled"); return; } - 
CSCounters.OnSetupCompaction(); + Counters.GetCSCounters().OnSetupCompaction(); BackgroundController.CheckDeadlines(); while (BackgroundController.GetCompactionsCount() < TSettings::MAX_ACTIVE_COMPACTIONS) { @@ -738,7 +728,7 @@ void TColumnShard::SetupCompaction() { NOlap::NResourceBroker::NSubscribe::ITask::StartResourceSubscription( ResourceSubscribeActor, std::make_shared( - std::make_shared(std::move(ev), SelfId(), TabletID(), CompactionCounters, GetLastCompletedTx()), 0, indexChanges->CalcMemoryForUsage(), externalTaskId, CompactTaskSubscription)); + std::make_shared(std::move(ev), SelfId(), TabletID(), Counters.GetCompactionCounters(), GetLastCompletedTx()), 0, indexChanges->CalcMemoryForUsage(), externalTaskId, CompactTaskSubscription)); } LOG_S_DEBUG("ActiveCompactions: " << BackgroundController.GetCompactionsCount() << " at tablet " << TabletID()); @@ -749,7 +739,7 @@ bool TColumnShard::SetupTtl(const THashMap& pathTtls) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_ttl")("reason", "disabled"); return false; } - CSCounters.OnSetupTtl(); + Counters.GetCSCounters().OnSetupTtl(); THashMap eviction = pathTtls; for (auto&& i : eviction) { ACFL_DEBUG("background", "ttl")("path", i.first)("info", i.second.GetDebugString()); @@ -773,7 +763,8 @@ bool TColumnShard::SetupTtl(const THashMap& pathTtls) { if (needWrites) { NOlap::NResourceBroker::NSubscribe::ITask::StartResourceSubscription( ResourceSubscribeActor, std::make_shared( - std::make_shared(std::move(ev), SelfId(), TabletID(), CompactionCounters, GetLastCompletedTx()), 0, i->CalcMemoryForUsage(), externalTaskId, TTLTaskSubscription)); + std::make_shared(std::move(ev), SelfId(), TabletID(), Counters.GetCompactionCounters(), GetLastCompletedTx()), + 0, i->CalcMemoryForUsage(), externalTaskId, TTLTaskSubscription)); } else { ev->SetPutStatus(NKikimrProto::OK); ActorContext().Send(SelfId(), std::move(ev)); @@ -783,7 +774,7 @@ bool TColumnShard::SetupTtl(const THashMap& pathTtls) { } void 
TColumnShard::SetupCleanupPortions() { - CSCounters.OnSetupCleanup(); + Counters.GetCSCounters().OnSetupCleanup(); if (!AppDataVerified().ColumnShardConfig.GetCleanupEnabled() || !NYDBTest::TControllers::GetColumnShardController()->IsBackgroundEnabled(NYDBTest::ICSController::EBackground::Cleanup)) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_cleanup")("reason", "disabled"); return; @@ -793,9 +784,8 @@ void TColumnShard::SetupCleanupPortions() { return; } - NOlap::TSnapshot cleanupSnapshot{GetMinReadStep(), 0}; - - auto changes = TablesManager.MutablePrimaryIndex().StartCleanupPortions(cleanupSnapshot, TablesManager.GetPathsToDrop(), DataLocksManager); + auto changes = + TablesManager.MutablePrimaryIndex().StartCleanupPortions(GetMinReadSnapshot(), TablesManager.GetPathsToDrop(), DataLocksManager); if (!changes) { ACFL_DEBUG("background", "cleanup")("skip_reason", "no_changes"); return; @@ -812,13 +802,21 @@ void TColumnShard::SetupCleanupPortions() { } void TColumnShard::SetupCleanupTables() { - CSCounters.OnSetupCleanup(); + Counters.GetCSCounters().OnSetupCleanup(); if (BackgroundController.IsCleanupTablesActive()) { ACFL_DEBUG("background", "cleanup")("skip_reason", "in_progress"); return; } - auto changes = TablesManager.MutablePrimaryIndex().StartCleanupTables(TablesManager.MutablePathsToDrop()); + THashSet pathIdsEmptyInInsertTable; + for (auto&& i : TablesManager.GetPathsToDrop()) { + if (InsertTable->HasPathIdData(i)) { + continue; + } + pathIdsEmptyInInsertTable.emplace(i); + } + + auto changes = TablesManager.MutablePrimaryIndex().StartCleanupTables(pathIdsEmptyInInsertTable); if (!changes) { ACFL_DEBUG("background", "cleanup")("skip_reason", "no_changes"); return; @@ -890,6 +888,39 @@ void TColumnShard::Handle(NActors::TEvents::TEvUndelivered::TPtr& ev, const TAct } } +void TColumnShard::Handle(TEvTxProcessing::TEvReadSet::TPtr& ev, const TActorContext& ctx) { + const ui64 txId = ev->Get()->Record.GetTxId(); + if 
(!GetProgressTxController().GetTxOperatorOptional(txId)) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "read_set_ignored")("proto", ev->Get()->Record.DebugString()); + Send(MakePipePerNodeCacheID(false), + new TEvPipeCache::TEvForward( + new TEvTxProcessing::TEvReadSetAck(0, txId, TabletID(), ev->Get()->Record.GetTabletProducer(), TabletID(), 0), + ev->Get()->Record.GetTabletProducer(), true), + IEventHandle::FlagTrackDelivery, txId); + return; + } + auto op = GetProgressTxController().GetTxOperatorVerifiedAs(txId); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "read_set")("proto", ev->Get()->Record.DebugString())("lock_id", op->GetLockId()); + NKikimrTx::TReadSetData data; + AFL_VERIFY(data.ParseFromArray(ev->Get()->Record.GetReadSet().data(), ev->Get()->Record.GetReadSet().size())); + auto tx = op->CreateReceiveBrokenFlagTx( + *this, ev->Get()->Record.GetTabletProducer(), data.GetDecision() != NKikimrTx::TReadSetData::DECISION_COMMIT); + Execute(tx.release(), ctx); +} + +void TColumnShard::Handle(TEvTxProcessing::TEvReadSetAck::TPtr& ev, const TActorContext& ctx) { + auto opPtr = GetProgressTxController().GetTxOperatorOptional(ev->Get()->Record.GetTxId()); + if (!opPtr) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "missed_read_set_ack")("proto", ev->Get()->Record.DebugString())( + "tx_id", ev->Get()->Record.GetTxId()); + return; + } + auto op = TValidator::CheckNotNull(dynamic_pointer_cast(opPtr)); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "read_set_ack")("proto", ev->Get()->Record.DebugString())("lock_id", op->GetLockId()); + auto tx = op->CreateReceiveResultAckTx(*this, ev->Get()->Record.GetTabletConsumer()); + Execute(tx.release(), ctx); +} + void TColumnShard::Handle(NOlap::NDataSharing::NEvents::TEvProposeFromInitiator::TPtr& ev, const TActorContext& ctx) { AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("process", "BlobsSharing")("event", "TEvProposeFromInitiator"); auto reqSession = std::make_shared(); @@ -1142,4 
+1173,8 @@ const NKikimr::NColumnShard::NTiers::TManager* TColumnShard::GetTierManagerPoint return Tiers->GetManagerOptional(tierId); } +TDuration TColumnShard::GetMaxReadStaleness() { + return NYDBTest::TControllers::GetColumnShardController()->GetReadTimeoutClean(); +} + } diff --git a/ydb/core/tx/columnshard/columnshard_impl.h b/ydb/core/tx/columnshard/columnshard_impl.h index 3500852fc734..8e303896ce75 100644 --- a/ydb/core/tx/columnshard/columnshard_impl.h +++ b/ydb/core/tx/columnshard/columnshard_impl.h @@ -3,7 +3,6 @@ #include "background_controller.h" #include "counters.h" #include "columnshard.h" -#include "columnshard_common.h" #include "columnshard_ttl.h" #include "columnshard_private_events.h" #include "tables_manager.h" @@ -13,9 +12,11 @@ #include "transactions/tx_controller.h" #include "inflight_request_tracker.h" #include "counters/columnshard.h" +#include "counters/counters_manager.h" #include "resource_subscriber/counters.h" #include "resource_subscriber/task.h" #include "normalizer/abstract/abstract.h" +#include "operations/manager.h" #include "export/events/events.h" @@ -202,6 +203,8 @@ class TColumnShard void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx); void Handle(TEvTabletPipe::TEvServerConnected::TPtr& ev, const TActorContext& ctx); void Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev, const TActorContext& ctx); + void Handle(TEvTxProcessing::TEvReadSet::TPtr& ev, const TActorContext& ctx); + void Handle(TEvTxProcessing::TEvReadSetAck::TPtr& ev, const TActorContext& ctx); void Handle(TEvColumnShard::TEvProposeTransaction::TPtr& ev, const TActorContext& ctx); void Handle(TEvColumnShard::TEvCheckPlannedTransaction::TPtr& ev, const TActorContext& ctx); void Handle(TEvColumnShard::TEvCancelTransactionProposal::TPtr& ev, const TActorContext& ctx); @@ -216,6 +219,9 @@ class TColumnShard void Handle(TEvPrivate::TEvScanStats::TPtr &ev, const TActorContext &ctx); void Handle(TEvPrivate::TEvReadFinished::TPtr 
&ev, const TActorContext &ctx); void Handle(TEvPrivate::TEvPeriodicWakeup::TPtr& ev, const TActorContext& ctx); + void Handle(NActors::TEvents::TEvWakeup::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPrivate::TEvPingSnapshotsUsage::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPrivate::TEvWriteIndex::TPtr& ev, const TActorContext& ctx); void Handle(NMetadata::NProvider::TEvRefreshSubscriberData::TPtr& ev); void Handle(NEvents::TDataEvents::TEvWrite::TPtr& ev, const TActorContext& ctx); @@ -276,17 +282,16 @@ class TColumnShard putStatus.OnYellowChannels(Executor()); } - void SetCounter(NColumnShard::ESimpleCounters counter, ui64 num) const { - TabletCounters->Simple()[counter].Set(num); - } - - void IncCounter(NColumnShard::ECumulativeCounters counter, ui64 num = 1) const { - TabletCounters->Cumulative()[counter].Increment(num); - } - void ActivateTiering(const ui64 pathId, const TString& useTiering); void OnTieringModified(const std::optional pathId = {}); + public: + ui64 BuildEphemeralTxId() { + static TAtomicCounter Counter = 0; + static constexpr ui64 shift = (ui64)1 << 47; + return shift | Counter.Inc(); + } + enum class EOverloadStatus { ShardTxInFly /* "shard_tx" */, ShardWritesInFly /* "shard_writes" */, @@ -297,25 +302,17 @@ class TColumnShard None /* "none" */ }; - void IncCounter(NColumnShard::EPercentileCounters counter, const TDuration& latency) const { - TabletCounters->Percentile()[counter].IncrementFor(latency.MicroSeconds()); - } - - void IncCounter(NDataShard::ESimpleCounters counter, ui64 num = 1) const { - TabletCounters->Simple()[counter].Add(num); - } - // For syslocks void IncCounter(NDataShard::ECumulativeCounters counter, ui64 num = 1) const { - TabletCounters->Cumulative()[counter].Increment(num); + Counters.GetTabletCounters()->IncCounter(counter, num); } void IncCounter(NDataShard::EPercentileCounters counter, ui64 num) const { - TabletCounters->Percentile()[counter].IncrementFor(num); + 
Counters.GetTabletCounters()->IncCounter(counter, num); } void IncCounter(NDataShard::EPercentileCounters counter, const TDuration& latency) const { - TabletCounters->Percentile()[counter].IncrementFor(latency.MilliSeconds()); + Counters.GetTabletCounters()->IncCounter(counter, latency); } inline TRowVersion LastCompleteTxVersion() const { @@ -329,7 +326,7 @@ class TColumnShard } private: - void OverloadWriteFail(const EOverloadStatus overloadReason, const NEvWrite::TWriteData& writeData, const ui64 cookie, std::unique_ptr&& event, const TActorContext& ctx); + void OverloadWriteFail(const EOverloadStatus overloadReason, const NEvWrite::TWriteMeta& writeMeta, const ui64 writeSize, const ui64 cookie, std::unique_ptr&& event, const TActorContext& ctx); EOverloadStatus CheckOverloaded(const ui64 tableId) const; protected: @@ -357,6 +354,9 @@ class TColumnShard switch (ev->GetTypeRewrite()) { hFunc(NMetadata::NProvider::TEvRefreshSubscriberData, Handle); + HFunc(TEvTxProcessing::TEvReadSet, Handle); + HFunc(TEvTxProcessing::TEvReadSetAck, Handle); + HFunc(TEvTabletPipe::TEvClientConnected, Handle); HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); HFunc(TEvTabletPipe::TEvServerConnected, Handle); @@ -376,6 +376,9 @@ class TColumnShard HFunc(TEvPrivate::TEvScanStats, Handle); HFunc(TEvPrivate::TEvReadFinished, Handle); HFunc(TEvPrivate::TEvPeriodicWakeup, Handle); + HFunc(NActors::TEvents::TEvWakeup, Handle); + HFunc(TEvPrivate::TEvPingSnapshotsUsage, Handle); + HFunc(NEvents::TDataEvents::TEvWrite, Handle); HFunc(TEvPrivate::TEvWriteDraft, Handle); HFunc(TEvPrivate::TEvGarbageCollectionFinished, Handle); @@ -410,6 +413,9 @@ class TColumnShard } private: + std::unique_ptr TabletCountersHolder; + TCountersManager Counters; + std::unique_ptr ProgressTxController; std::unique_ptr OperationsManager; std::shared_ptr SharingSessionsManager; @@ -420,68 +426,20 @@ class TColumnShard using TSchemaPreset = TSchemaPreset; using TTableInfo = TTableInfo; + const TMonotonic 
CreateInstant = TMonotonic::Now(); + std::optional StartInstant; + struct TLongTxWriteInfo { - ui64 WriteId; + TInsertWriteId InsertWriteId; ui32 WritePartId; NLongTxService::TLongTxId LongTxId; ui64 PreparedTxId = 0; std::optional GranuleShardingVersionId; }; - class TWritesMonitor { - private: - TColumnShard& Owner; - YDB_READONLY(ui64, WritesInFlight, 0); - YDB_READONLY(ui64, WritesSizeInFlight, 0); - - public: - class TGuard: public TNonCopyable { - friend class TWritesMonitor; - private: - TWritesMonitor& Owner; - - explicit TGuard(TWritesMonitor& owner) - : Owner(owner) - {} - - public: - ~TGuard() { - Owner.UpdateCounters(); - } - }; - - TWritesMonitor(TColumnShard& owner) - : Owner(owner) - {} - - TGuard RegisterWrite(const ui64 dataSize) { - ++WritesInFlight; - WritesSizeInFlight += dataSize; - return TGuard(*this); - } - - TGuard FinishWrite(const ui64 dataSize, const ui32 writesCount = 1) { - Y_ABORT_UNLESS(WritesInFlight > 0); - Y_ABORT_UNLESS(WritesSizeInFlight >= dataSize); - WritesInFlight -= writesCount; - WritesSizeInFlight -= dataSize; - return TGuard(*this); - } - - TString DebugString() const { - return TStringBuilder() << "{object=write_monitor;count=" << WritesInFlight << ";size=" << WritesSizeInFlight << "}"; - } - - private: - void UpdateCounters() { - Owner.SetCounter(COUNTER_WRITES_IN_FLY, WritesInFlight); - } - }; - ui64 CurrentSchemeShardId = 0; TMessageSeqNo LastSchemaSeqNo; std::optional ProcessingParams; - TWriteId LastWriteId = TWriteId{0}; ui64 LastPlannedStep = 0; ui64 LastPlannedTxId = 0; NOlap::TSnapshot LastCompletedTx = NOlap::TSnapshot::Zero(); @@ -497,35 +455,26 @@ class TColumnShard const TDuration PeriodicWakeupActivationPeriod; TDuration FailActivationDelay = TDuration::Seconds(1); const TDuration StatsReportInterval; - TInstant LastAccessTime; TInstant LastStatsReport; TActorId ResourceSubscribeActor; TActorId BufferizationWriteActorId; TActorId StatsReportPipe; + std::vector ActorsToStop; TInFlightReadsTracker 
InFlightReadsTracker; TTablesManager TablesManager; std::shared_ptr Subscribers; std::shared_ptr Tiers; - std::unique_ptr TabletCountersPtr; - TTabletCountersBase* TabletCounters; std::unique_ptr PipeClientCache; std::unique_ptr InsertTable; - std::shared_ptr SubscribeCounters; NOlap::NResourceBroker::NSubscribe::TTaskContext InsertTaskSubscription; NOlap::NResourceBroker::NSubscribe::TTaskContext CompactTaskSubscription; NOlap::NResourceBroker::NSubscribe::TTaskContext TTLTaskSubscription; - const TScanCounters ScanCounters; - const TIndexationCounters CompactionCounters = TIndexationCounters("GeneralCompaction"); - const TIndexationCounters IndexationCounters = TIndexationCounters("Indexation"); - const TIndexationCounters EvictionCounters = TIndexationCounters("Eviction"); - - const TCSCounters CSCounters; - TWritesMonitor WritesMonitor; - bool ProgressTxInFlight = false; + + std::optional ProgressTxInFlight; THashMap ScanTxInFlight; - THashMap LongTxWrites; + THashMap LongTxWrites; using TPartsForLTXShard = THashMap; THashMap LongTxWritesByUniqueId; TMultiMap WaitingScans; @@ -534,32 +483,29 @@ class TColumnShard TLimits Limits; NOlap::TNormalizationController NormalizerController; NDataShard::TSysLocks SysLocks; + static TDuration GetMaxReadStaleness(); void TryRegisterMediatorTimeCast(); void UnregisterMediatorTimeCast(); - void TryAbortWrites(NIceDb::TNiceDb& db, NOlap::TDbWrapper& dbTable, THashSet&& writesToAbort); + void TryAbortWrites(NIceDb::TNiceDb& db, NOlap::TDbWrapper& dbTable, THashSet&& writesToAbort); bool WaitPlanStep(ui64 step); void SendWaitPlanStep(ui64 step); void RescheduleWaitingReads(); NOlap::TSnapshot GetMaxReadVersion() const; - ui64 GetMinReadStep() const; + NOlap::TSnapshot GetMinReadSnapshot() const; ui64 GetOutdatedStep() const; TDuration GetTxCompleteLag() const { ui64 mediatorTime = MediatorTimeCastEntry ? 
MediatorTimeCastEntry->Get(TabletID()) : 0; return ProgressTxController->GetTxCompleteLag(mediatorTime); } - TWriteId HasLongTxWrite(const NLongTxService::TLongTxId& longTxId, const ui32 partId) const; - TWriteId GetLongTxWrite(NIceDb::TNiceDb& db, const NLongTxService::TLongTxId& longTxId, const ui32 partId, const std::optional granuleShardingVersionId); - void AddLongTxWrite(TWriteId writeId, ui64 txId); - void LoadLongTxWrite(TWriteId writeId, const ui32 writePartId, const NLongTxService::TLongTxId& longTxId, const std::optional granuleShardingVersion); - bool RemoveLongTxWrite(NIceDb::TNiceDb& db, const TWriteId writeId, const ui64 txId); + TInsertWriteId HasLongTxWrite(const NLongTxService::TLongTxId& longTxId, const ui32 partId) const; + TInsertWriteId GetLongTxWrite(NIceDb::TNiceDb& db, const NLongTxService::TLongTxId& longTxId, const ui32 partId, const std::optional granuleShardingVersionId); + void AddLongTxWrite(const TInsertWriteId writeId, ui64 txId); + void LoadLongTxWrite(const TInsertWriteId writeId, const ui32 writePartId, const NLongTxService::TLongTxId& longTxId, const std::optional granuleShardingVersion); + bool RemoveLongTxWrite(NIceDb::TNiceDb& db, const TInsertWriteId writeId, const ui64 txId); - TWriteId BuildNextWriteId(NTabletFlatExecutor::TTransactionContext& txc); - TWriteId BuildNextWriteId(NIceDb::TNiceDb& db); - - void EnqueueProgressTx(const TActorContext& ctx); void EnqueueBackgroundActivities(const bool periodic = false); virtual void Enqueue(STFUNC_SIG) override; @@ -573,7 +519,7 @@ class TColumnShard void RunDropTable(const NKikimrTxColumnShard::TDropTable& body, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc); void RunAlterStore(const NKikimrTxColumnShard::TAlterStore& body, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc); - void StartIndexTask(std::vector&& dataToIndex, const i64 bytesToIndex); + void StartIndexTask(std::vector&& dataToIndex, const i64 
bytesToIndex); void SetupIndexation(); void SetupCompaction(); bool SetupTtl(const THashMap& pathTtls = {}); @@ -590,12 +536,14 @@ class TColumnShard void SendPeriodicStats(); void FillOlapStats(const TActorContext& ctx, std::unique_ptr& ev); void FillColumnTableStats(const TActorContext& ctx, std::unique_ptr& ev); - void ConfigureStats(const NOlap::TColumnEngineStats& indexStats, ::NKikimrTableStats::TTableStats* tabletStats); - void FillTxTableStats(::NKikimrTableStats::TTableStats* tableStats) const; public: ui64 TabletTxCounter = 0; + bool HasLongTxWrites(const TInsertWriteId insertWriteId) const { + return LongTxWrites.contains(insertWriteId); + } + void EnqueueProgressTx(const TActorContext& ctx, const std::optional continueTxId); NOlap::TSnapshot GetLastTxSnapshot() const { return NOlap::TSnapshot(LastPlannedStep, LastPlannedTxId); } @@ -623,6 +571,11 @@ class TColumnShard return *ProgressTxController; } + TOperationsManager& GetOperationsManager() const { + AFL_VERIFY(OperationsManager); + return *OperationsManager; + } + bool HasIndex() const { return !!TablesManager.GetPrimaryIndex(); } diff --git a/ydb/core/tx/columnshard/columnshard_private_events.h b/ydb/core/tx/columnshard/columnshard_private_events.h index da31b1938e5a..cb0e8cd97150 100644 --- a/ydb/core/tx/columnshard/columnshard_private_events.h +++ b/ydb/core/tx/columnshard/columnshard_private_events.h @@ -46,6 +46,7 @@ struct TEvPrivate { EvExportSaveCursor, EvTaskProcessedResult, + EvPingSnapshotsUsage, EvEnd }; @@ -142,8 +143,9 @@ struct TEvPrivate { struct TEvReadFinished : public TEventLocal { explicit TEvReadFinished(ui64 requestCookie, ui64 txId = 0) - : RequestCookie(requestCookie), TxId(txId) - {} + : RequestCookie(requestCookie) + , TxId(txId) { + } ui64 RequestCookie; ui64 TxId; @@ -157,17 +159,39 @@ struct TEvPrivate { bool Manual; }; - class TEvWriteBlobsResult : public TEventLocal { + struct TEvPingSnapshotsUsage: public TEventLocal { + TEvPingSnapshotsUsage() = default; + }; + + 
class TEvWriteBlobsResult: public TEventLocal { + public: + enum EErrorClass { + Internal, + Request + }; private: NColumnShard::TBlobPutResult::TPtr PutResult; NOlap::TWritingBuffer WritesBuffer; YDB_READONLY_DEF(TString, ErrorMessage); + YDB_ACCESSOR(EErrorClass, ErrorClass, EErrorClass::Internal); + public: + + NKikimrDataEvents::TEvWriteResult::EStatus GetWriteResultStatus() const { + switch (ErrorClass) { + case EErrorClass::Internal: + return NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR; + case EErrorClass::Request: + return NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST; + } + } - static std::unique_ptr Error(const NKikimrProto::EReplyStatus status, NOlap::TWritingBuffer&& writesBuffer, const TString& error) { - std::unique_ptr result = std::make_unique(std::make_shared(status), - std::move(writesBuffer)); + static std::unique_ptr Error( + const NKikimrProto::EReplyStatus status, NOlap::TWritingBuffer&& writesBuffer, const TString& error, const EErrorClass errorClass) { + std::unique_ptr result = + std::make_unique(std::make_shared(status), std::move(writesBuffer)); result->ErrorMessage = error; + result->ErrorClass = errorClass; return result; } diff --git a/ydb/core/tx/columnshard/columnshard_schema.cpp b/ydb/core/tx/columnshard/columnshard_schema.cpp index 3d9dc8e7a9b0..c9a60029a892 100644 --- a/ydb/core/tx/columnshard/columnshard_schema.cpp +++ b/ydb/core/tx/columnshard/columnshard_schema.cpp @@ -4,51 +4,24 @@ namespace NKikimr::NColumnShard { bool Schema::InsertTable_Load(NIceDb::TNiceDb& db, const IBlobGroupSelector* dsGroupSelector, NOlap::TInsertTableAccessor& insertTable, const TInstant& /*loadTime*/) { - auto rowset = db.Table().GreaterOrEqual(0, 0, 0, 0, "").Select(); + auto rowset = db.Table().Select(); if (!rowset.IsReady()) { return false; } while (!rowset.EndOfSet()) { - EInsertTableIds recType = (EInsertTableIds)rowset.GetValue(); - ui64 planStep = rowset.GetValue(); - ui64 writeTxId = rowset.GetValueOrDefault(); - ui64 
pathId = rowset.GetValue(); - TString dedupId = rowset.GetValue(); - TString strBlobId = rowset.GetValue(); - TString metaStr = rowset.GetValue(); - ui64 schemaVersion = rowset.HaveValue() ? rowset.GetValue() : 0; + NOlap::TInsertTableRecordLoadContext constructor; + constructor.ParseFromDatabase(rowset); - TString error; - NOlap::TUnifiedBlobId blobId = NOlap::TUnifiedBlobId::ParseFromString(strBlobId, dsGroupSelector, error); - Y_ABORT_UNLESS(blobId.IsValid(), "Failied to parse blob id: %s", error.c_str()); - - NKikimrTxColumnShard::TLogicalMetadata meta; - if (metaStr) { - Y_ABORT_UNLESS(meta.ParseFromString(metaStr)); - } - - std::optional rangeOffset; - if (rowset.HaveValue()) { - rangeOffset = rowset.GetValue(); - } - std::optional rangeSize; - if (rowset.HaveValue()) { - rangeSize = rowset.GetValue(); - } - - AFL_VERIFY(!!rangeOffset == !!rangeSize); - TInsertedData data(planStep, writeTxId, pathId, dedupId, NOlap::TBlobRange(blobId, rangeOffset.value_or(0), rangeSize.value_or(blobId.BlobSize())), meta, schemaVersion, {}); - - switch (recType) { - case EInsertTableIds::Inserted: - insertTable.AddInserted(std::move(data), true); + switch (constructor.GetRecType()) { + case Schema::EInsertTableIds::Inserted: + insertTable.AddInserted(constructor.BuildInsertedOrAborted(dsGroupSelector), true); break; - case EInsertTableIds::Committed: - insertTable.AddCommitted(std::move(data), true); + case Schema::EInsertTableIds::Committed: + insertTable.AddCommitted(constructor.BuildCommitted(dsGroupSelector), true); break; - case EInsertTableIds::Aborted: - insertTable.AddAborted(std::move(data), true); + case Schema::EInsertTableIds::Aborted: + insertTable.AddAborted(constructor.BuildInsertedOrAborted(dsGroupSelector), true); break; } if (!rowset.Next()) { @@ -59,6 +32,7 @@ bool Schema::InsertTable_Load(NIceDb::TNiceDb& db, const IBlobGroupSelector* dsG } void Schema::SaveTxInfo(NIceDb::TNiceDb& db, const TFullTxInfo& txInfo, const TString& txBody) { + 
AFL_VERIFY(txInfo.TxKind != NKikimrTxColumnShard::TX_KIND_NONE); db.Table().Key(txInfo.TxId).Update( NIceDb::TUpdate(txInfo.TxKind), NIceDb::TUpdate(txBody), @@ -70,11 +44,14 @@ void Schema::SaveTxInfo(NIceDb::TNiceDb& db, const TFullTxInfo& txInfo, const TS } void Schema::UpdateTxInfoSource(NIceDb::TNiceDb& db, const TFullTxInfo& txInfo) { - db.Table().Key(txInfo.GetTxId()).Update( - NIceDb::TUpdate(txInfo.Source), - NIceDb::TUpdate(txInfo.Cookie), - NIceDb::TUpdate(txInfo.SerializeSeqNoAsString()) - ); + db.Table() + .Key(txInfo.GetTxId()) + .Update(NIceDb::TUpdate(txInfo.Source), NIceDb::TUpdate(txInfo.Cookie), + NIceDb::TUpdate(txInfo.SerializeSeqNoAsString())); } +void Schema::UpdateTxInfoBody(NIceDb::TNiceDb& db, const ui64 txId, const TString& txBody) { + db.Table().Key(txId).Update(NIceDb::TUpdate(txBody)); } + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/columnshard_schema.h b/ydb/core/tx/columnshard/columnshard_schema.h index fd1da16940ef..8b104b9dcd58 100644 --- a/ydb/core/tx/columnshard/columnshard_schema.h +++ b/ydb/core/tx/columnshard/columnshard_schema.h @@ -17,7 +17,7 @@ class TColumnChunkLoadContext; namespace NKikimr::NColumnShard { -using NOlap::TWriteId; +using NOlap::TInsertWriteId; using NOlap::IBlobGroupSelector; struct TFullTxInfo; @@ -31,6 +31,7 @@ struct Schema : NIceDb::Schema { using TSettings = SchemaSettings; using TInsertedData = NOlap::TInsertedData; + using TCommittedData = NOlap::TCommittedData; using TColumnRecord = NOlap::TColumnRecord; enum EIndexTables : ui32 { @@ -108,7 +109,11 @@ struct Schema : NIceDb::Schema { TableVersionInfo = 11, SmallBlobs = 12, OneToOneEvictedBlobs = 13, - BlobsToDeleteWT = 14 + BlobsToDeleteWT = 14, + InFlightSnapshots = 15, + TxDependencies = 16, + TxStates = 17, + TxEvents = 18 }; // Tablet tables @@ -250,6 +255,40 @@ struct Schema : NIceDb::Schema { using TColumns = TableColumns; }; + struct InFlightSnapshots: Table<(ui32)ECommonTables::InFlightSnapshots> { + struct 
PlanStep: Column<1, NScheme::NTypeIds::Uint64> {}; + struct TxId: Column<2, NScheme::NTypeIds::Uint64> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + + struct TxDependencies: Table<(ui32)ECommonTables::TxDependencies> { + struct CommitTxId: Column<1, NScheme::NTypeIds::Uint64> {}; + struct BrokenTxId: Column<2, NScheme::NTypeIds::Uint64> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + + struct TxStates: Table<(ui32)ECommonTables::TxStates> { + struct TxId: Column<1, NScheme::NTypeIds::Uint64> {}; + struct Broken: Column<2, NScheme::NTypeIds::Bool> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + + struct TxEvents: Table<(ui32)ECommonTables::TxEvents> { + struct TxId: Column<1, NScheme::NTypeIds::Uint64> {}; + struct GenerationId: Column<2, NScheme::NTypeIds::Uint64> {}; + struct GenerationInternalId: Column<3, NScheme::NTypeIds::Uint64> {}; + struct Data: Column<4, NScheme::NTypeIds::String> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + // Index tables // InsertTable - common for all indices @@ -545,7 +584,11 @@ struct Schema : NIceDb::Schema { BackgroundSessions, ShardingInfo, Normalizers, - NormalizerEvents + NormalizerEvents, + InFlightSnapshots, + TxDependencies, + TxStates, + TxEvents >; // @@ -661,8 +704,8 @@ struct Schema : NIceDb::Schema { static void SaveTxInfo(NIceDb::TNiceDb& db, const TFullTxInfo& txInfo, const TString& txBody); + static void UpdateTxInfoBody(NIceDb::TNiceDb& db, const ui64 txId, const TString& txBody); static void UpdateTxInfoSource(NIceDb::TNiceDb& db, const TFullTxInfo& txInfo); - static void UpdateTxInfoSource(NIceDb::TNiceDb& db, ui64 txId, const TActorId& source, ui64 cookie) { db.Table().Key(txId).Update( NIceDb::TUpdate(source), @@ -742,7 +785,7 @@ struct Schema : NIceDb::Schema { db.Table().Key(pathId).Delete(); } - static void SaveLongTxWrite(NIceDb::TNiceDb& db, TWriteId writeId, const ui32 writePartId, const 
NLongTxService::TLongTxId& longTxId, const std::optional granuleShardingVersion) { + static void SaveLongTxWrite(NIceDb::TNiceDb& db, const TInsertWriteId writeId, const ui32 writePartId, const NLongTxService::TLongTxId& longTxId, const std::optional granuleShardingVersion) { NKikimrLongTxService::TLongTxId proto; longTxId.ToProto(&proto); TString serialized; @@ -754,32 +797,49 @@ struct Schema : NIceDb::Schema { ); } - static void EraseLongTxWrite(NIceDb::TNiceDb& db, TWriteId writeId) { + static void EraseLongTxWrite(NIceDb::TNiceDb& db, const TInsertWriteId writeId) { db.Table().Key((ui64)writeId).Delete(); } // InsertTable activities - static void InsertTable_Upsert(NIceDb::TNiceDb& db, EInsertTableIds recType, const TInsertedData& data) { - db.Table().Key((ui8)recType, data.PlanStep, data.WriteTxId, data.PathId, data.DedupId).Update( - NIceDb::TUpdate(data.GetBlobRange().GetBlobId().ToStringLegacy()), - NIceDb::TUpdate(data.GetBlobRange().Offset), - NIceDb::TUpdate(data.GetBlobRange().Size), - NIceDb::TUpdate(data.GetMeta().SerializeToProto().SerializeAsString()), - NIceDb::TUpdate(data.GetSchemaVersion()) - ); + static void InsertTable_Upsert(NIceDb::TNiceDb& db, const EInsertTableIds recType, const TInsertedData& data) { + db.Table() + .Key((ui8)recType, 0, (ui64)data.GetInsertWriteId(), data.GetPathId(), "") + .Update(NIceDb::TUpdate(data.GetBlobRange().GetBlobId().ToStringLegacy()), + NIceDb::TUpdate(data.GetBlobRange().Offset), + NIceDb::TUpdate(data.GetBlobRange().Size), + NIceDb::TUpdate(data.GetMeta().SerializeToProto().SerializeAsString()), + NIceDb::TUpdate(data.GetSchemaVersion())); + } + + static void InsertTable_Upsert(NIceDb::TNiceDb& db, const TCommittedData& data) { + db.Table() + .Key((ui8)EInsertTableIds::Committed, data.GetSnapshot().GetPlanStep(), data.GetSnapshot().GetTxId(), data.GetPathId(), + data.GetDedupId()) + .Update(NIceDb::TUpdate(data.GetBlobRange().GetBlobId().ToStringLegacy()), + NIceDb::TUpdate(data.GetBlobRange().Offset), + 
NIceDb::TUpdate(data.GetBlobRange().Size), + NIceDb::TUpdate(data.GetMeta().SerializeToProto().SerializeAsString()), + NIceDb::TUpdate(data.GetSchemaVersion())); } static void InsertTable_Erase(NIceDb::TNiceDb& db, EInsertTableIds recType, const TInsertedData& data) { - db.Table().Key((ui8)recType, data.PlanStep, data.WriteTxId, data.PathId, data.DedupId).Delete(); + db.Table().Key((ui8)recType, 0, (ui64)data.GetInsertWriteId(), data.GetPathId(), "").Delete(); + } + + static void InsertTable_Erase(NIceDb::TNiceDb& db, const TCommittedData& data) { + db.Table() + .Key((ui8)EInsertTableIds::Committed, data.GetSnapshot().GetPlanStep(), data.GetSnapshot().GetTxId(), data.GetPathId(), data.GetDedupId()) + .Delete(); } static void InsertTable_Insert(NIceDb::TNiceDb& db, const TInsertedData& data) { InsertTable_Upsert(db, EInsertTableIds::Inserted, data); } - static void InsertTable_Commit(NIceDb::TNiceDb& db, const TInsertedData& data) { - InsertTable_Upsert(db, EInsertTableIds::Committed, data); + static void InsertTable_Commit(NIceDb::TNiceDb& db, const TCommittedData& data) { + InsertTable_Upsert(db, data); } static void InsertTable_Abort(NIceDb::TNiceDb& db, const TInsertedData& data) { @@ -790,8 +850,8 @@ struct Schema : NIceDb::Schema { InsertTable_Erase(db, EInsertTableIds::Inserted, data); } - static void InsertTable_EraseCommitted(NIceDb::TNiceDb& db, const TInsertedData& data) { - InsertTable_Erase(db, EInsertTableIds::Committed, data); + static void InsertTable_EraseCommitted(NIceDb::TNiceDb& db, const TCommittedData& data) { + InsertTable_Erase(db, data); } static void InsertTable_EraseAborted(NIceDb::TNiceDb& db, const TInsertedData& data) { @@ -917,4 +977,124 @@ class TIndexChunkLoadContext { } }; -} +class TInsertTableRecordLoadContext { +private: + NColumnShard::Schema::EInsertTableIds RecType; + ui64 PlanStep; + ui64 WriteTxId; + ui64 PathId; + YDB_ACCESSOR_DEF(TString, DedupId); + ui64 SchemaVersion; + TString BlobIdString; + std::optional BlobId; + 
TString MetadataString; + std::optional Metadata; + std::optional RangeOffset; + std::optional RangeSize; + + void Prepare(const IBlobGroupSelector* dsGroupSelector) { + AFL_VERIFY(!PreparedFlag); + PreparedFlag = true; + TString error; + NOlap::TUnifiedBlobId blobId = NOlap::TUnifiedBlobId::ParseFromString(BlobIdString, dsGroupSelector, error); + Y_ABORT_UNLESS(blobId.IsValid(), "Failied to parse blob id: %s", error.c_str()); + BlobId = blobId; + + NKikimrTxColumnShard::TLogicalMetadata meta; + AFL_VERIFY(MetadataString); + Y_ABORT_UNLESS(meta.ParseFromString(MetadataString)); + Metadata = std::move(meta); + AFL_VERIFY(!!RangeOffset == !!RangeSize); + } + + bool PreparedFlag = false; + bool ParsedFlag = false; + +public: + TInsertWriteId GetInsertWriteId() const { + AFL_VERIFY(ParsedFlag); + AFL_VERIFY(RecType != NColumnShard::Schema::EInsertTableIds::Committed); + return (TInsertWriteId)WriteTxId; + } + + NColumnShard::Schema::EInsertTableIds GetRecType() const { + AFL_VERIFY(ParsedFlag); + return RecType; + } + + ui64 GetPlanStep() const { + AFL_VERIFY(ParsedFlag); + return PlanStep; + } + + void Remove(NIceDb::TNiceDb& db) const { + AFL_VERIFY(ParsedFlag); + db.Table().Key((ui8)RecType, PlanStep, WriteTxId, PathId, DedupId).Delete(); + } + + void Upsert(NIceDb::TNiceDb& db) const { + AFL_VERIFY(ParsedFlag); + using namespace NColumnShard; + if (RangeOffset) { + db.Table() + .Key((ui8)RecType, PlanStep, WriteTxId, PathId, DedupId) + .Update(NIceDb::TUpdate(BlobIdString), + NIceDb::TUpdate(*RangeOffset), + NIceDb::TUpdate(*RangeSize), NIceDb::TUpdate(MetadataString), + NIceDb::TUpdate(SchemaVersion)); + } else { + db.Table() + .Key((ui8)RecType, PlanStep, WriteTxId, PathId, DedupId) + .Update(NIceDb::TUpdate(BlobIdString), NIceDb::TUpdate(MetadataString), + NIceDb::TUpdate(SchemaVersion)); + } + } + + template + void ParseFromDatabase(TRowset& rowset) { + AFL_VERIFY(!ParsedFlag)("problem", "duplication parsing"); + ParsedFlag = true; + using namespace 
NColumnShard; + RecType = (Schema::EInsertTableIds)rowset.template GetValue(); + PlanStep = rowset.template GetValue(); + WriteTxId = rowset.template GetValueOrDefault(); + AFL_VERIFY(WriteTxId); + + PathId = rowset.template GetValue(); + DedupId = rowset.template GetValue(); + SchemaVersion = + rowset.template HaveValue() ? rowset.template GetValue() : 0; + BlobIdString = rowset.template GetValue(); + MetadataString = rowset.template GetValue(); + if (rowset.template HaveValue()) { + RangeOffset = rowset.template GetValue(); + } + if (rowset.template HaveValue()) { + RangeSize = rowset.template GetValue(); + } + } + + NOlap::TCommittedData BuildCommitted(const IBlobGroupSelector* dsGroupSelector) { + Prepare(dsGroupSelector); + using namespace NColumnShard; + AFL_VERIFY(RecType == Schema::EInsertTableIds::Committed); + auto userData = std::make_shared(PathId, + NOlap::TBlobRange(*BlobId, RangeOffset.value_or(0), RangeSize.value_or(BlobId->BlobSize())), *Metadata, SchemaVersion, std::nullopt); + AFL_VERIFY(!!DedupId); + AFL_VERIFY(PlanStep); + return NOlap::TCommittedData(userData, PlanStep, WriteTxId, DedupId); + } + + NOlap::TInsertedData BuildInsertedOrAborted(const IBlobGroupSelector* dsGroupSelector) { + Prepare(dsGroupSelector); + using namespace NColumnShard; + AFL_VERIFY(RecType != Schema::EInsertTableIds::Committed); + auto userData = std::make_shared(PathId, + NOlap::TBlobRange(*BlobId, RangeOffset.value_or(0), RangeSize.value_or(BlobId->BlobSize())), *Metadata, SchemaVersion, std::nullopt); + AFL_VERIFY(!DedupId); + AFL_VERIFY(!PlanStep); + return NOlap::TInsertedData((TInsertWriteId)WriteTxId, userData); + } +}; + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/common/limits.h b/ydb/core/tx/columnshard/common/limits.h index 796ffef309ac..b30432dfb2fd 100644 --- a/ydb/core/tx/columnshard/common/limits.h +++ b/ydb/core/tx/columnshard/common/limits.h @@ -9,5 +9,10 @@ class TGlobalLimits { static constexpr inline ui64 
InsertCompactionMemoryLimit = 1ULL << 30; static constexpr inline ui64 GeneralCompactionMemoryLimit = 3ULL << 30; static constexpr inline ui64 ScanMemoryLimit = 3ULL << 30; + + static constexpr inline ui64 DefaultBlobsMemoryIntervalLimit = ScanMemoryLimit; + static constexpr inline ui64 DefaultRejectMemoryIntervalLimit = ScanMemoryLimit; + static constexpr inline ui64 DefaultReduceMemoryIntervalLimit = 0.8 * ScanMemoryLimit; + static constexpr inline ui64 DefaultReadSequentiallyBufferSize = ((ui64)8) << 20; }; } \ No newline at end of file diff --git a/ydb/core/tx/columnshard/common/snapshot.cpp b/ydb/core/tx/columnshard/common/snapshot.cpp index 6ca80d818986..eb6e62ccac0c 100644 --- a/ydb/core/tx/columnshard/common/snapshot.cpp +++ b/ydb/core/tx/columnshard/common/snapshot.cpp @@ -35,4 +35,12 @@ TString TSnapshot::SerializeToString() const { return SerializeToProto().SerializeAsString(); } +NKikimr::NOlap::TSnapshot TSnapshot::MaxForPlanStep(const ui64 planStep) noexcept { + return TSnapshot(planStep, ::Max()); +} + +NKikimr::NOlap::TSnapshot TSnapshot::MaxForPlanInstant(const TInstant planInstant) noexcept { + return TSnapshot(planInstant.MilliSeconds(), ::Max()); +} + }; diff --git a/ydb/core/tx/columnshard/common/snapshot.h b/ydb/core/tx/columnshard/common/snapshot.h index ffa48670371e..4bc99d268420 100644 --- a/ydb/core/tx/columnshard/common/snapshot.h +++ b/ydb/core/tx/columnshard/common/snapshot.h @@ -54,6 +54,10 @@ class TSnapshot { return TSnapshot(-1ll, -1ll); } + static TSnapshot MaxForPlanInstant(const TInstant planInstant) noexcept; + + static TSnapshot MaxForPlanStep(const ui64 planStep) noexcept; + constexpr bool operator==(const TSnapshot&) const noexcept = default; constexpr auto operator<=>(const TSnapshot&) const noexcept = default; diff --git a/ydb/core/tx/columnshard/common/tests/shard_reader.cpp b/ydb/core/tx/columnshard/common/tests/shard_reader.cpp deleted file mode 100644 index 2789a63e38a8..000000000000 --- 
a/ydb/core/tx/columnshard/common/tests/shard_reader.cpp +++ /dev/null @@ -1,4 +0,0 @@ -#include "shard_reader.h" - -namespace NKikimr::NOlap::NTests { -} diff --git a/ydb/core/tx/columnshard/common/tests/ya.make b/ydb/core/tx/columnshard/common/tests/ya.make deleted file mode 100644 index 8b3c3809006b..000000000000 --- a/ydb/core/tx/columnshard/common/tests/ya.make +++ /dev/null @@ -1,14 +0,0 @@ -LIBRARY() - -SRCS( - shard_reader.cpp -) - -PEERDIR( - ydb/core/formats/arrow/protos - contrib/libs/apache/arrow - ydb/core/formats/arrow - ydb/core/kqp/compute_actor -) - -END() diff --git a/ydb/core/tx/columnshard/common/ya.make b/ydb/core/tx/columnshard/common/ya.make index 87bd2c16b26b..300691ed711e 100644 --- a/ydb/core/tx/columnshard/common/ya.make +++ b/ydb/core/tx/columnshard/common/ya.make @@ -1,7 +1,7 @@ LIBRARY() SRCS( - limits.h + limits.cpp reverse_accessor.cpp scalars.cpp snapshot.cpp diff --git a/ydb/core/tx/columnshard/counters/aggregation/table_stats.h b/ydb/core/tx/columnshard/counters/aggregation/table_stats.h new file mode 100644 index 000000000000..68f39a4191de --- /dev/null +++ b/ydb/core/tx/columnshard/counters/aggregation/table_stats.h @@ -0,0 +1,47 @@ +#pragma once + +#include +#include +#include +#include + +namespace NKikimr::NColumnShard { + +class TTableStatsBuilder { +private: + TCountersManager& Counters; + const NTabletFlatExecutor::NFlatExecutorSetup::IExecutor& Executor; + NOlap::IColumnEngine& ColumnEngine; + +public: + TTableStatsBuilder( + TCountersManager& counters, const NTabletFlatExecutor::NFlatExecutorSetup::IExecutor* executor, NOlap::IColumnEngine& columnEngine) + : Counters(counters) + , Executor(*executor) + , ColumnEngine(columnEngine) { + } + + void FillTableStats(ui64 pathId, ::NKikimrTableStats::TTableStats& tableStats) { + Counters.FillTableStats(pathId, tableStats); + + auto columnEngineStats = ColumnEngine.GetStats().FindPtr(pathId); + if (columnEngineStats && *columnEngineStats) { + auto activeStats = 
(*columnEngineStats)->Active(); + tableStats.SetRowCount(activeStats.Rows); + tableStats.SetDataSize(activeStats.Bytes); + } + } + + void FillTotalTableStats(::NKikimrTableStats::TTableStats& tableStats) { + Counters.FillTotalTableStats(tableStats); + + tableStats.SetInFlightTxCount(Executor.GetStats().TxInFly); + tableStats.SetHasLoanedParts(Executor.HasLoanedParts()); + + auto activeStats = ColumnEngine.GetTotalStats().Active(); + tableStats.SetRowCount(activeStats.Rows); + tableStats.SetDataSize(activeStats.Bytes); + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/aggregation/ya.make b/ydb/core/tx/columnshard/counters/aggregation/ya.make new file mode 100644 index 000000000000..95687733d093 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/aggregation/ya.make @@ -0,0 +1,10 @@ +LIBRARY() + +SRCS() + +PEERDIR( + ydb/core/protos + ydb/core/base +) + +END() diff --git a/ydb/core/tx/columnshard/counters/background_controller.cpp b/ydb/core/tx/columnshard/counters/background_controller.cpp new file mode 100644 index 000000000000..fcc89f6ee940 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/background_controller.cpp @@ -0,0 +1,18 @@ +#include "background_controller.h" + +#include +#include + +namespace NKikimr::NColumnShard { + +void TBackgroundControllerCounters::OnCompactionFinish(ui64 pathId) { + TInstant now = TAppData::TimeProvider->Now(); + TInstant& lastFinish = LastCompactionFinishByPathId[pathId]; + lastFinish = std::max(lastFinish, now); + + if (LastCompactionFinish < now) { + LastCompactionFinish = now; + } +} + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/background_controller.h b/ydb/core/tx/columnshard/counters/background_controller.h new file mode 100644 index 000000000000..d46d7cdacf91 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/background_controller.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include +#include + +namespace NKikimr::NColumnShard { + 
+class TBackgroundControllerCounters { +private: + THashMap LastCompactionFinishByPathId; + TInstant LastCompactionFinish; + +public: + void OnCompactionFinish(ui64 pathId); + + void FillStats(ui64 pathId, ::NKikimrTableStats::TTableStats& output) const { + output.SetLastFullCompactionTs(GetLastCompactionFinishInstant(pathId).value_or(TInstant::Zero()).Seconds()); + } + + void FillTotalStats(::NKikimrTableStats::TTableStats& output) const { + output.SetLastFullCompactionTs(LastCompactionFinish.Seconds()); + } + +private: + std::optional GetLastCompactionFinishInstant(const ui64 pathId) const { + auto findInstant = LastCompactionFinishByPathId.FindPtr(pathId); + if (!findInstant) { + return std::nullopt; + } + return *findInstant; + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/column_tables.cpp b/ydb/core/tx/columnshard/counters/column_tables.cpp new file mode 100644 index 000000000000..51b9ecf6283e --- /dev/null +++ b/ydb/core/tx/columnshard/counters/column_tables.cpp @@ -0,0 +1,13 @@ +#include "column_tables.h" + +namespace NKikimr::NColumnShard { + +std::shared_ptr TColumnTablesCounters::GetPathIdCounter(ui64 pathId) { + auto findCounter = PathIdCounters.FindPtr(pathId); + if (findCounter) { + return *findCounter; + } + return PathIdCounters.emplace(pathId, std::make_shared(*this)).first->second; +} + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/column_tables.h b/ydb/core/tx/columnshard/counters/column_tables.h new file mode 100644 index 000000000000..db00069218fb --- /dev/null +++ b/ydb/core/tx/columnshard/counters/column_tables.h @@ -0,0 +1,85 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace NKikimr::NColumnShard { + +class TSingleColumnTableCounters; + +class TColumnTablesCounters { +private: + YDB_READONLY_CONST(std::shared_ptr, LastAccessTime); + YDB_READONLY_CONST(std::shared_ptr, LastUpdateTime); + + THashMap> PathIdCounters; + + friend 
class TSingleColumnTableCounters; + +public: + TColumnTablesCounters() + : LastAccessTime(std::make_shared()) + , LastUpdateTime(std::make_shared()) { + } + + void FillStats(::NKikimrTableStats::TTableStats& output) const { + output.SetLastAccessTime(LastAccessTime->MilliSeconds()); + output.SetLastUpdateTime(LastUpdateTime->MilliSeconds()); + } + + std::shared_ptr GetPathIdCounter(ui64 pathId); +}; + +class TSingleColumnTableCounters { +private: + YDB_READONLY(TInstant, PathIdLastAccessTime, TInstant::Zero()); + YDB_READONLY(TInstant, PathIdLastUpdateTime, TInstant::Zero()); + + const std::shared_ptr TotalLastAccessTime; + const std::shared_ptr TotalLastUpdateTime; + +public: + TSingleColumnTableCounters(TColumnTablesCounters& owner) + : TotalLastAccessTime(owner.LastAccessTime) + , TotalLastUpdateTime(owner.LastUpdateTime) { + } + + void OnReadEvent() { + UpdateLastAccessTime(TAppData::TimeProvider->Now()); + } + + void OnWriteEvent() { + TInstant now = TAppData::TimeProvider->Now(); + UpdateLastUpdateTime(now); + UpdateLastAccessTime(now); + } + + void FillStats(::NKikimrTableStats::TTableStats& output) const { + output.SetLastAccessTime(PathIdLastAccessTime.MilliSeconds()); + output.SetLastUpdateTime(PathIdLastUpdateTime.MilliSeconds()); + } + +private: + void UpdateLastAccessTime(TInstant value) { + if (PathIdLastAccessTime < value) { + PathIdLastAccessTime = value; + } + if (*TotalLastAccessTime < value) { + *TotalLastAccessTime = value; + } + } + + void UpdateLastUpdateTime(TInstant value) { + if (PathIdLastUpdateTime < value) { + PathIdLastUpdateTime = value; + } + if (*TotalLastUpdateTime < value) { + *TotalLastUpdateTime = value; + } + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/columnshard.cpp b/ydb/core/tx/columnshard/counters/columnshard.cpp index 64c7e2931c5f..ad7962a5f4e7 100644 --- a/ydb/core/tx/columnshard/counters/columnshard.cpp +++ b/ydb/core/tx/columnshard/counters/columnshard.cpp @@ -8,7 +8,8 @@ 
namespace NKikimr::NColumnShard { TCSCounters::TCSCounters() : TBase("CS") -{ + , Initialization(*this) + , TxProgress(*this) { StartBackgroundCount = TBase::GetDeriviative("StartBackground/Count"); TooEarlyBackgroundCount = TBase::GetDeriviative("TooEarlyBackground/Count"); SetupCompactionCount = TBase::GetDeriviative("SetupCompaction/Count"); @@ -52,6 +53,7 @@ TCSCounters::TCSCounters() HistogramSuccessWriteMiddle6PutBlobsDurationMs = TBase::GetHistogram("SuccessWriteMiddle6PutBlobsDurationMs", NMonitoring::ExponentialHistogram(18, 2, 5)); HistogramFailedWritePutBlobsDurationMs = TBase::GetHistogram("FailedWritePutBlobsDurationMs", NMonitoring::ExponentialHistogram(18, 2, 5)); HistogramWriteTxCompleteDurationMs = TBase::GetHistogram("WriteTxCompleteDurationMs", NMonitoring::ExponentialHistogram(18, 2, 5)); + WritePutBlobsCount = TBase::GetValue("WritePutBlobs"); WriteRequests = TBase::GetValue("WriteRequests"); diff --git a/ydb/core/tx/columnshard/counters/columnshard.h b/ydb/core/tx/columnshard/counters/columnshard.h index 6bada377df17..081adce35687 100644 --- a/ydb/core/tx/columnshard/counters/columnshard.h +++ b/ydb/core/tx/columnshard/counters/columnshard.h @@ -1,8 +1,11 @@ #pragma once #include "common/owner.h" +#include "initialization.h" +#include "tx_progress.h" -#include +#include +#include #include namespace NKikimr::NColumnShard { @@ -13,7 +16,9 @@ enum class EWriteFailReason { LongTxDuplication /* "long_tx_duplication" */, NoTable /* "no_table" */, IncorrectSchema /* "incorrect_schema" */, - Overload /* "overload" */ + Overload /* "overload" */, + OverlimitReadRawMemory /* "overlimit_read_raw_memory" */, + OverlimitReadBlobMemory /* "overlimit_read_blob_memory" */ }; class TCSCounters: public TCommonCountersOwner { @@ -62,11 +67,15 @@ class TCSCounters: public TCommonCountersOwner { NMonitoring::THistogramPtr HistogramSuccessWriteMiddle6PutBlobsDurationMs; NMonitoring::THistogramPtr HistogramFailedWritePutBlobsDurationMs; NMonitoring::THistogramPtr 
HistogramWriteTxCompleteDurationMs; + NMonitoring::TDynamicCounters::TCounterPtr WritePutBlobsCount; NMonitoring::TDynamicCounters::TCounterPtr WriteRequests; THashMap FailedWriteRequests; NMonitoring::TDynamicCounters::TCounterPtr SuccessWriteRequests; public: + const TCSInitialization Initialization; + TTxProgressCounters TxProgress; + void OnStartWriteRequest() const { WriteRequests->Add(1); } @@ -130,27 +139,27 @@ class TCSCounters: public TCommonCountersOwner { SplitCompactionGranulePortionsCount->SetValue(portionsCount); } - void OnOverloadInsertTable(const ui64 size) const { + void OnWriteOverloadInsertTable(const ui64 size) const { OverloadInsertTableBytes->Add(size); OverloadInsertTableCount->Add(1); } - void OnOverloadMetadata(const ui64 size) const { + void OnWriteOverloadMetadata(const ui64 size) const { OverloadMetadataBytes->Add(size); OverloadMetadataCount->Add(1); } - void OnOverloadShardTx(const ui64 size) const { + void OnWriteOverloadShardTx(const ui64 size) const { OverloadShardTxBytes->Add(size); OverloadShardTxCount->Add(1); } - void OnOverloadShardWrites(const ui64 size) const { + void OnWriteOverloadShardWrites(const ui64 size) const { OverloadShardWritesBytes->Add(size); OverloadShardWritesCount->Add(1); } - void OnOverloadShardWritesSize(const ui64 size) const { + void OnWriteOverloadShardWritesSize(const ui64 size) const { OverloadShardWritesSizeBytes->Add(size); OverloadShardWritesSizeCount->Add(1); } diff --git a/ydb/core/tx/columnshard/counters/counters_manager.cpp b/ydb/core/tx/columnshard/counters/counters_manager.cpp new file mode 100644 index 000000000000..11b18888bc31 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/counters_manager.cpp @@ -0,0 +1,5 @@ +#include "counters_manager.h" + +namespace NKikimr::NColumnShard { + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/counters_manager.h b/ydb/core/tx/columnshard/counters/counters_manager.h new file mode 100644 index 
000000000000..17336ca3410d --- /dev/null +++ b/ydb/core/tx/columnshard/counters/counters_manager.h @@ -0,0 +1,97 @@ +#pragma once + +#include "background_controller.h" +#include "column_tables.h" +#include "columnshard.h" +#include "indexation.h" +#include "req_tracer.h" +#include "scan.h" +#include "tablet_counters.h" +#include "writes_monitor.h" + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace NKikimr::NColumnShard { + +class TCountersManager { +private: + YDB_READONLY_DEF(std::shared_ptr, TabletCounters); + YDB_READONLY_DEF(std::shared_ptr, WritesMonitor); + + YDB_READONLY_DEF(std::shared_ptr, BackgroundControllerCounters); + YDB_READONLY_DEF(std::shared_ptr, ColumnTablesCounters); + + YDB_READONLY(TCSCounters, CSCounters, TCSCounters()); + YDB_READONLY(TIndexationCounters, EvictionCounters, TIndexationCounters("Eviction")); + YDB_READONLY(TIndexationCounters, IndexationCounters, TIndexationCounters("Indexation")); + YDB_READONLY(TIndexationCounters, CompactionCounters, TIndexationCounters("GeneralCompaction")); + YDB_READONLY(TScanCounters, ScanCounters, TScanCounters("Scan")); + YDB_READONLY_DEF(std::shared_ptr, RequestsTracingCounters); + YDB_READONLY_DEF(std::shared_ptr, SubscribeCounters); + +public: + TCountersManager(TTabletCountersBase& tabletCounters) + : TabletCounters(std::make_shared(tabletCounters)) + , WritesMonitor(std::make_shared(tabletCounters)) + , BackgroundControllerCounters(std::make_shared()) + , ColumnTablesCounters(std::make_shared()) + , RequestsTracingCounters(std::make_shared()) + , SubscribeCounters(std::make_shared()) { + } + + void OnWriteOverloadDisk() const { + TabletCounters->IncCounter(COUNTER_OUT_OF_SPACE); + } + + void OnWriteOverloadInsertTable(const ui64 size) const { + TabletCounters->IncCounter(COUNTER_WRITE_OVERLOAD); + CSCounters.OnWriteOverloadInsertTable(size); + } + + void OnWriteOverloadMetadata(const ui64 size) const { + 
TabletCounters->IncCounter(COUNTER_WRITE_OVERLOAD); + CSCounters.OnWriteOverloadMetadata(size); + } + + void OnWriteOverloadShardTx(const ui64 size) const { + TabletCounters->IncCounter(COUNTER_WRITE_OVERLOAD); + CSCounters.OnWriteOverloadShardTx(size); + } + + void OnWriteOverloadShardWrites(const ui64 size) const { + TabletCounters->IncCounter(COUNTER_WRITE_OVERLOAD); + CSCounters.OnWriteOverloadShardWrites(size); + } + + void OnWriteOverloadShardWritesSize(const ui64 size) const { + TabletCounters->IncCounter(COUNTER_WRITE_OVERLOAD); + CSCounters.OnWriteOverloadShardWritesSize(size); + } + + void FillTableStats(ui64 pathId, ::NKikimrTableStats::TTableStats& tableStats) { + ColumnTablesCounters->GetPathIdCounter(pathId)->FillStats(tableStats); + BackgroundControllerCounters->FillStats(pathId, tableStats); + } + + void FillTotalTableStats(::NKikimrTableStats::TTableStats& tableStats) { + ColumnTablesCounters->FillStats(tableStats); + TabletCounters->FillStats(tableStats); + BackgroundControllerCounters->FillTotalStats(tableStats); + ScanCounters.FillStats(tableStats); + } + + void OnWritePutBlobsSuccess(const TDuration d, const ui64 rowsWritten) const { + TabletCounters->OnWritePutBlobsSuccess(rowsWritten); + CSCounters.OnWritePutBlobsSuccess(d); + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/engine_logs.h b/ydb/core/tx/columnshard/counters/engine_logs.h index 97a4716652c3..2cbaf7fa234e 100644 --- a/ydb/core/tx/columnshard/counters/engine_logs.h +++ b/ydb/core/tx/columnshard/counters/engine_logs.h @@ -85,17 +85,44 @@ class TAgentDataClassCounters: public TCommonCountersOwner { } }; +class TIntervalMemoryCounters { +public: + const std::shared_ptr MinReadBytes; + TIntervalMemoryCounters(const std::shared_ptr& minReadBytes) + : MinReadBytes(minReadBytes) + { + + } +}; + +class TPortionsIndexCounters { +public: + const TIntervalMemoryCounters RawBytes; + const TIntervalMemoryCounters BlobBytes; + 
TPortionsIndexCounters(TIntervalMemoryCounters&& rawBytes, TIntervalMemoryCounters&& blobBytes) + : RawBytes(std::move(rawBytes)) + , BlobBytes(std::move(blobBytes)) { + } +}; + class TGranuleDataCounters { private: const TDataClassCounters InsertedData; const TDataClassCounters CompactedData; const TDataClassCounters FullData; + const TPortionsIndexCounters PortionsIndexCounters; + public: - TGranuleDataCounters(const TDataClassCounters& insertedData, const TDataClassCounters& compactedData, const TDataClassCounters& fullData) + const TPortionsIndexCounters& GetPortionsIndexCounters() const { + return PortionsIndexCounters; + } + + TGranuleDataCounters(const TDataClassCounters& insertedData, const TDataClassCounters& compactedData, const TDataClassCounters& fullData, + TPortionsIndexCounters&& portionsIndexCounters) : InsertedData(insertedData) , CompactedData(compactedData) , FullData(fullData) - { + , PortionsIndexCounters(std::move(portionsIndexCounters)) { } void OnPortionsDataRefresh(const TBaseGranuleDataClassSummary& inserted, const TBaseGranuleDataClassSummary& compacted) const { @@ -105,20 +132,60 @@ class TGranuleDataCounters { } }; +class TIntervalMemoryAgentCounters: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + const std::shared_ptr ReadBytes; +public: + TIntervalMemoryAgentCounters(const TCommonCountersOwner& base, const TString& memoryType) + : TBase(base, "memory", memoryType) + , ReadBytes(TBase::GetValueAutoAggregations("Bytes")) { + } + + TIntervalMemoryCounters GetClient() const { + return TIntervalMemoryCounters(ReadBytes->GetClient()); + } +}; + +class TPortionsIndexAgentsCounters: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + TIntervalMemoryAgentCounters ReadRawBytes; + TIntervalMemoryAgentCounters ReadBlobBytes; + +public: + + TPortionsIndexAgentsCounters(const TString& baseName) + : TBase(baseName) + , ReadRawBytes(TBase::CreateSubGroup("control", "read_memory"), "raw") + 
, ReadBlobBytes(TBase::CreateSubGroup("control", "read_memory"), "blob") + { + } + + TPortionsIndexCounters BuildCounters() const { + return TPortionsIndexCounters(ReadRawBytes.GetClient(), ReadBlobBytes.GetClient()); + } +}; + class TAgentGranuleDataCounters { private: TAgentDataClassCounters InsertedData; TAgentDataClassCounters CompactedData; TAgentDataClassCounters FullData; + TPortionsIndexAgentsCounters PortionsIndex; + public: TAgentGranuleDataCounters(const TString& ownerId) : InsertedData(ownerId, "ByGranule/Inserted") , CompactedData(ownerId, "ByGranule/Compacted") - , FullData(ownerId, "ByGranule/Full") { + , FullData(ownerId, "ByGranule/Full") + , PortionsIndex("ByGranule/PortionsIndex") + { } TGranuleDataCounters RegisterClient() const { - return TGranuleDataCounters(InsertedData.RegisterClient(), CompactedData.RegisterClient(), FullData.RegisterClient()); + return TGranuleDataCounters( + InsertedData.RegisterClient(), CompactedData.RegisterClient(), FullData.RegisterClient(), PortionsIndex.BuildCounters()); } }; diff --git a/ydb/core/tx/columnshard/counters/initialization.h b/ydb/core/tx/columnshard/counters/initialization.h new file mode 100644 index 000000000000..2a6b432d6135 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/initialization.h @@ -0,0 +1,56 @@ +#pragma once +#include "common/owner.h" + +#include + +namespace NKikimr::NColumnShard { + +class TCSInitialization: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + + const NMonitoring::THistogramPtr HistogramTabletInitializationMs; + const NMonitoring::THistogramPtr HistogramTxInitDurationMs; + const NMonitoring::THistogramPtr HistogramTxUpdateSchemaDurationMs; + const NMonitoring::THistogramPtr HistogramTxInitSchemaDurationMs; + const NMonitoring::THistogramPtr HistogramActivateExecutorFromActivationDurationMs; + const NMonitoring::THistogramPtr HistogramSwitchToWorkFromActivationDurationMs; + const NMonitoring::THistogramPtr 
HistogramSwitchToWorkFromCreateDurationMs; + +public: + void OnTxInitFinished(const TDuration d) const { + HistogramTxInitDurationMs->Collect(d.MilliSeconds()); + } + + void OnTxUpdateSchemaFinished(const TDuration d) const { + HistogramTxUpdateSchemaDurationMs->Collect(d.MilliSeconds()); + } + + void OnTxInitSchemaFinished(const TDuration d) const { + HistogramTxInitSchemaDurationMs->Collect(d.MilliSeconds()); + } + + void OnActivateExecutor(const TDuration fromCreate) const { + HistogramActivateExecutorFromActivationDurationMs->Collect(fromCreate.MilliSeconds()); + } + void OnSwitchToWork(const TDuration fromStart, const TDuration fromCreate) const { + HistogramSwitchToWorkFromActivationDurationMs->Collect(fromStart.MilliSeconds()); + HistogramSwitchToWorkFromCreateDurationMs->Collect(fromCreate.MilliSeconds()); + } + + TCSInitialization(TCommonCountersOwner& owner) + : TBase(owner, "stage", "initialization") + , HistogramTabletInitializationMs(TBase::GetHistogram("TabletInitializationMs", NMonitoring::ExponentialHistogram(15, 2, 32))) + , HistogramTxInitDurationMs(TBase::GetHistogram("TxInitDurationMs", NMonitoring::ExponentialHistogram(15, 2, 32))) + , HistogramTxUpdateSchemaDurationMs(TBase::GetHistogram("TxUpdateSchemaDurationMs", NMonitoring::ExponentialHistogram(15, 2, 32))) /* separate series from TxInitDurationMs */ + , HistogramTxInitSchemaDurationMs(TBase::GetHistogram("TxInitSchemaDurationMs", NMonitoring::ExponentialHistogram(15, 2, 32))) + , HistogramActivateExecutorFromActivationDurationMs( + TBase::GetHistogram("ActivateExecutorFromActivationDurationMs", NMonitoring::ExponentialHistogram(15, 2, 32))) + , HistogramSwitchToWorkFromActivationDurationMs( + TBase::GetHistogram("SwitchToWorkFromActivationDurationMs", NMonitoring::ExponentialHistogram(15, 2, 32))) + , HistogramSwitchToWorkFromCreateDurationMs( + TBase::GetHistogram("SwitchToWorkFromCreateDurationMs", NMonitoring::ExponentialHistogram(15, 2, 32))) { + } +}; + +} diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write.cpp 
b/ydb/core/tx/columnshard/counters/req_tracer.cpp similarity index 59% rename from ydb/core/tx/columnshard/transactions/operators/ev_write.cpp rename to ydb/core/tx/columnshard/counters/req_tracer.cpp index 3aa27316237f..e40342f21db1 100644 --- a/ydb/core/tx/columnshard/transactions/operators/ev_write.cpp +++ b/ydb/core/tx/columnshard/counters/req_tracer.cpp @@ -1,4 +1,5 @@ -#include "ev_write.h" +#include "req_tracer.h" namespace NKikimr::NColumnShard { + } diff --git a/ydb/core/tx/columnshard/counters/req_tracer.h b/ydb/core/tx/columnshard/counters/req_tracer.h new file mode 100644 index 000000000000..f70cd02e4840 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/req_tracer.h @@ -0,0 +1,51 @@ +#pragma once +#include "common/owner.h" +#include + +namespace NKikimr::NColumnShard { + +class TRequestsTracerCounters: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + NMonitoring::TDynamicCounters::TCounterPtr RequestedMinSnapshotAge; + NMonitoring::TDynamicCounters::TCounterPtr DefaultMinSnapshotAge; + NMonitoring::TDynamicCounters::TCounterPtr SnapshotsCount; + NMonitoring::TDynamicCounters::TCounterPtr SnapshotLock; + NMonitoring::TDynamicCounters::TCounterPtr SnapshotUnlock; + +public: + + TRequestsTracerCounters() + : TBase("cs_requests_tracing") + , RequestedMinSnapshotAge(TBase::GetValue("Snapshots/RequestedAge/Seconds")) + , DefaultMinSnapshotAge(TBase::GetValue("Snapshots/DefaultAge/Seconds")) + , SnapshotsCount(TBase::GetValue("Snapshots/Count")) + , SnapshotLock(TBase::GetDeriviative("Snapshots/Lock")) + , SnapshotUnlock(TBase::GetDeriviative("Snapshots/Unlock")) + { + + } + + void OnDefaultMinSnapshotInstant(const TInstant instant) const { + DefaultMinSnapshotAge->Set((TInstant::Now() - instant).Seconds()); + } + + void OnSnapshotsInfo(const ui32 count, const std::optional snapshotPlanStep) const { + if (snapshotPlanStep) { + RequestedMinSnapshotAge->Set((TInstant::Now() - snapshotPlanStep->GetPlanInstant()).Seconds()); + } 
else { + RequestedMinSnapshotAge->Set(0); + } + SnapshotsCount->Set(count); + + } + + void OnSnapshotLocked() const { + SnapshotLock->Add(1); + } + void OnSnapshotUnlocked() const { + SnapshotUnlock->Add(1); + } +}; + +} diff --git a/ydb/core/tx/columnshard/counters/scan.cpp b/ydb/core/tx/columnshard/counters/scan.cpp index 075aa0e880ec..cdfd42aa9bc4 100644 --- a/ydb/core/tx/columnshard/counters/scan.cpp +++ b/ydb/core/tx/columnshard/counters/scan.cpp @@ -88,12 +88,14 @@ TScanCounters::TScanCounters(const TString& module) ScanIntervalState = std::make_shared(*this); ResourcesSubscriberCounters = std::make_shared(); ScanDurationByStatus.resize((ui32)EStatusFinish::COUNT); + ScansFinishedByStatus.resize((ui32)EStatusFinish::COUNT); ui32 idx = 0; for (auto&& i : GetEnumAllValues()) { if (i == EStatusFinish::COUNT) { continue; } ScanDurationByStatus[(ui32)i] = TBase::GetHistogram("ScanDuration/" + ::ToString(i) + "/Milliseconds", NMonitoring::ExponentialHistogram(18, 2, 1)); + ScansFinishedByStatus[(ui32)i] = TBase::GetDeriviative("ScansFinished/" + ::ToString(i)); AFL_VERIFY(idx == (ui32)i); ++idx; } @@ -103,4 +105,8 @@ NKikimr::NColumnShard::TScanAggregations TScanCounters::BuildAggregations() { return TScanAggregations(GetModuleId()); } +void TScanCounters::FillStats(::NKikimrTableStats::TTableStats& output) const { + output.SetRangeReads(ScansFinishedByStatus[(ui32)EStatusFinish::Success]->Val()); +} + } diff --git a/ydb/core/tx/columnshard/counters/scan.h b/ydb/core/tx/columnshard/counters/scan.h index efaec82c563e..6d1202288514 100644 --- a/ydb/core/tx/columnshard/counters/scan.h +++ b/ydb/core/tx/columnshard/counters/scan.h @@ -1,6 +1,7 @@ #pragma once #include "common/owner.h" #include "common/histogram.h" +#include #include #include #include @@ -127,6 +128,7 @@ class TScanCounters: public TCommonCountersOwner { NMonitoring::TDynamicCounters::TCounterPtr AckWaitingDuration; std::vector ScanDurationByStatus; + std::vector ScansFinishedByStatus; 
NMonitoring::TDynamicCounters::TCounterPtr NoScanRecords; NMonitoring::TDynamicCounters::TCounterPtr NoScanIntervals; @@ -212,9 +214,10 @@ class TScanCounters: public TCommonCountersOwner { LogScanIntervals->Add(1); } - void OnScanDuration(const EStatusFinish status, const TDuration d) const { + void OnScanFinished(const EStatusFinish status, const TDuration d) const { AFL_VERIFY((ui32)status < ScanDurationByStatus.size()); ScanDurationByStatus[(ui32)status]->Collect(d.MilliSeconds()); + ScansFinishedByStatus[(ui32)status]->Add(1); } void AckWaitingInfo(const TDuration d) const { @@ -257,6 +260,8 @@ class TScanCounters: public TCommonCountersOwner { } TScanAggregations BuildAggregations(); + + void FillStats(::NKikimrTableStats::TTableStats& output) const; }; class TCounterGuard: TNonCopyable { @@ -282,40 +287,9 @@ class TCounterGuard: TNonCopyable { }; -class TReaderResourcesGuard { -private: - std::shared_ptr Allocated; - std::shared_ptr Requested; - const std::shared_ptr SignalCounter; - const ui64 Volume; - -public: - TReaderResourcesGuard(const ui64 volume, const std::shared_ptr& requested, const std::shared_ptr& signalWatcher) - : Requested(requested) - , SignalCounter(signalWatcher) - , Volume(volume) - { - AFL_VERIFY(Requested); - Requested->Add(Volume); - SignalCounter->AddBytes(volume); - } - - void InitResources(const std::shared_ptr& g) { - AFL_VERIFY(!Allocated); - AFL_VERIFY(g->GetMemory() == Volume)("volume", Volume)("allocated", g->GetMemory()); - Allocated = g; - } - - ~TReaderResourcesGuard() { - SignalCounter->RemoveBytes(Volume); - AFL_VERIFY(Requested->Sub(Volume) >= 0); - } -}; - class TConcreteScanCounters: public TScanCounters { private: using TBase = TScanCounters; - std::shared_ptr RequestedResourcesBytes; std::shared_ptr MergeTasksCount; std::shared_ptr AssembleTasksCount; std::shared_ptr ReadTasksCount; @@ -323,14 +297,6 @@ class TConcreteScanCounters: public TScanCounters { public: TScanAggregations Aggregations; - ui64 
GetRequestedMemoryBytes() const { - return RequestedResourcesBytes->Val(); - } - - std::shared_ptr BuildRequestedResourcesGuard(const ui64 volume) const { - return std::make_shared(volume, RequestedResourcesBytes, Aggregations.GetRequestedResourcesMemory()); - } - TCounterGuard GetMergeTasksGuard() const { return TCounterGuard(MergeTasksCount); } @@ -358,7 +324,6 @@ class TConcreteScanCounters: public TScanCounters { TConcreteScanCounters(const TScanCounters& counters) : TBase(counters) - , RequestedResourcesBytes(std::make_shared()) , MergeTasksCount(std::make_shared()) , AssembleTasksCount(std::make_shared()) , ReadTasksCount(std::make_shared()) diff --git a/ydb/core/tx/columnshard/counters/tablet_counters.h b/ydb/core/tx/columnshard/counters/tablet_counters.h new file mode 100644 index 000000000000..99292c4f73b8 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/tablet_counters.h @@ -0,0 +1,132 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NColumnShard { + +class TTabletCountersHandle { +private: + TTabletCountersBase& TabletCounters; + +public: + TTabletCountersHandle(TTabletCountersBase& stats) + : TabletCounters(stats) { + } + + void SetCounter(NColumnShard::ESimpleCounters counter, ui64 num) const { + TabletCounters.Simple()[counter].Set(num); + } + + void IncCounter(NColumnShard::ECumulativeCounters counter, ui64 num = 1) const { + TabletCounters.Cumulative()[counter].Increment(num); + } + + void IncCounter(NColumnShard::EPercentileCounters counter, const TDuration& latency) const { + TabletCounters.Percentile()[counter].IncrementFor(latency.MicroSeconds()); + } + + void IncCounter(NDataShard::ESimpleCounters counter, ui64 num = 1) const { + TabletCounters.Simple()[counter].Add(num); + } + + void IncCounter(NDataShard::ECumulativeCounters counter, ui64 num = 1) const { + TabletCounters.Cumulative()[counter].Increment(num); + } + + void IncCounter(NDataShard::EPercentileCounters counter, ui64 num) 
const { + TabletCounters.Percentile()[counter].IncrementFor(num); + } + + void IncCounter(NDataShard::EPercentileCounters counter, const TDuration& latency) const { + TabletCounters.Percentile()[counter].IncrementFor(latency.MilliSeconds()); + } + + ui64 GetValue(NColumnShard::ESimpleCounters counter) const { + return TabletCounters.Simple()[counter].Get(); + } + + ui64 GetValue(NColumnShard::ECumulativeCounters counter) const { + return TabletCounters.Cumulative()[counter].Get(); + } + + const TTabletPercentileCounter& GetValue(NColumnShard::EPercentileCounters counter) const { + return TabletCounters.Percentile()[counter]; + } + + ui64 GetValue(NDataShard::ESimpleCounters counter) const { + return TabletCounters.Simple()[counter].Get(); + } + + ui64 GetValue(NDataShard::ECumulativeCounters counter) const { + return TabletCounters.Cumulative()[counter].Get(); + } + + const TTabletPercentileCounter& GetCounter(NDataShard::EPercentileCounters counter) const { + return TabletCounters.Percentile()[counter]; + } + + void OnWriteSuccess(const ui64 blobsWritten, const ui64 bytesWritten) const { + IncCounter(NColumnShard::COUNTER_OPERATIONS_BLOBS_WRITTEN, blobsWritten); + IncCounter(NColumnShard::COUNTER_OPERATIONS_BYTES_WRITTEN, bytesWritten); + IncCounter(NColumnShard::COUNTER_WRITE_SUCCESS); + } + + void OnWriteFailure() const { + IncCounter(NColumnShard::COUNTER_WRITE_FAIL); + } + + void OnScanStarted(const NOlap::TSelectInfo::TStats& countersDelta) const { + IncCounter(NColumnShard::COUNTER_READ_INDEX_PORTIONS, countersDelta.Portions); + IncCounter(NColumnShard::COUNTER_READ_INDEX_BLOBS, countersDelta.Blobs); + IncCounter(NColumnShard::COUNTER_READ_INDEX_ROWS, countersDelta.Rows); + IncCounter(NColumnShard::COUNTER_READ_INDEX_BYTES, countersDelta.Bytes); + } + + void OnWriteCommitted(const NOlap::TInsertionSummary::TCounters& countersDelta) const { + IncCounter(COUNTER_BLOBS_COMMITTED, countersDelta.Rows); + IncCounter(COUNTER_BYTES_COMMITTED, countersDelta.Bytes); + 
IncCounter(COUNTER_RAW_BYTES_COMMITTED, countersDelta.RawBytes); + } + + void OnCompactionWriteIndexCompleted(bool success, const ui64 blobsWritten, const ui64 bytesWritten) const { + IncCounter(success ? NColumnShard::COUNTER_SPLIT_COMPACTION_SUCCESS : NColumnShard::COUNTER_SPLIT_COMPACTION_FAIL); + IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_BLOBS_WRITTEN, blobsWritten); + IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_BYTES_WRITTEN, bytesWritten); + } + + void OnInsertionWriteIndexCompleted(const ui64 blobsWritten, const ui64 bytesWritten, const TDuration duration) const { + IncCounter(NColumnShard::COUNTER_INDEXING_BLOBS_WRITTEN, blobsWritten); + IncCounter(NColumnShard::COUNTER_INDEXING_BYTES_WRITTEN, bytesWritten); + IncCounter(NColumnShard::COUNTER_INDEXING_TIME, duration.MilliSeconds()); + } + + void OnWritePutBlobsSuccess(const ui64 rowsWritten) const { + IncCounter(NColumnShard::COUNTER_OPERATIONS_ROWS_WRITTEN, rowsWritten); + } + + void OnDropPortionEvent(const ui64 rawBytes, const ui64 blobBytes, const ui64 rows) const { + IncCounter(NColumnShard::COUNTER_RAW_BYTES_ERASED, rawBytes); + IncCounter(NColumnShard::COUNTER_BYTES_ERASED, blobBytes); + IncCounter(NColumnShard::COUNTER_ROWS_ERASED, rows); + } + + void FillStats(::NKikimrTableStats::TTableStats& output) const { + output.SetRowUpdates(GetValue(COUNTER_OPERATIONS_ROWS_WRITTEN)); + output.SetRowDeletes(GetValue(COUNTER_ROWS_ERASED)); + output.SetRowReads(0); // all reads are range reads + output.SetRangeReadRows(GetValue(COUNTER_READ_INDEX_ROWS)); + + output.SetImmediateTxCompleted(GetValue(COUNTER_IMMEDIATE_TX_COMPLETED)); + output.SetTxRejectedByOverload(GetValue(COUNTER_WRITE_OVERLOAD)); + output.SetTxRejectedBySpace(GetValue(COUNTER_OUT_OF_SPACE)); + output.SetPlannedTxCompleted(GetValue(COUNTER_PLANNED_TX_COMPLETED)); + output.SetTxCompleteLagMsec(GetValue(COUNTER_TX_COMPLETE_LAG)); + } +}; + +} diff --git a/ydb/core/tx/columnshard/counters/tx_progress.h 
b/ydb/core/tx/columnshard/counters/tx_progress.h new file mode 100644 index 000000000000..24319a3ab748 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/tx_progress.h @@ -0,0 +1,110 @@ +#pragma once +#include "common/owner.h" + +#include +#include + +namespace NKikimr::NColumnShard { + +class TTxProgressCounters: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + using TOpType = TString; + + class TProgressCounters: public TCommonCountersOwner { + private: + using TBase = TCommonCountersOwner; + + public: + NMonitoring::TDynamicCounters::TCounterPtr RegisterTx; + NMonitoring::TDynamicCounters::TCounterPtr RegisterTxWithDeadline; + NMonitoring::TDynamicCounters::TCounterPtr StartProposeOnExecute; + NMonitoring::TDynamicCounters::TCounterPtr StartProposeOnComplete; + NMonitoring::TDynamicCounters::TCounterPtr FinishProposeOnExecute; + NMonitoring::TDynamicCounters::TCounterPtr FinishProposeOnComplete; + NMonitoring::TDynamicCounters::TCounterPtr FinishPlannedTx; + NMonitoring::TDynamicCounters::TCounterPtr AbortTx; + NMonitoring::THistogramPtr HistogramTxExecuteDuration; + NMonitoring::THistogramPtr HistogramTxLiveDuration; + NMonitoring::THistogramPtr HistogramTxProgressLag; + + TProgressCounters(const TCommonCountersOwner& owner) + : TBase(owner) + , RegisterTx(TBase::GetDeriviative("RegisterTx")) + , RegisterTxWithDeadline(TBase::GetDeriviative("RegisterTxWithDeadline")) + , StartProposeOnExecute(TBase::GetDeriviative("StartProposeOnExecute")) + , StartProposeOnComplete(TBase::GetDeriviative("StartProposeOnComplete")) + , FinishProposeOnExecute(TBase::GetDeriviative("FinishProposeOnExecute")) + , FinishProposeOnComplete(TBase::GetDeriviative("FinishProposeOnComplete")) + , FinishPlannedTx(TBase::GetDeriviative("FinishPlannedTx")) + , AbortTx(TBase::GetDeriviative("AbortTx")) + , HistogramTxExecuteDuration(TBase::GetHistogram("TxProgress/Execution/DurationMs", NMonitoring::ExponentialHistogram(18, 2, 5))) + , 
HistogramTxLiveDuration(TBase::GetHistogram("TxProgress/Live/DurationMs", NMonitoring::ExponentialHistogram(18, 2, 5))) + , HistogramTxProgressLag(TBase::GetHistogram("TxProgress/LagOnComplete/DurationMs", NMonitoring::ExponentialHistogram(18, 2, 5))) { + } + }; + + THashMap CountersByOpType; + +public: + void OnTxExecuteDuration(const TString& opType, const TDuration d) { + GetSubGroup(opType).HistogramTxExecuteDuration->Collect(d.MilliSeconds()); + } + + void OnTxLiveDuration(const TString& opType, const TDuration d) { + GetSubGroup(opType).HistogramTxLiveDuration->Collect(d.MilliSeconds()); + } + + void OnTxProgressLag(const TString& opType, const TDuration d) { + GetSubGroup(opType).HistogramTxProgressLag->Collect(d.MilliSeconds()); + } + + void OnRegisterTx(const TOpType& opType) { + GetSubGroup(opType).RegisterTx->Add(1); + } + + void OnRegisterTxWithDeadline(const TOpType& opType) { + GetSubGroup(opType).RegisterTxWithDeadline->Add(1); + } + + void OnStartProposeOnExecute(const TOpType& opType) { + GetSubGroup(opType).StartProposeOnExecute->Add(1); + } + + void OnStartProposeOnComplete(const TOpType& opType) { + GetSubGroup(opType).StartProposeOnComplete->Add(1); + } + + void OnFinishProposeOnExecute(const TOpType& opType) { + GetSubGroup(opType).FinishProposeOnExecute->Add(1); + } + + void OnFinishProposeOnComplete(const TOpType& opType) { + GetSubGroup(opType).FinishProposeOnComplete->Add(1); + } + + void OnFinishPlannedTx(const TOpType& opType) { + GetSubGroup(opType).FinishPlannedTx->Add(1); + } + + void OnAbortTx(const TOpType& opType) { + GetSubGroup(opType).AbortTx->Add(1); + } + + TTxProgressCounters(TCommonCountersOwner& owner) + : TBase(owner, "TxProgress") { + } + +private: + TProgressCounters& GetSubGroup(const TOpType& opType) { + auto findSubGroup = CountersByOpType.FindPtr(opType); + if (findSubGroup) { + return *findSubGroup; + } + + auto subGroup = TBase::CreateSubGroup("operation", opType); + return CountersByOpType.emplace(opType, 
subGroup).first->second; + } +}; + +} diff --git a/ydb/core/tx/columnshard/counters/writes_monitor.h b/ydb/core/tx/columnshard/counters/writes_monitor.h new file mode 100644 index 000000000000..ad8ad6e474cc --- /dev/null +++ b/ydb/core/tx/columnshard/counters/writes_monitor.h @@ -0,0 +1,46 @@ +#pragma once + +#include +#include +#include + +namespace NKikimr::NColumnShard { + +class TWritesMonitor { +private: + TTabletCountersBase& Stats; + + YDB_READONLY(ui64, WritesInFlight, 0); + YDB_READONLY(ui64, WritesSizeInFlight, 0); + +public: + TWritesMonitor(TTabletCountersBase& stats) + : Stats(stats) { + } + + void OnStartWrite(const ui64 dataSize) { + ++WritesInFlight; + WritesSizeInFlight += dataSize; + UpdateTabletCounters(); + } + + void OnFinishWrite(const ui64 dataSize, const ui32 writesCount = 1) { + Y_ABORT_UNLESS(WritesInFlight > 0); + Y_ABORT_UNLESS(WritesSizeInFlight >= dataSize); + WritesInFlight -= writesCount; + WritesSizeInFlight -= dataSize; + UpdateTabletCounters(); + } + + TString DebugString() const { + return TStringBuilder() << "{object=write_monitor;count=" << WritesInFlight << ";size=" << WritesSizeInFlight + << "}"; + } + +private: + void UpdateTabletCounters() { + Stats.Simple()[COUNTER_WRITES_IN_FLY].Set(WritesInFlight); + } +}; + +} diff --git a/ydb/core/tx/columnshard/counters/ya.make b/ydb/core/tx/columnshard/counters/ya.make index 65797cb34752..8707d6080e30 100644 --- a/ydb/core/tx/columnshard/counters/ya.make +++ b/ydb/core/tx/columnshard/counters/ya.make @@ -1,18 +1,23 @@ LIBRARY() SRCS( - indexation.cpp - scan.cpp - engine_logs.cpp + background_controller.cpp + counters_manager.cpp blobs_manager.cpp + column_tables.cpp columnshard.cpp - insert_table.cpp common_data.cpp + engine_logs.cpp + indexation.cpp + insert_table.cpp + req_tracer.cpp + scan.cpp splitter.cpp ) PEERDIR( library/cpp/monlib/dynamic_counters + ydb/core/tx/columnshard/counters/aggregation ydb/core/tx/columnshard/counters/common ydb/core/base ) diff --git 
a/ydb/core/tx/columnshard/data_sharing/common/transactions/tx_extension.h b/ydb/core/tx/columnshard/data_sharing/common/transactions/tx_extension.h index 055081e37173..ea9c99c7fd9d 100644 --- a/ydb/core/tx/columnshard/data_sharing/common/transactions/tx_extension.h +++ b/ydb/core/tx/columnshard/data_sharing/common/transactions/tx_extension.h @@ -15,11 +15,11 @@ class TExtendedTransactionBase: public NTabletFlatExecutor::TTransactionBaseTabletID())("tx_no", TabletTxNo)("tx_info", TxInfo); + NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build()("tablet_id", TBase::Self->TabletID())("local_tx_no", TabletTxNo)("tx_info", TxInfo); return DoExecute(txc, ctx); } virtual void Complete(const NActors::TActorContext& ctx) override final { - NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build()("tablet_id", TBase::Self->TabletID())("tx_no", TabletTxNo)("tx_info", TxInfo); + NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build()("tablet_id", TBase::Self->TabletID())("local_tx_no", TabletTxNo)("tx_info", TxInfo); return DoComplete(ctx); } diff --git a/ydb/core/tx/columnshard/engines/changes/abstract/abstract.cpp b/ydb/core/tx/columnshard/engines/changes/abstract/abstract.cpp index 7dfe36689d48..7d37981a9039 100644 --- a/ydb/core/tx/columnshard/engines/changes/abstract/abstract.cpp +++ b/ydb/core/tx/columnshard/engines/changes/abstract/abstract.cpp @@ -46,7 +46,7 @@ void TColumnEngineChanges::WriteIndexOnComplete(NColumnShard::TColumnShard* self DoWriteIndexOnComplete(self, context); if (self) { OnFinish(*self, context); - self->IncCounter(GetCounterIndex(context.FinishedSuccessfully)); + self->Counters.GetTabletCounters()->IncCounter(GetCounterIndex(context.FinishedSuccessfully)); } } diff --git a/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp b/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp index 3aa29ed01a13..7917b77682b9 100644 --- 
a/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp +++ b/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp @@ -17,19 +17,19 @@ void TCleanupPortionsColumnEngineChanges::DoDebugString(TStringOutput& out) cons void TCleanupPortionsColumnEngineChanges::DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) { THashSet pathIds; - if (self) { - THashMap> blobIdsByStorage; - for (auto&& p : PortionsToDrop) { - p.RemoveFromDatabase(context.DBWrapper); - - p.FillBlobIdsByStorage(blobIdsByStorage, context.EngineLogs.GetVersionedIndex()); - pathIds.emplace(p.GetPathId()); - } - for (auto&& i : blobIdsByStorage) { - auto action = BlobsAction.GetRemoving(i.first); - for (auto&& b : i.second) { - action->DeclareRemove((TTabletId)self->TabletID(), b); - } + if (!self) { + return; + } + THashMap> blobIdsByStorage; + for (auto&& p : PortionsToDrop) { + p.RemoveFromDatabase(context.DBWrapper); + p.FillBlobIdsByStorage(blobIdsByStorage, context.EngineLogs.GetVersionedIndex()); + pathIds.emplace(p.GetPathId()); + } + for (auto&& i : blobIdsByStorage) { + auto action = BlobsAction.GetRemoving(i.first); + for (auto&& b : i.second) { + action->DeclareRemove((TTabletId)self->TabletID(), b); } } } @@ -41,9 +41,9 @@ void TCleanupPortionsColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::T } } if (self) { - self->IncCounter(NColumnShard::COUNTER_PORTIONS_ERASED, PortionsToDrop.size()); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_PORTIONS_ERASED, PortionsToDrop.size()); for (auto&& p : PortionsToDrop) { - self->IncCounter(NColumnShard::COUNTER_RAW_BYTES_ERASED, p.GetTotalRawBytes()); + self->Counters.GetTabletCounters()->OnDropPortionEvent(p.GetTotalRawBytes(), p.GetTotalBlobBytes(), p.NumRows()); } } } diff --git a/ydb/core/tx/columnshard/engines/changes/cleanup_tables.cpp b/ydb/core/tx/columnshard/engines/changes/cleanup_tables.cpp index 3bd8cb6bc333..34d7354b5124 100644 --- 
a/ydb/core/tx/columnshard/engines/changes/cleanup_tables.cpp +++ b/ydb/core/tx/columnshard/engines/changes/cleanup_tables.cpp @@ -17,13 +17,15 @@ void TCleanupTablesColumnEngineChanges::DoDebugString(TStringOutput& out) const void TCleanupTablesColumnEngineChanges::DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) { if (self && context.DB) { for (auto&& t : TablesToDrop) { - self->TablesManager.TryFinalizeDropPathOnExecute(*context.DB, t); + AFL_VERIFY(!self->InsertTable->HasDataInPathId(t)); + AFL_VERIFY(self->TablesManager.TryFinalizeDropPathOnExecute(*context.DB, t)); } } } void TCleanupTablesColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& /*context*/) { for (auto&& t : TablesToDrop) { + self->InsertTable->ErasePath(t); self->TablesManager.TryFinalizeDropPathOnComplete(t); } self->Subscribers->OnEvent(std::make_shared(TablesToDrop)); diff --git a/ydb/core/tx/columnshard/engines/changes/compaction.cpp b/ydb/core/tx/columnshard/engines/changes/compaction.cpp index a94d160158e6..2441ce4248b8 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction.cpp @@ -53,7 +53,7 @@ void TCompactColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { void TCompactColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) { TBase::DoWriteIndexOnComplete(self, context); if (self) { - self->IncCounter(NColumnShard::COUNTER_COMPACTION_TIME, context.Duration.MilliSeconds()); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_COMPACTION_TIME, context.Duration.MilliSeconds()); } } diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/abstract/merger.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/abstract/merger.cpp index 57a26422d8ca..9977ef51219c 100644 --- 
a/ydb/core/tx/columnshard/engines/changes/compaction/abstract/merger.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction/abstract/merger.cpp @@ -2,14 +2,30 @@ namespace NKikimr::NOlap::NCompaction { -void IColumnMerger::Start(const std::vector>& input) { +void IColumnMerger::Start(const std::vector>& input, TMergingContext& mergeContext) { AFL_VERIFY(!Started); Started = true; - // for (auto&& i : input) { - // AFL_VERIFY(i->GetDataType()->id() == Context.GetResultField()->type()->id())("input", i->GetDataType()->ToString())( - // "result", Context.GetResultField()->ToString()); - // } - return DoStart(input); + for (auto&& i : input) { + if (!i) { + continue; + } + AFL_VERIFY(i->GetDataType()->Equals(*Context.GetResultField()->type()))("input", i->GetDataType()->ToString())( + "result", Context.GetResultField()->ToString()); + } + return DoStart(input, mergeContext); } + TMergingChunkContext::TMergingChunkContext(const std::shared_ptr& pkAndAddresses) { + auto columnPortionIdx = pkAndAddresses->GetColumnByName(IColumnMerger::PortionIdFieldName); + auto columnPortionRecordIdx = pkAndAddresses->GetColumnByName(IColumnMerger::PortionRecordIndexFieldName); + Y_ABORT_UNLESS(columnPortionIdx && columnPortionRecordIdx); + Y_ABORT_UNLESS(columnPortionIdx->type_id() == arrow::UInt16Type::type_id); + Y_ABORT_UNLESS(columnPortionRecordIdx->type_id() == arrow::UInt32Type::type_id); + IdxArray = static_pointer_cast(columnPortionIdx); + RecordIdxArray = static_pointer_cast(columnPortionRecordIdx); + + AFL_VERIFY(pkAndAddresses->num_rows() == IdxArray->length()); + AFL_VERIFY(pkAndAddresses->num_rows() == RecordIdxArray->length()); } + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/abstract/merger.h b/ydb/core/tx/columnshard/engines/changes/compaction/abstract/merger.h index dc547e69dcef..bf8da708f36b 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/abstract/merger.h +++ 
b/ydb/core/tx/columnshard/engines/changes/compaction/abstract/merger.h @@ -1,15 +1,124 @@ #pragma once -#include #include +#include namespace NKikimr::NOlap::NCompaction { + +class TMergingChunkContext { +private: + std::shared_ptr IdxArray; + std::shared_ptr RecordIdxArray; + +public: + const arrow::UInt16Array& GetIdxArray() const { + return *IdxArray; + } + const arrow::UInt32Array& GetRecordIdxArray() const { + return *RecordIdxArray; + } + + TMergingChunkContext(const std::shared_ptr& pkAndAddresses); +}; + +class TMergingContext { +public: + class TAddress { + private: + YDB_ACCESSOR(i32, ChunkIdx, -1); + YDB_ACCESSOR(i32, GlobalPosition, -1); + + public: + TAddress() = default; + bool operator<(const TAddress& item) const { + if (ChunkIdx < item.ChunkIdx) { + return true; + } else if (item.ChunkIdx < ChunkIdx) { + return false; + } else { + return GlobalPosition < item.GlobalPosition; + } + } + + bool IsValid() const { + return ChunkIdx >= 0 && GlobalPosition >= 0; + } + }; + +private: + YDB_READONLY_DEF(std::vector, Chunks); + std::vector> InputContainers; + + std::optional>> RemapPortionIndexToResultIndex; + +public: + const TMergingChunkContext& GetChunk(const ui32 idx) const { + AFL_VERIFY(idx < Chunks.size()); + return Chunks[idx]; + } + + bool HasRemapInfo(const ui32 idx) { + return GetRemapPortionIndexToResultIndex(idx).size(); + } + + const std::vector>& GetRemapPortionIndexToResultIndex() { + if (!RemapPortionIndexToResultIndex) { + std::vector> result; + result.resize(InputContainers.size()); + { + ui32 idx = 0; + for (auto&& p : InputContainers) { + if (p) { + result[idx].resize(p->GetRecordsCount()); + } + ++idx; + } + } + ui32 chunkIdx = 0; + for (auto&& i : Chunks) { + auto& pIdxArray = i.GetIdxArray(); + auto& pRecordIdxArray = i.GetRecordIdxArray(); + for (ui32 recordIdx = 0; recordIdx < i.GetIdxArray().length(); ++recordIdx) { + auto& sourceRemap = result[pIdxArray.Value(recordIdx)]; + if (sourceRemap.size()) { + 
sourceRemap[pRecordIdxArray.Value(recordIdx)].SetChunkIdx(chunkIdx); + sourceRemap[pRecordIdxArray.Value(recordIdx)].SetGlobalPosition(recordIdx); + } + } + ++chunkIdx; + } + RemapPortionIndexToResultIndex = std::move(result); + } + return *RemapPortionIndexToResultIndex; + } + + const std::vector& GetRemapPortionIndexToResultIndex(const ui32 idx) { + auto& result = GetRemapPortionIndexToResultIndex(); + AFL_VERIFY(idx < result.size()); + return result[idx]; + } + + TMergingContext(const std::vector>& pkAndAddresses, + const std::vector>& inputContainers) + : InputContainers(inputContainers) + { + for (auto&& i : pkAndAddresses) { + Chunks.emplace_back(i); + } + } +}; + class IColumnMerger { +public: + using TFactory = NObjectFactory::TParametrizedObjectFactory; + private: bool Started = false; - virtual std::vector DoExecute( - const NCompaction::TColumnMergeContext& context, const arrow::UInt16Array& pIdxArray, const arrow::UInt32Array& pRecordIdxArray) = 0; - virtual void DoStart(const std::vector>& input) = 0; + virtual std::vector DoExecute(const TChunkMergeContext& context, TMergingContext& mergeContext) = 0; + virtual void DoStart(const std::vector>& input, TMergingContext& mergeContext) = 0; + +protected: + const TColumnMergeContext& Context; public: static inline const TString PortionIdFieldName = "$$__portion_id"; @@ -19,25 +128,15 @@ class IColumnMerger { static inline const std::shared_ptr PortionRecordIndexField = std::make_shared(PortionRecordIndexFieldName, std::make_shared()); + IColumnMerger(const TColumnMergeContext& context) + : Context(context) { + } virtual ~IColumnMerger() = default; - void Start(const std::vector>& input); - - std::vector Execute( - const NCompaction::TColumnMergeContext& context, const std::shared_ptr& remap) { - - auto columnPortionIdx = remap->GetColumnByName(IColumnMerger::PortionIdFieldName); - auto columnPortionRecordIdx = remap->GetColumnByName(IColumnMerger::PortionRecordIndexFieldName); - 
Y_ABORT_UNLESS(columnPortionIdx && columnPortionRecordIdx); - Y_ABORT_UNLESS(columnPortionIdx->type_id() == arrow::UInt16Type::type_id); - Y_ABORT_UNLESS(columnPortionRecordIdx->type_id() == arrow::UInt32Type::type_id); - const arrow::UInt16Array& pIdxArray = static_cast(*columnPortionIdx); - const arrow::UInt32Array& pRecordIdxArray = static_cast(*columnPortionRecordIdx); - - AFL_VERIFY(remap->num_rows() == pIdxArray.length()); - AFL_VERIFY(remap->num_rows() == pRecordIdxArray.length()); + void Start(const std::vector>& input, TMergingContext& mergeContext); - return DoExecute(context, pIdxArray, pRecordIdxArray); + std::vector Execute(const TChunkMergeContext& context, TMergingContext& mergeContext) { + return DoExecute(context, mergeContext); } }; diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/common/context.h b/ydb/core/tx/columnshard/engines/changes/compaction/common/context.h index 80356224909f..ebe3394f299e 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/common/context.h +++ b/ydb/core/tx/columnshard/engines/changes/compaction/common/context.h @@ -1,8 +1,8 @@ #pragma once +#include #include #include #include -#include #include @@ -12,25 +12,29 @@ class TColumnMergeContext { private: YDB_READONLY(ui32, ColumnId, 0); ISnapshotSchema::TPtr SchemaInfo; - YDB_READONLY_DEF(TColumnSaver, Saver); + YDB_ACCESSOR_DEF(TColumnSaver, Saver); YDB_READONLY_DEF(std::shared_ptr, Loader); YDB_READONLY_DEF(std::shared_ptr, ResultField); - YDB_READONLY(ui32, PortionRowsCountLimit, 10000); YDB_READONLY(ui64, ChunkPackedBytesLimit, 7 * 1024 * 1024); YDB_READONLY(ui64, ExpectedBlobPackedBytes, 4 * 1024 * 1024); YDB_READONLY(ui64, ChunkRawBytesLimit, 50 * 1024 * 1024); YDB_READONLY(ui64, StorePackedChunkSizeLimit, 512 * 1024); YDB_READONLY(bool, UseWholeChunksOptimization, true); - std::optional ColumnStat; + std::optional ColumnStat; const TIndexInfo& IndexInfo; + public: + std::shared_ptr GetDefaultValue() const { + return 
Loader->GetDefaultValue(); + } + ISnapshotSchema::TPtr GetSchemaInfo() const { return SchemaInfo; } - const std::optional& GetColumnStat() const { + const std::optional& GetColumnStat() const { return ColumnStat; } @@ -42,25 +46,35 @@ class TColumnMergeContext { return IndexInfo; } - TColumnMergeContext(const ui32 columnId, const ISnapshotSchema::TPtr& schema, const ui32 portionRowsCountLimit, - const ui32 chunkRawBytesLimit, const std::optional& columnStat, - const NArrow::NSerialization::TSerializerContainer& overrideSerializer) + TColumnMergeContext(const ui32 columnId, const ISnapshotSchema::TPtr& schema, const ui32 chunkRawBytesLimit, + const std::optional& columnStat) : ColumnId(columnId) , SchemaInfo(schema) , Saver(schema->GetColumnSaver(columnId)) , Loader(schema->GetColumnLoaderOptional(columnId)) , ResultField(schema->GetIndexInfo().GetColumnFieldVerified(columnId)) - , PortionRowsCountLimit(portionRowsCountLimit) , ChunkRawBytesLimit(chunkRawBytesLimit) , UseWholeChunksOptimization(!schema->GetIndexInfo().GetReplaceKey()->GetFieldByName(ResultField->name())) , ColumnStat(columnStat) , IndexInfo(schema->GetIndexInfo()) { - Y_ABORT_UNLESS(PortionRowsCountLimit); Y_ABORT_UNLESS(ChunkRawBytesLimit); - if (!!overrideSerializer) { - Saver.ResetSerializer(overrideSerializer); - } } }; -} +class TChunkMergeContext { +private: + YDB_READONLY(ui32, PortionRowsCountLimit, 10000); + YDB_READONLY(ui32, BatchIdx, 0); + YDB_READONLY(ui32, RecordsCount, 0); + +public: + TChunkMergeContext(const ui32 portionRowsCountLimit, const ui32 batchIdx, const ui32 recordsCount) + : PortionRowsCountLimit(portionRowsCountLimit) + , BatchIdx(batchIdx) + , RecordsCount(recordsCount) + { + AFL_VERIFY(RecordsCount); + AFL_VERIFY(PortionRowsCountLimit); + } +}; +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp index 599b1b2d3159..72ca7d2019ed 100644 --- 
a/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp @@ -2,6 +2,7 @@ #include "abstract/merger.h" #include "plain/logic.h" +#include "sparsed/logic.h" #include #include @@ -11,7 +12,7 @@ namespace NKikimr::NOlap::NCompaction { -std::vector TMerger::Execute(const std::shared_ptr& stats, +std::vector TMerger::Execute(const std::shared_ptr& stats, const NArrow::NMerger::TIntervalPositions& checkPoints, const std::shared_ptr& resultFiltered, const ui64 pathId, const std::optional shardingActualVersion) { AFL_VERIFY(Batches.size() == Filters.size()); @@ -30,8 +31,6 @@ std::vector TMerger::Execute(c ui32 idx = 0; for (auto&& batch : Batches) { - AFL_VERIFY(batch->GetColumnsCount() == resultFiltered->GetColumnsCount())("data", batch->GetColumnsCount())( - "schema", resultFiltered->GetColumnsCount()); { NArrow::NConstruction::IArrayBuilder::TPtr column = std::make_shared>>( @@ -52,43 +51,57 @@ std::vector TMerger::Execute(c std::vector>> chunkGroups; chunkGroups.resize(batchResults.size()); - for (auto&& columnId : resultFiltered->GetColumnIds()) { - NActors::TLogContextGuard logGuard( - NActors::TLogContextBuilder::Build()("field_name", resultFiltered->GetIndexInfo().GetColumnName(columnId))); - auto columnInfo = stats->GetColumnInfo(columnId); - std::shared_ptr merger = std::make_shared(); - // resultFiltered->BuildColumnMergerVerified(columnId); - { - std::vector> parts; - for (auto&& p : Batches) { - parts.emplace_back(p->GetColumnVerified(resultFiltered->GetFieldIndex(columnId))); + using TColumnData = std::vector>; + THashMap columnsData; + { + ui32 batchIdx = 0; + for (auto&& p : Batches) { + ui32 columnIdx = 0; + for (auto&& i : p->GetSchema()->GetFields()) { + const std::optional columnId = resultFiltered->GetIndexInfo().GetColumnIdOptional(i->name()); + if (columnId) { + auto it = columnsData.find(*columnId); + if (it == columnsData.end()) { + it = columnsData.emplace(*columnId, 
TColumnData(Batches.size())).first; + } + it->second[batchIdx] = p->GetColumnVerified(columnIdx); + } + ++columnIdx; } + ++batchIdx; + } + } + + TMergingContext mergingContext(batchResults, Batches); - merger->Start(parts); + for (auto&& [columnId, columnData] : columnsData) { + const TString& columnName = resultFiltered->GetIndexInfo().GetColumnName(columnId); + NActors::TLogContextGuard logGuard(NActors::TLogContextBuilder::Build()("field_name", columnName)); + auto columnInfo = stats->GetColumnInfo(columnId); + + TColumnMergeContext commonContext( + columnId, resultFiltered, NSplitter::TSplitSettings().GetExpectedUnpackColumnChunkRawSize(), columnInfo); + if (OptimizationWritingPackMode) { + commonContext.MutableSaver().AddSerializerWithBorder( + 100, std::make_shared(arrow::Compression::type::UNCOMPRESSED)); + commonContext.MutableSaver().AddSerializerWithBorder( + Max(), std::make_shared(arrow::Compression::type::LZ4_FRAME)); } - std::map> columnChunks; + THolder merger = + IColumnMerger::TFactory::MakeHolder(commonContext.GetLoader()->GetAccessorConstructor().GetClassName(), commonContext); + AFL_VERIFY(!!merger)("problem", "cannot create merger")( + "class_name", commonContext.GetLoader()->GetAccessorConstructor().GetClassName()); + merger->Start(columnData, mergingContext); + ui32 batchIdx = 0; for (auto&& batchResult : batchResults) { const ui32 portionRecordsCountLimit = batchResult->num_rows() / (batchResult->num_rows() / NSplitter::TSplitSettings().GetExpectedRecordsCountOnPage() + 1) + 1; - NArrow::NSerialization::TSerializerContainer externalSaver; - if (OptimizationWritingPackMode) { - if (batchResult->num_rows() < 100) { - externalSaver = NArrow::NSerialization::TSerializerContainer( - std::make_shared(arrow::Compression::type::UNCOMPRESSED)); - } else { - externalSaver = NArrow::NSerialization::TSerializerContainer( - std::make_shared(arrow::Compression::type::LZ4_FRAME)); - } - } - - NCompaction::TColumnMergeContext context(columnId, 
resultFiltered, portionRecordsCountLimit, - NSplitter::TSplitSettings().GetExpectedUnpackColumnChunkRawSize(), columnInfo, externalSaver); - - chunkGroups[batchIdx][columnId] = merger->Execute(context, batchResult); + TChunkMergeContext context(portionRecordsCountLimit, batchIdx, batchResult->num_rows()); + chunkGroups[batchIdx][columnId] = merger->Execute(context, mergingContext); ++batchIdx; } } @@ -129,7 +142,7 @@ std::vector TMerger::Execute(c } batchSlices.emplace_back(portionColumns, schemaDetails, Context.Counters.SplitterCounters); } - TSimilarPacker slicer(NSplitter::TSplitSettings().GetExpectedPortionSize()); + NArrow::NSplitter::TSimilarPacker slicer(NSplitter::TSplitSettings().GetExpectedPortionSize()); auto packs = slicer.Split(batchSlices); ui32 recordIdx = 0; diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merger.h b/ydb/core/tx/columnshard/engines/changes/compaction/merger.h index be9beae47584..ed862f2e25dd 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/merger.h +++ b/ydb/core/tx/columnshard/engines/changes/compaction/merger.h @@ -1,8 +1,8 @@ #pragma once -#include #include +#include #include -#include +#include #include #include #include @@ -25,14 +25,11 @@ class TMerger { TMerger(const TConstructionContext& context, const TSaverContext& saverContext) : Context(context) - , SaverContext(saverContext) - { - + , SaverContext(saverContext) { } TMerger(const TConstructionContext& context, const TSaverContext& saverContext, - std::vector>&& batches, - std::vector>&& filters) + std::vector>&& batches, std::vector>&& filters) : Batches(std::move(batches)) , Filters(std::move(filters)) , Context(context) @@ -40,9 +37,8 @@ class TMerger { AFL_VERIFY(Batches.size() == Filters.size()); } - std::vector Execute( - const std::shared_ptr& stats, - const NArrow::NMerger::TIntervalPositions& checkPoints, - const std::shared_ptr& resultFiltered, const ui64 pathId, const std::optional shardingActualVersion); + std::vector 
Execute(const std::shared_ptr& stats, + const NArrow::NMerger::TIntervalPositions& checkPoints, const std::shared_ptr& resultFiltered, + const ui64 pathId, const std::optional shardingActualVersion); }; -} +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_cursor.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_cursor.cpp index 65412522c879..95b7acab74dc 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_cursor.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_cursor.cpp @@ -5,33 +5,40 @@ namespace NKikimr::NOlap::NCompaction { bool TPortionColumnCursor::Fetch(TMergedColumn& column) { Y_ABORT_UNLESS(RecordIndexStart); - if (CurrentChunk && CurrentChunk->GetStartPosition() <= *RecordIndexStart && *RecordIndexStart < CurrentChunk->GetFinishPosition()) { - + if (!BlobChunks) { + if (!DefaultArray || DefaultArray->length() < RecordIndexFinish - *RecordIndexStart) { + DefaultArray = NArrow::TThreadSimpleArraysCache::Get(DataType, DefaultValue, RecordIndexFinish - *RecordIndexStart); + } + column.AppendSlice(DefaultArray, 0, RecordIndexFinish - *RecordIndexStart); } else { - CurrentChunk = BlobChunks->GetChunk(CurrentChunk, *RecordIndexStart); - } - - ui32 currentStart = *RecordIndexStart; - while (RecordIndexFinish >= CurrentChunk->GetFinishPosition()) { - column.AppendSlice( - CurrentChunk->GetArray(), currentStart - CurrentChunk->GetStartPosition(), CurrentChunk->GetFinishPosition() - currentStart); - currentStart = CurrentChunk->GetFinishPosition(); - if (currentStart < BlobChunks->GetRecordsCount()) { - CurrentChunk = BlobChunks->GetChunk(CurrentChunk, currentStart); + if (CurrentChunk && CurrentChunk->GetAddress().Contains(*RecordIndexStart)) { } else { - CurrentChunk.reset(); - break; + CurrentChunk = BlobChunks->GetChunk(CurrentChunk, *RecordIndexStart); } - } - if (currentStart < RecordIndexFinish) { - 
AFL_VERIFY(CurrentChunk); - Y_ABORT_UNLESS(RecordIndexFinish < CurrentChunk->GetFinishPosition()); - column.AppendSlice(CurrentChunk->GetArray(), currentStart - CurrentChunk->GetStartPosition(), RecordIndexFinish - currentStart); - } + ui32 currentStart = *RecordIndexStart; + while (CurrentChunk->GetAddress().GetGlobalFinishPosition() <= RecordIndexFinish) { + column.AppendSlice(CurrentChunk->GetArray(), CurrentChunk->GetAddress().GetLocalIndex(currentStart), + CurrentChunk->GetAddress().GetGlobalFinishPosition() - currentStart); + currentStart = CurrentChunk->GetAddress().GetGlobalFinishPosition(); + if (currentStart < BlobChunks->GetRecordsCount()) { + CurrentChunk = BlobChunks->GetChunk(CurrentChunk, currentStart); + } else { + CurrentChunk.reset(); + break; + } + } + if (currentStart < RecordIndexFinish) { + AFL_VERIFY(CurrentChunk); + Y_ABORT_UNLESS(RecordIndexFinish < CurrentChunk->GetAddress().GetGlobalFinishPosition()); + column.AppendSlice( + CurrentChunk->GetArray(), CurrentChunk->GetAddress().GetLocalIndex(currentStart), RecordIndexFinish - currentStart); + } + } RecordIndexStart.reset(); RecordIndexFinish = 0; + return true; } diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_cursor.h b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_cursor.h index 0e54ade2b372..4180444c9487 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_cursor.h +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_cursor.h @@ -1,18 +1,24 @@ #pragma once #include "merged_column.h" -#include + #include #include +#include + #include namespace NKikimr::NOlap::NCompaction { class TPortionColumnCursor { private: - std::optional CurrentChunk; + std::optional CurrentChunk; std::shared_ptr BlobChunks; + std::shared_ptr DefaultArray; std::optional RecordIndexStart; + std::shared_ptr DataType; + std::shared_ptr DefaultValue; YDB_READONLY(ui32, RecordIndexFinish, 0); + public: ~TPortionColumnCursor() { 
AFL_VERIFY(!RecordIndexStart)("start", RecordIndexStart)("finish", RecordIndexFinish); @@ -24,7 +30,14 @@ class TPortionColumnCursor { TPortionColumnCursor(const std::shared_ptr& columnChunks) : BlobChunks(columnChunks) { + AFL_VERIFY(BlobChunks); + } + + TPortionColumnCursor(const std::shared_ptr& dataType, const std::shared_ptr& defaultValue) + : DataType(dataType) + , DefaultValue(defaultValue) { + AFL_VERIFY(DataType); } }; -} +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_portion_chunk.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_portion_chunk.cpp index 1cd921676f01..dde08cabb4fc 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_portion_chunk.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_portion_chunk.cpp @@ -1,52 +1,27 @@ #include "column_portion_chunk.h" + +#include #include -#include #include #include namespace NKikimr::NOlap::NCompaction { -std::shared_ptr TColumnPortion::AppendBlob(const TString& data, const TColumnRecord& columnChunk, ui32& remained) { -// if (CurrentPortionRecords + columnChunk.GetMeta().GetNumRows() <= Context.GetPortionRowsCountLimit() && -// columnChunk.GetMeta().GetRawBytes() < Context.GetChunkRawBytesLimit() && -// data.size() < Context.GetChunkPackedBytesLimit() && -// columnChunk.GetMeta().GetRawBytes() > Context.GetStorePackedChunkSizeLimit() && Context.GetSaver().IsHardPacker() && -// Context.GetUseWholeChunksOptimization()) -// { -// NChanges::TGeneralCompactionCounters::OnFullBlobAppend(columnChunk.BlobRange.GetBlobSize()); -// FlushBuffer(); -// Chunks.emplace_back(std::make_shared(data, columnChunk, Context.GetSchemaInfo())); -// PackedSize += Chunks.back()->GetPackedSize(); -// CurrentPortionRecords += columnChunk.GetMeta().GetNumRows(); -// return nullptr; -// } else { - NChanges::TGeneralCompactionCounters::OnSplittedBlobAppend(columnChunk.BlobRange.GetSize()); - auto 
batch = NArrow::TStatusValidator::GetValid(Context.GetLoader()->Apply(data)); - AFL_VERIFY(batch->num_columns() == 1); - auto batchArray = batch->column(0); - remained = AppendSlice(batchArray, 0, batch->num_rows()); - if (remained) { - return batchArray; - } else { - return nullptr; - } -// } -} - ui32 TColumnPortion::AppendSlice(const std::shared_ptr& a, const ui32 startIndex, const ui32 length) { Y_ABORT_UNLESS(a); Y_ABORT_UNLESS(length); - Y_ABORT_UNLESS(CurrentPortionRecords < Context.GetPortionRowsCountLimit()); + Y_ABORT_UNLESS(CurrentPortionRecords < ChunkContext.GetPortionRowsCountLimit()); Y_ABORT_UNLESS(startIndex + length <= a->length()); AFL_VERIFY(Type->id() == a->type_id())("own", Type->ToString())("a", a->type()->ToString()); ui32 i = startIndex; const ui32 packedRecordSize = Context.GetColumnStat() ? Context.GetColumnStat()->GetPackedRecordSize() : 0; for (; i < startIndex + length; ++i) { ui64 recordSize = 0; - AFL_VERIFY(NArrow::Append(*Builder, *a, i, &recordSize))("a", a->ToString())("a_type", a->type()->ToString())("builder_type", Builder->type()->ToString()); + AFL_VERIFY(NArrow::Append(*Builder, *a, i, &recordSize))("a", a->ToString())("a_type", a->type()->ToString())( + "builder_type", Builder->type()->ToString()); CurrentChunkRawSize += recordSize; PredictedPackedBytes += packedRecordSize ? 
packedRecordSize : (recordSize / 2); - if (++CurrentPortionRecords == Context.GetPortionRowsCountLimit()) { + if (++CurrentPortionRecords == ChunkContext.GetPortionRowsCountLimit()) { FlushBuffer(); ++i; break; @@ -59,17 +34,17 @@ ui32 TColumnPortion::AppendSlice(const std::shared_ptr& a, const u } bool TColumnPortion::FlushBuffer() { - if (Builder->length()) { - auto newArrayChunk = NArrow::TStatusValidator::GetValid(Builder->Finish()); - Chunks.emplace_back(std::make_shared(Context.GetSaver().Apply(newArrayChunk, Context.GetResultField()), newArrayChunk, TChunkAddress(Context.GetColumnId(), 0), ColumnInfo)); - Builder = Context.MakeBuilder(); - CurrentChunkRawSize = 0; - PredictedPackedBytes = 0; - PackedSize += Chunks.back()->GetPackedSize(); - return true; - } else { + if (!Builder->length()) { return false; } + auto newArrayChunk = NArrow::TStatusValidator::GetValid(Builder->Finish()); + Chunks.emplace_back(std::make_shared(Context.GetSaver().Apply(newArrayChunk, Context.GetResultField()), + std::make_shared(newArrayChunk), TChunkAddress(Context.GetColumnId(), 0), ColumnInfo)); + Builder = Context.MakeBuilder(); + CurrentChunkRawSize = 0; + PredictedPackedBytes = 0; + PackedSize += Chunks.back()->GetPackedSize(); + return true; } -} +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_portion_chunk.h b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_portion_chunk.h index 98fe703f7e1a..ce10642ae95d 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_portion_chunk.h +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_portion_chunk.h @@ -17,6 +17,7 @@ class TColumnPortion: public TColumnPortionResult { std::unique_ptr Builder; std::shared_ptr Type; const TColumnMergeContext& Context; + const TChunkMergeContext& ChunkContext; YDB_READONLY(ui64, CurrentChunkRawSize, 0); double PredictedPackedBytes = 0; const TSimpleColumnInfo ColumnInfo; @@ 
-24,22 +25,22 @@ class TColumnPortion: public TColumnPortionResult { ui64 CurrentPortionRecords = 0; public: - TColumnPortion(const TColumnMergeContext& context) + TColumnPortion(const TColumnMergeContext& context, const TChunkMergeContext& chunkContext) : TBase(context.GetColumnId()) , Context(context) + , ChunkContext(chunkContext) , ColumnInfo(Context.GetIndexInfo().GetColumnFeaturesVerified(context.GetColumnId())) { Builder = Context.MakeBuilder(); Type = Builder->type(); } bool IsFullPortion() const { - Y_ABORT_UNLESS(CurrentPortionRecords <= Context.GetPortionRowsCountLimit()); - return CurrentPortionRecords == Context.GetPortionRowsCountLimit(); + Y_ABORT_UNLESS(CurrentPortionRecords <= ChunkContext.GetPortionRowsCountLimit()); + return CurrentPortionRecords == ChunkContext.GetPortionRowsCountLimit(); } bool FlushBuffer(); - std::shared_ptr AppendBlob(const TString& data, const TColumnRecord& columnChunk, ui32& remained); ui32 AppendSlice(const std::shared_ptr& a, const ui32 startIndex, const ui32 length); }; diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/logic.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/plain/logic.cpp index ac8cb351c572..0081d33d01b5 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/plain/logic.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/logic.cpp @@ -2,31 +2,37 @@ namespace NKikimr::NOlap::NCompaction { -void TPlainMerger::DoStart(const std::vector>& input) { +void TPlainMerger::DoStart(const std::vector>& input, TMergingContext& /*mContext*/) { for (auto&& p : input) { - Cursors.emplace_back(NCompaction::TPortionColumnCursor(p)); + if (p) { + Cursors.emplace_back(NCompaction::TPortionColumnCursor(p)); + } else { + Cursors.emplace_back( + NCompaction::TPortionColumnCursor(Context.GetLoader()->GetResultField()->type(), Context.GetLoader()->GetDefaultValue())); + } + } } std::vector TPlainMerger::DoExecute( - const NCompaction::TColumnMergeContext& context, const 
arrow::UInt16Array& pIdxArray, const arrow::UInt32Array& pRecordIdxArray) { - NCompaction::TMergedColumn mColumn(context); - + const TChunkMergeContext& chunkContext, TMergingContext& mContext) { + NCompaction::TMergedColumn mColumn(Context, chunkContext); + auto& chunkInfo = mContext.GetChunk(chunkContext.GetBatchIdx()); std::optional predPortionIdx; - for (ui32 idx = 0; idx < pIdxArray.length(); ++idx) { - const ui16 portionIdx = pIdxArray.Value(idx); - const ui32 portionRecordIdx = pRecordIdxArray.Value(idx); + for (ui32 idx = 0; idx < chunkInfo.GetIdxArray().length(); ++idx) { + const ui16 portionIdx = chunkInfo.GetIdxArray().Value(idx); + const ui32 portionRecordIdx = chunkInfo.GetRecordIdxArray().Value(idx); auto& cursor = Cursors[portionIdx]; cursor.Next(portionRecordIdx, mColumn); if (predPortionIdx && portionIdx != *predPortionIdx) { Cursors[*predPortionIdx].Fetch(mColumn); } - if (idx + 1 == pIdxArray.length()) { + if (idx + 1 == chunkInfo.GetIdxArray().length()) { cursor.Fetch(mColumn); } predPortionIdx = portionIdx; } - AFL_VERIFY(pIdxArray.length() == mColumn.GetRecordsCount()); + AFL_VERIFY(chunkInfo.GetIdxArray().length() == mColumn.GetRecordsCount()); return mColumn.BuildResult(); } diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/logic.h b/ydb/core/tx/columnshard/engines/changes/compaction/plain/logic.h index 995cd1c33a72..9e3ec9a7c184 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/plain/logic.h +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/logic.h @@ -1,19 +1,22 @@ #pragma once #include "column_cursor.h" -#include +#include +#include #include namespace NKikimr::NOlap::NCompaction { class TPlainMerger: public IColumnMerger { private: + static inline auto Registrator = TFactory::TRegistrator(NArrow::NAccessor::TGlobalConst::PlainDataAccessorName); + using TBase = IColumnMerger; std::vector Cursors; - virtual void DoStart(const std::vector>& input) override; + virtual void DoStart(const 
std::vector>& input, TMergingContext& mergeContext) override; - virtual std::vector DoExecute(const NCompaction::TColumnMergeContext& context, const arrow::UInt16Array& pIdxArray, - const arrow::UInt32Array& pRecordIdxArray) override; + virtual std::vector DoExecute(const TChunkMergeContext& context, TMergingContext& mergeContext) override; public: + using TBase::TBase; }; } // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/merged_column.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/plain/merged_column.cpp index 5f638a30f155..84dd8608ffc4 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/plain/merged_column.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/merged_column.cpp @@ -2,20 +2,6 @@ namespace NKikimr::NOlap::NCompaction { -void TMergedColumn::AppendBlob(const TString& data, const TColumnRecord& columnChunk) { - RecordsCount += columnChunk.GetMeta().GetNumRows(); - ui32 remained; - std::shared_ptr dataArray = Portions.back().AppendBlob(data, columnChunk, remained); - while (remained) { - Y_ABORT_UNLESS(Portions.back().IsFullPortion()); - NewPortion(); - remained = Portions.back().AppendSlice(dataArray, dataArray->length() - remained, remained); - } - if (Portions.back().IsFullPortion()) { - NewPortion(); - } -} - void TMergedColumn::AppendSlice(const std::shared_ptr& data, const ui32 startIndex, const ui32 length) { RecordsCount += length; Y_ABORT_UNLESS(data); @@ -44,7 +30,7 @@ void TMergedColumn::NewPortion() { if (Portions.size()) { Portions.back().FlushBuffer(); } - Portions.emplace_back(TColumnPortion(Context)); + Portions.emplace_back(TColumnPortion(Context, ChunkContext)); } } diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/merged_column.h b/ydb/core/tx/columnshard/engines/changes/compaction/plain/merged_column.h index 9dee31b84215..2433bb8f4862 100644 --- 
a/ydb/core/tx/columnshard/engines/changes/compaction/plain/merged_column.h +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/merged_column.h @@ -9,18 +9,20 @@ namespace NKikimr::NOlap::NCompaction { class TMergedColumn { private: TColumnMergeContext Context; + TChunkMergeContext ChunkContext; YDB_READONLY_DEF(std::vector, Portions); YDB_READONLY(ui32, RecordsCount, 0); void NewPortion(); public: - TMergedColumn(const TColumnMergeContext& context) - : Context(context) { + TMergedColumn(const TColumnMergeContext& context, const TChunkMergeContext& chunkContext) + : Context(context) + , ChunkContext(chunkContext) + { NewPortion(); } - void AppendBlob(const TString& data, const TColumnRecord& columnChunk); void AppendSlice(const std::shared_ptr& data, const ui32 startIndex, const ui32 length); std::vector BuildResult(); diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/ya.make b/ydb/core/tx/columnshard/engines/changes/compaction/plain/ya.make index 64de6caea075..91991ea51097 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/plain/ya.make +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/ya.make @@ -4,7 +4,7 @@ SRCS( column_cursor.cpp column_portion_chunk.cpp merged_column.cpp - logic.cpp + GLOBAL logic.cpp ) PEERDIR( diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.cpp new file mode 100644 index 000000000000..fb682bf79e5b --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.cpp @@ -0,0 +1,169 @@ +#include "logic.h" + +#include +#include + +namespace NKikimr::NOlap::NCompaction { + +void TSparsedMerger::DoStart(const std::vector>& input, TMergingContext& mergingContext) { + ui32 idx = 0; + for (auto&& p : input) { + if (p) { + Cursors.emplace_back(p, Context); + if (mergingContext.HasRemapInfo(idx)) { + CursorPositions.emplace_back(TCursorPosition(&Cursors.back(), 
mergingContext.GetRemapPortionIndexToResultIndex(idx))); + if (CursorPositions.back().IsFinished()) { + CursorPositions.pop_back(); + } + } + } + ++idx; + } +} + +std::vector TSparsedMerger::DoExecute(const TChunkMergeContext& chunkContext, TMergingContext& /*mergeContext*/) { + std::vector result; + std::shared_ptr writer = std::make_shared(Context); + const auto addSkipsToWriter = [&](i64 delta) { + if (!delta) { + return; + } + AFL_VERIFY(delta >= 0); + if (chunkContext.GetPortionRowsCountLimit() <= writer->GetCurrentSize() + delta) { + const i64 diff = chunkContext.GetPortionRowsCountLimit() - writer->GetCurrentSize(); + writer->AddPositions(diff); + result.emplace_back(writer->Flush()); + writer = std::make_shared(Context); + delta -= diff; + } + while (chunkContext.GetPortionRowsCountLimit() <= delta) { + writer->AddPositions(chunkContext.GetPortionRowsCountLimit()); + result.emplace_back(writer->Flush()); + writer = std::make_shared(Context); + delta -= chunkContext.GetPortionRowsCountLimit(); + } + if (delta) { + writer->AddPositions(delta); + } + }; + + std::vector heap; + for (auto it = CursorPositions.begin(); it != CursorPositions.end();) { + AFL_VERIFY(chunkContext.GetBatchIdx() <= it->GetCurrentGlobalChunkIdx()); + if (it->GetCurrentGlobalChunkIdx() == chunkContext.GetBatchIdx()) { + heap.emplace_back(std::move(*it)); + it = CursorPositions.erase(it); + } else { + ++it; + } + } + std::make_heap(heap.begin(), heap.end()); + ui32 nextGlobalPosition = 0; + while (heap.size()) { + std::pop_heap(heap.begin(), heap.end()); + while (heap.size() == 1 || (heap.size() > 1 && heap.front() < heap.back())) { + { + auto& address = heap.back().GetCurrentAddress(); + AFL_VERIFY(nextGlobalPosition <= (ui32)address.GetGlobalPosition()); + addSkipsToWriter(address.GetGlobalPosition() - nextGlobalPosition); + + heap.back().AddIndexTo(*writer); + if (chunkContext.GetPortionRowsCountLimit() == writer->GetCurrentSize()) { + result.emplace_back(writer->Flush()); + writer = 
std::make_shared(Context); + } + nextGlobalPosition = address.GetGlobalPosition() + 1; + } + if (!heap.back().Next()) { + heap.pop_back(); + break; + } else if (heap.back().GetCurrentGlobalChunkIdx() != chunkContext.GetBatchIdx()) { + CursorPositions.emplace_back(std::move(heap.back())); + heap.pop_back(); + break; + } + } + std::push_heap(heap.begin(), heap.end()); + } + AFL_VERIFY(nextGlobalPosition <= chunkContext.GetRecordsCount()); + addSkipsToWriter(chunkContext.GetRecordsCount() - nextGlobalPosition); + if (writer->HasData()) { + result.emplace_back(writer->Flush()); + } + return result; +} + +void TSparsedMerger::TWriter::AddRealData(const std::shared_ptr& arr, const ui32 index) { + AFL_VERIFY(arr); + AFL_VERIFY(NArrow::Append(*ValueBuilder, *arr, index)); + NArrow::TStatusValidator::Validate(IndexBuilderImpl->Append(CurrentRecordIdx)); + ++UsefulRecordsCount; + ++CurrentRecordIdx; +} + +TColumnPortionResult TSparsedMerger::TWriter::Flush() { + std::vector> fields = { std::make_shared("index", arrow::uint32()), + std::make_shared("value", DataType) }; + auto schema = std::make_shared(fields); + std::vector> columns = { NArrow::TStatusValidator::GetValid(IndexBuilder->Finish()), + NArrow::TStatusValidator::GetValid(ValueBuilder->Finish()) }; + + auto recordBatch = arrow::RecordBatch::Make(schema, UsefulRecordsCount, columns); + NArrow::NAccessor::TSparsedArray::TBuilder builder( + Context.GetIndexInfo().GetColumnFeaturesVerified(Context.GetColumnId()).GetDefaultValue().GetValue(), Context.GetResultField()->type()); + builder.AddChunk(CurrentRecordIdx, recordBatch); + Chunks.emplace_back(std::make_shared(Context.GetSaver().Apply(recordBatch), builder.Finish(), + TChunkAddress(ColumnId, 0), Context.GetIndexInfo().GetColumnFeaturesVerified(ColumnId))); + return *this; +} + +TSparsedMerger::TWriter::TWriter(const TColumnMergeContext& context) + : TBase(context.GetColumnId()) + , DataType(context.GetResultField()->type()) + , Context(context) { + IndexBuilder = 
NArrow::MakeBuilder(arrow::uint32()); + ValueBuilder = NArrow::MakeBuilder(DataType); + IndexBuilderImpl = (arrow::UInt32Builder*)(IndexBuilder.get()); +} + +bool TSparsedMerger::TPlainChunkCursor::AddIndexTo(const ui32 index, TWriter& writer) { + AFL_VERIFY(ChunkStartPosition <= index); + writer.AddRealData(ChunkAddress->GetArray(), index - ChunkStartPosition); + return true; +} + +bool TSparsedMerger::TSparsedChunkCursor::AddIndexTo(const ui32 index, TWriter& writer) { + AFL_VERIFY(ChunkStartGlobalPosition <= index); + AFL_VERIFY(index == NextGlobalPosition); + writer.AddRealData(Chunk->GetColValue(), NextLocalPosition); + return true; +} + +bool TSparsedMerger::TCursor::AddIndexTo(const ui32 index, TWriter& writer) { + if (FinishGlobalPosition <= index) { + InitArrays(index); + } + if (SparsedCursor) { + return SparsedCursor->AddIndexTo(index, writer); + } else if (PlainCursor) { + return PlainCursor->AddIndexTo(index, writer); + } else { + return false; + } +} + +void TSparsedMerger::TCursor::InitArrays(const ui32 position) { + AFL_VERIFY(!CurrentOwnedArray || !CurrentOwnedArray->GetAddress().Contains(position)); + CurrentOwnedArray = Array->GetArray(CurrentOwnedArray, position, Array); + if (CurrentOwnedArray->GetArray()->GetType() == NArrow::NAccessor::IChunkedArray::EType::SparsedArray) { + auto sparsedArray = static_pointer_cast(CurrentOwnedArray->GetArray()); + SparsedCursor = std::make_shared(sparsedArray, &*CurrentOwnedArray); + PlainCursor = nullptr; + } else { + PlainCursor = make_shared(CurrentOwnedArray->GetArray(), &*CurrentOwnedArray); + SparsedCursor = nullptr; + } + FinishGlobalPosition = CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + CurrentOwnedArray->GetArray()->GetRecordsCount(); +} + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.h b/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.h new file mode 100644 index 000000000000..bf7be222ff48 --- 
/dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.h @@ -0,0 +1,280 @@ +#pragma once +#include +#include +#include +#include + +namespace NKikimr::NOlap::NCompaction { + +class TSparsedMerger: public IColumnMerger { +private: + static inline auto Registrator = TFactory::TRegistrator(NArrow::NAccessor::TGlobalConst::SparsedDataAccessorName); + + using TBase = IColumnMerger; + class TWriter: public TColumnPortionResult { + private: + using TBase = TColumnPortionResult; + const std::shared_ptr DataType; + const TColumnMergeContext& Context; + std::unique_ptr IndexBuilder; + std::unique_ptr ValueBuilder; + arrow::UInt32Builder* IndexBuilderImpl = nullptr; + ui32 CurrentRecordIdx = 0; + ui32 UsefulRecordsCount = 0; + + public: + TWriter(const TColumnMergeContext& context); + + bool HasData() const { + return CurrentRecordIdx; + } + + ui32 GetCurrentSize() const { + return CurrentRecordIdx; + } + + bool HasUsefulData() const { + return UsefulRecordsCount; + } + + ui32 AddPositions(const i32 delta) { + AFL_VERIFY(delta > 0); + CurrentRecordIdx += delta; + return CurrentRecordIdx; + } + + void AddRealData(const std::shared_ptr& arr, const ui32 index); + + TColumnPortionResult Flush(); + }; + + class TPlainChunkCursor { + private: + std::shared_ptr CurrentChunkedArray; + std::optional ChunkAddress; + const NArrow::NAccessor::IChunkedArray::TFullChunkedArrayAddress* CurrentOwnedArray; + ui32 ChunkStartPosition = 0; + ui32 ChunkFinishPosition = 0; + + void InitArrays(const ui32 position) { + AFL_VERIFY(!ChunkAddress || ChunkFinishPosition <= position); + ChunkAddress = CurrentChunkedArray->GetChunk(ChunkAddress, position); + AFL_VERIFY(ChunkAddress); + ChunkStartPosition = CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + ChunkAddress->GetAddress().GetGlobalStartPosition(); + ChunkFinishPosition = + CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + ChunkAddress->GetAddress().GetGlobalFinishPosition(); + } + + public: + 
TPlainChunkCursor(const std::shared_ptr& chunked, + const NArrow::NAccessor::IChunkedArray::TFullChunkedArrayAddress* currentOwnedArray) + : CurrentChunkedArray(chunked) + , CurrentOwnedArray(currentOwnedArray) { + AFL_VERIFY(CurrentChunkedArray); + AFL_VERIFY(CurrentOwnedArray); + InitArrays(CurrentOwnedArray->GetAddress().GetGlobalStartPosition()); + } + bool AddIndexTo(const ui32 index, TWriter& writer); + std::optional MoveToSignificant(const ui32 currentGlobalPosition, const TColumnMergeContext& context) { + AFL_VERIFY(ChunkStartPosition <= currentGlobalPosition); + ui32 currentIndex = currentGlobalPosition; + while (true) { + if (CurrentOwnedArray->GetAddress().GetGlobalFinishPosition() <= currentIndex) { + return {}; + } + if (ChunkFinishPosition <= currentIndex) { + InitArrays(currentGlobalPosition); + continue; + } + for (; currentIndex < ChunkFinishPosition; ++currentIndex) { + if (!NArrow::ColumnEqualsScalar( + ChunkAddress->GetArray(), currentIndex - ChunkStartPosition, context.GetLoader()->GetDefaultValue())) { + return currentIndex; + } + } + } + } + }; + + class TSparsedChunkCursor { + private: + std::shared_ptr CurrentSparsedArray; + const NArrow::NAccessor::TSparsedArrayChunk* Chunk = nullptr; + const NArrow::NAccessor::IChunkedArray::TFullChunkedArrayAddress* CurrentOwnedArray; + ui32 ChunkStartGlobalPosition = 0; + ui32 NextGlobalPosition = 0; + ui32 NextLocalPosition = 0; + ui32 FinishGlobalPosition = 0; + void InitArrays(const ui32 position) { + AFL_VERIFY(!Chunk || CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + Chunk->GetFinishPosition() <= position); + Chunk = &CurrentSparsedArray->GetSparsedChunk(CurrentOwnedArray->GetAddress().GetLocalIndex(position)); + AFL_VERIFY(Chunk->GetRecordsCount()); + AFL_VERIFY(CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + Chunk->GetStartPosition() <= position && + position < CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + Chunk->GetFinishPosition()) + ("pos", position)("start", 
Chunk->GetStartPosition())("finish", Chunk->GetFinishPosition())( + "shift", CurrentOwnedArray->GetAddress().GetGlobalStartPosition()); + ChunkStartGlobalPosition = CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + Chunk->GetStartPosition(); + NextGlobalPosition = CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + Chunk->GetFirstIndexNotDefault(); + NextLocalPosition = 0; + FinishGlobalPosition = CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + Chunk->GetFinishPosition(); + } + + public: + std::optional MoveToSignificant(const ui32 currentGlobalPosition, const TColumnMergeContext& /*context*/) { + while (true) { + if (NextGlobalPosition == CurrentOwnedArray->GetAddress().GetGlobalFinishPosition()) { + return {}; + } + if (NextGlobalPosition == FinishGlobalPosition) { + InitArrays(NextGlobalPosition); + continue; + } + if (currentGlobalPosition == NextGlobalPosition) { + return NextGlobalPosition; + } + for (; NextLocalPosition < Chunk->GetNotDefaultRecordsCount(); ++NextLocalPosition) { + NextGlobalPosition = ChunkStartGlobalPosition + Chunk->GetIndexUnsafeFast(NextLocalPosition); + if (currentGlobalPosition <= NextGlobalPosition) { + return NextGlobalPosition; + } + } + NextGlobalPosition = FinishGlobalPosition; + } + } + bool AddIndexTo(const ui32 index, TWriter& writer); + TSparsedChunkCursor(const std::shared_ptr& sparsed, + const NArrow::NAccessor::IChunkedArray::TFullChunkedArrayAddress* currentOwnedArray) + : CurrentSparsedArray(sparsed) + , CurrentOwnedArray(currentOwnedArray) { + AFL_VERIFY(sparsed); + AFL_VERIFY(currentOwnedArray); + InitArrays(CurrentOwnedArray->GetAddress().GetGlobalStartPosition()); + } + }; + + class TCursor { + private: + std::shared_ptr Array; + std::optional CurrentOwnedArray; + std::shared_ptr SparsedCursor; + std::shared_ptr PlainCursor; + ui32 FinishGlobalPosition = 0; + const TColumnMergeContext& Context; + void InitArrays(const ui32 position); + + public: + TCursor(const std::shared_ptr& array, const 
TColumnMergeContext& context) + : Array(array) + , Context(context) { + AFL_VERIFY(Array); + AFL_VERIFY(Array->GetRecordsCount()); + InitArrays(0); + } + + ui32 GetRecordsCount() const { + return Array->GetRecordsCount(); + } + + ui32 MoveToSignificant(const ui32 start) { + ui32 currentPosition = start; + while (true) { + std::optional significantIndex; + if (SparsedCursor) { + significantIndex = SparsedCursor->MoveToSignificant(currentPosition, Context); + } else if (PlainCursor) { + significantIndex = PlainCursor->MoveToSignificant(currentPosition, Context); + } + if (significantIndex) { + return *significantIndex; + } + if (FinishGlobalPosition == Array->GetRecordsCount()) { + return FinishGlobalPosition; + } else { + InitArrays(FinishGlobalPosition); + } + } + } + + bool AddIndexTo(const ui32 index, TWriter& writer); + }; + + class TCursorPosition: TMoveOnly { + private: + TCursor* Cursor; + ui32 CurrentIndex = 0; + const std::vector* GlobalSequence = nullptr; + TMergingContext::TAddress CurrentAddress; + + bool InitPosition(const ui32 start) { + CurrentIndex = start; + while (true) { + CurrentIndex = Cursor->MoveToSignificant(CurrentIndex); + if (CurrentIndex == GlobalSequence->size()) { + return false; + } + auto& addr = (*GlobalSequence)[CurrentIndex]; + if (addr.GetGlobalPosition() != -1) { + CurrentAddress = addr; + return true; + } + if (++CurrentIndex == GlobalSequence->size()) { + return false; + } + } + } + + public: + TCursor* operator->() { + return Cursor; + } + + void AddIndexTo(TWriter& writer) const { + AFL_VERIFY(Cursor->AddIndexTo(CurrentIndex, writer)); + } + + TCursorPosition(TCursor* cursor, const std::vector& globalSequence) + : Cursor(cursor) + , GlobalSequence(&globalSequence) { + AFL_VERIFY(GlobalSequence->size() == cursor->GetRecordsCount()); + InitPosition(0); + } + + bool IsFinished() const { + AFL_VERIFY(CurrentIndex <= GlobalSequence->size()); + return CurrentIndex == GlobalSequence->size(); + } + + ui32 GetCurrentGlobalPosition() 
const { + return CurrentAddress.GetGlobalPosition(); + } + + ui32 GetCurrentGlobalChunkIdx() const { + return CurrentAddress.GetChunkIdx(); + } + + const TMergingContext::TAddress& GetCurrentAddress() const { + return CurrentAddress; + } + + bool operator<(const TCursorPosition& item) const { + return item.GetCurrentAddress() < GetCurrentAddress(); + } + + [[nodiscard]] bool Next() { + return InitPosition(++CurrentIndex); + } + }; + + std::deque Cursors; + std::list CursorPositions; + + virtual void DoStart( + const std::vector>& input, TMergingContext& mergeContext) override; + + virtual std::vector DoExecute(const TChunkMergeContext& context, TMergingContext& mergeContext) override; + +public: + using TBase::TBase; +}; + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/ya.make b/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/ya.make new file mode 100644 index 000000000000..e24e8341aa7d --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + GLOBAL logic.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/changes/compaction/common +) + +END() diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/ya.make b/ydb/core/tx/columnshard/engines/changes/compaction/ya.make index c6a7bc101f9a..5e76aa0d8971 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/ya.make +++ b/ydb/core/tx/columnshard/engines/changes/compaction/ya.make @@ -7,8 +7,9 @@ SRCS( PEERDIR( ydb/core/tx/tiering ydb/core/tx/columnshard/engines/changes/compaction/abstract - ydb/core/tx/columnshard/engines/changes/compaction/plain ydb/core/tx/columnshard/engines/changes/compaction/common + ydb/core/tx/columnshard/engines/changes/compaction/plain + ydb/core/tx/columnshard/engines/changes/compaction/sparsed ) END() diff --git a/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp 
b/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp index a3db1b8f3eb2..ea7b6ddc2eb4 100644 --- a/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp +++ b/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp @@ -81,7 +81,7 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks( auto resultSchema = context.SchemaVersions.GetLastSchema(); auto shardingActual = context.SchemaVersions.GetShardingInfoActual(GranuleMeta->GetPathId()); - std::shared_ptr stats = std::make_shared(); + std::shared_ptr stats = std::make_shared(); std::shared_ptr resultFiltered; NCompaction::TMerger merger(context, SaverContext); { @@ -115,12 +115,11 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks( pkColumnIds.emplace((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG); } } - resultFiltered = std::make_shared(resultSchema, dataColumnIds); { auto seqDataColumnIds = dataColumnIds; for (auto&& i : pkColumnIds) { - AFL_VERIFY(seqDataColumnIds.erase(i)); + AFL_VERIFY(seqDataColumnIds.erase(i))("id", i); } THashSet usedPortionIds; for (auto&& i : portions) { @@ -196,17 +195,15 @@ TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstruc void TGeneralCompactColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) { TBase::DoWriteIndexOnComplete(self, context); if (self) { - self->IncCounter( - context.FinishedSuccessfully ? 
NColumnShard::COUNTER_SPLIT_COMPACTION_SUCCESS : NColumnShard::COUNTER_SPLIT_COMPACTION_FAIL); - self->IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_BLOBS_WRITTEN, context.BlobsWritten); - self->IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_BYTES_WRITTEN, context.BytesWritten); + self->Counters.GetTabletCounters()->OnCompactionWriteIndexCompleted( + context.FinishedSuccessfully, context.BlobsWritten, context.BytesWritten); } } void TGeneralCompactColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { TBase::DoStart(self); auto& g = *GranuleMeta; - self.CSCounters.OnSplitCompactionInfo( + self.Counters.GetCSCounters().OnSplitCompactionInfo( g.GetAdditiveSummary().GetCompacted().GetTotalPortionsSize(), g.GetAdditiveSummary().GetCompacted().GetPortionsCount()); } diff --git a/ydb/core/tx/columnshard/engines/changes/indexation.cpp b/ydb/core/tx/columnshard/engines/changes/indexation.cpp index bca7277947ca..22ca7fd2c738 100644 --- a/ydb/core/tx/columnshard/engines/changes/indexation.cpp +++ b/ydb/core/tx/columnshard/engines/changes/indexation.cpp @@ -1,6 +1,7 @@ #include "indexation.h" #include "compaction/merger.h" + #include namespace NKikimr::NOlap { @@ -35,9 +36,7 @@ void TInsertColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::TColumnSha if (!DataToIndex.empty()) { self->UpdateInsertTableCounters(); } - self->IncCounter(NColumnShard::COUNTER_INDEXING_BLOBS_WRITTEN, context.BlobsWritten); - self->IncCounter(NColumnShard::COUNTER_INDEXING_BYTES_WRITTEN, context.BytesWritten); - self->IncCounter(NColumnShard::COUNTER_INDEXING_TIME, context.Duration.MilliSeconds()); + self->Counters.GetTabletCounters()->OnInsertionWriteIndexCompleted(context.BlobsWritten, context.BytesWritten, context.Duration); } } @@ -50,17 +49,69 @@ namespace { class TBatchInfo { private: YDB_READONLY_DEF(std::shared_ptr, Batch); - const NEvWrite::EModificationType ModificationType; + +public: + TBatchInfo(const std::shared_ptr& batch, const NEvWrite::EModificationType 
/*modificationType*/) + : Batch(batch) { + } +}; + +class TPathFieldsInfo { +private: + std::set UsageColumnIds; + const ISnapshotSchema::TPtr ResultSchema; + THashMap Schemas; + bool Finished = false; + const ui32 FullColumnsCount; + public: - TBatchInfo(const std::shared_ptr& batch, const NEvWrite::EModificationType modificationType) - : Batch(batch) - , ModificationType(modificationType) + TPathFieldsInfo(const ISnapshotSchema::TPtr& resultSchema) + : UsageColumnIds(IIndexInfo::GetNecessarySystemColumnIdsSet()) + , ResultSchema(resultSchema) + , FullColumnsCount(ResultSchema->GetIndexInfo().GetColumnIds(true).size()) { + AFL_VERIFY(FullColumnsCount); + } + + bool IsFinished() const { + return Finished; + } + bool HasDeletion() const { + AFL_VERIFY(Finished); + return UsageColumnIds.contains((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG); + } + + void Finish() { + AFL_VERIFY(UsageColumnIds.size()); + AFL_VERIFY(!Finished); + Finished = true; + if (UsageColumnIds.size() == FullColumnsCount) { + return; + } + auto defaultDiffs = ISnapshotSchema::GetColumnsWithDifferentDefaults(Schemas, ResultSchema); + UsageColumnIds.insert(defaultDiffs.begin(), defaultDiffs.end()); } - bool GetIsDeletion() const { - return ModificationType == NEvWrite::EModificationType::Delete; + const std::set& GetUsageColumnIds() const { + AFL_VERIFY(Finished); + return UsageColumnIds; + } + + void AddChunkInfo(const TCommittedData& data, const TConstructionContext& context) { + AFL_VERIFY(!Finished); + if (UsageColumnIds.size() == FullColumnsCount) { + return; + } + auto blobSchema = context.SchemaVersions.GetSchemaVerified(data.GetSchemaVersion()); + if (!Schemas.contains(data.GetSchemaVersion())) { + Schemas.emplace(data.GetSchemaVersion(), blobSchema); + } + std::vector filteredIds = data.GetMeta().GetSchemaSubset().Apply(blobSchema->GetIndexInfo().GetColumnIds(false)); + if (data.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete) { + 
filteredIds.emplace_back((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG); + } + UsageColumnIds.insert(filteredIds.begin(), filteredIds.end()); } }; @@ -68,12 +119,20 @@ class TPathData { private: std::vector Batches; YDB_READONLY_DEF(std::optional, ShardingInfo); - bool HasDeletionFlag = false; + TPathFieldsInfo ColumnsInfo; + public: - TPathData(const std::optional& shardingInfo) + TPathData(const std::optional& shardingInfo, const ISnapshotSchema::TPtr& resultSchema) : ShardingInfo(shardingInfo) - { - + , ColumnsInfo(resultSchema) { + } + + const TPathFieldsInfo& GetColumnsInfo() const { + return ColumnsInfo; + } + + void FinishChunksInfo() { + ColumnsInfo.Finish(); } std::vector> GetGeneralContainers() const { @@ -84,14 +143,16 @@ class TPathData { return result; } + void AddChunkInfo(const NOlap::TCommittedData& data, const TConstructionContext& context) { + ColumnsInfo.AddChunkInfo(data, context); + } + bool HasDeletion() { - return HasDeletionFlag; + return ColumnsInfo.HasDeletion(); } - void AddBatch(const NOlap::TInsertedData& data, const std::shared_ptr& batch) { - if (data.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete) { - HasDeletionFlag = true; - } + void AddBatch(const NOlap::TCommittedData& data, const std::shared_ptr& batch) { + AFL_VERIFY(ColumnsInfo.IsFinished()); AFL_VERIFY(batch); Batches.emplace_back(batch, data.GetMeta().GetModificationType()); } @@ -108,23 +169,47 @@ class TPathData { class TPathesData { private: THashMap Data; + const ISnapshotSchema::TPtr ResultSchema; public: + TPathesData(const ISnapshotSchema::TPtr& resultSchema) + : ResultSchema(resultSchema) { + } + + void FinishChunksInfo() { + for (auto&& i : Data) { + i.second.FinishChunksInfo(); + } + } + const THashMap& GetData() const { return Data; } - void Add(const NOlap::TInsertedData& inserted, const std::optional& info, - const std::shared_ptr& batch) { - auto it = Data.find(inserted.PathId); + void AddChunkInfo(const NOlap::TCommittedData& inserted, 
const TConstructionContext& context) { + auto shardingFilterCommit = context.SchemaVersions.GetShardingInfoOptional(inserted.GetPathId(), inserted.GetSnapshot()); + auto it = Data.find(inserted.GetPathId()); if (it == Data.end()) { - it = Data.emplace(inserted.PathId, info).first; + it = Data.emplace(inserted.GetPathId(), TPathData(shardingFilterCommit, ResultSchema)).first; } - it->second.AddShardingInfo(info); + it->second.AddChunkInfo(inserted, context); + it->second.AddShardingInfo(shardingFilterCommit); + } + + void AddBatch(const NOlap::TCommittedData& inserted, const std::shared_ptr& batch) { + auto it = Data.find(inserted.GetPathId()); + AFL_VERIFY(it != Data.end()); it->second.AddBatch(inserted, batch); } + + const TPathFieldsInfo& GetPathInfo(const ui64 pathId) const { + auto it = Data.find(pathId); + AFL_VERIFY(it != Data.end()); + return it->second.GetColumnsInfo(); + } }; -} + +} // namespace TConclusionStatus TInsertColumnEngineChanges::DoConstructBlobs(TConstructionContext& context) noexcept { Y_ABORT_UNLESS(!DataToIndex.empty()); @@ -133,34 +218,22 @@ TConclusionStatus TInsertColumnEngineChanges::DoConstructBlobs(TConstructionCont auto resultSchema = context.SchemaVersions.GetLastSchema(); Y_ABORT_UNLESS(resultSchema->GetIndexInfo().IsSorted()); - TPathesData pathBatches; - std::set usageColumnIds; - { - THashMap schemas; - for (auto& inserted : DataToIndex) { - if (schemas.contains(inserted.GetSchemaVersion())) { - continue; - } - schemas.emplace(inserted.GetSchemaVersion(), context.SchemaVersions.GetSchemaVerified(inserted.GetSchemaVersion())); - } - usageColumnIds = ISnapshotSchema::GetColumnsWithDifferentDefaults(schemas, resultSchema); - } - + TPathesData pathBatches(resultSchema); for (auto& inserted : DataToIndex) { - auto blobSchema = context.SchemaVersions.GetSchemaVerified(inserted.GetSchemaVersion()); - std::vector filteredIds = inserted.GetMeta().GetSchemaSubset().Apply(blobSchema->GetIndexInfo().GetColumnIds(false)); - 
usageColumnIds.insert(filteredIds.begin(), filteredIds.end()); - if (inserted.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete) { - usageColumnIds.emplace((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG); - } - if (usageColumnIds.size() == resultSchema->GetIndexInfo().GetColumnIds(true).size()) { - break; + if (inserted.GetRemove()) { + continue; } + pathBatches.AddChunkInfo(inserted, context); } + pathBatches.FinishChunksInfo(); + for (auto& inserted : DataToIndex) { const TBlobRange& blobRange = inserted.GetBlobRange(); - auto shardingFilterCommit = context.SchemaVersions.GetShardingInfoOptional(inserted.PathId, inserted.GetSnapshot()); + if (inserted.GetRemove()) { + Blobs.Extract(IStoragesManager::DefaultStorageId, blobRange); + continue; + } auto blobSchema = context.SchemaVersions.GetSchemaVerified(inserted.GetSchemaVersion()); std::shared_ptr batch; @@ -169,23 +242,24 @@ TConclusionStatus TInsertColumnEngineChanges::DoConstructBlobs(TConstructionCont auto batchSchema = std::make_shared(inserted.GetMeta().GetSchemaSubset().Apply(blobSchema->GetIndexInfo().ArrowSchema()->fields())); batch = std::make_shared(NArrow::DeserializeBatch(blobData, batchSchema)); + blobSchema->AdaptBatchToSchema(*batch, resultSchema); } - IIndexInfo::AddSnapshotColumns(*batch, inserted.GetSnapshot()); - if (usageColumnIds.contains((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG)) { + + auto& pathInfo = pathBatches.GetPathInfo(inserted.GetPathId()); + + if (pathInfo.HasDeletion()) { IIndexInfo::AddDeleteFlagsColumn(*batch, inserted.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete); } - usageColumnIds.insert(IIndexInfo::GetSnapshotColumnIds().begin(), IIndexInfo::GetSnapshotColumnIds().end()); - batch = resultSchema->NormalizeBatch(*blobSchema, batch, usageColumnIds).DetachResult(); - pathBatches.Add(inserted, shardingFilterCommit, batch); + pathBatches.AddBatch(inserted, batch); } Y_ABORT_UNLESS(Blobs.IsEmpty()); - auto filteredSnapshot = 
std::make_shared(resultSchema, usageColumnIds); - auto stats = std::make_shared(); + auto stats = std::make_shared(); std::vector> filters; for (auto& [pathId, pathInfo] : pathBatches.GetData()) { + auto filteredSnapshot = std::make_shared(resultSchema, pathInfo.GetColumnsInfo().GetUsageColumnIds()); std::optional shardingVersion; if (pathInfo.GetShardingInfo()) { shardingVersion = pathInfo.GetShardingInfo()->GetSnapshotVersion(); @@ -194,7 +268,7 @@ TConclusionStatus TInsertColumnEngineChanges::DoConstructBlobs(TConstructionCont filters.resize(batches.size()); auto itGranule = PathToGranule.find(pathId); - AFL_VERIFY(itGranule != PathToGranule.end()); + AFL_VERIFY(itGranule != PathToGranule.end())("path_id", pathId); NCompaction::TMerger merger(context, SaverContext, std::move(batches), std::move(filters)); merger.SetOptimizationWritingPackMode(true); auto localAppended = merger.Execute(stats, itGranule->second, filteredSnapshot, pathId, shardingVersion); @@ -212,4 +286,4 @@ NColumnShard::ECumulativeCounters TInsertColumnEngineChanges::GetCounterIndex(co return isSuccess ? 
NColumnShard::COUNTER_INDEXING_SUCCESS : NColumnShard::COUNTER_INDEXING_FAIL; } -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/changes/indexation.h b/ydb/core/tx/columnshard/engines/changes/indexation.h index d130612b7451..4c7f8602a6f5 100644 --- a/ydb/core/tx/columnshard/engines/changes/indexation.h +++ b/ydb/core/tx/columnshard/engines/changes/indexation.h @@ -1,16 +1,21 @@ #pragma once -#include "abstract/abstract.h" #include "with_appended.h" -#include -#include + +#include "abstract/abstract.h" + #include +#include +#include + +#include namespace NKikimr::NOlap { class TInsertColumnEngineChanges: public TChangesWithAppend { private: using TBase = TChangesWithAppend; - std::vector DataToIndex; + std::vector DataToIndex; + protected: virtual void DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) override; virtual void DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) override; @@ -34,13 +39,12 @@ class TInsertColumnEngineChanges: public TChangesWithAppend { public: THashMap PathToGranule; // pathId -> positions (sorted by pk) public: - TInsertColumnEngineChanges(std::vector&& dataToIndex, const TSaverContext& saverContext) + TInsertColumnEngineChanges(std::vector&& dataToIndex, const TSaverContext& saverContext) : TBase(saverContext, NBlobOperations::EConsumer::INDEXATION) - , DataToIndex(std::move(dataToIndex)) - { + , DataToIndex(std::move(dataToIndex)) { } - const std::vector& GetDataToIndex() const { + const std::vector& GetDataToIndex() const { return DataToIndex; } @@ -52,7 +56,6 @@ class TInsertColumnEngineChanges: public TChangesWithAppend { return StaticTypeName(); } std::optional AddPathIfNotExists(ui64 pathId); - }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/changes/with_appended.cpp b/ydb/core/tx/columnshard/engines/changes/with_appended.cpp index 5b3d988abfba..24d44eb34587 100644 --- 
a/ydb/core/tx/columnshard/engines/changes/with_appended.cpp +++ b/ydb/core/tx/columnshard/engines/changes/with_appended.cpp @@ -19,7 +19,7 @@ void TChangesWithAppend::DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, } const auto predRemoveDroppedTable = [self](const TWritePortionInfoWithBlobsResult& item) { auto& portionInfo = item.GetPortionResult(); - if (!!self && (!self->TablesManager.HasTable(portionInfo.GetPathId()) || self->TablesManager.GetTable(portionInfo.GetPathId()).IsDropped())) { + if (!!self && !self->TablesManager.HasTable(portionInfo.GetPathId(), false)) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_inserted_data")("reason", "table_removed")("path_id", portionInfo.GetPathId()); return true; } else { @@ -42,13 +42,13 @@ void TChangesWithAppend::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self case NOlap::TPortionMeta::EProduced::UNSPECIFIED: Y_ABORT_UNLESS(false); // unexpected case NOlap::TPortionMeta::EProduced::INSERTED: - self->IncCounter(NColumnShard::COUNTER_INDEXING_PORTIONS_WRITTEN); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_INDEXING_PORTIONS_WRITTEN); break; case NOlap::TPortionMeta::EProduced::COMPACTED: - self->IncCounter(NColumnShard::COUNTER_COMPACTION_PORTIONS_WRITTEN); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_COMPACTION_PORTIONS_WRITTEN); break; case NOlap::TPortionMeta::EProduced::SPLIT_COMPACTED: - self->IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_PORTIONS_WRITTEN); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_PORTIONS_WRITTEN); break; case NOlap::TPortionMeta::EProduced::EVICTED: Y_ABORT("Unexpected evicted case"); @@ -58,19 +58,19 @@ void TChangesWithAppend::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self break; } } - self->IncCounter(NColumnShard::COUNTER_PORTIONS_DEACTIVATED, PortionsToRemove.size()); + 
self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_PORTIONS_DEACTIVATED, PortionsToRemove.size()); THashSet blobsDeactivated; for (auto& [_, portionInfo] : PortionsToRemove) { for (auto& rec : portionInfo.Records) { blobsDeactivated.emplace(portionInfo.GetBlobId(rec.BlobRange.GetBlobIdxVerified())); } - self->IncCounter(NColumnShard::COUNTER_RAW_BYTES_DEACTIVATED, portionInfo.GetTotalRawBytes()); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_RAW_BYTES_DEACTIVATED, portionInfo.GetTotalRawBytes()); } - self->IncCounter(NColumnShard::COUNTER_BLOBS_DEACTIVATED, blobsDeactivated.size()); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_BLOBS_DEACTIVATED, blobsDeactivated.size()); for (auto& blobId : blobsDeactivated) { - self->IncCounter(NColumnShard::COUNTER_BYTES_DEACTIVATED, blobId.BlobSize()); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_BYTES_DEACTIVATED, blobId.BlobSize()); } } { diff --git a/ydb/core/tx/columnshard/engines/column_engine.h b/ydb/core/tx/columnshard/engines/column_engine.h index 912223f8cf1b..2c616c06e32d 100644 --- a/ydb/core/tx/columnshard/engines/column_engine.h +++ b/ydb/core/tx/columnshard/engines/column_engine.h @@ -46,10 +46,6 @@ struct TSelectInfo { std::vector> PortionsOrderedPK; - NColumnShard::TContainerAccessorWithDirection>> GetPortionsOrdered(const bool reverse) const { - return NColumnShard::TContainerAccessorWithDirection>>(PortionsOrderedPK, reverse); - } - size_t NumChunks() const; TStats Stats() const; @@ -284,7 +280,7 @@ class IColumnEngine { } virtual bool IsOverloadedByMetadata(const ui64 limit) const = 0; virtual std::shared_ptr Select(ui64 pathId, TSnapshot snapshot, const TPKRangesFilter& pkRangesFilter) const = 0; - virtual std::shared_ptr StartInsert(std::vector&& dataToIndex) noexcept = 0; + virtual std::shared_ptr StartInsert(std::vector&& dataToIndex) noexcept = 0; virtual std::shared_ptr StartCompaction(const std::shared_ptr& 
dataLocksManager) noexcept = 0; virtual std::shared_ptr StartCleanupPortions(const TSnapshot& snapshot, const THashSet& pathsToDrop, const std::shared_ptr& dataLocksManager) noexcept = 0; virtual std::shared_ptr StartCleanupTables(const THashSet& pathsToDrop) noexcept = 0; diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp index e7fda23311ae..78ee4bea4ae4 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp @@ -157,10 +157,9 @@ void TColumnEngineForLogs::RegisterSchemaVersion(const TSnapshot& snapshot, TInd } void TColumnEngineForLogs::RegisterSchemaVersion(const TSnapshot& snapshot, const NKikimrSchemeOp::TColumnTableSchema& schema) { - std::optional indexInfoOptional = NOlap::TIndexInfo::BuildFromProto(schema, StoragesManager); + std::optional indexInfoOptional = NOlap::TIndexInfo::BuildFromProto(schema, StoragesManager, SchemaObjectsCache); AFL_VERIFY(indexInfoOptional); NOlap::TIndexInfo indexInfo = std::move(*indexInfoOptional); - indexInfo.SetAllKeys(StoragesManager); RegisterSchemaVersion(snapshot, std::move(indexInfo)); } @@ -272,7 +271,7 @@ bool TColumnEngineForLogs::LoadCounters(IDbWrapper& db) { return db.LoadCounters(callback); } -std::shared_ptr TColumnEngineForLogs::StartInsert(std::vector&& dataToIndex) noexcept { +std::shared_ptr TColumnEngineForLogs::StartInsert(std::vector&& dataToIndex) noexcept { Y_ABORT_UNLESS(dataToIndex.size()); TSaverContext saverContext(StoragesManager); @@ -280,12 +279,15 @@ std::shared_ptr TColumnEngineForLogs::StartInsert(st auto pkSchema = VersionedIndex.GetLastSchema()->GetIndexInfo().GetReplaceKey(); for (const auto& data : changes->GetDataToIndex()) { - const ui64 pathId = data.PathId; + const ui64 pathId = data.GetPathId(); if (changes->PathToGranule.contains(pathId)) { continue; } - AFL_VERIFY(changes->PathToGranule.emplace(pathId, 
GetGranulePtrVerified(pathId)->GetBucketPositions()).second); + if (!data.GetRemove()) { + AFL_VERIFY(changes->PathToGranule.emplace(pathId, GetGranulePtrVerified(pathId)->GetBucketPositions()).second); + } + } return changes; @@ -381,7 +383,7 @@ std::shared_ptr TColumnEngineForLogs::Start } const auto inserted = uniquePortions.emplace(it->second[i].GetAddress()).second; if (inserted) { - Y_ABORT_UNLESS(it->second[i].CheckForCleanup(snapshot)); + AFL_VERIFY(it->second[i].CheckForCleanup(snapshot))("p_snapshot", it->second[i].GetRemoveSnapshotOptional())("snapshot", snapshot); if (txSize + it->second[i].GetTxVolume() < txSizeLimit || changes->PortionsToDrop.empty()) { txSize += it->second[i].GetTxVolume(); } else { @@ -421,7 +423,7 @@ std::vector> TColumnEngineForLogs::Star TSaverContext saverContext(StoragesManager); NActualizer::TTieringProcessContext context(memoryUsageLimit, saverContext, dataLocksManager, SignalCounters, ActualizationController); - const TDuration actualizationLag = NYDBTest::TControllers::GetColumnShardController()->GetActualizationTasksLag(TDuration::Seconds(1)); + const TDuration actualizationLag = NYDBTest::TControllers::GetColumnShardController()->GetActualizationTasksLag(); for (auto&& i : pathEviction) { auto g = GetGranuleOptional(i.first); if (g) { @@ -486,9 +488,8 @@ void TColumnEngineForLogs::UpsertPortion(const TPortionInfo& portionInfo, const bool TColumnEngineForLogs::ErasePortion(const TPortionInfo& portionInfo, bool updateStats) { Y_ABORT_UNLESS(!portionInfo.Empty()); const ui64 portion = portionInfo.GetPortion(); - auto spg = GetGranulePtrVerified(portionInfo.GetPathId()); - Y_ABORT_UNLESS(spg); - auto p = spg->GetPortionOptional(portion); + auto& spg = MutableGranuleVerified(portionInfo.GetPathId()); + auto p = spg.GetPortionOptional(portion); if (!p) { LOG_S_WARN("Portion erased already " << portionInfo << " at tablet " << TabletId); @@ -497,7 +498,7 @@ bool TColumnEngineForLogs::ErasePortion(const TPortionInfo& portionInfo, 
bool up if (updateStats) { UpdatePortionStats(*p, EStatsUpdateType::ERASE); } - Y_ABORT_UNLESS(spg->ErasePortion(portion)); + Y_ABORT_UNLESS(spg.ErasePortion(portion)); return true; } } @@ -510,20 +511,18 @@ std::shared_ptr TColumnEngineForLogs::Select(ui64 pathId, TSnapshot return out; } - for (const auto& [indexKey, keyPortions] : spg->GetPortionsIndex().GetPoints()) { - for (auto&& [_, portionInfo] : keyPortions.GetStart()) { - if (!portionInfo->IsVisible(snapshot)) { - continue; - } - Y_ABORT_UNLESS(portionInfo->Produced()); - const bool skipPortion = !pkRangesFilter.IsPortionInUsage(*portionInfo, VersionedIndex.GetLastSchema()->GetIndexInfo()); - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", skipPortion ? "portion_skipped" : "portion_selected") - ("pathId", pathId)("portion", portionInfo->DebugString()); - if (skipPortion) { - continue; - } - out->PortionsOrderedPK.emplace_back(portionInfo); + for (const auto& [_, portionInfo] : spg->GetPortions()) { + if (!portionInfo->IsVisible(snapshot)) { + continue; } + Y_ABORT_UNLESS(portionInfo->Produced()); + const bool skipPortion = !pkRangesFilter.IsPortionInUsage(*portionInfo); + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", skipPortion ? 
"portion_skipped" : "portion_selected")("pathId", pathId)( + "portion", portionInfo->DebugString()); + if (skipPortion) { + continue; + } + out->PortionsOrderedPK.emplace_back(portionInfo); } return out; @@ -575,12 +574,4 @@ void TColumnEngineForLogs::DoRegisterTable(const ui64 pathId) { } } -TDuration TColumnEngineForLogs::GetRemovedPortionLivetime() { - TDuration result = TDuration::Minutes(10); - if (HasAppData() && AppDataVerified().ColumnShardConfig.HasRemovedPortionLivetimeSeconds()) { - result = TDuration::Seconds(AppDataVerified().ColumnShardConfig.GetRemovedPortionLivetimeSeconds()); - } - return NYDBTest::TControllers::GetColumnShardController()->GetRemovedPortionLivetime(result); -} - } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.h b/ydb/core/tx/columnshard/engines/column_engine_logs.h index a48fb9cda9e0..7b515c26f40c 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.h +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.h @@ -53,10 +53,7 @@ class TColumnEngineForLogs : public IColumnEngine { std::shared_ptr StoragesManager; std::shared_ptr ActualizationController; - - static TDuration GetRemovedPortionLivetime(); - - const TDuration RemovedPortionLivetime = GetRemovedPortionLivetime(); + std::shared_ptr SchemaObjectsCache = std::make_shared(); public: const std::shared_ptr& GetActualizationController() const { @@ -112,7 +109,7 @@ class TColumnEngineForLogs : public IColumnEngine { return limit < TGranulesStat::GetSumMetadataMemoryPortionsSize(); } - std::shared_ptr StartInsert(std::vector&& dataToIndex) noexcept override; + std::shared_ptr StartInsert(std::vector&& dataToIndex) noexcept override; std::shared_ptr StartCompaction(const std::shared_ptr& dataLocksManager) noexcept override; std::shared_ptr StartCleanupPortions(const TSnapshot& snapshot, const THashSet& pathsToDrop, const std::shared_ptr& dataLocksManager) noexcept override; std::shared_ptr StartCleanupTables(const THashSet& 
pathsToDrop) noexcept override; @@ -154,6 +151,10 @@ class TColumnEngineForLogs : public IColumnEngine { return *GetGranulePtrVerified(pathId); } + TGranuleMeta& MutableGranuleVerified(const ui64 pathId) const { + return *GetGranulePtrVerified(pathId); + } + std::shared_ptr GetGranulePtrVerified(const ui64 pathId) const { auto result = GetGranuleOptional(pathId); AFL_VERIFY(result)("path_id", pathId); @@ -173,7 +174,7 @@ class TColumnEngineForLogs : public IColumnEngine { } void AddCleanupPortion(const TPortionInfo& info) { - CleanupPortions[info.GetRemoveSnapshotVerified().GetPlanInstant() + RemovedPortionLivetime].emplace_back(info); + CleanupPortions[info.GetRemoveSnapshotVerified().GetPlanInstant()].emplace_back(info); } void AddShardingInfo(const TGranuleShardingInfo& shardingInfo) { VersionedIndex.AddShardingInfo(shardingInfo); diff --git a/ydb/core/tx/columnshard/engines/db_wrapper.cpp b/ydb/core/tx/columnshard/engines/db_wrapper.cpp index d38cdc53c1ae..b5c8e5e4ea58 100644 --- a/ydb/core/tx/columnshard/engines/db_wrapper.cpp +++ b/ydb/core/tx/columnshard/engines/db_wrapper.cpp @@ -12,7 +12,7 @@ void TDbWrapper::Insert(const TInsertedData& data) { NColumnShard::Schema::InsertTable_Insert(db, data); } -void TDbWrapper::Commit(const TInsertedData& data) { +void TDbWrapper::Commit(const TCommittedData& data) { NIceDb::TNiceDb db(Database); NColumnShard::Schema::InsertTable_Commit(db, data); } @@ -27,7 +27,7 @@ void TDbWrapper::EraseInserted(const TInsertedData& data) { NColumnShard::Schema::InsertTable_EraseInserted(db, data); } -void TDbWrapper::EraseCommitted(const TInsertedData& data) { +void TDbWrapper::EraseCommitted(const TCommittedData& data) { NIceDb::TNiceDb db(Database); NColumnShard::Schema::InsertTable_EraseCommitted(db, data); } diff --git a/ydb/core/tx/columnshard/engines/db_wrapper.h b/ydb/core/tx/columnshard/engines/db_wrapper.h index 39536cb9c987..50958b6fca29 100644 --- a/ydb/core/tx/columnshard/engines/db_wrapper.h +++ 
b/ydb/core/tx/columnshard/engines/db_wrapper.h @@ -16,7 +16,8 @@ namespace NKikimr::NOlap { class TColumnChunkLoadContext; class TIndexChunkLoadContext; -struct TInsertedData; +class TInsertedData; +class TCommittedData; class TInsertTableAccessor; class TColumnRecord; class TIndexChunk; @@ -30,10 +31,10 @@ class IDbWrapper { virtual ~IDbWrapper() = default; virtual void Insert(const TInsertedData& data) = 0; - virtual void Commit(const TInsertedData& data) = 0; + virtual void Commit(const TCommittedData& data) = 0; virtual void Abort(const TInsertedData& data) = 0; virtual void EraseInserted(const TInsertedData& data) = 0; - virtual void EraseCommitted(const TInsertedData& data) = 0; + virtual void EraseCommitted(const TCommittedData& data) = 0; virtual void EraseAborted(const TInsertedData& data) = 0; virtual bool Load(TInsertTableAccessor& insertTable, const TInstant& loadTime) = 0; @@ -63,10 +64,10 @@ class TDbWrapper : public IDbWrapper { {} void Insert(const TInsertedData& data) override; - void Commit(const TInsertedData& data) override; + void Commit(const TCommittedData& data) override; void Abort(const TInsertedData& data) override; void EraseInserted(const TInsertedData& data) override; - void EraseCommitted(const TInsertedData& data) override; + void EraseCommitted(const TCommittedData& data) override; void EraseAborted(const TInsertedData& data) override; bool Load(TInsertTableAccessor& insertTable, const TInstant& loadTime) override; diff --git a/ydb/core/tx/columnshard/engines/defs.cpp b/ydb/core/tx/columnshard/engines/defs.cpp new file mode 100644 index 000000000000..2c50c99d1d3c --- /dev/null +++ b/ydb/core/tx/columnshard/engines/defs.cpp @@ -0,0 +1,11 @@ +#include "defs.h" + +template <> +void Out(IOutputStream& os, TTypeTraits::TFuncParam val) { + os << (ui64)val; +} + +template <> +void Out(IOutputStream& os, TTypeTraits::TFuncParam val) { + os << (ui64)val; +} diff --git a/ydb/core/tx/columnshard/engines/defs.h 
b/ydb/core/tx/columnshard/engines/defs.h index a01edc7ef767..efe1f1c744a8 100644 --- a/ydb/core/tx/columnshard/engines/defs.h +++ b/ydb/core/tx/columnshard/engines/defs.h @@ -8,18 +8,33 @@ namespace NKikimr::NOlap { using TLogThis = TCtorLogger; -enum class TWriteId : ui64 {}; +enum class TOperationWriteId : ui64 { +}; +enum class TInsertWriteId : ui64 { +}; + +inline TOperationWriteId operator++(TOperationWriteId& w) noexcept { + w = TOperationWriteId{ ui64(w) + 1 }; + return w; +} -inline TWriteId operator++(TWriteId& w) noexcept { - w = TWriteId{ui64(w) + 1}; +inline TInsertWriteId operator++(TInsertWriteId& w) noexcept { + w = TInsertWriteId{ ui64(w) + 1 }; return w; } -} // namespace NKikimr::NOlap +} // namespace NKikimr::NOlap + +template <> +struct THash { + inline size_t operator()(const NKikimr::NOlap::TInsertWriteId x) const noexcept { + return THash()(ui64(x)); + } +}; template <> -struct THash { - inline size_t operator()(const NKikimr::NOlap::TWriteId x) const noexcept { +struct THash { + inline size_t operator()(const NKikimr::NOlap::TOperationWriteId x) const noexcept { return THash()(ui64(x)); } }; diff --git a/ydb/core/tx/columnshard/splitter/similar_packer.cpp b/ydb/core/tx/columnshard/engines/insert_table/committed.cpp similarity index 52% rename from ydb/core/tx/columnshard/splitter/similar_packer.cpp rename to ydb/core/tx/columnshard/engines/insert_table/committed.cpp index 9d22b3a6b255..bd4bb9ff6c06 100644 --- a/ydb/core/tx/columnshard/splitter/similar_packer.cpp +++ b/ydb/core/tx/columnshard/engines/insert_table/committed.cpp @@ -1,4 +1,4 @@ -#include "similar_packer.h" +#include "committed.h" namespace NKikimr::NOlap { diff --git a/ydb/core/tx/columnshard/engines/insert_table/committed.h b/ydb/core/tx/columnshard/engines/insert_table/committed.h new file mode 100644 index 000000000000..bd633647b5ec --- /dev/null +++ b/ydb/core/tx/columnshard/engines/insert_table/committed.h @@ -0,0 +1,161 @@ +#pragma once +#include "user_data.h" + +#include 
+ +namespace NKikimr::NOlap { + +class TCommittedData: public TUserDataContainer { +private: + using TBase = TUserDataContainer; + YDB_READONLY(TSnapshot, Snapshot, NOlap::TSnapshot::Zero()); + YDB_READONLY_DEF(TString, DedupId); + YDB_READONLY(bool, Remove, false); + +public: + TCommittedData(const std::shared_ptr& userData, const ui64 planStep, const ui64 txId, const TInsertWriteId insertWriteId) + : TBase(userData) + , Snapshot(planStep, txId) + , DedupId(ToString(planStep) + ":" + ToString((ui64)insertWriteId)) { + } + + TCommittedData(const std::shared_ptr& userData, const ui64 planStep, const ui64 txId, const TString& dedupId) + : TBase(userData) + , Snapshot(planStep, txId) + , DedupId(dedupId) { + } + + TCommittedData(const std::shared_ptr& userData, const TSnapshot& ss, const TInsertWriteId insertWriteId) + : TBase(userData) + , Snapshot(ss) + , DedupId(ToString(ss.GetPlanStep()) + ":" + ToString((ui64)insertWriteId)) { + } + + void SetRemove() { + AFL_VERIFY(!Remove); + Remove = true; + } + + bool operator<(const TCommittedData& key) const { + if (Snapshot == key.Snapshot) { + if (UserData->GetPathId() == key.UserData->GetPathId()) { + return DedupId < key.DedupId; + } else { + return UserData->GetPathId() < key.UserData->GetPathId(); + } + } else { + return Snapshot < key.Snapshot; + } + } +}; + +class TCommittedBlob { +private: + TBlobRange BlobRange; + std::variant WriteInfo; + YDB_READONLY(ui64, SchemaVersion, 0); + YDB_READONLY(ui64, RecordsCount, 0); + YDB_READONLY(bool, IsDelete, false); + NArrow::TReplaceKey First; + NArrow::TReplaceKey Last; + YDB_READONLY_DEF(NArrow::TSchemaSubset, SchemaSubset); + +public: + const NArrow::TReplaceKey& GetFirst() const { + return First; + } + const NArrow::TReplaceKey& GetLast() const { + return Last; + } + + ui64 GetSize() const { + return BlobRange.Size; + } + + TCommittedBlob(const TBlobRange& blobRange, const TSnapshot& snapshot, const ui64 schemaVersion, const ui64 recordsCount, + const NArrow::TReplaceKey& 
first, const NArrow::TReplaceKey& last, const bool isDelete, + const NArrow::TSchemaSubset& subset) + : BlobRange(blobRange) + , WriteInfo(snapshot) + , SchemaVersion(schemaVersion) + , RecordsCount(recordsCount) + , IsDelete(isDelete) + , First(first) + , Last(last) + , SchemaSubset(subset) { + } + + TCommittedBlob(const TBlobRange& blobRange, const TInsertWriteId writeId, const ui64 schemaVersion, const ui64 recordsCount, + const NArrow::TReplaceKey& first, const NArrow::TReplaceKey& last, const bool isDelete, + const NArrow::TSchemaSubset& subset) + : BlobRange(blobRange) + , WriteInfo(writeId) + , SchemaVersion(schemaVersion) + , RecordsCount(recordsCount) + , IsDelete(isDelete) + , First(first) + , Last(last) + , SchemaSubset(subset) { + } + + /// It uses trick then we place key with planStep:txId in container and find them later by BlobId only. + /// So hash() and equality should depend on BlobId only. + bool operator==(const TCommittedBlob& key) const { + return BlobRange == key.BlobRange; + } + ui64 Hash() const noexcept { + return BlobRange.Hash(); + } + TString DebugString() const { + if (auto* ss = GetSnapshotOptional()) { + return TStringBuilder() << BlobRange << ";snapshot=" << ss->DebugString(); + } else { + return TStringBuilder() << BlobRange << ";write_id=" << (ui64)GetWriteIdVerified(); + } + } + + bool HasSnapshot() const { + return GetSnapshotOptional(); + } + + const TSnapshot& GetSnapshotDef(const TSnapshot& def) const { + if (auto* snapshot = GetSnapshotOptional()) { + return *snapshot; + } else { + return def; + } + } + + const TSnapshot* GetSnapshotOptional() const { + return std::get_if(&WriteInfo); + } + + const TSnapshot& GetSnapshotVerified() const { + auto* result = GetSnapshotOptional(); + AFL_VERIFY(result); + return *result; + } + + const TInsertWriteId* GetWriteIdOptional() const { + return std::get_if(&WriteInfo); + } + + TInsertWriteId GetWriteIdVerified() const { + auto* result = GetWriteIdOptional(); + AFL_VERIFY(result); + 
return *result; + } + + const TBlobRange& GetBlobRange() const { + return BlobRange; + } +}; + +} // namespace NKikimr::NOlap + +template <> +struct THash { + inline size_t operator()(const NKikimr::NOlap::TCommittedBlob& key) const { + return key.Hash(); + } +}; diff --git a/ydb/core/tx/columnshard/engines/insert_table/data.h b/ydb/core/tx/columnshard/engines/insert_table/data.h deleted file mode 100644 index e804fec66ec9..000000000000 --- a/ydb/core/tx/columnshard/engines/insert_table/data.h +++ /dev/null @@ -1,199 +0,0 @@ -#pragma once -#include "meta.h" -#include -#include -#include -#include - -namespace NKikimr::NOlap { - -struct TInsertedData { -private: - TInsertedDataMeta Meta; - YDB_READONLY_DEF(TBlobRange, BlobRange); - class TBlobStorageGuard { - private: - YDB_READONLY_DEF(TString, Data); - public: - TBlobStorageGuard(const TString& data) - : Data(data) - { - - } - ~TBlobStorageGuard(); - }; - - std::shared_ptr BlobDataGuard; - -public: - ui64 PlanStep = 0; - ui64 WriteTxId = 0; - ui64 PathId = 0; - TString DedupId; - -private: - YDB_READONLY(ui64, SchemaVersion, 0); - YDB_READONLY_FLAG(NotAbortable, false); - -public: - void MarkAsNotAbortable() { - NotAbortableFlag = true; - } - - std::optional GetBlobData() const { - if (BlobDataGuard) { - return BlobDataGuard->GetData(); - } else { - return {}; - } - } - - ui64 GetTxVolume() const { - return Meta.GetTxVolume() + sizeof(TBlobRange); - } - - const TInsertedDataMeta& GetMeta() const { - return Meta; - } - - TInsertedData() = delete; // avoid invalid TInsertedData anywhere - - TInsertedData(ui64 planStep, ui64 writeTxId, ui64 pathId, TString dedupId, const TBlobRange& blobRange, - const NKikimrTxColumnShard::TLogicalMetadata& proto, const ui64 schemaVersion, const std::optional& blobData); - - TInsertedData(ui64 writeTxId, ui64 pathId, TString dedupId, const TBlobRange& blobRange, const NKikimrTxColumnShard::TLogicalMetadata& proto, - const ui64 schemaVersion, const std::optional& blobData) - : 
TInsertedData(0, writeTxId, pathId, dedupId, blobRange, proto, schemaVersion, blobData) - {} - - TInsertedData(ui64 writeTxId, ui64 pathId, TString dedupId, const TUnifiedBlobId& blobId, - const NKikimrTxColumnShard::TLogicalMetadata& proto, const ui64 schemaVersion, const std::optional& blobData) - : TInsertedData(0, writeTxId, pathId, dedupId, TBlobRange(blobId, 0, blobId.BlobSize()), proto, schemaVersion, blobData) - { - } - - ~TInsertedData(); - - bool operator < (const TInsertedData& key) const { - if (PlanStep < key.PlanStep) { - return true; - } else if (PlanStep > key.PlanStep) { - return false; - } - - // PlanStep == key.PlanStep - if (WriteTxId < key.WriteTxId) { - return true; - } else if (WriteTxId > key.WriteTxId) { - return false; - } - - // PlanStep == key.PlanStep && WriteTxId == key.WriteTxId - if (PathId < key.PathId) { - return true; - } else if (PathId > key.PathId) { - return false; - } - - return DedupId < key.DedupId; - } - - bool operator == (const TInsertedData& key) const { - return (PlanStep == key.PlanStep) && - (WriteTxId == key.WriteTxId) && - (PathId == key.PathId) && - (DedupId == key.DedupId); - } - - /// We commit many writeIds in one txId. There could be several blobs with same WriteId and different DedupId. - /// One of them wins and becomes committed. Original DedupId would be lost then. - /// After commit we use original Initiator:WriteId as DedupId of inserted blob inside {PlanStep, TxId}. - /// pathId, initiator, {writeId}, {dedupId} -> pathId, planStep, txId, {dedupId} - void Commit(const ui64 planStep, const ui64 txId) { - DedupId = ToString(PlanStep) + ":" + ToString((ui64)WriteTxId); - PlanStep = planStep; - WriteTxId = txId; - } - - /// Undo Commit() operation. Restore Initiator:WriteId from DedupId. 
- void Undo() { - TVector tokens; - size_t numTokens = Split(DedupId, ":", tokens); - Y_ABORT_UNLESS(numTokens == 2); - - PlanStep = FromString(tokens[0]); - WriteTxId = FromString(tokens[1]); - DedupId.clear(); - } - - TSnapshot GetSnapshot() const { - return TSnapshot(PlanStep, WriteTxId); - } - - ui32 BlobSize() const { return BlobRange.GetBlobSize(); } - -}; - -class TCommittedBlob { -private: - TBlobRange BlobRange; - TSnapshot CommitSnapshot; - YDB_READONLY(ui64, SchemaVersion, 0); - YDB_READONLY(ui64, RecordsCount, 0); - YDB_READONLY(bool, IsDelete, false); - YDB_READONLY_DEF(std::optional, First); - YDB_READONLY_DEF(std::optional, Last); - YDB_READONLY_DEF(NArrow::TSchemaSubset, SchemaSubset); - -public: - ui64 GetSize() const { - return BlobRange.Size; - } - - const NArrow::TReplaceKey& GetFirstVerified() const { - Y_ABORT_UNLESS(First); - return *First; - } - - const NArrow::TReplaceKey& GetLastVerified() const { - Y_ABORT_UNLESS(Last); - return *Last; - } - - TCommittedBlob(const TBlobRange& blobRange, const TSnapshot& snapshot, const ui64 schemaVersion, const ui64 recordsCount, const std::optional& first, - const std::optional& last, const bool isDelete, const NArrow::TSchemaSubset& subset) - : BlobRange(blobRange) - , CommitSnapshot(snapshot) - , SchemaVersion(schemaVersion) - , RecordsCount(recordsCount) - , IsDelete(isDelete) - , First(first) - , Last(last) - , SchemaSubset(subset) - {} - - /// It uses trick then we place key with planStep:txId in container and find them later by BlobId only. - /// So hash() and equality should depend on BlobId only. 
- bool operator == (const TCommittedBlob& key) const { return BlobRange == key.BlobRange; } - ui64 Hash() const noexcept { return BlobRange.Hash(); } - TString DebugString() const { - return TStringBuilder() << BlobRange << ";ps=" << CommitSnapshot.GetPlanStep() << ";ti=" << CommitSnapshot.GetTxId(); - } - - const TSnapshot& GetSnapshot() const { - return CommitSnapshot; - } - - const TBlobRange& GetBlobRange() const { - return BlobRange; - } -}; - -} - -template <> -struct THash { - inline size_t operator() (const NKikimr::NOlap::TCommittedBlob& key) const { - return key.Hash(); - } -}; diff --git a/ydb/core/tx/columnshard/engines/insert_table/insert_table.cpp b/ydb/core/tx/columnshard/engines/insert_table/insert_table.cpp index 63e2d0905b99..16cbe6ff616f 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/insert_table.cpp +++ b/ydb/core/tx/columnshard/engines/insert_table/insert_table.cpp @@ -1,7 +1,9 @@ #include "insert_table.h" + #include -#include +#include #include +#include namespace NKikimr::NOlap { @@ -11,12 +13,13 @@ bool TInsertTable::Insert(IDbWrapper& dbTable, TInsertedData&& data) { dbTable.Insert(*dataPtr); return true; } else { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_insertion"); return false; } } -TInsertionSummary::TCounters TInsertTable::Commit(IDbWrapper& dbTable, ui64 planStep, ui64 txId, - const THashSet& writeIds, std::function pathExists) { +TInsertionSummary::TCounters TInsertTable::Commit( + IDbWrapper& dbTable, ui64 planStep, ui64 txId, const THashSet& writeIds, std::function pathExists) { Y_ABORT_UNLESS(!writeIds.empty()); TInsertionSummary::TCounters counters; @@ -33,15 +36,19 @@ TInsertionSummary::TCounters TInsertTable::Commit(IDbWrapper& dbTable, ui64 plan dbTable.EraseInserted(*data); - const ui64 pathId = data->PathId; + const ui64 pathId = data->GetPathId(); auto* pathInfo = Summary.GetPathInfoOptional(pathId); // There could be commit after drop: propose, drop, plan if (pathInfo && pathExists(pathId)) { - 
data->Commit(planStep, txId); - dbTable.Commit(*data); + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "commit_insertion")("path_id", data->GetPathId())( + "blob_range", data->GetBlobRange().ToString()); + auto committed = data->Commit(planStep, txId); + dbTable.Commit(committed); - pathInfo->AddCommitted(std::move(*data)); + pathInfo->AddCommitted(std::move(committed)); } else { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "abort_insertion")("path_id", data->GetPathId())( + "blob_range", data->GetBlobRange().ToString()); dbTable.Abort(*data); Summary.AddAborted(std::move(*data)); } @@ -50,12 +57,14 @@ TInsertionSummary::TCounters TInsertTable::Commit(IDbWrapper& dbTable, ui64 plan return counters; } -void TInsertTable::Abort(IDbWrapper& dbTable, const THashSet& writeIds) { +void TInsertTable::Abort(IDbWrapper& dbTable, const THashSet& writeIds) { Y_ABORT_UNLESS(!writeIds.empty()); for (auto writeId : writeIds) { // There could be inconsistency with txs and writes in case of bugs. So we could find no record for writeId. 
if (std::optional data = Summary.ExtractInserted(writeId)) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "abort_insertion")("path_id", data->GetPathId())( + "blob_range", data->GetBlobRange().ToString())("write_id", writeId); dbTable.EraseInserted(*data); dbTable.Abort(*data); Summary.AddAborted(std::move(*data)); @@ -63,88 +72,97 @@ void TInsertTable::Abort(IDbWrapper& dbTable, const THashSet& writeIds } } -THashSet TInsertTable::OldWritesToAbort(const TInstant& now) const { +THashSet TInsertTable::OldWritesToAbort(const TInstant& now) const { return Summary.GetExpiredInsertions(now - WaitCommitDelay, CleanupPackageSize); } -THashSet TInsertTable::DropPath(IDbWrapper& dbTable, ui64 pathId) { - auto pathInfo = Summary.ExtractPathInfo(pathId); - if (!!pathInfo) { - for (auto& data : pathInfo->GetCommitted()) { - dbTable.EraseCommitted(data); - TInsertedData copy = data; - copy.Undo(); - dbTable.Abort(copy); - Summary.AddAborted(std::move(copy)); - } - } - - return Summary.GetInsertedByPathId(pathId); -} - -void TInsertTable::EraseCommittedOnExecute(IDbWrapper& dbTable, const TInsertedData& data, const std::shared_ptr& blobsAction) { +void TInsertTable::EraseCommittedOnExecute( + IDbWrapper& dbTable, const TCommittedData& data, const std::shared_ptr& blobsAction) { if (Summary.HasCommitted(data)) { dbTable.EraseCommitted(data); RemoveBlobLinkOnExecute(data.GetBlobRange().BlobId, blobsAction); } } -void TInsertTable::EraseCommittedOnComplete(const TInsertedData& data) { +void TInsertTable::EraseCommittedOnComplete(const TCommittedData& data) { if (Summary.EraseCommitted(data)) { RemoveBlobLinkOnComplete(data.GetBlobRange().BlobId); } } -void TInsertTable::EraseAbortedOnExecute(IDbWrapper& dbTable, const TInsertedData& data, const std::shared_ptr& blobsAction) { - if (Summary.HasAborted((TWriteId)data.WriteTxId)) { +void TInsertTable::EraseAbortedOnExecute( + IDbWrapper& dbTable, const TInsertedData& data, const std::shared_ptr& blobsAction) { + if 
(Summary.HasAborted(data.GetInsertWriteId())) { dbTable.EraseAborted(data); RemoveBlobLinkOnExecute(data.GetBlobRange().BlobId, blobsAction); } } void TInsertTable::EraseAbortedOnComplete(const TInsertedData& data) { - if (Summary.EraseAborted((TWriteId)data.WriteTxId)) { + if (Summary.EraseAborted(data.GetInsertWriteId())) { RemoveBlobLinkOnComplete(data.GetBlobRange().BlobId); } } -bool TInsertTable::Load(IDbWrapper& dbTable, const TInstant loadTime) { +bool TInsertTable::Load(NIceDb::TNiceDb& db, IDbWrapper& dbTable, const TInstant loadTime) { Y_ABORT_UNLESS(!Loaded); Loaded = true; + LastWriteId = (TInsertWriteId)0; + if (!NColumnShard::Schema::GetSpecialValueOpt(db, NColumnShard::Schema::EValueIds::LastWriteId, LastWriteId)) { + return false; + } + return dbTable.Load(*this, loadTime); } -std::vector TInsertTable::Read(ui64 pathId, const TSnapshot& snapshot, const std::shared_ptr& pkSchema) const { +std::vector TInsertTable::Read(ui64 pathId, const std::optional lockId, const TSnapshot& reqSnapshot, + const std::shared_ptr& pkSchema, const TPKRangesFilter* pkRangesFilter) const { const TPathInfo* pInfo = Summary.GetPathInfoOptional(pathId); if (!pInfo) { return {}; } - std::vector ret; - ret.reserve(pInfo->GetCommitted().size()); + std::vector result; + result.reserve(pInfo->GetCommitted().size() + pInfo->GetInserted().size()); for (const auto& data : pInfo->GetCommitted()) { - if (std::less_equal()(data.GetSnapshot(), snapshot)) { - ret.emplace_back(&data); + if (lockId || data.GetSnapshot() <= reqSnapshot) { + auto start = data.GetMeta().GetFirstPK(pkSchema); + auto finish = data.GetMeta().GetLastPK(pkSchema); + if (pkRangesFilter && pkRangesFilter->IsPortionInPartialUsage(start, finish) == TPKRangeFilter::EUsageClass::DontUsage) { + continue; + } + result.emplace_back(TCommittedBlob(data.GetBlobRange(), data.GetSnapshot(), data.GetSchemaVersion(), data.GetMeta().GetNumRows(), + start, finish, data.GetMeta().GetModificationType() == 
NEvWrite::EModificationType::Delete, data.GetMeta().GetSchemaSubset())); } } - const auto pred = [pkSchema](const TInsertedData* l, const TInsertedData* r) { - return l->GetMeta().GetFirstPK(pkSchema) < r->GetMeta().GetFirstPK(pkSchema); - }; - std::sort(ret.begin(), ret.end(), pred); - - std::vector result; - result.reserve(ret.size()); - for (auto&& i : ret) { - result.emplace_back(TCommittedBlob( - i->GetBlobRange(), i->GetSnapshot(), i->GetSchemaVersion(), i->GetMeta().GetNumRows(), i->GetMeta().GetFirstPK(pkSchema), i->GetMeta().GetLastPK(pkSchema) - , i->GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete, i->GetMeta().GetSchemaSubset())); + if (lockId) { + for (const auto& [writeId, data] : pInfo->GetInserted()) { + auto start = data.GetMeta().GetFirstPK(pkSchema); + auto finish = data.GetMeta().GetLastPK(pkSchema); + if (pkRangesFilter && pkRangesFilter->IsPortionInPartialUsage(start, finish) == TPKRangeFilter::EUsageClass::DontUsage) { + continue; + } + result.emplace_back(TCommittedBlob(data.GetBlobRange(), writeId, data.GetSchemaVersion(), data.GetMeta().GetNumRows(), start, finish, + data.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete, data.GetMeta().GetSchemaSubset())); + } } - return result; } -bool TInsertTableAccessor::RemoveBlobLinkOnExecute(const TUnifiedBlobId& blobId, const std::shared_ptr& blobsAction) { +TInsertWriteId TInsertTable::BuildNextWriteId(NTabletFlatExecutor::TTransactionContext& txc) { + NIceDb::TNiceDb db(txc.DB); + return BuildNextWriteId(db); +} + +TInsertWriteId TInsertTable::BuildNextWriteId(NIceDb::TNiceDb& db) { + TInsertWriteId writeId = ++LastWriteId; + NColumnShard::Schema::SaveSpecialValue(db, NColumnShard::Schema::EValueIds::LastWriteId, (ui64)writeId); + return writeId; +} + +bool TInsertTableAccessor::RemoveBlobLinkOnExecute( + const TUnifiedBlobId& blobId, const std::shared_ptr& blobsAction) { AFL_VERIFY(blobsAction); auto itBlob = BlobLinks.find(blobId); AFL_VERIFY(itBlob 
!= BlobLinks.end()); @@ -170,4 +188,4 @@ bool TInsertTableAccessor::RemoveBlobLinkOnComplete(const TUnifiedBlobId& blobId } } -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/insert_table.h b/ydb/core/tx/columnshard/engines/insert_table/insert_table.h index 27762ebf5c66..b44e64312191 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/insert_table.h +++ b/ydb/core/tx/columnshard/engines/insert_table/insert_table.h @@ -1,11 +1,15 @@ #pragma once -#include "data.h" -#include "rt_insertion.h" +#include "committed.h" +#include "inserted.h" #include "path_info.h" +#include "rt_insertion.h" + +#include +#include #include namespace NKikimr::NOlap { - +class TPKRangesFilter; class IDbWrapper; /// Use one table for inserted and committed blobs: @@ -23,7 +27,14 @@ class TInsertTableAccessor { bool RemoveBlobLinkOnExecute(const TUnifiedBlobId& blobId, const std::shared_ptr& blobsAction); bool RemoveBlobLinkOnComplete(const TUnifiedBlobId& blobId); + public: + void ErasePath(const ui64 pathId) { + Summary.ErasePath(pathId); + } + bool HasDataInPathId(const ui64 pathId) const { + return Summary.HasPathIdData(pathId); + } const std::map>& GetPathPriorities() const { return Summary.GetPathPriorities(); } @@ -46,20 +57,28 @@ class TInsertTableAccessor { return Summary.AddInserted(std::move(data), load); } bool AddAborted(TInsertedData&& data, const bool load) { + AFL_VERIFY_DEBUG(!Summary.ExtractInserted(data.GetInsertWriteId())); if (load) { AddBlobLink(data.GetBlobRange().BlobId); } return Summary.AddAborted(std::move(data), load); } - bool AddCommitted(TInsertedData&& data, const bool load) { + bool AddCommitted(TCommittedData&& data, const bool load) { if (load) { AddBlobLink(data.GetBlobRange().BlobId); } - const ui64 pathId = data.PathId; + const ui64 pathId = data.GetPathId(); return Summary.GetPathInfo(pathId).AddCommitted(std::move(data), load); } - const THashMap& GetAborted() const { return Summary.GetAborted(); } - const 
THashMap& GetInserted() const { return Summary.GetInserted(); } + bool HasPathIdData(const ui64 pathId) const { + return Summary.HasPathIdData(pathId); + } + const THashMap& GetAborted() const { + return Summary.GetAborted(); + } + const THashMap& GetInserted() const { + return Summary.GetInserted(); + } const TInsertionSummary::TCounters& GetCountersPrepared() const { return Summary.GetCountersPrepared(); } @@ -74,28 +93,34 @@ class TInsertTableAccessor { class TInsertTable: public TInsertTableAccessor { private: bool Loaded = false; + TInsertWriteId LastWriteId = TInsertWriteId{ 0 }; + public: static constexpr const TDuration WaitCommitDelay = TDuration::Minutes(10); static constexpr ui64 CleanupPackageSize = 10000; bool Insert(IDbWrapper& dbTable, TInsertedData&& data); - TInsertionSummary::TCounters Commit(IDbWrapper& dbTable, ui64 planStep, ui64 txId, - const THashSet& writeIds, std::function pathExists); - void Abort(IDbWrapper& dbTable, const THashSet& writeIds); - void MarkAsNotAbortable(const TWriteId writeId) { + TInsertionSummary::TCounters Commit( + IDbWrapper& dbTable, ui64 planStep, ui64 txId, const THashSet& writeIds, std::function pathExists); + void Abort(IDbWrapper& dbTable, const THashSet& writeIds); + void MarkAsNotAbortable(const TInsertWriteId writeId) { Summary.MarkAsNotAbortable(writeId); } - THashSet OldWritesToAbort(const TInstant& now) const; - THashSet DropPath(IDbWrapper& dbTable, ui64 pathId); + THashSet OldWritesToAbort(const TInstant& now) const; - void EraseCommittedOnExecute(IDbWrapper& dbTable, const TInsertedData& key, const std::shared_ptr& blobsAction); - void EraseCommittedOnComplete(const TInsertedData& key); + void EraseCommittedOnExecute( + IDbWrapper& dbTable, const TCommittedData& key, const std::shared_ptr& blobsAction); + void EraseCommittedOnComplete(const TCommittedData& key); void EraseAbortedOnExecute(IDbWrapper& dbTable, const TInsertedData& key, const std::shared_ptr& blobsAction); void 
EraseAbortedOnComplete(const TInsertedData& key); - std::vector Read(ui64 pathId, const TSnapshot& snapshot, const std::shared_ptr& pkSchema) const; - bool Load(IDbWrapper& dbTable, const TInstant loadTime); + std::vector Read(ui64 pathId, const std::optional lockId, const TSnapshot& reqSnapshot, + const std::shared_ptr& pkSchema, const TPKRangesFilter* pkRangesFilter) const; + bool Load(NIceDb::TNiceDb& db, IDbWrapper& dbTable, const TInstant loadTime); + + TInsertWriteId BuildNextWriteId(NTabletFlatExecutor::TTransactionContext& txc); + TInsertWriteId BuildNextWriteId(NIceDb::TNiceDb& db); }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/inserted.cpp b/ydb/core/tx/columnshard/engines/insert_table/inserted.cpp new file mode 100644 index 000000000000..2986fc0b4c35 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/insert_table/inserted.cpp @@ -0,0 +1,12 @@ +#include "committed.h" +#include "inserted.h" + +#include + +namespace NKikimr::NOlap { + +TCommittedData TInsertedData::Commit(const ui64 planStep, const ui64 txId) { + return TCommittedData(UserData, planStep, txId, InsertWriteId); +} + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/inserted.h b/ydb/core/tx/columnshard/engines/insert_table/inserted.h new file mode 100644 index 000000000000..e124edeb57e5 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/insert_table/inserted.h @@ -0,0 +1,35 @@ +#pragma once +#include "user_data.h" + +#include + +namespace NKikimr::NOlap { + +class TCommittedData; + +class TInsertedData: public TUserDataContainer { +private: + using TBase = TUserDataContainer; + YDB_READONLY(TInsertWriteId, InsertWriteId, TInsertWriteId(0)); + YDB_READONLY_FLAG(NotAbortable, false); + +public: + void MarkAsNotAbortable() { + NotAbortableFlag = true; + } + + TInsertedData() = delete; // avoid invalid TInsertedData anywhere + + TInsertedData(const TInsertWriteId writeId, const std::shared_ptr& userData) + : 
TBase(userData) + , InsertWriteId(writeId) { + } + + /// We commit many writeIds in one txId. There could be several blobs with same WriteId and different DedupId. + /// One of them wins and becomes committed. Original DedupId would be lost then. + /// After commit we use original Initiator:WriteId as DedupId of inserted blob inside {PlanStep, TxId}. + /// pathId, initiator, {writeId}, {dedupId} -> pathId, planStep, txId, {dedupId} + [[nodiscard]] TCommittedData Commit(const ui64 planStep, const ui64 txId); +}; + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/meta.cpp b/ydb/core/tx/columnshard/engines/insert_table/meta.cpp index caed6b91ac0d..eee7b2444b2e 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/meta.cpp +++ b/ydb/core/tx/columnshard/engines/insert_table/meta.cpp @@ -6,14 +6,23 @@ NKikimrTxColumnShard::TLogicalMetadata TInsertedDataMeta::SerializeToProto() con return OriginalProto; } -const std::optional& TInsertedDataMeta::GetSpecialKeys() const { - if (!KeysParsed) { - if (OriginalProto.HasSpecialKeysRawData()) { - SpecialKeysParsed = NArrow::TFirstLastSpecialKeys(OriginalProto.GetSpecialKeysRawData()); - } - KeysParsed = true; +std::shared_ptr TInsertedDataMeta::GetSpecialKeys(const std::shared_ptr& schema) const { + if (KeyInitialized.Val()) { + return SpecialKeysParsed; } - return SpecialKeysParsed; + std::shared_ptr result; + if (OriginalProto.HasSpecialKeysPayloadData()) { + result = std::make_shared(OriginalProto.GetSpecialKeysPayloadData(), schema); + } else if (OriginalProto.HasSpecialKeysRawData()) { + result = std::make_shared(OriginalProto.GetSpecialKeysRawData()); + } else { + AFL_VERIFY(false); + } + if (AtomicCas(&KeyInitialization, 1, 0)) { + SpecialKeysParsed = result; + KeyInitialized = 1; + } + return result; } } diff --git a/ydb/core/tx/columnshard/engines/insert_table/meta.h b/ydb/core/tx/columnshard/engines/insert_table/meta.h index 085e326d5247..cb55848c7067 100644 --- 
a/ydb/core/tx/columnshard/engines/insert_table/meta.h +++ b/ydb/core/tx/columnshard/engines/insert_table/meta.h @@ -17,12 +17,12 @@ class TInsertedDataMeta { YDB_READONLY(NEvWrite::EModificationType, ModificationType, NEvWrite::EModificationType::Upsert); YDB_READONLY_DEF(NArrow::TSchemaSubset, SchemaSubset); - mutable bool KeysParsed = false; - mutable std::optional SpecialKeysParsed; - + mutable TAtomicCounter KeyInitialized = 0; + mutable TAtomic KeyInitialization = 0; + mutable std::shared_ptr SpecialKeysParsed; NKikimrTxColumnShard::TLogicalMetadata OriginalProto; + std::shared_ptr GetSpecialKeys(const std::shared_ptr& schema) const; - const std::optional& GetSpecialKeys() const; public: ui64 GetTxVolume() const { return 2 * sizeof(ui64) + sizeof(ui32) + sizeof(OriginalProto) + (SpecialKeysParsed ? SpecialKeysParsed->GetMemoryBytes() : 0); @@ -43,19 +43,13 @@ class TInsertedDataMeta { } } - std::optional GetFirstPK(const std::shared_ptr& schema) const { - if (GetSpecialKeys()) { - return GetSpecialKeys()->GetFirst(schema); - } else { - return {}; - } + NArrow::TReplaceKey GetFirstPK(const std::shared_ptr& schema) const { + AFL_VERIFY(schema); + return GetSpecialKeys(schema)->GetFirst(); } - std::optional GetLastPK(const std::shared_ptr& schema) const { - if (GetSpecialKeys()) { - return GetSpecialKeys()->GetLast(schema); - } else { - return {}; - } + NArrow::TReplaceKey GetLastPK(const std::shared_ptr& schema) const { + AFL_VERIFY(schema); + return GetSpecialKeys(schema)->GetLast(); } NKikimrTxColumnShard::TLogicalMetadata SerializeToProto() const; diff --git a/ydb/core/tx/columnshard/engines/insert_table/path_info.cpp b/ydb/core/tx/columnshard/engines/insert_table/path_info.cpp index 31be2b8a8fa3..e7f4041d6b09 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/path_info.cpp +++ b/ydb/core/tx/columnshard/engines/insert_table/path_info.cpp @@ -29,7 +29,7 @@ void TPathInfo::AddInsertedSize(const i64 size, const ui64 overloadLimit) { 
SetInsertedOverload((ui64)InsertedSize > overloadLimit); } -bool TPathInfo::EraseCommitted(const TInsertedData& data) { +bool TPathInfo::EraseCommitted(const TCommittedData& data) { Summary->RemovePriority(*this); const bool result = Committed.erase(data); AddCommittedSize(-1 * (i64)data.BlobSize(), TCompactionLimits::OVERLOAD_INSERT_TABLE_SIZE_BY_PATH_ID); @@ -39,11 +39,11 @@ bool TPathInfo::EraseCommitted(const TInsertedData& data) { return result; } -bool TPathInfo::HasCommitted(const TInsertedData& data) { +bool TPathInfo::HasCommitted(const TCommittedData& data) { return Committed.contains(data); } -bool TPathInfo::AddCommitted(TInsertedData&& data, const bool load) { +bool TPathInfo::AddCommitted(TCommittedData&& data, const bool load) { const ui64 dataSize = data.BlobSize(); Summary->RemovePriority(*this); AddCommittedSize(data.BlobSize(), TCompactionLimits::OVERLOAD_INSERT_TABLE_SIZE_BY_PATH_ID); @@ -72,4 +72,8 @@ NKikimr::NOlap::TPathInfoIndexPriority TPathInfo::GetIndexationPriority() const } } +const THashMap& TPathInfo::GetInserted() const { + return Summary->GetInserted(); +} + } diff --git a/ydb/core/tx/columnshard/engines/insert_table/path_info.h b/ydb/core/tx/columnshard/engines/insert_table/path_info.h index 5e44929307c4..b5a5ccc32bc9 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/path_info.h +++ b/ydb/core/tx/columnshard/engines/insert_table/path_info.h @@ -1,7 +1,10 @@ #pragma once +#include "committed.h" +#include "inserted.h" + #include + #include -#include "data.h" namespace NKikimr::NOlap { class TInsertionSummary; @@ -17,12 +20,11 @@ class TPathInfoIndexPriority { private: YDB_READONLY(EIndexationPriority, Category, EIndexationPriority::NoPriority); const ui32 Weight; + public: TPathInfoIndexPriority(const EIndexationPriority category, const ui32 weight) : Category(category) - , Weight(weight) - { - + , Weight(weight) { } bool operator!() const { @@ -37,7 +39,7 @@ class TPathInfoIndexPriority { class TPathInfo: public TMoveOnly { 
private: const ui64 PathId = 0; - TSet Committed; + TSet Committed; YDB_READONLY(i64, CommittedSize, 0); YDB_READONLY(i64, InsertedSize, 0); bool CommittedOverload = false; @@ -51,6 +53,12 @@ class TPathInfo: public TMoveOnly { void AddCommittedSize(const i64 size, const ui64 overloadLimit); public: + bool IsEmpty() const { + return Committed.empty() && !InsertedSize; + } + + const THashMap& GetInserted() const; + void AddInsertedSize(const i64 size, const ui64 overloadLimit); explicit TPathInfo(TInsertionSummary& summary, const ui64 pathId); @@ -61,18 +69,18 @@ class TPathInfo: public TMoveOnly { TPathInfoIndexPriority GetIndexationPriority() const; - bool EraseCommitted(const TInsertedData& data); - bool HasCommitted(const TInsertedData& data); + bool EraseCommitted(const TCommittedData& data); + bool HasCommitted(const TCommittedData& data); - const TSet& GetCommitted() const { + const TSet& GetCommitted() const { return Committed; } - bool AddCommitted(TInsertedData&& data, const bool load = false); + bool AddCommitted(TCommittedData&& data, const bool load = false); bool IsOverloaded() const { return CommittedOverload || InsertedOverload; } }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.cpp b/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.cpp index cf5c0ceea3f0..3ad39fcd209e 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.cpp +++ b/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.cpp @@ -23,9 +23,10 @@ void TInsertionSummary::RemovePriority(const TPathInfo& pathInfo) noexcept { const auto priority = pathInfo.GetIndexationPriority(); auto it = Priorities.find(priority); if (it == Priorities.end()) { - Y_ABORT_UNLESS(!priority); + AFL_VERIFY(!priority); return; } + AFL_VERIFY(!!priority); Y_ABORT_UNLESS(it->second.erase(&pathInfo) || !priority); if (it->second.empty()) { Priorities.erase(it); @@ -33,7 +34,9 @@ void TInsertionSummary::RemovePriority(const 
TPathInfo& pathInfo) noexcept { } void TInsertionSummary::AddPriority(const TPathInfo& pathInfo) noexcept { - Y_ABORT_UNLESS(Priorities[pathInfo.GetIndexationPriority()].emplace(&pathInfo).second); + if (!!pathInfo.GetIndexationPriority()) { + Y_ABORT_UNLESS(Priorities[pathInfo.GetIndexationPriority()].emplace(&pathInfo).second); + } } NKikimr::NOlap::TPathInfo& TInsertionSummary::GetPathInfo(const ui64 pathId) { @@ -44,20 +47,6 @@ NKikimr::NOlap::TPathInfo& TInsertionSummary::GetPathInfo(const ui64 pathId) { return it->second; } -std::optional TInsertionSummary::ExtractPathInfo(const ui64 pathId) { - auto it = PathInfo.find(pathId); - if (it == PathInfo.end()) { - return {}; - } - RemovePriority(it->second); - std::optional result = std::move(it->second); - PathInfo.erase(it); - for (auto&& i : result->GetCommitted()) { - OnEraseCommitted(*result, i.BlobSize()); - } - return result; -} - NKikimr::NOlap::TPathInfo* TInsertionSummary::GetPathInfoOptional(const ui64 pathId) { auto it = PathInfo.find(pathId); if (it == PathInfo.end()) { @@ -100,10 +89,10 @@ void TInsertionSummary::OnEraseInserted(TPathInfo& pathInfo, const ui64 dataSize AFL_VERIFY(Counters.Inserted.GetDataSize() == (i64)StatsPrepared.Bytes); } -THashSet TInsertionSummary::GetInsertedByPathId(const ui64 pathId) const { - THashSet result; +THashSet TInsertionSummary::GetInsertedByPathId(const ui64 pathId) const { + THashSet result; for (auto& [writeId, data] : Inserted) { - if (data.PathId == pathId) { + if (data.GetPathId() == pathId) { result.insert(writeId); } } @@ -111,12 +100,12 @@ THashSet TInsertionSummary::GetInsertedByPathId(const return result; } -THashSet TInsertionSummary::GetExpiredInsertions(const TInstant timeBorder, const ui64 limit) const { +THashSet TInsertionSummary::GetExpiredInsertions(const TInstant timeBorder, const ui64 limit) const { if (timeBorder < MinInsertedTs) { return {}; } - THashSet toAbort; + THashSet toAbort; TInstant newMin = TInstant::Max(); for (auto& [writeId, 
data] : Inserted) { const TInstant dataInsertTs = data.GetMeta().GetDirtyWriteTime(); @@ -133,7 +122,7 @@ THashSet TInsertionSummary::GetExpiredInsertions(const return toAbort; } -bool TInsertionSummary::EraseAborted(const TWriteId writeId) { +bool TInsertionSummary::EraseAborted(const TInsertWriteId writeId) { auto it = Aborted.find(writeId); if (it == Aborted.end()) { return false; @@ -143,7 +132,7 @@ bool TInsertionSummary::EraseAborted(const TWriteId writeId) { return true; } -bool TInsertionSummary::HasAborted(const TWriteId writeId) { +bool TInsertionSummary::HasAborted(const TInsertWriteId writeId) { auto it = Aborted.find(writeId); if (it == Aborted.end()) { return false; @@ -151,8 +140,8 @@ bool TInsertionSummary::HasAborted(const TWriteId writeId) { return true; } -bool TInsertionSummary::EraseCommitted(const TInsertedData& data) { - TPathInfo* pathInfo = GetPathInfoOptional(data.PathId); +bool TInsertionSummary::EraseCommitted(const TCommittedData& data) { + TPathInfo* pathInfo = GetPathInfoOptional(data.GetPathId()); if (!pathInfo) { Counters.Committed.SkipErase(data.BlobSize()); return false; @@ -166,8 +155,8 @@ bool TInsertionSummary::EraseCommitted(const TInsertedData& data) { } } -bool TInsertionSummary::HasCommitted(const TInsertedData& data) { - TPathInfo* pathInfo = GetPathInfoOptional(data.PathId); +bool TInsertionSummary::HasCommitted(const TCommittedData& data) { + TPathInfo* pathInfo = GetPathInfoOptional(data.GetPathId()); if (!pathInfo) { return false; } @@ -175,19 +164,20 @@ bool TInsertionSummary::HasCommitted(const TInsertedData& data) { } const NKikimr::NOlap::TInsertedData* TInsertionSummary::AddAborted(TInsertedData&& data, const bool load /*= false*/) { - const TWriteId writeId((TWriteId)data.WriteTxId); + const TInsertWriteId writeId = data.GetInsertWriteId(); Counters.Aborted.Add(data.BlobSize(), load); + AFL_VERIFY_DEBUG(!Inserted.contains(writeId)); auto insertInfo = Aborted.emplace(writeId, std::move(data)); - 
Y_ABORT_UNLESS(insertInfo.second); + AFL_VERIFY(insertInfo.second)("write_id", writeId); return &insertInfo.first->second; } -std::optional TInsertionSummary::ExtractInserted(const TWriteId id) { +std::optional TInsertionSummary::ExtractInserted(const TInsertWriteId id) { auto it = Inserted.find(id); if (it == Inserted.end()) { return {}; } else { - auto pathInfo = GetPathInfoOptional(it->second.PathId); + auto pathInfo = GetPathInfoOptional(it->second.GetPathId()); if (pathInfo) { OnEraseInserted(*pathInfo, it->second.BlobSize()); } @@ -198,10 +188,11 @@ std::optional TInsertionSummary::ExtractInserted( } const NKikimr::NOlap::TInsertedData* TInsertionSummary::AddInserted(TInsertedData&& data, const bool load /*= false*/) { - TWriteId writeId{ data.WriteTxId }; + const TInsertWriteId writeId = data.GetInsertWriteId(); const ui32 dataSize = data.BlobSize(); - const ui64 pathId = data.PathId; + const ui64 pathId = data.GetPathId(); auto insertInfo = Inserted.emplace(writeId, std::move(data)); + AFL_VERIFY_DEBUG(!Aborted.contains(writeId)); if (insertInfo.second) { OnNewInserted(GetPathInfo(pathId), dataSize, load); return &insertInfo.first->second; diff --git a/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.h b/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.h index 880def7bc45f..8329557117e1 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.h +++ b/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.h @@ -1,7 +1,10 @@ #pragma once +#include "inserted.h" +#include "path_info.h" + #include + #include -#include "path_info.h" namespace NKikimr::NOlap { class IBlobsDeclareRemovingAction; @@ -19,8 +22,8 @@ class TInsertionSummary { TCounters StatsCommitted; const NColumnShard::TInsertTableCounters Counters; - THashMap Inserted; - THashMap Aborted; + THashMap Inserted; + THashMap Aborted; mutable TInstant MinInsertedTs = TInstant::Zero(); std::map> Priorities; @@ -33,8 +36,27 @@ class TInsertionSummary { void 
OnNewInserted(TPathInfo& pathInfo, const ui64 dataSize, const bool load) noexcept; void OnEraseInserted(TPathInfo& pathInfo, const ui64 dataSize) noexcept; static TAtomicCounter CriticalInserted; + public: - void MarkAsNotAbortable(const TWriteId writeId) { + bool HasPathIdData(const ui64 pathId) const { + auto it = PathInfo.find(pathId); + if (it == PathInfo.end()) { + return false; + } + return !it->second.IsEmpty(); + } + + void ErasePath(const ui64 pathId) { + auto it = PathInfo.find(pathId); + if (it == PathInfo.end()) { + return; + } + RemovePriority(it->second); + AFL_VERIFY(it->second.IsEmpty()); + PathInfo.erase(it); + } + + void MarkAsNotAbortable(const TInsertWriteId writeId) { auto it = Inserted.find(writeId); if (it == Inserted.end()) { return; @@ -42,34 +64,37 @@ class TInsertionSummary { it->second.MarkAsNotAbortable(); } - THashSet GetInsertedByPathId(const ui64 pathId) const; + THashSet GetInsertedByPathId(const ui64 pathId) const; - THashSet GetExpiredInsertions(const TInstant timeBorder, const ui64 limit) const; + THashSet GetExpiredInsertions(const TInstant timeBorder, const ui64 limit) const; - const THashMap& GetInserted() const { + const THashMap& GetInserted() const { return Inserted; } - const THashMap& GetAborted() const { + const THashMap& GetAborted() const { return Aborted; } const TInsertedData* AddAborted(TInsertedData&& data, const bool load = false); - bool EraseAborted(const TWriteId writeId); - bool HasAborted(const TWriteId writeId); + bool EraseAborted(const TInsertWriteId writeId); + bool HasAborted(const TInsertWriteId writeId); - bool EraseCommitted(const TInsertedData& data); - bool HasCommitted(const TInsertedData& data); + bool EraseCommitted(const TCommittedData& data); + bool HasCommitted(const TCommittedData& data); const TInsertedData* AddInserted(TInsertedData&& data, const bool load = false); - std::optional ExtractInserted(const TWriteId id); + std::optional ExtractInserted(const TInsertWriteId id); - const 
TCounters& GetCountersPrepared() const { return StatsPrepared; } - const TCounters& GetCountersCommitted() const { return StatsCommitted; } + const TCounters& GetCountersPrepared() const { + return StatsPrepared; + } + const TCounters& GetCountersCommitted() const { + return StatsCommitted; + } const NColumnShard::TInsertTableCounters& GetCounters() const { return Counters; } NKikimr::NOlap::TPathInfo& GetPathInfo(const ui64 pathId); - std::optional ExtractPathInfo(const ui64 pathId); TPathInfo* GetPathInfoOptional(const ui64 pathId); const TPathInfo* GetPathInfoOptional(const ui64 pathId) const; @@ -84,4 +109,4 @@ class TInsertionSummary { } }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/data.cpp b/ydb/core/tx/columnshard/engines/insert_table/user_data.cpp similarity index 55% rename from ydb/core/tx/columnshard/engines/insert_table/data.cpp rename to ydb/core/tx/columnshard/engines/insert_table/user_data.cpp index 36c17ba89779..f5d3db9d71d7 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/data.cpp +++ b/ydb/core/tx/columnshard/engines/insert_table/user_data.cpp @@ -1,4 +1,4 @@ -#include "data.h" +#include "user_data.h" #include namespace NKikimr::NOlap { @@ -27,27 +27,18 @@ class TInsertTableCacheController { } -TInsertedData::TBlobStorageGuard::~TBlobStorageGuard() { +TUserData::TBlobStorageGuard::~TBlobStorageGuard() { Singleton()->Return(Data.size()); } -TInsertedData::~TInsertedData() { -} - -TInsertedData::TInsertedData(ui64 planStep, ui64 writeTxId, ui64 pathId, TString dedupId, const TBlobRange& blobRange, - const NKikimrTxColumnShard::TLogicalMetadata& proto, const ui64 schemaVersion, const std::optional& blobData) + TUserData::TUserData(const ui64 pathId, const TBlobRange& blobRange, const NKikimrTxColumnShard::TLogicalMetadata& proto, + const ui64 schemaVersion, const std::optional& blobData) : Meta(proto) , BlobRange(blobRange) - , PlanStep(planStep) - , WriteTxId(writeTxId) , PathId(pathId) - , 
DedupId(dedupId) , SchemaVersion(schemaVersion) { - if (blobData) { - AFL_VERIFY(blobData->size() == BlobRange.Size); - if (Singleton()->Take(blobData->size())) { - BlobDataGuard = std::make_shared(*blobData); - } + if (blobData && Singleton()->Take(blobData->size())) { + BlobDataGuard = std::make_shared(*blobData); } } diff --git a/ydb/core/tx/columnshard/engines/insert_table/user_data.h b/ydb/core/tx/columnshard/engines/insert_table/user_data.h new file mode 100644 index 000000000000..d734d90524eb --- /dev/null +++ b/ydb/core/tx/columnshard/engines/insert_table/user_data.h @@ -0,0 +1,100 @@ +#pragma once +#include "meta.h" + +#include + +#include + +namespace NKikimr::NOlap { + +class TUserData { +private: + TInsertedDataMeta Meta; + YDB_READONLY_DEF(TBlobRange, BlobRange); + class TBlobStorageGuard { + private: + YDB_READONLY_DEF(TString, Data); + + public: + TBlobStorageGuard(const TString& data) + : Data(data) { + } + ~TBlobStorageGuard(); + }; + + std::shared_ptr BlobDataGuard; + YDB_READONLY(ui64, PathId, 0); + YDB_READONLY(ui64, SchemaVersion, 0); + +public: + TUserData() = delete; + TUserData(const ui64 pathId, const TBlobRange& blobRange, const NKikimrTxColumnShard::TLogicalMetadata& proto, const ui64 schemaVersion, + const std::optional& blobData); + + static std::shared_ptr Build(const ui64 pathId, const TBlobRange& blobRange, const NKikimrTxColumnShard::TLogicalMetadata& proto, const ui64 schemaVersion, + const std::optional& blobData) { + return std::make_shared(pathId, blobRange, proto, schemaVersion, blobData); + } + + static std::shared_ptr Build(const ui64 pathId, const TUnifiedBlobId& blobId, const NKikimrTxColumnShard::TLogicalMetadata& proto, const ui64 schemaVersion, + const std::optional& blobData) { + return std::make_shared(pathId, TBlobRange(blobId), proto, schemaVersion, blobData); + } + + std::optional GetBlobData() const { + if (BlobDataGuard) { + return BlobDataGuard->GetData(); + } else { + return std::nullopt; + } + } + + ui64 
GetTxVolume() const { + return Meta.GetTxVolume() + sizeof(TBlobRange); + } + + const TInsertedDataMeta& GetMeta() const { + return Meta; + } +}; + +class TUserDataContainer { +protected: + std::shared_ptr UserData; + +public: + TUserDataContainer(const std::shared_ptr& userData) + : UserData(userData) { + AFL_VERIFY(UserData); + } + + ui64 GetSchemaVersion() const { + return UserData->GetSchemaVersion(); + } + + ui32 BlobSize() const { + return GetBlobRange().Size; + } + + ui32 GetTxVolume() const { + return UserData->GetTxVolume(); + } + + ui64 GetPathId() const { + return UserData->GetPathId(); + } + + const TBlobRange& GetBlobRange() const { + return UserData->GetBlobRange(); + } + + std::optional GetBlobData() const { + return UserData->GetBlobData(); + } + + const TInsertedDataMeta& GetMeta() const { + return UserData->GetMeta(); + } +}; + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/ya.make b/ydb/core/tx/columnshard/engines/insert_table/ya.make index fd56354b62e6..852761344626 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/ya.make +++ b/ydb/core/tx/columnshard/engines/insert_table/ya.make @@ -3,7 +3,9 @@ LIBRARY() SRCS( insert_table.cpp rt_insertion.cpp - data.cpp + user_data.cpp + inserted.cpp + committed.cpp path_info.cpp meta.cpp ) diff --git a/ydb/core/tx/columnshard/engines/portions/column_record.cpp b/ydb/core/tx/columnshard/engines/portions/column_record.cpp index e4fbef70c5fc..6127ad439326 100644 --- a/ydb/core/tx/columnshard/engines/portions/column_record.cpp +++ b/ydb/core/tx/columnshard/engines/portions/column_record.cpp @@ -28,7 +28,7 @@ TChunkMeta::TChunkMeta(const TColumnChunkLoadContext& context, const TSimpleColu DeserializeFromProto(context.GetAddress(), context.GetMetaProto(), columnInfo).Validate(); } -TChunkMeta::TChunkMeta(const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo) +TChunkMeta::TChunkMeta(const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo) : 
TBase(column, columnInfo.GetNeedMinMax(), columnInfo.GetIsSorted()) { } @@ -52,7 +52,8 @@ TColumnRecord::TColumnRecord(const TBlobRangeLink16::TLinkId blobLinkId, const T { } -TColumnRecord::TColumnRecord(const TChunkAddress& address, const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo) +TColumnRecord::TColumnRecord( + const TChunkAddress& address, const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo) : Meta(column, columnInfo) , ColumnId(address.GetColumnId()) , Chunk(address.GetChunk()) diff --git a/ydb/core/tx/columnshard/engines/portions/column_record.h b/ydb/core/tx/columnshard/engines/portions/column_record.h index 0109b372f302..2b984e4f05f3 100644 --- a/ydb/core/tx/columnshard/engines/portions/column_record.h +++ b/ydb/core/tx/columnshard/engines/portions/column_record.h @@ -2,20 +2,19 @@ #include "common.h" -#include - +#include +#include #include #include -#include -#include +#include #include +#include #include -#include #include #include - +#include #include namespace NKikimrColumnShardDataSharingProto { @@ -31,16 +30,17 @@ struct TChunkMeta: public TSimpleChunkMeta { private: using TBase = TSimpleChunkMeta; TChunkMeta() = default; - [[nodiscard]] TConclusionStatus DeserializeFromProto(const TChunkAddress& address, const NKikimrTxColumnShard::TIndexColumnMeta& proto, const TSimpleColumnInfo& columnInfo); + [[nodiscard]] TConclusionStatus DeserializeFromProto( + const TChunkAddress& address, const NKikimrTxColumnShard::TIndexColumnMeta& proto, const TSimpleColumnInfo& columnInfo); friend class TColumnRecord; + public: TChunkMeta(TSimpleChunkMeta&& baseMeta) - : TBase(baseMeta) - { - + : TBase(baseMeta) { } - [[nodiscard]] static TConclusion BuildFromProto(const TChunkAddress& address, const NKikimrTxColumnShard::TIndexColumnMeta& proto, const TSimpleColumnInfo& columnInfo) { + [[nodiscard]] static TConclusion BuildFromProto( + const TChunkAddress& address, const NKikimrTxColumnShard::TIndexColumnMeta& proto, const 
TSimpleColumnInfo& columnInfo) { TChunkMeta result; auto parse = result.DeserializeFromProto(address, proto, columnInfo); if (!parse) { @@ -63,20 +63,19 @@ struct TChunkMeta: public TSimpleChunkMeta { TChunkMeta(const TColumnChunkLoadContext& context, const TSimpleColumnInfo& columnInfo); - TChunkMeta(const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo); + TChunkMeta(const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo); }; class TColumnRecord { private: TChunkMeta Meta; TColumnRecord(TChunkMeta&& meta) - : Meta(std::move(meta)) - { - + : Meta(std::move(meta)) { } TColumnRecord() = default; TConclusionStatus DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TColumnRecord& proto, const TSimpleColumnInfo& columnInfo); + public: ui32 ColumnId = 0; ui16 Chunk = 0; @@ -99,9 +98,7 @@ class TColumnRecord { : Meta(std::move(meta)) , ColumnId(address.GetColumnId()) , Chunk(address.GetChunk()) - , BlobRange(range) - { - + , BlobRange(range) { } class TTestInstanceBuilder { @@ -116,7 +113,7 @@ class TColumnRecord { } }; - ui32 GetColumnId() const { + ui32 GetColumnId() const { return ColumnId; } ui16 GetChunkIdx() const { @@ -127,7 +124,8 @@ class TColumnRecord { } NKikimrColumnShardDataSharingProto::TColumnRecord SerializeToProto() const; - static TConclusion BuildFromProto(const NKikimrColumnShardDataSharingProto::TColumnRecord& proto, const TSimpleColumnInfo& columnInfo) { + static TConclusion BuildFromProto( + const NKikimrColumnShardDataSharingProto::TColumnRecord& proto, const TSimpleColumnInfo& columnInfo) { TColumnRecord result; auto parse = result.DeserializeFromProto(proto, columnInfo); if (!parse) { @@ -136,14 +134,14 @@ class TColumnRecord { return result; } - TColumnSerializationStat GetSerializationStat(const std::string& columnName) const { - TColumnSerializationStat result(ColumnId, columnName); + NArrow::NSplitter::TColumnSerializationStat GetSerializationStat(const std::string& columnName) const { + 
NArrow::NSplitter::TColumnSerializationStat result(ColumnId, columnName); result.Merge(GetSerializationStat()); return result; } - TSimpleSerializationStat GetSerializationStat() const { - return TSimpleSerializationStat(BlobRange.Size, Meta.GetNumRows(), Meta.GetRawBytes()); + NArrow::NSplitter::TSimpleSerializationStat GetSerializationStat() const { + return NArrow::NSplitter::TSimpleSerializationStat(BlobRange.Size, Meta.GetNumRows(), Meta.GetRawBytes()); } const TChunkMeta& GetMeta() const { @@ -163,18 +161,17 @@ class TColumnRecord { } TString DebugString() const { - return TStringBuilder() - << "column_id:" << ColumnId << ";" - << "chunk_idx:" << Chunk << ";" - << "blob_range:" << BlobRange.ToString() << ";" - ; + return TStringBuilder() << "column_id:" << ColumnId << ";" + << "chunk_idx:" << Chunk << ";" + << "blob_range:" << BlobRange.ToString() << ";"; } - TColumnRecord(const TChunkAddress& address, const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo); + TColumnRecord( + const TChunkAddress& address, const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo); TColumnRecord(const TBlobRangeLink16::TLinkId blobLinkId, const TColumnChunkLoadContext& loadContext, const TSimpleColumnInfo& columnInfo); - friend IOutputStream& operator << (IOutputStream& out, const TColumnRecord& rec) { + friend IOutputStream& operator<<(IOutputStream& out, const TColumnRecord& rec) { out << '{'; if (rec.Chunk) { out << 'n' << rec.Chunk; @@ -186,52 +183,4 @@ class TColumnRecord { } }; -class TSimpleOrderedColumnChunk: public IPortionColumnChunk { -private: - using TBase = IPortionColumnChunk; - const TColumnRecord ColumnRecord; - YDB_READONLY_DEF(TString, Data); -protected: - virtual TString DoDebugString() const override { - TStringBuilder sb; - sb << "column_id=" << GetColumnId() << ";data_size=" << Data.size() << ";"; - if (GetChunkIdxOptional()) { - sb << "chunk=" << GetChunkIdxVerified() << ";"; - } else { - sb << "chunk=NO_INITIALIZED;"; - } - return 
sb; - } - - virtual const TString& DoGetData() const override { - return Data; - } - virtual ui64 DoGetRawBytesImpl() const override { - return ColumnRecord.GetMeta().GetRawBytes(); - } - virtual ui32 DoGetRecordsCountImpl() const override { - return ColumnRecord.GetMeta().GetNumRows(); - } - virtual std::vector> DoInternalSplitImpl(const TColumnSaver& /*saver*/, const std::shared_ptr& /*counters*/, - const std::vector& /*splitSizes*/) const override { - Y_ABORT_UNLESS(false); - return {}; - } - virtual TSimpleChunkMeta DoBuildSimpleChunkMeta() const override { - return ColumnRecord.GetMeta(); - } - virtual std::shared_ptr DoGetFirstScalar() const override { - return nullptr; - } - virtual std::shared_ptr DoGetLastScalar() const override { - return nullptr; - } -public: - TSimpleOrderedColumnChunk(const TColumnRecord& cRecord, const TString& data) - : TBase(cRecord.ColumnId, cRecord.Chunk) - , ColumnRecord(cRecord) - , Data(data) { - } -}; - -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/portions/common.h b/ydb/core/tx/columnshard/engines/portions/common.h index 1231a1e9f5f3..3702887ccc81 100644 --- a/ydb/core/tx/columnshard/engines/portions/common.h +++ b/ydb/core/tx/columnshard/engines/portions/common.h @@ -1,7 +1,9 @@ #pragma once #include +#include namespace NKikimr::NOlap { +using TColumnSaver = NArrow::NAccessor::TColumnSaver; class TChunkAddress { private: diff --git a/ydb/core/tx/columnshard/engines/portions/constructor.cpp b/ydb/core/tx/columnshard/engines/portions/constructor.cpp index 56575c60d54b..39cd0fe983dc 100644 --- a/ydb/core/tx/columnshard/engines/portions/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/portions/constructor.cpp @@ -67,22 +67,6 @@ void TPortionInfoConstructor::LoadIndex(const TIndexChunkLoadContext& loadContex const NKikimr::NOlap::TColumnRecord& TPortionInfoConstructor::AppendOneChunkColumn(TColumnRecord&& record) { Y_ABORT_UNLESS(record.ColumnId); - std::optional maxChunk; - for (auto&& i : 
Records) { - if (i.ColumnId == record.ColumnId) { - if (!maxChunk) { - maxChunk = i.Chunk; - } else { - Y_ABORT_UNLESS(*maxChunk + 1 == i.Chunk); - maxChunk = i.Chunk; - } - } - } - if (maxChunk) { - AFL_VERIFY(*maxChunk + 1 == record.Chunk)("max", *maxChunk)("record", record.Chunk); - } else { - AFL_VERIFY(0 == record.Chunk)("record", record.Chunk); - } Records.emplace_back(std::move(record)); return Records.back(); } diff --git a/ydb/core/tx/columnshard/engines/portions/index_chunk.h b/ydb/core/tx/columnshard/engines/portions/index_chunk.h index 1fe92adfb539..65e0855d5eef 100644 --- a/ydb/core/tx/columnshard/engines/portions/index_chunk.h +++ b/ydb/core/tx/columnshard/engines/portions/index_chunk.h @@ -6,7 +6,6 @@ #include #include -#include #include #include diff --git a/ydb/core/tx/columnshard/engines/portions/meta.cpp b/ydb/core/tx/columnshard/engines/portions/meta.cpp index e585d6a88baf..9d7e374ec8f1 100644 --- a/ydb/core/tx/columnshard/engines/portions/meta.cpp +++ b/ydb/core/tx/columnshard/engines/portions/meta.cpp @@ -33,7 +33,7 @@ NKikimrTxColumnShard::TIndexPortionMeta TPortionMeta::SerializeToProto() const { break; } - portionMeta.SetPrimaryKeyBorders(ReplaceKeyEdges.SerializeToStringDataOnlyNoCompression()); + portionMeta.SetPrimaryKeyBorders(ReplaceKeyEdges.SerializePayloadToString()); RecordSnapshotMin.SerializeToProto(*portionMeta.MutableRecordSnapshotMin()); RecordSnapshotMax.SerializeToProto(*portionMeta.MutableRecordSnapshotMax()); diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp index b89b63e97b5c..0605d5ffcbab 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include #include #include @@ -30,38 +32,57 @@ std::shared_ptr TPortionInfo::MaxValue(ui32 columnId) const { return result; } -ui64 
TPortionInfo::GetColumnRawBytes(const std::vector& columnIds, const bool validation) const { - return GetColumnRawBytes(std::set(columnIds.begin(), columnIds.end()), validation); +ui64 TPortionInfo::GetColumnRawBytes(const std::set& entityIds, const bool validation) const { + ui64 sum = 0; + const auto aggr = [&](const TColumnRecord& r) { + sum += r.GetMeta().GetRawBytes(); + }; + AggregateIndexChunksData(aggr, Records, &entityIds, validation); + return sum; } -ui64 TPortionInfo::GetColumnRawBytes(const std::optional>& entityIds, const bool validation) const { +ui64 TPortionInfo::GetColumnBlobBytes(const std::set& entityIds, const bool validation) const { + ui64 sum = 0; + const auto aggr = [&](const TColumnRecord& r) { + sum += r.GetBlobRange().GetSize(); + }; + AggregateIndexChunksData(aggr, Records, &entityIds, validation); + return sum; +} + +ui64 TPortionInfo::GetColumnRawBytes(const bool validation) const { ui64 sum = 0; const auto aggr = [&](const TColumnRecord& r) { sum += r.GetMeta().GetRawBytes(); }; - AggregateIndexChunksData(aggr, Records, entityIds, validation); + AggregateIndexChunksData(aggr, Records, nullptr, validation); return sum; } -ui64 TPortionInfo::GetColumnBlobBytes(const std::optional>& entityIds, const bool validation) const { +ui64 TPortionInfo::GetColumnBlobBytes(const bool validation) const { ui64 sum = 0; const auto aggr = [&](const TColumnRecord& r) { sum += r.GetBlobRange().GetSize(); }; - AggregateIndexChunksData(aggr, Records, entityIds, validation); + AggregateIndexChunksData(aggr, Records, nullptr, validation); return sum; } -ui64 TPortionInfo::GetColumnBlobBytes(const std::vector& columnIds, const bool validation) const { - return GetColumnBlobBytes(std::set(columnIds.begin(), columnIds.end()), validation); +ui64 TPortionInfo::GetIndexRawBytes(const std::set& entityIds, const bool validation) const { + ui64 sum = 0; + const auto aggr = [&](const TIndexChunk& r) { + sum += r.GetRawBytes(); + }; + AggregateIndexChunksData(aggr, 
Indexes, &entityIds, validation); + return sum; } -ui64 TPortionInfo::GetIndexRawBytes(const std::optional>& entityIds, const bool validation) const { +ui64 TPortionInfo::GetIndexRawBytes(const bool validation) const { ui64 sum = 0; const auto aggr = [&](const TIndexChunk& r) { sum += r.GetRawBytes(); }; - AggregateIndexChunksData(aggr, Indexes, entityIds, validation); + AggregateIndexChunksData(aggr, Indexes, nullptr, validation); return sum; } @@ -590,11 +611,10 @@ template TPortionInfo::TPreparedBatchData PrepareForAssembleImpl(const TPortionInfo& portion, const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) { std::vector columns; - auto arrowResultSchema = resultSchema.GetSchema(); - columns.reserve(arrowResultSchema->num_fields()); + columns.reserve(resultSchema.GetColumnIds().size()); const ui32 rowsCount = portion.GetRecordsCount(); - for (auto&& i : arrowResultSchema->fields()) { - columns.emplace_back(rowsCount, dataSchema.GetColumnLoaderOptional(i->name()), resultSchema.GetColumnLoaderOptional(i->name())); + for (auto&& i : resultSchema.GetColumnIds()) { + columns.emplace_back(rowsCount, dataSchema.GetColumnLoaderOptional(i), resultSchema.GetColumnLoaderVerified(i)); } { int skipColumnId = -1; @@ -626,35 +646,11 @@ TPortionInfo::TPreparedBatchData PrepareForAssembleImpl(const TPortionInfo& port preparedColumns.emplace_back(c.Compile()); } - return TPortionInfo::TPreparedBatchData(std::move(preparedColumns), arrowResultSchema, rowsCount); + return TPortionInfo::TPreparedBatchData(std::move(preparedColumns), rowsCount); } } -namespace { -class TChunkAccessor { -private: - const std::vector& Chunks; - const std::shared_ptr Loader; -public: - TChunkAccessor(const std::vector& chunks, const std::shared_ptr& loader) - : Chunks(chunks) - , Loader(loader) - { - - } - ui64 GetChunksCount() const { - return Chunks.size(); - } - ui64 GetChunkLength(const ui32 idx) const { - return Chunks[idx].GetRecordsCount(); - } - 
std::shared_ptr GetArray(const ui32 idx) const { - return Chunks[idx].GetArrayVerified(Loader); - } -}; -} - ISnapshotSchema::TPtr TPortionInfo::TSchemaCursor::GetSchema(const TPortionInfoConstructor& portion) { if (!CurrentSchema || portion.GetMinSnapshotDeprecatedVerified() != LastSnapshot) { CurrentSchema = portion.GetSchema(VersionedIndex); @@ -664,12 +660,8 @@ ISnapshotSchema::TPtr TPortionInfo::TSchemaCursor::GetSchema(const TPortionInfoC return CurrentSchema; } -NArrow::NAccessor::IChunkedArray::TCurrentChunkAddress TDeserializeChunkedArray::DoGetChunk(const std::optional& chunkCurrent, const ui64 position) const { - TChunkAccessor accessor(Chunks, Loader); - return SelectChunk(chunkCurrent, position, accessor); -} - -TPortionInfo::TPreparedBatchData TPortionInfo::PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) const { +TPortionInfo::TPreparedBatchData TPortionInfo::PrepareForAssemble( + const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) const { return PrepareForAssembleImpl(*this, dataSchema, resultSchema, blobsData); } @@ -687,25 +679,18 @@ bool TPortionInfo::NeedShardingFilter(const TGranuleShardingInfo& shardingInfo) std::shared_ptr TPortionInfo::TPreparedColumn::AssembleAccessor() const { Y_ABORT_UNLESS(!Blobs.empty()); - std::vector> chunks; - chunks.reserve(Blobs.size()); + NArrow::NAccessor::TCompositeChunkedArray::TBuilder builder(GetField()->type()); for (auto& blob : Blobs) { - auto batch = blob.BuildRecordBatch(*Loader); - Y_ABORT_UNLESS(batch); - AFL_VERIFY(batch->num_columns() == 1); - chunks.emplace_back(batch->column(0)); - } - if (chunks.size() > 1) { - return std::make_shared(NArrow::TStatusValidator::GetValid(arrow::ChunkedArray::Make(chunks))); - } else { - return std::make_shared(chunks.front()); + auto chunkedArray = blob.BuildRecordBatch(*Loader); + builder.AddChunk(chunkedArray); } + return builder.Finish(); } -std::shared_ptr 
TPortionInfo::TPreparedColumn::AssembleForSeqAccess() const { +std::shared_ptr TPortionInfo::TPreparedColumn::AssembleForSeqAccess() const { Y_ABORT_UNLESS(!Blobs.empty()); - std::vector chunks; + std::vector chunks; chunks.reserve(Blobs.size()); ui64 recordsCount = 0; for (auto& blob : Blobs) { @@ -717,64 +702,31 @@ std::shared_ptr TPortionInfo::TPreparedColumn::Assembl } } - return std::make_shared(recordsCount, Loader, std::move(chunks)); -} - -std::shared_ptr TPortionInfo::TPreparedColumn::Assemble() const { - Y_ABORT_UNLESS(!Blobs.empty()); - - std::vector> chunks; - chunks.reserve(Blobs.size()); - for (auto& blob : Blobs) { - auto batch = blob.BuildRecordBatch(*Loader); - Y_ABORT_UNLESS(batch); - Y_ABORT_UNLESS(batch->num_columns() == 1); - chunks.emplace_back(batch->column(0)); - } - - return NArrow::TStatusValidator::GetValid(arrow::ChunkedArray::Make(chunks)); + return std::make_shared(recordsCount, Loader, std::move(chunks)); } -TDeserializeChunkedArray::TChunk TPortionInfo::TAssembleBlobInfo::BuildDeserializeChunk(const std::shared_ptr& loader) const { +NArrow::NAccessor::TDeserializeChunkedArray::TChunk TPortionInfo::TAssembleBlobInfo::BuildDeserializeChunk( + const std::shared_ptr& loader) const { if (DefaultRowsCount) { Y_ABORT_UNLESS(!Data); - AFL_VERIFY(loader->GetExpectedSchema()->num_fields() == 1); - auto col = NArrow::TThreadSimpleArraysCache::Get(loader->GetExpectedSchema()->field(0)->type(), DefaultValue, DefaultRowsCount); - return TDeserializeChunkedArray::TChunk(col); + auto col = std::make_shared( + NArrow::TThreadSimpleArraysCache::Get(loader->GetField()->type(), DefaultValue, DefaultRowsCount)); + return NArrow::NAccessor::TDeserializeChunkedArray::TChunk(col); } else { AFL_VERIFY(ExpectedRowsCount); - return TDeserializeChunkedArray::TChunk(*ExpectedRowsCount, Data); + return NArrow::NAccessor::TDeserializeChunkedArray::TChunk(*ExpectedRowsCount, Data); } } -std::shared_ptr TPortionInfo::TAssembleBlobInfo::BuildRecordBatch(const 
TColumnLoader& loader) const { +std::shared_ptr TPortionInfo::TAssembleBlobInfo::BuildRecordBatch(const TColumnLoader& loader) const { if (DefaultRowsCount) { Y_ABORT_UNLESS(!Data); - AFL_VERIFY(loader.GetExpectedSchema()->num_fields() == 1); - return arrow::RecordBatch::Make(loader.GetExpectedSchema(), DefaultRowsCount, - { NArrow::TThreadSimpleArraysCache::Get(loader.GetExpectedSchema()->field(0)->type(), DefaultValue, DefaultRowsCount) }); + return std::make_shared( + NArrow::TThreadSimpleArraysCache::Get(loader.GetField()->type(), DefaultValue, DefaultRowsCount)); } else { - auto result = loader.Apply(Data); - if (!result.ok()) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "cannot unpack batch")("error", result.status().ToString())("loader", loader.DebugString()); - return nullptr; - } - if (ExpectedRowsCount) { - AFL_VERIFY((*result)->num_rows() == ExpectedRowsCount)("real", (*result)->num_rows())("expected", ExpectedRowsCount); - } - return *result; - } -} - -std::shared_ptr TPortionInfo::TPreparedBatchData::AssembleForSeqAccess() const { - std::vector> columns; - std::vector> fields; - for (auto&& i : Columns) { - columns.emplace_back(i.AssembleForSeqAccess()); - fields.emplace_back(i.GetField()); + AFL_VERIFY(ExpectedRowsCount); + return loader.ApplyVerified(Data, *ExpectedRowsCount); } - - return std::make_shared(fields, std::move(columns)); } std::shared_ptr TPortionInfo::TPreparedBatchData::AssembleToGeneralContainer( @@ -782,6 +734,7 @@ std::shared_ptr TPortionInfo::TPreparedBatchData::Ass std::vector> columns; std::vector> fields; for (auto&& i : Columns) { + NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("column", i.GetField()->ToString())("id", i.GetColumnId()); if (sequentialColumnIds.contains(i.GetColumnId())) { columns.emplace_back(i.AssembleForSeqAccess()); } else { diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.h b/ydb/core/tx/columnshard/engines/portions/portion_info.h index 
b7b89c2f187d..c29013dd6a7e 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.h +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.h @@ -3,9 +3,11 @@ #include "index_chunk.h" #include "meta.h" +#include #include -#include +#include #include +#include #include #include #include @@ -28,60 +30,6 @@ struct TIndexInfo; class TVersionedIndex; class IDbWrapper; -class TDeserializeChunkedArray: public NArrow::NAccessor::IChunkedArray { -private: - using TBase = NArrow::NAccessor::IChunkedArray; -public: - class TChunk { - private: - YDB_READONLY(ui32, RecordsCount, 0); - std::shared_ptr PredefinedArray; - const TString Data; - public: - TChunk(const std::shared_ptr& predefinedArray) - : PredefinedArray(predefinedArray) { - AFL_VERIFY(PredefinedArray); - RecordsCount = PredefinedArray->length(); - } - - TChunk(const ui32 recordsCount, const TString& data) - : RecordsCount(recordsCount) - , Data(data) { - - } - - std::shared_ptr GetArrayVerified(const std::shared_ptr& loader) const { - if (PredefinedArray) { - return PredefinedArray; - } - auto result = loader->ApplyVerified(Data); - AFL_VERIFY(result); - AFL_VERIFY(result->num_columns() == 1); - AFL_VERIFY(result->num_rows() == RecordsCount)("length", result->num_rows())("records_count", RecordsCount); - return result->column(0); - } - }; - - std::shared_ptr Loader; - std::vector Chunks; -protected: - virtual std::optional DoGetRawSize() const override { - return {}; - } - virtual TCurrentChunkAddress DoGetChunk(const std::optional& chunkCurrent, const ui64 position) const override; - virtual std::shared_ptr DoGetChunkedArray() const override { - AFL_VERIFY(false); - return nullptr; - } -public: - TDeserializeChunkedArray(const ui64 recordsCount, const std::shared_ptr& loader, std::vector&& chunks) - : TBase(recordsCount, NArrow::NAccessor::IChunkedArray::EType::SerializedChunkedArray, loader->GetField()->type()) - , Loader(loader) - , Chunks(std::move(chunks)) { - AFL_VERIFY(Loader); - } -}; - 
class TEntityChunk { private: TChunkAddress Address; @@ -150,7 +98,7 @@ class TPortionInfo { } template - static void AggregateIndexChunksData(const TAggregator& aggr, const std::vector& chunks, const std::optional>& columnIds, const bool validation) { + static void AggregateIndexChunksData(const TAggregator& aggr, const std::vector& chunks, const std::set* columnIds, const bool validation) { if (columnIds) { auto itColumn = columnIds->begin(); auto itRecord = chunks.begin(); @@ -371,8 +319,8 @@ class TPortionInfo { return result; } - TSerializationStats GetSerializationStat(const ISnapshotSchema& schema) const { - TSerializationStats result; + NArrow::NSplitter::TSerializationStats GetSerializationStat(const ISnapshotSchema& schema) const { + NArrow::NSplitter::TSerializationStats result; for (auto&& i : Records) { if (schema.GetFieldByColumnIdOptional(i.ColumnId)) { result.AddStat(i.GetSerializationStat(schema.GetFieldByColumnIdVerified(i.ColumnId)->name())); @@ -390,22 +338,29 @@ class TPortionInfo { } const TColumnRecord* GetRecordPointer(const TChunkAddress& address) const { - for (auto&& i : Records) { - if (i.GetAddress() == address) { - return &i; - } + auto it = std::lower_bound(Records.begin(), Records.end(), address, [](const TColumnRecord& item, const TChunkAddress& address) { + return item.GetAddress() < address; + }); + if (it != Records.end() && it->GetAddress() == address) { + return &*it; } return nullptr; } bool HasEntityAddress(const TChunkAddress& address) const { - for (auto&& c : GetRecords()) { - if (c.GetAddress() == address) { + { + auto it = std::lower_bound(Records.begin(), Records.end(), address, [](const TColumnRecord& item, const TChunkAddress& address) { + return item.GetAddress() < address; + }); + if (it != Records.end() && it->GetAddress() == address) { return true; } } - for (auto&& c : GetIndexes()) { - if (c.GetAddress() == address) { + { + auto it = std::lower_bound(Indexes.begin(), Indexes.end(), address, [](const TIndexChunk& 
item, const TChunkAddress& address) { + return item.GetAddress() < address; + }); + if (it != Indexes.end() && it->GetAddress() == address) { return true; } } @@ -496,10 +451,7 @@ class TPortionInfo { return false; } - bool visible = (Meta.RecordSnapshotMin <= snapshot); - if (visible && RemoveSnapshot.Valid()) { - visible = snapshot < RemoveSnapshot; - } + const bool visible = (Meta.RecordSnapshotMin <= snapshot) && (!RemoveSnapshot.Valid() || snapshot < RemoveSnapshot); AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "IsVisible")("analyze_portion", DebugString())("visible", visible)("snapshot", snapshot.DebugString()); return visible; @@ -585,7 +537,8 @@ class TPortionInfo { return result; } - ui64 GetIndexRawBytes(const std::optional>& columnIds = {}, const bool validation = true) const; + ui64 GetIndexRawBytes(const std::set& columnIds, const bool validation = true) const; + ui64 GetIndexRawBytes(const bool validation = true) const; ui64 GetIndexBlobBytes() const noexcept { ui64 sum = 0; for (const auto& rec : Indexes) { @@ -594,11 +547,11 @@ class TPortionInfo { return sum; } - ui64 GetColumnRawBytes(const std::vector& columnIds, const bool validation = true) const; - ui64 GetColumnRawBytes(const std::optional>& columnIds = {}, const bool validation = true) const; + ui64 GetColumnRawBytes(const std::set& columnIds, const bool validation = true) const; + ui64 GetColumnRawBytes(const bool validation = true) const; - ui64 GetColumnBlobBytes(const std::vector& columnIds, const bool validation = true) const; - ui64 GetColumnBlobBytes(const std::optional>& columnIds = {}, const bool validation = true) const; + ui64 GetColumnBlobBytes(const std::set& columnIds, const bool validation = true) const; + ui64 GetColumnBlobBytes(const bool validation = true) const; ui64 GetTotalBlobBytes() const noexcept { return GetIndexBlobBytes() + GetColumnBlobBytes(); @@ -656,8 +609,8 @@ class TPortionInfo { return DefaultRowsCount && !Data; } - std::shared_ptr 
BuildRecordBatch(const TColumnLoader& loader) const; - TDeserializeChunkedArray::TChunk BuildDeserializeChunk(const std::shared_ptr& loader) const; + std::shared_ptr BuildRecordBatch(const TColumnLoader& loader) const; + NArrow::NAccessor::TDeserializeChunkedArray::TChunk BuildDeserializeChunk(const std::shared_ptr& loader) const; }; class TPreparedColumn { @@ -670,29 +623,26 @@ class TPortionInfo { } const std::string& GetName() const { - return Loader->GetExpectedSchema()->field(0)->name(); + return Loader->GetField()->name(); } std::shared_ptr GetField() const { - return Loader->GetExpectedSchema()->field(0); + return Loader->GetField(); } TPreparedColumn(std::vector&& blobs, const std::shared_ptr& loader) : Loader(loader) , Blobs(std::move(blobs)) { - Y_ABORT_UNLESS(Loader); - Y_ABORT_UNLESS(Loader->GetExpectedSchema()->num_fields() == 1); + AFL_VERIFY(Loader); } - std::shared_ptr Assemble() const; - std::shared_ptr AssembleForSeqAccess() const; + std::shared_ptr AssembleForSeqAccess() const; std::shared_ptr AssembleAccessor() const; }; class TPreparedBatchData { private: std::vector Columns; - std::shared_ptr Schema; size_t RowsCount = 0; public: struct TAssembleOptions { @@ -733,10 +683,6 @@ class TPortionInfo { return nullptr; } - std::vector GetSchemaColumnNames() const { - return Schema->field_names(); - } - size_t GetColumnsCount() const { return Columns.size(); } @@ -745,14 +691,12 @@ class TPortionInfo { return RowsCount; } - TPreparedBatchData(std::vector&& columns, std::shared_ptr schema, const size_t rowsCount) + TPreparedBatchData(std::vector&& columns, const size_t rowsCount) : Columns(std::move(columns)) - , Schema(schema) , RowsCount(rowsCount) { } std::shared_ptr AssembleToGeneralContainer(const std::set& sequentialColumnIds) const; - std::shared_ptr AssembleForSeqAccess() const; }; class TColumnAssemblingInfo { diff --git a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp 
b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp index 14fa9f854dcc..ae85ef59842c 100644 --- a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp +++ b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp @@ -104,8 +104,8 @@ std::optional TReadPortionInfoWithBlobs::SyncP std::vector> newChunks; if (it != columnChunks.end()) { newChunks = to->GetIndexInfo().ActualizeColumnData(it->second, from->GetIndexInfo(), i); + AFL_VERIFY(entityChunksNew.emplace(i, std::move(newChunks)).second); } - AFL_VERIFY(entityChunksNew.emplace(i, std::move(newChunks)).second); } TPortionInfoConstructor constructor(source.PortionInfo, false, true); @@ -120,7 +120,7 @@ std::optional TReadPortionInfoWithBlobs::SyncP } const NSplitter::TEntityGroups groups = to->GetIndexInfo().GetEntityGroupsByStorageId(targetTier, *storages); - auto schemaTo = std::make_shared(to, std::make_shared()); + auto schemaTo = std::make_shared(to, std::make_shared()); TGeneralSerializedSlice slice(secondaryData.GetExternalData(), schemaTo, counters); return TWritePortionInfoWithBlobsConstructor::BuildByBlobs( diff --git a/ydb/core/tx/columnshard/engines/predicate/container.cpp b/ydb/core/tx/columnshard/engines/predicate/container.cpp index 0f27c3538293..8afcc1895e4d 100644 --- a/ydb/core/tx/columnshard/engines/predicate/container.cpp +++ b/ydb/core/tx/columnshard/engines/predicate/container.cpp @@ -115,51 +115,53 @@ bool TPredicateContainer::CrossRanges(const TPredicateContainer& ext) { } } -std::optional TPredicateContainer::BuildPredicateFrom(std::shared_ptr object, const TIndexInfo* indexInfo) { +TConclusion TPredicateContainer::BuildPredicateFrom( + std::shared_ptr object, const std::shared_ptr& pkSchema) { if (!object || object->Empty()) { return TPredicateContainer(NArrow::ECompareType::GREATER_OR_EQUAL); } else { if (!object->Good()) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "not good 'from' predicate"); - return {}; + return 
TConclusionStatus::Fail("not good 'from' predicate"); } if (!object->IsFrom()) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "'from' predicate not is from"); - return {}; + return TConclusionStatus::Fail("'from' predicate not is from"); } - if (indexInfo) { + if (pkSchema) { auto cNames = object->ColumnNames(); i32 countSortingFields = 0; - for (i32 i = 0; i < indexInfo->GetReplaceKey()->num_fields(); ++i) { - if (i < (int)cNames.size() && cNames[i] == indexInfo->GetReplaceKey()->field(i)->name()) { + for (i32 i = 0; i < pkSchema->num_fields(); ++i) { + if (i < (int)cNames.size() && cNames[i] == pkSchema->field(i)->name()) { ++countSortingFields; } else { break; } } - Y_ABORT_UNLESS(countSortingFields == object->Batch->num_columns()); + AFL_VERIFY(countSortingFields == object->Batch->num_columns())("count", countSortingFields)("object", object->Batch->num_columns()); } - return TPredicateContainer(object); + return TPredicateContainer(object, pkSchema ? 
ExtractKey(*object, pkSchema) : nullptr); } } -std::optional TPredicateContainer::BuildPredicateTo(std::shared_ptr object, const TIndexInfo* indexInfo) { +TConclusion TPredicateContainer::BuildPredicateTo( + std::shared_ptr object, const std::shared_ptr& pkSchema) { if (!object || object->Empty()) { return TPredicateContainer(NArrow::ECompareType::LESS_OR_EQUAL); } else { if (!object->Good()) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "not good 'to' predicate"); - return {}; + return TConclusionStatus::Fail("not good 'to' predicate"); } if (!object->IsTo()) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "'to' predicate not is to"); - return {}; + return TConclusionStatus::Fail("'to' predicate not is to"); } - if (indexInfo) { + if (pkSchema) { auto cNames = object->ColumnNames(); i32 countSortingFields = 0; - for (i32 i = 0; i < indexInfo->GetReplaceKey()->num_fields(); ++i) { - if (i < (int)cNames.size() && cNames[i] == indexInfo->GetReplaceKey()->field(i)->name()) { + for (i32 i = 0; i < pkSchema->num_fields(); ++i) { + if (i < (int)cNames.size() && cNames[i] == pkSchema->field(i)->name()) { ++countSortingFields; } else { break; @@ -167,7 +169,7 @@ std::optional TPredicateContainer::BuildPre } Y_ABORT_UNLESS(countSortingFields == object->Batch->num_columns()); } - return TPredicateContainer(object); + return TPredicateContainer(object, pkSchema ? 
TPredicateContainer::ExtractKey(*object, pkSchema) : nullptr); } } diff --git a/ydb/core/tx/columnshard/engines/predicate/container.h b/ydb/core/tx/columnshard/engines/predicate/container.h index 5cff2cbefea5..adab69f68dc9 100644 --- a/ydb/core/tx/columnshard/engines/predicate/container.h +++ b/ydb/core/tx/columnshard/engines/predicate/container.h @@ -1,9 +1,13 @@ #pragma once #include "predicate.h" + #include #include + #include + #include + #include namespace NKikimr::NOlap { @@ -15,10 +19,12 @@ class TPredicateContainer { std::shared_ptr Object; NArrow::ECompareType CompareType; mutable std::optional> ColumnNames; + std::shared_ptr ReplaceKey; - TPredicateContainer(std::shared_ptr object) + TPredicateContainer(std::shared_ptr object, const std::shared_ptr& replaceKey) : Object(object) - , CompareType(Object->GetCompareType()) { + , CompareType(Object->GetCompareType()) + , ReplaceKey(replaceKey) { } TPredicateContainer(const NArrow::ECompareType compareType) @@ -27,15 +33,37 @@ class TPredicateContainer { static std::partial_ordering ComparePredicatesSamePrefix(const NOlap::TPredicate& l, const NOlap::TPredicate& r); + static std::shared_ptr ExtractKey(const NOlap::TPredicate& predicate, const std::shared_ptr& key) { + AFL_VERIFY(predicate.Batch); + const auto& batchFields = predicate.Batch->schema()->fields(); + const auto& keyFields = key->fields(); + size_t minSize = std::min(batchFields.size(), keyFields.size()); + for (size_t i = 0; i < minSize; ++i) { + Y_DEBUG_ABORT_UNLESS(batchFields[i]->type()->Equals(*keyFields[i]->type())); + } + if (batchFields.size() <= keyFields.size()) { + return std::make_shared(NArrow::TReplaceKey::FromBatch(predicate.Batch, predicate.Batch->schema(), 0)); + } else { + return std::make_shared(NArrow::TReplaceKey::FromBatch(predicate.Batch, key, 0)); + } + } + public: + NArrow::ECompareType GetCompareType() const { + return CompareType; + } + + const std::shared_ptr& GetReplaceKey() const { + return ReplaceKey; + } bool IsEmpty() 
const { return !Object; } template - std::optional Get(const ui32 colIndex, const ui32 rowIndex, - const std::optional defaultValue = {}) const { + std::optional Get( + const ui32 colIndex, const ui32 rowIndex, const std::optional defaultValue = {}) const { if (!Object) { return defaultValue; } else { @@ -59,13 +87,15 @@ class TPredicateContainer { return TPredicateContainer(NArrow::ECompareType::GREATER_OR_EQUAL); } - static std::optional BuildPredicateFrom(std::shared_ptr object, const TIndexInfo* indexInfo); + static TConclusion BuildPredicateFrom( + std::shared_ptr object, const std::shared_ptr& pkSchema); static TPredicateContainer BuildNullPredicateTo() { return TPredicateContainer(NArrow::ECompareType::LESS_OR_EQUAL); } - static std::optional BuildPredicateTo(std::shared_ptr object, const TIndexInfo* indexInfo); + static TConclusion BuildPredicateTo( + std::shared_ptr object, const std::shared_ptr& pkSchema); NKikimr::NArrow::TColumnFilter BuildFilter(const arrow::Datum& data) const { if (!Object) { @@ -73,23 +103,6 @@ class TPredicateContainer { } return NArrow::TColumnFilter::MakePredicateFilter(data, Object->Batch, CompareType); } - - std::optional ExtractKey(const std::shared_ptr& key) const { - if (Object) { - const auto& batchFields = Object->Batch->schema()->fields(); - const auto& keyFields = key->fields(); - size_t minSize = std::min(batchFields.size(), keyFields.size()); - for (size_t i = 0; i < minSize; ++i) { - Y_DEBUG_ABORT_UNLESS(batchFields[i]->type()->Equals(*keyFields[i]->type())); - } - if (batchFields.size() <= keyFields.size()) { - return NArrow::TReplaceKey::FromBatch(Object->Batch, Object->Batch->schema(), 0); - } else { - return NArrow::TReplaceKey::FromBatch(Object->Batch, key, 0); - } - } - return {}; - } }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/predicate/filter.cpp b/ydb/core/tx/columnshard/engines/predicate/filter.cpp index 10d66a832c1a..085eaa798f07 100644 --- 
a/ydb/core/tx/columnshard/engines/predicate/filter.cpp +++ b/ydb/core/tx/columnshard/engines/predicate/filter.cpp @@ -1,4 +1,7 @@ #include "filter.h" + +#include + #include namespace NKikimr::NOlap { @@ -14,43 +17,50 @@ NKikimr::NArrow::TColumnFilter TPKRangesFilter::BuildFilter(const arrow::Datum& return result; } -bool TPKRangesFilter::Add(std::shared_ptr f, std::shared_ptr t, const TIndexInfo* indexInfo) { +TConclusionStatus TPKRangesFilter::Add( + std::shared_ptr f, std::shared_ptr t, const std::shared_ptr& pkSchema) { if ((!f || f->Empty()) && (!t || t->Empty())) { - return true; + return TConclusionStatus::Success(); + } + auto fromContainerConclusion = TPredicateContainer::BuildPredicateFrom(f, pkSchema); + if (fromContainerConclusion.IsFail()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "incorrect from container")( + "from", fromContainerConclusion.GetErrorMessage()); + return fromContainerConclusion; } - auto fromContainer = TPredicateContainer::BuildPredicateFrom(f, indexInfo); - auto toContainer = TPredicateContainer::BuildPredicateTo(t, indexInfo); - if (!fromContainer || !toContainer) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "incorrect from/to containers")("from", !!fromContainer)("to", !!toContainer); - return false; + auto toContainerConclusion = TPredicateContainer::BuildPredicateTo(t, pkSchema); + if (toContainerConclusion.IsFail()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "incorrect to container")( + "from", toContainerConclusion.GetErrorMessage()); + return toContainerConclusion; } if (SortedRanges.size() && !FakeRanges) { if (ReverseFlag) { - if (fromContainer->CrossRanges(SortedRanges.front().GetPredicateTo())) { + if (fromContainerConclusion->CrossRanges(SortedRanges.front().GetPredicateTo())) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "not sorted 
sequence"); - return false; + return TConclusionStatus::Fail("not sorted sequence"); } } else { - if (fromContainer->CrossRanges(SortedRanges.back().GetPredicateTo())) { + if (fromContainerConclusion->CrossRanges(SortedRanges.back().GetPredicateTo())) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "not sorted sequence"); - return false; + return TConclusionStatus::Fail("not sorted sequence"); } } } - auto pkRangeFilter = TPKRangeFilter::Build(std::move(*fromContainer), std::move(*toContainer)); - if (!pkRangeFilter) { - return false; + auto pkRangeFilterConclusion = TPKRangeFilter::Build(fromContainerConclusion.DetachResult(), toContainerConclusion.DetachResult()); + if (pkRangeFilterConclusion.IsFail()) { + return pkRangeFilterConclusion; } if (FakeRanges) { FakeRanges = false; SortedRanges.clear(); } if (ReverseFlag) { - SortedRanges.emplace_front(std::move(*pkRangeFilter)); + SortedRanges.emplace_front(pkRangeFilterConclusion.DetachResult()); } else { - SortedRanges.emplace_back(std::move(*pkRangeFilter)); + SortedRanges.emplace_back(pkRangeFilterConclusion.DetachResult()); } - return true; + return TConclusionStatus::Success(); } TString TPKRangesFilter::DebugString() const { @@ -75,30 +85,134 @@ std::set TPKRangesFilter::GetColumnIds(const TIndexInfo& indexInfo) const return result; } -bool TPKRangesFilter::IsPortionInUsage(const TPortionInfo& info, const TIndexInfo& indexInfo) const { +bool TPKRangesFilter::IsPortionInUsage(const TPortionInfo& info) const { for (auto&& i : SortedRanges) { - if (i.IsPortionInUsage(info, indexInfo)) { + if (i.IsPortionInUsage(info)) { return true; } } return SortedRanges.empty(); } -bool TPKRangesFilter::IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end, const TIndexInfo& indexInfo) const { +bool TPKRangesFilter::CheckPoint(const NArrow::TReplaceKey& point) const { for (auto&& i : SortedRanges) { - if (i.IsPortionInPartialUsage(start, end, 
indexInfo)) { + if (i.CheckPoint(point)) { return true; } } - return false; + return SortedRanges.empty(); +} + +TPKRangeFilter::EUsageClass TPKRangesFilter::IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end) const { + for (auto&& i : SortedRanges) { + switch (i.IsPortionInPartialUsage(start, end)) { + case TPKRangeFilter::EUsageClass::FullUsage: + return TPKRangeFilter::EUsageClass::FullUsage; + case TPKRangeFilter::EUsageClass::PartialUsage: + return TPKRangeFilter::EUsageClass::PartialUsage; + case TPKRangeFilter::EUsageClass::DontUsage: + break; + } + } + return TPKRangeFilter::EUsageClass::DontUsage; } TPKRangesFilter::TPKRangesFilter(const bool reverse) - : ReverseFlag(reverse) -{ + : ReverseFlag(reverse) { auto range = TPKRangeFilter::Build(TPredicateContainer::BuildNullPredicateFrom(), TPredicateContainer::BuildNullPredicateTo()); Y_ABORT_UNLESS(range); SortedRanges.emplace_back(*range); } +std::shared_ptr TPKRangesFilter::SerializeToRecordBatch(const std::shared_ptr& pkSchema) const { + auto fullSchema = NArrow::TStatusValidator::GetValid( + pkSchema->AddField(pkSchema->num_fields(), std::make_shared(".ydb_operation_type", arrow::uint32()))); + auto builders = NArrow::MakeBuilders(fullSchema, SortedRanges.size() * 2); + for (auto&& i : SortedRanges) { + for (ui32 idx = 0; idx < (ui32)pkSchema->num_fields(); ++idx) { + if (idx < i.GetPredicateFrom().GetReplaceKey()->Size()) { + AFL_VERIFY(NArrow::Append( + *builders[idx], i.GetPredicateFrom().GetReplaceKey()->Column(idx), i.GetPredicateFrom().GetReplaceKey()->GetPosition())); + } else { + NArrow::TStatusValidator::Validate(builders[idx]->AppendNull()); + } + } + NArrow::Append(*builders[pkSchema->num_fields()], (ui32)i.GetPredicateFrom().GetCompareType()); + + for (ui32 idx = 0; idx < (ui32)pkSchema->num_fields(); ++idx) { + if (idx < i.GetPredicateTo().GetReplaceKey()->Size()) { + AFL_VERIFY(NArrow::Append( + *builders[idx], 
i.GetPredicateTo().GetReplaceKey()->Column(idx), i.GetPredicateTo().GetReplaceKey()->GetPosition())); + } else { + NArrow::TStatusValidator::Validate(builders[idx]->AppendNull()); + } + } + NArrow::Append(*builders[pkSchema->num_fields()], (ui32)i.GetPredicateTo().GetCompareType()); + } + return arrow::RecordBatch::Make(fullSchema, SortedRanges.size() * 2, NArrow::Finish(std::move(builders))); +} + +std::shared_ptr TPKRangesFilter::BuildFromRecordBatchLines( + const std::shared_ptr& batch, const bool reverse) { + std::shared_ptr result = std::make_shared(reverse); + for (ui32 i = 0; i < batch->num_rows(); ++i) { + auto batchRow = batch->Slice(i, 1); + auto pFrom = std::make_shared(NKernels::EOperation::GreaterEqual, batchRow); + auto pTo = std::make_shared(NKernels::EOperation::LessEqual, batchRow); + result->Add(pFrom, pTo, batch->schema()).Validate(); + } + return result; } + +std::shared_ptr TPKRangesFilter::BuildFromRecordBatchFull( + const std::shared_ptr& batch, const std::shared_ptr& pkSchema, const bool reverse) { + std::shared_ptr result = std::make_shared(reverse); + auto pkBatch = NArrow::TColumnOperator().Adapt(batch, pkSchema).DetachResult(); + auto c = batch->GetColumnByName(".ydb_operation_type"); + AFL_VERIFY(c); + AFL_VERIFY(c->type_id() == arrow::Type::UINT32); + auto cUi32 = static_pointer_cast(c); + for (ui32 i = 0; i < batch->num_rows();) { + std::shared_ptr pFrom; + std::shared_ptr pTo; + { + auto batchRow = TPredicate::CutNulls(batch->Slice(i, 1)); + NKernels::EOperation op = (NKernels::EOperation)cUi32->Value(i); + if (op == NKernels::EOperation::GreaterEqual || op == NKernels::EOperation::Greater) { + pFrom = std::make_shared(op, batchRow); + } else if (op == NKernels::EOperation::Equal) { + pFrom = std::make_shared(NKernels::EOperation::GreaterEqual, batchRow); + } else { + AFL_VERIFY(false); + } + if (op != NKernels::EOperation::Equal) { + ++i; + } + } + { + auto batchRow = TPredicate::CutNulls(batch->Slice(i, 1)); + NKernels::EOperation 
op = (NKernels::EOperation)cUi32->Value(i); + if (op == NKernels::EOperation::LessEqual || op == NKernels::EOperation::Less) { + pTo = std::make_shared(op, batchRow); + } else if (op == NKernels::EOperation::Equal) { + pTo = std::make_shared(NKernels::EOperation::LessEqual, batchRow); + } else { + AFL_VERIFY(false); + } + } + result->Add(pFrom, pTo, pkSchema).Validate(); + } + return result; +} + +std::shared_ptr TPKRangesFilter::BuildFromString( + const TString& data, const std::shared_ptr& pkSchema, const bool reverse) { + auto batch = NArrow::TStatusValidator::GetValid(NArrow::NSerialization::TNativeSerializer().Deserialize(data)); + return BuildFromRecordBatchFull(batch, pkSchema, reverse); +} + +TString TPKRangesFilter::SerializeToString(const std::shared_ptr& pkSchema) const { + return NArrow::NSerialization::TNativeSerializer().SerializeFull(SerializeToRecordBatch(pkSchema)); +} + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/predicate/filter.h b/ydb/core/tx/columnshard/engines/predicate/filter.h index 20e7a039b273..bbc70b5ff584 100644 --- a/ydb/core/tx/columnshard/engines/predicate/filter.h +++ b/ydb/core/tx/columnshard/engines/predicate/filter.h @@ -9,9 +9,15 @@ class TPKRangesFilter { bool FakeRanges = true; std::deque SortedRanges; bool ReverseFlag = false; + public: TPKRangesFilter(const bool reverse); + [[nodiscard]] TConclusionStatus Add( + std::shared_ptr f, std::shared_ptr t, const std::shared_ptr& pkSchema); + std::shared_ptr SerializeToRecordBatch(const std::shared_ptr& pkSchema) const; + TString SerializeToString(const std::shared_ptr& pkSchema) const; + bool IsEmpty() const { return SortedRanges.empty() || FakeRanges; } @@ -37,13 +43,12 @@ class TPKRangesFilter { return SortedRanges.end(); } - bool IsPortionInUsage(const TPortionInfo& info, const TIndexInfo& indexInfo) const; - bool IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end, const TIndexInfo& indexInfo) const; + bool 
IsPortionInUsage(const TPortionInfo& info) const; + TPKRangeFilter::EUsageClass IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end) const; + bool CheckPoint(const NArrow::TReplaceKey& point) const; NArrow::TColumnFilter BuildFilter(const arrow::Datum& data) const; - [[nodiscard]] bool Add(std::shared_ptr f, std::shared_ptr t, const TIndexInfo* indexInfo); - std::set GetColumnNames() const { std::set result; for (auto&& i : SortedRanges) { @@ -57,6 +62,30 @@ class TPKRangesFilter { TString DebugString() const; std::set GetColumnIds(const TIndexInfo& indexInfo) const; + + static std::shared_ptr BuildFromRecordBatchLines(const std::shared_ptr& batch, const bool reverse); + + static std::shared_ptr BuildFromRecordBatchFull( + const std::shared_ptr& batch, const std::shared_ptr& pkSchema, const bool reverse); + static std::shared_ptr BuildFromString( + const TString& data, const std::shared_ptr& pkSchema, const bool reverse); + + template + static TConclusion BuildFromProto(const TProto& proto, const bool reverse, const std::vector& ydbPk) { + TPKRangesFilter result(reverse); + for (auto& protoRange : proto.GetRanges()) { + TSerializedTableRange range(protoRange); + auto fromPredicate = std::make_shared(); + auto toPredicate = std::make_shared(); + TSerializedTableRange serializedRange(protoRange); + std::tie(*fromPredicate, *toPredicate) = TPredicate::DeserializePredicatesRange(serializedRange, ydbPk); + auto status = result.Add(fromPredicate, toPredicate, NArrow::TStatusValidator::GetValid(NArrow::MakeArrowSchema(ydbPk))); + if (status.IsFail()) { + return status; + } + } + return result; + } }; } diff --git a/ydb/core/tx/columnshard/engines/predicate/predicate.cpp b/ydb/core/tx/columnshard/engines/predicate/predicate.cpp index 7a4ee282f0cc..535172afd526 100644 --- a/ydb/core/tx/columnshard/engines/predicate/predicate.cpp +++ b/ydb/core/tx/columnshard/engines/predicate/predicate.cpp @@ -1,20 +1,21 @@ #include "predicate.h" 
+#include #include #include +#include + namespace NKikimr::NOlap { TPredicate::TPredicate(EOperation op, std::shared_ptr batch) noexcept : Operation(op) - , Batch(std::move(batch)) -{ + , Batch(std::move(batch)) { Y_ABORT_UNLESS(IsFrom() || IsTo()); } TPredicate::TPredicate(EOperation op, const TString& serializedBatch, const std::shared_ptr& schema) - : Operation(op) -{ + : Operation(op) { Y_ABORT_UNLESS(IsFrom() || IsTo()); if (!serializedBatch.empty()) { Batch = NArrow::DeserializeBatch(serializedBatch, schema); @@ -31,7 +32,112 @@ std::vector TPredicate::ColumnNames() const { return out; } -IOutputStream& operator << (IOutputStream& out, const TPredicate& pred) { +std::vector ExtractTypes(const std::vector>& columns) { + std::vector types; + types.reserve(columns.size()); + for (auto& [name, type] : columns) { + types.push_back(type); + } + return types; +} + +TString FromCells(const TConstArrayRef& cells, const std::vector>& columns) { + Y_ABORT_UNLESS(cells.size() == columns.size()); + if (cells.empty()) { + return {}; + } + + std::vector types = ExtractTypes(columns); + + NArrow::TArrowBatchBuilder batchBuilder; + batchBuilder.Reserve(1); + auto startStatus = batchBuilder.Start(columns); + Y_ABORT_UNLESS(startStatus.ok(), "%s", startStatus.ToString().c_str()); + + batchBuilder.AddRow(NKikimr::TDbTupleRef(), NKikimr::TDbTupleRef(types.data(), cells.data(), cells.size())); + + auto batch = batchBuilder.FlushBatch(false); + Y_ABORT_UNLESS(batch); + Y_ABORT_UNLESS(batch->num_columns() == (int)cells.size()); + Y_ABORT_UNLESS(batch->num_rows() == 1); + return NArrow::SerializeBatchNoCompression(batch); +} + +std::pair TPredicate::DeserializePredicatesRange( + const TSerializedTableRange& range, const std::vector>& columns) { + std::vector leftCells; + std::vector> leftColumns; + bool leftTrailingNull = false; + { + TConstArrayRef cells = range.From.GetCells(); + const size_t size = cells.size(); + Y_ASSERT(size <= columns.size()); + leftCells.reserve(size); + 
leftColumns.reserve(size); + for (size_t i = 0; i < size; ++i) { + if (!cells[i].IsNull()) { + leftCells.push_back(cells[i]); + leftColumns.push_back(columns[i]); + leftTrailingNull = false; + } else { + leftTrailingNull = true; + } + } + } + + std::vector rightCells; + std::vector> rightColumns; + bool rightTrailingNull = false; + { + TConstArrayRef cells = range.To.GetCells(); + const size_t size = cells.size(); + Y_ASSERT(size <= columns.size()); + rightCells.reserve(size); + rightColumns.reserve(size); + for (size_t i = 0; i < size; ++i) { + if (!cells[i].IsNull()) { + rightCells.push_back(cells[i]); + rightColumns.push_back(columns[i]); + rightTrailingNull = false; + } else { + rightTrailingNull = true; + } + } + } + + const bool fromInclusive = range.FromInclusive || leftTrailingNull; + const bool toInclusive = range.ToInclusive && !rightTrailingNull; + + TString leftBorder = FromCells(leftCells, leftColumns); + TString rightBorder = FromCells(rightCells, rightColumns); + auto leftSchema = NArrow::MakeArrowSchema(leftColumns); + Y_ASSERT(leftSchema.ok()); + auto rightSchema = NArrow::MakeArrowSchema(rightColumns); + Y_ASSERT(rightSchema.ok()); + return std::make_pair( + TPredicate(fromInclusive ? NKernels::EOperation::GreaterEqual : NKernels::EOperation::Greater, leftBorder, leftSchema.ValueUnsafe()), + TPredicate(toInclusive ? 
NKernels::EOperation::LessEqual : NKernels::EOperation::Less, rightBorder, rightSchema.ValueUnsafe())); +} + +std::shared_ptr TPredicate::CutNulls(const std::shared_ptr& batch) { + AFL_VERIFY(batch->num_rows() == 1)("count", batch->num_rows()); + AFL_VERIFY(batch->num_columns()); + std::vector> colsNotNull; + std::vector> fieldsNotNull; + ui32 idx = 0; + for (auto&& i : batch->columns()) { + if (i->IsNull(0)) { + break; + } + colsNotNull.emplace_back(i); + fieldsNotNull.emplace_back(batch->schema()->field(idx)); + ++idx; + } + AFL_VERIFY(colsNotNull.size()); + return arrow::RecordBatch::Make(std::make_shared(fieldsNotNull), 1, colsNotNull); +} + +IOutputStream& operator<<(IOutputStream& out, const TPredicate& pred) { out << NSsa::GetFunctionName(pred.Operation); for (i32 i = 0; i < pred.Batch->num_columns(); ++i) { @@ -61,4 +167,4 @@ IOutputStream& operator << (IOutputStream& out, const TPredicate& pred) { return out; } -} // namespace NKikimr::NOlap +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/predicate/predicate.h b/ydb/core/tx/columnshard/engines/predicate/predicate.h index 0b856894e65f..8365971ea29e 100644 --- a/ydb/core/tx/columnshard/engines/predicate/predicate.h +++ b/ydb/core/tx/columnshard/engines/predicate/predicate.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -10,7 +11,9 @@ struct TPredicate { private: using EOperation = NArrow::EOperation; EOperation Operation{ EOperation::Unspecified }; + public: + static std::shared_ptr CutNulls(const std::shared_ptr& batch); std::shared_ptr Batch; @@ -29,8 +32,8 @@ struct TPredicate { } template - std::optional Get(const ui32 colIndex, const ui32 rowIndex, - const std::optional defaultValue = {}) const { + std::optional Get( + const ui32 colIndex, const ui32 rowIndex, const std::optional defaultValue = {}) const { auto column = Batch->column(colIndex); if (!column) { return defaultValue; @@ -42,10 +45,18 @@ struct TPredicate { } } - bool Empty() const noexcept { return 
Batch.get() == nullptr; } - bool Good() const { return !Empty() && Batch->num_columns() && Batch->num_rows() == 1; } - bool IsFrom() const noexcept { return Operation == EOperation::Greater || Operation == EOperation::GreaterEqual; } - bool IsTo() const noexcept { return Operation == EOperation::Less || Operation == EOperation::LessEqual; } + bool Empty() const noexcept { + return Batch.get() == nullptr; + } + bool Good() const { + return !Empty() && Batch->num_columns() && Batch->num_rows() == 1; + } + bool IsFrom() const noexcept { + return Operation == EOperation::Greater || Operation == EOperation::GreaterEqual; + } + bool IsTo() const noexcept { + return Operation == EOperation::Less || Operation == EOperation::LessEqual; + } bool IsInclusive() const { return Operation == EOperation::GreaterEqual || Operation == EOperation::LessEqual; } @@ -56,13 +67,16 @@ struct TPredicate { return Empty() ? "()" : Batch->schema()->ToString(); } + static std::pair DeserializePredicatesRange( + const TSerializedTableRange& range, const std::vector>& columns); + constexpr TPredicate() noexcept = default; TPredicate(EOperation op, std::shared_ptr batch) noexcept; TPredicate(EOperation op, const TString& serializedBatch, const std::shared_ptr& schema); - friend IOutputStream& operator << (IOutputStream& out, const TPredicate& pred); + friend IOutputStream& operator<<(IOutputStream& out, const TPredicate& pred); }; -} // namespace NKikimr::NOlap +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/predicate/range.cpp b/ydb/core/tx/columnshard/engines/predicate/range.cpp index 3a073fcb8fec..83c6602d242d 100644 --- a/ydb/core/tx/columnshard/engines/predicate/range.cpp +++ b/ydb/core/tx/columnshard/engines/predicate/range.cpp @@ -6,11 +6,11 @@ namespace NKikimr::NOlap { std::set TPKRangeFilter::GetColumnIds(const TIndexInfo& indexInfo) const { std::set result; for (auto&& i : PredicateFrom.GetColumnNames()) { - result.emplace(indexInfo.GetColumnId(i)); + 
result.emplace(indexInfo.GetColumnIdVerified(i)); AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_SCAN)("predicate_column", i); } for (auto&& i : PredicateTo.GetColumnNames()) { - result.emplace(indexInfo.GetColumnId(i)); + result.emplace(indexInfo.GetColumnIdVerified(i)); AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_SCAN)("predicate_column", i); } return result; @@ -39,63 +39,84 @@ NKikimr::NArrow::TColumnFilter TPKRangeFilter::BuildFilter(const arrow::Datum& d return result.And(PredicateFrom.BuildFilter(data)); } -bool TPKRangeFilter::IsPortionInUsage(const TPortionInfo& info, const TIndexInfo& indexInfo) const { - if (auto from = PredicateFrom.ExtractKey(indexInfo.GetPrimaryKey())) { - const auto& portionEnd = info.IndexKeyEnd(); - const int commonSize = std::min(from->Size(), portionEnd.Size()); - if (std::is_gt(from->ComparePartNotNull(portionEnd, commonSize))) { - return false; - } - } +bool TPKRangeFilter::IsPortionInUsage(const TPortionInfo& info) const { + return IsPortionInPartialUsage(info.IndexKeyStart(), info.IndexKeyEnd()) != TPKRangeFilter::EUsageClass::DontUsage; +} - if (auto to = PredicateTo.ExtractKey(indexInfo.GetPrimaryKey())) { - const auto& portionStart = info.IndexKeyStart(); - const int commonSize = std::min(to->Size(), portionStart.Size()); - if (std::is_lt(to->ComparePartNotNull(portionStart, commonSize))) { - return false; +TPKRangeFilter::EUsageClass TPKRangeFilter::IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end) const { + { + std::partial_ordering equalityStartWithFrom = std::partial_ordering::greater; + if (const auto& from = PredicateFrom.GetReplaceKey()) { + equalityStartWithFrom = start.ComparePartNotNull(*from, from->Size()); + } + std::partial_ordering equalityEndWithTo = std::partial_ordering::less; + if (const auto& to = PredicateTo.GetReplaceKey()) { + equalityEndWithTo = end.ComparePartNotNull(*to, to->Size()); + } + const bool startInternal = (equalityStartWithFrom == 
std::partial_ordering::equivalent && PredicateFrom.IsInclude()) || + (equalityStartWithFrom == std::partial_ordering::greater); + const bool endInternal = (equalityEndWithTo == std::partial_ordering::equivalent && PredicateTo.IsInclude()) || + (equalityEndWithTo == std::partial_ordering::less); + if (startInternal && endInternal) { + return EUsageClass::FullUsage; } } + - return true; -} - -bool TPKRangeFilter::IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end, const TIndexInfo& indexInfo) const { - bool startUsage = false; - bool endUsage = false; - if (auto from = PredicateFrom.ExtractKey(indexInfo.GetPrimaryKey())) { - AFL_VERIFY(from->Size() <= start.Size()); - if (PredicateFrom.IsInclude()) { - startUsage = std::is_lt(start.ComparePartNotNull(*from, from->Size())); - } else { - startUsage = std::is_lteq(start.ComparePartNotNull(*from, from->Size())); + if (const auto& from = PredicateFrom.GetReplaceKey()) { + const std::partial_ordering equalityEndWithFrom = end.ComparePartNotNull(*from, from->Size()); + if (equalityEndWithFrom == std::partial_ordering::less) { + return EUsageClass::DontUsage; + } else if (equalityEndWithFrom == std::partial_ordering::equivalent) { + if (PredicateFrom.IsInclude()) { + return EUsageClass::PartialUsage; + } else { + return EUsageClass::DontUsage; + } } - } else { - startUsage = true; } - if (auto to = PredicateTo.ExtractKey(indexInfo.GetPrimaryKey())) { - AFL_VERIFY(to->Size() <= end.Size()); - if (PredicateTo.IsInclude()) { - endUsage = std::is_gt(end.ComparePartNotNull(*to, to->Size())); - } else { - endUsage = std::is_gteq(end.ComparePartNotNull(*to, to->Size())); + if (const auto& to = PredicateTo.GetReplaceKey()) { + const std::partial_ordering equalityStartWithTo = start.ComparePartNotNull(*to, to->Size()); + if (equalityStartWithTo == std::partial_ordering::greater) { + return EUsageClass::DontUsage; + } else if (equalityStartWithTo == std::partial_ordering::equivalent) { + if 
(PredicateTo.IsInclude()) { + return EUsageClass::PartialUsage; + } else { + return EUsageClass::DontUsage; + } } - } else { - endUsage = true; } -// AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("start", start.DebugString())("end", end.DebugString())("from", PredicateFrom.DebugString())("to", PredicateTo.DebugString()) -// ("start_usage", startUsage)("end_usage", endUsage); +// AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("start", start.DebugString())("end", end.DebugString())("from", PredicateFrom.DebugString())( +// "to", PredicateTo.DebugString()); - return endUsage || startUsage; + return EUsageClass::PartialUsage; } -std::optional TPKRangeFilter::Build(TPredicateContainer&& from, TPredicateContainer&& to) { +TConclusion TPKRangeFilter::Build(TPredicateContainer&& from, TPredicateContainer&& to) { if (!from.CrossRanges(to)) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "cannot_build_predicate_range")("error", "predicates from/to not intersected"); - return {}; + return TConclusionStatus::Fail("predicates from/to not intersected"); } return TPKRangeFilter(std::move(from), std::move(to)); } +bool TPKRangeFilter::CheckPoint(const NArrow::TReplaceKey& point) const { + std::partial_ordering equalityWithFrom = std::partial_ordering::greater; + if (const auto& from = PredicateFrom.GetReplaceKey()) { + equalityWithFrom = point.ComparePartNotNull(*from, from->Size()); + } + std::partial_ordering equalityWithTo = std::partial_ordering::less; + if (const auto& to = PredicateTo.GetReplaceKey()) { + equalityWithTo = point.ComparePartNotNull(*to, to->Size()); + } + const bool startInternal = (equalityWithFrom == std::partial_ordering::equivalent && PredicateFrom.IsInclude()) || + (equalityWithFrom == std::partial_ordering::greater); + const bool endInternal = (equalityWithTo == std::partial_ordering::equivalent && PredicateTo.IsInclude()) || + (equalityWithTo == std::partial_ordering::less); + return startInternal && endInternal; +} + } diff --git 
a/ydb/core/tx/columnshard/engines/predicate/range.h b/ydb/core/tx/columnshard/engines/predicate/range.h index ff84f35408a3..6f9f264b7d70 100644 --- a/ydb/core/tx/columnshard/engines/predicate/range.h +++ b/ydb/core/tx/columnshard/engines/predicate/range.h @@ -28,20 +28,20 @@ class TPKRangeFilter { return PredicateTo; } - std::optional KeyFrom(const std::shared_ptr& key) const { - return PredicateFrom.ExtractKey(key); - } + static TConclusion Build(TPredicateContainer&& from, TPredicateContainer&& to); - std::optional KeyTo(const std::shared_ptr& key) const { - return PredicateTo.ExtractKey(key); - } + NArrow::TColumnFilter BuildFilter(const arrow::Datum& data) const; - static std::optional Build(TPredicateContainer&& from, TPredicateContainer&& to); + bool IsPortionInUsage(const TPortionInfo& info) const; + bool CheckPoint(const NArrow::TReplaceKey& point) const; - NArrow::TColumnFilter BuildFilter(const arrow::Datum& data) const; + enum class EUsageClass { + DontUsage, + PartialUsage, + FullUsage + }; - bool IsPortionInUsage(const TPortionInfo& info, const TIndexInfo& indexInfo) const; - bool IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end, const TIndexInfo& indexInfo) const; + EUsageClass IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end) const; std::set GetColumnIds(const TIndexInfo& indexInfo) const; TString DebugString() const; diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/abstract.h b/ydb/core/tx/columnshard/engines/reader/abstract/abstract.h index 35269dc16598..37ba57b89985 100644 --- a/ydb/core/tx/columnshard/engines/reader/abstract/abstract.h +++ b/ydb/core/tx/columnshard/engines/reader/abstract/abstract.h @@ -31,7 +31,7 @@ class TScanIteratorBase { return {}; } virtual bool Finished() const = 0; - virtual TConclusion> GetBatch() = 0; + virtual TConclusion> GetBatch() = 0; virtual void PrepareResults() { } diff --git 
a/ydb/core/tx/columnshard/engines/reader/abstract/read_context.h b/ydb/core/tx/columnshard/engines/reader/abstract/read_context.h index fbcdab4d8622..3b1d545094ac 100644 --- a/ydb/core/tx/columnshard/engines/reader/abstract/read_context.h +++ b/ydb/core/tx/columnshard/engines/reader/abstract/read_context.h @@ -1,10 +1,12 @@ #pragma once #include "read_metadata.h" + #include -#include -#include #include +#include #include +#include + #include namespace NKikimr::NOlap::NReader { @@ -13,6 +15,7 @@ class TComputeShardingPolicy { private: YDB_READONLY(ui32, ShardsCount, 0); YDB_READONLY_DEF(std::vector, ColumnNames); + public: TString DebugString() const { return TStringBuilder() << "shards_count:" << ShardsCount << ";columns=" << JoinSeq(",", ColumnNames) << ";"; @@ -42,10 +45,12 @@ class TReadContext { const NColumnShard::TConcreteScanCounters Counters; TReadMetadataBase::TConstPtr ReadMetadata; NResourceBroker::NSubscribe::TTaskContext ResourcesTaskContext; + const ui64 ScanId; const TActorId ScanActorId; const TActorId ResourceSubscribeActorId; const TActorId ReadCoordinatorActorId; const TComputeShardingPolicy ComputeShardingPolicy; + public: template std::shared_ptr GetReadMetadataPtrVerifiedAs() const { @@ -74,6 +79,14 @@ class TReadContext { return ScanActorId; } + ui64 GetScanId() const { + return ScanId; + } + + bool HasLock() const { + return !!ReadMetadata->GetLockId(); + } + const TReadMetadataBase::TConstPtr& GetReadMetadata() const { return ReadMetadata; } @@ -86,17 +99,18 @@ class TReadContext { return ResourcesTaskContext; } - TReadContext(const std::shared_ptr& storagesManager, const NColumnShard::TConcreteScanCounters& counters, const TReadMetadataBase::TConstPtr& readMetadata, - const TActorId& scanActorId, const TActorId& resourceSubscribeActorId, const TActorId& readCoordinatorActorId, const TComputeShardingPolicy& computeShardingPolicy) + TReadContext(const std::shared_ptr& storagesManager, const NColumnShard::TConcreteScanCounters& counters, + 
const TReadMetadataBase::TConstPtr& readMetadata, const TActorId& scanActorId, const TActorId& resourceSubscribeActorId, + const TActorId& readCoordinatorActorId, const TComputeShardingPolicy& computeShardingPolicy, const ui64 scanId) : StoragesManager(storagesManager) , Counters(counters) , ReadMetadata(readMetadata) , ResourcesTaskContext("CS::SCAN_READ", counters.ResourcesSubscriberCounters) + , ScanId(scanId) , ScanActorId(scanActorId) , ResourceSubscribeActorId(resourceSubscribeActorId) , ReadCoordinatorActorId(readCoordinatorActorId) - , ComputeShardingPolicy(computeShardingPolicy) - { + , ComputeShardingPolicy(computeShardingPolicy) { Y_ABORT_UNLESS(ReadMetadata); } }; @@ -109,8 +123,9 @@ class IDataReader { virtual TString DoDebugString(const bool verbose) const = 0; virtual void DoAbort() = 0; virtual bool DoIsFinished() const = 0; - virtual std::vector DoExtractReadyResults(const int64_t maxRowsInBatch) = 0; + virtual std::vector> DoExtractReadyResults(const int64_t maxRowsInBatch) = 0; virtual TConclusion DoReadNextInterval() = 0; + public: IDataReader(const std::shared_ptr& context); virtual ~IDataReader() = default; @@ -153,7 +168,7 @@ class IDataReader { return *result; } - std::vector ExtractReadyResults(const int64_t maxRowsInBatch) { + std::vector> ExtractReadyResults(const int64_t maxRowsInBatch) { return DoExtractReadyResults(maxRowsInBatch); } @@ -171,4 +186,4 @@ class IDataReader { } }; -} +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.cpp b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.cpp index e6fc29578f1c..88416a4d214f 100644 --- a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.cpp +++ b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.cpp @@ -1,21 +1,20 @@ #include "read_metadata.h" + #include namespace NKikimr::NOlap::NReader { -TDataStorageAccessor::TDataStorageAccessor(const std::unique_ptr& insertTable, - const 
std::unique_ptr& index) +TDataStorageAccessor::TDataStorageAccessor(const std::unique_ptr& insertTable, const std::unique_ptr& index) : InsertTable(insertTable) - , Index(index) -{} + , Index(index) { +} std::shared_ptr TDataStorageAccessor::Select(const TReadDescription& readDescription) const { if (readDescription.ReadNothing) { return std::make_shared(); } - return Index->Select(readDescription.PathId, - readDescription.GetSnapshot(), - readDescription.PKRangesFilter); + AFL_VERIFY(readDescription.PKRangesFilter); + return Index->Select(readDescription.PathId, readDescription.GetSnapshot(), *readDescription.PKRangesFilter); } ISnapshotSchema::TPtr TReadMetadataBase::GetLoadSchemaVerified(const TPortionInfo& portion) const { @@ -24,8 +23,10 @@ ISnapshotSchema::TPtr TReadMetadataBase::GetLoadSchemaVerified(const TPortionInf return schema; } -std::vector TDataStorageAccessor::GetCommitedBlobs(const TReadDescription& readDescription, const std::shared_ptr& pkSchema) const { - return std::move(InsertTable->Read(readDescription.PathId, readDescription.GetSnapshot(), pkSchema)); +std::vector TDataStorageAccessor::GetCommitedBlobs(const TReadDescription& readDescription, + const std::shared_ptr& pkSchema, const std::optional lockId, const TSnapshot& reqSnapshot) const { + AFL_VERIFY(readDescription.PKRangesFilter); + return std::move(InsertTable->Read(readDescription.PathId, lockId, reqSnapshot, pkSchema, &*readDescription.PKRangesFilter)); } -} +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h index b03982775fce..d87fcf02868e 100644 --- a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h +++ b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h @@ -1,12 +1,17 @@ #pragma once +#include +#include #include #include -#include -#include namespace NKikimr::NOlap { - class TPortionInfo; +class TPortionInfo; } + 
+namespace NKikimr::NKqp::NInternalImplementation { +struct TEvScanData; +} + namespace NKikimr::NOlap::NReader { class TScanIteratorBase; @@ -18,10 +23,10 @@ class TDataStorageAccessor { const std::unique_ptr& Index; public: - TDataStorageAccessor(const std::unique_ptr& insertTable, - const std::unique_ptr& index); + TDataStorageAccessor(const std::unique_ptr& insertTable, const std::unique_ptr& index); std::shared_ptr Select(const TReadDescription& readDescription) const; - std::vector GetCommitedBlobs(const TReadDescription& readDescription, const std::shared_ptr& pkSchema) const; + std::vector GetCommitedBlobs(const TReadDescription& readDescription, const std::shared_ptr& pkSchema, + const std::optional lockId, const TSnapshot& reqSnapshot) const; }; // Holds all metadata that is needed to perform read/scan @@ -32,19 +37,49 @@ struct TReadMetadataBase { ASC /* "ascending" */, DESC /* "descending" */, }; + private: - const ESorting Sorting = ESorting::ASC; // Sorting inside returned batches - std::optional PKRangesFilter; + const ESorting Sorting = ESorting::ASC; // Sorting inside returned batches + std::shared_ptr PKRangesFilter; TProgramContainer Program; std::shared_ptr IndexVersionsPointer; TSnapshot RequestSnapshot; std::optional RequestShardingInfo; + virtual void DoOnReadFinished(NColumnShard::TColumnShard& /*owner*/) const { + } + virtual void DoOnBeforeStartReading(NColumnShard::TColumnShard& /*owner*/) const { + } + virtual void DoOnReplyConstruction(const ui64 /*tabletId*/, NKqp::NInternalImplementation::TEvScanData& /*scanData*/) const { + } protected: std::shared_ptr ResultIndexSchema; + ui64 TxId = 0; + std::optional LockId; + public: using TConstPtr = std::shared_ptr; + void OnReplyConstruction(const ui64 tabletId, NKqp::NInternalImplementation::TEvScanData& scanData) const { + DoOnReplyConstruction(tabletId, scanData); + } + + ui64 GetTxId() const { + return TxId; + } + + std::optional GetLockId() const { + return LockId; + } + + void 
OnReadFinished(NColumnShard::TColumnShard& owner) const { + DoOnReadFinished(owner); + } + + void OnBeforeStartReading(NColumnShard::TColumnShard& owner) const { + DoOnBeforeStartReading(owner); + } + const TVersionedIndex& GetIndexVersions() const { AFL_VERIFY(IndexVersionsPointer); return *IndexVersionsPointer; @@ -54,8 +89,9 @@ struct TReadMetadataBase { return RequestShardingInfo; } - void SetPKRangesFilter(const TPKRangesFilter& value) { - Y_ABORT_UNLESS(IsSorted() && value.IsReverse() == IsDescSorted()); + void SetPKRangesFilter(const std::shared_ptr& value) { + AFL_VERIFY(value); + Y_ABORT_UNLESS(IsSorted() && value->IsReverse() == IsDescSorted()); Y_ABORT_UNLESS(!PKRangesFilter); PKRangesFilter = value; } @@ -65,6 +101,11 @@ struct TReadMetadataBase { return *PKRangesFilter; } + const std::shared_ptr& GetPKRangesFilterPtr() const { + Y_ABORT_UNLESS(!!PKRangesFilter); + return PKRangesFilter; + } + ISnapshotSchema::TPtr GetResultSchema() const { return ResultIndexSchema; } @@ -75,7 +116,7 @@ struct TReadMetadataBase { ISnapshotSchema::TPtr GetLoadSchemaVerified(const TPortionInfo& porition) const; - std::shared_ptr GetBlobSchema(const ui64 version) const { + const std::shared_ptr& GetBlobSchema(const ui64 version) const { return GetIndexVersions().GetSchema(version)->GetIndexInfo().ArrowSchema(); } @@ -91,13 +132,13 @@ struct TReadMetadataBase { RequestShardingInfo = IndexVersionsPointer->GetShardingInfoOptional(pathId, RequestSnapshot); } - TReadMetadataBase(const std::shared_ptr index, const ESorting sorting, const TProgramContainer& ssaProgram, const std::shared_ptr& schema, const TSnapshot& requestSnapshot) + TReadMetadataBase(const std::shared_ptr index, const ESorting sorting, const TProgramContainer& ssaProgram, + const std::shared_ptr& schema, const TSnapshot& requestSnapshot) : Sorting(sorting) , Program(ssaProgram) , IndexVersionsPointer(index) , RequestSnapshot(requestSnapshot) - , ResultIndexSchema(schema) - { + , ResultIndexSchema(schema) { } 
virtual ~TReadMetadataBase() = default; @@ -111,19 +152,25 @@ struct TReadMetadataBase { std::set GetProcessingColumnIds() const { std::set result; for (auto&& i : GetProgram().GetProcessingColumns()) { - result.emplace(ResultIndexSchema->GetIndexInfo().GetColumnId(i)); + result.emplace(ResultIndexSchema->GetIndexInfo().GetColumnIdVerified(i)); } return result; } - bool IsAscSorted() const { return Sorting == ESorting::ASC; } - bool IsDescSorted() const { return Sorting == ESorting::DESC; } - bool IsSorted() const { return IsAscSorted() || IsDescSorted(); } + bool IsAscSorted() const { + return Sorting == ESorting::ASC; + } + bool IsDescSorted() const { + return Sorting == ESorting::DESC; + } + bool IsSorted() const { + return IsAscSorted() || IsDescSorted(); + } virtual std::unique_ptr StartScan(const std::shared_ptr& readContext) const = 0; virtual std::vector GetKeyYqlSchema() const = 0; // TODO: can this only be done for base class? - friend IOutputStream& operator << (IOutputStream& out, const TReadMetadataBase& meta) { + friend IOutputStream& operator<<(IOutputStream& out, const TReadMetadataBase& meta) { meta.Dump(out); return out; } @@ -161,7 +208,6 @@ struct TReadMetadataBase { } return ResultIndexSchema->GetIndexInfo().GetIndexNameOptional(entityId); } - }; -} +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/actor/actor.cpp b/ydb/core/tx/columnshard/engines/reader/actor/actor.cpp index c88a69aabc9c..00ab74fa9921 100644 --- a/ydb/core/tx/columnshard/engines/reader/actor/actor.cpp +++ b/ydb/core/tx/columnshard/engines/reader/actor/actor.cpp @@ -61,8 +61,7 @@ TColumnShardScan::TColumnShardScan(const TActorId& columnShardActorId, const TAc , Deadline(TInstant::Now() + (timeout ? 
timeout + SCAN_HARD_TIMEOUT_GAP : SCAN_HARD_TIMEOUT)) , ScanCountersPool(scanCountersPool) , Stats(NTracing::TTraceClient::GetLocalClient("SHARD", ::ToString(TabletId)/*, "SCAN_TXID:" + ::ToString(TxId)*/)) - , ComputeShardingPolicy(computeShardingPolicy) -{ + , ComputeShardingPolicy(computeShardingPolicy) { AFL_VERIFY(ReadMetadataRange); KeyYqlSchema = ReadMetadataRange->GetKeyYqlSchema(); } @@ -80,7 +79,7 @@ void TColumnShardScan::Bootstrap(const TActorContext& ctx) { ReadCoordinatorActorId = ctx.Register(new NBlobOperations::NRead::TReadCoordinatorActor(TabletId, SelfId())); std::shared_ptr context = std::make_shared(StoragesManager, ScanCountersPool, - ReadMetadataRange, SelfId(), ResourceSubscribeActorId, ReadCoordinatorActorId, ComputeShardingPolicy); + ReadMetadataRange, SelfId(), ResourceSubscribeActorId, ReadCoordinatorActorId, ComputeShardingPolicy, ScanId); ScanIterator = ReadMetadataRange->StartScan(context); auto startResult = ScanIterator->Start(); StartInstant = TMonotonic::Now(); @@ -209,7 +208,7 @@ bool TColumnShardScan::ProduceResults() noexcept { return false; } - std::optional resultOpt = resultConclusion.DetachResult(); + std::shared_ptr resultOpt = resultConclusion.DetachResult(); if (!resultOpt) { ACFL_DEBUG("stage", "no data is ready yet")("iterator", ScanIterator->DebugString()); return false; @@ -243,6 +242,7 @@ bool TColumnShardScan::ProduceResults() noexcept { Result->ArrowBatch = shardedBatch.GetRecordBatch(); Rows += batch->num_rows(); Bytes += NArrow::GetTableDataSize(Result->ArrowBatch); + ACFL_DEBUG("stage", "data_format")("batch_size", NArrow::GetTableDataSize(Result->ArrowBatch))("num_rows", numRows)("batch_columns", JoinSeq(",", batch->schema()->field_names())); } if (CurrentLastReadKey) { @@ -274,9 +274,9 @@ void TColumnShardScan::ContinueProcessing() { if (ChunksLimiter.HasMore()) { auto g = Stats->MakeGuard("Finish"); MakeResult(); + Finish(NColumnShard::TScanCounters::EStatusFinish::Success); SendResult(false, true); 
ScanIterator.reset(); - Finish(NColumnShard::TScanCounters::EStatusFinish::Success); } } else { while (true) { @@ -375,6 +375,7 @@ bool TColumnShardScan::SendResult(bool pageFault, bool lastBatch) { Y_ABORT_UNLESS(AckReceivedInstant); ScanCountersPool.AckWaitingInfo(TMonotonic::Now() - *AckReceivedInstant); } + ReadMetadataRange->OnReplyConstruction(TabletId, *Result); AckReceivedInstant.reset(); Send(ScanComputeActorId, Result.Release(), IEventHandle::FlagTrackDelivery); // TODO: FlagSubscribeOnSession ? @@ -402,7 +403,7 @@ void TColumnShardScan::Finish(const NColumnShard::TScanCounters::EStatusFinish s Send(ColumnShardActorId, new NColumnShard::TEvPrivate::TEvReadFinished(RequestCookie, TxId)); AFL_VERIFY(StartInstant); - ScanCountersPool.OnScanDuration(status, TMonotonic::Now() - *StartInstant); + ScanCountersPool.OnScanFinished(status, TMonotonic::Now() - *StartInstant); ReportStats(); PassAway(); } diff --git a/ydb/core/tx/columnshard/engines/reader/common/description.h b/ydb/core/tx/columnshard/engines/reader/common/description.h index 704b4bd101a9..c180dcc8d067 100644 --- a/ydb/core/tx/columnshard/engines/reader/common/description.h +++ b/ydb/core/tx/columnshard/engines/reader/common/description.h @@ -13,13 +13,15 @@ struct TReadDescription { TProgramContainer Program; public: // Table + ui64 TxId = 0; + std::optional LockId; ui64 PathId = 0; TString TableName; bool ReadNothing = false; // Less[OrEqual], Greater[OrEqual] or both // There's complex logic in NKikimr::TTableRange comparison that could be emulated only with separated compare // operations with potentially different columns. We have to remove columns to support -Inf (Null) and +Inf. 
- NOlap::TPKRangesFilter PKRangesFilter; + std::shared_ptr PKRangesFilter; NYql::NDqProto::EDqStatsMode StatsMode = NYql::NDqProto::EDqStatsMode::DQ_STATS_MODE_NONE; // List of columns @@ -28,7 +30,7 @@ struct TReadDescription { TReadDescription(const TSnapshot& snapshot, const bool isReverse) : Snapshot(snapshot) - , PKRangesFilter(isReverse) { + , PKRangesFilter(std::make_shared(isReverse)) { } void SetProgram(TProgramContainer&& value) { diff --git a/ydb/core/tx/columnshard/engines/reader/common/result.cpp b/ydb/core/tx/columnshard/engines/reader/common/result.cpp index 484165c67b54..e81e86bfc9d0 100644 --- a/ydb/core/tx/columnshard/engines/reader/common/result.cpp +++ b/ydb/core/tx/columnshard/engines/reader/common/result.cpp @@ -4,19 +4,19 @@ namespace NKikimr::NOlap::NReader { class TCurrentBatch { private: - std::vector Results; + std::vector> Results; ui64 RecordsCount = 0; public: ui64 GetRecordsCount() const { return RecordsCount; } - void AddChunk(TPartialReadResult&& res) { - RecordsCount += res.GetRecordsCount(); + void AddChunk(std::shared_ptr&& res) { + RecordsCount += res->GetRecordsCount(); Results.emplace_back(std::move(res)); } - void FillResult(std::vector& result) const { + void FillResult(std::vector>& result) const { if (Results.empty()) { return; } @@ -26,11 +26,12 @@ class TCurrentBatch { } }; -std::vector TPartialReadResult::SplitResults(std::vector&& resultsExt, const ui32 maxRecordsInResult) { +std::vector> TPartialReadResult::SplitResults( + std::vector>&& resultsExt, const ui32 maxRecordsInResult) { std::vector resultBatches; TCurrentBatch currentBatch; for (auto&& i : resultsExt) { - AFL_VERIFY(i.GetRecordsCount()); + AFL_VERIFY(i->GetRecordsCount()); currentBatch.AddChunk(std::move(i)); if (currentBatch.GetRecordsCount() >= maxRecordsInResult) { resultBatches.emplace_back(std::move(currentBatch)); @@ -41,7 +42,7 @@ std::vector TPartialReadResult::SplitResults(std::vector result; + std::vector> result; for (auto&& i : resultBatches) { 
i.FillResult(result); } diff --git a/ydb/core/tx/columnshard/engines/reader/common/result.h b/ydb/core/tx/columnshard/engines/reader/common/result.h index 5780c0f2fc24..e3028b01b5ad 100644 --- a/ydb/core/tx/columnshard/engines/reader/common/result.h +++ b/ydb/core/tx/columnshard/engines/reader/common/result.h @@ -3,15 +3,17 @@ #include #include #include +#include #include #include namespace NKikimr::NOlap::NReader { // Represents a batch of rows produced by ASC or DESC scan with applied filters and partial aggregation -class TPartialReadResult { +class TPartialReadResult: public TNonCopyable { private: - YDB_READONLY_DEF(std::vector>, ResourcesGuards); + YDB_READONLY_DEF(std::shared_ptr, ResourcesGuard); + YDB_READONLY_DEF(std::shared_ptr, GroupGuard); NArrow::TShardedRecordBatch ResultBatch; // This 1-row batch contains the last key that was read while producing the ResultBatch. @@ -33,12 +35,6 @@ class TPartialReadResult { return ResultBatch.GetRecordBatch(); } - const std::shared_ptr& GetResourcesGuardOnly() const { - AFL_VERIFY(ResourcesGuards.size() == 1); - AFL_VERIFY(!!ResourcesGuards.front()); - return ResourcesGuards.front(); - } - ui64 GetMemorySize() const { return ResultBatch.GetMemorySize(); } @@ -47,7 +43,8 @@ class TPartialReadResult { return ResultBatch.GetRecordsCount(); } - static std::vector SplitResults(std::vector&& resultsExt, const ui32 maxRecordsInResult); + static std::vector> SplitResults( + std::vector>&& resultsExt, const ui32 maxRecordsInResult); const NArrow::TShardedRecordBatch& GetShardedBatch() const { return ResultBatch; @@ -57,30 +54,22 @@ class TPartialReadResult { return LastReadKey; } - explicit TPartialReadResult(const std::vector>& resourcesGuards, - const NArrow::TShardedRecordBatch& batch, std::shared_ptr lastKey, const std::optional notFinishedIntervalIdx) - : ResourcesGuards(resourcesGuards) + explicit TPartialReadResult(std::shared_ptr&& resourcesGuard, + std::shared_ptr&& gGuard, const NArrow::TShardedRecordBatch& 
batch, + std::shared_ptr lastKey, const std::optional notFinishedIntervalIdx) + : ResourcesGuard(std::move(resourcesGuard)) + , GroupGuard(std::move(gGuard)) , ResultBatch(batch) , LastReadKey(lastKey) , NotFinishedIntervalIdx(notFinishedIntervalIdx) { - for (auto&& i : ResourcesGuards) { - AFL_VERIFY(i); - } Y_ABORT_UNLESS(ResultBatch.GetRecordsCount()); Y_ABORT_UNLESS(LastReadKey); Y_ABORT_UNLESS(LastReadKey->num_rows() == 1); } - explicit TPartialReadResult(const std::shared_ptr& resourcesGuards, - const NArrow::TShardedRecordBatch& batch, std::shared_ptr lastKey, const std::optional notFinishedIntervalIdx) - : TPartialReadResult( - std::vector>({ resourcesGuards }), batch, lastKey, notFinishedIntervalIdx) { - AFL_VERIFY(resourcesGuards); - } - explicit TPartialReadResult( const NArrow::TShardedRecordBatch& batch, std::shared_ptr lastKey, const std::optional notFinishedIntervalIdx) - : TPartialReadResult(std::vector>(), batch, lastKey, notFinishedIntervalIdx) { + : TPartialReadResult(nullptr, nullptr, batch, lastKey, notFinishedIntervalIdx) { } }; diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.cpp index 87315949329a..ae28340c9932 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.cpp @@ -1,11 +1,13 @@ #include "constructor.h" -#include "resolver.h" #include "read_metadata.h" +#include "resolver.h" + #include namespace NKikimr::NOlap::NReader::NPlain { -NKikimr::TConclusionStatus TIndexScannerConstructor::ParseProgram(const TVersionedIndex* vIndex, const NKikimrTxDataShard::TEvKqpScan& proto, TReadDescription& read) const { +NKikimr::TConclusionStatus TIndexScannerConstructor::ParseProgram( + const TVersionedIndex* vIndex, const NKikimrTxDataShard::TEvKqpScan& proto, TReadDescription& read) const { AFL_VERIFY(vIndex); 
auto& indexInfo = vIndex->GetSchema(Snapshot)->GetIndexInfo(); TIndexColumnResolver columnResolver(indexInfo); @@ -17,26 +19,29 @@ std::vector TIndexScannerConstructor::GetPrimaryKeyScheme(const N return indexInfo.GetPrimaryKeyColumns(); } -NKikimr::TConclusion> TIndexScannerConstructor::DoBuildReadMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const { +NKikimr::TConclusion> TIndexScannerConstructor::DoBuildReadMetadata( + const NColumnShard::TColumnShard* self, const TReadDescription& read) const { auto& insertTable = self->InsertTable; auto& index = self->TablesManager.GetPrimaryIndex(); if (!insertTable || !index) { return std::shared_ptr(); } - if (read.GetSnapshot().GetPlanStep() < self->GetMinReadStep()) { - return TConclusionStatus::Fail(TStringBuilder() << "Snapshot too old: " << read.GetSnapshot()); + if (read.GetSnapshot().GetPlanInstant() < self->GetMinReadSnapshot().GetPlanInstant()) { + return TConclusionStatus::Fail(TStringBuilder() << "Snapshot too old: " << read.GetSnapshot() << ". CS min read snapshot: " + << self->GetMinReadSnapshot() << ". now: " << TInstant::Now()); } TDataStorageAccessor dataAccessor(insertTable, index); - auto readMetadata = std::make_shared(index->CopyVersionedIndexPtr(), read.GetSnapshot(), + AFL_VERIFY(read.PathId); + auto readMetadata = std::make_shared(read.PathId, index->CopyVersionedIndexPtr(), read.GetSnapshot(), IsReverse ? 
TReadMetadataBase::ESorting::DESC : TReadMetadataBase::ESorting::ASC, read.GetProgram()); - auto initResult = readMetadata->Init(read, dataAccessor); + auto initResult = readMetadata->Init(self, read, dataAccessor); if (!initResult) { return initResult; } - return dynamic_pointer_cast(readMetadata); + return static_pointer_cast(readMetadata); } -} \ No newline at end of file +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.cpp index 076b69c7f49a..c24fbe0577a7 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.cpp @@ -1,6 +1,11 @@ #include "read_metadata.h" + +#include +#include #include #include +#include +#include namespace NKikimr::NOlap::NReader::NPlain { @@ -8,16 +13,36 @@ std::unique_ptr TReadMetadata::StartScan(const std::shared_pt return std::make_unique(readContext, readContext->GetReadMetadataPtrVerifiedAs()); } -TConclusionStatus TReadMetadata::Init(const TReadDescription& readDescription, const TDataStorageAccessor& dataAccessor) { +TConclusionStatus TReadMetadata::Init( + const NColumnShard::TColumnShard* owner, const TReadDescription& readDescription, const TDataStorageAccessor& dataAccessor) { SetPKRangesFilter(readDescription.PKRangesFilter); InitShardingInfo(readDescription.PathId); + TxId = readDescription.TxId; + LockId = readDescription.LockId; + if (LockId) { + owner->GetOperationsManager().RegisterLock(*LockId, owner->Generation()); + LockSharingInfo = owner->GetOperationsManager().GetLockVerified(*LockId).GetSharingInfo(); + } /// @note We could have column name changes between schema versions: /// Add '1:foo', Drop '1:foo', Add '2:foo'. Drop should hide '1:foo' from reads. 
/// It's expected that we have only one version on 'foo' in blob and could split them by schema {planStep:txId}. /// So '1:foo' would be omitted in blob records for the column in new snapshots. And '2:foo' - in old ones. /// It's not possible for blobs with several columns. There should be a special logic for them. - CommittedBlobs = dataAccessor.GetCommitedBlobs(readDescription, ResultIndexSchema->GetIndexInfo().GetReplaceKey()); + CommittedBlobs = + dataAccessor.GetCommitedBlobs(readDescription, ResultIndexSchema->GetIndexInfo().GetReplaceKey(), LockId, GetRequestSnapshot()); + + if (LockId) { + for (auto&& i : CommittedBlobs) { + if (auto writeId = i.GetWriteIdOptional()) { + if (owner->HasLongTxWrites(*writeId)) { + } else { + auto op = owner->GetOperationsManager().GetOperationByInsertWriteIdVerified(*writeId); + AddWriteIdToCheck(*writeId, op->GetLockId()); + } + } + } + } SelectInfo = dataAccessor.Select(readDescription); StatsMode = readDescription.StatsMode; @@ -41,7 +66,7 @@ std::set TReadMetadata::GetPKColumnIds() const { std::set result; auto& indexInfo = ResultIndexSchema->GetIndexInfo(); for (auto&& i : indexInfo.GetPrimaryKeyColumns()) { - Y_ABORT_UNLESS(result.emplace(indexInfo.GetColumnId(i.first)).second); + Y_ABORT_UNLESS(result.emplace(indexInfo.GetColumnIdVerified(i.first)).second); } return result; } @@ -51,8 +76,57 @@ std::shared_ptr TReadMetadata::BuildReader(const std::shared_ptrfield_names(), {}, IsDescSorted()); + return NArrow::NMerger::TSortableBatchPosition(key.ToBatch(GetReplaceKey()), 0, GetReplaceKey()->field_names(), {}, IsDescSorted()); +} + +void TReadMetadata::DoOnReadFinished(NColumnShard::TColumnShard& owner) const { + if (!GetLockId()) { + return; + } + const ui64 lock = *GetLockId(); + if (GetBrokenWithCommitted()) { + owner.GetOperationsManager().GetLockVerified(lock).SetBroken(); + } else { + NOlap::NTxInteractions::TTxConflicts conflicts; + for (auto&& i : GetConflictableLockIds()) { + conflicts.Add(i, lock); + } + auto 
writer = std::make_shared(PathId, conflicts); + owner.GetOperationsManager().AddEventForLock(owner, lock, writer); + } } +void TReadMetadata::DoOnBeforeStartReading(NColumnShard::TColumnShard& owner) const { + if (!LockId) { + return; + } + auto evWriter = std::make_shared( + PathId, GetResultSchema()->GetIndexInfo().GetPrimaryKey(), GetPKRangesFilterPtr(), GetConflictableLockIds()); + owner.GetOperationsManager().AddEventForLock(owner, *LockId, evWriter); } + +void TReadMetadata::DoOnReplyConstruction(const ui64 tabletId, NKqp::NInternalImplementation::TEvScanData& scanData) const { + if (LockSharingInfo) { + NKikimrDataEvents::TLock lockInfo; + lockInfo.SetLockId(LockSharingInfo->GetLockId()); + lockInfo.SetGeneration(LockSharingInfo->GetGeneration()); + lockInfo.SetDataShard(tabletId); + lockInfo.SetCounter(LockSharingInfo->GetCounter()); + lockInfo.SetPathId(PathId); + lockInfo.SetHasWrites(LockSharingInfo->HasWrites()); + if (LockSharingInfo->IsBroken()) { + scanData.LocksInfo.BrokenLocks.emplace_back(std::move(lockInfo)); + } else { + scanData.LocksInfo.Locks.emplace_back(std::move(lockInfo)); + } + } +} + +bool TReadMetadata::IsMyUncommitted(const TInsertWriteId writeId) const { + AFL_VERIFY(LockSharingInfo); + auto it = ConflictedWriteIds.find(writeId); + AFL_VERIFY(it != ConflictedWriteIds.end()); + return it->second.GetLockId() == LockSharingInfo->GetLockId(); +} + +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h index 371a09d73102..cbd397bf366e 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h @@ -5,14 +5,100 @@ #include #include +namespace NKikimr::NColumnShard { +class TLockSharingInfo; +} + namespace NKikimr::NOlap::NReader::NPlain { // Holds all 
metadata that is needed to perform read/scan struct TReadMetadata : public TReadMetadataBase { using TBase = TReadMetadataBase; + +private: + const ui64 PathId; + std::shared_ptr BrokenWithCommitted = std::make_shared(); + std::shared_ptr LockSharingInfo; + + class TWriteIdInfo { + private: + const ui64 LockId; + std::shared_ptr Conflicts; + + public: + TWriteIdInfo(const ui64 lockId, const std::shared_ptr& counter) + : LockId(lockId) + , Conflicts(counter) { + } + + ui64 GetLockId() const { + return LockId; + } + + void MarkAsConflictable() const { + Conflicts->Inc(); + } + + bool IsConflictable() const { + return Conflicts->Val(); + } + }; + + THashMap> LockConflictCounters; + THashMap ConflictedWriteIds; + + virtual void DoOnReadFinished(NColumnShard::TColumnShard& owner) const override; + virtual void DoOnBeforeStartReading(NColumnShard::TColumnShard& owner) const override; + virtual void DoOnReplyConstruction(const ui64 tabletId, NKqp::NInternalImplementation::TEvScanData& scanData) const override; + public: using TConstPtr = std::shared_ptr; + bool GetBrokenWithCommitted() const { + return BrokenWithCommitted->Val(); + } + THashSet GetConflictableLockIds() const { + THashSet result; + for (auto&& i : ConflictedWriteIds) { + if (i.second.IsConflictable()) { + result.emplace(i.second.GetLockId()); + } + } + return result; + } + + bool IsLockConflictable(const ui64 lockId) const { + auto it = LockConflictCounters.find(lockId); + AFL_VERIFY(it != LockConflictCounters.end()); + return it->second->Val(); + } + + bool IsWriteConflictable(const TInsertWriteId writeId) const { + auto it = ConflictedWriteIds.find(writeId); + AFL_VERIFY(it != ConflictedWriteIds.end()); + return it->second.IsConflictable(); + } + + void AddWriteIdToCheck(const TInsertWriteId writeId, const ui64 lockId) { + auto it = LockConflictCounters.find(lockId); + if (it == LockConflictCounters.end()) { + it = LockConflictCounters.emplace(lockId, std::make_shared()).first; + } + 
AFL_VERIFY(ConflictedWriteIds.emplace(writeId, TWriteIdInfo(lockId, it->second)).second); + } + + [[nodiscard]] bool IsMyUncommitted(const TInsertWriteId writeId) const; + + void SetConflictedWriteId(const TInsertWriteId writeId) const { + auto it = ConflictedWriteIds.find(writeId); + AFL_VERIFY(it != ConflictedWriteIds.end()); + it->second.MarkAsConflictable(); + } + + void SetBrokenWithCommitted() const { + BrokenWithCommitted->Inc(); + } + NArrow::NMerger::TSortableBatchPosition BuildSortedPosition(const NArrow::TReplaceKey& key) const; std::shared_ptr BuildReader(const std::shared_ptr& context) const; @@ -20,13 +106,18 @@ struct TReadMetadata : public TReadMetadataBase { return GetProgram().HasProcessingColumnIds(); } + ui64 GetPathId() const { + return PathId; + } + std::shared_ptr SelectInfo; NYql::NDqProto::EDqStatsMode StatsMode = NYql::NDqProto::EDqStatsMode::DQ_STATS_MODE_NONE; std::vector CommittedBlobs; std::shared_ptr ReadStats; - TReadMetadata(const std::shared_ptr info, const TSnapshot& snapshot, const ESorting sorting, const TProgramContainer& ssaProgram) + TReadMetadata(const ui64 pathId, const std::shared_ptr info, const TSnapshot& snapshot, const ESorting sorting, const TProgramContainer& ssaProgram) : TBase(info, sorting, ssaProgram, info->GetSchema(snapshot), snapshot) + , PathId(pathId) , ReadStats(std::make_shared()) { } @@ -35,7 +126,7 @@ struct TReadMetadata : public TReadMetadataBase { return GetResultSchema()->GetIndexInfo().GetPrimaryKeyColumns(); } - TConclusionStatus Init(const TReadDescription& readDescription, const TDataStorageAccessor& dataAccessor); + TConclusionStatus Init(const NColumnShard::TColumnShard* owner, const TReadDescription& readDescription, const TDataStorageAccessor& dataAccessor); std::vector GetColumnsOrder() const { auto schema = GetResultSchema(); diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/resolver.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/resolver.h 
index c5a2998a54c1..3890edc6c361 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/resolver.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/resolver.h @@ -20,10 +20,6 @@ class TIndexColumnResolver: public IColumnResolver { return IndexInfo.GetColumnName(id, required); } - const NTable::TScheme::TTableSchema& GetSchema() const override { - return IndexInfo; - } - NSsa::TColumnInfo GetDefaultColumn() const override { return NSsa::TColumnInfo::Original((ui32)NOlap::TIndexInfo::ESpecialColumn::PLAN_STEP, NOlap::TIndexInfo::SPEC_COL_PLAN_STEP); } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/ya.make b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/ya.make index b91efa4346d8..1ab826414813 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/ya.make +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/ya.make @@ -8,6 +8,7 @@ SRCS( PEERDIR( ydb/core/tx/columnshard/engines/reader/abstract + ydb/core/kqp/compute_actor ) END() diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.cpp index f100c8f89041..24ef9a452e4c 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.cpp @@ -11,7 +11,7 @@ TString TColumnsSet::DebugString() const { << ");"; } -NKikimr::NOlap::NReader::NPlain::TColumnsSet TColumnsSet::operator-(const TColumnsSet& external) const { +TColumnsSet TColumnsSet::operator-(const TColumnsSet& external) const { if (external.IsEmpty() || IsEmpty()) { return *this; } @@ -30,7 +30,7 @@ NKikimr::NOlap::NReader::NPlain::TColumnsSet TColumnsSet::operator-(const TColum return result; } -NKikimr::NOlap::NReader::NPlain::TColumnsSet TColumnsSet::operator+(const TColumnsSet& external) const { +TColumnsSet 
TColumnsSet::operator+(const TColumnsSet& external) const { if (external.IsEmpty()) { return *this; } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.h index 08d7ac103d80..98e77f4971e9 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.h @@ -1,15 +1,24 @@ #pragma once -#include -#include #include +#include + +#include + #include namespace NKikimr::NOlap::NReader::NPlain { +enum class EStageFeaturesIndexes { + Filter = 0, + Fetching = 1, + Merge = 2 +}; + class TIndexesSet { private: YDB_READONLY_DEF(std::vector, IndexIds); YDB_READONLY_DEF(std::set, IndexIdsSet); + public: TIndexesSet(const std::set& indexIds) : IndexIds(indexIds.begin(), indexIds.end()) @@ -18,8 +27,8 @@ class TIndexesSet { } TIndexesSet(const ui32& indexId) - : IndexIds({indexId}) - , IndexIdsSet({indexId}) { + : IndexIds({ indexId }) + , IndexIdsSet({ indexId }) { } ui32 GetIndexesCount() const { @@ -31,78 +40,70 @@ class TIndexesSet { } }; -class TColumnsSet { -private: - YDB_READONLY_DEF(std::set, ColumnIds); - YDB_READONLY_DEF(std::set, ColumnNames); - std::vector ColumnNamesVector; - YDB_READONLY_DEF(std::shared_ptr, Schema); - ISnapshotSchema::TPtr FullReadSchema; - YDB_READONLY_DEF(ISnapshotSchema::TPtr, FilteredSchema); - - void Rebuild(); +class TColumnsSetIds { +protected: + std::set ColumnIds; public: - TColumnsSet() = default; - bool IsEmpty() const { - return ColumnIds.empty(); + const std::set& GetColumnIds() const { + return ColumnIds; } - bool operator!() const { - return IsEmpty(); + TString DebugString() const { + return JoinSeq(",", ColumnIds); } - const std::vector& GetColumnNamesVector() const { - return ColumnNamesVector; + TColumnsSetIds(const std::set& ids) + : ColumnIds(ids) { } - - ui32 GetColumnsCount() const { - return ColumnIds.size(); + 
TColumnsSetIds() = default; + TColumnsSetIds(std::set&& ids) + : ColumnIds(std::move(ids)) { } - bool ColumnsOnly(const std::vector& fieldNames) const; - - TColumnsSet(const std::set& columnIds, const TIndexInfo& indexInfo, const ISnapshotSchema::TPtr& fullReadSchema) - : ColumnIds(columnIds) - , FullReadSchema(fullReadSchema) - { - Schema = indexInfo.GetColumnsSchema(ColumnIds); - Rebuild(); + TColumnsSetIds(const std::vector& ids) + : ColumnIds(ids.begin(), ids.end()) { } - TColumnsSet(const std::vector& columnIds, const TIndexInfo& indexInfo, const ISnapshotSchema::TPtr& fullReadSchema) - : ColumnIds(columnIds.begin(), columnIds.end()) - , FullReadSchema(fullReadSchema) - { - Schema = indexInfo.GetColumnsSchema(ColumnIds); - Rebuild(); + TColumnsSetIds operator+(const TColumnsSetIds& external) const { + TColumnsSetIds result = *this; + result.ColumnIds.insert(external.ColumnIds.begin(), external.ColumnIds.end()); + return result; } - const ISnapshotSchema& GetFilteredSchemaVerified() const { - AFL_VERIFY(FilteredSchema); - return *FilteredSchema; + TColumnsSetIds operator-(const TColumnsSetIds& external) const { + TColumnsSetIds result = *this; + for (auto&& i : external.ColumnIds) { + result.ColumnIds.erase(i); + } + return result; + } + bool IsEmpty() const { + return ColumnIds.empty(); } - const std::shared_ptr& GetFilteredSchemaPtrVerified() const { - AFL_VERIFY(FilteredSchema); - return FilteredSchema; + bool operator!() const { + return IsEmpty(); + } + ui32 GetColumnsCount() const { + return ColumnIds.size(); } - bool Contains(const std::shared_ptr& columnsSet) const { + bool Contains(const std::shared_ptr& columnsSet) const { if (!columnsSet) { return true; } return Contains(*columnsSet); } - bool IsEqual(const std::shared_ptr& columnsSet) const { + bool IsEqual(const std::shared_ptr& columnsSet) const { if (!columnsSet) { return false; } return IsEqual(*columnsSet); } - bool Contains(const TColumnsSet& columnsSet) const { + bool Contains(const 
TColumnsSetIds& columnsSet) const { for (auto&& i : columnsSet.ColumnIds) { if (!ColumnIds.contains(i)) { return false; @@ -111,7 +112,7 @@ class TColumnsSet { return true; } - bool Cross(const TColumnsSet& columnsSet) const { + bool Cross(const TColumnsSetIds& columnsSet) const { for (auto&& i : columnsSet.ColumnIds) { if (ColumnIds.contains(i)) { return true; @@ -120,7 +121,7 @@ class TColumnsSet { return false; } - std::set Intersect(const TColumnsSet& columnsSet) const { + std::set Intersect(const TColumnsSetIds& columnsSet) const { std::set result; for (auto&& i : columnsSet.ColumnIds) { if (ColumnIds.contains(i)) { @@ -130,7 +131,7 @@ class TColumnsSet { return result; } - bool IsEqual(const TColumnsSet& columnsSet) const { + bool IsEqual(const TColumnsSetIds& columnsSet) const { if (columnsSet.GetColumnIds().size() != ColumnIds.size()) { return false; } @@ -145,6 +146,56 @@ class TColumnsSet { } return true; } +}; + +class TColumnsSet: public TColumnsSetIds { +private: + using TBase = TColumnsSetIds; + YDB_READONLY_DEF(std::set, ColumnNames); + std::vector ColumnNamesVector; + YDB_READONLY_DEF(std::shared_ptr, Schema); + ISnapshotSchema::TPtr FullReadSchema; + YDB_READONLY_DEF(ISnapshotSchema::TPtr, FilteredSchema); + + void Rebuild(); + +public: + TColumnsSet() = default; + const std::vector& GetColumnNamesVector() const { + return ColumnNamesVector; + } + + bool ColumnsOnly(const std::vector& fieldNames) const; + + std::shared_ptr BuildSamePtr(const std::set& columnIds) const { + return std::make_shared(columnIds, FullReadSchema); + } + + TColumnsSet(const std::set& columnIds, const ISnapshotSchema::TPtr& fullReadSchema) + : TBase(columnIds) + , FullReadSchema(fullReadSchema) { + AFL_VERIFY(!!FullReadSchema); + Schema = FullReadSchema->GetIndexInfo().GetColumnsSchema(ColumnIds); + Rebuild(); + } + + TColumnsSet(const std::vector& columnIds, const ISnapshotSchema::TPtr& fullReadSchema) + : TBase(columnIds) + , FullReadSchema(fullReadSchema) { + 
AFL_VERIFY(!!FullReadSchema); + Schema = FullReadSchema->GetIndexInfo().GetColumnsSchema(ColumnIds); + Rebuild(); + } + + const ISnapshotSchema& GetFilteredSchemaVerified() const { + AFL_VERIFY(FilteredSchema); + return *FilteredSchema; + } + + const std::shared_ptr& GetFilteredSchemaPtrVerified() const { + AFL_VERIFY(FilteredSchema); + return FilteredSchema; + } TString DebugString() const; @@ -153,4 +204,4 @@ class TColumnsSet { TColumnsSet operator-(const TColumnsSet& external) const; }; -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.cpp index 098e60b9626b..0efd8bfbb9d2 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.cpp @@ -1,20 +1,22 @@ #include "context.h" #include "source.h" +#include + namespace NKikimr::NOlap::NReader::NPlain { std::unique_ptr TSpecialReadContext::BuildMerger() const { - return std::make_unique(ReadMetadata->GetReplaceKey(), ProgramInputColumns->GetSchema(), CommonContext->IsReverse(), - IIndexInfo::GetSnapshotColumnNames()); + return std::make_unique( + ReadMetadata->GetReplaceKey(), ProgramInputColumns->GetSchema(), CommonContext->IsReverse(), IIndexInfo::GetSnapshotColumnNames()); } -ui64 TSpecialReadContext::GetMemoryForSources(const THashMap>& sources, const bool isExclusive) { +ui64 TSpecialReadContext::GetMemoryForSources(const THashMap>& sources) { ui64 result = 0; bool hasSequentialReadSources = false; for (auto&& i : sources) { auto fetchingPlan = GetColumnsFetchingPlan(i.second); AFL_VERIFY(i.second->GetIntervalsCount()); - const ui64 sourceMemory = std::max(1, fetchingPlan->PredictRawBytes(i.second) / i.second->GetIntervalsCount()); + const ui64 sourceMemory = std::max(1, i.second->GetResourceGuardsMemory() / i.second->GetIntervalsCount()); if 
(!i.second->IsSourceInMemory()) { hasSequentialReadSources = true; } @@ -23,17 +25,23 @@ ui64 TSpecialReadContext::GetMemoryForSources(const THashMapIsReverse()) { - result = 2 * result; // due to in time we will have data in original portion + data in merged(or reversed) interval - } } return result; } -std::shared_ptr TSpecialReadContext::GetColumnsFetchingPlan(const std::shared_ptr& source) const { - const bool needSnapshots = !source->GetExclusiveIntervalOnly() || ReadMetadata->GetRequestSnapshot() < source->GetRecordSnapshotMax() || !source->IsSourceInMemory(); - const bool partialUsageByPK = ReadMetadata->GetPKRangesFilter().IsPortionInPartialUsage(source->GetStartReplaceKey(), source->GetFinishReplaceKey(), ReadMetadata->GetIndexInfo()); +std::shared_ptr TSpecialReadContext::GetColumnsFetchingPlan(const std::shared_ptr& source) { + const bool needSnapshots = !source->GetExclusiveIntervalOnly() || ReadMetadata->GetRequestSnapshot() < source->GetRecordSnapshotMax() || + !source->IsSourceInMemory(); + const bool partialUsageByPK = [&]() { + switch (source->GetUsageClass()) { + case TPKRangeFilter::EUsageClass::PartialUsage: + return true; + case TPKRangeFilter::EUsageClass::DontUsage: + return true; + case TPKRangeFilter::EUsageClass::FullUsage: + return false; + } + }(); const bool useIndexes = (IndexChecker ? source->HasIndexes(IndexChecker->GetIndexIds()) : false); const bool isWholeExclusiveSource = source->GetExclusiveIntervalOnly() && source->IsSourceInMemory(); const bool hasDeletions = source->GetHasDeletions(); @@ -44,16 +52,18 @@ std::shared_ptr TSpecialReadContext::GetColumnsFetchingPlan(con needShardingFilter = true; } } - if (auto result = CacheFetchingScripts - [needSnapshots ? 1 : 0][isWholeExclusiveSource ? 1 : 0] - [partialUsageByPK ? 1 : 0][useIndexes ? 1 : 0] - [needShardingFilter ? 1 : 0][hasDeletions ? 
1 : 0]) { -// AFL_WARN(NKikimrServices::TX_COLUMNSHARD_SCAN)("SS", needSnapshots)("PK", partialUsageByPK)("IDX", useIndexes)("SHARDING", needShardingFilter) -// ("EXCL", source->GetExclusiveIntervalOnly())("MEM", source->IsSourceInMemory())("result", result->DebugString()); - return result; + auto result = CacheFetchingScripts[needSnapshots ? 1 : 0][isWholeExclusiveSource ? 1 : 0][partialUsageByPK ? 1 : 0][useIndexes ? 1 : 0] + [needShardingFilter ? 1 : 0][hasDeletions ? 1 : 0]; + if (!result) { + result = BuildColumnsFetchingPlan(needSnapshots, isWholeExclusiveSource, partialUsageByPK, useIndexes, needShardingFilter, hasDeletions); + CacheFetchingScripts[needSnapshots ? 1 : 0][isWholeExclusiveSource ? 1 : 0][partialUsageByPK ? 1 : 0][useIndexes ? 1 : 0] + [needShardingFilter ? 1 : 0][hasDeletions ? 1 : 0] = result; } - { - std::shared_ptr result = std::make_shared(); + AFL_VERIFY(result); + if (*result) { + return *result; + } else { + std::shared_ptr result = std::make_shared(*this); result->SetBranchName("FAKE"); result->AddStep(std::make_shared(source->GetRecordsCount())); return result; @@ -62,27 +72,42 @@ std::shared_ptr TSpecialReadContext::GetColumnsFetchingPlan(con class TColumnsAccumulator { private: - TColumnsSet FetchingReadyColumns; - TColumnsSet AssemblerReadyColumns; + TColumnsSetIds FetchingReadyColumns; + TColumnsSetIds AssemblerReadyColumns; + ISnapshotSchema::TPtr FullSchema; + std::shared_ptr GuaranteeNotOptional; public: - bool AddFetchingStep(TFetchingScript& script, const TColumnsSet& columns) { - auto actualColumns = columns - FetchingReadyColumns; - FetchingReadyColumns = FetchingReadyColumns + columns; + TColumnsAccumulator(const std::shared_ptr& guaranteeNotOptional, const ISnapshotSchema::TPtr& fullSchema) + : FullSchema(fullSchema) + , GuaranteeNotOptional(guaranteeNotOptional) { + } + + bool AddFetchingStep(TFetchingScript& script, const TColumnsSetIds& columns, const EStageFeaturesIndexes& stage) { + auto actualColumns = 
(TColumnsSetIds)columns - FetchingReadyColumns; + FetchingReadyColumns = FetchingReadyColumns + (TColumnsSetIds)columns; if (!actualColumns.IsEmpty()) { - auto actualSet = std::make_shared(actualColumns); - script.AddStep(std::make_shared(actualSet)); + script.AddStep(std::make_shared(actualColumns, stage)); + script.AddStep(std::make_shared(actualColumns)); return true; } return false; } - bool AddAssembleStep(TFetchingScript& script, const TColumnsSet& columns, const TString& purposeId, const bool optional) { - auto actualColumns = columns - AssemblerReadyColumns; + bool AddAssembleStep(TFetchingScript& script, const TColumnsSetIds& columns, const TString& purposeId, const bool optional) { + auto actualColumns = (TColumnsSetIds)columns - AssemblerReadyColumns; AssemblerReadyColumns = AssemblerReadyColumns + columns; if (!actualColumns.IsEmpty()) { - auto actualSet = std::make_shared(actualColumns); + auto actualSet = std::make_shared(actualColumns.GetColumnIds(), FullSchema); if (optional) { - script.AddStep(std::make_shared(actualSet, purposeId)); + const auto notOptionalColumnIds = GuaranteeNotOptional->Intersect(*actualSet); + if (notOptionalColumnIds.size()) { + std::shared_ptr cross = actualSet->BuildSamePtr(notOptionalColumnIds); + script.AddStep(std::make_shared(cross, purposeId)); + *actualSet = *actualSet - *cross; + } + if (!actualSet->IsEmpty()) { + script.AddStep(std::make_shared(actualSet, purposeId)); + } } else { script.AddStep(std::make_shared(actualSet, purposeId)); } @@ -92,25 +117,29 @@ class TColumnsAccumulator { } }; -std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(const bool needSnapshots, const bool exclusiveSource, const bool partialUsageByPredicateExt, const bool useIndexes, - const bool needFilterSharding, const bool needFilterDeletion) const { - std::shared_ptr result = std::make_shared(); +std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(const bool needSnapshots, const bool exclusiveSource, + const bool 
partialUsageByPredicateExt, const bool useIndexes, const bool needFilterSharding, const bool needFilterDeletion) const { + std::shared_ptr result = std::make_shared(*this); const bool partialUsageByPredicate = partialUsageByPredicateExt && PredicateColumns->GetColumnsCount(); if (!!IndexChecker && useIndexes && exclusiveSource) { result->AddStep(std::make_shared(std::make_shared(IndexChecker->GetIndexIds()))); result->AddStep(std::make_shared(IndexChecker)); } bool hasFilterSharding = false; - TColumnsAccumulator acc; + TColumnsAccumulator acc(MergeColumns, ReadMetadata->GetResultSchema()); if (needFilterSharding && !ShardingColumns->IsEmpty()) { hasFilterSharding = true; - acc.AddFetchingStep(*result, *ShardingColumns); - acc.AddAssembleStep(*result, *ShardingColumns, "SPEC_SHARDING", false); + TColumnsSetIds columnsFetch = *ShardingColumns; + if (!exclusiveSource) { + columnsFetch = columnsFetch + *PKColumns + *SpecColumns; + } + acc.AddFetchingStep(*result, columnsFetch, EStageFeaturesIndexes::Filter); + acc.AddAssembleStep(*result, columnsFetch, "SPEC_SHARDING", false); result->AddStep(std::make_shared()); } if (!EFColumns->GetColumnsCount() && !partialUsageByPredicate) { result->SetBranchName("simple"); - TColumnsSet columnsFetch = *FFColumns; + TColumnsSetIds columnsFetch = *FFColumns; if (needFilterDeletion) { columnsFetch = columnsFetch + *DeletionColumns; } @@ -118,21 +147,25 @@ std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(c columnsFetch = columnsFetch + *SpecColumns; } if (!exclusiveSource) { - columnsFetch = columnsFetch + *PKColumns + *SpecColumns; + columnsFetch = columnsFetch + *MergeColumns; } else { if (columnsFetch.GetColumnsCount() == 1 && SpecColumns->Contains(columnsFetch) && !hasFilterSharding) { return nullptr; } } if (columnsFetch.GetColumnsCount() || hasFilterSharding || needFilterDeletion) { - acc.AddFetchingStep(*result, columnsFetch); + acc.AddFetchingStep(*result, columnsFetch, EStageFeaturesIndexes::Fetching); + if 
(needSnapshots) { + acc.AddAssembleStep(*result, *SpecColumns, "SPEC", false); + result->AddStep(std::make_shared()); + } + if (!exclusiveSource) { + acc.AddAssembleStep(*result, *MergeColumns, "LAST_PK", false); + } if (needFilterDeletion) { acc.AddAssembleStep(*result, *DeletionColumns, "SPEC_DELETION", false); result->AddStep(std::make_shared()); } - if (!exclusiveSource) { - acc.AddAssembleStep(*result, *PKColumns + *SpecColumns, "LAST_PK", false); - } acc.AddAssembleStep(*result, columnsFetch, "LAST", true); } else { return nullptr; @@ -151,32 +184,35 @@ std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(c } AFL_VERIFY(columnsFetch.GetColumnsCount()); - acc.AddFetchingStep(*result, columnsFetch); + acc.AddFetchingStep(*result, columnsFetch, EStageFeaturesIndexes::Filter); if (needFilterDeletion) { acc.AddAssembleStep(*result, *DeletionColumns, "SPEC_DELETION", false); result->AddStep(std::make_shared()); } - if (needSnapshots || FFColumns->Cross(*SpecColumns)) { - acc.AddAssembleStep(*result, *SpecColumns, "SPEC", false); - result->AddStep(std::make_shared()); - } if (partialUsageByPredicate) { acc.AddAssembleStep(*result, *PredicateColumns, "PREDICATE", false); result->AddStep(std::make_shared()); } + if (needSnapshots || FFColumns->Cross(*SpecColumns)) { + acc.AddAssembleStep(*result, *SpecColumns, "SPEC", false); + result->AddStep(std::make_shared()); + } for (auto&& i : ReadMetadata->GetProgram().GetSteps()) { if (i->GetFilterOriginalColumnIds().empty()) { break; } - TColumnsSet stepColumnIds(i->GetFilterOriginalColumnIds(), ReadMetadata->GetIndexInfo(), ReadMetadata->GetResultSchema()); + TColumnsSet stepColumnIds(i->GetFilterOriginalColumnIds(), ReadMetadata->GetResultSchema()); acc.AddAssembleStep(*result, stepColumnIds, "EF", true); result->AddStep(std::make_shared(i)); if (!i->IsFilterOnly()) { break; } } - acc.AddFetchingStep(*result, *FFColumns); + if (GetReadMetadata()->Limit) { + 
result->AddStep(std::make_shared(GetReadMetadata()->Limit, GetReadMetadata()->IsDescSorted())); + } + acc.AddFetchingStep(*result, *FFColumns, EStageFeaturesIndexes::Fetching); acc.AddAssembleStep(*result, *FFColumns, "LAST", true); } else { result->SetBranchName("merge"); @@ -185,17 +221,17 @@ std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(c columnsFetch = columnsFetch + *DeletionColumns; } AFL_VERIFY(columnsFetch.GetColumnsCount()); - acc.AddFetchingStep(*result, columnsFetch); + acc.AddFetchingStep(*result, columnsFetch, EStageFeaturesIndexes::Filter); - if (needFilterDeletion) { - acc.AddAssembleStep(*result, *DeletionColumns, "SPEC_DELETION", false); - result->AddStep(std::make_shared()); - } acc.AddAssembleStep(*result, *SpecColumns, "SPEC", false); + acc.AddAssembleStep(*result, *PKColumns, "PK", false); if (needSnapshots) { result->AddStep(std::make_shared()); } - acc.AddAssembleStep(*result, *PKColumns, "PK", false); + if (needFilterDeletion) { + acc.AddAssembleStep(*result, *DeletionColumns, "SPEC_DELETION", false); + result->AddStep(std::make_shared()); + } if (partialUsageByPredicate) { result->AddStep(std::make_shared()); } @@ -203,14 +239,14 @@ std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(c if (i->GetFilterOriginalColumnIds().empty()) { break; } - TColumnsSet stepColumnIds(i->GetFilterOriginalColumnIds(), ReadMetadata->GetIndexInfo(), ReadMetadata->GetResultSchema()); + TColumnsSet stepColumnIds(i->GetFilterOriginalColumnIds(), ReadMetadata->GetResultSchema()); acc.AddAssembleStep(*result, stepColumnIds, "EF", true); result->AddStep(std::make_shared(i)); if (!i->IsFilterOnly()) { break; } } - acc.AddFetchingStep(*result, *FFColumns); + acc.AddFetchingStep(*result, *FFColumns, EStageFeaturesIndexes::Fetching); acc.AddAssembleStep(*result, *FFColumns, "LAST", true); } return result; @@ -218,42 +254,72 @@ std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(c TSpecialReadContext::TSpecialReadContext(const 
std::shared_ptr& commonContext) : CommonContext(commonContext) { + ReadMetadata = dynamic_pointer_cast(CommonContext->GetReadMetadata()); Y_ABORT_UNLESS(ReadMetadata); Y_ABORT_UNLESS(ReadMetadata->SelectInfo); + double kffFilter = 0.45; + double kffFetching = 0.45; + double kffMerge = 0.10; + TString stagePrefix; + if (ReadMetadata->GetEarlyFilterColumnIds().size()) { + stagePrefix = "EF"; + kffFilter = 0.7; + kffFetching = 0.15; + kffMerge = 0.15; + } else { + stagePrefix = "FO"; + kffFilter = 0.1; + kffFetching = 0.75; + kffMerge = 0.15; + } + + std::vector> stages = { + NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildStageFeatures( + stagePrefix + "::FILTER", kffFilter * TGlobalLimits::ScanMemoryLimit), + NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildStageFeatures( + stagePrefix + "::FETCHING", kffFetching * TGlobalLimits::ScanMemoryLimit), + NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildStageFeatures(stagePrefix + "::MERGE", kffMerge * TGlobalLimits::ScanMemoryLimit) + }; + ProcessMemoryGuard = + NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildProcessGuard(CommonContext->GetReadMetadata()->GetTxId(), stages); + ProcessScopeGuard = + NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildScopeGuard(CommonContext->GetReadMetadata()->GetTxId(), GetCommonContext()->GetScanId()); + auto readSchema = ReadMetadata->GetResultSchema(); - SpecColumns = std::make_shared(TIndexInfo::GetSnapshotColumnIdsSet(), ReadMetadata->GetIndexInfo(), readSchema); + SpecColumns = std::make_shared(TIndexInfo::GetSnapshotColumnIdsSet(), readSchema); IndexChecker = ReadMetadata->GetProgram().GetIndexChecker(); { auto predicateColumns = ReadMetadata->GetPKRangesFilter().GetColumnIds(ReadMetadata->GetIndexInfo()); if (predicateColumns.size()) { - PredicateColumns = std::make_shared(predicateColumns, ReadMetadata->GetIndexInfo(), readSchema); + PredicateColumns = std::make_shared(predicateColumns, readSchema); } else { PredicateColumns = 
std::make_shared(); } } { - std::set columnIds = {NPortion::TSpecialColumns::SPEC_COL_DELETE_FLAG_INDEX}; - DeletionColumns = std::make_shared(columnIds, ReadMetadata->GetIndexInfo(), ReadMetadata->GetResultSchema()); + std::set columnIds = { NPortion::TSpecialColumns::SPEC_COL_DELETE_FLAG_INDEX }; + DeletionColumns = std::make_shared(columnIds, ReadMetadata->GetResultSchema()); } if (!!ReadMetadata->GetRequestShardingInfo()) { - auto shardingColumnIds = ReadMetadata->GetIndexInfo().GetColumnIdsVerified(ReadMetadata->GetRequestShardingInfo()->GetShardingInfo()->GetColumnNames()); - ShardingColumns = std::make_shared(shardingColumnIds, ReadMetadata->GetIndexInfo(), ReadMetadata->GetResultSchema()); + auto shardingColumnIds = + ReadMetadata->GetIndexInfo().GetColumnIdsVerified(ReadMetadata->GetRequestShardingInfo()->GetShardingInfo()->GetColumnNames()); + ShardingColumns = std::make_shared(shardingColumnIds, ReadMetadata->GetResultSchema()); } else { ShardingColumns = std::make_shared(); } { auto efColumns = ReadMetadata->GetEarlyFilterColumnIds(); if (efColumns.size()) { - EFColumns = std::make_shared(efColumns, ReadMetadata->GetIndexInfo(), readSchema); + EFColumns = std::make_shared(efColumns, readSchema); } else { EFColumns = std::make_shared(); } } if (ReadMetadata->HasProcessingColumnIds()) { - FFColumns = std::make_shared(ReadMetadata->GetProcessingColumnIds(), ReadMetadata->GetIndexInfo(), readSchema); + FFColumns = std::make_shared(ReadMetadata->GetProcessingColumnIds(), readSchema); if (SpecColumns->Contains(*FFColumns) && !EFColumns->IsEmpty()) { FFColumns = std::make_shared(*EFColumns + *SpecColumns); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("ff_modified", FFColumns->DebugString()); @@ -270,18 +336,35 @@ TSpecialReadContext::TSpecialReadContext(const std::shared_ptr& co ProgramInputColumns = FFColumns; } - PKColumns = std::make_shared(ReadMetadata->GetPKColumnIds(), ReadMetadata->GetIndexInfo(), readSchema); + PKColumns = 
std::make_shared(ReadMetadata->GetPKColumnIds(), readSchema); MergeColumns = std::make_shared(*PKColumns + *SpecColumns); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("columns_context_info", DebugString()); +} + +TString TSpecialReadContext::DebugString() const { + TStringBuilder sb; + sb << "ef=" << EFColumns->DebugString() << ";" + << "sharding=" << ShardingColumns->DebugString() << ";" + << "pk=" << PKColumns->DebugString() << ";" + << "ff=" << FFColumns->DebugString() << ";" + << "program_input=" << ProgramInputColumns->DebugString() << ";"; + return sb; +} + +TString TSpecialReadContext::ProfileDebugString() const { + TStringBuilder sb; const auto GetBit = [](const ui32 val, const ui32 pos) -> ui32 { return (val & (1 << pos)) ? 1 : 0; }; - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("columns_context_info", DebugString()); - for (ui32 i = 0; i < (1 << 7); ++i) { - CacheFetchingScripts[GetBit(i, 0)][GetBit(i, 1)][GetBit(i, 2)][GetBit(i, 3)][GetBit(i, 4)][GetBit(i, 5)] - = BuildColumnsFetchingPlan(GetBit(i, 0), GetBit(i, 1), GetBit(i, 2), GetBit(i, 3), GetBit(i, 4), GetBit(i, 5)); + for (ui32 i = 0; i < (1 << 6); ++i) { + auto script = CacheFetchingScripts[GetBit(i, 0)][GetBit(i, 1)][GetBit(i, 2)][GetBit(i, 3)][GetBit(i, 4)][GetBit(i, 5)]; + if (script && *script) { + sb << (*script)->DebugString() << ";"; + } } + return sb; } } // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.h index 5a869c5fc78e..1ae41c039808 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.h @@ -1,6 +1,7 @@ #pragma once #include "columns_set.h" #include "fetching.h" +#include #include #include #include @@ -13,6 +14,8 @@ class IDataSource; class TSpecialReadContext { private: YDB_READONLY_DEF(std::shared_ptr, CommonContext); + 
YDB_READONLY_DEF(std::shared_ptr, ProcessMemoryGuard); + YDB_READONLY_DEF(std::shared_ptr, ProcessScopeGuard); YDB_READONLY_DEF(std::shared_ptr, SpecColumns); YDB_READONLY_DEF(std::shared_ptr, MergeColumns); @@ -24,41 +27,58 @@ class TSpecialReadContext { YDB_READONLY_DEF(std::shared_ptr, FFColumns); YDB_READONLY_DEF(std::shared_ptr, ProgramInputColumns); + YDB_READONLY_DEF(std::shared_ptr, MergeStageMemory); + YDB_READONLY_DEF(std::shared_ptr, FilterStageMemory); + YDB_READONLY_DEF(std::shared_ptr, FetchingStageMemory); + + TAtomic AbortFlag = 0; NIndexes::TIndexCheckerContainer IndexChecker; TReadMetadata::TConstPtr ReadMetadata; std::shared_ptr EmptyColumns = std::make_shared(); std::shared_ptr BuildColumnsFetchingPlan(const bool needSnapshotsFilter, const bool exclusiveSource, const bool partialUsageByPredicate, const bool useIndexes, const bool needFilterSharding, const bool needFilterDeletion) const; - std::array, 2>, 2>, 2>, 2>, 2>, 2> CacheFetchingScripts; + std::array>, 2>, 2>, 2>, 2>, 2>, 2> + CacheFetchingScripts; public: - static const inline ui64 DefaultRejectMemoryIntervalLimit = ((ui64)3) << 30; - static const inline ui64 DefaultReduceMemoryIntervalLimit = DefaultRejectMemoryIntervalLimit; - static const inline ui64 DefaultReadSequentiallyBufferSize = ((ui64)8) << 20; - - const ui64 ReduceMemoryIntervalLimit = NYDBTest::TControllers::GetColumnShardController()->GetReduceMemoryIntervalLimit(DefaultReduceMemoryIntervalLimit); - const ui64 RejectMemoryIntervalLimit = NYDBTest::TControllers::GetColumnShardController()->GetRejectMemoryIntervalLimit(DefaultRejectMemoryIntervalLimit); - const ui64 ReadSequentiallyBufferSize = DefaultReadSequentiallyBufferSize; + const ui64 ReduceMemoryIntervalLimit = NYDBTest::TControllers::GetColumnShardController()->GetReduceMemoryIntervalLimit(); + const ui64 RejectMemoryIntervalLimit = NYDBTest::TControllers::GetColumnShardController()->GetRejectMemoryIntervalLimit(); + const ui64 ReadSequentiallyBufferSize = 
TGlobalLimits::DefaultReadSequentiallyBufferSize; - ui64 GetMemoryForSources(const THashMap>& sources, const bool isExclusive); + ui64 GetProcessMemoryControlId() const { + AFL_VERIFY(ProcessMemoryGuard); + return ProcessMemoryGuard->GetProcessId(); + } + ui64 GetMemoryForSources(const THashMap>& sources); + ui64 GetRequestedMemoryBytes() const { + return MergeStageMemory->GetFullMemory() + FilterStageMemory->GetFullMemory() + FetchingStageMemory->GetFullMemory(); + } const TReadMetadata::TConstPtr& GetReadMetadata() const { return ReadMetadata; } - std::unique_ptr BuildMerger() const; + bool IsAborted() const { + return AtomicGet(AbortFlag); + } - TString DebugString() const { - return TStringBuilder() << "ef=" << EFColumns->DebugString() << ";" - << "sharding=" << ShardingColumns->DebugString() << ";" - << "pk=" << PKColumns->DebugString() << ";" - << "ff=" << FFColumns->DebugString() << ";" - << "program_input=" << ProgramInputColumns->DebugString(); + void Abort() { + AtomicSet(AbortFlag, 1); } + ~TSpecialReadContext() { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_SCAN)("profile", ProfileDebugString()); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_SCAN)("fetching", DebugString()); + } + + std::unique_ptr BuildMerger() const; + + TString DebugString() const; + TString ProfileDebugString() const; + TSpecialReadContext(const std::shared_ptr& commonContext); - std::shared_ptr GetColumnsFetchingPlan(const std::shared_ptr& source) const; + std::shared_ptr GetColumnsFetchingPlan(const std::shared_ptr& source); }; } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.cpp index e647c77313e7..ac7fe2c16bf3 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.cpp @@ -1,7 +1,8 @@ #include "fetched_data.h" -#include + +#include #include -#include 
+#include namespace NKikimr::NOlap { @@ -11,10 +12,10 @@ void TFetchedData::SyncTableColumns(const std::vectorAddField(i, std::make_shared( - NArrow::TThreadSimpleArraysCache::Get(i->type(), schema.GetExternalDefaultValueVerified(i->name()), Table->num_rows()))) + ->AddField(i, std::make_shared(NArrow::TThreadSimpleArraysCache::Get( + i->type(), schema.GetExternalDefaultValueVerified(i->name()), Table->num_rows()))) .Validate(); } } -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.h index adde885f1468..b535c2bc4673 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.h @@ -1,14 +1,17 @@ #pragma once -#include -#include #include #include +#include #include #include #include + #include #include +#include +#include + namespace NKikimr::NOlap { class TFetchedData { @@ -18,11 +21,17 @@ class TFetchedData { YDB_READONLY_DEF(std::shared_ptr, Table); YDB_READONLY_DEF(std::shared_ptr, Filter); YDB_READONLY(bool, UseFilter, false); + public: TFetchedData(const bool useFilter) - : UseFilter(useFilter) - { + : UseFilter(useFilter) { + } + ui32 GetFilteredCount(const ui32 recordsCount, const ui32 defLimit) const { + if (!Filter) { + return std::min(defLimit, recordsCount); + } + return Filter->GetFilteredCount().value_or(recordsCount); } void SyncTableColumns(const std::vector>& fields, const ISnapshotSchema& schema); @@ -60,6 +69,11 @@ class TFetchedData { return (Filter && Filter->IsTotalDenyFilter()) || (Table && !Table->num_rows()); } + void Clear() { + Filter = std::make_shared(NArrow::TColumnFilter::BuildDenyFilter()); + Table = nullptr; + } + void AddFilter(const std::shared_ptr& filter) { if (!filter) { return; @@ -67,6 +81,31 @@ class TFetchedData { return AddFilter(*filter); } + void CutFilter(const ui32 
recordsCount, const ui32 limit, const bool reverse) { + auto filter = std::make_shared(NArrow::TColumnFilter::BuildAllowFilter()); + ui32 recordsCountImpl = Filter ? Filter->GetFilteredCount().value_or(recordsCount) : recordsCount; + if (recordsCountImpl < limit) { + return; + } + if (reverse) { + filter->Add(false, recordsCountImpl - limit); + filter->Add(true, limit); + } else { + filter->Add(true, limit); + filter->Add(false, recordsCountImpl - limit); + } + if (Filter) { + if (UseFilter) { + AddFilter(*filter); + } else { + AddFilter(Filter->CombineSequentialAnd(*filter)); + } + } else { + AddFilter(*filter); + } + + } + void AddFilter(const NArrow::TColumnFilter& filter) { if (UseFilter && Table) { AFL_VERIFY(filter.Apply(Table)); @@ -106,13 +145,13 @@ class TFetchedData { AFL_VERIFY(mergeResult.IsSuccess())("error", mergeResult.GetErrorMessage()); } } - }; class TFetchedResult { private: YDB_READONLY_DEF(std::shared_ptr, Batch); YDB_READONLY_DEF(std::shared_ptr, NotAppliedFilter); + public: TFetchedResult(std::unique_ptr&& data) : Batch(data->GetTable()) @@ -124,4 +163,4 @@ class TFetchedResult { } }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.cpp index 7ce1bc9b6595..e72e7b3cf2e8 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.cpp @@ -1,7 +1,10 @@ #include "fetching.h" #include "source.h" -#include + #include +#include +#include +#include #include @@ -16,7 +19,7 @@ bool TStepAction::DoApply(IDataReader& /*owner*/) const { } TConclusionStatus TStepAction::DoExecuteImpl() { - if (Source->IsAborted()) { + if (Source->GetContext()->IsAborted()) { return TConclusionStatus::Success(); } auto executeResult = Cursor.Execute(Source); @@ -30,20 +33,31 @@ TConclusionStatus TStepAction::DoExecuteImpl() { return 
TConclusionStatus::Success(); } -TConclusion TColumnBlobsFetchingStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const { +TConclusion TColumnBlobsFetchingStep::DoExecuteInplace( + const std::shared_ptr& source, const TFetchingScriptCursor& step) const { return !source->StartFetchingColumns(source, step, Columns); } ui64 TColumnBlobsFetchingStep::DoPredictRawBytes(const std::shared_ptr& source) const { - const ui64 result = source->GetColumnRawBytes(Columns->GetColumnIds()); + ui64 result = source->GetColumnRawBytes(Columns.GetColumnIds()); + if (source->GetContext()->GetReadMetadata()->Limit && source->GetExclusiveIntervalOnly()) { + result = std::max(result * 1.0 * source->GetContext()->GetReadMetadata()->Limit / source->GetRecordsCount(), + source->GetColumnBlobBytes(Columns.GetColumnIds())); + } if (!result) { - return Columns->GetColumnIds().size() * source->GetRecordsCount() * sizeof(ui32); // null for all records for all columns in future will be + return Columns.GetColumnIds().size() * source->GetRecordsCount() * + sizeof(ui32); // null for all records for all columns in future will be } else { return result; } } -TConclusion TIndexBlobsFetchingStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const { +ui64 TColumnBlobsFetchingStep::GetProcessingDataSize(const std::shared_ptr& source) const { + return source->GetColumnBlobBytes(Columns.GetColumnIds()); +} + +TConclusion TIndexBlobsFetchingStep::DoExecuteInplace( + const std::shared_ptr& source, const TFetchingScriptCursor& step) const { return !source->StartFetchingIndexes(source, step, Indexes); } @@ -56,7 +70,12 @@ TConclusion TAssemblerStep::DoExecuteInplace(const std::shared_ptr TOptionalAssemblerStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { +ui64 TAssemblerStep::GetProcessingDataSize(const std::shared_ptr& source) const { + return 
source->GetColumnRawBytes(Columns->GetColumnIds()); +} + +TConclusion TOptionalAssemblerStep::DoExecuteInplace( + const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { source->AssembleColumns(Columns); return true; } @@ -70,6 +89,10 @@ bool TOptionalAssemblerStep::DoInitSourceSeqColumnIds(const std::shared_ptr& source) const { + return source->GetColumnRawBytes(Columns->GetColumnIds()); +} + TConclusion TFilterProgramStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { AFL_VERIFY(source); AFL_VERIFY(Step); @@ -89,7 +112,8 @@ ui64 TFilterProgramStep::DoPredictRawBytes(const std::shared_ptr& s } TConclusion TPredicateFilter::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { - auto filter = source->GetContext()->GetReadMetadata()->GetPKRangesFilter().BuildFilter(source->GetStageData().GetTable()->BuildTableVerified()); + auto filter = + source->GetContext()->GetReadMetadata()->GetPKRangesFilter().BuildFilter(source->GetStageData().GetTable()->BuildTableVerified()); source->MutableStageData().AddFilter(filter); return true; } @@ -97,6 +121,11 @@ TConclusion TPredicateFilter::DoExecuteInplace(const std::shared_ptr TSnapshotFilter::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { auto filter = MakeSnapshotFilter( source->GetStageData().GetTable()->BuildTableVerified(), source->GetContext()->GetReadMetadata()->GetRequestSnapshot()); + if (filter.GetFilteredCount().value_or(source->GetRecordsCount()) != source->GetRecordsCount()) { + if (source->AddTxConflict()) { + return true; + } + } source->MutableStageData().AddFilter(filter); return true; } @@ -120,8 +149,8 @@ TConclusion TDeletionFilter::DoExecuteInplace(const std::shared_ptr TShardingFilter::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { 
NYDBTest::TControllers::GetColumnShardController()->OnSelectShardingFilter(); - auto filter = source->GetContext()->GetReadMetadata()->GetRequestShardingInfo()->GetShardingInfo()->GetFilter( - source->GetStageData().GetTable()->BuildTableVerified()); + const auto& shardingInfo = source->GetContext()->GetReadMetadata()->GetRequestShardingInfo()->GetShardingInfo(); + auto filter = shardingInfo->GetFilter(source->GetStageData().GetTable()->BuildTableVerified()); source->MutableStageData().AddFilter(filter); return true; } @@ -131,7 +160,8 @@ TConclusion TBuildFakeSpec::DoExecuteInplace(const std::shared_ptrfields()) { columns.emplace_back(NArrow::TThreadSimpleArraysCache::GetConst(f->type(), NArrow::DefaultScalar(f->type()), Count)); } - source->MutableStageData().AddBatch(std::make_shared(arrow::RecordBatch::Make(TIndexInfo::ArrowSchemaSnapshot(), Count, columns))); + source->MutableStageData().AddBatch( + std::make_shared(arrow::RecordBatch::Make(TIndexInfo::ArrowSchemaSnapshot(), Count, columns))); return true; } @@ -143,14 +173,21 @@ TConclusion TApplyIndexStep::DoExecuteInplace(const std::shared_ptr TFetchingScriptCursor::Execute(const std::shared_ptr& source) { AFL_VERIFY(source); NMiniKQL::TThrowingBindTerminator bind; + Script->OnExecute(); AFL_VERIFY(!Script->IsFinished(CurrentStepIdx)); while (!Script->IsFinished(CurrentStepIdx)) { if (source->GetStageData().IsEmpty()) { + source->OnEmptyStageData(); break; } auto step = Script->GetStep(CurrentStepIdx); - TMemoryProfileGuard mGuard("SCAN_PROFILE::FETCHING::" + step->GetName() + "::" + Script->GetBranchName(), IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); + TMemoryProfileGuard mGuard("SCAN_PROFILE::FETCHING::" + step->GetName() + "::" + Script->GetBranchName(), + IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("scan_step", step->DebugString())("scan_step_idx", CurrentStepIdx); + AFL_VERIFY(!CurrentStartInstant); + 
CurrentStartInstant = TMonotonic::Now(); + AFL_VERIFY(!CurrentStartDataSize); + CurrentStartDataSize = step->GetProcessingDataSize(source); const TConclusion resultStep = step->ExecuteInplace(source, *this); if (!resultStep) { return resultStep; @@ -158,9 +195,83 @@ TConclusion TFetchingScriptCursor::Execute(const std::shared_ptr&& guard, + const std::shared_ptr& /*allocation*/) { + auto data = Source.lock(); + if (!data || data->GetContext()->IsAborted()) { + guard->Release(); + return false; + } + data->RegisterAllocationGuard(std::move(guard)); + Step.Next(); + auto task = std::make_shared(data, std::move(Step), data->GetContext()->GetCommonContext()->GetScanActorId()); + NConveyor::TScanServiceOperator::SendTaskToExecute(task); + return true; +} + +TAllocateMemoryStep::TFetchingStepAllocation::TFetchingStepAllocation( + const std::shared_ptr& source, const ui64 mem, const TFetchingScriptCursor& step) + : TBase(mem) + , Source(source) + , Step(step) + , TasksGuard(source->GetContext()->GetCommonContext()->GetCounters().GetResourcesAllocationTasksGuard()) { } + +TConclusion TAllocateMemoryStep::DoExecuteInplace( + const std::shared_ptr& source, const TFetchingScriptCursor& step) const { + + auto allocation = std::make_shared(source, GetProcessingDataSize(source), step); + NGroupedMemoryManager::TScanMemoryLimiterOperator::SendToAllocation(source->GetContext()->GetProcessMemoryControlId(), + source->GetContext()->GetCommonContext()->GetScanId(), source->GetFirstIntervalId(), { allocation }, (ui32)StageIndex); + return false; +} + +ui64 TAllocateMemoryStep::GetProcessingDataSize(const std::shared_ptr& source) const { + ui64 size = source->GetColumnRawBytes(Columns.GetColumnIds()); + + if (source->GetStageData().GetUseFilter() && source->GetContext()->GetReadMetadata()->Limit) { + const ui32 filtered = source->GetStageData().GetFilteredCount(source->GetRecordsCount(), source->GetContext()->GetReadMetadata()->Limit); + if (filtered < source->GetRecordsCount()) { + 
size = std::max(size * 1.0 * filtered / source->GetRecordsCount(), source->GetColumnBlobBytes(Columns.GetColumnIds())); + } + } + return size; +} + +TString TFetchingScript::DebugString() const { + TStringBuilder sb; + TStringBuilder sbBranch; + for (auto&& i : Steps) { + if (i->GetSumDuration() > TDuration::MilliSeconds(10)) { + sbBranch << "{" << i->DebugString() << "};"; + } + } + if (!sbBranch) { + return ""; + } + sb << "{branch:" << BranchName << ";limit:" << Limit << ";"; + if (FinishInstant && StartInstant) { + sb << "duration:" << *FinishInstant - *StartInstant << ";"; + } + + sb << "steps_10Ms:[" << sbBranch << "]}"; + return sb; +} + +TFetchingScript::TFetchingScript(const TSpecialReadContext& context) + : Limit(context.GetReadMetadata()->Limit) { +} + +NKikimr::TConclusion TFilterCutLimit::DoExecuteInplace( + const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + source->MutableStageData().CutFilter(source->GetRecordsCount(), Limit, Reverse); + return true; +} + +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.h index dd11275dedf8..133aa4db3669 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.h @@ -1,23 +1,38 @@ #pragma once #include "columns_set.h" -#include -#include -#include + +#include #include #include +#include +#include +#include + +#include namespace NKikimr::NOlap::NReader::NPlain { class IDataSource; class TFetchingScriptCursor; +class TSpecialReadContext; class IFetchingStep { private: YDB_READONLY_DEF(TString, Name); + YDB_READONLY(TDuration, SumDuration, TDuration::Zero()); + YDB_READONLY(ui64, SumSize, 0); + protected: virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const = 0; virtual TString 
DoDebugString() const { return ""; } + public: + void AddDuration(const TDuration d) { + SumDuration += d; + } + void AddDataSize(const ui64 size) { + SumSize += size; + } virtual ui64 DoPredictRawBytes(const std::shared_ptr& /*source*/) const { return 0; } @@ -31,15 +46,18 @@ class IFetchingStep { return DoExecuteInplace(source, step); } - IFetchingStep(const TString& name) - : Name(name) - { + virtual ui64 GetProcessingDataSize(const std::shared_ptr& /*source*/) const { + return 0; + } + IFetchingStep(const TString& name) + : Name(name) { } TString DebugString() const { TStringBuilder sb; - sb << "name=" << Name << ";details={" << DoDebugString() << "};"; + sb << "name=" << Name << ";duration=" << SumDuration << ";" + << "size=" << 1e-9 * SumSize << ";details={" << DoDebugString() << "};"; return sb; } }; @@ -48,19 +66,30 @@ class TFetchingScript { private: YDB_ACCESSOR(TString, BranchName, "UNDEFINED"); std::vector> Steps; + std::optional StartInstant; + std::optional FinishInstant; + const ui32 Limit; + public: - TFetchingScript() = default; + TFetchingScript(const TSpecialReadContext& context); - TString DebugString() const { - TStringBuilder sb; - sb << "["; - for (auto&& i : Steps) { - sb << "{" << i->DebugString() << "};"; + void AddStepDataSize(const ui32 index, const ui64 size) { + GetStep(index)->AddDataSize(size); + } + + void AddStepDuration(const ui32 index, const TDuration d) { + FinishInstant = TMonotonic::Now(); + GetStep(index)->AddDuration(d); + } + + void OnExecute() { + if (!StartInstant) { + StartInstant = TMonotonic::Now(); } - sb << "]"; - return sb; } + TString DebugString() const; + const std::shared_ptr& GetStep(const ui32 index) const { AFL_VERIFY(index < Steps.size()); return Steps[index]; @@ -68,7 +97,7 @@ class TFetchingScript { ui64 PredictRawBytes(const std::shared_ptr& source) const { ui64 result = 0; - for (auto&& current: Steps) { + for (auto&& current : Steps) { result += current->DoPredictRawBytes(source); } return result; @@ 
-98,14 +127,23 @@ class TFetchingScript { class TFetchingScriptCursor { private: + std::optional CurrentStartInstant; + std::optional CurrentStartDataSize; ui32 CurrentStepIdx = 0; std::shared_ptr Script; + void FlushDuration() { + AFL_VERIFY(CurrentStartInstant); + AFL_VERIFY(CurrentStartDataSize); + Script->AddStepDuration(CurrentStepIdx, TMonotonic::Now() - *CurrentStartInstant); + Script->AddStepDataSize(CurrentStepIdx, *CurrentStartDataSize); + CurrentStartInstant.reset(); + CurrentStartDataSize.reset(); + } + public: TFetchingScriptCursor(const std::shared_ptr& script, const ui32 index) : CurrentStepIdx(index) - , Script(script) - { - + , Script(script) { } const TString& GetName() const { @@ -117,6 +155,7 @@ class TFetchingScriptCursor { } bool Next() { + FlushDuration(); return !Script->IsFinished(++CurrentStepIdx); } @@ -129,6 +168,7 @@ class TStepAction: public IDataTasksProcessor::ITask { std::shared_ptr Source; TFetchingScriptCursor Cursor; bool FinishedFlag = false; + protected: virtual bool DoApply(IDataReader& owner) const override; virtual TConclusionStatus DoExecuteImpl() override; @@ -141,9 +181,7 @@ class TStepAction: public IDataTasksProcessor::ITask { TStepAction(const std::shared_ptr& source, TFetchingScriptCursor&& cursor, const NActors::TActorId& ownerActorId) : TBase(ownerActorId) , Source(source) - , Cursor(std::move(cursor)) - { - + , Cursor(std::move(cursor)) { } }; @@ -151,16 +189,17 @@ class TBuildFakeSpec: public IFetchingStep { private: using TBase = IFetchingStep; const ui32 Count = 0; + protected: virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; virtual ui64 DoPredictRawBytes(const std::shared_ptr& /*source*/) const override { return TIndexInfo::GetSpecialColumnsRecordSize() * Count; } + public: TBuildFakeSpec(const ui32 count) : TBase("FAKE_SPEC") - , Count(count) - { + , Count(count) { AFL_VERIFY(Count); } }; @@ -169,33 +208,73 @@ class TApplyIndexStep: public 
IFetchingStep { private: using TBase = IFetchingStep; const NIndexes::TIndexCheckerContainer IndexChecker; + protected: virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + public: TApplyIndexStep(const NIndexes::TIndexCheckerContainer& indexChecker) : TBase("APPLY_INDEX") - , IndexChecker(indexChecker) - { + , IndexChecker(indexChecker) { + } +}; + +class TAllocateMemoryStep: public IFetchingStep { +private: + using TBase = IFetchingStep; + TColumnsSetIds Columns; + const EStageFeaturesIndexes StageIndex; +protected: + class TFetchingStepAllocation: public NGroupedMemoryManager::IAllocation { + private: + using TBase = NGroupedMemoryManager::IAllocation; + std::weak_ptr Source; + TFetchingScriptCursor Step; + NColumnShard::TCounterGuard TasksGuard; + virtual bool DoOnAllocated(std::shared_ptr&& guard, + const std::shared_ptr& allocation) override; + + public: + TFetchingStepAllocation(const std::shared_ptr& source, const ui64 mem, const TFetchingScriptCursor& step); + }; + + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + virtual ui64 GetProcessingDataSize(const std::shared_ptr& source) const override; + virtual ui64 DoPredictRawBytes(const std::shared_ptr& /*source*/) const override { + return 0; + } + virtual TString DoDebugString() const override { + return TStringBuilder() << "columns=" << Columns.DebugString() << ";stage=" << StageIndex << ";"; + } + +public: + TAllocateMemoryStep(const TColumnsSetIds& columns, const EStageFeaturesIndexes stageIndex) + : TBase("ALLOCATE_MEMORY::" + ::ToString(stageIndex)) + , Columns(columns) + , StageIndex(stageIndex) { + AFL_VERIFY(Columns.GetColumnsCount()); } }; class TColumnBlobsFetchingStep: public IFetchingStep { private: using TBase = IFetchingStep; - std::shared_ptr Columns; + TColumnsSetIds Columns; + protected: virtual TConclusion DoExecuteInplace(const std::shared_ptr& 
source, const TFetchingScriptCursor& step) const override; virtual ui64 DoPredictRawBytes(const std::shared_ptr& source) const override; virtual TString DoDebugString() const override { - return TStringBuilder() << "columns=" << Columns->DebugString() << ";"; + return TStringBuilder() << "columns=" << Columns.DebugString() << ";"; } + public: - TColumnBlobsFetchingStep(const std::shared_ptr& columns) + virtual ui64 GetProcessingDataSize(const std::shared_ptr& source) const override; + TColumnBlobsFetchingStep(const TColumnsSetIds& columns) : TBase("FETCHING_COLUMNS") , Columns(columns) { - AFL_VERIFY(Columns); - AFL_VERIFY(Columns->GetColumnsCount()); + AFL_VERIFY(Columns.GetColumnsCount()); } }; @@ -203,12 +282,14 @@ class TIndexBlobsFetchingStep: public IFetchingStep { private: using TBase = IFetchingStep; std::shared_ptr Indexes; + protected: virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; virtual ui64 DoPredictRawBytes(const std::shared_ptr& source) const override; virtual TString DoDebugString() const override { return TStringBuilder() << "indexes=" << Indexes->DebugString() << ";"; } + public: TIndexBlobsFetchingStep(const std::shared_ptr& indexes) : TBase("FETCHING_INDEXES") @@ -225,12 +306,13 @@ class TAssemblerStep: public IFetchingStep { virtual TString DoDebugString() const override { return TStringBuilder() << "columns=" << Columns->DebugString() << ";"; } + public: + virtual ui64 GetProcessingDataSize(const std::shared_ptr& source) const override; virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; TAssemblerStep(const std::shared_ptr& columns, const TString& specName = Default()) : TBase("ASSEMBLER" + (specName ? 
"::" + specName : "")) - , Columns(columns) - { + , Columns(columns) { AFL_VERIFY(Columns); AFL_VERIFY(Columns->GetColumnsCount()); } @@ -243,9 +325,13 @@ class TOptionalAssemblerStep: public IFetchingStep { virtual TString DoDebugString() const override { return TStringBuilder() << "columns=" << Columns->DebugString() << ";"; } + protected: virtual bool DoInitSourceSeqColumnIds(const std::shared_ptr& source) const override; + public: + virtual ui64 GetProcessingDataSize(const std::shared_ptr& source) const override; + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; TOptionalAssemblerStep(const std::shared_ptr& columns, const TString& specName = Default()) : TBase("OPTIONAL_ASSEMBLER" + (specName ? "::" + specName : "")) @@ -259,13 +345,35 @@ class TFilterProgramStep: public IFetchingStep { private: using TBase = IFetchingStep; std::shared_ptr Step; + protected: virtual ui64 DoPredictRawBytes(const std::shared_ptr& source) const override; + public: virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; TFilterProgramStep(const std::shared_ptr& step) : TBase("PROGRAM") - , Step(step) + , Step(step) { + } +}; + +class TFilterCutLimit: public IFetchingStep { +private: + using TBase = IFetchingStep; + const ui32 Limit; + const bool Reverse; + +protected: + virtual ui64 DoPredictRawBytes(const std::shared_ptr& /*source*/) const override { + return 0; + } + +public: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + TFilterCutLimit(const ui32 limit, const bool reverse) + : TBase("LIMIT") + , Limit(limit) + , Reverse(reverse) { } }; @@ -273,15 +381,15 @@ class TFilterProgramStep: public IFetchingStep { class TPredicateFilter: public IFetchingStep { private: using TBase = IFetchingStep; + public: virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const 
TFetchingScriptCursor& step) const override; TPredicateFilter() : TBase("PREDICATE") { - } }; -class TSnapshotFilter : public IFetchingStep { +class TSnapshotFilter: public IFetchingStep { private: using TBase = IFetchingStep; @@ -303,7 +411,7 @@ class TDeletionFilter: public IFetchingStep { } }; -class TShardingFilter : public IFetchingStep { +class TShardingFilter: public IFetchingStep { private: using TBase = IFetchingStep; @@ -314,5 +422,4 @@ class TShardingFilter : public IFetchingStep { } }; - -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.cpp index 311a3c45f61d..9da043a366c1 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.cpp @@ -1,20 +1,29 @@ #include "interval.h" + #include +#include namespace NKikimr::NOlap::NReader::NPlain { void TFetchingInterval::ConstructResult() { - if (ReadySourcesCount.Val() != WaitSourcesCount || !ReadyGuards.Val()) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "skip_construct_result")("interval_idx", IntervalIdx); + const ui32 ready = ReadySourcesCount.Val(); + if (ready != WaitSourcesCount) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "skip_construct_result")("interval_idx", IntervalIdx)( + "count", WaitSourcesCount)("ready", ready)("interval_id", GetIntervalId()); return; } else { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "start_construct_result")("interval_idx", IntervalIdx); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "start_construct_result")("interval_idx", IntervalIdx)( + "interval_id", GetIntervalId()); } if (AtomicCas(&SourcesFinalized, 1, 0)) { IntervalStateGuard.SetStatus(NColumnShard::TScanCounters::EIntervalStatus::WaitMergerStart); + + 
MergingContext->SetIntervalChunkMemory(Context->GetMemoryForSources(Sources)); + auto task = std::make_shared(MergingContext, Context, std::move(Sources)); task->SetPriority(NConveyor::ITask::EPriority::High); - NConveyor::TScanServiceOperator::SendTaskToExecute(task); + NGroupedMemoryManager::TScanMemoryLimiterOperator::SendToAllocation(Context->GetProcessMemoryControlId(), + Context->GetCommonContext()->GetScanId(), GetIntervalId(), { task }, (ui32)EStageFeaturesIndexes::Merge); } } @@ -27,35 +36,26 @@ void TFetchingInterval::OnSourceFetchStageReady(const ui32 /*sourceIdx*/) { TFetchingInterval::TFetchingInterval(const NArrow::NMerger::TSortableBatchPosition& start, const NArrow::NMerger::TSortableBatchPosition& finish, const ui32 intervalIdx, const THashMap>& sources, const std::shared_ptr& context, const bool includeFinish, const bool includeStart, const bool isExclusiveInterval) - : TTaskBase(0, context->GetMemoryForSources(sources, isExclusiveInterval), "", context->GetCommonContext()->GetResourcesTaskContext()) - , MergingContext(std::make_shared(start, finish, intervalIdx, includeFinish, includeStart, isExclusiveInterval)) + : MergingContext(std::make_shared(start, finish, intervalIdx, includeFinish, includeStart, isExclusiveInterval)) , Context(context) , TaskGuard(Context->GetCommonContext()->GetCounters().GetResourcesAllocationTasksGuard()) , Sources(sources) - , ResourcesGuard(Context->GetCommonContext()->GetCounters().BuildRequestedResourcesGuard(GetMemoryAllocation())) , IntervalIdx(intervalIdx) - , IntervalStateGuard(Context->GetCommonContext()->GetCounters().CreateIntervalStateGuard()) -{ - Y_ABORT_UNLESS(Sources.size()); + , IntervalGroupGuard(NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildGroupGuard( + Context->GetProcessMemoryControlId(), context->GetCommonContext()->GetScanId())) + , IntervalStateGuard(Context->GetCommonContext()->GetCounters().CreateIntervalStateGuard()) { + AFL_VERIFY(Sources.size()); for (auto&& [_, i] : Sources) { 
if (!i->IsDataReady()) { ++WaitSourcesCount; + } else { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "ready_source")("interval_idx", IntervalIdx)( + "interval_id", GetIntervalId()); } - i->RegisterInterval(*this); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "register_source")("interval_idx", IntervalIdx)("interval_id", GetIntervalId()); + i->RegisterInterval(*this, i); } IntervalStateGuard.SetStatus(NColumnShard::TScanCounters::EIntervalStatus::WaitResources); -} - -void TFetchingInterval::DoOnAllocationSuccess(const std::shared_ptr& guard) { - AFL_VERIFY(guard); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("interval_idx", IntervalIdx)("event", "resources_allocated") - ("resources", guard->DebugString())("start", MergingContext->GetIncludeStart())("finish", MergingContext->GetIncludeFinish())("sources", Sources.size()); - IntervalStateGuard.SetStatus(NColumnShard::TScanCounters::EIntervalStatus::WaitSources); - ResourcesGuard->InitResources(guard); - for (auto&& i : Sources) { - i.second->OnInitResourcesGuard(i.second); - } - AFL_VERIFY(ReadyGuards.Inc() <= 1); ConstructResult(); } @@ -76,13 +76,15 @@ void TFetchingInterval::OnPartSendingComplete() { AFL_VERIFY(Merger); AFL_VERIFY(AtomicCas(&PartSendingWait, 0, 1)); AFL_VERIFY(AtomicGet(SourcesFinalized) == 1); - if (AbortedFlag) { + if (Context->IsAborted()) { return; } IntervalStateGuard.SetStatus(NColumnShard::TScanCounters::EIntervalStatus::WaitMergerContinue); + auto task = std::make_shared(MergingContext, Context, std::move(Merger)); task->SetPriority(NConveyor::ITask::EPriority::High); - NConveyor::TScanServiceOperator::SendTaskToExecute(task); + NGroupedMemoryManager::TScanMemoryLimiterOperator::SendToAllocation(Context->GetProcessMemoryControlId(), + Context->GetCommonContext()->GetScanId(), GetIntervalId(), { task }, (ui32)EStageFeaturesIndexes::Merge); } -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git 
a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.h index 6956303a48c9..86c3f1aa0510 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.h @@ -6,11 +6,9 @@ namespace NKikimr::NOlap::NReader::NPlain { -class TFetchingInterval: public TNonCopyable, public NResourceBroker::NSubscribe::ITask { +class TFetchingInterval: public TNonCopyable { private: - using TTaskBase = NResourceBroker::NSubscribe::ITask; std::shared_ptr MergingContext; - bool AbortedFlag = false; TAtomic SourcesFinalized = 0; TAtomic PartSendingWait = 0; std::unique_ptr Merger; @@ -20,14 +18,11 @@ class TFetchingInterval: public TNonCopyable, public NResourceBroker::NSubscribe void ConstructResult(); - std::shared_ptr ResourcesGuard; const ui32 IntervalIdx; + const std::shared_ptr IntervalGroupGuard; TAtomicCounter ReadySourcesCount = 0; - TAtomicCounter ReadyGuards = 0; ui32 WaitSourcesCount = 0; NColumnShard::TConcreteScanCounters::TScanIntervalStateGuard IntervalStateGuard; -protected: - virtual void DoOnAllocationSuccess(const std::shared_ptr& guard) override; public: std::set GetPathIds() const { @@ -42,16 +37,16 @@ class TFetchingInterval: public TNonCopyable, public NResourceBroker::NSubscribe return IntervalIdx; } - const THashMap>& GetSources() const { - return Sources; + ui32 GetIntervalId() const { + AFL_VERIFY(IntervalGroupGuard); + return IntervalGroupGuard->GetGroupId(); } - const std::shared_ptr& GetResourcesGuard() const { - return ResourcesGuard; + const THashMap>& GetSources() const { + return Sources; } void Abort() { - AbortedFlag = true; if (AtomicCas(&SourcesFinalized, 1, 0)) { for (auto&& i : Sources) { i.second->Abort(); @@ -82,10 +77,16 @@ class TFetchingInterval: public TNonCopyable, public NResourceBroker::NSubscribe void OnPartSendingComplete(); void 
SetMerger(std::unique_ptr&& merger); bool HasMerger() const; + std::shared_ptr GetGroupGuard() const { + return IntervalGroupGuard; + } TFetchingInterval(const NArrow::NMerger::TSortableBatchPosition& start, const NArrow::NMerger::TSortableBatchPosition& finish, const ui32 intervalIdx, const THashMap>& sources, const std::shared_ptr& context, const bool includeFinish, const bool includeStart, const bool isExclusiveInterval); + + ~TFetchingInterval() { + } }; } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.cpp index 2b03ac222e76..f705deb4501c 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.cpp @@ -11,7 +11,7 @@ TColumnShardScanIterator::TColumnShardScanIterator(const std::shared_ptrGetReadMetadata()->IsSorted()); } -TConclusion> TColumnShardScanIterator::GetBatch() { +TConclusion> TColumnShardScanIterator::GetBatch() { FillReadyResults(); return ReadyResults.pop_front(); } @@ -33,11 +33,11 @@ void TColumnShardScanIterator::FillReadyResults() { i64 limitLeft = Context->GetReadMetadata()->Limit == 0 ? 
INT64_MAX : Context->GetReadMetadata()->Limit - ItemsRead; for (size_t i = 0; i < ready.size() && limitLeft; ++i) { auto& batch = ReadyResults.emplace_back(std::move(ready[i])); - if (batch.GetResultBatch().num_rows() > limitLeft) { - batch.Cut(limitLeft); + if (batch->GetResultBatch().num_rows() > limitLeft) { + batch->Cut(limitLeft); } - limitLeft -= batch.GetResultBatch().num_rows(); - ItemsRead += batch.GetResultBatch().num_rows(); + limitLeft -= batch->GetResultBatch().num_rows(); + ItemsRead += batch->GetResultBatch().num_rows(); } if (limitLeft == 0) { diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.h index 8421e697c807..38b1fcc29882 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.h @@ -9,7 +9,7 @@ namespace NKikimr::NOlap::NReader::NPlain { class TReadyResults { private: const NColumnShard::TConcreteScanCounters Counters; - std::deque Data; + std::deque> Data; i64 RecordsCount = 0; public: TString DebugString() const { @@ -19,7 +19,7 @@ class TReadyResults { << "records_count:" << RecordsCount << ";" ; if (Data.size()) { - sb << "schema=" << Data.front().GetResultBatch().schema()->ToString() << ";"; + sb << "schema=" << Data.front()->GetResultBatch().schema()->ToString() << ";"; } return sb; } @@ -28,17 +28,19 @@ class TReadyResults { { } - TPartialReadResult& emplace_back(TPartialReadResult&& v) { - RecordsCount += v.GetResultBatch().num_rows(); + const std::shared_ptr& emplace_back(std::shared_ptr&& v) { + AFL_VERIFY(!!v); + RecordsCount += v->GetResultBatch().num_rows(); Data.emplace_back(std::move(v)); return Data.back(); } - std::optional pop_front() { + std::shared_ptr pop_front() { if (Data.empty()) { return {}; } auto result = std::move(Data.front()); - RecordsCount -= result.GetResultBatch().num_rows(); + AFL_VERIFY(RecordsCount 
>= result->GetResultBatch().num_rows()); + RecordsCount -= result->GetResultBatch().num_rows(); Data.pop_front(); return result; } @@ -90,7 +92,7 @@ class TColumnShardScanIterator: public TScanIteratorBase { return IndexedData->IsFinished() && ReadyResults.empty(); } - TConclusion> GetBatch() override; + virtual TConclusion> GetBatch() override; virtual void PrepareResults() override; virtual TConclusion ReadNextInterval() override; diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.cpp index dfeaec1b7aca..1981faaa4314 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.cpp @@ -2,6 +2,9 @@ #include "plain_read_data.h" #include "source.h" +#include +#include + namespace NKikimr::NOlap::NReader::NPlain { std::optional TBaseMergeTask::DrainMergerLinearScan(const std::optional resultBufferLimit) { @@ -19,12 +22,13 @@ std::optional TBaseMergeTask::DrainMergerLinearScan(co Merger = nullptr; } Context->GetCommonContext()->GetCounters().OnLinearScanInterval(rbBuilder->GetRecordsCount()); - ResultBatch = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({rbBuilder->Finalize()})); + ResultBatch = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({ rbBuilder->Finalize() })); return lastResultPosition; } void TBaseMergeTask::PrepareResultBatch() { if (!ResultBatch || ResultBatch->num_rows() == 0) { + AllocationGuard = nullptr; ResultBatch = nullptr; LastPK = nullptr; return; @@ -41,8 +45,10 @@ void TBaseMergeTask::PrepareResultBatch() { } else { ShardedBatch = NArrow::TShardedRecordBatch(ResultBatch); } + AllocationGuard->Update(NArrow::GetTableMemorySize(ResultBatch)); AFL_VERIFY(!!LastPK == !!ShardedBatch->GetRecordsCount())("lpk", !!LastPK)("sb", ShardedBatch->GetRecordsCount()); } else { + AllocationGuard = nullptr; ResultBatch = 
nullptr; LastPK = nullptr; } @@ -51,7 +57,19 @@ void TBaseMergeTask::PrepareResultBatch() { bool TBaseMergeTask::DoApply(IDataReader& indexedDataRead) const { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "DoApply")("interval_idx", MergingContext->GetIntervalIdx()); auto& reader = static_cast(indexedDataRead); - reader.MutableScanner().OnIntervalResult(ShardedBatch, LastPK, std::move(Merger), IntervalIdx, reader); + auto copy = AllocationGuard; + reader.MutableScanner().OnIntervalResult(std::move(copy), ShardedBatch, LastPK, std::move(Merger), IntervalIdx, reader); + return true; +} + +bool TBaseMergeTask::DoOnAllocated( + std::shared_ptr&& guard, const std::shared_ptr& allocation) { + if (Context->IsAborted()) { + guard->Release(); + return false; + } + AllocationGuard = std::move(guard); + NConveyor::TScanServiceOperator::SendTaskToExecute(static_pointer_cast(allocation)); return true; } @@ -73,7 +91,8 @@ TConclusionStatus TStartMergeTask::DoExecuteImpl() { if (container && container->num_rows()) { ResultBatch = container->BuildTableVerified(); LastPK = Sources.begin()->second->GetLastPK(); - ResultBatch = NArrow::TColumnOperator().VerifyIfAbsent().Extract(ResultBatch, Context->GetProgramInputColumns()->GetColumnNamesVector()); + ResultBatch = + NArrow::TColumnOperator().VerifyIfAbsent().Extract(ResultBatch, Context->GetProgramInputColumns()->GetColumnNamesVector()); Context->GetCommonContext()->GetCounters().OnNoScanInterval(ResultBatch->num_rows()); if (Context->GetCommonContext()->IsReverse()) { ResultBatch = NArrow::ReverseRecords(ResultBatch); @@ -103,7 +122,7 @@ TConclusionStatus TStartMergeTask::DoExecuteImpl() { return TConclusionStatus::Success(); } } - Merger->PutControlPoint(MergingContext->GetFinish()); + Merger->PutControlPoint(MergingContext->GetFinish(), false); Merger->SkipToLowerBound(MergingContext->GetStart(), MergingContext->GetIncludeStart()); const ui32 originalSourcesCount = Sources.size(); Sources.clear(); @@ -115,10 +134,12 @@ 
TConclusionStatus TStartMergeTask::DoExecuteImpl() { ResultBatch = Merger->SingleSourceDrain(MergingContext->GetFinish(), MergingContext->GetIncludeFinish(), &lastResultPosition); if (ResultBatch) { Context->GetCommonContext()->GetCounters().OnLogScanInterval(ResultBatch->num_rows()); - AFL_VERIFY(ResultBatch->schema()->Equals(Context->GetProgramInputColumns()->GetSchema()))("res", ResultBatch->schema()->ToString())("ctx", Context->GetProgramInputColumns()->GetSchema()->ToString()); + AFL_VERIFY(ResultBatch->schema()->Equals(Context->GetProgramInputColumns()->GetSchema()))("res", ResultBatch->schema()->ToString())( + "ctx", Context->GetProgramInputColumns()->GetSchema()->ToString()); } if (MergingContext->GetIncludeFinish() && originalSourcesCount == 1) { - AFL_VERIFY(Merger->IsEmpty())("merging_context_finish", MergingContext->GetFinish().DebugJson().GetStringRobust())("merger", Merger->DebugString()); + AFL_VERIFY(Merger->IsEmpty())("merging_context_finish", MergingContext->GetFinish().DebugJson().GetStringRobust())( + "merger", Merger->DebugString()); } } else { TMemoryProfileGuard mGuard("SCAN_PROFILE::MERGE::MANY", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); @@ -133,10 +154,10 @@ TConclusionStatus TStartMergeTask::DoExecuteImpl() { return TConclusionStatus::Success(); } -TStartMergeTask::TStartMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext, THashMap>&& sources) +TStartMergeTask::TStartMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext, + THashMap>&& sources) : TBase(mergingContext, readContext) - , Sources(std::move(sources)) -{ + , Sources(std::move(sources)) { for (auto&& s : Sources) { AFL_VERIFY(s.second->IsDataReady()); } @@ -158,4 +179,4 @@ TConclusionStatus TContinueMergeTask::DoExecuteImpl() { return TConclusionStatus::Success(); } -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git 
a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.h index daf151cd5ce7..bbe2d11ccb3a 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.h @@ -1,10 +1,11 @@ #pragma once #include "context.h" + #include #include - -#include #include +#include +#include namespace NKikimr::NOlap::NReader::NPlain { @@ -16,6 +17,8 @@ class TMergingContext { YDB_READONLY(bool, IncludeStart, false); YDB_READONLY(ui32, IntervalIdx, 0); bool IsExclusiveIntervalFlag = false; + std::optional IntervalChunkMemory; + public: TMergingContext(const NArrow::NMerger::TSortableBatchPosition& start, const NArrow::NMerger::TSortableBatchPosition& finish, const ui32 intervalIdx, const bool includeFinish, const bool includeStart, const bool isExclusiveInterval) @@ -24,9 +27,16 @@ class TMergingContext { , IncludeFinish(includeFinish) , IncludeStart(includeStart) , IntervalIdx(intervalIdx) - , IsExclusiveIntervalFlag(isExclusiveInterval) - { + , IsExclusiveIntervalFlag(isExclusiveInterval) { + } + + void SetIntervalChunkMemory(const ui64 value) { + IntervalChunkMemory = value; + } + ui64 GetIntervalChunkMemory() const { + AFL_VERIFY(IntervalChunkMemory); + return *IntervalChunkMemory; } bool IsExclusiveInterval() const { @@ -42,12 +52,12 @@ class TMergingContext { result.InsertValue("exclusive", IsExclusiveIntervalFlag); return result; } - }; -class TBaseMergeTask: public IDataTasksProcessor::ITask { +class TBaseMergeTask: public IDataTasksProcessor::ITask, public NGroupedMemoryManager::IAllocation { private: using TBase = IDataTasksProcessor::ITask; + protected: std::shared_ptr ResultBatch; std::shared_ptr LastPK; @@ -57,20 +67,25 @@ class TBaseMergeTask: public IDataTasksProcessor::ITask { std::shared_ptr MergingContext; const ui32 IntervalIdx; std::optional ShardedBatch; + std::shared_ptr AllocationGuard; 
[[nodiscard]] std::optional DrainMergerLinearScan(const std::optional resultBufferLimit); void PrepareResultBatch(); + private: virtual bool DoApply(IDataReader& indexedDataRead) const override; + virtual bool DoOnAllocated(std::shared_ptr&& guard, + const std::shared_ptr& allocation) override; + public: TBaseMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext) : TBase(readContext->GetCommonContext()->GetScanActorId()) + , IAllocation(TValidator::CheckNotNull(mergingContext)->GetIntervalChunkMemory()) , Guard(readContext->GetCommonContext()->GetCounters().GetMergeTasksGuard()) , Context(readContext) , MergingContext(mergingContext) , IntervalIdx(MergingContext->GetIntervalIdx()) { - } }; @@ -79,6 +94,7 @@ class TStartMergeTask: public TBaseMergeTask { using TBase = TBaseMergeTask; bool OnlyEmptySources = true; THashMap> Sources; + protected: virtual TConclusionStatus DoExecuteImpl() override; @@ -87,13 +103,14 @@ class TStartMergeTask: public TBaseMergeTask { return "CS::MERGE_START"; } - TStartMergeTask(const std::shared_ptr& mergingContext, - const std::shared_ptr& readContext, THashMap>&& sources); + TStartMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext, + THashMap>&& sources); }; class TContinueMergeTask: public TBaseMergeTask { private: using TBase = TBaseMergeTask; + protected: virtual TConclusionStatus DoExecuteImpl() override; @@ -102,11 +119,12 @@ class TContinueMergeTask: public TBaseMergeTask { return "CS::MERGE_CONTINUE"; } - TContinueMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext, std::unique_ptr&& merger) + TContinueMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext, + std::unique_ptr&& merger) : TBase(mergingContext, readContext) { AFL_VERIFY(merger); Merger = std::move(merger); } }; -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git 
a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.cpp index 9def8738cab9..04ed0d1c6f26 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.cpp @@ -8,38 +8,43 @@ TPlainReadData::TPlainReadData(const std::shared_ptr& context) { ui32 sourceIdx = 0; std::deque> sources; - const auto& portionsOrdered = GetReadMetadata()->SelectInfo->GetPortionsOrdered(GetReadMetadata()->IsDescSorted()); + const auto& portions = GetReadMetadata()->SelectInfo->PortionsOrderedPK; const auto& committed = GetReadMetadata()->CommittedBlobs; - auto itCommitted = committed.begin(); - auto itPortion = portionsOrdered.begin(); - ui64 committedPortionsBytes = 0; - ui64 insertedPortionsBytes = 0; ui64 compactedPortionsBytes = 0; - while (itCommitted != committed.end() || itPortion != portionsOrdered.end()) { - bool movePortion = false; - if (itCommitted == committed.end()) { - movePortion = true; - } else if (itPortion == portionsOrdered.end()) { - movePortion = false; - } else if (itCommitted->GetFirstVerified() < (*itPortion)->IndexKeyStart()) { - movePortion = false; + ui64 insertedPortionsBytes = 0; + ui64 committedPortionsBytes = 0; + for (auto&& i : portions) { + if (i->GetMeta().GetProduced() == NPortion::EProduced::COMPACTED || i->GetMeta().GetProduced() == NPortion::EProduced::SPLIT_COMPACTED) { + compactedPortionsBytes += i->GetTotalBlobBytes(); } else { - movePortion = true; + insertedPortionsBytes += i->GetTotalBlobBytes(); + } + sources.emplace_back(std::make_shared(sourceIdx++, i, SpecialReadContext)); + } + for (auto&& i : committed) { + if (i.HasSnapshot()) { + continue; + } + if (GetReadMetadata()->IsMyUncommitted(i.GetWriteIdVerified())) { + continue; } + if (GetReadMetadata()->GetPKRangesFilter().CheckPoint(i.GetFirst()) || + 
GetReadMetadata()->GetPKRangesFilter().CheckPoint(i.GetLast())) { + GetReadMetadata()->SetConflictedWriteId(i.GetWriteIdVerified()); + } + } - if (movePortion) { - if ((*itPortion)->GetMeta().GetProduced() == NPortion::EProduced::COMPACTED || (*itPortion)->GetMeta().GetProduced() == NPortion::EProduced::SPLIT_COMPACTED) { - compactedPortionsBytes += (*itPortion)->GetTotalBlobBytes(); - } else { - insertedPortionsBytes += (*itPortion)->GetTotalBlobBytes(); + for (auto&& i : committed) { + if (!i.HasSnapshot()) { + if (GetReadMetadata()->IsWriteConflictable(i.GetWriteIdVerified())) { + continue; } - sources.emplace_back(std::make_shared(sourceIdx++, *itPortion, SpecialReadContext, (*itPortion)->IndexKeyStart(), (*itPortion)->IndexKeyEnd())); - ++itPortion; - } else { - sources.emplace_back(std::make_shared(sourceIdx++, *itCommitted, SpecialReadContext, itCommitted->GetFirstVerified(), itCommitted->GetLastVerified())); - committedPortionsBytes += itCommitted->GetSize(); - ++itCommitted; + } else if (GetReadMetadata()->GetPKRangesFilter().IsPortionInPartialUsage(i.GetFirst(), i.GetLast()) == + TPKRangeFilter::EUsageClass::DontUsage) { + continue; } + sources.emplace_back(std::make_shared(sourceIdx++, i, SpecialReadContext)); + committedPortionsBytes += i.GetSize(); } Scanner = std::make_shared(std::move(sources), SpecialReadContext); @@ -54,16 +59,16 @@ TPlainReadData::TPlainReadData(const std::shared_ptr& context) } -std::vector TPlainReadData::DoExtractReadyResults(const int64_t maxRowsInBatch) { - auto result = TPartialReadResult::SplitResults(std::move(PartialResults), maxRowsInBatch); +std::vector> TPlainReadData::DoExtractReadyResults(const int64_t /*maxRowsInBatch*/) { + auto result = std::move(PartialResults); + PartialResults.clear(); +// auto result = TPartialReadResult::SplitResults(std::move(PartialResults), maxRowsInBatch); ui32 count = 0; for (auto&& r: result) { - count += r.GetRecordsCount(); + count += r->GetRecordsCount(); } AFL_VERIFY(count == 
ReadyResultsCount); - ReadyResultsCount = 0; - PartialResults.clear(); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "DoExtractReadyResults")("result", result.size())("count", count)("finished", Scanner->IsFinished()); return result; @@ -76,7 +81,7 @@ TConclusion TPlainReadData::DoReadNextInterval() { void TPlainReadData::OnIntervalResult(const std::shared_ptr& result) { // result->GetResourcesGuardOnly()->Update(result->GetMemorySize()); ReadyResultsCount += result->GetRecordsCount(); - PartialResults.emplace_back(std::move(*result)); + PartialResults.emplace_back(result); } } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.h index 39d993b156d6..93d2a56bad14 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.h @@ -14,9 +14,8 @@ class TPlainReadData: public IDataReader, TNonCopyable, NColumnShard::TMonitorin using TBase = IDataReader; std::shared_ptr Scanner; std::shared_ptr SpecialReadContext; - std::vector PartialResults; + std::vector> PartialResults; ui32 ReadyResultsCount = 0; - bool AbortedFlag = false; protected: virtual TConclusionStatus DoStart() override { return Scanner->Start(); @@ -31,11 +30,11 @@ class TPlainReadData: public IDataReader, TNonCopyable, NColumnShard::TMonitorin return sb; } - virtual std::vector DoExtractReadyResults(const int64_t maxRowsInBatch) override; + virtual std::vector> DoExtractReadyResults(const int64_t maxRowsInBatch) override; virtual TConclusion DoReadNextInterval() override; virtual void DoAbort() override { - AbortedFlag = true; + SpecialReadContext->Abort(); Scanner->Abort(); PartialResults.clear(); Y_ABORT_UNLESS(IsFinished()); @@ -68,7 +67,7 @@ class TPlainReadData: public IDataReader, TNonCopyable, NColumnShard::TMonitorin TPlainReadData(const std::shared_ptr& 
context); ~TPlainReadData() { - if (!AbortedFlag) { + if (!SpecialReadContext->IsAborted()) { Abort("unexpected on destructor"); } } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.cpp index e8570ed328e5..87de386beda9 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.cpp @@ -1,27 +1,33 @@ -#include "scanner.h" #include "plain_read_data.h" +#include "scanner.h" + #include + #include namespace NKikimr::NOlap::NReader::NPlain { -void TScanHead::OnIntervalResult(const std::optional& newBatch, const std::shared_ptr& lastPK, +void TScanHead::OnIntervalResult(std::shared_ptr&& allocationGuard, + const std::optional& newBatch, const std::shared_ptr& lastPK, std::unique_ptr&& merger, const ui32 intervalIdx, TPlainReadData& reader) { - if (Context->GetReadMetadata()->Limit && (!newBatch || newBatch->GetRecordsCount() == 0) && InFlightLimit < 1000) { - if (++ZeroCount == std::max(16, InFlightLimit)) { - InFlightLimit = std::min(MaxInFlight, InFlightLimit * 2); - ZeroCount = 0; - } - } else { - ZeroCount = 0; + if (Context->GetReadMetadata()->Limit && (!newBatch || newBatch->GetRecordsCount() == 0) && InFlightLimit < MaxInFlight) { + InFlightLimit = std::min(MaxInFlight, InFlightLimit * 4); } auto itInterval = FetchingIntervals.find(intervalIdx); AFL_VERIFY(itInterval != FetchingIntervals.end()); itInterval->second->SetMerger(std::move(merger)); AFL_VERIFY(Context->GetCommonContext()->GetReadMetadata()->IsSorted()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "interval_result_received")("interval_idx", intervalIdx)( + "intervalId", itInterval->second->GetIntervalId()); if (newBatch && newBatch->GetRecordsCount()) { - const std::optional callbackIdxSubscriver = itInterval->second->HasMerger() ? 
std::optional(intervalIdx) : std::nullopt; - AFL_VERIFY(ReadyIntervals.emplace(intervalIdx, std::make_shared(itInterval->second->GetResourcesGuard(), *newBatch, lastPK, callbackIdxSubscriver)).second); + std::optional callbackIdxSubscriver; + std::shared_ptr gGuard; + if (itInterval->second->HasMerger()) { + callbackIdxSubscriver = intervalIdx; + } else { + gGuard = itInterval->second->GetGroupGuard(); + } + AFL_VERIFY(ReadyIntervals.emplace(intervalIdx, std::make_shared(std::move(allocationGuard), std::move(gGuard), *newBatch, lastPK, callbackIdxSubscriver)).second); } else { AFL_VERIFY(ReadyIntervals.emplace(intervalIdx, nullptr).second); } @@ -31,9 +37,13 @@ void TScanHead::OnIntervalResult(const std::optionalGetIntervalIdx(); auto it = ReadyIntervals.find(intervalIdx); if (it == ReadyIntervals.end()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "interval_result_absent")("interval_idx", intervalIdx)( + "merger", interval->HasMerger())("interval_id", interval->GetIntervalId()); break; + } else { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "interval_result")("interval_idx", intervalIdx)("count", + it->second ? it->second->GetRecordsCount() : 0)("merger", interval->HasMerger())("interval_id", interval->GetIntervalId()); } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "interval_result")("interval_idx", intervalIdx)("count", it->second ? 
it->second->GetRecordsCount() : 0); auto result = it->second; ReadyIntervals.erase(it); if (result) { @@ -51,7 +61,8 @@ void TScanHead::OnIntervalResult(const std::optionalfirst); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "wait_interval")("remained", FetchingIntervals.size())( + "interval_idx", FetchingIntervals.begin()->first); } } @@ -67,7 +78,8 @@ TConclusionStatus TScanHead::Start() { i.second->IncIntervalsCount(); } if (!detectorResult) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "scanner_initializer_aborted")("reason", detectorResult.GetErrorMessage()); + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "scanner_initializer_aborted")( + "reason", detectorResult.GetErrorMessage()); Abort(); return detectorResult; } @@ -83,9 +95,11 @@ TConclusionStatus TScanHead::Start() { for (auto&& i : context.GetCurrentSources()) { i.second->IncIntervalsCount(); } - auto detectorResult = DetectSourcesFeatureInContextIntervalScan(context.GetCurrentSources(), guaranteeExclusivePK || context.GetIsExclusiveInterval()); + auto detectorResult = + DetectSourcesFeatureInContextIntervalScan(context.GetCurrentSources(), guaranteeExclusivePK || context.GetIsExclusiveInterval()); if (!detectorResult) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "scanner_initializer_aborted")("reason", detectorResult.GetErrorMessage()); + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "scanner_initializer_aborted")( + "reason", detectorResult.GetErrorMessage()); Abort(); return detectorResult; } @@ -95,14 +109,8 @@ TConclusionStatus TScanHead::Start() { } TScanHead::TScanHead(std::deque>&& sources, const std::shared_ptr& context) - : Context(context) -{ - + : Context(context) { if (HasAppData()) { - if (AppDataVerified().ColumnShardConfig.HasMaxInFlightMemoryOnRequest()) { - MaxInFlightMemory = AppDataVerified().ColumnShardConfig.GetMaxInFlightMemoryOnRequest(); - } - if 
(AppDataVerified().ColumnShardConfig.HasMaxInFlightIntervalsOnRequest()) { MaxInFlight = AppDataVerified().ColumnShardConfig.GetMaxInFlightIntervalsOnRequest(); } @@ -125,88 +133,89 @@ class TSourcesStorageForMemoryOptimization { private: class TSourceInfo { private: + YDB_READONLY(ui64, Memory, 0); YDB_READONLY_DEF(std::shared_ptr, Source); YDB_READONLY_DEF(std::shared_ptr, FetchingInfo); + public: TSourceInfo(const std::shared_ptr& source, const std::shared_ptr& fetchingInfo) : Source(source) - , FetchingInfo(fetchingInfo) - { - + , FetchingInfo(fetchingInfo) { + Memory = FetchingInfo->PredictRawBytes(Source); } NJson::TJsonValue DebugJson() const { NJson::TJsonValue result = NJson::JSON_MAP; result.InsertValue("source", Source->DebugJsonForMemory()); -// result.InsertValue("fetching", Fetching->DebugJsonForMemory()); + result.InsertValue("memory", Memory); + // result.InsertValue("FetchingInfo", FetchingInfo->DebugJsonForMemory()); + return result; + } + + bool ReduceMemory() { + const bool result = FetchingInfo->InitSourceSeqColumnIds(Source); + if (result) { + Memory = FetchingInfo->PredictRawBytes(Source); + } return result; } + + bool operator<(const TSourceInfo& item) const { + return Memory < item.Memory; + } + }; - std::map> Sources; + std::vector Sources; YDB_READONLY(ui64, MemorySum, 0); - YDB_READONLY_DEF(std::set, PathIds); + public: TString DebugString() const { NJson::TJsonValue resultJson; auto& memorySourcesArr = resultJson.InsertValue("sources_by_memory", NJson::JSON_ARRAY); resultJson.InsertValue("sources_by_memory_count", Sources.size()); - for (auto it = Sources.rbegin(); it != Sources.rend(); ++it) { + for (auto&& it: Sources) { auto& sourceMap = memorySourcesArr.AppendValue(NJson::JSON_MAP); - sourceMap.InsertValue("memory", it->first); auto& sourcesArr = sourceMap.InsertValue("sources", NJson::JSON_ARRAY); - for (auto&& s : it->second) { - sourcesArr.AppendValue(s.second.DebugJson()); - } + sourcesArr.AppendValue(it.DebugJson()); } return 
resultJson.GetStringRobust(); } - void UpdateSource(const ui64 oldMemoryInfo, const ui32 sourceIdx) { - auto it = Sources.find(oldMemoryInfo); - AFL_VERIFY(it != Sources.end()); - auto itSource = it->second.find(sourceIdx); - AFL_VERIFY(itSource != it->second.end()); - auto sourceInfo = itSource->second; - it->second.erase(itSource); - if (it->second.empty()) { - Sources.erase(it); - } - AFL_VERIFY(MemorySum >= oldMemoryInfo); - MemorySum -= oldMemoryInfo; - AddSource(sourceInfo.GetSource(), sourceInfo.GetFetchingInfo()); - } - void AddSource(const std::shared_ptr& source, const std::shared_ptr& fetching) { - const ui64 sourceMemory = fetching->PredictRawBytes(source); - MemorySum += sourceMemory; - AFL_VERIFY(Sources[sourceMemory].emplace(source->GetSourceIdx(), TSourceInfo(source, fetching)).second); - PathIds.emplace(source->GetPathId()); + Sources.emplace_back(TSourceInfo(source, fetching)); + MemorySum += Sources.back().GetMemory(); } bool Optimize(const ui64 memoryLimit) { - bool modified = true; - while (MemorySum > memoryLimit && modified) { - modified = false; - for (auto it = Sources.rbegin(); it != Sources.rend(); ++it) { - for (auto&& [sourceIdx, sourceInfo] : it->second) { - if (!sourceInfo.GetFetchingInfo()->InitSourceSeqColumnIds(sourceInfo.GetSource())) { - continue; - } - modified = true; - UpdateSource(it->first, sourceIdx); - break; - } - if (modified) { - break; + if (MemorySum <= memoryLimit) { + return true; + } + std::sort(Sources.begin(), Sources.end()); + while (true) { + std::vector nextSources; + while (memoryLimit < MemorySum && Sources.size()) { + const ui64 currentMemory = Sources.back().GetMemory(); + if (Sources.back().ReduceMemory()) { + AFL_VERIFY(currentMemory <= MemorySum); + MemorySum -= currentMemory; + MemorySum += Sources.back().GetMemory(); + nextSources.emplace_back(std::move(Sources.back())); } + Sources.pop_back(); } + if (nextSources.empty() || MemorySum <= memoryLimit) { + break; + } + std::sort(nextSources.begin(), 
nextSources.end()); + std::swap(nextSources, Sources); } - return MemorySum < memoryLimit; + return MemorySum <= memoryLimit; } }; -TConclusionStatus TScanHead::DetectSourcesFeatureInContextIntervalScan(const THashMap>& intervalSources, const bool isExclusiveInterval) const { +TConclusionStatus TScanHead::DetectSourcesFeatureInContextIntervalScan( + const THashMap>& intervalSources, const bool isExclusiveInterval) const { TSourcesStorageForMemoryOptimization optimizer; for (auto&& i : intervalSources) { if (!isExclusiveInterval) { @@ -217,23 +226,18 @@ TConclusionStatus TScanHead::DetectSourcesFeatureInContextIntervalScan(const THa } const ui64 startMemory = optimizer.GetMemorySum(); if (!optimizer.Optimize(Context->ReduceMemoryIntervalLimit) && Context->RejectMemoryIntervalLimit < optimizer.GetMemorySum()) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "next_internal_broken") - ("reason", "a lot of memory need")("start", startMemory) - ("reduce_limit", Context->ReduceMemoryIntervalLimit) - ("reject_limit", Context->RejectMemoryIntervalLimit) - ("need", optimizer.GetMemorySum()) - ("path_ids", JoinSeq(",", optimizer.GetPathIds())) - ("details", IS_LOG_PRIORITY_ENABLED(NActors::NLog::PRI_DEBUG, NKikimrServices::TX_COLUMNSHARD_SCAN) ? optimizer.DebugString() : "NEED_DEBUG_LEVEL"); + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "next_internal_broken")("reason", "a lot of memory need")("start", startMemory)( + "reduce_limit", Context->ReduceMemoryIntervalLimit)("reject_limit", Context->RejectMemoryIntervalLimit)( + "need", optimizer.GetMemorySum())("path_id", Context->GetReadMetadata()->GetPathId())( + "details", IS_LOG_PRIORITY_ENABLED(NActors::NLog::PRI_DEBUG, NKikimrServices::TX_COLUMNSHARD_SCAN) ? 
optimizer.DebugString() + : "NEED_DEBUG_LEVEL"); Context->GetCommonContext()->GetCounters().OnOptimizedIntervalMemoryFailed(optimizer.GetMemorySum()); - return TConclusionStatus::Fail("We need a lot of memory in time for interval scanner: " + - ::ToString(optimizer.GetMemorySum()) + " path_ids: " + JoinSeq(",", optimizer.GetPathIds()) + ". We need wait compaction processing. Sorry."); + return TConclusionStatus::Fail("We need a lot of memory in time for interval scanner: " + ::ToString(optimizer.GetMemorySum()) + + " path_id: " + Context->GetReadMetadata()->GetPathId() + ". We need wait compaction processing. Sorry."); } else if (optimizer.GetMemorySum() < startMemory) { - AFL_INFO(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "memory_reduce_active") - ("reason", "need reduce memory")("start", startMemory) - ("reduce_limit", Context->ReduceMemoryIntervalLimit) - ("reject_limit", Context->RejectMemoryIntervalLimit) - ("need", optimizer.GetMemorySum()) - ("path_ids", JoinSeq(",", optimizer.GetPathIds())); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "memory_reduce_active")("reason", "need reduce memory")("start", startMemory)( + "reduce_limit", Context->ReduceMemoryIntervalLimit)("reject_limit", Context->RejectMemoryIntervalLimit)( + "need", optimizer.GetMemorySum())("path_id", Context->GetReadMetadata()->GetPathId()); Context->GetCommonContext()->GetCounters().OnOptimizedIntervalMemoryReduced(startMemory - optimizer.GetMemorySum()); } Context->GetCommonContext()->GetCounters().OnOptimizedIntervalMemoryRequired(optimizer.GetMemorySum()); @@ -241,7 +245,7 @@ TConclusionStatus TScanHead::DetectSourcesFeatureInContextIntervalScan(const THa } TConclusion TScanHead::BuildNextInterval() { - if (AbortFlag) { + if (Context->IsAborted()) { return false; } while (BorderPoints.size()) { @@ -251,11 +255,6 @@ TConclusion TScanHead::BuildNextInterval() { "count", FetchingIntervals.size())("limit", InFlightLimit); return false; } - if 
(Context->GetCommonContext()->GetCounters().GetRequestedMemoryBytes() >= MaxInFlightMemory) { - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "skip_next_interval")("reason", "a lot of memory in usage")( - "volume", Context->GetCommonContext()->GetCounters().GetRequestedMemoryBytes())("limit", MaxInFlightMemory); - return false; - } } auto firstBorderPointInfo = std::move(BorderPoints.begin()->second); CurrentState.OnStartPoint(firstBorderPointInfo); @@ -266,7 +265,6 @@ TConclusion TScanHead::BuildNextInterval() { CurrentState.GetCurrentSources(), Context, true, true, false); FetchingIntervals.emplace(intervalIdx, interval); IntervalStats.emplace_back(CurrentState.GetCurrentSources().size(), true); - NResourceBroker::NSubscribe::ITask::StartResourceSubscription(Context->GetCommonContext()->GetResourceSubscribeActorId(), interval); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "new_interval")("interval_idx", intervalIdx)( "interval", interval->DebugJson()); } @@ -286,7 +284,6 @@ TConclusion TScanHead::BuildNextInterval() { IntervalStats.emplace_back(CurrentState.GetCurrentSources().size(), false); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "new_interval")("interval_idx", intervalIdx)( "interval", interval->DebugJson()); - NResourceBroker::NSubscribe::ITask::StartResourceSubscription(Context->GetCommonContext()->GetResourceSubscribeActorId(), interval); return true; } else { IntervalStats.emplace_back(CurrentState.GetCurrentSources().size(), false); @@ -304,7 +301,7 @@ bool TScanHead::IsReverse() const { } void TScanHead::Abort() { - AbortFlag = true; + AFL_VERIFY(Context->IsAborted()); THashSet sourceIds; for (auto&& i : FetchingIntervals) { for (auto&& s : i.second->GetSources()) { @@ -329,4 +326,4 @@ void TScanHead::Abort() { Y_ABORT_UNLESS(IsFinished()); } -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.h 
b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.h index d03680a821a0..09649e788147 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.h @@ -80,14 +80,12 @@ class TScanHead { std::vector IntervalStats; ui64 InFlightLimit = 1; ui64 MaxInFlight = 256; - ui64 MaxInFlightMemory = TGlobalLimits::ScanMemoryLimit; ui64 ZeroCount = 0; - bool AbortFlag = false; void DrainSources(); [[nodiscard]] TConclusionStatus DetectSourcesFeatureInContextIntervalScan(const THashMap>& intervalSources, const bool isExclusiveInterval) const; public: void OnSentDataFromInterval(const ui32 intervalIdx) const { - if (AbortFlag) { + if (Context->IsAborted()) { return; } auto it = FetchingIntervals.find(intervalIdx); @@ -112,8 +110,10 @@ class TScanHead { return sb; } - void OnIntervalResult(const std::optional& batch, const std::shared_ptr& lastPK, - std::unique_ptr&& merger, const ui32 intervalIdx, TPlainReadData& reader); + void OnIntervalResult(std::shared_ptr&& allocationGuard, + const std::optional& batch, + const std::shared_ptr& lastPK, std::unique_ptr&& merger, + const ui32 intervalIdx, TPlainReadData& reader); TConclusionStatus Start(); diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp index b5927acc8d1a..38a73b19d65e 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp @@ -1,15 +1,15 @@ -#include "source.h" - #include "constructor.h" #include "fetched_data.h" #include "interval.h" #include "plain_read_data.h" +#include "source.h" #include #include #include #include #include +#include namespace NKikimr::NOlap::NReader::NPlain { @@ -19,30 +19,19 @@ void IDataSource::InitFetchingPlan(const std::shared_ptr& fetch FetchingPlan = fetching; } -void 
IDataSource::RegisterInterval(TFetchingInterval& interval) { +void IDataSource::RegisterInterval(TFetchingInterval& interval, const std::shared_ptr& sourcePtr) { AFL_VERIFY(FetchingPlan); + AFL_VERIFY(!Context->IsAborted()); if (!IsReadyFlag) { AFL_VERIFY(Intervals.emplace(interval.GetIntervalIdx(), &interval).second); } -} - -void IDataSource::SetIsReady() { - AFL_VERIFY(!IsReadyFlag); - IsReadyFlag = true; - for (auto&& i : Intervals) { - i.second->OnSourceFetchStageReady(SourceIdx); - } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "source_ready")("intervals_count", Intervals.size())("source_idx", SourceIdx); - Intervals.clear(); -} - -void IDataSource::OnInitResourcesGuard(const std::shared_ptr& sourcePtr) { - AFL_VERIFY(FetchingPlan); - if (AtomicCas(&FilterStageFlag, 1, 0)) { + if (AtomicCas(&SourceStartedFlag, 1, 0)) { + SetFirstIntervalId(interval.GetIntervalId()); + AFL_VERIFY(FetchingPlan); StageData = std::make_unique(GetExclusiveIntervalOnly() && IsSourceInMemory()); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("InitFetchingPlan", FetchingPlan->DebugString())("source_idx", SourceIdx); NActors::TLogContextGuard logGuard(NActors::TLogContextBuilder::Build()("source", SourceIdx)("method", "InitFetchingPlan")); - if (IsAborted()) { + if (Context->IsAborted()) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "InitFetchingPlanAborted"); return; } @@ -52,6 +41,16 @@ void IDataSource::OnInitResourcesGuard(const std::shared_ptr& sourc } } +void IDataSource::SetIsReady() { + AFL_VERIFY(!IsReadyFlag); + IsReadyFlag = true; + for (auto&& i : Intervals) { + i.second->OnSourceFetchStageReady(SourceIdx); + } + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "source_ready")("intervals_count", Intervals.size())("source_idx", SourceIdx); + Intervals.clear(); +} + void TPortionDataSource::NeedFetchColumns(const std::set& columnIds, TBlobsAction& blobsAction, THashMap& defaultBlocks, const std::shared_ptr& filter) { const 
NArrow::TColumnFilter& cFilter = filter ? *filter : NArrow::TColumnFilter::BuildAllowFilter(); @@ -65,9 +64,10 @@ void TPortionDataSource::NeedFetchColumns(const std::set& columnIds, TBlob auto itFilter = cFilter.GetIterator(false, Portion->NumRows(i)); bool itFinished = false; for (auto&& c : columnChunks) { - Y_ABORT_UNLESS(!itFinished); + AFL_VERIFY(!itFinished); if (!itFilter.IsBatchForSkip(c->GetMeta().GetNumRows())) { - auto reading = blobsAction.GetReading(Schema->GetIndexInfo().GetColumnStorageId(c->GetColumnId(), Portion->GetMeta().GetTierName())); + auto reading = + blobsAction.GetReading(Schema->GetIndexInfo().GetColumnStorageId(c->GetColumnId(), Portion->GetMeta().GetTierName())); reading->SetIsBackgroundProcess(false); reading->AddRange(Portion->RestoreBlobRange(c->BlobRange)); ++fetchedChunks; @@ -80,15 +80,16 @@ void TPortionDataSource::NeedFetchColumns(const std::set& columnIds, TBlob } AFL_VERIFY(itFinished)("filter", itFilter.DebugString())("count", Portion->NumRows(i)); } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "chunks_stats")("fetch", fetchedChunks)("null", nullChunks) - ("reading_actions", blobsAction.GetStorageIds())("columns", columnIds.size()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "chunks_stats")("fetch", fetchedChunks)("null", nullChunks)( + "reading_actions", blobsAction.GetStorageIds())("columns", columnIds.size()); } -bool TPortionDataSource::DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& columns) { +bool TPortionDataSource::DoStartFetchingColumns( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& columns) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName()); - AFL_VERIFY(columns->GetColumnsCount()); + AFL_VERIFY(columns.GetColumnsCount()); AFL_VERIFY(!StageData->GetAppliedFilter() || !StageData->GetAppliedFilter()->IsTotalDenyFilter()); - auto& columnIds = 
columns->GetColumnIds(); + auto& columnIds = columns.GetColumnIds(); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName())("fetching_info", step.DebugString()); TBlobsAction action(GetContext()->GetCommonContext()->GetStoragesManager(), NBlobOperations::EConsumer::SCAN); @@ -108,9 +109,10 @@ bool TPortionDataSource::DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) { +bool TPortionDataSource::DoStartFetchingIndexes( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName()); - Y_ABORT_UNLESS(indexes->GetIndexesCount()); + AFL_VERIFY(indexes->GetIndexesCount()); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName())("fetching_info", step.DebugString()); TBlobsAction action(GetContext()->GetCommonContext()->GetStoragesManager(), NBlobOperations::EConsumer::SCAN); @@ -191,7 +193,8 @@ void TPortionDataSource::DoAssembleColumns(const std::shared_ptr& c .AssembleToGeneralContainer(SequentialEntityIds)); } -bool TCommittedDataSource::DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& /*columns*/) { +bool TCommittedDataSource::DoStartFetchingColumns( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& /*columns*/) { if (ReadStarted) { return false; } @@ -204,7 +207,7 @@ bool TCommittedDataSource::DoStartFetchingColumns(const std::shared_ptrSetIsBackgroundProcess(false); readAction->AddRange(CommittedBlob.GetBlobRange()); - std::vector> actions = {readAction}; + std::vector> actions = { readAction }; auto constructor = std::make_shared(actions, sourcePtr, step, GetContext(), "CS::READ::" + step.GetName(), ""); NActors::TActivationContext::AsActorContext().Register(new NOlap::NBlobOperations::NRead::TActor(constructor)); return true; @@ -212,6 
+215,8 @@ bool TCommittedDataSource::DoStartFetchingColumns(const std::shared_ptr& columns) { TMemoryProfileGuard mGuard("SCAN_PROFILE::ASSEMBLER::COMMITTED", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); + const ISnapshotSchema::TPtr batchSchema = GetContext()->GetReadMetadata()->GetIndexVersions().GetSchemaVerified(GetCommitted().GetSchemaVersion()); + const ISnapshotSchema::TPtr resultSchema = GetContext()->GetReadMetadata()->GetResultSchema(); if (!GetStageData().GetTable()) { AFL_VERIFY(GetStageData().GetBlobs().size() == 1); auto bData = MutableStageData().ExtractBlob(GetStageData().GetBlobs().begin()->first); @@ -219,11 +224,12 @@ void TCommittedDataSource::DoAssembleColumns(const std::shared_ptr& auto rBatch = NArrow::DeserializeBatch(bData, std::make_shared(CommittedBlob.GetSchemaSubset().Apply(schema->fields()))); AFL_VERIFY(rBatch)("schema", schema->ToString()); auto batch = std::make_shared(rBatch); - GetContext()->GetReadMetadata()->GetIndexInfo().AddSnapshotColumns(*batch, CommittedBlob.GetSnapshot()); + batchSchema->AdaptBatchToSchema(*batch, resultSchema); + GetContext()->GetReadMetadata()->GetIndexInfo().AddSnapshotColumns(*batch, CommittedBlob.GetSnapshotDef(TSnapshot::Zero())); GetContext()->GetReadMetadata()->GetIndexInfo().AddDeleteFlagsColumn(*batch, CommittedBlob.GetIsDelete()); MutableStageData().AddBatch(batch); } - MutableStageData().SyncTableColumns(columns->GetSchema()->fields(), *GetContext()->GetReadMetadata()->GetResultSchema()); + MutableStageData().SyncTableColumns(columns->GetSchema()->fields(), *resultSchema); } } // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h index 30e7fe04bf52..889f9fe5e7d4 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h @@ -1,16 
+1,18 @@ #pragma once -#include "context.h" #include "columns_set.h" +#include "context.h" #include "fetched_data.h" + +#include +#include #include #include #include #include -#include -#include #include -#include -#include +#include +#include + #include namespace NKikimr::NOlap { @@ -41,8 +43,13 @@ class IDataSource { YDB_READONLY(ui32, IntervalsCount, 0); virtual NJson::TJsonValue DoDebugJson() const = 0; bool MergingStartedFlag = false; - bool AbortedFlag = false; + TAtomic SourceStartedFlag = 0; std::shared_ptr FetchingPlan; + std::vector> ResourceGuards; + std::optional FirstIntervalId; + ui32 CurrentPlanStepIndex = 0; + YDB_READONLY(TPKRangeFilter::EUsageClass, UsageClass, TPKRangeFilter::EUsageClass::PartialUsage); + protected: bool IsSourceInMemoryFlag = true; THashMap Intervals; @@ -53,8 +60,10 @@ class IDataSource { TAtomic FilterStageFlag = 0; bool IsReadyFlag = false; - virtual bool DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& columns) = 0; - virtual bool DoStartFetchingIndexes(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) = 0; + virtual bool DoStartFetchingColumns( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& columns) = 0; + virtual bool DoStartFetchingIndexes( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) = 0; virtual void DoAssembleColumns(const std::shared_ptr& columns) = 0; virtual void DoAbort() = 0; virtual void DoApplyIndex(const NIndexes::TIndexCheckerContainer& indexMeta) = 0; @@ -62,15 +71,43 @@ class IDataSource { virtual NJson::TJsonValue DoDebugJsonForMemory() const { return NJson::JSON_MAP; } + virtual bool DoAddTxConflict() = 0; + public: - void OnInitResourcesGuard(const std::shared_ptr& sourcePtr); + bool AddTxConflict() { + if (!Context->GetCommonContext()->HasLock()) { + return false; + } + if 
(DoAddTxConflict()) { + StageData->Clear(); + return true; + } + return false; + } + + ui64 GetResourceGuardsMemory() const { + ui64 result = 0; + for (auto&& i : ResourceGuards) { + result += i->GetMemory(); + } + return result; + } - bool IsAborted() const { - return AbortedFlag; + void RegisterAllocationGuard(const std::shared_ptr& guard) { + ResourceGuards.emplace_back(guard); } + bool IsSourceInMemory() const { return IsSourceInMemoryFlag; } + void SetFirstIntervalId(const ui64 value) { + AFL_VERIFY(!FirstIntervalId); + FirstIntervalId = value; + } + ui64 GetFirstIntervalId() const { + AFL_VERIFY(!!FirstIntervalId); + return *FirstIntervalId; + } virtual bool IsSourceInMemory(const std::set& fieldIds) const = 0; bool AddSequentialEntityIds(const ui32 entityId) { if (DoAddSequentialEntityIds(entityId)) { @@ -114,12 +151,12 @@ class IDataSource { DoAssembleColumns(columns); } - bool StartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& columns) { - AFL_VERIFY(columns); + bool StartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& columns) { return DoStartFetchingColumns(sourcePtr, step, columns); } - bool StartFetchingIndexes(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) { + bool StartFetchingIndexes( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) { AFL_VERIFY(indexes); return DoStartFetchingIndexes(sourcePtr, step, indexes); } @@ -134,18 +171,18 @@ class IDataSource { virtual ui64 GetColumnRawBytes(const std::set& columnIds) const = 0; virtual ui64 GetIndexRawBytes(const std::set& indexIds) const = 0; + virtual ui64 GetColumnBlobBytes(const std::set& columnsIds) const = 0; bool IsMergingStarted() const { return MergingStartedFlag; } void StartMerging() { - Y_ABORT_UNLESS(!MergingStartedFlag); + AFL_VERIFY(!MergingStartedFlag); 
MergingStartedFlag = true; } void Abort() { - AbortedFlag = true; Intervals.clear(); DoAbort(); } @@ -172,6 +209,17 @@ class IDataSource { return IsReadyFlag; } + void OnEmptyStageData() { + if (!ResourceGuards.size()) { + return; + } + if (ExclusiveIntervalOnly) { + ResourceGuards.back()->Update(0); + } else { + ResourceGuards.back()->Update(GetColumnRawBytes(Context->GetPKColumns()->GetColumnIds())); + } + } + const TFetchedData& GetStageData() const { AFL_VERIFY(StageData); return *StageData; @@ -182,11 +230,11 @@ class IDataSource { return *StageData; } - void RegisterInterval(TFetchingInterval& interval); + void RegisterInterval(TFetchingInterval& interval, const std::shared_ptr& sourcePtr); - IDataSource(const ui32 sourceIdx, const std::shared_ptr& context, - const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& finish, const TSnapshot& recordSnapshotMin, const TSnapshot& recordSnapshotMax, - const ui32 recordsCount, const std::optional shardingVersion, const bool hasDeletions) + IDataSource(const ui32 sourceIdx, const std::shared_ptr& context, const NArrow::TReplaceKey& start, + const NArrow::TReplaceKey& finish, const TSnapshot& recordSnapshotMin, const TSnapshot& recordSnapshotMax, const ui32 recordsCount, + const std::optional shardingVersion, const bool hasDeletions) : SourceIdx(sourceIdx) , Start(context->GetReadMetadata()->BuildSortedPosition(start)) , Finish(context->GetReadMetadata()->BuildSortedPosition(finish)) @@ -197,8 +245,9 @@ class IDataSource { , RecordSnapshotMax(recordSnapshotMax) , RecordsCount(recordsCount) , ShardingVersionOptional(shardingVersion) - , HasDeletions(hasDeletions) - { + , HasDeletions(hasDeletions) { + UsageClass = Context->GetReadMetadata()->GetPKRangesFilter().IsPortionInPartialUsage(GetStartReplaceKey(), GetFinishReplaceKey()); + AFL_VERIFY(UsageClass != TPKRangeFilter::EUsageClass::DontUsage); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "portions_for_merge")("start", Start.DebugJson())("finish", 
Finish.DebugJson()); if (Start.IsReverseSort()) { std::swap(Start, Finish); @@ -207,7 +256,7 @@ class IDataSource { } virtual ~IDataSource() { - Y_ABORT_UNLESS(AbortedFlag || Intervals.empty()); + AFL_VERIFY(Intervals.empty()); } }; @@ -217,14 +266,16 @@ class TPortionDataSource: public IDataSource { std::set SequentialEntityIds; std::shared_ptr Portion; std::shared_ptr Schema; + mutable THashMap FingerprintedData; - void NeedFetchColumns(const std::set& columnIds, - TBlobsAction& blobsAction, THashMap& nullBlocks, - const std::shared_ptr& filter); + void NeedFetchColumns(const std::set& columnIds, TBlobsAction& blobsAction, + THashMap& nullBlocks, const std::shared_ptr& filter); virtual void DoApplyIndex(const NIndexes::TIndexCheckerContainer& indexChecker) override; - virtual bool DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& columns) override; - virtual bool DoStartFetchingIndexes(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) override; + virtual bool DoStartFetchingColumns( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& columns) override; + virtual bool DoStartFetchingIndexes( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) override; virtual void DoAssembleColumns(const std::shared_ptr& columns) override; virtual NJson::TJsonValue DoDebugJson() const override { NJson::TJsonValue result = NJson::JSON_MAP; @@ -239,7 +290,7 @@ class TPortionDataSource: public IDataSource { for (auto&& i : SequentialEntityIds) { AFL_VERIFY(columns.erase(i)); } -// result.InsertValue("sequential_columns", JoinSeq(",", SequentialEntityIds)); + // result.InsertValue("sequential_columns", JoinSeq(",", SequentialEntityIds)); if (SequentialEntityIds.size()) { result.InsertValue("min_memory_seq", Portion->GetMinMemoryForReadColumns(SequentialEntityIds)); 
result.InsertValue("min_memory_seq_blobs", Portion->GetColumnBlobBytes(SequentialEntityIds)); @@ -257,10 +308,16 @@ class TPortionDataSource: public IDataSource { return Portion->GetPathId(); } virtual bool DoAddSequentialEntityIds(const ui32 entityId) override { + FingerprintedData.clear(); return SequentialEntityIds.emplace(entityId).second; } public: + virtual bool DoAddTxConflict() override { + GetContext()->GetReadMetadata()->SetBrokenWithCommitted(); + return false; + } + virtual bool HasIndexes(const std::set& indexIds) const override { return Portion->HasIndexes(indexIds); } @@ -279,6 +336,13 @@ class TPortionDataSource: public IDataSource { } virtual ui64 GetColumnRawBytes(const std::set& columnsIds) const override { + AFL_VERIFY(columnsIds.size()); + const ui64 fp = CombineHashes(*columnsIds.begin(), *columnsIds.rbegin()); + auto it = FingerprintedData.find(fp); + if (it != FingerprintedData.end()) { + return it->second; + } + ui64 result = 0; if (SequentialEntityIds.size()) { std::set selectedSeq; std::set selectedInMem; @@ -289,12 +353,17 @@ class TPortionDataSource: public IDataSource { selectedInMem.emplace(i); } } - return Portion->GetMinMemoryForReadColumns(selectedSeq) - + Portion->GetColumnBlobBytes(selectedSeq) - + Portion->GetColumnRawBytes(selectedInMem, false); + result = Portion->GetMinMemoryForReadColumns(selectedSeq) + Portion->GetColumnBlobBytes(selectedSeq) + + Portion->GetColumnRawBytes(selectedInMem, false); } else { - return Portion->GetColumnRawBytes(columnsIds, false); + result = Portion->GetColumnRawBytes(columnsIds, false); } + FingerprintedData.emplace(fp, result); + return result; + } + + virtual ui64 GetColumnBlobBytes(const std::set& columnsIds) const override { + return Portion->GetColumnBlobBytes(columnsIds, false); } virtual ui64 GetIndexRawBytes(const std::set& indexIds) const override { @@ -309,13 +378,11 @@ class TPortionDataSource: public IDataSource { return Portion; } - TPortionDataSource(const ui32 sourceIdx, const 
std::shared_ptr& portion, const std::shared_ptr& context, - const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& finish) - : TBase(sourceIdx, context, start, finish, portion->RecordSnapshotMin(), portion->RecordSnapshotMax(), portion->GetRecordsCount(), portion->GetShardingVersionOptional(), - portion->GetMeta().GetDeletionsCount()) + TPortionDataSource(const ui32 sourceIdx, const std::shared_ptr& portion, const std::shared_ptr& context) + : TBase(sourceIdx, context, portion->IndexKeyStart(), portion->IndexKeyEnd(), portion->RecordSnapshotMin(), portion->RecordSnapshotMax(), + portion->GetRecordsCount(), portion->GetShardingVersionOptional(), portion->GetMeta().GetDeletionsCount()) , Portion(portion) - , Schema(GetContext()->GetReadMetadata()->GetLoadSchemaVerified(*Portion)) - { + , Schema(GetContext()->GetReadMetadata()->GetLoadSchemaVerified(*Portion)) { } }; @@ -326,11 +393,12 @@ class TCommittedDataSource: public IDataSource { bool ReadStarted = false; virtual void DoAbort() override { - } - virtual bool DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& columns) override; - virtual bool DoStartFetchingIndexes(const std::shared_ptr& /*sourcePtr*/, const TFetchingScriptCursor& /*step*/, const std::shared_ptr& /*indexes*/) override { + virtual bool DoStartFetchingColumns( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& columns) override; + virtual bool DoStartFetchingIndexes(const std::shared_ptr& /*sourcePtr*/, const TFetchingScriptCursor& /*step*/, + const std::shared_ptr& /*indexes*/) override { return false; } virtual void DoApplyIndex(const NIndexes::TIndexCheckerContainer& /*indexMeta*/) override { @@ -350,6 +418,18 @@ class TCommittedDataSource: public IDataSource { virtual bool DoAddSequentialEntityIds(const ui32 /*entityId*/) override { return false; } + + virtual bool DoAddTxConflict() override { + if (CommittedBlob.HasSnapshot()) { + 
GetContext()->GetReadMetadata()->SetBrokenWithCommitted(); + return true; + } else if (!GetContext()->GetReadMetadata()->IsMyUncommitted(CommittedBlob.GetWriteIdVerified())) { + GetContext()->GetReadMetadata()->SetConflictedWriteId(CommittedBlob.GetWriteIdVerified()); + return true; + } + return false; + } + public: virtual THashMap DecodeBlobAddresses(NBlobOperations::NRead::TCompositeReadBlobs&& blobsOriginal) const override { THashMap result; @@ -373,6 +453,10 @@ class TCommittedDataSource: public IDataSource { return CommittedBlob.GetBlobRange().Size; } + virtual ui64 GetColumnBlobBytes(const std::set& /*columnsIds*/) const override { + return CommittedBlob.GetBlobRange().Size; + } + virtual ui64 GetIndexRawBytes(const std::set& /*columnIds*/) const override { AFL_VERIFY(false); return 0; @@ -382,13 +466,11 @@ class TCommittedDataSource: public IDataSource { return CommittedBlob; } - TCommittedDataSource(const ui32 sourceIdx, const TCommittedBlob& committed, const std::shared_ptr& context, - const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& finish) - : TBase(sourceIdx, context, start, finish, committed.GetSnapshot(), committed.GetSnapshot(), committed.GetRecordsCount(), {}, - committed.GetIsDelete()) + TCommittedDataSource(const ui32 sourceIdx, const TCommittedBlob& committed, const std::shared_ptr& context) + : TBase(sourceIdx, context, committed.GetFirst(), committed.GetLast(), committed.GetSnapshotDef(TSnapshot::Zero()), + committed.GetSnapshotDef(TSnapshot::Zero()), committed.GetRecordsCount(), {}, committed.GetIsDelete()) , CommittedBlob(committed) { - } }; -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/ya.make b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/ya.make index cfa691a22e84..93ba27575ade 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/ya.make +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/ya.make 
@@ -18,6 +18,9 @@ PEERDIR( ydb/core/formats/arrow ydb/core/tx/columnshard/blobs_action ydb/core/tx/conveyor/usage + ydb/core/tx/limiter/grouped_memory/usage ) +GENERATE_ENUM_SERIALIZATION(columns_set.h) + END() diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/filler.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/filler.cpp index 076f8f11adc7..2a23b12c3fae 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/filler.cpp +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/filler.cpp @@ -16,7 +16,8 @@ NKikimr::TConclusionStatus TMetadataFromStore::DoFillMetadata(const NColumnShard } THashSet pathIds; - for (auto&& filter : read.PKRangesFilter) { + AFL_VERIFY(read.PKRangesFilter); + for (auto&& filter : *read.PKRangesFilter) { const ui64 fromPathId = *filter.GetPredicateFrom().Get(0, 0, 1); const ui64 toPathId = *filter.GetPredicateTo().Get(0, 0, Max()); auto pathInfos = logsIndex->GetTables(fromPathId, toPathId); @@ -42,7 +43,8 @@ NKikimr::TConclusionStatus TMetadataFromTable::DoFillMetadata(const NColumnShard if (!logsIndex) { return TConclusionStatus::Success(); } - for (auto&& filter : read.PKRangesFilter) { + AFL_VERIFY(read.PKRangesFilter); + for (auto&& filter : *read.PKRangesFilter) { const ui64 fromPathId = *filter.GetPredicateFrom().Get(0, 0, 1); const ui64 toPathId = *filter.GetPredicateTo().Get(0, 0, Max()); if (fromPathId <= read.PathId && read.PathId <= toPathId) { diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.h b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.h index 08f4d78e0c08..33be2ac027b1 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.h +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.h @@ -29,12 +29,12 @@ class TStatsIteratorBase: public TScanIteratorBase { return IndexGranules.empty(); } - virtual TConclusion> GetBatch() override { + virtual TConclusion> GetBatch() override { 
while (!Finished()) { auto batchOpt = ExtractStatsBatch(); if (!batchOpt) { AFL_VERIFY(Finished()); - return std::nullopt; + return std::shared_ptr(); } auto originalBatch = *batchOpt; if (originalBatch->num_rows() == 0) { @@ -55,10 +55,9 @@ class TStatsIteratorBase: public TScanIteratorBase { continue; } auto table = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({resultBatch})); - TPartialReadResult out(table, lastKey, std::nullopt); - return std::move(out); + return std::make_shared(table, lastKey, std::nullopt); } - return std::nullopt; + return std::shared_ptr(); } std::optional> ExtractStatsBatch() { @@ -136,10 +135,6 @@ class TStatsIterator : public TStatsIteratorBase { } } - const NTable::TScheme::TTableSchema& GetSchema() const override { - return StatsSchema; - } - NSsa::TColumnInfo GetDefaultColumn() const override { return NSsa::TColumnInfo::Original(1, "PathId"); } diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp index 79e3dfeaf248..da3cc74f8e92 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp @@ -6,8 +6,19 @@ namespace NKikimr::NOlap::NReader::NSysView::NChunks { void TStatsIterator::AppendStats(const std::vector>& builders, const TPortionInfo& portion) const { auto portionSchema = ReadMetadata->GetLoadSchemaVerified(portion); - const std::string prod = ::ToString(portion.GetMeta().Produced); + auto it = PortionType.find(portion.GetMeta().Produced); + if (it == PortionType.end()) { + it = PortionType.emplace(portion.GetMeta().Produced, ::ToString(portion.GetMeta().Produced)).first; + } + const arrow::util::string_view prodView = it->second.GetView(); const bool activity = !portion.IsRemovedFor(ReadMetadata->GetRequestSnapshot()); + static const TString ConstantEntityIsColumn = "COL"; + static const arrow::util::string_view 
ConstantEntityIsColumnView = + arrow::util::string_view(ConstantEntityIsColumn.data(), ConstantEntityIsColumn.size()); + static const TString ConstantEntityIsIndex = "IDX"; + static const arrow::util::string_view ConstantEntityIsIndexView = + arrow::util::string_view(ConstantEntityIsIndex.data(), ConstantEntityIsIndex.size()); + auto& entityStorages = EntityStorageNames[portion.GetMeta().GetTierName()]; { std::vector records; for (auto&& r : portion.Records) { @@ -16,26 +27,54 @@ void TStatsIterator::AppendStats(const std::vector blobsIds; + std::optional lastColumnId; + arrow::util::string_view lastColumnName; + arrow::util::string_view lastTierName; for (auto&& r : records) { NArrow::Append(*builders[0], portion.GetPathId()); - NArrow::Append(*builders[1], prod); + NArrow::Append(*builders[1], prodView); NArrow::Append(*builders[2], ReadMetadata->TabletId); NArrow::Append(*builders[3], r->GetMeta().GetNumRows()); NArrow::Append(*builders[4], r->GetMeta().GetRawBytes()); NArrow::Append(*builders[5], portion.GetPortionId()); NArrow::Append(*builders[6], r->GetChunkIdx()); - NArrow::Append(*builders[7], ReadMetadata->GetColumnNameDef(r->GetColumnId()).value_or("undefined")); + if (!lastColumnId || *lastColumnId != r->GetColumnId()) { + { + auto it = ColumnNamesById.find(r->GetColumnId()); + if (it == ColumnNamesById.end()) { + it = + ColumnNamesById.emplace(r->GetColumnId(), portionSchema->GetFieldByColumnIdVerified(r->GetColumnId())->name()).first; + } + lastColumnName = it->second.GetView(); + } + { + auto it = entityStorages.find(r->GetColumnId()); + if (it == entityStorages.end()) { + it = entityStorages.emplace(r->GetColumnId(), + portionSchema->GetIndexInfo().GetEntityStorageId(r->GetColumnId(), portion.GetMeta().GetTierName())).first; + } + lastTierName = it->second.GetView(); + } + lastColumnId = r->GetColumnId(); + } + NArrow::Append(*builders[7], lastColumnName); NArrow::Append(*builders[8], r->GetColumnId()); - std::string blobIdString = 
portion.GetBlobId(r->GetBlobRange().GetBlobIdxVerified()).ToStringLegacy(); - NArrow::Append(*builders[9], blobIdString); + { + auto itBlobIdString = blobsIds.find(r->GetBlobRange().GetBlobIdxVerified()); + if (itBlobIdString == blobsIds.end()) { + itBlobIdString = blobsIds.emplace( + r->GetBlobRange().GetBlobIdxVerified(), portion.GetBlobId(r->GetBlobRange().GetBlobIdxVerified()).ToStringLegacy()).first; + } + NArrow::Append( + *builders[9], arrow::util::string_view(itBlobIdString->second.data(), itBlobIdString->second.size())); + } NArrow::Append(*builders[10], r->BlobRange.Offset); NArrow::Append(*builders[11], r->BlobRange.Size); - NArrow::Append(*builders[12], activity); + NArrow::Append(*builders[12], activity); - const auto tierName = portionSchema->GetIndexInfo().GetEntityStorageId(r->GetColumnId(), portion.GetMeta().GetTierName()); - std::string strTierName(tierName.data(), tierName.size()); - NArrow::Append(*builders[13], strTierName); - NArrow::Append(*builders[14], "COL"); + NArrow::Append(*builders[13], arrow::util::string_view(lastTierName.data(), lastTierName.size())); + NArrow::Append(*builders[14], ConstantEntityIsColumnView); } } { @@ -48,7 +87,7 @@ void TStatsIterator::AppendStats(const std::vector(*builders[0], portion.GetPathId()); - NArrow::Append(*builders[1], prod); + NArrow::Append(*builders[1], prodView); NArrow::Append(*builders[2], ReadMetadata->TabletId); NArrow::Append(*builders[3], r->GetRecordsCount()); NArrow::Append(*builders[4], r->GetRawBytes()); @@ -66,11 +105,11 @@ void TStatsIterator::AppendStats(const std::vector(*builders[10], 0); NArrow::Append(*builders[11], bData->size()); } - NArrow::Append(*builders[12], activity); + NArrow::Append(*builders[12], activity); const auto tierName = portionSchema->GetIndexInfo().GetEntityStorageId(r->GetIndexId(), portion.GetMeta().GetTierName()); std::string strTierName(tierName.data(), tierName.size()); NArrow::Append(*builders[13], strTierName); - NArrow::Append(*builders[14], "IDX"); + 
NArrow::Append(*builders[14], ConstantEntityIsIndexView); } } } diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h index c881d4f161bf..6fb758f46911 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h @@ -28,6 +28,32 @@ class TReadStatsMetadata: public NAbstract::TReadStatsMetadata, std::enable_shar class TStatsIterator: public NAbstract::TStatsIterator { private: + class TViewContainer { + private: + TString Data; + std::string STLData; + arrow::util::string_view View; + + public: + const arrow::util::string_view& GetView() const { + return View; + } + + TViewContainer(const TString& data) + : Data(data) + , View(arrow::util::string_view(Data.data(), Data.size())) { + } + + TViewContainer(const std::string& data) + : STLData(data) + , View(arrow::util::string_view(STLData.data(), STLData.size())) { + } + }; + + mutable THashMap ColumnNamesById; + mutable THashMap PortionType; + mutable THashMap> EntityStorageNames; + using TBase = NAbstract::TStatsIterator; virtual bool AppendStats(const std::vector>& builders, NAbstract::TGranuleMetaView& granule) const override; virtual ui32 PredictRecordsCount(const NAbstract::TGranuleMetaView& granule) const override; diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp index b79da31a64ee..83b2306a1bff 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp @@ -16,7 +16,7 @@ void TStatsIterator::AppendStats(const std::vector(*builders[6], portion.GetColumnBlobBytes()); NArrow::Append(*builders[7], portion.GetIndexBlobBytes()); NArrow::Append(*builders[8], portion.GetPortionId()); - NArrow::Append(*builders[9], 
!portion.IsRemovedFor(ReadMetadata->GetRequestSnapshot())); + NArrow::Append(*builders[9], !portion.IsRemovedFor(ReadMetadata->GetRequestSnapshot())); auto tierName = portion.GetTierNameDef(NBlobOperations::TGlobal::DefaultStorageId); NArrow::Append(*builders[10], arrow::util::string_view(tierName.data(), tierName.size())); @@ -31,6 +31,8 @@ void TStatsIterator::AppendStats(const std::vector(*builders[11], arrow::util::string_view(statInfo.data(), statInfo.size())); + + NArrow::Append(*builders[12], portion.HasRuntimeFeature(TPortionInfo::ERuntimeFeature::Optimized)); } ui32 TStatsIterator::PredictRecordsCount(const NAbstract::TGranuleMetaView& granule) const { diff --git a/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.cpp b/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.cpp index 5decb79c2203..55d28a5a61f4 100644 --- a/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.cpp +++ b/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.cpp @@ -1,94 +1,91 @@ #include "tx_internal_scan.h" -#include -#include -#include + #include #include +#include +#include #include +#include +#include namespace NKikimr::NOlap::NReader { -bool TTxInternalScan::Execute(TTransactionContext& /*txc*/, const TActorContext& /*ctx*/) { - TMemoryProfileGuard mpg("TTxInternalScan::Execute"); +void TTxInternalScan::SendError(const TString& problem, const TString& details, const TActorContext& ctx) const { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan failed")("problem", problem)("details", details); auto& request = *InternalScanEvent->Get(); - const TSnapshot snapshot = request.ReadToSnapshot.value_or(NOlap::TSnapshot(Self->LastPlannedStep, Self->LastPlannedTxId)); + auto scanComputeActor = InternalScanEvent->Sender; - TReadDescription read(snapshot, request.GetReverse()); - read.PathId = request.GetPathId(); - read.ReadNothing = !Self->TablesManager.HasTable(read.PathId); - std::unique_ptr 
scannerConstructor(new NPlain::TIndexScannerConstructor(snapshot, request.GetItemsLimit(), request.GetReverse())); - read.ColumnIds = request.GetColumnIds(); - read.ColumnNames = request.GetColumnNames(); - if (request.RangesFilter) { - read.PKRangesFilter = std::move(*request.RangesFilter); - } + auto ev = MakeHolder(ScanGen, Self->TabletID()); + ev->Record.SetStatus(Ydb::StatusIds::BAD_REQUEST); + auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_BAD_REQUEST, + TStringBuilder() << "Table " << request.GetPathId() << " (shard " << Self->TabletID() << ") scan failed, reason: " << problem << "/" + << details); + NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); - const TVersionedIndex* vIndex = Self->GetIndexOptional() ? &Self->GetIndexOptional()->GetVersionedIndex() : nullptr; - AFL_VERIFY(vIndex); - { - TProgramContainer pContainer; - pContainer.OverrideProcessingColumns(read.ColumnNames); - read.SetProgram(std::move(pContainer)); - } + ctx.Send(scanComputeActor, ev.Release()); +} - { - auto newRange = scannerConstructor->BuildReadMetadata(Self, read); - if (!newRange) { - ErrorDescription = newRange.GetErrorMessage(); - ReadMetadataRange = nullptr; - return true; - } - ReadMetadataRange = newRange.DetachResult(); - } - AFL_VERIFY(ReadMetadataRange); +bool TTxInternalScan::Execute(TTransactionContext& /*txc*/, const TActorContext& /*ctx*/) { return true; } void TTxInternalScan::Complete(const TActorContext& ctx) { TMemoryProfileGuard mpg("TTxInternalScan::Complete"); + auto& request = *InternalScanEvent->Get(); auto scanComputeActor = InternalScanEvent->Sender; - const NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build()("tablet", Self->TabletID()); - - if (!ReadMetadataRange) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan failed")("reason", "no metadata")("error", ErrorDescription); + const TSnapshot snapshot = request.ReadToSnapshot.value_or(NOlap::TSnapshot(Self->LastPlannedStep, 
Self->LastPlannedTxId)); + const NActors::TLogContextGuard gLogging = + NActors::TLogContextBuilder::Build()("tablet", Self->TabletID())("snapshot", snapshot.DebugString()); + TReadMetadataPtr readMetadataRange; + { + TReadDescription read(snapshot, request.GetReverse()); + read.PathId = request.GetPathId(); + read.LockId = LockId; + read.ReadNothing = !Self->TablesManager.HasTable(read.PathId); + std::unique_ptr scannerConstructor( + new NPlain::TIndexScannerConstructor(snapshot, request.GetItemsLimit(), request.GetReverse())); + read.ColumnIds = request.GetColumnIds(); + read.ColumnNames = request.GetColumnNames(); + if (request.RangesFilter) { + read.PKRangesFilter = request.RangesFilter; + } - auto ev = MakeHolder(ScanGen, Self->TabletID()); - ev->Record.SetStatus(Ydb::StatusIds::BAD_REQUEST); - auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_BAD_REQUEST, TStringBuilder() - << "Table " << request.GetPathId() << " (shard " << Self->TabletID() << ") scan failed, reason: " << ErrorDescription ? ErrorDescription : "no metadata ranges"); - NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); + const TVersionedIndex* vIndex = Self->GetIndexOptional() ? 
&Self->GetIndexOptional()->GetVersionedIndex() : nullptr; + AFL_VERIFY(vIndex); + { + TProgramContainer pContainer; + pContainer.OverrideProcessingColumns(read.ColumnNames); + read.SetProgram(std::move(pContainer)); + } - ctx.Send(scanComputeActor, ev.Release()); - return; + { + auto newRange = scannerConstructor->BuildReadMetadata(Self, read); + if (!newRange) { + return SendError("cannot create read metadata", newRange.GetErrorMessage(), ctx); + } + readMetadataRange = TValidator::CheckNotNull(newRange.DetachResult()); + } } + TStringBuilder detailedInfo; if (IS_LOG_PRIORITY_ENABLED(NActors::NLog::PRI_TRACE, NKikimrServices::TX_COLUMNSHARD)) { - detailedInfo << " read metadata: (" << *ReadMetadataRange << ")"; + detailedInfo << " read metadata: (" << *readMetadataRange << ")"; } const TVersionedIndex* index = nullptr; if (Self->HasIndex()) { index = &Self->GetIndexAs().GetVersionedIndex(); } - const TConclusion requestCookie = Self->InFlightReadsTracker.AddInFlightRequest(ReadMetadataRange, index); - if (!requestCookie) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan failed")("reason", requestCookie.GetErrorMessage())("trace_details", detailedInfo); - auto ev = MakeHolder(ScanGen, Self->TabletID()); + readMetadataRange->OnBeforeStartReading(*Self); - ev->Record.SetStatus(Ydb::StatusIds::INTERNAL_ERROR); - auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_TEMPORARILY_UNAVAILABLE, TStringBuilder() - << "Table " << request.GetPathId() << " (shard " << Self->TabletID() << ") scan failed, reason: " << requestCookie.GetErrorMessage()); - NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); - Self->ScanCounters.OnScanDuration(NColumnShard::TScanCounters::EStatusFinish::CannotAddInFlight, TDuration::Zero()); - ctx.Send(scanComputeActor, ev.Release()); - return; - } - auto scanActor = ctx.Register(new TColumnShardScan(Self->SelfId(), scanComputeActor, Self->GetStoragesManager(), - TComputeShardingPolicy(), ScanId, TxId, ScanGen, 
*requestCookie, Self->TabletID(), TDuration::Max(), ReadMetadataRange, - NKikimrDataEvents::FORMAT_ARROW, Self->ScanCounters)); + const ui64 requestCookie = Self->InFlightReadsTracker.AddInFlightRequest(readMetadataRange, index); + auto scanActor = ctx.Register(new TColumnShardScan(Self->SelfId(), scanComputeActor, Self->GetStoragesManager(), TComputeShardingPolicy(), + ScanId, LockId.value_or(0), ScanGen, requestCookie, Self->TabletID(), TDuration::Max(), readMetadataRange, + NKikimrDataEvents::FORMAT_ARROW, + Self->Counters.GetScanCounters())); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxInternalScan started")("actor_id", scanActor)("trace_detailed", detailedInfo); } -} +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.h b/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.h index 3e20c23a7c10..413d33714867 100644 --- a/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.h +++ b/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.h @@ -7,25 +7,26 @@ namespace NKikimr::NOlap::NReader { class TTxInternalScan: public NTabletFlatExecutor::TTransactionBase { private: using TBase = NTabletFlatExecutor::TTransactionBase; + TEvColumnShard::TEvInternalScan::TPtr InternalScanEvent; const ui32 ScanGen = 1; - const ui32 TxId = 1; const ui32 ScanId = 1; + const std::optional LockId; + void SendError(const TString& problem, const TString& details, const TActorContext& ctx) const; + public: using TReadMetadataPtr = TReadMetadataBase::TConstPtr; TTxInternalScan(NColumnShard::TColumnShard* self, TEvColumnShard::TEvInternalScan::TPtr& ev) : TBase(self) - , InternalScanEvent(ev) { + , InternalScanEvent(ev) + , LockId(InternalScanEvent->Get()->GetLockId()) + { } bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; void Complete(const TActorContext& ctx) override; TTxType GetTxType() const override { return 
NColumnShard::TXTYPE_START_INTERNAL_SCAN; } -private: - TString ErrorDescription; - TEvColumnShard::TEvInternalScan::TPtr InternalScanEvent; - TReadMetadataPtr ReadMetadataRange; }; } \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.cpp b/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.cpp index ab0d41db1931..74f09deb0197 100644 --- a/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.cpp +++ b/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.cpp @@ -1,181 +1,32 @@ #include "tx_scan.h" -#include -#include -#include + #include #include +#include +#include #include +#include +#include namespace NKikimr::NOlap::NReader { -std::vector ExtractTypes(const std::vector>& columns) { - std::vector types; - types.reserve(columns.size()); - for (auto& [name, type] : columns) { - types.push_back(type); - } - return types; -} - -TString FromCells(const TConstArrayRef& cells, const std::vector>& columns) { - Y_ABORT_UNLESS(cells.size() == columns.size()); - if (cells.empty()) { - return {}; - } - - std::vector types = ExtractTypes(columns); - - NArrow::TArrowBatchBuilder batchBuilder; - batchBuilder.Reserve(1); - auto startStatus = batchBuilder.Start(columns); - Y_ABORT_UNLESS(startStatus.ok(), "%s", startStatus.ToString().c_str()); - - batchBuilder.AddRow(NKikimr::TDbTupleRef(), NKikimr::TDbTupleRef(types.data(), cells.data(), cells.size())); - - auto batch = batchBuilder.FlushBatch(false); - Y_ABORT_UNLESS(batch); - Y_ABORT_UNLESS(batch->num_columns() == (int)cells.size()); - Y_ABORT_UNLESS(batch->num_rows() == 1); - return NArrow::SerializeBatchNoCompression(batch); -} - -std::pair RangePredicates(const TSerializedTableRange& range, const std::vector>& columns) { - std::vector leftCells; - std::vector> leftColumns; - bool leftTrailingNull = false; - { - TConstArrayRef cells = range.From.GetCells(); - const size_t size = cells.size(); - Y_ASSERT(size <= columns.size()); - leftCells.reserve(size); - 
leftColumns.reserve(size); - for (size_t i = 0; i < size; ++i) { - if (!cells[i].IsNull()) { - leftCells.push_back(cells[i]); - leftColumns.push_back(columns[i]); - leftTrailingNull = false; - } else { - leftTrailingNull = true; - } - } - } - - std::vector rightCells; - std::vector> rightColumns; - bool rightTrailingNull = false; - { - TConstArrayRef cells = range.To.GetCells(); - const size_t size = cells.size(); - Y_ASSERT(size <= columns.size()); - rightCells.reserve(size); - rightColumns.reserve(size); - for (size_t i = 0; i < size; ++i) { - if (!cells[i].IsNull()) { - rightCells.push_back(cells[i]); - rightColumns.push_back(columns[i]); - rightTrailingNull = false; - } else { - rightTrailingNull = true; - } - } - } +void TTxScan::SendError(const TString& problem, const TString& details, const TActorContext& ctx) const { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan failed")("problem", problem)("details", details); + const auto& request = Ev->Get()->Record; + const TString table = request.GetTablePath(); + const ui32 scanGen = request.GetGeneration(); + const auto scanComputeActor = Ev->Sender; - const bool fromInclusive = range.FromInclusive || leftTrailingNull; - const bool toInclusive = range.ToInclusive && !rightTrailingNull; - - TString leftBorder = FromCells(leftCells, leftColumns); - TString rightBorder = FromCells(rightCells, rightColumns); - auto leftSchema = NArrow::MakeArrowSchema(leftColumns); - Y_ASSERT(leftSchema.ok()); - auto rightSchema = NArrow::MakeArrowSchema(rightColumns); - Y_ASSERT(rightSchema.ok()); - return std::make_pair( - TPredicate(fromInclusive ? NKernels::EOperation::GreaterEqual : NKernels::EOperation::Greater, leftBorder, leftSchema.ValueUnsafe()), - TPredicate(toInclusive ? 
NKernels::EOperation::LessEqual : NKernels::EOperation::Less, rightBorder, rightSchema.ValueUnsafe())); -} + auto ev = MakeHolder(scanGen, Self->TabletID()); + ev->Record.SetStatus(Ydb::StatusIds::BAD_REQUEST); + auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_BAD_REQUEST, + TStringBuilder() << "Table " << table << " (shard " << Self->TabletID() << ") scan failed, reason: " << problem << "/" << details); + NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); -static bool FillPredicatesFromRange(TReadDescription& read, const ::NKikimrTx::TKeyRange& keyRange, - const std::vector>& ydbPk, ui64 tabletId, const TIndexInfo* indexInfo, TString& error) { - TSerializedTableRange range(keyRange); - auto fromPredicate = std::make_shared(); - auto toPredicate = std::make_shared(); - std::tie(*fromPredicate, *toPredicate) = RangePredicates(range, ydbPk); - - LOG_S_DEBUG("TTxScan range predicate. From key size: " << range.From.GetCells().size() - << " To key size: " << range.To.GetCells().size() - << " greater predicate over columns: " << fromPredicate->ToString() - << " less predicate over columns: " << toPredicate->ToString() - << " at tablet " << tabletId); - - if (!read.PKRangesFilter.Add(fromPredicate, toPredicate, indexInfo)) { - error = "Error building filter"; - return false; - } - return true; + ctx.Send(scanComputeActor, ev.Release()); } bool TTxScan::Execute(TTransactionContext& /*txc*/, const TActorContext& /*ctx*/) { - TMemoryProfileGuard mpg("TTxScan::Execute"); - auto& record = Ev->Get()->Record; - TSnapshot snapshot(record.GetSnapshot().GetStep(), record.GetSnapshot().GetTxId()); - const auto scanId = record.GetScanId(); - const ui64 txId = record.GetTxId(); - - LOG_S_DEBUG("TTxScan prepare txId: " << txId << " scanId: " << scanId << " at tablet " << Self->TabletID()); - - TReadDescription read(snapshot, record.GetReverse()); - read.PathId = record.GetLocalPathId(); - read.ReadNothing = !Self->TablesManager.HasTable(read.PathId); - 
read.TableName = record.GetTablePath(); - bool isIndex = false; - std::unique_ptr scannerConstructor = [&]() { - const ui64 itemsLimit = record.HasItemsLimit() ? record.GetItemsLimit() : 0; - auto sysViewPolicy = NSysView::NAbstract::ISysViewPolicy::BuildByPath(read.TableName); - isIndex = !sysViewPolicy; - if (!sysViewPolicy) { - return std::unique_ptr(new NPlain::TIndexScannerConstructor(snapshot, itemsLimit, record.GetReverse())); - } else { - return sysViewPolicy->CreateConstructor(snapshot, itemsLimit, record.GetReverse()); - } - }(); - read.ColumnIds.assign(record.GetColumnTags().begin(), record.GetColumnTags().end()); - read.StatsMode = record.GetStatsMode(); - - const TVersionedIndex* vIndex = Self->GetIndexOptional() ? &Self->GetIndexOptional()->GetVersionedIndex() : nullptr; - auto parseResult = scannerConstructor->ParseProgram(vIndex, record, read); - if (!parseResult) { - ErrorDescription = parseResult.GetErrorMessage(); - return true; - } - - if (!record.RangesSize()) { - auto range = scannerConstructor->BuildReadMetadata(Self, read); - if (range.IsSuccess()) { - ReadMetadataRange = range.DetachResult(); - } else { - ErrorDescription = range.GetErrorMessage(); - } - return true; - } - - auto ydbKey = scannerConstructor->GetPrimaryKeyScheme(Self); - auto* indexInfo = (vIndex && isIndex) ? 
&vIndex->GetSchema(snapshot)->GetIndexInfo() : nullptr; - for (auto& range : record.GetRanges()) { - if (!FillPredicatesFromRange(read, range, ydbKey, Self->TabletID(), indexInfo, ErrorDescription)) { - ReadMetadataRange = nullptr; - return true; - } - } - { - auto newRange = scannerConstructor->BuildReadMetadata(Self, read); - if (!newRange) { - ErrorDescription = newRange.GetErrorMessage(); - ReadMetadataRange = nullptr; - return true; - } - ReadMetadataRange = newRange.DetachResult(); - } - AFL_VERIFY(ReadMetadataRange); return true; } @@ -183,67 +34,101 @@ void TTxScan::Complete(const TActorContext& ctx) { TMemoryProfileGuard mpg("TTxScan::Complete"); auto& request = Ev->Get()->Record; auto scanComputeActor = Ev->Sender; - const auto& snapshot = request.GetSnapshot(); + TSnapshot snapshot = TSnapshot(request.GetSnapshot().GetStep(), request.GetSnapshot().GetTxId()); + if (snapshot.IsZero()) { + snapshot = Self->GetLastTxSnapshot(); + } const auto scanId = request.GetScanId(); const ui64 txId = request.GetTxId(); const ui32 scanGen = request.GetGeneration(); - TString table = request.GetTablePath(); - auto dataFormat = request.GetDataFormat(); + const TString table = request.GetTablePath(); + const auto dataFormat = request.GetDataFormat(); const TDuration timeout = TDuration::MilliSeconds(request.GetTimeoutMs()); if (scanGen > 1) { - Self->IncCounter(NColumnShard::COUNTER_SCAN_RESTARTED); + Self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_SCAN_RESTARTED); } - const NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build() - ("tx_id", txId)("scan_id", scanId)("gen", scanGen)("table", table)("snapshot", snapshot)("tablet", Self->TabletID())("timeout", timeout); + const NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build() ("tx_id", txId)("scan_id", scanId)("gen", scanGen)( + "table", table)("snapshot", snapshot)("tablet", Self->TabletID())("timeout", timeout); - if (!ReadMetadataRange) { - 
AFL_WARN(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan failed")("reason", "no metadata")("error", ErrorDescription); + TReadMetadataPtr readMetadataRange; + { + LOG_S_DEBUG("TTxScan prepare txId: " << txId << " scanId: " << scanId << " at tablet " << Self->TabletID()); - auto ev = MakeHolder(scanGen, Self->TabletID()); - ev->Record.SetStatus(Ydb::StatusIds::BAD_REQUEST); - auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_BAD_REQUEST, TStringBuilder() - << "Table " << table << " (shard " << Self->TabletID() << ") scan failed, reason: " << ErrorDescription ? ErrorDescription : "no metadata ranges"); - NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); + TReadDescription read(snapshot, request.GetReverse()); + read.TxId = txId; + if (request.HasLockTxId()) { + read.LockId = request.GetLockTxId(); + } + read.PathId = request.GetLocalPathId(); + read.ReadNothing = !Self->TablesManager.HasTable(read.PathId); + read.TableName = table; + bool isIndex = false; + std::unique_ptr scannerConstructor = [&]() { + const ui64 itemsLimit = request.HasItemsLimit() ? request.GetItemsLimit() : 0; + auto sysViewPolicy = NSysView::NAbstract::ISysViewPolicy::BuildByPath(read.TableName); + isIndex = !sysViewPolicy; + if (!sysViewPolicy) { + return std::unique_ptr(new NPlain::TIndexScannerConstructor(snapshot, itemsLimit, request.GetReverse())); + } else { + return sysViewPolicy->CreateConstructor(snapshot, itemsLimit, request.GetReverse()); + } + }(); + read.ColumnIds.assign(request.GetColumnTags().begin(), request.GetColumnTags().end()); + read.StatsMode = request.GetStatsMode(); + + const TVersionedIndex* vIndex = Self->GetIndexOptional() ? 
&Self->GetIndexOptional()->GetVersionedIndex() : nullptr; + auto parseResult = scannerConstructor->ParseProgram(vIndex, request, read); + if (!parseResult) { + return SendError("cannot parse program", parseResult.GetErrorMessage(), ctx); + } - ctx.Send(scanComputeActor, ev.Release()); - return; + if (!request.RangesSize()) { + auto newRange = scannerConstructor->BuildReadMetadata(Self, read); + if (newRange.IsSuccess()) { + readMetadataRange = TValidator::CheckNotNull(newRange.DetachResult()); + } else { + return SendError("cannot build metadata withno ranges", newRange.GetErrorMessage(), ctx); + } + } else { + auto ydbKey = scannerConstructor->GetPrimaryKeyScheme(Self); + { + auto filterConclusion = NOlap::TPKRangesFilter::BuildFromProto(request, request.GetReverse(), ydbKey); + if (filterConclusion.IsFail()) { + return SendError("cannot build ranges filter", filterConclusion.GetErrorMessage(), ctx); + } + read.PKRangesFilter = std::make_shared(filterConclusion.DetachResult()); + } + auto newRange = scannerConstructor->BuildReadMetadata(Self, read); + if (!newRange) { + return SendError("cannot build metadata", newRange.GetErrorMessage(), ctx); + } + readMetadataRange = TValidator::CheckNotNull(newRange.DetachResult()); + } } + AFL_VERIFY(readMetadataRange); + readMetadataRange->OnBeforeStartReading(*Self); + TStringBuilder detailedInfo; if (IS_LOG_PRIORITY_ENABLED(NActors::NLog::PRI_TRACE, NKikimrServices::TX_COLUMNSHARD)) { - detailedInfo << " read metadata: (" << *ReadMetadataRange << ")" << " req: " << request; + detailedInfo << " read metadata: (" << *readMetadataRange << ")" + << " req: " << request; } const TVersionedIndex* index = nullptr; if (Self->HasIndex()) { index = &Self->GetIndexAs().GetVersionedIndex(); } - const TConclusion requestCookie = Self->InFlightReadsTracker.AddInFlightRequest(ReadMetadataRange, index); - if (!requestCookie) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan failed")("reason", 
requestCookie.GetErrorMessage())("trace_details", detailedInfo); - auto ev = MakeHolder(scanGen, Self->TabletID()); - - ev->Record.SetStatus(Ydb::StatusIds::INTERNAL_ERROR); - auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_TEMPORARILY_UNAVAILABLE, TStringBuilder() - << "Table " << table << " (shard " << Self->TabletID() << ") scan failed, reason: " << requestCookie.GetErrorMessage()); - NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); - Self->ScanCounters.OnScanDuration(NColumnShard::TScanCounters::EStatusFinish::CannotAddInFlight, TDuration::Zero()); - ctx.Send(scanComputeActor, ev.Release()); - return; - } - auto statsDelta = Self->InFlightReadsTracker.GetSelectStatsDelta(); + const ui64 requestCookie = Self->InFlightReadsTracker.AddInFlightRequest(readMetadataRange, index); - Self->IncCounter(NColumnShard::COUNTER_READ_INDEX_PORTIONS, statsDelta.Portions); - Self->IncCounter(NColumnShard::COUNTER_READ_INDEX_BLOBS, statsDelta.Blobs); - Self->IncCounter(NColumnShard::COUNTER_READ_INDEX_ROWS, statsDelta.Rows); - Self->IncCounter(NColumnShard::COUNTER_READ_INDEX_BYTES, statsDelta.Bytes); + Self->Counters.GetTabletCounters()->OnScanStarted(Self->InFlightReadsTracker.GetSelectStatsDelta()); TComputeShardingPolicy shardingPolicy; AFL_VERIFY(shardingPolicy.DeserializeFromProto(request.GetComputeShardingPolicy())); - auto scanActor = ctx.Register(new TColumnShardScan(Self->SelfId(), scanComputeActor, Self->GetStoragesManager(), - shardingPolicy, scanId, txId, scanGen, *requestCookie, Self->TabletID(), timeout, ReadMetadataRange, dataFormat, Self->ScanCounters)); + auto scanActor = ctx.Register(new TColumnShardScan(Self->SelfId(), scanComputeActor, Self->GetStoragesManager(), shardingPolicy, scanId, + txId, scanGen, requestCookie, Self->TabletID(), timeout, readMetadataRange, dataFormat, Self->Counters.GetScanCounters())); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan started")("actor_id", scanActor)("trace_detailed", 
detailedInfo); } -} +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.h b/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.h index 2d9eb9619a64..347def3dea14 100644 --- a/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.h +++ b/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.h @@ -7,6 +7,8 @@ namespace NKikimr::NOlap::NReader { class TTxScan: public NTabletFlatExecutor::TTransactionBase { private: using TBase = NTabletFlatExecutor::TTransactionBase; + void SendError(const TString& problem, const TString& details, const TActorContext& ctx) const; + public: using TReadMetadataPtr = TReadMetadataBase::TConstPtr; @@ -17,12 +19,12 @@ class TTxScan: public NTabletFlatExecutor::TTransactionBase +#include +#include #include #include namespace NKikimr::NOlap { +using TColumnLoader = NArrow::NAccessor::TColumnLoader; +using TColumnSaver = NArrow::NAccessor::TColumnSaver; + class IIndexInfo { public: - enum class ESpecialColumn: ui32 { + enum class ESpecialColumn : ui32 { PLAN_STEP = NOlap::NPortion::TSpecialColumns::SPEC_COL_PLAN_STEP_INDEX, TX_ID = NOlap::NPortion::TSpecialColumns::SPEC_COL_TX_ID_INDEX, DELETE_FLAG = NOlap::NPortion::TSpecialColumns::SPEC_COL_DELETE_FLAG_INDEX @@ -17,7 +21,7 @@ class IIndexInfo { using TSystemColumnsSet = ui64; - enum class ESystemColumnsSet: ui64 { + enum class ESystemColumnsSet : ui64 { Snapshot = 1, Deletion = 1 << 1, }; @@ -30,6 +34,11 @@ class IIndexInfo { return SPEC_COL_DELETE_FLAG; } + static const std::set& GetNecessarySystemColumnIdsSet() { + static const std::set result = { (ui32)ESpecialColumn::PLAN_STEP, (ui32)ESpecialColumn::TX_ID }; + return result; + } + static const std::vector& GetSnapshotColumnNames() { static const std::vector result = { std::string(SPEC_COL_PLAN_STEP), std::string(SPEC_COL_TX_ID) }; return result; @@ -64,7 +73,8 @@ class IIndexInfo { static void AddSpecialFields(std::vector>& fields) { AddSnapshotFields(fields); 
- fields.push_back(arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean())); + static const std::shared_ptr f = arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean()); + fields.push_back(f); } static const std::vector& SnapshotColumnNames() { @@ -73,8 +83,10 @@ class IIndexInfo { } static void AddSnapshotFields(std::vector>& fields) { - fields.push_back(arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64())); - fields.push_back(arrow::field(SPEC_COL_TX_ID, arrow::uint64())); + static const std::shared_ptr ps = arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64()); + static const std::shared_ptr txid = arrow::field(SPEC_COL_TX_ID, arrow::uint64()); + fields.push_back(ps); + fields.push_back(txid); } static void AddDeleteFields(std::vector>& fields) { @@ -87,20 +99,28 @@ class IIndexInfo { } static const std::vector& GetSystemColumnNames() { - static const std::vector result = { std::string(SPEC_COL_PLAN_STEP), std::string(SPEC_COL_TX_ID), std::string(SPEC_COL_DELETE_FLAG) }; + static const std::vector result = { std::string(SPEC_COL_PLAN_STEP), std::string(SPEC_COL_TX_ID), + std::string(SPEC_COL_DELETE_FLAG) }; return result; } static const std::vector& GetSystemColumnIds() { - static const std::vector result = { (ui32)ESpecialColumn::PLAN_STEP, (ui32)ESpecialColumn::TX_ID, (ui32)ESpecialColumn::DELETE_FLAG }; + static const std::vector result = { (ui32)ESpecialColumn::PLAN_STEP, (ui32)ESpecialColumn::TX_ID, + (ui32)ESpecialColumn::DELETE_FLAG }; return result; } [[nodiscard]] static std::vector AddSpecialFieldIds(const std::vector& baseColumnIds) { std::vector result = baseColumnIds; - for (auto&& i : GetSystemColumnIds()) { - result.emplace_back(i); - } + const auto& cIds = GetSystemColumnIds(); + result.insert(result.end(), cIds.begin(), cIds.end()); + return result; + } + + [[nodiscard]] static std::set AddSpecialFieldIds(const std::set& baseColumnIds) { + std::set result = baseColumnIds; + const auto& cIds = GetSystemColumnIds(); + result.insert(cIds.begin(), cIds.end()); 
return result; } @@ -130,17 +150,14 @@ class IIndexInfo { } static std::shared_ptr ArrowSchemaSnapshot() { - static std::shared_ptr result = std::make_shared(arrow::FieldVector{ - arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64()), - arrow::field(SPEC_COL_TX_ID, arrow::uint64()) - }); + static std::shared_ptr result = std::make_shared( + arrow::FieldVector{ arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64()), arrow::field(SPEC_COL_TX_ID, arrow::uint64()) }); return result; } static std::shared_ptr ArrowSchemaDeletion() { - static std::shared_ptr result = std::make_shared(arrow::FieldVector{ - arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean()) - }); + static std::shared_ptr result = + std::make_shared(arrow::FieldVector{ arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean()) }); return result; } @@ -149,19 +166,15 @@ class IIndexInfo { } static bool IsSpecialColumn(const std::string& fieldName) { - return fieldName == SPEC_COL_PLAN_STEP - || fieldName == SPEC_COL_TX_ID - || fieldName == SPEC_COL_DELETE_FLAG; + return fieldName == SPEC_COL_PLAN_STEP || fieldName == SPEC_COL_TX_ID || fieldName == SPEC_COL_DELETE_FLAG; } static bool IsSpecialColumn(const ui32 fieldId) { - return fieldId == (ui32)ESpecialColumn::PLAN_STEP - || fieldId == (ui32)ESpecialColumn::TX_ID - || fieldId == (ui32)ESpecialColumn::DELETE_FLAG; + return fieldId == (ui32)ESpecialColumn::PLAN_STEP || fieldId == (ui32)ESpecialColumn::TX_ID || + fieldId == (ui32)ESpecialColumn::DELETE_FLAG; } - static bool IsNullableVerified(const ui32 fieldId) { - Y_UNUSED(fieldId); + static bool IsNullableVerified(const ui32 /*fieldId*/) { return false; } @@ -183,4 +196,4 @@ class IIndexInfo { virtual ~IIndexInfo() = default; }; -} // namespace NKikimr::NOlap +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/loader.cpp b/ydb/core/tx/columnshard/engines/scheme/abstract/loader.cpp deleted file mode 100644 index d74dc491519d..000000000000 --- 
a/ydb/core/tx/columnshard/engines/scheme/abstract/loader.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include "loader.h" -#include - -namespace NKikimr::NOlap { - -TString TColumnLoader::DebugString() const { - TStringBuilder result; - if (ExpectedSchema) { - result << "schema:" << ExpectedSchema->ToString() << ";"; - } - if (Transformer) { - result << "transformer:" << Transformer->DebugString() << ";"; - } - if (Serializer) { - result << "serializer:" << Serializer->DebugString() << ";"; - } - return result; -} - -TColumnLoader::TColumnLoader(NArrow::NTransformation::ITransformer::TPtr transformer, const NArrow::NSerialization::TSerializerContainer& serializer, - const std::shared_ptr& expectedSchema, const std::shared_ptr& defaultValue, const ui32 columnId) - : Transformer(transformer) - , Serializer(serializer) - , ExpectedSchema(expectedSchema) - , DefaultValue(defaultValue) - , ColumnId(columnId) { - Y_ABORT_UNLESS(ExpectedSchema); - auto fieldsCountStr = ::ToString(ExpectedSchema->num_fields()); - Y_ABORT_UNLESS(ExpectedSchema->num_fields() == 1, "%s", fieldsCountStr.data()); - Y_ABORT_UNLESS(Serializer); -} - -const std::shared_ptr& TColumnLoader::GetField() const { - return ExpectedSchema->field(0); -} - -arrow::Result> TColumnLoader::Apply(const TString& data) const { - Y_ABORT_UNLESS(Serializer); - arrow::Result> columnArray = - Transformer ? 
Serializer->Deserialize(data) : Serializer->Deserialize(data, ExpectedSchema); - if (!columnArray.ok()) { - return columnArray; - } - if (Transformer) { - return Transformer->Transform(*columnArray); - } else { - return columnArray; - } -} - -std::shared_ptr TColumnLoader::ApplyVerified(const TString& data) const { - return NArrow::TStatusValidator::GetValid(Apply(data)); -} - -std::shared_ptr TColumnLoader::ApplyVerifiedColumn(const TString& data) const { - auto rb = ApplyVerified(data); - AFL_VERIFY(rb->num_columns() == 1)("schema", rb->schema()->ToString()); - return rb->column(0); -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/loader.h b/ydb/core/tx/columnshard/engines/scheme/abstract/loader.h deleted file mode 100644 index d128caaecb58..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/abstract/loader.h +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once -#include -#include -#include -#include - -namespace NKikimr::NOlap { - -class TColumnLoader { -private: - NArrow::NTransformation::ITransformer::TPtr Transformer; - NArrow::NSerialization::TSerializerContainer Serializer; - YDB_READONLY_DEF(std::shared_ptr, ExpectedSchema); - YDB_READONLY_DEF(std::shared_ptr, DefaultValue); - const ui32 ColumnId; -public: - bool IsEqualTo(const TColumnLoader& item) const { - if (!!Transformer != !!item.Transformer) { - return false; - } else if (!!Transformer && !Transformer->IsEqualTo(*item.Transformer)) { - return false; - } - if (!Serializer.IsEqualTo(item.Serializer)) { - return false; - } - return true; - } - - TString DebugString() const; - - TColumnLoader(NArrow::NTransformation::ITransformer::TPtr transformer, const NArrow::NSerialization::TSerializerContainer& serializer, - const std::shared_ptr& expectedSchema, const std::shared_ptr& defaultValue, const ui32 columnId); - - ui32 GetColumnId() const { - return ColumnId; - } - - const std::shared_ptr& GetField() const; - - arrow::Result> Apply(const TString& data) 
const; - - std::shared_ptr ApplyVerified(const TString& data) const; - - std::shared_ptr ApplyVerifiedColumn(const TString& data) const; -}; - -} diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/ya.make b/ydb/core/tx/columnshard/engines/scheme/abstract/ya.make index b830415daae1..79b12f94389e 100644 --- a/ydb/core/tx/columnshard/engines/scheme/abstract/ya.make +++ b/ydb/core/tx/columnshard/engines/scheme/abstract/ya.make @@ -1,15 +1,12 @@ LIBRARY() SRCS( - saver.cpp index_info.cpp - loader.cpp ) PEERDIR( ydb/library/actors/core - ydb/core/formats/arrow/transformer - ydb/core/formats/arrow/serializer + ydb/core/formats/arrow/save_load ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/tx/columnshard/engines/scheme/column/info.cpp b/ydb/core/tx/columnshard/engines/scheme/column/info.cpp index b9473398fdf3..b4fae5cf09ab 100644 --- a/ydb/core/tx/columnshard/engines/scheme/column/info.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/column/info.cpp @@ -19,8 +19,7 @@ NArrow::NTransformation::ITransformer::TPtr TSimpleColumnInfo::GetLoadTransforme return transformer; } -TConclusionStatus TSimpleColumnInfo::DeserializeFromProto(const NKikimrSchemeOp::TOlapColumnDescription& columnInfo) -{ +TConclusionStatus TSimpleColumnInfo::DeserializeFromProto(const NKikimrSchemeOp::TOlapColumnDescription& columnInfo) { AFL_VERIFY(columnInfo.GetId() == ColumnId); if (columnInfo.HasSerializer()) { AFL_VERIFY(Serializer.DeserializeFromProto(columnInfo.GetSerializer())); @@ -30,29 +29,34 @@ TConclusionStatus TSimpleColumnInfo::DeserializeFromProto(const NKikimrSchemeOp: if (columnInfo.HasDefaultValue()) { DefaultValue.DeserializeFromProto(columnInfo.GetDefaultValue()).Validate(); } + if (columnInfo.HasDataAccessorConstructor()) { + AFL_VERIFY(DataAccessorConstructor.DeserializeFromProto(columnInfo.GetDataAccessorConstructor())); + } + IsNullable = columnInfo.HasNotNull() ? 
!columnInfo.GetNotNull() : true; AFL_VERIFY(Serializer); if (columnInfo.HasDictionaryEncoding()) { auto settings = NArrow::NDictionary::TEncodingSettings::BuildFromProto(columnInfo.GetDictionaryEncoding()); Y_ABORT_UNLESS(settings.IsSuccess()); DictionaryEncoding = *settings; } - Loader = std::make_shared(GetLoadTransformer(), Serializer, ArrowSchema, DefaultValue.GetValue(), ColumnId); + Loader = std::make_shared(GetLoadTransformer(), Serializer, DataAccessorConstructor, ArrowField, DefaultValue.GetValue(), ColumnId); return TConclusionStatus::Success(); } -TSimpleColumnInfo::TSimpleColumnInfo(const ui32 columnId, const std::shared_ptr& arrowField, const NArrow::NSerialization::TSerializerContainer& serializer, - const bool needMinMax, const bool isSorted, +TSimpleColumnInfo::TSimpleColumnInfo(const ui32 columnId, const std::shared_ptr& arrowField, + const NArrow::NSerialization::TSerializerContainer& serializer, const bool needMinMax, const bool isSorted, const bool isNullable, const std::shared_ptr& defaultValue) : ColumnId(columnId) , ArrowField(arrowField) - , ArrowSchema(std::make_shared(arrow::FieldVector({arrowField}))) , Serializer(serializer) , NeedMinMax(needMinMax) , IsSorted(isSorted) + , IsNullable(isNullable) , DefaultValue(defaultValue) { ColumnName = ArrowField->name(); - Loader = std::make_shared(GetLoadTransformer(), Serializer, ArrowSchema, DefaultValue.GetValue(), ColumnId); + Loader = std::make_shared( + GetLoadTransformer(), Serializer, DataAccessorConstructor, ArrowField, DefaultValue.GetValue(), ColumnId); } std::vector> TSimpleColumnInfo::ActualizeColumnData(const std::vector>& source, const TSimpleColumnInfo& sourceColumnFeatures) const { @@ -86,7 +90,7 @@ std::vector> TSimpleColumnInf } std::vector> result; for (auto&& s : source) { - auto data = NArrow::TStatusValidator::GetValid(sourceColumnFeatures.Loader->Apply(s->GetData())); + auto data = sourceColumnFeatures.Loader->ApplyRawVerified(s->GetData()); 
result.emplace_back(s->CopyWithAnotherBlob(GetColumnSaver().Apply(data), *this)); } return result; diff --git a/ydb/core/tx/columnshard/engines/scheme/column/info.h b/ydb/core/tx/columnshard/engines/scheme/column/info.h index 6db21f1fe876..5e3259cbd707 100644 --- a/ydb/core/tx/columnshard/engines/scheme/column/info.h +++ b/ydb/core/tx/columnshard/engines/scheme/column/info.h @@ -1,17 +1,18 @@ #pragma once -#include -#include -#include - +#include +#include #include +#include +#include #include #include -#include +#include +#include #include -#include #include +#include namespace NKikimr::NOlap { @@ -22,19 +23,19 @@ class TSimpleColumnInfo { YDB_READONLY(ui32, ColumnId, 0); YDB_READONLY_DEF(TString, ColumnName); YDB_READONLY_DEF(std::shared_ptr, ArrowField); - YDB_READONLY_DEF(std::shared_ptr, ArrowSchema); YDB_READONLY(NArrow::NSerialization::TSerializerContainer, Serializer, NArrow::NSerialization::TSerializerContainer::GetDefaultSerializer()); + YDB_READONLY(NArrow::NAccessor::TConstructorContainer, DataAccessorConstructor, NArrow::NAccessor::TConstructorContainer::GetDefaultConstructor()); YDB_READONLY(bool, NeedMinMax, false); YDB_READONLY(bool, IsSorted, false); + YDB_READONLY(bool, IsNullable, false); YDB_READONLY_DEF(TColumnDefaultScalarValue, DefaultValue); std::optional DictionaryEncoding; std::shared_ptr Loader; NArrow::NTransformation::ITransformer::TPtr GetLoadTransformer() const; public: - - TSimpleColumnInfo(const ui32 columnId, const std::shared_ptr& arrowField, - const NArrow::NSerialization::TSerializerContainer& serializer, const bool needMinMax, const bool isSorted, + TSimpleColumnInfo(const ui32 columnId, const std::shared_ptr& arrowField, + const NArrow::NSerialization::TSerializerContainer& serializer, const bool needMinMax, const bool isSorted, const bool isNullable, const std::shared_ptr& defaultValue); TColumnSaver GetColumnSaver() const { @@ -43,7 +44,8 @@ class TSimpleColumnInfo { return TColumnSaver(transformer, Serializer); } - 
std::vector> ActualizeColumnData(const std::vector>& source, const TSimpleColumnInfo& sourceColumnFeatures) const; + std::vector> ActualizeColumnData( + const std::vector>& source, const TSimpleColumnInfo& sourceColumnFeatures) const; TString DebugString() const { TStringBuilder sb; @@ -62,4 +64,4 @@ class TSimpleColumnInfo { } }; -} // namespace NKikimr::NOlap +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/column_features.h b/ydb/core/tx/columnshard/engines/scheme/column_features.h index 671b35e57734..288ac6e195e4 100644 --- a/ydb/core/tx/columnshard/engines/scheme/column_features.h +++ b/ydb/core/tx/columnshard/engines/scheme/column_features.h @@ -1,6 +1,4 @@ #pragma once -#include "abstract/loader.h" -#include "abstract/saver.h" #include "column/info.h" #include @@ -10,6 +8,7 @@ #include #include #include +#include #include #include @@ -34,9 +33,9 @@ class TColumnFeatures: public TSimpleColumnInfo { YDB_READONLY_DEF(std::shared_ptr, Operator); public: TColumnFeatures(const ui32 columnId, const std::shared_ptr& arrowField, const NArrow::NSerialization::TSerializerContainer& serializer, - const std::shared_ptr& bOperator, const bool needMinMax, const bool isSorted, + const std::shared_ptr& bOperator, const bool needMinMax, const bool isSorted, const bool isNullable, const std::shared_ptr& defaultValue) - : TBase(columnId, arrowField, serializer, needMinMax, isSorted, defaultValue) + : TBase(columnId, arrowField, serializer, needMinMax, isSorted, isNullable, defaultValue) , Operator(bOperator) { AFL_VERIFY(Operator); diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.cpp b/ydb/core/tx/columnshard/engines/scheme/index_info.cpp index 835d1c71c2bd..c6203f9142a2 100644 --- a/ydb/core/tx/columnshard/engines/scheme/index_info.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/index_info.cpp @@ -1,37 +1,25 @@ #include "index_info.h" -#include -#include -#include -#include - #include #include #include #include #include #include 
+#include +#include +#include +#include namespace NKikimr::NOlap { -static std::vector NamesOnly(const std::vector& columns) { - std::vector out; - out.reserve(columns.size()); - for (const auto& [name, _] : columns) { - out.push_back(name); - } - return out; -} - TIndexInfo::TIndexInfo(const TString& name) - : NTable::TScheme::TTableSchema() - , Name(name) -{ + : Name(name) { CompactionPlannerConstructor = NStorageOptimizer::IOptimizerPlannerConstructor::BuildDefault(); } bool TIndexInfo::CheckCompatible(const TIndexInfo& other) const { - if (!other.GetPrimaryKey()->Equals(GetPrimaryKey())) { + if (!other.GetPrimaryKey()->Equals(PrimaryKey)) { return false; } return true; @@ -44,22 +32,24 @@ ui32 TIndexInfo::GetColumnIdVerified(const std::string& name) const { } std::optional TIndexInfo::GetColumnIdOptional(const std::string& name) const { - const auto ni = ColumnNames.find(name); - - if (ni != ColumnNames.end()) { - return ni->second; + const auto pred = [](const TNameInfo& item, const std::string& value) { + return item.GetName() < value; + }; + auto it = std::lower_bound(ColumnNames.begin(), ColumnNames.end(), name, pred); + if (it != ColumnNames.end() && it->GetName() == name) { + return it->GetColumnId(); } return IIndexInfo::GetColumnIdOptional(name); } -TString TIndexInfo::GetColumnName(ui32 id, bool required) const { - const auto ci = Columns.find(id); - - if (ci != Columns.end()) { - return ci->second.Name; +TString TIndexInfo::GetColumnName(const ui32 id, bool required) const { + const auto& f = GetColumnFeaturesOptional(id); + if (!f) { + AFL_VERIFY(!required); + return ""; + } else { + return f->GetColumnName(); } - - return IIndexInfo::GetColumnName(id, required); } const std::vector& TIndexInfo::GetColumnIds(const bool withSpecial) const { @@ -74,9 +64,7 @@ std::vector TIndexInfo::GetColumnNames(const std::vector& ids) co std::vector out; out.reserve(ids.size()); for (ui32 id : ids) { - const auto ci = Columns.find(id); - Y_ABORT_UNLESS(ci != 
Columns.end()); - out.push_back(ci->second.Name); + out.push_back(GetColumnName(id)); } return out; } @@ -85,46 +73,21 @@ std::vector TIndexInfo::GetColumnSTLNames(const std::vector& std::vector out; out.reserve(ids.size()); for (ui32 id : ids) { - const auto ci = Columns.find(id); - Y_ABORT_UNLESS(ci != Columns.end()); - out.push_back(ci->second.Name); + out.push_back(GetColumnName(id)); } return out; } -std::vector TIndexInfo::GetColumns(const std::vector& ids) const { - return NOlap::GetColumns(*this, ids); -} - -std::shared_ptr TIndexInfo::ArrowSchema() const { +const std::shared_ptr& TIndexInfo::ArrowSchema() const { AFL_VERIFY(Schema); return Schema; } -std::shared_ptr TIndexInfo::ArrowSchemaWithSpecials() const { +const std::shared_ptr& TIndexInfo::ArrowSchemaWithSpecials() const { AFL_VERIFY(SchemaWithSpecials); return SchemaWithSpecials; } -std::shared_ptr TIndexInfo::AddColumns( - const std::shared_ptr& src, - const std::vector& columns) const { - std::shared_ptr all = ArrowSchemaWithSpecials(); - auto fields = src->fields(); - - for (const auto& col : columns) { - const std::string name(col.data(), col.size()); - if (!src->GetFieldByName(name)) { - auto field = all->GetFieldByName(name); - if (!field) { - return {}; - } - fields.push_back(field); - } - } - return std::make_shared(std::move(fields)); -} - std::vector TIndexInfo::GetColumnIds(const std::vector& columnNames) const { std::vector ids; ids.reserve(columnNames.size()); @@ -138,34 +101,20 @@ std::vector TIndexInfo::GetColumnIds(const std::vector& columnNam return ids; } -std::shared_ptr TIndexInfo::ArrowColumnFieldVerified(const ui32 columnId) const { - auto result = ArrowColumnFieldOptional(columnId); - AFL_VERIFY(result); - return result; -} - -std::shared_ptr TIndexInfo::ArrowColumnFieldOptional(const ui32 columnId) const { - auto it = ArrowColumnByColumnIdCache.find(columnId); - if (it == ArrowColumnByColumnIdCache.end()) { - return nullptr; - } else { - return it->second; - } -} - -void 
TIndexInfo::SetAllKeys(const std::shared_ptr& operators) { +void TIndexInfo::SetAllKeys(const std::shared_ptr& operators, const THashMap& columns) { /// @note Setting replace and sorting key to PK we are able to: /// * apply REPLACE by MergeSort /// * apply PK predicate before REPLACE - { - AFL_VERIFY(PKColumnIds.empty()); - const auto& primaryKeyNames = NamesOnly(GetPrimaryKeyColumns()); - PKColumnIds = GetColumnIds(primaryKeyNames); - AFL_VERIFY(PKColumnIds.size()); - PrimaryKey = MakeArrowSchema(Columns, PKColumnIds); + PrimaryKey = MakeArrowSchema(columns, PKColumnIds, nullptr); + + AFL_VERIFY(PKColumns.empty()); + for (auto&& i : PKColumnIds) { + auto it = columns.find(i); + AFL_VERIFY(it != columns.end()); + PKColumns.emplace_back(TNameTypeInfo(it->second.Name, it->second.PType)); } - for (const auto& [colId, column] : Columns) { + for (const auto& [colId, column] : columns) { if (NArrow::IsPrimitiveYqlType(column.PType)) { MinMaxIdxColumnsIds.insert(colId); } @@ -173,38 +122,39 @@ void TIndexInfo::SetAllKeys(const std::shared_ptr& operators) MinMaxIdxColumnsIds.insert(GetPKFirstColumnId()); if (!Schema) { AFL_VERIFY(!SchemaWithSpecials); - InitializeCaches(operators); + InitializeCaches(operators, columns, nullptr); } } TColumnSaver TIndexInfo::GetColumnSaver(const ui32 columnId) const { - auto it = ColumnFeatures.find(columnId); - AFL_VERIFY(it != ColumnFeatures.end()); - return it->second.GetColumnSaver(); + return GetColumnFeaturesVerified(columnId).GetColumnSaver(); } std::shared_ptr TIndexInfo::GetColumnLoaderOptional(const ui32 columnId) const { - auto it = ColumnFeatures.find(columnId); - if (it == ColumnFeatures.end()) { + const auto& cFeatures = GetColumnFeaturesOptional(columnId); + if (!cFeatures) { return nullptr; } else { - return it->second.GetLoader(); + return cFeatures->GetLoader(); } } -std::shared_ptr TIndexInfo::GetColumnFieldOptional(const ui32 columnId) const { - std::shared_ptr schema; - if (IsSpecialColumn(columnId)) { - return 
IIndexInfo::GetColumnFieldOptional(columnId); +std::optional TIndexInfo::GetColumnIndexOptional(const ui32 id) const { + auto it = std::lower_bound(SchemaColumnIdsWithSpecials.begin(), SchemaColumnIdsWithSpecials.end(), id); + if (it == SchemaColumnIdsWithSpecials.end() || *it != id) { + return std::nullopt; } else { - schema = ArrowSchema(); + return it - SchemaColumnIdsWithSpecials.begin(); } - if (const TString columnName = GetColumnName(columnId, false)) { - return schema->GetFieldByName(columnName); - } else { +} + +std::shared_ptr TIndexInfo::GetColumnFieldOptional(const ui32 columnId) const { + const std::optional index = GetColumnIndexOptional(columnId); + if (!index) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("column_id", columnId)("event", "incorrect_column_id"); return nullptr; } + return ArrowSchemaWithSpecials()->GetFieldByIndexVerified(*index); } std::shared_ptr TIndexInfo::GetColumnFieldVerified(const ui32 columnId) const { @@ -223,20 +173,24 @@ std::shared_ptr TIndexInfo::GetColumnsSchema(const std::set } std::shared_ptr TIndexInfo::GetColumnSchema(const ui32 columnId) const { - return GetColumnsSchema({columnId}); + return GetColumnsSchema({ columnId }); } -bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators) { +bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators, + const std::shared_ptr& cache) { if (schema.GetEngine() != NKikimrSchemeOp::COLUMN_ENGINE_REPLACING_TIMESERIES) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_index_info")("reason", "incorrect_engine_in_schema"); return false; } + AFL_VERIFY(cache); { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::Optimizer"); SchemeNeedActualization = schema.GetOptions().GetSchemeNeedActualization(); ExternalGuaranteeExclusivePK = schema.GetOptions().GetExternalGuaranteeExclusivePK(); if 
(schema.GetOptions().HasCompactionPlannerConstructor()) { - auto container = NStorageOptimizer::TOptimizerPlannerConstructorContainer::BuildFromProto(schema.GetOptions().GetCompactionPlannerConstructor()); + auto container = + NStorageOptimizer::TOptimizerPlannerConstructorContainer::BuildFromProto(schema.GetOptions().GetCompactionPlannerConstructor()); CompactionPlannerConstructor = container.DetachResult().GetObjectPtrVerified(); } else { AFL_VERIFY(!!CompactionPlannerConstructor); @@ -244,6 +198,7 @@ bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& } if (schema.HasDefaultCompression()) { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::Serializer"); NArrow::NSerialization::TSerializerContainer container; if (!container.DeserializeFromProto(schema.GetDefaultCompression())) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_index_info")("reason", "cannot_parse_default_serializer"); @@ -251,36 +206,68 @@ bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& } DefaultSerializer = container; } - - for (const auto& idx : schema.GetIndexes()) { - NIndexes::TIndexMetaContainer meta; - AFL_VERIFY(meta.DeserializeFromProto(idx)); - Indexes.emplace(meta->GetIndexId(), meta); + { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::Indexes"); + for (const auto& idx : schema.GetIndexes()) { + NIndexes::TIndexMetaContainer meta; + AFL_VERIFY(meta.DeserializeFromProto(idx)); + Indexes.emplace(meta->GetIndexId(), meta); + } } - for (const auto& col : schema.GetColumns()) { - const ui32 id = col.GetId(); - const TString& name = col.GetName(); - const bool notNull = col.HasNotNull() ? col.GetNotNull() : false; - auto typeInfoMod = NScheme::TypeInfoModFromProtoColumnType(col.GetTypeId(), col.HasTypeInfo() ? 
&col.GetTypeInfo() : nullptr); - Columns[id] = NTable::TColumn(name, id, typeInfoMod.TypeInfo, typeInfoMod.TypeMod, notNull); - ColumnNames[name] = id; + THashMap columns; + { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::Columns"); + ColumnNames.clear(); + for (const auto& col : schema.GetColumns()) { + const ui32 id = col.GetId(); + const TString& name = cache->GetStringCache(col.GetName()); + const bool notNull = col.HasNotNull() ? col.GetNotNull() : false; + auto typeInfoMod = NScheme::TypeInfoModFromProtoColumnType(col.GetTypeId(), col.HasTypeInfo() ? &col.GetTypeInfo() : nullptr); + columns[id] = NTable::TColumn(name, id, typeInfoMod.TypeInfo, cache->GetStringCache(typeInfoMod.TypeMod), notNull); + ColumnNames.emplace_back(name, id); + } + std::sort(ColumnNames.begin(), ColumnNames.end()); } for (const auto& keyName : schema.GetKeyColumnNames()) { - Y_ABORT_UNLESS(ColumnNames.contains(keyName)); - KeyColumns.push_back(ColumnNames[keyName]); - } - InitializeCaches(operators); - for (const auto& col : schema.GetColumns()) { - auto it = ColumnFeatures.find(col.GetId()); - AFL_VERIFY(it != ColumnFeatures.end()); - auto parsed = it->second.DeserializeFromProto(col, operators); - if (!parsed) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_column_feature")("reason", parsed.GetErrorMessage()); - return false; + PKColumnIds.push_back(GetColumnIdVerified(keyName)); + } + InitializeCaches(operators, columns, cache, false); + SetAllKeys(operators, columns); + { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::Columns::Features"); + for (const auto& col : schema.GetColumns()) { + THashMap> it; + const TString fingerprint = cache ? 
("C:" + col.SerializeAsString()) : Default(); + const auto createPred = [&]() -> TConclusion> { + auto f = BuildDefaultColumnFeatures(col.GetId(), columns, operators); + auto parsed = f->DeserializeFromProto(col, operators); + if (parsed.IsFail()) { + return parsed; + } + return f; + }; + auto fConclusion = cache->GetOrCreateColumnFeatures(fingerprint, createPred); + if (fConclusion.IsFail()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_column_feature")("reason", fConclusion.GetErrorMessage()); + return false; + } + ColumnFeatures.emplace_back(fConclusion.DetachResult()); } + for (auto&& cId : GetSystemColumnIds()) { + THashMap> it; + const TString fingerprint = "SC:" + ::ToString(cId); + const auto createPred = [&]() -> TConclusion> { + return BuildDefaultColumnFeatures(cId, {}, operators); + }; + auto fConclusion = cache->GetOrCreateColumnFeatures(fingerprint, createPred); + ColumnFeatures.emplace_back(fConclusion.DetachResult()); + } + const auto pred = [](const std::shared_ptr& l, const std::shared_ptr& r) { + return l->GetColumnId() < r->GetColumnId(); + }; + std::sort(ColumnFeatures.begin(), ColumnFeatures.end(), pred); } - Version = schema.GetVersion(); return true; } @@ -296,15 +283,17 @@ std::vector GetColumns(const NTable::TScheme::TTableSchema& table return out; } -std::optional TIndexInfo::BuildFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators) { +std::optional TIndexInfo::BuildFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, + const std::shared_ptr& operators, const std::shared_ptr& cache) { TIndexInfo result(""); - if (!result.DeserializeFromProto(schema, operators)) { + if (!result.DeserializeFromProto(schema, operators, cache)) { return std::nullopt; } return result; } -std::shared_ptr MakeArrowSchema(const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids) { +std::vector> MakeArrowFields(const NTable::TScheme::TTableSchema::TColumns& columns, 
const std::vector& ids, + const std::shared_ptr& cache) { std::vector> fields; for (const ui32 id : ids) { AFL_VERIFY(!TIndexInfo::IsSpecialColumn(id)); @@ -315,52 +304,64 @@ std::shared_ptr MakeArrowSchema(const NTable::TScheme::TTableSche std::string colName(column.Name.data(), column.Name.size()); auto arrowType = NArrow::GetArrowType(column.PType); AFL_VERIFY(arrowType.ok()); - fields.emplace_back(std::make_shared(colName, arrowType.ValueUnsafe(), !column.NotNull)); + auto f = std::make_shared(colName, arrowType.ValueUnsafe(), !column.NotNull); + if (cache) { + auto fFound = cache->GetField(f->ToString(true)); + if (!fFound) { + cache->RegisterField(f->ToString(true), f); + fields.emplace_back(f); + } else { + fields.emplace_back(fFound); + } + } else { + fields.emplace_back(f); + } } - return std::make_shared(std::move(fields)); + return fields; +} + +std::shared_ptr MakeArrowSchema( + const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, const std::shared_ptr& cache) { + return std::make_shared(MakeArrowFields(columns, ids, cache)); } -void TIndexInfo::InitializeCaches(const std::shared_ptr& operators) { +void TIndexInfo::InitializeCaches(const std::shared_ptr& operators, const THashMap& columns, const std::shared_ptr& cache, + const bool withColumnFeatures) { { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::InitializeCaches::Schema"); AFL_VERIFY(!Schema); - SchemaColumnIds.reserve(Columns.size()); - for (const auto& [id, _] : Columns) { + SchemaColumnIds.reserve(columns.size()); + for (const auto& [id, _] : columns) { SchemaColumnIds.push_back(id); } std::sort(SchemaColumnIds.begin(), SchemaColumnIds.end()); - Schema = MakeArrowSchema(Columns, SchemaColumnIds); + auto originalFields = MakeArrowFields(columns, SchemaColumnIds, cache); + Schema = std::make_shared(originalFields); + IIndexInfo::AddSpecialFields(originalFields); + SchemaWithSpecials = std::make_shared(originalFields); } - SchemaWithSpecials = 
IIndexInfo::AddSpecialFields(Schema); - SchemaColumnIdsWithSpecials = IIndexInfo::AddSpecialFieldIds(SchemaColumnIds); - - for (auto&& c : Columns) { - AFL_VERIFY(ArrowColumnByColumnIdCache.emplace(c.first, GetColumnFieldVerified(c.first)).second); - AFL_VERIFY(ColumnFeatures.emplace(c.first, TColumnFeatures(c.first, GetColumnFieldVerified(c.first), DefaultSerializer, operators->GetDefaultOperator(), - NArrow::IsPrimitiveYqlType(c.second.PType), c.first == GetPKFirstColumnId(), nullptr)).second); - } - for (auto&& cId : GetSystemColumnIds()) { - AFL_VERIFY(ArrowColumnByColumnIdCache.emplace(cId, GetColumnFieldVerified(cId)).second); - AFL_VERIFY(ColumnFeatures.emplace(cId, TColumnFeatures(cId, GetColumnFieldVerified(cId), DefaultSerializer, operators->GetDefaultOperator(), - false, false, IIndexInfo::DefaultColumnValue(cId))).second); + { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::InitializeCaches::SchemaFields"); + SchemaColumnIdsWithSpecials = IIndexInfo::AddSpecialFieldIds(SchemaColumnIds); + } + if (withColumnFeatures) { + { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::InitializeCaches::Columns"); + for (auto&& c : columns) { + ColumnFeatures.emplace_back(BuildDefaultColumnFeatures(c.first, columns, operators)); + } + } + { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::InitializeCaches::SysColumns"); + for (auto&& cId : GetSystemColumnIds()) { + ColumnFeatures.emplace_back(BuildDefaultColumnFeatures(cId, columns, operators)); + } + } } } -std::vector> TIndexInfo::MakeEmptyChunks(const ui32 columnId, const std::vector& pages, const TSimpleColumnInfo& columnInfo) const { - std::vector> result; - auto columnArrowSchema = GetColumnSchema(columnId); - TColumnSaver saver = GetColumnSaver(columnId); - ui32 idx = 0; - for (auto p : pages) { - auto arr = NArrow::MakeEmptyBatch(columnArrowSchema, p); - AFL_VERIFY(arr->num_columns() == 1)("count", arr->num_columns()); - result.emplace_back(std::make_shared(saver.Apply(arr), 
arr->column(0), TChunkAddress(columnId, idx), columnInfo)); - ++idx; - } - return result; -} - NSplitter::TEntityGroups TIndexInfo::GetEntityGroupsByStorageId(const TString& specialTier, const IStoragesManager& storages) const { NSplitter::TEntityGroups groups(storages.GetDefaultOperator()->GetBlobSplitSettings(), IStoragesManager::DefaultStorageId); for (auto&& i : GetEntityIds()) { @@ -385,9 +386,6 @@ std::shared_ptr TIndexInfo::GetColumnExternalDefaultValueVerified } std::shared_ptr TIndexInfo::GetColumnExternalDefaultValueVerified(const ui32 columnId) const { - if (IIndexInfo::IsSpecialColumn(columnId)) { - return IIndexInfo::DefaultColumnValue(columnId); - } return GetColumnFeaturesVerified(columnId).GetDefaultValue().GetValue(); } @@ -444,4 +442,22 @@ std::vector TIndexInfo::GetEntityIds() const { return result; } -} // namespace NKikimr::NOlap +std::shared_ptr TIndexInfo::BuildDefaultColumnFeatures( + const ui32 columnId, const THashMap& columns, const std::shared_ptr& operators) const { + if (IsSpecialColumn(columnId)) { + return std::make_shared(columnId, GetColumnFieldVerified(columnId), DefaultSerializer, operators->GetDefaultOperator(), + false, false, false, IIndexInfo::DefaultColumnValue(columnId)); + } else { + auto itC = columns.find(columnId); + AFL_VERIFY(itC != columns.end()); + return std::make_shared(columnId, GetColumnFieldVerified(columnId), DefaultSerializer, operators->GetDefaultOperator(), + NArrow::IsPrimitiveYqlType(itC->second.PType), columnId == GetPKFirstColumnId(), false, nullptr); + } +} + +std::shared_ptr TIndexInfo::GetColumnExternalDefaultValueByIndexVerified(const ui32 colIndex) const { + AFL_VERIFY(colIndex < ColumnFeatures.size())("index", colIndex)("size", ColumnFeatures.size()); + return ColumnFeatures[colIndex]->GetDefaultValue().GetValue(); +} + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.h b/ydb/core/tx/columnshard/engines/scheme/index_info.h index 
5a92fbc3d6c3..0c04b4abd8d1 100644 --- a/ydb/core/tx/columnshard/engines/scheme/index_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/index_info.h @@ -6,21 +6,22 @@ #include "abstract/index_info.h" #include "indexes/abstract/meta.h" -#include - -#include -#include -#include #include #include #include #include +#include +#include +#include +#include + +#include namespace arrow { - class Array; - class Field; - class Schema; -} +class Array; +class Field; +class Schema; +} // namespace arrow namespace NKikimr::NOlap { @@ -36,51 +37,141 @@ namespace NStorageOptimizer { class IOptimizerPlannerConstructor; } class TPortionInfoWithBlobs; -struct TInsertedData; class TSnapshotColumnInfo; class ISnapshotSchema; using TNameTypeInfo = std::pair; +class TSchemaObjectsCache { +private: + THashMap> Fields; + THashMap> ColumnFeatures; + THashSet StringsCache; + mutable ui64 AcceptionFieldsCount = 0; + mutable ui64 AcceptionFeaturesCount = 0; + +public: + const TString& GetStringCache(const TString& original) { + auto it = StringsCache.find(original); + if (it == StringsCache.end()) { + it = StringsCache.emplace(original).first; + } + return *it; + } + + void RegisterField(const TString& fingerprint, const std::shared_ptr& f) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "register_field")("fp", fingerprint)("f", f->ToString()); + AFL_VERIFY(Fields.emplace(fingerprint, f).second); + } + void RegisterColumnFeatures(const TString& fingerprint, const std::shared_ptr& f) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "register_column_features")("fp", fingerprint)("info", f->DebugString()); + AFL_VERIFY(ColumnFeatures.emplace(fingerprint, f).second); + } + std::shared_ptr GetField(const TString& fingerprint) const { + auto it = Fields.find(fingerprint); + if (it == Fields.end()) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "get_field_miss")("fp", fingerprint)("count", Fields.size())( + "acc", AcceptionFieldsCount); + return nullptr; + } + if 
(++AcceptionFieldsCount % 1000 == 0) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "get_field_accept")("fp", fingerprint)("count", Fields.size())( + "acc", AcceptionFieldsCount); + } + return it->second; + } + template + TConclusion> GetOrCreateColumnFeatures(const TString& fingerprint, const TConstructor& constructor) { + auto it = ColumnFeatures.find(fingerprint); + if (it == ColumnFeatures.end()) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "get_column_features_miss")("fp", UrlEscapeRet(fingerprint))( + "count", ColumnFeatures.size())("acc", AcceptionFeaturesCount); + TConclusion> resultConclusion = constructor(); + if (resultConclusion.IsFail()) { + return resultConclusion; + } + it = ColumnFeatures.emplace(fingerprint, resultConclusion.DetachResult()).first; + AFL_VERIFY(it->second); + } else { + if (++AcceptionFeaturesCount % 1000 == 0) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "get_column_features_accept")("fp", UrlEscapeRet(fingerprint))( + "count", ColumnFeatures.size())("acc", AcceptionFeaturesCount); + } + } + return it->second; + } +}; + /// Column engine index description in terms of tablet's local table. /// We have to use YDB types for keys here. 
-struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { +struct TIndexInfo: public IIndexInfo { private: - THashMap ColumnFeatures; - THashMap> ArrowColumnByColumnIdCache; + using TColumns = THashMap; + + class TNameInfo { + private: + YDB_READONLY_DEF(TString, Name); + YDB_READONLY(ui32, ColumnId, 0); + + public: + TNameInfo(const TString& name, const ui32 columnId) + : Name(name) + , ColumnId(columnId) + { + + } + + bool operator<(const TNameInfo& item) const { + return Name < item.Name; + } + }; + + std::vector ColumnNames; + std::vector PKColumnIds; + std::vector PKColumns; + + std::vector> ColumnFeatures; THashMap Indexes; TIndexInfo(const TString& name); bool SchemeNeedActualization = false; std::shared_ptr CompactionPlannerConstructor; bool ExternalGuaranteeExclusivePK = false; - bool DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators); - void InitializeCaches(const std::shared_ptr& operators); + bool DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators, + const std::shared_ptr& cache); + void InitializeCaches(const std::shared_ptr& operators, const THashMap& columns, + const std::shared_ptr& cache, const bool withColumnFeatures = true); + std::shared_ptr BuildDefaultColumnFeatures( + const ui32 columnId, const THashMap& columns, const std::shared_ptr& operators) const; + public: std::shared_ptr GetCompactionPlannerConstructor() const; - - bool IsNullableVerified(const std::string& fName) const { - return IsNullableVerified(GetColumnIdVerified(fName)); + bool IsNullableVerifiedByIndex(const ui32 colIndex) const { + AFL_VERIFY(colIndex < ColumnFeatures.size()); + return ColumnFeatures[colIndex]->GetIsNullable(); } bool IsNullableVerified(const ui32 colId) const { - auto it = Columns.find(colId); - if (it == Columns.end()) { - AFL_VERIFY(IIndexInfo::IsSpecialColumn(colId)); - return IIndexInfo::IsNullableVerified(colId); - } - return 
!it->second.NotNull; + return GetColumnFeaturesVerified(colId).GetIsNullable(); } std::shared_ptr GetColumnExternalDefaultValueVerified(const std::string& colName) const; std::shared_ptr GetColumnExternalDefaultValueVerified(const ui32 colId) const; + std::shared_ptr GetColumnExternalDefaultValueByIndexVerified(const ui32 colIndex) const; + bool GetExternalGuaranteeExclusivePK() const { return ExternalGuaranteeExclusivePK; } const TColumnFeatures& GetColumnFeaturesVerified(const ui32 columnId) const { - auto it = ColumnFeatures.find(columnId); - AFL_VERIFY(it != ColumnFeatures.end()); - return it->second; + return *ColumnFeatures[GetColumnIndexVerified(columnId)]; + } + + const std::shared_ptr& GetColumnFeaturesOptional(const ui32 columnId) const { + if (auto idx = GetColumnIndexOptional(columnId)) { + return ColumnFeatures[*idx]; + } else { + return Default>(); + } } NSplitter::TEntityGroups GetEntityGroupsByStorageId(const TString& specialTier, const IStoragesManager& storages) const; @@ -95,14 +186,12 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { result.emplace(portionTierName); } else { for (auto&& i : ColumnFeatures) { - result.emplace(i.second.GetOperator()->GetStorageId()); + result.emplace(i->GetOperator()->GetStorageId()); } } return result; } - std::vector> MakeEmptyChunks(const ui32 columnId, const std::vector& pages, const TSimpleColumnInfo& columnInfo) const; - const THashMap& GetIndexes() const { return Indexes; } @@ -117,9 +206,7 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { if (specialTier && specialTier != IStoragesManager::DefaultStorageId) { return specialTier; } else { - auto it = ColumnFeatures.find(columnId); - AFL_VERIFY(it != ColumnFeatures.end()); - return it->second.GetOperator()->GetStorageId(); + return GetColumnFeaturesVerified(columnId).GetOperator()->GetStorageId(); } } @@ -134,33 +221,48 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo 
{ TString DebugString() const { TStringBuilder sb; sb << "(" - << "version=" << Version << ";" - << "name=" << Name << ";" - << ")"; + << "version=" << Version << ";" + << "name=" << Name << ";" + << ")"; for (auto&& i : ColumnFeatures) { - sb << GetColumnName(i.first) << ":" << i.second.DebugString() << ";"; + sb << i->GetColumnName() << ":" << i->DebugString() << ";"; } return sb; } + void SetAllKeys(const std::shared_ptr& operators, const THashMap& columns); + public: static TIndexInfo BuildDefault() { TIndexInfo result("dummy"); return result; } - std::vector> ActualizeColumnData(const std::vector>& source, const TIndexInfo& sourceIndexInfo, const ui32 columnId) const { - auto itCurrent = ColumnFeatures.find(columnId); - auto itPred = sourceIndexInfo.ColumnFeatures.find(columnId); - AFL_VERIFY(itCurrent != ColumnFeatures.end()); - AFL_VERIFY(itPred != sourceIndexInfo.ColumnFeatures.end()); - return itCurrent->second.ActualizeColumnData(source, itPred->second); + static TIndexInfo BuildDefault( + const std::shared_ptr& operators, const TColumns& columns, const std::vector& pkNames) { + TIndexInfo result = BuildDefault(); + for (auto&& i : columns) { + result.ColumnNames.emplace_back(i.second.Name, i.first); + } + std::sort(result.ColumnNames.begin(), result.ColumnNames.end()); + for (auto&& i : pkNames) { + const ui32 columnId = result.GetColumnIdVerified(i); + result.PKColumnIds.emplace_back(columnId); + } + result.SetAllKeys(operators, columns); + return result; } - static std::optional BuildFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators); + std::vector> ActualizeColumnData( + const std::vector>& source, const TIndexInfo& sourceIndexInfo, const ui32 columnId) const { + return GetColumnFeaturesVerified(columnId).ActualizeColumnData(source, sourceIndexInfo.GetColumnFeaturesVerified(columnId)); + } + + static std::optional BuildFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, + const std::shared_ptr& 
operators, const std::shared_ptr& cache); bool HasColumnId(const ui32 columnId) const { - return ColumnFeatures.contains(columnId); + return !!GetColumnIndexOptional(columnId); } bool HasColumnName(const std::string& columnName) const { @@ -171,6 +273,12 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { return Indexes.contains(indexId); } + std::optional GetColumnIndexOptional(const ui32 id) const; + ui32 GetColumnIndexVerified(const ui32 id) const { + auto result = GetColumnIndexOptional(id); + AFL_VERIFY(result); + return *result; + } std::shared_ptr GetColumnFieldOptional(const ui32 columnId) const; std::shared_ptr GetColumnFieldVerified(const ui32 columnId) const; std::shared_ptr GetColumnSchema(const ui32 columnId) const; @@ -213,6 +321,7 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { YDB_ACCESSOR_DEF(TStorageData, SecondaryInplaceData); using TPrimaryStorageData = THashMap>>; YDB_ACCESSOR_DEF(TPrimaryStorageData, ExternalData); + public: TSecondaryData() = default; }; @@ -233,14 +342,11 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { std::shared_ptr GetIndexMetaMax(const ui32 columnId) const; std::shared_ptr GetIndexMetaCountMinSketch(const std::set& columnIds) const; - [[nodiscard]] TConclusionStatus AppendIndex(const THashMap>>& originalData, const ui32 indexId, - const std::shared_ptr& operators, TSecondaryData& result) const; + [[nodiscard]] TConclusionStatus AppendIndex(const THashMap>>& originalData, + const ui32 indexId, const std::shared_ptr& operators, TSecondaryData& result) const; /// Returns an id of the column located by name. The name should exists in the schema. 
ui32 GetColumnIdVerified(const std::string& name) const; - ui32 GetColumnId(const std::string& name) const { - return GetColumnIdVerified(name); - } std::set GetColumnIdsVerified(const std::set& names) const { std::set result; for (auto&& i : names) { @@ -251,7 +357,7 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { std::optional GetColumnIdOptional(const std::string& name) const; /// Returns a name of the column located by id. - TString GetColumnName(ui32 id, bool required = true) const; + TString GetColumnName(const ui32 id, bool required = true) const; /// Returns names of columns defined by the specific ids. std::vector GetColumnNames(const std::vector& ids) const; @@ -263,44 +369,29 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { } std::vector GetEntityIds() const; - /// Returns info of columns defined by specific ids. - std::vector GetColumns(const std::vector& ids) const; - /// Traditional Primary Key (includes uniqueness, search and sorting logic) - std::vector GetPrimaryKeyColumns() const { - return GetColumns(KeyColumns); + const std::vector& GetPrimaryKeyColumns() const { + return PKColumns; } /// Returns id of the first column of the primary key. ui32 GetPKFirstColumnId() const { - Y_ABORT_UNLESS(KeyColumns.size()); - return KeyColumns[0]; + Y_ABORT_UNLESS(PKColumnIds.size()); + return PKColumnIds[0]; } const std::shared_ptr& GetReplaceKey() const { return PrimaryKey; } const std::shared_ptr& GetPrimaryKey() const { return PrimaryKey; } - /// Initializes sorting, replace, index and extended keys. 
- void SetAllKeys(const std::shared_ptr& operators); - void CheckTtlColumn(const TString& ttlColumn) const { Y_ABORT_UNLESS(!ttlColumn.empty()); - Y_ABORT_UNLESS(MinMaxIdxColumnsIds.contains(GetColumnId(ttlColumn))); + Y_ABORT_UNLESS(MinMaxIdxColumnsIds.contains(GetColumnIdVerified(ttlColumn))); } std::vector GetColumnIds(const std::vector& columnNames) const; - std::shared_ptr ArrowSchema() const; - std::shared_ptr ArrowSchemaWithSpecials() const; - std::shared_ptr AddColumns(const std::shared_ptr& schema, - const std::vector& columns) const; - - std::shared_ptr ArrowColumnFieldOptional(const ui32 columnId) const; - std::shared_ptr ArrowColumnFieldVerified(const ui32 columnId) const; - - const THashSet& GetRequiredColumns() const { - return RequiredColumns; - } + const std::shared_ptr& ArrowSchema() const; + const std::shared_ptr& ArrowSchemaWithSpecials() const; const THashSet& GetMinMaxIdxColumns() const { return MinMaxIdxColumnsIds; @@ -309,8 +400,12 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { bool AllowTtlOverColumn(const TString& name) const; /// Returns whether the sorting keys defined. 
- bool IsSorted() const { return true; } - bool IsSortedColumn(const ui32 columnId) const { return GetPKFirstColumnId() == columnId; } + bool IsSorted() const { + return true; + } + bool IsSortedColumn(const ui32 columnId) const { + return GetPKFirstColumnId() == columnId; + } ui64 GetVersion() const { return Version; @@ -326,18 +421,19 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { TString Name; std::vector SchemaColumnIds; std::vector SchemaColumnIdsWithSpecials; - std::vector PKColumnIds; - std::shared_ptr Schema; - std::shared_ptr SchemaWithSpecials; + std::shared_ptr SchemaWithSpecials; + std::shared_ptr Schema; std::shared_ptr PrimaryKey; - THashSet RequiredColumns; THashSet MinMaxIdxColumnsIds; NArrow::NSerialization::TSerializerContainer DefaultSerializer = NArrow::NSerialization::TSerializerContainer::GetDefaultSerializer(); }; -std::shared_ptr MakeArrowSchema(const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids); +std::shared_ptr MakeArrowSchema(const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, + const std::shared_ptr& cache = nullptr); +std::vector> MakeArrowFields(const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, + const std::shared_ptr& cache = nullptr); /// Extracts columns with the specific ids from the schema. 
std::vector GetColumns(const NTable::TScheme::TTableSchema& tableSchema, const std::vector& ids); -} // namespace NKikimr::NOlap +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/program.cpp b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/program.cpp index d16f5fcfb33f..d6a3e9b800e5 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/program.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/program.cpp @@ -401,12 +401,15 @@ class TNormalForm { public: TNormalForm() = default; - bool Add(const NSsa::TAssign& assign) { + bool Add(const NSsa::TAssign& assign, const TProgramContainer& program) { std::vector> argNodes; for (auto&& arg : assign.GetArguments()) { if (arg.IsGenerated()) { auto it = Nodes.find(arg.GetColumnName()); - AFL_VERIFY(it != Nodes.end()); + if (it == Nodes.end()) { + AFL_CRIT(NKikimrServices::TX_COLUMNSHARD)("event", "program_arg_is_missing")("program", program.DebugString()); + return false; + } argNodes.emplace_back(it->second); } else { argNodes.emplace_back(std::make_shared(arg.GetColumnName())); @@ -443,7 +446,7 @@ std::shared_ptr TDataForIndexesCheckers::Build(const TP auto fStep = program.GetSteps().front(); TNormalForm nForm; for (auto&& s : fStep->GetAssignes()) { - if (!nForm.Add(s)) { + if (!nForm.Add(s, program)) { return nullptr; } } diff --git a/ydb/core/tx/columnshard/engines/scheme/tiering/tier_info.h b/ydb/core/tx/columnshard/engines/scheme/tiering/tier_info.h index c65cb1703ad0..1f5411e8e03c 100644 --- a/ydb/core/tx/columnshard/engines/scheme/tiering/tier_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/tiering/tier_info.h @@ -109,7 +109,7 @@ class TTiering { using TTiersMap = THashMap>; TTiersMap TierByName; TSet OrderedTiers; - TString TTLColumnName; + std::optional TTLColumnName; public: class TTieringContext { @@ -174,9 +174,14 @@ class TTiering { [[nodiscard]] bool Add(const std::shared_ptr& tier) { AFL_VERIFY(tier); if 
(!TTLColumnName) { + if (tier->GetEvictColumnName().Empty()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", "empty_evict_column_name"); + return false; + } TTLColumnName = tier->GetEvictColumnName(); - } else if (TTLColumnName != tier->GetEvictColumnName()) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", "incorrect_tiering_metadata")("column_before", TTLColumnName)("column_new", tier->GetEvictColumnName()); + } else if (*TTLColumnName != tier->GetEvictColumnName()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", "incorrect_tiering_metadata")("column_before", *TTLColumnName) + ("column_new", tier->GetEvictColumnName()); return false; } @@ -194,13 +199,9 @@ class TTiering { return {}; } - const TString& GetTtlColumn() const { - AFL_VERIFY(TTLColumnName); - return TTLColumnName; - } - const TString& GetEvictColumnName() const { - return TTLColumnName; + AFL_VERIFY(TTLColumnName); + return *TTLColumnName; } TString GetDebugString() const { diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp index d0fd1151d223..c7b8e5cb6a53 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp @@ -35,12 +35,12 @@ TConclusion> ISnapshotSchema::Normali return batch; } } - const std::shared_ptr& resultArrowSchema = GetSchema(); + const std::shared_ptr& resultArrowSchema = GetSchema(); std::shared_ptr result = std::make_shared(batch->GetRecordsCount()); for (size_t i = 0; i < resultArrowSchema->fields().size(); ++i) { auto& resultField = resultArrowSchema->fields()[i]; - auto columnId = GetIndexInfo().GetColumnId(resultField->name()); + auto columnId = GetIndexInfo().GetColumnIdVerified(resultField->name()); auto oldField = dataSchema.GetFieldByColumnIdOptional(columnId); if (oldField) { auto fAccessor = batch->GetAccessorByNameOptional(oldField->name()); @@ -55,9 
+55,7 @@ TConclusion> ISnapshotSchema::Normali if (restoreColumnIds.contains(columnId)) { AFL_VERIFY(!!GetExternalDefaultValueVerified(columnId) || GetIndexInfo().IsNullableVerified(columnId))("column_name", GetIndexInfo().GetColumnName(columnId, false))("id", columnId); - result->AddField(resultField, - NArrow::TThreadSimpleArraysCache::Get(resultField->type(), GetExternalDefaultValueVerified(columnId), batch->num_rows())) - .Validate(); + result->AddField(resultField, GetColumnLoaderVerified(columnId)->BuildDefaultAccessor(batch->num_rows())).Validate(); } } return result; @@ -80,16 +78,18 @@ TConclusion> ISnapshotSchema::PrepareForModi return TConclusionStatus::Fail("not valid incoming batch: " + status.ToString()); } - const std::shared_ptr dstSchema = GetIndexInfo().ArrowSchema(); + const std::shared_ptr dstSchema = GetIndexInfo().ArrowSchema(); - auto batch = NArrow::TColumnOperator().SkipIfAbsent().Extract(incomingBatch, dstSchema->field_names()); + auto batch = NArrow::TColumnOperator().SkipIfAbsent().Extract(incomingBatch, dstSchema->fields()); for (auto&& i : batch->schema()->fields()) { - AFL_VERIFY(GetIndexInfo().HasColumnName(i->name())); - if (!dstSchema->GetFieldByName(i->name())->Equals(i)) { - return TConclusionStatus::Fail("not equal field types for column '" + i->name() + "'"); + const ui32 columnId = GetIndexInfo().GetColumnIdVerified(i->name()); + auto fSchema = GetIndexInfo().GetColumnFieldVerified(columnId); + if (!fSchema->Equals(i)) { + return TConclusionStatus::Fail( + "not equal field types for column '" + i->name() + "': " + i->ToString() + " vs " + fSchema->ToString()); } - if (GetIndexInfo().IsNullableVerified(i->name())) { + if (GetIndexInfo().IsNullableVerified(columnId)) { continue; } if (NArrow::HasNulls(batch->GetColumnByName(i->name()))) { @@ -118,18 +118,19 @@ TConclusion> ISnapshotSchema::PrepareForModi switch (mType) { case NEvWrite::EModificationType::Replace: case NEvWrite::EModificationType::Upsert: { - 
AFL_VERIFY(batch->num_columns() <= dstSchema->num_fields()); + AFL_VERIFY(batch->num_columns() <= dstSchema->num_fields()); if (batch->num_columns() < dstSchema->num_fields()) { - for (auto&& f : dstSchema->fields()) { - if (GetIndexInfo().IsNullableVerified(f->name())) { + for (ui32 idx = 0; idx < (ui32)dstSchema->num_fields(); ++idx) { + if (GetIndexInfo().IsNullableVerifiedByIndex(idx)) { continue; } - if (batch->GetColumnByName(f->name())) { + if (GetIndexInfo().GetColumnExternalDefaultValueByIndexVerified(idx)) { continue; } - if (!GetIndexInfo().GetColumnExternalDefaultValueVerified(f->name())) { - return TConclusionStatus::Fail("empty field for non-default column: '" + f->name() + "'"); + if (batch->GetColumnByName(dstSchema->field(idx)->name())) { + continue; } + return TConclusionStatus::Fail("empty field for non-default column: '" + dstSchema->field(idx)->name() + "'"); } } return batch; @@ -141,6 +142,22 @@ TConclusion> ISnapshotSchema::PrepareForModi } } +void ISnapshotSchema::AdaptBatchToSchema(NArrow::TGeneralContainer& batch, const ISnapshotSchema::TPtr& targetSchema) const { + if (targetSchema->GetVersion() != GetVersion()) { + std::vector columnIdxToDelete; + for (size_t columnIdx = 0; columnIdx < batch.GetSchema()->GetFields().size(); ++columnIdx) { + const std::optional targetColumnId = targetSchema->GetColumnIdOptional(batch.GetSchema()->field(columnIdx)->name()); + const ui32 batchColumnId = GetColumnIdVerified(GetFieldByIndex(columnIdx)->name()); + if (!targetColumnId || *targetColumnId != batchColumnId) { + columnIdxToDelete.emplace_back(columnIdx); + } + } + if (!columnIdxToDelete.empty()) { + batch.DeleteFieldsByIndex(columnIdxToDelete); + } + } +} + ui32 ISnapshotSchema::GetColumnId(const std::string& columnName) const { auto id = GetColumnIdOptional(columnName); AFL_VERIFY(id)("column_name", columnName)("schema", JoinSeq(",", GetSchema()->field_names())); @@ -153,19 +170,19 @@ std::shared_ptr 
ISnapshotSchema::GetFieldByColumnIdVerified(const return result; } -std::shared_ptr ISnapshotSchema::GetColumnLoaderVerified(const ui32 columnId) const { +std::shared_ptr ISnapshotSchema::GetColumnLoaderVerified(const ui32 columnId) const { auto result = GetColumnLoaderOptional(columnId); AFL_VERIFY(result); return result; } -std::shared_ptr ISnapshotSchema::GetColumnLoaderVerified(const std::string& columnName) const { +std::shared_ptr ISnapshotSchema::GetColumnLoaderVerified(const std::string& columnName) const { auto result = GetColumnLoaderOptional(columnName); AFL_VERIFY(result); return result; } -std::shared_ptr ISnapshotSchema::GetColumnLoaderOptional(const std::string& columnName) const { +std::shared_ptr ISnapshotSchema::GetColumnLoaderOptional(const std::string& columnName) const { const std::optional id = GetColumnIdOptional(columnName); if (id) { return GetColumnLoaderOptional(*id); @@ -190,8 +207,9 @@ std::vector> ISnapshotSchema::GetAbsentFields(cons TConclusionStatus ISnapshotSchema::CheckColumnsDefault(const std::vector>& fields) const { for (auto&& i : fields) { - auto defaultValue = GetExternalDefaultValueVerified(i->name()); - if (!defaultValue && !GetIndexInfo().IsNullableVerified(i->name())) { + const ui32 colId = GetColumnIdVerified(i->name()); + auto defaultValue = GetExternalDefaultValueVerified(colId); + if (!defaultValue && !GetIndexInfo().IsNullableVerified(colId)) { return TConclusionStatus::Fail("not nullable field with no default: " + i->name()); } } @@ -202,8 +220,9 @@ TConclusion> ISnapshotSchema::BuildDefaultBa const std::vector>& fields, const ui32 rowsCount, const bool force) const { std::vector> columns; for (auto&& i : fields) { - auto defaultValue = GetExternalDefaultValueVerified(i->name()); - if (!defaultValue && !GetIndexInfo().IsNullableVerified(i->name())) { + const ui32 columnId = GetColumnIdVerified(i->name()); + auto defaultValue = GetExternalDefaultValueVerified(columnId); + if (!defaultValue && 
!GetIndexInfo().IsNullableVerified(columnId)) { if (force) { defaultValue = NArrow::DefaultScalar(i->type()); } else { diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h index a2a4cefd9215..962989d75fb2 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h +++ b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h @@ -1,8 +1,8 @@ #pragma once #include -#include -#include +#include +#include #include #include @@ -21,18 +21,19 @@ class ISnapshotSchema { using TPtr = std::shared_ptr; virtual ~ISnapshotSchema() {} - virtual std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const = 0; - std::shared_ptr GetColumnLoaderVerified(const ui32 columnId) const; - std::shared_ptr GetColumnLoaderOptional(const std::string& columnName) const; - std::shared_ptr GetColumnLoaderVerified(const std::string& columnName) const; + virtual std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const = 0; + std::shared_ptr GetColumnLoaderVerified(const ui32 columnId) const; + std::shared_ptr GetColumnLoaderOptional(const std::string& columnName) const; + std::shared_ptr GetColumnLoaderVerified(const std::string& columnName) const; bool IsSpecialColumnId(const ui32 columnId) const; + virtual const std::vector& GetColumnIds() const = 0; - virtual TColumnSaver GetColumnSaver(const ui32 columnId) const = 0; - TColumnSaver GetColumnSaver(const TString& columnName) const { + virtual NArrow::NAccessor::TColumnSaver GetColumnSaver(const ui32 columnId) const = 0; + NArrow::NAccessor::TColumnSaver GetColumnSaver(const TString& columnName) const { return GetColumnSaver(GetColumnId(columnName)); } - TColumnSaver GetColumnSaver(const std::string& columnName) const { + NArrow::NAccessor::TColumnSaver GetColumnSaver(const std::string& columnName) const { return GetColumnSaver(TString(columnName.data(), columnName.size())); } @@ -48,6 +49,7 @@ class 
ISnapshotSchema { std::vector GetPKColumnNames() const; virtual std::optional GetColumnIdOptional(const std::string& columnName) const = 0; + virtual ui32 GetColumnIdVerified(const std::string& columnName) const = 0; virtual int GetFieldIndex(const ui32 columnId) const = 0; bool HasColumnId(const ui32 columnId) const { return GetFieldIndex(columnId) >= 0; @@ -61,7 +63,7 @@ class ISnapshotSchema { TString DebugString() const { return DoDebugString(); } - virtual const std::shared_ptr& GetSchema() const = 0; + virtual const std::shared_ptr& GetSchema() const = 0; virtual const TIndexInfo& GetIndexInfo() const = 0; virtual const TSnapshot& GetSnapshot() const = 0; virtual ui64 GetVersion() const = 0; @@ -75,6 +77,7 @@ class ISnapshotSchema { const ISnapshotSchema& dataSchema, const std::shared_ptr& batch, const std::set& restoreColumnIds) const; [[nodiscard]] TConclusion> PrepareForModification( const std::shared_ptr& incomingBatch, const NEvWrite::EModificationType mType) const; + void AdaptBatchToSchema(NArrow::TGeneralContainer& batch, const ISnapshotSchema::TPtr& targetSchema) const; }; } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.cpp b/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.cpp index 8832e7eb0ec8..a371806b2349 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.cpp @@ -4,66 +4,58 @@ namespace NKikimr::NOlap { -TFilteredSnapshotSchema::TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::vector& columnIds) - : TFilteredSnapshotSchema(originalSnapshot, std::set(columnIds.begin(), columnIds.end())) -{} +TFilteredSnapshotSchema::TFilteredSnapshotSchema(const ISnapshotSchema::TPtr& originalSnapshot, const std::set& columnIds) + : TFilteredSnapshotSchema(originalSnapshot, std::vector(columnIds.begin(), columnIds.end())) { +} 
-TFilteredSnapshotSchema::TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::set& columnIds) +TFilteredSnapshotSchema::TFilteredSnapshotSchema(const ISnapshotSchema::TPtr& originalSnapshot, const std::vector& columnIds) : OriginalSnapshot(originalSnapshot) , ColumnIds(columnIds) { std::vector> schemaFields; - for (auto&& i : OriginalSnapshot->GetSchema()->fields()) { - if (!ColumnIds.contains(OriginalSnapshot->GetIndexInfo().GetColumnId(i->name()))) { - continue; - } - schemaFields.emplace_back(i); - } - Schema = std::make_shared(schemaFields); -} - -TFilteredSnapshotSchema::TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::set& columnNames) - : OriginalSnapshot(originalSnapshot) { - for (auto&& i : columnNames) { - ColumnIds.emplace(OriginalSnapshot->GetColumnId(i)); - } - std::vector> schemaFields; - for (auto&& i : OriginalSnapshot->GetSchema()->fields()) { - if (!columnNames.contains(i->name())) { - continue; - } - schemaFields.emplace_back(i); + for (auto&& i : columnIds) { + IdIntoIndex.emplace(i, schemaFields.size()); + schemaFields.emplace_back(originalSnapshot->GetFieldByColumnIdVerified(i)); } - Schema = std::make_shared(schemaFields); + Schema = std::make_shared(schemaFields); } TColumnSaver TFilteredSnapshotSchema::GetColumnSaver(const ui32 columnId) const { - Y_ABORT_UNLESS(ColumnIds.contains(columnId)); + AFL_VERIFY(IdIntoIndex.contains(columnId)); return OriginalSnapshot->GetColumnSaver(columnId); } std::shared_ptr TFilteredSnapshotSchema::GetColumnLoaderOptional(const ui32 columnId) const { - Y_ABORT_UNLESS(ColumnIds.contains(columnId)); + AFL_VERIFY(IdIntoIndex.contains(columnId)); return OriginalSnapshot->GetColumnLoaderOptional(columnId); } std::optional TFilteredSnapshotSchema::GetColumnIdOptional(const std::string& columnName) const { - return OriginalSnapshot->GetColumnIdOptional(columnName); + auto result = OriginalSnapshot->GetColumnIdOptional(columnName); + if (!result) { + return result; + } 
+ if (!IdIntoIndex.contains(*result)) { + return std::nullopt; + } + return result; +} + +ui32 TFilteredSnapshotSchema::GetColumnIdVerified(const std::string& columnName) const { + auto result = OriginalSnapshot->GetColumnIdVerified(columnName); + AFL_VERIFY(IdIntoIndex.contains(result)); + return result; } int TFilteredSnapshotSchema::GetFieldIndex(const ui32 columnId) const { - if (!ColumnIds.contains(columnId)) { - return -1; - } - TString columnName = OriginalSnapshot->GetIndexInfo().GetColumnName(columnId, false); - if (!columnName) { + auto it = IdIntoIndex.find(columnId); + if (it == IdIntoIndex.end()) { return -1; } - std::string name(columnName.data(), columnName.size()); - return Schema->GetFieldIndex(name); + return it->second; } -const std::shared_ptr& TFilteredSnapshotSchema::GetSchema() const { +const std::shared_ptr& TFilteredSnapshotSchema::GetSchema() const { return Schema; } diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.h b/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.h index 1b515d5bb9cf..8fc82ee6a304 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.h +++ b/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.h @@ -8,21 +8,26 @@ namespace NKikimr::NOlap { class TFilteredSnapshotSchema: public ISnapshotSchema { ISnapshotSchema::TPtr OriginalSnapshot; - std::shared_ptr Schema; - YDB_READONLY_DEF(std::set, ColumnIds); + std::shared_ptr Schema; + std::vector ColumnIds; + THashMap IdIntoIndex; + protected: virtual TString DoDebugString() const override; public: - TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::vector& columnIds); - TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::set& columnIds); - TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::set& columnNames); + TFilteredSnapshotSchema(const ISnapshotSchema::TPtr& originalSnapshot, const std::vector& columnIds); + 
TFilteredSnapshotSchema(const ISnapshotSchema::TPtr& originalSnapshot, const std::set& columnIds); + virtual const std::vector& GetColumnIds() const override { + return ColumnIds; + } TColumnSaver GetColumnSaver(const ui32 columnId) const override; std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const override; std::optional GetColumnIdOptional(const std::string& columnName) const override; + ui32 GetColumnIdVerified(const std::string& columnName) const override; int GetFieldIndex(const ui32 columnId) const override; - const std::shared_ptr& GetSchema() const override; + const std::shared_ptr& GetSchema() const override; const TIndexInfo& GetIndexInfo() const override; const TSnapshot& GetSnapshot() const override; ui32 GetColumnsCount() const override; diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.cpp b/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.cpp index 1fe6820cf547..05277b7b8967 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.cpp @@ -21,16 +21,15 @@ std::optional TSnapshotSchema::GetColumnIdOptional(const std::string& colu return IndexInfo.GetColumnIdOptional(columnName); } +ui32 TSnapshotSchema::GetColumnIdVerified(const std::string& columnName) const { + return IndexInfo.GetColumnIdVerified(columnName); +} + int TSnapshotSchema::GetFieldIndex(const ui32 columnId) const { - const TString& columnName = IndexInfo.GetColumnName(columnId, false); - if (!columnName) { - return -1; - } - std::string name(columnName.data(), columnName.size()); - return Schema->GetFieldIndex(name); + return IndexInfo.GetColumnIndexOptional(columnId).value_or(-1); } -const std::shared_ptr& TSnapshotSchema::GetSchema() const { +const std::shared_ptr& TSnapshotSchema::GetSchema() const { return Schema; } diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.h 
b/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.h index 539d8f99a02c..5fa3c4ef7551 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.h +++ b/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.h @@ -9,7 +9,7 @@ namespace NKikimr::NOlap { class TSnapshotSchema: public ISnapshotSchema { private: TIndexInfo IndexInfo; - std::shared_ptr Schema; + std::shared_ptr Schema; TSnapshot Snapshot; protected: virtual TString DoDebugString() const override { @@ -23,12 +23,17 @@ class TSnapshotSchema: public ISnapshotSchema { public: TSnapshotSchema(TIndexInfo&& indexInfo, const TSnapshot& snapshot); + virtual const std::vector& GetColumnIds() const override { + return IndexInfo.GetColumnIds(); + } + TColumnSaver GetColumnSaver(const ui32 columnId) const override; std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const override; std::optional GetColumnIdOptional(const std::string& columnName) const override; + ui32 GetColumnIdVerified(const std::string& columnName) const override; int GetFieldIndex(const ui32 columnId) const override; - const std::shared_ptr& GetSchema() const override; + const std::shared_ptr& GetSchema() const override; const TIndexInfo& GetIndexInfo() const override; const TSnapshot& GetSnapshot() const override; ui32 GetColumnsCount() const override; diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/versioned_index.h b/ydb/core/tx/columnshard/engines/scheme/versions/versioned_index.h index 70cf8830b051..fe554a790d8f 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/versioned_index.h +++ b/ydb/core/tx/columnshard/engines/scheme/versions/versioned_index.h @@ -91,7 +91,7 @@ class TVersionedIndex { } } Y_ABORT_UNLESS(!Snapshots.empty()); - Y_ABORT_UNLESS(version.IsZero()); +// Y_ABORT_UNLESS(version.IsZero()); return Snapshots.begin()->second; } diff --git a/ydb/core/tx/columnshard/engines/scheme/ya.make b/ydb/core/tx/columnshard/engines/scheme/ya.make index 
8e41573bf419..744458ff4dcb 100644 --- a/ydb/core/tx/columnshard/engines/scheme/ya.make +++ b/ydb/core/tx/columnshard/engines/scheme/ya.make @@ -20,6 +20,7 @@ PEERDIR( ydb/core/tx/columnshard/engines/scheme/tiering ydb/core/tx/columnshard/engines/scheme/column ydb/core/tx/columnshard/engines/scheme/defaults + ydb/core/formats/arrow/accessor ydb/core/tx/columnshard/blobs_action/abstract ) diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp index 88a8ca36f031..f664eb6afb6a 100644 --- a/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp +++ b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp @@ -174,7 +174,7 @@ void TTieringActualizer::DoExtractTasks(TTieringProcessContext& tasksContext, co void TTieringActualizer::Refresh(const std::optional& info, const TAddExternalContext& externalContext) { Tiering = info; if (Tiering) { - TieringColumnId = VersionedIndex.GetLastSchema()->GetColumnId(Tiering->GetTtlColumn()); + TieringColumnId = VersionedIndex.GetLastSchema()->GetColumnId(Tiering->GetEvictColumnName()); } else { TieringColumnId = {}; } diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/column.cpp b/ydb/core/tx/columnshard/engines/storage/chunks/column.cpp index 485802b0e3c1..4a527f913fc7 100644 --- a/ydb/core/tx/columnshard/engines/storage/chunks/column.cpp +++ b/ydb/core/tx/columnshard/engines/storage/chunks/column.cpp @@ -1,18 +1,20 @@ #include "column.h" -#include +#include namespace NKikimr::NOlap::NChunks { -std::vector> TChunkPreparation::DoInternalSplitImpl(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const { - auto rb = NArrow::TStatusValidator::GetValid(ColumnInfo.GetLoader()->Apply(Data)); +std::vector> TChunkPreparation::DoInternalSplitImpl( + const TColumnSaver& saver, const std::shared_ptr& /*counters*/, const std::vector& splitSizes) const { + auto 
accessor = ColumnInfo.GetLoader()->ApplyVerified(Data, GetRecordsCountVerified()); + std::vector chunks = accessor->SplitBySizes(saver, Data, splitSizes); - auto chunks = TSimpleSplitter(saver, counters).SplitBySizes(rb, Data, splitSizes); std::vector> newChunks; for (auto&& i : chunks) { - Y_ABORT_UNLESS(i.GetSlicedBatch()->num_columns() == 1); - newChunks.emplace_back(std::make_shared(saver.Apply(i.GetSlicedBatch()), i.GetSlicedBatch()->column(0), TChunkAddress(GetColumnId(), GetChunkIdxOptional().value_or(0)), ColumnInfo)); + newChunks.emplace_back(std::make_shared( + i.GetSerializedData(), i.GetArray(), TChunkAddress(GetColumnId(), GetChunkIdxOptional().value_or(0)), ColumnInfo)); } + return newChunks; } -} +} // namespace NKikimr::NOlap::NChunks diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/column.h b/ydb/core/tx/columnshard/engines/storage/chunks/column.h index a8c4be1ae3de..9de818c49fb6 100644 --- a/ydb/core/tx/columnshard/engines/storage/chunks/column.h +++ b/ydb/core/tx/columnshard/engines/storage/chunks/column.h @@ -56,14 +56,14 @@ class TChunkPreparation: public IPortionColumnChunk { AFL_VERIFY(Data.size() == Record.BlobRange.Size || Record.BlobRange.Size == 0)("data", Data.size())("record", Record.BlobRange.Size); } - TChunkPreparation(const TString& data, const std::shared_ptr& column, const TChunkAddress& address, const TSimpleColumnInfo& columnInfo) + TChunkPreparation(const TString& data, const std::shared_ptr& column, const TChunkAddress& address, const TSimpleColumnInfo& columnInfo) : TBase(address.GetColumnId()) , Data(data) , Record(address, column, columnInfo) , ColumnInfo(columnInfo) { - Y_ABORT_UNLESS(column->length()); - First = NArrow::TStatusValidator::GetValid(column->GetScalar(0)); - Last = NArrow::TStatusValidator::GetValid(column->GetScalar(column->length() - 1)); + Y_ABORT_UNLESS(column->GetRecordsCount()); + First = column->GetScalar(0); + Last = column->GetScalar(column->GetRecordsCount() - 1); Record.BlobRange.Size = 
data.size(); } }; diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/data.cpp b/ydb/core/tx/columnshard/engines/storage/chunks/data.cpp index 79613b5b9798..007dff83e914 100644 --- a/ydb/core/tx/columnshard/engines/storage/chunks/data.cpp +++ b/ydb/core/tx/columnshard/engines/storage/chunks/data.cpp @@ -9,4 +9,13 @@ void TPortionIndexChunk::DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRan portionInfo.AddIndex(TIndexChunk(GetEntityId(), GetChunkIdxVerified(), RecordsCount, RawBytes, bRange)); } +std::shared_ptr TPortionIndexChunk::DoCopyWithAnotherBlob( + TString&& data, const TSimpleColumnInfo& /*columnInfo*/) const { + return std::make_shared(GetChunkAddressVerified(), RecordsCount, RawBytes, std::move(data)); +} + +void TPortionIndexChunk::DoAddInplaceIntoPortion(TPortionInfoConstructor& portionInfo) const { + portionInfo.AddIndex(TIndexChunk(GetEntityId(), GetChunkIdxVerified(), RecordsCount, RawBytes, GetData())); +} + } // namespace NKikimr::NOlap::NIndexes \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/data.h b/ydb/core/tx/columnshard/engines/storage/chunks/data.h index 8409243df347..e3f22ae2ed9d 100644 --- a/ydb/core/tx/columnshard/engines/storage/chunks/data.h +++ b/ydb/core/tx/columnshard/engines/storage/chunks/data.h @@ -36,9 +36,9 @@ class TPortionIndexChunk: public IPortionDataChunk { return nullptr; } virtual void DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfoConstructor& portionInfo) const override; - virtual std::shared_ptr DoCopyWithAnotherBlob(TString&& data, const TSimpleColumnInfo& /*columnInfo*/) const override { - return std::make_shared(GetChunkAddressVerified(), RecordsCount, RawBytes, std::move(data)); - } + virtual std::shared_ptr DoCopyWithAnotherBlob(TString&& data, const TSimpleColumnInfo& /*columnInfo*/) const override; + virtual void DoAddInplaceIntoPortion(TPortionInfoConstructor& portionInfo) const override; + public: TPortionIndexChunk(const 
TChunkAddress& address, const ui32 recordsCount, const ui64 rawBytes, const TString& data) : TBase(address.GetColumnId(), address.GetChunkIdx()) diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/null_column.cpp b/ydb/core/tx/columnshard/engines/storage/chunks/null_column.cpp deleted file mode 100644 index 9aa56e56eda3..000000000000 --- a/ydb/core/tx/columnshard/engines/storage/chunks/null_column.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "null_column.h" - -namespace NKikimr::NOlap::NChunks { - -} diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/null_column.h b/ydb/core/tx/columnshard/engines/storage/chunks/null_column.h deleted file mode 100644 index 3b2420f0c223..000000000000 --- a/ydb/core/tx/columnshard/engines/storage/chunks/null_column.h +++ /dev/null @@ -1,60 +0,0 @@ -#pragma once -#include -#include -#include -#include - -namespace NKikimr::NOlap::NChunks { - -class TDefaultChunkPreparation: public IPortionColumnChunk { -private: - using TBase = IPortionColumnChunk; - const std::shared_ptr DefaultValue; - const ui32 RecordsCount; - ui64 RawBytes = 0; - TString Data; -protected: - virtual std::vector> DoInternalSplitImpl(const TColumnSaver& /*saver*/, const std::shared_ptr& /*counters*/, - const std::vector& /*splitSizes*/) const override { - AFL_VERIFY(false); - return {}; - } - virtual const TString& DoGetData() const override { - return Data; - } - virtual ui32 DoGetRecordsCountImpl() const override { - return RecordsCount; - } - virtual ui64 DoGetRawBytesImpl() const override { - return RawBytes; - } - virtual TString DoDebugString() const override { - return TStringBuilder() << "rc=" << RecordsCount << ";data_size=" << Data.size() << ";"; - } - virtual TSimpleChunkMeta DoBuildSimpleChunkMeta() const override { - AFL_VERIFY(false); - return TSimpleChunkMeta(nullptr, false, false); - } - virtual std::shared_ptr DoGetFirstScalar() const override { - return DefaultValue; - } - virtual std::shared_ptr DoGetLastScalar() const override { - 
return DefaultValue; - } - -public: - TDefaultChunkPreparation(const ui32 columnId, const ui32 recordsCount, const std::shared_ptr& f, - const std::shared_ptr& defaultValue, const TColumnSaver& saver) - : TBase(columnId) - , DefaultValue(defaultValue) - , RecordsCount(recordsCount) - { - Y_ABORT_UNLESS(RecordsCount); - auto arrowData = NArrow::TThreadSimpleArraysCache::Get(f->type(), defaultValue, RecordsCount); - RawBytes = NArrow::GetArrayDataSize(arrowData); - Data = saver.Apply(arrowData, f); - SetChunkIdx(0); - } -}; - -} diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/ya.make b/ydb/core/tx/columnshard/engines/storage/chunks/ya.make index d61554bd6f0c..cff5b9f40b53 100644 --- a/ydb/core/tx/columnshard/engines/storage/chunks/ya.make +++ b/ydb/core/tx/columnshard/engines/storage/chunks/ya.make @@ -3,7 +3,6 @@ LIBRARY() SRCS( data.cpp column.cpp - null_column.cpp ) PEERDIR( diff --git a/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp b/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp index d38851486e3d..ebb2b9acde63 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp +++ b/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp @@ -47,10 +47,9 @@ bool TGranuleMeta::ErasePortion(const ui64 portion) { void TGranuleMeta::OnAfterChangePortion(const std::shared_ptr portionAfter, NStorageOptimizer::IOptimizerPlanner::TModificationGuard* modificationGuard) { if (portionAfter) { - PortionsIndex.AddPortion(portionAfter); - PortionInfoGuard.OnNewPortion(portionAfter); if (!portionAfter->HasRemoveSnapshot()) { + PortionsIndex.AddPortion(portionAfter); if (modificationGuard) { modificationGuard->AddPortion(portionAfter); } else { @@ -74,10 +73,9 @@ void TGranuleMeta::OnAfterChangePortion(const std::shared_ptr port void TGranuleMeta::OnBeforeChangePortion(const std::shared_ptr portionBefore) { if (portionBefore) { - PortionsIndex.RemovePortion(portionBefore); - PortionInfoGuard.OnDropPortion(portionBefore); if 
(!portionBefore->HasRemoveSnapshot()) { + PortionsIndex.RemovePortion(portionBefore); OptimizerPlanner->StartModificationGuard().RemovePortion(portionBefore); ActualizationIndex->RemovePortion(portionBefore); } @@ -138,8 +136,7 @@ TGranuleMeta::TGranuleMeta(const ui64 pathId, const TGranulesStorage& owner, con , PortionInfoGuard(owner.GetCounters().BuildPortionBlobsGuard()) , Stats(owner.GetStats()) , StoragesManager(owner.GetStoragesManager()) - , PortionsIndex(*this) -{ + , PortionsIndex(*this, Counters.GetPortionsIndexCounters()) { NStorageOptimizer::IOptimizerPlannerConstructor::TBuildContext context(PathId, owner.GetStoragesManager(), versionedIndex.GetLastSchema()->GetIndexInfo().GetPrimaryKey()); OptimizerPlanner = versionedIndex.GetLastSchema()->GetIndexInfo().GetCompactionPlannerConstructor()->BuildPlanner(context).DetachResult(); AFL_VERIFY(!!OptimizerPlanner); diff --git a/ydb/core/tx/columnshard/engines/storage/granule/granule.h b/ydb/core/tx/columnshard/engines/storage/granule/granule.h index c8b3e302f1e7..d79ef50e1883 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/granule.h +++ b/ydb/core/tx/columnshard/engines/storage/granule/granule.h @@ -21,9 +21,10 @@ class TColumnChunkLoadContext; class TDataClassSummary: public NColumnShard::TBaseGranuleDataClassSummary { private: friend class TGranuleMeta; - THashMap ColumnStats; + THashMap ColumnStats; + public: - const THashMap& GetColumnStats() const { + const THashMap& GetColumnStats() const { return ColumnStats; } @@ -231,11 +232,11 @@ class TGranuleMeta: TNonCopyable { } } - std::shared_ptr BuildSerializationStats(ISnapshotSchema::TPtr schema) const { - auto result = std::make_shared(); + std::shared_ptr BuildSerializationStats(ISnapshotSchema::TPtr schema) const { + auto result = std::make_shared(); for (auto&& i : GetAdditiveSummary().GetCompacted().GetColumnStats()) { auto field = schema->GetFieldByColumnIdVerified(i.first); - NOlap::TColumnSerializationStat columnInfo(i.first, 
field->name()); + NArrow::NSplitter::TColumnSerializationStat columnInfo(i.first, field->name()); columnInfo.Merge(i.second); result->AddStat(columnInfo); } diff --git a/ydb/core/tx/columnshard/engines/storage/granule/portions_index.cpp b/ydb/core/tx/columnshard/engines/storage/granule/portions_index.cpp index 676d40ea1c48..e56487e5f8ef 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/portions_index.cpp +++ b/ydb/core/tx/columnshard/engines/storage/granule/portions_index.cpp @@ -11,7 +11,7 @@ TPortionsIndex::TPortionIntervals TPortionsIndex::GetIntervalFeatures(const TPor TPortionIntervals portionExcludeIntervals; while (true) { std::optional nextKey; - for (auto&& p : itFrom->second.GetPortionIds()) { + for (auto&& [p, _] : itFrom->second.GetPortionIds()) { if (skipPortions.contains(p)) { continue; } @@ -55,9 +55,13 @@ void TPortionsIndex::RemovePortion(const std::shared_ptr& p) { auto itTo = Points.find(p->IndexKeyEnd()); AFL_VERIFY(itTo != Points.end()); { + const TPortionInfoStat stat(p); auto it = itFrom; while (true) { - it->second.RemoveContained(p->GetPortionId()); + RemoveFromMemoryUsageControl(it->second.GetIntervalStats()); + it->second.RemoveContained(stat); + RawMemoryUsage.Add(it->second.GetIntervalStats().GetMinRawBytes()); + BlobMemoryUsage.Add(it->second.GetIntervalStats().GetBlobBytes()); if (it == itTo) { break; } @@ -67,19 +71,24 @@ void TPortionsIndex::RemovePortion(const std::shared_ptr& p) { if (itFrom != itTo) { itFrom->second.RemoveStart(p); if (itFrom->second.IsEmpty()) { + RemoveFromMemoryUsageControl(itFrom->second.GetIntervalStats()); Points.erase(itFrom); } itTo->second.RemoveFinish(p); if (itTo->second.IsEmpty()) { + RemoveFromMemoryUsageControl(itTo->second.GetIntervalStats()); Points.erase(itTo); } } else { itTo->second.RemoveStart(p); itTo->second.RemoveFinish(p); if (itTo->second.IsEmpty()) { + RemoveFromMemoryUsageControl(itTo->second.GetIntervalStats()); Points.erase(itTo); } } + RawMemoryUsage.FlushCounters(); + 
BlobMemoryUsage.FlushCounters(); } void TPortionsIndex::AddPortion(const std::shared_ptr& p) { @@ -89,13 +98,19 @@ void TPortionsIndex::AddPortion(const std::shared_ptr& p) { itTo->second.AddFinish(p); auto it = itFrom; + const TPortionInfoStat stat(p); while (true) { - it->second.AddContained(p->GetPortionId()); + RemoveFromMemoryUsageControl(it->second.GetIntervalStats()); + it->second.AddContained(stat); + RawMemoryUsage.Add(it->second.GetIntervalStats().GetMinRawBytes()); + BlobMemoryUsage.Add(it->second.GetIntervalStats().GetBlobBytes()); if (it == itTo) { break; } AFL_VERIFY(++it != Points.end()); } + RawMemoryUsage.FlushCounters(); + BlobMemoryUsage.FlushCounters(); } } \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/storage/granule/portions_index.h b/ydb/core/tx/columnshard/engines/storage/granule/portions_index.h index 09ca2d65e7c0..981943dc4dab 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/portions_index.h +++ b/ydb/core/tx/columnshard/engines/storage/granule/portions_index.h @@ -1,4 +1,5 @@ #pragma once +#include #include namespace NKikimr::NOlap { @@ -7,26 +8,74 @@ class TGranuleMeta; namespace NKikimr::NOlap::NGranule::NPortionsIndex { +class TPortionInfoStat { +private: + std::shared_ptr PortionInfo; + YDB_READONLY(ui64, MinRawBytes, 0); + YDB_READONLY(ui64, BlobBytes, 0); + +public: + TPortionInfoStat(const std::shared_ptr& portionInfo) + : PortionInfo(portionInfo) + , MinRawBytes(PortionInfo->GetMinMemoryForReadColumns({})) + , BlobBytes(PortionInfo->GetTotalBlobBytes()) + { + + } + + const TPortionInfo& GetPortionInfoVerified() const { + AFL_VERIFY(PortionInfo); + return *PortionInfo; + } +}; + +class TIntervalInfoStat { +private: + YDB_READONLY(ui64, MinRawBytes, 0); + YDB_READONLY(ui64, BlobBytes, 0); + +public: + void Add(const TPortionInfoStat& source) { + MinRawBytes += source.GetMinRawBytes(); + BlobBytes += source.GetBlobBytes(); + } + + void Sub(const TPortionInfoStat& source) { + 
AFL_VERIFY(MinRawBytes >= source.GetMinRawBytes()); + MinRawBytes -= source.GetMinRawBytes(); + AFL_VERIFY(BlobBytes >= source.GetBlobBytes()); + BlobBytes -= source.GetBlobBytes(); + AFL_VERIFY(!!BlobBytes == !!MinRawBytes); + } + + bool operator!() const { + return !BlobBytes && !MinRawBytes; + } +}; + class TPortionsPKPoint { private: THashMap> Start; THashMap> Finish; - THashSet PortionIds; + THashMap PortionIds; + YDB_READONLY_DEF(TIntervalInfoStat, IntervalStats); + public: const THashMap>& GetStart() const { return Start; } void ProvidePortions(const TPortionsPKPoint& source) { - for (auto&& i : source.PortionIds) { + IntervalStats = TIntervalInfoStat(); + for (auto&& [i, stat] : source.PortionIds) { if (source.Finish.contains(i)) { continue; } - AFL_VERIFY(PortionIds.emplace(i).second); + AddContained(stat); } } - const THashSet& GetPortionIds() const { + const THashMap& GetPortionIds() const { return PortionIds; } @@ -34,12 +83,19 @@ class TPortionsPKPoint { return Start.empty() && Finish.empty(); } - void AddContained(const ui64 portionId) { - AFL_VERIFY(PortionIds.emplace(portionId).second); + void AddContained(const TPortionInfoStat& stat) { + if (!stat.GetPortionInfoVerified().HasRemoveSnapshot()) { + IntervalStats.Add(stat); + } + AFL_VERIFY(PortionIds.emplace(stat.GetPortionInfoVerified().GetPortionId(), stat).second); } - void RemoveContained(const ui64 portionId) { - AFL_VERIFY(PortionIds.erase(portionId)); + void RemoveContained(const TPortionInfoStat& stat) { + if (!stat.GetPortionInfoVerified().HasRemoveSnapshot()) { + IntervalStats.Sub(stat); + } + AFL_VERIFY(PortionIds.erase(stat.GetPortionInfoVerified().GetPortionId())); + AFL_VERIFY(PortionIds.size() || !IntervalStats); } void RemoveStart(const std::shared_ptr& p) { @@ -61,9 +117,48 @@ class TPortionsPKPoint { } }; +class TIntervalMemoryMonitoring { +private: + std::map CountMemoryUsages; + const NColumnShard::TIntervalMemoryCounters& Counters; + +public: + void Add(const ui64 mem) { + 
++CountMemoryUsages[mem]; + } + + void Remove(const ui64 mem) { + auto it = CountMemoryUsages.find(mem); + AFL_VERIFY(it != CountMemoryUsages.end())("mem", mem); + if (!--it->second) { + CountMemoryUsages.erase(it); + } + } + + TIntervalMemoryMonitoring(const NColumnShard::TIntervalMemoryCounters& counters) + : Counters(counters) + { + + } + + ui64 GetMax() const { + if (CountMemoryUsages.size()) { + return CountMemoryUsages.rbegin()->first; + } else { + return 0; + } + } + + void FlushCounters() const { + Counters.MinReadBytes->SetValue(GetMax()); + } +}; + class TPortionsIndex { private: std::map Points; + TIntervalMemoryMonitoring RawMemoryUsage; + TIntervalMemoryMonitoring BlobMemoryUsage; const TGranuleMeta& Owner; std::map::iterator InsertPoint(const NArrow::TReplaceKey& key) { @@ -75,17 +170,34 @@ class TPortionsIndex { --itPred; it->second.ProvidePortions(itPred->second); } + RawMemoryUsage.Add(it->second.GetIntervalStats().GetMinRawBytes()); + BlobMemoryUsage.Add(it->second.GetIntervalStats().GetBlobBytes()); } return it; } + void RemoveFromMemoryUsageControl(const TIntervalInfoStat& stat) { + RawMemoryUsage.Remove(stat.GetMinRawBytes()); + BlobMemoryUsage.Remove(stat.GetBlobBytes()); + } + public: - TPortionsIndex(const TGranuleMeta& owner) - : Owner(owner) + TPortionsIndex(const TGranuleMeta& owner, const NColumnShard::TPortionsIndexCounters& counters) + : RawMemoryUsage(counters.RawBytes) + , BlobMemoryUsage(counters.BlobBytes) + , Owner(owner) { } + ui64 GetMinRawMemoryRead() const { + return RawMemoryUsage.GetMax(); + } + + ui64 GetMinBlobMemoryRead() const { + return BlobMemoryUsage.GetMax(); + } + const std::map& GetPoints() const { return Points; } diff --git a/ydb/core/tx/columnshard/engines/storage/granule/storage.cpp b/ydb/core/tx/columnshard/engines/storage/granule/storage.cpp index 385f9d818d23..b017464eefeb 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/storage.cpp +++ 
b/ydb/core/tx/columnshard/engines/storage/granule/storage.cpp @@ -8,7 +8,7 @@ std::shared_ptr TGranulesStorage::GetGranuleForCom std::map> granulesSorted; ui32 countChecker = 0; std::optional priorityChecker; - const TDuration actualizationLag = NYDBTest::TControllers::GetColumnShardController()->GetCompactionActualizationLag(TDuration::Seconds(1)); + const TDuration actualizationLag = NYDBTest::TControllers::GetColumnShardController()->GetCompactionActualizationLag(); for (auto&& i : Tables) { NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("path_id", i.first); i.second->ActualizeOptimizer(now, actualizationLag); diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.cpp index 3556cb3d1a9d..553daec4f0fe 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.cpp +++ b/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.cpp @@ -16,8 +16,8 @@ TString TIndexMeta::DoBuildIndexImpl(TChunkedBatchReader& reader) const { { TChunkedColumnReader cReader = *reader.begin(); for (reader.Start(); cReader.IsCorrect(); cReader.ReadNextChunk()) { - auto minMax = NArrow::FindMinMaxPosition(cReader.GetCurrentChunk()); - auto currentScalar = NArrow::GetScalar(cReader.GetCurrentChunk(), minMax.second); + auto currentScalar = cReader.GetCurrentAccessor()->GetMaxScalar(); + AFL_VERIFY(currentScalar); if (!result || NArrow::ScalarCompare(*result, *currentScalar) == -1) { result = currentScalar; } diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.cpp index fd21cb5055cb..3f8634cac619 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.cpp +++ b/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.cpp @@ -6,16 +6,6 @@ namespace NKikimr::NOlap::NIndexes { -void TPortionIndexChunk::DoAddIntoPortionBeforeBlob( - const TBlobRangeLink16& bRange, 
TPortionInfoConstructor& portionInfo) const { - AFL_VERIFY(!bRange.IsValid()); - portionInfo.AddIndex(TIndexChunk(GetEntityId(), GetChunkIdxVerified(), RecordsCount, RawBytes, bRange)); -} - -void TPortionIndexChunk::DoAddInplaceIntoPortion(TPortionInfoConstructor& portionInfo) const { - portionInfo.AddIndex(TIndexChunk(GetEntityId(), GetChunkIdxVerified(), RecordsCount, RawBytes, GetData())); -} - std::shared_ptr TIndexByColumns::DoBuildIndex( const THashMap>>& data, const TIndexInfo& indexInfo) const { AFL_VERIFY(Serializer); diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h b/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h index e083c12fa927..5356d5c4302d 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h +++ b/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h @@ -1,56 +1,10 @@ #pragma once #include -#include +#include #include namespace NKikimr::NOlap::NIndexes { -class TPortionIndexChunk: public IPortionDataChunk { -private: - using TBase = IPortionDataChunk; - const ui32 RecordsCount; - const ui64 RawBytes; - const TString Data; -protected: - virtual const TString& DoGetData() const override { - return Data; - } - virtual TString DoDebugString() const override { - return ""; - } - virtual std::vector> DoInternalSplit(const TColumnSaver& /*saver*/, const std::shared_ptr& /*counters*/, const std::vector& /*splitSizes*/) const override { - AFL_VERIFY(false); - return {}; - } - virtual bool DoIsSplittable() const override { - return false; - } - virtual std::optional DoGetRecordsCount() const override { - return RecordsCount; - } - virtual std::optional DoGetRawBytes() const override { - return RawBytes; - } - virtual std::shared_ptr DoGetFirstScalar() const override { - return nullptr; - } - virtual std::shared_ptr DoGetLastScalar() const override { - return nullptr; - } - virtual void DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfoConstructor& portionInfo) 
const override; - virtual void DoAddInplaceIntoPortion(TPortionInfoConstructor& portionInfo) const override; - -public: - TPortionIndexChunk(const TChunkAddress& address, const ui32 recordsCount, const ui64 rawBytes, const TString& data) - : TBase(address.GetColumnId(), address.GetChunkIdx()) - , RecordsCount(recordsCount) - , RawBytes(rawBytes) - , Data(data) - { - } - -}; - class TIndexByColumns: public IIndexMeta { private: using TBase = IIndexMeta; diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.cpp index a36e976ed351..36f467a03133 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.cpp +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.cpp @@ -3,8 +3,7 @@ namespace NKikimr::NOlap::NStorageOptimizer::NLBuckets { TDuration GetCommonFreshnessCheckDuration() { - static const TDuration CommonFreshnessCheckDuration = TDuration::Seconds(300); - return NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration(CommonFreshnessCheckDuration); + return NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration(); } } diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.h b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.h index f83183c04e39..d686fc719112 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.h @@ -19,8 +19,6 @@ namespace NKikimr::NOlap::NStorageOptimizer::NLBuckets { -static const ui64 SmallPortionDetectSizeLimit = 1 << 20; - TDuration GetCommonFreshnessCheckDuration(); class TSimplePortionsGroupInfo { @@ -683,7 +681,7 @@ class TPortionsBucket: public TMoveOnly { return; } 
MainPortion->InitRuntimeFeature(TPortionInfo::ERuntimeFeature::Optimized, Others.IsEmpty() && currentInstant > MainPortion->RecordSnapshotMax().GetPlanInstant() + - NYDBTest::TControllers::GetColumnShardController()->GetLagForCompactionBeforeTierings(TDuration::Minutes(60))); + NYDBTest::TControllers::GetColumnShardController()->GetLagForCompactionBeforeTierings()); } public: TTaskDescription GetTaskDescription() const { @@ -1104,7 +1102,7 @@ class TPortionBuckets { } void RemovePortion(const std::shared_ptr& portion) { - if (portion->GetTotalBlobBytes() < NYDBTest::TControllers::GetColumnShardController()->GetSmallPortionSizeDetector(SmallPortionDetectSizeLimit)) { + if (portion->GetTotalBlobBytes() < NYDBTest::TControllers::GetColumnShardController()->GetSmallPortionSizeDetector()) { Counters->SmallPortions->RemovePortion(portion); } if (!RemoveBucket(portion)) { @@ -1146,7 +1144,7 @@ class TPortionBuckets { } void AddPortion(const std::shared_ptr& portion, const TInstant now) { - if (portion->GetTotalBlobBytes() < NYDBTest::TControllers::GetColumnShardController()->GetSmallPortionSizeDetector(SmallPortionDetectSizeLimit)) { + if (portion->GetTotalBlobBytes() < NYDBTest::TControllers::GetColumnShardController()->GetSmallPortionSizeDetector()) { Counters->SmallPortions->AddPortion(portion); AddOther(portion, now); return; diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/common/optimizer.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/common/optimizer.cpp index baf229b3d1fd..2fe68710d805 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/common/optimizer.cpp +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/common/optimizer.cpp @@ -4,8 +4,7 @@ namespace NKikimr::NOlap::NStorageOptimizer::NSBuckets { TDuration GetCommonFreshnessCheckDuration() { - static const TDuration CommonFreshnessCheckDuration = TDuration::Seconds(300); - return 
NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration(CommonFreshnessCheckDuration); + return NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration(); } -} +} // namespace NKikimr::NOlap::NStorageOptimizer::NSBuckets diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.cpp index 9e8d21bb9357..bc007d0fff10 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.cpp @@ -2,12 +2,11 @@ #include #include #include -#include namespace NKikimr::NOlap::NStorageOptimizer::NSBuckets { std::shared_ptr TOptimizerPlannerConstructor::BuildLogic() const { - const TDuration freshnessCheckDuration = NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration(FreshnessCheckDuration); + const TDuration freshnessCheckDuration = NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration(); std::shared_ptr logic; if (LogicName == "one_head") { logic = std::make_shared(freshnessCheckDuration); diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.h b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.h index 8b85a25c0877..cabe72ccc2a9 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.h @@ -1,13 +1,16 @@ #pragma once +#include #include #include +#include namespace NKikimr::NOlap::NStorageOptimizer::NSBuckets { class TOptimizerPlannerConstructor: public IOptimizerPlannerConstructor { private: YDB_READONLY_DEF(TString, LogicName); - YDB_READONLY(TDuration, FreshnessCheckDuration, TDuration::Seconds(300)); 
+ YDB_READONLY(TDuration, FreshnessCheckDuration, NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration()); + public: static TString GetClassNameStatic() { return "s-buckets"; diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/index/bucket.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/index/bucket.cpp index 5c70d26a38a3..ec344a674fd7 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/index/bucket.cpp +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/index/bucket.cpp @@ -9,7 +9,7 @@ namespace NKikimr::NOlap::NStorageOptimizer::NSBuckets { void TPortionsBucket::RebuildOptimizedFeature(const TInstant currentInstant) const { for (auto&& [_, p] : Portions) { p.MutablePortionInfo().InitRuntimeFeature(TPortionInfo::ERuntimeFeature::Optimized, Portions.size() == 1 && currentInstant > p->RecordSnapshotMax().GetPlanInstant() + - NYDBTest::TControllers::GetColumnShardController()->GetLagForCompactionBeforeTierings(TDuration::Minutes(60)) + NYDBTest::TControllers::GetColumnShardController()->GetLagForCompactionBeforeTierings() ); } } diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/logic/slices/logic.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/logic/slices/logic.cpp index 465e19e8379b..28d2914ed392 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/logic/slices/logic.cpp +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/logic/slices/logic.cpp @@ -67,7 +67,7 @@ NKikimr::NOlap::NStorageOptimizer::NSBuckets::TCompactionTaskResult TTimeSliceLo NKikimr::NOlap::NStorageOptimizer::NSBuckets::TCalcWeightResult TTimeSliceLogic::DoCalcWeight(const TInstant /*now*/, const TBucketInfo& bucket) const { ui64 size = 0; - ui32 count = 0; + ui64 count = 0; for (auto&& [maxInstant, portions] : bucket.GetSnapshotPortions()) { for (auto&& [_, p] : portions) { if (p.GetTotalBlobBytes() > compactedDetector) { 
diff --git a/ydb/core/tx/columnshard/engines/ut/helper.cpp b/ydb/core/tx/columnshard/engines/ut/helper.cpp index eee9159edea7..66c67eb672d5 100644 --- a/ydb/core/tx/columnshard/engines/ut/helper.cpp +++ b/ydb/core/tx/columnshard/engines/ut/helper.cpp @@ -1,10 +1,25 @@ #include "helper.h" +#include namespace NKikimr::NOlap::NEngines::NTest { +std::shared_ptr TLocalHelper::GetMetaSchema() { + return std::make_shared(arrow::FieldVector({ std::make_shared("1", arrow::uint64()) })); +} + NKikimrTxColumnShard::TLogicalMetadata TLocalHelper::GetMetaProto() { NKikimrTxColumnShard::TLogicalMetadata result; result.SetDirtyWriteTimeSeconds(TInstant::Now().Seconds()); + + std::vector> columns; + auto schema = GetMetaSchema(); + for (auto&& i : schema->fields()) { + columns.emplace_back(NArrow::TThreadSimpleArraysCache::Get(i->type(), NArrow::DefaultScalar(i->type()), 1)); + } + auto batch = arrow::RecordBatch::Make(schema, 1, columns); + + NArrow::TFirstLastSpecialKeys flKeys = NArrow::TFirstLastSpecialKeys(batch); + result.SetSpecialKeysPayloadData(flKeys.SerializePayloadToString()); return result; } diff --git a/ydb/core/tx/columnshard/engines/ut/helper.h b/ydb/core/tx/columnshard/engines/ut/helper.h index 8e17730a21c6..c7072c13af91 100644 --- a/ydb/core/tx/columnshard/engines/ut/helper.h +++ b/ydb/core/tx/columnshard/engines/ut/helper.h @@ -6,6 +6,7 @@ namespace NKikimr::NOlap::NEngines::NTest { class TLocalHelper { public: static NKikimrTxColumnShard::TLogicalMetadata GetMetaProto(); + static std::shared_ptr GetMetaSchema(); }; }; \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/ut/ut_insert_table.cpp b/ydb/core/tx/columnshard/engines/ut/ut_insert_table.cpp index 946b657400ca..1fa189536128 100644 --- a/ydb/core/tx/columnshard/engines/ut/ut_insert_table.cpp +++ b/ydb/core/tx/columnshard/engines/ut/ut_insert_table.cpp @@ -18,13 +18,13 @@ class TTestInsertTableDB : public IDbWrapper { public: void Insert(const TInsertedData&) override { } - void 
Commit(const TInsertedData&) override { + void Commit(const TCommittedData&) override { } void Abort(const TInsertedData&) override { } void EraseInserted(const TInsertedData&) override { } - void EraseCommitted(const TInsertedData&) override { + void EraseCommitted(const TCommittedData&) override { } void EraseAborted(const TInsertedData&) override { } @@ -73,7 +73,7 @@ class TTestInsertTableDB : public IDbWrapper { Y_UNIT_TEST_SUITE(TColumnEngineTestInsertTable) { Y_UNIT_TEST(TestInsertCommit) { - ui64 writeId = 0; + TInsertWriteId writeId = (TInsertWriteId)0; ui64 tableId = 0; TString dedupId = "0"; TUnifiedBlobId blobId1(2222, 1, 1, 100, 2, 0, 1); @@ -81,47 +81,49 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestInsertTable) { TTestInsertTableDB dbTable; TInsertTable insertTable; ui64 indexSnapshot = 0; - + // insert, not commited - bool ok = insertTable.Insert(dbTable, TInsertedData(writeId, tableId, dedupId, blobId1, TLocalHelper::GetMetaProto(), indexSnapshot, {})); + auto userData1 = std::make_shared(tableId, TBlobRange(blobId1), TLocalHelper::GetMetaProto(), indexSnapshot, std::nullopt); + bool ok = insertTable.Insert(dbTable, TInsertedData(writeId, userData1)); UNIT_ASSERT(ok); // insert the same blobId1 again - ok = insertTable.Insert(dbTable, TInsertedData(writeId, tableId, dedupId, blobId1, TLocalHelper::GetMetaProto(), indexSnapshot, {})); + auto userData2 = std::make_shared(tableId, TBlobRange(blobId1), TLocalHelper::GetMetaProto(), indexSnapshot, std::nullopt); + ok = insertTable.Insert(dbTable, TInsertedData(writeId, userData2)); UNIT_ASSERT(!ok); // insert different blodId with the same writeId and dedupId TUnifiedBlobId blobId2(2222, 1, 2, 100, 2, 0, 1); - ok = insertTable.Insert(dbTable, TInsertedData(writeId, tableId, dedupId, blobId2, TLocalHelper::GetMetaProto(), indexSnapshot, {})); + auto userData3 = std::make_shared(tableId, TBlobRange(blobId2), TLocalHelper::GetMetaProto(), indexSnapshot, std::nullopt); + ok = insertTable.Insert(dbTable, 
TInsertedData(writeId, userData3)); UNIT_ASSERT(!ok); // read nothing - auto blobs = insertTable.Read(tableId, TSnapshot::Zero(), nullptr); + auto blobs = insertTable.Read(tableId, {}, TSnapshot::Zero(), TLocalHelper::GetMetaSchema(), nullptr); UNIT_ASSERT_EQUAL(blobs.size(), 0); - blobs = insertTable.Read(tableId + 1, TSnapshot::Zero(), nullptr); + blobs = insertTable.Read(tableId + 1, {}, TSnapshot::Zero(), TLocalHelper::GetMetaSchema(), nullptr); UNIT_ASSERT_EQUAL(blobs.size(), 0); // commit ui64 planStep = 100; ui64 txId = 42; - insertTable.Commit(dbTable, planStep, txId, {TWriteId{writeId}}, [](ui64) { + insertTable.Commit(dbTable, planStep, txId, { writeId }, [](ui64) { return true; }); - - UNIT_ASSERT_EQUAL(insertTable.GetPathPriorities().size(), 1); - UNIT_ASSERT_EQUAL(insertTable.GetPathPriorities().begin()->second.size(), 1); - UNIT_ASSERT_EQUAL((*insertTable.GetPathPriorities().begin()->second.begin())->GetCommitted().size(), 1); +// UNIT_ASSERT_EQUAL(insertTable.GetPathPriorities().size(), 1); +// UNIT_ASSERT_EQUAL(insertTable.GetPathPriorities().begin()->second.size(), 1); +// UNIT_ASSERT_EQUAL((*insertTable.GetPathPriorities().begin()->second.begin())->GetCommitted().size(), 1); // read old snapshot - blobs = insertTable.Read(tableId, TSnapshot::Zero(), nullptr); + blobs = insertTable.Read(tableId, {}, TSnapshot::Zero(), TLocalHelper::GetMetaSchema(), nullptr); UNIT_ASSERT_EQUAL(blobs.size(), 0); - blobs = insertTable.Read(tableId + 1, TSnapshot::Zero(), nullptr); + blobs = insertTable.Read(tableId + 1, {}, TSnapshot::Zero(), TLocalHelper::GetMetaSchema(), nullptr); UNIT_ASSERT_EQUAL(blobs.size(), 0); // read new snapshot - blobs = insertTable.Read(tableId, TSnapshot(planStep, txId), nullptr); + blobs = insertTable.Read(tableId, {}, TSnapshot(planStep, txId), TLocalHelper::GetMetaSchema(), nullptr); UNIT_ASSERT_EQUAL(blobs.size(), 1); - blobs = insertTable.Read(tableId + 1, TSnapshot::Zero(), nullptr); + blobs = insertTable.Read(tableId + 1, {}, 
TSnapshot::Zero(), TLocalHelper::GetMetaSchema(), nullptr); UNIT_ASSERT_EQUAL(blobs.size(), 0); } } diff --git a/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp b/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp index 10a1891c4102..ecde3aa56673 100644 --- a/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp +++ b/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp @@ -48,27 +48,27 @@ class TTestDbWrapper : public IDbWrapper { } void Insert(const TInsertedData& data) override { - Inserted.emplace(TWriteId{data.WriteTxId}, data); + Inserted.emplace(data.GetInsertWriteId(), data); } - void Commit(const TInsertedData& data) override { - Committed[data.PathId].emplace(data); + void Commit(const TCommittedData& data) override { + Committed[data.GetPathId()].emplace(data); } void Abort(const TInsertedData& data) override { - Aborted.emplace(TWriteId{data.WriteTxId}, data); + Aborted.emplace(data.GetInsertWriteId(), data); } void EraseInserted(const TInsertedData& data) override { - Inserted.erase(TWriteId{data.WriteTxId}); + Inserted.erase(data.GetInsertWriteId()); } - void EraseCommitted(const TInsertedData& data) override { - Committed[data.PathId].erase(data); + void EraseCommitted(const TCommittedData& data) override { + Committed[data.GetPathId()].erase(data); } void EraseAborted(const TInsertedData& data) override { - Aborted.erase(TWriteId{data.WriteTxId}); + Aborted.erase(data.GetInsertWriteId()); } bool Load(TInsertTableAccessor& accessor, @@ -189,9 +189,9 @@ class TTestDbWrapper : public IDbWrapper { } private: - THashMap Inserted; - THashMap> Committed; - THashMap Aborted; + THashMap Inserted; + THashMap> Committed; + THashMap Aborted; THashMap Indices; }; @@ -294,12 +294,8 @@ void AddIdsToBlobs(std::vector& portions, NBlo } } -bool Insert(TColumnEngineForLogs& engine, TTestDbWrapper& db, TSnapshot snap, std::vector&& dataToIndex, +bool Insert(TColumnEngineForLogs& engine, TTestDbWrapper& db, TSnapshot snap, std::vector&& dataToIndex, 
NBlobOperations::NRead::TCompositeReadBlobs& blobs, ui32& step) { - for (ui32 i = 0; i < dataToIndex.size(); ++i) { - // Commited data always has nonzero planstep (for WriteLoadRead tests) - dataToIndex[i].PlanStep = i + 1; - }; std::shared_ptr changes = engine.StartInsert(std::move(dataToIndex)); if (!changes) { return false; @@ -433,6 +429,7 @@ std::shared_ptr CommonStoragesManager = Initia Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { void WriteLoadRead(const std::vector& ydbSchema, const std::vector& key) { + TTestBasicRuntime runtime; TTestDbWrapper db; TIndexInfo tableInfo = NColumnShard::BuildTableInfo(ydbSchema, key); @@ -446,16 +443,16 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { // PlanStep, TxId, PathId, DedupId, BlobId, Data, [Metadata] // load - TSnapshot indexSnaphot(1, 1); - TColumnEngineForLogs engine(0, CommonStoragesManager, indexSnaphot, TIndexInfo(tableInfo)); + TSnapshot indexSnapshot(1, 1); + TColumnEngineForLogs engine(0, CommonStoragesManager, indexSnapshot, TIndexInfo(tableInfo)); for (auto&& i : paths) { engine.RegisterTable(i); } engine.Load(db); - std::vector dataToIndex = { - TInsertedData(2, paths[0], "", blobRanges[0].BlobId, TLocalHelper::GetMetaProto(), 0, {}), - TInsertedData(1, paths[0], "", blobRanges[1].BlobId, TLocalHelper::GetMetaProto(), 0, {}) + std::vector dataToIndex = { + TCommittedData(TUserData::Build(paths[0], blobRanges[0], TLocalHelper::GetMetaProto(), 0, {}), TSnapshot(1, 2), (TInsertWriteId)2), + TCommittedData(TUserData::Build(paths[0], blobRanges[1], TLocalHelper::GetMetaProto(), 0, {}), TSnapshot(2, 1), (TInsertWriteId)1) }; // write @@ -473,12 +470,12 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { // selects auto lastSchema = engine.GetVersionedIndex().GetLastSchema(); - UNIT_ASSERT_EQUAL(lastSchema->GetSnapshot(), indexSnaphot); + UNIT_ASSERT_EQUAL(lastSchema->GetSnapshot(), indexSnapshot); const TIndexInfo& indexInfo = lastSchema->GetIndexInfo(); - THashSet oneColumnId = { 
indexInfo.GetColumnId(testColumns[0].GetName()) }; + THashSet oneColumnId = { indexInfo.GetColumnIdVerified(testColumns[0].GetName()) }; THashSet columnIds; for (auto& c : testColumns) { - columnIds.insert(indexInfo.GetColumnId(c.GetName())); + columnIds.insert(indexInfo.GetColumnIdVerified(c.GetName())); } { // select from snap before insert @@ -528,6 +525,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { void ReadWithPredicates(const std::vector& ydbSchema, const std::vector& key) { + TTestBasicRuntime runtime; TTestDbWrapper db; TIndexInfo tableInfo = NColumnShard::BuildTableInfo(ydbSchema, key); @@ -552,11 +550,12 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { blobs.Add(IStoragesManager::DefaultStorageId, blobRange, std::move(str1)); // PlanStep, TxId, PathId, DedupId, BlobId, Data, [Metadata] - std::vector dataToIndex; + std::vector dataToIndex; + TSnapshot ss(planStep, txId); dataToIndex.push_back( - TInsertedData(txId, pathId, "", blobRange.BlobId, TLocalHelper::GetMetaProto(), 0, {})); + TCommittedData(TUserData::Build(pathId, blobRange, TLocalHelper::GetMetaProto(), 0, {}), ss, (TInsertWriteId)txId)); - bool ok = Insert(engine, db, TSnapshot(planStep, txId), std::move(dataToIndex), blobs, step); + bool ok = Insert(engine, db, ss, std::move(dataToIndex), blobs, step); UNIT_ASSERT(ok); } @@ -573,7 +572,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { planStep = 3; const TIndexInfo& indexInfo = engine.GetVersionedIndex().GetLastSchema()->GetIndexInfo(); - THashSet oneColumnId = { indexInfo.GetColumnId(key[0].GetName()) }; + THashSet oneColumnId = { indexInfo.GetColumnIdVerified(key[0].GetName()) }; { // full scan ui64 txId = 1; @@ -590,7 +589,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { gt10k = MakeStrPredicate("10000", NArrow::EOperation::Greater); } NOlap::TPKRangesFilter pkFilter(false); - Y_ABORT_UNLESS(pkFilter.Add(gt10k, nullptr, nullptr)); + Y_ABORT_UNLESS(pkFilter.Add(gt10k, nullptr, indexInfo.GetReplaceKey())); auto selectInfo = engine.Select(pathId, 
TSnapshot(planStep, txId), pkFilter); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 10); } @@ -602,7 +601,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { lt10k = MakeStrPredicate("08999", NArrow::EOperation::Less); } NOlap::TPKRangesFilter pkFilter(false); - Y_ABORT_UNLESS(pkFilter.Add(nullptr, lt10k, nullptr)); + Y_ABORT_UNLESS(pkFilter.Add(nullptr, lt10k, indexInfo.GetReplaceKey())); auto selectInfo = engine.Select(pathId, TSnapshot(planStep, txId), pkFilter); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 9); } @@ -624,6 +623,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { } Y_UNIT_TEST(IndexWriteOverload) { + TTestBasicRuntime runtime; TTestDbWrapper db; auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); TIndexInfo tableInfo = NColumnShard::BuildTableInfo(testColumns, testKey);; @@ -649,11 +649,11 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { blobs.Add(IStoragesManager::DefaultStorageId, blobRange, std::move(testBlob)); // PlanStep, TxId, PathId, DedupId, BlobId, Data, [Metadata] - std::vector dataToIndex; - dataToIndex.push_back( - TInsertedData(txId, pathId, "", blobRange.BlobId, TLocalHelper::GetMetaProto(), 0, {})); + std::vector dataToIndex; + TSnapshot ss(planStep, txId); + dataToIndex.push_back(TCommittedData(TUserData::Build(pathId, blobRange, TLocalHelper::GetMetaProto(), 0, {}), ss, (TInsertWriteId)txId)); - bool ok = Insert(engine, db, TSnapshot(planStep, txId), std::move(dataToIndex), blobs, step); + bool ok = Insert(engine, db, ss, std::move(dataToIndex), blobs, step); blobsAll.Merge(std::move(blobs)); UNIT_ASSERT(ok); } @@ -680,11 +680,11 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { blobs.Add(IStoragesManager::DefaultStorageId, blobRange, std::move(testBlob)); // PlanStep, TxId, PathId, DedupId, BlobId, Data, [Metadata] - std::vector dataToIndex; - dataToIndex.push_back( - TInsertedData(txId, pathId, "", blobRange.BlobId, TLocalHelper::GetMetaProto(), 0, {})); + 
std::vector dataToIndex; + TSnapshot ss(planStep, txId); + dataToIndex.push_back(TCommittedData(TUserData::Build(pathId, blobRange, TLocalHelper::GetMetaProto(), 0, {}), ss, TInsertWriteId(txId))); - bool ok = Insert(engine, db, TSnapshot(planStep, txId), std::move(dataToIndex), blobs, step); + bool ok = Insert(engine, db, ss, std::move(dataToIndex), blobs, step); UNIT_ASSERT(ok); } @@ -696,10 +696,11 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { } Y_UNIT_TEST(IndexTtl) { + TTestBasicRuntime runtime; TTestDbWrapper db; TIndexInfo tableInfo = NColumnShard::BuildTableInfo(testColumns, testKey); auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); - csDefaultControllerGuard->SetTasksActualizationLag(TDuration::Zero()); + csDefaultControllerGuard->SetOverrideTasksActualizationLag(TDuration::Zero()); ui64 pathId = 1; ui32 step = 1000; @@ -726,11 +727,12 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { blobs.Add(IStoragesManager::DefaultStorageId, blobRange, std::move(str1)); // PlanStep, TxId, PathId, DedupId, BlobId, Data, [Metadata] - std::vector dataToIndex; + TSnapshot ss(planStep, txId); + std::vector dataToIndex; dataToIndex.push_back( - TInsertedData(txId, pathId, "", blobRange.BlobId, TLocalHelper::GetMetaProto(), 0, {})); + TCommittedData(TUserData::Build(pathId, blobRange, TLocalHelper::GetMetaProto(), 0, {}), ss, TInsertWriteId(txId))); - bool ok = Insert(engine, db, TSnapshot(planStep, txId), std::move(dataToIndex), blobs, step); + bool ok = Insert(engine, db, ss, std::move(dataToIndex), blobs, step); UNIT_ASSERT(ok); blobStartTs += blobTsRange; if (txId == txCount / 2) { @@ -750,7 +752,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { planStep = 3; const TIndexInfo& indexInfo = engine.GetVersionedIndex().GetLastSchema()->GetIndexInfo(); - THashSet oneColumnId = {indexInfo.GetColumnId(testColumns[0].GetName())}; + THashSet oneColumnId = {indexInfo.GetColumnIdVerified(testColumns[0].GetName())}; { // full scan ui64 txId = 
1; @@ -790,7 +792,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { engine.Load(db); const TIndexInfo& indexInfo = engine.GetVersionedIndex().GetLastSchema()->GetIndexInfo(); - THashSet oneColumnId = {indexInfo.GetColumnId(testColumns[0].GetName())}; + THashSet oneColumnId = { indexInfo.GetColumnIdVerified(testColumns[0].GetName()) }; { // full scan ui64 txId = 1; diff --git a/ydb/core/tx/columnshard/engines/ut/ut_program.cpp b/ydb/core/tx/columnshard/engines/ut/ut_program.cpp index 798ba6ec505d..f957cfea5592 100644 --- a/ydb/core/tx/columnshard/engines/ut/ut_program.cpp +++ b/ydb/core/tx/columnshard/engines/ut/ut_program.cpp @@ -852,4 +852,64 @@ Y_UNIT_TEST_SUITE(TestProgram) { auto expected = result.BuildArrow(); UNIT_ASSERT_VALUES_EQUAL(batch->ToString(), expected->ToString()); } + + Y_UNIT_TEST(CountWithNulls) { + TIndexInfo indexInfo = BuildTableInfo(testColumns, testKey); + ; + NReader::NPlain::TIndexColumnResolver columnResolver(indexInfo); + + NKikimrSSA::TProgram programProto; + { + auto* command = programProto.AddCommand(); + auto* functionProto = command->MutableAssign()->MutableFunction(); + auto* column = command->MutableAssign()->MutableColumn(); + column->SetName("0"); + auto* funcArg = functionProto->AddArguments(); + funcArg->SetName("uid"); + functionProto->SetId(NKikimrSSA::TProgram::TAssignment::EFunction::TProgram_TAssignment_EFunction_FUNC_IS_NULL); + } + { + auto* command = programProto.AddCommand(); + auto* filter = command->MutableFilter(); + auto* predicate = filter->MutablePredicate(); + predicate->SetName("0"); + } + { + auto* command = programProto.AddCommand(); + auto* groupBy = command->MutableGroupBy(); + auto* aggregate = groupBy->AddAggregates(); + aggregate->MutableFunction()->SetId(static_cast(NArrow::EAggregate::Count)); + aggregate->MutableColumn()->SetName("1"); + } + { + auto* command = programProto.AddCommand(); + auto* projectionProto = command->MutableProjection(); + auto* column = projectionProto->AddColumns(); + 
column->SetName("1"); + } + const auto programSerialized = SerializeProgram(programProto); + + TProgramContainer program; + TString errors; + UNIT_ASSERT_C( + program.Init(columnResolver, NKikimrSchemeOp::EOlapProgramType::OLAP_PROGRAM_SSA_PROGRAM_WITH_PARAMETERS, programSerialized, errors), + errors); + + TTableUpdatesBuilder updates(NArrow::MakeArrowSchema({ std::make_pair("uid", TTypeInfo(NTypeIds::Utf8)) })); + updates.AddRow().Add("a"); + updates.AddRow().AddNull(); + updates.AddRow().Add("bbb"); + updates.AddRow().AddNull(); + updates.AddRow().AddNull(); + + auto batch = updates.BuildArrow(); + auto res = program.ApplyProgram(batch); + UNIT_ASSERT_C(res.ok(), res.ToString()); + + TTableUpdatesBuilder result(NArrow::MakeArrowSchema({ std::make_pair("1", TTypeInfo(NTypeIds::Uint64)) })); + result.AddRow().Add(3); + + auto expected = result.BuildArrow(); + UNIT_ASSERT_VALUES_EQUAL(batch->ToString(), expected->ToString()); + } } diff --git a/ydb/core/tx/columnshard/engines/writer/buffer/actor.cpp b/ydb/core/tx/columnshard/engines/writer/buffer/actor.cpp index cb243ef6c33a..eeb6242103d1 100644 --- a/ydb/core/tx/columnshard/engines/writer/buffer/actor.cpp +++ b/ydb/core/tx/columnshard/engines/writer/buffer/actor.cpp @@ -48,7 +48,8 @@ void TActor::Handle(TEvAddInsertedDataToBuffer::TPtr& ev) { auto* evBase = ev->Get(); AFL_VERIFY(evBase->GetWriteData()->GetBlobsAction()->GetStorageId() == NOlap::IStoragesManager::DefaultStorageId); SumSize += evBase->GetWriteData()->GetSize(); - Aggregations.emplace_back(std::make_shared(*evBase->GetWriteData(), std::move(evBase->MutableBlobsToWrite()))); + Aggregations.emplace_back( + std::make_shared(*evBase->GetWriteData(), std::move(evBase->MutableBlobsToWrite()), evBase->GetRecordBatch())); if (SumSize > 4 * 1024 * 1024 || Aggregations.size() > 750 || !FlushDuration) { Flush(); } diff --git a/ydb/core/tx/columnshard/engines/writer/buffer/events.h b/ydb/core/tx/columnshard/engines/writer/buffer/events.h index 
ee750ad69bcf..d2a4b4453b04 100644 --- a/ydb/core/tx/columnshard/engines/writer/buffer/events.h +++ b/ydb/core/tx/columnshard/engines/writer/buffer/events.h @@ -11,11 +11,15 @@ namespace NKikimr::NColumnShard::NWriting { class TEvAddInsertedDataToBuffer: public NActors::TEventLocal { private: YDB_READONLY_DEF(std::shared_ptr, WriteData); + YDB_READONLY_DEF(std::shared_ptr, RecordBatch); YDB_ACCESSOR_DEF(std::vector, BlobsToWrite); + public: - explicit TEvAddInsertedDataToBuffer(const std::shared_ptr& writeData, std::vector&& blobs) + explicit TEvAddInsertedDataToBuffer(const std::shared_ptr& writeData, std::vector&& blobs, + const std::shared_ptr& recordBatch) : WriteData(writeData) + , RecordBatch(recordBatch) , BlobsToWrite(blobs) { } diff --git a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h index 22b6ee9b897d..92e59e9b197c 100644 --- a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h +++ b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h @@ -89,12 +89,19 @@ class TWriteAggregation { NEvWrite::TWriteMeta WriteMeta; YDB_READONLY(ui64, SchemaVersion, 0); YDB_READONLY(ui64, Size, 0); + YDB_READONLY(ui64, Rows, 0); YDB_ACCESSOR_DEF(std::vector, SplittedBlobs); - YDB_READONLY_DEF(TVector, WriteIds); + YDB_READONLY_DEF(TVector, InsertWriteIds); YDB_READONLY_DEF(std::shared_ptr, BlobsAction); YDB_READONLY_DEF(NArrow::TSchemaSubset, SchemaSubset); + std::shared_ptr RecordBatch; public: + const std::shared_ptr& GetRecordBatch() const { + AFL_VERIFY(RecordBatch); + return RecordBatch; + } + const NEvWrite::TWriteMeta& GetWriteMeta() const { return WriteMeta; } @@ -103,20 +110,24 @@ class TWriteAggregation { return WriteMeta; } - void AddWriteId(const TWriteId& id) { - WriteIds.emplace_back(id); + void AddInsertWriteId(const TInsertWriteId id) { + InsertWriteIds.emplace_back(id); } - TWriteAggregation(const NEvWrite::TWriteData& writeData, std::vector&& 
splittedBlobs) + TWriteAggregation(const NEvWrite::TWriteData& writeData, std::vector&& splittedBlobs, const std::shared_ptr& batch) : WriteMeta(writeData.GetWriteMeta()) , SchemaVersion(writeData.GetData()->GetSchemaVersion()) , Size(writeData.GetSize()) , BlobsAction(writeData.GetBlobsAction()) , SchemaSubset(writeData.GetSchemaSubsetVerified()) + , RecordBatch(batch) { for (auto&& s : splittedBlobs) { SplittedBlobs.emplace_back(std::move(s), *this); } + for (const auto& batch : SplittedBlobs) { + Rows += batch->GetRowsCount(); + } } TWriteAggregation(const NEvWrite::TWriteData& writeData) diff --git a/ydb/core/tx/columnshard/engines/ya.make b/ydb/core/tx/columnshard/engines/ya.make index 4772008f14f1..66b72ec25122 100644 --- a/ydb/core/tx/columnshard/engines/ya.make +++ b/ydb/core/tx/columnshard/engines/ya.make @@ -13,6 +13,7 @@ SRCS( filter.cpp portion_info.cpp tier_info.cpp + defs.cpp ) PEERDIR( diff --git a/ydb/core/tx/columnshard/hooks/abstract/abstract.cpp b/ydb/core/tx/columnshard/hooks/abstract/abstract.cpp index a4d916545eac..b275e17f2fdb 100644 --- a/ydb/core/tx/columnshard/hooks/abstract/abstract.cpp +++ b/ydb/core/tx/columnshard/hooks/abstract/abstract.cpp @@ -1,4 +1,26 @@ #include "abstract.h" +#include + namespace NKikimr::NYDBTest { + +TDuration ICSController::GetGuaranteeIndexationInterval() const { + const TDuration defaultValue = NColumnShard::TSettings::GuaranteeIndexationInterval; + return DoGetGuaranteeIndexationInterval(defaultValue); +} + +TDuration ICSController::GetPeriodicWakeupActivationPeriod() const { + const TDuration defaultValue = NColumnShard::TSettings::DefaultPeriodicWakeupActivationPeriod; + return DoGetPeriodicWakeupActivationPeriod(defaultValue); +} + +TDuration ICSController::GetStatsReportInterval() const { + const TDuration defaultValue = NColumnShard::TSettings::DefaultStatsReportInterval; + return DoGetStatsReportInterval(defaultValue); +} + +ui64 ICSController::GetGuaranteeIndexationStartBytesLimit() const { + const 
ui64 defaultValue = NColumnShard::TSettings::GuaranteeIndexationStartBytesLimit; + return DoGetGuaranteeIndexationStartBytesLimit(defaultValue); +} } diff --git a/ydb/core/tx/columnshard/hooks/abstract/abstract.h b/ydb/core/tx/columnshard/hooks/abstract/abstract.h index c231ce94496c..c104590235d3 100644 --- a/ydb/core/tx/columnshard/hooks/abstract/abstract.h +++ b/ydb/core/tx/columnshard/hooks/abstract/abstract.h @@ -1,21 +1,24 @@ #pragma once #include +#include #include - -#include #include +#include #include -#include -#include +#include + #include +#include +#include + #include namespace NKikimr::NColumnShard { class TTiersManager; class TColumnShard; -} +} // namespace NKikimr::NColumnShard namespace NKikimr::NOlap { class TColumnEngineChanges; @@ -24,7 +27,7 @@ class TPortionInfo; namespace NIndexes { class TIndexMetaContainer; } -} +} // namespace NKikimr::NOlap namespace arrow { class RecordBatch; } @@ -41,7 +44,8 @@ class ILocalDBModifier { public: using TPtr = std::shared_ptr; - virtual ~ILocalDBModifier() {} + virtual ~ILocalDBModifier() { + } virtual void Apply(NTabletFlatExecutor::TTransactionContext& txc) const = 0; }; @@ -55,6 +59,7 @@ class ICSController { Cleanup, GC }; + protected: virtual void DoOnTabletInitCompleted(const ::NKikimr::NColumnShard::TColumnShard& /*shard*/) { return; @@ -77,10 +82,75 @@ class ICSController { } virtual void DoOnDataSharingFinished(const ui64 /*tabletId*/, const TString& /*sessionId*/) { } - virtual void DoOnDataSharingStarted(const ui64 /*tabletId*/, const TString & /*sessionId*/) { + virtual void DoOnDataSharingStarted(const ui64 /*tabletId*/, const TString& /*sessionId*/) { + } + + virtual TDuration DoGetPingCheckPeriod(const TDuration defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetOverridenGCPeriod(const TDuration defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetCompactionActualizationLag(const TDuration defaultValue) const { + return defaultValue; + } + virtual 
TDuration DoGetActualizationTasksLag(const TDuration defaultValue) const { + return defaultValue; + } + virtual ui64 DoGetReduceMemoryIntervalLimit(const ui64 defaultValue) const { + return defaultValue; + } + virtual ui64 DoGetRejectMemoryIntervalLimit(const ui64 defaultValue) const { + return defaultValue; + } + virtual ui64 DoGetReadSequentiallyBufferSize(const ui64 defaultValue) const { + return defaultValue; + } + virtual ui64 DoGetSmallPortionSizeDetector(const ui64 defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetReadTimeoutClean(const TDuration defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetGuaranteeIndexationInterval(const TDuration defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetPeriodicWakeupActivationPeriod(const TDuration defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetStatsReportInterval(const TDuration defaultValue) const { + return defaultValue; + } + virtual ui64 DoGetGuaranteeIndexationStartBytesLimit(const ui64 defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetOptimizerFreshnessCheckDuration(const TDuration defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetLagForCompactionBeforeTierings(const TDuration defaultValue) const { + return defaultValue; + } + +private: + inline static const NKikimrConfig::TColumnShardConfig DefaultConfig = {}; + + static const NKikimrConfig::TColumnShardConfig& GetConfig() { + if (HasAppData()) { + return AppDataVerified().ColumnShardConfig; + } + return DefaultConfig; } public: + virtual void OnRequestTracingChanges( + const std::set& /*snapshotsToSave*/, const std::set& /*snapshotsToRemove*/) { + } + + TDuration GetPingCheckPeriod() const { + const TDuration defaultValue = 0.6 * GetReadTimeoutClean(); + return DoGetPingCheckPeriod(defaultValue); + } + virtual bool IsBackgroundEnabled(const EBackground /*id*/) const { return true; } @@ -88,53 +158,51 @@ class 
ICSController { using TPtr = std::shared_ptr; virtual ~ICSController() = default; - virtual TDuration GetOverridenGCPeriod(const TDuration def) const { - return def; + TDuration GetOverridenGCPeriod() const { + const TDuration defaultValue = TDuration::MilliSeconds(GetConfig().GetGCIntervalMs()); + return DoGetOverridenGCPeriod(defaultValue); } virtual void OnSelectShardingFilter() { - } - virtual TDuration GetCompactionActualizationLag(const TDuration def) const { - return def; + TDuration GetCompactionActualizationLag() const { + const TDuration defaultValue = TDuration::MilliSeconds(GetConfig().GetCompactionActualizationLagMs()); + return DoGetCompactionActualizationLag(defaultValue); } - virtual NColumnShard::TBlobPutResult::TPtr OverrideBlobPutResultOnCompaction(const NColumnShard::TBlobPutResult::TPtr original, const NOlap::TWriteActionsCollection& /*actions*/) const { + virtual NColumnShard::TBlobPutResult::TPtr OverrideBlobPutResultOnCompaction( + const NColumnShard::TBlobPutResult::TPtr original, const NOlap::TWriteActionsCollection& /*actions*/) const { return original; } - virtual TDuration GetRemovedPortionLivetime(const TDuration def) const { - return def; + TDuration GetActualizationTasksLag() const { + const TDuration defaultValue = TDuration::MilliSeconds(GetConfig().GetActualizationTasksLagMs()); + return DoGetActualizationTasksLag(defaultValue); } - virtual TDuration GetActualizationTasksLag(const TDuration d) const { - return d; + ui64 GetReduceMemoryIntervalLimit() const { + const ui64 defaultValue = NOlap::TGlobalLimits::DefaultReduceMemoryIntervalLimit; + return DoGetReduceMemoryIntervalLimit(defaultValue); } - - virtual ui64 GetReduceMemoryIntervalLimit(const ui64 def) const { - return def; - } - virtual ui64 GetRejectMemoryIntervalLimit(const ui64 def) const { - return def; + ui64 GetRejectMemoryIntervalLimit() const { + const ui64 defaultValue = NOlap::TGlobalLimits::DefaultRejectMemoryIntervalLimit; + return 
DoGetRejectMemoryIntervalLimit(defaultValue); } virtual bool NeedForceCompactionBacketsConstruction() const { return false; } - virtual ui64 GetSmallPortionSizeDetector(const ui64 def) const { - return def; + ui64 GetSmallPortionSizeDetector() const { + const ui64 defaultValue = GetConfig().GetSmallPortionDetectSizeLimit(); + return DoGetSmallPortionSizeDetector(defaultValue); } virtual void OnExportFinished() { - } virtual void OnActualizationRefreshScheme() { - } virtual void OnActualizationRefreshTiering() { - } virtual void AddPortionForActualizer(const i32 /*portionsCount*/) { - } void OnDataSharingFinished(const ui64 tabletId, const TString& sessionId) { @@ -144,16 +212,15 @@ class ICSController { return DoOnDataSharingStarted(tabletId, sessionId); } virtual void OnStatisticsUsage(const NOlap::NIndexes::TIndexMetaContainer& /*statOperator*/) { - } virtual void OnPortionActualization(const NOlap::TPortionInfo& /*info*/) { - } virtual void OnMaxValueUsage() { } - virtual TDuration GetLagForCompactionBeforeTierings(const TDuration def) const { - return def; + virtual TDuration GetLagForCompactionBeforeTierings() const { + const TDuration defaultValue = TDuration::MilliSeconds(GetConfig().GetLagForCompactionBeforeTieringsMs()); + return DoGetLagForCompactionBeforeTierings(defaultValue); } void OnTabletInitCompleted(const NColumnShard::TColumnShard& shard) { @@ -182,29 +249,20 @@ class ICSController { } virtual void OnIndexSelectProcessed(const std::optional /*result*/) { } - virtual TDuration GetReadTimeoutClean(const TDuration def) { - return def; + TDuration GetReadTimeoutClean() const { + const TDuration defaultValue = TDuration::MilliSeconds(GetConfig().GetMaxReadStaleness_ms()); + return DoGetReadTimeoutClean(defaultValue); } virtual EOptimizerCompactionWeightControl GetCompactionControl() const { return EOptimizerCompactionWeightControl::Force; } - virtual TDuration GetTTLDefaultWaitingDuration(const TDuration defaultValue) const { - return defaultValue; - 
} - virtual TDuration GetGuaranteeIndexationInterval(const TDuration defaultValue) const { - return defaultValue; - } - virtual TDuration GetPeriodicWakeupActivationPeriod(const TDuration defaultValue) const { - return defaultValue; - } - virtual TDuration GetStatsReportInterval(const TDuration defaultValue) const { - return defaultValue; - } - virtual ui64 GetGuaranteeIndexationStartBytesLimit(const ui64 defaultValue) const { - return defaultValue; - } - virtual TDuration GetOptimizerFreshnessCheckDuration(const TDuration defaultValue) const { - return defaultValue; + TDuration GetGuaranteeIndexationInterval() const; + TDuration GetPeriodicWakeupActivationPeriod() const; + TDuration GetStatsReportInterval() const; + ui64 GetGuaranteeIndexationStartBytesLimit() const; + TDuration GetOptimizerFreshnessCheckDuration() const { + const TDuration defaultValue = TDuration::MilliSeconds(GetConfig().GetOptimizerFreshnessCheckDurationMs()); + return DoGetOptimizerFreshnessCheckDuration(defaultValue); } virtual void OnTieringModified(const std::shared_ptr& /*tiers*/) { @@ -215,7 +273,8 @@ class ICSController { } virtual NMetadata::NFetcher::ISnapshot::TPtr GetFallbackTiersSnapshot() const { - static std::shared_ptr result = std::make_shared(TInstant::Now()); + static std::shared_ptr result = + std::make_shared(TInstant::Now()); return result; } @@ -231,15 +290,16 @@ class ICSController { class TControllers { private: ICSController::TPtr CSController = std::make_shared(); + public: template class TGuard: TNonCopyable { private: std::shared_ptr Controller; + public: TGuard(std::shared_ptr controller) - : Controller(controller) - { + : Controller(controller) { Y_ABORT_UNLESS(Controller); } @@ -270,4 +330,4 @@ class TControllers { } }; -} +} // namespace NKikimr::NYDBTest diff --git a/ydb/core/tx/columnshard/hooks/testing/controller.h b/ydb/core/tx/columnshard/hooks/testing/controller.h index bb011d168de4..a8e259877fd0 100644 --- 
a/ydb/core/tx/columnshard/hooks/testing/controller.h +++ b/ydb/core/tx/columnshard/hooks/testing/controller.h @@ -12,20 +12,21 @@ namespace NKikimr::NYDBTest::NColumnShard { class TController: public TReadOnlyController { private: using TBase = TReadOnlyController; - YDB_ACCESSOR_DEF(std::optional, LagForCompactionBeforeTierings); - YDB_ACCESSOR(std::optional, GuaranteeIndexationInterval, TDuration::Zero()); - YDB_ACCESSOR(std::optional, PeriodicWakeupActivationPeriod, std::nullopt); - YDB_ACCESSOR(std::optional, StatsReportInterval, std::nullopt); - YDB_ACCESSOR(std::optional, GuaranteeIndexationStartBytesLimit, 0); - YDB_ACCESSOR(std::optional, OptimizerFreshnessCheckDuration, TDuration::Zero()); - YDB_ACCESSOR_DEF(std::optional, CompactionActualizationLag); - YDB_ACCESSOR_DEF(std::optional, TasksActualizationLag); + YDB_ACCESSOR_DEF(std::optional, OverrideRequestsTracePingCheckPeriod); + YDB_ACCESSOR_DEF(std::optional, OverrideLagForCompactionBeforeTierings); + YDB_ACCESSOR(std::optional, OverrideGuaranteeIndexationInterval, TDuration::Zero()); + YDB_ACCESSOR(std::optional, OverridePeriodicWakeupActivationPeriod, std::nullopt); + YDB_ACCESSOR(std::optional, OverrideStatsReportInterval, std::nullopt); + YDB_ACCESSOR(std::optional, OverrideGuaranteeIndexationStartBytesLimit, 0); + YDB_ACCESSOR(std::optional, OverrideOptimizerFreshnessCheckDuration, TDuration::Zero()); + YDB_ACCESSOR_DEF(std::optional, OverrideCompactionActualizationLag); + YDB_ACCESSOR_DEF(std::optional, OverrideTasksActualizationLag); + YDB_ACCESSOR_DEF(std::optional, OverrideReadTimeoutClean); EOptimizerCompactionWeightControl CompactionControl = EOptimizerCompactionWeightControl::Force; YDB_ACCESSOR(std::optional, OverrideReduceMemoryIntervalLimit, 1024); YDB_ACCESSOR_DEF(std::optional, OverrideRejectMemoryIntervalLimit); - std::optional ReadTimeoutClean; std::optional ExpectedShardsCount; THashMap ShardActuals; @@ -129,12 +130,16 @@ class TController: public TReadOnlyController { THashSet 
SharingIds; protected: virtual ::NKikimr::NColumnShard::TBlobPutResult::TPtr OverrideBlobPutResultOnCompaction(const ::NKikimr::NColumnShard::TBlobPutResult::TPtr original, const NOlap::TWriteActionsCollection& actions) const override; - virtual TDuration GetLagForCompactionBeforeTierings(const TDuration def) const override { - return LagForCompactionBeforeTierings.value_or(def); + virtual TDuration DoGetLagForCompactionBeforeTierings(const TDuration def) const override { + return OverrideLagForCompactionBeforeTierings.value_or(def); } - virtual TDuration GetCompactionActualizationLag(const TDuration def) const override { - return CompactionActualizationLag.value_or(def); + virtual TDuration DoGetPingCheckPeriod(const TDuration def) const override { + return OverrideRequestsTracePingCheckPeriod.value_or(def); + } + + virtual TDuration DoGetCompactionActualizationLag(const TDuration def) const override { + return OverrideCompactionActualizationLag.value_or(def); } @@ -143,8 +148,8 @@ class TController: public TReadOnlyController { return !DisabledBackgrounds.contains(id); } - virtual TDuration GetActualizationTasksLag(const TDuration d) const override { - return TasksActualizationLag.value_or(d); + virtual TDuration DoGetActualizationTasksLag(const TDuration d) const override { + return OverrideTasksActualizationLag.value_or(d); } virtual void DoOnTabletInitCompleted(const ::NKikimr::NColumnShard::TColumnShard& shard) override; @@ -152,23 +157,29 @@ class TController: public TReadOnlyController { virtual void DoOnAfterGCAction(const ::NKikimr::NColumnShard::TColumnShard& shard, const NOlap::IBlobsGCAction& action) override; virtual bool DoOnWriteIndexComplete(const NOlap::TColumnEngineChanges& changes, const ::NKikimr::NColumnShard::TColumnShard& shard) override; - virtual TDuration GetGuaranteeIndexationInterval(const TDuration defaultValue) const override { - return GuaranteeIndexationInterval.value_or(defaultValue); + virtual TDuration 
DoGetGuaranteeIndexationInterval(const TDuration defaultValue) const override { + return OverrideGuaranteeIndexationInterval.value_or(defaultValue); + } + virtual TDuration DoGetPeriodicWakeupActivationPeriod(const TDuration defaultValue) const override { + return OverridePeriodicWakeupActivationPeriod.value_or(defaultValue); } - TDuration GetPeriodicWakeupActivationPeriod(const TDuration defaultValue) const override { - return PeriodicWakeupActivationPeriod.value_or(defaultValue); + virtual TDuration DoGetStatsReportInterval(const TDuration defaultValue) const override { + return OverrideStatsReportInterval.value_or(defaultValue); } - TDuration GetStatsReportInterval(const TDuration defaultValue) const override { - return StatsReportInterval.value_or(defaultValue); + virtual ui64 DoGetGuaranteeIndexationStartBytesLimit(const ui64 defaultValue) const override { + return OverrideGuaranteeIndexationStartBytesLimit.value_or(defaultValue); } - virtual ui64 GetGuaranteeIndexationStartBytesLimit(const ui64 defaultValue) const override { - return GuaranteeIndexationStartBytesLimit.value_or(defaultValue); + virtual TDuration DoGetOptimizerFreshnessCheckDuration(const TDuration defaultValue) const override { + return OverrideOptimizerFreshnessCheckDuration.value_or(defaultValue); } - virtual TDuration GetOptimizerFreshnessCheckDuration(const TDuration defaultValue) const override { - return OptimizerFreshnessCheckDuration.value_or(defaultValue); + virtual TDuration DoGetReadTimeoutClean(const TDuration def) const override { + return OverrideReadTimeoutClean.value_or(def); } - virtual TDuration GetReadTimeoutClean(const TDuration def) override { - return ReadTimeoutClean.value_or(def); + virtual ui64 DoGetReduceMemoryIntervalLimit(const ui64 def) const override { + return OverrideReduceMemoryIntervalLimit.value_or(def); + } + virtual ui64 DoGetRejectMemoryIntervalLimit(const ui64 def) const override { + return OverrideRejectMemoryIntervalLimit.value_or(def); } virtual 
EOptimizerCompactionWeightControl GetCompactionControl() const override { return CompactionControl; @@ -185,18 +196,9 @@ class TController: public TReadOnlyController { } public: - virtual TDuration GetRemovedPortionLivetime(const TDuration /*def*/) const override { - return TDuration::Zero(); - } const TAtomicCounter& GetIndexWriteControllerBrokeCount() const { return IndexWriteControllerBrokeCount; } - virtual ui64 GetReduceMemoryIntervalLimit(const ui64 def) const override { - return OverrideReduceMemoryIntervalLimit.value_or(def); - } - virtual ui64 GetRejectMemoryIntervalLimit(const ui64 def) const override { - return OverrideRejectMemoryIntervalLimit.value_or(def); - } bool IsTrivialLinks() const; TCheckContext CheckInvariants() const; @@ -232,9 +234,6 @@ class TController: public TReadOnlyController { void SetCompactionControl(const EOptimizerCompactionWeightControl value) { CompactionControl = value; } - void SetReadTimeoutClean(const TDuration d) { - ReadTimeoutClean = d; - } bool HasPKSortingOnly() const; diff --git a/ydb/core/tx/columnshard/hooks/testing/ro_controller.h b/ydb/core/tx/columnshard/hooks/testing/ro_controller.h index c271878ea838..c55be9455204 100644 --- a/ydb/core/tx/columnshard/hooks/testing/ro_controller.h +++ b/ydb/core/tx/columnshard/hooks/testing/ro_controller.h @@ -31,9 +31,18 @@ class TReadOnlyController: public ICSController { YDB_READONLY(TAtomicCounter, ActualizationRefreshTieringCount, 0); YDB_READONLY(TAtomicCounter, ShardingFiltersCount, 0); + YDB_READONLY(TAtomicCounter, RequestTracingSnapshotsSave, 0); + YDB_READONLY(TAtomicCounter, RequestTracingSnapshotsRemove, 0); + YDB_ACCESSOR(TAtomicCounter, CompactionsLimit, 10000000); protected: + virtual void OnRequestTracingChanges( + const std::set& snapshotsToSave, const std::set& snapshotsToRemove) override { + RequestTracingSnapshotsSave.Add(snapshotsToSave.size()); + RequestTracingSnapshotsRemove.Add(snapshotsToRemove.size()); + } + virtual void OnSelectShardingFilter() 
override { ShardingFiltersCount.Inc(); } @@ -62,11 +71,11 @@ class TReadOnlyController: public ICSController { return EOptimizerCompactionWeightControl::Force; } -public: - virtual TDuration GetOverridenGCPeriod(const TDuration /*def*/) const override { + virtual TDuration DoGetOverridenGCPeriod(const TDuration /*def*/) const override { return TDuration::Zero(); } +public: void WaitCompactions(const TDuration d) const { TInstant start = TInstant::Now(); ui32 compactionsStart = GetCompactionStartedCounter().Val(); @@ -82,10 +91,10 @@ class TReadOnlyController: public ICSController { void WaitIndexation(const TDuration d) const { TInstant start = TInstant::Now(); - ui32 compactionsStart = GetInsertStartedCounter().Val(); + ui32 insertsStart = GetInsertStartedCounter().Val(); while (Now() - start < d) { - if (compactionsStart != GetInsertStartedCounter().Val()) { - compactionsStart = GetInsertStartedCounter().Val(); + if (insertsStart != GetInsertStartedCounter().Val()) { + insertsStart = GetInsertStartedCounter().Val(); start = TInstant::Now(); } Cerr << "WAIT_INDEXATION: " << GetInsertStartedCounter().Val() << Endl; diff --git a/ydb/core/tx/columnshard/inflight_request_tracker.cpp b/ydb/core/tx/columnshard/inflight_request_tracker.cpp index 98ca6d7ab6da..6b7830b26cb0 100644 --- a/ydb/core/tx/columnshard/inflight_request_tracker.cpp +++ b/ydb/core/tx/columnshard/inflight_request_tracker.cpp @@ -1,90 +1,155 @@ +#include "columnshard_impl.h" +#include "columnshard_schema.h" #include "inflight_request_tracker.h" + +#include "data_sharing/common/transactions/tx_extension.h" #include "engines/column_engine.h" #include "engines/reader/plain_reader/constructor/read_metadata.h" +#include "hooks/abstract/abstract.h" namespace NKikimr::NColumnShard { -void TInFlightReadsTracker::RemoveInFlightRequest(ui64 cookie, const NOlap::TVersionedIndex* index) { - Y_ABORT_UNLESS(RequestsMeta.contains(cookie), "Unknown request cookie %" PRIu64, cookie); - const auto& readMetaList = 
RequestsMeta[cookie]; - - for (const auto& readMetaBase : readMetaList) { - NOlap::NReader::NPlain::TReadMetadata::TConstPtr readMeta = std::dynamic_pointer_cast(readMetaBase); - - if (!readMeta) { - continue; - } - - THashMap> portionBlobIds; - for (const auto& portion : readMeta->SelectInfo->PortionsOrderedPK) { - const ui64 portionId = portion->GetPortion(); - AFL_VERIFY(index); - portion->FillBlobIdsByStorage(portionBlobIds, *index); - auto it = PortionUseCount.find(portionId); - Y_ABORT_UNLESS(it != PortionUseCount.end(), "Portion id %" PRIu64 " not found in request %" PRIu64, portionId, cookie); - if (it->second == 1) { - PortionUseCount.erase(it); - } else { - it->second--; +NOlap::NReader::TReadMetadataBase::TConstPtr TInFlightReadsTracker::ExtractInFlightRequest( + ui64 cookie, const NOlap::TVersionedIndex* /*index*/, const TInstant now) { + auto it = RequestsMeta.find(cookie); + AFL_VERIFY(it != RequestsMeta.end())("cookie", cookie); + const NOlap::NReader::TReadMetadataBase::TConstPtr readMetaBase = it->second; + + { + { + auto it = SnapshotsLive.find(readMetaBase->GetRequestSnapshot()); + AFL_VERIFY(it != SnapshotsLive.end()); + if (it->second.DelRequest(cookie, now)) { + SnapshotsLive.erase(it); } } - for (auto&& i : portionBlobIds) { - auto storage = StoragesManager->GetOperatorVerified(i.first); - auto tracker = storage->GetBlobsTracker(); - for (auto& blobId : i.second) { - tracker->FreeBlob(blobId); + if (NOlap::NReader::NPlain::TReadMetadata::TConstPtr readMeta = + std::dynamic_pointer_cast(readMetaBase)) { + auto insertStorage = StoragesManager->GetInsertOperator(); + auto tracker = insertStorage->GetBlobsTracker(); + for (const auto& committedBlob : readMeta->CommittedBlobs) { + tracker->FreeBlob(committedBlob.GetBlobRange().GetBlobId()); } } - - auto insertStorage = StoragesManager->GetInsertOperator(); - auto tracker = insertStorage->GetBlobsTracker(); - for (const auto& committedBlob : readMeta->CommittedBlobs) { - 
tracker->FreeBlob(committedBlob.GetBlobRange().GetBlobId()); - } } + Counters->OnSnapshotsInfo(SnapshotsLive.size(), GetSnapshotToClean()); RequestsMeta.erase(cookie); + return readMetaBase; } -TConclusionStatus TInFlightReadsTracker::AddToInFlightRequest(const ui64 cookie, NOlap::NReader::TReadMetadataBase::TConstPtr readMetaBase, const NOlap::TVersionedIndex* index) { - RequestsMeta[cookie].push_back(readMetaBase); +void TInFlightReadsTracker::AddToInFlightRequest( + const ui64 cookie, NOlap::NReader::TReadMetadataBase::TConstPtr readMetaBase, const NOlap::TVersionedIndex* /*index*/) { + AFL_VERIFY(RequestsMeta.emplace(cookie, readMetaBase).second); auto readMeta = std::dynamic_pointer_cast(readMetaBase); if (!readMeta) { - return TConclusionStatus::Success(); + return; } auto selectInfo = readMeta->SelectInfo; Y_ABORT_UNLESS(selectInfo); SelectStatsDelta += selectInfo->Stats(); - THashMap> portionBlobIds; - for (const auto& portion : readMeta->SelectInfo->PortionsOrderedPK) { - const ui64 portionId = portion->GetPortion(); - PortionUseCount[portionId]++; - AFL_VERIFY(index); - portion->FillBlobIdsByStorage(portionBlobIds, *index); + auto insertStorage = StoragesManager->GetInsertOperator(); + auto tracker = insertStorage->GetBlobsTracker(); + for (const auto& committedBlob : readMeta->CommittedBlobs) { + tracker->UseBlob(committedBlob.GetBlobRange().GetBlobId()); } +} - for (auto&& i : portionBlobIds) { - auto storage = StoragesManager->GetOperatorOptional(i.first); - if (!storage) { - return TConclusionStatus::Fail("blobs storage info not ready for '" + i.first + "'"); +namespace { +class TTransactionSavePersistentSnapshots: public NOlap::NDataSharing::TExtendedTransactionBase { +private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const std::set SaveSnapshots; + const std::set RemoveSnapshots; + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& /*ctx*/) override { + using namespace NColumnShard; + 
NIceDb::TNiceDb db(txc.DB); + for (auto&& i : SaveSnapshots) { + db.Table().Key(i.GetPlanStep(), i.GetTxId()).Update(); } - auto tracker = storage->GetBlobsTracker(); - for (auto& blobId : i.second) { - tracker->UseBlob(blobId); + for (auto&& i : RemoveSnapshots) { + db.Table().Key(i.GetPlanStep(), i.GetTxId()).Delete(); } + return true; } - auto insertStorage = StoragesManager->GetInsertOperator(); - auto tracker = insertStorage->GetBlobsTracker(); - for (const auto& committedBlob : readMeta->CommittedBlobs) { - tracker->UseBlob(committedBlob.GetBlobRange().GetBlobId()); + virtual void DoComplete(const TActorContext& /*ctx*/) override { + } + +public: + TTransactionSavePersistentSnapshots( + NColumnShard::TColumnShard* self, std::set&& saveSnapshots, std::set&& removeSnapshots) + : TBase(self) + , SaveSnapshots(std::move(saveSnapshots)) + , RemoveSnapshots(std::move(removeSnapshots)) { + AFL_VERIFY(SaveSnapshots.size() || RemoveSnapshots.size()); + } +}; +} // namespace + +std::unique_ptr TInFlightReadsTracker::Ping( + TColumnShard* self, const TDuration critDuration, const TInstant now) { + std::set snapshotsToSave; + std::set snapshotsToFree; + for (auto&& i : SnapshotsLive) { + if (i.second.Ping(critDuration, now)) { + if (i.second.GetIsLock()) { + Counters->OnSnapshotLocked(); + snapshotsToSave.emplace(i.first); + } else { + Counters->OnSnapshotUnlocked(); + snapshotsToFree.emplace(i.first); + } + } + } + for (auto&& i : snapshotsToFree) { + SnapshotsLive.erase(i); + } + Counters->OnSnapshotsInfo(SnapshotsLive.size(), GetSnapshotToClean()); + if (snapshotsToFree.size() || snapshotsToSave.size()) { + NYDBTest::TControllers::GetColumnShardController()->OnRequestTracingChanges(snapshotsToSave, snapshotsToFree); + return std::make_unique(self, std::move(snapshotsToSave), std::move(snapshotsToFree)); + } else { + return nullptr; } - return TConclusionStatus::Success(); } +bool TInFlightReadsTracker::LoadFromDatabase(NTable::TDatabase& tableDB) { + NIceDb::TNiceDb 
db(tableDB); + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return false; + } + + while (!rowset.EndOfSet()) { + const NOlap::TSnapshot snapshot( + rowset.GetValue(), rowset.GetValue()); + AFL_VERIFY(SnapshotsLive.emplace(snapshot, TSnapshotLiveInfo::BuildFromDatabase(snapshot)).second); + + if (!rowset.Next()) { + return false; + } + } + Counters->OnSnapshotsInfo(SnapshotsLive.size(), GetSnapshotToClean()); + return true; } + +ui64 TInFlightReadsTracker::AddInFlightRequest( + NOlap::NReader::TReadMetadataBase::TConstPtr readMeta, const NOlap::TVersionedIndex* index) { + const ui64 cookie = NextCookie++; + auto it = SnapshotsLive.find(readMeta->GetRequestSnapshot()); + if (it == SnapshotsLive.end()) { + it = SnapshotsLive.emplace(readMeta->GetRequestSnapshot(), TSnapshotLiveInfo::BuildFromRequest(readMeta->GetRequestSnapshot())).first; + Counters->OnSnapshotsInfo(SnapshotsLive.size(), GetSnapshotToClean()); + } + it->second.AddRequest(cookie); + AddToInFlightRequest(cookie, readMeta, index); + return cookie; +} + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/inflight_request_tracker.h b/ydb/core/tx/columnshard/inflight_request_tracker.h index d530c11d7a4f..0aeec5acddbe 100644 --- a/ydb/core/tx/columnshard/inflight_request_tracker.h +++ b/ydb/core/tx/columnshard/inflight_request_tracker.h @@ -1,6 +1,9 @@ #pragma once #include "blob.h" + +#include "counters/req_tracer.h" + #include namespace NKikimr::NOlap { @@ -8,49 +11,111 @@ class TVersionedIndex; } namespace NKikimr::NColumnShard { - +class TColumnShard; using NOlap::IBlobInUseTracker; -class TInFlightReadsTracker { +class TSnapshotLiveInfo { +private: + const NOlap::TSnapshot Snapshot; + std::optional LastPingInstant; + std::optional LastRequestFinishedInstant; + THashSet Requests; + YDB_READONLY(bool, IsLock, false); + + TSnapshotLiveInfo(const NOlap::TSnapshot& snapshot) + : Snapshot(snapshot) { + } + public: - // Returns a unique cookie associated with this 
request - [[nodiscard]] TConclusion AddInFlightRequest(NOlap::NReader::TReadMetadataBase::TConstPtr readMeta, const NOlap::TVersionedIndex* index) { - const ui64 cookie = NextCookie++; - auto status = AddToInFlightRequest(cookie, readMeta, index); - if (!status) { - return status; + void AddRequest(const ui32 cookie) { + AFL_VERIFY(Requests.emplace(cookie).second); + } + + [[nodiscard]] bool DelRequest(const ui32 cookie, const TInstant now) { + AFL_VERIFY(Requests.erase(cookie)); + if (Requests.empty()) { + LastRequestFinishedInstant = now; + } + if (!IsLock && Requests.empty()) { + return true; } - return cookie; + return false; } - void RemoveInFlightRequest(ui64 cookie, const NOlap::TVersionedIndex* index); + static TSnapshotLiveInfo BuildFromRequest(const NOlap::TSnapshot& reqSnapshot) { + return TSnapshotLiveInfo(reqSnapshot); + } - // Checks if the portion is in use by any in-flight request - bool IsPortionUsed(ui64 portionId) const { - return PortionUseCount.contains(portionId); + static TSnapshotLiveInfo BuildFromDatabase(const NOlap::TSnapshot& reqSnapshot) { + TSnapshotLiveInfo result(reqSnapshot); + result.LastPingInstant = TInstant::Now(); + result.LastRequestFinishedInstant = result.LastPingInstant; + result.IsLock = true; + return result; } + bool Ping(const TDuration critDuration, const TInstant now) { + LastPingInstant = now; + if (Requests.empty()) { + AFL_VERIFY(LastRequestFinishedInstant); + if (critDuration < *LastPingInstant - *LastRequestFinishedInstant && IsLock) { + IsLock = false; + return true; + } + } else { + if (critDuration < *LastPingInstant - Snapshot.GetPlanInstant() && !IsLock) { + IsLock = true; + return true; + } + } + return false; + } +}; + +class TInFlightReadsTracker { +private: + std::map SnapshotsLive; + std::shared_ptr Counters; + +public: + std::optional GetSnapshotToClean() const { + if (SnapshotsLive.empty()) { + return std::nullopt; + } else { + return SnapshotsLive.begin()->first; + } + } + + bool 
LoadFromDatabase(NTable::TDatabase& db); + + [[nodiscard]] std::unique_ptr Ping(TColumnShard* self, const TDuration critDuration, const TInstant now); + + // Returns a unique cookie associated with this request + [[nodiscard]] ui64 AddInFlightRequest( + NOlap::NReader::TReadMetadataBase::TConstPtr readMeta, const NOlap::TVersionedIndex* index); + + [[nodiscard]] NOlap::NReader::TReadMetadataBase::TConstPtr ExtractInFlightRequest(ui64 cookie, const NOlap::TVersionedIndex* index, const TInstant now); + NOlap::TSelectInfo::TStats GetSelectStatsDelta() { auto delta = SelectStatsDelta; SelectStatsDelta = NOlap::TSelectInfo::TStats(); return delta; } - TInFlightReadsTracker(const std::shared_ptr& storagesManager) - : StoragesManager(storagesManager) - { - + TInFlightReadsTracker(const std::shared_ptr& storagesManager, const std::shared_ptr& counters) + : Counters(counters) + , StoragesManager(storagesManager) { } private: - [[nodiscard]] TConclusionStatus AddToInFlightRequest(const ui64 cookie, NOlap::NReader::TReadMetadataBase::TConstPtr readMetaBase, const NOlap::TVersionedIndex* index); + void AddToInFlightRequest( + const ui64 cookie, NOlap::NReader::TReadMetadataBase::TConstPtr readMetaBase, const NOlap::TVersionedIndex* index); private: std::shared_ptr StoragesManager; - ui64 NextCookie{1}; - THashMap> RequestsMeta; - THashMap PortionUseCount; + ui64 NextCookie = 1; + THashMap RequestsMeta; NOlap::TSelectInfo::TStats SelectStatsDelta; }; -} +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/normalizer/abstract/abstract.h b/ydb/core/tx/columnshard/normalizer/abstract/abstract.h index 55b0da0e6b92..e75099ecd9ba 100644 --- a/ydb/core/tx/columnshard/normalizer/abstract/abstract.h +++ b/ydb/core/tx/columnshard/normalizer/abstract/abstract.h @@ -57,6 +57,8 @@ enum class ENormalizerSequentialId: ui32 { PortionsMetadata, CleanGranuleId, EmptyPortionsCleaner, + CleanInsertionDedup, + GCCountersNormalizer, MAX }; diff --git 
a/ydb/core/tx/columnshard/normalizer/insert_table/broken_dedup.cpp b/ydb/core/tx/columnshard/normalizer/insert_table/broken_dedup.cpp new file mode 100644 index 000000000000..5a0934261879 --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/insert_table/broken_dedup.cpp @@ -0,0 +1,151 @@ +#include "broken_dedup.h" + +#include +#include + +namespace NKikimr::NOlap::NInsertionDedup { + +class TNormalizerRemoveChanges: public INormalizerChanges { +private: + std::vector Insertions; +public: + virtual bool ApplyOnExecute(NTabletFlatExecutor::TTransactionContext& txc, const TNormalizationController& /*normalizationContext*/) const override { + NIceDb::TNiceDb db(txc.DB); + for (auto&& i : Insertions) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "remove_aborted_record")("write_id", i.GetInsertWriteId()); + i.Remove(db); + } + return true; + } + virtual void ApplyOnComplete(const TNormalizationController& /*normalizationContext*/) const override { + + } + + virtual ui64 GetSize() const override { + return Insertions.size(); + } + + TNormalizerRemoveChanges(const std::vector& insertions) + : Insertions(insertions) + { + + } +}; + +class TNormalizerCleanDedupChanges: public INormalizerChanges { +private: + mutable std::vector Insertions; + +public: + virtual bool ApplyOnExecute( + NTabletFlatExecutor::TTransactionContext& txc, const TNormalizationController& /*normalizationContext*/) const override { + NIceDb::TNiceDb db(txc.DB); + for (auto&& i : Insertions) { + AFL_VERIFY(i.GetDedupId()); + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "correct_record")("dedup", i.GetDedupId()); + i.Remove(db); + i.SetDedupId(""); + i.Upsert(db); + } + return true; + } + virtual void ApplyOnComplete(const TNormalizationController& /*normalizationContext*/) const override { + } + + virtual ui64 GetSize() const override { + return Insertions.size(); + } + + TNormalizerCleanDedupChanges(const std::vector& insertions) + : Insertions(insertions) { + } +}; + + +class 
TCollectionStates { +private: + YDB_READONLY_DEF(std::optional, Inserted); + YDB_READONLY_DEF(std::optional, Aborted); +public: + void SetInserted(const TInsertTableRecordLoadContext& context) { + AFL_VERIFY(!Inserted); + Inserted = context; + } + void SetAborted(const TInsertTableRecordLoadContext& context) { + AFL_VERIFY(!Aborted); + Aborted = context; + } +}; + +TConclusion> TInsertionsDedupNormalizer::DoInit( + const TNormalizationController& /*controller*/, NTabletFlatExecutor::TTransactionContext& txc) { + NIceDb::TNiceDb db(txc.DB); + + using namespace NColumnShard; + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return TConclusionStatus::Fail("cannot read insertion info"); + } + THashMap insertions; + while (!rowset.EndOfSet()) { + TInsertTableRecordLoadContext constructor; + constructor.ParseFromDatabase(rowset); + if (constructor.GetRecType() == NColumnShard::Schema::EInsertTableIds::Committed) { + AFL_VERIFY(constructor.GetPlanStep()); + } else { + AFL_VERIFY(!constructor.GetPlanStep()); + if (constructor.GetRecType() == NColumnShard::Schema::EInsertTableIds::Aborted) { + insertions[constructor.GetInsertWriteId()].SetAborted(constructor); + } else if (constructor.GetRecType() == NColumnShard::Schema::EInsertTableIds::Inserted) { + insertions[constructor.GetInsertWriteId()].SetInserted(constructor); + } else { + AFL_VERIFY(false); + } + } + if (!rowset.Next()) { + return TConclusionStatus::Fail("cannot read insertion info"); + } + } + + std::vector result; + std::vector toRemove; + std::vector toCleanDedup; + for (auto&& [id, i] : insertions) { + if (i.GetInserted() && i.GetAborted()) { + toRemove.emplace_back(*i.GetInserted()); + if (i.GetAborted()->GetDedupId()) { + toCleanDedup.emplace_back(*i.GetAborted()); + } + } else if (i.GetAborted()) { + if (i.GetAborted()->GetDedupId()) { + toCleanDedup.emplace_back(*i.GetAborted()); + } + } else if (i.GetInserted()) { + if (i.GetInserted()->GetDedupId()) { + 
toCleanDedup.emplace_back(*i.GetInserted()); + } + } else { + AFL_VERIFY(false); + } + if (toCleanDedup.size() == 1000) { + result.emplace_back(std::make_shared(std::make_shared(toCleanDedup))); + toCleanDedup.clear(); + } + if (toRemove.size() == 1000) { + result.emplace_back(std::make_shared(std::make_shared(toRemove))); + toRemove.clear(); + } + } + if (toCleanDedup.size()) { + result.emplace_back(std::make_shared(std::make_shared(toCleanDedup))); + toCleanDedup.clear(); + } + if (toRemove.size()) { + result.emplace_back(std::make_shared(std::make_shared(toRemove))); + toRemove.clear(); + } + + return result; +} + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/normalizer/insert_table/broken_dedup.h b/ydb/core/tx/columnshard/normalizer/insert_table/broken_dedup.h new file mode 100644 index 000000000000..c9a935e24371 --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/insert_table/broken_dedup.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include + + +namespace NKikimr::NOlap::NInsertionDedup { + +class TInsertionsDedupNormalizer: public TNormalizationController::INormalizerComponent { +public: + static TString GetClassNameStatic() { + return "CleanInsertionDedup"; + } +private: + class TNormalizerResult; + + static const inline INormalizerComponent::TFactory::TRegistrator Registrator = + INormalizerComponent::TFactory::TRegistrator(GetClassNameStatic()); + +public: + TInsertionsDedupNormalizer(const TNormalizationController::TInitContext&) { + } + + virtual std::optional DoGetEnumSequentialId() const override { + return ENormalizerSequentialId::CleanInsertionDedup; + } + + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } + + virtual TConclusion> DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override; +}; + +} diff --git a/ydb/core/tx/columnshard/normalizer/insert_table/ya.make b/ydb/core/tx/columnshard/normalizer/insert_table/ya.make new file mode 100644 
index 000000000000..99cdc40cfaf4 --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/insert_table/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + GLOBAL broken_dedup.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/normalizer/abstract +) + +END() diff --git a/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp b/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp index d7981d98d629..f42f38061e45 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp +++ b/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp @@ -59,11 +59,12 @@ class TRowsAndBytesChangesTask: public NConveyor::ITask { Y_ABORT_UNLESS(!!columnLoader); TPortionInfo::TAssembleBlobInfo assembleBlob(blobData); + assembleBlob.SetExpectedRecordsCount(chunkInfo.GetRecordsCount()); auto batch = assembleBlob.BuildRecordBatch(*columnLoader); Y_ABORT_UNLESS(!!batch); - chunkInfo.MutableUpdate().SetNumRows(batch->num_rows()); - chunkInfo.MutableUpdate().SetRawBytes(NArrow::GetBatchDataSize(batch)); + chunkInfo.MutableUpdate().SetNumRows(batch->GetRecordsCount()); + chunkInfo.MutableUpdate().SetRawBytes(batch->GetRawSizeVerified()); } auto changes = std::make_shared(std::move(Chunks)); diff --git a/ydb/core/tx/columnshard/normalizer/portion/chunks.h b/ydb/core/tx/columnshard/normalizer/portion/chunks.h index 59fbfe57da0c..c8a09669c7b8 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/chunks.h +++ b/ydb/core/tx/columnshard/normalizer/portion/chunks.h @@ -74,6 +74,10 @@ namespace NKikimr::NOlap { , CLContext(rowset, dsGroupSelector) {} + ui32 GetRecordsCount() const { + return CLContext.GetMetaProto().GetNumRows(); + } + const TBlobRange& GetBlobRange() const { return CLContext.GetBlobRange(); } diff --git a/ydb/core/tx/columnshard/normalizer/tablet/broken_txs.cpp b/ydb/core/tx/columnshard/normalizer/tablet/broken_txs.cpp new file mode 100644 index 000000000000..1b41c455217b --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/tablet/broken_txs.cpp @@ -0,0 +1,31 @@ +#include "broken_txs.h" + 
+#include +#include + +namespace NKikimr::NOlap { + +TConclusion> TBrokenTxsNormalizer::DoInit( + const TNormalizationController& /*controller*/, NTabletFlatExecutor::TTransactionContext& txc) { + NIceDb::TNiceDb db(txc.DB); + + using namespace NColumnShard; + auto rowset = db.Table().GreaterOrEqual(0).Select(); + if (!rowset.IsReady()) { + return TConclusionStatus::Fail("cannot read TxInfo"); + } + while (!rowset.EndOfSet()) { + const ui64 txId = rowset.GetValue(); + if (!rowset.HaveValue()) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("tx_id", txId)("event", "removed_by_normalizer")("condition", "no_kind"); + Schema::EraseTxInfo(db, txId); + } + + if (!rowset.Next()) { + return TConclusionStatus::Fail("cannot read TxInfo"); + } + } + return std::vector(); +} + +} diff --git a/ydb/core/tx/columnshard/normalizer/tablet/broken_txs.h b/ydb/core/tx/columnshard/normalizer/tablet/broken_txs.h new file mode 100644 index 000000000000..1ff68530bf35 --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/tablet/broken_txs.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include + + +namespace NKikimr::NOlap { + +class TBrokenTxsNormalizer: public TNormalizationController::INormalizerComponent { +public: + static TString GetClassNameStatic() { + return "BrokenTxsNormalizer"; + } +private: + class TNormalizerResult; + + static const inline INormalizerComponent::TFactory::TRegistrator Registrator = + INormalizerComponent::TFactory::TRegistrator(GetClassNameStatic()); + +public: + TBrokenTxsNormalizer(const TNormalizationController::TInitContext&) { + } + + virtual std::optional DoGetEnumSequentialId() const override { + return {}; + } + + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } + + virtual TConclusion> DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override; +}; + +} diff --git a/ydb/core/tx/columnshard/normalizer/tablet/gc_counters.h 
b/ydb/core/tx/columnshard/normalizer/tablet/gc_counters.h index 80b56c080071..8787da559489 100644 --- a/ydb/core/tx/columnshard/normalizer/tablet/gc_counters.h +++ b/ydb/core/tx/columnshard/normalizer/tablet/gc_counters.h @@ -21,7 +21,7 @@ class TGCCountersNormalizer: public TNormalizationController::INormalizerCompone } virtual std::optional DoGetEnumSequentialId() const override { - return {}; + return ENormalizerSequentialId::GCCountersNormalizer; } virtual TString GetClassName() const override { diff --git a/ydb/core/tx/columnshard/normalizer/tablet/ya.make b/ydb/core/tx/columnshard/normalizer/tablet/ya.make index 9be6336eede5..0b39efc35b5b 100644 --- a/ydb/core/tx/columnshard/normalizer/tablet/ya.make +++ b/ydb/core/tx/columnshard/normalizer/tablet/ya.make @@ -2,6 +2,7 @@ LIBRARY() SRCS( GLOBAL gc_counters.cpp + GLOBAL broken_txs.cpp ) PEERDIR( diff --git a/ydb/core/tx/columnshard/normalizer/ya.make b/ydb/core/tx/columnshard/normalizer/ya.make index 46f7baeaea98..ced78fd812af 100644 --- a/ydb/core/tx/columnshard/normalizer/ya.make +++ b/ydb/core/tx/columnshard/normalizer/ya.make @@ -6,6 +6,7 @@ PEERDIR( ydb/core/tx/columnshard/normalizer/tablet ydb/core/tx/columnshard/normalizer/tables ydb/core/tx/columnshard/normalizer/portion + ydb/core/tx/columnshard/normalizer/insert_table ) END() diff --git a/ydb/core/tx/columnshard/operations/batch_builder/builder.cpp b/ydb/core/tx/columnshard/operations/batch_builder/builder.cpp index 232d3b556e95..7dabddaf606c 100644 --- a/ydb/core/tx/columnshard/operations/batch_builder/builder.cpp +++ b/ydb/core/tx/columnshard/operations/batch_builder/builder.cpp @@ -10,31 +10,35 @@ namespace NKikimr::NOlap { -void TBuildBatchesTask::ReplyError(const TString& message) { +void TBuildBatchesTask::ReplyError(const TString& message, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", "cannot build batch for insert")("reason", message)("data", 
WriteData.GetWriteMeta().GetLongTxIdOptional()); auto writeDataPtr = std::make_shared(std::move(WriteData)); TWritingBuffer buffer(writeDataPtr->GetBlobsAction(), { std::make_shared(*writeDataPtr) }); - auto result = NColumnShard::TEvPrivate::TEvWriteBlobsResult::Error(NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), message); + auto result = + NColumnShard::TEvPrivate::TEvWriteBlobsResult::Error(NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), message, errorClass); TActorContext::AsActorContext().Send(ParentActorId, result.release()); } TConclusionStatus TBuildBatchesTask::DoExecute(const std::shared_ptr& /*taskPtr*/) { TConclusion> batchConclusion = WriteData.GetData()->ExtractBatch(); if (batchConclusion.IsFail()) { - ReplyError("cannot extract incoming batch: " + batchConclusion.GetErrorMessage()); + ReplyError( + "cannot extract incoming batch: " + batchConclusion.GetErrorMessage(), NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); return TConclusionStatus::Fail("cannot extract incoming batch: " + batchConclusion.GetErrorMessage()); } auto preparedConclusion = ActualSchema->PrepareForModification(batchConclusion.DetachResult(), WriteData.GetWriteMeta().GetModificationType()); if (preparedConclusion.IsFail()) { - ReplyError("cannot prepare incoming batch: " + preparedConclusion.GetErrorMessage()); + ReplyError("cannot prepare incoming batch: " + preparedConclusion.GetErrorMessage(), + NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); return TConclusionStatus::Fail("cannot prepare incoming batch: " + preparedConclusion.GetErrorMessage()); } auto batch = preparedConclusion.DetachResult(); - const std::vector> defaultFields = ActualSchema->GetAbsentFields(batch->schema()); std::shared_ptr merger; switch (WriteData.GetWriteMeta().GetModificationType()) { case NEvWrite::EModificationType::Upsert: { + const std::vector> defaultFields = ActualSchema->GetAbsentFields(batch->schema()); if 
(defaultFields.empty()) { std::shared_ptr task = std::make_shared(TabletId, ParentActorId, BufferActorId, std::move(WriteData), batch, ActualSchema); diff --git a/ydb/core/tx/columnshard/operations/batch_builder/builder.h b/ydb/core/tx/columnshard/operations/batch_builder/builder.h index aba480a398f9..5d32211d9779 100644 --- a/ydb/core/tx/columnshard/operations/batch_builder/builder.h +++ b/ydb/core/tx/columnshard/operations/batch_builder/builder.h @@ -1,4 +1,5 @@ #pragma once +#include #include #include #include @@ -14,7 +15,8 @@ class TBuildBatchesTask: public NConveyor::ITask { const NActors::TActorId BufferActorId; const std::shared_ptr ActualSchema; const TSnapshot ActualSnapshot; - void ReplyError(const TString& message); + void ReplyError(const TString& message, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass); + protected: virtual TConclusionStatus DoExecute(const std::shared_ptr& taskPtr) override; @@ -23,16 +25,14 @@ class TBuildBatchesTask: public NConveyor::ITask { return "Write::ConstructBatches"; } - TBuildBatchesTask(const ui64 tabletId, const NActors::TActorId parentActorId, - const NActors::TActorId bufferActorId, NEvWrite::TWriteData&& writeData, const std::shared_ptr& actualSchema, - const TSnapshot& actualSnapshot) + TBuildBatchesTask(const ui64 tabletId, const NActors::TActorId parentActorId, const NActors::TActorId bufferActorId, + NEvWrite::TWriteData&& writeData, const std::shared_ptr& actualSchema, const TSnapshot& actualSnapshot) : WriteData(std::move(writeData)) , TabletId(tabletId) , ParentActorId(parentActorId) , BufferActorId(bufferActorId) , ActualSchema(actualSchema) - , ActualSnapshot(actualSnapshot) - { + , ActualSnapshot(actualSnapshot) { } }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/operations/batch_builder/merger.cpp b/ydb/core/tx/columnshard/operations/batch_builder/merger.cpp index 713a397c7a4e..f82c0bd42e05 100644 --- 
a/ydb/core/tx/columnshard/operations/batch_builder/merger.cpp +++ b/ydb/core/tx/columnshard/operations/batch_builder/merger.cpp @@ -67,12 +67,12 @@ TUpdateMerger::TUpdateMerger(const std::shared_ptr& incoming , DefaultExists(defaultExists) , InsertDenyReason(insertDenyReason) { - for (auto&& i : actualSchema->GetIndexInfo().ArrowSchema()->field_names()) { - auto fIdx = IncomingData->schema()->GetFieldIndex(i); + for (auto&& f : actualSchema->GetIndexInfo().ArrowSchema()->fields()) { + auto fIdx = IncomingData->schema()->GetFieldIndex(f->name()); if (fIdx == -1) { IncomingColumnRemap.emplace_back(); } else { - auto fExistsIdx = IncomingData->schema()->GetFieldIndex("$$EXISTS::" + i); + auto fExistsIdx = IncomingData->schema()->GetFieldIndex("$$EXISTS::" + f->name()); std::shared_ptr flagsArray; if (fExistsIdx != -1) { AFL_VERIFY(IncomingData->column(fExistsIdx)->type_id() == arrow::Type::BOOL); diff --git a/ydb/core/tx/columnshard/operations/batch_builder/restore.cpp b/ydb/core/tx/columnshard/operations/batch_builder/restore.cpp index 265d70a6029f..e13c7fc74eaf 100644 --- a/ydb/core/tx/columnshard/operations/batch_builder/restore.cpp +++ b/ydb/core/tx/columnshard/operations/batch_builder/restore.cpp @@ -6,16 +6,10 @@ namespace NKikimr::NOlap { std::unique_ptr TModificationRestoreTask::DoBuildRequestInitiator() const { - auto request = std::make_unique(LocalPathId); + auto request = std::make_unique(LocalPathId, WriteData.GetWriteMeta().GetLockIdOptional()); request->ReadToSnapshot = Snapshot; - request->RangesFilter = std::make_shared(false); auto pkData = NArrow::TColumnOperator().VerifyIfAbsent().Extract(IncomingData, ActualSchema->GetPKColumnNames()); - for (ui32 i = 0; i < pkData->num_rows(); ++i) { - auto batch = pkData->Slice(i, 1); - auto pFrom = std::make_shared(NKernels::EOperation::GreaterEqual, batch); - auto pTo = std::make_shared(NKernels::EOperation::LessEqual, batch); - AFL_VERIFY(request->RangesFilter->Add(pFrom, pTo, &ActualSchema->GetIndexInfo())); 
- } + request->RangesFilter = TPKRangesFilter::BuildFromRecordBatchLines(pkData, false); for (auto&& i : ActualSchema->GetIndexInfo().GetColumnIds(false)) { request->AddColumn(i, ActualSchema->GetIndexInfo().GetColumnName(i)); } @@ -27,7 +21,7 @@ NKikimr::TConclusionStatus TModificationRestoreTask::DoOnDataChunk(const std::sh if (result.IsFail()) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "merge_data_problems") ("write_id", WriteData.GetWriteMeta().GetWriteId())("tablet_id", TabletId)("message", result.GetErrorMessage()); - SendErrorMessage(result.GetErrorMessage()); + SendErrorMessage(result.GetErrorMessage(), NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Request); } return result; } @@ -35,7 +29,7 @@ NKikimr::TConclusionStatus TModificationRestoreTask::DoOnDataChunk(const std::sh void TModificationRestoreTask::DoOnError(const TString& errorMessage) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "restore_data_problems")("write_id", WriteData.GetWriteMeta().GetWriteId())( "tablet_id", TabletId)("message", errorMessage); - SendErrorMessage(errorMessage); + SendErrorMessage(errorMessage, NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); } NKikimr::TConclusionStatus TModificationRestoreTask::DoOnFinished() { @@ -67,10 +61,10 @@ TModificationRestoreTask::TModificationRestoreTask(const ui64 tabletId, const NA } -void TModificationRestoreTask::SendErrorMessage(const TString& errorMessage) { +void TModificationRestoreTask::SendErrorMessage(const TString& errorMessage, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass) { auto writeDataPtr = std::make_shared(std::move(WriteData)); TWritingBuffer buffer(writeDataPtr->GetBlobsAction(), { std::make_shared(*writeDataPtr) }); - auto evResult = NColumnShard::TEvPrivate::TEvWriteBlobsResult::Error(NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), errorMessage); + auto evResult = 
NColumnShard::TEvPrivate::TEvWriteBlobsResult::Error(NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), errorMessage, errorClass); TActorContext::AsActorContext().Send(ParentActorId, evResult.release()); } diff --git a/ydb/core/tx/columnshard/operations/batch_builder/restore.h b/ydb/core/tx/columnshard/operations/batch_builder/restore.h index cab283c4f1a2..b69a856a8a58 100644 --- a/ydb/core/tx/columnshard/operations/batch_builder/restore.h +++ b/ydb/core/tx/columnshard/operations/batch_builder/restore.h @@ -1,6 +1,7 @@ #pragma once #include "merger.h" +#include #include #include @@ -23,12 +24,12 @@ class TModificationRestoreTask: public NDataReader::IRestoreTask { virtual TConclusionStatus DoOnDataChunk(const std::shared_ptr& data) override; virtual TConclusionStatus DoOnFinished() override; virtual void DoOnError(const TString& errorMessage) override; - void SendErrorMessage(const TString& errorMessage); + void SendErrorMessage(const TString& errorMessage, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass); public: - TModificationRestoreTask(const ui64 tabletId, const NActors::TActorId parentActorId, - const NActors::TActorId bufferActorId, NEvWrite::TWriteData&& writeData, const std::shared_ptr& merger, - const std::shared_ptr& actualSchema, const TSnapshot actualSnapshot, const std::shared_ptr& incomingData); + TModificationRestoreTask(const ui64 tabletId, const NActors::TActorId parentActorId, const NActors::TActorId bufferActorId, + NEvWrite::TWriteData&& writeData, const std::shared_ptr& merger, const std::shared_ptr& actualSchema, + const TSnapshot actualSnapshot, const std::shared_ptr& incomingData); }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/operations/manager.cpp b/ydb/core/tx/columnshard/operations/manager.cpp new file mode 100644 index 000000000000..1527ec5d028d --- /dev/null +++ b/ydb/core/tx/columnshard/operations/manager.cpp @@ -0,0 +1,296 @@ +#include "manager.h" + +#include + 
+namespace NKikimr::NColumnShard { + +bool TOperationsManager::Load(NTabletFlatExecutor::TTransactionContext& txc) { + NIceDb::TNiceDb db(txc.DB); + { + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return false; + } + + while (!rowset.EndOfSet()) { + const TOperationWriteId writeId = (TOperationWriteId)rowset.GetValue(); + const ui64 createdAtSec = rowset.GetValue(); + const ui64 lockId = rowset.GetValue(); + const ui64 cookie = rowset.GetValueOrDefault(0); + const TString metadata = rowset.GetValue(); + const EOperationStatus status = (EOperationStatus)rowset.GetValue(); + std::optional granuleShardingVersionId; + if (rowset.HaveValue() && + rowset.GetValue()) { + granuleShardingVersionId = rowset.GetValue(); + } + + NKikimrTxColumnShard::TInternalOperationData metaProto; + Y_ABORT_UNLESS(metaProto.ParseFromString(metadata)); + + auto operation = std::make_shared( + writeId, lockId, cookie, status, TInstant::Seconds(createdAtSec), granuleShardingVersionId, NEvWrite::EModificationType::Upsert); + operation->FromProto(metaProto); + LinkInsertWriteIdToOperationWriteId(operation->GetInsertWriteIds(), operation->GetWriteId()); + AFL_VERIFY(operation->GetStatus() != EOperationStatus::Draft); + + AFL_VERIFY(Operations.emplace(operation->GetWriteId(), operation).second); + auto it = LockFeatures.find(lockId); + if (it == LockFeatures.end()) { + it = LockFeatures.emplace(lockId, TLockFeatures(lockId, 0)).first; + } + it->second.MutableWriteOperations().emplace_back(operation); + LastWriteId = std::max(LastWriteId, operation->GetWriteId()); + if (!rowset.Next()) { + return false; + } + } + } + { + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return false; + } + + while (!rowset.EndOfSet()) { + const ui64 lockId = rowset.GetValue(); + const ui64 txId = rowset.GetValue(); + AFL_VERIFY(LockFeatures.contains(lockId))("lock_id", lockId); + AFL_VERIFY(Tx2Lock.emplace(txId, lockId).second); + if (!rowset.Next()) { + return false; + } + } + } + 
+ return true; +} + +void TOperationsManager::CommitTransactionOnExecute( + TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) { + auto& lock = GetLockFeaturesForTxVerified(txId); + TLogContextGuard gLogging( + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("commit_tx_id", txId)("commit_lock_id", lock.GetLockId())); + TVector commited; + for (auto&& opPtr : lock.GetWriteOperations()) { + opPtr->CommitOnExecute(owner, txc, snapshot); + commited.emplace_back(opPtr); + } + OnTransactionFinishOnExecute(commited, lock, txId, txc); +} + +void TOperationsManager::CommitTransactionOnComplete( + TColumnShard& owner, const ui64 txId, const NOlap::TSnapshot& snapshot) { + auto& lock = GetLockFeaturesForTxVerified(txId); + TLogContextGuard gLogging( + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("commit_tx_id", txId)("commit_lock_id", lock.GetLockId())); + for (auto&& i : lock.GetBrokeOnCommit()) { + if (auto lockNotify = GetLockOptional(i)) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("broken_lock_id", i); + lockNotify->SetBroken(); + } + } + + for (auto&& i : lock.GetNotifyOnCommit()) { + if (auto lockNotify = GetLockOptional(i)) { + lockNotify->AddNotifyCommit(lock.GetLockId()); + } + } + + TVector commited; + for (auto&& opPtr : lock.GetWriteOperations()) { + opPtr->CommitOnComplete(owner, snapshot); + commited.emplace_back(opPtr); + } + OnTransactionFinishOnComplete(commited, lock, txId); +} + +void TOperationsManager::AbortTransactionOnExecute(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { + auto* lock = GetLockFeaturesForTxOptional(txId); + if (!lock) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "abort")("tx_id", txId)("problem", "finished"); + return; + } + TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tx_id", txId)("lock_id", lock->GetLockId())); + + TVector 
aborted; + for (auto&& opPtr : lock->GetWriteOperations()) { + opPtr->AbortOnExecute(owner, txc); + aborted.emplace_back(opPtr); + } + + OnTransactionFinishOnExecute(aborted, *lock, txId, txc); +} + +void TOperationsManager::AbortTransactionOnComplete(TColumnShard& owner, const ui64 txId) { + auto* lock = GetLockFeaturesForTxOptional(txId); + if (!lock) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "abort")("tx_id", txId)("problem", "finished"); + return; + } + TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tx_id", txId)("lock_id", lock->GetLockId())); + + TVector aborted; + for (auto&& opPtr : lock->GetWriteOperations()) { + opPtr->AbortOnComplete(owner); + aborted.emplace_back(opPtr); + } + + OnTransactionFinishOnComplete(aborted, *lock, txId); +} + +TWriteOperation::TPtr TOperationsManager::GetOperation(const TOperationWriteId writeId) const { + auto it = Operations.find(writeId); + if (it == Operations.end()) { + return nullptr; + } + return it->second; +} + +void TOperationsManager::OnTransactionFinishOnExecute( + const TVector& operations, const TLockFeatures& lock, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { + for (auto&& op : operations) { + RemoveOperationOnExecute(op, txc); + } + NIceDb::TNiceDb db(txc.DB); + db.Table().Key(txId, lock.GetLockId()).Delete(); +} + +void TOperationsManager::OnTransactionFinishOnComplete( + const TVector& operations, const TLockFeatures& lock, const ui64 txId) { + { + lock.RemoveInteractions(InteractionsContext); + LockFeatures.erase(lock.GetLockId()); + } + Tx2Lock.erase(txId); + for (auto&& op : operations) { + RemoveOperationOnComplete(op); + } +} + +void TOperationsManager::RemoveOperationOnExecute(const TWriteOperation::TPtr& op, NTabletFlatExecutor::TTransactionContext& txc) { + NIceDb::TNiceDb db(txc.DB); + db.Table().Key((ui64)op->GetWriteId()).Delete(); +} + +void TOperationsManager::RemoveOperationOnComplete(const TWriteOperation::TPtr& 
op) { + for (auto&& i : op->GetInsertWriteIds()) { + AFL_VERIFY(InsertWriteIdToOpWriteId.erase(i)); + } + Operations.erase(op->GetWriteId()); +} + +TOperationWriteId TOperationsManager::BuildNextOperationWriteId() { + return ++LastWriteId; +} + +std::optional TOperationsManager::GetLockForTx(const ui64 txId) const { + auto lockIt = Tx2Lock.find(txId); + if (lockIt != Tx2Lock.end()) { + return lockIt->second; + } + return std::nullopt; +} + +void TOperationsManager::LinkTransactionOnExecute(const ui64 lockId, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { + NIceDb::TNiceDb db(txc.DB); + db.Table().Key(txId, lockId).Update(); + Tx2Lock[txId] = lockId; +} + +void TOperationsManager::LinkTransactionOnComplete(const ui64 /*lockId*/, const ui64 /*txId*/) { +} + +TWriteOperation::TPtr TOperationsManager::RegisterOperation( + const ui64 lockId, const ui64 cookie, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType) { + auto writeId = BuildNextOperationWriteId(); + auto operation = std::make_shared( + writeId, lockId, cookie, EOperationStatus::Draft, AppData()->TimeProvider->Now(), granuleShardingVersionId, mType); + Y_ABORT_UNLESS(Operations.emplace(operation->GetWriteId(), operation).second); + GetLockVerified(operation->GetLockId()).MutableWriteOperations().emplace_back(operation); + GetLockVerified(operation->GetLockId()).AddWrite(); + return operation; +} + +TConclusion TOperationsManager::GetBehaviour(const NEvents::TDataEvents::TEvWrite& evWrite) { + if (evWrite.Record.HasTxId() && evWrite.Record.HasLocks()) { + if (evWrite.Record.GetLocks().GetLocks().size() < 1) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("proto", evWrite.Record.DebugString())("event", "undefined behaviour"); + return TConclusionStatus::Fail("no locks in case tx/locks"); + } + auto& baseLock = evWrite.Record.GetLocks().GetLocks()[0]; + for (auto&& i : evWrite.Record.GetLocks().GetLocks()) { + if (i.GetLockId() != baseLock.GetLockId()) { + 
AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("proto", evWrite.Record.DebugString())("event", "undefined behaviour"); + return TConclusionStatus::Fail("different lock ids in operation"); + } + if (i.GetGeneration() != baseLock.GetGeneration()) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("proto", evWrite.Record.DebugString())("event", "undefined behaviour"); + return TConclusionStatus::Fail("different lock generations in operation"); + } + if (i.GetCounter() != baseLock.GetCounter()) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("proto", evWrite.Record.DebugString())("event", "undefined behaviour"); + return TConclusionStatus::Fail("different lock generation counters in operation"); + } + } + if (evWrite.Record.GetLocks().GetOp() == NKikimrDataEvents::TKqpLocks::Commit) { + return EOperationBehaviour::CommitWriteLock; + } + if (evWrite.Record.GetLocks().GetOp() == NKikimrDataEvents::TKqpLocks::Rollback) { + return EOperationBehaviour::AbortWriteLock; + } + } + + if (evWrite.Record.HasLockTxId() && evWrite.Record.HasLockNodeId()) { + if (evWrite.Record.GetTxMode() == NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE) { + return EOperationBehaviour::WriteWithLock; + } + + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("proto", evWrite.Record.DebugString())("event", "undefined behaviour"); + return TConclusionStatus::Fail("mode not IMMEDIATE for LockTxId + LockNodeId"); + } + + if (!evWrite.Record.HasLockTxId() && !evWrite.Record.HasLockNodeId() && + evWrite.Record.GetTxMode() == NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE) { + return EOperationBehaviour::NoTxWrite; + } + + if (evWrite.Record.HasTxId() && evWrite.Record.GetTxMode() == NKikimrDataEvents::TEvWrite::MODE_PREPARE) { + return EOperationBehaviour::InTxWrite; + } + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("proto", evWrite.Record.DebugString())("event", "undefined behaviour"); + return TConclusionStatus::Fail("undefined request for detect tx type"); +} + +TOperationsManager::TOperationsManager() { +} + +void 
TOperationsManager::AddEventForTx(TColumnShard& owner, const ui64 txId, const std::shared_ptr& writer) { + return AddEventForLock(owner, GetLockForTxVerified(txId), writer); +} + +void TOperationsManager::AddEventForLock( + TColumnShard& /*owner*/, const ui64 lockId, const std::shared_ptr& writer) { + AFL_VERIFY(writer); + NOlap::NTxInteractions::TTxConflicts txNotifications; + NOlap::NTxInteractions::TTxConflicts txConflicts; + auto& txLock = GetLockVerified(lockId); + writer->CheckInteraction(lockId, InteractionsContext, txConflicts, txNotifications); + for (auto&& i : txConflicts) { + if (auto lock = GetLockOptional(i.first)) { + GetLockVerified(i.first).AddBrokeOnCommit(i.second); + } else if (txLock.IsCommitted(i.first)) { + txLock.SetBroken(); + } + } + for (auto&& i : txNotifications) { + GetLockVerified(i.first).AddNotificationsOnCommit(i.second); + } + if (auto txEvent = writer->BuildEvent()) { + NOlap::NTxInteractions::TTxEventContainer container(lockId, txEvent); + container.AddToInteraction(InteractionsContext); + txLock.MutableEvents().emplace_back(std::move(container)); + } +} + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/operations/manager.h b/ydb/core/tx/columnshard/operations/manager.h new file mode 100644 index 000000000000..9e2651e24da0 --- /dev/null +++ b/ydb/core/tx/columnshard/operations/manager.h @@ -0,0 +1,221 @@ +#pragma once +#include "write.h" + +#include +#include + +namespace NKikimr::NOlap::NTxInteractions { +class TManager; +class TTxEventContainer; +class TInteractionsContext; +class ITxEventWriter; +} // namespace NKikimr::NOlap::NTxInteractions + +namespace NKikimr::NColumnShard { + +class TColumnShard; +class TLockFeatures; + +class TLockSharingInfo { +private: + const ui64 LockId; + const ui64 Generation; + TAtomicCounter InternalGenerationCounter = 0; + TAtomicCounter Broken = 0; + TAtomicCounter WritesCounter = 0; + friend class TLockFeatures; + +public: + ui64 GetLockId() const { + return LockId; 
+ } + ui64 GetGeneration() const { + return Generation; + } + + TLockSharingInfo(const ui64 lockId, const ui64 generation) + : LockId(lockId) + , Generation(generation) { + } + + bool HasWrites() const { + return WritesCounter.Val(); + } + + bool IsBroken() const { + return Broken.Val(); + } + + ui64 GetCounter() const { + return InternalGenerationCounter.Val(); + } +}; + +class TLockFeatures: TMoveOnly { +private: + YDB_ACCESSOR_DEF(std::vector, WriteOperations); + YDB_ACCESSOR_DEF(std::vector, Events); + YDB_ACCESSOR(ui64, LockId, 0); + YDB_ACCESSOR(ui64, Generation, 0); + std::shared_ptr SharingInfo; + + YDB_READONLY_DEF(THashSet, BrokeOnCommit); + YDB_READONLY_DEF(THashSet, NotifyOnCommit); + YDB_READONLY_DEF(THashSet, Committed); + +public: + const std::shared_ptr& GetSharingInfo() const { + return SharingInfo; + } + + ui64 GetInternalGenerationCounter() const { + return SharingInfo->GetCounter(); + } + + void AddWrite() { + SharingInfo->WritesCounter.Inc(); + } + + void SetBroken() { + SharingInfo->Broken = 1; + SharingInfo->InternalGenerationCounter = (i64)TSysTables::TLocksTable::TLock::ESetErrors::ErrorBroken; + } + + bool IsBroken() const { + return SharingInfo->IsBroken(); + } + + bool IsCommitted(const ui64 lockId) const { + return Committed.contains(lockId); + } + + void AddNotifyCommit(const ui64 lockId) { + AFL_VERIFY(NotifyOnCommit.erase(lockId)); + Committed.emplace(lockId); + } + + void AddBrokeOnCommit(const THashSet& lockIds) { + BrokeOnCommit.insert(lockIds.begin(), lockIds.end()); + } + + void AddNotificationsOnCommit(const THashSet& lockIds) { + NotifyOnCommit.insert(lockIds.begin(), lockIds.end()); + } + + void RemoveInteractions(NOlap::NTxInteractions::TInteractionsContext& context) const { + for (auto&& i : Events) { + i.RemoveFromInteraction(context); + } + } + + TLockFeatures(const ui64 lockId, const ui64 gen) + : LockId(lockId) + , Generation(gen) { + SharingInfo = std::make_shared(lockId, gen); + } +}; + +class TOperationsManager { + 
NOlap::NTxInteractions::TInteractionsContext InteractionsContext; + + THashMap Tx2Lock; + THashMap InsertWriteIdToOpWriteId; + THashMap LockFeatures; + THashMap Operations; + TOperationWriteId LastWriteId = TOperationWriteId(0); + +public: + + TWriteOperation::TPtr GetOperationByInsertWriteIdVerified(const TInsertWriteId insertWriteId) const { + auto it = InsertWriteIdToOpWriteId.find(insertWriteId); + AFL_VERIFY(it != InsertWriteIdToOpWriteId.end()); + return GetOperationVerified(it->second); + } + + void LinkInsertWriteIdToOperationWriteId(const std::vector& insertions, const TOperationWriteId operationId) { + for (auto&& i : insertions) { + InsertWriteIdToOpWriteId.emplace(i, operationId); + } + } + bool Load(NTabletFlatExecutor::TTransactionContext& txc); + void AddEventForTx(TColumnShard& owner, const ui64 txId, const std::shared_ptr& writer); + void AddEventForLock(TColumnShard& owner, const ui64 lockId, const std::shared_ptr& writer); + + TWriteOperation::TPtr GetOperation(const TOperationWriteId writeId) const; + TWriteOperation::TPtr GetOperationVerified(const TOperationWriteId writeId) const { + return TValidator::CheckNotNull(GetOperationOptional(writeId)); + } + TWriteOperation::TPtr GetOperationOptional(const TOperationWriteId writeId) const { + return GetOperation(writeId); + } + void CommitTransactionOnExecute( + TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot); + void CommitTransactionOnComplete( + TColumnShard& owner, const ui64 txId, const NOlap::TSnapshot& snapshot); + void AddTemporaryTxLink(const ui64 lockId) { + AFL_VERIFY(Tx2Lock.emplace(lockId, lockId).second); + } + void LinkTransactionOnExecute(const ui64 lockId, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); + void LinkTransactionOnComplete(const ui64 lockId, const ui64 txId); + void AbortTransactionOnExecute(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); + void 
AbortTransactionOnComplete(TColumnShard& owner, const ui64 txId); + + std::optional GetLockForTx(const ui64 txId) const; + std::optional GetLockForTxOptional(const ui64 txId) const { + return GetLockForTx(txId); + } + TLockFeatures* GetLockFeaturesForTxOptional(const ui64 txId) { + auto lockId = GetLockForTxOptional(txId); + if (!lockId) { + return nullptr; + } + return &GetLockVerified(*lockId); + } + TLockFeatures& GetLockFeaturesForTxVerified(const ui64 txId) { + auto lockId = GetLockForTxOptional(txId); + AFL_VERIFY(lockId); + return GetLockVerified(*lockId); + } + ui64 GetLockForTxVerified(const ui64 txId) const { + auto result = GetLockForTxOptional(txId); + AFL_VERIFY(result)("tx_id", txId); + return *result; + } + + TWriteOperation::TPtr RegisterOperation( + const ui64 lockId, const ui64 cookie, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType); + bool RegisterLock(const ui64 lockId, const ui64 generationId) { + if (LockFeatures.contains(lockId)) { + return false; + } else { + LockFeatures.emplace(lockId, TLockFeatures(lockId, generationId)); + return true; + } + } + static TConclusion GetBehaviour(const NEvents::TDataEvents::TEvWrite& evWrite); + TLockFeatures& GetLockVerified(const ui64 lockId) { + auto result = GetLockOptional(lockId); + AFL_VERIFY(result)("lock_id", lockId); + return *result; + } + + TLockFeatures* GetLockOptional(const ui64 lockId) { + auto it = LockFeatures.find(lockId); + if (it != LockFeatures.end()) { + return &it->second; + } else { + return nullptr; + } + } + + TOperationsManager(); + +private: + TOperationWriteId BuildNextOperationWriteId(); + void RemoveOperationOnExecute(const TWriteOperation::TPtr& op, NTabletFlatExecutor::TTransactionContext& txc); + void RemoveOperationOnComplete(const TWriteOperation::TPtr& op); + void OnTransactionFinishOnExecute(const TVector& operations, const TLockFeatures& lock, const ui64 txId, + NTabletFlatExecutor::TTransactionContext& txc); + void 
OnTransactionFinishOnComplete(const TVector& operations, const TLockFeatures& lock, const ui64 txId); +}; +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/operations/slice_builder/builder.cpp b/ydb/core/tx/columnshard/operations/slice_builder/builder.cpp index a5daa4f5d281..f7c3ac8715ab 100644 --- a/ydb/core/tx/columnshard/operations/slice_builder/builder.cpp +++ b/ydb/core/tx/columnshard/operations/slice_builder/builder.cpp @@ -27,11 +27,11 @@ std::optional> TBuildSlicesTask:: return result; } -void TBuildSlicesTask::ReplyError(const TString& message) { +void TBuildSlicesTask::ReplyError(const TString& message, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass) { auto writeDataPtr = std::make_shared(std::move(WriteData)); TWritingBuffer buffer(writeDataPtr->GetBlobsAction(), { std::make_shared(*writeDataPtr) }); auto result = NColumnShard::TEvPrivate::TEvWriteBlobsResult::Error( - NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), message); + NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), message, errorClass); TActorContext::AsActorContext().Send(ParentActorId, result.release()); } @@ -39,19 +39,21 @@ TConclusionStatus TBuildSlicesTask::DoExecute(const std::shared_ptr& /*ta NActors::TLogContextGuard g(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletId)("parent_id", ParentActorId)); if (!OriginalBatch) { AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "ev_write_bad_data")("write_id", WriteData.GetWriteMeta().GetWriteId())("table_id", WriteData.GetWriteMeta().GetTableId()); - ReplyError("no data in batch"); + ReplyError("no data in batch", NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); return TConclusionStatus::Fail("no data in batch"); } const auto& indexSchema = ActualSchema->GetIndexInfo().ArrowSchema(); - NArrow::TSchemaSubset subset; - auto reorderConclusion = NArrow::TColumnOperator().Adapt(OriginalBatch, indexSchema, 
&subset); - if (reorderConclusion.IsFail()) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "unadaptable schemas")("index", indexSchema->ToString())("problem", reorderConclusion.GetErrorMessage()); - ReplyError("cannot reorder schema: " + reorderConclusion.GetErrorMessage()); - return TConclusionStatus::Fail("cannot reorder schema: " + reorderConclusion.GetErrorMessage()); - } else { - OriginalBatch = reorderConclusion.DetachResult(); + auto subsetConclusion = NArrow::TColumnOperator().BuildSequentialSubset(OriginalBatch, indexSchema); + if (subsetConclusion.IsFail()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "unadaptable schemas")("index", indexSchema->ToString())( + "problem", subsetConclusion.GetErrorMessage()); + ReplyError( + "unadaptable schema: " + subsetConclusion.GetErrorMessage(), + NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); + return TConclusionStatus::Fail("cannot reorder schema: " + subsetConclusion.GetErrorMessage()); } + NArrow::TSchemaSubset subset = subsetConclusion.DetachResult(); + if (OriginalBatch->num_columns() != indexSchema->num_fields()) { AFL_VERIFY(OriginalBatch->num_columns() < indexSchema->num_fields())("original", OriginalBatch->num_columns())( "index", indexSchema->num_fields()); @@ -70,10 +72,10 @@ TConclusionStatus TBuildSlicesTask::DoExecute(const std::shared_ptr& /*ta if (batches) { auto writeDataPtr = std::make_shared(std::move(WriteData)); writeDataPtr->SetSchemaSubset(std::move(subset)); - auto result = std::make_unique(writeDataPtr, std::move(*batches)); + auto result = std::make_unique(writeDataPtr, std::move(*batches), OriginalBatch); TActorContext::AsActorContext().Send(BufferActorId, result.release()); } else { - ReplyError("Cannot slice input to batches"); + ReplyError("Cannot slice input to batches", NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); return TConclusionStatus::Fail("Cannot slice input to batches"); } diff --git 
a/ydb/core/tx/columnshard/operations/slice_builder/builder.h b/ydb/core/tx/columnshard/operations/slice_builder/builder.h index bd5c59064265..a22b0c7d6ca7 100644 --- a/ydb/core/tx/columnshard/operations/slice_builder/builder.h +++ b/ydb/core/tx/columnshard/operations/slice_builder/builder.h @@ -1,8 +1,9 @@ #pragma once +#include +#include #include #include #include -#include namespace NKikimr::NOlap { @@ -15,7 +16,8 @@ class TBuildSlicesTask: public NConveyor::ITask { std::shared_ptr OriginalBatch; std::optional> BuildSlices(); const std::shared_ptr ActualSchema; - void ReplyError(const TString& message); + void ReplyError(const TString& message, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass); + protected: virtual TConclusionStatus DoExecute(const std::shared_ptr& taskPtr) override; @@ -24,16 +26,14 @@ class TBuildSlicesTask: public NConveyor::ITask { return "Write::ConstructBlobs::Slices"; } - TBuildSlicesTask(const ui64 tabletId, const NActors::TActorId parentActorId, - const NActors::TActorId bufferActorId, NEvWrite::TWriteData&& writeData, const std::shared_ptr& batch, - const std::shared_ptr& actualSchema) + TBuildSlicesTask(const ui64 tabletId, const NActors::TActorId parentActorId, const NActors::TActorId bufferActorId, + NEvWrite::TWriteData&& writeData, const std::shared_ptr& batch, const std::shared_ptr& actualSchema) : WriteData(std::move(writeData)) , TabletId(tabletId) , ParentActorId(parentActorId) , BufferActorId(bufferActorId) , OriginalBatch(batch) - , ActualSchema(actualSchema) - { + , ActualSchema(actualSchema) { } }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/operations/write.cpp b/ydb/core/tx/columnshard/operations/write.cpp index 4ff54d395e73..4a78726ca457 100644 --- a/ydb/core/tx/columnshard/operations/write.cpp +++ b/ydb/core/tx/columnshard/operations/write.cpp @@ -1,6 +1,7 @@ -#include "batch_builder/builder.h" #include "write.h" +#include "batch_builder/builder.h" + #include 
#include #include @@ -11,272 +12,111 @@ namespace NKikimr::NColumnShard { - TWriteOperation::TWriteOperation(const TWriteId writeId, const ui64 lockId, const ui64 cookie, const EOperationStatus& status, const TInstant createdAt, - const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType) - : Status(status) - , CreatedAt(createdAt) - , WriteId(writeId) - , LockId(lockId) - , Cookie(cookie) - , GranuleShardingVersionId(granuleShardingVersionId) - , ModificationType(mType) - { - } - - void TWriteOperation::Start(TColumnShard& owner, const ui64 tableId, const NEvWrite::IDataContainer::TPtr& data, - const NActors::TActorId& source, const std::shared_ptr& schema, const TActorContext& ctx) { - Y_ABORT_UNLESS(Status == EOperationStatus::Draft); - - NEvWrite::TWriteMeta writeMeta((ui64)WriteId, tableId, source, GranuleShardingVersionId); - writeMeta.SetModificationType(ModificationType); - std::shared_ptr task = std::make_shared(owner.TabletID(), ctx.SelfID, owner.BufferizationWriteActorId, +TWriteOperation::TWriteOperation(const TOperationWriteId writeId, const ui64 lockId, const ui64 cookie, const EOperationStatus& status, + const TInstant createdAt, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType) + : Status(status) + , CreatedAt(createdAt) + , WriteId(writeId) + , LockId(lockId) + , Cookie(cookie) + , GranuleShardingVersionId(granuleShardingVersionId) + , ModificationType(mType) { +} + +void TWriteOperation::Start(TColumnShard& owner, const ui64 tableId, const NEvWrite::IDataContainer::TPtr& data, const NActors::TActorId& source, + const std::shared_ptr& schema, const TActorContext& ctx) { + Y_ABORT_UNLESS(Status == EOperationStatus::Draft); + + NEvWrite::TWriteMeta writeMeta((ui64)WriteId, tableId, source, GranuleShardingVersionId); + writeMeta.SetLockId(LockId); + writeMeta.SetModificationType(ModificationType); + std::shared_ptr task = + std::make_shared(owner.TabletID(), ctx.SelfID, 
owner.BufferizationWriteActorId, NEvWrite::TWriteData(writeMeta, data, owner.TablesManager.GetPrimaryIndex()->GetReplaceKey(), owner.StoragesManager->GetInsertOperator()->StartWritingAction(NOlap::NBlobOperations::EConsumer::WRITING_OPERATOR)), schema, owner.GetLastTxSnapshot()); - NConveyor::TCompServiceOperator::SendTaskToExecute(task); - - Status = EOperationStatus::Started; - } - - void TWriteOperation::Commit(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) const { - Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); - - TBlobGroupSelector dsGroupSelector(owner.Info()); - NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); - - for (auto gWriteId : GlobalWriteIds) { - auto pathExists = [&](ui64 pathId) { - return owner.TablesManager.HasTable(pathId); - }; - - auto counters = owner.InsertTable->Commit(dbTable, snapshot.GetPlanStep(), snapshot.GetTxId(), { gWriteId }, - pathExists); - - owner.IncCounter(COUNTER_BLOBS_COMMITTED, counters.Rows); - owner.IncCounter(COUNTER_BYTES_COMMITTED, counters.Bytes); - owner.IncCounter(COUNTER_RAW_BYTES_COMMITTED, counters.RawBytes); - } - owner.UpdateInsertTableCounters(); - } - - void TWriteOperation::OnWriteFinish(NTabletFlatExecutor::TTransactionContext& txc, const TVector& globalWriteIds) { - Y_ABORT_UNLESS(Status == EOperationStatus::Started); - Status = EOperationStatus::Prepared; - GlobalWriteIds = globalWriteIds; - - NIceDb::TNiceDb db(txc.DB); - NKikimrTxColumnShard::TInternalOperationData proto; - ToProto(proto); - - TString metadata; - Y_ABORT_UNLESS(proto.SerializeToString(&metadata)); - - db.Table().Key((ui64)WriteId).Update( - NIceDb::TUpdate((ui32)Status), - NIceDb::TUpdate(CreatedAt.Seconds()), - NIceDb::TUpdate(metadata), - NIceDb::TUpdate(LockId), - NIceDb::TUpdate(Cookie), - NIceDb::TUpdate(GranuleShardingVersionId.value_or(0))); - } + NConveyor::TCompServiceOperator::SendTaskToExecute(task); - void 
TWriteOperation::ToProto(NKikimrTxColumnShard::TInternalOperationData& proto) const { - for (auto&& writeId : GlobalWriteIds) { - proto.AddInternalWriteIds((ui64)writeId); - } - proto.SetModificationType((ui32)ModificationType); - } + Status = EOperationStatus::Started; +} - void TWriteOperation::FromProto(const NKikimrTxColumnShard::TInternalOperationData& proto) { - for (auto&& writeId : proto.GetInternalWriteIds()) { - GlobalWriteIds.push_back(TWriteId(writeId)); - } - if (proto.HasModificationType()) { - ModificationType = (NEvWrite::EModificationType)proto.GetModificationType(); - } else { - ModificationType = NEvWrite::EModificationType::Replace; - } - } +void TWriteOperation::CommitOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) const { + Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); - void TWriteOperation::Abort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) const { - Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); + TBlobGroupSelector dsGroupSelector(owner.Info()); + NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); - TBlobGroupSelector dsGroupSelector(owner.Info()); - NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); + for (auto gWriteId : InsertWriteIds) { + auto pathExists = [&](ui64 pathId) { + return owner.TablesManager.HasTable(pathId); + }; - THashSet writeIds; - writeIds.insert(GlobalWriteIds.begin(), GlobalWriteIds.end()); - owner.InsertTable->Abort(dbTable, writeIds); + const auto counters = owner.InsertTable->Commit(dbTable, snapshot.GetPlanStep(), snapshot.GetTxId(), { gWriteId }, pathExists); + owner.Counters.GetTabletCounters()->OnWriteCommitted(counters); } +} - bool TOperationsManager::Load(NTabletFlatExecutor::TTransactionContext& txc) { - NIceDb::TNiceDb db(txc.DB); - { - auto rowset = db.Table().Select(); - if (!rowset.IsReady()) { - return false; - } - - while (!rowset.EndOfSet()) { - const TWriteId writeId = 
(TWriteId)rowset.GetValue(); - const ui64 createdAtSec = rowset.GetValue(); - const ui64 lockId = rowset.GetValue(); - const ui64 cookie = rowset.GetValueOrDefault(0); - const TString metadata = rowset.GetValue(); - const EOperationStatus status = (EOperationStatus)rowset.GetValue(); - std::optional granuleShardingVersionId; - if (rowset.HaveValue() && rowset.GetValue()) { - granuleShardingVersionId = rowset.GetValue(); - } +void TWriteOperation::CommitOnComplete(TColumnShard& owner, const NOlap::TSnapshot& /*snapshot*/) const { + Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); + owner.UpdateInsertTableCounters(); +} - NKikimrTxColumnShard::TInternalOperationData metaProto; - Y_ABORT_UNLESS(metaProto.ParseFromString(metadata)); +void TWriteOperation::OnWriteFinish( + NTabletFlatExecutor::TTransactionContext& txc, const std::vector& insertWriteIds, const bool ephemeralFlag) { + Y_ABORT_UNLESS(Status == EOperationStatus::Started); + Status = EOperationStatus::Prepared; + InsertWriteIds = insertWriteIds; - auto operation = std::make_shared(writeId, lockId, cookie, status, TInstant::Seconds(createdAtSec), granuleShardingVersionId, NEvWrite::EModificationType::Upsert); - operation->FromProto(metaProto); - AFL_VERIFY(operation->GetStatus() != EOperationStatus::Draft); - - auto [_, isOk] = Operations.emplace(operation->GetWriteId(), operation); - if (!isOk) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "duplicated_operation")("operation", *operation); - return false; - } - Locks[lockId].push_back(operation->GetWriteId()); - LastWriteId = std::max(LastWriteId, operation->GetWriteId()); - if (!rowset.Next()) { - return false; - } - } - } - { - auto rowset = db.Table().Select(); - if (!rowset.IsReady()) { - return false; - } - - while (!rowset.EndOfSet()) { - const ui64 lockId = rowset.GetValue(); - const ui64 txId = rowset.GetValue(); - AFL_VERIFY(Locks.contains(lockId))("lock_id", lockId); - Tx2Lock[txId] = lockId; - if (!rowset.Next()) { - return false; - 
} - } - } - return true; + if (ephemeralFlag) { + return; } - bool TOperationsManager::CommitTransaction(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) { - TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tx_id", txId)); - auto lockId = GetLockForTx(txId); - if (!lockId) { - ACFL_ERROR("details", "unknown_transaction"); - return true; - } - auto tIt = Locks.find(*lockId); - AFL_VERIFY(tIt != Locks.end())("tx_id", txId)("lock_id", *lockId); + NIceDb::TNiceDb db(txc.DB); + NKikimrTxColumnShard::TInternalOperationData proto; + ToProto(proto); - TVector commited; - for (auto&& opId : tIt->second) { - auto opPtr = Operations.FindPtr(opId); - (*opPtr)->Commit(owner, txc, snapshot); - commited.emplace_back(*opPtr); - } - OnTransactionFinish(commited, txId, txc); - return true; - } - - bool TOperationsManager::AbortTransaction(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { - TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tx_id", txId)); - - auto lockId = GetLockForTx(txId); - if (!lockId) { - ACFL_ERROR("details", "unknown_transaction"); - return true; - } - auto tIt = Locks.find(*lockId); - AFL_VERIFY(tIt != Locks.end())("tx_id", txId)("lock_id", *lockId); - - TVector aborted; - for (auto&& opId : tIt->second) { - auto opPtr = Operations.FindPtr(opId); - (*opPtr)->Abort(owner, txc); - aborted.emplace_back(*opPtr); - } - - OnTransactionFinish(aborted, txId, txc); - return true; - } + TString metadata; + Y_ABORT_UNLESS(proto.SerializeToString(&metadata)); - TWriteOperation::TPtr TOperationsManager::GetOperation(const TWriteId writeId) const { - auto it = Operations.find(writeId); - if (it == Operations.end()) { - return nullptr; - } - return it->second; - } - - void TOperationsManager::OnTransactionFinish(const TVector& operations, const ui64 txId, 
NTabletFlatExecutor::TTransactionContext& txc) { - auto lockId = GetLockForTx(txId); - AFL_VERIFY(!!lockId)("tx_id", txId); - Locks.erase(*lockId); - Tx2Lock.erase(txId); - for (auto&& op : operations) { - RemoveOperation(op, txc); - } - NIceDb::TNiceDb db(txc.DB); - db.Table().Key(txId, *lockId).Delete(); - } - - void TOperationsManager::RemoveOperation(const TWriteOperation::TPtr& op, NTabletFlatExecutor::TTransactionContext& txc) { - Operations.erase(op->GetWriteId()); - NIceDb::TNiceDb db(txc.DB); - db.Table().Key((ui64)op->GetWriteId()).Delete(); - } + db.Table() + .Key((ui64)WriteId) + .Update(NIceDb::TUpdate((ui32)Status), NIceDb::TUpdate(CreatedAt.Seconds()), + NIceDb::TUpdate(metadata), NIceDb::TUpdate(LockId), + NIceDb::TUpdate(Cookie), + NIceDb::TUpdate(GranuleShardingVersionId.value_or(0))); +} - TWriteId TOperationsManager::BuildNextWriteId() { - return ++LastWriteId; +void TWriteOperation::ToProto(NKikimrTxColumnShard::TInternalOperationData& proto) const { + for (auto&& writeId : InsertWriteIds) { + proto.AddInternalWriteIds((ui64)writeId); } + proto.SetModificationType((ui32)ModificationType); +} - std::optional TOperationsManager::GetLockForTx(const ui64 txId) const { - auto lockIt = Tx2Lock.find(txId); - if (lockIt != Tx2Lock.end()) { - return lockIt->second; - } - return std::nullopt; +void TWriteOperation::FromProto(const NKikimrTxColumnShard::TInternalOperationData& proto) { + for (auto&& writeId : proto.GetInternalWriteIds()) { + InsertWriteIds.push_back(TInsertWriteId(writeId)); } - - void TOperationsManager::LinkTransaction(const ui64 lockId, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { - Tx2Lock[txId] = lockId; - NIceDb::TNiceDb db(txc.DB); - db.Table().Key(txId, lockId).Update(); + if (proto.HasModificationType()) { + ModificationType = (NEvWrite::EModificationType)proto.GetModificationType(); + } else { + ModificationType = NEvWrite::EModificationType::Replace; } +} - TWriteOperation::TPtr 
TOperationsManager::RegisterOperation(const ui64 lockId, const ui64 cookie, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType) { - auto writeId = BuildNextWriteId(); - auto operation = std::make_shared(writeId, lockId, cookie, EOperationStatus::Draft, AppData()->TimeProvider->Now(), granuleShardingVersionId, mType); - Y_ABORT_UNLESS(Operations.emplace(operation->GetWriteId(), operation).second); - Locks[operation->GetLockId()].push_back(operation->GetWriteId()); - return operation; - } +void TWriteOperation::AbortOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) const { + Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); - EOperationBehaviour TOperationsManager::GetBehaviour(const NEvents::TDataEvents::TEvWrite& evWrite) { - if (evWrite.Record.HasTxId() && evWrite.Record.HasLocks() && evWrite.Record.GetLocks().GetOp() == NKikimrDataEvents::TKqpLocks::Commit) { - return EOperationBehaviour::CommitWriteLock; - } + TBlobGroupSelector dsGroupSelector(owner.Info()); + NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); - if (evWrite.Record.HasLockTxId() && evWrite.Record.HasLockNodeId()) { - if (evWrite.Record.GetTxMode() == NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE) { - return EOperationBehaviour::WriteWithLock; - } + THashSet writeIds; + writeIds.insert(InsertWriteIds.begin(), InsertWriteIds.end()); + owner.InsertTable->Abort(dbTable, writeIds); +} - return EOperationBehaviour::Undefined; - } +void TWriteOperation::AbortOnComplete(TColumnShard& /*owner*/) const { + Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); +} - if (evWrite.Record.HasTxId() && evWrite.Record.GetTxMode() == NKikimrDataEvents::TEvWrite::MODE_PREPARE) { - return EOperationBehaviour::InTxWrite; - } - return EOperationBehaviour::Undefined; - } } // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/operations/write.h b/ydb/core/tx/columnshard/operations/write.h index b72827af6c58..ad22caa651d4 100644 --- 
a/ydb/core/tx/columnshard/operations/write.h +++ b/ydb/core/tx/columnshard/operations/write.h @@ -1,95 +1,83 @@ #pragma once -#include -#include +#include +#include #include -#include #include -#include +#include +#include +#include -#include #include #include -#include +#include namespace NKikimr::NTabletFlatExecutor { - class TTransactionContext; +class TTransactionContext; +} + +namespace NKikimr::NOlap::NTxInteractions { +class TManager; } namespace NKikimr::NColumnShard { - class TColumnShard; - - using TWriteId = NOlap::TWriteId; - - enum class EOperationStatus : ui32 { - Draft = 1, - Started = 2, - Prepared = 3 - }; - - enum class EOperationBehaviour : ui32 { - Undefined = 1, - InTxWrite = 2, - WriteWithLock = 3, - CommitWriteLock = 4 - }; - - class TWriteOperation { - YDB_READONLY(EOperationStatus, Status, EOperationStatus::Draft); - YDB_READONLY_DEF(TInstant, CreatedAt); - YDB_READONLY_DEF(TWriteId, WriteId); - YDB_READONLY(ui64, LockId, 0); - YDB_READONLY(ui64, Cookie, 0); - YDB_READONLY_DEF(TVector, GlobalWriteIds); - YDB_ACCESSOR(EOperationBehaviour, Behaviour, EOperationBehaviour::Undefined); - YDB_READONLY_DEF(std::optional, GranuleShardingVersionId); - YDB_READONLY(NEvWrite::EModificationType, ModificationType, NEvWrite::EModificationType::Upsert); - public: - using TPtr = std::shared_ptr; - - TWriteOperation(const TWriteId writeId, const ui64 lockId, const ui64 cookie, const EOperationStatus& status, const TInstant createdAt, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType); - - void Start(TColumnShard& owner, const ui64 tableId, const NEvWrite::IDataContainer::TPtr& data, - const NActors::TActorId& source, const std::shared_ptr& schema, const TActorContext& ctx); - void OnWriteFinish(NTabletFlatExecutor::TTransactionContext& txc, const TVector& globalWriteIds); - void Commit(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) const; - void Abort(TColumnShard& 
owner, NTabletFlatExecutor::TTransactionContext& txc) const; - - void Out(IOutputStream& out) const { - out << "write_id=" << (ui64) WriteId << ";lock_id=" << LockId; - } - - void ToProto(NKikimrTxColumnShard::TInternalOperationData& proto) const; - void FromProto(const NKikimrTxColumnShard::TInternalOperationData& proto); - }; - - class TOperationsManager { - TMap> Locks; - TMap Tx2Lock; - TMap Operations; - TWriteId LastWriteId = TWriteId(0); - - public: - bool Load(NTabletFlatExecutor::TTransactionContext& txc); - - TWriteOperation::TPtr GetOperation(const TWriteId writeId) const; - bool CommitTransaction(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot); - bool AbortTransaction(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); - void LinkTransaction(const ui64 lockId, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); - std::optional GetLockForTx(const ui64 lockId) const; - - TWriteOperation::TPtr RegisterOperation(const ui64 lockId, const ui64 cookie, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType); - static EOperationBehaviour GetBehaviour(const NEvents::TDataEvents::TEvWrite& evWrite); - - private: - TWriteId BuildNextWriteId(); - void RemoveOperation(const TWriteOperation::TPtr& op, NTabletFlatExecutor::TTransactionContext& txc); - void OnTransactionFinish(const TVector& operations, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); - }; -} +class TColumnShard; + +using TOperationWriteId = NOlap::TOperationWriteId; +using TInsertWriteId = NOlap::TInsertWriteId; + +enum class EOperationStatus : ui32 { + Draft = 1, + Started = 2, + Prepared = 3 +}; + +enum class EOperationBehaviour : ui32 { + Undefined = 1, + InTxWrite = 2, + WriteWithLock = 3, + CommitWriteLock = 4, + AbortWriteLock = 5, + NoTxWrite = 6 +}; + +class TWriteOperation { + YDB_READONLY(EOperationStatus, Status, 
EOperationStatus::Draft); + YDB_READONLY_DEF(TInstant, CreatedAt); + YDB_READONLY_DEF(TOperationWriteId, WriteId); + YDB_READONLY(ui64, LockId, 0); + YDB_READONLY(ui64, Cookie, 0); + YDB_READONLY_DEF(std::vector, InsertWriteIds); + YDB_ACCESSOR(EOperationBehaviour, Behaviour, EOperationBehaviour::Undefined); + YDB_READONLY_DEF(std::optional, GranuleShardingVersionId); + YDB_READONLY(NEvWrite::EModificationType, ModificationType, NEvWrite::EModificationType::Upsert); + +public: + using TPtr = std::shared_ptr; + + TWriteOperation(const TOperationWriteId writeId, const ui64 lockId, const ui64 cookie, const EOperationStatus& status, const TInstant createdAt, + const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType); + + void Start(TColumnShard& owner, const ui64 tableId, const NEvWrite::IDataContainer::TPtr& data, const NActors::TActorId& source, + const std::shared_ptr& schema, const TActorContext& ctx); + void OnWriteFinish(NTabletFlatExecutor::TTransactionContext& txc, const std::vector& insertWriteIds, const bool ephemeralFlag); + void CommitOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) const; + void CommitOnComplete(TColumnShard& owner, const NOlap::TSnapshot& snapshot) const; + void AbortOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) const; + void AbortOnComplete(TColumnShard& owner) const; + + void Out(IOutputStream& out) const { + out << "write_id=" << (ui64)WriteId << ";lock_id=" << LockId; + } + + void ToProto(NKikimrTxColumnShard::TInternalOperationData& proto) const; + void FromProto(const NKikimrTxColumnShard::TInternalOperationData& proto); +}; + +} // namespace NKikimr::NColumnShard template <> inline void Out(IOutputStream& o, const NKikimr::NColumnShard::TWriteOperation& x) { diff --git a/ydb/core/tx/columnshard/operations/write_data.cpp b/ydb/core/tx/columnshard/operations/write_data.cpp index a0cd6450f534..56a0ad5e16cb 
100644 --- a/ydb/core/tx/columnshard/operations/write_data.cpp +++ b/ydb/core/tx/columnshard/operations/write_data.cpp @@ -31,7 +31,7 @@ bool TArrowData::Parse(const NKikimrDataEvents::TEvWrite_TOperation& proto, cons TConclusion> TArrowData::ExtractBatch() { Y_ABORT_UNLESS(!!IncomingData); - auto result = NArrow::DeserializeBatch(IncomingData, BatchSchema->GetSchema()); + auto result = NArrow::DeserializeBatch(IncomingData, std::make_shared(BatchSchema->GetSchema()->fields())); IncomingData = ""; return result; } diff --git a/ydb/core/tx/columnshard/operations/ya.make b/ydb/core/tx/columnshard/operations/ya.make index c626a22b508b..c0bd3f234b78 100644 --- a/ydb/core/tx/columnshard/operations/ya.make +++ b/ydb/core/tx/columnshard/operations/ya.make @@ -3,6 +3,7 @@ LIBRARY() SRCS( write.cpp write_data.cpp + manager.cpp ) PEERDIR( @@ -11,6 +12,7 @@ PEERDIR( ydb/services/metadata ydb/core/tx/columnshard/data_sharing/destination/events ydb/core/tx/columnshard/data_reader + ydb/core/tx/columnshard/transactions/locks ydb/core/tx/columnshard/operations/batch_builder ydb/core/tx/columnshard/operations/slice_builder ) diff --git a/ydb/core/tx/columnshard/resource_subscriber/counters.cpp b/ydb/core/tx/columnshard/resource_subscriber/counters.cpp index 785ca04ba8f9..45f785c10223 100644 --- a/ydb/core/tx/columnshard/resource_subscriber/counters.cpp +++ b/ydb/core/tx/columnshard/resource_subscriber/counters.cpp @@ -1,9 +1,12 @@ #include "counters.h" +#include + namespace NKikimr::NOlap::NResourceBroker::NSubscribe { std::shared_ptr TSubscriberCounters::GetTypeCounters(const TString& resourceType) { + TGuard lock(Mutex); auto it = ResourceTypeCounters.find(resourceType); if (it == ResourceTypeCounters.end()) { it = ResourceTypeCounters.emplace(resourceType, std::make_shared(*this, resourceType)).first; diff --git a/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.cpp b/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.cpp index 646a458638dd..1e66bfb46e07 100644 --- 
a/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.cpp +++ b/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.cpp @@ -4,23 +4,20 @@ namespace NKikimr::NOlap { -TSimpleChunkMeta::TSimpleChunkMeta(const std::shared_ptr& column, const bool needMax, const bool isSortedColumn) { +TSimpleChunkMeta::TSimpleChunkMeta( + const std::shared_ptr& column, const bool needMax, const bool isSortedColumn) { Y_ABORT_UNLESS(column); - Y_ABORT_UNLESS(column->length()); - NumRows = column->length(); - RawBytes = NArrow::GetArrayDataSize(column); + Y_ABORT_UNLESS(column->GetRecordsCount()); + NumRows = column->GetRecordsCount(); + RawBytes = column->GetRawSizeVerified(); if (needMax) { - std::pair minMaxPos = {0, (column->length() - 1)}; if (!isSortedColumn) { - minMaxPos = NArrow::FindMinMaxPosition(column); - Y_ABORT_UNLESS(minMaxPos.first >= 0); - Y_ABORT_UNLESS(minMaxPos.second >= 0); + Max = column->GetMaxScalar(); + } else { + Max = column->GetScalar(column->GetRecordsCount() - 1); } - - Max = NArrow::GetScalar(column, minMaxPos.second); - - Y_ABORT_UNLESS(Max); +// AFL_VERIFY(Max); } } diff --git a/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.h b/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.h index 8f8f902e4095..0e54b4c9e42a 100644 --- a/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.h +++ b/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.h @@ -1,4 +1,6 @@ #pragma once +#include + #include #include @@ -17,7 +19,7 @@ class TSimpleChunkMeta { ui32 RawBytes = 0; TSimpleChunkMeta() = default; public: - TSimpleChunkMeta(const std::shared_ptr& column, const bool needMinMax, const bool isSortedColumn); + TSimpleChunkMeta(const std::shared_ptr& column, const bool needMinMax, const bool isSortedColumn); ui64 GetMetadataSize() const { return sizeof(ui32) + sizeof(ui32) + 8 * 3 * 2; diff --git a/ydb/core/tx/columnshard/splitter/abstract/chunks.h b/ydb/core/tx/columnshard/splitter/abstract/chunks.h index 3463fcea0374..d0300915f098 100644 --- 
a/ydb/core/tx/columnshard/splitter/abstract/chunks.h +++ b/ydb/core/tx/columnshard/splitter/abstract/chunks.h @@ -15,7 +15,6 @@ namespace NKikimr::NOlap { class TPortionInfo; class TPortionInfoConstructor; class TSimpleColumnInfo; -class TColumnSaver; class IPortionDataChunk { private: diff --git a/ydb/core/tx/columnshard/splitter/batch_slice.cpp b/ydb/core/tx/columnshard/splitter/batch_slice.cpp index 15fd2506e3ba..7f6cc05c1e7b 100644 --- a/ydb/core/tx/columnshard/splitter/batch_slice.cpp +++ b/ydb/core/tx/columnshard/splitter/batch_slice.cpp @@ -1,5 +1,4 @@ #include "batch_slice.h" -#include "simple.h" #include namespace NKikimr::NOlap { @@ -148,7 +147,8 @@ bool TGeneralSerializedSlice::GroupBlobsImpl(const NSplitter::TGroupFeatures& fe return true; } -TGeneralSerializedSlice::TGeneralSerializedSlice(const THashMap>>& data, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters) +TGeneralSerializedSlice::TGeneralSerializedSlice(const THashMap>>& data, + NArrow::NSplitter::ISchemaDetailInfo::TPtr schema, std::shared_ptr counters) : Schema(schema) , Counters(counters) { std::optional recordsCount; @@ -169,60 +169,14 @@ TGeneralSerializedSlice::TGeneralSerializedSlice(const THashMap counters) +TGeneralSerializedSlice::TGeneralSerializedSlice( + const ui32 recordsCount, NArrow::NSplitter::ISchemaDetailInfo::TPtr schema, std::shared_ptr counters) : RecordsCount(recordsCount) , Schema(schema) , Counters(counters) { } -TBatchSerializedSlice::TBatchSerializedSlice(const std::shared_ptr& batch, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters, - const NSplitter::TSplitSettings& settings) - : TBase(TValidator::CheckNotNull(batch)->num_rows(), schema, counters) - , Batch(batch) -{ - Y_ABORT_UNLESS(batch); - Data.reserve(batch->num_columns()); - for (auto&& i : batch->schema()->fields()) { - TSplittedEntity c(schema->GetColumnId(i->name())); - Data.emplace_back(std::move(c)); - } - - ui32 idx = 0; - for (auto&& i : batch->columns()) { - auto& c = Data[idx]; - auto 
columnSaver = schema->GetColumnSaver(c.GetEntityId()); - auto stats = schema->GetColumnSerializationStats(c.GetEntityId()); - TSimpleSplitter splitter(columnSaver, Counters); - splitter.SetStats(stats); - std::vector> chunks; - for (auto&& i : splitter.Split(i, Schema->GetField(c.GetEntityId()), settings.GetMaxBlobSize())) { - chunks.emplace_back(std::make_shared(c.GetEntityId(), i, Schema)); - } - c.SetChunks(chunks); - Size += c.GetSize(); - ++idx; - } -} - -std::vector TBatchSerializedSlice::BuildSimpleSlices(const std::shared_ptr& batch, const NSplitter::TSplitSettings& settings, const std::shared_ptr& counters, const ISchemaDetailInfo::TPtr& schemaInfo) { - std::vector slices; - auto stats = schemaInfo->GetBatchSerializationStats(batch); - ui32 recordsCount = settings.GetMinRecordsCount(); - if (stats) { - const ui32 recordsCountForMinSize = stats->PredictOptimalPackRecordsCount(batch->num_rows(), settings.GetMinBlobSize()).value_or(recordsCount); - const ui32 recordsCountForMaxPortionSize = stats->PredictOptimalPackRecordsCount(batch->num_rows(), settings.GetMaxPortionSize()).value_or(recordsCount); - recordsCount = std::min(recordsCountForMaxPortionSize, std::max(recordsCount, recordsCountForMinSize)); - } - auto linearSplitInfo = TSimpleSplitter::GetOptimalLinearSplitting(batch->num_rows(), recordsCount); - for (auto it = linearSplitInfo.StartIterator(); it.IsValid(); it.Next()) { - std::shared_ptr current = batch->Slice(it.GetPosition(), it.GetCurrentPackSize()); - TBatchSerializedSlice slice(current, schemaInfo, counters, settings); - slices.emplace_back(std::move(slice)); - } - return slices; -} - void TGeneralSerializedSlice::MergeSlice(TGeneralSerializedSlice&& slice) { Y_ABORT_UNLESS(Data.size() == slice.Data.size()); RecordsCount += slice.GetRecordsCount(); diff --git a/ydb/core/tx/columnshard/splitter/batch_slice.h b/ydb/core/tx/columnshard/splitter/batch_slice.h index 00a27a3d4739..3b2dd6f1bf8e 100644 --- 
a/ydb/core/tx/columnshard/splitter/batch_slice.h +++ b/ydb/core/tx/columnshard/splitter/batch_slice.h @@ -1,10 +1,10 @@ #pragma once #include "chunks.h" -#include "stats.h" -#include "scheme_info.h" #include "column_info.h" #include "blob_info.h" -#include "similar_packer.h" +#include +#include +#include #include #include #include @@ -14,16 +14,17 @@ namespace NKikimr::NOlap { -class TDefaultSchemaDetails: public ISchemaDetailInfo { +class TDefaultSchemaDetails: public NArrow::NSplitter::ISchemaDetailInfo { private: ISnapshotSchema::TPtr Schema; - std::shared_ptr Stats; + std::shared_ptr Stats; + protected: virtual TColumnSaver DoGetColumnSaver(const ui32 columnId) const override { return Schema->GetColumnSaver(columnId); } public: - TDefaultSchemaDetails(ISnapshotSchema::TPtr schema, const std::shared_ptr& stats) + TDefaultSchemaDetails(ISnapshotSchema::TPtr schema, const std::shared_ptr& stats) : Schema(schema) , Stats(stats) { @@ -39,14 +40,15 @@ class TDefaultSchemaDetails: public ISchemaDetailInfo { return Schema->GetIndexInfo().IsSortedColumn(columnId); } - virtual std::optional GetColumnSerializationStats(const ui32 columnId) const override { + virtual std::optional GetColumnSerializationStats(const ui32 columnId) const override { auto stats = Stats->GetColumnInfo(columnId); if (stats && stats->GetRecordsCount() != 0) { return stats; } return std::nullopt; } - virtual std::optional GetBatchSerializationStats(const std::shared_ptr& rb) const override { + virtual std::optional GetBatchSerializationStats( + const std::shared_ptr& rb) const override { return Stats->GetStatsForRecordBatch(rb); } virtual ui32 GetColumnId(const std::string& fieldName) const override { @@ -61,7 +63,7 @@ class TGeneralSerializedSlice { protected: std::vector Data; ui64 Size = 0; - ISchemaDetailInfo::TPtr Schema; + NArrow::NSplitter::ISchemaDetailInfo::TPtr Schema; std::shared_ptr Counters; TGeneralSerializedSlice() = default; @@ -116,15 +118,17 @@ class TGeneralSerializedSlice { 
return blobs; } - explicit TGeneralSerializedSlice(TVectorView&& objects) { + explicit TGeneralSerializedSlice(NArrow::NSplitter::TVectorView&& objects) { Y_ABORT_UNLESS(objects.size()); std::swap(*this, objects.front()); for (ui32 i = 1; i < objects.size(); ++i) { MergeSlice(std::move(objects[i])); } } - TGeneralSerializedSlice(const THashMap>>& data, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters); - TGeneralSerializedSlice(const ui32 recordsCount, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters); + TGeneralSerializedSlice(const THashMap>>& data, + NArrow::NSplitter::ISchemaDetailInfo::TPtr schema, std::shared_ptr counters); + TGeneralSerializedSlice( + const ui32 recordsCount, NArrow::NSplitter::ISchemaDetailInfo::TPtr schema, std::shared_ptr counters); void MergeSlice(TGeneralSerializedSlice&& slice); @@ -135,28 +139,4 @@ class TGeneralSerializedSlice { } }; -class TBatchSerializedSlice: public TGeneralSerializedSlice { -private: - using TBase = TGeneralSerializedSlice; - YDB_READONLY_DEF(std::shared_ptr, Batch); -public: - TBatchSerializedSlice(const std::shared_ptr& batch, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters, const NSplitter::TSplitSettings& settings); - - explicit TBatchSerializedSlice(TVectorView&& objects) { - Y_ABORT_UNLESS(objects.size()); - std::swap(*this, objects.front()); - for (ui32 i = 1; i < objects.size(); ++i) { - MergeSlice(std::move(objects[i])); - } - } - void MergeSlice(TBatchSerializedSlice&& slice) { - Batch = NArrow::CombineBatches({Batch, slice.Batch}); - TBase::MergeSlice(std::move(slice)); - } - - static std::vector BuildSimpleSlices(const std::shared_ptr& batch, const NSplitter::TSplitSettings& settings, - const std::shared_ptr& counters, const ISchemaDetailInfo::TPtr& schemaInfo); - -}; - } diff --git a/ydb/core/tx/columnshard/splitter/chunks.h b/ydb/core/tx/columnshard/splitter/chunks.h index 8ee403ccb24f..021ea7e47a4f 100644 --- a/ydb/core/tx/columnshard/splitter/chunks.h +++ 
b/ydb/core/tx/columnshard/splitter/chunks.h @@ -27,8 +27,10 @@ class IPortionColumnChunk : public IPortionDataChunk { virtual void DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfoConstructor& portionInfo) const override; - virtual std::vector> DoInternalSplitImpl(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const = 0; - virtual std::vector> DoInternalSplit(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const override; + virtual std::vector> DoInternalSplitImpl(const TColumnSaver& saver, + const std::shared_ptr& counters, const std::vector& splitSizes) const = 0; + virtual std::vector> DoInternalSplit(const TColumnSaver& saver, + const std::shared_ptr& counters, const std::vector& splitSizes) const override; virtual bool DoIsSplittable() const override { return GetRecordsCount() > 1; } @@ -53,7 +55,8 @@ class TChunkedColumnReader { std::vector> Chunks; std::shared_ptr Loader; - std::shared_ptr CurrentChunk; + std::shared_ptr CurrentChunk; + std::optional CurrentChunkArray; ui32 CurrentChunkIndex = 0; ui32 CurrentRecordIndex = 0; public: @@ -68,16 +71,29 @@ class TChunkedColumnReader { CurrentChunkIndex = 0; CurrentRecordIndex = 0; if (Chunks.size()) { - CurrentChunk = Loader->ApplyVerifiedColumn(Chunks.front()->GetData()); + CurrentChunk = Loader->ApplyVerified(Chunks.front()->GetData(), Chunks.front()->GetRecordsCountVerified()); + CurrentChunkArray.reset(); } } - const std::shared_ptr& GetCurrentChunk() const { + const std::shared_ptr& GetCurrentChunk() { + if (!CurrentChunkArray || !CurrentChunkArray->GetAddress().Contains(CurrentRecordIndex)) { + CurrentChunkArray = CurrentChunk->GetChunk(CurrentChunkArray, CurrentRecordIndex); + } + AFL_VERIFY(CurrentChunkArray); + return CurrentChunkArray->GetArray(); + } + + const std::shared_ptr& GetCurrentAccessor() const { + AFL_VERIFY(CurrentChunk); return CurrentChunk; } - ui32 GetCurrentRecordIndex() const { 
- return CurrentRecordIndex; + ui32 GetCurrentRecordIndex() { + if (!CurrentChunkArray || !CurrentChunkArray->GetAddress().Contains(CurrentRecordIndex)) { + CurrentChunkArray = CurrentChunk->GetChunk(CurrentChunkArray->GetAddress(), CurrentRecordIndex); + } + return CurrentChunkArray->GetAddress().GetLocalIndex(CurrentRecordIndex); } bool IsCorrect() const { @@ -86,19 +102,21 @@ class TChunkedColumnReader { bool ReadNextChunk() { while (++CurrentChunkIndex < Chunks.size()) { - CurrentChunk = Loader->ApplyVerifiedColumn(Chunks[CurrentChunkIndex]->GetData()); + CurrentChunk = Loader->ApplyVerified(Chunks[CurrentChunkIndex]->GetData(), Chunks[CurrentChunkIndex]->GetRecordsCountVerified()); + CurrentChunkArray.reset(); CurrentRecordIndex = 0; - if (CurrentRecordIndex < CurrentChunk->length()) { + if (CurrentRecordIndex < CurrentChunk->GetRecordsCount()) { return true; } } + CurrentChunkArray.reset(); CurrentChunk = nullptr; return false; } bool ReadNext() { AFL_VERIFY(!!CurrentChunk); - if (++CurrentRecordIndex < CurrentChunk->length()) { + if (++CurrentRecordIndex < CurrentChunk->GetRecordsCount()) { return true; } return ReadNextChunk(); @@ -163,7 +181,7 @@ class TChunkedBatchReader { std::vector::iterator end() { return Columns.end(); - } + } }; } diff --git a/ydb/core/tx/columnshard/splitter/scheme_info.cpp b/ydb/core/tx/columnshard/splitter/scheme_info.cpp deleted file mode 100644 index fe4a65604e11..000000000000 --- a/ydb/core/tx/columnshard/splitter/scheme_info.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include "scheme_info.h" - -namespace NKikimr::NOlap { - -NKikimr::NOlap::TColumnSaver ISchemaDetailInfo::GetColumnSaver(const ui32 columnId) const { - auto saver = DoGetColumnSaver(columnId); - if (OverrideSerializer) { - saver.ResetSerializer(*OverrideSerializer); - } - return saver; -} - -} diff --git a/ydb/core/tx/columnshard/splitter/ut/batch_slice.cpp b/ydb/core/tx/columnshard/splitter/ut/batch_slice.cpp new file mode 100644 index 000000000000..880881238fa1 --- 
/dev/null +++ b/ydb/core/tx/columnshard/splitter/ut/batch_slice.cpp @@ -0,0 +1,65 @@ +#include "batch_slice.h" + +#include +#include +#include + +#include + +namespace NKikimr::NOlap { + +TBatchSerializedSlice::TBatchSerializedSlice(const std::shared_ptr& batch, NArrow::NSplitter::ISchemaDetailInfo::TPtr schema, + std::shared_ptr counters, const NSplitter::TSplitSettings& settings) + : TBase(TValidator::CheckNotNull(batch)->num_rows(), schema, counters) + , Batch(batch) { + Y_ABORT_UNLESS(batch); + Data.reserve(batch->num_columns()); + for (auto&& i : batch->schema()->fields()) { + TSplittedEntity c(schema->GetColumnId(i->name())); + Data.emplace_back(std::move(c)); + } + + ui32 idx = 0; + for (auto&& i : batch->columns()) { + auto& c = Data[idx]; + auto columnSaver = schema->GetColumnSaver(c.GetEntityId()); + auto stats = schema->GetColumnSerializationStats(c.GetEntityId()); + NKikimr::NArrow::NSplitter::TSimpleSplitter splitter(columnSaver); + splitter.SetStats(stats); + std::vector> chunks; + for (auto&& i : splitter.Split(i, Schema->GetField(c.GetEntityId()), settings.GetMaxBlobSize())) { + NOlap::TSimpleColumnInfo columnInfo(c.GetEntityId(), Schema->GetField(c.GetEntityId()), + Schema->GetColumnSaver(c.GetEntityId()).GetSerializer(), true, false, true, nullptr); + chunks.emplace_back(std::make_shared(i.GetSerializedChunk(), + std::make_shared(i.GetSlicedBatch()->column(0)), TChunkAddress(c.GetEntityId(), 0), + columnInfo)); + } + c.SetChunks(chunks); + Size += c.GetSize(); + ++idx; + } +} + +std::vector TBatchSerializedSlice::BuildSimpleSlices(const std::shared_ptr& batch, + const NSplitter::TSplitSettings& settings, const std::shared_ptr& counters, + const NArrow::NSplitter::ISchemaDetailInfo::TPtr& schemaInfo) { + std::vector slices; + auto stats = schemaInfo->GetBatchSerializationStats(batch); + ui32 recordsCount = settings.GetMinRecordsCount(); + if (stats) { + const ui32 recordsCountForMinSize = + stats->PredictOptimalPackRecordsCount(batch->num_rows(), 
settings.GetMinBlobSize()).value_or(recordsCount); + const ui32 recordsCountForMaxPortionSize = + stats->PredictOptimalPackRecordsCount(batch->num_rows(), settings.GetMaxPortionSize()).value_or(recordsCount); + recordsCount = std::min(recordsCountForMaxPortionSize, std::max(recordsCount, recordsCountForMinSize)); + } + auto linearSplitInfo = NKikimr::NArrow::NSplitter::TSimpleSplitter::GetOptimalLinearSplitting(batch->num_rows(), recordsCount); + for (auto it = linearSplitInfo.StartIterator(); it.IsValid(); it.Next()) { + std::shared_ptr current = batch->Slice(it.GetPosition(), it.GetCurrentPackSize()); + TBatchSerializedSlice slice(current, schemaInfo, counters, settings); + slices.emplace_back(std::move(slice)); + } + return slices; +} + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/splitter/ut/batch_slice.h b/ydb/core/tx/columnshard/splitter/ut/batch_slice.h new file mode 100644 index 000000000000..c085eb1568ce --- /dev/null +++ b/ydb/core/tx/columnshard/splitter/ut/batch_slice.h @@ -0,0 +1,31 @@ +#pragma once +#include + +namespace NKikimr::NOlap { + +class TBatchSerializedSlice: public TGeneralSerializedSlice { +private: + using TBase = TGeneralSerializedSlice; + YDB_READONLY_DEF(std::shared_ptr, Batch); + +public: + TBatchSerializedSlice(const std::shared_ptr& batch, NArrow::NSplitter::ISchemaDetailInfo::TPtr schema, + std::shared_ptr counters, const NSplitter::TSplitSettings& settings); + + explicit TBatchSerializedSlice(NArrow::NSplitter::TVectorView&& objects) { + Y_ABORT_UNLESS(objects.size()); + std::swap(*this, objects.front()); + for (ui32 i = 1; i < objects.size(); ++i) { + MergeSlice(std::move(objects[i])); + } + } + void MergeSlice(TBatchSerializedSlice&& slice) { + Batch = NArrow::CombineBatches({ Batch, slice.Batch }); + TBase::MergeSlice(std::move(slice)); + } + + static std::vector BuildSimpleSlices(const std::shared_ptr& batch, + const NSplitter::TSplitSettings& settings, const std::shared_ptr& counters, + const 
NArrow::NSplitter::ISchemaDetailInfo::TPtr& schemaInfo); +}; +} diff --git a/ydb/core/tx/columnshard/splitter/ut/ut_splitter.cpp b/ydb/core/tx/columnshard/splitter/ut/ut_splitter.cpp index f2f942dbbab3..06c6f5020f29 100644 --- a/ydb/core/tx/columnshard/splitter/ut/ut_splitter.cpp +++ b/ydb/core/tx/columnshard/splitter/ut/ut_splitter.cpp @@ -1,29 +1,32 @@ -#include -#include -#include -#include -#include -#include +#include "batch_slice.h" -#include -#include +#include +#include +#include #include #include - -#include +#include +#include +#include +#include +#include +#include +#include #include +#include Y_UNIT_TEST_SUITE(Splitter) { - using namespace NKikimr::NArrow; - class TTestSnapshotSchema: public NKikimr::NOlap::ISchemaDetailInfo { + class TTestSnapshotSchema: public NKikimr::NArrow::NSplitter::ISchemaDetailInfo { private: mutable std::map Decoder; + protected: - virtual NKikimr::NOlap::TColumnSaver DoGetColumnSaver(const ui32 columnId) const override { - return NKikimr::NOlap::TColumnSaver(nullptr, std::make_shared(arrow::ipc::IpcOptions::Defaults())); + virtual NKikimr::NArrow::NAccessor::TColumnSaver DoGetColumnSaver(const ui32 columnId) const override { + return NKikimr::NArrow::NAccessor::TColumnSaver( + nullptr, std::make_shared(arrow::ipc::IpcOptions::Defaults())); } public: @@ -34,17 +37,18 @@ Y_UNIT_TEST_SUITE(Splitter) { return false; } - virtual std::optional GetColumnSerializationStats(const ui32 /*columnId*/) const override { + virtual std::optional GetColumnSerializationStats( + const ui32 /*columnId*/) const override { return {}; } - virtual std::optional GetBatchSerializationStats(const std::shared_ptr& /*rb*/) const override { + virtual std::optional GetBatchSerializationStats( + const std::shared_ptr& /*rb*/) const override { return {}; } - NKikimr::NOlap::TColumnLoader GetColumnLoader(const ui32 columnId) const { - arrow::FieldVector v = {std::make_shared(GetColumnName(columnId), std::make_shared())}; - auto schema = std::make_shared(v); - 
return NKikimr::NOlap::TColumnLoader(nullptr, NSerialization::TSerializerContainer::GetDefaultSerializer(), schema, nullptr, columnId); + NKikimr::NArrow::NAccessor::TColumnLoader GetColumnLoader(const ui32 columnId) const { + return NKikimr::NArrow::NAccessor::TColumnLoader(nullptr, NSerialization::TSerializerContainer::GetDefaultSerializer(), + NKikimr::NArrow::NAccessor::TConstructorContainer::GetDefaultConstructor(), GetField(columnId), nullptr, columnId); } virtual std::shared_ptr GetField(const ui32 columnId) const override { @@ -80,9 +84,8 @@ Y_UNIT_TEST_SUITE(Splitter) { YDB_ACCESSOR(std::optional, ExpectedInternalSplitsCount, 0); public: - void Execute(std::shared_ptr batch, - const NKikimr::NOlap::NSplitter::TSplitSettings& settings = NKikimr::NOlap::NSplitter::TSplitSettings() - ) { + void Execute(std::shared_ptr batch, + const NKikimr::NOlap::NSplitter::TSplitSettings& settings = NKikimr::NOlap::NSplitter::TSplitSettings()) { using namespace NKikimr::NOlap; NKikimr::NColumnShard::TIndexationCounters counters("test"); std::vector generalSlices; @@ -93,9 +96,9 @@ Y_UNIT_TEST_SUITE(Splitter) { } } - TSimilarPacker packer(settings.GetExpectedPortionSize()); + NKikimr::NArrow::NSplitter::TSimilarPacker packer(settings.GetExpectedPortionSize()); auto packs = packer.Split(generalSlices); - const NSplitter::TEntityGroups groups(settings, "default"); + const NKikimr::NOlap::NSplitter::TEntityGroups groups(settings, "default"); const ui32 portionsCount = packs.size(); ui32 blobsCount = 0; ui32 chunksCount = 0; @@ -129,9 +132,11 @@ Y_UNIT_TEST_SUITE(Splitter) { } portionSize += bSize; AFL_VERIFY(bSize < (ui64)settings.GetMaxBlobSize()); - AFL_VERIFY(bSize * 1.01 > (ui64)settings.GetMinBlobSize() || (packs.size() == 1 && blobsLocal.size() == 1))("blob_size", bSize); + AFL_VERIFY(bSize * 1.01 > (ui64)settings.GetMinBlobSize() || (packs.size() == 1 && blobsLocal.size() == 1))( + "blob_size", bSize); } - AFL_VERIFY(portionSize >= settings.GetExpectedPortionSize() || 
packs.size() == 1)("size", portionSize)("limit", settings.GetMaxPortionSize()); + AFL_VERIFY(portionSize >= settings.GetExpectedPortionSize() || packs.size() == 1)("size", portionSize)( + "limit", settings.GetMaxPortionSize()); THashMap> entitiesByRecordsCount; ui32 pagesRestore = 0; @@ -141,7 +146,7 @@ Y_UNIT_TEST_SUITE(Splitter) { ui32 count = 0; for (auto&& c : e.second) { auto slice = arr->Slice(count + portionShift, c->GetRecordsCountVerified()); - auto readBatch = *Schema->GetColumnLoader(e.first).Apply(c->GetData()); + auto readBatch = Schema->GetColumnLoader(e.first).ApplyRawVerified(c->GetData()); AFL_VERIFY(slice->length() == readBatch->num_rows()); Y_ABORT_UNLESS(readBatch->column(0)->RangeEquals(*slice, 0, readBatch->num_rows(), 0, arrow::EqualOptions::Defaults())); count += c->GetRecordsCountVerified(); @@ -161,91 +166,107 @@ Y_UNIT_TEST_SUITE(Splitter) { } AFL_VERIFY(portionShift = batch->num_rows()); AFL_VERIFY(pagesSum == generalSlices.size())("sum", pagesSum)("general_slices", generalSlices.size()); - AFL_VERIFY(internalSplitsCount == ExpectedInternalSplitsCount.value_or(internalSplitsCount))("expected", *ExpectedInternalSplitsCount)("real", internalSplitsCount); + AFL_VERIFY(internalSplitsCount == ExpectedInternalSplitsCount.value_or(internalSplitsCount))( + "expected", *ExpectedInternalSplitsCount)("real", internalSplitsCount); AFL_VERIFY(blobsCount == ExpectBlobsCount.value_or(blobsCount))("blobs_count", blobsCount)("expected", *ExpectBlobsCount); AFL_VERIFY(pagesSum == ExpectSlicesCount.value_or(pagesSum))("sum", pagesSum)("expected", *ExpectSlicesCount); - AFL_VERIFY(portionsCount == ExpectPortionsCount.value_or(portionsCount))("portions_count", portionsCount)("expected", *ExpectPortionsCount); + AFL_VERIFY(portionsCount == ExpectPortionsCount.value_or(portionsCount))("portions_count", portionsCount)( + "expected", *ExpectPortionsCount); AFL_VERIFY(chunksCount == ExpectChunksCount.value_or(chunksCount))("chunks_count", chunksCount)("expected", 
*ExpectChunksCount); - } }; Y_UNIT_TEST(Simple) { - NConstruction::IArrayBuilder::TPtr column = std::make_shared>( - "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({column}).BuildBatch(80048); + NConstruction::IArrayBuilder::TPtr column = + std::make_shared>( + "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); + std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ column }).BuildBatch(80048); NKikimr::NColumnShard::TIndexationCounters counters("test"); TSplitTester().SetExpectBlobsCount(8).SetExpectSlicesCount(8).Execute(batch); } Y_UNIT_TEST(Small) { - NConstruction::IArrayBuilder::TPtr column = std::make_shared>( - "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 24)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({column}).BuildBatch(80048); + NConstruction::IArrayBuilder::TPtr column = + std::make_shared>( + "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 24)); + std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ column }).BuildBatch(80048); NKikimr::NColumnShard::TIndexationCounters counters("test"); TSplitTester().SetExpectBlobsCount(1).SetExpectSlicesCount(8).Execute(batch); } Y_UNIT_TEST(Minimal) { - NConstruction::IArrayBuilder::TPtr column = std::make_shared>( - "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({column}).BuildBatch(4048); + NConstruction::IArrayBuilder::TPtr column = + std::make_shared>( + "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); + std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ column }).BuildBatch(4048); NKikimr::NColumnShard::TIndexationCounters counters("test"); 
TSplitTester().SetExpectBlobsCount(1).SetExpectSlicesCount(1).Execute(batch); } Y_UNIT_TEST(Trivial) { - NConstruction::IArrayBuilder::TPtr column = std::make_shared>( - "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({column}).BuildBatch(10048); + NConstruction::IArrayBuilder::TPtr column = + std::make_shared>( + "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); + std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ column }).BuildBatch(10048); TSplitTester().SetExpectBlobsCount(1).SetExpectSlicesCount(1).Execute(batch); } Y_UNIT_TEST(BigAndSmall) { - NConstruction::IArrayBuilder::TPtr columnBig = std::make_shared>( - "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); - NConstruction::IArrayBuilder::TPtr columnSmall = std::make_shared>( - "field2", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 1)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({columnBig, columnSmall}).BuildBatch(80048); + NConstruction::IArrayBuilder::TPtr columnBig = + std::make_shared>( + "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); + NConstruction::IArrayBuilder::TPtr columnSmall = + std::make_shared>( + "field2", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 1)); + std::shared_ptr batch = + NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ columnBig, columnSmall }).BuildBatch(80048); NKikimr::NColumnShard::TIndexationCounters counters("test"); TSplitTester().SetExpectBlobsCount(8).SetExpectSlicesCount(8).Execute(batch); } Y_UNIT_TEST(CritSmallPortions) { - NConstruction::IArrayBuilder::TPtr columnBig = std::make_shared>( - "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); - NConstruction::IArrayBuilder::TPtr columnSmall = std::make_shared>( - "field2", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 128)); - 
std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({columnBig, columnSmall}).BuildBatch(80048); + NConstruction::IArrayBuilder::TPtr columnBig = + std::make_shared>( + "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); + NConstruction::IArrayBuilder::TPtr columnSmall = + std::make_shared>( + "field2", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 128)); + std::shared_ptr batch = + NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ columnBig, columnSmall }).BuildBatch(80048); NKikimr::NColumnShard::TIndexationCounters counters("test"); - TSplitTester().SetExpectBlobsCount(80).SetExpectSlicesCount(80).SetExpectedInternalSplitsCount(0).SetExpectPortionsCount(40) - .Execute(batch, NKikimr::NOlap::NSplitter::TSplitSettings().SetMinRecordsCount(1000).SetMaxPortionSize(8000000)); + TSplitTester().SetExpectBlobsCount(80).SetExpectSlicesCount(80).SetExpectedInternalSplitsCount(0).SetExpectPortionsCount(40).Execute( + batch, NKikimr::NOlap::NSplitter::TSplitSettings().SetMinRecordsCount(1000).SetMaxPortionSize(8000000)); } Y_UNIT_TEST(Crit) { - NConstruction::IArrayBuilder::TPtr columnBig = std::make_shared>( - "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); - NConstruction::IArrayBuilder::TPtr columnSmall = std::make_shared>( - "field2", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 128)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({columnBig, columnSmall}).BuildBatch(80048); + NConstruction::IArrayBuilder::TPtr columnBig = + std::make_shared>( + "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); + NConstruction::IArrayBuilder::TPtr columnSmall = + std::make_shared>( + "field2", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 128)); + std::shared_ptr batch = + NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ columnBig, columnSmall }).BuildBatch(80048); NKikimr::NColumnShard::TIndexationCounters 
counters("test"); - TSplitTester().SetExpectBlobsCount(80).SetExpectSlicesCount(8).SetExpectedInternalSplitsCount(8).SetExpectPortionsCount(8).Execute(batch); + TSplitTester().SetExpectBlobsCount(80).SetExpectSlicesCount(8).SetExpectedInternalSplitsCount(8).SetExpectPortionsCount(8).Execute( + batch); } Y_UNIT_TEST(CritSimple) { - NConstruction::IArrayBuilder::TPtr columnBig = std::make_shared>( - "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({columnBig}).BuildBatch(80048); + NConstruction::IArrayBuilder::TPtr columnBig = + std::make_shared>( + "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); + std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ columnBig }).BuildBatch(80048); NKikimr::NColumnShard::TIndexationCounters counters("test"); - TSplitTester().SetExpectBlobsCount(72).SetExpectSlicesCount(8).SetExpectedInternalSplitsCount(0).SetExpectPortionsCount(8).Execute(batch); + TSplitTester().SetExpectBlobsCount(72).SetExpectSlicesCount(8).SetExpectedInternalSplitsCount(0).SetExpectPortionsCount(8).Execute( + batch); } - }; diff --git a/ydb/core/tx/columnshard/splitter/ut/ya.make b/ydb/core/tx/columnshard/splitter/ut/ya.make index 24d266bffa8e..c7a6a0be4c0c 100644 --- a/ydb/core/tx/columnshard/splitter/ut/ya.make +++ b/ydb/core/tx/columnshard/splitter/ut/ya.make @@ -18,6 +18,8 @@ PEERDIR( ydb/core/kqp/session_actor ydb/core/tx/tx_proxy ydb/core/tx/columnshard/engines/storage/chunks + ydb/core/tx/columnshard/engines/storage/indexes/max + ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch ydb/core/tx ydb/core/mind ydb/library/yql/minikql/comp_nodes/llvm14 @@ -40,6 +42,7 @@ CFLAGS( SRCS( ut_splitter.cpp + batch_slice.cpp ) END() diff --git a/ydb/core/tx/columnshard/splitter/ya.make b/ydb/core/tx/columnshard/splitter/ya.make index 5f6c60cdf1ff..380d51bca325 100644 --- 
a/ydb/core/tx/columnshard/splitter/ya.make +++ b/ydb/core/tx/columnshard/splitter/ya.make @@ -3,12 +3,8 @@ LIBRARY() SRCS( batch_slice.cpp chunks.cpp - simple.cpp - similar_packer.cpp - stats.cpp column_info.cpp settings.cpp - scheme_info.cpp blob_info.cpp chunk_meta.cpp ) @@ -17,6 +13,7 @@ PEERDIR( contrib/libs/apache/arrow ydb/core/tx/columnshard/splitter/abstract ydb/core/tx/columnshard/engines/scheme + ydb/core/formats/arrow/splitter ) END() diff --git a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp index 4f8cfe4ce4b9..31de6ffef8a5 100644 --- a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp +++ b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp @@ -1,6 +1,6 @@ #include "columnshard_ut_common.h" +#include "shard_reader.h" -#include #include #include #include @@ -125,22 +125,25 @@ bool WriteDataImpl(TTestBasicRuntime& runtime, TActorId& sender, const ui64 shar } bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 shardId, const ui64 writeId, const ui64 tableId, const TString& data, - const std::vector& ydbSchema, std::vector* writeIds, const NEvWrite::EModificationType mType) { + const std::vector& ydbSchema, std::vector* writeIds, const NEvWrite::EModificationType mType) { NLongTxService::TLongTxId longTxId; UNIT_ASSERT(longTxId.ParseString("ydb://long-tx/01ezvvxjdk2hd4vdgjs68knvp8?node_id=1")); - return WriteDataImpl(runtime, sender, shardId, tableId, longTxId, writeId, data, NArrow::MakeArrowSchema(ydbSchema), writeIds, mType); - + return WriteDataImpl( + runtime, sender, shardId, tableId, longTxId, writeId, data, NArrow::MakeArrowSchema(ydbSchema), writeIds, mType); } bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 writeId, const ui64 tableId, const TString& data, - const std::vector& ydbSchema, bool waitResult, std::vector* writeIds, const NEvWrite::EModificationType mType) { + const std::vector& ydbSchema, 
bool waitResult, std::vector* writeIds, + const NEvWrite::EModificationType mType) { NLongTxService::TLongTxId longTxId; UNIT_ASSERT(longTxId.ParseString("ydb://long-tx/01ezvvxjdk2hd4vdgjs68knvp8?node_id=1")); if (writeIds) { - return WriteDataImpl(runtime, sender, TTestTxConfig::TxTablet0, tableId, longTxId, writeId, data, NArrow::MakeArrowSchema(ydbSchema), writeIds, mType); + return WriteDataImpl(runtime, sender, TTestTxConfig::TxTablet0, tableId, longTxId, writeId, data, + NArrow::MakeArrowSchema(ydbSchema), writeIds, mType); } std::vector ids; - return WriteDataImpl(runtime, sender, TTestTxConfig::TxTablet0, tableId, longTxId, writeId, data, NArrow::MakeArrowSchema(ydbSchema), waitResult ? &ids : nullptr, mType); + return WriteDataImpl(runtime, sender, TTestTxConfig::TxTablet0, tableId, longTxId, writeId, data, + NArrow::MakeArrowSchema(ydbSchema), waitResult ? &ids : nullptr, mType); } std::optional WriteData(TTestBasicRuntime& runtime, TActorId& sender, const NLongTxService::TLongTxId& longTxId, @@ -431,25 +434,20 @@ void TTestSchema::InitSchema(const std::vector& colu namespace NKikimr::NColumnShard { NOlap::TIndexInfo BuildTableInfo(const std::vector& ydbSchema, const std::vector& key) { - NOlap::TIndexInfo indexInfo = NOlap::TIndexInfo::BuildDefault(); - + THashMap columns; for (ui32 i = 0; i < ydbSchema.size(); ++i) { ui32 id = i + 1; auto& name = ydbSchema[i].GetName(); auto& type = ydbSchema[i].GetType(); - indexInfo.Columns[id] = NTable::TColumn(name, id, type, ""); - indexInfo.ColumnNames[name] = id; + columns[id] = NTable::TColumn(name, id, type, ""); } + std::vector pkNames; for (const auto& c : key) { - indexInfo.KeyColumns.push_back(indexInfo.ColumnNames[c.GetName()]); + pkNames.push_back(c.GetName()); } - - auto storage = std::make_shared(); - storage->Initialize(TInstant::Now().Seconds()); - indexInfo.SetAllKeys(NOlap::TTestStoragesManager::GetInstance()); - return indexInfo; + return 
NOlap::TIndexInfo::BuildDefault(NOlap::TTestStoragesManager::GetInstance(), columns, pkNames); } void SetupSchema(TTestBasicRuntime& runtime, TActorId& sender, const TString& txBody, const NOlap::TSnapshot& snapshot, bool succeed) { @@ -519,7 +517,7 @@ namespace NKikimr::NColumnShard { fields.emplace_back(f.GetName()); } - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, snapshot); + NTxUT::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, snapshot); reader.SetReplyColumns(fields); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); diff --git a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h index 4306babbb1ed..7594be5da952 100644 --- a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h +++ b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h @@ -1,20 +1,21 @@ #pragma once -#include -#include - #include -#include +#include #include #include #include #include -#include +#include +#include +#include +#include +#include + +#include #include #include -#include -#include namespace NKikimr::NOlap { struct TIndexInfo; @@ -167,7 +168,7 @@ struct TTestSchema { // PK firstKeyItem, TTestColumn("resource_type", TTypeInfo(NTypeIds::Utf8) ), - TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8) ), + TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8)).SetAccessorClassName("SPARSED"), TTestColumn("uid", TTypeInfo(NTypeIds::Utf8) ).SetStorageId("__MEMORY"), TTestColumn("level", TTypeInfo(NTypeIds::Int32) ), TTestColumn("message", TTypeInfo(NTypeIds::Utf8) ).SetStorageId("__MEMORY"), @@ -183,7 +184,7 @@ struct TTestSchema { std::vector schema = { // PK TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp) ), - TTestColumn("resource_type", TTypeInfo(NTypeIds::Utf8) ), + TTestColumn("resource_type", TTypeInfo(NTypeIds::Utf8)).SetAccessorClassName("SPARSED"), TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8) ), 
TTestColumn("uid", TTypeInfo(NTypeIds::Utf8) ).SetStorageId("__MEMORY"), // @@ -192,7 +193,7 @@ struct TTestSchema { TTestColumn("json_payload", TTypeInfo(NTypeIds::JsonDocument) ), TTestColumn("ingested_at", TTypeInfo(NTypeIds::Timestamp) ), TTestColumn("saved_at", TTypeInfo(NTypeIds::Timestamp) ), - TTestColumn("request_id", TTypeInfo(NTypeIds::Yson) ) + TTestColumn("request_id", TTypeInfo(NTypeIds::Yson)).SetAccessorClassName("SPARSED") }; return schema; }; @@ -201,7 +202,7 @@ struct TTestSchema { std::vector schema = { TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp) ), TTestColumn("resource_type", TTypeInfo(NTypeIds::Utf8) ).SetStorageId("__MEMORY"), - TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8) ), + TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8)).SetAccessorClassName("SPARSED"), TTestColumn("uid", TTypeInfo(NTypeIds::Utf8) ).SetStorageId("__MEMORY") }; return schema; @@ -406,10 +407,12 @@ void PlanSchemaTx(TTestBasicRuntime& runtime, TActorId& sender, NOlap::TSnapshot void PlanWriteTx(TTestBasicRuntime& runtime, TActorId& sender, NOlap::TSnapshot snap, bool waitResult = true); bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 shardId, const ui64 writeId, const ui64 tableId, const TString& data, - const std::vector& ydbSchema, std::vector* writeIds, const NEvWrite::EModificationType mType = NEvWrite::EModificationType::Upsert); + const std::vector& ydbSchema, std::vector* writeIds, + const NEvWrite::EModificationType mType = NEvWrite::EModificationType::Upsert); bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 writeId, const ui64 tableId, const TString& data, - const std::vector& ydbSchema, bool waitResult = true, std::vector* writeIds = nullptr, const NEvWrite::EModificationType mType = NEvWrite::EModificationType::Upsert); + const std::vector& ydbSchema, bool waitResult = true, std::vector* writeIds = nullptr, + const NEvWrite::EModificationType mType = NEvWrite::EModificationType::Upsert); 
std::optional WriteData(TTestBasicRuntime& runtime, TActorId& sender, const NLongTxService::TLongTxId& longTxId, ui64 tableId, const ui64 writePartId, const TString& data, @@ -474,11 +477,13 @@ namespace NKikimr::NColumnShard { auto& builder = Owner.Builders[Index]; auto type = builder->type(); - NArrow::SwitchType(type->id(), [&](const auto& t) { + Y_ABORT_UNLESS(NArrow::SwitchType(type->id(), [&](const auto& t) { using TWrap = std::decay_t; using T = typename TWrap::T; using TBuilder = typename arrow::TypeTraits::BuilderType; + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("T", typeid(T).name()); + auto& typedBuilder = static_cast(*builder); if constexpr (std::is_arithmetic::value) { if constexpr (arrow::has_c_type::value) { @@ -493,9 +498,16 @@ namespace NKikimr::NColumnShard { return true; } } + + if constexpr (std::is_same::value) { + if constexpr (arrow::is_decimal128_type::value) { + Y_ABORT_UNLESS(typedBuilder.Append(arrow::Decimal128(data.Hi_, data.Low_)).ok()); + return true; + } + } Y_ABORT("Unknown type combination"); return false; - }); + })); return TRowBuilder(Index + 1, Owner); } diff --git a/ydb/core/tx/columnshard/test_helper/controllers.h b/ydb/core/tx/columnshard/test_helper/controllers.h index 5b48f204e2bc..68cd6a1dc4ed 100644 --- a/ydb/core/tx/columnshard/test_helper/controllers.h +++ b/ydb/core/tx/columnshard/test_helper/controllers.h @@ -21,24 +21,21 @@ class TWaitCompactionController: public NYDBTest::NColumnShard::TController { virtual bool NeedForceCompactionBacketsConstruction() const override { return true; } - virtual ui64 GetSmallPortionSizeDetector(const ui64 /*def*/) const override { + virtual ui64 DoGetSmallPortionSizeDetector(const ui64 /*def*/) const override { return SmallSizeDetector.value_or(0); } - virtual TDuration GetOptimizerFreshnessCheckDuration(const TDuration /*defaultValue*/) const override { + virtual TDuration DoGetOptimizerFreshnessCheckDuration(const TDuration /*defaultValue*/) const override { return 
TDuration::Zero(); } - virtual TDuration GetLagForCompactionBeforeTierings(const TDuration /*def*/) const override { + virtual TDuration DoGetLagForCompactionBeforeTierings(const TDuration /*def*/) const override { return TDuration::Zero(); } - virtual TDuration GetCompactionActualizationLag(const TDuration /*def*/) const override { + virtual TDuration DoGetCompactionActualizationLag(const TDuration /*def*/) const override { return TDuration::Zero(); } - virtual TDuration GetTTLDefaultWaitingDuration(const TDuration /*defaultValue*/) const override { - return TDuration::Seconds(1); - } public: TWaitCompactionController() { - SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); } ui32 GetFinishedExportsCount() const { diff --git a/ydb/core/tx/columnshard/test_helper/helper.cpp b/ydb/core/tx/columnshard/test_helper/helper.cpp index a7aca5f114ca..879090b7fb68 100644 --- a/ydb/core/tx/columnshard/test_helper/helper.cpp +++ b/ydb/core/tx/columnshard/test_helper/helper.cpp @@ -1,6 +1,7 @@ #include "helper.h" #include +#include #include #include #include @@ -25,6 +26,9 @@ NKikimrSchemeOp::TOlapColumnDescription TTestColumn::CreateColumn(const ui32 id) col.SetStorageId(StorageId); } auto columnType = NScheme::ProtoColumnTypeFromTypeInfoMod(Type, ""); + if (AccessorClassName) { + col.MutableDataAccessorConstructor()->SetClassName(AccessorClassName); + } col.SetTypeId(columnType.TypeId); if (columnType.TypeInfo) { *col.MutableTypeInfo() = *columnType.TypeInfo; @@ -32,6 +36,16 @@ NKikimrSchemeOp::TOlapColumnDescription TTestColumn::CreateColumn(const ui32 id) return col; } +std::set TTestColumn::GetNullableSet(const std::vector& columns) { + std::set result; + for (auto&& i : columns) { + if (!i.GetNullable()) { + result.emplace(i.GetName()); + } + } + return result; +} + std::vector> TTestColumn::ConvertToPairs(const std::vector& columns) { std::vector> result; for (auto&& i : columns) { @@ -66,15 
+80,15 @@ std::vector TTestColumn::CropSchema(const s namespace NKikimr::NArrow { -std::vector> MakeArrowFields( - const std::vector& columns, const std::set& notNullColumns /*= {}*/) { +std::vector> MakeArrowFields(const std::vector& columns) { + std::set notNullColumns = NTest::TTestColumn::GetNullableSet(columns); auto result = MakeArrowFields(NTest::TTestColumn::ConvertToPairs(columns), notNullColumns); UNIT_ASSERT_C(result.ok(), result.status().ToString()); return result.ValueUnsafe(); } -std::shared_ptr MakeArrowSchema( - const std::vector& columns, const std::set& notNullColumns /*= {}*/) { +std::shared_ptr MakeArrowSchema(const std::vector& columns) { + std::set notNullColumns = NTest::TTestColumn::GetNullableSet(columns); auto result = MakeArrowSchema(NTest::TTestColumn::ConvertToPairs(columns), notNullColumns); UNIT_ASSERT_C(result.ok(), result.status().ToString()); return result.ValueUnsafe(); diff --git a/ydb/core/tx/columnshard/test_helper/helper.h b/ydb/core/tx/columnshard/test_helper/helper.h index cf71657cf97c..98f9a09b526c 100644 --- a/ydb/core/tx/columnshard/test_helper/helper.h +++ b/ydb/core/tx/columnshard/test_helper/helper.h @@ -51,6 +51,9 @@ class TTestColumn { YDB_ACCESSOR_DEF(TString, Name); YDB_ACCESSOR_DEF(NScheme::TTypeInfo, Type); YDB_ACCESSOR_DEF(TString, StorageId); + YDB_ACCESSOR_DEF(TString, AccessorClassName); + YDB_ACCESSOR(bool, Nullable, true); + public: explicit TTestColumn(const TString& name, const NScheme::TTypeInfo& type) : Name(name) @@ -63,13 +66,14 @@ class TTestColumn { static THashMap ConvertToHash(const std::vector& columns); static std::vector BuildFromPairs(const std::vector>& columns); static std::vector CropSchema(const std::vector& input, const ui32 size); + static std::set GetNullableSet(const std::vector& columns); }; } namespace NKikimr::NArrow { -std::vector> MakeArrowFields(const std::vector& columns, const std::set& notNullColumns = {}); -std::shared_ptr MakeArrowSchema(const std::vector& columns, const 
std::set& notNullColumns = {}); +std::vector> MakeArrowFields(const std::vector& columns); +std::shared_ptr MakeArrowSchema(const std::vector& columns); } diff --git a/ydb/core/tx/columnshard/common/tests/shard_reader.h b/ydb/core/tx/columnshard/test_helper/shard_reader.h similarity index 99% rename from ydb/core/tx/columnshard/common/tests/shard_reader.h rename to ydb/core/tx/columnshard/test_helper/shard_reader.h index 1bb3ad353835..2beaa5a782d9 100644 --- a/ydb/core/tx/columnshard/common/tests/shard_reader.h +++ b/ydb/core/tx/columnshard/test_helper/shard_reader.h @@ -10,7 +10,7 @@ #include #include -namespace NKikimr::NOlap::NTests { +namespace NKikimr::NTxUT { class TShardReader { private: @@ -267,4 +267,4 @@ class TShardReader { } }; -} +} //namespace NKikimr::NTxUT diff --git a/ydb/core/tx/columnshard/test_helper/ya.make b/ydb/core/tx/columnshard/test_helper/ya.make index a900deb266db..cab4937293dd 100644 --- a/ydb/core/tx/columnshard/test_helper/ya.make +++ b/ydb/core/tx/columnshard/test_helper/ya.make @@ -2,6 +2,7 @@ LIBRARY() PEERDIR( ydb/core/protos + ydb/core/formats/arrow contrib/libs/apache/arrow ydb/library/actors/core ydb/core/tx/columnshard/blobs_action/bs diff --git a/ydb/core/tx/columnshard/transactions/locks/abstract.cpp b/ydb/core/tx/columnshard/transactions/locks/abstract.cpp new file mode 100644 index 000000000000..927b73dcdd23 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/abstract.cpp @@ -0,0 +1,6 @@ +#include "abstract.h" +#include + +namespace NKikimr::NOlap::NTxInteractions { + +} diff --git a/ydb/core/tx/columnshard/transactions/locks/abstract.h b/ydb/core/tx/columnshard/transactions/locks/abstract.h new file mode 100644 index 000000000000..5699d13d9705 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/abstract.h @@ -0,0 +1,105 @@ +#pragma once +#include "dependencies.h" +#include "interaction.h" + +#include + +#include + +namespace NKikimr::NColumnShard { +class TColumnShard; +} + +namespace 
NKikimrColumnShardTxProto { +class TEvent; +} + +namespace NKikimr::NOlap::NTxInteractions { + +class ITxEvent { +public: + using TFactory = NObjectFactory::TParametrizedObjectFactory; + using TProto = NKikimrColumnShardTxProto::TEvent; + +protected: + virtual void DoAddToInteraction(const ui64 txId, TInteractionsContext& context) const = 0; + virtual void DoRemoveFromInteraction(const ui64 txId, TInteractionsContext& context) const = 0; + virtual bool DoDeserializeFromProto(const NKikimrColumnShardTxProto::TEvent& proto) = 0; + virtual void DoSerializeToProto(NKikimrColumnShardTxProto::TEvent& proto) const = 0; + +public: + ITxEvent() = default; + virtual ~ITxEvent() = default; + + virtual TString GetClassName() const = 0; + + bool DeserializeFromProto(const TProto& proto) { + return DoDeserializeFromProto(proto); + } + + void SerializeToProto(TProto& proto) const { + DoSerializeToProto(proto); + } + + void AddToInteraction(const ui64 txId, TInteractionsContext& context) const { + return DoAddToInteraction(txId, context); + } + + void RemoveFromInteraction(const ui64 txId, TInteractionsContext& context) const { + return DoRemoveFromInteraction(txId, context); + } +}; + +class TTxEventContainer: public NBackgroundTasks::TInterfaceProtoContainer { +private: + using TBase = NBackgroundTasks::TInterfaceProtoContainer; + YDB_READONLY(ui64, TxId, 0); + +public: + void AddToInteraction(TInteractionsContext& context) const { + return GetObjectVerified().AddToInteraction(TxId, context); + } + + void RemoveFromInteraction(TInteractionsContext& context) const { + return GetObjectVerified().RemoveFromInteraction(TxId, context); + } + + TTxEventContainer(const ui64 txId, const std::shared_ptr& txEvent) + : TBase(txEvent) + , TxId(txId) { + } + + TTxEventContainer(const ui64 txId) + : TxId(txId) { + } + + bool operator<(const TTxEventContainer& item) const { + return TxId < item.TxId; + } +}; + +class ITxEventWriter { +protected: + virtual bool DoCheckInteraction( + const ui64 
selfTxId, TInteractionsContext& context, TTxConflicts& conflicts, TTxConflicts& notifications) const = 0; + virtual std::shared_ptr DoBuildEvent() = 0; + +public: + ITxEventWriter() = default; + virtual ~ITxEventWriter() = default; + + bool CheckInteraction(const ui64 selfTxId, TInteractionsContext& context, TTxConflicts& conflicts, TTxConflicts& notifications) const { + TTxConflicts conflictsResult; + TTxConflicts notificationsResult; + const bool result = DoCheckInteraction(selfTxId, context, conflictsResult, notificationsResult); + std::swap(conflictsResult, conflicts); + std::swap(notificationsResult, notifications); + return result; + } + + std::shared_ptr BuildEvent() { + return DoBuildEvent(); + } +}; + +} // namespace NKikimr::NOlap::NTxInteractions diff --git a/ydb/core/tx/columnshard/transactions/locks/dependencies.cpp b/ydb/core/tx/columnshard/transactions/locks/dependencies.cpp new file mode 100644 index 000000000000..921a024ce651 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/dependencies.cpp @@ -0,0 +1,48 @@ +#include "dependencies.h" +#include + +namespace NKikimr::NOlap::NTxInteractions { + +bool TTxConflicts::LoadFromDatabase(NIceDb::TNiceDb& db) { + using namespace NColumnShard; + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return false; + } + + THashMap> local; + while (!rowset.EndOfSet()) { + const ui64 txId = rowset.GetValue(); + local[txId].emplace(rowset.GetValue()); + if (!rowset.Next()) { + return false; + } + } + std::swap(local, TxIdsFromCommitToBroken); + return true; +} + +void TTxConflicts::AddOnExecute(NTabletFlatExecutor::TTransactionContext& txc) const { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + for (auto&& [commitTxId, brokeTxIds] : TxIdsFromCommitToBroken) { + for (auto&& brokeTxId : brokeTxIds) { + db.Table().Key(commitTxId, brokeTxId).Update(); + } + } +} + +bool TTxConflicts::RemoveOnExecute(NTabletFlatExecutor::TTransactionContext& txc, const ui64 txId) const { + using 
namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + auto it = TxIdsFromCommitToBroken.find(txId); + if (it == TxIdsFromCommitToBroken.end()) { + return false; + } + for (auto&& brokeTxId : it->second) { + db.Table().Key(txId, brokeTxId).Delete(); + } + return true; +} + +} diff --git a/ydb/core/tx/columnshard/transactions/locks/dependencies.h b/ydb/core/tx/columnshard/transactions/locks/dependencies.h new file mode 100644 index 000000000000..d749ed1db87e --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/dependencies.h @@ -0,0 +1,55 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NOlap::NTxInteractions { + +class TTxConflicts { +private: + THashMap> TxIdsFromCommitToBroken; + +public: + THashMap>::const_iterator begin() const { + return TxIdsFromCommitToBroken.begin(); + } + + THashMap>::const_iterator end() const { + return TxIdsFromCommitToBroken.end(); + } + + bool Add(const ui64 commitTxId, const ui64 brokenTxId) { + return TxIdsFromCommitToBroken[commitTxId].emplace(brokenTxId).second; + } + + THashSet GetBrokenTxIds(const ui64 txId) const { + auto it = TxIdsFromCommitToBroken.find(txId); + if (it == TxIdsFromCommitToBroken.end()) { + return Default>(); + } + return it->second; + } + + bool LoadFromDatabase(NIceDb::TNiceDb& db); + + bool RemoveOnExecute(NTabletFlatExecutor::TTransactionContext& txc, const ui64 txId) const; + + [[nodiscard]] bool RemoveOnComplete(const ui64 txId) { + return TxIdsFromCommitToBroken.erase(txId); + } + + void AddOnExecute(NTabletFlatExecutor::TTransactionContext& txc) const; + + void MergeTo(TTxConflicts& dest) const { + for (auto&& i : TxIdsFromCommitToBroken) { + auto it = dest.TxIdsFromCommitToBroken.find(i.first); + if (it == dest.TxIdsFromCommitToBroken.end()) { + dest.TxIdsFromCommitToBroken.emplace(i.first, i.second); + } else { + it->second.insert(i.second.begin(), i.second.end()); + } + } + } +}; + +} // namespace NKikimr::NOlap::NTxInteractions diff --git 
a/ydb/core/tx/columnshard/transactions/locks/interaction.cpp b/ydb/core/tx/columnshard/transactions/locks/interaction.cpp new file mode 100644 index 000000000000..0dd52a2a79ba --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/interaction.cpp @@ -0,0 +1,26 @@ +#include "interaction.h" +#include + +namespace NKikimr::NOlap::NTxInteractions { +TIntervalPoint TIntervalPoint::From( + const TPredicateContainer& container, const std::shared_ptr& pkSchema) { + i32 shift = container.IsInclude() ? 0 : 1; + if (!container.GetReplaceKey()) { + shift = -1; + } else if (container.GetReplaceKey()->Size() < (ui32)pkSchema->num_fields()) { + shift = 1; + } + return TIntervalPoint(container.GetReplaceKey(), shift); +} + +TIntervalPoint TIntervalPoint::To( + const TPredicateContainer& container, const std::shared_ptr& pkSchema) { + i32 shift = container.IsInclude() ? 0 : -1; + if (!container.GetReplaceKey() || container.GetReplaceKey()->Size() < (ui32)pkSchema->num_fields()) { + shift = Max(); + } + + return TIntervalPoint(container.GetReplaceKey(), shift); +} + +} // namespace NKikimr::NOlap::NTxInteractions diff --git a/ydb/core/tx/columnshard/transactions/locks/interaction.h b/ydb/core/tx/columnshard/transactions/locks/interaction.h new file mode 100644 index 000000000000..bd48eb1c9460 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/interaction.h @@ -0,0 +1,458 @@ +#pragma once +#include + +#include +#include + +#include + +namespace NKikimr::NOlap { +class TPredicateContainer; +} + +namespace NKikimr::NOlap::NTxInteractions { + +class TPointTxCounters { +private: + YDB_READONLY(ui32, CountIncludes, 0); + YDB_READONLY(ui32, CountNotIncludes, 0); + +public: + void Inc(const bool include) { + if (include) { + IncInclude(); + } else { + IncNotInclude(); + } + } + bool Dec(const bool include) { + if (include) { + return DecInclude(); + } else { + return DecNotInclude(); + } + } + void IncInclude() { + ++CountIncludes; + } + [[nodiscard]] bool DecInclude() { 
+ AFL_VERIFY(CountIncludes); + return --CountIncludes == 0; + } + void IncNotInclude() { + ++CountNotIncludes; + } + [[nodiscard]] bool DecNotInclude() { + AFL_VERIFY(CountNotIncludes); + return --CountNotIncludes == 0; + } + bool IsEmpty() const { + return !CountIncludes && !CountNotIncludes; + } + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + if (CountIncludes) { + result.InsertValue("count_include", CountIncludes); + } + if (CountNotIncludes) { + result.InsertValue("count_not_include", CountNotIncludes); + } + return result; + } + ui32 GetCountSum() const { + return CountIncludes + CountNotIncludes; + } +}; + +class TIntervalTxCounters { +private: + YDB_READONLY(ui32, Count, 0); + +public: + void Inc(const ui32 count = 1) { + Count += count; + } + [[nodiscard]] bool Dec(const ui32 count = 1) { + AFL_VERIFY(Count && count <= Count)("count", Count)("dec", count); /* Count is ui32: refuse to wrap below zero; returns true when the counter reaches zero */ + Count -= count; + return Count == 0; + } + bool IsEmpty() const { + return !Count; + } + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + result.InsertValue("count", Count); + return result; + } + + void ProvideFrom(const TIntervalTxCounters& counters) { + Count += counters.Count; + AFL_VERIFY(counters.Count); + } +}; + +class TPointInfo { +private: + THashMap StartTxIds; + THashMap FinishTxIds; + THashMap IntervalTxIds; + +public: + void InsertCurrentTxs(THashSet& txIds, const bool includePoint) const { + for (auto&& i : IntervalTxIds) { + txIds.emplace(i.first); + } + if (includePoint) { + for (auto&& i : FinishTxIds) { + if (!i.second.GetCountIncludes()) { + continue; + } + auto it = StartTxIds.find(i.first); + if (it != StartTxIds.end() && it->second.GetCountIncludes()) { + txIds.emplace(i.first); + } + } + } + } + + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + auto& starts = result.InsertValue("starts", NJson::JSON_ARRAY); + for (auto&& i : StartTxIds) { + auto& data = starts.AppendValue(NJson::JSON_MAP); + data.InsertValue("id", 
i.first); + data.InsertValue("inc", i.second.DebugJson()); + } + auto& finish = result.InsertValue("finishes", NJson::JSON_ARRAY); + for (auto&& i : FinishTxIds) { + auto& data = finish.AppendValue(NJson::JSON_MAP); + data.InsertValue("id", i.first); + data.InsertValue("inc", i.second.DebugJson()); + } + auto& txs = result.InsertValue("txs", NJson::JSON_ARRAY); + for (auto&& i : IntervalTxIds) { + auto& data = txs.AppendValue(NJson::JSON_MAP); + data.InsertValue("id", i.first); + data.InsertValue("inc", i.second.DebugJson()); + } + return result; + } + + void AddStart(const ui64 txId, const bool include) { + StartTxIds[txId].Inc(include); + } + void RemoveStart(const ui64 txId, const bool include) { + if (StartTxIds[txId].Dec(include)) { + StartTxIds.erase(txId); + } + } + void AddFinish(const ui64 txId, const bool include) { + FinishTxIds[txId].Inc(include); + } + void RemoveFinish(const ui64 txId, const bool include) { + if (FinishTxIds[txId].Dec(include)) { + FinishTxIds.erase(txId); + } + } + void AddIntervalTx(const ui64 txId) { + IntervalTxIds[txId].Inc(); + } + void RemoveIntervalTx(const ui64 txId) { + if (IntervalTxIds[txId].Dec()) { + IntervalTxIds.erase(txId); + } + } + bool TryRemoveTx(const ui64 txId, const bool include) { + bool result = false; + if (StartTxIds[txId].Dec(include)) { + StartTxIds.erase(txId); + result = true; + } + if (FinishTxIds[txId].Dec(include)) { + FinishTxIds.erase(txId); + result = true; + } + if (IntervalTxIds[txId].Dec()) { /* Dec() takes a decrement amount (default 1), not a tx id; matches RemoveIntervalTx */ + IntervalTxIds.erase(txId); + result = true; + } + return result; + } + + bool IsEmpty() const { + return StartTxIds.empty() && FinishTxIds.empty() && IntervalTxIds.empty(); + } + + void ProvideTxIdsFrom(const TPointInfo& previouse) { + for (auto&& i : previouse.IntervalTxIds) { + auto provided = i.second; + { + auto it = StartTxIds.find(i.first); + if (it != StartTxIds.end()) { + provided.Inc(it->second.GetCountSum()); + } + } + { + auto it = FinishTxIds.find(i.first); + if (it != FinishTxIds.end()) 
{ + if (provided.Dec(it->second.GetCountSum())) { + continue; /* this tx has no remaining intervals past this point; keep processing the other txs */ + } + } + } + IntervalTxIds[i.first].ProvideFrom(provided); + } + } +}; + +class TIntervalPoint { +private: + i32 IncludeState = 0; + std::optional PrimaryKey; + + TIntervalPoint(const NArrow::TReplaceKey& primaryKey, const int includeState) + : IncludeState(includeState) + , PrimaryKey(primaryKey) { + } + + TIntervalPoint(const std::shared_ptr& primaryKey, const int includeState) + : IncludeState(includeState) { + if (primaryKey) { + PrimaryKey = *primaryKey; + } + } + +public: + static TIntervalPoint Equal(const NArrow::TReplaceKey& replaceKey) { + return TIntervalPoint(replaceKey, 0); + } + static TIntervalPoint From(const TPredicateContainer& container, const std::shared_ptr& pkSchema); + static TIntervalPoint To(const TPredicateContainer& container, const std::shared_ptr& pkSchema); + + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + result.InsertValue("include", IncludeState); + if (PrimaryKey) { + result.InsertValue("pk", PrimaryKey->DebugString()); + } + return result; + } + + bool IsIncluded() const { + return IncludeState == 0; + } + + bool operator==(const TIntervalPoint& item) const { + if (!PrimaryKey && !item.PrimaryKey) { + return IncludeState == item.IncludeState; + } else if (!PrimaryKey && item.PrimaryKey) { + return false; + } else if (PrimaryKey && !item.PrimaryKey) { + return false; + } else if (IncludeState == item.IncludeState) { + if (PrimaryKey->Size() != item.PrimaryKey->Size()) { + return false; + } + return *PrimaryKey == *item.PrimaryKey; + } else { + return false; + } + } + + bool operator<=(const TIntervalPoint& point) const { + return !(point < *this); + } + + bool operator<(const TIntervalPoint& point) const { + if (!PrimaryKey && !point.PrimaryKey) { + return IncludeState < point.IncludeState; + } else if (!PrimaryKey && point.PrimaryKey) { + return IncludeState < 0; + } else if (PrimaryKey && !point.PrimaryKey) { + return 0 < 
point.IncludeState; + } else { + const ui32 sizeMin = std::min(PrimaryKey->Size(), point.PrimaryKey->Size()); + const std::partial_ordering compareResult = PrimaryKey->ComparePartNotNull(*point.PrimaryKey, sizeMin); + if (compareResult == std::partial_ordering::less) { + return true; + } else if (compareResult == std::partial_ordering::greater) { + return false; + } else { + AFL_VERIFY(compareResult == std::partial_ordering::equivalent); + if (PrimaryKey->Size() == point.PrimaryKey->Size()) { + return IncludeState < point.IncludeState; + } else if (PrimaryKey->Size() < point.PrimaryKey->Size()) { + if (IncludeState <= 1) { + return true; + } else { + return false; + } + } else { + if (point.IncludeState <= 1) { + return false; + } else { + return true; + } + } + return false; + } + } + } +}; + +class TReadIntervals { +private: + std::map IntervalsInfo; + +public: + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + auto& jsonIntervals = result.InsertValue("intervals", NJson::JSON_ARRAY); + for (auto&& i : IntervalsInfo) { + auto& pointInfo = jsonIntervals.AppendValue(NJson::JSON_MAP); + pointInfo.InsertValue("p", i.first.DebugJson()); + pointInfo.InsertValue("i", i.second.DebugJson()); + } + return result; + } + + bool IsEmpty() const { + return IntervalsInfo.empty(); + } + + std::map::iterator Erase(const std::map::iterator& it) { + return IntervalsInfo.erase(it); + } + + std::map::iterator GetPointIterator(const TIntervalPoint& intervalPoint) { + auto it = IntervalsInfo.find(intervalPoint); + AFL_VERIFY(it != IntervalsInfo.end()); + return it; + } + + std::map::iterator InsertPoint(const TIntervalPoint& intervalPoint) { + auto it = IntervalsInfo.lower_bound(intervalPoint); + if (it == IntervalsInfo.end() || it == IntervalsInfo.begin()) { + return IntervalsInfo.emplace(intervalPoint, TPointInfo()).first; + } else if (it->first == intervalPoint) { + return it; + } else { + --it; + auto result = IntervalsInfo.emplace(intervalPoint, 
TPointInfo()).first; + result->second.ProvideTxIdsFrom(it->second); + return result; + } + } + + THashSet GetAffectedTxIds(const std::shared_ptr& writtenPrimaryKeys) const { + AFL_VERIFY(writtenPrimaryKeys); + auto it = IntervalsInfo.begin(); + THashSet affectedTxIds; + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("batch", writtenPrimaryKeys->ToString())("info", DebugJson().GetStringRobust()); + for (ui32 i = 0; i < writtenPrimaryKeys->num_rows();) { + if (it == IntervalsInfo.end()) { + return affectedTxIds; + } + auto rKey = NArrow::TReplaceKey::FromBatch(writtenPrimaryKeys, writtenPrimaryKeys->schema(), i); + auto pkIntervalPoint = TIntervalPoint::Equal(rKey); + while (it != IntervalsInfo.end() && it->first < pkIntervalPoint) { + ++it; + } + if (it == IntervalsInfo.end()) { + return affectedTxIds; + } + auto itPred = it; + bool equal = false; + if (pkIntervalPoint < it->first) { + if (it == IntervalsInfo.begin()) { + ++i; + continue; + } + if (pkIntervalPoint < it->first) { + --itPred; + } + } else { + equal = true; + ++it; + } + + itPred->second.InsertCurrentTxs(affectedTxIds, equal); + if (it == IntervalsInfo.end()) { + return affectedTxIds; + } + while (i < writtenPrimaryKeys->num_rows()) { + auto rKey = NArrow::TReplaceKey::FromBatch(writtenPrimaryKeys, writtenPrimaryKeys->schema(), i); + if (TIntervalPoint::Equal(rKey) < it->first) { + ++i; + } else { + break; + } + } + } + return affectedTxIds; + } +}; + +class TInteractionsContext { +private: + THashMap ReadIntervalsByPathId; + +public: + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + for (auto&& i : ReadIntervalsByPathId) { + result.InsertValue(::ToString(i.first), i.second.DebugJson()); + } + return result; + } + + THashSet GetAffectedTxIds(const ui64 pathId, const std::shared_ptr& batch) const { + auto it = ReadIntervalsByPathId.find(pathId); + if (it == ReadIntervalsByPathId.end()) { + return {}; + } + return it->second.GetAffectedTxIds(batch); + } + + void 
AddInterval(const ui64 txId, const ui64 pathId, const TIntervalPoint& from, const TIntervalPoint& to) { + auto& intervals = ReadIntervalsByPathId[pathId]; + auto itFrom = intervals.InsertPoint(from); + auto itTo = intervals.InsertPoint(to); + itFrom->second.AddStart(txId, from.IsIncluded()); + for (auto it = itFrom; it != itTo; ++it) { + it->second.AddIntervalTx(txId); + } + itTo->second.AddFinish(txId, to.IsIncluded()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "add_interval")("interactions_info", DebugJson().GetStringRobust()); + } + + void RemoveInterval(const ui64 txId, const ui64 pathId, const TIntervalPoint& from, const TIntervalPoint& to) { + auto itIntervals = ReadIntervalsByPathId.find(pathId); + AFL_VERIFY(itIntervals != ReadIntervalsByPathId.end())("path_id", pathId); + auto& intervals = itIntervals->second; + auto itFrom = intervals.GetPointIterator(from); + auto itTo = intervals.GetPointIterator(to); + itFrom->second.RemoveStart(txId, from.IsIncluded()); + for (auto it = itFrom; it != itTo; ++it) { + it->second.RemoveIntervalTx(txId); + } + itTo->second.RemoveFinish(txId, to.IsIncluded()); + for (auto&& it = itFrom; it != itTo;) { + if (it->second.IsEmpty()) { + it = intervals.Erase(it); + } else { + ++it; + } + } + if (itTo->second.IsEmpty()) { + intervals.Erase(itTo); + } + if (intervals.IsEmpty()) { + ReadIntervalsByPathId.erase(itIntervals); + } + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "remove_interval")("interactions_info", DebugJson().GetStringRobust()); + } +}; + +} // namespace NKikimr::NOlap::NTxInteractions diff --git a/ydb/core/tx/columnshard/transactions/locks/read_finished.cpp b/ydb/core/tx/columnshard/transactions/locks/read_finished.cpp new file mode 100644 index 000000000000..159b5d269533 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/read_finished.cpp @@ -0,0 +1,5 @@ +#include "read_finished.h" + +namespace NKikimr::NOlap::NTxInteractions { + +} diff --git 
a/ydb/core/tx/columnshard/transactions/locks/read_finished.h b/ydb/core/tx/columnshard/transactions/locks/read_finished.h new file mode 100644 index 000000000000..895123e5097b --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/read_finished.h @@ -0,0 +1,30 @@ +#pragma once +#include "abstract.h" + +namespace NKikimr::NOlap::NTxInteractions { + +class TEvReadFinishedWriter: public ITxEventWriter { +private: + YDB_READONLY(ui64, PathId, 0); + TTxConflicts Conflicts; + + virtual bool DoCheckInteraction( + const ui64 /*selfTxId*/, TInteractionsContext& /*context*/, TTxConflicts& conflicts, TTxConflicts& /*notifications*/) const override { + conflicts = Conflicts; + return true; + } + + virtual std::shared_ptr DoBuildEvent() override { + return nullptr; + } + +public: + TEvReadFinishedWriter(const ui64 pathId, const TTxConflicts& conflicts) + : PathId(pathId) + , Conflicts(conflicts) + { + AFL_VERIFY(PathId); + } +}; + +} // namespace NKikimr::NOlap::NTxInteractions diff --git a/ydb/core/tx/columnshard/transactions/locks/read_start.cpp b/ydb/core/tx/columnshard/transactions/locks/read_start.cpp new file mode 100644 index 000000000000..963c47b068a9 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/read_start.cpp @@ -0,0 +1,47 @@ +#include "read_start.h" +#include + +namespace NKikimr::NOlap::NTxInteractions { + +std::shared_ptr TEvReadStartWriter::DoBuildEvent() { + return std::make_shared(PathId, Schema, Filter); +} + +bool TEvReadStart::DoDeserializeFromProto(const NKikimrColumnShardTxProto::TEvent& proto) { + if (!proto.HasRead()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("error", "cannot_parse_TEvReadStart")("reason", "have not 'read' in proto"); + return false; + } + Schema = NArrow::DeserializeSchema(proto.GetRead().GetSchema()); + if (!Schema) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("error", "cannot_parse_TEvReadStart")("reason", "cannot_parse_schema"); + return false; + } + Filter = 
TPKRangesFilter::BuildFromString(proto.GetRead().GetFilter(), Schema, false); + if (!Filter) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("error", "cannot_parse_TEvReadStart")("reason", "cannot_parse_filter"); + return false; + } + return true; +} + +void TEvReadStart::DoSerializeToProto(NKikimrColumnShardTxProto::TEvent& proto) const { + AFL_VERIFY(!!Filter); + AFL_VERIFY(!!Schema); + *proto.MutableRead()->MutableFilter() = Filter->SerializeToString(Schema); + *proto.MutableRead()->MutableSchema() = NArrow::SerializeSchema(*Schema); +} + +void TEvReadStart::DoAddToInteraction(const ui64 txId, TInteractionsContext& context) const { + for (auto&& i : *Filter) { + context.AddInterval(txId, PathId, TIntervalPoint::From(i.GetPredicateFrom(), Schema), TIntervalPoint::To(i.GetPredicateTo(), Schema)); + } +} + +void TEvReadStart::DoRemoveFromInteraction(const ui64 txId, TInteractionsContext& context) const { + for (auto&& i : *Filter) { + context.RemoveInterval(txId, PathId, TIntervalPoint::From(i.GetPredicateFrom(), Schema), TIntervalPoint::To(i.GetPredicateTo(), Schema)); + } +} + +} diff --git a/ydb/core/tx/columnshard/transactions/locks/read_start.h b/ydb/core/tx/columnshard/transactions/locks/read_start.h new file mode 100644 index 000000000000..6587dfea5bda --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/read_start.h @@ -0,0 +1,71 @@ +#pragma once +#include "abstract.h" +#include + +namespace NKikimr::NOlap::NTxInteractions { + +class TEvReadStartWriter: public ITxEventWriter { +private: + YDB_READONLY(ui64, PathId, 0); + YDB_READONLY_DEF(std::shared_ptr, Schema); + YDB_READONLY_DEF(std::shared_ptr, Filter); + YDB_READONLY_DEF(THashSet, LockIdsForCheck); + + virtual bool DoCheckInteraction( + const ui64 selfTxId, TInteractionsContext& /*context*/, TTxConflicts& /*conflicts*/, TTxConflicts& notifications) const override { + for (auto&& i : LockIdsForCheck) { + notifications.Add(i, selfTxId); + } + return true; + } + + virtual std::shared_ptr 
DoBuildEvent() override; + +public: + TEvReadStartWriter(const ui64 pathId, const std::shared_ptr& schema, const std::shared_ptr& filter, + const THashSet& lockIdsForCheck) + : PathId(pathId) + , Schema(schema) + , Filter(filter) + , LockIdsForCheck(lockIdsForCheck) + { + AFL_VERIFY(PathId); + AFL_VERIFY(Schema); + AFL_VERIFY(Filter); + } +}; + +class TEvReadStart: public ITxEvent { +public: + static TString GetClassNameStatic() { + return "READ_START"; + } + +private: + YDB_READONLY(ui64, PathId, 0); + YDB_READONLY_DEF(std::shared_ptr, Schema); + YDB_READONLY_DEF(std::shared_ptr, Filter); + + virtual bool DoDeserializeFromProto(const NKikimrColumnShardTxProto::TEvent& proto) override; + virtual void DoSerializeToProto(NKikimrColumnShardTxProto::TEvent& proto) const override; + virtual void DoAddToInteraction(const ui64 txId, TInteractionsContext& context) const override; + virtual void DoRemoveFromInteraction(const ui64 txId, TInteractionsContext& context) const override; + static inline const TFactory::TRegistrator Registrator = TFactory::TRegistrator(GetClassNameStatic()); + +public: + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } + + TEvReadStart() = default; + TEvReadStart(const ui64 pathId, const std::shared_ptr& schema, const std::shared_ptr& filter) + : PathId(pathId) + , Schema(schema) + , Filter(filter) { + AFL_VERIFY(PathId); + AFL_VERIFY(Schema); + AFL_VERIFY(Filter); + } +}; + +} // namespace NKikimr::NOlap::NTxInteractions diff --git a/ydb/core/tx/columnshard/transactions/locks/write.cpp b/ydb/core/tx/columnshard/transactions/locks/write.cpp new file mode 100644 index 000000000000..19ff258fe7c6 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/write.cpp @@ -0,0 +1,5 @@ +#include "write.h" + +namespace NKikimr::NOlap::NTxInteractions { + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/transactions/locks/write.h b/ydb/core/tx/columnshard/transactions/locks/write.h new file mode 100644 
index 000000000000..78ba8baf7e0e --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/write.h @@ -0,0 +1,36 @@ +#pragma once +#include "abstract.h" + +namespace NKikimr::NOlap::NTxInteractions { + +class TEvWriteWriter: public ITxEventWriter { +private: + YDB_READONLY(ui64, PathId, 0); + YDB_READONLY_DEF(std::shared_ptr, RecordBatch); + + virtual bool DoCheckInteraction( + const ui64 selfTxId, TInteractionsContext& context, TTxConflicts& conflicts, TTxConflicts& /*notifications*/) const override { + THashSet txIds = context.GetAffectedTxIds(PathId, RecordBatch); + txIds.erase(selfTxId); + TTxConflicts result; + for (auto&& i : txIds) { + result.Add(selfTxId, i); + } + std::swap(result, conflicts); + return true; + } + + virtual std::shared_ptr DoBuildEvent() override { + return nullptr; + } + +public: + TEvWriteWriter(const ui64 pathId, const std::shared_ptr& batch, const std::shared_ptr& pkSchema) + : PathId(pathId) + , RecordBatch(NArrow::TColumnOperator().Extract(batch, pkSchema->field_names())) { + AFL_VERIFY(PathId); + AFL_VERIFY(RecordBatch); + } +}; + +} // namespace NKikimr::NOlap::NTxInteractions diff --git a/ydb/core/tx/columnshard/transactions/locks/ya.make b/ydb/core/tx/columnshard/transactions/locks/ya.make new file mode 100644 index 000000000000..a7ad6b27ab0e --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/ya.make @@ -0,0 +1,19 @@ +LIBRARY() + +SRCS( + dependencies.cpp + interaction.cpp + abstract.cpp + GLOBAL read_start.cpp + GLOBAL read_finished.cpp + GLOBAL write.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/transactions/protos + ydb/core/tx/columnshard/engines/predicate + ydb/core/tx/columnshard/blobs_action/events + ydb/core/tx/columnshard/data_sharing/destination/events +) + +END() diff --git a/ydb/core/tx/columnshard/transactions/operators/backup.h b/ydb/core/tx/columnshard/transactions/operators/backup.h index 9e1e657ad138..cd9d5785dd9c 100644 --- a/ydb/core/tx/columnshard/transactions/operators/backup.h +++ 
b/ydb/core/tx/columnshard/transactions/operators/backup.h @@ -24,6 +24,9 @@ class TBackupTransactionOperator: public IProposeTxOperator, public TMonitoringO } virtual void DoFinishProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { } + virtual TString DoGetOpType() const override { + return "Backup"; + } virtual bool DoIsAsync() const override { return true; } diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write.h b/ydb/core/tx/columnshard/transactions/operators/ev_write.h deleted file mode 100644 index 15bc4e5f3ae2..000000000000 --- a/ydb/core/tx/columnshard/transactions/operators/ev_write.h +++ /dev/null @@ -1,77 +0,0 @@ -#pragma once - -#include - -namespace NKikimr::NColumnShard { - - class TEvWriteTransactionOperator: public TTxController::ITransactionOperator, public TMonitoringObjectsCounter { - using TBase = TTxController::ITransactionOperator; - using TProposeResult = TTxController::TProposeResult; - static inline auto Registrator = TFactory::TRegistrator(NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE); - private: - virtual TProposeResult DoStartProposeOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override { - owner.OperationsManager->LinkTransaction(LockId, GetTxId(), txc); - return TProposeResult(); - } - virtual void DoStartProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { - - } - virtual void DoFinishProposeOnExecute(TColumnShard& /*owner*/, NTabletFlatExecutor::TTransactionContext& /*txc*/) override { - } - virtual void DoFinishProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { - } - virtual bool DoIsAsync() const override { - return false; - } - virtual bool DoCheckAllowUpdate(const TFullTxInfo& currentTxInfo) const override { - return (currentTxInfo.Source == GetTxInfo().Source && currentTxInfo.Cookie == GetTxInfo().Cookie); - } - virtual TString DoDebugString() const override { - return "EV_WRITE"; - } - 
virtual void DoSendReply(TColumnShard& owner, const TActorContext& ctx) override { - const auto& txInfo = GetTxInfo(); - std::unique_ptr evResult; - if (IsFail()) { - evResult = NEvents::TDataEvents::TEvWriteResult::BuildError(owner.TabletID(), txInfo.GetTxId(), NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, GetProposeStartInfoVerified().GetStatusMessage()); - } else { - evResult = NEvents::TDataEvents::TEvWriteResult::BuildPrepared(owner.TabletID(), txInfo.GetTxId(), owner.GetProgressTxController().BuildCoordinatorInfo(txInfo)); - } - ctx.Send(txInfo.Source, evResult.release(), 0, txInfo.Cookie); - } - - virtual bool DoParse(TColumnShard& /*owner*/, const TString& data) override { - NKikimrTxColumnShard::TCommitWriteTxBody commitTxBody; - if (!commitTxBody.ParseFromString(data)) { - return false; - } - LockId = commitTxBody.GetLockId(); - return !!LockId; - } - - public: - using TBase::TBase; - - virtual bool ProgressOnExecute( - TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override { - return owner.OperationsManager->CommitTransaction(owner, GetTxId(), txc, version); - } - - virtual bool ProgressOnComplete(TColumnShard& owner, const TActorContext& ctx) override { - auto result = NEvents::TDataEvents::TEvWriteResult::BuildCompleted(owner.TabletID(), GetTxId()); - ctx.Send(TxInfo.Source, result.release(), 0, TxInfo.Cookie); - return true; - } - - virtual bool ExecuteOnAbort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override { - return owner.OperationsManager->AbortTransaction(owner, GetTxId(), txc); - } - virtual bool CompleteOnAbort(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { - return true; - } - - private: - ui64 LockId = 0; - }; - -} diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/abstract.cpp b/ydb/core/tx/columnshard/transactions/operators/ev_write/abstract.cpp new file mode 100644 index 000000000000..1217abd09701 --- 
/dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/abstract.cpp @@ -0,0 +1,5 @@ +#include "abstract.h" + +namespace NKikimr::NColumnShard { + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/abstract.h b/ydb/core/tx/columnshard/transactions/operators/ev_write/abstract.h new file mode 100644 index 000000000000..5be836f1d520 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/abstract.h @@ -0,0 +1,113 @@ +#pragma once + +#include + +namespace NKikimr::NColumnShard { + +class TBaseEvWriteTransactionOperator: public TTxController::ITransactionOperator { +private: + using TBase = TTxController::ITransactionOperator; + using TProposeResult = TTxController::TProposeResult; + +protected: + ui64 LockId = 0; + +private: + virtual bool DoParseImpl(TColumnShard& owner, const NKikimrTxColumnShard::TCommitWriteTxBody& commitTxBody) = 0; + virtual TProposeResult DoStartProposeOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override final { + owner.GetOperationsManager().LinkTransactionOnExecute(LockId, GetTxId(), txc); + return TProposeResult(); + } + virtual void DoStartProposeOnComplete(TColumnShard& owner, const TActorContext& /*ctx*/) override final { + owner.GetOperationsManager().LinkTransactionOnComplete(LockId, GetTxId()); + } + virtual void DoFinishProposeOnExecute(TColumnShard& /*owner*/, NTabletFlatExecutor::TTransactionContext& /*txc*/) override final { + } + virtual void DoFinishProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override final { + } + virtual bool DoCheckAllowUpdate(const TFullTxInfo& currentTxInfo) const override final { + return (currentTxInfo.Source == GetTxInfo().Source && currentTxInfo.Cookie == GetTxInfo().Cookie); + } + virtual bool DoParse(TColumnShard& owner, const TString& data) override final { + NKikimrTxColumnShard::TCommitWriteTxBody commitTxBody; + if 
(!commitTxBody.ParseFromString(data)) { + return false; + } + LockId = commitTxBody.GetLockId(); + return DoParseImpl(owner, commitTxBody); + } + + virtual bool DoIsAsync() const override final { + return false; + } + + virtual void DoSendReply(TColumnShard& owner, const TActorContext& ctx) override { + const auto& txInfo = GetTxInfo(); + std::unique_ptr evResult; + TLogContextGuard gLogging( + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("send_reply_tx_id", GetTxId())("send_reply_lock_id", LockId)); + if (IsFail()) { + evResult = NEvents::TDataEvents::TEvWriteResult::BuildError(owner.TabletID(), txInfo.GetTxId(), + NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, GetProposeStartInfoVerified().GetStatusMessage()); + } else { + evResult = NEvents::TDataEvents::TEvWriteResult::BuildPrepared( + owner.TabletID(), txInfo.GetTxId(), owner.GetProgressTxController().BuildCoordinatorInfo(txInfo)); + } + ctx.Send(txInfo.Source, evResult.release(), 0, txInfo.Cookie); + } + std::optional Version; + +public: + using TBase::TBase; + TBaseEvWriteTransactionOperator(const TFullTxInfo& txInfo, const ui64 lockId) + : TBase(txInfo) + , LockId(lockId) { + } + + virtual bool IsTxBroken() const { + return false; + } + + ui64 GetLockId() const { + return LockId; + } + + virtual bool ProgressOnExecute( + TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override { + Version = version; + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("progress_tx_id", GetTxId())("lock_id", LockId)("broken", IsTxBroken()); + if (IsTxBroken()) { + owner.GetOperationsManager().AbortTransactionOnExecute(owner, GetTxId(), txc); + } else { + owner.GetOperationsManager().CommitTransactionOnExecute(owner, GetTxId(), txc, version); + } + return true; + } + + virtual bool ProgressOnComplete(TColumnShard& owner, const TActorContext& ctx) override { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("progress_tx_id", GetTxId())("lock_id", 
LockId)("broken", IsTxBroken()); + AFL_VERIFY(Version); + if (IsTxBroken()) { + owner.GetOperationsManager().AbortTransactionOnComplete(owner, GetTxId()); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError( + owner.TabletID(), GetTxId(), NKikimrDataEvents::TEvWriteResult::STATUS_LOCKS_BROKEN, "lock invalidated"); + ctx.Send(TxInfo.Source, result.release(), 0, TxInfo.Cookie); + } else { + owner.GetOperationsManager().CommitTransactionOnComplete(owner, GetTxId(), *Version); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildCompleted(owner.TabletID(), GetTxId()); + ctx.Send(TxInfo.Source, result.release(), 0, TxInfo.Cookie); + } + return true; + } + + virtual bool ExecuteOnAbort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override { + owner.GetOperationsManager().AbortTransactionOnExecute(owner, GetTxId(), txc); + return true; + } + virtual bool CompleteOnAbort(TColumnShard& owner, const TActorContext& /*ctx*/) override { + owner.GetOperationsManager().AbortTransactionOnComplete(owner, GetTxId()); + return true; + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/primary.cpp b/ydb/core/tx/columnshard/transactions/operators/ev_write/primary.cpp new file mode 100644 index 000000000000..9e33c29d2053 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/primary.cpp @@ -0,0 +1,5 @@ +#include "primary.h" + +namespace NKikimr::NColumnShard { + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/primary.h b/ydb/core/tx/columnshard/transactions/operators/ev_write/primary.h new file mode 100644 index 000000000000..9073e7458ae3 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/primary.h @@ -0,0 +1,287 @@ +#pragma once + +#include "sync.h" + +#include +#include + +#include + +namespace NKikimr::NColumnShard { + +class TEvWriteCommitPrimaryTransactionOperator: public 
TEvWriteCommitSyncTransactionOperator, + public TMonitoringObjectsCounter { +private: + using TBase = TEvWriteCommitSyncTransactionOperator; + using TProposeResult = TTxController::TProposeResult; + static inline auto Registrator = + TFactory::TRegistrator(NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE_PRIMARY); + +private: + std::set ReceivingShards; + std::set SendingShards; + std::set WaitShardsBrokenFlags; + std::set WaitShardsResultAck; + std::optional TxBroken; + mutable TAtomicCounter ControlCounter = 0; + + virtual NKikimrTxColumnShard::TCommitWriteTxBody SerializeToProto() const override { + NKikimrTxColumnShard::TCommitWriteTxBody result; + auto& data = *result.MutablePrimaryTabletData(); + if (TxBroken) { + data.SetTxBroken(*TxBroken); + } + for (auto&& i : ReceivingShards) { + data.AddReceivingShards(i); + } + for (auto&& i : SendingShards) { + data.AddSendingShards(i); + } + for (auto&& i : WaitShardsBrokenFlags) { + data.AddWaitShardsBrokenFlags(i); + } + for (auto&& i : WaitShardsResultAck) { + data.AddWaitShardsResultAck(i); + } + return result; + } + + virtual bool DoParseImpl(TColumnShard& /*owner*/, const NKikimrTxColumnShard::TCommitWriteTxBody& commitTxBody) override { + if (!commitTxBody.HasPrimaryTabletData()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot read proto")("proto", commitTxBody.DebugString()); + return false; + } + auto& protoData = commitTxBody.GetPrimaryTabletData(); + for (auto&& i : protoData.GetReceivingShards()) { + ReceivingShards.emplace(i); + } + for (auto&& i : protoData.GetSendingShards()) { + SendingShards.emplace(i); + } + for (auto&& i : protoData.GetWaitShardsBrokenFlags()) { + WaitShardsBrokenFlags.emplace(i); + } + for (auto&& i : protoData.GetWaitShardsResultAck()) { + WaitShardsResultAck.emplace(i); + } + AFL_VERIFY(ReceivingShards.empty() == SendingShards.empty()); + if (protoData.HasTxBroken()) { + TxBroken = protoData.GetTxBroken(); + } + return true; + } + +private: + virtual TString 
DoGetOpType() const override { + return "EvWritePrimary"; + } + virtual TString DoDebugString() const override { + return "EV_WRITE_PRIMARY"; + } + class TTxWriteReceivedBrokenFlag: public NOlap::NDataSharing::TExtendedTransactionBase { + private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const ui64 TxId; + const ui64 TabletId; + const bool BrokenFlag; + + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const NActors::TActorContext& /*ctx*/) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + auto copy = *op; + if (copy.WaitShardsBrokenFlags.erase(TabletId)) { + copy.TxBroken = copy.TxBroken.value_or(false) || BrokenFlag; + Self->GetProgressTxController().WriteTxOperatorInfo(txc, TxId, copy.SerializeToProto().SerializeAsString()); + } else { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "repeated shard broken_flag info")("shard_id", TabletId); + } + return true; + } + virtual void DoComplete(const NActors::TActorContext& /*ctx*/) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + if (op->WaitShardsBrokenFlags.erase(TabletId)) { + op->TxBroken = op->TxBroken.value_or(false) || BrokenFlag; + op->SendBrokenFlagAck(*Self, TabletId); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "remove_tablet_id")("wait", JoinSeq(",", op->WaitShardsBrokenFlags))( + "receive", TabletId); + op->InitializeRequests(*Self); + } else { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "repeated shard broken_flag info")("shard_id", TabletId); + } + } + + public: + TTxWriteReceivedBrokenFlag(TColumnShard& owner, const ui64 txId, const ui64 tabletId, const bool broken) + : TBase(&owner, ::ToString(txId)) + , TxId(txId) + , TabletId(tabletId) + , BrokenFlag(broken) { + } + }; + + virtual std::unique_ptr CreateReceiveBrokenFlagTx( + TColumnShard& owner, const ui64 sendTabletId, const bool broken) const override { + return std::make_unique(owner, GetTxId(), 
sendTabletId, broken); + } + + class TTxWriteReceivedResultAck: public NOlap::NDataSharing::TExtendedTransactionBase { + private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const ui64 TxId; + const ui64 TabletId; + + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const NActors::TActorContext& /*ctx*/) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + auto copy = *op; + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "ack_tablet")("wait", JoinSeq(",", op->WaitShardsResultAck))("receive", TabletId); + AFL_VERIFY(copy.WaitShardsResultAck.erase(TabletId)); + Self->GetProgressTxController().WriteTxOperatorInfo(txc, TxId, copy.SerializeToProto().SerializeAsString()); + return true; + } + virtual void DoComplete(const NActors::TActorContext& /*ctx*/) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "ack_tablet")("wait", JoinSeq(",", op->WaitShardsResultAck))( + "receive", TabletId); + AFL_VERIFY(op->WaitShardsResultAck.erase(TabletId)); + op->CheckFinished(*Self); + } + + public: + TTxWriteReceivedResultAck(TColumnShard& owner, const ui64 txId, const ui64 tabletId) + : TBase(&owner) + , TxId(txId) + , TabletId(tabletId) { + } + }; + + virtual bool IsTxBroken() const override { + AFL_VERIFY(TxBroken); + return *TxBroken; + } + + void InitializeRequests(TColumnShard& owner) { + if (WaitShardsBrokenFlags.empty()) { + WaitShardsResultAck.erase(owner.TabletID()); + if (WaitShardsResultAck.size()) { + SendResult(owner); + } else { + CheckFinished(owner); + } + } + } + + void CheckFinished(TColumnShard& owner) { + if (WaitShardsResultAck.empty()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "finished"); + owner.EnqueueProgressTx(NActors::TActivationContext::AsActorContext(), GetTxId()); + } + } + + virtual std::unique_ptr CreateReceiveResultAckTx( + TColumnShard& owner, const ui64 
recvTabletId) const override + { + return std::make_unique(owner, GetTxId(), recvTabletId); + } + + void SendBrokenFlagAck(TColumnShard& owner, const std::optional tabletId = {}) { + for (auto&& i : SendingShards) { + if (!WaitShardsBrokenFlags.contains(i)) { + if (tabletId && *tabletId != i) { + continue; + } + NActors::TActivationContext::AsActorContext().Send(MakePipePerNodeCacheID(EPipePerNodeCache::Persistent), + new TEvPipeCache::TEvForward( + new TEvTxProcessing::TEvReadSetAck(0, GetTxId(), owner.TabletID(), i, owner.TabletID(), 0), i, true), + IEventHandle::FlagTrackDelivery, GetTxId()); + } + } + } + + void SendResult(TColumnShard& owner) { + AFL_VERIFY(!!TxBroken); + NKikimrTx::TReadSetData readSetData; + readSetData.SetDecision(*TxBroken ? NKikimrTx::TReadSetData::DECISION_ABORT : NKikimrTx::TReadSetData::DECISION_COMMIT); + for (auto&& i : ReceivingShards) { + if (WaitShardsResultAck.contains(i)) { + NActors::TActivationContext::AsActorContext().Send(MakePipePerNodeCacheID(EPipePerNodeCache::Persistent), + new TEvPipeCache::TEvForward( + new TEvTxProcessing::TEvReadSet(0, GetTxId(), owner.TabletID(), i, owner.TabletID(), readSetData.SerializeAsString()), i, + true), + IEventHandle::FlagTrackDelivery, GetTxId()); + } + } + } + + virtual void DoOnTabletInit(TColumnShard& owner) override { + InitializeRequests(owner); + CheckFinished(owner); + } + + class TTxStartPreparation: public NOlap::NDataSharing::TExtendedTransactionBase { + private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const ui64 TxId; + + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const NActors::TActorContext& /*ctx*/) override { + auto& lock = Self->GetOperationsManager().GetLockVerified(Self->GetOperationsManager().GetLockForTxVerified(TxId)); + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + if (op->WaitShardsBrokenFlags.contains(Self->TabletID())) { + auto copy = *op; + copy.TxBroken = lock.IsBroken(); + 
AFL_VERIFY(copy.WaitShardsBrokenFlags.erase(Self->TabletID())); + if (copy.WaitShardsBrokenFlags.empty()) { + AFL_VERIFY(copy.WaitShardsResultAck.erase(Self->TabletID())); + } + + Self->GetProgressTxController().WriteTxOperatorInfo(txc, TxId, copy.SerializeToProto().SerializeAsString()); + } + return true; + } + virtual void DoComplete(const NActors::TActorContext& /*ctx*/) override { + auto& lock = Self->GetOperationsManager().GetLockVerified(Self->GetOperationsManager().GetLockForTxVerified(TxId)); + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + if (op->WaitShardsBrokenFlags.contains(Self->TabletID())) { + op->TxBroken = lock.IsBroken(); + AFL_VERIFY(op->WaitShardsBrokenFlags.erase(Self->TabletID())); + if (op->WaitShardsBrokenFlags.empty()) { + AFL_VERIFY(op->WaitShardsResultAck.erase(Self->TabletID())); + } + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "remove_tablet_id")("wait", JoinSeq(",", op->WaitShardsBrokenFlags))( + "receive", Self->TabletID()); + op->CheckFinished(*Self); + } + } + + public: + TTxStartPreparation(TColumnShard* owner, const ui64 txId) + : TBase(owner) + , TxId(txId) { + } + }; + + virtual void OnTimeout(TColumnShard& owner) override { + InitializeRequests(owner); + } + + virtual std::unique_ptr DoBuildTxPrepareForProgress(TColumnShard* owner) const override { + if (WaitShardsResultAck.empty()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "skip_prepare_for_progress")("lock_id", LockId); + return nullptr; + } + AFL_VERIFY(ControlCounter.Inc() <= 1); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "prepare_for_progress_started")("lock_id", LockId); + return std::make_unique(owner, GetTxId()); + } + +public: + using TBase::TBase; + TEvWriteCommitPrimaryTransactionOperator( + const TFullTxInfo& txInfo, const ui64 lockId, const std::set& receivingShards, const std::set& sendingShards) + : TBase(txInfo, lockId) + , ReceivingShards(receivingShards) + , SendingShards(sendingShards) { + 
WaitShardsBrokenFlags = SendingShards; + WaitShardsResultAck = ReceivingShards; + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/secondary.cpp b/ydb/core/tx/columnshard/transactions/operators/ev_write/secondary.cpp new file mode 100644 index 000000000000..30f6078321e9 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/secondary.cpp @@ -0,0 +1,5 @@ +#include "secondary.h" + +namespace NKikimr::NColumnShard { + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/secondary.h b/ydb/core/tx/columnshard/transactions/operators/ev_write/secondary.h new file mode 100644 index 000000000000..ae0224057b46 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/secondary.h @@ -0,0 +1,219 @@ +#pragma once + +#include "sync.h" + +#include +#include + +namespace NKikimr::NColumnShard { + +class TEvWriteCommitSecondaryTransactionOperator: public TEvWriteCommitSyncTransactionOperator, + public TMonitoringObjectsCounter { +private: + using TBase = TEvWriteCommitSyncTransactionOperator; + using TProposeResult = TTxController::TProposeResult; + static inline auto Registrator = + TFactory::TRegistrator(NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE_SECONDARY); + +private: + ui64 ArbiterTabletId; + bool NeedReceiveBroken = false; + bool ReceiveAck = false; + bool SelfBroken = false; + mutable TAtomicCounter ControlCounter = 0; + std::optional TxBroken; + + virtual NKikimrTxColumnShard::TCommitWriteTxBody SerializeToProto() const override { + NKikimrTxColumnShard::TCommitWriteTxBody result; + auto& data = *result.MutableSecondaryTabletData(); + if (TxBroken) { + data.SetTxBroken(*TxBroken); + } + data.SetSelfBroken(SelfBroken); + data.SetNeedReceiveBroken(NeedReceiveBroken); + data.SetReceiveAck(ReceiveAck); + data.SetArbiterTabletId(ArbiterTabletId); + return result; + } + + virtual bool DoParseImpl(TColumnShard& /*owner*/, 
const NKikimrTxColumnShard::TCommitWriteTxBody& commitTxBody) override { + if (!commitTxBody.HasSecondaryTabletData()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot read proto")("proto", commitTxBody.DebugString()); + return false; + } + auto& protoData = commitTxBody.GetSecondaryTabletData(); + SelfBroken = protoData.GetSelfBroken(); + ArbiterTabletId = protoData.GetArbiterTabletId(); + NeedReceiveBroken = protoData.GetNeedReceiveBroken(); + ReceiveAck = protoData.GetReceiveAck(); + if (protoData.HasTxBroken()) { + TxBroken = protoData.GetTxBroken(); + } + return true; + } + +private: + virtual TString DoGetOpType() const override { + return "EvWriteSecondary"; + } + virtual TString DoDebugString() const override { + return "EV_WRITE_SECONDARY"; + } + class TTxWriteReceivedAck: public NOlap::NDataSharing::TExtendedTransactionBase { + private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const ui64 TxId; + + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const NActors::TActorContext& /*ctx*/) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + auto copy = *op; + copy.ReceiveAck = true; + auto proto = copy.SerializeToProto(); + Self->GetProgressTxController().WriteTxOperatorInfo(txc, TxId, proto.SerializeAsString()); + return true; + } + virtual void DoComplete(const NActors::TActorContext& ctx) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + op->ReceiveAck = true; + if (!op->NeedReceiveBroken) { + op->TxBroken = false; + Self->EnqueueProgressTx(ctx, TxId); + } + } + + public: + TTxWriteReceivedAck(TColumnShard& owner, const ui64 txId) + : TBase(&owner) + , TxId(txId) { + } + }; + + virtual std::unique_ptr CreateReceiveResultAckTx( + TColumnShard& owner, const ui64 recvTabletId) const override { + AFL_VERIFY(recvTabletId == ArbiterTabletId)("recv", recvTabletId)("arbiter", ArbiterTabletId); + return std::make_unique(owner, 
GetTxId()); + } + + class TTxWriteReceivedBrokenFlag: public NOlap::NDataSharing::TExtendedTransactionBase { + private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const ui64 TxId; + const bool BrokenFlag; + + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const NActors::TActorContext& /*ctx*/) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + auto copy = *op; + copy.TxBroken = BrokenFlag; + auto proto = copy.SerializeToProto(); + Self->GetProgressTxController().WriteTxOperatorInfo(txc, TxId, proto.SerializeAsString()); + if (BrokenFlag) { + Self->GetProgressTxController().ExecuteOnCancel(TxId, txc); + } + return true; + } + virtual void DoComplete(const NActors::TActorContext& ctx) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + op->TxBroken = BrokenFlag; + op->SendBrokenFlagAck(*Self); + if (BrokenFlag) { + Self->GetProgressTxController().CompleteOnCancel(TxId, ctx); + } + Self->EnqueueProgressTx(ctx, TxId); + } + + public: + TTxWriteReceivedBrokenFlag(TColumnShard* owner, const ui64 txId, const bool broken) + : TBase(owner) + , TxId(txId) + , BrokenFlag(broken) { + } + }; + + virtual std::unique_ptr CreateReceiveBrokenFlagTx( + TColumnShard& owner, const ui64 sendTabletId, const bool broken) const override { + AFL_VERIFY(ArbiterTabletId == sendTabletId); + return std::make_unique(&owner, GetTxId(), broken); + } + + void SendBrokenFlagAck(TColumnShard& owner) { + NActors::TActivationContext::AsActorContext().Send(MakePipePerNodeCacheID(EPipePerNodeCache::Persistent), + new TEvPipeCache::TEvForward( + new TEvTxProcessing::TEvReadSetAck(0, GetTxId(), owner.TabletID(), ArbiterTabletId, owner.TabletID(), 0), ArbiterTabletId, true), + IEventHandle::FlagTrackDelivery, GetTxId()); + } + + void SendResult(TColumnShard& owner) { + NKikimrTx::TReadSetData readSetData; + readSetData.SetDecision(SelfBroken ? 
NKikimrTx::TReadSetData::DECISION_ABORT : NKikimrTx::TReadSetData::DECISION_COMMIT); + NActors::TActivationContext::AsActorContext().Send(MakePipePerNodeCacheID(EPipePerNodeCache::Persistent), + new TEvPipeCache::TEvForward(new TEvTxProcessing::TEvReadSet( + 0, GetTxId(), owner.TabletID(), ArbiterTabletId, owner.TabletID(), readSetData.SerializeAsString()), + ArbiterTabletId, true), + IEventHandle::FlagTrackDelivery, GetTxId()); + } + + virtual void DoOnTabletInit(TColumnShard& owner) override { + if (TxBroken || (ReceiveAck && !NeedReceiveBroken)) { + owner.EnqueueProgressTx(NActors::TActivationContext::AsActorContext(), GetTxId()); + } else if (!ReceiveAck) { + SendResult(owner); + } + } + + class TTxStartPreparation: public NOlap::NDataSharing::TExtendedTransactionBase { + private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const ui64 TxId; + + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const NActors::TActorContext& /*ctx*/) override { + auto& lock = Self->GetOperationsManager().GetLockVerified(Self->GetOperationsManager().GetLockForTxVerified(TxId)); + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + auto copy = *op; + copy.SelfBroken = lock.IsBroken(); + Self->GetProgressTxController().WriteTxOperatorInfo(txc, TxId, copy.SerializeToProto().SerializeAsString()); + return true; + } + virtual void DoComplete(const NActors::TActorContext& /*ctx*/) override { + auto& lock = Self->GetOperationsManager().GetLockVerified(Self->GetOperationsManager().GetLockForTxVerified(TxId)); + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + op->SelfBroken = lock.IsBroken(); + op->SendResult(*Self); + } + + public: + TTxStartPreparation(TColumnShard* owner, const ui64 txId) + : TBase(owner) + , TxId(txId) { + } + }; + + virtual std::unique_ptr DoBuildTxPrepareForProgress(TColumnShard* owner) const override { + if (TxBroken || (!NeedReceiveBroken && ReceiveAck)) { + 
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "skip_prepare_for_progress")("lock_id", LockId); + return nullptr; + } + AFL_VERIFY(ControlCounter.Inc() <= 1); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "prepare_for_progress_started")("lock_id", LockId); + return std::make_unique(owner, GetTxId()); + } + + virtual void OnTimeout(TColumnShard& owner) override { + SendResult(owner); + } + +public: + using TBase::TBase; + virtual bool IsTxBroken() const override { + AFL_VERIFY(TxBroken); + return *TxBroken; + } + + TEvWriteCommitSecondaryTransactionOperator( + const TFullTxInfo& txInfo, const ui64 lockId, const ui64 arbiterTabletId, const bool needReceiveBroken) + : TBase(txInfo, lockId) + , ArbiterTabletId(arbiterTabletId) + , NeedReceiveBroken(needReceiveBroken) { + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/simple.cpp b/ydb/core/tx/columnshard/transactions/operators/ev_write/simple.cpp new file mode 100644 index 000000000000..a6c51118a30e --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/simple.cpp @@ -0,0 +1,5 @@ +#include "simple.h" + +namespace NKikimr::NColumnShard { + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/simple.h b/ydb/core/tx/columnshard/transactions/operators/ev_write/simple.h new file mode 100644 index 000000000000..e3301117bb48 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/simple.h @@ -0,0 +1,28 @@ +#pragma once + +#include "abstract.h" + +#include + +namespace NKikimr::NColumnShard { + +class TEvWriteSimpleCommitTransactionOperator: public TBaseEvWriteTransactionOperator, + public TMonitoringObjectsCounter { +private: + using TBase = TBaseEvWriteTransactionOperator; + virtual bool DoParseImpl(TColumnShard& /*owner*/, const NKikimrTxColumnShard::TCommitWriteTxBody& /*commitTxBody*/) override { + return true; + } + static inline auto Registrator = 
TFactory::TRegistrator(NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE);
+
+public:
+    using TBase::TBase;
+    virtual TString DoGetOpType() const override {
+        return "EvWriteSimple";
+    }
+    virtual TString DoDebugString() const override {
+        return "EV_WRITE_SIMPLE";
+    }
+};
+
+} // namespace NKikimr::NColumnShard
diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/sync.cpp b/ydb/core/tx/columnshard/transactions/operators/ev_write/sync.cpp
new file mode 100644
index 000000000000..a8a75f586177
--- /dev/null
+++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/sync.cpp
@@ -0,0 +1,5 @@
+#include "sync.h"
+
+namespace NKikimr::NColumnShard {
+
+} // namespace NKikimr::NColumnShard
diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/sync.h b/ydb/core/tx/columnshard/transactions/operators/ev_write/sync.h
new file mode 100644
index 000000000000..8d149cca8100
--- /dev/null
+++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/sync.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "abstract.h"
+
+#include
+
+namespace NKikimr::NColumnShard {
+
+class TEvWriteCommitSyncTransactionOperator: public TBaseEvWriteTransactionOperator { // base of TEvWriteCommitSecondaryTransactionOperator
+private:
+    using TBase = TBaseEvWriteTransactionOperator;
+    mutable std::optional DeadlockControlInstant; // instant the 2s timeout window was last armed; empty before the first ping
+    virtual void OnTimeout(TColumnShard& owner) = 0; // hook called by DoPingTimeout each time the window expires
+
+    virtual bool DoPingTimeout(TColumnShard& owner, const TMonotonic now) override final { // returns true only on pings where OnTimeout fired
+        if (!DeadlockControlInstant) { // first ping: only arm the timer
+            DeadlockControlInstant = now;
+        } else if (now - *DeadlockControlInstant > TDuration::Seconds(2)) { // more than 2s since the timer was armed
+            AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "tx_timeout")("lock", LockId)("tx_id", GetTxId())(
+                "d", now - *DeadlockControlInstant);
+            DeadlockControlInstant = now; // re-arm, so OnTimeout fires at most once per 2s window
+            OnTimeout(owner);
+            return true;
+        }
+        return false;
+    }
+
+public:
+    using TBase::TBase;
+
+    virtual std::unique_ptr CreateReceiveResultAckTx(TColumnShard& owner, const ui64 recvTabletId) const = 0;
+    virtual std::unique_ptr CreateReceiveBrokenFlagTx(
+
TColumnShard& owner, const ui64 sendTabletId, const bool broken) const = 0; + virtual NKikimrTxColumnShard::TCommitWriteTxBody SerializeToProto() const = 0; +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/ya.make b/ydb/core/tx/columnshard/transactions/operators/ev_write/ya.make new file mode 100644 index 000000000000..c7283c74fa3d --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/ya.make @@ -0,0 +1,18 @@ +LIBRARY() + +SRCS( + GLOBAL secondary.cpp + GLOBAL simple.cpp + GLOBAL primary.cpp + abstract.cpp + sync.cpp +) + +PEERDIR( + ydb/services/metadata/abstract + ydb/core/tx/columnshard/blobs_action/events + ydb/core/tx/columnshard/data_sharing/destination/events + ydb/core/tx/columnshard/transactions/locks +) + +END() diff --git a/ydb/core/tx/columnshard/transactions/operators/long_tx_write.cpp b/ydb/core/tx/columnshard/transactions/operators/long_tx_write.cpp index 3c04dcdff64a..2b0474ad4531 100644 --- a/ydb/core/tx/columnshard/transactions/operators/long_tx_write.cpp +++ b/ydb/core/tx/columnshard/transactions/operators/long_tx_write.cpp @@ -22,7 +22,7 @@ TLongTxTransactionOperator::TProposeResult TLongTxTransactionOperator::DoStartPr auto it = owner.InsertTable->GetInserted().find(writeId); if (it != owner.InsertTable->GetInserted().end()) { - auto granuleShardingInfo = owner.GetIndexAs().GetVersionedIndex().GetShardingInfoActual(it->second.PathId); + auto granuleShardingInfo = owner.GetIndexAs().GetVersionedIndex().GetShardingInfoActual(it->second.GetPathId()); if (granuleShardingInfo && lw.GranuleShardingVersionId && *lw.GranuleShardingVersionId != granuleShardingInfo->GetSnapshotVersion()) { return TProposeResult(NKikimrTxColumnShard::EResultStatus::ERROR, TStringBuilder() << "Commit TxId# " << GetTxId() << " references WriteId# " << (ui64)writeId << " declined through sharding deprecated"); @@ -43,9 +43,14 @@ bool TLongTxTransactionOperator::DoParse(TColumnShard& 
/*owner*/, const TString& } for (auto& id : commitTxBody.GetWriteIds()) { - WriteIds.insert(TWriteId{ id }); + WriteIds.insert(TInsertWriteId{ id }); } return true; } +void TLongTxTransactionOperator::DoSendReply(TColumnShard& owner, const TActorContext& ctx) { + const auto& txInfo = GetTxInfo(); + ctx.Send(txInfo.Source, BuildProposeResultEvent(owner).release()); +} + } diff --git a/ydb/core/tx/columnshard/transactions/operators/long_tx_write.h b/ydb/core/tx/columnshard/transactions/operators/long_tx_write.h index 35adecab6092..45b642c8e98a 100644 --- a/ydb/core/tx/columnshard/transactions/operators/long_tx_write.h +++ b/ydb/core/tx/columnshard/transactions/operators/long_tx_write.h @@ -24,10 +24,15 @@ namespace NKikimr::NColumnShard { virtual void DoStartProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { } + virtual void DoSendReply(TColumnShard& owner, const TActorContext& ctx) override; + virtual void DoFinishProposeOnExecute(TColumnShard& /*owner*/, NTabletFlatExecutor::TTransactionContext& /*txc*/) override { } virtual void DoFinishProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { } + virtual TString DoGetOpType() const override { + return "LongTxWrite"; + } virtual bool DoIsAsync() const override { return false; } @@ -41,7 +46,7 @@ namespace NKikimr::NColumnShard { public: using TBase::TBase; - void OnTabletInit(TColumnShard& owner) override { + virtual void DoOnTabletInit(TColumnShard& owner) override { for (auto&& writeId : WriteIds) { AFL_VERIFY(owner.LongTxWrites.contains(writeId))("problem", "ltx_not_exists_for_write_id")("txId", GetTxId())("writeId", (ui64)writeId); owner.AddLongTxWrite(writeId, GetTxId()); @@ -58,12 +63,12 @@ namespace NKikimr::NColumnShard { auto counters = owner.InsertTable->Commit(dbTable, version.GetPlanStep(), version.GetTxId(), WriteIds, pathExists); - owner.IncCounter(COUNTER_BLOBS_COMMITTED, counters.Rows); - owner.IncCounter(COUNTER_BYTES_COMMITTED, 
counters.Bytes); - owner.IncCounter(COUNTER_RAW_BYTES_COMMITTED, counters.RawBytes); + owner.Counters.GetTabletCounters()->IncCounter(COUNTER_BLOBS_COMMITTED, counters.Rows); + owner.Counters.GetTabletCounters()->IncCounter(COUNTER_BYTES_COMMITTED, counters.Bytes); + owner.Counters.GetTabletCounters()->IncCounter(COUNTER_RAW_BYTES_COMMITTED, counters.RawBytes); NIceDb::TNiceDb db(txc.DB); - for (TWriteId writeId : WriteIds) { + for (TInsertWriteId writeId : WriteIds) { AFL_VERIFY(owner.RemoveLongTxWrite(db, writeId, GetTxId())); } owner.UpdateInsertTableCounters(); @@ -79,7 +84,7 @@ namespace NKikimr::NColumnShard { virtual bool ExecuteOnAbort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override { NIceDb::TNiceDb db(txc.DB); - for (TWriteId writeId : WriteIds) { + for (TInsertWriteId writeId : WriteIds) { AFL_VERIFY(owner.RemoveLongTxWrite(db, writeId, GetTxId())); } TBlobGroupSelector dsGroupSelector(owner.Info()); @@ -92,7 +97,7 @@ namespace NKikimr::NColumnShard { } private: - THashSet WriteIds; + THashSet WriteIds; }; } // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/propose_tx.cpp b/ydb/core/tx/columnshard/transactions/operators/propose_tx.cpp index c7d39da52740..2a48ca49a279 100644 --- a/ydb/core/tx/columnshard/transactions/operators/propose_tx.cpp +++ b/ydb/core/tx/columnshard/transactions/operators/propose_tx.cpp @@ -3,21 +3,36 @@ namespace NKikimr::NColumnShard { void IProposeTxOperator::DoSendReply(TColumnShard& owner, const TActorContext& ctx) { + if (owner.CurrentSchemeShardId) { + AFL_VERIFY(owner.CurrentSchemeShardId); + ctx.Send(MakePipePerNodeCacheID(false), + new TEvPipeCache::TEvForward(BuildProposeResultEvent(owner).release(), (ui64)owner.CurrentSchemeShardId, true)); + } else { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "scheme_shard_tablet_not_initialized")("source", GetTxInfo().Source); + ctx.Send(GetTxInfo().Source, BuildProposeResultEvent(owner).release()); + } 
+} + +std::unique_ptr IProposeTxOperator::BuildProposeResultEvent(const TColumnShard& owner) const { const auto& txInfo = GetTxInfo(); - std::unique_ptr evResult = std::make_unique( - owner.TabletID(), txInfo.TxKind, txInfo.TxId, GetProposeStartInfoVerified().GetStatus(), GetProposeStartInfoVerified().GetStatusMessage()); + std::unique_ptr evResult = + std::make_unique(owner.TabletID(), txInfo.TxKind, txInfo.TxId, + GetProposeStartInfoVerified().GetStatus(), GetProposeStartInfoVerified().GetStatusMessage()); if (IsFail()) { - owner.IncCounter(COUNTER_PREPARE_ERROR); - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("message", GetProposeStartInfoVerified().GetStatusMessage())("tablet_id", owner.TabletID())("tx_id", txInfo.TxId); + owner.Counters.GetTabletCounters()->IncCounter(COUNTER_PREPARE_ERROR); + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("message", GetProposeStartInfoVerified().GetStatusMessage())("tablet_id", owner.TabletID())( + "tx_id", txInfo.TxId); } else { evResult->Record.SetMinStep(txInfo.MinStep); evResult->Record.SetMaxStep(txInfo.MaxStep); if (owner.ProcessingParams) { evResult->Record.MutableDomainCoordinators()->CopyFrom(owner.ProcessingParams->GetCoordinators()); } - owner.IncCounter(COUNTER_PREPARE_SUCCESS); + owner.Counters.GetTabletCounters()->IncCounter(COUNTER_PREPARE_SUCCESS); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("message", GetProposeStartInfoVerified().GetStatusMessage())("tablet_id", owner.TabletID())( + "tx_id", txInfo.TxId); } - ctx.Send(txInfo.Source, evResult.release()); + return evResult; } } diff --git a/ydb/core/tx/columnshard/transactions/operators/propose_tx.h b/ydb/core/tx/columnshard/transactions/operators/propose_tx.h index 84b2f7e8db66..d867e71bad9b 100644 --- a/ydb/core/tx/columnshard/transactions/operators/propose_tx.h +++ b/ydb/core/tx/columnshard/transactions/operators/propose_tx.h @@ -12,6 +12,7 @@ class IProposeTxOperator: public TTxController::ITransactionOperator { virtual bool DoCheckTxInfoForReply(const 
TFullTxInfo& originalTxInfo) const override { return GetTxInfo() == originalTxInfo; } + std::unique_ptr BuildProposeResultEvent(const TColumnShard& owner) const; virtual void DoSendReply(TColumnShard& owner, const TActorContext& ctx) override; virtual bool DoCheckAllowUpdate(const TFullTxInfo& currentTxInfo) const override { if (!currentTxInfo.SeqNo || !GetTxInfo().SeqNo) { diff --git a/ydb/core/tx/columnshard/transactions/operators/schema.cpp b/ydb/core/tx/columnshard/transactions/operators/schema.cpp index 13f854e277b5..d4019542bf1e 100644 --- a/ydb/core/tx/columnshard/transactions/operators/schema.cpp +++ b/ydb/core/tx/columnshard/transactions/operators/schema.cpp @@ -40,7 +40,17 @@ class TWaitEraseTablesTxSubscriber: public NSubscriber::ISubscriber { } }; -NKikimr::NColumnShard::TTxController::TProposeResult TSchemaTransactionOperator::DoStartProposeOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) { +TTxController::TProposeResult TSchemaTransactionOperator::DoStartProposeOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) { + auto seqNo = SeqNoFromProto(SchemaTxBody.GetSeqNo()); + auto lastSeqNo = owner.LastSchemaSeqNo; + + // Check if proposal is outdated + if (seqNo < lastSeqNo) { + auto errorMessage = TStringBuilder() << "Ignoring outdated schema tx proposal at tablet " << owner.TabletID() << " txId " << GetTxId() + << " ssId " << owner.CurrentSchemeShardId << " seqNo " << seqNo << " lastSeqNo " << lastSeqNo; + return TProposeResult(NKikimrTxColumnShard::EResultStatus::SCHEMA_CHANGED, errorMessage); + } + switch (SchemaTxBody.TxBody_case()) { case NKikimrTxColumnShard::TSchemaTxBody::kInitShard: { @@ -67,21 +77,6 @@ NKikimr::NColumnShard::TTxController::TProposeResult TSchemaTransactionOperator: break; } - auto seqNo = SeqNoFromProto(SchemaTxBody.GetSeqNo()); - auto lastSeqNo = owner.LastSchemaSeqNo; - - // Check if proposal is outdated - if (seqNo < lastSeqNo) { - auto errorMessage = TStringBuilder() - 
<< "Ignoring outdated schema tx proposal at tablet " - << owner.TabletID() - << " txId " << GetTxId() - << " ssId " << owner.CurrentSchemeShardId - << " seqNo " << seqNo - << " lastSeqNo " << lastSeqNo; - return TProposeResult(NKikimrTxColumnShard::EResultStatus::SCHEMA_CHANGED, errorMessage); - } - owner.UpdateSchemaSeqNo(seqNo, txc); return TProposeResult(); } @@ -166,7 +161,7 @@ NKikimr::TConclusionStatus TSchemaTransactionOperator::ValidateTables(::google:: } return TConclusionStatus::Success(); } -bool TSchemaTransactionOperator::DoOnStartAsync(TColumnShard& owner) { +void TSchemaTransactionOperator::DoOnTabletInit(TColumnShard& owner) { AFL_VERIFY(WaitPathIdsToErase.empty()); switch (SchemaTxBody.TxBody_case()) { case NKikimrTxColumnShard::TSchemaTxBody::kInitShard: @@ -190,11 +185,9 @@ bool TSchemaTransactionOperator::DoOnStartAsync(TColumnShard& owner) { if (WaitPathIdsToErase.size()) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "wait_remove_path_id")("pathes", JoinSeq(",", WaitPathIdsToErase))("tx_id", GetTxId()); owner.Subscribers->RegisterSubscriber(std::make_shared(WaitPathIdsToErase, GetTxId())); - return true; } else { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "remove_pathes_cleaned")("tx_id", GetTxId()); owner.Execute(new TTxFinishAsyncTransaction(owner, GetTxId())); - return false; } } diff --git a/ydb/core/tx/columnshard/transactions/operators/schema.h b/ydb/core/tx/columnshard/transactions/operators/schema.h index f79e10fac2ca..8b7575cc3cc4 100644 --- a/ydb/core/tx/columnshard/transactions/operators/schema.h +++ b/ydb/core/tx/columnshard/transactions/operators/schema.h @@ -18,7 +18,7 @@ class TSchemaTransactionOperator: public IProposeTxOperator, public TMonitoringO THashSet NotifySubscribers; THashSet WaitPathIdsToErase; - virtual bool DoOnStartAsync(TColumnShard& owner) override; + virtual void DoOnTabletInit(TColumnShard& owner) override; template THashSet GetNotErasedTableIds(const TColumnShard& owner, const TInfoProto& 
tables) const { @@ -43,6 +43,22 @@ class TSchemaTransactionOperator: public IProposeTxOperator, public TMonitoringO } virtual void DoFinishProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { } + virtual TString DoGetOpType() const override { + switch (SchemaTxBody.TxBody_case()) { + case NKikimrTxColumnShard::TSchemaTxBody::kInitShard: + return "Scheme:InitShard"; + case NKikimrTxColumnShard::TSchemaTxBody::kEnsureTables: + return "Scheme:EnsureTables"; + case NKikimrTxColumnShard::TSchemaTxBody::kAlterTable: + return "Scheme:AlterTable"; + case NKikimrTxColumnShard::TSchemaTxBody::kAlterStore: + return "Scheme:AlterStore"; + case NKikimrTxColumnShard::TSchemaTxBody::kDropTable: + return "Scheme:DropTable"; + case NKikimrTxColumnShard::TSchemaTxBody::TXBODY_NOT_SET: + return "Scheme:TXBODY_NOT_SET"; + } + } virtual bool DoIsAsync() const override { return WaitPathIdsToErase.size(); } diff --git a/ydb/core/tx/columnshard/transactions/operators/sharing.h b/ydb/core/tx/columnshard/transactions/operators/sharing.h index acf02304d878..13c7df7cad0e 100644 --- a/ydb/core/tx/columnshard/transactions/operators/sharing.h +++ b/ydb/core/tx/columnshard/transactions/operators/sharing.h @@ -25,6 +25,9 @@ class TSharingTransactionOperator: public IProposeTxOperator, public TMonitoring } virtual void DoFinishProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { } + virtual TString DoGetOpType() const override { + return "Sharing"; + } virtual bool DoIsAsync() const override { AFL_VERIFY(SharingTask); return !SharingTask->IsFinished(); diff --git a/ydb/core/tx/columnshard/transactions/operators/ss_operation.cpp b/ydb/core/tx/columnshard/transactions/operators/ss_operation.cpp deleted file mode 100644 index 1bf60d44d373..000000000000 --- a/ydb/core/tx/columnshard/transactions/operators/ss_operation.cpp +++ /dev/null @@ -1,23 +0,0 @@ -#include "ss_operation.h" - -namespace NKikimr::NColumnShard { - -void 
ISSTransactionOperator::DoSendReply(TColumnShard& owner, const TActorContext& ctx) { - const auto& txInfo = GetTxInfo(); - std::unique_ptr evResult = std::make_unique( - owner.TabletID(), txInfo.TxKind, txInfo.TxId, GetProposeStartInfoVerified().GetStatus(), GetProposeStartInfoVerified().GetStatusMessage()); - if (IsFail()) { - owner.IncCounter(COUNTER_PREPARE_ERROR); - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("message", GetProposeStartInfoVerified().GetStatusMessage())("tablet_id", owner.TabletID())("tx_id", txInfo.TxId); - } else { - evResult->Record.SetMinStep(txInfo.MinStep); - evResult->Record.SetMaxStep(txInfo.MaxStep); - if (owner.ProcessingParams) { - evResult->Record.MutableDomainCoordinators()->CopyFrom(owner.ProcessingParams->GetCoordinators()); - } - owner.IncCounter(COUNTER_PREPARE_SUCCESS); - } - ctx.Send(txInfo.Source, evResult.release()); -} - -} diff --git a/ydb/core/tx/columnshard/transactions/operators/ss_operation.h b/ydb/core/tx/columnshard/transactions/operators/ss_operation.h deleted file mode 100644 index feff6af77225..000000000000 --- a/ydb/core/tx/columnshard/transactions/operators/ss_operation.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include -#include - -namespace NKikimr::NColumnShard { - -class ISSTransactionOperator: public TTxController::ITransactionOperator { -private: - using TBase = TTxController::ITransactionOperator; -protected: - virtual void DoSendReply(TColumnShard& owner, const TActorContext& ctx) override; -public: - using TBase::TBase; -}; - -} diff --git a/ydb/core/tx/columnshard/transactions/operators/ya.make b/ydb/core/tx/columnshard/transactions/operators/ya.make index 579b0d07679c..e96323b51b44 100644 --- a/ydb/core/tx/columnshard/transactions/operators/ya.make +++ b/ydb/core/tx/columnshard/transactions/operators/ya.make @@ -3,7 +3,6 @@ LIBRARY() SRCS( GLOBAL schema.cpp GLOBAL long_tx_write.cpp - GLOBAL ev_write.cpp GLOBAL backup.cpp GLOBAL sharing.cpp propose_tx.cpp @@ -11,6 +10,7 @@ SRCS( PEERDIR( 
ydb/core/tx/columnshard/data_sharing/destination/events + ydb/core/tx/columnshard/transactions/operators/ev_write ydb/core/tx/columnshard/export/session ) diff --git a/ydb/core/tx/columnshard/transactions/protos/tx_event.proto b/ydb/core/tx/columnshard/transactions/protos/tx_event.proto new file mode 100644 index 000000000000..6e64dde46e4d --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/protos/tx_event.proto @@ -0,0 +1,19 @@ +package NKikimrColumnShardTxProto; + +message TEvent { + optional uint64 PathId = 1; + optional string ClassName = 2; + + message TReadEvent { + optional string Filter = 1; + optional string Schema = 2; + } + + message TWriteEvent { + } + + oneof Implementation { + TReadEvent Read = 20; + TWriteEvent Write = 21; + } +} diff --git a/ydb/core/tx/columnshard/transactions/protos/ya.make b/ydb/core/tx/columnshard/transactions/protos/ya.make new file mode 100644 index 000000000000..7a54fdc3404d --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/protos/ya.make @@ -0,0 +1,12 @@ +PROTO_LIBRARY() + +SRCS( + tx_event.proto +) + +PEERDIR( + ydb/core/tx/columnshard/common/protos + ydb/core/protos +) + +END() diff --git a/ydb/core/tx/columnshard/transactions/tx_controller.cpp b/ydb/core/tx/columnshard/transactions/tx_controller.cpp index d9b6e1451a17..afb1e8a33d50 100644 --- a/ydb/core/tx/columnshard/transactions/tx_controller.cpp +++ b/ydb/core/tx/columnshard/transactions/tx_controller.cpp @@ -1,4 +1,5 @@ #include "tx_controller.h" + #include "transactions/tx_finish_async.h" #include @@ -6,7 +7,8 @@ namespace NKikimr::NColumnShard { TTxController::TTxController(TColumnShard& owner) - : Owner(owner) { + : Owner(owner) + , Counters(owner.Counters.GetCSCounters().TxProgress) { } bool TTxController::HaveOutdatedTxs() const { @@ -52,9 +54,9 @@ bool TTxController::Load(NTabletFlatExecutor::TTransactionContext& txc) { const ui64 txId = rowset.GetValue(); const NKikimrTxColumnShard::ETransactionKind txKind = rowset.GetValue(); 
ITransactionOperator::TPtr txOperator(ITransactionOperator::TFactory::Construct(txKind, TTxInfo(txKind, txId))); - Y_ABORT_UNLESS(!!txOperator); + AFL_VERIFY(!!txOperator)("kind", txKind); const TString txBody = rowset.GetValue(); - Y_ABORT_UNLESS(txOperator->Parse(Owner, txBody, true)); + AFL_VERIFY(txOperator->Parse(Owner, txBody, true)); auto& txInfo = txOperator->MutableTxInfo(); txInfo.MaxStep = rowset.GetValue(); @@ -89,22 +91,9 @@ bool TTxController::Load(NTabletFlatExecutor::TTransactionContext& txc) { return true; } -TTxController::ITransactionOperator::TPtr TTxController::GetTxOperator(const ui64 txId) const { - auto it = Operators.find(txId); - if (it == Operators.end()) { - return nullptr; - } - return it->second; -} - -TTxController::ITransactionOperator::TPtr TTxController::GetVerifiedTxOperator(const ui64 txId) const { - auto it = Operators.find(txId); - AFL_VERIFY(it != Operators.end())("tx_id", txId); - return it->second; -} - -std::shared_ptr TTxController::UpdateTxSourceInfo(const TFullTxInfo& tx, NTabletFlatExecutor::TTransactionContext& txc) { - auto op = GetVerifiedTxOperator(tx.GetTxId()); +std::shared_ptr TTxController::UpdateTxSourceInfo( + const TFullTxInfo& tx, NTabletFlatExecutor::TTransactionContext& txc) { + auto op = GetTxOperatorVerified(tx.GetTxId()); op->ResetStatusOnUpdate(); auto& txInfo = op->MutableTxInfo(); txInfo.Source = tx.Source; @@ -116,17 +105,20 @@ std::shared_ptr TTxController::UpdateTxSour return op; } -TTxController::TTxInfo TTxController::RegisterTx(const std::shared_ptr& txOperator, const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc) { +TTxController::TTxInfo TTxController::RegisterTx(const std::shared_ptr& txOperator, const TString& txBody, + NTabletFlatExecutor::TTransactionContext& txc) { NIceDb::TNiceDb db(txc.DB); auto& txInfo = txOperator->GetTxInfo(); AFL_VERIFY(txInfo.MaxStep == Max()); AFL_VERIFY(Operators.emplace(txInfo.TxId, txOperator).second); Schema::SaveTxInfo(db, txInfo, txBody); + 
Counters.OnRegisterTx(txOperator->GetOpType()); return txInfo; } -TTxController::TTxInfo TTxController::RegisterTxWithDeadline(const std::shared_ptr& txOperator, const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc) { +TTxController::TTxInfo TTxController::RegisterTxWithDeadline(const std::shared_ptr& txOperator, + const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc) { NIceDb::TNiceDb db(txc.DB); auto& txInfo = txOperator->MutableTxInfo(); @@ -137,22 +129,22 @@ TTxController::TTxInfo TTxController::RegisterTxWithDeadline(const std::shared_p Schema::SaveTxInfo(db, txInfo, txBody); DeadlineQueue.emplace(txInfo.MaxStep, txOperator->GetTxId()); + Counters.OnRegisterTx(txOperator->GetOpType()); return txInfo; } -bool TTxController::AbortTx(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { - auto opIt = Operators.find(txId); +bool TTxController::AbortTx(const TPlanQueueItem planQueueItem, NTabletFlatExecutor::TTransactionContext& txc) { + auto opIt = Operators.find(planQueueItem.TxId); Y_ABORT_UNLESS(opIt != Operators.end()); Y_ABORT_UNLESS(opIt->second->GetTxInfo().PlanStep == 0); opIt->second->ExecuteOnAbort(Owner, txc); opIt->second->CompleteOnAbort(Owner, NActors::TActivationContext::AsActorContext()); + Counters.OnAbortTx(opIt->second->GetOpType()); - if (opIt->second->GetTxInfo().MaxStep != Max()) { - DeadlineQueue.erase(TPlanQueueItem(opIt->second->GetTxInfo().MaxStep, txId)); - } - Operators.erase(txId); + AFL_VERIFY(Operators.erase(planQueueItem.TxId)); + AFL_VERIFY(DeadlineQueue.erase(planQueueItem)); NIceDb::TNiceDb db(txc.DB); - Schema::EraseTxInfo(db, txId); + Schema::EraseTxInfo(db, planQueueItem.TxId); return true; } @@ -191,7 +183,14 @@ bool TTxController::ExecuteOnCancel(const ui64 txId, NTabletFlatExecutor::TTrans return true; } -std::optional TTxController::StartPlannedTx() { +std::optional TTxController::GetFirstPlannedTx() const { + if (!PlanQueue.empty()) { + return 
GetTxInfoVerified(PlanQueue.begin()->TxId); + } + return std::nullopt; +} + +std::optional TTxController::PopFirstPlannedTx() { if (!PlanQueue.empty()) { auto node = PlanQueue.extract(PlanQueue.begin()); auto& item = node.value(); @@ -202,13 +201,16 @@ std::optional TTxController::StartPlannedTx() { return std::nullopt; } -void TTxController::FinishPlannedTx(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { +void TTxController::ProgressOnExecute(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { NIceDb::TNiceDb db(txc.DB); + auto opIt = Operators.find(txId); + AFL_VERIFY(opIt != Operators.end())("tx_id", txId); + Counters.OnFinishPlannedTx(opIt->second->GetOpType()); + AFL_VERIFY(Operators.erase(txId)); Schema::EraseTxInfo(db, txId); } -void TTxController::CompleteRunningTx(const TPlanQueueItem& txItem) { - AFL_VERIFY(Operators.erase(txItem.TxId)); +void TTxController::ProgressOnComplete(const TPlanQueueItem& txItem) { AFL_VERIFY(RunningQueue.erase(txItem))("info", txItem.DebugString()); } @@ -252,7 +254,7 @@ size_t TTxController::CleanExpiredTxs(NTabletFlatExecutor::TTransactionContext& } ui64 txId = it->TxId; LOG_S_DEBUG(TStringBuilder() << "Removing outdated txId " << txId << " max step " << it->Step << " outdated step "); - AbortTx(txId, txc); + AbortTx(*it, txc); ++removedCount; } } @@ -275,7 +277,10 @@ TDuration TTxController::GetTxCompleteLag(ui64 timecastStep) const { TTxController::EPlanResult TTxController::PlanTx(const ui64 planStep, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { auto it = Operators.find(txId); if (it == Operators.end()) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_plan_tx")("tx_id", txId); return EPlanResult::Skipped; + } else { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "plan_tx")("tx_id", txId)("plan_step", it->second->MutableTxInfo().PlanStep); } auto& txInfo = it->second->MutableTxInfo(); if (txInfo.PlanStep == 0) { @@ -292,6 +297,8 @@ 
TTxController::EPlanResult TTxController::PlanTx(const ui64 planStep, const ui64 } void TTxController::OnTabletInit() { + AFL_VERIFY(!StartedFlag); + StartedFlag = true; for (auto&& txOperator : Operators) { txOperator.second->OnTabletInit(Owner); } @@ -299,22 +306,24 @@ void TTxController::OnTabletInit() { std::shared_ptr TTxController::StartProposeOnExecute( const TTxController::TTxInfo& txInfo, const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc) { - NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("method", "TTxController::StartProposeOnExecute")( - "tx_info", txInfo.DebugString())("tx_info", txInfo.DebugString()); + NActors::TLogContextGuard lGuard = + NActors::TLogContextBuilder::Build()("method", "TTxController::StartProposeOnExecute")("tx_info", txInfo.DebugString()); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start"); - std::shared_ptr txOperator(TTxController::ITransactionOperator::TFactory::Construct(txInfo.TxKind, txInfo)); + std::shared_ptr txOperator( + TTxController::ITransactionOperator::TFactory::Construct(txInfo.TxKind, txInfo)); AFL_VERIFY(!!txOperator); if (!txOperator->Parse(Owner, txBody)) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("error", "cannot parse txOperator"); return txOperator; } + Counters.OnStartProposeOnExecute(txOperator->GetOpType()); auto txInfoPtr = GetTxInfo(txInfo.TxId); if (!!txInfoPtr) { if (!txOperator->CheckAllowUpdate(*txInfoPtr)) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "incorrect duplication")("actual_tx", txInfoPtr->DebugString()); - TTxController::TProposeResult proposeResult( - NKikimrTxColumnShard::EResultStatus::ERROR, TStringBuilder() << "Another commit TxId# " << txInfo.TxId << " has already been proposed"); + TTxController::TProposeResult proposeResult(NKikimrTxColumnShard::EResultStatus::ERROR, + TStringBuilder() << "Another commit TxId# " << txInfo.TxId << " has already been proposed"); txOperator->SetProposeStartInfo(proposeResult); return 
txOperator; } else { @@ -337,52 +346,48 @@ std::shared_ptr TTxController::StartPropose } } -void TTxController::StartProposeOnComplete(const ui64 txId, const TActorContext& ctx) { - NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("method", "TTxController::StartProposeOnComplete")("tx_id", txId); - auto txOperator = GetTxOperator(txId); - if (!txOperator) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "cannot found txOperator in propose transaction base")("tx_id", txId); - } else { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start"); - txOperator->StartProposeOnComplete(Owner, ctx); - } +void TTxController::StartProposeOnComplete(ITransactionOperator& txOperator, const TActorContext& ctx) { + NActors::TLogContextGuard lGuard = + NActors::TLogContextBuilder::Build()("method", "TTxController::StartProposeOnComplete")("tx_id", txOperator.GetTxId()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start"); + txOperator.StartProposeOnComplete(Owner, ctx); + Counters.OnStartProposeOnComplete(txOperator.GetOpType()); } void TTxController::FinishProposeOnExecute(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("method", "TTxController::FinishProposeOnExecute")("tx_id", txId); - auto txOperator = GetTxOperator(txId); - if (!txOperator) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "cannot found txOperator in propose transaction base")("tx_id", txId); - } else { + if (auto txOperator = GetTxOperatorOptional(txId)) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start"); txOperator->FinishProposeOnExecute(Owner, txc); + Counters.OnFinishProposeOnExecute(txOperator->GetOpType()); + } else { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "cannot found txOperator in propose transaction base")("tx_id", txId); } } +void TTxController::FinishProposeOnComplete(ITransactionOperator& txOperator, const TActorContext& ctx) { + 
NActors::TLogContextGuard lGuard = + NActors::TLogContextBuilder::Build()("method", "TTxController::FinishProposeOnComplete")("tx_id", txOperator.GetTxId()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start")("tx_info", txOperator.GetTxInfo().DebugString()); + TTxController::TProposeResult proposeResult = txOperator.GetProposeStartInfoVerified(); + AFL_VERIFY(!txOperator.IsFail()); + txOperator.FinishProposeOnComplete(Owner, ctx); + txOperator.SendReply(Owner, ctx); + Counters.OnFinishProposeOnComplete(txOperator.GetOpType()); +} + void TTxController::FinishProposeOnComplete(const ui64 txId, const TActorContext& ctx) { - NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("method", "TTxController::FinishProposeOnComplete")("tx_id", txId); - auto txOperator = GetTxOperator(txId); + auto txOperator = GetTxOperatorOptional(txId); if (!txOperator) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "cannot found txOperator in propose transaction finish")("tx_id", txId); return; } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start")("tx_info", txOperator->GetTxInfo().DebugString()); - TTxController::TProposeResult proposeResult = txOperator->GetProposeStartInfoVerified(); - AFL_VERIFY(!txOperator->IsFail()); - txOperator->FinishProposeOnComplete(Owner, ctx); - txOperator->SendReply(Owner, ctx); -} - -void TTxController::StartOperators() { - AFL_VERIFY(!StartedFlag); - StartedFlag = true; - for (auto&& i : Operators) { - Y_UNUSED(i.second->OnStartAsync(Owner)); - } + return FinishProposeOnComplete(*txOperator, ctx); } void TTxController::ITransactionOperator::SwitchStateVerified(const EStatus from, const EStatus to) { - AFL_VERIFY(!Status || *Status == from)("error", "incorrect expected status")("real_state", *Status)("expected", from)("details", DebugString()); + AFL_VERIFY(!Status || *Status == from)("error", "incorrect expected status")("real_state", *Status)("expected", from)( + "details", DebugString()); Status = to; } diff 
--git a/ydb/core/tx/columnshard/transactions/tx_controller.h b/ydb/core/tx/columnshard/transactions/tx_controller.h index 40cef7c84099..e48f10d3796d 100644 --- a/ydb/core/tx/columnshard/transactions/tx_controller.h +++ b/ydb/core/tx/columnshard/transactions/tx_controller.h @@ -1,11 +1,14 @@ #pragma once -#include - #include +#include +#include #include #include +namespace NKikimr::NOlap::NTxInteractions { +class TManager; +} namespace NKikimr::NColumnShard { @@ -14,10 +17,12 @@ class TColumnShard; struct TBasicTxInfo { const NKikimrTxColumnShard::ETransactionKind TxKind; const ui64 TxId; + public: TBasicTxInfo(const NKikimrTxColumnShard::ETransactionKind& txKind, const ui64 txId) : TxKind(txKind) , TxId(txId) { + AFL_VERIFY(txKind != NKikimrTxColumnShard::TX_KIND_NONE); } bool operator==(const TBasicTxInfo& item) const = default; @@ -42,12 +47,18 @@ struct TFullTxInfo: public TBasicTxInfo { TActorId Source; ui64 Cookie = 0; std::optional SeqNo; + public: + static TFullTxInfo BuildFake(const NKikimrTxColumnShard::ETransactionKind kind) { + return TFullTxInfo(kind, 0, NActors::TActorId(), 0, {}); + } + bool operator==(const TFullTxInfo& item) const = default; TString DebugString() const { TStringBuilder sb; - sb << TBase::DebugString() << ";min=" << MinStep << ";max=" << MaxStep << ";plan=" << PlanStep << ";src=" << Source << ";cookie=" << Cookie; + sb << TBase::DebugString() << ";min=" << MinStep << ";max=" << MaxStep << ";plan=" << PlanStep << ";src=" << Source + << ";cookie=" << Cookie; if (SeqNo) { sb << *SeqNo << ";"; } @@ -75,12 +86,12 @@ struct TFullTxInfo: public TBasicTxInfo { : TBasicTxInfo(txKind, txId) { } - TFullTxInfo(const NKikimrTxColumnShard::ETransactionKind& txKind, const ui64 txId, const TActorId& source, const ui64 cookie, const std::optional& seqNo) + TFullTxInfo(const NKikimrTxColumnShard::ETransactionKind& txKind, const ui64 txId, const TActorId& source, const ui64 cookie, + const std::optional& seqNo) : TBasicTxInfo(txKind, txId) , 
Source(source) , Cookie(cookie) - , SeqNo(seqNo) - { + , SeqNo(seqNo) { } }; @@ -89,6 +100,7 @@ class TTxProposeResult { class TProposeResult { YDB_READONLY(NKikimrTxColumnShard::EResultStatus, Status, NKikimrTxColumnShard::EResultStatus::PREPARED); YDB_READONLY_DEF(TString, StatusMessage); + public: TProposeResult() = default; TProposeResult(NKikimrTxColumnShard::EResultStatus status, const TString& statusMessage) @@ -109,16 +121,15 @@ class TTxProposeResult { std::optional BaseTxInfo; std::optional FullTxInfo; TProposeResult ProposeResult; + public: TTxProposeResult(const TBasicTxInfo& txInfo, TProposeResult&& result) : BaseTxInfo(txInfo) , ProposeResult(std::move(result)) { - } TTxProposeResult(const TFullTxInfo& txInfo, TProposeResult&& result) : FullTxInfo(txInfo) , ProposeResult(std::move(result)) { - } ui64 GetTxId() const noexcept { @@ -152,8 +163,8 @@ class TTxController { TPlanQueueItem(const ui64 step, const ui64 txId) : Step(step) - , TxId(txId) - {} + , TxId(txId) { + } inline bool operator<(const TPlanQueueItem& rhs) const { return Step < rhs.Step || (Step == rhs.Step && TxId < rhs.TxId); @@ -180,10 +191,12 @@ class TTxController { ReplySent, Failed }; + protected: TTxInfo TxInfo; YDB_READONLY_DEF(std::optional, ProposeStartInfo); std::optional Status = EStatus::Created; + private: friend class TTxController; virtual bool DoParse(TColumnShard& owner, const TString& data) = 0; @@ -191,41 +204,48 @@ class TTxController { virtual void DoStartProposeOnComplete(TColumnShard& owner, const TActorContext& ctx) = 0; virtual void DoFinishProposeOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) = 0; virtual void DoFinishProposeOnComplete(TColumnShard& owner, const TActorContext& ctx) = 0; + virtual TString DoGetOpType() const = 0; virtual bool DoIsAsync() const = 0; virtual void DoSendReply(TColumnShard& owner, const TActorContext& ctx) = 0; virtual bool DoCheckAllowUpdate(const TFullTxInfo& currentTxInfo) const = 0; virtual bool 
DoCheckTxInfoForReply(const TFullTxInfo& /*originalTxInfo*/) const { return true; } + virtual bool DoPingTimeout(TColumnShard& /*owner*/, const TMonotonic /*now*/) { + return false; + } + + virtual std::unique_ptr DoBuildTxPrepareForProgress(TColumnShard* /*owner*/) const { + return nullptr; + } void SwitchStateVerified(const EStatus from, const EStatus to); TTxInfo& MutableTxInfo() { return TxInfo; } + virtual void DoOnTabletInit(TColumnShard& /*owner*/) { + } + void ResetStatusOnUpdate() { Status = {}; } virtual TString DoDebugString() const = 0; - virtual bool DoOnStartAsync(TColumnShard& /*owner*/) { - return false; - } std::optional StartedAsync; public: using TPtr = std::shared_ptr; using TFactory = NObjectFactory::TParametrizedObjectFactory; + using OpType = TString; - bool CheckTxInfoForReply(const TFullTxInfo& originalTxInfo) const { - return DoCheckTxInfoForReply(originalTxInfo); + bool PingTimeout(TColumnShard& owner, const TMonotonic now) { + return DoPingTimeout(owner, now); } - [[nodiscard]] bool OnStartAsync(TColumnShard& owner) { - AFL_VERIFY(!StartedAsync); - StartedAsync = DoOnStartAsync(owner); - return *StartedAsync; + bool CheckTxInfoForReply(const TFullTxInfo& originalTxInfo) const { + return DoCheckTxInfoForReply(originalTxInfo); } TString DebugString() const { @@ -236,6 +256,10 @@ class TTxController { return DoCheckAllowUpdate(currentTxInfo); } + std::unique_ptr BuildTxPrepareForProgress(TColumnShard* owner) const { + return DoBuildTxPrepareForProgress(owner); + } + bool IsFail() const { return ProposeStartInfo && ProposeStartInfo->IsFail(); } @@ -258,18 +282,23 @@ class TTxController { } ITransactionOperator(const TTxInfo& txInfo) - : TxInfo(txInfo) - {} + : TxInfo(txInfo) { + } ui64 GetTxId() const { return TxInfo.TxId; } + OpType GetOpType() const { + return DoGetOpType(); + } + bool IsAsync() const { return DoIsAsync() && Status != EStatus::Failed && Status != EStatus::ReplySent; } - virtual ~ITransactionOperator() {} + virtual 
~ITransactionOperator() { + } virtual bool TxWithDeadline() const { return true; @@ -279,14 +308,15 @@ class TTxController { const bool result = DoParse(owner, data); if (!result) { AFL_VERIFY(!onLoad); - ProposeStartInfo = TTxController::TProposeResult(NKikimrTxColumnShard::EResultStatus::ERROR, TStringBuilder() << "Error processing commit TxId# " << TxInfo.TxId - << ". Parsing error"); + ProposeStartInfo = TTxController::TProposeResult(NKikimrTxColumnShard::EResultStatus::ERROR, + TStringBuilder() << "Error processing commit TxId# " << TxInfo.TxId << ". Parsing error"); SwitchStateVerified(EStatus::Created, EStatus::Failed); } else { SwitchStateVerified(EStatus::Created, EStatus::Parsed); } if (onLoad) { - ProposeStartInfo = TTxController::TProposeResult(NKikimrTxColumnShard::EResultStatus::PREPARED, "success on iteration before restart"); + ProposeStartInfo = + TTxController::TProposeResult(NKikimrTxColumnShard::EResultStatus::PREPARED, "success on iteration before restart"); Status = {}; } return result; @@ -345,8 +375,15 @@ class TTxController { virtual void RegisterSubscriber(const TActorId&) { AFL_VERIFY(false)("message", "Not implemented"); }; - virtual void OnTabletInit(TColumnShard& /*owner*/) {} + void OnTabletInit(TColumnShard& owner) { + AFL_VERIFY(!StartedAsync); + StartedAsync = true; + DoOnTabletInit(owner); + } }; + TTxProgressCounters& GetCounters() { + return Counters; + } private: const TDuration MaxCommitTxDelay = TDuration::Seconds(30); @@ -354,44 +391,75 @@ class TTxController { std::set DeadlineQueue; std::set PlanQueue; std::set RunningQueue; + TTxProgressCounters Counters; THashMap Operators; - private: ui64 GetAllowedStep() const; - bool AbortTx(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); + bool AbortTx(const TPlanQueueItem planQueueItem, NTabletFlatExecutor::TTransactionContext& txc); - TTxInfo RegisterTx(const std::shared_ptr& txOperator, const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc); - 
TTxInfo RegisterTxWithDeadline(const std::shared_ptr& txOperator, const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc); + TTxInfo RegisterTx(const std::shared_ptr& txOperator, const TString& txBody, + NTabletFlatExecutor::TTransactionContext& txc); + TTxInfo RegisterTxWithDeadline(const std::shared_ptr& txOperator, const TString& txBody, + NTabletFlatExecutor::TTransactionContext& txc); bool StartedFlag = false; + public: TTxController(TColumnShard& owner); - ITransactionOperator::TPtr GetTxOperator(const ui64 txId) const; - ITransactionOperator::TPtr GetVerifiedTxOperator(const ui64 txId) const; - void StartOperators(); + ITransactionOperator::TPtr GetTxOperatorOptional(const ui64 txId) const { + auto it = Operators.find(txId); + if (it == Operators.end()) { + return nullptr; + } + return it->second; + } + ITransactionOperator::TPtr GetTxOperatorVerified(const ui64 txId) const { + return TValidator::CheckNotNull(GetTxOperatorOptional(txId)); + } + template + std::shared_ptr GetTxOperatorVerifiedAs(const ui64 txId) const { + auto result = GetTxOperatorOptional(txId); + AFL_VERIFY(result); + auto resultClass = dynamic_pointer_cast(result); + AFL_VERIFY(resultClass); + return resultClass; + } + + void PingTimeouts(const TMonotonic now) { + auto txInfo = GetFirstPlannedTx(); + if (!txInfo) { + return; + } + GetTxOperatorVerified(txInfo->GetTxId())->PingTimeout(Owner, now); + } ui64 GetMemoryUsage() const; bool HaveOutdatedTxs() const; bool Load(NTabletFlatExecutor::TTransactionContext& txc); - [[nodiscard]] std::shared_ptr UpdateTxSourceInfo(const TFullTxInfo& tx, NTabletFlatExecutor::TTransactionContext& txc); + [[nodiscard]] std::shared_ptr UpdateTxSourceInfo( + const TFullTxInfo& tx, NTabletFlatExecutor::TTransactionContext& txc); [[nodiscard]] std::shared_ptr StartProposeOnExecute( const TTxController::TTxInfo& txInfo, const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc); - void StartProposeOnComplete(const ui64 txId, const 
TActorContext& ctx); - + void StartProposeOnComplete(ITransactionOperator& txOperator, const TActorContext& ctx); void FinishProposeOnExecute(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); - + void FinishProposeOnComplete(ITransactionOperator& txOperator, const TActorContext& ctx); void FinishProposeOnComplete(const ui64 txId, const TActorContext& ctx); + void WriteTxOperatorInfo(NTabletFlatExecutor::TTransactionContext& txc, const ui64 txId, const TString& data) { + NIceDb::TNiceDb db(txc.DB); + NColumnShard::Schema::UpdateTxInfoBody(db, txId, data); + } bool ExecuteOnCancel(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); bool CompleteOnCancel(const ui64 txId, const TActorContext& ctx); - std::optional StartPlannedTx(); - void FinishPlannedTx(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); - void CompleteRunningTx(const TPlanQueueItem& tx); + std::optional GetFirstPlannedTx() const; + std::optional PopFirstPlannedTx(); + void ProgressOnExecute(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); + void ProgressOnComplete(const TPlanQueueItem& tx); std::optional GetPlannedTx() const; TPlanQueueItem GetFrontTx() const; @@ -412,5 +480,4 @@ class TTxController { void OnTabletInit(); }; -} - +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/ya.make b/ydb/core/tx/columnshard/transactions/ya.make index 8479005c4d67..c6509a2a6473 100644 --- a/ydb/core/tx/columnshard/transactions/ya.make +++ b/ydb/core/tx/columnshard/transactions/ya.make @@ -11,6 +11,7 @@ PEERDIR( ydb/core/tx/columnshard/data_sharing/destination/events ydb/core/tx/columnshard/transactions/operators ydb/core/tx/columnshard/transactions/transactions + ydb/core/tx/columnshard/transactions/locks ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp index 700ed7157f15..8a110e996b96 100644 --- 
a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp +++ b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -35,12 +35,6 @@ using TTypeId = NScheme::TTypeId; using TTypeInfo = NScheme::TTypeInfo; using TDefaultTestsController = NKikimr::NYDBTest::NColumnShard::TController; -class TDisableCompactionController: public NKikimr::NYDBTest::NColumnShard::TController { -public: - TDisableCompactionController() { - DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); - } -}; template bool DataHas(const std::vector>& batches, std::pair range, @@ -538,7 +532,7 @@ void TestWriteReadDup(const TestTableDescription& table = {}) { // read if (planStep != initPlanStep) { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); reader.SetReplyColumns({"timestamp"}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -597,7 +591,7 @@ void TestWriteReadLongTxDup() { // read TAutoPtr handle; { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -612,7 +606,9 @@ void TestWriteReadLongTxDup() { } void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString codec = "") { - auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + csControllerGuard->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + 
csControllerGuard->SetOverrideReadTimeoutClean(TDuration::Max()); TTestBasicRuntime runtime; TTester::Setup(runtime); @@ -681,7 +677,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 1); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(0, 0)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(0, 1)); reader.SetReplyColumns({"resource_type"}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -698,7 +694,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 2); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(0, 0)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(0, 1)); reader.SetReplyColumns({"resource_type"}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -708,7 +704,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 3 (committed) { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 3); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -723,7 +719,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 4 (column by id) { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 4); 
- NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetReplyColumnIds({1}); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -738,7 +734,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 5); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetReplyColumns({"timestamp", "message"}); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -776,7 +772,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 6, planstep 0 { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 6); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(0, 0)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(0, 1)); reader.SetReplyColumns({"timestamp", "message"}); auto rb = reader.ReadAll(); UNIT_ASSERT(!rb); @@ -786,7 +782,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 7, planstep 21 (part of index) { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 7); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(21, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(21, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -803,7 +799,7 @@ void TestWriteRead(bool reboots, const 
TestTableDescription& table = {}, TString // read 8, planstep 22 (full index) { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 8); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(22, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(22, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -833,7 +829,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 9 (committed, indexed) { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 9); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(23, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(23, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -858,7 +854,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 10 { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 10); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(24, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(24, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -878,8 +874,8 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString const ui64 committedBytes = reader.GetReadStat("committed_bytes"); Cerr << codec << "/" << compactedBytes << "/" << insertedBytes << "/" << committedBytes << Endl; if (insertedBytes) { - UNIT_ASSERT_GE(insertedBytes / 100000, 40); - UNIT_ASSERT_LE(insertedBytes / 100000, 50); 
+ UNIT_ASSERT_GE(insertedBytes / 100000, 50); + UNIT_ASSERT_LE(insertedBytes / 100000, 60); } if (committedBytes) { UNIT_ASSERT_LE(committedBytes / 100000, 1); @@ -904,7 +900,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 11 (range predicate: closed interval) { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 11); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(24, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(24, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); reader.AddRange(MakeTestRange({10, 42}, true, true, testYdbPk)); auto rb = reader.ReadAll(); @@ -921,7 +917,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 12 (range predicate: open interval) { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 11); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(24, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(24, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); reader.AddRange(MakeTestRange({10, 42}, false, false, testYdbPk)); auto rb = reader.ReadAll(); @@ -1034,7 +1030,7 @@ void TestCompactionInGranuleImpl(bool reboots, const TestTableDescription& table --txId; for (ui32 i = 0; i < 2; ++i) { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetReplyColumns({"timestamp", "message"}); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -1279,7 +1275,7 @@ void TestReadWithProgram(const TestTableDescription& table = {}) ui32 i = 0; for (auto& programText : 
programs) { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetProgram(programText); auto rb = reader.ReadAll(); if (i < numWrong) { @@ -1348,7 +1344,7 @@ void TestReadWithProgramLike(const TestTableDescription& table = {}) { ui32 i = 0; for (auto& ssa : ssas) { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetProgram(ssa); auto rb = reader.ReadAll(); @@ -1420,7 +1416,7 @@ void TestSomePrograms(const TestTableDescription& table) { // TODO: add programs with bugs here for (auto& ssaText : programs) { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetProgram(ssaText); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsError()); @@ -1530,7 +1526,7 @@ void TestReadAggregate(const std::vector& ydbSchema, for (auto& programText : programs) { Cerr << "-- select program: " << prog << " is filtered: " << (int)isFiltered.count(prog) << "\n"; - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetProgram(programText); auto batch = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -1890,7 +1886,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { txIds.insert(txId); PlanCommit(runtime, sender, planStep, txIds); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, 
TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({ "timestamp" }); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -1905,7 +1901,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { txIds.insert(txId); PlanCommit(runtime, sender, planStep, txIds); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({ "timestamp" }); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -1921,7 +1917,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { txIds.insert(txId); PlanCommit(runtime, sender, planStep, txIds); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({ "timestamp" }); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -1937,7 +1933,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { txIds.insert(txId); PlanCommit(runtime, sender, planStep, txIds); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({ "timestamp" }); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -1958,7 +1954,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { txIds.insert(txId); PlanCommit(runtime, sender, planStep, txIds); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({ "timestamp" }); auto rb = 
reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -2239,7 +2235,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { const ui64 tableId = 1; std::set useFields = {"timestamp", "message"}; { // read with predicate (FROM) - NOlap::NTests::TShardReader reader(Owner.Runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(Owner.PlanStep, Owner.TxId)); + TShardReader reader(Owner.Runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(Owner.PlanStep, Owner.TxId)); reader.SetReplyColumns({"timestamp", "message"}); reader.AddRange(MakeRange(Owner.YdbPk)); auto rb = reader.ReadAll(); @@ -2334,7 +2330,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { for (ui32 i = 0; i < 2; ++i) { { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetReplyColumns({"timestamp", "message"}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -2440,7 +2436,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { ui64 numRows = static_cast(*rows).Value(i); ui64 numBytes = static_cast(*bytes).Value(i); ui64 numRawBytes = static_cast(*rawBytes).Value(i); - bool activity = static_cast(*activities).Value(i); + bool activity = static_cast(*activities).Value(i); if (!activity) { continue; } @@ -2584,7 +2580,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { // Try to read snapshot that is too old { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - staleness.MilliSeconds(), Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - staleness.MilliSeconds(), Max())); reader.SetReplyColumns({"timestamp", "message"}); reader.ReadAll(); UNIT_ASSERT(reader.IsError()); @@ -2594,8 +2590,10 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { void TestCompactionGC() { TTestBasicRuntime runtime; - 
TTester::Setup(runtime); auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + csDefaultControllerGuard->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + csDefaultControllerGuard->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + TTester::Setup(runtime); runtime.SetLogPriority(NKikimrServices::BLOB_CACHE, NActors::NLog::PRI_INFO); @@ -2753,7 +2751,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { --planStep; --txId; Cerr << compactionsHappened << Endl; - UNIT_ASSERT_GE(compactionsHappened, 3); // we catch it three times per action +// UNIT_ASSERT_GE(compactionsHappened, 3); // we catch it three times per action ui64 previousCompactionsHappened = compactionsHappened; ui64 previousCleanupsHappened = cleanupsHappened; @@ -2761,12 +2759,13 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { // Send a request that reads the latest version // This request is expected to read at least 1 committed blob and several index portions // These committed blob and portions must not be deleted by the BlobManager until the read request finishes - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); reader.SetReplyColumns({"timestamp", "message"}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); UNIT_ASSERT(CheckOrdered(rb)); UNIT_ASSERT(reader.GetIterationsCount() < 10); + csDefaultControllerGuard->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); // We captured EvReadFinished event and dropped is so the columnshard still thinks that // read request is in progress and keeps the portions @@ -2782,15 +2781,21 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { ProposeCommit(runtime, sender, txId, writeIds); PlanCommit(runtime, sender, planStep, txId); } + { + auto read = 
std::make_unique(); + ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, read.release()); + } - Cerr << "Compactions happened: " << compactionsHappened << Endl; - Cerr << "Cleanups happened: " << cleanupsHappened << Endl; + Cerr << "Compactions happened: " << csDefaultControllerGuard->GetCompactionStartedCounter().Val() << Endl; + Cerr << "Indexations happened: " << csDefaultControllerGuard->GetInsertStartedCounter().Val() << Endl; + Cerr << "Cleanups happened: " << csDefaultControllerGuard->GetCleaningStartedCounter().Val() << Endl; Cerr << "Old portions: " << JoinStrings(oldPortions.begin(), oldPortions.end(), " ") << Endl; Cerr << "Cleaned up portions: " << JoinStrings(deletedPortions.begin(), deletedPortions.end(), " ") << Endl; + Cerr << "delayedBlobs: " << JoinStrings(delayedBlobs.begin(), delayedBlobs.end(), " ") << Endl; // Check that GC happened but it didn't collect some old portions UNIT_ASSERT_GT(compactionsHappened, previousCompactionsHappened); - UNIT_ASSERT_GT(cleanupsHappened, previousCleanupsHappened); + UNIT_ASSERT_EQUAL(cleanupsHappened, 0); UNIT_ASSERT_GT_C(oldPortions.size(), deletedPortions.size(), "Some old portions must not be deleted because the are in use by read"); UNIT_ASSERT_GT_C(delayedBlobs.size(), 0, "Read request is expected to have at least one committed blob, which deletion must be delayed"); previousCompactionsHappened = compactionsHappened; @@ -2805,9 +2810,25 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { } // Advance the time and trigger some more cleanups withno compactions - auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); - planStep += 2 * delay.MilliSeconds(); - numWrites = 2; + csDefaultControllerGuard->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + { + auto read = std::make_unique(); + ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, read.release()); + } + planStep += (2 * delay).MilliSeconds(); + for (ui32 i = 0; i < numWrites; 
++i, ++writeId, ++planStep, ++txId) { + std::vector writeIds; + UNIT_ASSERT(WriteData(runtime, sender, writeId, tableId, triggerData, ydbSchema, true, &writeIds)); + + ProposeCommit(runtime, sender, txId, writeIds); + PlanCommit(runtime, sender, planStep, txId); + } + UNIT_ASSERT_EQUAL(cleanupsHappened, 0); + csDefaultControllerGuard->SetOverrideRequestsTracePingCheckPeriod(TDuration::Zero()); + { + auto read = std::make_unique(); + ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, read.release()); + } for (ui32 i = 0; i < numWrites; ++i, ++writeId, ++planStep, ++txId) { std::vector writeIds; UNIT_ASSERT(WriteData(runtime, sender, writeId, tableId, triggerData, ydbSchema, true, &writeIds)); @@ -2815,9 +2836,12 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { ProposeCommit(runtime, sender, txId, writeIds); PlanCommit(runtime, sender, planStep, txId); } + AFL_VERIFY(csDefaultControllerGuard->GetRequestTracingSnapshotsSave().Val() == 1); + AFL_VERIFY(csDefaultControllerGuard->GetRequestTracingSnapshotsRemove().Val() == 1); - Cerr << "Compactions happened: " << compactionsHappened << Endl; - Cerr << "Cleanups happened: " << cleanupsHappened << Endl; + Cerr << "Compactions happened: " << csDefaultControllerGuard->GetCompactionStartedCounter().Val() << Endl; + Cerr << "Indexations happened: " << csDefaultControllerGuard->GetInsertStartedCounter().Val() << Endl; + Cerr << "Cleanups happened: " << csDefaultControllerGuard->GetCleaningStartedCounter().Val() << Endl; Cerr << "Old portions: " << JoinStrings(oldPortions.begin(), oldPortions.end(), " ") << Endl; Cerr << "Cleaned up portions: " << JoinStrings(deletedPortions.begin(), deletedPortions.end(), " ") << Endl; @@ -2825,7 +2849,6 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { UNIT_ASSERT_GE(compactionsHappened, previousCompactionsHappened); UNIT_ASSERT_GT(cleanupsHappened, previousCleanupsHappened); UNIT_ASSERT_VALUES_EQUAL_C(oldPortions.size(), deletedPortions.size(), "All old portions must be deleted 
after read has finished"); - UNIT_ASSERT_VALUES_EQUAL_C(delayedBlobs.size(), 0, "All previously delayed deletions must now happen " + JoinSeq(",", delayedBlobs)); } Y_UNIT_TEST(CompactionGC) { diff --git a/ydb/core/tx/columnshard/ut_rw/ya.make b/ydb/core/tx/columnshard/ut_rw/ya.make index d03099069b75..7cf343aad4c6 100644 --- a/ydb/core/tx/columnshard/ut_rw/ya.make +++ b/ydb/core/tx/columnshard/ut_rw/ya.make @@ -22,8 +22,6 @@ PEERDIR( ydb/core/tx/columnshard/test_helper ydb/core/tx/columnshard/hooks/abstract ydb/core/tx/columnshard/hooks/testing - ydb/core/tx/columnshard/common/tests - ydb/core/tx/columnshard/test_helper ydb/services/metadata ydb/core/tx ydb/public/lib/yson_value diff --git a/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp b/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp index cfc9ea9526b5..39673d15b071 100644 --- a/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp +++ b/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include @@ -161,7 +161,7 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, { auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); csControllerGuard->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); - csControllerGuard->SetTasksActualizationLag(TDuration::Zero()); + csControllerGuard->SetOverrideTasksActualizationLag(TDuration::Zero()); std::vector ts = {1600000000, 1620000000}; ui32 ttlIncSeconds = 1; @@ -246,7 +246,7 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, { --planStep; - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({spec.TtlColumn}); auto rb = reader.ReadAll(); 
UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -278,7 +278,7 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, { --planStep; - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({spec.TtlColumn, NOlap::TIndexInfo::SPEC_COL_PLAN_STEP}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -312,7 +312,7 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, { --planStep; - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({spec.TtlColumn}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -513,7 +513,7 @@ std::vector> TestTiers(bool reboots, const std::vector(); csControllerGuard->DisableBackground(NYDBTest::ICSController::EBackground::TTL); - csControllerGuard->SetTasksActualizationLag(TDuration::Zero()); + csControllerGuard->SetOverrideTasksActualizationLag(TDuration::Zero()); TTestBasicRuntime runtime; TTester::Setup(runtime); @@ -621,9 +621,9 @@ std::vector> TestTiers(bool reboots, const std::vector reader; + std::unique_ptr reader; if (!misconfig) { - reader = std::make_unique(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); + reader = std::make_unique(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); reader->SetReplyColumns({specs[i].TtlColumn}); counter.CaptureReadEvents = specs[i].WaitEmptyAfter ? 
0 : 1; // TODO: we need affected by tiering blob here counter.WaitReadsCaptured(runtime); @@ -662,7 +662,7 @@ std::vector> TestTiers(bool reboots, const std::vector())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); reader.SetReplyColumns({columnToRead}); auto rb = reader.ReadAll(); if (expectedReadResult == EExpectedResult::ERROR) { @@ -981,7 +981,7 @@ void TestDrop(bool reboots) { TAutoPtr handle; { --planStep; - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({TTestSchema::DefaultTtlColumn}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); diff --git a/ydb/core/tx/columnshard/ya.make b/ydb/core/tx/columnshard/ya.make index 716b6eae6174..f1f4df107ffe 100644 --- a/ydb/core/tx/columnshard/ya.make +++ b/ydb/core/tx/columnshard/ya.make @@ -16,7 +16,6 @@ SRCS( columnshard__write_index.cpp columnshard.cpp columnshard_impl.cpp - columnshard_common.cpp columnshard_private_events.cpp columnshard_schema.cpp columnshard_view.cpp diff --git a/ydb/core/tx/data_events/columnshard_splitter.cpp b/ydb/core/tx/data_events/columnshard_splitter.cpp index 5b435a0f9198..19a787167270 100644 --- a/ydb/core/tx/data_events/columnshard_splitter.cpp +++ b/ydb/core/tx/data_events/columnshard_splitter.cpp @@ -66,9 +66,10 @@ NKikimr::NEvWrite::IShardsSplitter::TYdbConclusionStatus TColumnShardShardsSplit } TFullSplitData result(sharding->GetShardsCount()); + const TString schemaString = NArrow::SerializeSchema(*batch->schema()); for (auto&& [shardId, chunks] : split.GetResult()) { for (auto&& c : chunks) { - result.AddShardInfo(shardId, std::make_shared(c.GetSchemaData(), c.GetData(), c.GetRowsCount(), sharding->GetShardInfoVerified(shardId).GetShardingVersion())); + result.AddShardInfo(shardId, 
std::make_shared(schemaString, c.GetData(), c.GetRowsCount(), sharding->GetShardInfoVerified(shardId).GetShardingVersion())); } } diff --git a/ydb/core/tx/data_events/events.h b/ydb/core/tx/data_events/events.h index d9f17beceec4..bd4f06284e9d 100644 --- a/ydb/core/tx/data_events/events.h +++ b/ydb/core/tx/data_events/events.h @@ -103,6 +103,13 @@ struct TDataEvents { return result; } + static std::unique_ptr BuildCompleted(const ui64 origin) { + auto result = std::make_unique(); + result->Record.SetOrigin(origin); + result->Record.SetStatus(NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED); + return result; + } + static std::unique_ptr BuildCompleted(const ui64 origin, const ui64 txId) { auto result = std::make_unique(); result->Record.SetOrigin(origin); @@ -116,7 +123,9 @@ struct TDataEvents { result->Record.SetOrigin(origin); result->Record.SetTxId(txId); result->Record.SetStatus(NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED); - *result->Record.AddTxLocks() = lock; + auto& lockResult = *result->Record.AddTxLocks(); + lockResult = lock; + lockResult.SetHasWrites(true); return result; } diff --git a/ydb/core/tx/data_events/shard_writer.cpp b/ydb/core/tx/data_events/shard_writer.cpp index 0158527fee49..85fc54cf1dd1 100644 --- a/ydb/core/tx/data_events/shard_writer.cpp +++ b/ydb/core/tx/data_events/shard_writer.cpp @@ -18,19 +18,25 @@ namespace NKikimr::NEvWrite { void TWritersController::OnSuccess(const ui64 shardId, const ui64 writeId, const ui32 writePartId) { WriteIds[WritesIndex.Inc() - 1] = TWriteIdForShard(shardId, writeId, writePartId); + Counters->OnCSReply(TMonotonic::Now() - StartInstant); if (!WritesCount.Dec()) { - auto req = MakeHolder(LongTxId); - for (auto&& i : WriteIds) { - req->AddWrite(i.GetShardId(), i.GetWriteId()); - } - LongTxActorId.Send(NLongTxService::MakeLongTxServiceID(LongTxActorId.NodeId()), req.Release()); + SendReply(); } } void TWritersController::OnFail(const Ydb::StatusIds::StatusCode code, const TString& message) { - 
NYql::TIssues issues; - issues.AddIssue(message); - LongTxActorId.Send(LongTxActorId, new TEvPrivate::TEvShardsWriteResult(code, issues)); + Counters->OnCSFailed(code); + FailsCount.Inc(); + if (!Code) { + TGuard g(Mutex); + if (!Code) { + Issues.AddIssue(message); + Code = code; + } + } + if (!WritesCount.Dec()) { + SendReply(); + } } TShardWriter::TShardWriter(const ui64 shardId, const ui64 tableId, const TString& dedupId, const IShardInfo::TPtr& data, diff --git a/ydb/core/tx/data_events/shard_writer.h b/ydb/core/tx/data_events/shard_writer.h index d7abe2e2a3ed..aa8f2a1eed6e 100644 --- a/ydb/core/tx/data_events/shard_writer.h +++ b/ydb/core/tx/data_events/shard_writer.h @@ -8,6 +8,7 @@ #include #include #include +#include namespace NKikimr::NEvWrite { @@ -22,19 +23,88 @@ class TWriteIdForShard { TWriteIdForShard(const ui64 shardId, const ui64 writeId, const ui32 writePartId) : ShardId(shardId) , WriteId(writeId) - , WritePartId(writePartId) - { + , WritePartId(writePartId) { + } +}; + +class TCSUploadCounters: public NColumnShard::TCommonCountersOwner { +private: + using TBase = NColumnShard::TCommonCountersOwner; + NMonitoring::TDynamicCounters::TCounterPtr RequestsCount; + NMonitoring::THistogramPtr CSReplyDuration; + NMonitoring::THistogramPtr SucceedFullReplyDuration; + NMonitoring::THistogramPtr FailedFullReplyDuration; + NMonitoring::THistogramPtr BytesDistribution; + NMonitoring::THistogramPtr RowsDistribution; + NMonitoring::TDynamicCounters::TCounterPtr RowsCount; + NMonitoring::TDynamicCounters::TCounterPtr BytesCount; + NMonitoring::TDynamicCounters::TCounterPtr FailsCount; +public: + TCSUploadCounters() + : TBase("CSUpload") + , RequestsCount(TBase::GetDeriviative("Requests")) + , CSReplyDuration(TBase::GetHistogram("Replies/Shard/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 1))) + , SucceedFullReplyDuration(TBase::GetHistogram("Replies/Success/Full/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 1))) + , 
FailedFullReplyDuration(TBase::GetHistogram("Replies/Failed/Full/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 1))) + , BytesDistribution(TBase::GetHistogram("Requests/Bytes", NMonitoring::ExponentialHistogram(15, 2, 1024))) + , RowsDistribution(TBase::GetHistogram("Requests/Rows", NMonitoring::ExponentialHistogram(15, 2, 16))) + , RowsCount(TBase::GetDeriviative("Rows")) + , BytesCount(TBase::GetDeriviative("Bytes")) + , FailsCount(TBase::GetDeriviative("Fails")) { + } -}; + void OnRequest(const ui64 rows, const ui64 bytes) const { + BytesDistribution->Collect(bytes); + RowsDistribution->Collect(rows); + BytesCount->Add(bytes); + RowsCount->Add(rows); + } + + void OnCSFailed(const Ydb::StatusIds::StatusCode /*code*/) { + FailsCount->Add(1); + } + + void OnCSReply(const TDuration d) const { + CSReplyDuration->Collect(d.MilliSeconds()); + } + + void OnSucceedFullReply(const TDuration d) const { + SucceedFullReplyDuration->Collect(d.MilliSeconds()); + } + + void OnFailedFullReply(const TDuration d) const { + FailedFullReplyDuration->Collect(d.MilliSeconds()); + } +}; // External transaction controller class class TWritersController { private: TAtomicCounter WritesCount = 0; TAtomicCounter WritesIndex = 0; + TAtomicCounter FailsCount = 0; + TMutex Mutex; + NYql::TIssues Issues; + std::optional Code; NActors::TActorIdentity LongTxActorId; std::vector WriteIds; + const TMonotonic StartInstant = TMonotonic::Now(); YDB_READONLY_DEF(NLongTxService::TLongTxId, LongTxId); + YDB_READONLY(std::shared_ptr, Counters, std::make_shared()); + void SendReply() { + if (FailsCount.Val()) { + Counters->OnFailedFullReply(TMonotonic::Now() - StartInstant); + AFL_VERIFY(Code); + LongTxActorId.Send(LongTxActorId, new TEvPrivate::TEvShardsWriteResult(*Code, Issues)); + } else { + Counters->OnSucceedFullReply(TMonotonic::Now() - StartInstant); + auto req = MakeHolder(LongTxId); + for (auto&& i : WriteIds) { + req->AddWrite(i.GetShardId(), i.GetWriteId()); + } + 
LongTxActorId.Send(NLongTxService::MakeLongTxServiceID(LongTxActorId.NodeId()), req.Release()); + } + } public: using TPtr = std::shared_ptr; @@ -94,10 +164,6 @@ class TShardWriter: public NActors::TActorBootstrapped { TBase::PassAway(); } public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::GRPC_REQ_SHARD_WRITER; - } - TShardWriter(const ui64 shardId, const ui64 tableId, const TString& dedupId, const IShardInfo::TPtr& data, const NWilson::TProfileSpan& parentSpan, TWritersController::TPtr externalController, const ui32 writePartIdx, const EModificationType mType); diff --git a/ydb/core/tx/data_events/shards_splitter.h b/ydb/core/tx/data_events/shards_splitter.h index bed95ce29d03..77c04e1160e9 100644 --- a/ydb/core/tx/data_events/shards_splitter.h +++ b/ydb/core/tx/data_events/shards_splitter.h @@ -3,7 +3,6 @@ #include #include - #include #include @@ -21,6 +20,8 @@ class IShardsSplitter { using TYdbConclusionStatus = TConclusionSpecialStatus; class IEvWriteDataAccessor { + private: + YDB_READONLY(ui64, Size, 0); public: using TPtr = std::shared_ptr; @@ -29,6 +30,11 @@ class IShardsSplitter { } virtual std::shared_ptr GetDeserializedBatch() const = 0; virtual TString GetSerializedData() const = 0; + IEvWriteDataAccessor(const ui64 size) + : Size(size) + { + + } virtual ~IEvWriteDataAccessor() {} }; diff --git a/ydb/core/tx/data_events/write_data.h b/ydb/core/tx/data_events/write_data.h index 5fc92bf185b3..d409e87b5a7b 100644 --- a/ydb/core/tx/data_events/write_data.h +++ b/ydb/core/tx/data_events/write_data.h @@ -46,7 +46,21 @@ class TWriteMeta { YDB_ACCESSOR(TMonotonic, WriteMiddle4StartInstant, TMonotonic::Now()); YDB_ACCESSOR(TMonotonic, WriteMiddle5StartInstant, TMonotonic::Now()); YDB_ACCESSOR(TMonotonic, WriteMiddle6StartInstant, TMonotonic::Now()); + std::optional LockId; public: + void SetLockId(const ui64 lockId) { + LockId = lockId; + } + + ui64 GetLockIdVerified() const { + 
AFL_VERIFY(LockId); + return *LockId; + } + + std::optional GetLockIdOptional() const { + return LockId; + } + bool IsGuaranteeWriter() const { switch (ModificationType) { case EModificationType::Delete: diff --git a/ydb/core/tx/limiter/grouped_memory/service/actor.cpp b/ydb/core/tx/limiter/grouped_memory/service/actor.cpp new file mode 100644 index 000000000000..e7573c23612e --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/actor.cpp @@ -0,0 +1,50 @@ +#include "actor.h" + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +void TMemoryLimiterActor::Bootstrap() { + Manager = std::make_shared(SelfId(), Config, Name, Signals, DefaultStage); + Become(&TThis::StateWait); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvStartTask::TPtr& ev) { + for (auto&& i : ev->Get()->GetAllocations()) { + Manager->RegisterAllocation(ev->Get()->GetExternalProcessId(), ev->Get()->GetExternalScopeId(), ev->Get()->GetExternalGroupId(), i, + ev->Get()->GetStageFeaturesIdx()); + } +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvFinishTask::TPtr& ev) { + Manager->UnregisterAllocation(ev->Get()->GetExternalProcessId(), ev->Get()->GetExternalScopeId(), ev->Get()->GetAllocationId()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvUpdateTask::TPtr& ev) { + Manager->UpdateAllocation( + ev->Get()->GetExternalProcessId(), ev->Get()->GetExternalScopeId(), ev->Get()->GetAllocationId(), ev->Get()->GetVolume()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvFinishGroup::TPtr& ev) { + Manager->UnregisterGroup(ev->Get()->GetExternalProcessId(), ev->Get()->GetExternalScopeId(), ev->Get()->GetExternalGroupId()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvStartGroup::TPtr& ev) { + Manager->RegisterGroup(ev->Get()->GetExternalProcessId(), ev->Get()->GetExternalScopeId(), ev->Get()->GetExternalGroupId()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvFinishProcess::TPtr& ev) { + 
Manager->UnregisterProcess(ev->Get()->GetExternalProcessId()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvStartProcess::TPtr& ev) { + Manager->RegisterProcess(ev->Get()->GetExternalProcessId(), ev->Get()->GetStages()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvFinishProcessScope::TPtr& ev) { + Manager->UnregisterProcessScope(ev->Get()->GetExternalProcessId(), ev->Get()->GetExternalScopeId()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvStartProcessScope::TPtr& ev) { + Manager->RegisterProcessScope(ev->Get()->GetExternalProcessId(), ev->Get()->GetExternalScopeId()); +} + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/actor.h b/ydb/core/tx/limiter/grouped_memory/service/actor.h new file mode 100644 index 000000000000..4b4506ba5b99 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/actor.h @@ -0,0 +1,59 @@ +#pragma once +#include "counters.h" +#include "manager.h" + +#include +#include + +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { +class TManager; +class TMemoryLimiterActor: public NActors::TActorBootstrapped { +private: + std::shared_ptr Manager; + const TConfig Config; + const TString Name; + const std::shared_ptr Signals; + const std::shared_ptr DefaultStage; + +public: + TMemoryLimiterActor(const TConfig& config, const TString& name, const std::shared_ptr& signals, + const std::shared_ptr& defaultStage) + : Config(config) + , Name(name) + , Signals(signals) + , DefaultStage(defaultStage) { + } + + void Handle(NEvents::TEvExternal::TEvStartTask::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvFinishTask::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvUpdateTask::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvStartGroup::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvFinishGroup::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvStartProcess::TPtr& ev); + void 
Handle(NEvents::TEvExternal::TEvFinishProcess::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvStartProcessScope::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvFinishProcessScope::TPtr& ev); + + void Bootstrap(); + + STFUNC(StateWait) { + switch (ev->GetTypeRewrite()) { + hFunc(NEvents::TEvExternal::TEvStartTask, Handle); + hFunc(NEvents::TEvExternal::TEvFinishTask, Handle); + hFunc(NEvents::TEvExternal::TEvUpdateTask, Handle); + hFunc(NEvents::TEvExternal::TEvStartGroup, Handle); + hFunc(NEvents::TEvExternal::TEvFinishGroup, Handle); + hFunc(NEvents::TEvExternal::TEvStartProcess, Handle); + hFunc(NEvents::TEvExternal::TEvFinishProcess, Handle); + hFunc(NEvents::TEvExternal::TEvStartProcessScope, Handle); + hFunc(NEvents::TEvExternal::TEvFinishProcessScope, Handle); + default: + AFL_VERIFY(false)("ev_type", ev->GetTypeName()); + } + } +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/allocation.cpp b/ydb/core/tx/limiter/grouped_memory/service/allocation.cpp new file mode 100644 index 000000000000..2d04be2c9cef --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/allocation.cpp @@ -0,0 +1,26 @@ +#include "allocation.h" +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +TAllocationInfo::TAllocationInfo(const ui64 processId, const ui64 scopeId, const ui64 allocationInternalGroupId, + const std::shared_ptr& allocation, + const std::shared_ptr& stage) + : Allocation(allocation) + , AllocationInternalGroupId(allocationInternalGroupId) + , Identifier(TValidator::CheckNotNull(Allocation)->GetIdentifier()) + , ProcessId(processId) + , ScopeId(scopeId) + , Stage(stage) { + AFL_VERIFY(Stage); + AFL_VERIFY(Allocation); + AFL_INFO(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "add")("id", Allocation->GetIdentifier())("stage", Stage->GetName()); + AllocatedVolume = Allocation->GetMemory(); + Stage->Add(AllocatedVolume, Allocation->IsAllocated()); + if (allocation->IsAllocated()) 
{ + AFL_INFO(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "allocated_on_add")("allocation_id", Identifier)("stage", Stage->GetName()); + Allocation = nullptr; + } +} + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/allocation.h b/ydb/core/tx/limiter/grouped_memory/service/allocation.h new file mode 100644 index 000000000000..47d5043188d0 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/allocation.h @@ -0,0 +1,76 @@ +#pragma once +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +enum class EAllocationStatus { + Allocated, + Waiting, + Failed +}; + +class TAllocationInfo { +private: + std::shared_ptr Allocation; + YDB_READONLY(ui64, AllocationInternalGroupId, 0); + ui64 AllocatedVolume = 0; + YDB_READONLY(ui64, Identifier, 0); + YDB_READONLY(ui64, ProcessId, 0); + YDB_READONLY(ui64, ScopeId, 0); + const std::shared_ptr Stage; + bool AllocationFailed = false; + +public: + ~TAllocationInfo() { + if (GetAllocationStatus() != EAllocationStatus::Failed) { + Stage->Free(AllocatedVolume, GetAllocationStatus() == EAllocationStatus::Allocated); + } + + AFL_TRACE(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "destroy")("allocation_id", Identifier)("stage", Stage->GetName()); + } + + bool IsAllocatable(const ui64 additional) const { + return Stage->IsAllocatable(AllocatedVolume, additional); + } + + void SetAllocatedVolume(const ui64 value) { + AFL_VERIFY(GetAllocationStatus() != EAllocationStatus::Failed); + Stage->UpdateVolume(AllocatedVolume, value, GetAllocationStatus() == EAllocationStatus::Allocated); + AllocatedVolume = value; + } + + ui64 GetAllocatedVolume() const { + return AllocatedVolume; + } + + [[nodiscard]] bool Allocate(const NActors::TActorId& ownerId) { + AFL_TRACE(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "allocated")("allocation_id", Identifier)("stage", Stage->GetName()); + AFL_VERIFY(Allocation)("status", GetAllocationStatus())("volume", 
AllocatedVolume)("id", Identifier)("stage", Stage->GetName())( + "allocation_internal_group_id", AllocationInternalGroupId); + const bool result = Allocation->OnAllocated( + std::make_shared(ProcessId, ScopeId, Allocation->GetIdentifier(), ownerId, Allocation->GetMemory()), Allocation); + if (result) { + Stage->Allocate(AllocatedVolume); + } else { + Stage->Free(AllocatedVolume, false); + AllocationFailed = true; + } + Allocation = nullptr; + return result; + } + + EAllocationStatus GetAllocationStatus() const { + if (AllocationFailed) { + return EAllocationStatus::Failed; + } else if (Allocation) { + return EAllocationStatus::Waiting; + } else { + return EAllocationStatus::Allocated; + } + } + + TAllocationInfo(const ui64 processId, const ui64 scopeId, const ui64 allocationInternalGroupId, const std::shared_ptr& allocation, + const std::shared_ptr& stage); +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/counters.cpp b/ydb/core/tx/limiter/grouped_memory/service/counters.cpp new file mode 100644 index 000000000000..d780bfd499d2 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/counters.cpp @@ -0,0 +1,5 @@ +#include "counters.h" + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +} diff --git a/ydb/core/tx/limiter/grouped_memory/service/counters.h b/ydb/core/tx/limiter/grouped_memory/service/counters.h new file mode 100644 index 000000000000..3c96b3b8b9a4 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/counters.h @@ -0,0 +1,62 @@ +#pragma once +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TStageCounters: public NColumnShard::TCommonCountersOwner { +private: + using TBase = NColumnShard::TCommonCountersOwner; + NMonitoring::TDynamicCounters::TCounterPtr AllocatedBytes; + NMonitoring::TDynamicCounters::TCounterPtr AllocatedChunks; + NMonitoring::TDynamicCounters::TCounterPtr WaitingBytes; + NMonitoring::TDynamicCounters::TCounterPtr 
WaitingChunks; + +public: + TStageCounters(const TCommonCountersOwner& owner, const TString& name) + : TBase(owner, "stage", name) + , AllocatedBytes(TBase::GetValue("Allocated/Bytes")) + , AllocatedChunks(TBase::GetValue("Allocated/Count")) + , WaitingBytes(TBase::GetValue("Waiting/Bytes")) + , WaitingChunks(TBase::GetValue("Waiting/Count")) { + } + + void Add(const ui64 volume, const bool allocated) { + if (allocated) { + AllocatedBytes->Add(volume); + AllocatedChunks->Add(1); + } else { + WaitingBytes->Add(volume); + WaitingChunks->Add(1); + } + } + + void Sub(const ui64 volume, const bool allocated) { + if (allocated) { + AllocatedBytes->Sub(volume); + AllocatedChunks->Sub(1); + } else { + WaitingBytes->Sub(volume); + WaitingChunks->Sub(1); + } + } +}; + +class TCounters: public NColumnShard::TCommonCountersOwner { +private: + using TBase = NColumnShard::TCommonCountersOwner; + +public: + NMonitoring::TDynamicCounters::TCounterPtr GroupsCount; + NMonitoring::TDynamicCounters::TCounterPtr ProcessesCount; + TCounters(const TIntrusivePtr<::NMonitoring::TDynamicCounters>& counters, const TString& name) + : TBase(NColumnShard::TCommonCountersOwner("grouped_memory_limiter", counters), "limiter_name", name) + , GroupsCount(TBase::GetValue("Groups/Count")) + , ProcessesCount(TBase::GetValue("Processes/Count")) { + } + + std::shared_ptr BuildStageCounters(const TString& stageName) const { + return std::make_shared(*this, stageName); + } +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/group.cpp b/ydb/core/tx/limiter/grouped_memory/service/group.cpp new file mode 100644 index 000000000000..3bf671ff76b9 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/group.cpp @@ -0,0 +1,68 @@ +#include "group.h" +#include "process.h" + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +std::vector> TGrouppedAllocations::AllocatePossible(const ui32 allocationsLimit) { + std::vector> result; + ui64 
allocationMemory = 0; + ui32 allocationsCount = 0; + for (auto&& [_, allocation] : Allocations) { + if (allocation->IsAllocatable(allocationMemory)) { + allocationMemory += allocation->GetAllocatedVolume(); + result.emplace_back(allocation); + if (++allocationsCount == allocationsLimit) { + return result; + } + } + } + return result; +} + +bool TAllocationGroups::Allocate(const bool isPriorityProcess, TProcessMemoryScope& process, const ui32 allocationsLimit) { + AFL_DEBUG(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "try_allocation")("limit", allocationsLimit)( + "external_process_id", process.ExternalProcessId)("forced_internal_group_id", process.GroupIds.GetMinInternalIdOptional())( + "external_scope_id", process.ExternalScopeId)("forced_external_group_id", process.GroupIds.GetMinExternalIdOptional()); + ui32 allocationsCount = 0; + while (true) { + std::vector toRemove; + for (auto it = Groups.begin(); it != Groups.end();) { + const ui64 internalGroupId = it->first; + const bool forced = isPriorityProcess && internalGroupId == process.GroupIds.GetMinInternalIdVerified(); + std::vector> allocated; + if (forced) { + allocated = it->second.ExtractAllocationsToVector(); + } else if (allocationsLimit) { + allocated = it->second.AllocatePossible(allocationsLimit - allocationsCount); + } else { + break; + } + for (auto&& i : allocated) { + if (!i->Allocate(process.OwnerActorId)) { + toRemove.emplace_back(i->GetIdentifier()); + } else if (!forced) { + AFL_VERIFY(++allocationsCount <= allocationsLimit)("count", allocationsCount)("limit", allocationsLimit); + } + if (!forced) { + AFL_VERIFY(it->second.Remove(i)); + } + } + if (!it->second.IsEmpty()) { + break; + } + it = Groups.erase(it); + if (!forced && allocationsCount == allocationsLimit) { + break; + } + } + for (auto&& i : toRemove) { + process.UnregisterAllocation(i); + } + if (toRemove.empty() || allocationsCount == allocationsLimit) { + break; + } + } + return allocationsCount; +} + +} // namespace 
NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/group.h b/ydb/core/tx/limiter/grouped_memory/service/group.h new file mode 100644 index 000000000000..8f4434c886b8 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/group.h @@ -0,0 +1,90 @@ +#pragma once +#include "allocation.h" + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TProcessMemoryScope; + +class TGrouppedAllocations { +private: + THashMap> Allocations; + +public: + std::vector> ExtractAllocationsToVector() { + std::vector> result; + result.reserve(Allocations.size()); + for (auto&& i : Allocations) { + result.emplace_back(std::move(i.second)); + } + Allocations.clear(); + return result; + } + + const THashMap>& GetAllocations() const { + return Allocations; + } + + bool IsEmpty() const { + return Allocations.empty(); + } + + void AddAllocation(const std::shared_ptr& allocation) { + AFL_VERIFY(Allocations.emplace(allocation->GetIdentifier(), allocation).second); + } + + [[nodiscard]] bool Remove(const std::shared_ptr& allocation) { + return Allocations.erase(allocation->GetIdentifier()); + } + + std::vector> AllocatePossible(const ui32 allocationsLimit); +}; + +class TAllocationGroups { +private: + std::map Groups; + +public: + bool IsEmpty() const { + return Groups.empty(); + } + + [[nodiscard]] bool Allocate(const bool isPriorityProcess, TProcessMemoryScope& process, const ui32 allocationsLimit); + + [[nodiscard]] std::vector> ExtractGroup(const ui64 id) { + auto it = Groups.find(id); + if (it == Groups.end()) { + return {}; + } + auto result = it->second.ExtractAllocationsToVector(); + Groups.erase(it); + return result; + } + + std::optional GetMinGroupId() const { + if (Groups.size()) { + return Groups.begin()->first; + } else { + return std::nullopt; + } + } + + [[nodiscard]] bool RemoveAllocation(const ui64 internalGroupId, const std::shared_ptr& allocation) { + auto groupIt = Groups.find(internalGroupId); + if (groupIt == 
Groups.end()) { + return false; + } + if (!groupIt->second.Remove(allocation)) { + return false; + } + if (groupIt->second.IsEmpty()) { + Groups.erase(groupIt); + } + return true; + } + + void AddAllocation(const ui64 internalGroupId, const std::shared_ptr& allocation) { + Groups[internalGroupId].AddAllocation(allocation); + } +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/ids.cpp b/ydb/core/tx/limiter/grouped_memory/service/ids.cpp new file mode 100644 index 000000000000..163c90efcf12 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/ids.cpp @@ -0,0 +1,66 @@ +#include "ids.h" +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +ui64 TIdsControl::ExtractInternalIdVerified(const ui64 externalId) { + auto it = ExternalIdIntoInternalId.find(externalId); + AFL_VERIFY(it != ExternalIdIntoInternalId.end())("external_id", externalId); + const ui64 result = it->second; + InternalIdIntoExternalId.erase(result); + ExternalIdIntoInternalId.erase(it); + return result; +} + +std::optional TIdsControl::GetInternalIdOptional(const ui64 externalId) const { + auto it = ExternalIdIntoInternalId.find(externalId); + if (it != ExternalIdIntoInternalId.end()) { + return it->second; + } + return std::nullopt; +} + +ui64 TIdsControl::GetMinInternalIdVerified() const { + AFL_VERIFY(InternalIdIntoExternalId.size()); + return InternalIdIntoExternalId.begin()->first; +} + +ui64 TIdsControl::GetInternalIdVerified(const ui64 externalId) const { + auto it = ExternalIdIntoInternalId.find(externalId); + AFL_VERIFY(it != ExternalIdIntoInternalId.end())("external_id", externalId); + return it->second; +} + +ui64 TIdsControl::RegisterExternalId(const ui64 externalId) { + AFL_VERIFY(ExternalIdIntoInternalId.emplace(externalId, ++CurrentInternalId).second); + InternalIdIntoExternalId.emplace(CurrentInternalId, externalId); + return CurrentInternalId; +} + +ui64 TIdsControl::RegisterExternalIdOrGet(const ui64 
externalId) { + auto it = ExternalIdIntoInternalId.find(externalId); + if (it != ExternalIdIntoInternalId.end()) { + return it->second; + } + AFL_VERIFY(ExternalIdIntoInternalId.emplace(externalId, ++CurrentInternalId).second); + InternalIdIntoExternalId.emplace(CurrentInternalId, externalId); + return CurrentInternalId; +} + +bool TIdsControl::UnregisterExternalId(const ui64 externalId) { + auto it = ExternalIdIntoInternalId.find(externalId); + if (it == ExternalIdIntoInternalId.end()) { + return false; + } + AFL_VERIFY(InternalIdIntoExternalId.erase(it->second)); + ExternalIdIntoInternalId.erase(it); + return true; +} + +ui64 TIdsControl::GetExternalIdVerified(const ui64 internalId) const { + auto it = InternalIdIntoExternalId.find(internalId); + AFL_VERIFY(it != InternalIdIntoExternalId.end()); + return it->second; +} + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/ids.h b/ydb/core/tx/limiter/grouped_memory/service/ids.h new file mode 100644 index 000000000000..acaa700411ab --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/ids.h @@ -0,0 +1,70 @@ +#pragma once +#include + +#include + +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TIdsControl { +private: + THashMap ExternalIdIntoInternalId; + std::map InternalIdIntoExternalId; + ui64 CurrentInternalId = 0; + +public: + void Clear() { + ExternalIdIntoInternalId.clear(); + InternalIdIntoExternalId.clear(); + } + + const std::map& GetInternalIdToExternalIds() const { + return InternalIdIntoExternalId; + } + + ui64 GetSize() const { + return InternalIdIntoExternalId.size(); + } + + [[nodiscard]] ui64 ExtractInternalIdVerified(const ui64 externalId); + + ui64 GetMinInternalIdVerified() const; + ui64 GetExternalIdVerified(const ui64 internalId) const; + + std::optional GetInternalIdOptional(const ui64 externalId) const; + + ui64 GetInternalIdVerified(const ui64 externalId) const; + + [[nodiscard]] ui64 
RegisterExternalId(const ui64 externalId); + [[nodiscard]] ui64 RegisterExternalIdOrGet(const ui64 externalId); + + [[nodiscard]] bool UnregisterExternalId(const ui64 externalId); + + std::optional GetMinInternalIdOptional() const { + if (InternalIdIntoExternalId.size()) { + return InternalIdIntoExternalId.begin()->first; + } else { + return std::nullopt; + } + } + + std::optional GetMinExternalIdOptional() const { + if (InternalIdIntoExternalId.size()) { + return InternalIdIntoExternalId.begin()->second; + } else { + return std::nullopt; + } + } + + ui64 GetMinInternalIdDef(const ui64 def) const { + if (InternalIdIntoExternalId.size()) { + return InternalIdIntoExternalId.begin()->first; + } else { + return def; + } + } +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/manager.cpp b/ydb/core/tx/limiter/grouped_memory/service/manager.cpp new file mode 100644 index 000000000000..96fe8bcefc17 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/manager.cpp @@ -0,0 +1,122 @@ +#include "manager.h" + +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +TProcessMemory* TManager::GetProcessMemoryByExternalIdOptional(const ui64 externalProcessId) { + auto internalId = ProcessIds.GetInternalIdOptional(externalProcessId); + if (!internalId) { + return nullptr; + } + return GetProcessMemoryOptional(*internalId); +} + +void TManager::RegisterGroup(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 externalGroupId) { + AFL_DEBUG(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "register_group")("external_process_id", externalProcessId)( + "external_group_id", externalGroupId)("size", ProcessIds.GetSize())("external_scope_id", externalScopeId); + if (auto* process = GetProcessMemoryByExternalIdOptional(externalProcessId)) { + process->RegisterGroup(externalScopeId, externalGroupId); + } + RefreshSignals(); +} + +void TManager::UnregisterGroup(const ui64 externalProcessId, 
const ui64 externalScopeId, const ui64 externalGroupId) { + AFL_DEBUG(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "unregister_group")("external_process_id", externalProcessId)( + "external_group_id", externalGroupId)("size", ProcessIds.GetSize()); + if (auto* process = GetProcessMemoryByExternalIdOptional(externalProcessId)) { + process->UnregisterGroup(externalScopeId, externalGroupId); + } + RefreshSignals(); +} + +void TManager::UpdateAllocation(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 allocationId, const ui64 volume) { + TProcessMemory& process = GetProcessMemoryVerified(ProcessIds.GetInternalIdVerified(externalProcessId)); + if (process.UpdateAllocation(externalScopeId, allocationId, volume)) { + TryAllocateWaiting(); + } + + RefreshSignals(); +} + +void TManager::TryAllocateWaiting() { + if (Processes.size()) { + auto it = Processes.find(ProcessIds.GetMinInternalIdVerified()); + AFL_VERIFY(it != Processes.end()); + AFL_VERIFY(it->second.IsPriorityProcess()); + it->second.TryAllocateWaiting(0); + } + while (true) { + bool found = false; + for (auto&& i : Processes) { + if (i.second.TryAllocateWaiting(1)) { + found = true; + } + } + if (!found) { + break; + } + } + RefreshSignals(); +} + +void TManager::UnregisterAllocation(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 allocationId) { + if (auto* process = GetProcessMemoryByExternalIdOptional(externalProcessId)) { + if (process->UnregisterAllocation(externalScopeId, allocationId)) { + TryAllocateWaiting(); + } + } + RefreshSignals(); +} + +void TManager::RegisterAllocation(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 externalGroupId, + const std::shared_ptr& task, const std::optional& stageIdx) { + if (auto* process = GetProcessMemoryByExternalIdOptional(externalProcessId)) { + process->RegisterAllocation(externalScopeId, externalGroupId, task, stageIdx); + } else { + 
AFL_VERIFY(!task->OnAllocated(std::make_shared(externalProcessId, externalScopeId, task->GetIdentifier(), OwnerActorId, task->GetMemory()), task))( + "ext_group", externalGroupId)("stage_idx", stageIdx); + } + RefreshSignals(); +} + +void TManager::RegisterProcess(const ui64 externalProcessId, const std::vector>& stages) { + auto internalId = ProcessIds.GetInternalIdOptional(externalProcessId); + if (!internalId) { + const ui64 internalProcessId = ProcessIds.RegisterExternalIdOrGet(externalProcessId); + AFL_VERIFY(Processes.emplace(internalProcessId, TProcessMemory(externalProcessId, OwnerActorId, Processes.empty(), stages, DefaultStage)).second); + } else { + ++Processes.find(*internalId)->second.MutableLinksCount(); + } + RefreshSignals(); +} + +void TManager::UnregisterProcess(const ui64 externalProcessId) { + const ui64 internalProcessId = ProcessIds.GetInternalIdVerified(externalProcessId); + auto it = Processes.find(internalProcessId); + AFL_VERIFY(it != Processes.end()); + if (--it->second.MutableLinksCount()) { + return; + } + Y_UNUSED(ProcessIds.ExtractInternalIdVerified(externalProcessId)); + it->second.Unregister(); + Processes.erase(it); + const ui64 nextInternalProcessId = ProcessIds.GetMinInternalIdDef(internalProcessId); + if (internalProcessId < nextInternalProcessId) { + GetProcessMemoryVerified(nextInternalProcessId).SetPriorityProcess(); + TryAllocateWaiting(); + } + RefreshSignals(); +} + +void TManager::RegisterProcessScope(const ui64 externalProcessId, const ui64 externalProcessScopeId) { + GetProcessMemoryVerified(ProcessIds.GetInternalIdVerified(externalProcessId)).RegisterScope(externalProcessScopeId); + RefreshSignals(); +} + +void TManager::UnregisterProcessScope(const ui64 externalProcessId, const ui64 externalProcessScopeId) { + GetProcessMemoryVerified(ProcessIds.GetInternalIdVerified(externalProcessId)).UnregisterScope(externalProcessScopeId); + RefreshSignals(); +} + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git 
a/ydb/core/tx/limiter/grouped_memory/service/manager.h b/ydb/core/tx/limiter/grouped_memory/service/manager.h new file mode 100644 index 000000000000..fd641a3f69b1 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/manager.h @@ -0,0 +1,79 @@ +#pragma once +#include "counters.h" +#include "process.h" + +#include +#include + +#include +#include + +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TManager { +private: + const TConfig Config; + const TString Name; + const std::shared_ptr Signals; + const NActors::TActorId OwnerActorId; + THashMap Processes; + std::shared_ptr DefaultStage; + TIdsControl ProcessIds; + + void TryAllocateWaiting(); + void RefreshSignals() const { + Signals->ProcessesCount->Set(Processes.size()); + } + + TProcessMemory& GetProcessMemoryVerified(const ui64 internalProcessId) { + auto it = Processes.find(internalProcessId); + AFL_VERIFY(it != Processes.end()); + return it->second; + } + + TProcessMemory* GetProcessMemoryByExternalIdOptional(const ui64 externalProcessId); + + TProcessMemory* GetProcessMemoryOptional(const ui64 internalProcessId) { + auto it = Processes.find(internalProcessId); + if (it != Processes.end()) { + return &it->second; + } else { + return nullptr; + } + } + +public: + TManager(const NActors::TActorId& ownerActorId, const TConfig& config, const TString& name, const std::shared_ptr& signals, + const std::shared_ptr& defaultStage) + : Config(config) + , Name(name) + , Signals(signals) + , OwnerActorId(ownerActorId) + , DefaultStage(defaultStage) + { + } + + void RegisterGroup(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 externalGroupId); + void UnregisterGroup(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 externalGroupId); + + void RegisterProcessScope(const ui64 externalProcessId, const ui64 externalScopeId); + void UnregisterProcessScope(const ui64 externalProcessId, const ui64 externalScopeId); + + void RegisterProcess(const 
ui64 externalProcessId, const std::vector>& stages); + void UnregisterProcess(const ui64 externalProcessId); + + void RegisterAllocation(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 externalGroupId, + const std::shared_ptr& task, + const std::optional& stageIdx); + void UnregisterAllocation(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 allocationId); + void UpdateAllocation(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 allocationId, const ui64 volume); + + bool IsEmpty() const { + return Processes.empty(); + } +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/process.cpp b/ydb/core/tx/limiter/grouped_memory/service/process.cpp new file mode 100644 index 000000000000..bcde6532e797 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/process.cpp @@ -0,0 +1,5 @@ +#include "process.h" + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/process.h b/ydb/core/tx/limiter/grouped_memory/service/process.h new file mode 100644 index 000000000000..a1c13e091d59 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/process.h @@ -0,0 +1,262 @@ +#pragma once +#include "group.h" +#include "ids.h" + +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TProcessMemoryScope { +private: + const ui64 ExternalProcessId; + const ui64 ExternalScopeId; + TAllocationGroups WaitAllocations; + THashMap> AllocationInfo; + TIdsControl GroupIds; + ui32 Links = 1; + const NActors::TActorId OwnerActorId; + + TAllocationInfo& GetAllocationInfoVerified(const ui64 allocationId) const { + auto it = AllocationInfo.find(allocationId); + AFL_VERIFY(it != AllocationInfo.end()); + return *it->second; + } + + void UnregisterGroupImpl(const ui64 internalGroupId) { + auto data = WaitAllocations.ExtractGroup(internalGroupId); + 
for (auto&& allocation : data) { + AFL_VERIFY(!allocation->Allocate(OwnerActorId)); + } + } + + const std::shared_ptr& RegisterAllocationImpl( + const ui64 internalGroupId, const std::shared_ptr& task, const std::shared_ptr& stage) { + auto it = AllocationInfo.find(task->GetIdentifier()); + if (it == AllocationInfo.end()) { + it = AllocationInfo + .emplace(task->GetIdentifier(), + std::make_shared(ExternalProcessId, ExternalScopeId, internalGroupId, task, stage)) + .first; + } + return it->second; + } + + friend class TAllocationGroups; + +public: + TProcessMemoryScope(const ui64 externalProcessId, const ui64 externalScopeId, const NActors::TActorId& ownerActorId) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + , OwnerActorId(ownerActorId) { + } + + void Register() { + ++Links; + } + + [[nodiscard]] bool Unregister() { + if (--Links) { + return false; + } + for (auto&& [i, _] : GroupIds.GetInternalIdToExternalIds()) { + UnregisterGroupImpl(i); + } + GroupIds.Clear(); + AllocationInfo.clear(); + return true; + } + + void RegisterAllocation(const bool isPriorityProcess, const ui64 externalGroupId, const std::shared_ptr& task, + const std::shared_ptr& stage) { + AFL_VERIFY(task); + AFL_VERIFY(stage); + const std::optional internalGroupIdOptional = GroupIds.GetInternalIdOptional(externalGroupId); + if (!internalGroupIdOptional) { + AFL_VERIFY(!task->OnAllocated(std::make_shared(ExternalProcessId, ExternalScopeId, task->GetIdentifier(), OwnerActorId, task->GetMemory()), task))( + "ext_group", externalGroupId)( + "min_group", GroupIds.GetMinInternalIdOptional())("stage", stage->GetName()); + AFL_VERIFY(!AllocationInfo.contains(task->GetIdentifier())); + } else { + const ui64 internalGroupId = *internalGroupIdOptional; + auto allocationInfo = RegisterAllocationImpl(internalGroupId, task, stage); + + if (allocationInfo->GetAllocationStatus() != EAllocationStatus::Waiting) { + } else if 
(WaitAllocations.GetMinGroupId().value_or(internalGroupId) < internalGroupId) { + WaitAllocations.AddAllocation(internalGroupId, allocationInfo); + } else if (allocationInfo->IsAllocatable(0) || (isPriorityProcess && internalGroupId == GroupIds.GetMinInternalIdVerified())) { + Y_UNUSED(WaitAllocations.RemoveAllocation(internalGroupId, allocationInfo)); + if (!allocationInfo->Allocate(OwnerActorId)) { + UnregisterAllocation(allocationInfo->GetIdentifier()); + } + } else { + WaitAllocations.AddAllocation(internalGroupId, allocationInfo); + } + } + } + + bool UpdateAllocation(const ui64 allocationId, const ui64 volume) { + GetAllocationInfoVerified(allocationId).SetAllocatedVolume(volume); + return true; + } + + bool TryAllocateWaiting(const bool isPriorityProcess, const ui32 allocationsCountLimit) { + return WaitAllocations.Allocate(isPriorityProcess, *this, allocationsCountLimit); + } + + bool UnregisterAllocation(const ui64 allocationId) { + ui64 memoryAllocated = 0; + auto it = AllocationInfo.find(allocationId); + AFL_VERIFY(it != AllocationInfo.end()); + bool waitFlag = false; + const ui64 internalGroupId = it->second->GetAllocationInternalGroupId(); + switch (it->second->GetAllocationStatus()) { + case EAllocationStatus::Allocated: + case EAllocationStatus::Failed: + AFL_VERIFY(!WaitAllocations.RemoveAllocation(internalGroupId, it->second)); + break; + case EAllocationStatus::Waiting: + AFL_VERIFY(WaitAllocations.RemoveAllocation(internalGroupId, it->second)); + waitFlag = true; + break; + } + AFL_DEBUG(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "allocation_unregister")("allocation_id", allocationId)("wait", waitFlag)( + "internal_group_id", internalGroupId)("allocation_status", it->second->GetAllocationStatus()); + memoryAllocated = it->second->GetAllocatedVolume(); + AllocationInfo.erase(it); + return !!memoryAllocated; + } + + void UnregisterGroup(const bool isPriorityProcess, const ui64 externalGroupId) { + const ui64 internalGroupId = 
GroupIds.ExtractInternalIdVerified(externalGroupId); + AFL_INFO(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "remove_group")("external_group_id", externalGroupId)( + "internal_group_id", internalGroupId); + UnregisterGroupImpl(internalGroupId); + if (isPriorityProcess && (internalGroupId < GroupIds.GetMinInternalIdDef(internalGroupId))) { + Y_UNUSED(TryAllocateWaiting(isPriorityProcess, 0)); + } + } + + void RegisterGroup(const ui64 externalGroupId) { + Y_UNUSED(GroupIds.RegisterExternalId(externalGroupId)); + } +}; + +class TProcessMemory { +private: + const ui64 ExternalProcessId; + + const NActors::TActorId OwnerActorId; + bool PriorityProcessFlag = false; + + YDB_ACCESSOR(ui32, LinksCount, 1); + YDB_READONLY_DEF(std::vector>, Stages); + const std::shared_ptr DefaultStage; + THashMap> AllocationScopes; + + TProcessMemoryScope* GetAllocationScopeOptional(const ui64 externalScopeId) const { + auto it = AllocationScopes.find(externalScopeId); + if (it == AllocationScopes.end()) { + return nullptr; + } + return it->second.get(); + } + + TProcessMemoryScope& GetAllocationScopeVerified(const ui64 externalScopeId) const { + return *TValidator::CheckNotNull(GetAllocationScopeOptional(externalScopeId)); + } + +public: + bool IsPriorityProcess() const { + return PriorityProcessFlag; + } + + bool UpdateAllocation(const ui64 externalScopeId, const ui64 allocationId, const ui64 volume) { + return GetAllocationScopeVerified(externalScopeId).UpdateAllocation(allocationId, volume); + } + + void RegisterAllocation( + const ui64 externalScopeId, const ui64 externalGroupId, const std::shared_ptr& task, const std::optional& stageIdx) { + AFL_VERIFY(task); + std::shared_ptr stage; + if (Stages.empty()) { + AFL_VERIFY(!stageIdx); + stage = DefaultStage; + } else { + AFL_VERIFY(stageIdx); + AFL_VERIFY(*stageIdx < Stages.size()); + stage = Stages[*stageIdx]; + } + AFL_VERIFY(stage); + auto& scope = GetAllocationScopeVerified(externalScopeId); + 
scope.RegisterAllocation(IsPriorityProcess(), externalGroupId, task, stage); + } + + bool UnregisterAllocation(const ui64 externalScopeId, const ui64 allocationId) { + if (auto* scope = GetAllocationScopeOptional(externalScopeId)) { + return scope->UnregisterAllocation(allocationId); + } + return false; + } + + void UnregisterGroup(const ui64 externalScopeId, const ui64 externalGroupId) { + if (auto* scope = GetAllocationScopeOptional(externalScopeId)) { + scope->UnregisterGroup(IsPriorityProcess(), externalGroupId); + } + } + + void RegisterGroup(const ui64 externalScopeId, const ui64 externalGroupId) { + GetAllocationScopeVerified(externalScopeId).RegisterGroup(externalGroupId); + } + + void UnregisterScope(const ui64 externalScopeId) { + auto it = AllocationScopes.find(externalScopeId); + AFL_VERIFY(it != AllocationScopes.end()); + if (it->second->Unregister()) { + AllocationScopes.erase(it); + } + + } + + void RegisterScope(const ui64 externalScopeId) { + auto it = AllocationScopes.find(externalScopeId); + if (it == AllocationScopes.end()) { + AFL_VERIFY(AllocationScopes.emplace(externalScopeId, std::make_shared(ExternalProcessId, externalScopeId, OwnerActorId)).second); + } else { + it->second->Register(); + } + + } + + void SetPriorityProcess() { + AFL_VERIFY(!PriorityProcessFlag); + PriorityProcessFlag = true; + } + + TProcessMemory(const ui64 externalProcessId, const NActors::TActorId& ownerActorId, const bool isPriority, + const std::vector>& stages, const std::shared_ptr& defaultStage) + : ExternalProcessId(externalProcessId) + , OwnerActorId(ownerActorId) + , PriorityProcessFlag(isPriority) + , Stages(stages) + , DefaultStage(defaultStage) { + } + + bool TryAllocateWaiting(const ui32 allocationsCountLimit) { + bool allocated = false; + for (auto&& i : AllocationScopes) { + if (i.second->TryAllocateWaiting(IsPriorityProcess(), allocationsCountLimit)) { + allocated = true; + } + } + return allocated; + } + + void Unregister() { + for (auto&& i : 
AllocationScopes) { + Y_UNUSED(i.second->Unregister()); + } + AllocationScopes.clear(); + } +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/ya.make b/ydb/core/tx/limiter/grouped_memory/service/ya.make new file mode 100644 index 000000000000..d67332688426 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/ya.make @@ -0,0 +1,20 @@ +LIBRARY() + +SRCS( + actor.cpp + manager.cpp + counters.cpp + group.cpp + process.cpp + allocation.cpp + ids.cpp +) + +PEERDIR( + ydb/core/protos + ydb/core/tx/columnshard/counters/common +) + +GENERATE_ENUM_SERIALIZATION(allocation.h) + +END() diff --git a/ydb/core/tx/limiter/grouped_memory/usage/abstract.cpp b/ydb/core/tx/limiter/grouped_memory/usage/abstract.cpp new file mode 100644 index 000000000000..2d72f0039846 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/abstract.cpp @@ -0,0 +1,79 @@ +#include "abstract.h" +#include "events.h" + +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +TAllocationGuard::~TAllocationGuard() { + if (TlsActivationContext && !Released) { + NActors::TActivationContext::AsActorContext().Send( + ActorId, std::make_unique(ProcessId, ScopeId, AllocationId)); + } +} + +void TAllocationGuard::Update(const ui64 newVolume) { + AFL_VERIFY(!Released); + Memory = newVolume; + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send( + ActorId, std::make_unique(ProcessId, ScopeId, AllocationId, newVolume)); + } +} + +bool IAllocation::OnAllocated(std::shared_ptr&& guard, const std::shared_ptr& allocation) { + AFL_VERIFY(!Allocated); + Allocated = true; + AFL_VERIFY(allocation); + AFL_VERIFY(guard); + return DoOnAllocated(std::move(guard), allocation); +} + +TGroupGuard::~TGroupGuard() { + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send( + ActorId, std::make_unique(ProcessId, ExternalScopeId, GroupId)); + } +} + +TGroupGuard::TGroupGuard(const 
NActors::TActorId& actorId, const ui64 processId, const ui64 externalScopeId, const ui64 groupId) + : ActorId(actorId) + , ProcessId(processId) + , ExternalScopeId(externalScopeId) + , GroupId(groupId) { + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send( + ActorId, std::make_unique(ProcessId, ExternalScopeId, GroupId)); + } +} + +TProcessGuard::~TProcessGuard() { + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send(ActorId, std::make_unique(ProcessId)); + } +} + +TProcessGuard::TProcessGuard(const NActors::TActorId& actorId, const ui64 processId, const std::vector>& stages) + : ActorId(actorId) + , ProcessId(processId) { + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send(ActorId, std::make_unique(ProcessId, stages)); + } +} + +TScopeGuard::~TScopeGuard() { + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send(ActorId, std::make_unique(ProcessId, ScopeId)); + } +} + +TScopeGuard::TScopeGuard(const NActors::TActorId& actorId, const ui64 processId, const ui64 scopeId) + : ActorId(actorId) + , ProcessId(processId) + , ScopeId(scopeId) { + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send(ActorId, std::make_unique(ProcessId, ScopeId)); + } +} + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/usage/abstract.h b/ydb/core/tx/limiter/grouped_memory/usage/abstract.h new file mode 100644 index 000000000000..d92120f46fb6 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/abstract.h @@ -0,0 +1,223 @@ +#pragma once +#include + +#include +#include +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TStageFeatures; + +class TGroupGuard { +private: + const NActors::TActorId ActorId; + YDB_READONLY(ui64, ProcessId, 0); + YDB_READONLY(ui64, ExternalScopeId, 0); + YDB_READONLY(ui64, GroupId, 0); + +public: + TGroupGuard(const 
NActors::TActorId& actorId, const ui64 processId, const ui64 externalScopeId, const ui64 groupId); + + ~TGroupGuard(); +}; + +class TProcessGuard { +private: + const NActors::TActorId ActorId; + YDB_READONLY(ui64, ProcessId, 0); + +public: + TProcessGuard(const NActors::TActorId& actorId, const ui64 processId, const std::vector>& stages); + + ~TProcessGuard(); +}; + +class TScopeGuard { +private: + const NActors::TActorId ActorId; + YDB_READONLY(ui64, ProcessId, 0); + YDB_READONLY(ui64, ScopeId, 0); + +public: + TScopeGuard(const NActors::TActorId& actorId, const ui64 processId, const ui64 scopeId); + + ~TScopeGuard(); +}; + +class TAllocationGuard { +private: + const NActors::TActorId ActorId; + YDB_READONLY(ui64, ProcessId, 0) + YDB_READONLY(ui64, ScopeId, 0) + YDB_READONLY(ui64, AllocationId, 0) + YDB_READONLY(ui64, Memory, 0) + bool Released = false; + +public: + TAllocationGuard(const ui64 processId, const ui64 scopeId, const ui64 allocationId, const NActors::TActorId actorId, const ui64 memory) + : ActorId(actorId) + , ProcessId(processId) + , ScopeId(scopeId) + , AllocationId(allocationId) + , Memory(memory) { + } + + void Release() { + AFL_VERIFY(!Released); + Released = true; + } + + void Update(const ui64 newVolume); + + ~TAllocationGuard(); +}; + +class TPositiveControlInteger { +private: + ui64 Value = 0; + +public: + void Add(const ui64 value) { + Value += value; + } + void Sub(const ui64 value) { + AFL_VERIFY(value <= Value); + Value -= value; + } + ui64 Val() const { + return Value; + } +}; + +class TStageFeatures { +private: + YDB_READONLY_DEF(TString, Name); + YDB_READONLY(ui64, Limit, 0); + YDB_ACCESSOR_DEF(TPositiveControlInteger, Usage); + YDB_ACCESSOR_DEF(TPositiveControlInteger, Waiting); + std::shared_ptr Owner; + std::shared_ptr Counters; + +public: + TString DebugString() const { + TStringBuilder result; + result << "name=" << Name << ";limit=" << Limit << ";"; + if (Owner) { + result << "owner=" << Owner->DebugString() << ";"; + } + return 
result; + } + + ui64 GetFullMemory() const { + return Usage.Val() + Waiting.Val(); + } + + TStageFeatures( + const TString& name, const ui64 limit, const std::shared_ptr& owner, const std::shared_ptr& counters) + : Name(name) + , Limit(limit) + , Owner(owner) + , Counters(counters) { + } + + void Allocate(const ui64 volume) { + Waiting.Sub(volume); + Usage.Add(volume); + if (Counters) { + Counters->Add(volume, true); + Counters->Sub(volume, false); + } + if (Owner) { + Owner->Allocate(volume); + } + } + + void Free(const ui64 volume, const bool allocated) { + if (Counters) { + Counters->Sub(volume, allocated); + } + if (allocated) { + Usage.Sub(volume); + } else { + Waiting.Sub(volume); + } + + if (Owner) { + Owner->Free(volume, allocated); + } + } + + void UpdateVolume(const ui64 from, const ui64 to, const bool allocated) { + if (Counters) { + Counters->Sub(from, allocated); + Counters->Add(to, allocated); + } + if (allocated) { + Usage.Sub(from); + Usage.Add(to); + } else { + Waiting.Sub(from); + Waiting.Add(to); + } + + if (Owner) { + Owner->UpdateVolume(from, to, allocated); + } + } + + bool IsAllocatable(const ui64 volume, const ui64 additional) const { + if (Limit < additional + Usage.Val() + volume) { + return false; + } + if (Owner) { + return Owner->IsAllocatable(volume, additional); + } + return true; + } + + void Add(const ui64 volume, const bool allocated) { + if (Counters) { + Counters->Add(volume, allocated); + } + if (allocated) { + Usage.Add(volume); + } else { + Waiting.Add(volume); + } + + if (Owner) { + Owner->Add(volume, allocated); + } + } +}; + +class IAllocation { +private: + static inline TAtomicCounter Counter = 0; + YDB_READONLY(ui64, Identifier, Counter.Inc()); + YDB_READONLY(ui64, Memory, 0); + bool Allocated = false; + virtual bool DoOnAllocated( + std::shared_ptr&& guard, const std::shared_ptr& allocation) = 0; + +public: + virtual ~IAllocation() = default; + IAllocation(const ui64 mem) + : Memory(mem) { + } + + void ResetAllocation() 
{ + Allocated = false; + } + + bool IsAllocated() const { + return Allocated; + } + + [[nodiscard]] bool OnAllocated( + std::shared_ptr&& guard, const std::shared_ptr& allocation); +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/usage/config.cpp b/ydb/core/tx/limiter/grouped_memory/usage/config.cpp new file mode 100644 index 000000000000..17fe55975744 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/config.cpp @@ -0,0 +1,20 @@ +#include "config.h" +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +bool TConfig::DeserializeFromProto(const NKikimrConfig::TGroupedMemoryLimiterConfig& config) { + if (config.HasMemoryLimit()) { + MemoryLimit = config.GetMemoryLimit(); + } + Enabled = config.GetEnabled(); + return true; +} + +TString TConfig::DebugString() const { + TStringBuilder sb; + sb << "MemoryLimit=" << MemoryLimit << ";Enabled=" << Enabled << ";"; + return sb; +} + +} diff --git a/ydb/core/tx/limiter/grouped_memory/usage/config.h b/ydb/core/tx/limiter/grouped_memory/usage/config.h new file mode 100644 index 000000000000..91a9b5bc7afe --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/config.h @@ -0,0 +1,27 @@ +#pragma once +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TConfig { +private: + YDB_READONLY(bool, Enabled, true); + YDB_READONLY(ui64, MemoryLimit, ui64(3) << 30); + +public: + + static TConfig BuildDisabledConfig() { + TConfig result; + result.Enabled = false; + return result; + } + + bool IsEnabled() const { + return Enabled; + } + bool DeserializeFromProto(const NKikimrConfig::TGroupedMemoryLimiterConfig& config); + TString DebugString() const; +}; + +} diff --git a/ydb/core/tx/limiter/grouped_memory/usage/events.cpp b/ydb/core/tx/limiter/grouped_memory/usage/events.cpp new file mode 100644 index 000000000000..cafd00f2bd82 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/events.cpp @@ -0,0 +1,5 @@ +#include 
"events.h" + +namespace NKikimr::NOlap::NGroupedMemoryManager::NEvents { + +} diff --git a/ydb/core/tx/limiter/grouped_memory/usage/events.h b/ydb/core/tx/limiter/grouped_memory/usage/events.h new file mode 100644 index 000000000000..d3a8200c584c --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/events.h @@ -0,0 +1,150 @@ +#pragma once +#include "abstract.h" + +#include + +#include +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager::NEvents { +struct TEvExternal { + enum EEv { + EvStartAllocationTask = EventSpaceBegin(TKikimrEvents::ES_GROUPED_ALLOCATIONS_MANAGER), + EvFinishAllocationTask, + EvStartAllocationGroup, + EvFinishAllocationGroup, + EvUpdateAllocationTask, + EvStartAllocationProcess, + EvFinishAllocationProcess, + EvStartAllocationProcessScope, + EvFinishAllocationProcessScope, + EvEnd + }; + + class TEvStartTask: public NActors::TEventLocal { + private: + YDB_READONLY_DEF(std::vector>, Allocations); + YDB_READONLY_DEF(std::optional, StageFeaturesIdx); + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui64, ExternalScopeId, 0); + YDB_READONLY(ui64, ExternalGroupId, 0); + + public: + explicit TEvStartTask(const ui64 externalProcessId, const ui64 externalScopeId, + const ui64 externalGroupId, const std::vector>& allocations, + const std::optional& stageFeaturesIdx) + : Allocations(allocations) + , StageFeaturesIdx(stageFeaturesIdx) + , ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + , ExternalGroupId(externalGroupId) { + AFL_VERIFY(Allocations.size()); + } + }; + + class TEvFinishTask: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui64, ExternalScopeId, 0); + YDB_READONLY(ui64, AllocationId, 0); + + public: + explicit TEvFinishTask(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 allocationId) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + , AllocationId(allocationId) { + } + }; + + 
class TEvUpdateTask: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui64, ExternalScopeId, 0); + YDB_READONLY(ui64, AllocationId, 0); + YDB_READONLY(ui64, Volume, 0); + + public: + explicit TEvUpdateTask(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 allocationId, const ui64 volume) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + , AllocationId(allocationId) + , Volume(volume) { + } + }; + + class TEvFinishGroup: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui32, ExternalScopeId, 0); + YDB_READONLY(ui64, ExternalGroupId, 0); + + public: + explicit TEvFinishGroup(const ui64 externalProcessId, const ui32 externalScopeId, const ui64 externalGroupId) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + , ExternalGroupId(externalGroupId) { + } + }; + + class TEvStartGroup: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui32, ExternalScopeId, 0); + YDB_READONLY(ui64, ExternalGroupId, 0); + + public: + explicit TEvStartGroup(const ui64 externalProcessId, const ui32 externalScopeId, const ui64 externalGroupId) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + , ExternalGroupId(externalGroupId) { + } + }; + + class TEvFinishProcess: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + + public: + explicit TEvFinishProcess(const ui64 externalProcessId) + : ExternalProcessId(externalProcessId) { + } + }; + + class TEvStartProcess: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY_DEF(std::vector>, Stages); + public: + explicit TEvStartProcess(const ui64 externalProcessId, const std::vector>& stages) + : ExternalProcessId(externalProcessId) + , Stages(stages) { + } + }; + + class TEvFinishProcessScope: public 
NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui64, ExternalScopeId, 0); + + public: + explicit TEvFinishProcessScope(const ui64 externalProcessId, const ui64 externalScopeId) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + { + } + }; + + class TEvStartProcessScope: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui64, ExternalScopeId, 0); + + public: + explicit TEvStartProcessScope(const ui64 externalProcessId, const ui64 externalScopeId) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) { + } + }; +}; +} // namespace NKikimr::NOlap::NGroupedMemoryManager::NEvents diff --git a/ydb/core/tx/limiter/grouped_memory/usage/service.cpp b/ydb/core/tx/limiter/grouped_memory/usage/service.cpp new file mode 100644 index 000000000000..affe243f11bb --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/service.cpp @@ -0,0 +1,5 @@ +#include "service.h" + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +} diff --git a/ydb/core/tx/limiter/grouped_memory/usage/service.h b/ydb/core/tx/limiter/grouped_memory/usage/service.h new file mode 100644 index 000000000000..8192743218b1 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/service.h @@ -0,0 +1,102 @@ +#pragma once +#include "abstract.h" +#include "config.h" +#include "events.h" + +#include + +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +template +class TServiceOperatorImpl { +private: + TConfig ServiceConfig = TConfig::BuildDisabledConfig(); + std::shared_ptr Counters; + std::shared_ptr DefaultStageFeatures = std::make_shared("DEFAULT", ((ui64)3) << 30, nullptr, nullptr); + using TSelf = TServiceOperatorImpl; + static void Register(const TConfig& serviceConfig, TIntrusivePtr<::NMonitoring::TDynamicCounters> counters) { + Singleton()->Counters = std::make_shared(counters, TMemoryLimiterPolicy::Name); + 
Singleton()->ServiceConfig = serviceConfig; + Singleton()->DefaultStageFeatures = std::make_shared( + "GLOBAL", serviceConfig.GetMemoryLimit(), nullptr, Singleton()->Counters->BuildStageCounters("general")); + } + static const TString& GetMemoryLimiterName() { + Y_ABORT_UNLESS(TMemoryLimiterPolicy::Name.size() == 4); + return TMemoryLimiterPolicy::Name; + } + +public: + static std::shared_ptr BuildStageFeatures(const TString& name, const ui64 limit) { + if (!IsEnabled()) { + return Singleton()->DefaultStageFeatures; + } else { + AFL_VERIFY(Singleton()->DefaultStageFeatures); + return std::make_shared( + name, limit, Singleton()->DefaultStageFeatures, Singleton()->Counters->BuildStageCounters(name)); + } + } + + static std::shared_ptr GetDefaultStageFeatures() { + AFL_VERIFY(Singleton()->DefaultStageFeatures); + return Singleton()->DefaultStageFeatures; + } + + static std::shared_ptr BuildGroupGuard(const ui64 processId, const ui32 scopeId) { + static TAtomicCounter counter = 0; + auto& context = NActors::TActorContext::AsActorContext(); + const NActors::TActorId& selfId = context.SelfID; + return std::make_shared(MakeServiceId(selfId.NodeId()), processId, scopeId, counter.Inc()); + } + + static std::shared_ptr BuildScopeGuard(const ui64 processId, const ui32 scopeId) { + auto& context = NActors::TActorContext::AsActorContext(); + const NActors::TActorId& selfId = context.SelfID; + return std::make_shared(MakeServiceId(selfId.NodeId()), processId, scopeId); + } + + static std::shared_ptr BuildProcessGuard(const ui64 processId, const std::vector>& stages) { + auto& context = NActors::TActorContext::AsActorContext(); + const NActors::TActorId& selfId = context.SelfID; + return std::make_shared(MakeServiceId(selfId.NodeId()), processId, stages); + } + + static bool SendToAllocation(const ui64 processId, const ui64 scopeId, const ui64 groupId, + const std::vector>& tasks, + const std::optional& stageIdx) { + auto& context = NActors::TActorContext::AsActorContext(); + 
const NActors::TActorId& selfId = context.SelfID; + if (TSelf::IsEnabled()) { + context.Send(MakeServiceId(selfId.NodeId()), new NEvents::TEvExternal::TEvStartTask(processId, scopeId, groupId, tasks, stageIdx)); + return true; + } else { + for (auto&& i : tasks) { + if (!i->IsAllocated()) { + AFL_VERIFY(i->OnAllocated(std::make_shared(0, 0, 0, NActors::TActorId(), i->GetMemory()), i)); + } + } + return false; + } + } + static bool IsEnabled() { + return Singleton()->ServiceConfig.IsEnabled(); + } + static NActors::TActorId MakeServiceId(const ui32 nodeId) { + return NActors::TActorId(nodeId, "SrvcMlmt" + GetMemoryLimiterName()); + } + static NActors::IActor* CreateService(const TConfig& config, TIntrusivePtr<::NMonitoring::TDynamicCounters> signals) { + Register(config, signals); + return new TMemoryLimiterActor(config, GetMemoryLimiterName(), Singleton()->Counters, Singleton()->DefaultStageFeatures); + } +}; + +class TScanMemoryLimiterPolicy { +public: + static const inline TString Name = "Scan"; +}; + +using TScanMemoryLimiterOperator = TServiceOperatorImpl; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/usage/ya.make b/ydb/core/tx/limiter/grouped_memory/usage/ya.make new file mode 100644 index 000000000000..4295b9f8cafe --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/ya.make @@ -0,0 +1,16 @@ +LIBRARY() + +SRCS( + events.cpp + config.cpp + abstract.cpp + service.cpp +) + +PEERDIR( + ydb/library/actors/core + ydb/services/metadata/request + ydb/core/tx/limiter/grouped_memory/service +) + +END() diff --git a/ydb/core/tx/limiter/grouped_memory/ut/ut_manager.cpp b/ydb/core/tx/limiter/grouped_memory/ut/ut_manager.cpp new file mode 100644 index 000000000000..277d62903205 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/ut/ut_manager.cpp @@ -0,0 +1,222 @@ +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +Y_UNIT_TEST_SUITE(GroupedMemoryLimiter) { + 
using namespace NKikimr; + + class TAllocation: public NOlap::NGroupedMemoryManager::IAllocation, public TObjectCounter { + private: + using TBase = NOlap::NGroupedMemoryManager::IAllocation; + virtual bool DoOnAllocated(std::shared_ptr&& /*guard*/, + const std::shared_ptr& /*allocation*/) override { + return true; + } + + public: + TAllocation(const ui64 mem) + : TBase(mem) { + } + }; + + Y_UNIT_TEST(Simplest) { + auto counters = std::make_shared(MakeIntrusive(), "test"); + NOlap::NGroupedMemoryManager::TConfig config; + { + NKikimrConfig::TGroupedMemoryLimiterConfig protoConfig; + protoConfig.SetMemoryLimit(100); + AFL_VERIFY(config.DeserializeFromProto(protoConfig)); + } + std::unique_ptr actor( + NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::CreateService(config, MakeIntrusive())); + auto stage = NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::GetDefaultStageFeatures(); + auto manager = std::make_shared(NActors::TActorId(), config, "test", counters, stage); + { + auto alloc1 = std::make_shared(50); + manager->RegisterProcess(0, {}); + manager->RegisterProcessScope(0, 0); + manager->RegisterGroup(0, 0, 1); + manager->RegisterAllocation(0, 0, 1, alloc1, {}); + AFL_VERIFY(alloc1->IsAllocated()); + auto alloc1_1 = std::make_shared(50); + manager->RegisterAllocation(0, 0, 1, alloc1_1, {}); + AFL_VERIFY(alloc1_1->IsAllocated()); + + manager->RegisterGroup(0, 0, 2); + auto alloc2 = std::make_shared(50); + manager->RegisterAllocation(0, 0, 2, alloc2, {}); + AFL_VERIFY(!alloc2->IsAllocated()); + + manager->UnregisterAllocation(0, 0, alloc1->GetIdentifier()); + AFL_VERIFY(alloc2->IsAllocated()); + manager->UnregisterAllocation(0, 0, alloc2->GetIdentifier()); + manager->UnregisterAllocation(0, 0, alloc1_1->GetIdentifier()); + manager->UnregisterGroup(0, 0, 1); + manager->UnregisterGroup(0, 0, 2); + manager->UnregisterProcessScope(0, 0); + manager->UnregisterProcess(0); + } + AFL_VERIFY(!stage->GetUsage().Val()); + AFL_VERIFY(manager->IsEmpty()); + 
AFL_VERIFY(!TObjectCounter::ObjectCount()); + } + + Y_UNIT_TEST(Simple) { + auto counters = std::make_shared(MakeIntrusive(), "test"); + NOlap::NGroupedMemoryManager::TConfig config; + { + NKikimrConfig::TGroupedMemoryLimiterConfig protoConfig; + protoConfig.SetMemoryLimit(100); + AFL_VERIFY(config.DeserializeFromProto(protoConfig)); + } + std::unique_ptr actor(NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::CreateService(config, MakeIntrusive())); + auto stage = NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::GetDefaultStageFeatures(); + auto manager = std::make_shared(NActors::TActorId(), config, "test", counters, stage); + { + manager->RegisterProcess(0, {}); + manager->RegisterProcessScope(0, 0); + auto alloc1 = std::make_shared(10); + manager->RegisterGroup(0, 0, 1); + manager->RegisterAllocation(0, 0, 1, alloc1, {}); + AFL_VERIFY(alloc1->IsAllocated()); + auto alloc2 = std::make_shared(1000); + manager->RegisterGroup(0, 0, 2); + manager->RegisterAllocation(0, 0, 2, alloc2, {}); + AFL_VERIFY(!alloc2->IsAllocated()); + auto alloc3 = std::make_shared(1000); + manager->RegisterGroup(0, 0, 3); + manager->RegisterAllocation(0, 0, 3, alloc3, {}); + AFL_VERIFY(alloc1->IsAllocated()); + AFL_VERIFY(!alloc2->IsAllocated()); + AFL_VERIFY(!alloc3->IsAllocated()); + auto alloc1_1 = std::make_shared(1000); + manager->RegisterAllocation(0, 0, 1, alloc1_1, {}); + AFL_VERIFY(alloc1_1->IsAllocated()); + AFL_VERIFY(!alloc2->IsAllocated()); + manager->UnregisterAllocation(0, 0, alloc1_1->GetIdentifier()); + AFL_VERIFY(!alloc2->IsAllocated()); + manager->UnregisterGroup(0, 0, 1); + AFL_VERIFY(alloc2->IsAllocated()); + + manager->UnregisterAllocation(0, 0, alloc1->GetIdentifier()); + AFL_VERIFY(!alloc3->IsAllocated()); + manager->UnregisterGroup(0, 0, 2); + manager->UnregisterAllocation(0, 0, alloc2->GetIdentifier()); + AFL_VERIFY(alloc3->IsAllocated()); + manager->UnregisterGroup(0, 0, 3); + manager->UnregisterAllocation(0, 0, alloc3->GetIdentifier()); + 
manager->UnregisterProcessScope(0, 0); + manager->UnregisterProcess(0); + } + AFL_VERIFY(!stage->GetUsage().Val()); + AFL_VERIFY(manager->IsEmpty()); + AFL_VERIFY(!TObjectCounter::ObjectCount()); + } + + Y_UNIT_TEST(CommonUsage) { + auto counters = std::make_shared(MakeIntrusive(), "test"); + NOlap::NGroupedMemoryManager::TConfig config; + { + NKikimrConfig::TGroupedMemoryLimiterConfig protoConfig; + protoConfig.SetMemoryLimit(100); + AFL_VERIFY(config.DeserializeFromProto(protoConfig)); + } + std::unique_ptr actor( + NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::CreateService(config, MakeIntrusive())); + auto stage = NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::GetDefaultStageFeatures(); + auto manager = std::make_shared(NActors::TActorId(), config, "test", counters, stage); + { + manager->RegisterProcess(0, {}); + manager->RegisterProcessScope(0, 0); + manager->RegisterGroup(0, 0, 1); + auto alloc0 = std::make_shared(1000); + manager->RegisterAllocation(0, 0, 1, alloc0, {}); + auto alloc1 = std::make_shared(1000); + manager->RegisterAllocation(0, 0, 1, alloc1, {}); + AFL_VERIFY(alloc0->IsAllocated()); + AFL_VERIFY(alloc1->IsAllocated()); + + manager->RegisterGroup(0, 0, 2); + auto alloc2 = std::make_shared(1000); + manager->RegisterAllocation(0, 0, 2, alloc0, {}); + manager->RegisterAllocation(0, 0, 2, alloc2, {}); + AFL_VERIFY(alloc0->IsAllocated()); + AFL_VERIFY(!alloc2->IsAllocated()); + + auto alloc3 = std::make_shared(1000); + manager->RegisterGroup(0, 0, 3); + manager->RegisterAllocation(0, 0, 3, alloc0, {}); + manager->RegisterAllocation(0, 0, 3, alloc3, {}); + AFL_VERIFY(alloc0->IsAllocated()); + AFL_VERIFY(alloc1->IsAllocated()); + AFL_VERIFY(!alloc2->IsAllocated()); + AFL_VERIFY(!alloc3->IsAllocated()); + + manager->UnregisterGroup(0, 0, 1); + manager->UnregisterAllocation(0, 0, alloc1->GetIdentifier()); + + AFL_VERIFY(alloc0->IsAllocated()); + AFL_VERIFY(alloc2->IsAllocated()); + AFL_VERIFY(!alloc3->IsAllocated()); + 
manager->UnregisterGroup(0, 0, 2); + manager->UnregisterAllocation(0, 0, alloc2->GetIdentifier()); + AFL_VERIFY(alloc0->IsAllocated()); + AFL_VERIFY(alloc3->IsAllocated()); + + manager->UnregisterGroup(0, 0, 3); + manager->UnregisterAllocation(0, 0, alloc3->GetIdentifier()); + manager->UnregisterAllocation(0, 0, alloc0->GetIdentifier()); + manager->UnregisterProcess(0); + } + AFL_VERIFY(!stage->GetUsage().Val()); + AFL_VERIFY(manager->IsEmpty()); + AFL_VERIFY(!TObjectCounter::ObjectCount()); + } + + Y_UNIT_TEST(Update) { + auto counters = std::make_shared(MakeIntrusive(), "test"); + NOlap::NGroupedMemoryManager::TConfig config; + { + NKikimrConfig::TGroupedMemoryLimiterConfig protoConfig; + protoConfig.SetMemoryLimit(100); + AFL_VERIFY(config.DeserializeFromProto(protoConfig)); + } + std::unique_ptr actor( + NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::CreateService(config, MakeIntrusive())); + auto stage = NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::GetDefaultStageFeatures(); + auto manager = std::make_shared(NActors::TActorId(), config, "test", counters, stage); + { + manager->RegisterProcess(0, {}); + manager->RegisterProcessScope(0, 0); + auto alloc1 = std::make_shared(1000); + manager->RegisterGroup(0, 0, 1); + manager->RegisterAllocation(0, 0, 1, alloc1, {}); + AFL_VERIFY(alloc1->IsAllocated()); + auto alloc2 = std::make_shared(10); + manager->RegisterGroup(0, 0, 3); + manager->RegisterAllocation(0, 0, 3, alloc2, {}); + AFL_VERIFY(!alloc2->IsAllocated()); + + manager->UpdateAllocation(0, 0, alloc1->GetIdentifier(), 10); + AFL_VERIFY(alloc2->IsAllocated()); + + manager->UnregisterGroup(0, 0, 3); + manager->UnregisterAllocation(0, 0, alloc2->GetIdentifier()); + + manager->UnregisterGroup(0, 0, 1); + manager->UnregisterAllocation(0, 0, alloc1->GetIdentifier()); + manager->UnregisterProcess(0); + } + AFL_VERIFY(!stage->GetUsage().Val()); + AFL_VERIFY(manager->IsEmpty()); + AFL_VERIFY(!TObjectCounter::ObjectCount()); + } +}; diff --git 
a/ydb/core/tx/limiter/grouped_memory/ut/ya.make b/ydb/core/tx/limiter/grouped_memory/ut/ya.make new file mode 100644 index 000000000000..eeae3952e358 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/ut/ya.make @@ -0,0 +1,17 @@ +UNITTEST_FOR(ydb/core/formats/arrow) + +SIZE(SMALL) + +PEERDIR( + ydb/core/tx/limiter/grouped_memory/usage + ydb/library/yql/public/udf/service/stub + ydb/library/yql/parser/pg_wrapper +) + +SRCS( + ut_manager.cpp +) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/ydb/core/tx/limiter/grouped_memory/ya.make b/ydb/core/tx/limiter/grouped_memory/ya.make new file mode 100644 index 000000000000..2e071158a966 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/ya.make @@ -0,0 +1,5 @@ +RECURSE( + ut + service + usage +) diff --git a/ydb/core/tx/limiter/ya.make b/ydb/core/tx/limiter/ya.make new file mode 100644 index 000000000000..c27ad41aa02b --- /dev/null +++ b/ydb/core/tx/limiter/ya.make @@ -0,0 +1,5 @@ +RECURSE( + grouped_memory + service + usage +) diff --git a/ydb/core/tx/program/program.cpp b/ydb/core/tx/program/program.cpp index a6dbce7fed75..a4ada441cd74 100644 --- a/ydb/core/tx/program/program.cpp +++ b/ydb/core/tx/program/program.cpp @@ -330,7 +330,7 @@ NSsa::TAggregateAssign TProgramBuilder::MakeAggregate(const NSsa::TColumnInfo& n } } else if (func.ArgumentsSize() == 0 && func.GetId() == TId::AGG_COUNT) { // COUNT(*) case - return TAggregateAssign(name, EAggregate::Count); + return TAggregateAssign(name, EAggregate::NumRows); } return TAggregateAssign(name); // !ok() } @@ -483,7 +483,7 @@ bool TProgramContainer::Init(const IColumnResolver& columnResolver, const NKikim if (IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD)) { TString out; ::google::protobuf::TextFormat::PrintToString(programProto, &out); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("program", out); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "parse_program")("program", out); } if (programProto.HasKernels()) { @@ -496,6 +496,7 @@ bool 
TProgramContainer::Init(const IColumnResolver& columnResolver, const NKikim } return false; } + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "program_parsed")("result", DebugString()); return true; } diff --git a/ydb/core/tx/program/program.h b/ydb/core/tx/program/program.h index 76fbe8702488..99d72de0a0b0 100644 --- a/ydb/core/tx/program/program.h +++ b/ydb/core/tx/program/program.h @@ -20,7 +20,6 @@ class IColumnResolver { virtual ~IColumnResolver() = default; virtual TString GetColumnName(ui32 id, bool required = true) const = 0; virtual std::optional GetColumnIdOptional(const TString& name) const = 0; - virtual const NTable::TScheme::TTableSchema& GetSchema() const = 0; virtual NSsa::TColumnInfo GetDefaultColumn() const = 0; }; @@ -35,10 +34,6 @@ class TSchemaResolverColumnsOnly: public IColumnResolver { virtual TString GetColumnName(ui32 id, bool required = true) const override; virtual std::optional GetColumnIdOptional(const TString& name) const override; - virtual const NTable::TScheme::TTableSchema& GetSchema() const override { - AFL_VERIFY(false); - return Default(); - } virtual NSsa::TColumnInfo GetDefaultColumn() const override { return NSsa::TColumnInfo::Original((ui32)NOlap::NPortion::TSpecialColumns::SPEC_COL_PLAN_STEP_INDEX, NOlap::NPortion::TSpecialColumns::SPEC_COL_PLAN_STEP); } diff --git a/ydb/core/tx/schemeshard/olap/bg_tasks/tx_chain/session.h b/ydb/core/tx/schemeshard/olap/bg_tasks/tx_chain/session.h index 1a56abe419e1..06c2b8939d22 100644 --- a/ydb/core/tx/schemeshard/olap/bg_tasks/tx_chain/session.h +++ b/ydb/core/tx/schemeshard/olap/bg_tasks/tx_chain/session.h @@ -21,6 +21,9 @@ class TTxChainSession: public NKikimr::NOlap::NBackground::TSessionProtoAdapter< YDB_READONLY_DEF(TTxChainData, TxData); YDB_READONLY(ui32, StepForExecute, 0); std::optional CurrentTxId; + + static const inline TFactory::TRegistrator Registrator = TFactory::TRegistrator(GetStaticClassName()); + protected: virtual TConclusion> DoCreateActor(const 
NKikimr::NOlap::NBackground::TStartContext& context) const override; virtual TConclusionStatus DoDeserializeFromProto(const TProtoLogic& proto) override { @@ -48,6 +51,8 @@ class TTxChainSession: public NKikimr::NOlap::NBackground::TSessionProtoAdapter< return result; } public: + TTxChainSession() = default; + TTxChainSession(const TTxChainData& data) : TxData(data) { diff --git a/ydb/core/tx/schemeshard/olap/columns/update.cpp b/ydb/core/tx/schemeshard/olap/columns/update.cpp index 83e300176007..c66da237c712 100644 --- a/ydb/core/tx/schemeshard/olap/columns/update.cpp +++ b/ydb/core/tx/schemeshard/olap/columns/update.cpp @@ -119,6 +119,11 @@ namespace NKikimr::NSchemeShard { serializer.DeserializeFromProto(columnSchema.GetCompression()).Validate(); Serializer = serializer; } + if (columnSchema.HasDataAccessorConstructor()) { + NArrow::NAccessor::TConstructorContainer container; + AFL_VERIFY(container.DeserializeFromProto(columnSchema.GetDataAccessorConstructor())); + AccessorConstructor = container; + } if (columnSchema.HasDictionaryEncoding()) { auto settings = NArrow::NDictionary::TEncodingSettings::BuildFromProto(columnSchema.GetDictionaryEncoding()); Y_ABORT_UNLESS(settings.IsSuccess()); @@ -140,6 +145,9 @@ namespace NKikimr::NSchemeShard { if (Serializer) { Serializer->SerializeToProto(*columnSchema.MutableSerializer()); } + if (AccessorConstructor) { + *columnSchema.MutableDataAccessorConstructor() = AccessorConstructor.SerializeToProto(); + } if (DictionaryEncoding) { *columnSchema.MutableDictionaryEncoding() = DictionaryEncoding->SerializeToProto(); } @@ -160,6 +168,14 @@ namespace NKikimr::NSchemeShard { return false; } } + if (!!diffColumn.GetAccessorConstructor()) { + auto conclusion = diffColumn.GetAccessorConstructor()->BuildConstructor(); + if (conclusion.IsFail()) { + errors.AddError(conclusion.GetErrorMessage()); + return false; + } + AccessorConstructor = conclusion.DetachResult(); + } if (diffColumn.GetStorageId()) { StorageId = 
*diffColumn.GetStorageId(); } diff --git a/ydb/core/tx/schemeshard/olap/columns/update.h b/ydb/core/tx/schemeshard/olap/columns/update.h index ec463a69c7c2..84a728829d6e 100644 --- a/ydb/core/tx/schemeshard/olap/columns/update.h +++ b/ydb/core/tx/schemeshard/olap/columns/update.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -17,6 +18,7 @@ class TOlapColumnDiff { YDB_READONLY_DEF(NArrow::NDictionary::TEncodingDiff, DictionaryEncoding); YDB_READONLY_DEF(std::optional, StorageId); YDB_READONLY_DEF(std::optional, DefaultValue); + YDB_READONLY_DEF(NArrow::NAccessor::TRequestedConstructorContainer, AccessorConstructor); public: bool ParseFromRequest(const NKikimrSchemeOp::TOlapColumnDiff& columnSchema, IErrorCollector& errors) { Name = columnSchema.GetName(); @@ -30,6 +32,12 @@ class TOlapColumnDiff { if (columnSchema.HasDefaultValue()) { DefaultValue = columnSchema.GetDefaultValue(); } + if (columnSchema.HasDataAccessorConstructor()) { + if (!AccessorConstructor.DeserializeFromProto(columnSchema.GetDataAccessorConstructor())) { + errors.AddError("cannot parse accessor constructor from proto"); + return false; + } + } if (columnSchema.HasSerializer()) { if (!Serializer.DeserializeFromProto(columnSchema.GetSerializer())) { errors.AddError("cannot parse serializer diff from proto"); @@ -55,6 +63,7 @@ class TOlapColumnAdd { YDB_READONLY_DEF(std::optional, Serializer); YDB_READONLY_DEF(std::optional, DictionaryEncoding); YDB_READONLY_DEF(NOlap::TColumnDefaultScalarValue, DefaultValue); + YDB_READONLY_DEF(NArrow::NAccessor::TConstructorContainer, AccessorConstructor); public: TOlapColumnAdd(const std::optional& keyOrder) : KeyOrder(keyOrder) { diff --git a/ydb/core/tx/schemeshard/olap/manager/manager.cpp b/ydb/core/tx/schemeshard/olap/manager/manager.cpp index 0f8b13cd0104..72a8c93c5f1c 100644 --- a/ydb/core/tx/schemeshard/olap/manager/manager.cpp +++ b/ydb/core/tx/schemeshard/olap/manager/manager.cpp @@ -29,12 +29,12 @@ void 
TTablesStorage::OnRemoveObject(const TPathId& pathId, TColumnTableInfo::TPt } } -const std::set& TTablesStorage::GetTablesWithTiering(const TString& tieringId) const { +const THashSet& TTablesStorage::GetTablesWithTiering(const TString& tieringId) const { auto it = PathsByTieringId.find(tieringId); if (it != PathsByTieringId.end()) { return it->second; } else { - return Default>(); + return Default>(); } } diff --git a/ydb/core/tx/schemeshard/olap/manager/manager.h b/ydb/core/tx/schemeshard/olap/manager/manager.h index 96ffc91204eb..0873a12da22d 100644 --- a/ydb/core/tx/schemeshard/olap/manager/manager.h +++ b/ydb/core/tx/schemeshard/olap/manager/manager.h @@ -9,7 +9,7 @@ namespace NKikimr::NSchemeShard { class TTablesStorage { private: THashMap Tables; - THashMap> PathsByTieringId; + THashMap> PathsByTieringId; THashMap TablesByShard; void OnAddObject(const TPathId& pathId, TColumnTableInfo::TPtr object); @@ -20,7 +20,7 @@ class TTablesStorage { TColumnTablesLayout GetTablesLayout(const std::vector& tabletIds) const; - const std::set& GetTablesWithTiering(const TString& tieringId) const; + const THashSet& GetTablesWithTiering(const TString& tieringId) const; class TTableReadGuard { protected: diff --git a/ydb/core/tx/schemeshard/olap/operations/alter/common/update.h b/ydb/core/tx/schemeshard/olap/operations/alter/common/update.h index c33f18eb031b..013c5601780e 100644 --- a/ydb/core/tx/schemeshard/olap/operations/alter/common/update.h +++ b/ydb/core/tx/schemeshard/olap/operations/alter/common/update.h @@ -2,6 +2,7 @@ #include #include #include +#include namespace NKikimr::NSchemeShard::NOlap::NAlter { @@ -19,6 +20,16 @@ class TColumnTableUpdate: public ISSEntityUpdate { return NKikimrTxColumnShard::ETransactionKind::TX_KIND_SCHEMA; } virtual TConclusionStatus DoInitializeImpl(const TUpdateInitializationContext& context) = 0; + + bool IsAlterCompression(const TUpdateInitializationContext& context) const { + for (const auto& alterColumn : 
context.GetModification()->GetAlterColumnTable().GetAlterSchema().GetAlterColumns()) { + if (alterColumn.HasSerializer()) { + return true; + } + } + return false; + } + protected: virtual TConclusionStatus DoStartImpl(const TUpdateStartContext& /*context*/) { return TConclusionStatus::Success(); @@ -27,6 +38,9 @@ class TColumnTableUpdate: public ISSEntityUpdate { return TConclusionStatus::Success(); } virtual TConclusionStatus DoInitialize(const TUpdateInitializationContext& context) override final { + if (!AppData()->FeatureFlags.GetEnableOlapCompression() && IsAlterCompression(context)) { + return TConclusionStatus::Fail("Compression is disabled for OLAP tables"); + } if (!context.GetModification()->HasAlterColumnTable() && !context.GetModification()->HasAlterTable()) { return TConclusionStatus::Fail("no update data"); } @@ -52,6 +66,17 @@ class TColumnTableUpdate: public ISSEntityUpdate { return result; } + bool CheckTargetSchema(const TOlapSchema& targetSchema) { + if (!AppData()->FeatureFlags.GetEnableSparsedColumns()) { + for (auto& [_, column]: targetSchema.GetColumns().GetColumns()) { + if (column.GetDefaultValue().GetValue() || (column.GetAccessorConstructor().GetClassName() == NKikimr::NArrow::NAccessor::TGlobalConst::SparsedDataAccessorName)) { + return false; + } + } + } + return true; + } + public: }; diff --git a/ydb/core/tx/schemeshard/olap/operations/alter/standalone/update.cpp b/ydb/core/tx/schemeshard/olap/operations/alter/standalone/update.cpp index b94ff1888af2..a442ca80392f 100644 --- a/ydb/core/tx/schemeshard/olap/operations/alter/standalone/update.cpp +++ b/ydb/core/tx/schemeshard/olap/operations/alter/standalone/update.cpp @@ -1,6 +1,7 @@ #include "update.h" #include #include +#include namespace NKikimr::NSchemeShard::NOlap::NAlter { @@ -36,6 +37,24 @@ NKikimr::TConclusionStatus TStandaloneSchemaUpdate::DoInitializeImpl(const TUpda return TConclusionStatus::Fail("schema update error: " + collector->GetErrorMessage() + ". 
in alter constructor STANDALONE_UPDATE"); } } + + const TString& parentPathStr = context.GetModification()->GetWorkingDir(); + if (parentPathStr) { // Not empty only if called from Propose, not from ProgressState + NSchemeShard::TPath parentPath = NSchemeShard::TPath::Resolve(parentPathStr, context.GetSSOperationContext()->SS); + auto domainInfo = parentPath.DomainInfo(); + const TSchemeLimits& limits = domainInfo->GetSchemeLimits(); + if (targetSchema.GetColumns().GetColumns().size() > limits.MaxColumnTableColumns) { + TString errStr = TStringBuilder() + << "Too many columns" + << ": new: " << targetSchema.GetColumns().GetColumns().size() + << ". Limit: " << limits.MaxColumnTableColumns; + return TConclusionStatus::Fail(errStr); + } + } + + if (!CheckTargetSchema(targetSchema)) { + return TConclusionStatus::Fail("schema update error: sparsed columns are disabled"); + } auto description = originalTable.GetTableInfoVerified().Description; targetSchema.Serialize(*description.MutableSchema()); auto ttl = originalTable.GetTableTTLOptional() ? 
*originalTable.GetTableTTLOptional() : TOlapTTL(); diff --git a/ydb/core/tx/schemeshard/olap/operations/alter_store.cpp b/ydb/core/tx/schemeshard/olap/operations/alter_store.cpp index 446b43017821..98a4b6b8d1c7 100644 --- a/ydb/core/tx/schemeshard/olap/operations/alter_store.cpp +++ b/ydb/core/tx/schemeshard/olap/operations/alter_store.cpp @@ -1,6 +1,9 @@ #include #include #include +#include + +#include "checks.h" namespace { @@ -434,6 +437,18 @@ class TAlterOlapStore: public TSubOperation { } } + bool IsAlterCompression() const { + const auto& alter = Transaction.GetAlterColumnStore(); + for (const auto& alterSchema : alter.GetAlterSchemaPresets()) { + for (const auto& alterColumn : alterSchema.GetAlterSchema().GetAlterColumns()) { + if (alterColumn.HasSerializer()) { + return true; + } + } + } + return false; + } + public: using TSubOperation::TSubOperation; @@ -458,7 +473,13 @@ class TAlterOlapStore: public TSubOperation { return result; } - TPath path = TPath::Resolve(parentPathStr, context.SS).Dive(name); + if (!AppData()->FeatureFlags.GetEnableOlapCompression() && IsAlterCompression()) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "Compression is disabled for OLAP tables"); + return result; + } + + TPath parentPath = TPath::Resolve(parentPathStr, context.SS); + TPath path = parentPath.Dive(name); { TPath::TChecker checks = path.Check(); checks @@ -504,6 +525,26 @@ class TAlterOlapStore: public TSubOperation { if (!alterData) { return result; } + + if (!AppData()->FeatureFlags.GetEnableSparsedColumns()) { + for (auto& [_, preset]: alterData->SchemaPresets) { + for (auto& [_, column]: preset.GetColumns().GetColumns()) { + if (column.GetDefaultValue().GetValue() || (column.GetAccessorConstructor().GetClassName() == NKikimr::NArrow::NAccessor::TGlobalConst::SparsedDataAccessorName)) { + result->SetError(NKikimrScheme::StatusSchemeError,"schema update error: sparsed columns are disabled"); + return result; + } + } + } + } + + auto domainInfo = 
parentPath.DomainInfo(); + const TSchemeLimits& limits = domainInfo->GetSchemeLimits(); + + if (!NKikimr::NSchemeShard::NOlap::CheckLimits(limits, alterData, errStr)) { + result->SetError(NKikimrScheme::StatusSchemeError, errStr); + return result; + } + storeInfo->AlterData = alterData; NIceDb::TNiceDb db(context.GetDB()); diff --git a/ydb/core/tx/schemeshard/olap/operations/alter_table.cpp b/ydb/core/tx/schemeshard/olap/operations/alter_table.cpp index 6dff78961fff..4fb76b4a75a0 100644 --- a/ydb/core/tx/schemeshard/olap/operations/alter_table.cpp +++ b/ydb/core/tx/schemeshard/olap/operations/alter_table.cpp @@ -265,6 +265,19 @@ class TAlterColumnTable: public TSubOperation { auto result = MakeHolder(NKikimrScheme::StatusAccepted, ui64(OperationId.GetTxId()), ui64(ssId)); + const bool isAlterSharding = Transaction.HasAlterColumnTable() && Transaction.GetAlterColumnTable().HasReshardColumnTable(); + if (isAlterSharding && !AppData()->FeatureFlags.GetEnableAlterShardingInColumnShard()) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "Alter sharding is disabled for OLAP tables"); + return result; + } + + const bool hasTiering = Transaction.HasAlterColumnTable() && Transaction.GetAlterColumnTable().HasAlterTtlSettings() && + Transaction.GetAlterColumnTable().GetAlterTtlSettings().HasUseTiering(); + if (hasTiering && HasAppData() && !AppDataVerified().FeatureFlags.GetEnableTieringInColumnShard()) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "Tiering functionality is disabled for OLAP tables"); + return result; + } + const TString& parentPathStr = Transaction.GetWorkingDir(); const TString& name = Transaction.HasAlterColumnTable() ? 
Transaction.GetAlterColumnTable().GetName() : Transaction.GetAlterTable().GetName(); LOG_NOTICE_S(context.Ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, diff --git a/ydb/core/tx/schemeshard/olap/operations/checks.h b/ydb/core/tx/schemeshard/olap/operations/checks.h new file mode 100644 index 000000000000..7a2e58fa807d --- /dev/null +++ b/ydb/core/tx/schemeshard/olap/operations/checks.h @@ -0,0 +1,19 @@ +#pragma once + +namespace NKikimr::NSchemeShard::NOlap { + inline bool CheckLimits(const TSchemeLimits& limits, TOlapStoreInfo::TPtr alterData, TString& errStr) { + for (auto& [_, preset]: alterData->SchemaPresets) { + ui64 columnCount = preset.GetColumns().GetColumns().size(); + if (columnCount > limits.MaxColumnTableColumns) { + errStr = TStringBuilder() + << "Too many columns" + << ". new: " << columnCount + << ". Limit: " << limits.MaxColumnTableColumns; + return false; + } + } + return true; + } +} + + diff --git a/ydb/core/tx/schemeshard/olap/operations/create_store.cpp b/ydb/core/tx/schemeshard/olap/operations/create_store.cpp index bc27020ddc45..d66f3bed7d44 100644 --- a/ydb/core/tx/schemeshard/olap/operations/create_store.cpp +++ b/ydb/core/tx/schemeshard/olap/operations/create_store.cpp @@ -7,6 +7,8 @@ #include #include +#include "checks.h" + using namespace NKikimr; using namespace NKikimr::NSchemeShard; @@ -394,12 +396,20 @@ class TCreateOlapStore: public TSubOperation { return result; } + auto domainInfo = parentPath.DomainInfo(); + const TSchemeLimits& limits = domainInfo->GetSchemeLimits(); + TProposeErrorCollector errors(*result); TOlapStoreInfo::TPtr storeInfo = std::make_shared(); if (!storeInfo->ParseFromRequest(createDescription, errors)) { return result; } + if (!NKikimr::NSchemeShard::NOlap::CheckLimits(limits, storeInfo, errStr)) { + result->SetError(NKikimrScheme::StatusSchemeError, errStr); + return result; + } + // Construct channels bindings for columnshards TChannelsBindings channelsBindings; if 
(!context.SS->GetOlapChannelsBindings(dstPath.GetPathIdForDomain(), storeInfo->GetStorageConfig(), channelsBindings, errStr)) { diff --git a/ydb/core/tx/schemeshard/olap/operations/create_table.cpp b/ydb/core/tx/schemeshard/olap/operations/create_table.cpp index 7018d09e2ead..0c155d68d761 100644 --- a/ydb/core/tx/schemeshard/olap/operations/create_table.cpp +++ b/ydb/core/tx/schemeshard/olap/operations/create_table.cpp @@ -681,11 +681,23 @@ class TCreateColumnTable: public TSubOperation { TProposeErrorCollector errors(*result); TColumnTableInfo::TPtr tableInfo; bool needUpdateObject = false; + auto domainInfo = parentPath.DomainInfo(); + const TSchemeLimits& limits = domainInfo->GetSchemeLimits(); + if (storeInfo) { TOlapPresetConstructor tableConstructor(*storeInfo); tableInfo = tableConstructor.BuildTableInfo(createDescription, context, errors); needUpdateObject = tableConstructor.GetNeedUpdateObject(); } else { + ui64 columnCount = createDescription.schema().columns().size(); + if (columnCount > limits.MaxColumnTableColumns) { + TString errStr = TStringBuilder() + << "Too many columns" + << ". new: " << columnCount + << ". 
Limit: " << limits.MaxColumnTableColumns; + result->SetError(NKikimrScheme::StatusSchemeError, errStr); + return result; + } TOlapTableConstructor tableConstructor; tableInfo = tableConstructor.BuildTableInfo(createDescription, context, errors); } diff --git a/ydb/core/tx/schemeshard/olap/table/table.h b/ydb/core/tx/schemeshard/olap/table/table.h index e37ede6c3eab..a092e175e25d 100644 --- a/ydb/core/tx/schemeshard/olap/table/table.h +++ b/ydb/core/tx/schemeshard/olap/table/table.h @@ -99,8 +99,9 @@ struct TColumnTableInfo { Stats.UpdateShardStats(shardIdx, newStats); } - void UpdateTableStats(const TPathId& pathId, const TPartitionStats& newStats) { - Stats.UpdateTableStats(pathId, newStats); + void UpdateTableStats(const TShardIdx shardIdx, const TPathId& pathId, const TPartitionStats& newStats) { + Stats.TableStats[pathId].Aggregated.PartCount = GetColumnShards().size(); + Stats.UpdateTableStats(shardIdx, pathId, newStats); } TConclusion> BuildEntity(const TPathId& pathId, const NOlap::NAlter::TEntityInitializationContext& iContext) const; diff --git a/ydb/core/tx/schemeshard/olap/ttl/ya.make b/ydb/core/tx/schemeshard/olap/ttl/ya.make index 0eb0e83c9a22..8aea246ebddf 100644 --- a/ydb/core/tx/schemeshard/olap/ttl/ya.make +++ b/ydb/core/tx/schemeshard/olap/ttl/ya.make @@ -6,6 +6,7 @@ SRCS( ) PEERDIR( + ydb/core/base ydb/core/protos ) diff --git a/ydb/core/tx/schemeshard/schemeshard__init.cpp b/ydb/core/tx/schemeshard/schemeshard__init.cpp index d49712d3dc6d..06576293810c 100644 --- a/ydb/core/tx/schemeshard/schemeshard__init.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__init.cpp @@ -1242,6 +1242,7 @@ struct TSchemeShard::TTxInit : public TTransactionBase { .MaxPathElementLength = rowSet.template GetValueOrDefault(defaults.MaxPathElementLength), .ExtraPathSymbolsAllowed = rowSet.template GetValueOrDefault(defaults.ExtraPathSymbolsAllowed), .MaxTableColumns = rowSet.template GetValueOrDefault(defaults.MaxTableColumns), + .MaxColumnTableColumns = rowSet.template 
GetValueOrDefault(defaults.MaxColumnTableColumns), .MaxTableColumnNameLength = rowSet.template GetValueOrDefault(defaults.MaxTableColumnNameLength), .MaxTableKeyColumns = rowSet.template GetValueOrDefault(defaults.MaxTableKeyColumns), .MaxTableIndices = rowSet.template GetValueOrDefault(defaults.MaxTableIndices), diff --git a/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp b/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp index 72e7b4a80379..751debfd2fd2 100644 --- a/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp @@ -321,11 +321,11 @@ bool TTxStoreTableStats::PersistSingleStats(const TPathId& pathId, const TPathId tablePathId = TPathId(TOwnerId(pathId.OwnerId), TLocalPathId(table.GetTableLocalId())); if (Self->ColumnTables.contains(tablePathId)) { - LOG_DEBUG_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, + LOG_TRACE_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "add stats for exists table with pathId=" << tablePathId); auto columnTable = Self->ColumnTables.TakeVerified(tablePathId); - columnTable->UpdateTableStats(tablePathId, newTableStats); + columnTable->UpdateTableStats(shardIdx, tablePathId, newTableStats); } else { LOG_WARN_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "failed add stats for table with pathId=" << tablePathId); diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp index a13ef59e4de1..53c343ac09e1 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp @@ -1631,7 +1631,7 @@ void TTableInfo::UpdateShardStats(TShardIdx datashardIdx, const TPartitionStats& Stats.UpdateShardStats(datashardIdx, newStats); } -void TAggregatedStats::UpdateShardStats(TShardIdx datashardIdx, const TPartitionStats& newStats) { +void TTableAggregatedStats::UpdateShardStats(TShardIdx datashardIdx, const TPartitionStats& newStats) { // Ignore stats from unknown datashard (it 
could have been split) if (!PartitionStats.contains(datashardIdx)) return; @@ -1720,33 +1720,10 @@ void TAggregatedStats::UpdateShardStats(TShardIdx datashardIdx, const TPartition } } -void TAggregatedStats::UpdateTableStats(const TPathId& pathId, const TPartitionStats& newStats) { - if (!TableStats.contains(pathId)) { - TableStats[pathId] = newStats; - return; - } - - TPartitionStats& oldStats = TableStats[pathId]; - - if (newStats.SeqNo <= oldStats.SeqNo) { - // Ignore outdated message - return; - } - - if (newStats.SeqNo.Generation > oldStats.SeqNo.Generation) { - // Reset incremental counter baselines if tablet has restarted - oldStats.ImmediateTxCompleted = 0; - oldStats.PlannedTxCompleted = 0; - oldStats.TxRejectedByOverload = 0; - oldStats.TxRejectedBySpace = 0; - oldStats.RowUpdates = 0; - oldStats.RowDeletes = 0; - oldStats.RowReads = 0; - oldStats.RangeReads = 0; - oldStats.RangeReadRows = 0; - } - TableStats[pathId].RowCount += (newStats.RowCount - oldStats.RowCount); - TableStats[pathId].DataSize += (newStats.DataSize - oldStats.DataSize); +void TAggregatedStats::UpdateTableStats(TShardIdx shardIdx, const TPathId& pathId, const TPartitionStats& newStats) { + auto& tableStats = TableStats[pathId]; + tableStats.PartitionStats[shardIdx]; // insert if none + tableStats.UpdateShardStats(shardIdx, newStats); } void TTableInfo::RegisterSplitMergeOp(TOperationId opId, const TTxState& txState) { diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index 4a27cfa2bae1..4788e69e9d4a 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -320,14 +320,18 @@ struct TPartitionStats { ui64 CPU = 0; }; -struct TAggregatedStats { +struct TTableAggregatedStats { TPartitionStats Aggregated; THashMap PartitionStats; - THashMap TableStats; size_t PartitionStatsUpdated = 0; void UpdateShardStats(TShardIdx datashardIdx, const TPartitionStats& newStats); - 
void UpdateTableStats(const TPathId& pathId, const TPartitionStats& newStats); +}; + +struct TAggregatedStats : public TTableAggregatedStats { + THashMap TableStats; + + void UpdateTableStats(TShardIdx datashardIdx, const TPathId& pathId, const TPartitionStats& newStats); }; struct TSubDomainInfo; diff --git a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp index 34ecf178b3bf..7b39e9113433 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp @@ -479,7 +479,9 @@ void TPathDescriber::DescribeColumnTable(TPathId pathId, TPathElement::TPtr path description->MutableSchema()->SetVersion(description->GetSchema().GetVersion() + description->GetSchemaPresetVersionAdj()); } if (tableInfo->GetStats().TableStats.contains(pathId)) { - FillTableStats(*pathDescription, tableInfo->GetStats().TableStats.at(pathId)); + FillTableStats(*pathDescription, tableInfo->GetStats().TableStats.at(pathId).Aggregated); + } else { + FillTableStats(*pathDescription, TPartitionStats()); } } } diff --git a/ydb/core/tx/schemeshard/schemeshard_schema.h b/ydb/core/tx/schemeshard/schemeshard_schema.h index ac7cce9805c8..878f5ae87e08 100644 --- a/ydb/core/tx/schemeshard/schemeshard_schema.h +++ b/ydb/core/tx/schemeshard/schemeshard_schema.h @@ -761,6 +761,7 @@ struct Schema : NIceDb::Schema { struct ImportsLimit : Column<29, NScheme::NTypeIds::Uint64> {}; struct AuditSettings : Column<30, NScheme::NTypeIds::String> {}; struct ServerlessComputeResourcesMode : Column<31, NScheme::NTypeIds::Uint32> { using Type = EServerlessComputeResourcesMode; }; + struct ColumnTableColumnsLimit : Column<32, NScheme::NTypeIds::Uint64> {}; using TKey = TableKey; using TColumns = TableColumns< @@ -794,7 +795,8 @@ struct Schema : NIceDb::Schema { ExportsLimit, ImportsLimit, AuditSettings, - ServerlessComputeResourcesMode + ServerlessComputeResourcesMode, + ColumnTableColumnsLimit >; }; 
diff --git a/ydb/core/tx/schemeshard/schemeshard_types.cpp b/ydb/core/tx/schemeshard/schemeshard_types.cpp index 443cafd3e7ff..eaad291862a8 100644 --- a/ydb/core/tx/schemeshard/schemeshard_types.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_types.cpp @@ -20,6 +20,9 @@ TSchemeLimits TSchemeLimits::FromProto(const NKikimrScheme::TSchemeLimits& proto if (proto.HasMaxTableColumns()) { result.MaxTableColumns = proto.GetMaxTableColumns(); } + if (proto.HasMaxColumnTableColumns()) { + result.MaxColumnTableColumns = proto.GetMaxColumnTableColumns(); + } if (proto.HasMaxTableColumnNameLength()) { result.MaxTableColumnNameLength = proto.GetMaxTableColumnNameLength(); } @@ -69,6 +72,7 @@ NKikimrScheme::TSchemeLimits TSchemeLimits::AsProto() const { result.SetMaxAclBytesSize(MaxAclBytesSize); result.SetMaxTableColumns(MaxTableColumns); + result.SetMaxColumnTableColumns(MaxColumnTableColumns); result.SetMaxTableColumnNameLength(MaxTableColumnNameLength); result.SetMaxTableKeyColumns(MaxTableKeyColumns); result.SetMaxTableIndices(MaxTableIndices); diff --git a/ydb/core/tx/schemeshard/schemeshard_types.h b/ydb/core/tx/schemeshard/schemeshard_types.h index 5e843c58a293..6afcdeedb1e2 100644 --- a/ydb/core/tx/schemeshard/schemeshard_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_types.h @@ -39,6 +39,7 @@ struct TSchemeLimits { // table ui64 MaxTableColumns = 200; + ui64 MaxColumnTableColumns = 10000; ui64 MaxTableColumnNameLength = 255; ui64 MaxTableKeyColumns = 20; ui64 MaxTableIndices = 20; diff --git a/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp b/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp index 0fe031eb3cf8..a0f915b1df67 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp @@ -1500,6 +1500,7 @@ namespace NSchemeShardUT_Private { (let child '('ChildrenLimit (Uint64 '%lu))) (let acl '('AclByteSizeLimit (Uint64 '%lu))) (let columns '('TableColumnsLimit (Uint64 '%lu))) + (let columnColumns 
'('ColumnTableColumnsLimit (Uint64 '%lu))) (let colName '('TableColumnNameLengthLimit (Uint64 '%lu))) (let keyCols '('TableKeyColumnsLimit (Uint64 '%lu))) (let indices '('TableIndicesLimit (Uint64 '%lu))) @@ -1512,11 +1513,11 @@ namespace NSchemeShardUT_Private { (let pqPartitions '('PQPartitionsLimit (Uint64 '%lu))) (let exports '('ExportsLimit (Uint64 '%lu))) (let imports '('ImportsLimit (Uint64 '%lu))) - (let ret (AsList (UpdateRow 'SubDomains key '(depth paths child acl columns colName keyCols indices streams shards pathShards consCopy maxPathLength extraSymbols pqPartitions exports imports)))) + (let ret (AsList (UpdateRow 'SubDomains key '(depth paths child acl columns columnColumns colName keyCols indices streams shards pathShards consCopy maxPathLength extraSymbols pqPartitions exports imports)))) (return ret) ) )", domainId, limits.MaxDepth, limits.MaxPaths, limits.MaxChildrenInDir, limits.MaxAclBytesSize, - limits.MaxTableColumns, limits.MaxTableColumnNameLength, limits.MaxTableKeyColumns, + limits.MaxTableColumns, limits.MaxColumnTableColumns, limits.MaxTableColumnNameLength, limits.MaxTableKeyColumns, limits.MaxTableIndices, limits.MaxTableCdcStreams, limits.MaxShards, limits.MaxShardsInPath, limits.MaxConsistentCopyTargets, limits.MaxPathElementLength, escapedStr.c_str(), limits.MaxPQPartitions, diff --git a/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp b/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp index a4f0b3a544d7..9e5b7c8543f6 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp @@ -542,6 +542,7 @@ NSchemeShardUT_Private::TTestEnv::TTestEnv(TTestActorRuntime& runtime, const TTe app.SetEnableAddColumsWithDefaults(opts.EnableAddColumsWithDefaults_); app.SetEnableReplaceIfExistsForExternalEntities(opts.EnableReplaceIfExistsForExternalEntities_); app.SetEnableChangefeedsOnIndexTables(opts.EnableChangefeedsOnIndexTables_); + 
app.SetEnableTieringInColumnShard(opts.EnableTieringInColumnShard_); app.ColumnShardConfig.SetDisabledOnSchemeShard(false); diff --git a/ydb/core/tx/schemeshard/ut_helpers/test_env.h b/ydb/core/tx/schemeshard/ut_helpers/test_env.h index 388b50caa579..c433855c9c76 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/test_env.h +++ b/ydb/core/tx/schemeshard/ut_helpers/test_env.h @@ -66,6 +66,7 @@ namespace NSchemeShardUT_Private { OPTION(std::optional, EnableReplaceIfExistsForExternalEntities, std::nullopt); OPTION(std::optional, GraphBackendType, std::nullopt); OPTION(std::optional, EnableChangefeedsOnIndexTables, std::nullopt); + OPTION(std::optional, EnableTieringInColumnShard, std::nullopt); #undef OPTION }; diff --git a/ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp b/ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp index c53fdeb58425..1a6a85e46e27 100644 --- a/ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp +++ b/ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -39,7 +40,7 @@ static const TString defaultTableSchema = R"( )"; static const TVector defaultYdbSchema = { - NArrow::NTest::TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp) ), + NArrow::NTest::TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp)).SetNullable(false), NArrow::NTest::TTestColumn("data", TTypeInfo(NTypeIds::Utf8) ) }; @@ -556,11 +557,24 @@ Y_UNIT_TEST_SUITE(TOlap) { } } )", {NKikimrScheme::StatusAccepted}); + + env.TestWaitNotification(runtime, txId); + TestAlterOlapStore(runtime, ++txId, "/MyRoot", R"( + Name: "OlapStore" + AlterSchemaPresets { + Name: "default" + AlterSchema { + AlterColumns { Name: "comment" DefaultValue: "10" } + } + } + )", {NKikimrScheme::StatusSchemeError}); } Y_UNIT_TEST(AlterTtl) { TTestBasicRuntime runtime; - TTestEnv env(runtime); + TTestEnvOptions options; + options.EnableTieringInColumnShard(true); + TTestEnv env(runtime, options); ui64 txId = 100; TString olapSchema = R"( @@ -634,17 +648,17 @@ 
Y_UNIT_TEST_SUITE(TOlap) { env.TestWaitNotification(runtime, txId); } - // TODO: AlterTiers - // negatives for store: disallow alters - // negatives for table: wrong tiers count, wrong tiers, wrong eviction column, wrong eviction values, - // different TTL columns in tiers -#if 0 Y_UNIT_TEST(StoreStats) { TTestBasicRuntime runtime; TTestEnv env(runtime); runtime.SetLogPriority(NKikimrServices::TX_COLUMNSHARD, NActors::NLog::PRI_DEBUG); runtime.UpdateCurrentTime(TInstant::Now() - TDuration::Seconds(600)); + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); + csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); + // disable stats batching auto& appData = runtime.GetAppData(); appData.SchemeShardConfig.SetStatsBatchTimeoutMs(0); @@ -690,6 +704,16 @@ Y_UNIT_TEST_SUITE(TOlap) { UNIT_ASSERT(shardId); UNIT_ASSERT(pathId); UNIT_ASSERT(planStep); + { + auto description = DescribePrivatePath(runtime, TTestTxConfig::SchemeShard, "/MyRoot/OlapStore/ColumnTable", true, true); + Cerr << description.DebugString() << Endl; + auto& tabletStats = description.GetPathDescription().GetTableStats(); + + UNIT_ASSERT(description.GetPathDescription().HasTableStats()); + UNIT_ASSERT_EQUAL(tabletStats.GetRowCount(), 0); + UNIT_ASSERT_EQUAL(tabletStats.GetDataSize(), 0); + } + ui32 rowsInBatch = 100000; @@ -702,7 +726,7 @@ Y_UNIT_TEST_SUITE(TOlap) { TSet txIds; for (ui32 i = 0; i < 10; ++i) { std::vector writeIds; - NTxUT::WriteData(runtime, sender, shardId, ++writeId, pathId, data, defaultYdbSchema, &writeIds); + NTxUT::WriteData(runtime, sender, shardId, ++writeId, pathId, data, defaultYdbSchema, &writeIds, NEvWrite::EModificationType::Upsert); NTxUT::ProposeCommit(runtime, sender, shardId, ++txId, writeIds); txIds.insert(txId); } @@ -714,16 +738,38 @@ Y_UNIT_TEST_SUITE(TOlap) { // trigger 
periodic stats at shard (after timeout) std::vector writeIds; - NTxUT::WriteData(runtime, sender, shardId, ++writeId, pathId, data, defaultYdbSchema, &writeIds); + NTxUT::WriteData(runtime, sender, shardId, ++writeId, pathId, data, defaultYdbSchema, &writeIds, NEvWrite::EModificationType::Upsert); NTxUT::ProposeCommit(runtime, sender, shardId, ++txId, writeIds); NTxUT::PlanCommit(runtime, sender, shardId, ++planStep, {txId}); } + csController->WaitIndexation(TDuration::Seconds(5)); + { + auto description = DescribePrivatePath(runtime, TTestTxConfig::SchemeShard, "/MyRoot/OlapStore", true, true); + Cerr << description.DebugString() << Endl; + auto& tabletStats = description.GetPathDescription().GetTableStats(); + + UNIT_ASSERT_GT(tabletStats.GetRowCount(), 0); + UNIT_ASSERT_GT(tabletStats.GetDataSize(), 0); + UNIT_ASSERT_GT(tabletStats.GetPartCount(), 0); + UNIT_ASSERT_GT(tabletStats.GetRowUpdates(), 0); + UNIT_ASSERT_GT(tabletStats.GetImmediateTxCompleted(), 0); + UNIT_ASSERT_GT(tabletStats.GetPlannedTxCompleted(), 0); + UNIT_ASSERT_GT(tabletStats.GetLastAccessTime(), 0); + UNIT_ASSERT_GT(tabletStats.GetLastUpdateTime(), 0); + } + + { + auto description = DescribePrivatePath(runtime, TTestTxConfig::SchemeShard, "/MyRoot/OlapStore/ColumnTable", true, true); + Cerr << description.DebugString() << Endl; + auto& tabletStats = description.GetPathDescription().GetTableStats(); - auto description = DescribePrivatePath(runtime, TTestTxConfig::SchemeShard, "/MyRoot/OlapStore", true, true); - auto& tabletStats = description.GetPathDescription().GetTableStats(); + UNIT_ASSERT_GT(tabletStats.GetRowCount(), 0); + UNIT_ASSERT_GT(tabletStats.GetDataSize(), 0); + UNIT_ASSERT_GT(tabletStats.GetPartCount(), 0); + UNIT_ASSERT_GT(tabletStats.GetLastAccessTime(), 0); + UNIT_ASSERT_GT(tabletStats.GetLastUpdateTime(), 0); + } - UNIT_ASSERT_GT(tabletStats.GetRowCount(), 0); - UNIT_ASSERT_GT(tabletStats.GetDataSize(), 0); #if 0 TestDropColumnTable(runtime, ++txId, "/MyRoot/OlapStore", 
"ColumnTable"); env.TestWaitNotification(runtime, txId); @@ -738,5 +784,4 @@ Y_UNIT_TEST_SUITE(TOlap) { TestLsPathId(runtime, 2, NLs::PathStringEqual("")); #endif } -#endif } diff --git a/ydb/core/tx/schemeshard/ut_subdomain/ut_subdomain.cpp b/ydb/core/tx/schemeshard/ut_subdomain/ut_subdomain.cpp index 78db80f9f32c..8fc34d9edbe0 100644 --- a/ydb/core/tx/schemeshard/ut_subdomain/ut_subdomain.cpp +++ b/ydb/core/tx/schemeshard/ut_subdomain/ut_subdomain.cpp @@ -2568,6 +2568,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardSubDomainTest) { } + //clear subdomain { TestDescribeResult(DescribePath(runtime, "/MyRoot"), @@ -2585,6 +2586,155 @@ Y_UNIT_TEST_SUITE(TSchemeShardSubDomainTest) { } } + Y_UNIT_TEST(ColumnSchemeLimitsRejects) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TSchemeLimits lowLimits; + lowLimits.MaxDepth = 4; + lowLimits.MaxPaths = 3; + lowLimits.MaxChildrenInDir = 3; + lowLimits.MaxAclBytesSize = 25; + lowLimits.MaxTableColumns = 3; + lowLimits.MaxColumnTableColumns = 3; + lowLimits.MaxTableColumnNameLength = 10; + lowLimits.MaxTableKeyColumns = 1; + lowLimits.MaxShards = 6; + lowLimits.MaxShardsInPath = 4; + lowLimits.MaxPQPartitions = 20; + + + //lowLimits.ExtraPathSymbolsAllowed = "!\"#$%&'()*+,-.:;<=>?@[\\]^_`{|}~"; + SetSchemeshardSchemaLimits(runtime, lowLimits); + TestDescribeResult(DescribePath(runtime, "/MyRoot"), + {NLs::PathExist, + NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards, lowLimits.MaxPQPartitions)}); + + { + TestCreateSubDomain(runtime, txId++, "/MyRoot", + "PlanResolution: 50 " + "Coordinators: 1 " + "Mediators: 1 " + "TimeCastBucketsPerMediator: 2 " + "Name: \"USER_0\"" + " DatabaseQuotas {" + " data_stream_shards_quota: 2" + " data_stream_reserved_storage_quota: 200000" + "}"); + } + + //create column tables, column limits + { + TestMkDir(runtime, txId++, "/MyRoot/USER_0", "C"); + env.TestWaitNotification(runtime, txId - 1); + + // MaxColumnTableColumns + TestCreateColumnTable(runtime, txId++, 
"/MyRoot/USER_0/C", R"( + Name: "C2" + ColumnShardCount: 1 + Schema { + Columns { Name: "RowId" Type: "Uint64", NotNull: true } + Columns { Name: "Value0" Type: "Utf8" } + Columns { Name: "Value1" Type: "Utf8" } + KeyColumnNames: "RowId" + Engine: COLUMN_ENGINE_REPLACING_TIMESERIES + } + )", {NKikimrScheme::StatusAccepted}); + env.TestWaitNotification(runtime, txId - 1); + + TestAlterColumnTable(runtime, txId++, "/MyRoot/USER_0/C", R"( + Name: "C2" + AlterSchema { + DropColumns {Name: "Value0"} + } + )", {NKikimrScheme::StatusAccepted}); + env.TestWaitNotification(runtime, txId - 1); + + TestAlterColumnTable(runtime, txId++, "/MyRoot/USER_0/C", R"( + Name: "C2" + AlterSchema { + DropColumns {Name: "Value1"} + AddColumns { Name: "Value2" Type: "Utf8" } + AddColumns { Name: "Value3" Type: "Utf8" } + AddColumns { Name: "Value4" Type: "Utf8" } + } + )", {NKikimrScheme::StatusSchemeError}); + env.TestWaitNotification(runtime, txId - 1); + + TestCreateColumnTable(runtime, txId++, "/MyRoot/USER_0/C", R"( + Name: "C1" + ColumnShardCount: 1 + Schema { + Columns { Name: "RowId" Type: "Uint64", NotNull: true } + Columns { Name: "Value0" Type: "Utf8" } + Columns { Name: "Value1" Type: "Utf8" } + Columns { Name: "Value2" Type: "Utf8" } + KeyColumnNames: "RowId" + Engine: COLUMN_ENGINE_REPLACING_TIMESERIES + } + )", {NKikimrScheme::StatusSchemeError}); + + TString olapSchema = R"( + Name: "OlapStore1" + ColumnShardCount: 1 + SchemaPresets { + Name: "default" + Schema { + Columns { Name: "timestamp" Type: "Timestamp" NotNull: true } + Columns { Name: "data" Type: "Utf8" } + KeyColumnNames: "timestamp" + Engine: COLUMN_ENGINE_REPLACING_TIMESERIES + } + } + )"; + + TestCreateOlapStore(runtime, txId++, "/MyRoot", olapSchema, {NKikimrScheme::StatusAccepted}); + env.TestWaitNotification(runtime, txId - 1); + + TString olapSchemaBig = R"( + Name: "OlapStoreBig" + ColumnShardCount: 1 + SchemaPresets { + Name: "default" + Schema { + Columns { Name: "timestamp" Type: "Timestamp" NotNull: 
true } + Columns { Name: "data" Type: "Utf8" } + Columns { Name: "data2" Type: "Utf8" } + Columns { Name: "data3" Type: "Utf8" } + KeyColumnNames: "timestamp" + Engine: COLUMN_ENGINE_REPLACING_TIMESERIES + } + } + )"; + + TestCreateOlapStore(runtime, txId++, "/MyRoot", olapSchemaBig, {NKikimrScheme::StatusSchemeError}); + env.TestWaitNotification(runtime, txId - 1); + + TestAlterOlapStore(runtime, txId++, "/MyRoot", R"( + Name: "OlapStore1" + AlterSchemaPresets { + Name: "default" + AlterSchema { + AddColumns { Name: "comment" Type: "Utf8" } + } + } + )", {NKikimrScheme::StatusAccepted}); + env.TestWaitNotification(runtime, txId - 1); + + TestAlterOlapStore(runtime, txId++, "/MyRoot", R"( + Name: "OlapStore1" + AlterSchemaPresets { + Name: "default" + AlterSchema { + AddColumns { Name: "comment2" Type: "Utf8" } + } + } + )", {NKikimrScheme::StatusSchemeError}); + env.TestWaitNotification(runtime, txId - 1); + } + } + Y_UNIT_TEST(SchemeLimitsRejectsWithIndexedTables) { TTestBasicRuntime runtime; TTestEnv env(runtime); diff --git a/ydb/core/tx/schemeshard/ut_ttl/ut_ttl.cpp b/ydb/core/tx/schemeshard/ut_ttl/ut_ttl.cpp index ce57f14992b3..1accb55c269b 100644 --- a/ydb/core/tx/schemeshard/ut_ttl/ut_ttl.cpp +++ b/ydb/core/tx/schemeshard/ut_ttl/ut_ttl.cpp @@ -1150,6 +1150,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardColumnTableTTL) { Columns { Name: "key" Type: "Uint64" NotNull: true } Columns { Name: "modified_at" Type: "Timestamp" } Columns { Name: "saved_at" Type: "Datetime" } + Columns { Name: "data" Type: "Utf8" } KeyColumnNames: ["key"] } )"); @@ -1206,6 +1207,13 @@ Y_UNIT_TEST_SUITE(TSchemeShardColumnTableTTL) { } } ); + TestAlterColumnTable(runtime, ++txId, "/MyRoot", R"( + Name: "TTLEnabledTable" + AlterSchema { + AlterColumns {Name: "data" DefaultValue: "10"} + } + )", {NKikimrScheme::StatusSchemeError}); + env.TestWaitNotification(runtime, txId); } Y_UNIT_TEST(AlterColumnTable_Negative) { diff --git a/ydb/core/tx/tiering/manager.cpp b/ydb/core/tx/tiering/manager.cpp index 
85dd6d60c10b..57462d745d3a 100644 --- a/ydb/core/tx/tiering/manager.cpp +++ b/ydb/core/tx/tiering/manager.cpp @@ -199,19 +199,18 @@ THashMap TTiersManager::GetTiering() const { Y_ABORT_UNLESS(snapshotPtr); auto& tierConfigs = snapshotPtr->GetTierConfigs(); for (auto&& i : PathIdTiering) { - auto* tiering = snapshotPtr->GetTieringById(i.second); - if (tiering) { + auto* tieringRule = snapshotPtr->GetTieringById(i.second); + if (tieringRule) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("path_id", i.first)("tiering_name", i.second)("event", "activation"); - result.emplace(i.first, tiering->BuildOlapTiers()); - for (auto& [pathId, pathTiering] : result) { - for (auto& [name, tier] : pathTiering.GetTierByName()) { - AFL_VERIFY(name != NOlap::NTiering::NCommon::DeleteTierName); - auto it = tierConfigs.find(name); - if (it != tierConfigs.end()) { - tier->SetSerializer(NTiers::ConvertCompression(it->second.GetCompression())); - } + NOlap::TTiering tiering = tieringRule->BuildOlapTiers(); + for (auto& [name, tier] : tiering.GetTierByName()) { + AFL_VERIFY(name != NOlap::NTiering::NCommon::DeleteTierName); + auto it = tierConfigs.find(name); + if (it != tierConfigs.end()) { + tier->SetSerializer(NTiers::ConvertCompression(it->second.GetCompression())); } } + result.emplace(i.first, std::move(tiering)); } else { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("path_id", i.first)("tiering_name", i.second)("event", "not_found"); } diff --git a/ydb/core/tx/tiering/rule/manager.cpp b/ydb/core/tx/tiering/rule/manager.cpp index 99f8ad1177a2..a97ba742467a 100644 --- a/ydb/core/tx/tiering/rule/manager.cpp +++ b/ydb/core/tx/tiering/rule/manager.cpp @@ -13,6 +13,10 @@ void TTieringRulesManager::DoPrepareObjectsBeforeModification(std::vectorEmpty()) { + return TConclusionStatus::Fail("defaultColumn cannot be empty"); + } result.SetColumn(TTieringRule::TDecoder::DefaultColumn, NMetadata::NInternal::TYDBValue::Utf8(*fValue)); } } diff --git a/ydb/core/tx/tiering/rule/object.cpp 
b/ydb/core/tx/tiering/rule/object.cpp index 59d42bdb4c8e..a596b56890ca 100644 --- a/ydb/core/tx/tiering/rule/object.cpp +++ b/ydb/core/tx/tiering/rule/object.cpp @@ -30,6 +30,10 @@ bool TTieringRule::DeserializeDescriptionFromJson(const NJson::TJsonValue& jsonI if (!jsonInfo["rules"].GetArrayPointer(&rules)) { return false; } + if (rules->empty()) { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "tiering_rule_deserialization_failed")("reason", "empty_rules"); + return false; + } for (auto&& i : *rules) { TTieringInterval interval; if (!interval.DeserializeFromJson(i)) { @@ -61,6 +65,9 @@ bool TTieringRule::DeserializeFromRecord(const TDecoder& decoder, const Ydb::Val if (!decoder.Read(decoder.GetDefaultColumnIdx(), DefaultColumn, r)) { return false; } + if (DefaultColumn.Empty()) { + return false; + } NJson::TJsonValue jsonDescription; if (!decoder.ReadJson(decoder.GetDescriptionIdx(), jsonDescription, r)) { return false; @@ -72,6 +79,7 @@ bool TTieringRule::DeserializeFromRecord(const TDecoder& decoder, const Ydb::Val } NKikimr::NOlap::TTiering TTieringRule::BuildOlapTiers() const { + AFL_VERIFY(!Intervals.empty()); NOlap::TTiering result; for (auto&& r : Intervals) { AFL_VERIFY(result.Add(std::make_shared(r.GetTierName(), r.GetDurationForEvict(), GetDefaultColumn()))); diff --git a/ydb/core/tx/tiering/rule/ss_fetcher.cpp b/ydb/core/tx/tiering/rule/ss_fetcher.cpp index 681e96780eb4..e822ace4c5a8 100644 --- a/ydb/core/tx/tiering/rule/ss_fetcher.cpp +++ b/ydb/core/tx/tiering/rule/ss_fetcher.cpp @@ -17,7 +17,7 @@ void TFetcherCheckUserTieringPermissions::DoProcess(NSchemeShard::TSchemeShard& } else { bool denied = false; for (auto&& i : TieringRuleIds) { - const std::set& pathIds = schemeShard.ColumnTables.GetTablesWithTiering(i); + const auto& pathIds = schemeShard.ColumnTables.GetTablesWithTiering(i); for (auto&& pathId : pathIds) { auto path = NSchemeShard::TPath::Init(pathId, &schemeShard); if (!path.IsResolved() || path.IsUnderDeleting() || 
path.IsDeleted()) { diff --git a/ydb/core/tx/tiering/tier/manager.cpp b/ydb/core/tx/tiering/tier/manager.cpp index b64439cf62d4..8d60219624b4 100644 --- a/ydb/core/tx/tiering/tier/manager.cpp +++ b/ydb/core/tx/tiering/tier/manager.cpp @@ -8,6 +8,10 @@ NMetadata::NModifications::TOperationParsingResult TTiersManager::DoBuildPatchFr const NYql::TObjectSettingsImpl& settings, TInternalModificationContext& context) const { + if (HasAppData() && !AppDataVerified().FeatureFlags.GetEnableTieringInColumnShard()) { + return TConclusionStatus::Fail("Tiering functionality is disabled for OLAP tables."); + } + NMetadata::NInternal::TTableRecord result; result.SetColumn(TTierConfig::TDecoder::TierName, NMetadata::NInternal::TYDBValue::Utf8(settings.GetObjectId())); if (settings.GetObjectId().StartsWith("$") || settings.GetObjectId().StartsWith("_")) { diff --git a/ydb/core/tx/tiering/ut/ut_tiers.cpp b/ydb/core/tx/tiering/ut/ut_tiers.cpp index 55fd3a3437e5..21fa01b29a6e 100644 --- a/ydb/core/tx/tiering/ut/ut_tiers.cpp +++ b/ydb/core/tx/tiering/ut/ut_tiers.cpp @@ -32,21 +32,15 @@ class TFastTTLCompactionController: public NKikimr::NYDBTest::ICSController { virtual bool NeedForceCompactionBacketsConstruction() const override { return true; } - virtual TDuration GetRemovedPortionLivetime(const TDuration /*def*/) const override { - return TDuration::Zero(); - } - virtual ui64 GetSmallPortionSizeDetector(const ui64 /*def*/) const override { + virtual ui64 DoGetSmallPortionSizeDetector(const ui64 /*def*/) const override { return 0; } - virtual TDuration GetOptimizerFreshnessCheckDuration(const TDuration /*defaultValue*/) const override { + virtual TDuration DoGetOptimizerFreshnessCheckDuration(const TDuration /*defaultValue*/) const override { return TDuration::Zero(); } - virtual TDuration GetLagForCompactionBeforeTierings(const TDuration /*def*/) const override { + virtual TDuration DoGetLagForCompactionBeforeTierings(const TDuration /*def*/) const override { return 
TDuration::Zero(); } - virtual TDuration GetTTLDefaultWaitingDuration(const TDuration /*defaultValue*/) const override { - return TDuration::Seconds(1); - } }; @@ -336,7 +330,8 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { serverSettings.GrpcPort = grpcPort; serverSettings.SetDomainName("Root") .SetUseRealThreads(false) - .SetEnableMetadataProvider(true); + .SetEnableMetadataProvider(true) + .SetEnableTieringInColumnShard(true) ; Tests::TServer::TPtr server = new Tests::TServer(serverSettings); @@ -426,6 +421,7 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { serverSettings.SetDomainName("Root") .SetUseRealThreads(false) .SetEnableMetadataProvider(true) + .SetEnableTieringInColumnShard(true) .SetAppConfig(appConfig); Tests::TServer::TPtr server = new Tests::TServer(serverSettings); @@ -556,6 +552,7 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { serverSettings.SetDomainName("Root") .SetUseRealThreads(false) .SetEnableMetadataProvider(true) + .SetEnableTieringInColumnShard(true) ; Tests::TServer::TPtr server = new Tests::TServer(serverSettings); diff --git a/ydb/core/tx/tx_processing.h b/ydb/core/tx/tx_processing.h index d992dd71474d..1f9b86527225 100644 --- a/ydb/core/tx/tx_processing.h +++ b/ydb/core/tx/tx_processing.h @@ -105,7 +105,7 @@ struct TEvTxProcessing { } }; - struct TEvReadSet : public TEventPB { + struct TEvReadSet: public TEventPB { TEvReadSet() {} diff --git a/ydb/core/tx/tx_proxy/global.cpp b/ydb/core/tx/tx_proxy/global.cpp new file mode 100644 index 000000000000..66d88d8d824e --- /dev/null +++ b/ydb/core/tx/tx_proxy/global.cpp @@ -0,0 +1,5 @@ +#include "global.h" + +namespace NKikimr::NTxProxy { + +} diff --git a/ydb/core/tx/tx_proxy/global.h b/ydb/core/tx/tx_proxy/global.h new file mode 100644 index 000000000000..00002b17e0ac --- /dev/null +++ b/ydb/core/tx/tx_proxy/global.h @@ -0,0 +1,9 @@ +#pragma once +#include + +namespace NKikimr::NTxProxy { +class TLimits { +public: + static constexpr ui64 MemoryInFlightWriting = (ui64)1 << 30; +}; +} \ No newline at end of 
file diff --git a/ydb/core/tx/tx_proxy/rpc_long_tx.cpp b/ydb/core/tx/tx_proxy/rpc_long_tx.cpp index f5b7c6b07cd5..557cf13c14cb 100644 --- a/ydb/core/tx/tx_proxy/rpc_long_tx.cpp +++ b/ydb/core/tx/tx_proxy/rpc_long_tx.cpp @@ -1,10 +1,15 @@ -#include -#include +#include "global.h" + +#include #include +#include #include -#include +#include +#include #include +#include + #include namespace NKikimr { @@ -16,30 +21,29 @@ using namespace NLongTxService; // Common logic of LongTx Write that takes care of splitting the data according to the sharding scheme, // sending it to shards and collecting their responses template -class TLongTxWriteBase : public TActorBootstrapped { +class TLongTxWriteBase: public TActorBootstrapped { using TBase = TActorBootstrapped; + static inline TAtomicCounter MemoryInFlight = 0; + protected: using TThis = typename TBase::TThis; public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::GRPC_REQ; - } - - TLongTxWriteBase(const TString& databaseName, const TString& path, const TString& token, - const TLongTxId& longTxId, const TString& dedupId) - : TBase() - , DatabaseName(databaseName) + TLongTxWriteBase(const TString& databaseName, const TString& path, const TString& token, const TLongTxId& longTxId, const TString& dedupId) + : DatabaseName(databaseName) , Path(path) , DedupId(dedupId) , LongTxId(longTxId) - , ActorSpan(0, NWilson::TTraceId::NewTraceId(0, Max()), "TLongTxWriteBase") - { + , ActorSpan(0, NWilson::TTraceId::NewTraceId(0, Max()), "TLongTxWriteBase") { if (token) { UserToken.emplace(token); } } + virtual ~TLongTxWriteBase() { + AFL_VERIFY(MemoryInFlight.Sub(InFlightSize) >= 0); + } + protected: void ProceedWithSchema(const NSchemeCache::TSchemeCacheNavigate& resp) { NWilson::TProfileSpan pSpan = ActorSpan.BuildChildrenSpan("ProceedWithSchema"); @@ -53,17 +57,24 @@ class TLongTxWriteBase : public TActorBootstrapped { if (UserToken && entry.SecurityObject) { const ui32 
access = NACLib::UpdateRow; if (!entry.SecurityObject->CheckAccess(access, *UserToken)) { - RaiseIssue(MakeIssue(NKikimrIssues::TIssuesIds::ACCESS_DENIED, TStringBuilder() - << "User has no permission to perform writes to this table" - << " user: " << UserToken->GetUserSID() - << " path: " << Path)); + RaiseIssue(MakeIssue( + NKikimrIssues::TIssuesIds::ACCESS_DENIED, TStringBuilder() << "User has no permission to perform writes to this table" + << " user: " << UserToken->GetUserSID() << " path: " << Path)); return ReplyError(Ydb::StatusIds::UNAUTHORIZED); } } + auto accessor = ExtractDataAccessor(); + AFL_VERIFY(!InFlightSize); + InFlightSize = accessor->GetSize(); + const i64 sizeInFlight = MemoryInFlight.Add(InFlightSize); + if (TLimits::MemoryInFlightWriting < (ui64)sizeInFlight && sizeInFlight != InFlightSize) { + return ReplyError(Ydb::StatusIds::OVERLOADED, "a lot of memory in flight"); + } if (NCSIndex::TServiceOperator::IsEnabled()) { - TBase::Send(NCSIndex::MakeServiceId(TBase::SelfId().NodeId()), - new NCSIndex::TEvAddData(GetDataAccessor().GetDeserializedBatch(), Path, std::make_shared(TBase::SelfId()))); + TBase::Send( + NCSIndex::MakeServiceId(TBase::SelfId().NodeId()), new NCSIndex::TEvAddData(accessor->GetDeserializedBatch(), Path, + std::make_shared(TBase::SelfId()))); } else { IndexReady = true; } @@ -73,10 +84,11 @@ class TLongTxWriteBase : public TActorBootstrapped { return ReplyError(Ydb::StatusIds::BAD_REQUEST, "Shard splitter not implemented for table kind"); } - auto initStatus = shardsSplitter->SplitData(entry, GetDataAccessor()); + auto initStatus = shardsSplitter->SplitData(entry, *accessor); if (!initStatus.Ok()) { return ReplyError(initStatus.GetStatus(), initStatus.GetErrorMessage()); } + accessor.reset(); const auto& splittedData = shardsSplitter->GetSplitData(); InternalController = std::make_shared(splittedData.GetShardRequestsCount(), this->SelfId(), LongTxId); @@ -85,24 +97,26 @@ class TLongTxWriteBase : public TActorBootstrapped { 
ui32 writeIdx = 0; for (auto& [shard, infos] : splittedData.GetShardsInfo()) { for (auto&& shardInfo : infos) { + InternalController->GetCounters()->OnRequest(shardInfo->GetRowsCount(), shardInfo->GetBytes()); sumBytes += shardInfo->GetBytes(); rowsCount += shardInfo->GetRowsCount(); - this->Register(new NEvWrite::TShardWriter(shard, shardsSplitter->GetTableId(), DedupId, shardInfo, ActorSpan, InternalController, ++writeIdx, NEvWrite::EModificationType::Replace)); + this->Register(new NEvWrite::TShardWriter(shard, shardsSplitter->GetTableId(), DedupId, shardInfo, ActorSpan, InternalController, + ++writeIdx, NEvWrite::EModificationType::Replace)); } } pSpan.Attribute("affected_shards_count", (long)splittedData.GetShardsInfo().size()); pSpan.Attribute("bytes", (long)sumBytes); pSpan.Attribute("rows", (long)rowsCount); pSpan.Attribute("shards_count", (long)splittedData.GetShardsCount()); - AFL_DEBUG(NKikimrServices::LONG_TX_SERVICE)("affected_shards_count", splittedData.GetShardsInfo().size())("shards_count", splittedData.GetShardsCount()) - ("path", Path)("shards_info", splittedData.ShortLogString(32)); + AFL_DEBUG(NKikimrServices::LONG_TX_SERVICE)("affected_shards_count", splittedData.GetShardsInfo().size())( + "shards_count", splittedData.GetShardsCount())("path", Path)("shards_info", splittedData.ShortLogString(32)); this->Become(&TThis::StateMain); } private: STFUNC(StateMain) { switch (ev->GetTypeRewrite()) { - hFunc(NEvWrite::TWritersController::TEvPrivate::TEvShardsWriteResult, Handle) + hFunc(NEvWrite::TWritersController::TEvPrivate::TEvShardsWriteResult, Handle); hFunc(TEvLongTxService::TEvAttachColumnShardWritesResult, Handle); hFunc(NCSIndex::TEvAddDataResult, Handle); } @@ -150,11 +164,10 @@ class TLongTxWriteBase : public TActorBootstrapped { IndexReady = true; } } - } protected: - virtual NEvWrite::IShardsSplitter::IEvWriteDataAccessor& GetDataAccessor() const = 0; + virtual std::unique_ptr ExtractDataAccessor() = 0; virtual void RaiseIssue(const 
NYql::TIssue& issue) = 0; virtual void ReplyError(Ydb::StatusIds::StatusCode status, const TString& message = TString()) = 0; virtual void ReplySuccess() = 0; @@ -164,7 +177,9 @@ class TLongTxWriteBase : public TActorBootstrapped { const TString Path; const TString DedupId; TLongTxId LongTxId; + private: + i64 InFlightSize = 0; std::optional UserToken; NWilson::TProfileSpan ActorSpan; NEvWrite::TWritersController::TPtr InternalController; @@ -174,15 +189,19 @@ class TLongTxWriteBase : public TActorBootstrapped { // LongTx Write implementation called from the inside of YDB (e.g. as a part of BulkUpsert call) // NOTE: permission checks must have been done by the caller -class TLongTxWriteInternal : public TLongTxWriteBase { +class TLongTxWriteInternal: public TLongTxWriteBase { using TBase = TLongTxWriteBase; - class TParsedBatchData : public NEvWrite::IShardsSplitter::IEvWriteDataAccessor { + class TParsedBatchData: public NEvWrite::IShardsSplitter::IEvWriteDataAccessor { + private: + using TBase = NEvWrite::IShardsSplitter::IEvWriteDataAccessor; std::shared_ptr Batch; + public: TParsedBatchData(std::shared_ptr batch) - : Batch(batch) - {} + : TBase(NArrow::GetBatchMemorySize(batch)) + , Batch(batch) { + } std::shared_ptr GetDeserializedBatch() const override { return Batch; @@ -193,25 +212,19 @@ class TLongTxWriteInternal : public TLongTxWriteBase { } }; - NEvWrite::IShardsSplitter::IEvWriteDataAccessor::TPtr DataAccessor; -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::GRPC_REQ; - } + std::unique_ptr DataAccessor; - explicit TLongTxWriteInternal(const TActorId& replyTo, const TLongTxId& longTxId, const TString& dedupId, - const TString& databaseName, const TString& path, - std::shared_ptr navigateResult, - std::shared_ptr batch, - std::shared_ptr issues) +public: + explicit TLongTxWriteInternal(const TActorId& replyTo, const TLongTxId& longTxId, const TString& dedupId, const TString& 
databaseName, + const TString& path, std::shared_ptr navigateResult, std::shared_ptr batch, + std::shared_ptr issues) : TBase(databaseName, path, TString(), longTxId, dedupId) , ReplyTo(replyTo) , NavigateResult(navigateResult) , Batch(batch) - , Issues(issues) - { + , Issues(issues) { Y_ABORT_UNLESS(Issues); - DataAccessor = std::make_shared(Batch); + DataAccessor = std::make_unique(Batch); } void Bootstrap() { @@ -220,8 +233,9 @@ class TLongTxWriteInternal : public TLongTxWriteBase { } protected: - NEvWrite::IShardsSplitter::IEvWriteDataAccessor& GetDataAccessor() const override { - return *DataAccessor; + std::unique_ptr ExtractDataAccessor() override { + AFL_VERIFY(DataAccessor); + return std::move(DataAccessor); } void RaiseIssue(const NYql::TIssue& issue) override { @@ -248,19 +262,14 @@ class TLongTxWriteInternal : public TLongTxWriteBase { std::shared_ptr Issues; }; - -TActorId DoLongTxWriteSameMailbox(const TActorContext& ctx, const TActorId& replyTo, - const NLongTxService::TLongTxId& longTxId, const TString& dedupId, - const TString& databaseName, const TString& path, - std::shared_ptr navigateResult, - std::shared_ptr batch, std::shared_ptr issues) -{ - return ctx.RegisterWithSameMailbox( - new TLongTxWriteInternal(replyTo, longTxId, dedupId, databaseName, path, navigateResult, batch, issues)); +TActorId DoLongTxWriteSameMailbox(const TActorContext& ctx, const TActorId& replyTo, const NLongTxService::TLongTxId& longTxId, + const TString& dedupId, const TString& databaseName, const TString& path, + std::shared_ptr navigateResult, std::shared_ptr batch, + std::shared_ptr issues) { + return ctx.RegisterWithSameMailbox(new TLongTxWriteInternal(replyTo, longTxId, dedupId, databaseName, path, navigateResult, batch, issues)); } // - -} -} +} // namespace NTxProxy +} // namespace NKikimr diff --git a/ydb/core/tx/tx_proxy/upload_rows_common_impl.cpp b/ydb/core/tx/tx_proxy/upload_rows_common_impl.cpp index 6d487a26016b..fc3681c344bc 100644 --- 
a/ydb/core/tx/tx_proxy/upload_rows_common_impl.cpp +++ b/ydb/core/tx/tx_proxy/upload_rows_common_impl.cpp @@ -12,6 +12,11 @@ namespace NKikimr { RowsCount = TBase::GetDeriviative("Rows/Count"); PackageSize = TBase::GetHistogram("Rows/PackageSize", NMonitoring::ExponentialHistogram(15, 2, 10)); + PreparingDuration = TBase::GetHistogram("Preparing/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 10)); + WritingDuration = TBase::GetHistogram("Writing/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 10)); + CommitDuration = TBase::GetHistogram("Commit/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 10)); + PrepareReplyDuration = TBase::GetHistogram("ToReply/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 10)); + const google::protobuf::EnumDescriptor* descriptor = ::Ydb::StatusIds::StatusCode_descriptor(); for (ui32 i = 0; i < (ui32)descriptor->value_count(); ++i) { auto vDescription = descriptor->value(i); @@ -19,12 +24,4 @@ namespace NKikimr { } } - void TUploadCounters::OnReply(const TDuration d, const ::Ydb::StatusIds::StatusCode code) const { - const TString name = ::Ydb::StatusIds::StatusCode_Name(code); - auto it = CodesCount.find(name); - Y_ABORT_UNLESS(it != CodesCount.end()); - it->second->Add(1); - ReplyDuration->Collect(d.MilliSeconds()); - } - } diff --git a/ydb/core/tx/tx_proxy/upload_rows_common_impl.h b/ydb/core/tx/tx_proxy/upload_rows_common_impl.h index 599f9984b8ab..d2098f26e711 100644 --- a/ydb/core/tx/tx_proxy/upload_rows_common_impl.h +++ b/ydb/core/tx/tx_proxy/upload_rows_common_impl.h @@ -45,17 +45,73 @@ class TUploadCounters: public NColumnShard::TCommonCountersOwner { NMonitoring::TDynamicCounters::TCounterPtr RowsCount; NMonitoring::THistogramPtr PackageSize; + NMonitoring::THistogramPtr PreparingDuration; + NMonitoring::THistogramPtr WritingDuration; + NMonitoring::THistogramPtr CommitDuration; + NMonitoring::THistogramPtr PrepareReplyDuration; + THashMap CodesCount; public: TUploadCounters(); + class TGuard: TMoveOnly 
{ + private: + TMonotonic Start = TMonotonic::Now(); + std::optional WritingStarted; + std::optional CommitStarted; + std::optional CommitFinished; + std::optional ReplyFinished; + TUploadCounters& Owner; + public: + TGuard(const TMonotonic start, TUploadCounters& owner) + : Start(start) + , Owner(owner) + { + + } + + void OnWritingStarted() { + WritingStarted = TMonotonic::Now(); + Owner.PreparingDuration->Collect((*WritingStarted - Start).MilliSeconds()); + } + + void OnCommitStarted() { + CommitStarted = TMonotonic::Now(); + AFL_VERIFY(WritingStarted); + Owner.WritingDuration->Collect((*CommitStarted - *WritingStarted).MilliSeconds()); + } + + void OnCommitFinished() { + CommitFinished = TMonotonic::Now(); + AFL_VERIFY(CommitStarted); + Owner.CommitDuration->Collect((*CommitFinished - *CommitStarted).MilliSeconds()); + } + + void OnReply(const ::Ydb::StatusIds::StatusCode code) { + ReplyFinished = TMonotonic::Now(); + if (CommitFinished) { + Owner.PrepareReplyDuration->Collect((*ReplyFinished - *CommitFinished).MilliSeconds()); + } + Owner.ReplyDuration->Collect((*ReplyFinished - Start).MilliSeconds()); + + const TString name = ::Ydb::StatusIds::StatusCode_Name(code); + auto it = Owner.CodesCount.find(name); + Y_ABORT_UNLESS(it != Owner.CodesCount.end()); + it->second->Add(1); + } + }; + + TGuard BuildGuard(const TMonotonic start) { + return TGuard(start, *this); + } + void OnRequest(const ui64 rowsCount) const { RequestsCount->Add(1); RowsCount->Add(rowsCount); PackageSize->Collect(rowsCount); } - void OnReply(const TDuration d, const ::Ydb::StatusIds::StatusCode code) const; + void OnReply(const TDuration dFull, const TDuration dDelta, const ::Ydb::StatusIds::StatusCode code) const; }; @@ -148,6 +204,7 @@ class TUploadRowsBase : public TActorBootstrapped StartCommitTime; TActorId TimeoutTimerActorId; TAutoPtr ResolvePartitionsResult; @@ -164,7 +221,7 @@ class TUploadRowsBase : public TActorBootstrapped Issues = std::make_shared(); NLongTxService::TLongTxId 
LongTxId; TUploadCounters UploadCounters; - + TUploadCounters::TGuard UploadCountersGuard; protected: enum class EUploadSource { ProtoValues = 0, @@ -216,6 +273,7 @@ class TUploadRowsBase : public TActorBootstrappedGet(); if (msg->Record.GetStatus() == Ydb::StatusIds::SUCCESS) { @@ -1263,7 +1324,7 @@ class TUploadRowsBase : public TActorBootstrappedNow() - StartTime, status); + UploadCountersGuard.OnReply(status); SendResult(ctx, status); LOG_DEBUG_S(ctx, NKikimrServices::RPC_REQUEST, LogPrefix() << "completed with status " << status); diff --git a/ydb/core/tx/tx_proxy/ya.make b/ydb/core/tx/tx_proxy/ya.make index fa746bd1e249..d592810a65b2 100644 --- a/ydb/core/tx/tx_proxy/ya.make +++ b/ydb/core/tx/tx_proxy/ya.make @@ -14,6 +14,7 @@ SRCS( commitreq.cpp upload_rows_common_impl.cpp upload_rows.cpp + global.cpp ) GENERATE_ENUM_SERIALIZATION(read_table_impl.h) diff --git a/ydb/library/accessor/validator.h b/ydb/library/accessor/validator.h index df81586f8759..6182b524bfa7 100644 --- a/ydb/library/accessor/validator.h +++ b/ydb/library/accessor/validator.h @@ -9,4 +9,9 @@ class TValidator { AFL_VERIFY(!!object); return object; } + template + static T& CheckNotNull(T& object) { + AFL_VERIFY(!!object); + return object; + } }; \ No newline at end of file diff --git a/ydb/library/arrow_clickhouse/AggregateFunctions/AggregateFunctionNumRows.h b/ydb/library/arrow_clickhouse/AggregateFunctions/AggregateFunctionNumRows.h new file mode 100644 index 000000000000..80e5ff270b34 --- /dev/null +++ b/ydb/library/arrow_clickhouse/AggregateFunctions/AggregateFunctionNumRows.h @@ -0,0 +1,79 @@ +// The code in this file is based on original ClickHouse source code +// which is licensed under Apache license v2.0 +// See: https://github.com/ClickHouse/ClickHouse/ + +#pragma once +#include "arrow_clickhouse_types.h" + +#include +#include +#include + +#include + +namespace CH +{ + + +struct AggregateFunctionNumRowsData +{ + UInt64 count = 0; +}; + + +/// Count rows. 
+class AggregateFunctionNumRows final + : public IAggregateFunctionDataHelper +{ +public: + AggregateFunctionNumRows(const DataTypes & argument_types_) + : IAggregateFunctionDataHelper(argument_types_, {}) + {} + + DataTypePtr getReturnType() const override + { + return std::make_shared(); + } + + bool allocatesMemoryInArena() const override { return false; } + + void add(AggregateDataPtr __restrict place, const IColumn **, size_t, Arena *) const override + { + ++data(place).count; + } + + void addBatchSinglePlace( + size_t row_begin, + size_t row_end, + AggregateDataPtr __restrict place, + const IColumn ** /*columns*/, + Arena *) const override + { + data(place).count += row_end - row_begin; + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override + { + data(place).count += data(rhs).count; + } + + void insertResultInto(AggregateDataPtr __restrict place, MutableColumn & to, Arena *) const override + { + assert_cast(to).Append(data(place).count).ok(); + } +}; + +class WrappedNumRows final : public ArrowAggregateFunctionWrapper +{ +public: + WrappedNumRows(std::string name) + : ArrowAggregateFunctionWrapper(std::move(name)) + {} + + AggregateFunctionPtr getHouseFunction(const DataTypes & argument_types) const override + { + return std::make_shared(argument_types); + } +}; + +} diff --git a/ydb/library/arrow_clickhouse/AggregateFunctions/IAggregateFunction.cpp b/ydb/library/arrow_clickhouse/AggregateFunctions/IAggregateFunction.cpp index 87eccca5e419..9442b239798a 100644 --- a/ydb/library/arrow_clickhouse/AggregateFunctions/IAggregateFunction.cpp +++ b/ydb/library/arrow_clickhouse/AggregateFunctions/IAggregateFunction.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace CH { @@ -22,6 +23,8 @@ AggregateFunctionPtr GetAggregateFunction(AggFunctionId id, const DataTypes & ar return WrappedSum("").getHouseFunction(argument_types); case AggFunctionId::AGG_AVG: return 
WrappedAvg("").getHouseFunction(argument_types); + case AggFunctionId::AGG_NUM_ROWS: + return WrappedNumRows("").getHouseFunction(argument_types); default: break; } diff --git a/ydb/library/arrow_clickhouse/AggregateFunctions/IAggregateFunction.h b/ydb/library/arrow_clickhouse/AggregateFunctions/IAggregateFunction.h index f4f21463a10e..8638da084af0 100644 --- a/ydb/library/arrow_clickhouse/AggregateFunctions/IAggregateFunction.h +++ b/ydb/library/arrow_clickhouse/AggregateFunctions/IAggregateFunction.h @@ -181,7 +181,7 @@ class IAggregateFunction : public std::enable_shared_from_this +template class IAggregateFunctionHelper : public IAggregateFunction { private: @@ -204,7 +204,7 @@ class IAggregateFunctionHelper : public IAggregateFunction const IColumn ** columns, Arena * arena) const override { - if (columns && columns[0]->null_bitmap_data()) + if (skip_nulls && columns && columns[0]->null_bitmap_data()) { for (size_t i = row_begin; i < row_end; ++i) { @@ -240,7 +240,7 @@ class IAggregateFunctionHelper : public IAggregateFunction const IColumn ** columns, Arena * arena) const override { - if (columns && columns[0]->null_bitmap_data()) + if (skip_nulls && columns && columns[0]->null_bitmap_data()) { for (size_t i = row_begin; i < row_end; ++i) { @@ -340,9 +340,12 @@ class IAggregateFunctionHelper : public IAggregateFunction /// Implements several methods for manipulation with data. T - type of structure with data for aggregation. 
-template -class IAggregateFunctionDataHelper : public IAggregateFunctionHelper +template +class IAggregateFunctionDataHelper : public IAggregateFunctionHelper { +private: + using Base = IAggregateFunctionHelper; + protected: using Data = T; @@ -354,7 +357,7 @@ class IAggregateFunctionDataHelper : public IAggregateFunctionHelper static constexpr bool DateTime64Supported = true; IAggregateFunctionDataHelper(const DataTypes & argument_types_, const Array & parameters_) - : IAggregateFunctionHelper(argument_types_, parameters_) {} + : Base(argument_types_, parameters_) {} void create(AggregateDataPtr __restrict place) const override /// NOLINT { @@ -397,7 +400,7 @@ class IAggregateFunctionDataHelper : public IAggregateFunctionHelper if (func.allocatesMemoryInArena() || sizeof(Data) > 16 || func.sizeOfData() != sizeof(Data)) { - IAggregateFunctionHelper::addBatchLookupTable8(row_begin, row_end, map, place_offset, init, key, columns, arena); + Base::addBatchLookupTable8(row_begin, row_end, map, place_offset, init, key, columns, arena); return; } @@ -493,6 +496,7 @@ enum class AggFunctionId { //AGG_QUANTILES = 14, //AGG_TOP_COUNT = 15, //AGG_TOP_SUM = 16, + AGG_NUM_ROWS = 17, }; struct GroupByOptions : public arrow::compute::ScalarAggregateOptions { diff --git a/ydb/library/arrow_kernels/func_num_rows.h b/ydb/library/arrow_kernels/func_num_rows.h new file mode 100644 index 000000000000..f9d353232ec9 --- /dev/null +++ b/ydb/library/arrow_kernels/func_num_rows.h @@ -0,0 +1,20 @@ +#pragma once +#include "clickhouse_type_traits.h" +#include "func_common.h" + +namespace NKikimr::NKernels { + +struct TNumRows: public arrow::compute::MetaFunction { +public: + TNumRows(const TString name) + : arrow::compute::MetaFunction(name.data(), arrow::compute::Arity::Unary(), nullptr) { + } + + arrow::Result ExecuteImpl(const std::vector& args, const arrow::compute::FunctionOptions* /*options*/, + arrow::compute::ExecContext* /*ctx*/) const override { + Y_ABORT_UNLESS(args.size() == 1); + 
return arrow::Datum(std::make_shared(args[0].make_array()->length())); + } +}; + +} // namespace NKikimr::NKernels diff --git a/ydb/library/arrow_kernels/functions.h b/ydb/library/arrow_kernels/functions.h index 2f4523a4fe20..10d45bfc7eec 100644 --- a/ydb/library/arrow_kernels/functions.h +++ b/ydb/library/arrow_kernels/functions.h @@ -7,3 +7,4 @@ #include "func_modulo_or_zero.h" #include "func_math.h" #include "func_round.h" +#include "func_num_rows.h" diff --git a/ydb/library/conclusion/result.h b/ydb/library/conclusion/result.h index 72aaf29f1a47..3e0cde0c7da2 100644 --- a/ydb/library/conclusion/result.h +++ b/ydb/library/conclusion/result.h @@ -40,6 +40,11 @@ class TConclusion { : Result(result) { } + template + TConclusion(TResultArg& result) + : Result(result) { + } + const TConclusionStatus& GetError() const { auto result = std::get_if(&Result); Y_ABORT_UNLESS(result, "incorrect object for error request"); diff --git a/ydb/library/services/services.proto b/ydb/library/services/services.proto index 3ac7f03bda68..f7a972cfc419 100644 --- a/ydb/library/services/services.proto +++ b/ydb/library/services/services.proto @@ -393,6 +393,8 @@ enum EServiceKikimr { // Distributed storage debugging BS_REQUEST_COST = 2500; + + GROUPED_MEMORY_LIMITER = 2700; }; message TActivity { diff --git a/ydb/library/yql/sql/v1/node.h b/ydb/library/yql/sql/v1/node.h index 1a0421814144..198ac249907a 100644 --- a/ydb/library/yql/sql/v1/node.h +++ b/ydb/library/yql/sql/v1/node.h @@ -1112,6 +1112,7 @@ namespace NSQLTranslationV1 { TMaybe AutoPartitioningByLoad; TNodePtr MinPartitions; TNodePtr MaxPartitions; + TNodePtr PartitionCount; TNodePtr UniformPartitions; TVector> PartitionAtKeys; TMaybe KeyBloomFilter; diff --git a/ydb/library/yql/sql/v1/query.cpp b/ydb/library/yql/sql/v1/query.cpp index e5469ef8f4d8..85b3bc3149eb 100644 --- a/ydb/library/yql/sql/v1/query.cpp +++ b/ydb/library/yql/sql/v1/query.cpp @@ -222,6 +222,10 @@ static INode::TPtr CreateTableSettings(const TTableSettings& 
tableSettings, ETab if (tableSettings.MaxPartitions) { settings = L(settings, Q(Y(Q("maxPartitions"), tableSettings.MaxPartitions))); } + if (tableSettings.PartitionCount) { + settings = L(settings, Q(Y(Q("maxPartitions"), tableSettings.PartitionCount))); + settings = L(settings, Q(Y(Q("minPartitions"), tableSettings.PartitionCount))); + } if (tableSettings.KeyBloomFilter) { const auto& ref = tableSettings.KeyBloomFilter.GetRef(); settings = L(settings, Q(Y(Q("keyBloomFilter"), BuildQuotedAtom(ref.Pos, ref.Name)))); diff --git a/ydb/library/yql/sql/v1/sql_translation.cpp b/ydb/library/yql/sql/v1/sql_translation.cpp index 1041a1e16965..e4c981af6d03 100644 --- a/ydb/library/yql/sql/v1/sql_translation.cpp +++ b/ydb/library/yql/sql/v1/sql_translation.cpp @@ -1892,6 +1892,17 @@ bool TSqlTranslation::StoreExternalTableSettingsEntry(const TIdentifier& id, con return true; } +bool TSqlTranslation::ValidateTableSettings(const TTableSettings& settings) { + if (settings.PartitionCount) { + if (!settings.StoreType || to_lower(settings.StoreType->Name) != "column") { + Ctx.Error() << " PARTITION_COUNT can be used only with STORE=COLUMN"; + return false; + } + } + + return true; +} + bool TSqlTranslation::StoreTableSettingsEntry(const TIdentifier& id, const TRule_table_setting_value* value, TTableSettings& settings, bool alter, bool reset) { YQL_ENSURE(value || reset); @@ -1950,6 +1961,16 @@ bool TSqlTranslation::StoreTableSettingsEntry(const TIdentifier& id, const TRule Ctx.Error() << to_upper(id.Name) << " value should be an integer"; return false; } + } else if (to_lower(id.Name) == "partition_count") { + if (reset) { + Ctx.Error() << to_upper(id.Name) << " reset is not supported"; + return false; + } + + if (!StoreInt(*value, settings.PartitionCount, Ctx)) { + Ctx.Error() << to_upper(id.Name) << " value should be an integer"; + return false; + } } else if (to_lower(id.Name) == "uniform_partitions") { if (alter) { Ctx.Error() << to_upper(id.Name) << " alter is not supported"; 
@@ -2040,7 +2061,8 @@ bool TSqlTranslation::StoreTableSettingsEntry(const TIdentifier& id, const TRule Ctx.Error() << "Unknown table setting: " << id.Name; return false; } - return true; + + return ValidateTableSettings(settings); } bool TSqlTranslation::StoreTableSettingsEntry(const TIdentifier& id, const TRule_table_setting_value& value, diff --git a/ydb/library/yql/sql/v1/sql_translation.h b/ydb/library/yql/sql/v1/sql_translation.h index d31ecd6a3569..07fc6764840d 100644 --- a/ydb/library/yql/sql/v1/sql_translation.h +++ b/ydb/library/yql/sql/v1/sql_translation.h @@ -258,6 +258,8 @@ class TSqlTranslation: public TTranslation { bool ClusterExpr(const TRule_cluster_expr& node, bool allowWildcard, bool allowBinding, TString& service, TDeferredAtom& cluster, bool& isBinding); bool StructLiteralItem(TVector& labels, const TRule_expr& label, TVector& values, const TRule_expr& value); + bool ValidateTableSettings(const TTableSettings& settings); + protected: NSQLTranslation::ESqlMode Mode; }; diff --git a/ydb/library/yql/sql/v1/sql_ut.cpp b/ydb/library/yql/sql/v1/sql_ut.cpp index c23c271b100b..e9f4cca1845b 100644 --- a/ydb/library/yql/sql/v1/sql_ut.cpp +++ b/ydb/library/yql/sql/v1/sql_ut.cpp @@ -6917,6 +6917,31 @@ Y_UNIT_TEST_SUITE(ResourcePool) { } } +Y_UNIT_TEST_SUITE(OlapPartitionCount) { + Y_UNIT_TEST(CorrectUsage) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE TABLE `mytable` (id Uint32, PRIMARY KEY (id)) + PARTITION BY HASH(id) + WITH (STORE = COLUMN, PARTITION_COUNT = 8); + )sql"); + + UNIT_ASSERT_C(res.IsOk(), res.Issues.ToString()); + } + + Y_UNIT_TEST(UseWithoutColumnStore) { + NYql::TAstParseResult res = SqlToYql(R"sql( + USE plato; + CREATE TABLE `mytable` (id Uint32, PRIMARY KEY (id)) + WITH (PARTITION_COUNT = 8); + )sql"); + + UNIT_ASSERT(!res.IsOk()); + UNIT_ASSERT(res.Issues.Size() == 1); + UNIT_ASSERT_STRING_CONTAINS(res.Issues.ToString(), "PARTITION_COUNT can be used only with STORE=COLUMN"); + } +} + 
Y_UNIT_TEST_SUITE(ResourcePoolClassifier) { Y_UNIT_TEST(CreateResourcePoolClassifier) { NYql::TAstParseResult res = SqlToYql(R"sql( diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-12 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-12 index b60330d3e86f..7d698c1121a3 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-12 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-12 @@ -59,24 +59,14 @@ "PlanNodeType": "Connection", "Plans": [ { - "Node Type": "Aggregate-Filter-TableFullScan", + "Node Type": "Filter-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", - "GroupBy": "item.SearchPhrase", "Inputs": [ { "InternalOperatorId": 1 } ], - "Name": "Aggregate" - }, - { - "Inputs": [ - { - "InternalOperatorId": 2 - } - ], "Name": "Filter", "Predicate": "SearchPhrase != " }, @@ -133,11 +123,33 @@ } } }, + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 108 + }, + "Function": { + "Id": 2 + } + } + ], + "KeyColumns": [ + { + "Id": 40 + } + ] + } + }, { "Projection": { "Columns": [ { "Id": 40 + }, + { + "Id": 108 } ] } diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-14 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-14 index e0ebda6daef8..7c71dabd5722 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-14 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-14 @@ -60,24 +60,14 @@ "PlanNodeType": "Connection", "Plans": [ { - "Node Type": "Aggregate-Filter-TableFullScan", + "Node Type": "Filter-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", - "GroupBy": "", 
"Inputs": [ { "InternalOperatorId": 1 } ], - "Name": "Aggregate" - }, - { - "Inputs": [ - { - "InternalOperatorId": 2 - } - ], "Name": "Filter", "Predicate": "SearchPhrase != " }, @@ -135,6 +125,28 @@ } } }, + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 108 + }, + "Function": { + "Id": 2 + } + } + ], + "KeyColumns": [ + { + "Id": 39 + }, + { + "Id": 40 + } + ] + } + }, { "Projection": { "Columns": [ @@ -143,6 +155,9 @@ }, { "Id": 40 + }, + { + "Id": 108 } ] } diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-15 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-15 index 73b1ddfbc534..8085dd21197d 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-15 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-15 @@ -59,18 +59,8 @@ "PlanNodeType": "Connection", "Plans": [ { - "Node Type": "Aggregate-TableFullScan", + "Node Type": "TableFullScan", "Operators": [ - { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", - "GroupBy": "item.UserID", - "Inputs": [ - { - "InternalOperatorId": 1 - } - ], - "Name": "Aggregate" - }, { "Inputs": [], "Name": "TableFullScan", @@ -87,11 +77,33 @@ "Scan": "Parallel", "SsaProgram": { "Command": [ + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 106 + }, + "Function": { + "Id": 2 + } + } + ], + "KeyColumns": [ + { + "Id": 10 + } + ] + } + }, { "Projection": { "Columns": [ { "Id": 10 + }, + { + "Id": 106 } ] } diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-16 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-16 index 88572cf898e7..c8bdcc9e1268 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-16 +++ 
b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-16 @@ -60,18 +60,8 @@ "PlanNodeType": "Connection", "Plans": [ { - "Node Type": "Aggregate-TableFullScan", + "Node Type": "TableFullScan", "Operators": [ - { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", - "GroupBy": "", - "Inputs": [ - { - "InternalOperatorId": 1 - } - ], - "Name": "Aggregate" - }, { "Inputs": [], "Name": "TableFullScan", @@ -89,6 +79,28 @@ "Scan": "Parallel", "SsaProgram": { "Command": [ + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 106 + }, + "Function": { + "Id": 2 + } + } + ], + "KeyColumns": [ + { + "Id": 40 + }, + { + "Id": 10 + } + ] + } + }, { "Projection": { "Columns": [ @@ -97,6 +109,9 @@ }, { "Id": 10 + }, + { + "Id": 106 } ] } diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-17 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-17 index c29254659c56..8c89c77845c9 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-17 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-17 @@ -59,18 +59,8 @@ "PlanNodeType": "Connection", "Plans": [ { - "Node Type": "Aggregate-TableFullScan", + "Node Type": "TableFullScan", "Operators": [ - { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", - "GroupBy": "", - "Inputs": [ - { - "InternalOperatorId": 1 - } - ], - "Name": "Aggregate" - }, { "Inputs": [], "Name": "TableFullScan", @@ -88,6 +78,28 @@ "Scan": "Parallel", "SsaProgram": { "Command": [ + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 106 + }, + "Function": { + "Id": 2 + } + } + ], + "KeyColumns": [ + { + "Id": 40 + }, + { + "Id": 10 + } + ] + } + }, { "Projection": { "Columns": [ @@ -96,6 +108,9 @@ }, { "Id": 10 + }, + { + "Id": 106 } ] } diff --git 
a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 index d27488682368..52a6c18fa989 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 @@ -59,24 +59,14 @@ "PlanNodeType": "Connection", "Plans": [ { - "Node Type": "Aggregate-Filter-TableFullScan", + "Node Type": "Filter-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1),_yql_agg_1: MIN(item.URL,state._yql_agg_1)}", - "GroupBy": "item.SearchPhrase", "Inputs": [ { "InternalOperatorId": 1 } ], - "Name": "Aggregate" - }, - { - "Inputs": [ - { - "InternalOperatorId": 2 - } - ], "Name": "Filter", "Predicate": "" }, @@ -174,6 +164,38 @@ } } }, + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 110 + }, + "Function": { + "Id": 2 + } + }, + { + "Column": { + "Id": 111 + }, + "Function": { + "Arguments": [ + { + "Id": 14 + } + ], + "Id": 3 + } + } + ], + "KeyColumns": [ + { + "Id": 40 + } + ] + } + }, { "Projection": { "Columns": [ @@ -181,7 +203,10 @@ "Id": 40 }, { - "Id": 14 + "Id": 110 + }, + { + "Id": 111 } ] } diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-30 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-30 index 48525547c1e6..b67dd5fb5ca1 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-30 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-30 @@ -60,24 +60,14 @@ "PlanNodeType": "Connection", "Plans": [ { - "Node Type": "Aggregate-Filter-TableFullScan", + "Node Type": "Filter-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_1: 
SUM(state._yql_agg_1,1),_yql_agg_2: SUM(item.IsRefresh,state._yql_agg_2)}", - "GroupBy": "", "Inputs": [ { "InternalOperatorId": 1 } ], - "Name": "Aggregate" - }, - { - "Inputs": [ - { - "InternalOperatorId": 2 - } - ], "Name": "Filter", "Predicate": "SearchPhrase != " }, @@ -138,6 +128,67 @@ } } }, + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 108 + }, + "Function": { + "Arguments": [ + { + "Id": 21 + } + ], + "Id": 5 + } + }, + { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 21 + } + ], + "Id": 2 + } + }, + { + "Column": { + "Id": 110 + }, + "Function": { + "Id": 2 + } + }, + { + "Column": { + "Id": 111 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + } + ], + "Id": 5 + } + } + ], + "KeyColumns": [ + { + "Id": 8 + }, + { + "Id": 39 + } + ] + } + }, { "Projection": { "Columns": [ @@ -145,13 +196,19 @@ "Id": 8 }, { - "Id": 16 + "Id": 39 }, { - "Id": 21 + "Id": 109 }, { - "Id": 39 + "Id": 108 + }, + { + "Id": 110 + }, + { + "Id": 111 } ] } diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-31 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-31 index 19f65373f20e..34c43822f73e 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-31 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-31 @@ -60,24 +60,14 @@ "PlanNodeType": "Connection", "Plans": [ { - "Node Type": "Aggregate-Filter-TableFullScan", + "Node Type": "Filter-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1,1),_yql_agg_2: SUM(item.IsRefresh,state._yql_agg_2)}", - "GroupBy": "", "Inputs": [ { "InternalOperatorId": 1 } ], - "Name": "Aggregate" - }, - { - "Inputs": [ - { - "InternalOperatorId": 2 - } - ], "Name": "Filter", "Predicate": "SearchPhrase != " }, @@ -138,6 +128,67 @@ } } }, + { + "GroupBy": { + "Aggregates": 
[ + { + "Column": { + "Id": 108 + }, + "Function": { + "Arguments": [ + { + "Id": 21 + } + ], + "Id": 5 + } + }, + { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 21 + } + ], + "Id": 2 + } + }, + { + "Column": { + "Id": 110 + }, + "Function": { + "Id": 2 + } + }, + { + "Column": { + "Id": 111 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + } + ], + "Id": 5 + } + } + ], + "KeyColumns": [ + { + "Id": 8 + }, + { + "Id": 1 + } + ] + } + }, { "Projection": { "Columns": [ @@ -145,13 +196,19 @@ "Id": 8 }, { - "Id": 16 + "Id": 1 }, { - "Id": 21 + "Id": 109 }, { - "Id": 1 + "Id": 108 + }, + { + "Id": 110 + }, + { + "Id": 111 } ] } diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-32 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-32 index 75058186e366..c6e7c9503479 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-32 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-32 @@ -60,18 +60,8 @@ "PlanNodeType": "Connection", "Plans": [ { - "Node Type": "Aggregate-TableFullScan", + "Node Type": "TableFullScan", "Operators": [ - { - "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1,1),_yql_agg_2: SUM(item.IsRefresh,state._yql_agg_2)}", - "GroupBy": "", - "Inputs": [ - { - "InternalOperatorId": 1 - } - ], - "Name": "Aggregate" - }, { "Inputs": [], "Name": "TableFullScan", @@ -91,6 +81,67 @@ "Scan": "Parallel", "SsaProgram": { "Command": [ + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 106 + }, + "Function": { + "Arguments": [ + { + "Id": 21 + } + ], + "Id": 5 + } + }, + { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 21 + } + ], + "Id": 2 + } + }, + { + "Column": { + "Id": 108 + }, + "Function": { + "Id": 2 + } + }, + { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + 
"Id": 16 + } + ], + "Id": 5 + } + } + ], + "KeyColumns": [ + { + "Id": 8 + }, + { + "Id": 1 + } + ] + } + }, { "Projection": { "Columns": [ @@ -98,13 +149,19 @@ "Id": 8 }, { - "Id": 16 + "Id": 1 }, { - "Id": 21 + "Id": 107 }, { - "Id": 1 + "Id": 106 + }, + { + "Id": 108 + }, + { + "Id": 109 } ] } diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-33 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-33 index 29d853ffd35b..7e3998ff02e5 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-33 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-33 @@ -59,18 +59,8 @@ "PlanNodeType": "Connection", "Plans": [ { - "Node Type": "Aggregate-TableFullScan", + "Node Type": "TableFullScan", "Operators": [ - { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", - "GroupBy": "item.URL", - "Inputs": [ - { - "InternalOperatorId": 1 - } - ], - "Name": "Aggregate" - }, { "Inputs": [], "Name": "TableFullScan", @@ -87,11 +77,33 @@ "Scan": "Parallel", "SsaProgram": { "Command": [ + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 106 + }, + "Function": { + "Id": 2 + } + } + ], + "KeyColumns": [ + { + "Id": 14 + } + ] + } + }, { "Projection": { "Columns": [ { "Id": 14 + }, + { + "Id": 106 } ] } diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-34 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-34 index 6ad93db7d8bf..5309cb36fd30 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-34 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-34 @@ -60,18 +60,8 @@ "PlanNodeType": "Connection", "Plans": [ { - "Node Type": "Aggregate-TableFullScan", + "Node Type": 
"TableFullScan", "Operators": [ - { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", - "GroupBy": "", - "Inputs": [ - { - "InternalOperatorId": 1 - } - ], - "Name": "Aggregate" - }, { "Inputs": [], "Name": "TableFullScan", @@ -89,6 +79,28 @@ "Scan": "Parallel", "SsaProgram": { "Command": [ + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 106 + }, + "Function": { + "Id": 2 + } + } + ], + "KeyColumns": [ + { + "Id": 14 + }, + { + "Id": 10 + } + ] + } + }, { "Projection": { "Columns": [ @@ -97,6 +109,9 @@ }, { "Id": 10 + }, + { + "Id": 106 } ] } diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-36 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-36 index fc86824ae64c..e4c0a458ce0b 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-36 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-36 @@ -59,24 +59,14 @@ "PlanNodeType": "Connection", "Plans": [ { - "Node Type": "Aggregate-Filter-TableRangeScan", + "Node Type": "Filter-TableRangeScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", - "GroupBy": "item.URL", "Inputs": [ { "InternalOperatorId": 1 } ], - "Name": "Aggregate" - }, - { - "Inputs": [ - { - "InternalOperatorId": 2 - } - ], "Name": "Filter", "Predicate": "DontCountHits == 0 And IsRefresh == 0 And URL != " }, @@ -237,11 +227,33 @@ } } }, + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 114 + }, + "Function": { + "Id": 2 + } + } + ], + "KeyColumns": [ + { + "Id": 14 + } + ] + } + }, { "Projection": { "Columns": [ { "Id": 14 + }, + { + "Id": 114 } ] } diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-37 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-37 index bb54cb4527ac..32b96c95af45 
100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-37 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-37 @@ -59,24 +59,14 @@ "PlanNodeType": "Connection", "Plans": [ { - "Node Type": "Aggregate-Filter-TableRangeScan", + "Node Type": "Filter-TableRangeScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", - "GroupBy": "item.Title", "Inputs": [ { "InternalOperatorId": 1 } ], - "Name": "Aggregate" - }, - { - "Inputs": [ - { - "InternalOperatorId": 2 - } - ], "Name": "Filter", "Predicate": "DontCountHits == 0 And IsRefresh == 0 And Title != " }, @@ -237,11 +227,33 @@ } } }, + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 114 + }, + "Function": { + "Id": 2 + } + } + ], + "KeyColumns": [ + { + "Id": 3 + } + ] + } + }, { "Projection": { "Columns": [ { "Id": 3 + }, + { + "Id": 114 } ] } diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-38 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-38 index 6efc3e0af40d..4732d0053458 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-38 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-38 @@ -88,24 +88,14 @@ "PlanNodeType": "Connection", "Plans": [ { - "Node Type": "Aggregate-Filter-TableRangeScan", + "Node Type": "Filter-TableRangeScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", - "GroupBy": "item.URL", "Inputs": [ { "InternalOperatorId": 1 } ], - "Name": "Aggregate" - }, - { - "Inputs": [ - { - "InternalOperatorId": 2 - } - ], "Name": "Filter", "Predicate": "IsRefresh == 0 And IsLink != 0 And IsDownload == 0" }, @@ -267,11 +257,33 @@ } } }, + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 114 + }, + "Function": { + "Id": 2 + } 
+ } + ], + "KeyColumns": [ + { + "Id": 14 + } + ] + } + }, { "Projection": { "Columns": [ { "Id": 14 + }, + { + "Id": 114 } ] } diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-41 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-41 index e0fb3e8faaf2..c550f9e5db05 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-41 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-41 @@ -89,24 +89,14 @@ "PlanNodeType": "Connection", "Plans": [ { - "Node Type": "Aggregate-Filter-TableRangeScan", + "Node Type": "Filter-TableRangeScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", - "GroupBy": "", "Inputs": [ { "InternalOperatorId": 1 } ], - "Name": "Aggregate" - }, - { - "Inputs": [ - { - "InternalOperatorId": 2 - } - ], "Name": "Filter", "Predicate": "IsRefresh == 0 And DontCountHits == 0 And URLHash == 2868770270353813622" }, @@ -269,6 +259,28 @@ } } }, + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 114 + }, + "Function": { + "Id": 2 + } + } + ], + "KeyColumns": [ + { + "Id": 44 + }, + { + "Id": 43 + } + ] + } + }, { "Projection": { "Columns": [ @@ -277,6 +289,9 @@ }, { "Id": 43 + }, + { + "Id": 114 } ] } diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-7 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-7 index 22b5ecb75f84..5b28d5a82c2e 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-7 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-7 @@ -59,24 +59,14 @@ "PlanNodeType": "Connection", "Plans": [ { - "Node Type": "Aggregate-Filter-TableFullScan", + "Node Type": "Filter-TableFullScan", "Operators": [ { - 
"Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", - "GroupBy": "item.AdvEngineID", "Inputs": [ { "InternalOperatorId": 1 } ], - "Name": "Aggregate" - }, - { - "Inputs": [ - { - "InternalOperatorId": 2 - } - ], "Name": "Filter", "Predicate": "AdvEngineID != 0" }, @@ -133,11 +123,33 @@ } } }, + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 108 + }, + "Function": { + "Id": 2 + } + } + ], + "KeyColumns": [ + { + "Id": 41 + } + ] + } + }, { "Projection": { "Columns": [ { "Id": 41 + }, + { + "Id": 108 } ] } diff --git a/ydb/tests/functional/clickbench/ya.make b/ydb/tests/functional/clickbench/ya.make index b5a9a9fdc516..1ce9642b70df 100644 --- a/ydb/tests/functional/clickbench/ya.make +++ b/ydb/tests/functional/clickbench/ya.make @@ -9,6 +9,7 @@ SIZE(MEDIUM) ENV(YDB_USE_IN_MEMORY_PDISKS=true) ENV(YDB_CLI_BINARY="ydb/apps/ydb/ydb") +ENV(YDB_ENABLE_COLUMN_TABLES="true") REQUIREMENTS( ram:32 cpu:4 diff --git a/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema b/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema index 1a4f2de0d060..23037f73bd17 100644 --- a/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema +++ b/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_schemeshard_/flat_schemeshard.schema @@ -1448,6 +1448,11 @@ "ColumnId": 31, "ColumnName": "ServerlessComputeResourcesMode", "ColumnType": "Uint32" + }, + { + "ColumnId": 32, + "ColumnName": "ColumnTableColumnsLimit", + "ColumnType": "Uint64" } ], "ColumnsDropped": [], @@ -1484,7 +1489,8 @@ 28, 29, 30, - 31 + 31, + 32 ], "RoomID": 0, "Codec": 0, diff --git a/ydb/tests/functional/ydb_cli/ya.make b/ydb/tests/functional/ydb_cli/ya.make index 678b5cd39f6e..1e4310e5e7fc 
100644 --- a/ydb/tests/functional/ydb_cli/ya.make +++ b/ydb/tests/functional/ydb_cli/ya.make @@ -10,6 +10,7 @@ TEST_SRCS( ENV(YDB_DRIVER_BINARY="ydb/apps/ydbd/ydbd") ENV(YDB_CLI_BINARY="ydb/apps/ydb/ydb") +ENV(YDB_ENABLE_COLUMN_TABLES="true") TIMEOUT(600) SIZE(MEDIUM) diff --git a/ydb/tests/library/harness/kikimr_config.py b/ydb/tests/library/harness/kikimr_config.py index 185cc8fc0841..2fda1d85136d 100644 --- a/ydb/tests/library/harness/kikimr_config.py +++ b/ydb/tests/library/harness/kikimr_config.py @@ -83,6 +83,8 @@ def load_default_yaml(default_tablet_node_ids, ydb_domain_name, static_erasure, yaml_dict["log_config"]["entry"] = [] for log, level in six.iteritems(log_configs): yaml_dict["log_config"]["entry"].append({"component": log, "level": int(level)}) + if os.getenv("YDB_ENABLE_COLUMN_TABLES", "") == "true": + yaml_dict |= {"column_shard_config": {"disabled_on_scheme_shard": False}} return yaml_dict diff --git a/ydb/tests/library/harness/kikimr_runner.py b/ydb/tests/library/harness/kikimr_runner.py index b6f6c5066037..df67478a78af 100644 --- a/ydb/tests/library/harness/kikimr_runner.py +++ b/ydb/tests/library/harness/kikimr_runner.py @@ -613,33 +613,36 @@ def can_update(self): def start(self): if self.__slot_id is None: - return self.ssh_command("sudo start kikimr") - return self.ssh_command( - [ - "sudo", "start", - "kikimr-multi", - "slot={}".format(self.__slot_id), - "tenant=/Root/db1", - "mbus={}".format(self.__mbus_port), - "grpc={}".format(self.__grpc_port), - "mon={}".format(self.__mon_port), - "ic={}".format(self.__ic_port), - ] + return self.ssh_command("sudo service kikimr start") + + slot_dir = "/Berkanavt/kikimr_{slot}".format(slot=self.__slot_id) + slot_cfg = slot_dir + "/slot_cfg" + env_txt = slot_dir + "/env.txt" + + cfg = """\ +tenant=/Root/db1 +grpc={grpc} +mbus={mbus} +ic={ic} +mon={mon}""".format( + mbus=self.__mbus_port, + grpc=self.__grpc_port, + mon=self.__mon_port, + ic=self.__ic_port, ) + self.ssh_command(["sudo", "mkdir", 
slot_dir]) + self.ssh_command(["sudo", "touch", env_txt]) + self.ssh_command(["/bin/echo", "-e", "\"{}\"".format(cfg), "|", "sudo", "tee", slot_cfg]) + + return self.ssh_command(["sudo", "systemctl", "start", "kikimr-multi@{}".format(self.__slot_id)]) + def stop(self): if self.__slot_id is None: - return self.ssh_command("sudo stop kikimr") + return self.ssh_command("sudo service kikimr stop") return self.ssh_command( [ - "sudo", "stop", - "kikimr-multi", - "slot={}".format(self.__slot_id), - "tenant=/Root/db1", - "mbus={}".format(self.__mbus_port), - "grpc={}".format(self.__grpc_port), - "mon={}".format(self.__mon_port), - "ic={}".format(self.__ic_port), + "sudo", "systemctl", "stop", "kikimr-multi@{}".format(self.__slot_id), ] ) diff --git a/ydb/tests/library/harness/resources/default_yaml.yml b/ydb/tests/library/harness/resources/default_yaml.yml index dc8eea16bf2e..7090fad7dedc 100644 --- a/ydb/tests/library/harness/resources/default_yaml.yml +++ b/ydb/tests/library/harness/resources/default_yaml.yml @@ -256,5 +256,3 @@ federated_query_config: uri: "" pinger: ping_period: "30s" -column_shard_config: - disabled_on_scheme_shard: false diff --git a/ydb/tests/olap/scenario/conftest.py b/ydb/tests/olap/scenario/conftest.py index 51de3c62ada0..533375e989d4 100644 --- a/ydb/tests/olap/scenario/conftest.py +++ b/ydb/tests/olap/scenario/conftest.py @@ -19,7 +19,8 @@ def get_suite_name(cls): @classmethod def setup_class(cls): - ScenarioTestHelper(None).remove_path(cls.get_suite_name()) + if not external_param_is_true('reuse-tables'): + ScenarioTestHelper(None).remove_path(cls.get_suite_name()) @classmethod def teardown_class(cls): diff --git a/ydb/tests/olap/scenario/helpers/data_generators.py b/ydb/tests/olap/scenario/helpers/data_generators.py index 4945415a1085..286f4d8347a0 100644 --- a/ydb/tests/olap/scenario/helpers/data_generators.py +++ b/ydb/tests/olap/scenario/helpers/data_generators.py @@ -42,14 +42,30 @@ def next_row(self) -> None: pass -class
ColumnValueGeneratorNull(IColumnValueGenerator): - """NULL column value generator. +class ColumnValueGeneratorConst(IColumnValueGenerator): + """Const column value generator. - Allways generate NULL value.""" + Always generate the specified value.""" + + def __init__(self, value: Any) -> None: + """Constructor. + + Args: + value: Value to generate. + Example: + DataGeneratorPerColumn( + self.schema2, 10, + ColumnValueGeneratorDefault(init_value=10)) + .with_column('not_level', ColumnValueGeneratorConst(42) + ) + """ + + super().__init__() + self._value = value @override - def generate_value(column: ScenarioTestHelper.Column) -> Any: - return None + def generate_value(self, column: ScenarioTestHelper.Column) -> Any: + return self._value class ColumnValueGeneratorRandom(IColumnValueGenerator): diff --git a/ydb/tests/olap/scenario/helpers/table_helper.py b/ydb/tests/olap/scenario/helpers/table_helper.py index ef2088ecd692..8b1963fd13e0 100644 --- a/ydb/tests/olap/scenario/helpers/table_helper.py +++ b/ydb/tests/olap/scenario/helpers/table_helper.py @@ -218,6 +218,69 @@ def title(self) -> str: return f'drop column `{self._column}`' +class SetSetting(AlterTableAction): + """Set a setting value for a table-like object. + + Table-like objects are Tables and TableStore. + See {AlterTableLikeObject}. + + Example: + sth = ScenarioTestHelper(ctx) + sth.execute_scheme_query( + AlterTable('testTable').action(SetSetting('TIERING', '"tiering1"')) + ) + """ + + def __init__(self, setting: str, value_literal: str) -> None: + """Constructor. + + Args: + setting: Name of the setting; value_literal: YQL literal to assign to it.""" + + super().__init__() + self._setting = setting + self._value = value_literal + + @override + def to_yql(self) -> str: + return f'SET {self._setting} {self._value}' + + @override + def title(self) -> str: + return f'set {self._setting} = {self._value}' + + +class ResetSetting(AlterTableAction): + """Reset value of a setting for a table-like object.
+ + Table-like objects are Tables and TableStore. + See {AlterTableLikeObject}. + + Example: + sth = ScenarioTestHelper(ctx) + sth.execute_scheme_query( + AlterTable('testTable').action(ResetSetting('TIERING')) + ) + """ + + def __init__(self, setting: str) -> None: + """Constructor. + + Args: + setting: Name of altered setting.""" + + super().__init__() + self._setting = setting + + @override + def to_yql(self) -> str: + return f'RESET ({self._setting})' + + @override + def title(self) -> str: + return f'reset {self._setting}' + + class AlterTableLikeObject(ScenarioTestHelper.IYqlble): """The base class for all requests to change table-like objects. @@ -277,6 +340,42 @@ def drop_column(self, column: str) -> AlterTableLikeObject: return self(DropColumn(column)) + def set_tiering(self, tiering_rule: str) -> AlterTableLikeObject: + """Set a tiering policy. + + The method is similar to calling {AlterTableLikeObject.action} with a {SetSetting} instance. + + Args: + tiering_rule: Name of a TIERING_RULE object. + + Returns: + self.""" + + return self(SetSetting('TIERING', f'"{tiering_rule}"')) + + def reset_tiering(self) -> AlterTableLikeObject: + """Remove a tiering policy. + + The method is similar to calling {AlterTableLikeObject.action} with a {ResetSetting} instance. + + Returns: + self.""" + + return self(ResetSetting('TIERING')) + + def set_ttl(self, interval: str, column: str) -> AlterTableLikeObject: + """Set TTL for rows. + + The method is similar to calling {AlterTableLikeObject.action} with a {SetSetting} instance. + + Args: + interval: Value placed inside Interval("..."); column: Column named in the ON clause.
+ + Returns: + self.""" + + return self(SetSetting('TTL', f'Interval("{interval}") ON `{column}`')) + @override def params(self) -> Dict[str, str]: return {self._type(): self._name, 'actions': ', '.join([a.title() for a in self._actions])} diff --git a/ydb/tests/olap/scenario/helpers/tiering_helper.py b/ydb/tests/olap/scenario/helpers/tiering_helper.py new file mode 100644 index 000000000000..712aed66cac9 --- /dev/null +++ b/ydb/tests/olap/scenario/helpers/tiering_helper.py @@ -0,0 +1,246 @@ +from __future__ import annotations +from ydb.tests.olap.scenario.helpers.scenario_tests_helper import ( + ScenarioTestHelper, + TestContext, +) +from abc import abstractmethod + +from typing import override, Dict +from dataclasses import dataclass +import json + + +@dataclass +class ObjectStorageParams: + endpoint: str + bucket: str + access_key: str + secret_key: str + scheme: str = 'HTTP' + verify_ssl: bool = False + + def to_proto_str(self) -> str: + return ( + f'Scheme: {self.scheme}\n' + f'VerifySSL: {str(self.verify_ssl).lower()}\n' + f'Endpoint: "{self.endpoint}"\n' + f'Bucket: "{self.bucket}"\n' + f'AccessKey: "{self.access_key}"\n' + f'SecretKey: "{self.secret_key}"\n' + ) + + +@dataclass +class TieringRule: + tier_name: str + duration_for_evict: str + + def to_dict(self): + return { + 'tierName': self.tier_name, + 'durationForEvict': self.duration_for_evict, + } + + +@dataclass +class TieringPolicy: + rules: list[TieringRule] + + def __init__(self): + self.rules = [] + + def with_rule(self, rule: TieringRule): + self.rules.append(rule) + return self + + def to_json(self) -> str: + return json.dumps({'rules': list(map(lambda x: x.to_dict(), self.rules))}) + + +@dataclass +class TierConfig: + name: str + s3_params: ObjectStorageParams + + def to_proto_str(self) -> str: + return ( + f'Name: "{self.name}"\n' + f'ObjectStorage: {{\n{self.s3_params.to_proto_str()}\n}}' + ) + + +class AlterTieringRule(ScenarioTestHelper.IYqlble): + """Alter a tiering rule. 
+ + See {ScenarioTestHelper.IYqlble}. + """ + + def __init__(self, name: str, default_column: str, config: TieringPolicy) -> None: + """Constructor. + + Args: + name: Name (relative path) of the altered object. + default_column: Default column used for tiering. + config: Tiering rules to apply.""" + + super().__init__(name) + self._default_column: str = default_column + self._config: TieringPolicy = config + + @override + def params(self) -> Dict[str, str]: + return {'tiering_rule': self._name, 'config': self._config.to_json()} + + @override + def title(self): + return 'Alter tiering rule' + + @override + def to_yql(self, ctx: TestContext) -> str: + return f'ALTER OBJECT `{self._name}` (TYPE TIERING_RULE)' \ + f' SET (defaultColumn = {self._default_column}, description = `{self._config.to_json()}`)' + + +class CreateTieringRule(AlterTieringRule): + """Create a tiering rule. + + See {ScenarioTestHelper.IYqlble}. + """ + + @override + def title(self): + return 'Create tiering rule' + + @override + def to_yql(self, ctx: TestContext) -> str: + return f'CREATE OBJECT `{self._name}` (TYPE TIERING_RULE)' \ + f' WITH (defaultColumn = {self._default_column}, description = `{self._config.to_json()}`)' + + +class CreateTieringRuleIfNotExists(AlterTieringRule): + """Create a tiering rule. If it exists, do nothing. + + See {ScenarioTestHelper.IYqlble}. + """ + + @override + def title(self): + return 'Create tiering rule' + + @override + def to_yql(self, ctx: TestContext) -> str: + return f'CREATE OBJECT IF NOT EXISTS `{self._name}` (TYPE TIERING_RULE)' \ + f' WITH (defaultColumn = {self._default_column}, description = `{self._config.to_json()}`)' + + +class AlterTier(ScenarioTestHelper.IYqlble): + """Alter a tier. + + See {ScenarioTestHelper.IYqlble}. + """ + + def __init__(self, name: str, config: TierConfig) -> None: + """Constructor. + + Args: + name: Name (relative path) of the altered object. 
+ config: Tier configuration.""" + + super().__init__(name) + self._config: TierConfig = config + + @override + def params(self) -> Dict[str, str]: + return {'tier': self._name, 'config': self._config.to_proto_str()} + + @override + def title(self): + return 'Alter tier' + + @override + def to_yql(self, ctx: TestContext) -> str: + return f'ALTER OBJECT `{self._name}` (TYPE TIER) SET (tierConfig = `{self._config.to_proto_str()}`)' + + +class CreateTier(AlterTier): + """Create a tier. + + See {ScenarioTestHelper.IYqlble}. + """ + + @override + def title(self): + return 'Create tier' + + @override + def to_yql(self, ctx: TestContext) -> str: + return f'CREATE OBJECT `{self._name}` (TYPE TIER) WITH (tierConfig = `{self._config.to_proto_str()}`)' + + +class CreateTierIfNotExists(AlterTier): + """Create a tier. If it exists, do nothing. + + See {ScenarioTestHelper.IYqlble}. + """ + + @override + def title(self): + return 'Create tier' + + @override + def to_yql(self, ctx: TestContext) -> str: + return f'CREATE OBJECT IF NOT EXISTS `{self._name}` (TYPE TIER)' \ + f' WITH (tierConfig = `{self._config.to_proto_str()}`)' + + +class DropObjectBase(ScenarioTestHelper.IYqlble): + """Base class for requests that drop an object; subclasses supply the object type. + + See {ScenarioTestHelper.IYqlble}. + """ + + def __init__(self, name: str) -> None: + """Constructor. + + Args: + name: Name (relative path) of the dropped object.""" + + super().__init__(name) + + @override + def params(self) -> Dict[str, str]: + return {'object_type': self._object_type()} + + @override + def title(self): + return f'Drop {self._object_type().lower()}' + + @override + def to_yql(self, ctx: TestContext) -> str: + return f'DROP OBJECT `{self._name}` (TYPE {self._object_type()})' + + @abstractmethod + def _object_type(self) -> str: + pass + + +class DropTier(DropObjectBase): + """Drop a tier. + + See {ScenarioTestHelper.IYqlble}. + """ + + @override + def _object_type(self): + return 'TIER' + + +class DropTieringRule(DropObjectBase): + """Drop a tiering rule. 
+ + See {ScenarioTestHelper.IYqlble}. + """ + + @override + def _object_type(self): + return 'TIERING_RULE' diff --git a/ydb/tests/olap/scenario/test_alter_tiering.py b/ydb/tests/olap/scenario/test_alter_tiering.py new file mode 100644 index 000000000000..cae96e9da48d --- /dev/null +++ b/ydb/tests/olap/scenario/test_alter_tiering.py @@ -0,0 +1,152 @@ +from conftest import BaseTestSet +from ydb.tests.olap.scenario.helpers import ( + ScenarioTestHelper, + TestContext, + CreateTable, + CreateTableStore, + DropTable, +) +from helpers.tiering_helper import ( + ObjectStorageParams, + AlterTier, + CreateTierIfNotExists, + AlterTieringRule, + CreateTieringRuleIfNotExists, + TierConfig, + TieringPolicy, + TieringRule, + DropTier, + DropTieringRule, +) +import helpers.data_generators as dg +from helpers.table_helper import AlterTable + +from ydb.tests.olap.lib.utils import get_external_param +from ydb import PrimitiveType +import datetime +import random +import threading +from typing import Iterable +import time + + +class TestAlterTiering(BaseTestSet): + schema1 = ( + ScenarioTestHelper.Schema() + .with_column(name='timestamp', type=PrimitiveType.Timestamp, not_null=True) + .with_column(name='writer', type=PrimitiveType.Uint32, not_null=True) + .with_column(name='value', type=PrimitiveType.Uint64, not_null=True) + .with_column(name='data', type=PrimitiveType.String, not_null=True) + .with_key_columns('timestamp', 'writer', 'value') + ) + + class TestThread(threading.Thread): + def run(self) -> None: + self.exc = None + try: + self.ret = self._target(*self._args, **self._kwargs) + except BaseException as e: + self.exc = e + + def join(self, timeout=None): + super().join(timeout) + if self.exc: + raise self.exc + return self.ret + + def _drop_tables(self, prefix: str, count: int, ctx: TestContext): + sth = ScenarioTestHelper(ctx) + for i in range(count): + sth.execute_scheme_query(DropTable(f'store/{prefix}_{i}')) + + def _upsert(self, ctx: TestContext, table: str, writer_id: 
int, duration: datetime.timedelta): + deadline = datetime.datetime.now() + duration + sth = ScenarioTestHelper(ctx) + rows_written = 0 + i = 0 + while datetime.datetime.now() < deadline: + sth.bulk_upsert( + table, + dg.DataGeneratorPerColumn(self.schema1, 1000) + .with_column('timestamp', dg.ColumnValueGeneratorRandom(null_probability=0)) + .with_column('writer', dg.ColumnValueGeneratorConst(writer_id)) + .with_column('value', dg.ColumnValueGeneratorSequential(rows_written)) + .with_column('data', dg.ColumnValueGeneratorConst(random.randbytes(1024))) + ) + rows_written += 1000 + i += 1 + if rows_written > 100000 and i % 10 == 0: + scan_result = sth.execute_scan_query(f'SELECT COUNT(*) FROM `{sth.get_full_path('store/table')}` WHERE writer == {writer_id}') + assert scan_result.result_set.rows[0][0] == rows_written + + def _change_tiering_rule(self, ctx: TestContext, table: str, tiering_rules: Iterable[str], duration: datetime.timedelta): + deadline = datetime.datetime.now() + duration + sth = ScenarioTestHelper(ctx) + while datetime.datetime.now() < deadline: + for tiering_rule in tiering_rules: + sth.execute_scheme_query(AlterTable(table).set_tiering(tiering_rule)) + sth.execute_scheme_query(AlterTable(table).reset_tiering()) + + def scenario_alter_tiering_rule_while_writing(self, ctx: TestContext): + test_duration = datetime.timedelta(seconds=400) + + s3_endpoint = get_external_param('s3-endpoint', 'storage.yandexcloud.net') + s3_access_key = get_external_param('s3-access-key', 'YCAJEM3Pg9fMyuX9ZUOJ_fake') + s3_secret_key = get_external_param('s3-secret-key', 'YCM7Ovup55wDkymyEtO8pw5F10_L5jtVY8w_fake') + s3_buckets = get_external_param('s3-buckets', 'ydb-tiering-test-1,ydb-tiering-test-2').split(',') + + s3_configs = [ + ObjectStorageParams( + scheme='HTTP', + verify_ssl=False, + endpoint=s3_endpoint, + bucket=bucket, + access_key=s3_access_key, + secret_key=s3_secret_key + ) for bucket in s3_buckets + ] + + sth = ScenarioTestHelper(ctx) + + tiers: list[str] = [] 
+ tiering_rules: list[str] = [] + for i, s3_config in enumerate(s3_configs): + tiers.append(f'TestAlterTiering:tier{i}') + tiering_rules.append(f'TestAlterTiering:tiering_rule{i}') + + tier_config = TierConfig(tiers[-1], s3_config) + tiering_config = TieringPolicy().with_rule(TieringRule(tiers[-1], '1s')) + + sth.execute_scheme_query(CreateTierIfNotExists(tiers[-1], tier_config)) + sth.execute_scheme_query(CreateTieringRuleIfNotExists(tiering_rules[-1], 'timestamp', tiering_config)) + + sth.execute_scheme_query(AlterTier(tiers[-1], tier_config)) + sth.execute_scheme_query(AlterTieringRule(tiering_rules[-1], 'timestamp', tiering_config)) + + sth.execute_scheme_query(CreateTableStore('store').with_schema(self.schema1)) + sth.execute_scheme_query(CreateTable('store/table').with_schema(self.schema1)) + + threads = [] + + threads.append(self.TestThread( + target=self._change_tiering_rule, + args=[ctx, 'store/table', tiering_rules, test_duration] + )) + writer_id_offset = random.randint(0, 1 << 30) + for i in range(4): + threads.append(self.TestThread(target=self._upsert, args=[ctx, 'store/table', writer_id_offset + i, test_duration])) + + for thread in threads: + thread.start() + for thread in threads: + thread.join() + + for tiering in tiering_rules: + sth.execute_scheme_query(DropTieringRule(tiering)) + for tier in tiers: + sth.execute_scheme_query(DropTier(tier)) + + sth.execute_scheme_query(AlterTable('store/table').set_ttl('P1D', 'timestamp')) + + while sth.execute_scan_query(f'SELECT COUNT(*) FROM `{sth.get_full_path('store/table')}`').result_set.rows[0][0]: + time.sleep(10) diff --git a/ydb/tests/olap/scenario/ya.make b/ydb/tests/olap/scenario/ya.make index 49eee4306f26..58a33a89fdba 100644 --- a/ydb/tests/olap/scenario/ya.make +++ b/ydb/tests/olap/scenario/ya.make @@ -11,6 +11,7 @@ PY3TEST() TEST_SRCS( test_simple.py test_scheme_load.py + test_alter_tiering.py ) PEERDIR( diff --git a/ydb/tools/cfg/static.py b/ydb/tools/cfg/static.py index 
345701921e90..f310546477f2 100644 --- a/ydb/tools/cfg/static.py +++ b/ydb/tools/cfg/static.py @@ -275,6 +275,10 @@ def mbus_enabled(self): def table_service_config(self): return self.__cluster_details.get_service("table_service_config") + @property + def column_shard_config(self): + return self.__cluster_details.get_service("column_shard_config") + @property def hive_config(self): return self.__proto_config("hive", config_pb2.THiveConfig, self.__cluster_details.get_service("hive_config")) @@ -386,6 +390,9 @@ def get_normalized_config(self): if self.table_service_config: normalized_config["table_service_config"] = self.table_service_config + if self.column_shard_config: + normalized_config["column_shard_config"] = self.column_shard_config + if self.__cluster_details.blob_storage_config is not None: normalized_config["blob_storage_config"] = self.__cluster_details.blob_storage_config else: diff --git a/ydb/tools/olap_workload/__main__.py b/ydb/tools/olap_workload/__main__.py new file mode 100644 index 000000000000..02ee03f4f231 --- /dev/null +++ b/ydb/tools/olap_workload/__main__.py @@ -0,0 +1,199 @@ +# -*- coding: utf-8 -*- +import argparse +import ydb +import time +import os +import random +import string + +ydb.interceptor.monkey_patch_event_handler() + + +def timestamp(): + return int(1000 * time.time()) + + +def table_name_with_timestamp(): + return os.path.join("column_table_" + str(timestamp())) + + +def random_string(length): + letters = string.ascii_lowercase + return bytes(''.join(random.choice(letters) for i in range(length)), encoding='utf8') + + +def random_type(): + return random.choice([ydb.PrimitiveType.Int64, ydb.PrimitiveType.String]) + + +def random_value(type): + if isinstance(type, ydb.OptionalType): + return random_value(type.item) + if type == ydb.PrimitiveType.Int64: + return random.randint(0, 1 << 31) + if type == ydb.PrimitiveType.String: + return random_string(random.randint(1, 32)) + + +class Workload(object): + def __init__(self, endpoint, 
database, duration, batch_size): + self.database = database + self.driver = ydb.Driver(ydb.DriverConfig(endpoint, database)) + self.pool = ydb.SessionPool(self.driver, size=200) + self.duration = duration + self.batch_size = batch_size + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.pool.stop() + self.driver.stop() + + def run_query_ignore_errors(self, callee): + try: + self.pool.retry_operation_sync(callee) + except Exception as e: + print(type(e), e) + + def create_table(self, table_name): + print(f"Create table {table_name}") + + def callee(session): + session.execute_scheme( + f""" + CREATE TABLE {table_name} ( + id Int64 NOT NULL, + i64Val Int64, + PRIMARY KEY(id) + ) + PARTITION BY HASH(id) + WITH ( + STORE = COLUMN + ) + """ + ) + + self.run_query_ignore_errors(callee) + + def drop_table(self, table_name): + print(f"Drop table {table_name}") + + def callee(session): + session.drop_table(self.database + "/" + table_name) + + self.run_query_ignore_errors(callee) + + def add_column(self, table_name, col_name, col_type): + print(f"Add column {table_name}.{col_name} {str(col_type)}") + + def callee(session): + session.execute_scheme(f"ALTER TABLE {table_name} ADD COLUMN {col_name} {str(col_type)}") + + self.run_query_ignore_errors(callee) + + def drop_column(self, table_name, col_name): + print(f"Drop column {table_name}.{col_name}") + + def callee(session): + session.execute_scheme(f"ALTER TABLE {table_name} DROP COLUMN {col_name}") + + self.run_query_ignore_errors(callee) + + def generate_batch(self, schema): + data = [] + + for i in range(self.batch_size): + data.append({c.name: random_value(c.type) for c in schema}) + + return data + + def add_batch(self, table_name, schema): + print(f"Add batch {table_name}") + + column_types = ydb.BulkUpsertColumns() + + for c in schema: + column_types.add_column(c.name, c.type) + + batch = self.generate_batch(schema) + + self.driver.table_client.bulk_upsert(self.database + 
"/" + table_name, batch, column_types) + + def list_tables(self): + db = self.driver.scheme_client.list_directory(self.database) + return [t.name for t in db.children if t.type == ydb.SchemeEntryType.COLUMN_TABLE] + + def list_columns(self, table_name): + path = self.database + "/" + table_name + + def callee(session): + return session.describe_table(path).columns + + return self.pool.retry_operation_sync(callee) + + def rows_count(self, table_name): + return self.driver.table_client.scan_query(f"SELECT count(*) FROM {table_name}").next().result_set.rows[0][0] + + def select_n(self, table_name, limit): + print(f"Select {limit} from {table_name}") + self.driver.table_client.scan_query(f"SELECT * FROM {table_name} limit {limit}").next() + + def drop_all_tables(self): + for t in self.list_tables(): + if t.startswith("column_table_"): + self.drop_table(t) + + def drop_all_columns(self, table_name): + for c in self.list_columns(table_name): + if c.name != "id": + self.drop_column(table_name, c.name) + + def queries_while_alter(self): + table_name = "queries_while_alter" + + schema = self.list_columns(table_name) + + self.select_n(table_name, 1000) + self.add_batch(table_name, schema) + self.select_n(table_name, 100) + self.add_batch(table_name, schema) + self.select_n(table_name, 300) + + if len(schema) > 50: + self.drop_all_columns(table_name) + + if self.rows_count(table_name) > 100000: + self.drop_table(table_name) + + col = "col_" + str(timestamp()) + self.add_column(table_name, col, random_type()) + + def run(self): + started_at = time.time() + + while time.time() - started_at < self.duration: + try: + self.create_table("queries_while_alter") + + self.drop_all_tables() + + self.queries_while_alter() + + table_name = table_name_with_timestamp() + self.create_table(table_name) + except Exception as e: + print(type(e), e) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description="olap stability workload", 
formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument('--endpoint', default='localhost:2135', help="An endpoint to be used") + parser.add_argument('--database', default=None, required=True, help='A database to connect') + parser.add_argument('--duration', default=120, type=lambda x: int(x), help='A duration of workload in seconds.') + parser.add_argument('--batch_size', default=1000, help='Batch size for bulk insert') + args = parser.parse_args() + with Workload(args.endpoint, args.database, args.duration, args.batch_size) as workload: + workload.run() diff --git a/ydb/tools/olap_workload/ya.make b/ydb/tools/olap_workload/ya.make new file mode 100644 index 000000000000..939ecf1af94b --- /dev/null +++ b/ydb/tools/olap_workload/ya.make @@ -0,0 +1,12 @@ +PY3_PROGRAM(olap_workload) + +PY_SRCS( + __main__.py +) + +PEERDIR( + ydb/public/sdk/python + library/python/monlib +) + +END() diff --git a/ydb/tools/ya.make b/ydb/tools/ya.make index fc5e094eb2d9..375abee36446 100644 --- a/ydb/tools/ya.make +++ b/ydb/tools/ya.make @@ -4,6 +4,7 @@ RECURSE( query_replay query_replay_yt simple_queue + olap_workload tsserver tstool ydbd_slice