From 874cc23c719e93572316be5726189d5fcb694288 Mon Sep 17 00:00:00 2001
From: Arvid Norberg
Date: Fri, 25 Feb 2022 17:00:16 +0100
Subject: [PATCH] add db validate function to check consistency of blockchain database (#10398)

---
Review notes (not part of the commit message):
- db_validate_cmd read kwargs["input"], so the "--db" option was silently
  ignored; it now reads kwargs["db"].
- height_to_hash is sized for heights 0..peak_height inclusive.
- TempFile removes the file in a finally block, so it is also cleaned up when
  the body of the with-statement raises.
- The post-image hashes on the "index" lines predate these changes.

 chia/cmds/db.py                  |  27 ++++-
 chia/cmds/db_validate_func.py    | 203 +++++++++++++++++++++++++++++++
 chia/util/db_wrapper.py          |   2 +-
 tests/core/test_db_conversion.py |  15 +--
 tests/core/test_db_validation.py | 176 +++++++++++++++++++++++++++++
 tests/util/temp_file.py          |  16 ++
 6 files changed, 419 insertions(+), 20 deletions(-)
 create mode 100644 chia/cmds/db_validate_func.py
 create mode 100644 tests/core/test_db_validation.py
 create mode 100644 tests/util/temp_file.py

diff --git a/chia/cmds/db.py b/chia/cmds/db.py
index 87e252b6959b..671254e68f57 100644
--- a/chia/cmds/db.py
+++ b/chia/cmds/db.py
@@ -1,6 +1,7 @@
 from pathlib import Path
 import click
 from chia.cmds.db_upgrade_func import db_upgrade_func
+from chia.cmds.db_validate_func import db_validate_func
 
 
 @click.group("db", short_help="Manage the blockchain database")
@@ -8,7 +9,7 @@ def db_cmd() -> None:
     pass
 
 
-@db_cmd.command("upgrade", short_help="EXPERIMENTAL: upgrade a v1 database to v2")
+@db_cmd.command("upgrade", short_help="upgrade a v1 database to v2")
 @click.option("--input", default=None, type=click.Path(), help="specify input database file")
 @click.option("--output", default=None, type=click.Path(), help="specify output database file")
 @click.option(
@@ -34,7 +35,23 @@ def db_upgrade_cmd(ctx: click.Context, no_update_config: bool, **kwargs) -> None
         print(f"FAILED: {e}")
 
 
-if __name__ == "__main__":
-    from chia.util.default_root import DEFAULT_ROOT_PATH
-
-    db_upgrade_func(DEFAULT_ROOT_PATH)
+@db_cmd.command("validate", short_help="validate the (v2) blockchain database. Does not verify proofs")
+@click.option("--db", default=None, type=click.Path(), help="Specifies which database file to validate")
+@click.option(
+    "--validate-blocks",
+    default=False,
+    is_flag=True,
+    help="validate consistency of properties of the encoded blocks and block records",
+)
+@click.pass_context
+def db_validate_cmd(ctx: click.Context, validate_blocks: bool, **kwargs) -> None:
+    try:
+        # click passes the value of "--db" under the parameter name "db"
+        in_db_path = kwargs.get("db")
+        db_validate_func(
+            Path(ctx.obj["root_path"]),
+            None if in_db_path is None else Path(in_db_path),
+            validate_blocks=validate_blocks,
+        )
+    except RuntimeError as e:
+        print(f"FAILED: {e}")
diff --git a/chia/cmds/db_validate_func.py b/chia/cmds/db_validate_func.py
new file mode 100644
index 000000000000..d5d3bdb3675b
--- /dev/null
+++ b/chia/cmds/db_validate_func.py
@@ -0,0 +1,203 @@
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+from chia.consensus.block_record import BlockRecord
+from chia.consensus.default_constants import DEFAULT_CONSTANTS
+from chia.types.blockchain_format.sized_bytes import bytes32
+from chia.types.full_block import FullBlock
+from chia.util.config import load_config
+from chia.util.path import path_from_root
+
+
+def db_validate_func(
+    root_path: Path,
+    in_db_path: Optional[Path] = None,
+    *,
+    validate_blocks: bool,
+) -> None:
+    """Validate the v2 blockchain database and print a confirmation on success.
+
+    If in_db_path is None, the database path is taken from the "full_node"
+    section of config.yaml under root_path, with "CHALLENGE" replaced by the
+    selected network. Raises RuntimeError (from validate_v2) on failure.
+    """
+    if in_db_path is None:
+        config: Dict[str, Any] = load_config(root_path, "config.yaml")["full_node"]
+        selected_network: str = config["selected_network"]
+        db_pattern: str = config["database_path"]
+        db_path_replaced: str = db_pattern.replace("CHALLENGE", selected_network)
+        in_db_path = path_from_root(root_path, db_path_replaced)
+
+    validate_v2(in_db_path, validate_blocks=validate_blocks)
+
+    print(f"\n\nDATABASE IS VALID: {in_db_path}\n")
+
+
+def validate_v2(in_path: Path, *, validate_blocks: bool) -> None:
+    """Check the consistency of the v2 blockchain database at in_path.
+
+    Verifies the database version and the current peak, then walks the
+    full_blocks table from the peak height down to 0 (rows above the peak are
+    skipped), checking that the prev_hash links form one chain that ends at
+    the genesis challenge and that in_main_chain is set on exactly the blocks
+    of that chain. With validate_blocks, the block and block record blobs are
+    also decoded and compared with their row. Raises RuntimeError on failure.
+    """
+    import sqlite3
+    from contextlib import closing
+
+    import zstd
+
+    if not in_path.exists():
+        print(f"input file doesn't exist. {in_path}")
+        raise RuntimeError(f"can't find {in_path}")
+
+    print(f"opening file for reading: {in_path}")
+    with closing(sqlite3.connect(in_path)) as in_db:
+
+        # read the database version
+        try:
+            with closing(in_db.execute("SELECT * FROM database_version")) as cursor:
+                row = cursor.fetchone()
+                if row is None or row == []:
+                    raise RuntimeError("Database is missing version field")
+                if row[0] != 2:
+                    raise RuntimeError(f"Database has the wrong version ({row[0]} expected 2)")
+        except sqlite3.OperationalError:
+            raise RuntimeError("Database is missing version table")
+
+        try:
+            with closing(in_db.execute("SELECT hash FROM current_peak WHERE key = 0")) as cursor:
+                row = cursor.fetchone()
+                if row is None or row == []:
+                    raise RuntimeError("Database is missing current_peak field")
+                peak = bytes32(row[0])
+        except sqlite3.OperationalError:
+            raise RuntimeError("Database is missing current_peak table")
+
+        print(f"peak hash: {peak}")
+
+        with closing(in_db.execute("SELECT height FROM full_blocks WHERE header_hash = ?", (peak,))) as cursor:
+            peak_row = cursor.fetchone()
+            if peak_row is None or peak_row == []:
+                raise RuntimeError("Database is missing the peak block")
+            peak_height = peak_row[0]
+
+        print(f"peak height: {peak_height}")
+
+        print("traversing the full chain")
+
+        current_height = peak_height
+        # we're looking for a block with this hash
+        expect_hash = peak
+        # once we find it, we know what the next block to look for is, which
+        # this is set to
+        next_hash = None
+
+        num_orphans = 0
+        # heights 0..peak_height inclusive, 32 bytes per hash
+        height_to_hash = bytearray((peak_height + 1) * 32)
+
+        with closing(
+            in_db.execute(
+                f"SELECT header_hash, prev_hash, height, in_main_chain"
+                f"{', block, block_record' if validate_blocks else ''} "
+                "FROM full_blocks ORDER BY height DESC"
+            )
+        ) as cursor:
+
+            for row in cursor:
+
+                hh = row[0]
+                prev = row[1]
+                height = row[2]
+                in_main_chain = row[3]
+
+                # if there are blocks being added to the database, just ignore
+                # the ones added since we picked the peak
+                if height > peak_height:
+                    continue
+
+                if validate_blocks:
+                    block = FullBlock.from_bytes(zstd.decompress(row[4]))
+                    block_record = BlockRecord.from_bytes(row[5])
+                    actual_header_hash = block.header_hash
+                    actual_prev_hash = block.prev_header_hash
+                    if actual_header_hash != hh:
+                        raise RuntimeError(
+                            f"Block {hh.hex()} has a blob with mismatching " f"hash: {actual_header_hash.hex()}"
+                        )
+                    if block_record.header_hash != hh:
+                        raise RuntimeError(
+                            f"Block {hh.hex()} has a block record with mismatching "
+                            f"hash: {block_record.header_hash.hex()}"
+                        )
+                    if block_record.total_iters != block.total_iters:
+                        raise RuntimeError(
+                            f"Block {hh.hex()} has a block record with mismatching total "
+                            f"iters: {block_record.total_iters} expected {block.total_iters}"
+                        )
+                    if block_record.prev_hash != actual_prev_hash:
+                        raise RuntimeError(
+                            f"Block {hh.hex()} has a block record with mismatching "
+                            f"prev_hash: {block_record.prev_hash} expected {actual_prev_hash.hex()}"
+                        )
+                    if block.height != height:
+                        raise RuntimeError(
+                            f"Block {hh.hex()} has a mismatching " f"height: {block.height} expected {height}"
+                        )
+
+                if height != current_height:
+                    # we're moving to the next level. Make sure we found the block
+                    # we were looking for at the previous level
+                    if next_hash is None:
+                        raise RuntimeError(
+                            f"Database is missing the block with hash {expect_hash} at height {current_height}"
+                        )
+                    expect_hash = next_hash
+                    next_hash = None
+                    current_height = height
+
+                if hh == expect_hash:
+                    if next_hash is not None:
+                        raise RuntimeError(f"Database has multiple blocks with hash {hh.hex()}, " f"at height {height}")
+                    if not in_main_chain:
+                        raise RuntimeError(
+                            f"block {hh.hex()} (height: {height}) is part of the main chain, "
+                            f"but in_main_chain is not set"
+                        )
+
+                    if validate_blocks:
+                        if actual_prev_hash != prev:
+                            raise RuntimeError(
+                                f"Block {hh.hex()} has a blob with mismatching "
+                                f"prev-hash: {actual_prev_hash}, expected {prev}"
+                            )
+
+                    next_hash = prev
+
+                    height_to_hash[height * 32 : height * 32 + 32] = hh
+
+                    print(f"\r{height} orphaned blocks: {num_orphans} ", end="")
+
+                else:
+                    if in_main_chain:
+                        raise RuntimeError(
+                            f"block {hh.hex()} (height: {height}) is orphaned, " "but in_main_chain is set"
+                        )
+                    num_orphans += 1
+            print("")
+
+            if current_height != 0:
+                raise RuntimeError(f"Database is missing blocks below height {current_height}")
+
+            # make sure the prev_hash pointer of block height 0 is the genesis
+            # challenge
+            if next_hash != DEFAULT_CONSTANTS.AGG_SIG_ME_ADDITIONAL_DATA:
+                raise RuntimeError(
+                    f"Blockchain has invalid genesis challenge {next_hash}, expected "
+                    f"{DEFAULT_CONSTANTS.AGG_SIG_ME_ADDITIONAL_DATA.hex()}"
+                )
+
+            if num_orphans > 0:
+                print(f"{num_orphans} orphaned blocks")
diff --git a/chia/util/db_wrapper.py b/chia/util/db_wrapper.py
index 2d90b34a05bb..53af97f96022 100644
--- a/chia/util/db_wrapper.py
+++ b/chia/util/db_wrapper.py
@@ -27,5 +27,5 @@ async def rollback_transaction(self):
         cursor = await self.db.execute("ROLLBACK")
         await cursor.close()
 
-    async def commit_transaction(self):
+    async def commit_transaction(self) -> None:
         await self.db.commit()
diff --git a/tests/core/test_db_conversion.py b/tests/core/test_db_conversion.py
index a60bdab41cba..e6dd6e2d94cd 100644
--- a/tests/core/test_db_conversion.py
+++ b/tests/core/test_db_conversion.py
@@ -1,13 +1,13 @@
 import pytest
 import pytest_asyncio
 import aiosqlite
-import tempfile
 import random
 import asyncio
 from pathlib import Path
 from typing import List, Tuple
 
 from tests.setup_nodes import test_constants
+from tests.util.temp_file import TempFile
 
 from chia.types.blockchain_format.sized_bytes import bytes32
 from chia.util.ints import uint32, uint64
@@ -20,19 +20,6 @@
 from chia.consensus.multiprocess_validation import PreValidationResult
 
 
-class TempFile:
-    def __init__(self):
-        self.path = Path(tempfile.NamedTemporaryFile().name)
-
-    def __enter__(self) -> Path:
-        if self.path.exists():
-            self.path.unlink()
-        return self.path
-
-    def __exit__(self, exc_t, exc_v, exc_tb):
-        self.path.unlink()
-
-
 def rand_bytes(num) -> bytes:
     ret = bytearray(num)
     for i in range(num):
diff --git a/tests/core/test_db_validation.py b/tests/core/test_db_validation.py
new file mode 100644
index 000000000000..64d5e1e79e6d
--- /dev/null
+++ b/tests/core/test_db_validation.py
@@ -0,0 +1,176 @@
+import asyncio
+import random
+import sqlite3
+from asyncio.events import AbstractEventLoop
+from contextlib import closing
+from pathlib import Path
+from typing import Iterator, List
+
+import aiosqlite
+import pytest
+import pytest_asyncio
+
+from chia.cmds.db_validate_func import validate_v2
+from chia.consensus.blockchain import Blockchain
+from chia.consensus.default_constants import DEFAULT_CONSTANTS
+from chia.consensus.multiprocess_validation import PreValidationResult
+from chia.full_node.block_store import BlockStore
+from chia.full_node.coin_store import CoinStore
+from chia.full_node.hint_store import HintStore
+from chia.types.blockchain_format.sized_bytes import bytes32
+from chia.types.full_block import FullBlock
+from chia.util.db_wrapper import DBWrapper
+from chia.util.ints import uint32, uint64
+from tests.setup_nodes import test_constants
+from tests.util.temp_file import TempFile
+
+
+@pytest_asyncio.fixture(scope="session")
+def event_loop() -> Iterator[AbstractEventLoop]:
+    loop = asyncio.get_event_loop()
+    yield loop
+
+
+def rand_hash() -> bytes32:
+    ret = bytearray(32)
+    for i in range(32):
+        ret[i] = random.getrandbits(8)
+    return bytes32(ret)
+
+
+def make_version(conn: sqlite3.Connection, version: int) -> None:
+    conn.execute("CREATE TABLE database_version(version int)")
+    conn.execute("INSERT INTO database_version VALUES (?)", (version,))
+    conn.commit()
+
+
+def make_peak(conn: sqlite3.Connection, peak_hash: bytes32) -> None:
+    conn.execute("CREATE TABLE IF NOT EXISTS current_peak(key int PRIMARY KEY, hash blob)")
+    conn.execute("INSERT OR REPLACE INTO current_peak VALUES(?, ?)", (0, peak_hash))
+    conn.commit()
+
+
+def make_block_table(conn: sqlite3.Connection) -> None:
+    conn.execute(
+        "CREATE TABLE IF NOT EXISTS full_blocks("
+        "header_hash blob PRIMARY KEY,"
+        "prev_hash blob,"
+        "height bigint,"
+        "sub_epoch_summary blob,"
+        "is_fully_compactified tinyint,"
+        "in_main_chain tinyint,"
+        "block blob,"
+        "block_record blob)"
+    )
+
+
+def add_block(
+    conn: sqlite3.Connection, header_hash: bytes32, prev_hash: bytes32, height: int, in_main_chain: bool
+) -> None:
+    conn.execute(
+        "INSERT INTO full_blocks VALUES(?, ?, ?, NULL, 0, ?, NULL, NULL)",
+        (
+            header_hash,
+            prev_hash,
+            height,
+            in_main_chain,
+        ),
+    )
+
+
+def test_db_validate_wrong_version() -> None:
+    with TempFile() as db_file:
+        with closing(sqlite3.connect(db_file)) as conn:
+            make_version(conn, 3)
+
+        with pytest.raises(RuntimeError) as execinfo:
+            validate_v2(db_file, validate_blocks=False)
+        assert "Database has the wrong version (3 expected 2)" in str(execinfo.value)
+
+
+def test_db_validate_missing_peak_table() -> None:
+    with TempFile() as db_file:
+        with closing(sqlite3.connect(db_file)) as conn:
+            make_version(conn, 2)
+
+        with pytest.raises(RuntimeError) as execinfo:
+            validate_v2(db_file, validate_blocks=False)
+        assert "Database is missing current_peak table" in str(execinfo.value)
+
+
+def test_db_validate_missing_peak_block() -> None:
+    with TempFile() as db_file:
+        with closing(sqlite3.connect(db_file)) as conn:
+            make_version(conn, 2)
+            make_peak(conn, bytes32.fromhex("fafafafafafafafafafafafafafafafafafafafafafafafafafafafafafafafa"))
+
+            make_block_table(conn)
+
+        with pytest.raises(RuntimeError) as execinfo:
+            validate_v2(db_file, validate_blocks=False)
+        assert "Database is missing the peak block" in str(execinfo.value)
+
+
+@pytest.mark.parametrize("invalid_in_chain", [True, False])
+def test_db_validate_in_main_chain(invalid_in_chain: bool) -> None:
+    with TempFile() as db_file:
+        with closing(sqlite3.connect(db_file)) as conn:
+            make_version(conn, 2)
+            make_block_table(conn)
+
+            prev = bytes32(DEFAULT_CONSTANTS.AGG_SIG_ME_ADDITIONAL_DATA)
+            for height in range(0, 100):
+                header_hash = rand_hash()
+                add_block(conn, header_hash, prev, height, True)
+                if height % 4 == 0:
+                    # insert an orphaned block
+                    add_block(conn, rand_hash(), prev, height, invalid_in_chain)
+                prev = header_hash
+
+            make_peak(conn, header_hash)
+
+        if invalid_in_chain:
+            with pytest.raises(RuntimeError) as execinfo:
+                validate_v2(db_file, validate_blocks=False)
+            assert " (height: 96) is orphaned, but in_main_chain is set" in str(execinfo.value)
+        else:
+            validate_v2(db_file, validate_blocks=False)
+
+
+async def make_db(db_file: Path, blocks: List[FullBlock]) -> None:
+    async with aiosqlite.connect(db_file) as conn:
+
+        await conn.execute("pragma journal_mode=OFF")
+        await conn.execute("pragma synchronous=OFF")
+        await conn.execute("pragma locking_mode=exclusive")
+
+        # this is done by chia init normally
+        await conn.execute("CREATE TABLE database_version(version int)")
+        await conn.execute("INSERT INTO database_version VALUES (2)")
+        await conn.commit()
+
+        db_wrapper = DBWrapper(conn, 2)
+        block_store = await BlockStore.create(db_wrapper)
+        coin_store = await CoinStore.create(db_wrapper, uint32(0))
+        hint_store = await HintStore.create(db_wrapper)
+
+        bc = await Blockchain.create(coin_store, block_store, test_constants, hint_store, Path("."), reserved_cores=0)
+        await db_wrapper.commit_transaction()
+
+        for block in blocks:
+            results = PreValidationResult(None, uint64(1), None, False)
+            result, err, _, _ = await bc.receive_block(block, results)
+            assert err is None
+
+
+@pytest.mark.asyncio
+async def test_db_validate_default_1000_blocks(default_1000_blocks: List[FullBlock]) -> None:
+
+    with TempFile() as db_file:
+        await make_db(db_file, default_1000_blocks)
+
+        # we expect everything to be valid except this is a test chain, so it
+        # doesn't have the correct genesis challenge
+        with pytest.raises(RuntimeError) as execinfo:
+            validate_v2(db_file, validate_blocks=True)
+        assert "Blockchain has invalid genesis challenge" in str(execinfo.value)
diff --git a/tests/util/temp_file.py b/tests/util/temp_file.py
new file mode 100644
index 000000000000..e38906fa7150
--- /dev/null
+++ b/tests/util/temp_file.py
@@ -0,0 +1,16 @@
+import contextlib
+import tempfile
+from pathlib import Path
+from typing import Iterator
+
+
+@contextlib.contextmanager
+def TempFile() -> Iterator[Path]:
+    """Yield the path of a temporary file; delete the file, if present, on exit."""
+    path = Path(tempfile.NamedTemporaryFile().name)
+    try:
+        yield path
+    finally:
+        # clean up even when the body of the with-statement raises
+        if path.exists():
+            path.unlink()