From 874cc23c719e93572316be5726189d5fcb694288 Mon Sep 17 00:00:00 2001
From: Arvid Norberg <arvid@libtorrent.org>
Date: Fri, 25 Feb 2022 17:00:16 +0100
Subject: [PATCH] add db validate function to check consistency of blockchain
 database (#10398)

---
 chia/cmds/db.py                  |  26 ++++-
 chia/cmds/db_validate_func.py    | 187 +++++++++++++++++++++++++++++++
 chia/util/db_wrapper.py          |   2 +-
 tests/core/test_db_conversion.py |  15 +--
 tests/core/test_db_validation.py | 176 +++++++++++++++++++++++++++++
 tests/util/temp_file.py          |  12 ++
 6 files changed, 398 insertions(+), 20 deletions(-)
 create mode 100644 chia/cmds/db_validate_func.py
 create mode 100644 tests/core/test_db_validation.py
 create mode 100644 tests/util/temp_file.py

diff --git a/chia/cmds/db.py b/chia/cmds/db.py
index 87e252b6959b..671254e68f57 100644
--- a/chia/cmds/db.py
+++ b/chia/cmds/db.py
@@ -1,6 +1,7 @@
 from pathlib import Path
 import click
 from chia.cmds.db_upgrade_func import db_upgrade_func
+from chia.cmds.db_validate_func import db_validate_func
 
 
 @click.group("db", short_help="Manage the blockchain database")
@@ -8,7 +9,7 @@ def db_cmd() -> None:
     pass
 
 
-@db_cmd.command("upgrade", short_help="EXPERIMENTAL: upgrade a v1 database to v2")
+@db_cmd.command("upgrade", short_help="upgrade a v1 database to v2")
 @click.option("--input", default=None, type=click.Path(), help="specify input database file")
 @click.option("--output", default=None, type=click.Path(), help="specify output database file")
 @click.option(
@@ -34,7 +35,22 @@ def db_upgrade_cmd(ctx: click.Context, no_update_config: bool, **kwargs) -> None
         print(f"FAILED: {e}")
 
 
-if __name__ == "__main__":
-    from chia.util.default_root import DEFAULT_ROOT_PATH
-
-    db_upgrade_func(DEFAULT_ROOT_PATH)
+@db_cmd.command("validate", short_help="validate the (v2) blockchain database. Does not verify proofs")
+@click.option("--db", default=None, type=click.Path(), help="Specifies which database file to validate")
+@click.option(
+    "--validate-blocks",
+    default=False,
+    is_flag=True,
+    help="validate consistency of properties of the encoded blocks and block records",
+)
+@click.pass_context
+def db_validate_cmd(ctx: click.Context, validate_blocks: bool, **kwargs) -> None:
+    try:
+        in_db_path = kwargs.get("db")  # the --db option arrives under the "db" keyword, not "input"
+        db_validate_func(
+            Path(ctx.obj["root_path"]),
+            None if in_db_path is None else Path(in_db_path),  # None: use the database from the config
+            validate_blocks=validate_blocks,
+        )
+    except RuntimeError as e:  # validation failures are reported on stdout instead of propagating
+        print(f"FAILED: {e}")
diff --git a/chia/cmds/db_validate_func.py b/chia/cmds/db_validate_func.py
new file mode 100644
index 000000000000..d5d3bdb3675b
--- /dev/null
+++ b/chia/cmds/db_validate_func.py
@@ -0,0 +1,187 @@
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+from chia.consensus.block_record import BlockRecord
+from chia.consensus.default_constants import DEFAULT_CONSTANTS
+from chia.types.blockchain_format.sized_bytes import bytes32
+from chia.types.full_block import FullBlock
+from chia.util.config import load_config
+from chia.util.path import path_from_root
+
+
+def db_validate_func(
+    root_path: Path,
+    in_db_path: Optional[Path] = None,
+    *,
+    validate_blocks: bool,
+) -> None:
+    """Validate the v2 blockchain database at in_db_path, or the one configured under root_path."""
+    if in_db_path is None:
+        full_node_cfg: Dict[str, Any] = load_config(root_path, "config.yaml")["full_node"]
+        network: str = full_node_cfg["selected_network"]
+        db_file: str = full_node_cfg["database_path"].replace("CHALLENGE", network)
+        in_db_path = path_from_root(root_path, db_file)
+
+    validate_v2(in_db_path, validate_blocks=validate_blocks)
+
+    print(f"\n\nDATABASE IS VALID: {in_db_path}\n")
+
+
+def validate_v2(in_path: Path, *, validate_blocks: bool) -> None:
+    """Check that the v2 database at in_path holds one consistent main chain; raise RuntimeError if not."""
+    import sqlite3
+    from contextlib import closing
+
+    import zstd
+
+    if not in_path.exists():
+        print(f"input file doesn't exist. {in_path}")
+        raise RuntimeError(f"can't find {in_path}")
+
+    print(f"opening file for reading: {in_path}")
+    with closing(sqlite3.connect(in_path)) as in_db:
+        # read the database version
+        try:
+            with closing(in_db.execute("SELECT * FROM database_version")) as cursor:
+                row = cursor.fetchone()
+                if row is None or row == []:
+                    raise RuntimeError("Database is missing version field")
+                if row[0] != 2:
+                    raise RuntimeError(f"Database has the wrong version ({row[0]} expected 2)")
+        except sqlite3.OperationalError:
+            raise RuntimeError("Database is missing version table")
+
+        try:
+            with closing(in_db.execute("SELECT hash FROM current_peak WHERE key = 0")) as cursor:
+                row = cursor.fetchone()
+                if row is None or row == []:
+                    raise RuntimeError("Database is missing current_peak field")
+                peak = bytes32(row[0])
+        except sqlite3.OperationalError:
+            raise RuntimeError("Database is missing current_peak table")
+
+        print(f"peak hash: {peak}")
+
+        with closing(in_db.execute("SELECT height FROM full_blocks WHERE header_hash = ?", (peak,))) as cursor:
+            peak_row = cursor.fetchone()
+            if peak_row is None or peak_row == []:
+                raise RuntimeError("Database is missing the peak block")
+            peak_height = peak_row[0]
+
+        print(f"peak height: {peak_height}")
+
+        print("traversing the full chain")
+
+        current_height = peak_height
+        # we're looking for a block with this hash
+        expect_hash = peak
+        # once we find it, its prev_hash is the block to look for at the next
+        # (lower) height; that hash is stored here until we move down a level
+        next_hash = None
+
+        num_orphans = 0
+        height_to_hash = bytearray((peak_height + 1) * 32)  # one 32-byte slot per height 0..peak_height
+
+        with closing(
+            in_db.execute(
+                f"SELECT header_hash, prev_hash, height, in_main_chain"
+                f"{', block, block_record' if validate_blocks else ''} "
+                "FROM full_blocks ORDER BY height DESC"
+            )
+        ) as cursor:
+            # rows arrive highest height first, so the walk goes from the peak back towards genesis
+            for row in cursor:
+                # columns: header_hash, prev_hash, height, in_main_chain[, block, block_record]
+                hh = row[0]
+                prev = row[1]
+                height = row[2]
+                in_main_chain = row[3]
+
+                # if there are blocks being added to the database, just ignore
+                # the ones added since we picked the peak
+                if height > peak_height:
+                    continue
+
+                if validate_blocks:
+                    block = FullBlock.from_bytes(zstd.decompress(row[4]))
+                    block_record = BlockRecord.from_bytes(row[5])
+                    actual_header_hash = block.header_hash
+                    actual_prev_hash = block.prev_header_hash
+                    if actual_header_hash != hh:
+                        raise RuntimeError(
+                            f"Block {hh.hex()} has a blob with mismatching " f"hash: {actual_header_hash.hex()}"
+                        )
+                    if block_record.header_hash != hh:
+                        raise RuntimeError(
+                            f"Block {hh.hex()} has a block record with mismatching "
+                            f"hash: {block_record.header_hash.hex()}"
+                        )
+                    if block_record.total_iters != block.total_iters:
+                        raise RuntimeError(
+                            f"Block {hh.hex()} has a block record with mismatching total "
+                            f"iters: {block_record.total_iters} expected {block.total_iters}"
+                        )
+                    if block_record.prev_hash != actual_prev_hash:
+                        raise RuntimeError(
+                            f"Block {hh.hex()} has a block record with mismatching "
+                            f"prev_hash: {block_record.prev_hash} expected {actual_prev_hash.hex()}"
+                        )
+                    if block.height != height:
+                        raise RuntimeError(
+                            f"Block {hh.hex()} has a mismatching " f"height: {block.height} expected {height}"
+                        )
+
+                if height != current_height:
+                    # we're moving to the next level. Make sure we found the block
+                    # we were looking for at the previous level
+                    if next_hash is None:
+                        raise RuntimeError(
+                            f"Database is missing the block with hash {expect_hash} at height {current_height}"
+                        )
+                    expect_hash = next_hash
+                    next_hash = None
+                    current_height = height
+
+                if hh == expect_hash:
+                    if next_hash is not None:
+                        raise RuntimeError(f"Database has multiple blocks with hash {hh.hex()}, " f"at height {height}")
+                    if not in_main_chain:
+                        raise RuntimeError(
+                            f"block {hh.hex()} (height: {height}) is part of the main chain, "
+                            f"but in_main_chain is not set"
+                        )
+
+                    if validate_blocks:
+                        if actual_prev_hash != prev:
+                            raise RuntimeError(
+                                f"Block {hh.hex()} has a blob with mismatching "
+                                f"prev-hash: {actual_prev_hash}, expected {prev}"
+                            )
+
+                    next_hash = prev
+
+                    height_to_hash[height * 32 : height * 32 + 32] = hh
+
+                    print(f"\r{height} orphaned blocks: {num_orphans} ", end="")
+
+                else:
+                    if in_main_chain:
+                        raise RuntimeError(
+                            f"block {hh.hex()} (height: {height}) is orphaned, " "but in_main_chain is set"
+                        )
+                    num_orphans += 1
+        print("")
+
+        if current_height != 0:
+            raise RuntimeError(f"Database is missing blocks below height {current_height}")
+
+        # make sure the prev_hash pointer of block height 0 is the genesis
+        # challenge
+        if next_hash != DEFAULT_CONSTANTS.AGG_SIG_ME_ADDITIONAL_DATA:
+            raise RuntimeError(
+                f"Blockchain has invalid genesis challenge {next_hash}, expected "
+                f"{DEFAULT_CONSTANTS.AGG_SIG_ME_ADDITIONAL_DATA.hex()}"
+            )
+
+        if num_orphans > 0:
+            print(f"{num_orphans} orphaned blocks")
diff --git a/chia/util/db_wrapper.py b/chia/util/db_wrapper.py
index 2d90b34a05bb..53af97f96022 100644
--- a/chia/util/db_wrapper.py
+++ b/chia/util/db_wrapper.py
@@ -27,5 +27,5 @@ async def rollback_transaction(self):
             cursor = await self.db.execute("ROLLBACK")
             await cursor.close()
 
-    async def commit_transaction(self):
+    async def commit_transaction(self) -> None:
         await self.db.commit()
diff --git a/tests/core/test_db_conversion.py b/tests/core/test_db_conversion.py
index a60bdab41cba..e6dd6e2d94cd 100644
--- a/tests/core/test_db_conversion.py
+++ b/tests/core/test_db_conversion.py
@@ -1,13 +1,13 @@
 import pytest
 import pytest_asyncio
 import aiosqlite
-import tempfile
 import random
 import asyncio
 from pathlib import Path
 from typing import List, Tuple
 
 from tests.setup_nodes import test_constants
+from tests.util.temp_file import TempFile
 
 from chia.types.blockchain_format.sized_bytes import bytes32
 from chia.util.ints import uint32, uint64
@@ -20,19 +20,6 @@
 from chia.consensus.multiprocess_validation import PreValidationResult
 
 
-class TempFile:
-    def __init__(self):
-        self.path = Path(tempfile.NamedTemporaryFile().name)
-
-    def __enter__(self) -> Path:
-        if self.path.exists():
-            self.path.unlink()
-        return self.path
-
-    def __exit__(self, exc_t, exc_v, exc_tb):
-        self.path.unlink()
-
-
 def rand_bytes(num) -> bytes:
     ret = bytearray(num)
     for i in range(num):
diff --git a/tests/core/test_db_validation.py b/tests/core/test_db_validation.py
new file mode 100644
index 000000000000..64d5e1e79e6d
--- /dev/null
+++ b/tests/core/test_db_validation.py
@@ -0,0 +1,176 @@
+import asyncio
+import random
+import sqlite3
+from asyncio.events import AbstractEventLoop
+from contextlib import closing
+from pathlib import Path
+from typing import Iterator, List
+
+import aiosqlite
+import pytest
+import pytest_asyncio
+
+from chia.cmds.db_validate_func import validate_v2
+from chia.consensus.blockchain import Blockchain
+from chia.consensus.default_constants import DEFAULT_CONSTANTS
+from chia.consensus.multiprocess_validation import PreValidationResult
+from chia.full_node.block_store import BlockStore
+from chia.full_node.coin_store import CoinStore
+from chia.full_node.hint_store import HintStore
+from chia.types.blockchain_format.sized_bytes import bytes32
+from chia.types.full_block import FullBlock
+from chia.util.db_wrapper import DBWrapper
+from chia.util.ints import uint32, uint64
+from tests.setup_nodes import test_constants
+from tests.util.temp_file import TempFile
+
+
+@pytest_asyncio.fixture(scope="session")
+def event_loop() -> Iterator[AbstractEventLoop]:
+    """Session-scoped event_loop fixture that hands out asyncio.get_event_loop() without closing it."""
+    yield asyncio.get_event_loop()
+
+
+def rand_hash() -> bytes32:
+    """Return 32 bytes drawn from the random module, wrapped as a bytes32."""
+    # one getrandbits(8) call per byte, in order, exactly as an index loop
+    # over a 32-byte buffer would make them
+    return bytes32(bytes(random.getrandbits(8) for _ in range(32)))
+
+
+def make_version(conn: sqlite3.Connection, version: int) -> None:
+    conn.execute("CREATE TABLE database_version(version int)")  # no IF NOT EXISTS: expects a fresh file
+    conn.execute("INSERT INTO database_version VALUES (?)", (version,))  # the row validate_v2 reads first
+    conn.commit()
+
+
+def make_peak(conn: sqlite3.Connection, peak_hash: bytes32) -> None:
+    conn.execute("CREATE TABLE IF NOT EXISTS current_peak(key int PRIMARY KEY, hash blob)")
+    conn.execute("INSERT OR REPLACE INTO current_peak VALUES(?, ?)", (0, peak_hash))  # key 0 holds the peak
+    conn.commit()  # also commits any rows inserted earlier on this connection
+
+
+def make_block_table(conn: sqlite3.Connection) -> None:
+    conn.execute(  # creates the full_blocks table that validate_v2 reads; commit() is not called here
+        "CREATE TABLE IF NOT EXISTS full_blocks("
+        "header_hash blob PRIMARY KEY,"
+        "prev_hash blob,"
+        "height bigint,"
+        "sub_epoch_summary blob,"
+        "is_fully_compactified tinyint,"
+        "in_main_chain tinyint,"
+        "block blob,"
+        "block_record blob)"
+    )
+
+
+def add_block(
+    conn: sqlite3.Connection, header_hash: bytes32, prev_hash: bytes32, height: int, in_main_chain: bool
+) -> None:
+    conn.execute(  # inserts one row; commit() is not called here
+        "INSERT INTO full_blocks VALUES(?, ?, ?, NULL, 0, ?, NULL, NULL)",  # summary and both blobs stay NULL
+        (
+            header_hash,
+            prev_hash,
+            height,
+            in_main_chain,
+        ),
+    )
+
+
+def test_db_validate_wrong_version() -> None:
+    with TempFile() as db_file:
+        with closing(sqlite3.connect(db_file)) as conn:
+            make_version(conn, 3)
+        # validate_v2 checks the version first and accepts nothing but 2
+        with pytest.raises(RuntimeError) as execinfo:
+            validate_v2(db_file, validate_blocks=False)
+        assert "Database has the wrong version (3 expected 2)" in str(execinfo.value)
+
+
+def test_db_validate_missing_peak_table() -> None:
+    with TempFile() as db_file:
+        with closing(sqlite3.connect(db_file)) as conn:
+            make_version(conn, 2)
+        # only database_version exists, so the current_peak query has no table to read
+        with pytest.raises(RuntimeError) as execinfo:
+            validate_v2(db_file, validate_blocks=False)
+        assert "Database is missing current_peak table" in str(execinfo.value)
+
+
+def test_db_validate_missing_peak_block() -> None:
+    with TempFile() as db_file:
+        with closing(sqlite3.connect(db_file)) as conn:
+            make_version(conn, 2)
+            make_peak(conn, bytes32.fromhex("fafafafafafafafafafafafafafafafafafafafafafafafafafafafafafafafa"))
+
+            make_block_table(conn)
+        # full_blocks exists but is empty, so the peak hash recorded above matches no row
+        with pytest.raises(RuntimeError) as execinfo:
+            validate_v2(db_file, validate_blocks=False)
+        assert "Database is missing the peak block" in str(execinfo.value)
+
+
+@pytest.mark.parametrize("invalid_in_chain", [True, False])
+def test_db_validate_in_main_chain(invalid_in_chain: bool) -> None:
+    with TempFile() as db_file:
+        with closing(sqlite3.connect(db_file)) as conn:
+            make_version(conn, 2)
+            make_block_table(conn)
+
+            prev = bytes32(DEFAULT_CONSTANTS.AGG_SIG_ME_ADDITIONAL_DATA)
+            for height in range(0, 100):
+                header_hash = rand_hash()
+                add_block(conn, header_hash, prev, height, True)
+                if height % 4 == 0:
+                    # insert an orphaned block
+                    add_block(conn, rand_hash(), prev, height, invalid_in_chain)
+                prev = header_hash
+
+            make_peak(conn, header_hash)
+        # rows are scanned from the peak down, and 96 is the highest height that has an orphan
+        if invalid_in_chain:
+            with pytest.raises(RuntimeError) as execinfo:
+                validate_v2(db_file, validate_blocks=False)
+            assert " (height: 96) is orphaned, but in_main_chain is set" in str(execinfo.value)
+        else:
+            validate_v2(db_file, validate_blocks=False)
+
+
+async def make_db(db_file: Path, blocks: List[FullBlock]) -> None:
+    """Create a v2 database at db_file and feed blocks through Blockchain.receive_block, in order."""
+    async with aiosqlite.connect(db_file) as conn:
+        await conn.execute("pragma journal_mode=OFF")
+        await conn.execute("pragma synchronous=OFF")
+        await conn.execute("pragma locking_mode=exclusive")
+
+        # this is done by chia init normally
+        await conn.execute("CREATE TABLE database_version(version int)")
+        await conn.execute("INSERT INTO database_version VALUES (2)")
+        await conn.commit()
+
+        db_wrapper = DBWrapper(conn, 2)
+        block_store = await BlockStore.create(db_wrapper)
+        coin_store = await CoinStore.create(db_wrapper, uint32(0))
+        hint_store = await HintStore.create(db_wrapper)
+
+        bc = await Blockchain.create(coin_store, block_store, test_constants, hint_store, Path("."), reserved_cores=0)
+        await db_wrapper.commit_transaction()
+
+        for block in blocks:
+            results = PreValidationResult(None, uint64(1), None, False)
+            result, err, _, _ = await bc.receive_block(block, results)
+            assert err is None
+
+
+@pytest.mark.asyncio
+async def test_db_validate_default_1000_blocks(default_1000_blocks: List[FullBlock]) -> None:
+    """Validate the default_1000_blocks chain with validate_blocks=True, expecting the genesis check to fail."""
+    with TempFile() as db_file:
+        await make_db(db_file, default_1000_blocks)
+
+        # we expect everything to be valid except this is a test chain, so it
+        # doesn't have the correct genesis challenge
+        with pytest.raises(RuntimeError) as execinfo:
+            validate_v2(db_file, validate_blocks=True)
+        assert "Blockchain has invalid genesis challenge" in str(execinfo.value)
diff --git a/tests/util/temp_file.py b/tests/util/temp_file.py
new file mode 100644
index 000000000000..e38906fa7150
--- /dev/null
+++ b/tests/util/temp_file.py
@@ -0,0 +1,12 @@
+import contextlib
+import tempfile
+from pathlib import Path
+from typing import Iterator
+
+
+@contextlib.contextmanager
+def TempFile() -> Iterator[Path]:  # yields an unused temp path and removes whatever the body leaves there
+    path = Path(tempfile.NamedTemporaryFile().name)
+    with contextlib.ExitStack() as cleanup:  # callbacks run on exit even when the with-body raises
+        cleanup.callback(lambda: path.unlink() if path.exists() else None)
+        yield path