From e97d3590c7e8b76840401c8e523ace813bf96835 Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang <93241502+yhchiang-sol@users.noreply.github.com> Date: Wed, 20 Mar 2024 10:39:25 -0700 Subject: [PATCH] [TieredStorage] Refactor TieredStorage::new_readonly() code path (#195) #### Problem The TieredStorage::new_readonly() function currently has the following problems: * It opens the file without checking the magic number before checking and loading the footer. * It opens the file twice: first to load the footer, then open again by the reader. #### Summary of Changes This PR refactors TieredStorage::new_readonly() so that it first performs all checks inside the constructor of TieredReadableFile. The TieredReadableFile instance is then passed to the proper reader (currently HotStorageReader) when all checks are passed. #### Test Plan * Added a new test to check MagicNumberMismatch. * Existing tiered-storage tests --- accounts-db/src/tiered_storage.rs | 3 +- accounts-db/src/tiered_storage/file.rs | 86 +++++++++++++++++++--- accounts-db/src/tiered_storage/footer.rs | 24 +----- accounts-db/src/tiered_storage/hot.rs | 35 +++++---- accounts-db/src/tiered_storage/readable.rs | 6 +- 5 files changed, 105 insertions(+), 49 deletions(-) diff --git a/accounts-db/src/tiered_storage.rs b/accounts-db/src/tiered_storage.rs index cc2776ed178cf6..70169a59428fe6 100644 --- a/accounts-db/src/tiered_storage.rs +++ b/accounts-db/src/tiered_storage.rs @@ -170,7 +170,8 @@ mod tests { use { super::*, crate::account_storage::meta::StoredMetaWriteVersion, - footer::{TieredStorageFooter, TieredStorageMagicNumber}, + file::TieredStorageMagicNumber, + footer::TieredStorageFooter, hot::HOT_FORMAT, index::IndexOffset, solana_sdk::{ diff --git a/accounts-db/src/tiered_storage/file.rs b/accounts-db/src/tiered_storage/file.rs index 605e55a0b193a1..e6ea4a7c65d15d 100644 --- a/accounts-db/src/tiered_storage/file.rs +++ b/accounts-db/src/tiered_storage/file.rs @@ -1,5 +1,6 @@ use { - bytemuck::{AnyBitPattern, NoUninit}, + super::{error::TieredStorageError, TieredStorageResult}, + bytemuck::{AnyBitPattern, NoUninit, Pod, Zeroable}, std::{ fs::{File, OpenOptions}, io::{BufWriter, Read, Result as IoResult, Seek, SeekFrom, Write}, @@ -8,23 +9,37 @@ use { }, }; +/// The ending 8 bytes of a valid tiered account storage file. +pub const FILE_MAGIC_NUMBER: u64 = u64::from_le_bytes(*b"AnzaTech"); + +#[derive(Debug, PartialEq, Eq, Clone, Copy, Pod, Zeroable)] +#[repr(C)] +pub struct TieredStorageMagicNumber(pub u64); + +// Ensure there are no implicit padding bytes +const _: () = assert!(std::mem::size_of::() == 8); + +impl Default for TieredStorageMagicNumber { + fn default() -> Self { + Self(FILE_MAGIC_NUMBER) + } +} + #[derive(Debug)] pub struct TieredReadableFile(pub File); impl TieredReadableFile { - pub fn new(file_path: impl AsRef) -> Self { - Self( + pub fn new(file_path: impl AsRef) -> TieredStorageResult { + let file = Self( OpenOptions::new() .read(true) .create(false) - .open(&file_path) - .unwrap_or_else(|err| { - panic!( - "[TieredStorageError] Unable to open {} as read-only: {err}", - file_path.as_ref().display(), - ); - }), - ) + .open(&file_path)?, + ); + + file.check_magic_number()?; + + Ok(file) } pub fn new_writable(file_path: impl AsRef) -> IoResult { @@ -36,6 +51,19 @@ impl TieredReadableFile { )) } + fn check_magic_number(&self) -> TieredStorageResult<()> { + self.seek_from_end(-(std::mem::size_of::() as i64))?; + let mut magic_number = TieredStorageMagicNumber::zeroed(); + self.read_pod(&mut magic_number)?; + if magic_number != TieredStorageMagicNumber::default() { + return Err(TieredStorageError::MagicNumberMismatch( + TieredStorageMagicNumber::default().0, + magic_number.0, + )); + } + Ok(()) + } + /// Reads a value of type `T` from the file. /// /// Type T must be plain ol' data. @@ -127,3 +155,39 @@ impl TieredWritableFile { Ok(bytes.len()) } } + +#[cfg(test)] +mod tests { + use { + crate::tiered_storage::{ + error::TieredStorageError, + file::{TieredReadableFile, TieredWritableFile, FILE_MAGIC_NUMBER}, + }, + std::path::Path, + tempfile::TempDir, + }; + + fn generate_test_file_with_number(path: impl AsRef, number: u64) { + let mut file = TieredWritableFile::new(path).unwrap(); + file.write_pod(&number).unwrap(); + } + + #[test] + fn test_new() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("test_new"); + generate_test_file_with_number(&path, FILE_MAGIC_NUMBER); + assert!(TieredReadableFile::new(&path).is_ok()); + } + + #[test] + fn test_magic_number_mismatch() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("test_magic_number_mismatch"); + generate_test_file_with_number(&path, !FILE_MAGIC_NUMBER); + assert!(matches!( + TieredReadableFile::new(&path), + Err(TieredStorageError::MagicNumberMismatch(_, _)) + )); + } +} diff --git a/accounts-db/src/tiered_storage/footer.rs b/accounts-db/src/tiered_storage/footer.rs index fa885f2394ce63..89e671d121cce6 100644 --- a/accounts-db/src/tiered_storage/footer.rs +++ b/accounts-db/src/tiered_storage/footer.rs @@ -1,13 +1,13 @@ use { crate::tiered_storage::{ error::TieredStorageError, - file::{TieredReadableFile, TieredWritableFile}, + file::{TieredReadableFile, TieredStorageMagicNumber, TieredWritableFile}, index::IndexBlockFormat, mmap_utils::{get_pod, get_type}, owners::OwnersBlockFormat, TieredStorageResult, }, - bytemuck::{Pod, Zeroable}, + bytemuck::Zeroable, memmap2::Mmap, num_enum::TryFromPrimitiveError, solana_sdk::{hash::Hash, pubkey::Pubkey}, @@ -26,22 +26,6 @@ static_assertions::const_assert_eq!(mem::size_of::(), 160); /// even when the footer's format changes. pub const FOOTER_TAIL_SIZE: usize = 24; -/// The ending 8 bytes of a valid tiered account storage file. -pub const FOOTER_MAGIC_NUMBER: u64 = 0x502A2AB5; // SOLALABS -> SOLANA LABS - -#[derive(Debug, PartialEq, Eq, Clone, Copy, Pod, Zeroable)] -#[repr(C)] -pub struct TieredStorageMagicNumber(pub u64); - -// Ensure there are no implicit padding bytes -const _: () = assert!(std::mem::size_of::() == 8); - -impl Default for TieredStorageMagicNumber { - fn default() -> Self { - Self(FOOTER_MAGIC_NUMBER) - } -} - #[repr(u16)] #[derive( Clone, @@ -133,7 +117,7 @@ pub struct TieredStorageFooter { /// The size of the footer including the magic number. pub footer_size: u64, // This field is persisted in the storage but not in this struct. - // The number should match FOOTER_MAGIC_NUMBER. + // The number should match FILE_MAGIC_NUMBER. // pub magic_number: u64, } @@ -186,7 +170,7 @@ impl Default for TieredStorageFooter { impl TieredStorageFooter { pub fn new_from_path(path: impl AsRef) -> TieredStorageResult { - let file = TieredReadableFile::new(path); + let file = TieredReadableFile::new(path)?; Self::new_from_footer_block(&file) } diff --git a/accounts-db/src/tiered_storage/hot.rs b/accounts-db/src/tiered_storage/hot.rs index c00dff302c9cea..1a5017535cdded 100644 --- a/accounts-db/src/tiered_storage/hot.rs +++ b/accounts-db/src/tiered_storage/hot.rs @@ -7,7 +7,7 @@ use { accounts_hash::AccountHash, tiered_storage::{ byte_block, - file::TieredWritableFile, + file::{TieredReadableFile, TieredWritableFile}, footer::{AccountBlockFormat, AccountMetaFormat, TieredStorageFooter}, index::{AccountIndexWriterEntry, AccountOffset, IndexBlockFormat, IndexOffset}, meta::{AccountMetaFlags, AccountMetaOptionalFields, TieredAccountMeta}, @@ -24,7 +24,7 @@ use { account::ReadableAccount, pubkey::Pubkey, rent_collector::RENT_EXEMPT_RENT_EPOCH, stake_history::Epoch, }, - std::{borrow::Borrow, fs::OpenOptions, option::Option, path::Path}, + std::{borrow::Borrow, option::Option, path::Path}, }; pub const HOT_FORMAT: TieredStorageFormat = TieredStorageFormat { @@ -346,10 +346,8 @@ pub struct HotStorageReader { } impl HotStorageReader { - /// Constructs a HotStorageReader from the specified path. - pub fn new_from_path(path: impl AsRef) -> TieredStorageResult { - let file = OpenOptions::new().read(true).open(path)?; - let mmap = unsafe { MmapOptions::new().map(&file)? }; + pub fn new(file: TieredReadableFile) -> TieredStorageResult { + let mmap = unsafe { MmapOptions::new().map(&file.0)? }; // Here we are copying the footer, as accessing any data in a // TieredStorage instance requires accessing its Footer. // This can help improve cache locality and reduce the overhead @@ -899,7 +897,8 @@ pub mod tests { // Reopen the same storage, and expect the persisted footer is // the same as what we have written. { - let hot_storage = HotStorageReader::new_from_path(&path).unwrap(); + let file = TieredReadableFile::new(&path).unwrap(); + let hot_storage = HotStorageReader::new(file).unwrap(); assert_eq!(expected_footer, *hot_storage.footer()); } } @@ -945,7 +944,8 @@ pub mod tests { footer.write_footer_block(&mut file).unwrap(); } - let hot_storage = HotStorageReader::new_from_path(&path).unwrap(); + let file = TieredReadableFile::new(&path).unwrap(); + let hot_storage = HotStorageReader::new(file).unwrap(); for (offset, expected_meta) in account_offsets.iter().zip(hot_account_metas.iter()) { let meta = hot_storage.get_account_meta_from_offset(*offset).unwrap(); @@ -975,7 +975,8 @@ pub mod tests { footer.write_footer_block(&mut file).unwrap(); } - let hot_storage = HotStorageReader::new_from_path(&path).unwrap(); + let file = TieredReadableFile::new(&path).unwrap(); + let hot_storage = HotStorageReader::new(file).unwrap(); let offset = HotAccountOffset::new(footer.index_block_offset as usize).unwrap(); // Read from index_block_offset, which offset doesn't belong to // account blocks. Expect assert failure here @@ -1026,7 +1027,8 @@ pub mod tests { footer.write_footer_block(&mut file).unwrap(); } - let hot_storage = HotStorageReader::new_from_path(&path).unwrap(); + let file = TieredReadableFile::new(&path).unwrap(); + let hot_storage = HotStorageReader::new(file).unwrap(); for (i, index_writer_entry) in index_writer_entries.iter().enumerate() { let account_offset = hot_storage .get_account_offset(IndexOffset(i as u32)) @@ -1075,7 +1077,8 @@ pub mod tests { footer.write_footer_block(&mut file).unwrap(); } - let hot_storage = HotStorageReader::new_from_path(&path).unwrap(); + let file = TieredReadableFile::new(&path).unwrap(); + let hot_storage = HotStorageReader::new(file).unwrap(); for (i, address) in addresses.iter().enumerate() { assert_eq!( hot_storage @@ -1149,7 +1152,8 @@ pub mod tests { footer.write_footer_block(&mut file).unwrap(); } - let hot_storage = HotStorageReader::new_from_path(&path).unwrap(); + let file = TieredReadableFile::new(&path).unwrap(); + let hot_storage = HotStorageReader::new(file).unwrap(); // First, verify whether we can find the expected owners. let mut owner_candidates = owner_addresses.clone(); @@ -1281,7 +1285,8 @@ pub mod tests { footer.write_footer_block(&mut file).unwrap(); } - let hot_storage = HotStorageReader::new_from_path(&path).unwrap(); + let file = TieredReadableFile::new(&path).unwrap(); + let hot_storage = HotStorageReader::new(file).unwrap(); for i in 0..NUM_ACCOUNTS { let (stored_meta, next) = hot_storage @@ -1362,10 +1367,10 @@ pub mod tests { writer.write_accounts(&storable_accounts, 0).unwrap() }; - let hot_storage = HotStorageReader::new_from_path(&path).unwrap(); + let file = TieredReadableFile::new(&path).unwrap(); + let hot_storage = HotStorageReader::new(file).unwrap(); let num_accounts = account_data_sizes.len(); - for i in 0..num_accounts { let (stored_meta, next) = hot_storage .get_account(IndexOffset(i as u32)) diff --git a/accounts-db/src/tiered_storage/readable.rs b/accounts-db/src/tiered_storage/readable.rs index e3d169d4f6d99e..15d678ffc856fc 100644 --- a/accounts-db/src/tiered_storage/readable.rs +++ b/accounts-db/src/tiered_storage/readable.rs @@ -3,6 +3,7 @@ use { account_storage::meta::StoredAccountMeta, accounts_file::MatchAccountOwnerError, tiered_storage::{ + file::TieredReadableFile, footer::{AccountMetaFormat, TieredStorageFooter}, hot::HotStorageReader, index::IndexOffset, @@ -22,9 +23,10 @@ pub enum TieredStorageReader { impl TieredStorageReader { /// Creates a reader for the specified tiered storage accounts file. pub fn new_from_path(path: impl AsRef) -> TieredStorageResult { - let footer = TieredStorageFooter::new_from_path(&path)?; + let file = TieredReadableFile::new(&path)?; + let footer = TieredStorageFooter::new_from_footer_block(&file)?; match footer.account_meta_format { - AccountMetaFormat::Hot => Ok(Self::Hot(HotStorageReader::new_from_path(path)?)), + AccountMetaFormat::Hot => Ok(Self::Hot(HotStorageReader::new(file)?)), } }