Skip to content

Commit

Permalink
nydus-image: Optimize Chunkdict Save
Browse files Browse the repository at this point in the history
Refactor the Deduplicate implementation so that the RAFS bootstrap and
config are loaded only when chunk metadata is saved, rather than in the
constructor. Simplify the code for better maintainability.

Signed-off-by: Lin Wang <[email protected]>
  • Loading branch information
cslinwang authored and imeoer committed Sep 22, 2023
1 parent d2fcfcd commit 278915b
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 31 deletions.
25 changes: 16 additions & 9 deletions src/bin/nydus-image/deduplicate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,28 +128,31 @@ impl Database for SqliteDatabase {
}

pub struct Deduplicate<D: Database + Send + Sync> {
sb: RafsSuper,
db: D,
}

const IN_MEMORY_DB_URL: &str = ":memory:";

impl Deduplicate<SqliteDatabase> {
pub fn new(bootstrap_path: &Path, config: Arc<ConfigV2>, db_url: &str) -> anyhow::Result<Self> {
let (sb, _) = RafsSuper::load_from_file(bootstrap_path, config, false)?;
pub fn new(db_url: &str) -> anyhow::Result<Self> {
let db = if db_url == IN_MEMORY_DB_URL {
SqliteDatabase::new_in_memory()?
} else {
SqliteDatabase::new(db_url)?
};
Ok(Self { sb, db })
Ok(Self { db })
}

pub fn save_metadata(&mut self) -> anyhow::Result<Vec<Arc<BlobInfo>>> {
pub fn save_metadata(
&mut self,
bootstrap_path: &Path,
config: Arc<ConfigV2>,
) -> anyhow::Result<Vec<Arc<BlobInfo>>> {
let (sb, _) = RafsSuper::load_from_file(bootstrap_path, config, false)?;
self.create_tables()?;
let blob_infos = self.sb.superblock.get_blob_infos();
let blob_infos = sb.superblock.get_blob_infos();
self.insert_blobs(&blob_infos)?;
self.insert_chunks(&blob_infos)?;
self.insert_chunks(&blob_infos, &sb)?;
Ok(blob_infos)
}

Expand All @@ -176,7 +179,11 @@ impl Deduplicate<SqliteDatabase> {
Ok(())
}

fn insert_chunks(&mut self, blob_infos: &[Arc<BlobInfo>]) -> anyhow::Result<()> {
fn insert_chunks(
&mut self,
blob_infos: &[Arc<BlobInfo>],
sb: &RafsSuper,
) -> anyhow::Result<()> {
let process_chunk = &mut |t: &Tree| -> Result<()> {
let node = t.lock_node();
for chunk in &node.chunks {
Expand All @@ -195,7 +202,7 @@ impl Deduplicate<SqliteDatabase> {
}
Ok(())
};
let tree = Tree::from_bootstrap(&self.sb, &mut ())
let tree = Tree::from_bootstrap(sb, &mut ())
.context("Failed to load bootstrap for deduplication.")?;
tree.walk_dfs_pre(process_chunk)?;
Ok(())
Expand Down
28 changes: 6 additions & 22 deletions src/bin/nydus-image/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use nydus_builder::{
use nydus_rafs::metadata::{RafsSuper, RafsSuperConfig, RafsVersion};
use nydus_storage::backend::localfs::LocalFs;
use nydus_storage::backend::BlobBackend;
use nydus_storage::device::{BlobFeatures, BlobInfo};
use nydus_storage::device::BlobFeatures;
use nydus_storage::factory::BlobFactory;
use nydus_storage::meta::{format_blob_features, BatchContextGenerator};
use nydus_storage::{RAFS_DEFAULT_CHUNK_SIZE, RAFS_MAX_CHUNK_SIZE};
Expand Down Expand Up @@ -369,7 +369,6 @@ fn prepare_cmd_args(bti_string: &'static str) -> App {
.short('B')
.long("bootstrap")
.help("File path of RAFS meta blob/bootstrap")
.conflicts_with("BOOTSTRAP")
.required(false),
)
.arg(
Expand Down Expand Up @@ -399,7 +398,7 @@ fn prepare_cmd_args(bti_string: &'static str) -> App {
)
.arg(arg_output_json.clone())
)
);
);

let app = app.subcommand(
App::new("merge")
Expand Down Expand Up @@ -1151,32 +1150,17 @@ impl Command {
bail!("Invalid database URL: {}", db_url);
}

let blobs: Vec<Arc<BlobInfo>> = match db_strs[0] {
match db_strs[0] {
"sqlite" => {
let mut deduplicate: Deduplicate<SqliteDatabase> =
Deduplicate::<SqliteDatabase>::new(bootstrap_path, config, db_strs[1])?;
deduplicate.save_metadata()?
Deduplicate::<SqliteDatabase>::new(db_strs[1])?;
deduplicate.save_metadata(bootstrap_path, config)?
}
_ => {
bail!("Unsupported database type: {}, please use a valid database URI, such as 'sqlite:///path/to/database.db'.", db_strs[0])
}
};
info!("RAFS filesystem metadata is saved:");

let mut blob_ids = Vec::new();
for (idx, blob) in blobs.iter().enumerate() {
info!(
"\t {}: {}, compressed data size 0x{:x}, compressed file size 0x{:x}, uncompressed file size 0x{:x}, chunks: 0x{:x}, features: {}.",
idx,
blob.blob_id(),
blob.compressed_data_size(),
blob.compressed_size(),
blob.uncompressed_size(),
blob.chunk_count(),
format_blob_features(blob.features()),
);
blob_ids.push(blob.blob_id().to_string());
}
info!("Chunkdict metadata is saved at: {:?}", db_url);
Ok(())
}

Expand Down

0 comments on commit 278915b

Please sign in to comment.