Skip to content

Commit

Permalink
nydusd: solving the mismatch between nydus chunk amplification and dy…
Browse files Browse the repository at this point in the history
…namic dedup.

Dynamic dedup and nydus' chunk amplification do not work well together:
chunk amplification can cause reused chunks to be pulled multiple times,
so the cache can end up larger with dedup enabled than with dedup
disabled.

To address this issue, a dedup_bitmap is introduced. When rafs is
initialized, the dedup_bitmap is generated from the chunk information
in the blob. Whether a chunk in a blob is ready is then decided jointly
by the chunk map and the dedup_bitmap.

Signed-off-by: xwb1136021767 <[email protected]>
  • Loading branch information
xwb1136021767 committed Aug 9, 2023
1 parent f79015d commit 4be06b9
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 25 deletions.
2 changes: 1 addition & 1 deletion api/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2346,7 +2346,7 @@ mod tests {
"#;
let config = ConfigV2::from_str(content).unwrap();
assert_eq!(&config.id, "");
assert!(config.dedup.as_ref().unwrap().enable, "{}", false);
assert!(!config.dedup.as_ref().unwrap().enable);
assert_eq!(
&config.dedup.unwrap().work_dir,
"/home/t4/containerd/io.containerd.snapshotter.v1.nydus"
Expand Down
37 changes: 13 additions & 24 deletions storage/src/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1181,32 +1181,22 @@ impl BlobDevice {
/// Create new blob device instance.
pub fn new(config: &Arc<ConfigV2>, blob_infos: &[Arc<BlobInfo>]) -> io::Result<BlobDevice> {
let mut blobs = Vec::with_capacity(blob_infos.len());
let dedup_config = config.get_dedup_config()?;
let mut is_dedup = dedup_config.get_enable();
let cas_mgr = match is_dedup {
true => {
let db_path = dedup_config.get_work_dir()?;
match CasMgr::new(db_path) {
Ok(cas_mgr) => Some(cas_mgr),
Err(_) => {
is_dedup = false;
None
}
}
}
false => None,
};
let dedup_config = config.get_dedup_config().ok();
let cas_mgr = dedup_config
.as_ref()
.filter(|config| config.get_enable() && config.get_work_dir().is_ok())
.and_then(|config| CasMgr::new(config.get_work_dir().unwrap()).ok());

for blob_info in blob_infos.iter() {
let blob = match is_dedup {
true => {
let new_blob_config = Self::get_new_blob_config(&cas_mgr, blob_info, config)?;
BLOB_FACTORY.new_blob_cache(&new_blob_config, blob_info)?
}
false => BLOB_FACTORY.new_blob_cache(config, blob_info)?,
let blob = if let Some(cas_mgr) = &cas_mgr {
Self::get_new_blob_config(cas_mgr, blob_info, config).and_then(|new_blob_config| {
BLOB_FACTORY.new_blob_cache(&new_blob_config, blob_info)
})
} else {
BLOB_FACTORY.new_blob_cache(config, blob_info)
};

blobs.push(blob);
blobs.push(blob.unwrap());
}

Ok(BlobDevice {
Expand All @@ -1216,11 +1206,10 @@ impl BlobDevice {
}

fn get_new_blob_config(
cas_mgr: &Option<CasMgr>,
cas_mgr: &CasMgr,
blob_info: &Arc<BlobInfo>,
config: &Arc<ConfigV2>,
) -> io::Result<Arc<ConfigV2>> {
let cas_mgr = cas_mgr.as_ref().unwrap();
let blob_id = blob_info.blob_id();
let blob_backend = Self::get_blob_backend_config(cas_mgr, &blob_id)?;
let mut blob_config = config.deref().clone();
Expand Down

0 comments on commit 4be06b9

Please sign in to comment.