diff --git a/Cargo.toml b/Cargo.toml
index fbd64232437..9656f5a92c3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -95,6 +95,7 @@ default = [
"backend-s3",
"backend-http-proxy",
"backend-localdisk",
+ "dedup",
]
virtiofs = [
"nydus-service/virtiofs",
@@ -116,6 +117,8 @@ backend-oss = ["nydus-storage/backend-oss"]
backend-registry = ["nydus-storage/backend-registry"]
backend-s3 = ["nydus-storage/backend-s3"]
+dedup = ["nydus-storage/dedup"]
+
[workspace]
members = [
"api",
diff --git a/docs/data-deduplication.md b/docs/data-deduplication.md
index 45b259ad204..1b6e0305ec3 100644
--- a/docs/data-deduplication.md
+++ b/docs/data-deduplication.md
@@ -164,4 +164,25 @@ So Nydus provides a node level CAS system to reduce data downloaded from the reg
The node level CAS system helps to achieve O4 and O5.
-# Node Level CAS System (WIP)
+# Node Level CAS System (Experimental)
+Data deduplication can also be achieved when accessing Nydus images. The key idea is to maintain information about data chunks available on local host by using a database.
+When a chunk is needed but not available in the uncompressed data blob files yet, we will query the database using chunk digest as key.
+If a record with the same chunk digest already exists, it will be reused.
+We call such a system CAS (Content Addressable Storage).
+
+## Chunk Deduplication by Using CAS as L2 Cache
+In this chunk deduplication mode, the CAS system works as an L2 cache to provide chunk data on demand, and it keeps Nydus bootstrap blobs as is.
+It works in this way:
+1. query the database when a chunk is needed but not available yet
+2. copy data from the source blob to the target blob using `copy_file_range` if a record with the same chunk digest exists
+3. download chunk data from remote if there's no record in database
+4. insert a new record into the database for the just-downloaded chunk so it can be reused later.
+
+![chunk_dedup_l2cache](images/chunk_dedup_l2_cache.png)
+
+A data download operation can be avoided if a chunk already exists in the database.
+And if the underlying filesystem supports data references, `copy_file_range` will use a reference instead of copying data, thus reducing storage space consumption.
+This design has the benefit of robustness: the target blob file has no dependency on the database or source blob files, which eases garbage collection.
+But it depends on the capabilities of the underlying filesystem to reduce storage consumption.
+
+## Chunk Deduplication by Rebuilding Nydus Bootstrap (WIP)
diff --git a/docs/images/chunk_dedup_l2_cache.drawio b/docs/images/chunk_dedup_l2_cache.drawio
new file mode 100644
index 00000000000..c7d1417615b
--- /dev/null
+++ b/docs/images/chunk_dedup_l2_cache.drawio
@@ -0,0 +1,265 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/images/chunk_dedup_l2_cache.png b/docs/images/chunk_dedup_l2_cache.png
new file mode 100644
index 00000000000..e931e3f6927
Binary files /dev/null and b/docs/images/chunk_dedup_l2_cache.png differ
diff --git a/smoke/go.mod b/smoke/go.mod
index dbf95cf434f..d68c7193fba 100644
--- a/smoke/go.mod
+++ b/smoke/go.mod
@@ -12,6 +12,7 @@ require (
github.com/pkg/xattr v0.4.9
github.com/stretchr/testify v1.8.4
golang.org/x/sys v0.15.0
+ github.com/mattn/go-sqlite3 v1.14.23
)
require (
diff --git a/smoke/go.sum b/smoke/go.sum
index 0fa56cfafd8..67e97a62e87 100644
--- a/smoke/go.sum
+++ b/smoke/go.sum
@@ -10,6 +10,7 @@ github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGX
github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM=
github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw=
github.com/containerd/containerd v1.7.11 h1:lfGKw3eU35sjV0aG2eYZTiwFEY1pCzxdzicHP3SZILw=
+github.com/containerd/containerd v1.7.11/go.mod h1:5UluHxHTX2rdvYuZ5OJTC5m/KJNs0Zs9wVoJm9zf5ZE=
github.com/containerd/continuity v0.4.3 h1:6HVkalIp+2u1ZLH1J/pYX2oBVXlJZvh1X1A7bEZ9Su8=
github.com/containerd/continuity v0.4.3/go.mod h1:F6PTNCKepoxEaXLQp3wDAjygEnImnZ/7o4JzpodfroQ=
github.com/containerd/fifo v1.1.0 h1:4I2mbh5stb1u6ycIABlBw9zgtlK8viPI9QkQNRQEEmY=
@@ -53,6 +54,7 @@ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
+github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU=
github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
@@ -67,7 +69,10 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/mattn/go-sqlite3 v1.14.23 h1:gbShiuAP1W5j9UOksQ06aiiqPMxYecovVGwmTxWtuw0=
+github.com/mattn/go-sqlite3 v1.14.23/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/moby/sys/mountinfo v0.7.1 h1:/tTvQaSJRr2FshkhXiIpux6fQ2Zvc4j7tAhMTStAG2g=
+github.com/moby/sys/mountinfo v0.7.1/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI=
github.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5lXtc=
github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
@@ -135,6 +140,7 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20220408201424-a24fb2fb8a0f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc=
golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
diff --git a/smoke/tests/cas_test.go b/smoke/tests/cas_test.go
new file mode 100644
index 00000000000..6520f6f5bdf
--- /dev/null
+++ b/smoke/tests/cas_test.go
@@ -0,0 +1,140 @@
+// Copyright 2024 Nydus Developers. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package tests
+
+import (
+ "database/sql"
+ "fmt"
+ "os"
+ "path/filepath"
+ "testing"
+ "time"
+
+ _ "github.com/mattn/go-sqlite3"
+
+ "github.com/dragonflyoss/nydus/smoke/tests/texture"
+ "github.com/dragonflyoss/nydus/smoke/tests/tool"
+ "github.com/dragonflyoss/nydus/smoke/tests/tool/test"
+ "github.com/stretchr/testify/require"
+)
+
+type CasTestSuite struct{}
+
+func (c *CasTestSuite) TestCasTables() test.Generator {
+ scenarios := tool.DescartesIterator{}
+ scenarios.Dimension(paramEnablePrefetch, []interface{}{false, true})
+
+ return func() (name string, testCase test.Case) {
+ if !scenarios.HasNext() {
+ return
+ }
+ scenario := scenarios.Next()
+
+ return scenario.Str(), func(t *testing.T) {
+ c.testCasTables(t, scenario.GetBool(paramEnablePrefetch))
+ }
+ }
+}
+
+func (c *CasTestSuite) testCasTables(t *testing.T, enablePrefetch bool) {
+ ctx, layer := texture.PrepareLayerWithContext(t)
+ ctx.Runtime.EnablePrefetch = enablePrefetch
+ ctx.Runtime.ChunkDedupDb = filepath.Join(ctx.Env.WorkDir, "cas.db")
+
+ nydusd, err := tool.NewNydusdWithContext(*ctx)
+ require.NoError(t, err)
+ err = nydusd.Mount()
+ require.NoError(t, err)
+ defer nydusd.Umount()
+ nydusd.Verify(t, layer.FileTree)
+
+ db, err := sql.Open("sqlite3", ctx.Runtime.ChunkDedupDb)
+ require.NoError(t, err)
+ defer db.Close()
+
+ for _, expectedTable := range []string{"Blobs", "Chunks"} {
+ var count int
+ query := fmt.Sprintf("SELECT COUNT(*) FROM %s;", expectedTable)
+ err := db.QueryRow(query).Scan(&count)
+ require.NoError(t, err)
+ if expectedTable == "Blobs" {
+ require.Equal(t, count, 1)
+ } else {
+ require.Equal(t, count, 8)
+ }
+ }
+}
+
+func (c *CasTestSuite) TestCasGc() test.Generator {
+ scenarios := tool.DescartesIterator{}
+ scenarios.Dimension(paramEnablePrefetch, []interface{}{false, true})
+
+ return func() (name string, testCase test.Case) {
+ if !scenarios.HasNext() {
+ return
+ }
+ scenario := scenarios.Next()
+
+ return scenario.Str(), func(t *testing.T) {
+ c.testCasGc(t, scenario.GetBool(paramEnablePrefetch))
+ }
+ }
+}
+
+func (c *CasTestSuite) testCasGc(t *testing.T, enablePrefetch bool) {
+ ctx, layer := texture.PrepareLayerWithContext(t)
+ defer ctx.Destroy(t)
+ config := tool.NydusdConfig{
+ NydusdPath: ctx.Binary.Nydusd,
+ MountPath: ctx.Env.MountDir,
+ APISockPath: filepath.Join(ctx.Env.WorkDir, "nydusd-api.sock"),
+ ConfigPath: filepath.Join(ctx.Env.WorkDir, "nydusd-config.fusedev.json"),
+ ChunkDedupDb: filepath.Join(ctx.Env.WorkDir, "cas.db"),
+ }
+ nydusd, err := tool.NewNydusd(config)
+ require.NoError(t, err)
+
+ err = nydusd.Mount()
+ defer nydusd.Umount()
+ require.NoError(t, err)
+
+ config.BootstrapPath = ctx.Env.BootstrapPath
+ config.MountPath = "/mount"
+ config.BackendType = "localfs"
+ config.BackendConfig = fmt.Sprintf(`{"dir": "%s"}`, ctx.Env.BlobDir)
+ config.BlobCacheDir = ctx.Env.CacheDir
+ config.CacheType = ctx.Runtime.CacheType
+ config.CacheCompressed = ctx.Runtime.CacheCompressed
+ config.RafsMode = ctx.Runtime.RafsMode
+ config.EnablePrefetch = enablePrefetch
+ config.DigestValidate = false
+ config.AmplifyIO = ctx.Runtime.AmplifyIO
+ err = nydusd.MountByAPI(config)
+ require.NoError(t, err)
+
+ nydusd.VerifyByPath(t, layer.FileTree, config.MountPath)
+
+ db, err := sql.Open("sqlite3", config.ChunkDedupDb)
+ require.NoError(t, err)
+ defer db.Close()
+
+ // Mock nydus snapshotter clear cache
+ os.RemoveAll(filepath.Join(ctx.Env.WorkDir, "cache"))
+ time.Sleep(1 * time.Second)
+
+ nydusd.UmountByAPI(config.MountPath)
+
+ for _, expectedTable := range []string{"Blobs", "Chunks"} {
+ var count int
+ query := fmt.Sprintf("SELECT COUNT(*) FROM %s;", expectedTable)
+ err := db.QueryRow(query).Scan(&count)
+ require.NoError(t, err)
+ require.Zero(t, count)
+ }
+}
+
+func TestCas(t *testing.T) {
+ test.Run(t, &CasTestSuite{})
+}
diff --git a/smoke/tests/chunk_dedup_test.go b/smoke/tests/chunk_dedup_test.go
new file mode 100644
index 00000000000..eca4d362e86
--- /dev/null
+++ b/smoke/tests/chunk_dedup_test.go
@@ -0,0 +1,125 @@
+// Copyright 2024 Nydus Developers. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package tests
+
+import (
+ "context"
+ "encoding/json"
+ "io"
+ "net"
+ "net/http"
+ "os"
+ "path/filepath"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/require"
+
+ "github.com/dragonflyoss/nydus/smoke/tests/texture"
+ "github.com/dragonflyoss/nydus/smoke/tests/tool"
+ "github.com/dragonflyoss/nydus/smoke/tests/tool/test"
+)
+
+const (
+ paramIteration = "iteration"
+)
+
+type ChunkDedupTestSuite struct{}
+
+type BackendMetrics struct {
+ ReadCount uint64 `json:"read_count"`
+ ReadAmountTotal uint64 `json:"read_amount_total"`
+ ReadErrors uint64 `json:"read_errors"`
+}
+
+func (c *ChunkDedupTestSuite) TestChunkDedup() test.Generator {
+ scenarios := tool.DescartesIterator{}
+ scenarios.Dimension(paramIteration, []interface{}{1})
+
+ file, _ := os.CreateTemp("", "cas-*.db")
+ defer os.Remove(file.Name())
+
+ return func() (name string, testCase test.Case) {
+ if !scenarios.HasNext() {
+ return
+ }
+ scenario := scenarios.Next()
+
+ return scenario.Str(), func(t *testing.T) {
+ c.testRemoteWithDedup(t, file.Name())
+ }
+ }
+}
+
+func (c *ChunkDedupTestSuite) testRemoteWithDedup(t *testing.T, dbPath string) {
+ ctx, layer := texture.PrepareLayerWithContext(t)
+ defer ctx.Destroy(t)
+ ctx.Runtime.EnablePrefetch = false
+ ctx.Runtime.ChunkDedupDb = dbPath
+
+ nydusd, err := tool.NewNydusdWithContext(*ctx)
+ require.NoError(t, err)
+ err = nydusd.Mount()
+ require.NoError(t, err)
+ defer nydusd.Umount()
+ nydusd.Verify(t, layer.FileTree)
+ metrics := c.getBackendMetrics(t, filepath.Join(ctx.Env.WorkDir, "nydusd-api.sock"))
+ require.Zero(t, metrics.ReadErrors)
+
+ ctx2, layer2 := texture.PrepareLayerWithContext(t)
+ defer ctx2.Destroy(t)
+ ctx2.Runtime.EnablePrefetch = false
+ ctx2.Runtime.ChunkDedupDb = dbPath
+
+ nydusd2, err := tool.NewNydusdWithContext(*ctx2)
+ require.NoError(t, err)
+ err = nydusd2.Mount()
+ require.NoError(t, err)
+ defer nydusd2.Umount()
+ nydusd2.Verify(t, layer2.FileTree)
+ metrics2 := c.getBackendMetrics(t, filepath.Join(ctx2.Env.WorkDir, "nydusd-api.sock"))
+ require.Zero(t, metrics2.ReadErrors)
+
+ require.Greater(t, metrics.ReadCount, metrics2.ReadCount)
+ require.Greater(t, metrics.ReadAmountTotal, metrics2.ReadAmountTotal)
+}
+
+func (c *ChunkDedupTestSuite) getBackendMetrics(t *testing.T, sockPath string) *BackendMetrics {
+ transport := &http.Transport{
+ MaxIdleConns: 10,
+ IdleConnTimeout: 10 * time.Second,
+ ExpectContinueTimeout: 1 * time.Second,
+ DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
+ dialer := &net.Dialer{
+ Timeout: 5 * time.Second,
+ KeepAlive: 5 * time.Second,
+ }
+ return dialer.DialContext(ctx, "unix", sockPath)
+ },
+ }
+
+ client := &http.Client{
+ Timeout: 30 * time.Second,
+ Transport: transport,
+ }
+
+ resp, err := client.Get("http://unix/api/v1/metrics/backend")
+ require.NoError(t, err)
+ defer resp.Body.Close()
+
+ body, err := io.ReadAll(resp.Body)
+ require.NoError(t, err)
+
+ var metrics BackendMetrics
+ if err = json.Unmarshal(body, &metrics); err != nil {
+ require.NoError(t, err)
+ }
+
+ return &metrics
+}
+
+func TestChunkDedup(t *testing.T) {
+ test.Run(t, &ChunkDedupTestSuite{})
+}
diff --git a/smoke/tests/texture/layer.go b/smoke/tests/texture/layer.go
index c6a4933b3fe..ab64bff2a85 100644
--- a/smoke/tests/texture/layer.go
+++ b/smoke/tests/texture/layer.go
@@ -10,7 +10,10 @@ import (
"syscall"
"testing"
+ "github.com/containerd/nydus-snapshotter/pkg/converter"
"github.com/dragonflyoss/nydus/smoke/tests/tool"
+ "github.com/opencontainers/go-digest"
+ "github.com/stretchr/testify/require"
)
type LayerMaker func(t *testing.T, layer *tool.Layer)
@@ -135,3 +138,28 @@ func MakeMatrixLayer(t *testing.T, workDir, id string) *tool.Layer {
return layer
}
+
+func PrepareLayerWithContext(t *testing.T) (*tool.Context, *tool.Layer) {
+ ctx := tool.DefaultContext(t)
+
+ // Prepare work directory
+ ctx.PrepareWorkDir(t)
+
+ lowerLayer := MakeLowerLayer(t, filepath.Join(ctx.Env.WorkDir, "source"))
+ lowerOCIBlobDigest, lowerRafsBlobDigest := lowerLayer.PackRef(t, *ctx, ctx.Env.BlobDir, ctx.Build.OCIRefGzip)
+ mergeOption := converter.MergeOption{
+ BuilderPath: ctx.Binary.Builder,
+ ChunkDictPath: "",
+ OCIRef: true,
+ }
+ actualDigests, lowerBootstrap := tool.MergeLayers(t, *ctx, mergeOption, []converter.Layer{
+ {
+ Digest: lowerRafsBlobDigest,
+ OriginalDigest: &lowerOCIBlobDigest,
+ },
+ })
+ require.Equal(t, []digest.Digest{lowerOCIBlobDigest}, actualDigests)
+
+ ctx.Env.BootstrapPath = lowerBootstrap
+ return ctx, lowerLayer
+}
diff --git a/smoke/tests/tool/context.go b/smoke/tests/tool/context.go
index 04cf6d851cf..b1215f59b7d 100644
--- a/smoke/tests/tool/context.go
+++ b/smoke/tests/tool/context.go
@@ -39,6 +39,7 @@ type RuntimeContext struct {
RafsMode string
EnablePrefetch bool
AmplifyIO uint64
+ ChunkDedupDb string
}
type EnvContext struct {
diff --git a/smoke/tests/tool/nydusd.go b/smoke/tests/tool/nydusd.go
index c340cce88b1..344588421c6 100644
--- a/smoke/tests/tool/nydusd.go
+++ b/smoke/tests/tool/nydusd.go
@@ -74,6 +74,7 @@ type NydusdConfig struct {
AccessPattern bool
PrefetchFiles []string
AmplifyIO uint64
+ ChunkDedupDb string
// Hot Upgrade config.
Upgrade bool
SupervisorSockPath string
@@ -193,6 +194,9 @@ func newNydusd(conf NydusdConfig) (*Nydusd, error) {
if len(conf.BootstrapPath) > 0 {
args = append(args, "--bootstrap", conf.BootstrapPath)
}
+ if len(conf.ChunkDedupDb) > 0 {
+ args = append(args, "--dedup-db", conf.ChunkDedupDb)
+ }
if conf.Upgrade {
args = append(args, "--upgrade")
}
@@ -276,6 +280,7 @@ func NewNydusdWithContext(ctx Context) (*Nydusd, error) {
RafsMode: ctx.Runtime.RafsMode,
DigestValidate: false,
AmplifyIO: ctx.Runtime.AmplifyIO,
+ ChunkDedupDb: ctx.Runtime.ChunkDedupDb,
}
if err := makeConfig(NydusdConfigTpl, conf); err != nil {
@@ -346,7 +351,6 @@ func (nydusd *Nydusd) MountByAPI(config NydusdConfig) error {
)
return err
-
}
func (nydusd *Nydusd) Umount() error {
diff --git a/src/bin/nydusd/main.rs b/src/bin/nydusd/main.rs
index fc5e4b7a6b8..ab138442f6b 100644
--- a/src/bin/nydusd/main.rs
+++ b/src/bin/nydusd/main.rs
@@ -26,6 +26,7 @@ use nydus_service::{
create_daemon, create_fuse_daemon, create_vfs_backend, validate_threads_configuration,
Error as NydusError, FsBackendMountCmd, FsBackendType, ServiceArgs,
};
+use nydus_storage::cache::CasMgr;
use crate::api_server_glue::ApiServerController;
@@ -50,7 +51,7 @@ fn thread_validator(v: &str) -> std::result::Result {
}
fn append_fs_options(app: Command) -> Command {
- app.arg(
+ let mut app = app.arg(
Arg::new("bootstrap")
.long("bootstrap")
.short('B')
@@ -87,7 +88,18 @@ fn append_fs_options(app: Command) -> Command {
.help("Mountpoint within the FUSE/virtiofs device to mount the RAFS/passthroughfs filesystem")
.default_value("/")
.required(false),
- )
+ );
+
+ #[cfg(feature = "dedup")]
+ {
+ app = app.arg(
+ Arg::new("dedup-db")
+ .long("dedup-db")
+ .help("Database file for chunk deduplication"),
+ );
+ }
+
+ app
}
fn append_fuse_options(app: Command) -> Command {
@@ -750,6 +762,13 @@ fn main() -> Result<()> {
dump_program_info();
handle_rlimit_nofile_option(&args, "rlimit-nofile")?;
+ #[cfg(feature = "dedup")]
+ if let Some(db) = args.get_one::<String>("dedup-db") {
+ let mgr = CasMgr::new(db).map_err(|e| eother!(format!("{}", e)))?;
+ info!("Enable chunk deduplication by using database at {}", db);
+ CasMgr::set_singleton(mgr);
+ }
+
match args.subcommand_name() {
Some("singleton") => {
// Safe to unwrap because the subcommand is `singleton`.
diff --git a/storage/Cargo.toml b/storage/Cargo.toml
index a45ba0a1fe1..8636d5cb53c 100644
--- a/storage/Cargo.toml
+++ b/storage/Cargo.toml
@@ -58,7 +58,6 @@ regex = "1.7.0"
toml = "0.5"
[features]
-default = ["dedup"]
backend-localdisk = []
backend-localdisk-gpt = ["gpt", "backend-localdisk"]
backend-localfs = []
diff --git a/storage/src/cache/cachedfile.rs b/storage/src/cache/cachedfile.rs
index d30bcb1762b..43c65cc3f77 100644
--- a/storage/src/cache/cachedfile.rs
+++ b/storage/src/cache/cachedfile.rs
@@ -13,6 +13,7 @@ use std::collections::HashSet;
use std::fs::File;
use std::io::{ErrorKind, Read, Result};
use std::mem::ManuallyDrop;
+use std::ops::Deref;
use std::os::unix::io::{AsRawFd, RawFd};
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
use std::sync::{Arc, Mutex};
@@ -29,7 +30,7 @@ use tokio::runtime::Runtime;
use crate::backend::BlobReader;
use crate::cache::state::ChunkMap;
use crate::cache::worker::{AsyncPrefetchConfig, AsyncPrefetchMessage, AsyncWorkerMgr};
-use crate::cache::{BlobCache, BlobIoMergeState};
+use crate::cache::{BlobCache, BlobIoMergeState, CasMgr};
use crate::device::{
BlobChunkInfo, BlobInfo, BlobIoDesc, BlobIoRange, BlobIoSegment, BlobIoTag, BlobIoVec,
BlobObject, BlobPrefetchRequest,
@@ -184,8 +185,10 @@ pub(crate) struct FileCacheEntry {
pub(crate) blob_info: Arc,
pub(crate) cache_cipher_object: Arc,
pub(crate) cache_cipher_context: Arc,
+ pub(crate) cas_mgr: Option<Arc<CasMgr>>,
pub(crate) chunk_map: Arc,
pub(crate) file: Arc,
+ pub(crate) file_path: Arc<PathBuf>,
pub(crate) meta: Option,
pub(crate) metrics: Arc,
pub(crate) prefetch_state: Arc,
@@ -233,13 +236,16 @@ impl FileCacheEntry {
}
fn delay_persist_chunk_data(&self, chunk: Arc, buffer: Arc) {
+ let blob_info = self.blob_info.clone();
let delayed_chunk_map = self.chunk_map.clone();
let file = self.file.clone();
+ let file_path = self.file_path.clone();
let metrics = self.metrics.clone();
let is_raw_data = self.is_raw_data;
let is_cache_encrypted = self.is_cache_encrypted;
let cipher_object = self.cache_cipher_object.clone();
let cipher_context = self.cache_cipher_context.clone();
+ let cas_mgr = self.cas_mgr.clone();
metrics.buffered_backend_size.add(buffer.size() as u64);
self.runtime.spawn_blocking(move || {
@@ -291,6 +297,14 @@ impl FileCacheEntry {
};
let res = Self::persist_cached_data(&file, offset, buf);
Self::_update_chunk_pending_status(&delayed_chunk_map, chunk.as_ref(), res.is_ok());
+ if let Some(mgr) = cas_mgr {
+ if let Err(e) = mgr.record_chunk(&blob_info, chunk.deref(), file_path.as_ref()) {
+ warn!(
+ "failed to record chunk state for dedup in delay_persist_chunk_data, {}",
+ e
+ );
+ }
+ }
});
}
@@ -298,6 +312,14 @@ impl FileCacheEntry {
let offset = chunk.uncompressed_offset();
let res = Self::persist_cached_data(&self.file, offset, buf);
self.update_chunk_pending_status(chunk, res.is_ok());
+ if let Some(mgr) = &self.cas_mgr {
+ if let Err(e) = mgr.record_chunk(&self.blob_info, chunk, self.file_path.as_ref()) {
+ warn!(
+ "failed to record chunk state for dedup in persist_chunk_data, {}",
+ e
+ );
+ }
+ }
}
fn persist_cached_data(file: &Arc, offset: u64, buffer: &[u8]) -> Result<()> {
@@ -1051,13 +1073,21 @@ impl FileCacheEntry {
trace!("dispatch single io range {:?}", req);
let mut blob_cci = BlobCCI::new();
for (i, chunk) in req.chunks.iter().enumerate() {
- let is_ready = match self.chunk_map.check_ready_and_mark_pending(chunk.as_ref()) {
+ let mut is_ready = match self.chunk_map.check_ready_and_mark_pending(chunk.as_ref()) {
Ok(true) => true,
Ok(false) => false,
Err(StorageError::Timeout) => false, // Retry if waiting for inflight IO timeouts
Err(e) => return Err(einval!(e)),
};
+ if !is_ready {
+ if let Some(mgr) = self.cas_mgr.as_ref() {
+ is_ready = mgr.dedup_chunk(&self.blob_info, chunk.deref(), &self.file);
+ if is_ready {
+ self.update_chunk_pending_status(chunk.deref(), true);
+ }
+ }
+ }
// Directly read chunk data from file cache into user buffer iff:
// - the chunk is ready in the file cache
// - data in the file cache is plaintext.
@@ -1454,6 +1484,16 @@ impl FileCacheEntry {
}
}
+impl Drop for FileCacheEntry {
+ fn drop(&mut self) {
+ if let Some(cas_mgr) = &self.cas_mgr {
+ if let Err(e) = cas_mgr.gc() {
+ warn!("cas_mgr gc failed: {}", e);
+ }
+ }
+ }
+}
+
/// An enum to reuse existing buffers for IO operations, and CoW on demand.
#[allow(dead_code)]
enum DataBuffer {
diff --git a/storage/src/cache/dedup/db.rs b/storage/src/cache/dedup/db.rs
index 6daff37c70b..f0cf493296b 100644
--- a/storage/src/cache/dedup/db.rs
+++ b/storage/src/cache/dedup/db.rs
@@ -8,7 +8,7 @@ use std::path::Path;
use r2d2::{Pool, PooledConnection};
use r2d2_sqlite::SqliteConnectionManager;
-use rusqlite::{Connection, DropBehavior, OptionalExtension, Transaction};
+use rusqlite::{Connection, DropBehavior, OpenFlags, OptionalExtension, Transaction};
use super::Result;
@@ -24,7 +24,8 @@ impl CasDb {
}
pub fn from_file(db_path: impl AsRef) -> Result {
- let mgr = SqliteConnectionManager::file(db_path);
+ let mgr = SqliteConnectionManager::file(db_path)
+ .with_flags(OpenFlags::SQLITE_OPEN_CREATE | OpenFlags::SQLITE_OPEN_READ_WRITE);
let pool = r2d2::Pool::new(mgr)?;
let conn = pool.get()?;
@@ -128,7 +129,7 @@ impl CasDb {
Ok(conn.last_insert_rowid() as u64)
}
- pub fn delete_blobs(&mut self, blobs: &[String]) -> Result<()> {
+ pub fn delete_blobs(&self, blobs: &[String]) -> Result<()> {
let delete_blobs_sql = "DELETE FROM Blobs WHERE BlobId = (?1)";
let delete_chunks_sql = "DELETE FROM Chunks WHERE BlobId = (?1)";
let mut conn = self.get_connection()?;
diff --git a/storage/src/cache/dedup/mod.rs b/storage/src/cache/dedup/mod.rs
index f52a8fcc1de..64bb17d6e82 100644
--- a/storage/src/cache/dedup/mod.rs
+++ b/storage/src/cache/dedup/mod.rs
@@ -2,11 +2,26 @@
//
// SPDX-License-Identifier: Apache-2.0
+use std::collections::hash_map::Entry;
+use std::collections::HashMap;
use std::fmt::{self, Display, Formatter};
+use std::fs::{File, OpenOptions};
use std::io::Error;
+use std::path::Path;
+use std::sync::{Arc, Mutex, RwLock};
+
+use nydus_utils::digest::RafsDigest;
+
+use crate::cache::dedup::db::CasDb;
+use crate::device::{BlobChunkInfo, BlobInfo};
+use crate::utils::copy_file_range;
mod db;
+lazy_static::lazy_static!(
+ static ref CAS_MGR: Mutex