Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor #6

Closed
wants to merge 13 commits into from
130 changes: 73 additions & 57 deletions repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,18 @@ type Repo struct {
cloud cloud.Cloud // 云端存储服务
}

type ChunkSize struct {
zxhd863943427 marked this conversation as resolved.
Show resolved Hide resolved
MinSize uint
MaxSize uint
}

var (
RepoChunkSize = &ChunkSize{
zxhd863943427 marked this conversation as resolved.
Show resolved Hide resolved
MinSize: chunker.MinSize,
MaxSize: chunker.MaxSize,
}
)

// NewRepo 创建一个新的仓库。
func NewRepo(dataPath, repoPath, historyPath, tempPath, deviceID, deviceName, deviceOS string, aesKey []byte, ignoreLines []string, cloud cloud.Cloud) (ret *Repo, err error) {
if nil != cloud {
Expand Down Expand Up @@ -742,6 +754,15 @@ func (repo *Repo) index0(memo string, context map[string]interface{}) (ret *enti
return
}

putFileErr := repo.store.PutFile(file)
88250 marked this conversation as resolved.
Show resolved Hide resolved

if nil != putFileErr {
workerErrLock.Lock()
workerErrs = append(workerErrs, putFileErr)
workerErrLock.Unlock()
return
}

if 1 > len(file.Chunks) {
workerErrLock.Lock()
putErr = fmt.Errorf("file [%s, %s, %s, %d] has no chunks", file.ID, file.Path, time.UnixMilli(file.Updated).Format("2006-01-02 15:04:05"), file.Size)
Expand Down Expand Up @@ -849,54 +870,58 @@ func (repo *Repo) relPath(absPath string) string {
}

func (repo *Repo) putFileChunks(file *entity.File, context map[string]interface{}, count, total int) (err error) {
absPath := repo.absPath(file.Path)
chunks, err := repo.createChunks(file, RepoChunkSize)

if chunker.MinSize > file.Size {
var data []byte
data, err = filelock.ReadFile(absPath)
if nil != err {
logging.LogErrorf("read file [%s] failed: %s", absPath, err)
return
}

chunkHash := util.Hash(data)
file.Chunks = append(file.Chunks, chunkHash)
chunk := &entity.Chunk{ID: chunkHash, Data: data}
for _, chunk := range chunks {
file.Chunks = append(file.Chunks, chunk.ID)
if err = repo.store.PutChunk(chunk); nil != err {
logging.LogErrorf("put chunk [%s] failed: %s", chunkHash, err)
logging.LogErrorf("put chunk [%s] failed: %s", chunk.ID, err)
return
}

newInfo, statErr := os.Stat(absPath)
if nil != statErr {
logging.LogErrorf("stat file [%s] failed: %s", absPath, statErr)
err = statErr
return
}
}

_, checkErr := repo.checkFileIfUpdate(file)
if nil != checkErr {
err = checkErr
return
}

eventbus.Publish(eventbus.EvtIndexUpsertFile, context, count, total)

return
}

func (repo *Repo) createChunks(file *entity.File, chunkSize *ChunkSize) (chunks []*entity.Chunk, err error) {
absPath := repo.absPath(file.Path)
chunks = make([]*entity.Chunk, 0)

newSize := newInfo.Size()
newUpdated := newInfo.ModTime().Unix()
if file.Size != newSize || file.SecUpdated() != newUpdated {
logging.LogErrorf("file changed [%s], size [%d -> %d], updated [%d -> %d]", absPath, file.Size, newSize, file.SecUpdated(), newUpdated)
err = ErrIndexFileChanged
reader, err := filelock.OpenFile(absPath, os.O_RDONLY, 0644)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里请勿调整,按原来的实现。

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

现在的实现有什么问题吗?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

分块和不分块这两部分逻辑比较独立,没有必要用 if else 区别。

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

另外,eventbus.Publish(eventbus.EvtIndexUpsertFile, context, count, total) 和 putFileErr := repo.store.PutFile(file) 的顺序也不要调整。

Copy link
Author

@zxhd863943427 zxhd863943427 Sep 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

分块和不分块这两部分逻辑比较独立,没有必要用 if else 区别。

在同一个函数里,不使用 if else ,没法控制不执行在后面分块的逻辑,除非把整个复用的部分全分别塞在两部分逻辑里,使用提前返回控制。

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

是的,按照原来的实现

if nil != err {
logging.LogErrorf("open file [%s] failed: %s", absPath, err)
return
}
defer func() {
if closeErr := filelock.CloseFile(reader); nil != closeErr {
logging.LogErrorf("close file [%s] failed: %s", absPath, closeErr)
return
}
}()

eventbus.Publish(eventbus.EvtIndexUpsertFile, context, count, total)
err = repo.store.PutFile(file)
if chunker.MinSize > file.Size {
var data []byte
data, err = io.ReadAll(reader)
if nil != err {
logging.LogErrorf("read file [%s] failed: %s", absPath, err)
return
}
return
}

reader, err := filelock.OpenFile(absPath, os.O_RDONLY, 0644)
if nil != err {
logging.LogErrorf("open file [%s] failed: %s", absPath, err)
chunk := createChunk(data)
chunks = append(chunks, chunk)
return
}

chnkr := chunker.NewWithBoundaries(reader, repo.chunkPol, chunker.MinSize, chunker.MaxSize)
chnkr := chunker.NewWithBoundaries(reader, repo.chunkPol, chunkSize.MinSize, chunkSize.MaxSize)
for {
buf := make([]byte, chunker.MaxSize)
chnk, chnkErr := chnkr.Next(buf)
Expand All @@ -906,46 +931,37 @@ func (repo *Repo) putFileChunks(file *entity.File, context map[string]interface{
if nil != chnkErr {
err = chnkErr
logging.LogErrorf("chunk file [%s] failed: %s", absPath, chnkErr)
if closeErr := filelock.CloseFile(reader); nil != closeErr {
logging.LogErrorf("close file [%s] failed: %s", absPath, closeErr)
}
return
}

chunkHash := util.Hash(chnk.Data)
file.Chunks = append(file.Chunks, chunkHash)
chunk := &entity.Chunk{ID: chunkHash, Data: chnk.Data}
if err = repo.store.PutChunk(chunk); nil != err {
logging.LogErrorf("put chunk [%s] failed: %s", chunkHash, err)
if closeErr := filelock.CloseFile(reader); nil != closeErr {
logging.LogErrorf("close file [%s] failed: %s", absPath, closeErr)
}
return
}
chunk := createChunk(chnk.Data)
chunks = append(chunks, chunk)
}
return

if err = filelock.CloseFile(reader); nil != err {
logging.LogErrorf("close file [%s] failed: %s", absPath, err)
return
}
}

// createChunk wraps data in an entity.Chunk whose ID is util.Hash(data).
// The slice is stored as passed in; it is not copied.
func createChunk(data []byte) *entity.Chunk {
	return &entity.Chunk{
		ID:   util.Hash(data),
		Data: data,
	}
}

func (repo *Repo) checkFileIfUpdate(file *entity.File) (update bool, err error) {

absPath := repo.absPath(file.Path)
newInfo, statErr := os.Stat(absPath)
if nil != statErr {
logging.LogErrorf("stat file [%s] failed: %s", absPath, statErr)
err = statErr
return
}

newSize := newInfo.Size()
newUpdated := newInfo.ModTime().Unix()
if file.Size != newSize || file.SecUpdated() != newUpdated {
logging.LogErrorf("file changed [%s], size [%d -> %d], updated [%d -> %d]", absPath, file.Size, newSize, file.Updated, newUpdated)
zxhd863943427 marked this conversation as resolved.
Show resolved Hide resolved

update = file.Size != newSize || file.SecUpdated() != newUpdated
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里没有必要抽取变量了,直接条件判断。

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个函数的本意是判断文件是否更新,虽然为了跟之前的代码保持一致,在错误里也携带了相关信息,但连 bool 也不带感觉语义怎么看都不太合适……

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个条件并不复杂,请勿抽取变量,如果觉得不好阅读,加个注释即可。

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

主要是update是返回值啊……不抽取变量的话,这个函数不就变成一个只返回error的函数了?

要不我把 ErrIndexFileChanged 交给上一层的 putFileChunks 进行生成?checkFileIfUpdate 只产生 os.Stat 的报错。

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

你外面也没有用到这个返回值啊:

_, checkErr := repo.checkFileIfUpdate(file)

ErrIndexFileChanged 原本就是在 putFileChunks 里的……

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

嗯,那我就改成ErrIndexFileChanged在putFileChunks 里,不在check 函数里了

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

已修改

if update {
logging.LogErrorf("file changed [%s], size [%d -> %d], updated [%d -> %d]", absPath, file.Size, newSize, file.SecUpdated(), newUpdated)
err = ErrIndexFileChanged
return
}

eventbus.Publish(eventbus.EvtIndexUpsertFile, context, count, total)
err = repo.store.PutFile(file)
return
}

Expand Down