Skip to content

Commit

Permalink
Use SHA for BLOB update instead of modification time (#3697)
Browse files Browse the repository at this point in the history
* feat: Use file SHA instead of last modification time

* tests: Add unit tests for SHA version

* fix: add missing comment for rat-plugin

* fix: Use sha256 instead of sha1

* fix: fix tests

* feat: Use Checksum instead of hash for faster computation

* tests: Add tests for checksum
  • Loading branch information
paxadax authored Oct 4, 2024
1 parent 892ffe2 commit 1e8eee6
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ public BlobStoreFileInputStream(BlobStoreFile part) throws IOException {

@Override
public long getVersion() throws IOException {
return part.getModTime();
return part.getVersion();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ public abstract class BlobStoreFile {

public abstract long getModTime() throws IOException;

/**
 * Returns the version identifier of this blob file.
 *
 * <p>This base implementation simply reports the value of {@link #getModTime()}.
 *
 * @return the value returned by {@link #getModTime()}
 * @throws IOException if {@link #getModTime()} throws
 */
public long getVersion() throws IOException {
    final long modTime = getModTime();
    return modTime;
}

public abstract InputStream getInputStream() throws IOException;

public abstract OutputStream getOutputStream() throws IOException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ public ReadableBlobMeta getBlobMeta(String key, Subject who) throws Authorizatio
rbm.set_settable(meta);
try {
LocalFsBlobStoreFile pf = fbs.read(DATA_PREFIX + key);
rbm.set_version(pf.getModTime());
rbm.set_version(pf.getVersion());
} catch (IOException e) {
throw new RuntimeException(e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
* Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. The ASF licenses this file to you under the Apache License, Version
* 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
*
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
*
* <p>
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
* and limitations under the License.
Expand All @@ -21,14 +21,20 @@
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.regex.Matcher;
import java.util.zip.CRC32C;
import java.util.zip.Checksum;

import org.apache.commons.io.FileUtils;
import org.apache.storm.generated.SettableBlobMeta;


public class LocalFsBlobStoreFile extends BlobStoreFile {

private final String key;
private final boolean isTmp;
private final File path;
private final boolean mustBeNew;
private final Checksum checksumAlgorithm;
private SettableBlobMeta meta;

public LocalFsBlobStoreFile(File base, String name) {
Expand All @@ -44,12 +50,14 @@ public LocalFsBlobStoreFile(File base, String name) {
key = base.getName();
path = new File(base, name);
mustBeNew = false;
checksumAlgorithm = new CRC32C();
}

public LocalFsBlobStoreFile(File base, boolean isTmp, boolean mustBeNew) {
key = base.getName();
this.isTmp = isTmp;
this.mustBeNew = mustBeNew;
checksumAlgorithm = new CRC32C();
if (this.isTmp) {
path = new File(base, System.currentTimeMillis() + TMP_EXT);
} else {
Expand All @@ -72,6 +80,11 @@ public String getKey() {
return key;
}

/**
 * Returns a content-derived version for this blob: the CRC32C checksum of the file at
 * {@code path}.
 *
 * <p>A fresh {@link CRC32C} is created on every call. {@link Checksum} instances are stateful
 * and {@code FileUtils.checksum} updates the one it is given without resetting it, so reusing
 * a single instance would fold the file's bytes into the previous result and make repeated
 * calls on the same object return different values for unchanged content.
 *
 * @return the CRC32C checksum of the file's current content
 * @throws IOException if the file cannot be read
 */
@Override
public long getVersion() throws IOException {
    return FileUtils.checksum(path, new CRC32C()).getValue();
}

@Override
public long getModTime() throws IOException {
return path.lastModified();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. The ASF licenses this file to you under the Apache License, Version
* 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
* and limitations under the License.
*/

package org.apache.storm.blobstore;

import org.apache.commons.io.FileUtils;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.util.zip.CRC32C;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;

/**
 * Unit tests for {@link LocalFsBlobStoreFile}, checking that {@code getVersion()} reports the
 * CRC32C checksum of the file content and that {@code getModTime()} reports the file's
 * last-modified timestamp.
 */
class LocalFsBlobStoreFileTest {

    private File tempFile;
    private LocalFsBlobStoreFile blobStoreFile;
    private CRC32C checksumAlgorithm;

    /**
     * Creates a temp file with known content, a blob store file pointing at it, and a fresh
     * checksum instance for computing expected values.
     */
    @BeforeEach
    public void setUp() throws IOException {
        tempFile = Files.createTempFile(null, ".tmp").toFile();
        // A new temp file is created for every test; make sure none of them outlive the JVM.
        tempFile.deleteOnExit();
        try (FileOutputStream fs = new FileOutputStream(tempFile)) {
            fs.write("Content for checksum".getBytes());
        }
        blobStoreFile = new LocalFsBlobStoreFile(tempFile.getParentFile(), tempFile.getName());
        checksumAlgorithm = new CRC32C();
    }

    /** The version must equal the CRC32C checksum of the file's current content. */
    @Test
    void testGetVersion() throws IOException {
        long expectedVersion = FileUtils.checksum(tempFile, checksumAlgorithm).getValue();
        long actualVersion = blobStoreFile.getVersion();
        assertEquals(expectedVersion, actualVersion, "The version should match the expected checksum value.");
    }

    /** Rewriting the file with different content must change the reported version. */
    @Test
    void testGetVersion_Mismatch() throws IOException {
        // Checksum of the original content, captured before the file is overwritten.
        long expectedVersion = FileUtils.checksum(tempFile, checksumAlgorithm).getValue();
        try (FileOutputStream fs = new FileOutputStream(tempFile)) {
            fs.write("Different content".getBytes());
        }
        long actualVersion = blobStoreFile.getVersion();
        assertNotEquals(expectedVersion, actualVersion, "The version shouldn't match the checksum value of different content.");
    }

    /** The modification time must equal the underlying file's last-modified timestamp. */
    @Test
    void testGetModTime() throws IOException {
        long expectedModTime = tempFile.lastModified();
        long actualModTime = blobStoreFile.getModTime();
        assertEquals(expectedModTime, actualModTime, "The modification time should match the expected value.");
    }
}

0 comments on commit 1e8eee6

Please sign in to comment.