Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: 802 composable bloom filters #917

Merged
merged 3 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 54 additions & 1 deletion packages/core/src/bloom/bloom.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
* Reference: [Bloom Filter in VeChain Thor](https://github.com/vechain/thor/blob/master/thor/bloom/bloom.go).
*/
import * as utils from '@noble/curves/abstract/utils';
import { assert, BLOOM } from '@vechain/sdk-errors';
import { blake2b256 } from '../hash';

/**
Expand Down Expand Up @@ -124,6 +125,45 @@ class Filter {
this.k = k;
}

/**
 * Builds the union of this filter and `other` by OR-ing their bits element by element.
 * The result is returned as a new filter that keeps this filter's `k`.
 *
 * Two filters can be merged only when their bit arrays have equal length and they were generated
 * with the same number `k` of hash functions. The length is validated first, then `k`.
 *
 * @param {Filter} other - The filter to merge with this one.
 * @returns {Filter} - A new filter holding the OR-ed bits.
 *
 * @throws InvalidBloomError If the bit arrays of the two filters differ in length.
 * @throws InvalidKError If the two filters were generated with a different `k` number of hash functions.
 */
public compose(other: Filter): Filter {
    // Precondition 1: equal length, otherwise the element-wise OR below is not defined.
    const sameLength = other.bits.length === this.bits.length;
    assert(
        'Filter.compose',
        sameLength,
        BLOOM.INVALID_BLOOM,
        'Filters have different lengths',
        { this: this, other }
    );

    // Precondition 2: same number of hash functions.
    const sameK = other.k === this.k;
    assert(
        'Filter.compose',
        sameK,
        BLOOM.INVALID_K,
        'Filters generated with different k number of hash functions',
        { this: this, other }
    );

    // Fresh byte array: each element is the OR of the elements at the same position.
    const union = Uint8Array.from(
        this.bits,
        (element, position) => element | other.bits[position]
    );
    return new Filter(union, this.k);
}

/**
* Checks if the Bloom filter may contain the specified key.
* Note: false positives are possible, but false negatives are not.
Expand All @@ -142,6 +182,19 @@ class Filter {
}
);
}

/**
 * Tells whether this filter can be composed with another one.
 * That is the case when both filters share the same `k` (number of hash functions used to
 * generate them) and their bit arrays have the same length.
 *
 * @param {Filter} other - The filter to check against.
 *
 * @return {boolean} - True when both `k` and the length of the bits match, false otherwise.
 */
public isComposableWith(other: Filter): boolean {
    // `k` is checked first; the lengths are compared only when the hash counts agree.
    if (other.k !== this.k) {
        return false;
    }
    return other.bits.length === this.bits.length;
}
}

/**
Expand Down Expand Up @@ -170,7 +223,7 @@ class Generator {
* Generates a Bloom filter with the specified number of bits per key and number of hash functions.
* The generator will be reset after generation.
*
* @param {number} bitsPerKey - The desired number of bits per key in the Bloom filter.
* @param {number} bitsPerKey - The desired number of bits per key in the Bloom filter (the ratio `m/n` in math literature, where `m` is the filter size in bits and `n` is the number of keys).
* @param {number} k - The number of hash functions to use in the Bloom filter.
* @returns {Filter} - The generated Bloom filter.
*/
Expand Down
130 changes: 130 additions & 0 deletions packages/core/tests/bloom/bloom.unit.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import * as utils from '@noble/curves/abstract/utils';
import { describe, expect, test } from '@jest/globals';
import { bloom, Hex } from '../../src';
import { bloomKTestCases } from './fixture';
import { InvalidBloomError, InvalidKError } from '../../../errors';

/**
* Bloom filter tests
Expand All @@ -13,6 +14,135 @@ import { bloomKTestCases } from './fixture';
describe('Bloom Filter', () => {
const textEncoder = new TextEncoder();

describe('compose', () => {
    const m = 20; // Bits per key.
    const k = bloom.calculateK(m);
    const keys1 = ['key1.1', 'key1.2', 'key1.3'];
    const keys2 = ['key2.1', 'key2.2', 'key2.3'];

    /**
     * Builds a filter from the encoded form of each key, using a dedicated generator.
     * Shared by every test below so the setup is written once.
     *
     * @param keys - The keys to add to the generator.
     * @param bitsPerKey - The bits-per-key value passed to `generate`.
     * @param hashes - The number of hash functions passed to `generate`.
     * @returns The generated filter.
     */
    const filterOf = (keys: string[], bitsPerKey: number, hashes: number) => {
        const generator = new bloom.Generator();
        keys.forEach((key) => {
            generator.add(textEncoder.encode(key));
        });
        return generator.generate(bitsPerKey, hashes);
    };

    test('compose - invalid - different length', () => {
        const filter1 = filterOf(keys1, m, k);
        // A different bits-per-key value is used to obtain a filter of another length.
        const filter2 = filterOf(keys2, m * m, k);
        expect(() => {
            filter1.compose(filter2);
        }).toThrow(InvalidBloomError);
    });

    test('compose - invalid - different k', () => {
        const filter1 = filterOf(keys1, m, k);
        // Same bits-per-key, one hash function less.
        const filter2 = filterOf(keys2, m, k - 1);
        expect(() => {
            filter1.compose(filter2);
        }).toThrow(InvalidKError);
    });

    test('compose - valid - possibly in set', () => {
        const filterUnion = filterOf(keys1, m, k).compose(
            filterOf(keys2, m, k)
        );
        // Every key of either operand must be reported by the union.
        [...keys1, ...keys2].forEach((key) => {
            expect(
                filterUnion.contains(textEncoder.encode(key))
            ).toBeTruthy();
        });
    });

    test('compose - valid - not in set', () => {
        const filterUnion = filterOf(keys1, m, k).compose(
            filterOf(keys2, m, k)
        );
        // A key added to neither operand is expected to be absent for these fixtures.
        expect(
            filterUnion.contains(textEncoder.encode('alien'))
        ).toBeFalsy();
    });
});

describe('isComposable', () => {
    const m = 20; // Bits per key.
    const k = bloom.calculateK(m);
    const keys1 = ['key1.1', 'key1.2', 'key1.3'];
    const keys2 = ['key2.1', 'key2.2', 'key2.3'];

    /**
     * Builds a filter from the encoded form of each key, using a dedicated generator.
     * Shared by every test below so the setup is written once.
     *
     * @param keys - The keys to add to the generator.
     * @param bitsPerKey - The bits-per-key value passed to `generate`.
     * @param hashes - The number of hash functions passed to `generate`.
     * @returns The generated filter.
     */
    const filterOf = (keys: string[], bitsPerKey: number, hashes: number) => {
        const generator = new bloom.Generator();
        keys.forEach((key) => {
            generator.add(textEncoder.encode(key));
        });
        return generator.generate(bitsPerKey, hashes);
    };

    test('isComposable - false - different length', () => {
        const filter1 = filterOf(keys1, m, k);
        // A different bits-per-key value is used to obtain a filter of another length.
        const filter2 = filterOf(keys2, m * m, k);
        expect(filter1.isComposableWith(filter2)).toBeFalsy();
    });

    test('isComposable - false - different k', () => {
        const filter1 = filterOf(keys1, m, k);
        // Same bits-per-key, one hash function less.
        const filter2 = filterOf(keys2, m, k - 1);
        expect(filter1.isComposableWith(filter2)).toBeFalsy();
    });

    test('isComposable - true', () => {
        const filter1 = filterOf(keys1, m, k);
        const filter2 = filterOf(keys2, m, k);
        expect(filter1.isComposableWith(filter2)).toBeTruthy();
    });
});

/**
* Test estimate K function
*/
Expand Down
Loading