Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support BIT and BYTE options in the BITCOUNT command #2087

Merged
merged 15 commits into from
Feb 7, 2024
Merged
6 changes: 4 additions & 2 deletions src/commands/cmd_bit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,9 @@ class CommandBitCount : public Commander {

if (args.size() == 5) {
if (util::EqualICase(args[4], "BYTE")) {
jihuayu marked this conversation as resolved.
Show resolved Hide resolved
is_bit_index_ = false;
} else if (util::EqualICase(args[4], "BIT")) {
return {Status::RedisExecErr, errNotImplemented};
is_bit_index_ = true;
} else {
return {Status::RedisParseErr, errInvalidSyntax};
}
Expand All @@ -133,7 +134,7 @@ class CommandBitCount : public Commander {
Status Execute(Server *srv, Connection *conn, std::string *output) override {
uint32_t cnt = 0;
redis::Bitmap bitmap_db(srv->storage, conn->GetNamespace());
auto s = bitmap_db.BitCount(args_[1], start_, stop_, &cnt);
auto s = bitmap_db.BitCount(args_[1], start_, stop_, is_bit_index_, &cnt);
if (!s.ok()) return {Status::RedisExecErr, s.ToString()};

*output = redis::Integer(cnt);
Expand All @@ -143,6 +144,7 @@ class CommandBitCount : public Commander {
private:
int64_t start_ = 0;
int64_t stop_ = -1;
bool is_bit_index_ = false;
};

class CommandBitPos : public Commander {
Expand Down
105 changes: 65 additions & 40 deletions src/types/redis_bitmap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,31 @@ const char kErrBitmapStringOutOfRange[] =
"The size of the bitmap string exceeds the "
"configuration item max-bitmap-to-string-mb";

/*
* If you setbit bit 0 1, the value is stored as 0x01 in Kvrocks but 0x80 in Redis.
* So we need to swap the bits to keep the same return value as Redis.
* This swap table is generated according to the following mapping definition.
* kBitSwapTable(x) = ((x & 0x80) >> 7)| ((x & 0x40) >> 5)|\
* ((x & 0x20) >> 3)| ((x & 0x10) >> 1)|\
* ((x & 0x08) << 1)| ((x & 0x04) << 3)|\
* ((x & 0x02) << 5)| ((x & 0x01) << 7);
*/
static const uint8_t kBitSwapTable[256] = {
0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48,
0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4,
0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C,
0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2,
0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A,
0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E,
0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1, 0x21,
0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9,
0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55,
0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD,
0x7D, 0xFD, 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, 0x0B,
0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7,
0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F,
0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF};

// Resize the segment so that its new length is at least min_bytes; new bytes will be set to 0.
// min_bytes cannot be more than kBitmapSegmentBytes.
void ExpandBitmapSegment(std::string *segment, size_t min_bytes) {
Expand Down Expand Up @@ -129,34 +154,9 @@ rocksdb::Status Bitmap::GetString(const Slice &user_key, const uint32_t max_btos
uint32_t valid_size = std::min(
{fragment.size(), static_cast<size_t>(kBitmapSegmentBytes), static_cast<size_t>(metadata.size - frag_index)});

/*
* If you setbit bit 0 1, the value is stored as 0x01 in Kvrocks but 0x80 in Redis.
* So we need to swap the bits to keep the same return value as Redis.
* This swap table is generated according to the following mapping definition.
* swap_table(x) = ((x & 0x80) >> 7)| ((x & 0x40) >> 5)|\
* ((x & 0x20) >> 3)| ((x & 0x10) >> 1)|\
* ((x & 0x08) << 1)| ((x & 0x04) << 3)|\
* ((x & 0x02) << 5)| ((x & 0x01) << 7);
*/
static const uint8_t swap_table[256] = {
0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88,
0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4,
0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC,
0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A,
0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6,
0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE,
0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85,
0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD,
0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3,
0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97,
0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF,
0x3F, 0xBF, 0x7F, 0xFF};
for (uint32_t i = 0; i < valid_size; i++) {
if (!fragment[i]) continue;
fragment[i] = static_cast<char>(swap_table[static_cast<uint8_t>(fragment[i])]);
fragment[i] = static_cast<char>(kBitSwapTable[static_cast<uint8_t>(fragment[i])]);
}
value->replace(frag_index, valid_size, fragment.data(), valid_size);
}
Expand Down Expand Up @@ -210,7 +210,7 @@ rocksdb::Status Bitmap::SetBit(const Slice &user_key, uint32_t offset, bool new_
return storage_->Write(storage_->DefaultWriteOptions(), batch->GetWriteBatch());
}

rocksdb::Status Bitmap::BitCount(const Slice &user_key, int64_t start, int64_t stop, uint32_t *cnt) {
rocksdb::Status Bitmap::BitCount(const Slice &user_key, int64_t start, int64_t stop, bool is_bit_index, uint32_t *cnt) {
*cnt = 0;
std::string raw_value;
std::string ns_key = AppendNamespacePrefix(user_key);
Expand All @@ -226,23 +226,32 @@ rocksdb::Status Bitmap::BitCount(const Slice &user_key, int64_t start, int64_t s

if (metadata.Type() == kRedisString) {
redis::BitmapString bitmap_string_db(storage_, namespace_);
return bitmap_string_db.BitCount(raw_value, start, stop, cnt);
return bitmap_string_db.BitCount(raw_value, start, stop, is_bit_index, cnt);
}

auto totlen = static_cast<int64_t>(metadata.size);
if (is_bit_index) totlen <<= 3;
// Counting bits in byte [start, stop].
std::tie(start, stop) = BitmapString::NormalizeRange(start, stop, static_cast<int64_t>(metadata.size));
std::tie(start, stop) = BitmapString::NormalizeRange(start, stop, totlen);
// Always return 0 if start is greater than stop after normalization.
if (start > stop) return rocksdb::Status::OK();

auto u_start = static_cast<uint32_t>(start);
auto u_stop = static_cast<uint32_t>(stop);
int64_t start_byte = start;
int64_t stop_byte = stop;
uint8_t first_byte_neg_mask = 0, last_byte_neg_mask = 0;
std::tie(start_byte, stop_byte) = BitmapString::NormalizeToByteRangeWithPaddingMask(
is_bit_index, start, stop, &first_byte_neg_mask, &last_byte_neg_mask);

auto u_start = static_cast<uint32_t>(start_byte);
auto u_stop = static_cast<uint32_t>(stop_byte);

LatestSnapShot ss(storage_);
rocksdb::ReadOptions read_options;
read_options.snapshot = ss.GetSnapShot();
uint32_t start_index = u_start / kBitmapSegmentBytes;
uint32_t stop_index = u_stop / kBitmapSegmentBytes;
// Don't use multi-get here, so that a large range query does not take too much memory.
uint32_t mask_cnt = 0;
for (uint32_t i = start_index; i <= stop_index; i++) {
rocksdb::PinnableSlice pin_value;
std::string sub_key =
Expand All @@ -252,16 +261,32 @@ rocksdb::Status Bitmap::BitCount(const Slice &user_key, int64_t start, int64_t s
if (!s.ok() && !s.IsNotFound()) return s;
// NotFound means all bits in this segment are 0.
if (s.IsNotFound()) continue;
// Counting bits in [start_in_segment, start_in_segment + length_in_segment)
size_t start_in_segment = 0;
if (i == start_index) start_in_segment = u_start % kBitmapSegmentBytes;
// Though `ExpandBitmapSegment` might generate a segment with logical size less than pin_value.size(),
// the `RawPopcount` will always return 0 on these padding bytes, so we don't need to worry about it.
auto length_in_segment = static_cast<int64_t>(pin_value.size());
if (i == stop_index) length_in_segment = u_stop % kBitmapSegmentBytes + 1;
*cnt += BitmapString::RawPopcount(reinterpret_cast<const uint8_t *>(pin_value.data()) + start_in_segment,
length_in_segment);
// Counting bits in [start_in_segment, stop_in_segment]
int64_t start_in_segment = 0; // start_index in 1024 bytes segment
auto readable_stop_in_segment = static_cast<int64_t>(pin_value.size() - 1); // stop_index in 1024 bytes segment
auto stop_in_segment = readable_stop_in_segment;
if (i == start_index) {
start_in_segment = u_start % kBitmapSegmentBytes;
if (is_bit_index && start_in_segment <= readable_stop_in_segment && first_byte_neg_mask != 0) {
uint8_t first_mask_byte =
kBitSwapTable[static_cast<uint8_t>(pin_value[start_in_segment])] & first_byte_neg_mask;
mask_cnt += BitmapString::RawPopcount(&first_mask_byte, 1);
}
}
if (i == stop_index) {
stop_in_segment = u_stop % kBitmapSegmentBytes;
if (is_bit_index && stop_in_segment <= readable_stop_in_segment && last_byte_neg_mask != 0) {
uint8_t last_mask_byte = kBitSwapTable[static_cast<uint8_t>(pin_value[stop_in_segment])] & last_byte_neg_mask;
mask_cnt += BitmapString::RawPopcount(&last_mask_byte, 1);
}
}
if (stop_in_segment >= start_in_segment && readable_stop_in_segment >= start_in_segment) {
int64_t bytes = 0;
bytes = std::min(stop_in_segment, readable_stop_in_segment) - start_in_segment + 1;
*cnt += BitmapString::RawPopcount(reinterpret_cast<const uint8_t *>(pin_value.data()) + start_in_segment, bytes);
}
}
*cnt -= mask_cnt;
return rocksdb::Status::OK();
}

Expand Down
2 changes: 1 addition & 1 deletion src/types/redis_bitmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class Bitmap : public Database {
rocksdb::Status GetBit(const Slice &user_key, uint32_t offset, bool *bit);
rocksdb::Status GetString(const Slice &user_key, uint32_t max_btos_size, std::string *value);
rocksdb::Status SetBit(const Slice &user_key, uint32_t offset, bool new_bit, bool *old_bit);
rocksdb::Status BitCount(const Slice &user_key, int64_t start, int64_t stop, uint32_t *cnt);
rocksdb::Status BitCount(const Slice &user_key, int64_t start, int64_t stop, bool is_bit_index, uint32_t *cnt);
rocksdb::Status BitPos(const Slice &user_key, bool bit, int64_t start, int64_t stop, bool stop_given, int64_t *pos);
rocksdb::Status BitOp(BitOpFlags op_flag, const std::string &op_name, const Slice &user_key,
const std::vector<Slice> &op_keys, int64_t *len);
Expand Down
44 changes: 39 additions & 5 deletions src/types/redis_bitmap_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,18 +68,38 @@ rocksdb::Status BitmapString::SetBit(const Slice &ns_key, std::string *raw_value
return storage_->Write(storage_->DefaultWriteOptions(), batch->GetWriteBatch());
}

rocksdb::Status BitmapString::BitCount(const std::string &raw_value, int64_t start, int64_t stop, uint32_t *cnt) {
rocksdb::Status BitmapString::BitCount(const std::string &raw_value, int64_t start, int64_t stop, bool is_bit_index,
uint32_t *cnt) {
*cnt = 0;
std::string_view string_value = std::string_view{raw_value}.substr(Metadata::GetOffsetAfterExpire(raw_value[0]));
auto strlen = static_cast<int64_t>(string_value.size());
std::tie(start, stop) = NormalizeRange(start, stop, strlen);
int64_t totlen = strlen;
if (is_bit_index) totlen <<= 3;
std::tie(start, stop) = NormalizeRange(start, stop, totlen);
// Always return 0 if start is greater than stop after normalization.
if (start > stop) return rocksdb::Status::OK();

/* By default:
* start means start byte in bitmap, stop means stop byte in bitmap.
* When is_bit_index is true, start and stop mean the start bit and the stop bit,
* so the bit range must be normalized to a byte range. */
int64_t start_byte = start;
int64_t stop_byte = stop;
uint8_t first_byte_neg_mask = 0, last_byte_neg_mask = 0;
std::tie(start_byte, stop_byte) =
NormalizeToByteRangeWithPaddingMask(is_bit_index, start, stop, &first_byte_neg_mask, &last_byte_neg_mask);

/* Precondition: end >= 0 && end < strlen, so the only condition where
* zero can be returned is: start > stop. */
if (start <= stop) {
int64_t bytes = stop - start + 1;
*cnt = RawPopcount(reinterpret_cast<const uint8_t *>(string_value.data()) + start, bytes);
int64_t bytes = stop_byte - start_byte + 1;
*cnt = RawPopcount(reinterpret_cast<const uint8_t *>(string_value.data()) + start_byte, bytes);
if (first_byte_neg_mask != 0 || last_byte_neg_mask != 0) {
uint8_t firstlast[2] = {0, 0};
if (first_byte_neg_mask != 0) firstlast[0] = string_value[start_byte] & first_byte_neg_mask;
if (last_byte_neg_mask != 0) firstlast[1] = string_value[stop_byte] & last_byte_neg_mask;
*cnt -= RawPopcount(firstlast, 2);
}

return rocksdb::Status::OK();
}

Expand Down Expand Up @@ -202,6 +222,20 @@ std::pair<int64_t, int64_t> BitmapString::NormalizeRange(int64_t origin_start, i
return {origin_start, origin_end};
}

// Converts the inclusive range [origin_start, origin_end] to a byte range.
// When is_bit is false the range is returned unchanged and neither mask is written.
// When is_bit is true the inputs are treated as bit offsets: each is divided by 8 to get
// its byte index, and the two out-parameters receive masks selecting the bits of the
// first/last byte that fall OUTSIDE the requested bit range.
// Requires origin_start <= origin_end (checked by the DCHECK below).
std::pair<int64_t, int64_t> BitmapString::NormalizeToByteRangeWithPaddingMask(bool is_bit, int64_t origin_start,
int64_t origin_end,
uint8_t *first_byte_neg_mask,
uint8_t *last_byte_neg_mask) {
DCHECK(origin_start <= origin_end);
if (is_bit) {
mapleFU marked this conversation as resolved.
Show resolved Hide resolved
// Sets the (origin_start & 7) highest bits of the mask, e.g. offset 3 -> 0xE0; offset 0 -> 0x00.
*first_byte_neg_mask = ~((1 << (8 - (origin_start & 7))) - 1) & 0xFF;
// Sets the (7 - (origin_end & 7)) lowest bits of the mask, e.g. offset 5 -> 0x03; offset 7 -> 0x00.
*last_byte_neg_mask = (1 << (7 - (origin_end & 7))) - 1;
// Bit offset -> index of the byte containing that bit.
origin_start >>= 3;
origin_end >>= 3;
}
return {origin_start, origin_end};
}

rocksdb::Status BitmapString::Bitfield(const Slice &ns_key, std::string *raw_value,
const std::vector<BitfieldOperation> &ops,
std::vector<std::optional<BitfieldValue>> *rets) {
Expand Down
18 changes: 17 additions & 1 deletion src/types/redis_bitmap_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ class BitmapString : public Database {
BitmapString(engine::Storage *storage, const std::string &ns) : Database(storage, ns) {}
static rocksdb::Status GetBit(const std::string &raw_value, uint32_t offset, bool *bit);
rocksdb::Status SetBit(const Slice &ns_key, std::string *raw_value, uint32_t offset, bool new_bit, bool *old_bit);
static rocksdb::Status BitCount(const std::string &raw_value, int64_t start, int64_t stop, uint32_t *cnt);
static rocksdb::Status BitCount(const std::string &raw_value, int64_t start, int64_t stop, bool is_bit_index,
uint32_t *cnt);
static rocksdb::Status BitPos(const std::string &raw_value, bool bit, int64_t start, int64_t stop, bool stop_given,
int64_t *pos);
rocksdb::Status Bitfield(const Slice &ns_key, std::string *raw_value, const std::vector<BitfieldOperation> &ops,
Expand All @@ -56,6 +57,21 @@ class BitmapString : public Database {
// Return:
// The normalized [start, end] range.
static std::pair<int64_t, int64_t> NormalizeRange(int64_t origin_start, int64_t origin_end, int64_t length);

// NormalizeToByteRangeWithPaddingMask converts the input index range to a normalized byte index range.
// If is_bit_index is false, it does nothing: the range is returned unchanged and the masks are not written.
// If is_bit_index is true, it converts the bit index range to a byte index range, and
// fills in the first byte negative mask and the last byte negative mask.
// For example, if the starting bit is at offset 3 (counting from the most significant bit) of the first byte,
// like '00010000', the first_byte_neg_mask will be '11100000'; if the end bit is at offset 5 of the last byte,
// like '00000100', the last_byte_neg_mask will be '00000011'.
//
// Return:
// The normalized [start_byte, stop_byte]
static std::pair<int64_t, int64_t> NormalizeToByteRangeWithPaddingMask(bool is_bit_index, int64_t origin_start,
int64_t origin_end,
uint8_t *first_byte_neg_mask,
uint8_t *last_byte_neg_mask);
};

} // namespace redis
Loading
Loading