Skip to content

Commit

Permalink
Support BIT and BYTE options in the BITCOUNT command (apache#2087)
Browse files Browse the repository at this point in the history
Co-authored-by: mwish <[email protected]>
Co-authored-by: hulk <[email protected]>
Co-authored-by: 纪华裕 <[email protected]>
  • Loading branch information
4 people authored and JoverZhang committed Feb 24, 2024
1 parent 83362f4 commit e160215
Show file tree
Hide file tree
Showing 7 changed files with 230 additions and 60 deletions.
6 changes: 4 additions & 2 deletions src/commands/cmd_bit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,9 @@ class CommandBitCount : public Commander {

if (args.size() == 5) {
if (util::EqualICase(args[4], "BYTE")) {
is_bit_index_ = false;
} else if (util::EqualICase(args[4], "BIT")) {
return {Status::RedisExecErr, errNotImplemented};
is_bit_index_ = true;
} else {
return {Status::RedisParseErr, errInvalidSyntax};
}
Expand All @@ -133,7 +134,7 @@ class CommandBitCount : public Commander {
Status Execute(Server *srv, Connection *conn, std::string *output) override {
uint32_t cnt = 0;
redis::Bitmap bitmap_db(srv->storage, conn->GetNamespace());
auto s = bitmap_db.BitCount(args_[1], start_, stop_, &cnt);
auto s = bitmap_db.BitCount(args_[1], start_, stop_, is_bit_index_, &cnt);
if (!s.ok()) return {Status::RedisExecErr, s.ToString()};

*output = redis::Integer(cnt);
Expand All @@ -143,6 +144,7 @@ class CommandBitCount : public Commander {
private:
int64_t start_ = 0;
int64_t stop_ = -1;
bool is_bit_index_ = false;
};

class CommandBitPos : public Commander {
Expand Down
105 changes: 65 additions & 40 deletions src/types/redis_bitmap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,31 @@ const char kErrBitmapStringOutOfRange[] =
"The size of the bitmap string exceeds the "
"configuration item max-bitmap-to-string-mb";

/*
 * Bit-reversal lookup table: kBitSwapTable[x] is the byte x with its 8 bits in reverse order.
 *
 * If you setbit bit 0 1, the value is stored as 0x01 in Kvrocks but 0x80 in Redis,
 * so the bits of every byte have to be swapped to keep the same return value as Redis.
 * This swap table is generated according to the following mapping definition.
 * kBitSwapTable(x) = ((x & 0x80) >> 7)| ((x & 0x40) >> 5)|\
 *                    ((x & 0x20) >> 3)| ((x & 0x10) >> 1)|\
 *                    ((x & 0x08) << 1)| ((x & 0x04) << 3)|\
 *                    ((x & 0x02) << 5)| ((x & 0x01) << 7);
 */
static const uint8_t kBitSwapTable[256] = {
0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48,
0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4,
0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C,
0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2,
0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A,
0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E,
0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1, 0x21,
0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9,
0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55,
0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD,
0x7D, 0xFD, 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, 0x0B,
0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7,
0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F,
0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF};

// Resize the segment so that its new length is at least min_bytes; new bytes will be set to 0.
// min_bytes cannot be more than kBitmapSegmentBytes.
void ExpandBitmapSegment(std::string *segment, size_t min_bytes) {
Expand Down Expand Up @@ -129,34 +154,9 @@ rocksdb::Status Bitmap::GetString(const Slice &user_key, const uint32_t max_btos
uint32_t valid_size = std::min(
{fragment.size(), static_cast<size_t>(kBitmapSegmentBytes), static_cast<size_t>(metadata.size - frag_index)});

/*
* If you setbit bit 0 1, the value is stored as 0x01 in Kvrocks but 0x80 in Redis.
* So we need to swap the bits to keep the same return value as Redis.
* This swap table is generated according to the following mapping definition.
* swap_table(x) = ((x & 0x80) >> 7)| ((x & 0x40) >> 5)|\
* ((x & 0x20) >> 3)| ((x & 0x10) >> 1)|\
* ((x & 0x08) << 1)| ((x & 0x04) << 3)|\
* ((x & 0x02) << 5)| ((x & 0x01) << 7);
*/
static const uint8_t swap_table[256] = {
0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88,
0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4,
0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC,
0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A,
0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6,
0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE,
0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85,
0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD,
0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3,
0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97,
0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF,
0x3F, 0xBF, 0x7F, 0xFF};
for (uint32_t i = 0; i < valid_size; i++) {
if (!fragment[i]) continue;
fragment[i] = static_cast<char>(swap_table[static_cast<uint8_t>(fragment[i])]);
fragment[i] = static_cast<char>(kBitSwapTable[static_cast<uint8_t>(fragment[i])]);
}
value->replace(frag_index, valid_size, fragment.data(), valid_size);
}
Expand Down Expand Up @@ -210,7 +210,7 @@ rocksdb::Status Bitmap::SetBit(const Slice &user_key, uint32_t offset, bool new_
return storage_->Write(storage_->DefaultWriteOptions(), batch->GetWriteBatch());
}

rocksdb::Status Bitmap::BitCount(const Slice &user_key, int64_t start, int64_t stop, uint32_t *cnt) {
rocksdb::Status Bitmap::BitCount(const Slice &user_key, int64_t start, int64_t stop, bool is_bit_index, uint32_t *cnt) {
*cnt = 0;
std::string raw_value;
std::string ns_key = AppendNamespacePrefix(user_key);
Expand All @@ -226,23 +226,32 @@ rocksdb::Status Bitmap::BitCount(const Slice &user_key, int64_t start, int64_t s

if (metadata.Type() == kRedisString) {
redis::BitmapString bitmap_string_db(storage_, namespace_);
return bitmap_string_db.BitCount(raw_value, start, stop, cnt);
return bitmap_string_db.BitCount(raw_value, start, stop, is_bit_index, cnt);
}

auto totlen = static_cast<int64_t>(metadata.size);
if (is_bit_index) totlen <<= 3;
// Counting bits in byte [start, stop].
std::tie(start, stop) = BitmapString::NormalizeRange(start, stop, static_cast<int64_t>(metadata.size));
std::tie(start, stop) = BitmapString::NormalizeRange(start, stop, totlen);
// Always return 0 if start is greater than stop after normalization.
if (start > stop) return rocksdb::Status::OK();

auto u_start = static_cast<uint32_t>(start);
auto u_stop = static_cast<uint32_t>(stop);
int64_t start_byte = start;
int64_t stop_byte = stop;
uint8_t first_byte_neg_mask = 0, last_byte_neg_mask = 0;
std::tie(start_byte, stop_byte) = BitmapString::NormalizeToByteRangeWithPaddingMask(
is_bit_index, start, stop, &first_byte_neg_mask, &last_byte_neg_mask);

auto u_start = static_cast<uint32_t>(start_byte);
auto u_stop = static_cast<uint32_t>(stop_byte);

LatestSnapShot ss(storage_);
rocksdb::ReadOptions read_options;
read_options.snapshot = ss.GetSnapShot();
uint32_t start_index = u_start / kBitmapSegmentBytes;
uint32_t stop_index = u_stop / kBitmapSegmentBytes;
// Don't use multi get to prevent large range query, and take too much memory
uint32_t mask_cnt = 0;
for (uint32_t i = start_index; i <= stop_index; i++) {
rocksdb::PinnableSlice pin_value;
std::string sub_key =
Expand All @@ -252,16 +261,32 @@ rocksdb::Status Bitmap::BitCount(const Slice &user_key, int64_t start, int64_t s
if (!s.ok() && !s.IsNotFound()) return s;
// NotFound means all bits in this segment are 0.
if (s.IsNotFound()) continue;
// Counting bits in [start_in_segment, start_in_segment + length_in_segment)
size_t start_in_segment = 0;
if (i == start_index) start_in_segment = u_start % kBitmapSegmentBytes;
// Though `ExpandBitmapSegment` might generate a segment with logical size less than pin_value.size(),
// the `RawPopcount` will always return 0 on these padding bytes, so we don't need to worry about it.
auto length_in_segment = static_cast<int64_t>(pin_value.size());
if (i == stop_index) length_in_segment = u_stop % kBitmapSegmentBytes + 1;
*cnt += BitmapString::RawPopcount(reinterpret_cast<const uint8_t *>(pin_value.data()) + start_in_segment,
length_in_segment);
// Counting bits in [start_in_segment, stop_in_segment]
int64_t start_in_segment = 0; // start_index in 1024 bytes segment
auto readable_stop_in_segment = static_cast<int64_t>(pin_value.size() - 1); // stop_index in 1024 bytes segment
auto stop_in_segment = readable_stop_in_segment;
if (i == start_index) {
start_in_segment = u_start % kBitmapSegmentBytes;
if (is_bit_index && start_in_segment <= readable_stop_in_segment && first_byte_neg_mask != 0) {
uint8_t first_mask_byte =
kBitSwapTable[static_cast<uint8_t>(pin_value[start_in_segment])] & first_byte_neg_mask;
mask_cnt += BitmapString::RawPopcount(&first_mask_byte, 1);
}
}
if (i == stop_index) {
stop_in_segment = u_stop % kBitmapSegmentBytes;
if (is_bit_index && stop_in_segment <= readable_stop_in_segment && last_byte_neg_mask != 0) {
uint8_t last_mask_byte = kBitSwapTable[static_cast<uint8_t>(pin_value[stop_in_segment])] & last_byte_neg_mask;
mask_cnt += BitmapString::RawPopcount(&last_mask_byte, 1);
}
}
if (stop_in_segment >= start_in_segment && readable_stop_in_segment >= start_in_segment) {
int64_t bytes = 0;
bytes = std::min(stop_in_segment, readable_stop_in_segment) - start_in_segment + 1;
*cnt += BitmapString::RawPopcount(reinterpret_cast<const uint8_t *>(pin_value.data()) + start_in_segment, bytes);
}
}
*cnt -= mask_cnt;
return rocksdb::Status::OK();
}

Expand Down
2 changes: 1 addition & 1 deletion src/types/redis_bitmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class Bitmap : public Database {
rocksdb::Status GetBit(const Slice &user_key, uint32_t offset, bool *bit);
rocksdb::Status GetString(const Slice &user_key, uint32_t max_btos_size, std::string *value);
rocksdb::Status SetBit(const Slice &user_key, uint32_t offset, bool new_bit, bool *old_bit);
rocksdb::Status BitCount(const Slice &user_key, int64_t start, int64_t stop, uint32_t *cnt);
rocksdb::Status BitCount(const Slice &user_key, int64_t start, int64_t stop, bool is_bit_index, uint32_t *cnt);
rocksdb::Status BitPos(const Slice &user_key, bool bit, int64_t start, int64_t stop, bool stop_given, int64_t *pos);
rocksdb::Status BitOp(BitOpFlags op_flag, const std::string &op_name, const Slice &user_key,
const std::vector<Slice> &op_keys, int64_t *len);
Expand Down
44 changes: 39 additions & 5 deletions src/types/redis_bitmap_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,18 +68,38 @@ rocksdb::Status BitmapString::SetBit(const Slice &ns_key, std::string *raw_value
return storage_->Write(storage_->DefaultWriteOptions(), batch->GetWriteBatch());
}

rocksdb::Status BitmapString::BitCount(const std::string &raw_value, int64_t start, int64_t stop, uint32_t *cnt) {
rocksdb::Status BitmapString::BitCount(const std::string &raw_value, int64_t start, int64_t stop, bool is_bit_index,
uint32_t *cnt) {
*cnt = 0;
std::string_view string_value = std::string_view{raw_value}.substr(Metadata::GetOffsetAfterExpire(raw_value[0]));
auto strlen = static_cast<int64_t>(string_value.size());
std::tie(start, stop) = NormalizeRange(start, stop, strlen);
int64_t totlen = strlen;
if (is_bit_index) totlen <<= 3;
std::tie(start, stop) = NormalizeRange(start, stop, totlen);
// Always return 0 if start is greater than stop after normalization.
if (start > stop) return rocksdb::Status::OK();

/* By default:
* start means the start byte in the bitmap, stop means the stop byte in the bitmap.
* When is_bit_index is true, start and stop mean the start bit and the stop bit,
* so the bit range has to be normalized to a byte range. */
int64_t start_byte = start;
int64_t stop_byte = stop;
uint8_t first_byte_neg_mask = 0, last_byte_neg_mask = 0;
std::tie(start_byte, stop_byte) =
NormalizeToByteRangeWithPaddingMask(is_bit_index, start, stop, &first_byte_neg_mask, &last_byte_neg_mask);

/* Precondition: end >= 0 && end < strlen, so the only condition where
* zero can be returned is: start > stop. */
if (start <= stop) {
int64_t bytes = stop - start + 1;
*cnt = RawPopcount(reinterpret_cast<const uint8_t *>(string_value.data()) + start, bytes);
int64_t bytes = stop_byte - start_byte + 1;
*cnt = RawPopcount(reinterpret_cast<const uint8_t *>(string_value.data()) + start_byte, bytes);
if (first_byte_neg_mask != 0 || last_byte_neg_mask != 0) {
uint8_t firstlast[2] = {0, 0};
if (first_byte_neg_mask != 0) firstlast[0] = string_value[start_byte] & first_byte_neg_mask;
if (last_byte_neg_mask != 0) firstlast[1] = string_value[stop_byte] & last_byte_neg_mask;
*cnt -= RawPopcount(firstlast, 2);
}

return rocksdb::Status::OK();
}

Expand Down Expand Up @@ -202,6 +222,20 @@ std::pair<int64_t, int64_t> BitmapString::NormalizeRange(int64_t origin_start, i
return {origin_start, origin_end};
}

// Translates an index range into a byte index range.
//
// When is_bit is false the range is handed back unchanged and neither mask is written.
// When is_bit is true both indexes are shifted right by 3 (bit index -> byte index) and:
//   *first_byte_neg_mask gets its top (origin_start & 7) bits set, all other bits cleared;
//   *last_byte_neg_mask gets its low (7 - (origin_end & 7)) bits set, all other bits cleared.
//
// Requires origin_start <= origin_end (checked with DCHECK).
std::pair<int64_t, int64_t> BitmapString::NormalizeToByteRangeWithPaddingMask(bool is_bit, int64_t origin_start,
                                                                              int64_t origin_end,
                                                                              uint8_t *first_byte_neg_mask,
                                                                              uint8_t *last_byte_neg_mask) {
  DCHECK(origin_start <= origin_end);

  // Byte mode: nothing to convert, masks stay as the caller left them.
  if (!is_bit) return {origin_start, origin_end};

  const int64_t start_offset_in_byte = origin_start & 7;
  const int64_t end_offset_in_byte = origin_end & 7;

  // Low (8 - start_offset_in_byte) bits set; its complement within one byte is the head mask.
  const int start_and_after = (1 << (8 - start_offset_in_byte)) - 1;
  *first_byte_neg_mask = static_cast<uint8_t>(~start_and_after & 0xFF);

  // Low (7 - end_offset_in_byte) bits set.
  *last_byte_neg_mask = static_cast<uint8_t>((1 << (7 - end_offset_in_byte)) - 1);

  return {origin_start >> 3, origin_end >> 3};
}

rocksdb::Status BitmapString::Bitfield(const Slice &ns_key, std::string *raw_value,
const std::vector<BitfieldOperation> &ops,
std::vector<std::optional<BitfieldValue>> *rets) {
Expand Down
18 changes: 17 additions & 1 deletion src/types/redis_bitmap_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ class BitmapString : public Database {
BitmapString(engine::Storage *storage, const std::string &ns) : Database(storage, ns) {}
static rocksdb::Status GetBit(const std::string &raw_value, uint32_t offset, bool *bit);
rocksdb::Status SetBit(const Slice &ns_key, std::string *raw_value, uint32_t offset, bool new_bit, bool *old_bit);
static rocksdb::Status BitCount(const std::string &raw_value, int64_t start, int64_t stop, uint32_t *cnt);
static rocksdb::Status BitCount(const std::string &raw_value, int64_t start, int64_t stop, bool is_bit_index,
uint32_t *cnt);
static rocksdb::Status BitPos(const std::string &raw_value, bool bit, int64_t start, int64_t stop, bool stop_given,
int64_t *pos);
rocksdb::Status Bitfield(const Slice &ns_key, std::string *raw_value, const std::vector<BitfieldOperation> &ops,
Expand All @@ -56,6 +57,21 @@ class BitmapString : public Database {
// Return:
// The normalized [start, end] range.
static std::pair<int64_t, int64_t> NormalizeRange(int64_t origin_start, int64_t origin_end, int64_t length);

// NormalizeToByteRangeWithPaddingMask converts the input index range to a normalized byte index range.
// If is_bit_index is false, it does nothing.
// If is_bit_index is true, it converts the bit index range to a normalized byte index range, and
// fills in the first byte negative mask and the last byte negative mask.
// For example, if the starting bit is bit 3 of the first byte (counting from 0 at the most significant bit),
// like '00010000', the first_byte_neg_mask will be set to '11100000'; if the end bit is bit 5 of the last byte,
// like '00000100', the last_byte_neg_mask will be set to '00000011'.
//
// Return:
// The normalized [start_byte, stop_byte]
static std::pair<int64_t, int64_t> NormalizeToByteRangeWithPaddingMask(bool is_bit_index, int64_t origin_start,
int64_t origin_end,
uint8_t *first_byte_neg_mask,
uint8_t *last_byte_neg_mask);
};

} // namespace redis
Loading

0 comments on commit e160215

Please sign in to comment.