Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(hyperloglog): add support of the Hyperloglog data structure #2142

Merged
merged 38 commits into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
3e92895
Support Hyperloglog
Mar 7, 2024
e8cdf69
add origin copyright
Mar 7, 2024
d4b174d
code tuning
Mar 8, 2024
c780c84
code format
Mar 8, 2024
5f86c40
HLL: modify subkeys storage format with dense encoding (#2)
tutububug Mar 22, 2024
cdc375b
fix error of lint and code check (#3)
tutububug Mar 28, 2024
145b703
remove debug code (#5)
tutububug Apr 3, 2024
ae5df70
trigger GitHub actions
Apr 11, 2024
85d286c
Merge branch 'unstable' into hyperloglog-dev-github
tutububug Apr 12, 2024
efa8984
fix check code (#7)
tutububug Apr 12, 2024
57cebd5
fix
Apr 15, 2024
62a943f
update
Apr 15, 2024
48eef8d
Merge branch 'unstable' into hyperloglog-dev-github
tutububug Apr 15, 2024
993dfb7
move copied functions to new file (#10)
tutububug Apr 19, 2024
6df6a69
Merge branch 'unstable' into hyperloglog-dev-github
git-hulk Apr 19, 2024
93f3bc4
correct register merge condition (#12)
tutububug Apr 20, 2024
78a57fc
fix (#13)
tutububug Apr 22, 2024
2cd439c
Merge branch 'unstable' into hyperloglog-dev-github
tutububug Apr 22, 2024
bbdbcb0
fix (#14)
tutububug Apr 23, 2024
559ebc9
fix (#15)
tutububug Apr 24, 2024
bc32fa0
Merge branch 'unstable' into hyperloglog-dev-github
tutububug Apr 24, 2024
98d77d3
Merge branch 'unstable' into hyperloglog-dev-github
tutububug Apr 28, 2024
08d07db
fix code check
Apr 28, 2024
c6d99e2
Merge branch 'unstable' into hyperloglog-dev-github
git-hulk May 10, 2024
64a7a44
Merge branch 'unstable' into hyperloglog-dev-github
tutububug May 10, 2024
7cf88ef
minor updates
mapleFU Jul 30, 2024
d3b2978
remove the code for merge
mapleFU Jul 30, 2024
38e99f0
resume code for bitmap
mapleFU Jul 30, 2024
7e72ca8
Keep cleanup the logic
mapleFU Jul 30, 2024
1af314e
Merge branch 'unstable' into hyperloglog-dev-github
mapleFU Jul 30, 2024
a8e84fd
basic skeleton finished
mapleFU Jul 30, 2024
7f6653c
Fix testing
mapleFU Jul 30, 2024
008ec3f
Update vendor lib
mapleFU Jul 30, 2024
afdb7a7
Trying to fix lint
mapleFU Jul 30, 2024
d8cc9a1
remove bad conflict resolve
mapleFU Jul 30, 2024
22923f9
update comments
mapleFU Jul 30, 2024
e4f3e77
Change HLL to 11
mapleFU Jul 30, 2024
b6fc4a1
trying to fix lint
mapleFU Jul 31, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions src/commands/cmd_hll.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/

#include <types/redis_hyperloglog.h>

#include <algorithm>

#include "commander.h"
#include "commands/command_parser.h"
#include "commands/error_constants.h"
#include "error_constants.h"
#include "parse_util.h"
#include "server/redis_reply.h"
#include "server/server.h"
#include "storage/redis_metadata.h"

namespace redis {

/// PFADD key [element [element ...]]
/// Complexity: O(1) for each element added.
class CommandPfAdd final : public Commander {
 public:
  /// Hashes every element argument and adds the hashes to the HyperLogLog
  /// stored at the key, then replies with the integer reported by
  /// HyperLogLog::Add.
  ///
  /// Argument layout: args_[0] is the command name, args_[1] is the key
  /// (this matches the key range (1, 1, 1) used when the command is
  /// registered) and args_[2..] are the elements.
  Status Execute(Server *srv, Connection *conn, std::string *output) override {
    constexpr size_t kKeyIndex = 1;
    constexpr size_t kFirstElementIndex = 2;

    redis::HyperLogLog hll(srv->storage, conn->GetNamespace());

    // PFADD with a key but no elements is allowed by the arity (-2), so the
    // element list may be empty; guard the subtraction against underflow.
    std::vector<uint64_t> hashes;
    if (args_.size() > kFirstElementIndex) {
      hashes.reserve(args_.size() - kFirstElementIndex);
    }
    for (size_t i = kFirstElementIndex; i < args_.size(); i++) {
      hashes.push_back(redis::HyperLogLog::HllHash(args_[i]));
    }

    uint64_t ret{};
    auto s = hll.Add(args_[kKeyIndex], hashes, &ret);
    // NotFound is not treated as an error: the reply is then whatever `ret` holds.
    if (!s.ok() && !s.IsNotFound()) {
      return {Status::RedisExecErr, s.ToString()};
    }
    *output = redis::Integer(ret);
    return Status::OK();
  }
};

/// PFCOUNT key [key ...]
/// Complexity: O(1) with a very small average constant time when called with a single key.
/// O(N) with N being the number of keys, and much bigger constant times,
/// when called with multiple keys.
///
/// TODO(mwish): Currently we don't support merge, so only one key is supported.
class CommandPfCount final : public Commander {
 public:
  /// Replies with the integer reported by HyperLogLog::Count for the key,
  /// or 0 when the key does not exist.
  ///
  /// Argument layout: args_[0] is the command name and args_[1] is the key
  /// (this matches the key range (1, 1, 1) used when the command is
  /// registered). Only a single key is accepted for now.
  Status Execute(Server *srv, Connection *conn, std::string *output) override {
    redis::HyperLogLog hll(srv->storage, conn->GetNamespace());
    uint64_t ret{};
    auto s = hll.Count(args_[1], &ret);
    if (s.IsNotFound()) {
      // A missing key counts as an empty HyperLogLog.
      ret = 0;
    } else if (!s.ok()) {
      return {Status::RedisExecErr, s.ToString()};
    }
    *output = redis::Integer(ret);
    return Status::OK();
  }
};

// Registers the HyperLogLog commands.
// NOTE(review): the numeric argument after the name looks like the Redis-style
// arity (negative = "at least N tokens including the command name", positive =
// exactly N), and the trailing (1, 1, 1) like first-key / last-key / key-step
// positions -- confirm against MakeCmdAttr.
// PFCOUNT is registered with arity 2, i.e. a single key, because merging
// several HyperLogLogs is not supported yet.
REDIS_REGISTER_COMMANDS(MakeCmdAttr<CommandPfAdd>("pfadd", -2, "write", 1, 1, 1),
MakeCmdAttr<CommandPfCount>("pfcount", 2, "read-only", 1, 1, 1), );
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about PFMERGE?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be add in next patch

PragmaTwice marked this conversation as resolved.
Show resolved Hide resolved

} // namespace redis
25 changes: 24 additions & 1 deletion src/storage/redis_metadata.cc
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ bool Metadata::ExpireAt(uint64_t expired_ts) const {
bool Metadata::IsSingleKVType() const { return Type() == kRedisString || Type() == kRedisJson; }

// True for single-KV types, streams, bloom filters and HyperLogLogs.
// NOTE(review): judging by the name, these are the types allowed to exist
// with no elements -- confirm against the callers.
bool Metadata::IsEmptyableType() const {
  return IsSingleKVType() || Type() == kRedisStream || Type() == kRedisBloomFilter || Type() == kRedisHyperLogLog;
}

// Evaluates ExpireAt() against the current timestamp from util::GetTimeStampMS().
bool Metadata::Expired() const {
  const auto now_ms = util::GetTimeStampMS();
  return ExpireAt(now_ms);
}
Expand Down Expand Up @@ -472,3 +472,26 @@ rocksdb::Status JsonMetadata::Decode(Slice *input) {

return rocksdb::Status::OK();
}

void HyperLogLogMetadata::Encode(std::string *dst) const {
Metadata::Encode(dst);
PutFixed8(dst, static_cast<uint8_t>(this->encode_type));
}

// Decodes the base Metadata fields from `input`, then the one-byte encode type.
//
// Returns the base decoder's status if it fails, InvalidArgument when the
// encode-type byte is missing or holds a value other than 0 (the only value
// accepted here), and OK otherwise.
rocksdb::Status HyperLogLogMetadata::Decode(Slice *input) {
  auto base_status = Metadata::Decode(input);
  if (!base_status.ok()) {
    return base_status;
  }

  uint8_t raw_type = 0;
  const bool has_type_byte = GetFixed8(input, &raw_type);
  if (!has_type_byte) {
    return rocksdb::Status::InvalidArgument(kErrMetadataTooShort);
  }

  // Any non-zero value is rejected as an unknown encoding.
  if (raw_type != 0) {
    return rocksdb::Status::InvalidArgument(fmt::format("Invalid encode type {}", raw_type));
  }

  encode_type = static_cast<EncodeType>(raw_type);
  return rocksdb::Status::OK();
}
26 changes: 24 additions & 2 deletions src/storage/redis_metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ enum RedisType : uint8_t {
kRedisStream = 8,
kRedisBloomFilter = 9,
kRedisJson = 10,
kRedisHyperLogLog = 11,
};

struct RedisTypes {
Expand Down Expand Up @@ -90,8 +91,9 @@ enum RedisCommand {
kRedisCmdLMove,
};

// Human-readable type names. The names for stream, bloom filter, JSON and
// HyperLogLog sit at indexes 8..11, matching the kRedisStream..kRedisHyperLogLog
// enum values, so new names must be appended in enum order.
const std::vector<std::string> RedisTypeNames = {"none",   "string",    "hash",      "list",
                                                 "set",    "zset",      "bitmap",    "sortedint",
                                                 "stream", "MBbloom--", "ReJSON-RL", "hyperloglog"};

// Error text for an operation applied to a key that holds a different kind of value.
constexpr const char *kErrMsgWrongType = "WRONGTYPE Operation against a key holding the wrong kind of value";
// Error text for a key that has expired.
constexpr const char *kErrMsgKeyExpired = "the key was expired";
Expand Down Expand Up @@ -313,3 +315,23 @@ class JsonMetadata : public Metadata {
void Encode(std::string *dst) const override;
rocksdb::Status Decode(Slice *input) override;
};

/// Metadata for a HyperLogLog key: the common Metadata fields plus the
/// encoding used for the registers.
class HyperLogLogMetadata : public Metadata {
 public:
  /// How the registers are laid out in storage.
  enum class EncodeType : uint8_t {
    // Redis-style dense encoding, implemented as bitmap-like sub keys that
    // store the registers segment by segment in the data column family.
    // Registers are kept in 6-bit format and each segment holds 768 registers.
    DENSE = 0,
    // TODO(mwish): sparse encoding
    // SPARSE = 1,
  };

  /// Encoding of this key; dense unless Decode() sets it otherwise.
  EncodeType encode_type{EncodeType::DENSE};

  explicit HyperLogLogMetadata(bool generate_version = true) : Metadata(kRedisHyperLogLog, generate_version) {}

  /// Serializes the base metadata followed by the encode type.
  void Encode(std::string *dst) const override;
  /// Parses what Encode() wrote; fails on a missing or invalid encode type.
  rocksdb::Status Decode(Slice *input) override;
};
4 changes: 4 additions & 0 deletions src/storage/storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@
#include "observer_or_unique.h"
#include "status.h"

// Define USE_ALIGNED_ACCESS when building for SPARC or ARM (__arm__) targets.
// NOTE(review): presumably consumers use this to avoid unaligned memory
// loads on these architectures -- confirm where the macro is checked.
#if defined(__sparc__) || defined(__arm__)
#define USE_ALIGNED_ACCESS
#endif

enum class StorageEngineType : uint16_t {
RocksDB,
Speedb,
Expand Down
Loading
Loading