-
Notifications
You must be signed in to change notification settings - Fork 5.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
planner: add canonical hasher to take in primitive type directly for …
- Loading branch information
Showing
3 changed files
with
330 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,178 @@ | ||
// Copyright 2024 PingCAP, Inc. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package cascades | ||
|
||
import ( | ||
"math" | ||
) | ||
|
||
// FNV-1a 64-bit parameters, ported from fnv.go in the Go standard library.
// They are written in hexadecimal here; the values are unchanged.
const (
	// offset64 is the initial hash value a hasher starts from
	// (decimal 14695981039346656037).
	offset64 = 0xcbf29ce484222325

	// prime64 is the multiplier applied after each value is XOR-ed into the hash
	// (decimal 1099511628211).
	prime64 = 0x100000001b3
)
|
||
// Hasher incrementally folds values of primitive types into a single 64-bit hash.
// Feed values through the Hash* methods, read the result with Sum64, and call
// Reset to start over.
type Hasher interface {
	// Lifecycle: restart the computation / read the current hash value.
	Reset()
	Sum64() uint64

	// Numeric and boolean inputs.
	HashBool(val bool)
	HashInt(val int)
	HashInt64(val int64)
	HashUint64(val uint64)
	HashFloat64(val float64)

	// Textual and raw byte inputs.
	HashRune(val rune)
	HashByte(val byte)
	HashString(val string)
	HashBytes(val []byte)
}
|
||
// Hash64a is the type of the 64-bit hash value that hasher accumulates. | ||
type Hash64a uint64 | ||
|
||
// hasher is the Hasher implementation used for computing **fnv-1a** hash values, which help tell | ||
// the equivalence of expressions/operators. To use, obtain one from NewHashEqualer (or call Reset), | ||
// then call a series of Hash* methods. The running value is stored in the hash64a field and is | ||
// read back through Sum64. | ||
type hasher struct { | ||
// hash64a stores the hash value as it is incrementally computed. | ||
hash64a Hash64a | ||
} | ||
|
||
// NewHashEqualer creates a new HashEqualer. | ||
func NewHashEqualer() Hasher { | ||
return &hasher{ | ||
hash64a: offset64, | ||
} | ||
} | ||
|
||
// Reset resets the Hasher to its initial state, reusing the internal bytes slice. | ||
func (h *hasher) Reset() { | ||
h.hash64a = offset64 | ||
} | ||
|
||
func (h *hasher) Sum64() uint64 { | ||
return uint64(h.hash64a) | ||
} | ||
|
||
// ------------------------------ Hash functions ---------------------------------------- | ||
// Previously, expressions' hash codes were computed by encoding their metadata layer by layer from the | ||
// bottom up. This is inefficient and risks OOM because each expression caches numerous | ||
// hash bytes of its own. | ||
// | ||
// The new hash function is based on the fnv-1a hash algorithm, outputting the uint64 only. | ||
// To avoid the OOM during the hash computation, we use a shared bytes slice to take in primitive | ||
// types from targeted expressions/operators. The bytes slice is reused and reset after each | ||
// usage of them. | ||
// | ||
// The standard fnv-1a lib only takes a bytes slice as input, so we would need to convert every | ||
// primitive type to a bytes slice inside the Hash function implementation of every expression/operator | ||
// by allocating some temporary slice. This is undesirable, so we made the Hasher take in | ||
// primitive types directly. | ||
// --------------------------------------------------------------------------------------- | ||
|
||
// HashBool hashes a Boolean value. | ||
func (h *hasher) HashBool(val bool) { | ||
i := 0 | ||
if val { | ||
i = 1 | ||
} | ||
h.hash64a ^= Hash64a(i) | ||
h.hash64a *= prime64 | ||
} | ||
|
||
// HashInt hashes an integer value. | ||
func (h *hasher) HashInt(val int) { | ||
h.hash64a ^= Hash64a(val) | ||
h.hash64a *= prime64 | ||
} | ||
|
||
// HashInt64 hashes an int64 value. | ||
func (h *hasher) HashInt64(val int64) { | ||
h.hash64a ^= Hash64a(val) | ||
h.hash64a *= prime64 | ||
} | ||
|
||
// HashUint64 hashes a uint64 value. | ||
func (h *hasher) HashUint64(val uint64) { | ||
h.hash64a ^= Hash64a(val) | ||
h.hash64a *= prime64 | ||
} | ||
|
||
// HashFloat64 hashes a float64 value. | ||
func (h *hasher) HashFloat64(val float64) { | ||
h.hash64a ^= Hash64a(math.Float64bits(val)) | ||
h.hash64a *= prime64 | ||
} | ||
|
||
// HashRune hashes a rune value. | ||
func (h *hasher) HashRune(val rune) { | ||
h.hash64a ^= Hash64a(val) | ||
h.hash64a *= prime64 | ||
} | ||
|
||
// HashString hashes a string value. | ||
// eg: "我是谁" is with 3 rune inside, each rune of them takes up 3-4 bytes. | ||
func (h *hasher) HashString(val string) { | ||
h.HashInt(len(val)) | ||
for _, c := range val { | ||
h.HashRune(c) | ||
} | ||
} | ||
|
||
// HashByte hashes a byte value. | ||
// a byte can be treated as a simple rune as well. | ||
func (h *hasher) HashByte(val byte) { | ||
h.HashRune(rune(val)) | ||
} | ||
|
||
// HashBytes hashes a byte slice value. | ||
func (h *hasher) HashBytes(val []byte) { | ||
h.HashInt(len(val)) | ||
for _, c := range val { | ||
h.HashByte(c) | ||
} | ||
} | ||
|
||
// ------------------------------ Object Implementation ------------------------------------- | ||
// For primitive type, we can directly hash them and compare them. Based on the primitive | ||
// interface call listed here, we can easily implement the hash and equal functions for other | ||
// composed and complex user defined structure or types. | ||
// | ||
// Say we have a structure like this: | ||
// type MyStruct struct { | ||
// a int | ||
// b string | ||
// c OtherStruct | ||
// d Pointer | ||
// } | ||
// so we can implement the hash and equal functions like this: | ||
// func (val *MyStruct) Hash64(h Hasher) { | ||
// h.HashInt(val.a) | ||
// h.HashString(val.b) | ||
// // for c here, it calls for the hash function of OtherStruct implementor. | ||
// val.c.Hash64(h) | ||
// // for pointer, how it could be hashed is up to the implementor. | ||
// h.HashUint64(uint64(val.d)) | ||
// } | ||
// | ||
// func (val1 *MyStruct) Equal(val2 *MyStruct) bool { | ||
// return val1.a == val2.a && val1.b == val2.b && val1.c.Equal(val2.c) && val1.d == val2.d | ||
// } | ||
// ------------------------------------------------------------------------------------------ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
// Copyright 2024 PingCAP, Inc. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package cascades | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
// TmpStr is a test fixture holding two strings that are hashed one after the other. | ||
type TmpStr struct { | ||
str1 string | ||
str2 string | ||
} | ||
|
||
func (ts *TmpStr) Hash64(h Hasher) { | ||
h.HashString(ts.str1) | ||
h.HashString(ts.str2) | ||
} | ||
|
||
func TestStringLen(t *testing.T) { | ||
hasher1 := NewHashEqualer() | ||
hasher2 := NewHashEqualer() | ||
a := TmpStr{ | ||
str1: "abc", | ||
str2: "def", | ||
} | ||
b := TmpStr{ | ||
str1: "abcdef", | ||
str2: "", | ||
} | ||
a.Hash64(hasher1) | ||
b.Hash64(hasher2) | ||
require.NotEqual(t, hasher1.Sum64(), hasher2.Sum64()) | ||
} | ||
|
||
// SX is the test-only contract for objects that can be hashed and compared. | ||
type SX interface { | ||
// Hash64 feeds the object's fields into h. | ||
Hash64(h Hasher) | ||
// Equal reports whether the object equals the given SX. | ||
Equal(SX) bool | ||
} | ||
|
||
// SA is a test fixture with the same field layout as SB but a distinct Go type. | ||
type SA struct { | ||
a int | ||
b string | ||
} | ||
|
||
// Hash64 feeds the fields of sa into h: the int a first, then the string b. | ||
func (sa *SA) Hash64(h Hasher) { | ||
h.HashInt(sa.a) | ||
h.HashString(sa.b) | ||
} | ||
|
||
func (sa *SA) Equal(sx SX) bool { | ||
if sa2, ok := sx.(*SA); ok { | ||
return sa.a == sa2.a && sa.b == sa2.b | ||
} | ||
return false | ||
} | ||
|
||
// SB is a test fixture with the same field layout as SA but a distinct Go type. | ||
type SB struct { | ||
a int | ||
b string | ||
} | ||
|
||
// Hash64 feeds the fields of sb into h: the int a first, then the string b. | ||
func (sb *SB) Hash64(h Hasher) { | ||
h.HashInt(sb.a) | ||
h.HashString(sb.b) | ||
} | ||
|
||
func (sb *SB) Equal(sx SX) bool { | ||
if sb2, ok := sx.(*SB); ok { | ||
return sb.a == sb2.a && sb.b == sb2.b | ||
} | ||
return false | ||
} | ||
|
||
func TestStructType(t *testing.T) { | ||
hasher1 := NewHashEqualer() | ||
hasher2 := NewHashEqualer() | ||
a := SA{ | ||
a: 1, | ||
b: "abc", | ||
} | ||
b := SB{ | ||
a: 1, | ||
b: "abc", | ||
} | ||
a.Hash64(hasher1) | ||
b.Hash64(hasher2) | ||
// As you see from the above, the two structs are different types, but they have the same fields. | ||
// For the Hash64 function, it will hash the fields of the struct, so the hash result should be the same. | ||
// From theoretical point of view, the hash result should NOT be the same because of different types. | ||
// | ||
// While the Equal function is used to compare the two structs, so the result should be false. We don't | ||
// have to hash the golang struct type, because the dynamic runtime type pointer from reflecting is not | ||
// that elegant, we resort to Equal function to compare the two structs completely once two obj has the | ||
// same hash. | ||
require.Equal(t, hasher1.Sum64(), hasher2.Sum64()) | ||
require.Equal(t, a.Equal(&b), false) | ||
} | ||
|
||
func TestHash64a(t *testing.T) { | ||
hasher1 := NewHashEqualer() | ||
hasher2 := NewHashEqualer() | ||
hasher1.HashBool(true) | ||
hasher2.HashBool(true) | ||
require.Equal(t, hasher1.Sum64(), hasher2.Sum64()) | ||
hasher1.HashBool(false) | ||
hasher2.HashBool(false) | ||
require.Equal(t, hasher1.Sum64(), hasher2.Sum64()) | ||
hasher1.HashInt(199) | ||
hasher2.HashInt(199) | ||
require.Equal(t, hasher1.Sum64(), hasher2.Sum64()) | ||
hasher1.HashInt64(13534523462346) | ||
hasher2.HashInt64(13534523462346) | ||
require.Equal(t, hasher1.Sum64(), hasher2.Sum64()) | ||
hasher1.HashUint64(13534523462346) | ||
hasher2.HashUint64(13534523462346) | ||
require.Equal(t, hasher1.Sum64(), hasher2.Sum64()) | ||
hasher1.HashString("hello") | ||
hasher2.HashString("hello") | ||
require.Equal(t, hasher1.Sum64(), hasher2.Sum64()) | ||
hasher1.HashBytes([]byte("world")) | ||
hasher2.HashBytes([]byte("world")) | ||
require.Equal(t, hasher1.Sum64(), hasher2.Sum64()) | ||
hasher1.HashRune('我') | ||
hasher1.HashRune('是') | ||
hasher1.HashRune('谁') | ||
hasher2.HashRune('我') | ||
hasher2.HashRune('是') | ||
hasher2.HashRune('谁') | ||
require.Equal(t, hasher1.Sum64(), hasher2.Sum64()) | ||
hasher1.Reset() | ||
hasher2.Reset() | ||
hasher1.HashString("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") | ||
hasher2.HashString("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") | ||
require.Equal(t, hasher1.Sum64(), hasher2.Sum64()) | ||
} |