From b0d9fa34ee0eb375acc4af131b854be5ab57783e Mon Sep 17 00:00:00 2001 From: crazycs Date: Fri, 21 May 2021 13:48:16 +0800 Subject: [PATCH 1/3] init Signed-off-by: crazycs --- digester.go | 38 +++++++++++++++++++++++++++++++------- digester_test.go | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 7 deletions(-) diff --git a/digester.go b/digester.go index 40a4b763d..89bb4dec5 100644 --- a/digester.go +++ b/digester.go @@ -16,6 +16,7 @@ package parser import ( "bytes" "crypto/sha256" + "encoding/hex" "fmt" hash2 "hash" "reflect" @@ -27,6 +28,29 @@ import ( "github.com/pingcap/parser/charset" ) +type Digest struct { + b []byte + str string +} + +// NewDigest returns a new digest. +func NewDigest(b []byte) *Digest { + return &Digest{ + b: b, + str: hex.EncodeToString(b), + } +} + +// String returns the digest hex string. +func (d *Digest) String() string { + return d.str +} + +// Bytes returns the digest byte slice. +func (d *Digest) Bytes() []byte { + return d.b +} + // DigestHash generates the digest of statements. // it will generate a hash on normalized form of statement text // which removes general property of a statement but keeps specific property. @@ -68,7 +92,7 @@ func Normalize(sql string) (result string) { } // NormalizeDigest combines Normalize and DigestNormalized into one method. -func NormalizeDigest(sql string) (normalized, digest string) { +func NormalizeDigest(sql string) (normalized string, digest *Digest) { d := digesterPool.Get().(*sqlDigester) normalized, digest = d.doNormalizeDigest(sql) digesterPool.Put(d) @@ -92,7 +116,7 @@ type sqlDigester struct { tokens tokenDeque } -func (d *sqlDigester) doDigestNormalized(normalized string) (result string) { +func (d *sqlDigester) doDigestNormalized(normalized string) (digest *Digest) { hdr := *(*reflect.StringHeader)(unsafe.Pointer(&normalized)) b := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{ Data: hdr.Data, @@ -100,16 +124,16 @@ func (d *sqlDigester) doDigestNormalized(normalized string) (result string) { Cap: hdr.Len, })) d.hasher.Write(b) - result = fmt.Sprintf("%x", d.hasher.Sum(nil)) + digest = NewDigest(d.hasher.Sum(nil)) d.hasher.Reset() return } -func (d *sqlDigester) doDigest(sql string) (result string) { +func (d *sqlDigester) doDigest(sql string) (digest *Digest) { d.normalize(sql) d.hasher.Write(d.buffer.Bytes()) d.buffer.Reset() - result = fmt.Sprintf("%x", d.hasher.Sum(nil)) + digest = NewDigest(d.hasher.Sum(nil)) d.hasher.Reset() return } @@ -121,12 +145,12 @@ func (d *sqlDigester) doNormalize(sql string) (result string) { return } -func (d *sqlDigester) doNormalizeDigest(sql string) (normalized, digest string) { +func (d *sqlDigester) doNormalizeDigest(sql string) (normalized string, digest *Digest) { d.normalize(sql) normalized = d.buffer.String() d.hasher.Write(d.buffer.Bytes()) d.buffer.Reset() - digest = fmt.Sprintf("%x", d.hasher.Sum(nil)) + digest = NewDigest(d.hasher.Sum(nil)) d.hasher.Reset() return } diff --git a/digester_test.go b/digester_test.go index 17a01a770..1c82d1b07 100644 --- a/digester_test.go +++ b/digester_test.go @@ -14,6 +14,11 @@ package parser_test import ( + "crypto/sha256" + "encoding/hex" + "fmt" + "testing" + . "github.com/pingcap/check" "github.com/pingcap/parser" ) @@ -130,3 +135,32 @@ func (s *testSQLDigestSuite) TestDigestHashNotEqForSimpleSQL(c *C) { } } } + +func (s *testSQLDigestSuite) TestGenDigest(c *C) { + hash := genRandDigest("abc") + digest := parser.NewDigest(hash) + c.Assert(digest.String(), Equals, fmt.Sprintf("%x", hash)) + c.Assert(digest.Bytes(), DeepEquals, hash) +} + +func genRandDigest(str string) []byte { + hasher := sha256.New() + hasher.Write([]byte(str)) + return hasher.Sum(nil) +} + +func BenchmarkDigestHexEncode(b *testing.B) { + digest1 := genRandDigest("abc") + b.ResetTimer() + for i := 0; i < b.N; i++ { + hex.EncodeToString(digest1) + } +} + +func BenchmarkDigestSprintf(b *testing.B) { + digest1 := genRandDigest("abc") + b.ResetTimer() + for i := 0; i < b.N; i++ { + fmt.Sprintf("%x", digest1) + } +} From aac8c026347f3efcd86917f74ab728cc47d90a8a Mon Sep 17 00:00:00 2001 From: crazycs Date: Fri, 21 May 2021 13:58:36 +0800 Subject: [PATCH 2/3] fix test Signed-off-by: crazycs --- digester.go | 9 ++++----- digester_test.go | 14 +++++++------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/digester.go b/digester.go index 89bb4dec5..d682e539b 100644 --- a/digester.go +++ b/digester.go @@ -17,7 +17,6 @@ import ( "bytes" "crypto/sha256" "encoding/hex" - "fmt" hash2 "hash" "reflect" "strings" @@ -58,9 +57,9 @@ func (d *Digest) Bytes() []byte { // for example: both DigestHash('select 1') and DigestHash('select 2') => e1c71d1661ae46e09b7aaec1c390957f0d6260410df4e4bc71b9c8d681021471 // // Deprecated: It is logically consistent with NormalizeDigest. -func DigestHash(sql string) (result string) { +func DigestHash(sql string) (digest *Digest) { d := digesterPool.Get().(*sqlDigester) - result = d.doDigest(sql) + digest = d.doDigest(sql) digesterPool.Put(d) return } @@ -72,9 +71,9 @@ func DigestHash(sql string) (result string) { // for example: DigestNormalized('select ?') // DigestNormalized should be called with a normalized SQL string (like 'select ?') generated by function Normalize. // do not call with SQL which is not normalized, DigestNormalized('select 1') and DigestNormalized('select 2') is not the same -func DigestNormalized(normalized string) (result string) { +func DigestNormalized(normalized string) (digest *Digest) { d := digesterPool.Get().(*sqlDigester) - result = d.doDigestNormalized(normalized) + digest = d.doDigestNormalized(normalized) digesterPool.Put(d) return } diff --git a/digester_test.go b/digester_test.go index 1c82d1b07..64607f444 100644 --- a/digester_test.go +++ b/digester_test.go @@ -76,7 +76,7 @@ func (s *testSQLDigestSuite) TestNormalize(c *C) { normalized2, digest2 := parser.NormalizeDigest(test.input) c.Assert(normalized2, Equals, normalized) - c.Assert(digest2, Equals, digest, Commentf("%+v", test)) + c.Assert(digest2.String(), Equals, digest.String(), Commentf("%+v", test)) } } @@ -91,12 +91,12 @@ func (s *testSQLDigestSuite) TestNormalizeDigest(c *C) { for _, test := range tests { normalized, digest := parser.NormalizeDigest(test.sql) c.Assert(normalized, Equals, test.normalized) - c.Assert(digest, Equals, test.digest) + c.Assert(digest.String(), Equals, test.digest) normalized = parser.Normalize(test.sql) digest = parser.DigestNormalized(normalized) c.Assert(normalized, Equals, test.normalized) - c.Assert(digest, Equals, test.digest) + c.Assert(digest.String(), Equals, test.digest) } } @@ -111,10 +111,10 @@ func (s *testSQLDigestSuite) TestDigestHashEqForSimpleSQL(c *C) { for _, sql := range sqlGroup { dig := parser.DigestHash(sql) if d == "" { - d = dig + d = dig.String() continue } - c.Assert(d, Equals, dig) + c.Assert(d, Equals, dig.String()) } } } @@ -128,10 +128,10 @@ func (s *testSQLDigestSuite) TestDigestHashNotEqForSimpleSQL(c *C) { for _, sql := range sqlGroup { dig := parser.DigestHash(sql) if d == "" { - d = dig + d = dig.String() continue } - c.Assert(d, Not(Equals), dig) + c.Assert(d, Not(Equals), dig.String()) } } } From 1f11bbd13fa5d5a3e56d8761d7f3817bb29c8293 Mon Sep 17 00:00:00 2001 From: crazycs Date: Mon, 24 May 2021 13:27:49 +0800 Subject: [PATCH 3/3] add test Signed-off-by: crazycs --- digester_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/digester_test.go b/digester_test.go index 64607f444..1deb1a54f 100644 --- a/digester_test.go +++ b/digester_test.go @@ -141,6 +141,9 @@ func (s *testSQLDigestSuite) TestGenDigest(c *C) { digest := parser.NewDigest(hash) c.Assert(digest.String(), Equals, fmt.Sprintf("%x", hash)) c.Assert(digest.Bytes(), DeepEquals, hash) + digest = parser.NewDigest(nil) + c.Assert(digest.String(), Equals, "") + c.Assert(digest.Bytes(), IsNil) } func genRandDigest(str string) []byte {