diff --git a/go.mod b/go.mod index 2bd4c7c38..ec4dc3ccc 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,7 @@ require ( github.com/fsnotify/fsnotify v1.4.9 // indirect github.com/hinshun/vt10x v0.0.0-20180809195222-d55458df857c github.com/hokaccha/go-prettyjson v0.0.0-20190818114111-108c894c2c0e - github.com/honeycombio/libhoney-go v1.15.2 + github.com/honeycombio/libhoney-go v1.15.3 github.com/kr/pty v1.1.8 // indirect github.com/kyokomi/emoji/v2 v2.2.8 github.com/mitchellh/go-homedir v1.1.0 diff --git a/go.sum b/go.sum index f255b68d7..1075327ae 100644 --- a/go.sum +++ b/go.sum @@ -16,8 +16,8 @@ github.com/AlecAivazis/survey/v2 v2.2.12/go.mod h1:6d4saEvBsfSHXeN1a5OA5m2+HJ2Lu github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/DataDog/zstd v1.4.5 h1:EndNeuB0l9syBZhut0wns3gV1hL8zX8LIu6ZiVHWLIQ= -github.com/DataDog/zstd v1.4.5/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo= +github.com/DataDog/zstd v1.4.8 h1:Rpmta4xZ/MgZnriKNd24iZMhGpP5dvUcs/uqfBapKZY= +github.com/DataDog/zstd v1.4.8/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= github.com/Netflix/go-expect v0.0.0-20180615182759-c93bf25de8e8/go.mod h1:oX5x61PbNXchhh0oikYAH+4Pcfw5LKv21+Jnpr6r6Pc= github.com/Netflix/go-expect v0.0.0-20200312175327-da48e75238e2 h1:y2avNRjCeJT8b7svzjhKZjsvW5Jki/iAqTBEPJURaUg= github.com/Netflix/go-expect v0.0.0-20200312175327-da48e75238e2/go.mod h1:oX5x61PbNXchhh0oikYAH+4Pcfw5LKv21+Jnpr6r6Pc= @@ -86,6 +86,8 @@ github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5y github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/golang/protobuf v1.3.5 h1:F768QJ1E9tib+q5Sc8MkdJi1RxLTbRcTf8LJV56aRls= github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= +github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= +github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -128,8 +130,8 @@ github.com/hinshun/vt10x v0.0.0-20180809195222-d55458df857c h1:kp3AxgXgDOmIJFR7b github.com/hinshun/vt10x v0.0.0-20180809195222-d55458df857c/go.mod h1:DqJ97dSdRW1W22yXSB90986pcOyQ7r45iio1KN2ez1A= github.com/hokaccha/go-prettyjson v0.0.0-20190818114111-108c894c2c0e h1:0aewS5NTyxftZHSnFaJmWE5oCCrj4DyEXkAiMa1iZJM= github.com/hokaccha/go-prettyjson v0.0.0-20190818114111-108c894c2c0e/go.mod h1:pFlLw2CfqZiIBOx6BuCeRLCrfxBJipTY0nIOF/VbGcI= -github.com/honeycombio/libhoney-go v1.15.2 h1:5NGcjOxZZma13dmzNcl3OtGbF1hECA0XHJNHEb2t2ck= -github.com/honeycombio/libhoney-go v1.15.2/go.mod h1:JzhRPYgoBCd0rZvudrqmej4Ntx0w7AT3wAJpf5+t1WA= +github.com/honeycombio/libhoney-go v1.15.3 h1:/i2u+MsfAKX8arTKhflprtjqXs+qaODRKmAJUk51qQw= +github.com/honeycombio/libhoney-go v1.15.3/go.mod h1:SMz4WFtnFt0WxkBfJRLju/3UueS9V6pePG9KoI5c7rg= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= @@ -142,8 +144,8 @@ github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNU github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.11.4 h1:kz40R/YWls3iqT9zX9AHN3WoVsrAWVyui5sxuLqiXqU= -github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.12.2 h1:2KCfW3I9M7nSc5wOqXAlW2v2U6v+w6cbjvbfp+OykW8= +github.com/klauspost/compress v1.12.2/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= diff --git a/vendor/github.com/klauspost/compress/snappy/.gitignore b/vendor/github.com/golang/snappy/.gitignore similarity index 100% rename from vendor/github.com/klauspost/compress/snappy/.gitignore rename to vendor/github.com/golang/snappy/.gitignore diff --git a/vendor/github.com/klauspost/compress/snappy/AUTHORS b/vendor/github.com/golang/snappy/AUTHORS similarity index 90% rename from vendor/github.com/klauspost/compress/snappy/AUTHORS rename to vendor/github.com/golang/snappy/AUTHORS index bcfa19520..203e84eba 100644 --- a/vendor/github.com/klauspost/compress/snappy/AUTHORS +++ b/vendor/github.com/golang/snappy/AUTHORS @@ -8,8 +8,10 @@ # Please keep the list sorted. +Amazon.com, Inc Damian Gryski Google Inc. Jan Mercl <0xjnml@gmail.com> +Klaus Post Rodolfo Carvalho Sebastien Binet diff --git a/vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS b/vendor/github.com/golang/snappy/CONTRIBUTORS similarity index 95% rename from vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS rename to vendor/github.com/golang/snappy/CONTRIBUTORS index 931ae3160..d9914732b 100644 --- a/vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS +++ b/vendor/github.com/golang/snappy/CONTRIBUTORS @@ -28,7 +28,9 @@ Damian Gryski Jan Mercl <0xjnml@gmail.com> +Jonathan Swinney Kai Backman +Klaus Post Marc-Antoine Ruel Nigel Tao Rob Pike diff --git a/vendor/github.com/klauspost/compress/snappy/LICENSE b/vendor/github.com/golang/snappy/LICENSE similarity index 100% rename from vendor/github.com/klauspost/compress/snappy/LICENSE rename to vendor/github.com/golang/snappy/LICENSE diff --git a/vendor/github.com/klauspost/compress/snappy/README b/vendor/github.com/golang/snappy/README similarity index 100% rename from vendor/github.com/klauspost/compress/snappy/README rename to vendor/github.com/golang/snappy/README diff --git a/vendor/github.com/klauspost/compress/snappy/decode.go b/vendor/github.com/golang/snappy/decode.go similarity index 97% rename from vendor/github.com/klauspost/compress/snappy/decode.go rename to vendor/github.com/golang/snappy/decode.go index 72efb0353..f1e04b172 100644 --- a/vendor/github.com/klauspost/compress/snappy/decode.go +++ b/vendor/github.com/golang/snappy/decode.go @@ -52,6 +52,8 @@ const ( // Otherwise, a newly allocated slice will be returned. // // The dst and src must not overlap. It is valid to pass a nil dst. +// +// Decode handles the Snappy block format, not the Snappy stream format. func Decode(dst, src []byte) ([]byte, error) { dLen, s, err := decodedLen(src) if err != nil { @@ -83,6 +85,8 @@ func NewReader(r io.Reader) *Reader { } // Reader is an io.Reader that can read Snappy-compressed bytes. +// +// Reader handles the Snappy stream format, not the Snappy block format. type Reader struct { r io.Reader err error diff --git a/vendor/github.com/klauspost/compress/snappy/decode_amd64.s b/vendor/github.com/golang/snappy/decode_amd64.s similarity index 98% rename from vendor/github.com/klauspost/compress/snappy/decode_amd64.s rename to vendor/github.com/golang/snappy/decode_amd64.s index 1c66e3723..e6179f65e 100644 --- a/vendor/github.com/klauspost/compress/snappy/decode_amd64.s +++ b/vendor/github.com/golang/snappy/decode_amd64.s @@ -184,7 +184,9 @@ tagLit60Plus: // checks. In the asm version, we code it once instead of once per switch case. ADDQ CX, SI SUBQ $58, SI - CMPQ SI, R13 + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 JA errCorrupt // case x == 60: @@ -230,7 +232,9 @@ tagCopy4: ADDQ $5, SI // if uint(s) > uint(len(src)) { etc } - CMPQ SI, R13 + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 JA errCorrupt // length = 1 + int(src[s-5])>>2 @@ -247,7 +251,9 @@ tagCopy2: ADDQ $3, SI // if uint(s) > uint(len(src)) { etc } - CMPQ SI, R13 + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 JA errCorrupt // length = 1 + int(src[s-3])>>2 @@ -271,7 +277,9 @@ tagCopy: ADDQ $2, SI // if uint(s) > uint(len(src)) { etc } - CMPQ SI, R13 + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 JA errCorrupt // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) diff --git a/vendor/github.com/golang/snappy/decode_arm64.s b/vendor/github.com/golang/snappy/decode_arm64.s new file mode 100644 index 000000000..7a3ead17e --- /dev/null +++ b/vendor/github.com/golang/snappy/decode_arm64.s @@ -0,0 +1,494 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// The asm code generally follows the pure Go code in decode_other.go, except +// where marked with a "!!!". + +// func decode(dst, src []byte) int +// +// All local variables fit into registers. The non-zero stack size is only to +// spill registers and push args when issuing a CALL. The register allocation: +// - R2 scratch +// - R3 scratch +// - R4 length or x +// - R5 offset +// - R6 &src[s] +// - R7 &dst[d] +// + R8 dst_base +// + R9 dst_len +// + R10 dst_base + dst_len +// + R11 src_base +// + R12 src_len +// + R13 src_base + src_len +// - R14 used by doCopy +// - R15 used by doCopy +// +// The registers R8-R13 (marked with a "+") are set at the start of the +// function, and after a CALL returns, and are not otherwise modified. +// +// The d variable is implicitly R7 - R8, and len(dst)-d is R10 - R7. +// The s variable is implicitly R6 - R11, and len(src)-s is R13 - R6. +TEXT ·decode(SB), NOSPLIT, $56-56 + // Initialize R6, R7 and R8-R13. + MOVD dst_base+0(FP), R8 + MOVD dst_len+8(FP), R9 + MOVD R8, R7 + MOVD R8, R10 + ADD R9, R10, R10 + MOVD src_base+24(FP), R11 + MOVD src_len+32(FP), R12 + MOVD R11, R6 + MOVD R11, R13 + ADD R12, R13, R13 + +loop: + // for s < len(src) + CMP R13, R6 + BEQ end + + // R4 = uint32(src[s]) + // + // switch src[s] & 0x03 + MOVBU (R6), R4 + MOVW R4, R3 + ANDW $3, R3 + MOVW $1, R1 + CMPW R1, R3 + BGE tagCopy + + // ---------------------------------------- + // The code below handles literal tags. + + // case tagLiteral: + // x := uint32(src[s] >> 2) + // switch + MOVW $60, R1 + LSRW $2, R4, R4 + CMPW R4, R1 + BLS tagLit60Plus + + // case x < 60: + // s++ + ADD $1, R6, R6 + +doLit: + // This is the end of the inner "switch", when we have a literal tag. + // + // We assume that R4 == x and x fits in a uint32, where x is the variable + // used in the pure Go decode_other.go code. + + // length = int(x) + 1 + // + // Unlike the pure Go code, we don't need to check if length <= 0 because + // R4 can hold 64 bits, so the increment cannot overflow. + ADD $1, R4, R4 + + // Prepare to check if copying length bytes will run past the end of dst or + // src. + // + // R2 = len(dst) - d + // R3 = len(src) - s + MOVD R10, R2 + SUB R7, R2, R2 + MOVD R13, R3 + SUB R6, R3, R3 + + // !!! Try a faster technique for short (16 or fewer bytes) copies. + // + // if length > 16 || len(dst)-d < 16 || len(src)-s < 16 { + // goto callMemmove // Fall back on calling runtime·memmove. + // } + // + // The C++ snappy code calls this TryFastAppend. It also checks len(src)-s + // against 21 instead of 16, because it cannot assume that all of its input + // is contiguous in memory and so it needs to leave enough source bytes to + // read the next tag without refilling buffers, but Go's Decode assumes + // contiguousness (the src argument is a []byte). + CMP $16, R4 + BGT callMemmove + CMP $16, R2 + BLT callMemmove + CMP $16, R3 + BLT callMemmove + + // !!! Implement the copy from src to dst as a 16-byte load and store. + // (Decode's documentation says that dst and src must not overlap.) + // + // This always copies 16 bytes, instead of only length bytes, but that's + // OK. If the input is a valid Snappy encoding then subsequent iterations + // will fix up the overrun. Otherwise, Decode returns a nil []byte (and a + // non-nil error), so the overrun will be ignored. + // + // Note that on arm64, it is legal and cheap to issue unaligned 8-byte or + // 16-byte loads and stores. This technique probably wouldn't be as + // effective on architectures that are fussier about alignment. + LDP 0(R6), (R14, R15) + STP (R14, R15), 0(R7) + + // d += length + // s += length + ADD R4, R7, R7 + ADD R4, R6, R6 + B loop + +callMemmove: + // if length > len(dst)-d || length > len(src)-s { etc } + CMP R2, R4 + BGT errCorrupt + CMP R3, R4 + BGT errCorrupt + + // copy(dst[d:], src[s:s+length]) + // + // This means calling runtime·memmove(&dst[d], &src[s], length), so we push + // R7, R6 and R4 as arguments. Coincidentally, we also need to spill those + // three registers to the stack, to save local variables across the CALL. + MOVD R7, 8(RSP) + MOVD R6, 16(RSP) + MOVD R4, 24(RSP) + MOVD R7, 32(RSP) + MOVD R6, 40(RSP) + MOVD R4, 48(RSP) + CALL runtime·memmove(SB) + + // Restore local variables: unspill registers from the stack and + // re-calculate R8-R13. + MOVD 32(RSP), R7 + MOVD 40(RSP), R6 + MOVD 48(RSP), R4 + MOVD dst_base+0(FP), R8 + MOVD dst_len+8(FP), R9 + MOVD R8, R10 + ADD R9, R10, R10 + MOVD src_base+24(FP), R11 + MOVD src_len+32(FP), R12 + MOVD R11, R13 + ADD R12, R13, R13 + + // d += length + // s += length + ADD R4, R7, R7 + ADD R4, R6, R6 + B loop + +tagLit60Plus: + // !!! This fragment does the + // + // s += x - 58; if uint(s) > uint(len(src)) { etc } + // + // checks. In the asm version, we code it once instead of once per switch case. + ADD R4, R6, R6 + SUB $58, R6, R6 + MOVD R6, R3 + SUB R11, R3, R3 + CMP R12, R3 + BGT errCorrupt + + // case x == 60: + MOVW $61, R1 + CMPW R1, R4 + BEQ tagLit61 + BGT tagLit62Plus + + // x = uint32(src[s-1]) + MOVBU -1(R6), R4 + B doLit + +tagLit61: + // case x == 61: + // x = uint32(src[s-2]) | uint32(src[s-1])<<8 + MOVHU -2(R6), R4 + B doLit + +tagLit62Plus: + CMPW $62, R4 + BHI tagLit63 + + // case x == 62: + // x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 + MOVHU -3(R6), R4 + MOVBU -1(R6), R3 + ORR R3<<16, R4 + B doLit + +tagLit63: + // case x == 63: + // x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 + MOVWU -4(R6), R4 + B doLit + + // The code above handles literal tags. + // ---------------------------------------- + // The code below handles copy tags. + +tagCopy4: + // case tagCopy4: + // s += 5 + ADD $5, R6, R6 + + // if uint(s) > uint(len(src)) { etc } + MOVD R6, R3 + SUB R11, R3, R3 + CMP R12, R3 + BGT errCorrupt + + // length = 1 + int(src[s-5])>>2 + MOVD $1, R1 + ADD R4>>2, R1, R4 + + // offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) + MOVWU -4(R6), R5 + B doCopy + +tagCopy2: + // case tagCopy2: + // s += 3 + ADD $3, R6, R6 + + // if uint(s) > uint(len(src)) { etc } + MOVD R6, R3 + SUB R11, R3, R3 + CMP R12, R3 + BGT errCorrupt + + // length = 1 + int(src[s-3])>>2 + MOVD $1, R1 + ADD R4>>2, R1, R4 + + // offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) + MOVHU -2(R6), R5 + B doCopy + +tagCopy: + // We have a copy tag. We assume that: + // - R3 == src[s] & 0x03 + // - R4 == src[s] + CMP $2, R3 + BEQ tagCopy2 + BGT tagCopy4 + + // case tagCopy1: + // s += 2 + ADD $2, R6, R6 + + // if uint(s) > uint(len(src)) { etc } + MOVD R6, R3 + SUB R11, R3, R3 + CMP R12, R3 + BGT errCorrupt + + // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) + MOVD R4, R5 + AND $0xe0, R5 + MOVBU -1(R6), R3 + ORR R5<<3, R3, R5 + + // length = 4 + int(src[s-2])>>2&0x7 + MOVD $7, R1 + AND R4>>2, R1, R4 + ADD $4, R4, R4 + +doCopy: + // This is the end of the outer "switch", when we have a copy tag. + // + // We assume that: + // - R4 == length && R4 > 0 + // - R5 == offset + + // if offset <= 0 { etc } + MOVD $0, R1 + CMP R1, R5 + BLE errCorrupt + + // if d < offset { etc } + MOVD R7, R3 + SUB R8, R3, R3 + CMP R5, R3 + BLT errCorrupt + + // if length > len(dst)-d { etc } + MOVD R10, R3 + SUB R7, R3, R3 + CMP R3, R4 + BGT errCorrupt + + // forwardCopy(dst[d:d+length], dst[d-offset:]); d += length + // + // Set: + // - R14 = len(dst)-d + // - R15 = &dst[d-offset] + MOVD R10, R14 + SUB R7, R14, R14 + MOVD R7, R15 + SUB R5, R15, R15 + + // !!! Try a faster technique for short (16 or fewer bytes) forward copies. + // + // First, try using two 8-byte load/stores, similar to the doLit technique + // above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is + // still OK if offset >= 8. Note that this has to be two 8-byte load/stores + // and not one 16-byte load/store, and the first store has to be before the + // second load, due to the overlap if offset is in the range [8, 16). + // + // if length > 16 || offset < 8 || len(dst)-d < 16 { + // goto slowForwardCopy + // } + // copy 16 bytes + // d += length + CMP $16, R4 + BGT slowForwardCopy + CMP $8, R5 + BLT slowForwardCopy + CMP $16, R14 + BLT slowForwardCopy + MOVD 0(R15), R2 + MOVD R2, 0(R7) + MOVD 8(R15), R3 + MOVD R3, 8(R7) + ADD R4, R7, R7 + B loop + +slowForwardCopy: + // !!! If the forward copy is longer than 16 bytes, or if offset < 8, we + // can still try 8-byte load stores, provided we can overrun up to 10 extra + // bytes. As above, the overrun will be fixed up by subsequent iterations + // of the outermost loop. + // + // The C++ snappy code calls this technique IncrementalCopyFastPath. Its + // commentary says: + // + // ---- + // + // The main part of this loop is a simple copy of eight bytes at a time + // until we've copied (at least) the requested amount of bytes. However, + // if d and d-offset are less than eight bytes apart (indicating a + // repeating pattern of length < 8), we first need to expand the pattern in + // order to get the correct results. For instance, if the buffer looks like + // this, with the eight-byte and patterns marked as + // intervals: + // + // abxxxxxxxxxxxx + // [------] d-offset + // [------] d + // + // a single eight-byte copy from to will repeat the pattern + // once, after which we can move two bytes without moving : + // + // ababxxxxxxxxxx + // [------] d-offset + // [------] d + // + // and repeat the exercise until the two no longer overlap. + // + // This allows us to do very well in the special case of one single byte + // repeated many times, without taking a big hit for more general cases. + // + // The worst case of extra writing past the end of the match occurs when + // offset == 1 and length == 1; the last copy will read from byte positions + // [0..7] and write to [4..11], whereas it was only supposed to write to + // position 1. Thus, ten excess bytes. + // + // ---- + // + // That "10 byte overrun" worst case is confirmed by Go's + // TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy + // and finishSlowForwardCopy algorithm. + // + // if length > len(dst)-d-10 { + // goto verySlowForwardCopy + // } + SUB $10, R14, R14 + CMP R14, R4 + BGT verySlowForwardCopy + +makeOffsetAtLeast8: + // !!! As above, expand the pattern so that offset >= 8 and we can use + // 8-byte load/stores. + // + // for offset < 8 { + // copy 8 bytes from dst[d-offset:] to dst[d:] + // length -= offset + // d += offset + // offset += offset + // // The two previous lines together means that d-offset, and therefore + // // R15, is unchanged. + // } + CMP $8, R5 + BGE fixUpSlowForwardCopy + MOVD (R15), R3 + MOVD R3, (R7) + SUB R5, R4, R4 + ADD R5, R7, R7 + ADD R5, R5, R5 + B makeOffsetAtLeast8 + +fixUpSlowForwardCopy: + // !!! Add length (which might be negative now) to d (implied by R7 being + // &dst[d]) so that d ends up at the right place when we jump back to the + // top of the loop. Before we do that, though, we save R7 to R2 so that, if + // length is positive, copying the remaining length bytes will write to the + // right place. + MOVD R7, R2 + ADD R4, R7, R7 + +finishSlowForwardCopy: + // !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative + // length means that we overrun, but as above, that will be fixed up by + // subsequent iterations of the outermost loop. + MOVD $0, R1 + CMP R1, R4 + BLE loop + MOVD (R15), R3 + MOVD R3, (R2) + ADD $8, R15, R15 + ADD $8, R2, R2 + SUB $8, R4, R4 + B finishSlowForwardCopy + +verySlowForwardCopy: + // verySlowForwardCopy is a simple implementation of forward copy. In C + // parlance, this is a do/while loop instead of a while loop, since we know + // that length > 0. In Go syntax: + // + // for { + // dst[d] = dst[d - offset] + // d++ + // length-- + // if length == 0 { + // break + // } + // } + MOVB (R15), R3 + MOVB R3, (R7) + ADD $1, R15, R15 + ADD $1, R7, R7 + SUB $1, R4, R4 + CBNZ R4, verySlowForwardCopy + B loop + + // The code above handles copy tags. + // ---------------------------------------- + +end: + // This is the end of the "for s < len(src)". + // + // if d != len(dst) { etc } + CMP R10, R7 + BNE errCorrupt + + // return 0 + MOVD $0, ret+48(FP) + RET + +errCorrupt: + // return decodeErrCodeCorrupt + MOVD $1, R2 + MOVD R2, ret+48(FP) + RET diff --git a/vendor/github.com/klauspost/compress/snappy/decode_amd64.go b/vendor/github.com/golang/snappy/decode_asm.go similarity index 93% rename from vendor/github.com/klauspost/compress/snappy/decode_amd64.go rename to vendor/github.com/golang/snappy/decode_asm.go index fcd192b84..7082b3491 100644 --- a/vendor/github.com/klauspost/compress/snappy/decode_amd64.go +++ b/vendor/github.com/golang/snappy/decode_asm.go @@ -5,6 +5,7 @@ // +build !appengine // +build gc // +build !noasm +// +build amd64 arm64 package snappy diff --git a/vendor/github.com/klauspost/compress/snappy/decode_other.go b/vendor/github.com/golang/snappy/decode_other.go similarity index 98% rename from vendor/github.com/klauspost/compress/snappy/decode_other.go rename to vendor/github.com/golang/snappy/decode_other.go index 94a96c5d7..2f672be55 100644 --- a/vendor/github.com/klauspost/compress/snappy/decode_other.go +++ b/vendor/github.com/golang/snappy/decode_other.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !amd64 appengine !gc noasm +// +build !amd64,!arm64 appengine !gc noasm package snappy @@ -87,7 +87,7 @@ func decode(dst, src []byte) int { } // Copy from an earlier sub-slice of dst to a later sub-slice. // If no overlap, use the built-in copy: - if offset > length { + if offset >= length { copy(dst[d:d+length], dst[d-offset:]) d += length continue diff --git a/vendor/github.com/klauspost/compress/snappy/encode.go b/vendor/github.com/golang/snappy/encode.go similarity index 98% rename from vendor/github.com/klauspost/compress/snappy/encode.go rename to vendor/github.com/golang/snappy/encode.go index 8d393e904..7f2365707 100644 --- a/vendor/github.com/klauspost/compress/snappy/encode.go +++ b/vendor/github.com/golang/snappy/encode.go @@ -15,6 +15,8 @@ import ( // Otherwise, a newly allocated slice will be returned. // // The dst and src must not overlap. It is valid to pass a nil dst. +// +// Encode handles the Snappy block format, not the Snappy stream format. func Encode(dst, src []byte) []byte { if n := MaxEncodedLen(len(src)); n < 0 { panic(ErrTooLarge) @@ -139,6 +141,8 @@ func NewBufferedWriter(w io.Writer) *Writer { } // Writer is an io.Writer that can write Snappy-compressed bytes. +// +// Writer handles the Snappy stream format, not the Snappy block format. type Writer struct { w io.Writer err error diff --git a/vendor/github.com/klauspost/compress/snappy/encode_amd64.s b/vendor/github.com/golang/snappy/encode_amd64.s similarity index 100% rename from vendor/github.com/klauspost/compress/snappy/encode_amd64.s rename to vendor/github.com/golang/snappy/encode_amd64.s diff --git a/vendor/github.com/golang/snappy/encode_arm64.s b/vendor/github.com/golang/snappy/encode_arm64.s new file mode 100644 index 000000000..bf83667d7 --- /dev/null +++ b/vendor/github.com/golang/snappy/encode_arm64.s @@ -0,0 +1,722 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// The asm code generally follows the pure Go code in encode_other.go, except +// where marked with a "!!!". + +// ---------------------------------------------------------------------------- + +// func emitLiteral(dst, lit []byte) int +// +// All local variables fit into registers. The register allocation: +// - R3 len(lit) +// - R4 n +// - R6 return value +// - R8 &dst[i] +// - R10 &lit[0] +// +// The 32 bytes of stack space is to call runtime·memmove. +// +// The unusual register allocation of local variables, such as R10 for the +// source pointer, matches the allocation used at the call site in encodeBlock, +// which makes it easier to manually inline this function. +TEXT ·emitLiteral(SB), NOSPLIT, $32-56 + MOVD dst_base+0(FP), R8 + MOVD lit_base+24(FP), R10 + MOVD lit_len+32(FP), R3 + MOVD R3, R6 + MOVW R3, R4 + SUBW $1, R4, R4 + + CMPW $60, R4 + BLT oneByte + CMPW $256, R4 + BLT twoBytes + +threeBytes: + MOVD $0xf4, R2 + MOVB R2, 0(R8) + MOVW R4, 1(R8) + ADD $3, R8, R8 + ADD $3, R6, R6 + B memmove + +twoBytes: + MOVD $0xf0, R2 + MOVB R2, 0(R8) + MOVB R4, 1(R8) + ADD $2, R8, R8 + ADD $2, R6, R6 + B memmove + +oneByte: + LSLW $2, R4, R4 + MOVB R4, 0(R8) + ADD $1, R8, R8 + ADD $1, R6, R6 + +memmove: + MOVD R6, ret+48(FP) + + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // R8, R10 and R3 as arguments. + MOVD R8, 8(RSP) + MOVD R10, 16(RSP) + MOVD R3, 24(RSP) + CALL runtime·memmove(SB) + RET + +// ---------------------------------------------------------------------------- + +// func emitCopy(dst []byte, offset, length int) int +// +// All local variables fit into registers. The register allocation: +// - R3 length +// - R7 &dst[0] +// - R8 &dst[i] +// - R11 offset +// +// The unusual register allocation of local variables, such as R11 for the +// offset, matches the allocation used at the call site in encodeBlock, which +// makes it easier to manually inline this function. +TEXT ·emitCopy(SB), NOSPLIT, $0-48 + MOVD dst_base+0(FP), R8 + MOVD R8, R7 + MOVD offset+24(FP), R11 + MOVD length+32(FP), R3 + +loop0: + // for length >= 68 { etc } + CMPW $68, R3 + BLT step1 + + // Emit a length 64 copy, encoded as 3 bytes. + MOVD $0xfe, R2 + MOVB R2, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + SUB $64, R3, R3 + B loop0 + +step1: + // if length > 64 { etc } + CMP $64, R3 + BLE step2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVD $0xee, R2 + MOVB R2, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + SUB $60, R3, R3 + +step2: + // if length >= 12 || offset >= 2048 { goto step3 } + CMP $12, R3 + BGE step3 + CMPW $2048, R11 + BGE step3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(R8) + LSRW $3, R11, R11 + AND $0xe0, R11, R11 + SUB $4, R3, R3 + LSLW $2, R3 + AND $0xff, R3, R3 + ORRW R3, R11, R11 + ORRW $1, R11, R11 + MOVB R11, 0(R8) + ADD $2, R8, R8 + + // Return the number of bytes written. + SUB R7, R8, R8 + MOVD R8, ret+40(FP) + RET + +step3: + // Emit the remaining copy, encoded as 3 bytes. + SUB $1, R3, R3 + AND $0xff, R3, R3 + LSLW $2, R3, R3 + ORRW $2, R3, R3 + MOVB R3, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + + // Return the number of bytes written. + SUB R7, R8, R8 + MOVD R8, ret+40(FP) + RET + +// ---------------------------------------------------------------------------- + +// func extendMatch(src []byte, i, j int) int +// +// All local variables fit into registers. The register allocation: +// - R6 &src[0] +// - R7 &src[j] +// - R13 &src[len(src) - 8] +// - R14 &src[len(src)] +// - R15 &src[i] +// +// The unusual register allocation of local variables, such as R15 for a source +// pointer, matches the allocation used at the call site in encodeBlock, which +// makes it easier to manually inline this function. +TEXT ·extendMatch(SB), NOSPLIT, $0-48 + MOVD src_base+0(FP), R6 + MOVD src_len+8(FP), R14 + MOVD i+24(FP), R15 + MOVD j+32(FP), R7 + ADD R6, R14, R14 + ADD R6, R15, R15 + ADD R6, R7, R7 + MOVD R14, R13 + SUB $8, R13, R13 + +cmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + CMP R13, R7 + BHI cmp1 + MOVD (R15), R3 + MOVD (R7), R4 + CMP R4, R3 + BNE bsf + ADD $8, R15, R15 + ADD $8, R7, R7 + B cmp8 + +bsf: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. + // RBIT reverses the bit order, then CLZ counts the leading zeros, the + // combination of which finds the least significant bit which is set. + // The arm64 architecture is little-endian, and the shift by 3 converts + // a bit index to a byte index. + EOR R3, R4, R4 + RBIT R4, R4 + CLZ R4, R4 + ADD R4>>3, R7, R7 + + // Convert from &src[ret] to ret. + SUB R6, R7, R7 + MOVD R7, ret+40(FP) + RET + +cmp1: + // In src's tail, compare 1 byte at a time. + CMP R7, R14 + BLS extendMatchEnd + MOVB (R15), R3 + MOVB (R7), R4 + CMP R4, R3 + BNE extendMatchEnd + ADD $1, R15, R15 + ADD $1, R7, R7 + B cmp1 + +extendMatchEnd: + // Convert from &src[ret] to ret. + SUB R6, R7, R7 + MOVD R7, ret+40(FP) + RET + +// ---------------------------------------------------------------------------- + +// func encodeBlock(dst, src []byte) (d int) +// +// All local variables fit into registers, other than "var table". The register +// allocation: +// - R3 . . +// - R4 . . +// - R5 64 shift +// - R6 72 &src[0], tableSize +// - R7 80 &src[s] +// - R8 88 &dst[d] +// - R9 96 sLimit +// - R10 . &src[nextEmit] +// - R11 104 prevHash, currHash, nextHash, offset +// - R12 112 &src[base], skip +// - R13 . &src[nextS], &src[len(src) - 8] +// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x +// - R15 120 candidate +// - R16 . hash constant, 0x1e35a7bd +// - R17 . &table +// - . 128 table +// +// The second column (64, 72, etc) is the stack offset to spill the registers +// when calling other functions. We could pack this slightly tighter, but it's +// simpler to have a dedicated spill map independent of the function called. +// +// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An +// extra 64 bytes, to call other functions, and an extra 64 bytes, to spill +// local variables (registers) during calls gives 32768 + 64 + 64 = 32896. +TEXT ·encodeBlock(SB), 0, $32896-56 + MOVD dst_base+0(FP), R8 + MOVD src_base+24(FP), R7 + MOVD src_len+32(FP), R14 + + // shift, tableSize := uint32(32-8), 1<<8 + MOVD $24, R5 + MOVD $256, R6 + MOVW $0xa7bd, R16 + MOVKW $(0x1e35<<16), R16 + +calcShift: + // for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { + // shift-- + // } + MOVD $16384, R2 + CMP R2, R6 + BGE varTable + CMP R14, R6 + BGE varTable + SUB $1, R5, R5 + LSL $1, R6, R6 + B calcShift + +varTable: + // var table [maxTableSize]uint16 + // + // In the asm code, unlike the Go code, we can zero-initialize only the + // first tableSize elements. Each uint16 element is 2 bytes and each + // iterations writes 64 bytes, so we can do only tableSize/32 writes + // instead of the 2048 writes that would zero-initialize all of table's + // 32768 bytes. This clear could overrun the first tableSize elements, but + // it won't overrun the allocated stack size. + ADD $128, RSP, R17 + MOVD R17, R4 + + // !!! R6 = &src[tableSize] + ADD R6<<1, R17, R6 + +memclr: + STP.P (ZR, ZR), 64(R4) + STP (ZR, ZR), -48(R4) + STP (ZR, ZR), -32(R4) + STP (ZR, ZR), -16(R4) + CMP R4, R6 + BHI memclr + + // !!! R6 = &src[0] + MOVD R7, R6 + + // sLimit := len(src) - inputMargin + MOVD R14, R9 + SUB $15, R9, R9 + + // !!! Pre-emptively spill R5, R6 and R9 to the stack. Their values don't + // change for the rest of the function. + MOVD R5, 64(RSP) + MOVD R6, 72(RSP) + MOVD R9, 96(RSP) + + // nextEmit := 0 + MOVD R6, R10 + + // s := 1 + ADD $1, R7, R7 + + // nextHash := hash(load32(src, s), shift) + MOVW 0(R7), R11 + MULW R16, R11, R11 + LSRW R5, R11, R11 + +outer: + // for { etc } + + // skip := 32 + MOVD $32, R12 + + // nextS := s + MOVD R7, R13 + + // candidate := 0 + MOVD $0, R15 + +inner0: + // for { etc } + + // s := nextS + MOVD R13, R7 + + // bytesBetweenHashLookups := skip >> 5 + MOVD R12, R14 + LSR $5, R14, R14 + + // nextS = s + bytesBetweenHashLookups + ADD R14, R13, R13 + + // skip += bytesBetweenHashLookups + ADD R14, R12, R12 + + // if nextS > sLimit { goto emitRemainder } + MOVD R13, R3 + SUB R6, R3, R3 + CMP R9, R3 + BHI emitRemainder + + // candidate = int(table[nextHash]) + MOVHU 0(R17)(R11<<1), R15 + + // table[nextHash] = uint16(s) + MOVD R7, R3 + SUB R6, R3, R3 + + MOVH R3, 0(R17)(R11<<1) + + // nextHash = hash(load32(src, nextS), shift) + MOVW 0(R13), R11 + MULW R16, R11 + LSRW R5, R11, R11 + + // if load32(src, s) != load32(src, candidate) { continue } break + MOVW 0(R7), R3 + MOVW (R6)(R15*1), R4 + CMPW R4, R3 + BNE inner0 + +fourByteMatch: + // As per the encode_other.go code: + // + // A 4-byte match has been found. We'll later see etc. + + // !!! Jump to a fast path for short (<= 16 byte) literals. See the comment + // on inputMargin in encode.go. + MOVD R7, R3 + SUB R10, R3, R3 + CMP $16, R3 + BLE emitLiteralFastPath + + // ---------------------------------------- + // Begin inline of the emitLiteral call. + // + // d += emitLiteral(dst[d:], src[nextEmit:s]) + + MOVW R3, R4 + SUBW $1, R4, R4 + + MOVW $60, R2 + CMPW R2, R4 + BLT inlineEmitLiteralOneByte + MOVW $256, R2 + CMPW R2, R4 + BLT inlineEmitLiteralTwoBytes + +inlineEmitLiteralThreeBytes: + MOVD $0xf4, R1 + MOVB R1, 0(R8) + MOVW R4, 1(R8) + ADD $3, R8, R8 + B inlineEmitLiteralMemmove + +inlineEmitLiteralTwoBytes: + MOVD $0xf0, R1 + MOVB R1, 0(R8) + MOVB R4, 1(R8) + ADD $2, R8, R8 + B inlineEmitLiteralMemmove + +inlineEmitLiteralOneByte: + LSLW $2, R4, R4 + MOVB R4, 0(R8) + ADD $1, R8, R8 + +inlineEmitLiteralMemmove: + // Spill local variables (registers) onto the stack; call; unspill. + // + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // R8, R10 and R3 as arguments. + MOVD R8, 8(RSP) + MOVD R10, 16(RSP) + MOVD R3, 24(RSP) + + // Finish the "d +=" part of "d += emitLiteral(etc)". + ADD R3, R8, R8 + MOVD R7, 80(RSP) + MOVD R8, 88(RSP) + MOVD R15, 120(RSP) + CALL runtime·memmove(SB) + MOVD 64(RSP), R5 + MOVD 72(RSP), R6 + MOVD 80(RSP), R7 + MOVD 88(RSP), R8 + MOVD 96(RSP), R9 + MOVD 120(RSP), R15 + ADD $128, RSP, R17 + MOVW $0xa7bd, R16 + MOVKW $(0x1e35<<16), R16 + B inner1 + +inlineEmitLiteralEnd: + // End inline of the emitLiteral call. + // ---------------------------------------- + +emitLiteralFastPath: + // !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2". + MOVB R3, R4 + SUBW $1, R4, R4 + AND $0xff, R4, R4 + LSLW $2, R4, R4 + MOVB R4, (R8) + ADD $1, R8, R8 + + // !!! Implement the copy from lit to dst as a 16-byte load and store. + // (Encode's documentation says that dst and src must not overlap.) + // + // This always copies 16 bytes, instead of only len(lit) bytes, but that's + // OK. Subsequent iterations will fix up the overrun. + // + // Note that on arm64, it is legal and cheap to issue unaligned 8-byte or + // 16-byte loads and stores. This technique probably wouldn't be as + // effective on architectures that are fussier about alignment. + LDP 0(R10), (R0, R1) + STP (R0, R1), 0(R8) + ADD R3, R8, R8 + +inner1: + // for { etc } + + // base := s + MOVD R7, R12 + + // !!! offset := base - candidate + MOVD R12, R11 + SUB R15, R11, R11 + SUB R6, R11, R11 + + // ---------------------------------------- + // Begin inline of the extendMatch call. + // + // s = extendMatch(src, candidate+4, s+4) + + // !!! R14 = &src[len(src)] + MOVD src_len+32(FP), R14 + ADD R6, R14, R14 + + // !!! R13 = &src[len(src) - 8] + MOVD R14, R13 + SUB $8, R13, R13 + + // !!! R15 = &src[candidate + 4] + ADD $4, R15, R15 + ADD R6, R15, R15 + + // !!! s += 4 + ADD $4, R7, R7 + +inlineExtendMatchCmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + CMP R13, R7 + BHI inlineExtendMatchCmp1 + MOVD (R15), R3 + MOVD (R7), R4 + CMP R4, R3 + BNE inlineExtendMatchBSF + ADD $8, R15, R15 + ADD $8, R7, R7 + B inlineExtendMatchCmp8 + +inlineExtendMatchBSF: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. + // RBIT reverses the bit order, then CLZ counts the leading zeros, the + // combination of which finds the least significant bit which is set. + // The arm64 architecture is little-endian, and the shift by 3 converts + // a bit index to a byte index. + EOR R3, R4, R4 + RBIT R4, R4 + CLZ R4, R4 + ADD R4>>3, R7, R7 + B inlineExtendMatchEnd + +inlineExtendMatchCmp1: + // In src's tail, compare 1 byte at a time. + CMP R7, R14 + BLS inlineExtendMatchEnd + MOVB (R15), R3 + MOVB (R7), R4 + CMP R4, R3 + BNE inlineExtendMatchEnd + ADD $1, R15, R15 + ADD $1, R7, R7 + B inlineExtendMatchCmp1 + +inlineExtendMatchEnd: + // End inline of the extendMatch call. + // ---------------------------------------- + + // ---------------------------------------- + // Begin inline of the emitCopy call. + // + // d += emitCopy(dst[d:], base-candidate, s-base) + + // !!! length := s - base + MOVD R7, R3 + SUB R12, R3, R3 + +inlineEmitCopyLoop0: + // for length >= 68 { etc } + MOVW $68, R2 + CMPW R2, R3 + BLT inlineEmitCopyStep1 + + // Emit a length 64 copy, encoded as 3 bytes. + MOVD $0xfe, R1 + MOVB R1, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + SUBW $64, R3, R3 + B inlineEmitCopyLoop0 + +inlineEmitCopyStep1: + // if length > 64 { etc } + MOVW $64, R2 + CMPW R2, R3 + BLE inlineEmitCopyStep2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVD $0xee, R1 + MOVB R1, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + SUBW $60, R3, R3 + +inlineEmitCopyStep2: + // if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 } + MOVW $12, R2 + CMPW R2, R3 + BGE inlineEmitCopyStep3 + MOVW $2048, R2 + CMPW R2, R11 + BGE inlineEmitCopyStep3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(R8) + LSRW $8, R11, R11 + LSLW $5, R11, R11 + SUBW $4, R3, R3 + AND $0xff, R3, R3 + LSLW $2, R3, R3 + ORRW R3, R11, R11 + ORRW $1, R11, R11 + MOVB R11, 0(R8) + ADD $2, R8, R8 + B inlineEmitCopyEnd + +inlineEmitCopyStep3: + // Emit the remaining copy, encoded as 3 bytes. + SUBW $1, R3, R3 + LSLW $2, R3, R3 + ORRW $2, R3, R3 + MOVB R3, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + +inlineEmitCopyEnd: + // End inline of the emitCopy call. + // ---------------------------------------- + + // nextEmit = s + MOVD R7, R10 + + // if s >= sLimit { goto emitRemainder } + MOVD R7, R3 + SUB R6, R3, R3 + CMP R3, R9 + BLS emitRemainder + + // As per the encode_other.go code: + // + // We could immediately etc. + + // x := load64(src, s-1) + MOVD -1(R7), R14 + + // prevHash := hash(uint32(x>>0), shift) + MOVW R14, R11 + MULW R16, R11, R11 + LSRW R5, R11, R11 + + // table[prevHash] = uint16(s-1) + MOVD R7, R3 + SUB R6, R3, R3 + SUB $1, R3, R3 + + MOVHU R3, 0(R17)(R11<<1) + + // currHash := hash(uint32(x>>8), shift) + LSR $8, R14, R14 + MOVW R14, R11 + MULW R16, R11, R11 + LSRW R5, R11, R11 + + // candidate = int(table[currHash]) + MOVHU 0(R17)(R11<<1), R15 + + // table[currHash] = uint16(s) + ADD $1, R3, R3 + MOVHU R3, 0(R17)(R11<<1) + + // if uint32(x>>8) == load32(src, candidate) { continue } + MOVW (R6)(R15*1), R4 + CMPW R4, R14 + BEQ inner1 + + // nextHash = hash(uint32(x>>16), shift) + LSR $8, R14, R14 + MOVW R14, R11 + MULW R16, R11, R11 + LSRW R5, R11, R11 + + // s++ + ADD $1, R7, R7 + + // break out of the inner1 for loop, i.e. continue the outer loop. + B outer + +emitRemainder: + // if nextEmit < len(src) { etc } + MOVD src_len+32(FP), R3 + ADD R6, R3, R3 + CMP R3, R10 + BEQ encodeBlockEnd + + // d += emitLiteral(dst[d:], src[nextEmit:]) + // + // Push args. + MOVD R8, 8(RSP) + MOVD $0, 16(RSP) // Unnecessary, as the callee ignores it, but conservative. + MOVD $0, 24(RSP) // Unnecessary, as the callee ignores it, but conservative. + MOVD R10, 32(RSP) + SUB R10, R3, R3 + MOVD R3, 40(RSP) + MOVD R3, 48(RSP) // Unnecessary, as the callee ignores it, but conservative. + + // Spill local variables (registers) onto the stack; call; unspill. + MOVD R8, 88(RSP) + CALL ·emitLiteral(SB) + MOVD 88(RSP), R8 + + // Finish the "d +=" part of "d += emitLiteral(etc)". + MOVD 56(RSP), R1 + ADD R1, R8, R8 + +encodeBlockEnd: + MOVD dst_base+0(FP), R3 + SUB R3, R8, R8 + MOVD R8, d+48(FP) + RET diff --git a/vendor/github.com/klauspost/compress/snappy/encode_amd64.go b/vendor/github.com/golang/snappy/encode_asm.go similarity index 97% rename from vendor/github.com/klauspost/compress/snappy/encode_amd64.go rename to vendor/github.com/golang/snappy/encode_asm.go index 150d91bc8..107c1e714 100644 --- a/vendor/github.com/klauspost/compress/snappy/encode_amd64.go +++ b/vendor/github.com/golang/snappy/encode_asm.go @@ -5,6 +5,7 @@ // +build !appengine // +build gc // +build !noasm +// +build amd64 arm64 package snappy diff --git a/vendor/github.com/klauspost/compress/snappy/encode_other.go b/vendor/github.com/golang/snappy/encode_other.go similarity index 99% rename from vendor/github.com/klauspost/compress/snappy/encode_other.go rename to vendor/github.com/golang/snappy/encode_other.go index dbcae905e..296d7f0be 100644 --- a/vendor/github.com/klauspost/compress/snappy/encode_other.go +++ b/vendor/github.com/golang/snappy/encode_other.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !amd64 appengine !gc noasm +// +build !amd64,!arm64 appengine !gc noasm package snappy diff --git a/vendor/github.com/golang/snappy/go.mod b/vendor/github.com/golang/snappy/go.mod new file mode 100644 index 000000000..f6406bb2c --- /dev/null +++ b/vendor/github.com/golang/snappy/go.mod @@ -0,0 +1 @@ +module github.com/golang/snappy diff --git a/vendor/github.com/klauspost/compress/snappy/snappy.go b/vendor/github.com/golang/snappy/snappy.go similarity index 98% rename from vendor/github.com/klauspost/compress/snappy/snappy.go rename to vendor/github.com/golang/snappy/snappy.go index 74a36689e..ece692ea4 100644 --- a/vendor/github.com/klauspost/compress/snappy/snappy.go +++ b/vendor/github.com/golang/snappy/snappy.go @@ -17,7 +17,7 @@ // // The canonical, C++ implementation is at https://github.com/google/snappy and // it only implements the block format. -package snappy +package snappy // import "github.com/golang/snappy" import ( "hash/crc32" diff --git a/vendor/github.com/honeycombio/libhoney-go/CHANGELOG.md b/vendor/github.com/honeycombio/libhoney-go/CHANGELOG.md index dfc32c8c2..dae40a50f 100644 --- a/vendor/github.com/honeycombio/libhoney-go/CHANGELOG.md +++ b/vendor/github.com/honeycombio/libhoney-go/CHANGELOG.md @@ -1,5 +1,15 @@ # libhoney Changelog +## 1.15.3 2021-06-02 + +### Improvements + +- Add more context to batch response parsing error (#116) + +### Maintenance + +- Add go 1.15 & 1.16 to the testing matrix (#114, #119) + ## 1.15.2 2021-01-22 NOTE: v1.15.1 may cause update warnings due to checksum error, please use v1.15.2 instead. diff --git a/vendor/github.com/honeycombio/libhoney-go/go.mod b/vendor/github.com/honeycombio/libhoney-go/go.mod index 352bbd8c2..6c54f3eeb 100644 --- a/vendor/github.com/honeycombio/libhoney-go/go.mod +++ b/vendor/github.com/honeycombio/libhoney-go/go.mod @@ -3,7 +3,7 @@ module github.com/honeycombio/libhoney-go go 1.14 require ( - github.com/DataDog/zstd v1.4.5 + github.com/DataDog/zstd v1.4.8 github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a // indirect github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c // indirect github.com/facebookgo/limitgroup v0.0.0-20150612190941-6abd8d71ec01 // indirect @@ -11,7 +11,7 @@ require ( github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 // indirect github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4 // indirect github.com/golang/protobuf v1.3.5 // indirect - github.com/klauspost/compress v1.11.4 + github.com/klauspost/compress v1.12.2 github.com/stretchr/testify v1.6.1 github.com/vmihailenco/msgpack/v4 v4.3.12 golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e // indirect diff --git a/vendor/github.com/honeycombio/libhoney-go/go.sum b/vendor/github.com/honeycombio/libhoney-go/go.sum index fdd0d07dd..a32a732a1 100644 --- a/vendor/github.com/honeycombio/libhoney-go/go.sum +++ b/vendor/github.com/honeycombio/libhoney-go/go.sum @@ -1,6 +1,5 @@ -github.com/DataDog/zstd v1.4.5 h1:EndNeuB0l9syBZhut0wns3gV1hL8zX8LIu6ZiVHWLIQ= -github.com/DataDog/zstd v1.4.5/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo= -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/DataDog/zstd v1.4.8 h1:Rpmta4xZ/MgZnriKNd24iZMhGpP5dvUcs/uqfBapKZY= +github.com/DataDog/zstd v1.4.8/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -16,14 +15,14 @@ github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 h1:JWuenKqqX8nojt github.com/facebookgo/stack v0.0.0-20160209184415-751773369052/go.mod h1:UbMTZqLaRiH3MsBH8va0n7s1pQYcu3uTb8G4tygF4Zg= github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4 h1:7HZCaLC5+BZpmbhCOZJ293Lz68O7PYrF2EzeiFMwCLk= github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4/go.mod h1:5tD+neXqOorC30/tWg0LCSkrqj/AR6gu8yY8/fpw1q0= -github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.4 h1:87PNWwrRvUSnqS4dlcBU/ftvOIBep4sYuBLlh6rX2wk= github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/golang/protobuf v1.3.5 h1:F768QJ1E9tib+q5Sc8MkdJi1RxLTbRcTf8LJV56aRls= github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= -github.com/klauspost/compress v1.11.4 h1:kz40R/YWls3iqT9zX9AHN3WoVsrAWVyui5sxuLqiXqU= -github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= +github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/klauspost/compress v1.12.2 h1:2KCfW3I9M7nSc5wOqXAlW2v2U6v+w6cbjvbfp+OykW8= +github.com/klauspost/compress v1.12.2/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= @@ -31,7 +30,6 @@ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= @@ -40,9 +38,7 @@ github.com/vmihailenco/msgpack/v4 v4.3.12/go.mod h1:gborTTJjAo/GWTqqRjrLCn9pgNN+ github.com/vmihailenco/tagparser v0.1.1 h1:quXMXlA39OCbd2wAdTsGDlK9RkOk6Wuw+x37wVyIuWY= github.com/vmihailenco/tagparser v0.1.1/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65 h1:+rhAzEzT3f4JtomfC371qB+0Ola2caSKcY69NUBZrRQ= golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20200301022130-244492dfa37a h1:GuSPYbZzB5/dcLNCwLQLsg3obCJtX9IJhpXkvY7kzk0= golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e h1:3G+cUijn7XD+S4eJFddp53Pv7+slrESplyjG25HgL+k= golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= @@ -55,7 +51,6 @@ google.golang.org/appengine v1.6.5 h1:tycE03LOZYQNhDpS27tcQdAzLCVMaj7QT2SXxebnpC google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= gopkg.in/alexcesaro/statsd.v2 v2.0.0 h1:FXkZSCZIH17vLCO5sO2UucTHsH9pc+17F6pl3JVCwMc= gopkg.in/alexcesaro/statsd.v2 v2.0.0/go.mod h1:i0ubccKGzBVNBpdGV5MocxyA/XlLUJzA7SLonnE4drU= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/vendor/github.com/honeycombio/libhoney-go/libhoney.go b/vendor/github.com/honeycombio/libhoney-go/libhoney.go index 2a3ea9c29..d2043981e 100644 --- a/vendor/github.com/honeycombio/libhoney-go/libhoney.go +++ b/vendor/github.com/honeycombio/libhoney-go/libhoney.go @@ -33,7 +33,7 @@ const ( defaultSampleRate = 1 defaultAPIHost = "https://api.honeycomb.io/" defaultDataset = "libhoney-go dataset" - version = "1.15.2" + version = "1.15.3" // DefaultMaxBatchSize how many events to collect in a batch DefaultMaxBatchSize = 50 diff --git a/vendor/github.com/honeycombio/libhoney-go/transmission/transmission.go b/vendor/github.com/honeycombio/libhoney-go/transmission/transmission.go index d26f22ffd..1bdf21cbd 100644 --- a/vendor/github.com/honeycombio/libhoney-go/transmission/transmission.go +++ b/vendor/github.com/honeycombio/libhoney-go/transmission/transmission.go @@ -480,7 +480,11 @@ func (b *batchAgg) fireBatch(events []*Event) { if err != nil { // if we can't decode the responses, just error out all of them b.metrics.Increment("response_decode_errors") - b.enqueueErrResponses(err, events, dur/time.Duration(numEncoded)) + b.enqueueErrResponses(fmt.Errorf( + "got OK HTTP response, but couldn't read response body: %v", err), + events, + dur/time.Duration(numEncoded), + ) return } diff --git a/vendor/github.com/klauspost/compress/fse/compress.go b/vendor/github.com/klauspost/compress/fse/compress.go index b69237c9b..6f341914c 100644 --- a/vendor/github.com/klauspost/compress/fse/compress.go +++ b/vendor/github.com/klauspost/compress/fse/compress.go @@ -92,7 +92,6 @@ func (c *cState) init(bw *bitWriter, ct *cTable, tableLog uint8, first symbolTra im := int32((nbBitsOut << 16) - first.deltaNbBits) lu := (im >> nbBitsOut) + first.deltaFindState c.state = c.stateTable[lu] - return } // encode the output symbol provided and write it to the bitstream. @@ -301,7 +300,7 @@ func (s *Scratch) writeCount() error { out[outP+1] = byte(bitStream >> 8) outP += (bitCount + 7) / 8 - if uint16(charnum) > s.symbolLen { + if charnum > s.symbolLen { return errors.New("internal error: charnum > s.symbolLen") } s.Out = out[:outP] @@ -331,7 +330,7 @@ type cTable struct { func (s *Scratch) allocCtable() { tableSize := 1 << s.actualTableLog // get tableSymbol that is big enough. - if cap(s.ct.tableSymbol) < int(tableSize) { + if cap(s.ct.tableSymbol) < tableSize { s.ct.tableSymbol = make([]byte, tableSize) } s.ct.tableSymbol = s.ct.tableSymbol[:tableSize] @@ -565,8 +564,8 @@ func (s *Scratch) normalizeCount2() error { distributed uint32 total = uint32(s.br.remain()) tableLog = s.actualTableLog - lowThreshold = uint32(total >> tableLog) - lowOne = uint32((total * 3) >> (tableLog + 1)) + lowThreshold = total >> tableLog + lowOne = (total * 3) >> (tableLog + 1) ) for i, cnt := range s.count[:s.symbolLen] { if cnt == 0 { @@ -591,7 +590,7 @@ func (s *Scratch) normalizeCount2() error { if (total / toDistribute) > lowOne { // risk of rounding to zero - lowOne = uint32((total * 3) / (toDistribute * 2)) + lowOne = (total * 3) / (toDistribute * 2) for i, cnt := range s.count[:s.symbolLen] { if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) { s.norm[i] = 1 diff --git a/vendor/github.com/klauspost/compress/fse/decompress.go b/vendor/github.com/klauspost/compress/fse/decompress.go index 413ec3b3c..926f5f153 100644 --- a/vendor/github.com/klauspost/compress/fse/decompress.go +++ b/vendor/github.com/klauspost/compress/fse/decompress.go @@ -172,7 +172,7 @@ type decSymbol struct { // allocDtable will allocate decoding tables if they are not big enough. func (s *Scratch) allocDtable() { tableSize := 1 << s.actualTableLog - if cap(s.decTable) < int(tableSize) { + if cap(s.decTable) < tableSize { s.decTable = make([]decSymbol, tableSize) } s.decTable = s.decTable[:tableSize] @@ -340,7 +340,7 @@ type decoder struct { func (d *decoder) init(in *bitReader, dt []decSymbol, tableLog uint8) { d.dt = dt d.br = in - d.state = uint16(in.getBits(tableLog)) + d.state = in.getBits(tableLog) } // next returns the next symbol and sets the next state. diff --git a/vendor/github.com/klauspost/compress/huff0/README.md b/vendor/github.com/klauspost/compress/huff0/README.md index e12da4db2..8b6e5c663 100644 --- a/vendor/github.com/klauspost/compress/huff0/README.md +++ b/vendor/github.com/klauspost/compress/huff0/README.md @@ -14,7 +14,9 @@ but it can be used as a secondary step to compressors (like Snappy) that does no ## News - * Mar 2018: First implementation released. Consider this beta software for now. +This is used as part of the [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and decompression package. + +This ensures that most functionality is well tested. # Usage diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go index f9ed5f830..0823c928c 100644 --- a/vendor/github.com/klauspost/compress/huff0/compress.go +++ b/vendor/github.com/klauspost/compress/huff0/compress.go @@ -403,7 +403,7 @@ func (s *Scratch) buildCTable() error { var startNode = int16(s.symbolLen) nonNullRank := s.symbolLen - 1 - nodeNb := int16(startNode) + nodeNb := startNode huffNode := s.nodes[1 : huffNodesLen+1] // This overlays the slice above, but allows "-1" index lookups. @@ -536,7 +536,6 @@ func (s *Scratch) huffSort() { } nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)} } - return } func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { @@ -580,7 +579,7 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { // Get pos of last (smallest) symbol per rank { - currentNbBits := uint8(maxNbBits) + currentNbBits := maxNbBits for pos := int(n); pos >= 0; pos-- { if huffNode[pos].nbBits >= currentNbBits { continue diff --git a/vendor/github.com/klauspost/compress/snappy/runbench.cmd b/vendor/github.com/klauspost/compress/snappy/runbench.cmd deleted file mode 100644 index d24eb4b47..000000000 --- a/vendor/github.com/klauspost/compress/snappy/runbench.cmd +++ /dev/null @@ -1,2 +0,0 @@ -del old.txt -go test -bench=. >>old.txt && go test -bench=. >>old.txt && go test -bench=. >>old.txt && benchstat -delta-test=ttest old.txt new.txt diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md index 7680bfe1d..e7d7eb0ae 100644 --- a/vendor/github.com/klauspost/compress/zstd/README.md +++ b/vendor/github.com/klauspost/compress/zstd/README.md @@ -152,8 +152,8 @@ This package: file out level insize outsize millis mb/s silesia.tar zskp 1 211947520 73101992 643 313.87 silesia.tar zskp 2 211947520 67504318 969 208.38 -silesia.tar zskp 3 211947520 65177448 1899 106.44 -silesia.tar zskp 4 211947520 61381950 8115 24.91 +silesia.tar zskp 3 211947520 64595893 2007 100.68 +silesia.tar zskp 4 211947520 60995370 7691 26.28 cgo zstd: silesia.tar zstd 1 211947520 73605392 543 371.56 @@ -171,8 +171,8 @@ https://files.klauspost.com/compress/gob-stream.7z file out level insize outsize millis mb/s gob-stream zskp 1 1911399616 235022249 3088 590.30 gob-stream zskp 2 1911399616 205669791 3786 481.34 -gob-stream zskp 3 1911399616 185792019 9324 195.48 -gob-stream zskp 4 1911399616 171537212 32113 56.76 +gob-stream zskp 3 1911399616 175034659 9636 189.17 +gob-stream zskp 4 1911399616 167273881 29337 62.13 gob-stream zstd 1 1911399616 249810424 2637 691.26 gob-stream zstd 3 1911399616 208192146 3490 522.31 gob-stream zstd 6 1911399616 193632038 6687 272.56 @@ -187,8 +187,8 @@ http://mattmahoney.net/dc/textdata.html file out level insize outsize millis mb/s enwik9 zskp 1 1000000000 343848582 3609 264.18 enwik9 zskp 2 1000000000 317276632 5746 165.97 -enwik9 zskp 3 1000000000 294540704 11725 81.34 -enwik9 zskp 4 1000000000 276609671 44029 21.66 +enwik9 zskp 3 1000000000 292243069 12162 78.41 +enwik9 zskp 4 1000000000 275241169 36430 26.18 enwik9 zstd 1 1000000000 358072021 3110 306.65 enwik9 zstd 3 1000000000 313734672 4784 199.35 enwik9 zstd 6 1000000000 295138875 10290 92.68 @@ -202,8 +202,8 @@ https://files.klauspost.com/compress/github-june-2days-2019.json.zst file out level insize outsize millis mb/s github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40 github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96 -github-june-2days-2019.json zskp 3 6273951764 537511906 29252 204.54 -github-june-2days-2019.json zskp 4 6273951764 512796117 97791 61.18 +github-june-2days-2019.json zskp 3 6273951764 524340691 34043 175.75 +github-june-2days-2019.json zskp 4 6273951764 503314661 93811 63.78 github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00 github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57 github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18 @@ -217,8 +217,8 @@ https://files.klauspost.com/compress/rawstudio-mint14.7z file out level insize outsize millis mb/s rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84 rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07 -rawstudio-mint14.tar zskp 3 8558382592 3224594213 71751 113.75 -rawstudio-mint14.tar zskp 4 8558382592 3027332295 486243 16.79 +rawstudio-mint14.tar zskp 3 8558382592 3158085214 77675 105.08 +rawstudio-mint14.tar zskp 4 8558382592 3020370044 404956 20.16 rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27 rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92 rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77 @@ -232,8 +232,8 @@ https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst file out level insize outsize millis mb/s nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35 nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44 -nyc-taxi-data-10M.csv zskp 3 3325605752 538490114 19880 159.53 -nyc-taxi-data-10M.csv zskp 4 3325605752 495986829 89368 35.49 +nyc-taxi-data-10M.csv zskp 3 3325605752 530289687 25239 125.66 +nyc-taxi-data-10M.csv zskp 4 3325605752 490907191 65939 48.10 nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18 nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07 nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27 diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go index c85c40255..e1be092f3 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockenc.go +++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go @@ -22,28 +22,44 @@ type blockEnc struct { dictLitEnc *huff0.Scratch wr bitWriter - extraLits int - last bool - + extraLits int output []byte recentOffsets [3]uint32 prevRecentOffsets [3]uint32 + + last bool + lowMem bool } // init should be used once the block has been created. // If called more than once, the effect is the same as calling reset. func (b *blockEnc) init() { - if cap(b.literals) < maxCompressedLiteralSize { - b.literals = make([]byte, 0, maxCompressedLiteralSize) - } - const defSeqs = 200 - b.literals = b.literals[:0] - if cap(b.sequences) < defSeqs { - b.sequences = make([]seq, 0, defSeqs) - } - if cap(b.output) < maxCompressedBlockSize { - b.output = make([]byte, 0, maxCompressedBlockSize) + if b.lowMem { + // 1K literals + if cap(b.literals) < 1<<10 { + b.literals = make([]byte, 0, 1<<10) + } + const defSeqs = 20 + if cap(b.sequences) < defSeqs { + b.sequences = make([]seq, 0, defSeqs) + } + // 1K + if cap(b.output) < 1<<10 { + b.output = make([]byte, 0, 1<<10) + } + } else { + if cap(b.literals) < maxCompressedBlockSize { + b.literals = make([]byte, 0, maxCompressedBlockSize) + } + const defSeqs = 200 + if cap(b.sequences) < defSeqs { + b.sequences = make([]seq, 0, defSeqs) + } + if cap(b.output) < maxCompressedBlockSize { + b.output = make([]byte, 0, maxCompressedBlockSize) + } } + if b.coders.mlEnc == nil { b.coders.mlEnc = &fseEncoder{} b.coders.mlPrev = &fseEncoder{} @@ -370,9 +386,9 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error { b.output = bh.appendTo(b.output) b.output = append(b.output, lits[0]) return nil + case nil: default: return err - case nil: } // Compressed... // Now, allow reuse @@ -512,11 +528,6 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { if debug { println("Adding literals RLE") } - default: - if debug { - println("Adding literals ERROR:", err) - } - return err case nil: // Compressed litLen... if reUsed { @@ -547,6 +558,11 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { if debug { println("Adding literals compressed") } + default: + if debug { + println("Adding literals ERROR:", err) + } + return err } // Sequence compression diff --git a/vendor/github.com/klauspost/compress/zstd/decodeheader.go b/vendor/github.com/klauspost/compress/zstd/decodeheader.go index 87896c5ea..69736e8d4 100644 --- a/vendor/github.com/klauspost/compress/zstd/decodeheader.go +++ b/vendor/github.com/klauspost/compress/zstd/decodeheader.go @@ -93,7 +93,7 @@ func (h *Header) Decode(in []byte) error { h.HasCheckSum = fhd&(1<<2) != 0 if fhd&(1<<3) != 0 { - return errors.New("Reserved bit set on frame header") + return errors.New("reserved bit set on frame header") } // Read Window_Descriptor @@ -174,7 +174,7 @@ func (h *Header) Decode(in []byte) error { if len(in) < 3 { return nil } - tmp, in := in[:3], in[3:] + tmp := in[:3] bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16) h.FirstBlock.Last = bh&1 != 0 blockType := blockType((bh >> 1) & 3) diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index cdda0de58..f593e464b 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -5,7 +5,6 @@ package zstd import ( - "bytes" "errors" "io" "sync" @@ -85,6 +84,10 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) { d.current.output = make(chan decodeOutput, d.o.concurrent) d.current.flushed = true + if r == nil { + d.current.err = ErrDecoderNilInput + } + // Transfer option dicts. d.dicts = make(map[uint32]dict, len(d.o.dicts)) for _, dc := range d.o.dicts { @@ -111,7 +114,7 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) { // When the stream is done, io.EOF will be returned. func (d *Decoder) Read(p []byte) (int, error) { if d.stream == nil { - return 0, errors.New("no input has been initialized") + return 0, ErrDecoderNilInput } var n int for { @@ -152,12 +155,20 @@ func (d *Decoder) Read(p []byte) (int, error) { // Reset will reset the decoder the supplied stream after the current has finished processing. // Note that this functionality cannot be used after Close has been called. +// Reset can be called with a nil reader to release references to the previous reader. +// After being called with a nil reader, no other operations than Reset or DecodeAll or Close +// should be used. func (d *Decoder) Reset(r io.Reader) error { if d.current.err == ErrDecoderClosed { return d.current.err } + + d.drainOutput() + if r == nil { - return errors.New("nil Reader sent as input") + d.current.err = ErrDecoderNilInput + d.current.flushed = true + return nil } if d.stream == nil { @@ -166,14 +177,13 @@ func (d *Decoder) Reset(r io.Reader) error { go d.startStreamDecoder(d.stream) } - d.drainOutput() - // If bytes buffer and < 1MB, do sync decoding anyway. - if bb, ok := r.(*bytes.Buffer); ok && bb.Len() < 1<<20 { + if bb, ok := r.(byter); ok && bb.Len() < 1<<20 { + bb2 := bb if debug { println("*bytes.Buffer detected, doing sync decode, len:", bb.Len()) } - b := bb.Bytes() + b := bb2.Bytes() var dst []byte if cap(d.current.b) > 0 { dst = d.current.b @@ -226,20 +236,17 @@ func (d *Decoder) drainOutput() { println("current already flushed") return } - for { - select { - case v := <-d.current.output: - if v.d != nil { - if debug { - printf("re-adding decoder %p", v.d) - } - d.decoders <- v.d - } - if v.err == errEndOfStream { - println("current flushed") - d.current.flushed = true - return + for v := range d.current.output { + if v.d != nil { + if debug { + printf("re-adding decoder %p", v.d) } + d.decoders <- v.d + } + if v.err == errEndOfStream { + println("current flushed") + d.current.flushed = true + return } } } @@ -249,7 +256,7 @@ func (d *Decoder) drainOutput() { // Any error encountered during the write is also returned. func (d *Decoder) WriteTo(w io.Writer) (int64, error) { if d.stream == nil { - return 0, errors.New("no input has been initialized") + return 0, ErrDecoderNilInput } var n int64 for { diff --git a/vendor/github.com/klauspost/compress/zstd/decoder_options.go b/vendor/github.com/klauspost/compress/zstd/decoder_options.go index 284d38449..c0fd058c2 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder_options.go @@ -6,7 +6,6 @@ package zstd import ( "errors" - "fmt" "runtime" ) @@ -43,7 +42,7 @@ func WithDecoderLowmem(b bool) DOption { func WithDecoderConcurrency(n int) DOption { return func(o *decoderOptions) error { if n <= 0 { - return fmt.Errorf("Concurrency must be at least 1") + return errors.New("concurrency must be at least 1") } o.concurrent = n return nil @@ -61,7 +60,7 @@ func WithDecoderMaxMemory(n uint64) DOption { return errors.New("WithDecoderMaxMemory must be at least 1") } if n > 1<<63 { - return fmt.Errorf("WithDecoderMaxmemory must be less than 1 << 63") + return errors.New("WithDecoderMaxmemory must be less than 1 << 63") } o.maxDecodedSize = n return nil diff --git a/vendor/github.com/klauspost/compress/zstd/enc_base.go b/vendor/github.com/klauspost/compress/zstd/enc_base.go index b1b7c6e6a..60f298648 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_base.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_base.go @@ -7,6 +7,10 @@ import ( "github.com/klauspost/compress/zstd/internal/xxhash" ) +const ( + dictShardBits = 6 +) + type fastBase struct { // cur is the offset at the start of hist cur int32 @@ -17,6 +21,7 @@ type fastBase struct { tmp [8]byte blk *blockEnc lastDictID uint32 + lowMem bool } // CRC returns the underlying CRC writer. @@ -57,15 +62,10 @@ func (e *fastBase) addBlock(src []byte) int32 { // check if we have space already if len(e.hist)+len(src) > cap(e.hist) { if cap(e.hist) == 0 { - l := e.maxMatchOff * 2 - // Make it at least 1MB. - if l < 1<<20 { - l = 1 << 20 - } - e.hist = make([]byte, 0, l) + e.ensureHist(len(src)) } else { - if cap(e.hist) < int(e.maxMatchOff*2) { - panic("unexpected buffer size") + if cap(e.hist) < int(e.maxMatchOff+maxCompressedBlockSize) { + panic(fmt.Errorf("unexpected buffer cap %d, want at least %d with window %d", cap(e.hist), e.maxMatchOff+maxCompressedBlockSize, e.maxMatchOff)) } // Move down offset := int32(len(e.hist)) - e.maxMatchOff @@ -79,6 +79,28 @@ func (e *fastBase) addBlock(src []byte) int32 { return s } +// ensureHist will ensure that history can keep at least this many bytes. +func (e *fastBase) ensureHist(n int) { + if cap(e.hist) >= n { + return + } + l := e.maxMatchOff + if (e.lowMem && e.maxMatchOff > maxCompressedBlockSize) || e.maxMatchOff <= maxCompressedBlockSize { + l += maxCompressedBlockSize + } else { + l += e.maxMatchOff + } + // Make it at least 1MB. + if l < 1<<20 && !e.lowMem { + l = 1 << 20 + } + // Make it at least the requested size. + if l < int32(n) { + l = int32(n) + } + e.hist = make([]byte, 0, l) +} + // useBlock will replace the block with the provided one, // but transfer recent offsets from the previous. func (e *fastBase) UseBlock(enc *blockEnc) { @@ -117,7 +139,7 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 { // Reset the encoding table. func (e *fastBase) resetBase(d *dict, singleBlock bool) { if e.blk == nil { - e.blk = &blockEnc{} + e.blk = &blockEnc{lowMem: e.lowMem} e.blk.init() } else { e.blk.reset(nil) @@ -128,14 +150,15 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) { } else { e.crc.Reset() } - if (!singleBlock || d.DictContentSize() > 0) && cap(e.hist) < int(e.maxMatchOff*2)+d.DictContentSize() { - l := e.maxMatchOff*2 + int32(d.DictContentSize()) - // Make it at least 1MB. - if l < 1<<20 { - l = 1 << 20 + if d != nil { + low := e.lowMem + if singleBlock { + e.lowMem = true } - e.hist = make([]byte, 0, l) + e.ensureHist(d.DictContentSize() + maxCompressedBlockSize) + e.lowMem = low } + // We offset current position so everything will be out of reach. // If above reset line, history will be purged. if e.cur < bufferReset { diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go index c4baa42c6..dc1eed5f0 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_best.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go @@ -112,7 +112,7 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) { // Override src src = e.hist sLimit := int32(len(src)) - inputMargin - const kSearchStrength = 12 + const kSearchStrength = 10 // nextEmit is where in src the next emitLiteral should start from. nextEmit := s @@ -186,9 +186,11 @@ encodeLoop: best = bestOf(best, matchAt(s-offset1+1, s+1, uint32(cv>>8), 1)) best = bestOf(best, matchAt(s-offset2+1, s+1, uint32(cv>>8), 2)) best = bestOf(best, matchAt(s-offset3+1, s+1, uint32(cv>>8), 3)) - best = bestOf(best, matchAt(s-offset1+3, s+3, uint32(cv>>24), 1)) - best = bestOf(best, matchAt(s-offset2+3, s+3, uint32(cv>>24), 2)) - best = bestOf(best, matchAt(s-offset3+3, s+3, uint32(cv>>24), 3)) + if best.length > 0 { + best = bestOf(best, matchAt(s-offset1+3, s+3, uint32(cv>>24), 1)) + best = bestOf(best, matchAt(s-offset2+3, s+3, uint32(cv>>24), 2)) + best = bestOf(best, matchAt(s-offset3+3, s+3, uint32(cv>>24), 3)) + } } // Load next and check... e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset} @@ -218,6 +220,20 @@ encodeLoop: best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)) best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1)) best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1)) + + // See if we can find a better match by checking where the current best ends. + // Use that offset to see if we can find a better full match. + if sAt := best.s + best.length; sAt < sLimit { + nextHashL := hash8(load6432(src, sAt), bestLongTableBits) + candidateEnd := e.longTable[nextHashL] + if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 { + bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1)) + if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 { + bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1)) + } + best = bestEnd + } + } } // We have a match, we can store the forward value @@ -405,6 +421,7 @@ encodeLoop: // Most notable difference is that src will not be copied for history and // we do not need to check for max match length. func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { + e.ensureHist(len(src)) e.Encode(blk, src) } diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go index 94a5343d0..604954290 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_better.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go @@ -16,29 +16,558 @@ const ( // This greatly depends on the type of input. betterShortTableBits = 13 // Bits used in the short match table betterShortTableSize = 1 << betterShortTableBits // Size of the table + + betterLongTableShardCnt = 1 << (betterLongTableBits - dictShardBits) // Number of shards in the table + betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard + + betterShortTableShardCnt = 1 << (betterShortTableBits - dictShardBits) // Number of shards in the table + betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Size of an individual shard ) -type prevEntry struct { - offset int32 - prev int32 +type prevEntry struct { + offset int32 + prev int32 +} + +// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches. +// The long match table contains the previous entry with the same hash, +// effectively making it a "chain" of length 2. +// When we find a long match we choose between the two values and select the longest. +// When we find a short match, after checking the long, we check if we can find a long at n+1 +// and that it is longer (lazy matching). +type betterFastEncoder struct { + fastBase + table [betterShortTableSize]tableEntry + longTable [betterLongTableSize]prevEntry +} + +type betterFastEncoderDict struct { + betterFastEncoder + dictTable []tableEntry + dictLongTable []prevEntry + shortTableShardDirty [betterShortTableShardCnt]bool + longTableShardDirty [betterLongTableShardCnt]bool + allDirty bool +} + +// Encode improves compression... +func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) { + const ( + // Input margin is the number of bytes we read (8) + // and the maximum we will read ahead (2) + inputMargin = 8 + 2 + minNonLiteralBlockSize = 16 + ) + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.longTable[:] { + e.longTable[i] = prevEntry{} + } + e.cur = e.maxMatchOff + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff + for i := range e.table[:] { + v := e.table[i].offset + if v < minOff { + v = 0 + } else { + v = v - e.cur + e.maxMatchOff + } + e.table[i].offset = v + } + for i := range e.longTable[:] { + v := e.longTable[i].offset + v2 := e.longTable[i].prev + if v < minOff { + v = 0 + v2 = 0 + } else { + v = v - e.cur + e.maxMatchOff + if v2 < minOff { + v2 = 0 + } else { + v2 = v2 - e.cur + e.maxMatchOff + } + } + e.longTable[i] = prevEntry{ + offset: v, + prev: v2, + } + } + e.cur = e.maxMatchOff + break + } + + s := e.addBlock(src) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + // Override src + src = e.hist + sLimit := int32(len(src)) - inputMargin + // stepSize is the number of bytes to skip on every main loop iteration. + // It should be >= 1. + const stepSize = 1 + + const kSearchStrength = 9 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + if debug { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + var t int32 + // We allow the encoder to optionally turn off repeat offsets across blocks + canRepeat := len(blk.sequences) > 2 + var matched int32 + + for { + if debugAsserts && canRepeat && offset1 == 0 { + panic("offset0 was 0") + } + + nextHashS := hash5(cv, betterShortTableBits) + nextHashL := hash8(cv, betterLongTableBits) + candidateL := e.longTable[nextHashL] + candidateS := e.table[nextHashS] + + const repOff = 1 + repIndex := s - offset1 + repOff + off := s + e.cur + e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset} + e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)} + + if canRepeat { + if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { + // Consider history as well. + var seq seq + lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = 1 + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Index match start+1 (long) -> s - 1 + index0 := s + repOff + s += lenght + repOff + + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + // Index skipped... + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + index0 += 2 + } + cv = load6432(src, s) + continue + } + const repOff2 = 1 + + // We deviate from the reference encoder and also check offset 2. + // Still slower and not much better, so disabled. + // repIndex = s - offset2 + repOff2 + if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) { + // Consider history as well. + var seq seq + lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff2 + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 2 + seq.offset = 2 + if debugSequences { + println("repeat sequence 2", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + index0 := s + repOff2 + s += lenght + repOff2 + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + + // Index skipped... + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + index0 += 2 + } + cv = load6432(src, s) + // Swap offsets + offset1, offset2 = offset2, offset1 + continue + } + } + // Find the offsets of our two matches. + coffsetL := candidateL.offset - e.cur + coffsetLP := candidateL.prev - e.cur + + // Check if we have a long match. + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matched = e.matchlen(s+8, coffsetL+8, src) + 8 + t = coffsetL + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + + if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) { + // Found a long match, at least 8 bytes. + prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8 + if prevMatch > matched { + matched = prevMatch + t = coffsetLP + } + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + } + break + } + + // Check if we have a long match on prev. + if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) { + // Found a long match, at least 8 bytes. + matched = e.matchlen(s+8, coffsetLP+8, src) + 8 + t = coffsetLP + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + break + } + + coffsetS := candidateS.offset - e.cur + + // Check if we have a short match. + if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val { + // found a regular match + matched = e.matchlen(s+4, coffsetS+4, src) + 4 + + // See if we can find a long match at s+1 + const checkAt = 1 + cv := load6432(src, s+checkAt) + nextHashL = hash8(cv, betterLongTableBits) + candidateL = e.longTable[nextHashL] + coffsetL = candidateL.offset - e.cur + + // We can store it, since we have at least a 4 byte match. + e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset} + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 + if matchedNext > matched { + t = coffsetL + s += checkAt + matched = matchedNext + if debugMatches { + println("long match (after short)") + } + break + } + } + + // Check prev long... + coffsetL = candidateL.prev - e.cur + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 + if matchedNext > matched { + t = coffsetL + s += checkAt + matched = matchedNext + if debugMatches { + println("prev long match (after short)") + } + break + } + } + t = coffsetS + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugAsserts && t < 0 { + panic("t<0") + } + if debugMatches { + println("short match") + } + break + } + + // No match found, move forward in input. + s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + } + + // Try to find a better match by searching for a long match at the end of the current best match + if true && s+matched < sLimit { + nextHashL := hash8(load6432(src, s+matched), betterLongTableBits) + cv := load3232(src, s) + candidateL := e.longTable[nextHashL] + coffsetL := candidateL.offset - e.cur - matched + if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { + // Found a long match, at least 4 bytes. + matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 + if matchedNext > matched { + t = coffsetL + matched = matchedNext + if debugMatches { + println("long match at end-of-match") + } + } + } + + // Check prev long... + if true { + coffsetL = candidateL.prev - e.cur - matched + if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { + // Found a long match, at least 4 bytes. + matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 + if matchedNext > matched { + t = coffsetL + matched = matchedNext + if debugMatches { + println("prev long match at end-of-match") + } + } + } + } + } + // A match has been found. Update recent offsets. + offset2 = offset1 + offset1 = s - t + + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + + if debugAsserts && canRepeat && int(offset1) > len(src) { + panic("invalid offset") + } + + // Extend the n-byte match as long as possible. + l := matched + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { + s-- + t-- + l++ + } + + // Write our sequence + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + + // Index match start+1 (long) -> s - 1 + index0 := s - l + 1 + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + index0 += 2 + } + + cv = load6432(src, s) + if !canRepeat { + continue + } + + // Check offset 2 + for { + o2 := s - offset2 + if load3232(src, o2) != uint32(cv) { + // Do regular search + break + } + + // Store this, since we have it. + nextHashS := hash5(cv, betterShortTableBits) + nextHashL := hash8(cv, betterLongTableBits) + + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + l := 4 + e.matchlen(s+4, o2+4, src) + + e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset} + e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)} + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + // Finished + break encodeLoop + } + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + blk.recentOffsets[0] = uint32(offset1) + blk.recentOffsets[1] = uint32(offset2) + if debug { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } } -// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches. -// The long match table contains the previous entry with the same hash, -// effectively making it a "chain" of length 2. -// When we find a long match we choose between the two values and select the longest. -// When we find a short match, after checking the long, we check if we can find a long at n+1 -// and that it is longer (lazy matching). -type betterFastEncoder struct { - fastBase - table [betterShortTableSize]tableEntry - longTable [betterLongTableSize]prevEntry - dictTable []tableEntry - dictLongTable []prevEntry +// EncodeNoHist will encode a block with no history and no following blocks. +// Most notable difference is that src will not be copied for history and +// we do not need to check for max match length. +func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { + e.ensureHist(len(src)) + e.Encode(blk, src) } // Encode improves compression... -func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) { +func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) { const ( // Input margin is the number of bytes we read (8) // and the maximum we will read ahead (2) @@ -56,6 +585,7 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) { e.longTable[i] = prevEntry{} } e.cur = e.maxMatchOff + e.allDirty = true break } // Shift down everything in the table that isn't already too far away. @@ -88,6 +618,7 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) { prev: v2, } } + e.allDirty = true e.cur = e.maxMatchOff break } @@ -150,7 +681,9 @@ encodeLoop: repIndex := s - offset1 + repOff off := s + e.cur e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset} + e.markLongShardDirty(nextHashL) e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)} + e.markShortShardDirty(nextHashS) if canRepeat { if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { @@ -204,7 +737,10 @@ encodeLoop: h0 := hash8(cv0, betterLongTableBits) off := index0 + e.cur e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} - e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + e.markLongShardDirty(h0) + h1 := hash5(cv1, betterShortTableBits) + e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)} + e.markShortShardDirty(h1) index0 += 2 } cv = load6432(src, s) @@ -265,7 +801,10 @@ encodeLoop: h0 := hash8(cv0, betterLongTableBits) off := index0 + e.cur e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} - e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + e.markLongShardDirty(h0) + h1 := hash5(cv1, betterShortTableBits) + e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)} + e.markShortShardDirty(h1) index0 += 2 } cv = load6432(src, s) @@ -346,6 +885,7 @@ encodeLoop: // We can store it, since we have at least a 4 byte match. e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset} + e.markLongShardDirty(nextHashL) if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { // Found a long match, at least 8 bytes. matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 @@ -398,9 +938,41 @@ encodeLoop: } cv = load6432(src, s) } + // Try to find a better match by searching for a long match at the end of the current best match + if s+matched < sLimit { + nextHashL := hash8(load6432(src, s+matched), betterLongTableBits) + cv := load3232(src, s) + candidateL := e.longTable[nextHashL] + coffsetL := candidateL.offset - e.cur - matched + if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { + // Found a long match, at least 4 bytes. + matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 + if matchedNext > matched { + t = coffsetL + matched = matchedNext + if debugMatches { + println("long match at end-of-match") + } + } + } - // A 4-byte match has been found. Update recent offsets. - // We'll later see if more than 4 bytes. + // Check prev long... + if true { + coffsetL = candidateL.prev - e.cur - matched + if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { + // Found a long match, at least 4 bytes. + matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 + if matchedNext > matched { + t = coffsetL + matched = matchedNext + if debugMatches { + println("prev long match at end-of-match") + } + } + } + } + } + // A match has been found. Update recent offsets. offset2 = offset1 offset1 = s - t @@ -452,7 +1024,10 @@ encodeLoop: h0 := hash8(cv0, betterLongTableBits) off := index0 + e.cur e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} - e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + e.markLongShardDirty(h0) + h1 := hash5(cv1, betterShortTableBits) + e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)} + e.markShortShardDirty(h1) index0 += 2 } @@ -478,7 +1053,9 @@ encodeLoop: l := 4 + e.matchlen(s+4, o2+4, src) e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset} + e.markLongShardDirty(nextHashL) e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)} + e.markShortShardDirty(nextHashS) seq.matchLen = uint32(l) - zstdMinMatch seq.litLen = 0 @@ -512,15 +1089,16 @@ encodeLoop: } } -// EncodeNoHist will encode a block with no history and no following blocks. -// Most notable difference is that src will not be copied for history and -// we do not need to check for max match length. -func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { - e.Encode(blk, src) +// ResetDict will reset and set a dictionary if not nil +func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) { + e.resetBase(d, singleBlock) + if d != nil { + panic("betterFastEncoder: Reset with dict") + } } // ResetDict will reset and set a dictionary if not nil -func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) { +func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) { e.resetBase(d, singleBlock) if d == nil { return @@ -557,6 +1135,7 @@ func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) { } } e.lastDictID = d.id + e.allDirty = true } // Init or copy dict table @@ -585,11 +1164,72 @@ func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) { } } e.lastDictID = d.id + e.allDirty = true } + // Reset table to initial state - copy(e.longTable[:], e.dictLongTable) + { + dirtyShardCnt := 0 + if !e.allDirty { + for i := range e.shortTableShardDirty { + if e.shortTableShardDirty[i] { + dirtyShardCnt++ + } + } + } + const shardCnt = betterShortTableShardCnt + const shardSize = betterShortTableShardSize + if e.allDirty || dirtyShardCnt > shardCnt*4/6 { + copy(e.table[:], e.dictTable) + for i := range e.shortTableShardDirty { + e.shortTableShardDirty[i] = false + } + } else { + for i := range e.shortTableShardDirty { + if !e.shortTableShardDirty[i] { + continue + } + + copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize]) + e.shortTableShardDirty[i] = false + } + } + } + { + dirtyShardCnt := 0 + if !e.allDirty { + for i := range e.shortTableShardDirty { + if e.shortTableShardDirty[i] { + dirtyShardCnt++ + } + } + } + const shardCnt = betterLongTableShardCnt + const shardSize = betterLongTableShardSize + if e.allDirty || dirtyShardCnt > shardCnt*4/6 { + copy(e.longTable[:], e.dictLongTable) + for i := range e.longTableShardDirty { + e.longTableShardDirty[i] = false + } + } else { + for i := range e.longTableShardDirty { + if !e.longTableShardDirty[i] { + continue + } + copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize]) + e.longTableShardDirty[i] = false + } + } + } e.cur = e.maxMatchOff - // Reset table to initial state - copy(e.table[:], e.dictTable) + e.allDirty = false +} + +func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) { + e.longTableShardDirty[entryNum/betterLongTableShardSize] = true +} + +func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) { + e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true } diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go index 19eebf66e..8629d43d8 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go @@ -11,6 +11,9 @@ const ( dFastLongTableSize = 1 << dFastLongTableBits // Size of the table dFastLongTableMask = dFastLongTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. + dLongTableShardCnt = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table + dLongTableShardSize = dFastLongTableSize / tableShardCnt // Size of an individual shard + dFastShortTableBits = tableBits // Bits used in the short match table dFastShortTableSize = 1 << dFastShortTableBits // Size of the table dFastShortTableMask = dFastShortTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. @@ -18,8 +21,14 @@ const ( type doubleFastEncoder struct { fastEncoder - longTable [dFastLongTableSize]tableEntry - dictLongTable []tableEntry + longTable [dFastLongTableSize]tableEntry +} + +type doubleFastEncoderDict struct { + fastEncoderDict + longTable [dFastLongTableSize]tableEntry + dictLongTable []tableEntry + longTableShardDirty [dLongTableShardCnt]bool } // Encode mimmics functionality in zstd_dfast.c @@ -678,9 +687,379 @@ encodeLoop: } } +// Encode will encode the content, with a dictionary if initialized for it. +func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) { + const ( + // Input margin is the number of bytes we read (8) + // and the maximum we will read ahead (2) + inputMargin = 8 + 2 + minNonLiteralBlockSize = 16 + ) + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.longTable[:] { + e.longTable[i] = tableEntry{} + } + e.markAllShardsDirty() + e.cur = e.maxMatchOff + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff + for i := range e.table[:] { + v := e.table[i].offset + if v < minOff { + v = 0 + } else { + v = v - e.cur + e.maxMatchOff + } + e.table[i].offset = v + } + for i := range e.longTable[:] { + v := e.longTable[i].offset + if v < minOff { + v = 0 + } else { + v = v - e.cur + e.maxMatchOff + } + e.longTable[i].offset = v + } + e.markAllShardsDirty() + e.cur = e.maxMatchOff + break + } + + s := e.addBlock(src) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + // Override src + src = e.hist + sLimit := int32(len(src)) - inputMargin + // stepSize is the number of bytes to skip on every main loop iteration. + // It should be >= 1. + const stepSize = 1 + + const kSearchStrength = 8 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + if debug { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + var t int32 + // We allow the encoder to optionally turn off repeat offsets across blocks + canRepeat := len(blk.sequences) > 2 + + for { + if debugAsserts && canRepeat && offset1 == 0 { + panic("offset0 was 0") + } + + nextHashS := hash5(cv, dFastShortTableBits) + nextHashL := hash8(cv, dFastLongTableBits) + candidateL := e.longTable[nextHashL] + candidateS := e.table[nextHashS] + + const repOff = 1 + repIndex := s - offset1 + repOff + entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + e.longTable[nextHashL] = entry + e.markLongShardDirty(nextHashL) + e.table[nextHashS] = entry + e.markShardDirty(nextHashS) + + if canRepeat { + if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { + // Consider history as well. + var seq seq + lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = 1 + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + s += lenght + repOff + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + cv = load6432(src, s) + continue + } + } + // Find the offsets of our two matches. + coffsetL := s - (candidateL.offset - e.cur) + coffsetS := s - (candidateS.offset - e.cur) + + // Check if we have a long match. + if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val { + // Found a long match, likely at least 8 bytes. + // Reference encoder checks all 8 bytes, we only check 4, + // but the likelihood of both the first 4 bytes and the hash matching should be enough. + t = candidateL.offset - e.cur + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + break + } + + // Check if we have a short match. + if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val { + // found a regular match + // See if we can find a long match at s+1 + const checkAt = 1 + cv := load6432(src, s+checkAt) + nextHashL = hash8(cv, dFastLongTableBits) + candidateL = e.longTable[nextHashL] + coffsetL = s - (candidateL.offset - e.cur) + checkAt + + // We can store it, since we have at least a 4 byte match. + e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)} + e.markLongShardDirty(nextHashL) + if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val { + // Found a long match, likely at least 8 bytes. + // Reference encoder checks all 8 bytes, we only check 4, + // but the likelihood of both the first 4 bytes and the hash matching should be enough. + t = candidateL.offset - e.cur + s += checkAt + if debugMatches { + println("long match (after short)") + } + break + } + + t = candidateS.offset - e.cur + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugAsserts && t < 0 { + panic("t<0") + } + if debugMatches { + println("short match") + } + break + } + + // No match found, move forward in input. + s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + } + + // A 4-byte match has been found. Update recent offsets. + // We'll later see if more than 4 bytes. + offset2 = offset1 + offset1 = s - t + + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + + if debugAsserts && canRepeat && int(offset1) > len(src) { + panic("invalid offset") + } + + // Extend the 4-byte match as long as possible. + l := e.matchlen(s+4, t+4, src) + 4 + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { + s-- + t-- + l++ + } + + // Write our sequence + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + + // Index match start+1 (long) and start+2 (short) + index0 := s - l + 1 + // Index match end-2 (long) and end-1 (short) + index1 := s - 2 + + cv0 := load6432(src, index0) + cv1 := load6432(src, index1) + te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)} + te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)} + longHash1 := hash8(cv0, dFastLongTableBits) + longHash2 := hash8(cv0, dFastLongTableBits) + e.longTable[longHash1] = te0 + e.longTable[longHash2] = te1 + e.markLongShardDirty(longHash1) + e.markLongShardDirty(longHash2) + cv0 >>= 8 + cv1 >>= 8 + te0.offset++ + te1.offset++ + te0.val = uint32(cv0) + te1.val = uint32(cv1) + hashVal1 := hash5(cv0, dFastShortTableBits) + hashVal2 := hash5(cv1, dFastShortTableBits) + e.table[hashVal1] = te0 + e.markShardDirty(hashVal1) + e.table[hashVal2] = te1 + e.markShardDirty(hashVal2) + + cv = load6432(src, s) + + if !canRepeat { + continue + } + + // Check offset 2 + for { + o2 := s - offset2 + if load3232(src, o2) != uint32(cv) { + // Do regular search + break + } + + // Store this, since we have it. + nextHashS := hash5(cv, dFastShortTableBits) + nextHashL := hash8(cv, dFastLongTableBits) + + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + l := 4 + e.matchlen(s+4, o2+4, src) + + entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + e.longTable[nextHashL] = entry + e.markLongShardDirty(nextHashL) + e.table[nextHashS] = entry + e.markShardDirty(nextHashS) + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + // Finished + break encodeLoop + } + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + blk.recentOffsets[0] = uint32(offset1) + blk.recentOffsets[1] = uint32(offset2) + if debug { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } + // If we encoded more than 64K mark all dirty. + if len(src) > 64<<10 { + e.markAllShardsDirty() + } +} + // ResetDict will reset and set a dictionary if not nil func (e *doubleFastEncoder) Reset(d *dict, singleBlock bool) { e.fastEncoder.Reset(d, singleBlock) + if d != nil { + panic("doubleFastEncoder: Reset with dict not supported") + } +} + +// ResetDict will reset and set a dictionary if not nil +func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) { + allDirty := e.allDirty + e.fastEncoderDict.Reset(d, singleBlock) if d == nil { return } @@ -706,8 +1085,37 @@ func (e *doubleFastEncoder) Reset(d *dict, singleBlock bool) { } } e.lastDictID = d.id + e.allDirty = true } // Reset table to initial state e.cur = e.maxMatchOff - copy(e.longTable[:], e.dictLongTable) + + dirtyShardCnt := 0 + if !allDirty { + for i := range e.longTableShardDirty { + if e.longTableShardDirty[i] { + dirtyShardCnt++ + } + } + } + + if allDirty || dirtyShardCnt > dLongTableShardCnt/2 { + copy(e.longTable[:], e.dictLongTable) + for i := range e.longTableShardDirty { + e.longTableShardDirty[i] = false + } + return + } + for i := range e.longTableShardDirty { + if !e.longTableShardDirty[i] { + continue + } + + copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize]) + e.longTableShardDirty[i] = false + } +} + +func (e *doubleFastEncoderDict) markLongShardDirty(entryNum uint32) { + e.longTableShardDirty[entryNum/dLongTableShardSize] = true } diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go index 0b301df43..ba4a17e10 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go @@ -11,9 +11,11 @@ import ( ) const ( - tableBits = 15 // Bits used in the table - tableSize = 1 << tableBits // Size of the table - tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. + tableBits = 15 // Bits used in the table + tableSize = 1 << tableBits // Size of the table + tableShardCnt = 1 << (tableBits - dictShardBits) // Number of shards in the table + tableShardSize = tableSize / tableShardCnt // Size of an individual shard + tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. maxMatchLength = 131074 ) @@ -24,8 +26,14 @@ type tableEntry struct { type fastEncoder struct { fastBase - table [tableSize]tableEntry - dictTable []tableEntry + table [tableSize]tableEntry +} + +type fastEncoderDict struct { + fastEncoder + dictTable []tableEntry + tableShardDirty [tableShardCnt]bool + allDirty bool } // Encode mimmics functionality in zstd_fast.c @@ -78,7 +86,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) { // TEMPLATE const hashLog = tableBits // seems global, but would be nice to tweak. - const kSearchStrength = 8 + const kSearchStrength = 7 // nextEmit is where in src the next emitLiteral should start from. nextEmit := s @@ -617,8 +625,322 @@ encodeLoop: } } +// Encode will encode the content, with a dictionary if initialized for it. +func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) { + const ( + inputMargin = 8 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if e.allDirty || len(src) > 32<<10 { + e.fastEncoder.Encode(blk, src) + e.allDirty = true + return + } + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + e.cur = e.maxMatchOff + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff + for i := range e.table[:] { + v := e.table[i].offset + if v < minOff { + v = 0 + } else { + v = v - e.cur + e.maxMatchOff + } + e.table[i].offset = v + } + e.cur = e.maxMatchOff + break + } + + s := e.addBlock(src) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + // Override src + src = e.hist + sLimit := int32(len(src)) - inputMargin + // stepSize is the number of bytes to skip on every main loop iteration. + // It should be >= 2. + const stepSize = 2 + + // TEMPLATE + const hashLog = tableBits + // seems global, but would be nice to tweak. + const kSearchStrength = 7 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + if debug { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + // t will contain the match offset when we find one. + // When existing the search loop, we have already checked 4 bytes. + var t int32 + + // We will not use repeat offsets across blocks. + // By not using them for the first 3 matches + canRepeat := len(blk.sequences) > 2 + + for { + if debugAsserts && canRepeat && offset1 == 0 { + panic("offset0 was 0") + } + + nextHash := hash6(cv, hashLog) + nextHash2 := hash6(cv>>8, hashLog) + candidate := e.table[nextHash] + candidate2 := e.table[nextHash2] + repIndex := s - offset1 + 2 + + e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} + e.markShardDirty(nextHash) + e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)} + e.markShardDirty(nextHash2) + + if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) { + // Consider history as well. + var seq seq + var length int32 + // length = 4 + e.matchlen(s+6, repIndex+4, src) + { + a := src[s+6:] + b := src[repIndex+4:] + endI := len(a) & (math.MaxInt32 - 7) + length = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + length = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } + + seq.matchLen = uint32(length - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + 2 + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + sMin := s - e.maxMatchOff + if sMin < 0 { + sMin = 0 + } + for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = 1 + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + s += length + 2 + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, length) + + } + break encodeLoop + } + cv = load6432(src, s) + continue + } + coffset0 := s - (candidate.offset - e.cur) + coffset1 := s - (candidate2.offset - e.cur) + 1 + if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val { + // found a regular match + t = candidate.offset - e.cur + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + break + } + + if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val { + // found a regular match + t = candidate2.offset - e.cur + s++ + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugAsserts && t < 0 { + panic("t<0") + } + break + } + s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + } + // A 4-byte match has been found. We'll later see if more than 4 bytes. + offset2 = offset1 + offset1 = s - t + + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + + if debugAsserts && canRepeat && int(offset1) > len(src) { + panic("invalid offset") + } + + // Extend the 4-byte match as long as possible. + //l := e.matchlen(s+4, t+4, src) + 4 + var l int32 + { + a := src[s+4:] + b := src[t+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { + s-- + t-- + l++ + } + + // Write our sequence. + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + // Don't use repeat offsets + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + + // Check offset 2 + if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) { + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + //l := 4 + e.matchlen(s+4, o2+4, src) + var l int32 + { + a := src[s+4:] + b := src[o2+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } + + // Store this, since we have it. + nextHash := hash6(cv, hashLog) + e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} + e.markShardDirty(nextHash) + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + break encodeLoop + } + // Prepare next loop. + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + blk.recentOffsets[0] = uint32(offset1) + blk.recentOffsets[1] = uint32(offset2) + if debug { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } +} + // ResetDict will reset and set a dictionary if not nil func (e *fastEncoder) Reset(d *dict, singleBlock bool) { + e.resetBase(d, singleBlock) + if d != nil { + panic("fastEncoder: Reset with dict") + } +} + +// ResetDict will reset and set a dictionary if not nil +func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) { e.resetBase(d, singleBlock) if d == nil { return @@ -653,9 +975,44 @@ func (e *fastEncoder) Reset(d *dict, singleBlock bool) { } } e.lastDictID = d.id + e.allDirty = true } e.cur = e.maxMatchOff - // Reset table to initial state - copy(e.table[:], e.dictTable) + dirtyShardCnt := 0 + if !e.allDirty { + for i := range e.tableShardDirty { + if e.tableShardDirty[i] { + dirtyShardCnt++ + } + } + } + + const shardCnt = tableShardCnt + const shardSize = tableShardSize + if e.allDirty || dirtyShardCnt > shardCnt*4/6 { + copy(e.table[:], e.dictTable) + for i := range e.tableShardDirty { + e.tableShardDirty[i] = false + } + e.allDirty = false + return + } + for i := range e.tableShardDirty { + if !e.tableShardDirty[i] { + continue + } + + copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize]) + e.tableShardDirty[i] = false + } + e.allDirty = false +} + +func (e *fastEncoderDict) markAllShardsDirty() { + e.allDirty = true +} + +func (e *fastEncoderDict) markShardDirty(entryNum uint32) { + e.tableShardDirty[entryNum/tableShardSize] = true } diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index f5759211d..4871dd03a 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -106,7 +106,7 @@ func (e *Encoder) Reset(w io.Writer) { s.encoder = e.o.encoder() } if s.writing == nil { - s.writing = &blockEnc{} + s.writing = &blockEnc{lowMem: e.o.lowMem} s.writing.init() } s.writing.initNewEncode() @@ -176,6 +176,12 @@ func (e *Encoder) nextBlock(final bool) error { } if !s.headerWritten { // If we have a single block encode, do a sync compression. + if final && len(s.filling) == 0 && !e.o.fullZero { + s.headerWritten = true + s.fullFrameWritten = true + s.eofWritten = true + return nil + } if final && len(s.filling) > 0 { s.current = e.EncodeAll(s.filling, s.current[:0]) var n2 int @@ -334,13 +340,13 @@ func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) { println("ReadFrom: got EOF final block:", len(e.state.filling)) } return n, nil + case nil: default: if debug { println("ReadFrom: got error:", err) } e.state.err = err return n, err - case nil: } if len(src) > 0 { if debug { @@ -471,7 +477,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { } // If less than 1MB, allocate a buffer up front. - if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 { + if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem { dst = make([]byte, 0, len(src)) } dst, err := fh.appendTo(dst) diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go index a7312f42a..16d4ab63c 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go @@ -24,12 +24,12 @@ type encoderOptions struct { allLitEntropy bool customWindow bool customALEntropy bool + lowMem bool dict *dict } func (o *encoderOptions) setDefault() { *o = encoderOptions{ - // use less ram: true for now, but may change. concurrent: runtime.GOMAXPROCS(0), crc: true, single: nil, @@ -37,20 +37,31 @@ func (o *encoderOptions) setDefault() { windowSize: 8 << 20, level: SpeedDefault, allLitEntropy: true, + lowMem: false, } } // encoder returns an encoder with the selected options. func (o encoderOptions) encoder() encoder { switch o.level { + case SpeedFastest: + if o.dict != nil { + return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}} + } + return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}} + case SpeedDefault: - return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}} + if o.dict != nil { + return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}} + } + return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}} case SpeedBetterCompression: - return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} + if o.dict != nil { + return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}} + } + return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}} case SpeedBestCompression: - return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} - case SpeedFastest: - return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} + return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}} } panic("unknown compression level") } @@ -62,7 +73,7 @@ func WithEncoderCRC(b bool) EOption { } // WithEncoderConcurrency will set the concurrency, -// meaning the maximum number of decoders to run concurrently. +// meaning the maximum number of encoders to run concurrently. // The value supplied must be at least 1. // By default this will be set to GOMAXPROCS. func WithEncoderConcurrency(n int) EOption { @@ -276,6 +287,17 @@ func WithSingleSegment(b bool) EOption { } } +// WithLowerEncoderMem will trade in some memory cases trade less memory usage for +// slower encoding speed. +// This will not change the window size which is the primary function for reducing +// memory usage. See WithWindowSize. +func WithLowerEncoderMem(b bool) EOption { + return func(o *encoderOptions) error { + o.lowMem = b + return nil + } +} + // WithEncoderDict allows to register a dictionary that will be used for the encode. // The encoder *may* choose to use no dictionary instead for certain payloads. func WithEncoderDict(dict []byte) EOption { diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go index fc4a566d3..693c5f05d 100644 --- a/vendor/github.com/klauspost/compress/zstd/framedec.go +++ b/vendor/github.com/klauspost/compress/zstd/framedec.go @@ -121,7 +121,7 @@ func (d *frameDec) reset(br byteBuffer) error { d.SingleSegment = fhd&(1<<5) != 0 if fhd&(1<<3) != 0 { - return errors.New("Reserved bit set on frame header") + return errors.New("reserved bit set on frame header") } // Read Window_Descriptor diff --git a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go index aa9eba88b..c74681b99 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go @@ -97,7 +97,7 @@ func (s *fseEncoder) prepare() (*fseEncoder, error) { func (s *fseEncoder) allocCtable() { tableSize := 1 << s.actualTableLog // get tableSymbol that is big enough. - if cap(s.ct.tableSymbol) < int(tableSize) { + if cap(s.ct.tableSymbol) < tableSize { s.ct.tableSymbol = make([]byte, tableSize) } s.ct.tableSymbol = s.ct.tableSymbol[:tableSize] @@ -202,13 +202,13 @@ func (s *fseEncoder) buildCTable() error { case 0: case -1, 1: symbolTT[i].deltaNbBits = tl - symbolTT[i].deltaFindState = int16(total - 1) + symbolTT[i].deltaFindState = total - 1 total++ default: maxBitsOut := uint32(tableLog) - highBit(uint32(v-1)) minStatePlus := uint32(v) << maxBitsOut symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus - symbolTT[i].deltaFindState = int16(total - v) + symbolTT[i].deltaFindState = total - v total += v } } @@ -353,8 +353,8 @@ func (s *fseEncoder) normalizeCount2(length int) error { distributed uint32 total = uint32(length) tableLog = s.actualTableLog - lowThreshold = uint32(total >> tableLog) - lowOne = uint32((total * 3) >> (tableLog + 1)) + lowThreshold = total >> tableLog + lowOne = (total * 3) >> (tableLog + 1) ) for i, cnt := range s.count[:s.symbolLen] { if cnt == 0 { @@ -379,7 +379,7 @@ func (s *fseEncoder) normalizeCount2(length int) error { if (total / toDistribute) > lowOne { // risk of rounding to zero - lowOne = uint32((total * 3) / (toDistribute * 2)) + lowOne = (total * 3) / (toDistribute * 2) for i, cnt := range s.count[:s.symbolLen] { if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) { s.norm[i] = 1 @@ -708,7 +708,6 @@ func (c *cState) init(bw *bitWriter, ct *cTable, first symbolTransform) { im := int32((nbBitsOut << 16) - first.deltaNbBits) lu := (im >> nbBitsOut) + int32(first.deltaFindState) c.state = c.stateTable[lu] - return } // encode the output symbol provided and write it to the bitstream. diff --git a/vendor/github.com/klauspost/compress/zstd/fse_predefined.go b/vendor/github.com/klauspost/compress/zstd/fse_predefined.go index 6c17dc17f..474cb77d2 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_predefined.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_predefined.go @@ -59,7 +59,7 @@ func fillBase(dst []baseOffset, base uint32, bits ...uint8) { } for i, bit := range bits { if base > math.MaxInt32 { - panic(fmt.Sprintf("invalid decoding table, base overflows int32")) + panic("invalid decoding table, base overflows int32") } dst[i] = baseOffset{ diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go index b5c8ef133..1dd39e63b 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go @@ -181,11 +181,18 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { return fmt.Errorf("output (%d) bigger than max block size", size) } if size > cap(s.out) { - // Not enough size, will be extremely rarely triggered, + // Not enough size, which can happen under high volume block streaming conditions // but could be if destination slice is too small for sync operations. - // We add maxBlockSize to the capacity. - s.out = append(s.out, make([]byte, maxBlockSize)...) - s.out = s.out[:len(s.out)-maxBlockSize] + // over-allocating here can create a large amount of GC pressure so we try to keep + // it as contained as possible + used := len(s.out) - startSize + addBytes := 256 + ll + ml + used>>2 + // Clamp to max block size. + if used+addBytes > maxBlockSize { + addBytes = maxBlockSize - used + } + s.out = append(s.out, make([]byte, addBytes)...) + s.out = s.out[:len(s.out)-addBytes] } if ml > maxMatchLen { return fmt.Errorf("match len (%d) bigger than max allowed length", ml) diff --git a/vendor/github.com/klauspost/compress/zstd/seqenc.go b/vendor/github.com/klauspost/compress/zstd/seqenc.go index 36bcc3cc0..8014174a7 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqenc.go +++ b/vendor/github.com/klauspost/compress/zstd/seqenc.go @@ -35,7 +35,6 @@ func (s *seqCoders) setPrev(ll, ml, of *fseEncoder) { // Ensure we cannot reuse by accident prevEnc := *prev prevEnc.symbolLen = 0 - return } compareSwap(ll, &s.llEnc, &s.llPrev) compareSwap(ml, &s.mlEnc, &s.mlPrev) diff --git a/vendor/github.com/klauspost/compress/zstd/snappy.go b/vendor/github.com/klauspost/compress/zstd/snappy.go index 841fd95ac..9d9d1d567 100644 --- a/vendor/github.com/klauspost/compress/zstd/snappy.go +++ b/vendor/github.com/klauspost/compress/zstd/snappy.go @@ -10,8 +10,8 @@ import ( "hash/crc32" "io" + "github.com/golang/snappy" "github.com/klauspost/compress/huff0" - "github.com/klauspost/compress/snappy" ) const ( @@ -185,7 +185,6 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) { r.block.reset(nil) r.block.literals, err = snappy.Decode(r.block.literals[:n], r.buf[snappyChecksumSize:chunkLen]) if err != nil { - println("snappy.Decode:", err) return written, err } err = r.block.encodeLits(r.block.literals, false) @@ -417,7 +416,7 @@ var crcTable = crc32.MakeTable(crc32.Castagnoli) // https://github.com/google/snappy/blob/master/framing_format.txt func snappyCRC(b []byte) uint32 { c := crc32.Update(0, crcTable, b) - return uint32(c>>15|c<<17) + 0xa282ead8 + return c>>15 | c<<17 + 0xa282ead8 } // snappyDecodedLen returns the length of the decoded block and the number of bytes diff --git a/vendor/github.com/klauspost/compress/zstd/zip.go b/vendor/github.com/klauspost/compress/zstd/zip.go new file mode 100644 index 000000000..e35a0a2f8 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/zip.go @@ -0,0 +1,120 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. + +package zstd + +import ( + "errors" + "io" + "sync" +) + +// ZipMethodWinZip is the method for Zstandard compressed data inside Zip files for WinZip. +// See https://www.winzip.com/win/en/comp_info.html +const ZipMethodWinZip = 93 + +// ZipMethodPKWare is the method number used by PKWARE to indicate Zstandard compression. +// See https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.7.TXT +const ZipMethodPKWare = 20 + +var zipReaderPool sync.Pool + +// newZipReader cannot be used since we would leak goroutines... +func newZipReader(r io.Reader) io.ReadCloser { + dec, ok := zipReaderPool.Get().(*Decoder) + if ok { + dec.Reset(r) + } else { + d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true)) + if err != nil { + panic(err) + } + dec = d + } + return &pooledZipReader{dec: dec} +} + +type pooledZipReader struct { + mu sync.Mutex // guards Close and Read + dec *Decoder +} + +func (r *pooledZipReader) Read(p []byte) (n int, err error) { + r.mu.Lock() + defer r.mu.Unlock() + if r.dec == nil { + return 0, errors.New("Read after Close") + } + dec, err := r.dec.Read(p) + + return dec, err +} + +func (r *pooledZipReader) Close() error { + r.mu.Lock() + defer r.mu.Unlock() + var err error + if r.dec != nil { + err = r.dec.Reset(nil) + zipReaderPool.Put(r.dec) + r.dec = nil + } + return err +} + +type pooledZipWriter struct { + mu sync.Mutex // guards Close and Read + enc *Encoder +} + +func (w *pooledZipWriter) Write(p []byte) (n int, err error) { + w.mu.Lock() + defer w.mu.Unlock() + if w.enc == nil { + return 0, errors.New("Write after Close") + } + return w.enc.Write(p) +} + +func (w *pooledZipWriter) Close() error { + w.mu.Lock() + defer w.mu.Unlock() + var err error + if w.enc != nil { + err = w.enc.Close() + zipReaderPool.Put(w.enc) + w.enc = nil + } + return err +} + +// ZipCompressor returns a compressor that can be registered with zip libraries. +// The provided encoder options will be used on all encodes. +func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) { + var pool sync.Pool + return func(w io.Writer) (io.WriteCloser, error) { + enc, ok := pool.Get().(*Encoder) + if ok { + enc.Reset(w) + } else { + var err error + enc, err = NewWriter(w, opts...) + if err != nil { + return nil, err + } + } + return &pooledZipWriter{enc: enc}, nil + } +} + +// ZipDecompressor returns a decompressor that can be registered with zip libraries. +// See ZipCompressor for example. +func ZipDecompressor() func(r io.Reader) io.ReadCloser { + return func(r io.Reader) io.ReadCloser { + d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true)) + if err != nil { + panic(err) + } + return d.IOReadCloser() + } +} diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go index 0807719c8..1ba308c8b 100644 --- a/vendor/github.com/klauspost/compress/zstd/zstd.go +++ b/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -4,6 +4,8 @@ package zstd import ( + "bytes" + "encoding/binary" "errors" "log" "math" @@ -73,6 +75,10 @@ var ( // ErrDecoderClosed will be returned if the Decoder was used after // Close has been called. ErrDecoderClosed = errors.New("decoder used after Close") + + // ErrDecoderNilInput is returned when a nil Reader was provided + // and an operation other than Reset/DecodeAll/Close was attempted. + ErrDecoderNilInput = errors.New("nil input provided as reader") ) func println(a ...interface{}) { @@ -121,24 +127,20 @@ func matchLen(a, b []byte) int { } func load3232(b []byte, i int32) uint32 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:4] - return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 + return binary.LittleEndian.Uint32(b[i:]) } func load6432(b []byte, i int32) uint64 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:8] - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | - uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 + return binary.LittleEndian.Uint64(b[i:]) } func load64(b []byte, i int) uint64 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:8] - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | - uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 + return binary.LittleEndian.Uint64(b[i:]) +} + +type byter interface { + Bytes() []byte + Len() int } + +var _ byter = &bytes.Buffer{} diff --git a/vendor/modules.txt b/vendor/modules.txt index 56819b36e..2564589bf 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -30,6 +30,8 @@ github.com/fatih/color github.com/fsnotify/fsnotify # github.com/golang/protobuf v1.3.5 github.com/golang/protobuf/proto +# github.com/golang/snappy v0.0.3 +github.com/golang/snappy # github.com/hashicorp/hcl v1.0.0 github.com/hashicorp/hcl github.com/hashicorp/hcl/hcl/ast @@ -47,7 +49,7 @@ github.com/hinshun/vt10x # github.com/hokaccha/go-prettyjson v0.0.0-20190818114111-108c894c2c0e ## explicit github.com/hokaccha/go-prettyjson -# github.com/honeycombio/libhoney-go v1.15.2 +# github.com/honeycombio/libhoney-go v1.15.3 ## explicit github.com/honeycombio/libhoney-go github.com/honeycombio/libhoney-go/transmission @@ -55,10 +57,9 @@ github.com/honeycombio/libhoney-go/transmission github.com/inconshreveable/mousetrap # github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 github.com/kballard/go-shellquote -# github.com/klauspost/compress v1.11.4 +# github.com/klauspost/compress v1.12.2 github.com/klauspost/compress/fse github.com/klauspost/compress/huff0 -github.com/klauspost/compress/snappy github.com/klauspost/compress/zstd github.com/klauspost/compress/zstd/internal/xxhash # github.com/kr/pty v1.1.8 => github.com/creack/pty v1.1.7