From 71edf60e09bb552124f858464d1288627b6667b9 Mon Sep 17 00:00:00 2001 From: Miguel Victoria Date: Wed, 18 Sep 2024 21:56:27 +0200 Subject: [PATCH 1/3] feat: add strings.Replacer structure on native libraries --- gnovm/stdlibs/strings/replace.gno | 622 +++++++++++++++++++++++++ gnovm/stdlibs/strings/replace_test.gno | 583 +++++++++++++++++++++++ 2 files changed, 1205 insertions(+) create mode 100644 gnovm/stdlibs/strings/replace.gno create mode 100644 gnovm/stdlibs/strings/replace_test.gno diff --git a/gnovm/stdlibs/strings/replace.gno b/gnovm/stdlibs/strings/replace.gno new file mode 100644 index 00000000000..e57a15fc69c --- /dev/null +++ b/gnovm/stdlibs/strings/replace.gno @@ -0,0 +1,622 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strings + +import ( + "io" +) + +// Replacer replaces a list of strings with replacements. +// It is safe for concurrent use by multiple goroutines. +type Replacer struct { + // Custom code: initialized bool was before sync.Once on golang package + initialized bool // guards buildOnce method + // End of custom code + r replacer + oldnew []string +} + +// replacer is the interface that a replacement algorithm needs to implement. +type replacer interface { + Replace(s string) string + WriteString(w io.Writer, s string) (n int, err error) +} + +// NewReplacer returns a new [Replacer] from a list of old, new string +// pairs. Replacements are performed in the order they appear in the +// target string, without overlapping matches. The old string +// comparisons are done in argument order. +// +// NewReplacer panics if given an odd number of arguments. +func NewReplacer(oldnew ...string) *Replacer { + if len(oldnew)%2 == 1 { + panic("strings.NewReplacer: odd argument count") + } + return &Replacer{oldnew: append([]string(nil), oldnew...)} +} + +func (r *Replacer) buildOnce() { + // Custom code: only build once without sync.Once + if r.initialized { + return + } + r.initialized = true + // End of custom code + r.r = r.build() + r.oldnew = nil +} + +func (b *Replacer) build() replacer { + oldnew := b.oldnew + if len(oldnew) == 2 && len(oldnew[0]) > 1 { + return makeSingleStringReplacer(oldnew[0], oldnew[1]) + } + + allNewBytes := true + for i := 0; i < len(oldnew); i += 2 { + if len(oldnew[i]) != 1 { + return makeGenericReplacer(oldnew) + } + if len(oldnew[i+1]) != 1 { + allNewBytes = false + } + } + + if allNewBytes { + r := byteReplacer{} + for i := range r { + r[i] = byte(i) + } + // The first occurrence of old->new map takes precedence + // over the others with the same old string. + for i := len(oldnew) - 2; i >= 0; i -= 2 { + o := oldnew[i][0] + n := oldnew[i+1][0] + r[o] = n + } + return &r + } + + r := byteStringReplacer{toReplace: make([]string, 0, len(oldnew)/2)} + // The first occurrence of old->new map takes precedence + // over the others with the same old string. + for i := len(oldnew) - 2; i >= 0; i -= 2 { + o := oldnew[i][0] + n := oldnew[i+1] + // To avoid counting repetitions multiple times. + if r.replacements[o] == nil { + // We need to use string([]byte{o}) instead of string(o), + // to avoid utf8 encoding of o. + // E. g. byte(150) produces string of length 2. + r.toReplace = append(r.toReplace, string([]byte{o})) + } + r.replacements[o] = []byte(n) + + } + return &r +} + +// Replace returns a copy of s with all replacements performed. +func (r *Replacer) Replace(s string) string { + // Custom code: adaptation without sync.Once + r.buildOnce() + // End of custom code + return r.r.Replace(s) +} + +// WriteString writes s to w with all replacements performed. +func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) { + // Custom code: adaptation without sync.Once + r.buildOnce() + // End of custom code + return r.r.WriteString(w, s) +} + +// trieNode is a node in a lookup trie for prioritized key/value pairs. Keys +// and values may be empty. For example, the trie containing keys "ax", "ay", +// "bcbc", "x" and "xy" could have eight nodes: +// +// n0 - +// n1 a- +// n2 .x+ +// n3 .y+ +// n4 b- +// n5 .cbc+ +// n6 x+ +// n7 .y+ +// +// n0 is the root node, and its children are n1, n4 and n6; n1's children are +// n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked +// with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7 +// (marked with a trailing "+") are complete keys. +type trieNode struct { + // value is the value of the trie node's key/value pair. It is empty if + // this node is not a complete key. + value string + // priority is the priority (higher is more important) of the trie node's + // key/value pair; keys are not necessarily matched shortest- or longest- + // first. Priority is positive if this node is a complete key, and zero + // otherwise. In the example above, positive/zero priorities are marked + // with a trailing "+" or "-". + priority int + + // A trie node may have zero, one or more child nodes: + // * if the remaining fields are zero, there are no children. + // * if prefix and next are non-zero, there is one child in next. + // * if table is non-zero, it defines all the children. + // + // Prefixes are preferred over tables when there is one child, but the + // root node always uses a table for lookup efficiency. + + // prefix is the difference in keys between this trie node and the next. + // In the example above, node n4 has prefix "cbc" and n4's next node is n5. + // Node n5 has no children and so has zero prefix, next and table fields. + prefix string + next *trieNode + + // table is a lookup table indexed by the next byte in the key, after + // remapping that byte through genericReplacer.mapping to create a dense + // index. In the example above, the keys only use 'a', 'b', 'c', 'x' and + // 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and + // genericReplacer.tableSize will be 5. Node n0's table will be + // []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped + // 'a', 'b' and 'x'. + table []*trieNode +} + +func (t *trieNode) add(key, val string, priority int, r *genericReplacer) { + if key == "" { + if t.priority == 0 { + t.value = val + t.priority = priority + } + return + } + + if t.prefix != "" { + // Need to split the prefix among multiple nodes. + var n int // length of the longest common prefix + for ; n < len(t.prefix) && n < len(key); n++ { + if t.prefix[n] != key[n] { + break + } + } + if n == len(t.prefix) { + t.next.add(key[n:], val, priority, r) + } else if n == 0 { + // First byte differs, start a new lookup table here. Looking up + // what is currently t.prefix[0] will lead to prefixNode, and + // looking up key[0] will lead to keyNode. + var prefixNode *trieNode + if len(t.prefix) == 1 { + prefixNode = t.next + } else { + prefixNode = &trieNode{ + prefix: t.prefix[1:], + next: t.next, + } + } + keyNode := new(trieNode) + t.table = make([]*trieNode, r.tableSize) + t.table[r.mapping[t.prefix[0]]] = prefixNode + t.table[r.mapping[key[0]]] = keyNode + t.prefix = "" + t.next = nil + keyNode.add(key[1:], val, priority, r) + } else { + // Insert new node after the common section of the prefix. + next := &trieNode{ + prefix: t.prefix[n:], + next: t.next, + } + t.prefix = t.prefix[:n] + t.next = next + next.add(key[n:], val, priority, r) + } + } else if t.table != nil { + // Insert into existing table. + m := r.mapping[key[0]] + if t.table[m] == nil { + t.table[m] = new(trieNode) + } + t.table[m].add(key[1:], val, priority, r) + } else { + t.prefix = key + t.next = new(trieNode) + t.next.add("", val, priority, r) + } +} + +func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) { + // Iterate down the trie to the end, and grab the value and keylen with + // the highest priority. + bestPriority := 0 + node := &r.root + n := 0 + for node != nil { + if node.priority > bestPriority && !(ignoreRoot && node == &r.root) { + bestPriority = node.priority + val = node.value + keylen = n + found = true + } + + if s == "" { + break + } + if node.table != nil { + index := r.mapping[s[0]] + if int(index) == r.tableSize { + break + } + node = node.table[index] + s = s[1:] + n++ + } else if node.prefix != "" && HasPrefix(s, node.prefix) { + n += len(node.prefix) + s = s[len(node.prefix):] + node = node.next + } else { + break + } + } + return +} + +// genericReplacer is the fully generic algorithm. +// It's used as a fallback when nothing faster can be used. +type genericReplacer struct { + root trieNode + // tableSize is the size of a trie node's lookup table. It is the number + // of unique key bytes. + tableSize int + // mapping maps from key bytes to a dense index for trieNode.table. + mapping [256]byte +} + +func makeGenericReplacer(oldnew []string) *genericReplacer { + r := new(genericReplacer) + // Find each byte used, then assign them each an index. + for i := 0; i < len(oldnew); i += 2 { + key := oldnew[i] + for j := 0; j < len(key); j++ { + r.mapping[key[j]] = 1 + } + } + + for _, b := range r.mapping { + r.tableSize += int(b) + } + + var index byte + for i, b := range r.mapping { + if b == 0 { + r.mapping[i] = byte(r.tableSize) + } else { + r.mapping[i] = index + index++ + } + } + // Ensure root node uses a lookup table (for performance). + r.root.table = make([]*trieNode, r.tableSize) + + for i := 0; i < len(oldnew); i += 2 { + r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r) + } + return r +} + +type appendSliceWriter []byte + +// Write writes to the buffer to satisfy [io.Writer]. +func (w *appendSliceWriter) Write(p []byte) (int, error) { + *w = append(*w, p...) + return len(p), nil +} + +// WriteString writes to the buffer without string->[]byte->string allocations. +func (w *appendSliceWriter) WriteString(s string) (int, error) { + *w = append(*w, s...) + return len(s), nil +} + +type stringWriter struct { + w io.Writer +} + +func (w stringWriter) WriteString(s string) (int, error) { + return w.w.Write([]byte(s)) +} + +func getStringWriter(w io.Writer) io.StringWriter { + sw, ok := w.(io.StringWriter) + if !ok { + sw = stringWriter{w} + } + return sw +} + +func (r *genericReplacer) Replace(s string) string { + buf := make(appendSliceWriter, 0, len(s)) + r.WriteString(&buf, s) + return string(buf) +} + +func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) { + sw := getStringWriter(w) + var last, wn int + var prevMatchEmpty bool + for i := 0; i <= len(s); { + // Fast path: s[i] is not a prefix of any pattern. + if i != len(s) && r.root.priority == 0 { + index := int(r.mapping[s[i]]) + if index == r.tableSize || r.root.table[index] == nil { + i++ + continue + } + } + + // Ignore the empty match iff the previous loop found the empty match. + val, keylen, match := r.lookup(s[i:], prevMatchEmpty) + prevMatchEmpty = match && keylen == 0 + if match { + wn, err = sw.WriteString(s[last:i]) + n += wn + if err != nil { + return + } + wn, err = sw.WriteString(val) + n += wn + if err != nil { + return + } + i += keylen + last = i + continue + } + i++ + } + if last != len(s) { + wn, err = sw.WriteString(s[last:]) + n += wn + } + return +} + +// singleStringReplacer is the implementation that's used when there is only +// one string to replace (and that string has more than one byte). +type singleStringReplacer struct { + finder *stringFinder + // value is the new string that replaces that pattern when it's found. + value string +} + +func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer { + return &singleStringReplacer{finder: makeStringFinder(pattern), value: value} +} + +func (r *singleStringReplacer) Replace(s string) string { + var buf Builder + i, matched := 0, false + for { + match := r.finder.next(s[i:]) + if match == -1 { + break + } + matched = true + buf.Grow(match + len(r.value)) + buf.WriteString(s[i : i+match]) + buf.WriteString(r.value) + i += match + len(r.finder.pattern) + } + if !matched { + return s + } + buf.WriteString(s[i:]) + return buf.String() +} + +func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) { + sw := getStringWriter(w) + var i, wn int + for { + match := r.finder.next(s[i:]) + if match == -1 { + break + } + wn, err = sw.WriteString(s[i : i+match]) + n += wn + if err != nil { + return + } + wn, err = sw.WriteString(r.value) + n += wn + if err != nil { + return + } + i += match + len(r.finder.pattern) + } + wn, err = sw.WriteString(s[i:]) + n += wn + return +} + +// byteReplacer is the implementation that's used when all the "old" +// and "new" values are single ASCII bytes. +// The array contains replacement bytes indexed by old byte. +type byteReplacer [256]byte + +func (r *byteReplacer) Replace(s string) string { + var buf []byte // lazily allocated + for i := 0; i < len(s); i++ { + b := s[i] + if r[b] != b { + if buf == nil { + buf = []byte(s) + } + buf[i] = r[b] + } + } + if buf == nil { + return s + } + return string(buf) +} + +func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) { + sw := getStringWriter(w) + last := 0 + for i := 0; i < len(s); i++ { + b := s[i] + if r[b] == b { + continue + } + if last != i { + wn, err := sw.WriteString(s[last:i]) + n += wn + if err != nil { + return n, err + } + } + last = i + 1 + nw, err := w.Write(r[b : int(b)+1]) + n += nw + if err != nil { + return n, err + } + } + if last != len(s) { + nw, err := sw.WriteString(s[last:]) + n += nw + if err != nil { + return n, err + } + } + return n, nil +} + +// byteStringReplacer is the implementation that's used when all the +// "old" values are single ASCII bytes but the "new" values vary in size. +type byteStringReplacer struct { + // replacements contains replacement byte slices indexed by old byte. + // A nil []byte means that the old byte should not be replaced. + replacements [256][]byte + // toReplace keeps a list of bytes to replace. Depending on length of toReplace + // and length of target string it may be faster to use Count, or a plain loop. + // We store single byte as a string, because Count takes a string. + toReplace []string +} + +// countCutOff controls the ratio of a string length to a number of replacements +// at which (*byteStringReplacer).Replace switches algorithms. +// For strings with higher ration of length to replacements than that value, +// we call Count, for each replacement from toReplace. +// For strings, with a lower ratio we use simple loop, because of Count overhead. +// countCutOff is an empirically determined overhead multiplier. +// TODO(tocarip) revisit once we have register-based abi/mid-stack inlining. +const countCutOff = 8 + +func (r *byteStringReplacer) Replace(s string) string { + newSize := len(s) + anyChanges := false + // Is it faster to use Count? + if len(r.toReplace)*countCutOff <= len(s) { + for _, x := range r.toReplace { + if c := Count(s, x); c != 0 { + // The -1 is because we are replacing 1 byte with len(replacements[b]) bytes. + newSize += c * (len(r.replacements[x[0]]) - 1) + anyChanges = true + } + + } + } else { + for i := 0; i < len(s); i++ { + b := s[i] + if r.replacements[b] != nil { + // See above for explanation of -1 + newSize += len(r.replacements[b]) - 1 + anyChanges = true + } + } + } + if !anyChanges { + return s + } + buf := make([]byte, newSize) + j := 0 + for i := 0; i < len(s); i++ { + b := s[i] + if r.replacements[b] != nil { + j += copy(buf[j:], r.replacements[b]) + } else { + buf[j] = b + j++ + } + } + return string(buf) +} + +func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) { + sw := getStringWriter(w) + last := 0 + for i := 0; i < len(s); i++ { + b := s[i] + if r.replacements[b] == nil { + continue + } + if last != i { + nw, err := sw.WriteString(s[last:i]) + n += nw + if err != nil { + return n, err + } + } + last = i + 1 + nw, err := w.Write(r.replacements[b]) + n += nw + if err != nil { + return n, err + } + } + if last != len(s) { + var nw int + nw, err = sw.WriteString(s[last:]) + n += nw + } + return +} + +// Custom code: these test functions are normally placed on export_test.go file but it seems like we do not recognize +// methods declared on test files as part of the structure. For example: +// Replacer.printTrie() was not recognized so I added here directl +func (r *Replacer) PrintTrie() string { + r.buildOnce() + gen := r.r.(*genericReplacer) + return gen.printNode(&gen.root, 0) +} + +func (r *genericReplacer) printNode(t *trieNode, depth int) (s string) { + if t.priority > 0 { + s += "+" + } else { + s += "-" + } + s += "\n" + + if t.prefix != "" { + s += Repeat(".", depth) + t.prefix + s += r.printNode(t.next, depth+len(t.prefix)) + } else if t.table != nil { + for b, m := range r.mapping { + if int(m) != r.tableSize && t.table[m] != nil { + s += Repeat(".", depth) + string([]byte{byte(b)}) + s += r.printNode(t.table[m], depth+1) + } + } + } + return +} + +// End of custom code diff --git a/gnovm/stdlibs/strings/replace_test.gno b/gnovm/stdlibs/strings/replace_test.gno new file mode 100644 index 00000000000..4315ea24b7d --- /dev/null +++ b/gnovm/stdlibs/strings/replace_test.gno @@ -0,0 +1,583 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strings_test + +import ( + "bytes" + "fmt" + "strings" + "testing" +) + +var htmlEscaper = strings.NewReplacer( + "&", "&", + "<", "<", + ">", ">", + `"`, """, + "'", "'", +) + +var htmlUnescaper = strings.NewReplacer( + "&", "&", + "<", "<", + ">", ">", + """, `"`, + "'", "'", +) + +// The http package's old HTML escaping function. +func oldHTMLEscape(s string) string { + s = strings.Replace(s, "&", "&", -1) + s = strings.Replace(s, "<", "<", -1) + s = strings.Replace(s, ">", ">", -1) + s = strings.Replace(s, `"`, """, -1) + s = strings.Replace(s, "'", "'", -1) + return s +} + +var capitalLetters = strings.NewReplacer("a", "A", "b", "B") + +// TestReplacer tests the replacer implementations. +func TestReplacer(t *testing.T) { + type testCase struct { + r *strings.Replacer + in, out string + } + var testCases []testCase + + // str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8. + str := func(b byte) string { + return string([]byte{b}) + } + var s []string + + // inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00". + s = nil + for i := 0; i < 256; i++ { + s = append(s, str(byte(i)), str(byte(i+1))) + } + inc := strings.NewReplacer(s...) + + // Test cases with 1-byte old strings, 1-byte new strings. + testCases = append(testCases, + testCase{capitalLetters, "brad", "BrAd"}, + testCase{capitalLetters, strings.Repeat("a", (32<<10)+123), strings.Repeat("A", (32<<10)+123)}, + testCase{capitalLetters, "", ""}, + + testCase{inc, "brad", "csbe"}, + testCase{inc, "\x00\xff", "\x01\x00"}, + testCase{inc, "", ""}, + + testCase{strings.NewReplacer("a", "1", "a", "2"), "brad", "br1d"}, + ) + + // repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ... + s = nil + for i := 0; i < 256; i++ { + n := i + 1 - 'a' + if n < 1 { + n = 1 + } + s = append(s, str(byte(i)), strings.Repeat(str(byte(i)), n)) + } + repeat := strings.NewReplacer(s...) + + // Test cases with 1-byte old strings, variable length new strings. + testCases = append(testCases, + testCase{htmlEscaper, "No changes", "No changes"}, + testCase{htmlEscaper, "I <3 escaping & stuff", "I <3 escaping & stuff"}, + testCase{htmlEscaper, "&&&", "&&&"}, + testCase{htmlEscaper, "", ""}, + + testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"}, + testCase{repeat, "abba", "abbbba"}, + testCase{repeat, "", ""}, + + testCase{strings.NewReplacer("a", "11", "a", "22"), "brad", "br11d"}, + ) + + // The remaining test cases have variable length old strings. + + testCases = append(testCases, + testCase{htmlUnescaper, "&amp;", "&"}, + testCase{htmlUnescaper, "<b>HTML's neat</b>", "HTML's neat"}, + testCase{htmlUnescaper, "", ""}, + + testCase{strings.NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"}, + + testCase{strings.NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"}, + + testCase{strings.NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"}, + ) + + // gen1 has multiple old strings of variable length. There is no + // overall non-empty common prefix, but some pairwise common prefixes. + gen1 := strings.NewReplacer( + "aaa", "3[aaa]", + "aa", "2[aa]", + "a", "1[a]", + "i", "i", + "longerst", "most long", + "longer", "medium", + "long", "short", + "xx", "xx", + "x", "X", + "X", "Y", + "Y", "Z", + ) + testCases = append(testCases, + testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"}, + testCase{gen1, "long, longerst, longer", "short, most long, medium"}, + testCase{gen1, "xxxxx", "xxxxX"}, + testCase{gen1, "XiX", "YiY"}, + testCase{gen1, "", ""}, + ) + + // gen2 has multiple old strings with no pairwise common prefix. + gen2 := strings.NewReplacer( + "roses", "red", + "violets", "blue", + "sugar", "sweet", + ) + testCases = append(testCases, + testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."}, + testCase{gen2, "", ""}, + ) + + // gen3 has multiple old strings with an overall common prefix. + gen3 := strings.NewReplacer( + "abracadabra", "poof", + "abracadabrakazam", "splat", + "abraham", "lincoln", + "abrasion", "scrape", + "abraham", "isaac", + ) + testCases = append(testCases, + testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"}, + testCase{gen3, "abrasion abracad", "scrape abracad"}, + testCase{gen3, "abba abram abrasive", "abba abram abrasive"}, + testCase{gen3, "", ""}, + ) + + // foo{1,2,3,4} have multiple old strings with an overall common prefix + // and 1- or 2- byte extensions from the common prefix. + foo1 := strings.NewReplacer( + "foo1", "A", + "foo2", "B", + "foo3", "C", + ) + foo2 := strings.NewReplacer( + "foo1", "A", + "foo2", "B", + "foo31", "C", + "foo32", "D", + ) + foo3 := strings.NewReplacer( + "foo11", "A", + "foo12", "B", + "foo31", "C", + "foo32", "D", + ) + foo4 := strings.NewReplacer( + "foo12", "B", + "foo32", "D", + ) + testCases = append(testCases, + testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"}, + testCase{foo1, "", ""}, + + testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"}, + testCase{foo2, "", ""}, + + testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"}, + testCase{foo3, "", ""}, + + testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"}, + testCase{foo4, "", ""}, + ) + + // genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things. + allBytes := make([]byte, 256) + for i := range allBytes { + allBytes[i] = byte(i) + } + allString := string(allBytes) + genAll := strings.NewReplacer( + allString, "[all]", + "\xff", "[ff]", + "\x00", "[00]", + ) + testCases = append(testCases, + testCase{genAll, allString, "[all]"}, + testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"}, + testCase{genAll, "", ""}, + ) + + // Test cases with empty old strings. + + blankToX1 := strings.NewReplacer("", "X") + blankToX2 := strings.NewReplacer("", "X", "", "") + blankHighPriority := strings.NewReplacer("", "X", "o", "O") + blankLowPriority := strings.NewReplacer("o", "O", "", "X") + blankNoOp1 := strings.NewReplacer("", "") + blankNoOp2 := strings.NewReplacer("", "", "", "A") + blankFoo := strings.NewReplacer("", "X", "foobar", "R", "foobaz", "Z") + testCases = append(testCases, + testCase{blankToX1, "foo", "XfXoXoX"}, + testCase{blankToX1, "", "X"}, + + testCase{blankToX2, "foo", "XfXoXoX"}, + testCase{blankToX2, "", "X"}, + + testCase{blankHighPriority, "oo", "XOXOX"}, + testCase{blankHighPriority, "ii", "XiXiX"}, + testCase{blankHighPriority, "oiio", "XOXiXiXOX"}, + testCase{blankHighPriority, "iooi", "XiXOXOXiX"}, + testCase{blankHighPriority, "", "X"}, + + testCase{blankLowPriority, "oo", "OOX"}, + testCase{blankLowPriority, "ii", "XiXiX"}, + testCase{blankLowPriority, "oiio", "OXiXiOX"}, + testCase{blankLowPriority, "iooi", "XiOOXiX"}, + testCase{blankLowPriority, "", "X"}, + + testCase{blankNoOp1, "foo", "foo"}, + testCase{blankNoOp1, "", ""}, + + testCase{blankNoOp2, "foo", "foo"}, + testCase{blankNoOp2, "", ""}, + + testCase{blankFoo, "foobarfoobaz", "XRXZX"}, + testCase{blankFoo, "foobar-foobaz", "XRX-XZX"}, + testCase{blankFoo, "", "X"}, + ) + + // single string replacer + + abcMatcher := strings.NewReplacer("abc", "[match]") + + testCases = append(testCases, + testCase{abcMatcher, "", ""}, + testCase{abcMatcher, "ab", "ab"}, + testCase{abcMatcher, "abc", "[match]"}, + testCase{abcMatcher, "abcd", "[match]d"}, + testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"}, + ) + + // Issue 6659 cases (more single string replacer) + + noHello := strings.NewReplacer("Hello", "") + testCases = append(testCases, + testCase{noHello, "Hello", ""}, + testCase{noHello, "Hellox", "x"}, + testCase{noHello, "xHello", "x"}, + testCase{noHello, "xHellox", "xx"}, + ) + + // No-arg test cases. + + nop := strings.NewReplacer() + testCases = append(testCases, + testCase{nop, "abc", "abc"}, + testCase{nop, "", ""}, + ) + + // Run the test cases. + + for i, tc := range testCases { + if s := tc.r.Replace(tc.in); s != tc.out { + t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out) + } + var buf bytes.Buffer + n, err := tc.r.WriteString(&buf, tc.in) + if err != nil { + t.Errorf("%d. WriteString: %v", i, err) + continue + } + got := buf.String() + if got != tc.out { + t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out) + continue + } + if n != len(tc.out) { + t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)", + i, tc.in, n, len(tc.out), tc.out) + } + } +} + +var algorithmTestCases = []struct { + r *strings.Replacer + want string +}{ + {capitalLetters, "*strings.byteReplacer"}, + {htmlEscaper, "*strings.byteStringReplacer"}, + {strings.NewReplacer("12", "123"), "*strings.singleStringReplacer"}, + {strings.NewReplacer("1", "12"), "*strings.byteStringReplacer"}, + {strings.NewReplacer("", "X"), "*strings.genericReplacer"}, + {strings.NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"}, +} + +//// TestPickAlgorithm tests that strings.NewReplacer picks the correct algorithm. +//func TestPickAlgorithm(t *testing.T) { +// for i, tc := range algorithmTestCases { +// got := fmt.Sprintf("%T", tc.r.Replacer()) +// if got != tc.want { +// t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want) +// } +// } +//} + +type errWriter struct{} + +func (errWriter) Write(p []byte) (n int, err error) { + return 0, fmt.Errorf("unwritable") +} + +// TestWriteStringError tests that WriteString returns an error +// received from the underlying io.Writer. +func TestWriteStringError(t *testing.T) { + for i, tc := range algorithmTestCases { + n, err := tc.r.WriteString(errWriter{}, "abc") + if n != 0 || err == nil || err.Error() != "unwritable" { + t.Errorf("%d. WriteStringError = %d, %v, want 0, unwritable", i, n, err) + } + } +} + +// TestGenericTrieBuilding verifies the structure of the generated trie. There +// is one node per line, and the key ending with the current line is in the +// trie if it ends with a "+". +func TestGenericTrieBuilding(t *testing.T) { + testCases := []struct{ in, out string }{ + {"abc;abdef;abdefgh;xx;xy;z", `- + a- + .b- + ..c+ + ..d- + ...ef+ + .....gh+ + x- + .x+ + .y+ + z+ + `}, + {"abracadabra;abracadabrakazam;abraham;abrasion", `- + a- + .bra- + ....c- + .....adabra+ + ...........kazam+ + ....h- + .....am+ + ....s- + .....ion+ + `}, + {"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `- + X+ + Y+ + a+ + .a+ + ..a+ + i+ + l- + .ong+ + ....er+ + ......st+ + x+ + .x+ + `}, + {"foo;;foo;foo1", `+ + f- + .oo+ + ...1+ + `}, + } + + for _, tc := range testCases { + keys := strings.Split(tc.in, ";") + args := make([]string, len(keys)*2) + for i, key := range keys { + args[i*2] = key + } + + got := strings.NewReplacer(args...).PrintTrie() + // Remove tabs from tc.out + wantbuf := make([]byte, 0, len(tc.out)) + for i := 0; i < len(tc.out); i++ { + if tc.out[i] != '\t' { + wantbuf = append(wantbuf, tc.out[i]) + } + } + want := string(wantbuf) + + if got != want { + t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want) + } + } +} + +func BenchmarkGenericNoMatch(b *testing.B) { + str := strings.Repeat("A", 100) + strings.Repeat("B", 100) + generic := strings.NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic + for i := 0; i < b.N; i++ { + generic.Replace(str) + } +} + +func BenchmarkGenericMatch1(b *testing.B) { + str := strings.Repeat("a", 100) + strings.Repeat("b", 100) + generic := strings.NewReplacer("a", "A", "b", "B", "12", "123") + for i := 0; i < b.N; i++ { + generic.Replace(str) + } +} + +func BenchmarkGenericMatch2(b *testing.B) { + str := strings.Repeat("It's <b>HTML</b>!", 100) + for i := 0; i < b.N; i++ { + htmlUnescaper.Replace(str) + } +} + +func benchmarkSingleString(b *testing.B, pattern, text string) { + r := strings.NewReplacer(pattern, "[match]") + b.SetBytes(int64(len(text))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + r.Replace(text) + } +} + +func BenchmarkSingleMaxSkipping(b *testing.B) { + benchmarkSingleString(b, strings.Repeat("b", 25), strings.Repeat("a", 10000)) +} + +func BenchmarkSingleLongSuffixFail(b *testing.B) { + benchmarkSingleString(b, "b"+strings.Repeat("a", 500), strings.Repeat("a", 1002)) +} + +func BenchmarkSingleMatch(b *testing.B) { + benchmarkSingleString(b, "abcdef", strings.Repeat("abcdefghijklmno", 1000)) +} + +func BenchmarkByteByteNoMatch(b *testing.B) { + str := strings.Repeat("A", 100) + strings.Repeat("B", 100) + for i := 0; i < b.N; i++ { + capitalLetters.Replace(str) + } +} + +func BenchmarkByteByteMatch(b *testing.B) { + str := strings.Repeat("a", 100) + strings.Repeat("b", 100) + for i := 0; i < b.N; i++ { + capitalLetters.Replace(str) + } +} + +func BenchmarkByteStringMatch(b *testing.B) { + str := "<" + strings.Repeat("a", 99) + strings.Repeat("b", 99) + ">" + for i := 0; i < b.N; i++ { + htmlEscaper.Replace(str) + } +} + +func BenchmarkHTMLEscapeNew(b *testing.B) { + str := "I <3 to escape HTML & other text too." + for i := 0; i < b.N; i++ { + htmlEscaper.Replace(str) + } +} + +func BenchmarkHTMLEscapeOld(b *testing.B) { + str := "I <3 to escape HTML & other text too." + for i := 0; i < b.N; i++ { + oldHTMLEscape(str) + } +} + +func BenchmarkByteStringReplacerWriteString(b *testing.B) { + str := strings.Repeat("I <3 to escape HTML & other text too.", 100) + buf := new(bytes.Buffer) + for i := 0; i < b.N; i++ { + htmlEscaper.WriteString(buf, str) + buf.Reset() + } +} + +func BenchmarkByteReplacerWriteString(b *testing.B) { + str := strings.Repeat("abcdefghijklmnopqrstuvwxyz", 100) + buf := new(bytes.Buffer) + for i := 0; i < b.N; i++ { + capitalLetters.WriteString(buf, str) + buf.Reset() + } +} + +// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces. +func BenchmarkByteByteReplaces(b *testing.B) { + str := strings.Repeat("a", 100) + strings.Repeat("b", 100) + for i := 0; i < b.N; i++ { + strings.Replace(strings.Replace(str, "a", "A", -1), "b", "B", -1) + } +} + +// BenchmarkByteByteMap compares byteByteImpl against Map. +func BenchmarkByteByteMap(b *testing.B) { + str := strings.Repeat("a", 100) + strings.Repeat("b", 100) + fn := func(r rune) rune { + switch r { + case 'a': + return 'A' + case 'b': + return 'B' + } + return r + } + for i := 0; i < b.N; i++ { + strings.Map(fn, str) + } +} + +var mapdata = []struct{ name, data string }{ + {"ASCII", "a b c d e f g h i j k l m n o p q r s t u v w x y z"}, + {"Greek", "α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ ς σ τ υ φ χ ψ ω"}, +} + +func BenchmarkMap(b *testing.B) { + mapidentity := func(r rune) rune { + return r + } + + b.Run("identity", func(b *testing.B) { + for _, md := range mapdata { + b.Run(md.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + strings.Map(mapidentity, md.data) + } + }) + } + }) + + mapchange := func(r rune) rune { + if 'a' <= r && r <= 'z' { + return r + 'A' - 'a' + } + if 'α' <= r && r <= 'ω' { + return r + 'Α' - 'α' + } + return r + } + + b.Run("change", func(b *testing.B) { + for _, md := range mapdata { + b.Run(md.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + strings.Map(mapchange, md.data) + } + }) + } + }) +} From 5b456eccf1e57ea700207ed7a45c935abfb57082 Mon Sep 17 00:00:00 2001 From: Miguel Victoria Date: Thu, 19 Sep 2024 21:50:43 +0200 Subject: [PATCH 2/3] Remove not neccessary field initialized --- gnovm/stdlibs/strings/replace.gno | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/gnovm/stdlibs/strings/replace.gno b/gnovm/stdlibs/strings/replace.gno index e57a15fc69c..a0a7af89ffa 100644 --- a/gnovm/stdlibs/strings/replace.gno +++ b/gnovm/stdlibs/strings/replace.gno @@ -11,8 +11,7 @@ import ( // Replacer replaces a list of strings with replacements. // It is safe for concurrent use by multiple goroutines. type Replacer struct { - // Custom code: initialized bool was before sync.Once on golang package - initialized bool // guards buildOnce method + // Custom code: remove variable once of type sync.Once on golang package // End of custom code r replacer oldnew []string @@ -38,11 +37,10 @@ func NewReplacer(oldnew ...string) *Replacer { } func (r *Replacer) buildOnce() { - // Custom code: only build once without sync.Once - if r.initialized { + // Custom code: check replacer is null instead of call sync.Once + if r.r != nil { return } - r.initialized = true // End of custom code r.r = r.build() r.oldnew = nil @@ -590,7 +588,7 @@ func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err erro // Custom code: these test functions are normally placed on export_test.go file but it seems like we do not recognize // methods declared on test files as part of the structure. For example: -// Replacer.printTrie() was not recognized so I added here directl +// Replacer.printTrie() was not recognized so I added here directly func (r *Replacer) PrintTrie() string { r.buildOnce() gen := r.r.(*genericReplacer) From b31ccc2f38674b254dd0927ae0cc70b42faed90f Mon Sep 17 00:00:00 2001 From: Miguel Victoria Date: Wed, 2 Oct 2024 22:12:54 +0200 Subject: [PATCH 3/3] deplace TestGenericTrieBuilding to own file --- gnovm/stdlibs/strings/printtrie_test.gno | 102 +++++++++++++++++++++++ gnovm/stdlibs/strings/replace.gno | 33 -------- gnovm/stdlibs/strings/replace_test.gno | 72 ---------------- 3 files changed, 102 insertions(+), 105 deletions(-) create mode 100644 gnovm/stdlibs/strings/printtrie_test.gno diff --git a/gnovm/stdlibs/strings/printtrie_test.gno b/gnovm/stdlibs/strings/printtrie_test.gno new file mode 100644 index 00000000000..b5b387b9bca --- /dev/null +++ b/gnovm/stdlibs/strings/printtrie_test.gno @@ -0,0 +1,102 @@ +package strings + +import ( + "testing" +) + +func (r *Replacer) PrintTrie() string { + r.buildOnce() + gen := r.r.(*genericReplacer) + return gen.printNode(&gen.root, 0) +} + +func (r *genericReplacer) printNode(t *trieNode, depth int) (s string) { + if t.priority > 0 { + s += "+" + } else { + s += "-" + } + s += "\n" + + if t.prefix != "" { + s += Repeat(".", depth) + t.prefix + s += r.printNode(t.next, depth+len(t.prefix)) + } else if t.table != nil { + for b, m := range r.mapping { + if int(m) != r.tableSize && t.table[m] != nil { + s += Repeat(".", depth) + string([]byte{byte(b)}) + s += r.printNode(t.table[m], depth+1) + } + } + } + return +} + +func TestGenericTrieBuilding(t *testing.T) { + testCases := []struct{ in, out string }{ + {"abc;abdef;abdefgh;xx;xy;z", `- + a- + .b- + ..c+ + ..d- + ...ef+ + .....gh+ + x- + .x+ + .y+ + z+ + `}, + {"abracadabra;abracadabrakazam;abraham;abrasion", `- + a- + .bra- + ....c- + .....adabra+ + ...........kazam+ + ....h- + .....am+ + ....s- + .....ion+ + `}, + {"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `- + X+ + Y+ + a+ + .a+ + ..a+ + i+ + l- + .ong+ + ....er+ + ......st+ + x+ + .x+ + `}, + {"foo;;foo;foo1", `+ + f- + .oo+ + ...1+ + `}, + } + + for _, tc := range testCases { + keys := Split(tc.in, ";") + args := make([]string, len(keys)*2) + for i, key := range keys { + args[i*2] = key + } + + got := NewReplacer(args...).PrintTrie() + // Remove tabs from tc.out + wantbuf := make([]byte, 0, len(tc.out)) + for i := 0; i < len(tc.out); i++ { + if tc.out[i] != '\t' { + wantbuf = append(wantbuf, tc.out[i]) + } + } + want := string(wantbuf) + + if got != want { + t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want) + } + } +} diff --git a/gnovm/stdlibs/strings/replace.gno b/gnovm/stdlibs/strings/replace.gno index a0a7af89ffa..98a47ad3f81 100644 --- a/gnovm/stdlibs/strings/replace.gno +++ b/gnovm/stdlibs/strings/replace.gno @@ -585,36 +585,3 @@ func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err erro } return } - -// Custom code: these test functions are normally placed on export_test.go file but it seems like we do not recognize -// methods declared on test files as part of the structure. For example: -// Replacer.printTrie() was not recognized so I added here directly -func (r *Replacer) PrintTrie() string { - r.buildOnce() - gen := r.r.(*genericReplacer) - return gen.printNode(&gen.root, 0) -} - -func (r *genericReplacer) printNode(t *trieNode, depth int) (s string) { - if t.priority > 0 { - s += "+" - } else { - s += "-" - } - s += "\n" - - if t.prefix != "" { - s += Repeat(".", depth) + t.prefix - s += r.printNode(t.next, depth+len(t.prefix)) - } else if t.table != nil { - for b, m := range r.mapping { - if int(m) != r.tableSize && t.table[m] != nil { - s += Repeat(".", depth) + string([]byte{byte(b)}) - s += r.printNode(t.table[m], depth+1) - } - } - } - return -} - -// End of custom code diff --git a/gnovm/stdlibs/strings/replace_test.gno b/gnovm/stdlibs/strings/replace_test.gno index 4315ea24b7d..dc4858dcc5c 100644 --- a/gnovm/stdlibs/strings/replace_test.gno +++ b/gnovm/stdlibs/strings/replace_test.gno @@ -347,78 +347,6 @@ func TestWriteStringError(t *testing.T) { } } -// TestGenericTrieBuilding verifies the structure of the generated trie. There -// is one node per line, and the key ending with the current line is in the -// trie if it ends with a "+". -func TestGenericTrieBuilding(t *testing.T) { - testCases := []struct{ in, out string }{ - {"abc;abdef;abdefgh;xx;xy;z", `- - a- - .b- - ..c+ - ..d- - ...ef+ - .....gh+ - x- - .x+ - .y+ - z+ - `}, - {"abracadabra;abracadabrakazam;abraham;abrasion", `- - a- - .bra- - ....c- - .....adabra+ - ...........kazam+ - ....h- - .....am+ - ....s- - .....ion+ - `}, - {"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `- - X+ - Y+ - a+ - .a+ - ..a+ - i+ - l- - .ong+ - ....er+ - ......st+ - x+ - .x+ - `}, - {"foo;;foo;foo1", `+ - f- - .oo+ - ...1+ - `}, - } - - for _, tc := range testCases { - keys := strings.Split(tc.in, ";") - args := make([]string, len(keys)*2) - for i, key := range keys { - args[i*2] = key - } - - got := strings.NewReplacer(args...).PrintTrie() - // Remove tabs from tc.out - wantbuf := make([]byte, 0, len(tc.out)) - for i := 0; i < len(tc.out); i++ { - if tc.out[i] != '\t' { - wantbuf = append(wantbuf, tc.out[i]) - } - } - want := string(wantbuf) - - if got != want { - t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want) - } - } -} - func BenchmarkGenericNoMatch(b *testing.B) { str := strings.Repeat("A", 100) + strings.Repeat("B", 100) generic := strings.NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic