Skip to content

Commit

Permalink
add a Deduplicate function
Browse files Browse the repository at this point in the history
  • Loading branch information
marten-seemann committed Jun 29, 2023
1 parent 44887f8 commit 8f88992
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 0 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ require (
github.com/multiformats/go-multihash v0.0.14
github.com/multiformats/go-varint v0.0.6
github.com/stretchr/testify v1.7.0
golang.org/x/exp v0.0.0-20230626212559-97b1e661b5df
)

require (
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk
golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.1.0 h1:MDRAIl0xIo9Io2xV565hzXHw3zVseKrJKodhohM5CjU=
golang.org/x/crypto v0.1.0/go.mod h1:RecgLatLF4+eUMCP1PoPZQb+cVrJcOPbHkTkbkB9sbw=
golang.org/x/exp v0.0.0-20230626212559-97b1e661b5df h1:UA2aFVmmsIlefxMk29Dp2juaUSth8Pyn3Tq5Y5mJGME=
golang.org/x/exp v0.0.0-20230626212559-97b1e661b5df/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
Expand Down
23 changes: 23 additions & 0 deletions multiaddr.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import (
"fmt"
"log"
"strings"

"golang.org/x/exp/slices"
)

// multiaddr is the data structure representing a Multiaddr
Expand Down Expand Up @@ -210,3 +212,24 @@ func Contains(addrs []Multiaddr, addr Multiaddr) bool {
}
return false
}

// Deduplicate removes repeated addresses from addrs and returns the shortened
// slice, which holds one entry for each distinct address.
//
// The work happens in place: addrs is sorted by the byte representation of its
// elements, so the caller's slice is reordered, and every slot past the
// returned length is set to nil. No new slice is allocated for the result.
func Deduplicate(addrs []Multiaddr) []Multiaddr {
	if len(addrs) == 0 {
		return addrs
	}

	// slices.SortFunc is used instead of sort.Slice because it sorts without allocating.
	byBytes := func(x, y Multiaddr) bool { return bytes.Compare(x.Bytes(), y.Bytes()) < 0 }
	slices.SortFunc(addrs, byBytes)

	// With the slice sorted, duplicates are expected to sit next to each other,
	// so only the first element of each run of equal neighbours is kept.
	kept := 1
	for next := 1; next < len(addrs); next++ {
		if addrs[next-1].Equal(addrs[next]) {
			continue
		}
		addrs[kept] = addrs[next]
		kept++
	}

	// Nil out the unused tail so the backing array stops referencing dropped entries.
	tail := addrs[kept:]
	for i := range tail {
		tail[i] = nil
	}
	return addrs[:kept]
}
50 changes: 50 additions & 0 deletions multiaddr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ package multiaddr
import (
"bytes"
"encoding/hex"
"fmt"
"math"
"math/rand"
"testing"

"github.com/ipfs/go-cid"
Expand Down Expand Up @@ -785,3 +788,50 @@ func TestContains(t *testing.T) {
require.False(t, Contains(addrs, newMultiaddr(t, "/ip4/4.3.2.1/udp/1234/utp")))
require.False(t, Contains(nil, a1))
}

// TestDedupAddrs runs Deduplicate over a table of inputs and checks that the
// result has exactly the expected number of addresses and contains each
// expected address.
func TestDedupAddrs(t *testing.T) {
	tcpAddr := StringCast("/ip4/127.0.0.1/tcp/1234")
	quicAddr := StringCast("/ip4/127.0.0.1/udp/1234/quic-v1")
	wsAddr := StringCast("/ip4/127.0.0.1/tcp/1234/ws")

	type testcase struct {
		in, out []Multiaddr
	}

	for i, tc := range []testcase{
		{in: nil, out: nil},
		{in: []Multiaddr{tcpAddr}, out: []Multiaddr{tcpAddr}},
		{in: []Multiaddr{tcpAddr, tcpAddr, tcpAddr}, out: []Multiaddr{tcpAddr}},
		{in: []Multiaddr{tcpAddr, quicAddr, tcpAddr}, out: []Multiaddr{tcpAddr, quicAddr}},
		{in: []Multiaddr{tcpAddr, quicAddr, wsAddr}, out: []Multiaddr{tcpAddr, quicAddr, wsAddr}},
	} {
		tc := tc
		t.Run(fmt.Sprintf("test %d", i), func(t *testing.T) {
			deduped := Deduplicate(tc.in)
			// Containment alone would still pass if duplicates were left in the
			// result, so the length is pinned as well.
			require.Len(t, deduped, len(tc.out))
			for _, a := range tc.out {
				require.Contains(t, deduped, a)
			}
		})
	}
}

// BenchmarkDedupAddrs measures Deduplicate on inputs of several sizes drawn
// from a fixed, seeded pool of addresses that contains duplicates.
func BenchmarkDedupAddrs(b *testing.B) {
	var addrs []Multiaddr
	// A fixed seed keeps the generated address pool identical across runs.
	r := rand.New(rand.NewSource(1234))
	for i := 0; i < 100; i++ {
		tcpAddr := StringCast(fmt.Sprintf("/ip4/127.0.0.1/tcp/%d", r.Intn(math.MaxUint16)))
		quicAddr := StringCast(fmt.Sprintf("/ip4/127.0.0.1/udp/%d/quic-v1", r.Intn(math.MaxUint16)))
		wsAddr := StringCast(fmt.Sprintf("/ip4/127.0.0.1/tcp/%d/ws", r.Intn(math.MaxUint16)))
		addrs = append(addrs, tcpAddr, tcpAddr, quicAddr, quicAddr, wsAddr)
	}
	for _, sz := range []int{10, 20, 30, 50, 100} {
		b.Run(fmt.Sprintf("%d", sz), func(b *testing.B) {
			items := make([]Multiaddr, sz)
			// ReportAllocs must be set on the sub-benchmark itself; setting it
			// on the parent does not enable allocation reporting here.
			b.ReportAllocs()
			// Exclude the scratch-slice allocation above from the measurement.
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				// Deduplicate reorders and truncates its input, so restore it
				// before every iteration.
				copy(items, addrs[:sz])
				Deduplicate(items)
			}
		})
	}
}

0 comments on commit 8f88992

Please sign in to comment.