-
Notifications
You must be signed in to change notification settings - Fork 12
NAT Auto Discovery #1
Changes from 26 commits
32e8ab9
70f7dd8
f3d9a24
cc058d6
ef097b5
6efad8f
2aa66e5
ea43bf5
00fb7e7
0377627
7fad996
9af8715
bc41c7a
dcbcfce
9efd0ec
aaaa90e
1562e1b
6d4bc41
fa14117
d16ca79
b1733eb
bb5cad4
cd7a875
7b3981e
cf04a09
7c097ed
5837cc5
56a0966
54fb466
66ca387
3abf9c7
3b679e0
1cba297
dd7c7a9
9ff7df3
0fdf1b0
46d352f
0a4e215
91c209c
00d2fea
8ea9f1b
d9a0d1a
aadb8db
d7f55b0
852f4e0
9c8ee52
b2c65b0
8d2e2ae
9ef3734
6a3a9cb
67bccae
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
package autonat | ||
|
||
import ( | ||
"net" | ||
|
||
ma "github.com/multiformats/go-multiaddr" | ||
) | ||
|
||
var private4, private6 []*net.IPNet | ||
var privateCIDR4 = []string{ | ||
// localhost | ||
"127.0.0.0/8", | ||
// private networks | ||
"10.0.0.0/8", | ||
"100.64.0.0/10", | ||
"172.16.0.0/12", | ||
"192.168.0.0/16", | ||
// link local | ||
"169.254.0.0/16", | ||
} | ||
var privateCIDR6 = []string{ | ||
// localhost | ||
"::1/128", | ||
// ULA reserved | ||
"fc00::/7", | ||
// link local | ||
"fe80::/10", | ||
} | ||
|
||
func init() { | ||
private4 = parsePrivateCIDR(privateCIDR4) | ||
private6 = parsePrivateCIDR(privateCIDR6) | ||
} | ||
|
||
// parsePrivateCIDR converts every CIDR string in cidrs into a *net.IPNet and
// returns the networks in the same order as the input.
//
// It panics with the error from net.ParseCIDR on the first string that does
// not parse.
func parsePrivateCIDR(cidrs []string) []*net.IPNet {
	parsed := make([]*net.IPNet, 0, len(cidrs))
	for _, block := range cidrs {
		_, network, err := net.ParseCIDR(block)
		if err != nil {
			panic(err)
		}
		parsed = append(parsed, network)
	}
	return parsed
}
|
||
func isPublicAddr(a ma.Multiaddr) bool { | ||
ip, err := a.ValueForProtocol(ma.P_IP4) | ||
if err == nil { | ||
return !inAddrRange(ip, private4) | ||
} | ||
|
||
ip, err = a.ValueForProtocol(ma.P_IP6) | ||
if err == nil { | ||
return !inAddrRange(ip, private6) | ||
} | ||
|
||
return false | ||
} | ||
|
||
// inAddrRange reports whether the textual IP address s belongs to at least one
// of the networks in ipnets.
//
// s is parsed with net.ParseIP; the parsed value is handed to each network's
// Contains check as-is, and false is returned when no network matches.
func inAddrRange(s string, ipnets []*net.IPNet) bool {
	addr := net.ParseIP(s)
	for idx := 0; idx < len(ipnets); idx++ {
		if !ipnets[idx].Contains(addr) {
			continue
		}
		return true
	}
	return false
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
package autonat | ||
|
||
import ( | ||
"context" | ||
"errors" | ||
"math/rand" | ||
"sync" | ||
"time" | ||
|
||
host "github.com/libp2p/go-libp2p-host" | ||
peer "github.com/libp2p/go-libp2p-peer" | ||
ma "github.com/multiformats/go-multiaddr" | ||
) | ||
|
||
// NATStatus is the state of NAT as detected by the ambient service. | ||
type NATStatus int | ||
|
||
const ( | ||
// NAT status is unknown; this means that the ambient service has not been | ||
// able to decide the presence of NAT in the most recent attempt to test | ||
magik6k marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// dial through known autonat peers. initial state. | ||
NATStatusUnknown NATStatus = iota | ||
// NAT status is publicly dialable | ||
NATStatusPublic | ||
// NAT status is private network | ||
NATStatusPrivate | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about "no nat"? Do we need that state? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What does that state mean though? We have Unknown and Public -- no nat is equivalent to public. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, I was thinking:
(although we may not need to track the undialable case. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's more of "dialable" or not "dialable". |
||
) | ||
|
||
var ( | ||
AutoNATBootDelay = 15 * time.Second | ||
AutoNATRefreshInterval = 15 * time.Minute | ||
|
||
AutoNATRequestTimeout = 60 * time.Second | ||
) | ||
|
||
// AutoNAT is the interface for ambient NAT autodiscovery | ||
type AutoNAT interface { | ||
// Status returns the current NAT status | ||
Status() NATStatus | ||
// PublicAddr returns the public dial address when NAT status is public and an | ||
// error otherwise | ||
PublicAddr() (ma.Multiaddr, error) | ||
} | ||
|
||
// AmbientAutoNAT is the implementation of ambient NAT autodiscovery | ||
type AmbientAutoNAT struct { | ||
magik6k marked this conversation as resolved.
Show resolved
Hide resolved
|
||
ctx context.Context | ||
host host.Host | ||
|
||
mx sync.Mutex | ||
peers map[peer.ID]struct{} | ||
status NATStatus | ||
addr ma.Multiaddr | ||
} | ||
|
||
// NewAutoNAT creates a new ambient NAT autodiscovery instance attached to a host | ||
func NewAutoNAT(ctx context.Context, h host.Host) AutoNAT { | ||
as := &AmbientAutoNAT{ | ||
ctx: ctx, | ||
host: h, | ||
peers: make(map[peer.ID]struct{}), | ||
status: NATStatusUnknown, | ||
} | ||
|
||
h.Network().Notify(as) | ||
go as.background() | ||
|
||
return as | ||
} | ||
|
||
func (as *AmbientAutoNAT) Status() NATStatus { | ||
return as.status | ||
} | ||
|
||
func (as *AmbientAutoNAT) PublicAddr() (ma.Multiaddr, error) { | ||
as.mx.Lock() | ||
defer as.mx.Unlock() | ||
|
||
if as.status != NATStatusPublic { | ||
return nil, errors.New("NAT Status is not public") | ||
} | ||
|
||
return as.addr, nil | ||
} | ||
|
||
func (as *AmbientAutoNAT) background() { | ||
// wait a bit for the node to come online and establish some connections | ||
// before starting autodetection | ||
time.Sleep(AutoNATBootDelay) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not that important but this should probably select in a context and a |
||
for { | ||
as.autodetect() | ||
select { | ||
case <-time.After(AutoNATRefreshInterval): | ||
case <-as.ctx.Done(): | ||
return | ||
} | ||
} | ||
} | ||
|
||
func (as *AmbientAutoNAT) autodetect() { | ||
peers := as.getPeers() | ||
|
||
if len(peers) == 0 { | ||
log.Debugf("skipping NAT auto detection; no autonat peers") | ||
return | ||
} | ||
|
||
cli := NewAutoNATClient(as.host) | ||
|
||
for _, p := range peers { | ||
ctx, cancel := context.WithTimeout(as.ctx, AutoNATRequestTimeout) | ||
a, err := cli.Dial(ctx, p) | ||
cancel() | ||
|
||
switch { | ||
case err == nil: | ||
log.Debugf("NAT status is public; address through %s: %s", p.Pretty(), a.String()) | ||
as.mx.Lock() | ||
as.addr = a | ||
as.status = NATStatusPublic | ||
as.mx.Unlock() | ||
return | ||
|
||
case IsDialError(err): | ||
log.Debugf("NAT status is private; dial error through %s: %s", p.Pretty(), err.Error()) | ||
as.mx.Lock() | ||
as.status = NATStatusPrivate | ||
as.mx.Unlock() | ||
return | ||
|
||
default: | ||
log.Debugf("Error dialing through %s: %s", p.Pretty(), err.Error()) | ||
} | ||
} | ||
|
||
as.mx.Lock() | ||
as.status = NATStatusUnknown | ||
as.mx.Unlock() | ||
} | ||
|
||
func (as *AmbientAutoNAT) getPeers() []peer.ID { | ||
as.mx.Lock() | ||
defer as.mx.Unlock() | ||
|
||
if len(as.peers) == 0 { | ||
return nil | ||
} | ||
|
||
peers := make([]peer.ID, 0, len(as.peers)) | ||
for p := range as.peers { | ||
if len(as.host.Network().ConnsToPeer(p)) > 0 { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: |
||
peers = append(peers, p) | ||
} | ||
} | ||
|
||
if len(peers) == 0 { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm afraid this black or white decision could yield erratic results, e.g. if you only have 1 active connection to an autonat peer, we're going to restrict our query to a single peer. I much rather have a minimum threshold we strive for, e.g. 5 peers, for resilience purposes, starting with peers we hold a connection to. As it is, the
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not sure I follow. The code tries to use an already existing connection purely to avoid creating unnecessary new connections. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Currently, if we happen to be connected to 1 autonat peer only, we'll restrict ourselves to it. If it fails, we're out of luck. This makes us fragile, especially because we expect scarcity in autonat peers. What I'm proposing is to target N peers (e.g. 5), preferring connected peers, and falling back to disconnected ones to fill up the slice. To avoid connected and unconnected peers getting mixed up in the shuffle, we keep track of the pivot index and shuffle both sublists separately. Since There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's fine, but do we want to dial more than one peers when we get a DIAL_ERROR? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's a good question. I'm not sure I have the answer. Right now we flag There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess we could do a few more tries if we have more known autonat peers, but accept the failure if we don't have enough. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yep. If we don't have enough, we'd defer to the next iteration. If by then we've found more autonat peers, with this new logic we'll query them even if not connected, and hence have a chance to improve our connectivity.
We detect "enemy action" on the receiving side through the throttling, no? (3 is fine for that) That makes me realise that we should probably move peers who have sent us There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's probably too much complexity for marginal improvement :) Also, I think I want some slightly more clever strategy for making multiple dial attempts -- if our nat status was unknown or public, then try 3 times. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Implemented the "3 times is enemy action" strategy in aadb8db, with memory of past failures so that it stops asking multiple peers once it has enough confidence we are NATed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In d7f55b0 we ensure that we have at least 3 autonat peers in the candidate set, even when we are connected to less than that. |
||
// we don't have any open connections, try any autonat peer that we know about | ||
for p := range as.peers { | ||
peers = append(peers, p) | ||
} | ||
} | ||
|
||
shufflePeers(peers) | ||
|
||
return peers | ||
} | ||
|
||
func shufflePeers(peers []peer.ID) { | ||
for i := range peers { | ||
j := rand.Intn(i + 1) | ||
peers[i], peers[j] = peers[j], peers[i] | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
package autonat | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
|
||
pb "github.com/libp2p/go-libp2p-autonat/pb" | ||
|
||
ggio "github.com/gogo/protobuf/io" | ||
host "github.com/libp2p/go-libp2p-host" | ||
inet "github.com/libp2p/go-libp2p-net" | ||
peer "github.com/libp2p/go-libp2p-peer" | ||
pstore "github.com/libp2p/go-libp2p-peerstore" | ||
ma "github.com/multiformats/go-multiaddr" | ||
) | ||
|
||
// AutoNATClient is a stateless client interface to AutoNAT peers | ||
type AutoNATClient interface { | ||
// Dial requests from a peer providing AutoNAT services to test dial back | ||
Dial(ctx context.Context, p peer.ID) (ma.Multiaddr, error) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we call this something else? When I see "Dial" I think "establish a connection". When I saw this function used in the code, I had absolutely no idea why dialing a peer would tell us anything about our NAT status. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, will rename. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Called it |
||
} | ||
|
||
// AutoNATError is the class of errors signalled by AutoNAT services | ||
type AutoNATError struct { | ||
Status pb.Message_ResponseStatus | ||
Text string | ||
} | ||
|
||
// NewAutoNATClient creates a fresh instance of an AutoNATClient | ||
func NewAutoNATClient(h host.Host) AutoNATClient { | ||
return &client{h: h} | ||
} | ||
|
||
type client struct { | ||
h host.Host | ||
} | ||
|
||
func (c *client) Dial(ctx context.Context, p peer.ID) (ma.Multiaddr, error) { | ||
s, err := c.h.NewStream(ctx, p, AutoNATProto) | ||
if err != nil { | ||
return nil, err | ||
} | ||
defer s.Close() | ||
|
||
r := ggio.NewDelimitedReader(s, inet.MessageSizeMax) | ||
w := ggio.NewDelimitedWriter(s) | ||
|
||
req := newDialMessage(pstore.PeerInfo{ID: c.h.ID(), Addrs: c.h.Addrs()}) | ||
err = w.WriteMsg(req) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
var res pb.Message | ||
err = r.ReadMsg(&res) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
if res.GetType() != pb.Message_DIAL_RESPONSE { | ||
return nil, fmt.Errorf("Unexpected response: %s", res.GetType().String()) | ||
} | ||
|
||
status := res.GetDialResponse().GetStatus() | ||
switch status { | ||
case pb.Message_OK: | ||
addr := res.GetDialResponse().GetAddr() | ||
return ma.NewMultiaddrBytes(addr) | ||
|
||
default: | ||
return nil, AutoNATError{Status: status, Text: res.GetDialResponse().GetStatusText()} | ||
} | ||
} | ||
|
||
func (e AutoNATError) Error() string { | ||
return fmt.Sprintf("AutoNAT error: %s (%s)", e.Text, e.Status.String()) | ||
} | ||
|
||
func (e AutoNATError) IsDialError() bool { | ||
return e.Status == pb.Message_E_DIAL_ERROR | ||
} | ||
|
||
func (e AutoNATError) IsDialRefused() bool { | ||
return e.Status == pb.Message_E_DIAL_REFUSED | ||
} | ||
|
||
// IsDialError returns true if the AutoNAT peer signalled an error dialing back | ||
func IsDialError(e error) bool { | ||
ae, ok := e.(AutoNATError) | ||
return ok && ae.IsDialError() | ||
} | ||
|
||
// IsDialRefused returns true if the AutoNAT peer signalled refusal to dial back | ||
func IsDialRefused(e error) bool { | ||
ae, ok := e.(AutoNATError) | ||
return ok && ae.IsDialRefused() | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
package autonat | ||
|
||
import ( | ||
inet "github.com/libp2p/go-libp2p-net" | ||
peer "github.com/libp2p/go-libp2p-peer" | ||
ma "github.com/multiformats/go-multiaddr" | ||
) | ||
|
||
var _ inet.Notifiee = (*AmbientAutoNAT)(nil) | ||
|
||
func (as *AmbientAutoNAT) Listen(net inet.Network, a ma.Multiaddr) {} | ||
func (as *AmbientAutoNAT) ListenClose(net inet.Network, a ma.Multiaddr) {} | ||
func (as *AmbientAutoNAT) OpenedStream(net inet.Network, s inet.Stream) {} | ||
func (as *AmbientAutoNAT) ClosedStream(net inet.Network, s inet.Stream) {} | ||
|
||
func (as *AmbientAutoNAT) Connected(net inet.Network, c inet.Conn) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So, we really don't need a large set of peers that support this protocol. Instead of testing every one, How about we: a. Keep a list of known autonat peers (discovered as we try to use them, not when we first connect). Then, periodically*, we can:
That way we aren't unnecessarily noisy. *later, we can get even fancier and set the period to be "time since last inbound connection from a public address", or something like that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we can reduce the noise by simply checking on the protocols reported by identify. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I changed it to look at the protocols reported by identify through the peerstore in 46d352f There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yeah... that annoys me to no end as well.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's really not. I know it should work, but that's just horrible. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is, but that's all we have now 😅 Should we think about setting up some kind of "in-mem event bus" so that different layers of libp2p can emit and react to events? Identify would then emit a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah... Ideally services would just hook into identify (or the peerstore? that doesn't seem right) and get called when we connect to a peer supporting protocol X. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Made the delay configurable, with an initial value of 5 sec (per @magik6k's suggestion) |
||
go func(p peer.ID) { | ||
s, err := as.host.NewStream(as.ctx, p, AutoNATProto) | ||
if err != nil { | ||
return | ||
} | ||
s.Close() | ||
|
||
log.Infof("Discovered AutoNAT peer %s", p.Pretty()) | ||
as.mx.Lock() | ||
as.peers[p] = struct{}{} | ||
as.mx.Unlock() | ||
}(c.RemotePeer()) | ||
} | ||
|
||
func (as *AmbientAutoNAT) Disconnected(net inet.Network, c inet.Conn) {} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
s/serice/service
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed.