Skip to content

Commit

Permalink
Merge pull request #7319 from MichaelMure/wildcard-gateways
Browse files Browse the repository at this point in the history
feat: wildcard support for public gateways
  • Loading branch information
aschmahmann authored Aug 19, 2020
2 parents 9af4d42 + 6b6569f commit 0c57e9d
Show file tree
Hide file tree
Showing 4 changed files with 245 additions and 76 deletions.
108 changes: 78 additions & 30 deletions core/corehttp/hostname.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"net"
"net/http"
"net/url"
"regexp"
"strings"

cid "github.com/ipfs/go-cid"
Expand All @@ -24,17 +25,17 @@ import (

var defaultPaths = []string{"/ipfs/", "/ipns/", "/api/", "/p2p/", "/version"}

var pathGatewaySpec = config.GatewaySpec{
var pathGatewaySpec = &config.GatewaySpec{
Paths: defaultPaths,
UseSubdomains: false,
}

var subdomainGatewaySpec = config.GatewaySpec{
var subdomainGatewaySpec = &config.GatewaySpec{
Paths: defaultPaths,
UseSubdomains: true,
}

var defaultKnownGateways = map[string]config.GatewaySpec{
var defaultKnownGateways = map[string]*config.GatewaySpec{
"localhost": subdomainGatewaySpec,
"ipfs.io": pathGatewaySpec,
"gateway.ipfs.io": pathGatewaySpec,
Expand All @@ -58,22 +59,8 @@ func HostnameOption() ServeOption {
if err != nil {
return nil, err
}
knownGateways := make(
map[string]config.GatewaySpec,
len(defaultKnownGateways)+len(cfg.Gateway.PublicGateways),
)
for hostname, gw := range defaultKnownGateways {
knownGateways[hostname] = gw
}
for hostname, gw := range cfg.Gateway.PublicGateways {
if gw == nil {
// Allows the user to remove gateways but _also_
// allows us to continuously update the list.
delete(knownGateways, hostname)
} else {
knownGateways[hostname] = *gw
}
}

knownGateways := prepareKnownGateways(cfg.Gateway.PublicGateways)

mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
// Unfortunately, many (well, ipfs.io) gateways use
Expand Down Expand Up @@ -154,12 +141,12 @@ func HostnameOption() ServeOption {
// HTTP Host check: is this one of our subdomain-based "known gateways"?
// Example: {cid}.ipfs.localhost, {cid}.ipfs.dweb.link
if gw, hostname, ns, rootID, ok := knownSubdomainDetails(host, knownGateways); ok {
// Looks like we're using known subdomain gateway.
// Looks like we're using a known gateway in subdomain mode.

// Assemble original path prefix.
pathPrefix := "/" + ns + "/" + rootID

// Does this gateway _handle_ this path?
// Does this gateway _handle_ subdomains AND this path?
if !(gw.UseSubdomains && hasPrefix(pathPrefix, gw.Paths...)) {
// If not, resource does not exist, return 404
http.NotFound(w, r)
Expand Down Expand Up @@ -233,22 +220,83 @@ func HostnameOption() ServeOption {
}
}

type gatewayHosts struct {
exact map[string]*config.GatewaySpec
wildcard []wildcardHost
}

type wildcardHost struct {
re *regexp.Regexp
spec *config.GatewaySpec
}

func prepareKnownGateways(publicGateways map[string]*config.GatewaySpec) gatewayHosts {
var hosts gatewayHosts

hosts.exact = make(map[string]*config.GatewaySpec, len(publicGateways)+len(defaultKnownGateways))

// First, implicit defaults such as subdomain gateway on localhost
for hostname, gw := range defaultKnownGateways {
hosts.exact[hostname] = gw
}

// Then apply values from Gateway.PublicGateways, if present in the config
for hostname, gw := range publicGateways {
if gw == nil {
// Remove any implicit defaults, if present. This is useful when one
// wants to disable subdomain gateway on localhost etc.
delete(hosts.exact, hostname)
continue
}
if strings.Contains(hostname, "*") {
// from *.domain.tld, construct a regexp that match any direct subdomain
// of .domain.tld.
//
// Regexp will be in the form of ^[^.]+\.domain.tld(?::\d+)?$

escaped := strings.ReplaceAll(hostname, ".", `\.`)
regexed := strings.ReplaceAll(escaped, "*", "[^.]+")

re, err := regexp.Compile(fmt.Sprintf(`^%s(?::\d+)?$`, regexed))
if err != nil {
log.Warn("invalid wildcard gateway hostname \"%s\"", hostname)
}

hosts.wildcard = append(hosts.wildcard, wildcardHost{re: re, spec: gw})
} else {
hosts.exact[hostname] = gw
}
}

return hosts
}

// isKnownHostname checks Gateway.PublicGateways and returns matching
// GatewaySpec with gracefull fallback to version without port
func isKnownHostname(hostname string, knownGateways map[string]config.GatewaySpec) (gw config.GatewaySpec, ok bool) {
func isKnownHostname(hostname string, knownGateways gatewayHosts) (gw *config.GatewaySpec, ok bool) {
// Try hostname (host+optional port - value from Host header as-is)
if gw, ok := knownGateways[hostname]; ok {
if gw, ok := knownGateways.exact[hostname]; ok {
return gw, ok
}
// Also test without port
if gw, ok = knownGateways.exact[stripPort(hostname)]; ok {
return gw, ok
}
// Fallback to hostname without port
gw, ok = knownGateways[stripPort(hostname)]
return gw, ok

// Wildcard support. Test both with and without port.
for _, host := range knownGateways.wildcard {
if host.re.MatchString(hostname) {
return host.spec, true
}
}

return nil, false
}

// Parses Host header and looks for a known subdomain gateway host.
// Parses Host header and looks for a known gateway matching subdomain host.
// If found, returns GatewaySpec and subdomain components.
// Note: hostname is host + optional port
func knownSubdomainDetails(hostname string, knownGateways map[string]config.GatewaySpec) (gw config.GatewaySpec, knownHostname, ns, rootID string, ok bool) {
func knownSubdomainDetails(hostname string, knownGateways gatewayHosts) (gw *config.GatewaySpec, knownHostname, ns, rootID string, ok bool) {
labels := strings.Split(hostname, ".")
// Look for FQDN of a known gateway hostname.
// Example: given "dist.ipfs.io.ipns.dweb.link":
Expand All @@ -273,8 +321,8 @@ func knownSubdomainDetails(hostname string, knownGateways map[string]config.Gate
rootID := strings.Join(labels[:i-1], ".")
return gw, fqdn, ns, rootID, true
}
// not a known subdomain gateway
return gw, "", "", "", false
// no match
return nil, "", "", "", false
}

// isDNSLinkRequest returns bool that indicates if request
Expand Down
86 changes: 50 additions & 36 deletions core/corehttp/hostname_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ func TestToSubdomainURL(t *testing.T) {
{"localhost", "/ipns/bafybeickencdqw37dpz3ha36ewrh4undfjt2do52chtcky4rxkj447qhdm", "http://k2k4r8l9ja7hkzynavdqup76ou46tnvuaqegbd04a4o1mpbsey0meucb.ipns.localhost/", nil},
// PeerID: ed25519+identity multihash → CIDv1Base36
{"localhost", "/ipns/12D3KooWFB51PRY9BxcXSH6khFXw1BZeszeLDy7C8GciskqCTZn5", "http://k51qzi5uqu5di608geewp3nqkg0bpujoasmka7ftkyxgcm3fh1aroup0gsdrna.ipns.localhost/", nil},
{"sub.localhost", "/ipfs/QmbCMUZw6JFeZ7Wp9jkzbye3Fzp2GGcPgC3nmeUjfVF87n", "http://bafybeif7a7gdklt6hodwdrmwmxnhksctcuav6lfxlcyfz4khzl3qfmvcgu.ipfs.sub.localhost/", nil},
} {
url, err := toSubdomainURL(test.hostname, test.path, r)
if url != test.url || !equalError(err, test.err) {
Expand Down Expand Up @@ -104,60 +105,73 @@ func TestDNSPrefix(t *testing.T) {
}

func TestKnownSubdomainDetails(t *testing.T) {
gwSpec := config.GatewaySpec{
UseSubdomains: true,
}
knownGateways := map[string]config.GatewaySpec{
"localhost": gwSpec,
"dweb.link": gwSpec,
"dweb.ipfs.pvt.k12.ma.us": gwSpec, // note the sneaky ".ipfs." ;-)
}
gwLocalhost := &config.GatewaySpec{Paths: []string{"/ipfs", "/ipns", "/api"}, UseSubdomains: true}
gwDweb := &config.GatewaySpec{Paths: []string{"/ipfs", "/ipns", "/api"}, UseSubdomains: true}
gwLong := &config.GatewaySpec{Paths: []string{"/ipfs", "/ipns", "/api"}, UseSubdomains: true}
gwWildcard1 := &config.GatewaySpec{Paths: []string{"/ipfs", "/ipns", "/api"}, UseSubdomains: true}
gwWildcard2 := &config.GatewaySpec{Paths: []string{"/ipfs", "/ipns", "/api"}, UseSubdomains: true}

knownGateways := prepareKnownGateways(map[string]*config.GatewaySpec{
"localhost": gwLocalhost,
"dweb.link": gwDweb,
"dweb.ipfs.pvt.k12.ma.us": gwLong, // note the sneaky ".ipfs." ;-)
"*.wildcard1.tld": gwWildcard1,
"*.*.wildcard2.tld": gwWildcard2,
})

for _, test := range []struct {
// in:
hostHeader string
// out:
gw *config.GatewaySpec
hostname string
ns string
rootID string
ok bool
}{
// no subdomain
{"127.0.0.1:8080", "", "", "", false},
{"[::1]:8080", "", "", "", false},
{"hey.look.example.com", "", "", "", false},
{"dweb.link", "", "", "", false},
{"127.0.0.1:8080", nil, "", "", "", false},
{"[::1]:8080", nil, "", "", "", false},
{"hey.look.example.com", nil, "", "", "", false},
{"dweb.link", nil, "", "", "", false},
// malformed Host header
{".....dweb.link", "", "", "", false},
{"link", "", "", "", false},
{"8080:dweb.link", "", "", "", false},
{" ", "", "", "", false},
{"", "", "", "", false},
{".....dweb.link", nil, "", "", "", false},
{"link", nil, "", "", "", false},
{"8080:dweb.link", nil, "", "", "", false},
{" ", nil, "", "", "", false},
{"", nil, "", "", "", false},
// unknown gateway host
{"bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am.ipfs.unknown.example.com", "", "", "", false},
{"bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am.ipfs.unknown.example.com", nil, "", "", "", false},
// cid in subdomain, known gateway
{"bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am.ipfs.localhost:8080", "localhost:8080", "ipfs", "bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am", true},
{"bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am.ipfs.dweb.link", "dweb.link", "ipfs", "bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am", true},
{"bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am.ipfs.localhost:8080", gwLocalhost, "localhost:8080", "ipfs", "bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am", true},
{"bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am.ipfs.dweb.link", gwDweb, "dweb.link", "ipfs", "bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am", true},
// capture everything before .ipfs.
{"foo.bar.boo-buzz.ipfs.dweb.link", "dweb.link", "ipfs", "foo.bar.boo-buzz", true},
{"foo.bar.boo-buzz.ipfs.dweb.link", gwDweb, "dweb.link", "ipfs", "foo.bar.boo-buzz", true},
// ipns
{"bafzbeihe35nmjqar22thmxsnlsgxppd66pseq6tscs4mo25y55juhh6bju.ipns.localhost:8080", "localhost:8080", "ipns", "bafzbeihe35nmjqar22thmxsnlsgxppd66pseq6tscs4mo25y55juhh6bju", true},
{"bafzbeihe35nmjqar22thmxsnlsgxppd66pseq6tscs4mo25y55juhh6bju.ipns.dweb.link", "dweb.link", "ipns", "bafzbeihe35nmjqar22thmxsnlsgxppd66pseq6tscs4mo25y55juhh6bju", true},
{"bafzbeihe35nmjqar22thmxsnlsgxppd66pseq6tscs4mo25y55juhh6bju.ipns.localhost:8080", gwLocalhost, "localhost:8080", "ipns", "bafzbeihe35nmjqar22thmxsnlsgxppd66pseq6tscs4mo25y55juhh6bju", true},
{"bafzbeihe35nmjqar22thmxsnlsgxppd66pseq6tscs4mo25y55juhh6bju.ipns.dweb.link", gwDweb, "dweb.link", "ipns", "bafzbeihe35nmjqar22thmxsnlsgxppd66pseq6tscs4mo25y55juhh6bju", true},
// edge case check: public gateway under long TLD (see: https://publicsuffix.org)
{"bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am.ipfs.dweb.ipfs.pvt.k12.ma.us", "dweb.ipfs.pvt.k12.ma.us", "ipfs", "bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am", true},
{"bafzbeihe35nmjqar22thmxsnlsgxppd66pseq6tscs4mo25y55juhh6bju.ipns.dweb.ipfs.pvt.k12.ma.us", "dweb.ipfs.pvt.k12.ma.us", "ipns", "bafzbeihe35nmjqar22thmxsnlsgxppd66pseq6tscs4mo25y55juhh6bju", true},
{"bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am.ipfs.dweb.ipfs.pvt.k12.ma.us", gwLong, "dweb.ipfs.pvt.k12.ma.us", "ipfs", "bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am", true},
{"bafzbeihe35nmjqar22thmxsnlsgxppd66pseq6tscs4mo25y55juhh6bju.ipns.dweb.ipfs.pvt.k12.ma.us", gwLong, "dweb.ipfs.pvt.k12.ma.us", "ipns", "bafzbeihe35nmjqar22thmxsnlsgxppd66pseq6tscs4mo25y55juhh6bju", true},
// dnslink in subdomain
{"en.wikipedia-on-ipfs.org.ipns.localhost:8080", "localhost:8080", "ipns", "en.wikipedia-on-ipfs.org", true},
{"en.wikipedia-on-ipfs.org.ipns.localhost", "localhost", "ipns", "en.wikipedia-on-ipfs.org", true},
{"dist.ipfs.io.ipns.localhost:8080", "localhost:8080", "ipns", "dist.ipfs.io", true},
{"en.wikipedia-on-ipfs.org.ipns.dweb.link", "dweb.link", "ipns", "en.wikipedia-on-ipfs.org", true},
{"en.wikipedia-on-ipfs.org.ipns.localhost:8080", gwLocalhost, "localhost:8080", "ipns", "en.wikipedia-on-ipfs.org", true},
{"en.wikipedia-on-ipfs.org.ipns.localhost", gwLocalhost, "localhost", "ipns", "en.wikipedia-on-ipfs.org", true},
{"dist.ipfs.io.ipns.localhost:8080", gwLocalhost, "localhost:8080", "ipns", "dist.ipfs.io", true},
{"en.wikipedia-on-ipfs.org.ipns.dweb.link", gwDweb, "dweb.link", "ipns", "en.wikipedia-on-ipfs.org", true},
// edge case check: public gateway under long TLD (see: https://publicsuffix.org)
{"foo.dweb.ipfs.pvt.k12.ma.us", "", "", "", false},
{"bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am.ipfs.dweb.ipfs.pvt.k12.ma.us", "dweb.ipfs.pvt.k12.ma.us", "ipfs", "bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am", true},
{"bafzbeihe35nmjqar22thmxsnlsgxppd66pseq6tscs4mo25y55juhh6bju.ipns.dweb.ipfs.pvt.k12.ma.us", "dweb.ipfs.pvt.k12.ma.us", "ipns", "bafzbeihe35nmjqar22thmxsnlsgxppd66pseq6tscs4mo25y55juhh6bju", true},
{"foo.dweb.ipfs.pvt.k12.ma.us", nil, "", "", "", false},
{"bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am.ipfs.dweb.ipfs.pvt.k12.ma.us", gwLong, "dweb.ipfs.pvt.k12.ma.us", "ipfs", "bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am", true},
{"bafzbeihe35nmjqar22thmxsnlsgxppd66pseq6tscs4mo25y55juhh6bju.ipns.dweb.ipfs.pvt.k12.ma.us", gwLong, "dweb.ipfs.pvt.k12.ma.us", "ipns", "bafzbeihe35nmjqar22thmxsnlsgxppd66pseq6tscs4mo25y55juhh6bju", true},
// other namespaces
{"api.localhost", "", "", "", false},
{"peerid.p2p.localhost", "localhost", "p2p", "peerid", true},
{"api.localhost", nil, "", "", "", false},
{"peerid.p2p.localhost", gwLocalhost, "localhost", "p2p", "peerid", true},
// wildcards
{"wildcard1.tld", nil, "", "", "", false},
{".wildcard1.tld", nil, "", "", "", false},
{"bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am.ipfs.wildcard1.tld", nil, "", "", "", false},
{"bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am.ipfs.sub.wildcard1.tld", gwWildcard1, "sub.wildcard1.tld", "ipfs", "bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am", true},
{"bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am.ipfs.sub1.sub2.wildcard1.tld", nil, "", "", "", false},
{"bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am.ipfs.sub1.sub2.wildcard2.tld", gwWildcard2, "sub1.sub2.wildcard2.tld", "ipfs", "bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am", true},
} {
gw, hostname, ns, rootID, ok := knownSubdomainDetails(test.hostHeader, knownGateways)
if ok != test.ok {
Expand All @@ -172,8 +186,8 @@ func TestKnownSubdomainDetails(t *testing.T) {
if hostname != test.hostname {
t.Errorf("knownSubdomainDetails(%s): hostname is '%s', expected '%s'", test.hostHeader, hostname, test.hostname)
}
if ok && gw.UseSubdomains != gwSpec.UseSubdomains {
t.Errorf("knownSubdomainDetails(%s): gw is %+v, expected %+v", test.hostHeader, gw, gwSpec)
if gw != test.gw {
t.Errorf("knownSubdomainDetails(%s): gw is %+v, expected %+v", test.hostHeader, gw, test.gw)
}
}

Expand Down
6 changes: 6 additions & 0 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,12 @@ Type: `array[string]`

`PublicGateways` is a dictionary for defining gateway behavior on specified hostnames.

Hostnames can optionally be defined with one or more wildcards.

Examples:
- `*.example.com` will match requests to `http://foo.example.com/ipfs/*` or `http://{cid}.ipfs.bar.example.com/*`.
- `foo-*.example.com` will match requests to `http://foo-bar.example.com/ipfs/*` or `http://{cid}.ipfs.foo-xyz.example.com/*`.

#### `Gateway.PublicGateways: Paths`

Array of paths that should be exposed on the hostname.
Expand Down
Loading

0 comments on commit 0c57e9d

Please sign in to comment.