Skip to content

Commit

Permalink
refactor(logic)!: standardize uri_encoded/3 errors
Browse files Browse the repository at this point in the history
  • Loading branch information
ccamel committed Jan 17, 2024
1 parent ded7588 commit b8d8133
Show file tree
Hide file tree
Showing 6 changed files with 179 additions and 163 deletions.
178 changes: 25 additions & 153 deletions x/logic/predicate/uri.go
Original file line number Diff line number Diff line change
@@ -1,133 +1,11 @@
package predicate

import (
"fmt"
"net/url"

"github.com/ichiban/prolog/engine"
)

// Component names the part of a URI a string is escaped for; it selects the
// escaping rules applied by Escape.
type Component string

// Component names recognised by NewComponent.
const (
// QueryComponent is the "query" component name.
QueryComponent Component = "query"
// FragmentComponent is the "fragment" component name.
FragmentComponent Component = "fragment"
// PathComponent is the "path" component name.
PathComponent Component = "path"
// SegmentComponent is the "segment" component name.
SegmentComponent Component = "segment"
"github.com/okp4/okp4d/x/logic/prolog"
)

// upperhex maps a 4-bit value to its uppercase hexadecimal digit; Escape
// indexes it to build the two digits of each "%XX" sequence.
const upperhex = "0123456789ABCDEF"

// NewComponent converts the textual name v into the matching Component.
//
// It returns an error when v is not one of the known component names
// (`query`, `fragment`, `path` or `segment`); the returned Component is then empty.
func NewComponent(v string) (Component, error) {
	known := [...]Component{QueryComponent, FragmentComponent, PathComponent, SegmentComponent}
	for _, candidate := range known {
		if v == string(candidate) {
			return candidate, nil
		}
	}

	return "", fmt.Errorf("invalid component name %s, expected `query`, `fragment`, `path` or `segment`", v)
}

// shouldEscape reports whether the byte c has to be percent-encoded when it
// appears inside the URI component comp, following
// [RFC 3986](https://www.rfc-editor.org/rfc/rfc3986).
//
// It re-implements url.shouldEscape from net/url for two reasons: the native
// implementation does not follow RFC 3986 exactly, and component escaping is
// only exposed publicly for the Path component (in reality a path segment) and
// the Query component. The rules here are aligned with the
// [SWI-Prolog implementation](https://www.swi-prolog.org/pldoc/doc/_SWI_/library/uri.pl?show=src#uri_encoded/3),
// which notably escapes the space character ' ' as '%20' in a query, whereas
// the golang library turns it into a '+' (plus sign).
//
// Reported golang issues about the RFC non-compliance:
//   - golang.org/issue/5684
//   - https://github.com/golang/go/issues/27559
func shouldEscape(c byte, comp Component) bool {
	switch {
	case c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z', c >= '0' && c <= '9':
		// §2.3 Unreserved characters (alphanum).
		return false
	case c == '-', c == '.', c == '_', c == '~':
		// §2.3 Unreserved characters (mark).
		return false
	}

	switch c {
	case '!', '$', '&', '\'', '(', ')', '*', '+', ',', '/', ':', ';', '=', '?', '@':
		// §2.2 Reserved characters: whether they may stay unescaped depends
		// on the component, decided below.
	default:
		// Anything that is neither unreserved nor in the list above is always escaped.
		return true
	}

	switch comp {
	case PathComponent: // §3.3
		return c == ':' || c == '?'
	case SegmentComponent: // §3.3
		// Besides the segment separator '/', ':' and '?' are escaped here as well
		// (presumably to match the SWI-Prolog output; the RFC itself permits ':'
		// inside a segment).
		return c == '/' || c == ':' || c == '?'
	case QueryComponent: // §3.4
		switch c {
		case '&', '+', ':', ';', '=':
			return true
		}
		return false
	case FragmentComponent: // §3.5
		return false
	}

	// Reserved character in an unknown component: escape it.
	return true
}

// Escape returns v with every byte that shouldEscape flags for the component
// comp replaced by its "%XX" percent-encoding (uppercase hexadecimal digits).
// When no byte needs escaping, v is returned unchanged.
//
// This is a re-implementation of the native url.escape; see the comment on
// shouldEscape for the reasons.
func (comp Component) Escape(v string) string {
	// First pass: count the bytes to escape, both to short-circuit the common
	// "nothing to do" case and to know the exact output size.
	escapes := 0
	for _, b := range []byte(v) {
		if shouldEscape(b, comp) {
			escapes++
		}
	}
	if escapes == 0 {
		return v
	}

	// Each escaped byte grows from one to three bytes. Small results are
	// assembled in a fixed-size scratch array, larger ones in a slice
	// allocated with the exact capacity.
	size := len(v) + 2*escapes
	var scratch [64]byte
	out := scratch[:0]
	if size > len(scratch) {
		out = make([]byte, 0, size)
	}

	// Second pass: emit the bytes, escaping where required.
	for _, b := range []byte(v) {
		if !shouldEscape(b, comp) {
			out = append(out, b)
			continue
		}
		out = append(out, '%', upperhex[b>>4], upperhex[b&0x0F])
	}

	return string(out)
}

// Unescape decodes the percent-encoded sequences of v by delegating to
// url.PathUnescape, whatever the component comp is. An error is returned when
// v contains an invalid escape sequence.
func (comp Component) Unescape(v string) (string, error) {
	decoded, err := url.PathUnescape(v)
	return decoded, err
}

// URIEncoded is a predicate that unifies the given URI component with the given encoded or decoded string.
//
// The signature is as follows:
Expand All @@ -151,40 +29,34 @@ func (comp Component) Unescape(v string) (string, error) {
// - uri_encoded(path, Decoded, foo%2Fbar).
//
// [RFC 3986]: https://datatracker.ietf.org/doc/html/rfc3986#section-2.1
func URIEncoded(vm *engine.VM, component, decoded, encoded engine.Term, cont engine.Cont, env *engine.Env) *engine.Promise {
var comp Component
switch c := env.Resolve(component).(type) {
case engine.Atom:
cc, err := NewComponent(c.String())
func URIEncoded(_ *engine.VM, component, decoded, encoded engine.Term, cont engine.Cont, env *engine.Env) *engine.Promise {
_, err := prolog.AssertIsGround(env, component)
if err != nil {
return engine.Error(err)
}
uriComponent, err := prolog.AssertURIComponent(env, component)
if err != nil {
return engine.Error(err)
}
forwardConverter := func(value []engine.Term, options engine.Term, env *engine.Env) ([]engine.Term, error) {
in, err := prolog.TextTermToString(value[0], env)
if err != nil {
return engine.Error(fmt.Errorf("uri_encoded/3: %w", err))
return nil, err
}
comp = cc
default:
return engine.Error(fmt.Errorf("uri_encoded/3: invalid component type: %T, should be Atom", component))
out := uriComponent.Escape(in)
return []engine.Term{engine.NewAtom(out)}, nil
}

var dec string
switch d := env.Resolve(decoded).(type) {
case engine.Variable:
case engine.Atom:
dec = comp.Escape(d.String())
default:
return engine.Error(fmt.Errorf("uri_encoded/3: invalid decoded type: %T, should be Variable or Atom", d))
}

switch e := env.Resolve(encoded).(type) {
case engine.Variable:
var r engine.Term = engine.NewAtom(dec)
return engine.Unify(vm, encoded, r, cont, env)
case engine.Atom:
enc, err := comp.Unescape(e.String())
backwardConverter := func(value []engine.Term, options engine.Term, env *engine.Env) ([]engine.Term, error) {
in, err := prolog.TextTermToString(value[0], env)
if err != nil {
return nil, err
}
out, err := uriComponent.Unescape(in)
if err != nil {
return engine.Error(fmt.Errorf("uri_encoded/3: %w", err))
return nil, prolog.WithError(engine.DomainError(prolog.ValidEncoding("uri"), value[0], env), err, env)
}
var r engine.Term = engine.NewAtom(enc)
return engine.Unify(vm, decoded, r, cont, env)
default:
return engine.Error(fmt.Errorf("uri_encoded/3: invalid encoded type: %T, should be Variable or Atom", e))
return []engine.Term{engine.NewAtom(out)}, nil
}
return prolog.UnifyFunctionalPredicate(
[]engine.Term{decoded}, []engine.Term{encoded}, prolog.AtomEmpty, forwardConverter, backwardConverter, cont, env)
}
27 changes: 17 additions & 10 deletions x/logic/predicate/uri_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package predicate

import (
"fmt"
"strings"
"testing"

"github.com/ichiban/prolog/engine"
Expand Down Expand Up @@ -32,7 +33,7 @@ func TestURIEncoded(t *testing.T) {
{
query: `uri_encoded(hey, foo, Decoded).`,
wantSuccess: false,
wantError: fmt.Errorf("uri_encoded/3: invalid component name hey, expected `query`, `fragment`, `path` or `segment`"),
wantError: fmt.Errorf("error(type_error(uri_component,hey),uri_encoded/3)"),
},
{
query: `uri_encoded(path, Decoded, foo).`,
Expand All @@ -56,14 +57,14 @@ func TestURIEncoded(t *testing.T) {
}},
},
{
query: `uri_encoded(query, 'foo bar', Encoded).`,
query: `uri_encoded(query_value, 'foo bar', Encoded).`,
wantSuccess: true,
wantResult: []types.TermResults{{
"Encoded": "'foo%20bar'",
}},
},
{
query: "uri_encoded(query, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', Encoded).",
query: "uri_encoded(query_value, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~', Encoded).",
wantSuccess: true,
wantResult: []types.TermResults{{
"Encoded": "'%20!%22%23$%25%26\\'()*%2B,-./0123456789%3A%3B%3C%3D%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~'",
Expand Down Expand Up @@ -91,7 +92,7 @@ func TestURIEncoded(t *testing.T) {
}},
},
{
query: "uri_encoded(query, Decoded, '%20!%22%23$%25%26\\'()*%2B,-./0123456789%3A%3B%3C%3D%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~+').",
query: "uri_encoded(query_value, Decoded, '%20!%22%23$%25%26\\'()*%2B,-./0123456789%3A%3B%3C%3D%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~+').",
wantSuccess: true,
wantResult: []types.TermResults{{
"Decoded": "' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~+'",
Expand Down Expand Up @@ -119,7 +120,7 @@ func TestURIEncoded(t *testing.T) {
}},
},
{
query: "uri_encoded(query, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~+', '%20!%22%23$%25%26\\'()*%2B,-./0123456789%3A%3B%3C%3D%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~+').",
query: "uri_encoded(query_value, ' !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~+', '%20!%22%23$%25%26\\'()*%2B,-./0123456789%3A%3B%3C%3D%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~%2B').",
wantSuccess: true,
wantResult: []types.TermResults{{}},
},
Expand All @@ -145,22 +146,28 @@ func TestURIEncoded(t *testing.T) {
{
query: "uri_encoded(Var, 'foo bar', 'bar%20foo').",
wantSuccess: false,
wantError: fmt.Errorf("uri_encoded/3: invalid component type: engine.Variable, should be Atom"),
wantError: fmt.Errorf("error(instantiation_error,uri_encoded/3)"),
},
{
query: "uri_encoded(path, compound(2), 'bar%20foo').",
wantSuccess: false,
wantError: fmt.Errorf("uri_encoded/3: invalid decoded type: *engine.compound, should be Variable or Atom"),
wantError: fmt.Errorf("error(type_error(text,compound(2)),uri_encoded/3)"),
},
{
query: "uri_encoded(path, 'foo', compound(2)).",
wantSuccess: false,
wantError: fmt.Errorf("uri_encoded/3: invalid encoded type: *engine.compound, should be Variable or Atom"),
wantResult: []types.TermResults{{}},
},
{
query: "uri_encoded(path, X, compound(2)).",
wantSuccess: false,
wantError: fmt.Errorf("error(type_error(text,compound(2)),uri_encoded/3)"),
},
{
query: "uri_encoded(path, Decoded, 'bar%%3foo').",
wantSuccess: false,
wantError: fmt.Errorf("uri_encoded/3: invalid URL escape \"%%%%3\""),
wantError: fmt.Errorf("error(domain_error(encoding(uri),bar%%%%3foo),[%s],uri_encoded/3)",
strings.Join(strings.Split("invalid URL escape \"%%3\"", ""), ",")),
},
}
for nc, tc := range cases {
Expand Down Expand Up @@ -194,7 +201,7 @@ func TestURIEncoded(t *testing.T) {
got = append(got, m)
}
if tc.wantError != nil {
So(sols.Err(), ShouldNotBeNil)
So(sols.Err(), ShouldNotEqual, nil)
So(sols.Err().Error(), ShouldEqual, tc.wantError.Error())
} else {
So(sols.Err(), ShouldBeNil)
Expand Down
18 changes: 18 additions & 0 deletions x/logic/prolog/assert.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import (

"github.com/ichiban/prolog/engine"
"github.com/samber/lo"

"github.com/okp4/okp4d/x/logic/util"
)

// PredicateMatches returns a function that matches the given predicate against the given other predicate.
Expand Down Expand Up @@ -185,3 +187,19 @@ func AssertPair(env *engine.Env, t engine.Term) (engine.Term, engine.Term, error

return nil, nil, engine.TypeError(AtomTypePair, t, env)
}

// AssertURIComponent resolves the term t in env and maps it to the
// util.URIComponent it designates.
//
// The recognised terms are the atoms AtomQueryValue, AtomFragment, AtomPath
// and AtomSegment. Any other term yields a type error built with
// AtomTypeURIComponent, and the returned component is then the zero value.
func AssertURIComponent(env *engine.Env, t engine.Term) (util.URIComponent, error) {
	if atom, ok := env.Resolve(t).(engine.Atom); ok {
		switch atom {
		case AtomQueryValue:
			return util.QueryValueComponent, nil
		case AtomFragment:
			return util.FragmentComponent, nil
		case AtomPath:
			return util.PathComponent, nil
		case AtomSegment:
			return util.SegmentComponent, nil
		}
	}

	return 0, engine.TypeError(AtomTypeURIComponent, t, env)
}
8 changes: 8 additions & 0 deletions x/logic/prolog/atom.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ var (
AtomError = engine.NewAtom("error")
// AtomFalse is the term false.
AtomFalse = engine.NewAtom("false")
// AtomFragment is the term used to indicate the fragment component.
AtomFragment = engine.NewAtom("fragment")
// AtomHex is the term used to indicate the hexadecimal encoding type option.
AtomHex = engine.NewAtom("hex")
// AtomJSON are terms with principal functor json/1 used to represent json objects.
Expand All @@ -34,6 +36,12 @@ var (
// AtomPair are terms with principal functor (-)/2.
// For example, the term -(A, B) denotes the pair of elements A and B.
AtomPair = engine.NewAtom("-")
// AtomPath is the term used to indicate the path component.
AtomPath = engine.NewAtom("path")
// AtomQueryValue is the term used to indicate the query value component.
AtomQueryValue = engine.NewAtom("query_value")
// AtomSegment is the term used to indicate the segment component.
AtomSegment = engine.NewAtom("segment")
// AtomText is the term used to indicate the atom text.
AtomText = engine.NewAtom("text")
// AtomTrue is the term true.
Expand Down
2 changes: 2 additions & 0 deletions x/logic/prolog/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ var (
AtomTypePair = engine.NewAtom("pair")
// AtomTypeJSON is the term used to indicate the json type.
AtomTypeJSON = AtomJSON
// AtomTypeURIComponent is the term used to represent the URI component type.
AtomTypeURIComponent = engine.NewAtom("uri_component")
)

var (
Expand Down
Loading

0 comments on commit b8d8133

Please sign in to comment.