Skip to content

Commit

Permalink
Enforce percent encoding
Browse files Browse the repository at this point in the history
Partially copy file
https://github.com/ninenines/cowlib/blob/optimise-urldecode/src/cow_uri.erl
We use this copy because:
1. uri_string:unquote/1 is lax: It doesn't validate that characters that are
   required to be percent encoded are indeed percent encoded. In RabbitMQ,
   we want to enforce that proper percent encoding is done by AMQP clients.
2. uri_string:unquote/1 and cow_uri:urldecode/1 in cowlib v2.13.0 are both
   slow because they allocate a new binary for the common case where no
   character was percent encoded.
When a new cowlib version is released, we should make app rabbit depend on
app cowlib calling cow_uri:urldecode/1 and delete this file (rabbit_uri.erl).
  • Loading branch information
ansd committed Jul 3, 2024
1 parent 0de9591 commit 8a2db9a
Show file tree
Hide file tree
Showing 6 changed files with 218 additions and 40 deletions.
3 changes: 3 additions & 0 deletions deps/rabbit/app.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ def all_beam_files(name = "all_beam_files"):
"src/rabbit_trace.erl",
"src/rabbit_tracking_store.erl",
"src/rabbit_upgrade_preparation.erl",
"src/rabbit_uri.erl",
"src/rabbit_variable_queue.erl",
"src/rabbit_version.erl",
"src/rabbit_vhost.erl",
Expand Down Expand Up @@ -481,6 +482,7 @@ def all_test_beam_files(name = "all_test_beam_files"):
"src/rabbit_trace.erl",
"src/rabbit_tracking_store.erl",
"src/rabbit_upgrade_preparation.erl",
"src/rabbit_uri.erl",
"src/rabbit_variable_queue.erl",
"src/rabbit_version.erl",
"src/rabbit_vhost.erl",
Expand Down Expand Up @@ -762,6 +764,7 @@ def all_srcs(name = "all_srcs"):
"src/rabbit_tracking.erl",
"src/rabbit_tracking_store.erl",
"src/rabbit_upgrade_preparation.erl",
"src/rabbit_uri.erl",
"src/rabbit_variable_queue.erl",
"src/rabbit_version.erl",
"src/rabbit_vhost.erl",
Expand Down
18 changes: 9 additions & 9 deletions deps/rabbit/src/rabbit_amqp_management.erl
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ handle_http_req(<<"GET">>,
_User,
_ConnPid,
PermCaches) ->
QNameBin = uri_string:unquote(QNameBinQuoted),
QNameBin = rabbit_uri:urldecode(QNameBinQuoted),
QName = queue_resource(Vhost, QNameBin),
case rabbit_amqqueue:with(
QName,
Expand Down Expand Up @@ -110,7 +110,7 @@ handle_http_req(HttpMethod = <<"PUT">>,
exclusive := Exclusive,
arguments := QArgs0
} = decode_queue(ReqPayload),
QNameBin = uri_string:unquote(QNameBinQuoted),
QNameBin = rabbit_uri:urldecode(QNameBinQuoted),
Owner = case Exclusive of
true -> ConnPid;
false -> none
Expand Down Expand Up @@ -185,7 +185,7 @@ handle_http_req(<<"PUT">>,
User = #user{username = Username},
_ConnPid,
{PermCache0, TopicPermCache}) ->
XNameBin = uri_string:unquote(XNameBinQuoted),
XNameBin = rabbit_uri:urldecode(XNameBinQuoted),
#{type := XTypeBin,
durable := Durable,
auto_delete := AutoDelete,
Expand Down Expand Up @@ -226,7 +226,7 @@ handle_http_req(<<"DELETE">>,
User,
ConnPid,
{PermCache0, TopicPermCache}) ->
QNameBin = uri_string:unquote(QNameBinQuoted),
QNameBin = rabbit_uri:urldecode(QNameBinQuoted),
QName = queue_resource(Vhost, QNameBin),
PermCache = check_resource_access(QName, read, User, PermCache0),
try rabbit_amqqueue:with_exclusive_access_or_die(
Expand Down Expand Up @@ -254,7 +254,7 @@ handle_http_req(<<"DELETE">>,
User = #user{username = Username},
ConnPid,
{PermCache0, TopicPermCache}) ->
QNameBin = uri_string:unquote(QNameBinQuoted),
QNameBin = rabbit_uri:urldecode(QNameBinQuoted),
QName = queue_resource(Vhost, QNameBin),
ok = prohibit_cr_lf(QNameBin),
PermCache = check_resource_access(QName, configure, User, PermCache0),
Expand All @@ -274,7 +274,7 @@ handle_http_req(<<"DELETE">>,
User = #user{username = Username},
_ConnPid,
{PermCache0, TopicPermCache}) ->
XNameBin = uri_string:unquote(XNameBinQuoted),
XNameBin = rabbit_uri:urldecode(XNameBinQuoted),
XName = exchange_resource(Vhost, XNameBin),
ok = prohibit_cr_lf(XNameBin),
ok = prohibit_default_exchange(XName),
Expand Down Expand Up @@ -594,9 +594,9 @@ decode_binding_path_segment(Segment) ->
end,
case re:run(Segment, MP, [{capture, all_but_first, binary}]) of
{match, [SrcQ, <<DstKindChar>>, DstQ, KeyQ, ArgsHash]} ->
Src = uri_string:unquote(SrcQ),
Dst = uri_string:unquote(DstQ),
Key = uri_string:unquote(KeyQ),
Src = rabbit_uri:urldecode(SrcQ),
Dst = rabbit_uri:urldecode(DstQ),
Key = rabbit_uri:urldecode(KeyQ),
DstKind = destination_char_to_kind(DstKindChar),
{Src, DstKind, Dst, Key, ArgsHash};
nomatch ->
Expand Down
52 changes: 22 additions & 30 deletions deps/rabbit/src/rabbit_amqp_session.erl
Original file line number Diff line number Diff line change
Expand Up @@ -2430,10 +2430,14 @@ ensure_source(#'v1_0.source'{address = Address,
{utf8, <<"/q/", QNameBinQuoted/binary>>} ->
%% The only possible v2 source address format is:
%% /q/:queue
QNameBin = unquote(QNameBinQuoted),
QName = queue_resource(Vhost, QNameBin),
ok = exit_if_absent(QName),
{ok, QName, PermCache, TopicPermCache};
try rabbit_uri:urldecode(QNameBinQuoted) of
QNameBin ->
QName = queue_resource(Vhost, QNameBin),
ok = exit_if_absent(QName),
{ok, QName, PermCache, TopicPermCache}
catch error:_ ->
{error, {bad_address, Address}}
end;
{utf8, SourceAddr} ->
case address_v1_permitted() of
true ->
Expand Down Expand Up @@ -2576,7 +2580,13 @@ ensure_target_v2(undefined, _) ->
%% https://docs.oasis-open.org/amqp/anonterm/v1.0/cs01/anonterm-v1.0-cs01.html#doc-anonymous-relay
{ok, to, to, undefined}.

parse_target_v2_string(<<"/e/", Rest/binary>>) ->
parse_target_v2_string(String) ->
try parse_target_v2_string0(String)
catch error:_ ->
{error, bad_address}
end.

parse_target_v2_string0(<<"/e/", Rest/binary>>) ->
Key = cp_slash,
Pattern = try persistent_term:get(Key)
catch error:badarg ->
Expand All @@ -2590,22 +2600,22 @@ parse_target_v2_string(<<"/e/", Rest/binary>>) ->
[<<"amq.default">> | _] ->
{error, bad_address};
[XNameBinQuoted] ->
XNameBin = unquote(XNameBinQuoted),
XNameBin = rabbit_uri:urldecode(XNameBinQuoted),
{ok, XNameBin, <<>>, undefined};
[XNameBinQuoted, RKeyQuoted] ->
XNameBin = unquote(XNameBinQuoted),
RKey = unquote(RKeyQuoted),
XNameBin = rabbit_uri:urldecode(XNameBinQuoted),
RKey = rabbit_uri:urldecode(RKeyQuoted),
{ok, XNameBin, RKey, undefined};
_ ->
{error, bad_address}
end;
parse_target_v2_string(<<"/q/">>) ->
parse_target_v2_string0(<<"/q/">>) ->
%% empty queue name is invalid
{error, bad_address};
parse_target_v2_string(<<"/q/", QNameBinQuoted/binary>>) ->
QNameBin = unquote(QNameBinQuoted),
parse_target_v2_string0(<<"/q/", QNameBinQuoted/binary>>) ->
QNameBin = rabbit_uri:urldecode(QNameBinQuoted),
{ok, ?DEFAULT_EXCHANGE_NAME, QNameBin, QNameBin};
parse_target_v2_string(_) ->
parse_target_v2_string0(_) ->
{error, bad_address}.

ensure_target_v1({utf8, Address}, Vhost, User, Durable, PermCache0) ->
Expand All @@ -2627,24 +2637,6 @@ ensure_target_v1({utf8, Address}, Vhost, User, Durable, PermCache0) ->
ensure_target_v1(Address, _, _, _, _) ->
{error, {bad_address, Address}}.

%% uri_string:unquote/1 is implemented inefficiently because it always creates
%% a new binary. We optimise for the common case: When no character is percent
%% encoded, we avoid a new binary being created.
unquote(Bin) ->
case is_quoted(Bin) of
true ->
uri_string:unquote(Bin);
false ->
Bin
end.

is_quoted(<<>>) ->
false;
is_quoted(<<$%, _/binary>>) ->
true;
is_quoted(<<_, Rest/binary>>) ->
is_quoted(Rest).

handle_outgoing_mgmt_link_flow_control(
#management_link{delivery_count = DeliveryCountSnd} = Link0,
#'v1_0.flow'{handle = Handle = ?UINT(HandleInt),
Expand Down
174 changes: 174 additions & 0 deletions deps/rabbit/src/rabbit_uri.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
%% Copyright (c) 2016-2024, Loïc Hoguin <[email protected]>
%%
%% Permission to use, copy, modify, and/or distribute this software for any
%% purpose with or without fee is hereby granted, provided that the above
%% copyright notice and this permission notice appear in all copies.
%%
%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

%% ------------------------------------------------------------------------- %%
%% This file is a partial copy of
%% https://github.com/ninenines/cowlib/blob/optimise-urldecode/src/cow_uri.erl
%% We use this copy because:
%% 1. uri_string:unquote/1 is lax: It doesn't validate that characters that are
%% required to be percent encoded are indeed percent encoded. In RabbitMQ,
%% we want to enforce that proper percent encoding is done by AMQP clients.
%% 2. uri_string:unquote/1 and cow_uri:urldecode/1 in cowlib v2.13.0 are both
%% slow because they allocate a new binary for the common case where no
%% character was percent encoded.
%% When a new cowlib version is released, we should make app rabbit depend on
%% app cowlib calling cow_uri:urldecode/1 and delete this file (rabbit_uri.erl).
%% ------------------------------------------------------------------------- %%

-module(rabbit_uri).

-export([urldecode/1]).

-define(UNHEX(H, L), (?UNHEX(H) bsl 4 bor ?UNHEX(L))).

-define(UNHEX(C),
case C of
$0 -> 0;
$1 -> 1;
$2 -> 2;
$3 -> 3;
$4 -> 4;
$5 -> 5;
$6 -> 6;
$7 -> 7;
$8 -> 8;
$9 -> 9;
$A -> 10;
$B -> 11;
$C -> 12;
$D -> 13;
$E -> 14;
$F -> 15;
$a -> 10;
$b -> 11;
$c -> 12;
$d -> 13;
$e -> 14;
$f -> 15
end
).

%% Decode a percent encoded string. (RFC3986 2.1)
%%
%% Inspiration for some of the optimisations done here come
%% from the new `json` module as it was in mid-2024.
%%
%% Possible input includes:
%%
%% * nothing encoded (no % character):
%% We want to return the binary as-is to avoid an allocation.
%%
%% * small number of encoded characters:
%% We can "skip" words of text.
%%
%% * mostly encoded characters (non-ascii languages)
%% We can decode characters in bulk.

-define(IS_PLAIN(C), (
(C =:= $!) orelse (C =:= $$) orelse (C =:= $&) orelse (C =:= $') orelse
(C =:= $() orelse (C =:= $)) orelse (C =:= $*) orelse (C =:= $+) orelse
(C =:= $,) orelse (C =:= $-) orelse (C =:= $.) orelse (C =:= $0) orelse
(C =:= $1) orelse (C =:= $2) orelse (C =:= $3) orelse (C =:= $4) orelse
(C =:= $5) orelse (C =:= $6) orelse (C =:= $7) orelse (C =:= $8) orelse
(C =:= $9) orelse (C =:= $:) orelse (C =:= $;) orelse (C =:= $=) orelse
(C =:= $@) orelse (C =:= $A) orelse (C =:= $B) orelse (C =:= $C) orelse
(C =:= $D) orelse (C =:= $E) orelse (C =:= $F) orelse (C =:= $G) orelse
(C =:= $H) orelse (C =:= $I) orelse (C =:= $J) orelse (C =:= $K) orelse
(C =:= $L) orelse (C =:= $M) orelse (C =:= $N) orelse (C =:= $O) orelse
(C =:= $P) orelse (C =:= $Q) orelse (C =:= $R) orelse (C =:= $S) orelse
(C =:= $T) orelse (C =:= $U) orelse (C =:= $V) orelse (C =:= $W) orelse
(C =:= $X) orelse (C =:= $Y) orelse (C =:= $Z) orelse (C =:= $_) orelse
(C =:= $a) orelse (C =:= $b) orelse (C =:= $c) orelse (C =:= $d) orelse
(C =:= $e) orelse (C =:= $f) orelse (C =:= $g) orelse (C =:= $h) orelse
(C =:= $i) orelse (C =:= $j) orelse (C =:= $k) orelse (C =:= $l) orelse
(C =:= $m) orelse (C =:= $n) orelse (C =:= $o) orelse (C =:= $p) orelse
(C =:= $q) orelse (C =:= $r) orelse (C =:= $s) orelse (C =:= $t) orelse
(C =:= $u) orelse (C =:= $v) orelse (C =:= $w) orelse (C =:= $x) orelse
(C =:= $y) orelse (C =:= $z) orelse (C =:= $~)
)).

urldecode(Binary) ->
skip_dec(Binary, Binary, 0).

%% This functions helps avoid a binary allocation when
%% there is nothing to decode.
skip_dec(Binary, Orig, Len) ->
case Binary of
<<C1, C2, C3, C4, Rest/bits>>
when ?IS_PLAIN(C1) andalso ?IS_PLAIN(C2)
andalso ?IS_PLAIN(C3) andalso ?IS_PLAIN(C4) ->
skip_dec(Rest, Orig, Len + 4);
_ ->
dec(Binary, [], Orig, 0, Len)
end.

-dialyzer({no_improper_lists, [dec/5]}).
%% This clause helps speed up decoding of highly encoded values.
dec(<<$%, H1, L1, $%, H2, L2, $%, H3, L3, $%, H4, L4, Rest/bits>>, Acc, Orig, Skip, Len) ->
C1 = ?UNHEX(H1, L1),
C2 = ?UNHEX(H2, L2),
C3 = ?UNHEX(H3, L3),
C4 = ?UNHEX(H4, L4),
case Len of
0 ->
dec(Rest, [Acc|<<C1, C2, C3, C4>>], Orig, Skip + 12, 0);
_ ->
Part = binary_part(Orig, Skip, Len),
dec(Rest, [Acc, Part|<<C1, C2, C3, C4>>], Orig, Skip + Len + 12, 0)
end;
dec(<<$%, H, L, Rest/bits>>, Acc, Orig, Skip, Len) ->
C = ?UNHEX(H, L),
case Len of
0 ->
dec(Rest, [Acc|<<C>>], Orig, Skip + 3, 0);
_ ->
Part = binary_part(Orig, Skip, Len),
dec(Rest, [Acc, Part|<<C>>], Orig, Skip + Len + 3, 0)
end;
%% This clause helps speed up decoding of barely encoded values.
dec(<<C1, C2, C3, C4, Rest/bits>>, Acc, Orig, Skip, Len)
when ?IS_PLAIN(C1) andalso ?IS_PLAIN(C2)
andalso ?IS_PLAIN(C3) andalso ?IS_PLAIN(C4) ->
dec(Rest, Acc, Orig, Skip, Len + 4);
dec(<<C, Rest/bits>>, Acc, Orig, Skip, Len) when ?IS_PLAIN(C) ->
dec(Rest, Acc, Orig, Skip, Len + 1);
dec(<<>>, _, Orig, 0, _) ->
Orig;
dec(<<>>, Acc, _, _, 0) ->
iolist_to_binary(Acc);
dec(<<>>, Acc, Orig, Skip, Len) ->
Part = binary_part(Orig, Skip, Len),
iolist_to_binary([Acc|Part]);
dec(_, _, Orig, Skip, Len) ->
error({invalid_byte, binary:at(Orig, Skip + Len)}).

-ifdef(TEST).
urldecode_test_() ->
Tests = [
{<<"%20">>, <<" ">>},
{<<"+">>, <<"+">>},
{<<"%00">>, <<0>>},
{<<"%fF">>, <<255>>},
{<<"123">>, <<"123">>},
{<<"%i5">>, error},
{<<"%5">>, error}
],
[{Qs, fun() ->
E = try urldecode(Qs) of
R -> R
catch _:_ ->
error
end
end} || {Qs, E} <- Tests].
-endif.
10 changes: 9 additions & 1 deletion deps/rabbit/test/amqp_address_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,15 @@ bad_v2_addresses() ->
<<"/exchange//key/">>,
<<"/exchange//key/mykey">>,
<<"/exchange/amq.default/key/">>,
<<"/exchange/amq.default/key/mykey">>
<<"/exchange/amq.default/key/mykey">>,
%% The following addresses should be percent encoded, but aren't.
<<"/q/missing%encoding">>,
<<"/q/missing/encoding">>,
<<"/q/✋"/utf8>>,
<<"/e/missing%encoding">>,
<<"/e/missing/encoding/routingkey">>,
<<"/e/exchange/missing%encoding">>,
<<"/e/✋"/utf8>>
].

%% Test v2 target address 'null' with an invalid 'to' addresses.
Expand Down
1 change: 1 addition & 0 deletions moduleindex.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -723,6 +723,7 @@ rabbit:
- rabbit_tracking
- rabbit_tracking_store
- rabbit_upgrade_preparation
- rabbit_uri
- rabbit_variable_queue
- rabbit_version
- rabbit_vhost
Expand Down

0 comments on commit 8a2db9a

Please sign in to comment.