Enforce percent encoding

Partially copy file https://github.com/ninenines/cowlib/blob/optimise-urldecode/src/cow_uri.erl We use this copy because: 1. uri_string:unquote/1 is lax: It doesn't validate that characters that are required to be percent encoded are indeed percent encoded. In RabbitMQ, we want to enforce that proper percent encoding is done by AMQP clients. 2. uri_string:unquote/1 and cow_uri:urldecode/1 in cowlib v2.13.0 are both slow because they allocate a new binary for the common case where no character was percent encoded. When a new cowlib version is released, we should make app rabbit depend on app cowlib calling cow_uri:urldecode/1 and delete this file (rabbit_uri.erl).
rabbitmq · Jul 3, 2024 · 8a2db9a · 8a2db9a
1 parent 0de9591
commit 8a2db9a
Show file tree

Hide file tree

Showing 6 changed files with 218 additions and 40 deletions.
diff --git a/deps/rabbit/app.bzl b/deps/rabbit/app.bzl
@@ -223,6 +223,7 @@ def all_beam_files(name = "all_beam_files"):
             "src/rabbit_trace.erl",
             "src/rabbit_tracking_store.erl",
             "src/rabbit_upgrade_preparation.erl",
+            "src/rabbit_uri.erl",
             "src/rabbit_variable_queue.erl",
             "src/rabbit_version.erl",
             "src/rabbit_vhost.erl",
@@ -481,6 +482,7 @@ def all_test_beam_files(name = "all_test_beam_files"):
             "src/rabbit_trace.erl",
             "src/rabbit_tracking_store.erl",
             "src/rabbit_upgrade_preparation.erl",
+            "src/rabbit_uri.erl",
             "src/rabbit_variable_queue.erl",
             "src/rabbit_version.erl",
             "src/rabbit_vhost.erl",
@@ -762,6 +764,7 @@ def all_srcs(name = "all_srcs"):
             "src/rabbit_tracking.erl",
             "src/rabbit_tracking_store.erl",
             "src/rabbit_upgrade_preparation.erl",
+            "src/rabbit_uri.erl",
             "src/rabbit_variable_queue.erl",
             "src/rabbit_version.erl",
             "src/rabbit_vhost.erl",

diff --git a/deps/rabbit/src/rabbit_amqp_management.erl b/deps/rabbit/src/rabbit_amqp_management.erl
@@ -80,7 +80,7 @@ handle_http_req(<<"GET">>,
                 _User,
                 _ConnPid,
                 PermCaches) ->
-    QNameBin = uri_string:unquote(QNameBinQuoted),
+    QNameBin = rabbit_uri:urldecode(QNameBinQuoted),
     QName = queue_resource(Vhost, QNameBin),
     case rabbit_amqqueue:with(
            QName,
@@ -110,7 +110,7 @@ handle_http_req(HttpMethod = <<"PUT">>,
       exclusive := Exclusive,
       arguments := QArgs0
      } = decode_queue(ReqPayload),
-    QNameBin = uri_string:unquote(QNameBinQuoted),
+    QNameBin = rabbit_uri:urldecode(QNameBinQuoted),
     Owner = case Exclusive of
                 true -> ConnPid;
                 false -> none
@@ -185,7 +185,7 @@ handle_http_req(<<"PUT">>,
                 User = #user{username = Username},
                 _ConnPid,
                 {PermCache0, TopicPermCache}) ->
-    XNameBin = uri_string:unquote(XNameBinQuoted),
+    XNameBin = rabbit_uri:urldecode(XNameBinQuoted),
     #{type := XTypeBin,
       durable := Durable,
       auto_delete := AutoDelete,
@@ -226,7 +226,7 @@ handle_http_req(<<"DELETE">>,
                 User,
                 ConnPid,
                 {PermCache0, TopicPermCache}) ->
-    QNameBin = uri_string:unquote(QNameBinQuoted),
+    QNameBin = rabbit_uri:urldecode(QNameBinQuoted),
     QName = queue_resource(Vhost, QNameBin),
     PermCache = check_resource_access(QName, read, User, PermCache0),
     try rabbit_amqqueue:with_exclusive_access_or_die(
@@ -254,7 +254,7 @@ handle_http_req(<<"DELETE">>,
                 User = #user{username = Username},
                 ConnPid,
                 {PermCache0, TopicPermCache}) ->
-    QNameBin = uri_string:unquote(QNameBinQuoted),
+    QNameBin = rabbit_uri:urldecode(QNameBinQuoted),
     QName = queue_resource(Vhost, QNameBin),
     ok = prohibit_cr_lf(QNameBin),
     PermCache = check_resource_access(QName, configure, User, PermCache0),
@@ -274,7 +274,7 @@ handle_http_req(<<"DELETE">>,
                 User = #user{username = Username},
                 _ConnPid,
                 {PermCache0, TopicPermCache}) ->
-    XNameBin = uri_string:unquote(XNameBinQuoted),
+    XNameBin = rabbit_uri:urldecode(XNameBinQuoted),
     XName = exchange_resource(Vhost, XNameBin),
     ok = prohibit_cr_lf(XNameBin),
     ok = prohibit_default_exchange(XName),
@@ -594,9 +594,9 @@ decode_binding_path_segment(Segment) ->
          end,
     case re:run(Segment, MP, [{capture, all_but_first, binary}]) of
         {match, [SrcQ, <<DstKindChar>>, DstQ, KeyQ, ArgsHash]} ->
-            Src = uri_string:unquote(SrcQ),
-            Dst = uri_string:unquote(DstQ),
-            Key = uri_string:unquote(KeyQ),
+            Src = rabbit_uri:urldecode(SrcQ),
+            Dst = rabbit_uri:urldecode(DstQ),
+            Key = rabbit_uri:urldecode(KeyQ),
             DstKind = destination_char_to_kind(DstKindChar),
             {Src, DstKind, Dst, Key, ArgsHash};
         nomatch ->

diff --git a/deps/rabbit/src/rabbit_amqp_session.erl b/deps/rabbit/src/rabbit_amqp_session.erl
@@ -2430,10 +2430,14 @@ ensure_source(#'v1_0.source'{address = Address,
         {utf8, <<"/q/", QNameBinQuoted/binary>>} ->
             %% The only possible v2 source address format is:
             %%  /q/:queue
-            QNameBin = unquote(QNameBinQuoted),
-            QName = queue_resource(Vhost, QNameBin),
-            ok = exit_if_absent(QName),
-            {ok, QName, PermCache, TopicPermCache};
+            try rabbit_uri:urldecode(QNameBinQuoted) of
+                QNameBin ->
+                    QName = queue_resource(Vhost, QNameBin),
+                    ok = exit_if_absent(QName),
+                    {ok, QName, PermCache, TopicPermCache}
+            catch error:_ ->
+                      {error, {bad_address, Address}}
+            end;
         {utf8, SourceAddr} ->
             case address_v1_permitted() of
                 true ->
@@ -2576,7 +2580,13 @@ ensure_target_v2(undefined, _) ->
     %% https://docs.oasis-open.org/amqp/anonterm/v1.0/cs01/anonterm-v1.0-cs01.html#doc-anonymous-relay
     {ok, to, to, undefined}.
 
-parse_target_v2_string(<<"/e/", Rest/binary>>) ->
+parse_target_v2_string(String) ->
+    try parse_target_v2_string0(String)
+    catch error:_ ->
+              {error, bad_address}
+    end.
+
+parse_target_v2_string0(<<"/e/", Rest/binary>>) ->
     Key = cp_slash,
     Pattern = try persistent_term:get(Key)
               catch error:badarg ->
@@ -2590,22 +2600,22 @@ parse_target_v2_string(<<"/e/", Rest/binary>>) ->
         [<<"amq.default">> | _] ->
             {error, bad_address};
         [XNameBinQuoted] ->
-            XNameBin = unquote(XNameBinQuoted),
+            XNameBin = rabbit_uri:urldecode(XNameBinQuoted),
             {ok, XNameBin, <<>>, undefined};
         [XNameBinQuoted, RKeyQuoted] ->
-            XNameBin = unquote(XNameBinQuoted),
-            RKey = unquote(RKeyQuoted),
+            XNameBin = rabbit_uri:urldecode(XNameBinQuoted),
+            RKey = rabbit_uri:urldecode(RKeyQuoted),
             {ok, XNameBin, RKey, undefined};
         _ ->
             {error, bad_address}
     end;
-parse_target_v2_string(<<"/q/">>) ->
+parse_target_v2_string0(<<"/q/">>) ->
     %% empty queue name is invalid
     {error, bad_address};
-parse_target_v2_string(<<"/q/", QNameBinQuoted/binary>>) ->
-    QNameBin = unquote(QNameBinQuoted),
+parse_target_v2_string0(<<"/q/", QNameBinQuoted/binary>>) ->
+    QNameBin = rabbit_uri:urldecode(QNameBinQuoted),
     {ok, ?DEFAULT_EXCHANGE_NAME, QNameBin, QNameBin};
-parse_target_v2_string(_) ->
+parse_target_v2_string0(_) ->
     {error, bad_address}.
 
 ensure_target_v1({utf8, Address}, Vhost, User, Durable, PermCache0) ->
@@ -2627,24 +2637,6 @@ ensure_target_v1({utf8, Address}, Vhost, User, Durable, PermCache0) ->
 ensure_target_v1(Address, _, _, _, _) ->
     {error, {bad_address, Address}}.
 
-%% uri_string:unquote/1 is implemented inefficiently because it always creates
-%% a new binary. We optimise for the common case: When no character is percent
-%% encoded, we avoid a new binary being created.
-unquote(Bin) ->
-    case is_quoted(Bin) of
-        true ->
-            uri_string:unquote(Bin);
-        false ->
-            Bin
-    end.
-
-is_quoted(<<>>) ->
-    false;
-is_quoted(<<$%, _/binary>>) ->
-    true;
-is_quoted(<<_, Rest/binary>>) ->
-    is_quoted(Rest).
-
 handle_outgoing_mgmt_link_flow_control(
   #management_link{delivery_count = DeliveryCountSnd} = Link0,
   #'v1_0.flow'{handle = Handle = ?UINT(HandleInt),

diff --git a/deps/rabbit/src/rabbit_uri.erl b/deps/rabbit/src/rabbit_uri.erl
@@ -0,0 +1,174 @@
+%% Copyright (c) 2016-2024, Loïc Hoguin <[email protected]>
+%%
+%% Permission to use, copy, modify, and/or distribute this software for any
+%% purpose with or without fee is hereby granted, provided that the above
+%% copyright notice and this permission notice appear in all copies.
+%%
+%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+%% ------------------------------------------------------------------------- %%
+%% This file is a partial copy of
+%% https://github.com/ninenines/cowlib/blob/optimise-urldecode/src/cow_uri.erl
+%% We use this copy because:
+%% 1. uri_string:unquote/1 is lax: It doesn't validate that characters that are
+%%    required to be percent encoded are indeed percent encoded. In RabbitMQ,
+%%    we want to enforce that proper percent encoding is done by AMQP clients.
+%% 2. uri_string:unquote/1 and cow_uri:urldecode/1 in cowlib v2.13.0 are both
+%%    slow because they allocate a new binary for the common case where no
+%%    character was percent encoded.
+%% When a new cowlib version is released, we should make app rabbit depend on
+%% app cowlib calling cow_uri:urldecode/1 and delete this file (rabbit_uri.erl).
+%% ------------------------------------------------------------------------- %%
+
+-module(rabbit_uri).
+
+-export([urldecode/1]).
+
+-define(UNHEX(H, L), (?UNHEX(H) bsl 4 bor ?UNHEX(L))).
+
+-define(UNHEX(C),
+	case C of
+		$0 -> 0;
+		$1 -> 1;
+		$2 -> 2;
+		$3 -> 3;
+		$4 -> 4;
+		$5 -> 5;
+		$6 -> 6;
+		$7 -> 7;
+		$8 -> 8;
+		$9 -> 9;
+		$A -> 10;
+		$B -> 11;
+		$C -> 12;
+		$D -> 13;
+		$E -> 14;
+		$F -> 15;
+		$a -> 10;
+		$b -> 11;
+		$c -> 12;
+		$d -> 13;
+		$e -> 14;
+		$f -> 15
+	end
+).
+
+%% Decode a percent encoded string. (RFC3986 2.1)
+%%
+%% Inspiration for some of the optimisations done here come
+%% from the new `json` module as it was in mid-2024.
+%%
+%% Possible input includes:
+%%
+%% * nothing encoded (no % character):
+%%   We want to return the binary as-is to avoid an allocation.
+%%
+%% * small number of encoded characters:
+%%   We can "skip" words of text.
+%%
+%% * mostly encoded characters (non-ascii languages)
+%%   We can decode characters in bulk.
+
+-define(IS_PLAIN(C), (
+	(C =:= $!) orelse (C =:= $$) orelse (C =:= $&) orelse (C =:= $') orelse
+	(C =:= $() orelse (C =:= $)) orelse (C =:= $*) orelse (C =:= $+) orelse
+	(C =:= $,) orelse (C =:= $-) orelse (C =:= $.) orelse (C =:= $0) orelse
+	(C =:= $1) orelse (C =:= $2) orelse (C =:= $3) orelse (C =:= $4) orelse
+	(C =:= $5) orelse (C =:= $6) orelse (C =:= $7) orelse (C =:= $8) orelse
+	(C =:= $9) orelse (C =:= $:) orelse (C =:= $;) orelse (C =:= $=) orelse
+	(C =:= $@) orelse (C =:= $A) orelse (C =:= $B) orelse (C =:= $C) orelse
+	(C =:= $D) orelse (C =:= $E) orelse (C =:= $F) orelse (C =:= $G) orelse
+	(C =:= $H) orelse (C =:= $I) orelse (C =:= $J) orelse (C =:= $K) orelse
+	(C =:= $L) orelse (C =:= $M) orelse (C =:= $N) orelse (C =:= $O) orelse
+	(C =:= $P) orelse (C =:= $Q) orelse (C =:= $R) orelse (C =:= $S) orelse
+	(C =:= $T) orelse (C =:= $U) orelse (C =:= $V) orelse (C =:= $W) orelse
+	(C =:= $X) orelse (C =:= $Y) orelse (C =:= $Z) orelse (C =:= $_) orelse
+	(C =:= $a) orelse (C =:= $b) orelse (C =:= $c) orelse (C =:= $d) orelse
+	(C =:= $e) orelse (C =:= $f) orelse (C =:= $g) orelse (C =:= $h) orelse
+	(C =:= $i) orelse (C =:= $j) orelse (C =:= $k) orelse (C =:= $l) orelse
+	(C =:= $m) orelse (C =:= $n) orelse (C =:= $o) orelse (C =:= $p) orelse
+	(C =:= $q) orelse (C =:= $r) orelse (C =:= $s) orelse (C =:= $t) orelse
+	(C =:= $u) orelse (C =:= $v) orelse (C =:= $w) orelse (C =:= $x) orelse
+	(C =:= $y) orelse (C =:= $z) orelse (C =:= $~)
+)).
+
+urldecode(Binary) ->
+	skip_dec(Binary, Binary, 0).
+
+%% This functions helps avoid a binary allocation when
+%% there is nothing to decode.
+skip_dec(Binary, Orig, Len) ->
+	case Binary of
+		<<C1, C2, C3, C4, Rest/bits>>
+				when ?IS_PLAIN(C1) andalso ?IS_PLAIN(C2)
+				andalso ?IS_PLAIN(C3) andalso ?IS_PLAIN(C4) ->
+			skip_dec(Rest, Orig, Len + 4);
+		_ ->
+			dec(Binary, [], Orig, 0, Len)
+	end.
+
+-dialyzer({no_improper_lists, [dec/5]}).
+%% This clause helps speed up decoding of highly encoded values.
+dec(<<$%, H1, L1, $%, H2, L2, $%, H3, L3, $%, H4, L4, Rest/bits>>, Acc, Orig, Skip, Len) ->
+	C1 = ?UNHEX(H1, L1),
+	C2 = ?UNHEX(H2, L2),
+	C3 = ?UNHEX(H3, L3),
+	C4 = ?UNHEX(H4, L4),
+	case Len of
+		0 ->
+			dec(Rest, [Acc|<<C1, C2, C3, C4>>], Orig, Skip + 12, 0);
+		_ ->
+			Part = binary_part(Orig, Skip, Len),
+			dec(Rest, [Acc, Part|<<C1, C2, C3, C4>>], Orig, Skip + Len + 12, 0)
+	end;
+dec(<<$%, H, L, Rest/bits>>, Acc, Orig, Skip, Len) ->
+	C = ?UNHEX(H, L),
+	case Len of
+		0 ->
+			dec(Rest, [Acc|<<C>>], Orig, Skip + 3, 0);
+		_ ->
+			Part = binary_part(Orig, Skip, Len),
+			dec(Rest, [Acc, Part|<<C>>], Orig, Skip + Len + 3, 0)
+	end;
+%% This clause helps speed up decoding of barely encoded values.
+dec(<<C1, C2, C3, C4, Rest/bits>>, Acc, Orig, Skip, Len)
+				when ?IS_PLAIN(C1) andalso ?IS_PLAIN(C2)
+				andalso ?IS_PLAIN(C3) andalso ?IS_PLAIN(C4) ->
+	dec(Rest, Acc, Orig, Skip, Len + 4);
+dec(<<C, Rest/bits>>, Acc, Orig, Skip, Len) when ?IS_PLAIN(C) ->
+	dec(Rest, Acc, Orig, Skip, Len + 1);
+dec(<<>>, _, Orig, 0, _) ->
+	Orig;
+dec(<<>>, Acc, _, _, 0) ->
+	iolist_to_binary(Acc);
+dec(<<>>, Acc, Orig, Skip, Len) ->
+	Part = binary_part(Orig, Skip, Len),
+	iolist_to_binary([Acc|Part]);
+dec(_, _, Orig, Skip, Len) ->
+	error({invalid_byte, binary:at(Orig, Skip + Len)}).
+
+-ifdef(TEST).
+urldecode_test_() ->
+	Tests = [
+		{<<"%20">>, <<" ">>},
+		{<<"+">>, <<"+">>},
+		{<<"%00">>, <<0>>},
+		{<<"%fF">>, <<255>>},
+		{<<"123">>, <<"123">>},
+		{<<"%i5">>, error},
+		{<<"%5">>, error}
+	],
+	[{Qs, fun() ->
+		E = try urldecode(Qs) of
+			R -> R
+		catch _:_ ->
+			error
+		end
+	end} || {Qs, E} <- Tests].
+-endif.
diff --git a/deps/rabbit/test/amqp_address_SUITE.erl b/deps/rabbit/test/amqp_address_SUITE.erl
@@ -436,7 +436,15 @@ bad_v2_addresses() ->
      <<"/exchange//key/">>,
      <<"/exchange//key/mykey">>,
      <<"/exchange/amq.default/key/">>,
-     <<"/exchange/amq.default/key/mykey">>
+     <<"/exchange/amq.default/key/mykey">>,
+     %% The following addresses should be percent encoded, but aren't.
+     <<"/q/missing%encoding">>,
+     <<"/q/missing/encoding">>,
+     <<"/q/✋"/utf8>>,
+     <<"/e/missing%encoding">>,
+     <<"/e/missing/encoding/routingkey">>,
+     <<"/e/exchange/missing%encoding">>,
+     <<"/e/✋"/utf8>>
     ].
 
 %% Test v2 target address 'null' with an invalid 'to' addresses.

diff --git a/moduleindex.yaml b/moduleindex.yaml
@@ -723,6 +723,7 @@ rabbit:
 - rabbit_tracking
 - rabbit_tracking_store
 - rabbit_upgrade_preparation
+- rabbit_uri
 - rabbit_variable_queue
 - rabbit_version
 - rabbit_vhost