diff --git a/api/src/main/java/jakarta/servlet/http/HttpServletRequest.java b/api/src/main/java/jakarta/servlet/http/HttpServletRequest.java
index 0bdc775fb..de56f21ff 100644
--- a/api/src/main/java/jakarta/servlet/http/HttpServletRequest.java
+++ b/api/src/main/java/jakarta/servlet/http/HttpServletRequest.java
@@ -249,9 +249,14 @@ public String toString() {
*
* This method returns null if there was no extra path information.
*
- * @return a String, decoded by the web container, specifying extra path information that comes after the
- * servlet path but before the query string in the request URL; or null if the URL does not have any extra
- * path information
+ * @return a String specifying extra path information that comes after the servlet path but before the
+ * query string in the request URL; or null if the URL does not have any extra path information. The path
+ * will be canonicalized as per section 3.5 of the specification. This method will not return any encoded characters
+ * unless the container is configured specifically to allow them.
+ * @throws IllegalArgumentException In standard configuration, this method will never throw. However, a container may be
+ * configured to not reject some suspicious sequences identified by section 3.5.2 of the specification. Furthermore, the
+ * container may be configured to allow such paths to be accessed only via safer methods like {@link #getRequestURI()}
+ * and to throw IllegalArgumentException if this method is called for such suspicious paths.
*/
public String getPathInfo();
@@ -299,8 +304,13 @@ default public PushBuilder newPushBuilder() {
* {@link jakarta.servlet.ServletContext#getContextPath()} should be considered as the prime or preferred context path
* of the application.
*
- * @return a String specifying the portion of the request URI that indicates the context of the request
- *
+ * @return a String specifying the portion of the request URI that indicates the context of the request.
+ * The path will be canonicalized as per section 3.5 of the specification. This method will not return any encoded
+ * characters unless the container is configured specifically to allow them.
+ * @throws IllegalArgumentException In standard configuration, this method will never throw. However, a container may be
+ * configured to not reject some suspicious sequences identified by section 3.5.2 of the specification. Furthermore, the
+ * container may be configured to allow such paths to be accessed only via safer methods like {@link #getRequestURI()}
+ * and to throw IllegalArgumentException if this method is called for such suspicious paths.
* @see jakarta.servlet.ServletContext#getContextPath()
*/
public String getContextPath();
@@ -411,15 +421,21 @@ default public PushBuilder newPushBuilder() {
public StringBuffer getRequestURL();
/**
- * Returns the part of this request's URL that calls the servlet. This path starts with a "/" character and includes
- * either the servlet name or a path to the servlet, but does not include any extra path information or a query string.
+ * Returns the part of this request's URL that calls the servlet. This path starts with a "/" character and includes the
+ * path to the servlet, but does not include any extra path information or a query string.
*
*
* This method will return an empty string ("") if the servlet used to process this request was matched using the "/*"
* pattern.
*
- * @return a String containing the name or path of the servlet being called, as specified in the request
- * URL, decoded, or an empty string if the servlet used to process the request is matched using the "/*" pattern.
+ * @return a String containing the path of the servlet being called, as specified in the request URL, or an
+ * empty string if the servlet used to process the request is matched using the "/*" pattern. The path will be
+ * canonicalized as per section 3.5 of the specification. This method will not return any encoded characters unless the
+ * container is configured specifically to allow them.
+ * @throws IllegalArgumentException In standard configuration, this method will never throw. However, a container may be
+ * configured to not reject some suspicious sequences identified by section 3.5.2 of the specification. Furthermore, the
+ * container may be configured to allow such paths to be accessed only via safer methods like {@link #getRequestURI()}
+ * and to throw IllegalArgumentException if this method is called for such suspicious paths.
*/
public String getServletPath();
diff --git a/api/src/test/java/jakarta/servlet/http/CanonicalUriPathTest.java b/api/src/test/java/jakarta/servlet/http/CanonicalUriPathTest.java
new file mode 100644
index 000000000..08f8fedf2
--- /dev/null
+++ b/api/src/test/java/jakarta/servlet/http/CanonicalUriPathTest.java
@@ -0,0 +1,313 @@
+package jakarta.servlet.http;
+
+import java.io.ByteArrayOutputStream;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Consumer;
+import java.util.stream.Stream;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+public class CanonicalUriPathTest {
+
+    /**
+     * The spellings of a "." or ".." segment that contain at least one percent-encoded dot.
+     * Lookups are case-insensitive, so "%2E" matches as well as "%2e".
+     */
+    private static final Set<String> ENCODED_DOT_SEGMENT;
+    static {
+        // A TreeMap ordered by CASE_INSENSITIVE_ORDER gives the backing set case-insensitive membership tests.
+        Set<String> set = Collections.newSetFromMap(new TreeMap<>(String.CASE_INSENSITIVE_ORDER));
+        set.add("%2e");
+        set.add("%2e%2e");
+        set.add("%2e.");
+        set.add(".%2e");
+        ENCODED_DOT_SEGMENT = Collections.unmodifiableSet(set);
+    }
+
+    /**
+     * Canonicalizes a URI path following the steps of section 3.5 of the specification and reports
+     * every error or suspicious sequence that was encountered.
+     * <p>
+     * This is a non-normative implementation of the algorithm. Problems do not abort processing: each
+     * one is passed to {@code rejection} as a short reason string and the canonical path is still returned.
+     *
+     * @param uriPath the raw URI path, optionally followed by a query and/or a fragment; must not be null
+     * @param rejection callback that receives one reason string for each kind of problem found
+     * @return the canonical path, which always starts with "/"
+     * @throws IllegalArgumentException if {@code uriPath} is null
+     */
+    public static String canonicalUriPath(String uriPath, Consumer<String> rejection) {
+
+        if (uriPath == null)
+            throw new IllegalArgumentException("null path");
+
+        String path = uriPath;
+
+        // Discard fragment; its mere presence is reported as a rejection further down.
+        int fragmentStart = path.indexOf('#');
+        boolean fragment = fragmentStart >= 0;
+        if (fragment)
+            path = path.substring(0, fragmentStart);
+
+        // Separation of path and query.
+        int queryStart = path.indexOf('?');
+        if (queryStart >= 0)
+            path = path.substring(0, queryStart);
+
+        // Both of these must look at the path alone, i.e. after the fragment and query are gone.
+        // An encoded "/" inside the query or fragment is not part of the path and must not count.
+        // Checking both hex-digit cases explicitly avoids a locale-sensitive toLowerCase().
+        boolean startsWithSlash = path.startsWith("/");
+        boolean encodedSlash = path.contains("%2f") || path.contains("%2F");
+
+        // Split path into segments (limit -1 keeps trailing empty segments).
+        List<String> segments = new ArrayList<>(Arrays.asList(path.substring(startsWithSlash ? 1 : 0).split("/", -1)));
+
+        // Remove path parameters, remembering the suspicious cases before the evidence is stripped.
+        boolean emptyNonLastSegmentWithParam = segments.stream().limit(segments.size() - 1).anyMatch(s -> s.startsWith(";"));
+        boolean dotSegmentWithParam = segments.stream().anyMatch(s -> s.startsWith(".;") || s.startsWith("..;"));
+        segments.replaceAll(s -> s.contains(";") ? s.substring(0, s.indexOf(';')) : s);
+
+        // Decode characters, remembering encoded dot-segments before decoding hides them.
+        boolean encodedDotSegment = segments.stream().anyMatch(ENCODED_DOT_SEGMENT::contains);
+        boolean decodeError = false;
+        try {
+            segments.replaceAll(CanonicalUriPathTest::decode);
+        } catch (Exception e) {
+            // Deliberately broad: any failure to decode is reported as a rejection rather than propagated.
+            // Segments before the failing one stay decoded, the rest stay as they were.
+            decodeError = true;
+        }
+
+        // Remove empty segments other than the last (the counter reaches 0 only for the final element).
+        AtomicInteger last = new AtomicInteger(segments.size());
+        segments.removeIf(s -> last.decrementAndGet() != 0 && s.length() == 0);
+
+        // Remove dot-segments. 'count' is the number of ordinary segments currently before the cursor,
+        // so a ".." can only consume a preceding segment while count > 0; otherwise it is left in place.
+        // NOTE(review): empty non-last segments were already removed above, so 'prev' appears never to be
+        // empty here and emptySegmentBeforeDotDot appears never to become true - confirm the intent.
+        boolean emptySegmentBeforeDotDot = false;
+        int count = 0;
+        for (ListIterator<String> s = segments.listIterator(); s.hasNext();) {
+            String segment = s.next();
+            if (segment.equals(".")) {
+                s.remove();
+            } else if (segment.equals("..")) {
+                if (count > 0) {
+                    s.remove();
+                    String prev = s.previous();
+                    s.remove();
+                    count--;
+                    emptySegmentBeforeDotDot |= prev.length() == 0;
+                }
+            } else {
+                count++;
+            }
+        }
+
+        // Concatenate segments
+        if (segments.isEmpty())
+            path = "/";
+        else {
+            // If the path carried an encoded "/", re-encode "%" and "/" so that a decoded "/" is not
+            // mistaken for a segment separator in the result.
+            if (!decodeError && encodedSlash)
+                segments.replaceAll(CanonicalUriPathTest::encode);
+            path = "/" + String.join("/", segments);
+        }
+
+        // Rejecting Errors and Suspicious Sequences
+        if (fragment)
+            rejection.accept("fragment");
+        if (decodeError)
+            rejection.accept("decode error");
+        // Any path not starting with the `"/"` character
+        if (!startsWithSlash)
+            rejection.accept("must start with /");
+        // Any path starting with an initial segment of `".."`
+        if (!segments.isEmpty() && segments.get(0).equals(".."))
+            rejection.accept("leading dot-dot-segment");
+        // The encoded `"/"` character
+        if (encodedSlash)
+            rejection.accept("encoded /");
+        // Any `"."` or `".."` segment that had a path parameter
+        if (dotSegmentWithParam)
+            rejection.accept("dot segment with parameter");
+        // Any `"."` or `".."` segment with any encoded characters
+        if (encodedDotSegment)
+            rejection.accept("encoded dot segment");
+        // Any `".."` segment preceded by an empty segment
+        if (emptySegmentBeforeDotDot)
+            rejection.accept("empty segment before dot dot");
+        // Any empty segment with parameters
+        if (emptyNonLastSegmentWithParam)
+            rejection.accept("empty segment with parameters");
+        // The `"\"` character encoded or not.
+        if (path.contains("\\"))
+            rejection.accept("backslash character");
+        // Any control characters either encoded or not (reported at most once).
+        for (char c : path.toCharArray()) {
+            if (c < 0x20 || c == 0x7f) {
+                rejection.accept("control character");
+                break;
+            }
+        }
+
+        return path;
+    }
+
+    /**
+     * Percent-decodes a single path segment. Runs of consecutive {@code %XX} escapes are collected as
+     * bytes and decoded together as UTF-8, so multi-byte characters are handled.
+     *
+     * @param segment the segment to decode
+     * @return the decoded segment, or {@code segment} itself if it contains no '%'
+     * @throws IllegalArgumentException if a '%' is not followed by exactly two ASCII hexadecimal digits,
+     *         or if {@code fromUtf8} rejects the decoded bytes
+     */
+    private static String decode(String segment) {
+        if (!segment.contains("%"))
+            return segment;
+
+        StringBuilder buf = new StringBuilder();
+        ByteArrayOutputStream utf8 = new ByteArrayOutputStream();
+        for (int i = 0; i < segment.length(); i++) {
+            char c = segment.charAt(i);
+            if (c == '%') {
+                if (i + 2 >= segment.length())
+                    throw new IllegalArgumentException("truncated percent-encoding");
+                // Validate each digit explicitly: Integer.parseInt would also accept a leading sign
+                // (e.g. "%+1") and non-ASCII Unicode digits, neither of which is a valid escape.
+                int hi = hexDigit(segment.charAt(i + 1));
+                int lo = hexDigit(segment.charAt(i + 2));
+                if (hi < 0 || lo < 0)
+                    throw new IllegalArgumentException("invalid percent-encoding");
+                utf8.write((hi << 4) | lo);
+                i += 2;
+            } else {
+                // A literal character ends the current run of escapes: flush the pending bytes first.
+                if (utf8.size() > 0) {
+                    buf.append(fromUtf8(utf8.toByteArray()));
+                    utf8.reset();
+                }
+                buf.append(c);
+            }
+        }
+        // Flush a run of escapes that reached the end of the segment.
+        if (utf8.size() > 0) {
+            buf.append(fromUtf8(utf8.toByteArray()));
+            utf8.reset();
+        }
+        return buf.toString();
+    }
+
+    /** Returns the value of an ASCII hexadecimal digit, or -1 if {@code c} is not one. */
+    private static int hexDigit(char c) {
+        if (c >= '0' && c <= '9')
+            return c - '0';
+        if (c >= 'a' && c <= 'f')
+            return c - 'a' + 10;
+        if (c >= 'A' && c <= 'F')
+            return c - 'A' + 10;
+        return -1;
+    }
+
+    /**
+     * Escapes the characters '%' and '/' in a segment as "%25" and "%2F" respectively.
+     *
+     * @param segment the segment to escape
+     * @return the escaped segment; unchanged in content if it contains neither character
+     */
+    private static String encode(String segment) {
+        // '%' must be handled first, otherwise the '%' of a freshly inserted "%2F" would be escaped again.
+        String percentEscaped = segment.replace("%", "%25");
+        return percentEscaped.replace("/", "%2F");
+    }
+
+    /**
+     * Decodes bytes as UTF-8, reporting malformed input instead of substituting replacement characters.
+     *
+     * @param bytes the bytes to decode
+     * @return the decoded characters
+     * @throws IllegalArgumentException wrapping the {@link CharacterCodingException} if decoding fails
+     */
+    private static CharBuffer fromUtf8(byte[] bytes) {
+        ByteBuffer input = ByteBuffer.wrap(bytes);
+        try {
+            return StandardCharsets.UTF_8.newDecoder()
+                    .onMalformedInput(CodingErrorAction.REPORT)
+                    .decode(input);
+        } catch (CharacterCodingException e) {
+            throw new IllegalArgumentException(e);
+        }
+    }
+
+ public static Stream data() {
+ List