Skip to content

Commit fbecc91

Browse files
committed
Better handling of URLs with literal ';' and '?'
1 parent fe6f9c4 commit fbecc91

File tree

5 files changed

+246
-55
lines changed

5 files changed

+246
-55
lines changed

java/org/apache/catalina/connector/CoyoteAdapter.java

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -650,18 +650,17 @@ protected boolean postParseRequest(org.apache.coyote.Request req, Request reques
650650
} else {
651651
/*
652652
* The URI is chars or String, and has been sent using an in-memory protocol handler. The following
653-
* assumptions are made: - req.requestURI() has been set to the 'original' non-decoded, non-normalized
654-
* URI - req.decodedURI() has been set to the decoded, normalized form of req.requestURI() -
655-
* 'suspicious' URI filtering - if required - has already been performed
653+
* assumptions are made:
654+
*
655+
* - req.requestURI() has been set to the 'original' non-decoded, non-normalized URI that includes path
656+
* parameters (if any)
657+
*
658+
* - req.decodedURI() has been set to the decoded, normalized form of req.requestURI() with any path
659+
* parameters removed
660+
*
661+
* - 'suspicious' URI filtering, if required, has already been performed
656662
*/
657663
decodedURI.toChars();
658-
// Remove all path parameters; any needed path parameter should be set
659-
// using the request object rather than passing it in the URL
660-
CharChunk uriCC = decodedURI.getCharChunk();
661-
int semicolon = uriCC.indexOf(';');
662-
if (semicolon > 0) {
663-
decodedURI.setChars(uriCC.getBuffer(), uriCC.getStart(), semicolon);
664-
}
665664
}
666665
}
667666

java/org/apache/catalina/valves/rewrite/RewriteValve.java

Lines changed: 108 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.io.InputStream;
2222
import java.io.InputStreamReader;
2323
import java.io.StringReader;
24+
import java.net.URLDecoder;
2425
import java.nio.charset.Charset;
2526
import java.nio.charset.StandardCharsets;
2627
import java.util.ArrayList;
@@ -65,6 +66,24 @@
6566
*/
6667
public class RewriteValve extends ValveBase {
6768

69+
private static final URLEncoder REWRITE_DEFAULT_ENCODER;
70+
private static final URLEncoder REWRITE_QUERY_ENCODER;
71+
72+
static {
73+
/*
74+
* See the detailed explanation of encoding/decoding during URL re-writing in the invoke() method.
75+
*
76+
* These encoders perform the second stage of encoding, after re-writing has completed. These rewrite specific
77+
* encoders treat '%' as a safe character so that URLs and query strings already processed by encodeForRewrite()
78+
* do not end up with double encoding of '%' characters.
79+
*/
80+
REWRITE_DEFAULT_ENCODER = (URLEncoder) URLEncoder.DEFAULT.clone();
81+
REWRITE_DEFAULT_ENCODER.addSafeCharacter('%');
82+
83+
REWRITE_QUERY_ENCODER = (URLEncoder) URLEncoder.QUERY.clone();
84+
REWRITE_QUERY_ENCODER.addSafeCharacter('%');
85+
}
86+
6887
/**
6988
* The rewrite rules that the valve will use.
7089
*/
@@ -296,22 +315,51 @@ public void invoke(Request request, Response response) throws IOException, Servl
296315

297316
invoked.set(Boolean.TRUE);
298317

299-
// As long as MB isn't a char sequence or affiliated, this has to be
300-
// converted to a string
318+
// As long as MB isn't a char sequence or affiliated, this has to be converted to a string
301319
Charset uriCharset = request.getConnector().getURICharset();
302320
String originalQueryStringEncoded = request.getQueryString();
303321
MessageBytes urlMB = context ? request.getRequestPathMB() : request.getDecodedRequestURIMB();
304322
urlMB.toChars();
305323
CharSequence urlDecoded = urlMB.getCharChunk();
324+
325+
/*
326+
* The URL presented to the rewrite valve is the URL that is used for request mapping. That URL has been
327+
* processed to: remove path parameters; remove the query string; decode; and normalize the URL. It may
328+
* contain literal '%', '?' and/or ';' characters at this point.
329+
*
330+
* The re-write rules need to be able to process URLs with literal '?' characters and add query strings
331+
* without the two becoming confused. The re-write rules also need to be able to insert literal '%'
332+
* characters without them being confused with %nn encoding.
333+
*
334+
* The re-write rules cannot insert path parameters.
335+
*
336+
* To meet these requirement, the URL is processed as follows.
337+
*
338+
* Step 1. The URL is partially re-encoded by encodeForRewrite(). This method encodes any literal '%', ';'
339+
* and/or '?' characters in the URL using the standard %nn form.
340+
*
341+
* Step 2. The re-write processing runs with the provided re-write rules against the partially encoded URL.
342+
* If a re-write rule needs to insert a literal '%', ';' or '?', it must do so in %nn encoded form.
343+
*
344+
* Step 3. The URL (and query string if present) is re-encoded using the re-write specific encoders
345+
* (REWRITE_DEFAULT_ENCODER and REWRITE_QUERY_ENCODER) that behave the same was as the standard encoders
346+
* apart from '%' being treated as a safe character. This prevents double encoding of any '%' characters
347+
* present in the URL from steps 1 or 2.
348+
*/
349+
350+
// Step 1. Encode URL for processing by the re-write rules.
351+
CharSequence urlRewriteEncoded = encodeForRewrite(urlDecoded);
306352
CharSequence host = request.getServerName();
307353
boolean rewritten = false;
308354
boolean done = false;
309355
boolean qsa = false;
310356
boolean qsd = false;
311357
boolean valveSkip = false;
358+
359+
// Step 2. Process the URL using the re-write rules.
312360
for (int i = 0; i < rules.length; i++) {
313361
RewriteRule rule = rules[i];
314-
CharSequence test = (rule.isHost()) ? host : urlDecoded;
362+
CharSequence test = (rule.isHost()) ? host : urlRewriteEncoded;
315363
CharSequence newtest = rule.evaluate(test, resolver);
316364
if (newtest != null && !Objects.equals(test.toString(), newtest.toString())) {
317365
if (containerLog.isTraceEnabled()) {
@@ -321,7 +369,7 @@ public void invoke(Request request, Response response) throws IOException, Servl
321369
if (rule.isHost()) {
322370
host = newtest;
323371
} else {
324-
urlDecoded = newtest;
372+
urlRewriteEncoded = newtest;
325373
}
326374
rewritten = true;
327375
}
@@ -358,28 +406,30 @@ public void invoke(Request request, Response response) throws IOException, Servl
358406
if (rule.isRedirect() && newtest != null) {
359407
// Append the query string to the url if there is one and it
360408
// hasn't been rewritten
361-
String urlStringDecoded = urlDecoded.toString();
362-
int index = urlStringDecoded.indexOf('?');
363-
String rewrittenQueryStringDecoded;
409+
String urlStringRewriteEncoded = urlRewriteEncoded.toString();
410+
int index = urlStringRewriteEncoded.indexOf('?');
411+
String rewrittenQueryStringRewriteEncoded;
364412
if (index == -1) {
365-
rewrittenQueryStringDecoded = null;
413+
rewrittenQueryStringRewriteEncoded = null;
366414
} else {
367-
rewrittenQueryStringDecoded = urlStringDecoded.substring(index + 1);
368-
urlStringDecoded = urlStringDecoded.substring(0, index);
415+
rewrittenQueryStringRewriteEncoded = urlStringRewriteEncoded.substring(index + 1);
416+
urlStringRewriteEncoded = urlStringRewriteEncoded.substring(0, index);
369417
}
370418

419+
// Step 3. Complete the 2nd stage to encoding.
371420
StringBuilder urlStringEncoded =
372-
new StringBuilder(URLEncoder.DEFAULT.encode(urlStringDecoded, uriCharset));
421+
new StringBuilder(REWRITE_DEFAULT_ENCODER.encode(urlStringRewriteEncoded, uriCharset));
422+
373423
if (!qsd && originalQueryStringEncoded != null && !originalQueryStringEncoded.isEmpty()) {
374-
if (rewrittenQueryStringDecoded == null) {
424+
if (rewrittenQueryStringRewriteEncoded == null) {
375425
urlStringEncoded.append('?');
376426
urlStringEncoded.append(originalQueryStringEncoded);
377427
} else {
378428
if (qsa) {
379429
// if qsa is specified append the query
380430
urlStringEncoded.append('?');
381-
urlStringEncoded
382-
.append(URLEncoder.QUERY.encode(rewrittenQueryStringDecoded, uriCharset));
431+
urlStringEncoded.append(
432+
REWRITE_QUERY_ENCODER.encode(rewrittenQueryStringRewriteEncoded, uriCharset));
383433
urlStringEncoded.append('&');
384434
urlStringEncoded.append(originalQueryStringEncoded);
385435
} else if (index == urlStringEncoded.length() - 1) {
@@ -388,13 +438,14 @@ public void invoke(Request request, Response response) throws IOException, Servl
388438
urlStringEncoded.deleteCharAt(index);
389439
} else {
390440
urlStringEncoded.append('?');
391-
urlStringEncoded
392-
.append(URLEncoder.QUERY.encode(rewrittenQueryStringDecoded, uriCharset));
441+
urlStringEncoded.append(
442+
REWRITE_QUERY_ENCODER.encode(rewrittenQueryStringRewriteEncoded, uriCharset));
393443
}
394444
}
395-
} else if (rewrittenQueryStringDecoded != null) {
445+
} else if (rewrittenQueryStringRewriteEncoded != null) {
396446
urlStringEncoded.append('?');
397-
urlStringEncoded.append(URLEncoder.QUERY.encode(rewrittenQueryStringDecoded, uriCharset));
447+
urlStringEncoded
448+
.append(REWRITE_QUERY_ENCODER.encode(rewrittenQueryStringRewriteEncoded, uriCharset));
398449
}
399450

400451
// Insert the context if
@@ -469,12 +520,12 @@ public void invoke(Request request, Response response) throws IOException, Servl
469520
if (rewritten) {
470521
if (!done) {
471522
// See if we need to replace the query string
472-
String urlStringDecoded = urlDecoded.toString();
473-
String queryStringDecoded = null;
474-
int queryIndex = urlStringDecoded.indexOf('?');
523+
String urlStringRewriteEncoded = urlRewriteEncoded.toString();
524+
String queryStringRewriteEncoded = null;
525+
int queryIndex = urlStringRewriteEncoded.indexOf('?');
475526
if (queryIndex != -1) {
476-
queryStringDecoded = urlStringDecoded.substring(queryIndex + 1);
477-
urlStringDecoded = urlStringDecoded.substring(0, queryIndex);
527+
queryStringRewriteEncoded = urlStringRewriteEncoded.substring(queryIndex + 1);
528+
urlStringRewriteEncoded = urlStringRewriteEncoded.substring(0, queryIndex);
478529
}
479530
// Save the current context path before re-writing starts
480531
String contextPath = null;
@@ -488,22 +539,24 @@ public void invoke(Request request, Response response) throws IOException, Servl
488539
// This is neither decoded nor normalized
489540
chunk.append(contextPath);
490541
}
491-
chunk.append(URLEncoder.DEFAULT.encode(urlStringDecoded, uriCharset));
542+
543+
// Step 3. Complete the 2nd stage to encoding.
544+
chunk.append(REWRITE_DEFAULT_ENCODER.encode(urlStringRewriteEncoded, uriCharset));
492545
// Decoded and normalized URI
493546
// Rewriting may have denormalized the URL
494-
urlStringDecoded = RequestUtil.normalize(urlStringDecoded);
547+
urlStringRewriteEncoded = RequestUtil.normalize(urlStringRewriteEncoded);
495548
request.getCoyoteRequest().decodedURI().setChars(MessageBytes.EMPTY_CHAR_ARRAY, 0, 0);
496549
chunk = request.getCoyoteRequest().decodedURI().getCharChunk();
497550
if (context) {
498551
// This is decoded and normalized
499552
chunk.append(request.getServletContext().getContextPath());
500553
}
501-
chunk.append(urlStringDecoded);
554+
chunk.append(URLDecoder.decode(urlStringRewriteEncoded, uriCharset));
502555
// Set the new Query if there is one
503-
if (queryStringDecoded != null) {
556+
if (queryStringRewriteEncoded != null) {
504557
request.getCoyoteRequest().queryString().setChars(MessageBytes.EMPTY_CHAR_ARRAY, 0, 0);
505558
chunk = request.getCoyoteRequest().queryString().getCharChunk();
506-
chunk.append(URLEncoder.QUERY.encode(queryStringDecoded, uriCharset));
559+
chunk.append(REWRITE_QUERY_ENCODER.encode(queryStringRewriteEncoded, uriCharset));
507560
if (qsa && originalQueryStringEncoded != null && !originalQueryStringEncoded.isEmpty()) {
508561
chunk.append('&');
509562
chunk.append(originalQueryStringEncoded);
@@ -790,4 +843,31 @@ protected static void parseRuleFlag(String line, RewriteRule rule, String flag)
790843
throw new IllegalArgumentException(sm.getString("rewriteValve.invalidFlags", line, flag));
791844
}
792845
}
846+
847+
848+
private CharSequence encodeForRewrite(CharSequence input) {
849+
StringBuilder result = null;
850+
int pos = 0;
851+
int mark = 0;
852+
while (pos < input.length()) {
853+
char c = input.charAt(pos);
854+
if (c == '%' || c == ';' || c == '?') {
855+
if (result == null) {
856+
result = new StringBuilder((int) (input.length() * 1.1));
857+
}
858+
result.append(input.subSequence(mark, pos));
859+
result.append('%');
860+
result.append(Character.forDigit((c >> 4) & 0xF, 16));
861+
result.append(Character.forDigit(c & 0xF, 16));
862+
mark = pos + 1;
863+
}
864+
pos++;
865+
}
866+
if (result != null) {
867+
result.append(input.subSequence(mark, input.length()));
868+
return result;
869+
} else {
870+
return input;
871+
}
872+
}
793873
}

0 commit comments

Comments
 (0)