21
21
import java .io .InputStream ;
22
22
import java .io .InputStreamReader ;
23
23
import java .io .StringReader ;
24
+ import java .net .URLDecoder ;
24
25
import java .nio .charset .Charset ;
25
26
import java .nio .charset .StandardCharsets ;
26
27
import java .util .ArrayList ;
65
66
*/
66
67
public class RewriteValve extends ValveBase {
67
68
69
/*
 * Encoders used for the second stage of encoding, after re-writing has completed. Each is a private copy of
 * the corresponding standard encoder with '%' added as a safe character.
 */
private static final URLEncoder REWRITE_DEFAULT_ENCODER;
private static final URLEncoder REWRITE_QUERY_ENCODER;

static {
    /*
     * See the detailed explanation of encoding/decoding during URL re-writing in the invoke() method.
     *
     * These encoders perform the second stage of encoding, after re-writing has completed. These
     * rewrite-specific encoders treat '%' as a safe character so that URLs and query strings already processed
     * by encodeForRewrite() do not end up with double encoding of '%' characters.
     *
     * The standard encoders are cloned before '%' is added so that the shared URLEncoder.DEFAULT and
     * URLEncoder.QUERY instances are left unmodified.
     */
    REWRITE_DEFAULT_ENCODER = (URLEncoder) URLEncoder.DEFAULT.clone();
    REWRITE_DEFAULT_ENCODER.addSafeCharacter('%');

    REWRITE_QUERY_ENCODER = (URLEncoder) URLEncoder.QUERY.clone();
    REWRITE_QUERY_ENCODER.addSafeCharacter('%');
}
86
+
68
87
/**
69
88
* The rewrite rules that the valve will use.
70
89
*/
@@ -296,22 +315,51 @@ public void invoke(Request request, Response response) throws IOException, Servl
296
315
297
316
invoked .set (Boolean .TRUE );
298
317
299
- // As long as MB isn't a char sequence or affiliated, this has to be
300
- // converted to a string
318
+ // As long as MB isn't a char sequence or affiliated, this has to be converted to a string
301
319
Charset uriCharset = request .getConnector ().getURICharset ();
302
320
String originalQueryStringEncoded = request .getQueryString ();
303
321
MessageBytes urlMB = context ? request .getRequestPathMB () : request .getDecodedRequestURIMB ();
304
322
urlMB .toChars ();
305
323
CharSequence urlDecoded = urlMB .getCharChunk ();
324
+
325
+ /*
326
+ * The URL presented to the rewrite valve is the URL that is used for request mapping. That URL has been
327
+ * processed to: remove path parameters; remove the query string; decode; and normalize the URL. It may
328
+ * contain literal '%', '?' and/or ';' characters at this point.
329
+ *
330
+ * The re-write rules need to be able to process URLs with literal '?' characters and add query strings
331
+ * without the two becoming confused. The re-write rules also need to be able to insert literal '%'
332
+ * characters without them being confused with %nn encoding.
333
+ *
334
+ * The re-write rules cannot insert path parameters.
335
+ *
336
+ * To meet these requirements, the URL is processed as follows.
337
+ *
338
+ * Step 1. The URL is partially re-encoded by encodeForRewrite(). This method encodes any literal '%', ';'
339
+ * and/or '?' characters in the URL using the standard %nn form.
340
+ *
341
+ * Step 2. The re-write processing runs with the provided re-write rules against the partially encoded URL.
342
+ * If a re-write rule needs to insert a literal '%', ';' or '?', it must do so in %nn encoded form.
343
+ *
344
+ * Step 3. The URL (and query string if present) is re-encoded using the re-write specific encoders
345
+ * (REWRITE_DEFAULT_ENCODER and REWRITE_QUERY_ENCODER) that behave the same way as the standard encoders
346
+ * apart from '%' being treated as a safe character. This prevents double encoding of any '%' characters
347
+ * present in the URL from steps 1 or 2.
348
+ */
349
+
350
+ // Step 1. Encode URL for processing by the re-write rules.
351
+ CharSequence urlRewriteEncoded = encodeForRewrite (urlDecoded );
306
352
CharSequence host = request .getServerName ();
307
353
boolean rewritten = false ;
308
354
boolean done = false ;
309
355
boolean qsa = false ;
310
356
boolean qsd = false ;
311
357
boolean valveSkip = false ;
358
+
359
+ // Step 2. Process the URL using the re-write rules.
312
360
for (int i = 0 ; i < rules .length ; i ++) {
313
361
RewriteRule rule = rules [i ];
314
- CharSequence test = (rule .isHost ()) ? host : urlDecoded ;
362
+ CharSequence test = (rule .isHost ()) ? host : urlRewriteEncoded ;
315
363
CharSequence newtest = rule .evaluate (test , resolver );
316
364
if (newtest != null && !Objects .equals (test .toString (), newtest .toString ())) {
317
365
if (containerLog .isTraceEnabled ()) {
@@ -321,7 +369,7 @@ public void invoke(Request request, Response response) throws IOException, Servl
321
369
if (rule .isHost ()) {
322
370
host = newtest ;
323
371
} else {
324
- urlDecoded = newtest ;
372
+ urlRewriteEncoded = newtest ;
325
373
}
326
374
rewritten = true ;
327
375
}
@@ -358,28 +406,30 @@ public void invoke(Request request, Response response) throws IOException, Servl
358
406
if (rule .isRedirect () && newtest != null ) {
359
407
// Append the query string to the url if there is one and it
360
408
// hasn't been rewritten
361
- String urlStringDecoded = urlDecoded .toString ();
362
- int index = urlStringDecoded .indexOf ('?' );
363
- String rewrittenQueryStringDecoded ;
409
+ String urlStringRewriteEncoded = urlRewriteEncoded .toString ();
410
+ int index = urlStringRewriteEncoded .indexOf ('?' );
411
+ String rewrittenQueryStringRewriteEncoded ;
364
412
if (index == -1 ) {
365
- rewrittenQueryStringDecoded = null ;
413
+ rewrittenQueryStringRewriteEncoded = null ;
366
414
} else {
367
- rewrittenQueryStringDecoded = urlStringDecoded .substring (index + 1 );
368
- urlStringDecoded = urlStringDecoded .substring (0 , index );
415
+ rewrittenQueryStringRewriteEncoded = urlStringRewriteEncoded .substring (index + 1 );
416
+ urlStringRewriteEncoded = urlStringRewriteEncoded .substring (0 , index );
369
417
}
370
418
419
+ // Step 3. Complete the 2nd stage of encoding.
371
420
StringBuilder urlStringEncoded =
372
- new StringBuilder (URLEncoder .DEFAULT .encode (urlStringDecoded , uriCharset ));
421
+ new StringBuilder (REWRITE_DEFAULT_ENCODER .encode (urlStringRewriteEncoded , uriCharset ));
422
+
373
423
if (!qsd && originalQueryStringEncoded != null && !originalQueryStringEncoded .isEmpty ()) {
374
- if (rewrittenQueryStringDecoded == null ) {
424
+ if (rewrittenQueryStringRewriteEncoded == null ) {
375
425
urlStringEncoded .append ('?' );
376
426
urlStringEncoded .append (originalQueryStringEncoded );
377
427
} else {
378
428
if (qsa ) {
379
429
// if qsa is specified append the query
380
430
urlStringEncoded .append ('?' );
381
- urlStringEncoded
382
- . append ( URLEncoder . QUERY . encode (rewrittenQueryStringDecoded , uriCharset ));
431
+ urlStringEncoded . append (
432
+ REWRITE_QUERY_ENCODER . encode (rewrittenQueryStringRewriteEncoded , uriCharset ));
383
433
urlStringEncoded .append ('&' );
384
434
urlStringEncoded .append (originalQueryStringEncoded );
385
435
} else if (index == urlStringEncoded .length () - 1 ) {
@@ -388,13 +438,14 @@ public void invoke(Request request, Response response) throws IOException, Servl
388
438
urlStringEncoded .deleteCharAt (index );
389
439
} else {
390
440
urlStringEncoded .append ('?' );
391
- urlStringEncoded
392
- . append ( URLEncoder . QUERY . encode (rewrittenQueryStringDecoded , uriCharset ));
441
+ urlStringEncoded . append (
442
+ REWRITE_QUERY_ENCODER . encode (rewrittenQueryStringRewriteEncoded , uriCharset ));
393
443
}
394
444
}
395
- } else if (rewrittenQueryStringDecoded != null ) {
445
+ } else if (rewrittenQueryStringRewriteEncoded != null ) {
396
446
urlStringEncoded .append ('?' );
397
- urlStringEncoded .append (URLEncoder .QUERY .encode (rewrittenQueryStringDecoded , uriCharset ));
447
+ urlStringEncoded
448
+ .append (REWRITE_QUERY_ENCODER .encode (rewrittenQueryStringRewriteEncoded , uriCharset ));
398
449
}
399
450
400
451
// Insert the context if
@@ -469,12 +520,12 @@ public void invoke(Request request, Response response) throws IOException, Servl
469
520
if (rewritten ) {
470
521
if (!done ) {
471
522
// See if we need to replace the query string
472
- String urlStringDecoded = urlDecoded .toString ();
473
- String queryStringDecoded = null ;
474
- int queryIndex = urlStringDecoded .indexOf ('?' );
523
+ String urlStringRewriteEncoded = urlRewriteEncoded .toString ();
524
+ String queryStringRewriteEncoded = null ;
525
+ int queryIndex = urlStringRewriteEncoded .indexOf ('?' );
475
526
if (queryIndex != -1 ) {
476
- queryStringDecoded = urlStringDecoded .substring (queryIndex + 1 );
477
- urlStringDecoded = urlStringDecoded .substring (0 , queryIndex );
527
+ queryStringRewriteEncoded = urlStringRewriteEncoded .substring (queryIndex + 1 );
528
+ urlStringRewriteEncoded = urlStringRewriteEncoded .substring (0 , queryIndex );
478
529
}
479
530
// Save the current context path before re-writing starts
480
531
String contextPath = null ;
@@ -488,22 +539,24 @@ public void invoke(Request request, Response response) throws IOException, Servl
488
539
// This is neither decoded nor normalized
489
540
chunk .append (contextPath );
490
541
}
491
- chunk .append (URLEncoder .DEFAULT .encode (urlStringDecoded , uriCharset ));
542
+
543
+ // Step 3. Complete the 2nd stage of encoding.
544
+ chunk .append (REWRITE_DEFAULT_ENCODER .encode (urlStringRewriteEncoded , uriCharset ));
492
545
// Decoded and normalized URI
493
546
// Rewriting may have denormalized the URL
494
- urlStringDecoded = RequestUtil .normalize (urlStringDecoded );
547
+ urlStringRewriteEncoded = RequestUtil .normalize (urlStringRewriteEncoded );
495
548
request .getCoyoteRequest ().decodedURI ().setChars (MessageBytes .EMPTY_CHAR_ARRAY , 0 , 0 );
496
549
chunk = request .getCoyoteRequest ().decodedURI ().getCharChunk ();
497
550
if (context ) {
498
551
// This is decoded and normalized
499
552
chunk .append (request .getServletContext ().getContextPath ());
500
553
}
501
- chunk .append (urlStringDecoded );
554
+ chunk .append (URLDecoder . decode ( urlStringRewriteEncoded , uriCharset ) );
502
555
// Set the new Query if there is one
503
- if (queryStringDecoded != null ) {
556
+ if (queryStringRewriteEncoded != null ) {
504
557
request .getCoyoteRequest ().queryString ().setChars (MessageBytes .EMPTY_CHAR_ARRAY , 0 , 0 );
505
558
chunk = request .getCoyoteRequest ().queryString ().getCharChunk ();
506
- chunk .append (URLEncoder . QUERY . encode (queryStringDecoded , uriCharset ));
559
+ chunk .append (REWRITE_QUERY_ENCODER . encode (queryStringRewriteEncoded , uriCharset ));
507
560
if (qsa && originalQueryStringEncoded != null && !originalQueryStringEncoded .isEmpty ()) {
508
561
chunk .append ('&' );
509
562
chunk .append (originalQueryStringEncoded );
@@ -790,4 +843,31 @@ protected static void parseRuleFlag(String line, RewriteRule rule, String flag)
790
843
throw new IllegalArgumentException (sm .getString ("rewriteValve.invalidFlags" , line , flag ));
791
844
}
792
845
}
846
+
847
+
848
+ private CharSequence encodeForRewrite (CharSequence input ) {
849
+ StringBuilder result = null ;
850
+ int pos = 0 ;
851
+ int mark = 0 ;
852
+ while (pos < input .length ()) {
853
+ char c = input .charAt (pos );
854
+ if (c == '%' || c == ';' || c == '?' ) {
855
+ if (result == null ) {
856
+ result = new StringBuilder ((int ) (input .length () * 1.1 ));
857
+ }
858
+ result .append (input .subSequence (mark , pos ));
859
+ result .append ('%' );
860
+ result .append (Character .forDigit ((c >> 4 ) & 0xF , 16 ));
861
+ result .append (Character .forDigit (c & 0xF , 16 ));
862
+ mark = pos + 1 ;
863
+ }
864
+ pos ++;
865
+ }
866
+ if (result != null ) {
867
+ result .append (input .subSequence (mark , input .length ()));
868
+ return result ;
869
+ } else {
870
+ return input ;
871
+ }
872
+ }
793
873
}
0 commit comments