Skip to content

Commit e6c5cd3

Browse files
authored
Improve JSON output when there is leading data before the actual JSON body (#1130)
In some special cases, to protect against Cross Site Script Inclusion (XSSI) attacks, the JSON response body starts with a magic prefix line that must be stripped before feeding the rest of the response body to the JSON parser. Such a prefix is now simply ignored by the parser but still printed in the terminal. * Fix Windows tests
1 parent 2731341 commit e6c5cd3

File tree

8 files changed

+170
-51
lines changed

8 files changed

+170
-51
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ This project adheres to [Semantic Versioning](https://semver.org/).
55

66
## [2.6.0.dev0](https://github.com/httpie/httpie/compare/2.5.0...master) (unreleased)
77

8+
- Added support for formatting & coloring of JSON bodies preceded by non-JSON data (e.g., an XSSI prefix). ([#1130](https://github.com/httpie/httpie/issues/1130))
9+
810
## [2.5.0](https://github.com/httpie/httpie/compare/2.4.0...2.5.0) (2021-09-06)
911

1012
Blog post: [What’s new in HTTPie 2.5.0](https://httpie.io/blog/httpie-2.5.0)

httpie/output/formatters/colors.py

Lines changed: 8 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@
99
from pygments.formatters.terminal import TerminalFormatter
1010
from pygments.formatters.terminal256 import Terminal256Formatter
1111
from pygments.lexer import Lexer
12+
from pygments.lexers.data import JsonLexer
1213
from pygments.lexers.special import TextLexer
1314
from pygments.lexers.text import HttpLexer as PygmentsHttpLexer
1415
from pygments.util import ClassNotFound
1516

17+
from ..lexers.json import EnhancedJsonLexer
1618
from ...compat import is_windows
1719
from ...context import Environment
1820
from ...plugins import FormatterPlugin
@@ -60,6 +62,7 @@ def __init__(
6062
http_lexer = PygmentsHttpLexer()
6163
formatter = TerminalFormatter()
6264
else:
65+
from ..lexers.http import SimplifiedHTTPLexer
6366
http_lexer = SimplifiedHTTPLexer()
6467
formatter = Terminal256Formatter(
6568
style=self.get_style_class(color_scheme)
@@ -151,55 +154,12 @@ def get_lexer(
151154
else:
152155
lexer = pygments.lexers.get_lexer_by_name('json')
153156

154-
return lexer
155-
156-
157-
class SimplifiedHTTPLexer(pygments.lexer.RegexLexer):
158-
"""Simplified HTTP lexer for Pygments.
159-
160-
It only operates on headers and provides a stronger contrast between
161-
their names and values than the original one bundled with Pygments
162-
(:class:`pygments.lexers.text import HttpLexer`), especially when
163-
Solarized color scheme is used.
157+
# Use our own JSON lexer: it supports JSON bodies preceded by non-JSON data
158+
# as well as legit JSON bodies.
159+
if isinstance(lexer, JsonLexer):
160+
lexer = EnhancedJsonLexer()
164161

165-
"""
166-
name = 'HTTP'
167-
aliases = ['http']
168-
filenames = ['*.http']
169-
tokens = {
170-
'root': [
171-
# Request-Line
172-
(r'([A-Z]+)( +)([^ ]+)( +)(HTTP)(/)(\d+\.\d+)',
173-
pygments.lexer.bygroups(
174-
pygments.token.Name.Function,
175-
pygments.token.Text,
176-
pygments.token.Name.Namespace,
177-
pygments.token.Text,
178-
pygments.token.Keyword.Reserved,
179-
pygments.token.Operator,
180-
pygments.token.Number
181-
)),
182-
# Response Status-Line
183-
(r'(HTTP)(/)(\d+\.\d+)( +)(\d{3})( +)(.+)',
184-
pygments.lexer.bygroups(
185-
pygments.token.Keyword.Reserved, # 'HTTP'
186-
pygments.token.Operator, # '/'
187-
pygments.token.Number, # Version
188-
pygments.token.Text,
189-
pygments.token.Number, # Status code
190-
pygments.token.Text,
191-
pygments.token.Name.Exception, # Reason
192-
)),
193-
# Header
194-
(r'(.*?)( *)(:)( *)(.+)', pygments.lexer.bygroups(
195-
pygments.token.Name.Attribute, # Name
196-
pygments.token.Text,
197-
pygments.token.Operator, # Colon
198-
pygments.token.Text,
199-
pygments.token.String # Value
200-
))
201-
]
202-
}
162+
return lexer
203163

204164

205165
class Solarized256Style(pygments.style.Style):

httpie/output/formatters/json.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,16 @@ def format_body(self, body: str, mime: str) -> str:
1717
]
1818
if (self.kwargs['explicit_json']
1919
or any(token in mime for token in maybe_json)):
20+
from ..utils import load_prefixed_json
2021
try:
21-
obj = json.loads(body)
22+
data_prefix, json_obj = load_prefixed_json(body)
2223
except ValueError:
2324
pass # Invalid JSON, ignore.
2425
else:
2526
# Indent, sort keys by name, and avoid
2627
# unicode escapes to improve readability.
27-
body = json.dumps(
28-
obj=obj,
28+
body = data_prefix + json.dumps(
29+
obj=json_obj,
2930
sort_keys=self.format_options['json']['sort_keys'],
3031
ensure_ascii=False,
3132
indent=self.format_options['json']['indent']

httpie/output/lexers/__init__.py

Whitespace-only changes.

httpie/output/lexers/http.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import pygments
2+
3+
4+
class SimplifiedHTTPLexer(pygments.lexer.RegexLexer):
    """Simplified HTTP lexer for Pygments.

    It only operates on headers and provides a stronger contrast between
    their names and values than the original one bundled with Pygments
    (:class:`pygments.lexers.text import HttpLexer`), especially when
    Solarized color scheme is used.

    The single ``root`` state holds three rules, tried in the order they
    are listed: request line, response status line, then header line.

    """
    name = 'HTTP'
    aliases = ['http']
    filenames = ['*.http']
    tokens = {
        'root': [
            # Request-Line
            # Groups: method, spaces, target, spaces, 'HTTP', '/', version.
            (r'([A-Z]+)( +)([^ ]+)( +)(HTTP)(/)(\d+\.\d+)',
             pygments.lexer.bygroups(
                 pygments.token.Name.Function,
                 pygments.token.Text,
                 pygments.token.Name.Namespace,
                 pygments.token.Text,
                 pygments.token.Keyword.Reserved,
                 pygments.token.Operator,
                 pygments.token.Number
             )),
            # Response Status-Line
            # Groups: 'HTTP', '/', version, spaces, 3-digit code, spaces, reason.
            (r'(HTTP)(/)(\d+\.\d+)( +)(\d{3})( +)(.+)',
             pygments.lexer.bygroups(
                 pygments.token.Keyword.Reserved,  # 'HTTP'
                 pygments.token.Operator,  # '/'
                 pygments.token.Number,  # Version
                 pygments.token.Text,
                 pygments.token.Number,  # Status code
                 pygments.token.Text,
                 pygments.token.Name.Exception,  # Reason
             )),
            # Header
            # The name is matched non-greedily, so the split happens at the
            # first colon; the value is everything after it on that line.
            (r'(.*?)( *)(:)( *)(.+)', pygments.lexer.bygroups(
                pygments.token.Name.Attribute,  # Name
                pygments.token.Text,
                pygments.token.Operator,  # Colon
                pygments.token.Text,
                pygments.token.String  # Value
            ))
        ]
    }

httpie/output/lexers/json.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import re
2+
3+
from pygments.lexer import bygroups, using, RegexLexer
4+
from pygments.lexers.data import JsonLexer
5+
from pygments.token import Token
6+
7+
PREFIX_TOKEN = Token.Error
8+
PREFIX_REGEX = r'[^{\["]+'
9+
10+
11+
class EnhancedJsonLexer(RegexLexer):
    """
    Enhanced JSON lexer for Pygments.

    It adds support for possible non-JSON data preceding the actual JSON body:
    the leading data is emitted as `PREFIX_TOKEN` and the rest is delegated
    to the stock Pygments `JsonLexer`.

    """
    name = 'JSON'
    # DOTALL lets `.` match newlines, so `.+` can span a multi-line body.
    flags = re.IGNORECASE | re.DOTALL
    tokens = {
        'root': [
            # Possible non-JSON data prefix followed by the actual JSON body.
            # The body must start with `{`, `[`, `"`, `true`, `false` or `null`.
            # FIXME: a data prefix followed by a number (integer or float) is not handled correctly.
            (
                fr'({PREFIX_REGEX})' + r'((?:[{\["]|true|false|null).+)',
                bygroups(PREFIX_TOKEN, using(JsonLexer))
            ),
            # JSON body.
            # Fallback when the rule above does not match: hand everything to JsonLexer.
            (r'.+', using(JsonLexer)),
        ],
    }

httpie/output/utils.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import json
2+
import re
3+
from typing import Tuple
4+
5+
from .lexers.json import PREFIX_REGEX
6+
7+
8+
def load_prefixed_json(data: str) -> Tuple[str, object]:
    """Decode `data` as JSON, tolerating leading non-JSON data.

    The whole string is tried first. Only if that fails is the leading
    prefix found by `parse_prefixed_json()` split off and the remainder
    decoded on its own.

    Return a tuple (data prefix, decoded JSON value). The prefix is an
    empty string when `data` is valid JSON as a whole.

    Raise `ValueError` when no JSON value can be decoded either way.

    """
    # First, the full data.
    try:
        return '', json.loads(data)
    except ValueError:
        pass

    # Then, try to find the start of the actual body.
    data_prefix, body = parse_prefixed_json(data)
    try:
        return data_prefix, json.loads(body)
    except ValueError as error:
        # Chain the decoder's error so its diagnostic is not lost.
        raise ValueError('Invalid JSON') from error
24+
25+
26+
def parse_prefixed_json(data: str) -> Tuple[str, str]:
    """Find the potential JSON body from `data`.

    Sometimes the JSON body is prefixed with an XSSI magic string, specific to the server.
    Return a tuple (data prefix, actual JSON body). The prefix is an empty
    string when `data` does not start with a character `PREFIX_REGEX` accepts.

    """
    # Anchor the search at the start of `data`: a prefix, by definition, comes
    # first. Searching anywhere (e.g. `re.findall(...)[0]`) would pick up text
    # from inside the body whenever `data` starts with the body itself, and
    # the slice below would then cut the wrong characters off.
    match = re.match(PREFIX_REGEX, data)
    data_prefix = match.group() if match else ''
    body = data[len(data_prefix):]
    return data_prefix, body

tests/test_json.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import json
2+
3+
import pytest
4+
import responses
5+
6+
from httpie.cli.constants import PRETTY_MAP
7+
from httpie.compat import is_windows
8+
from httpie.output.formatters.colors import ColorFormatter
9+
10+
from .utils import MockEnvironment, http, URL_EXAMPLE
11+
12+
TEST_JSON_XXSI_PREFIXES = (r")]}',\n", ")]}',", 'while(1);', 'for(;;)', ')', ']', '}')
13+
TEST_JSON_VALUES = ({}, {'a': 0, 'b': 0}, [], ['a', 'b'], 'foo', True, False, None) # FIX: missing int & float
14+
TEST_PREFIX_TOKEN_COLOR = '\x1b[38;5;15m' if is_windows else '\x1b[04m\x1b[91m'
15+
16+
17+
@pytest.mark.parametrize('data_prefix', TEST_JSON_XXSI_PREFIXES)
@pytest.mark.parametrize('json_data', TEST_JSON_VALUES)
@pytest.mark.parametrize('pretty', PRETTY_MAP.keys())
@responses.activate
def test_json_formatter_with_body_preceded_by_non_json_data(data_prefix, json_data, pretty):
    """Check that a JSON body with leading non-JSON data is still formatted and colored."""
    mime = 'application/json'
    responses.add(
        responses.GET,
        URL_EXAMPLE,
        body=data_prefix + json.dumps(json_data),
        content_type=mime,
    )

    wants_colors = pretty in ('all', 'colors')
    env = MockEnvironment(colors=256) if wants_colors else None
    r = http('--pretty=' + pretty, URL_EXAMPLE, env=env)

    # Only the modes that format the body re-indent the JSON.
    if pretty in ('none', 'colors'):
        indent = None
    else:
        indent = 4
    expected_body = data_prefix + json.dumps(json_data, indent=indent)

    if wants_colors:
        formatter = ColorFormatter(env, format_options={'json': {'format': True, 'indent': 4}})
        expected_body = formatter.format_body(expected_body, mime)
        # The prefix must be colored exactly once, i.e. it was
        # handled as a single token rather than piece by piece.
        assert TEST_PREFIX_TOKEN_COLOR + data_prefix in expected_body, expected_body

    assert expected_body in r

0 commit comments

Comments
 (0)