Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 32 additions & 20 deletions docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1179,6 +1179,8 @@ HTTPie does several things by default in order to make its terminal output easy

### Colors and formatting

TODO: mention body colors/formatting are based on content-type + --response-mime (heuristics for JSON content-type)

Syntax highlighting is applied to HTTP headers and bodies (where it makes sense).
You can choose your preferred color scheme via the `--style` option if you don’t like the default one.
There are dozens of styles available, here are just a few notable ones:
Expand Down Expand Up @@ -1259,26 +1261,6 @@ $ http --response-as='text/plain; charset=big5' pie.dev/get

Given the encoding is not sent by the server, HTTPie will auto-detect it.

### Binary data

Binary data is suppressed for terminal output, which makes it safe to perform requests to URLs that send back binary data.
Binary data is also suppressed in redirected but prettified output.
The connection is closed as soon as we know that the response body is binary. For example:

```bash
$ http pie.dev/bytes/2000
```

You will nearly instantly see something like this:

```http
HTTP/1.1 200 OK
Content-Type: application/octet-stream

+-----------------------------------------+
| NOTE: binary data not shown in terminal |
+-----------------------------------------+
```

### Redirected output

Expand Down Expand Up @@ -1320,6 +1302,36 @@ function httpless {
http --pretty=all --print=hb "$@" | less -R;
}
```

### Binary data

Binary data is suppressed for terminal output, which makes it safe to perform requests to URLs that send back binary data.
Binary data is also suppressed in redirected but prettified output.
The connection is closed as soon as we know that the response body is binary. For example:

```bash
$ http pie.dev/bytes/2000
```

You will nearly instantly see something like this:

```http
HTTP/1.1 200 OK
Content-Type: application/octet-stream

+-----------------------------------------+
| NOTE: binary data not shown in terminal |
+-----------------------------------------+
```

### Display encoding

TODO:
(both request/response)
* we look at content-type
* else we detect
* short texts default to utf8
(only response)
* --response-charset allows overwriting

## Download mode

Expand Down
16 changes: 16 additions & 0 deletions httpie/cli/argtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,3 +242,19 @@ def parse_format_options(s: str, defaults: Optional[dict]) -> dict:
s=','.join(DEFAULT_FORMAT_OPTIONS),
defaults=None,
)


def response_charset_type(encoding: str) -> str:
    """Validate an encoding name given on the command line.

    Used as an argparse ``type=`` callable.

    :param encoding: the encoding name to check, e.g. ``utf8`` or ``big5``.
    :return: `encoding`, unchanged, when it can be used to encode text.
    :raises argparse.ArgumentTypeError: when the codec lookup fails.

    """
    try:
        # Encoding an empty string is enough to trigger the codec lookup
        # without depending on any actual data.
        ''.encode(encoding)
    except LookupError as error:
        # Chain explicitly so the lookup failure stays available as the
        # cause instead of showing up as an error raised while handling
        # another error.
        raise argparse.ArgumentTypeError(
            f'{encoding!r} is not a supported encoding') from error
    return encoding


def response_mime_type(mime_type: str) -> str:
    """Validate a ``type/subtype`` value given on the command line.

    Used as an argparse ``type=`` callable. The value must contain exactly
    one ``/`` and have non-blank text on both sides of it, so inputs such as
    ``/``, ``text/`` or ``/json`` are rejected as well as ``json`` and
    ``a/b/c``.

    :param mime_type: the value to check, e.g. ``application/json``.
    :return: `mime_type`, unchanged, when it has the expected shape.
    :raises argparse.ArgumentTypeError: when the shape is wrong.

    """
    main_type, separator, subtype = mime_type.partition('/')
    looks_valid = (
        bool(separator)
        and '/' not in subtype  # exactly one slash overall
        and bool(main_type.strip())
        and bool(subtype.strip())
    )
    if not looks_valid:
        raise argparse.ArgumentTypeError(
            f'{mime_type!r} doesn’t look like a mime type; use type/subtype')
    return mime_type
26 changes: 18 additions & 8 deletions httpie/cli/definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from .argparser import HTTPieArgumentParser
from .argtypes import (
KeyValueArgType, SessionNameValidator,
readable_file_arg,
readable_file_arg, response_charset_type, response_mime_type,
)
from .constants import (
DEFAULT_FORMAT_OPTIONS, OUTPUT_OPTIONS,
Expand Down Expand Up @@ -310,19 +310,29 @@
)

output_processing.add_argument(
'--response-as',
metavar='CONTENT_TYPE',
'--response-charset',
metavar='ENCODING',
type=response_charset_type,
help='''
Override the response Content-Type for display purposes, e.g.:
Override the response encoding for terminal display purposes, e.g.:
--response-charset=utf8
--response-charset=big5
'''
)

output_processing.add_argument(
'--response-mime',
metavar='MIME_TYPE',
type=response_mime_type,
help='''
Override the response mime type for coloring and formatting for the terminal, e.g.:

--response-as=application/xml
--response-as=charset=utf-8
--response-as='application/xml; charset=utf-8'
--response-mime=application/json
--response-mime=text/xml

'''
)


output_processing.add_argument(
'--format-options',
action='append',
Expand Down
2 changes: 1 addition & 1 deletion httpie/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import urllib3
from . import __version__
from .cli.dicts import RequestHeadersDict
from .constants import UTF8
from .encoding import UTF8
from .plugins.registry import plugin_manager
from .sessions import get_httpie_session
from .ssl import AVAILABLE_SSL_VERSION_ARG_MAPPING, HTTPieHTTPSAdapter
Expand Down
37 changes: 0 additions & 37 deletions httpie/codec.py

This file was deleted.

50 changes: 50 additions & 0 deletions httpie/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,53 @@


is_windows = 'win32' in str(sys.platform).lower()


try:
    from functools import cached_property
except ImportError:
    # Can be removed once we drop Python <3.8 support
    # Taken from: `django.utils.functional.cached_property`
    class cached_property:
        """
        Decorator that converts a method with a single self argument into a
        property cached on the instance.

        A cached property can be made out of an existing method:
        (e.g. ``url = cached_property(get_absolute_url)``).
        The optional ``name`` argument is obsolete as of Python 3.6 and will be
        deprecated in Django 4.0 (#30127).

        The attribute name is learned through ``__set_name__()``; the ``name``
        argument accepted by ``__init__()`` is not used by this class.
        """
        # Attribute name this descriptor is bound to; set by __set_name__().
        name = None

        # Placeholder used until __set_name__() swaps in the wrapped function.
        @staticmethod
        def func(instance):
            raise TypeError(
                'Cannot use cached_property instance without calling '
                '__set_name__() on it.'
            )

        def __init__(self, func, name=None):
            # Keep the wrapped function aside; it only becomes `self.func`
            # once the attribute name is known.
            self.real_func = func
            self.__doc__ = getattr(func, '__doc__')

        def __set_name__(self, owner, name):
            """
            Record the attribute name on first binding and activate the
            wrapped function. Binding the same descriptor again under a
            different name raises TypeError.
            """
            if self.name is None:
                self.name = name
                self.func = self.real_func
            elif name != self.name:
                raise TypeError(
                    "Cannot assign the same cached_property to two different names "
                    "(%r and %r)." % (self.name, name)
                )

        def __get__(self, instance, cls=None):
            """
            Call the function and put the return value in instance.__dict__ so that
            subsequent attribute access on the instance returns the cached value
            instead of calling cached_property.__get__().
            """
            # Accessed on the class rather than an instance: expose the
            # descriptor itself.
            if instance is None:
                return self
            res = instance.__dict__[self.name] = self.func(instance)
            return res
2 changes: 1 addition & 1 deletion httpie/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from . import __version__
from .compat import is_windows
from .constants import UTF8
from .encoding import UTF8


ENV_XDG_CONFIG_HOME = 'XDG_CONFIG_HOME'
Expand Down
2 changes: 0 additions & 2 deletions httpie/constants.py

This file was deleted.

2 changes: 1 addition & 1 deletion httpie/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from .compat import is_windows
from .config import DEFAULT_CONFIG_DIR, Config, ConfigFileError
from .constants import UTF8
from .encoding import UTF8

from .utils import repr_dict

Expand Down
50 changes: 50 additions & 0 deletions httpie/encoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from typing import Union

from charset_normalizer import from_bytes
from charset_normalizer.constant import TOO_SMALL_SEQUENCE

UTF8 = 'utf-8'

ContentBytes = Union[bytearray, bytes]


def detect_encoding(content: ContentBytes) -> str:
"""
We default to utf8 if text too short, because the detection
can return a random encoding leading to confusing results:

>>> too_short = ']"foo"'
>>> detected = from_bytes(too_short.encode()).best().encoding
>>> detected
'utf_16_be'

This comment was marked as spam.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good to know! What has changed?

This comment was marked as spam.

Copy link
Member

@jkbrzt jkbrzt Oct 4, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That’s exactly what we needed. Is there still some length threshold below which it’s unreasonable to rely on the detected encoding? Or a way to get some sort of confidence interval for the best match?

Copy link
Contributor Author

@BoboTiG BoboTiG Oct 5, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we could require charset_normalizer>=2.0.5 and drop our own TOO_SMALL_SEQUENCE check?

I am not sure that charset_normalizer >= 2.0.5 is available on all OSes for our package, though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I committed d52a483 just to see how it goes.

This comment was marked as spam.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll revert d52a483 as it seems too new.

>>> too_short.encode().decode(detected)
'崢景漢'

"""
encoding = UTF8
if len(content) > TOO_SMALL_SEQUENCE:
match = from_bytes(bytes(content)).best()
if match:
encoding = match.encoding
return encoding


def smart_decode(content: ContentBytes, encoding: str) -> str:
    """Return `content` decoded to text.

    A falsy `encoding` (for example an empty string) is treated as unknown,
    in which case the encoding is guessed from `content` with
    `detect_encoding()`.

    Bytes that cannot be decoded are replaced instead of raising.

    """
    effective_encoding = encoding or detect_encoding(content)
    return content.decode(effective_encoding, errors='replace')


def smart_encode(content: str, encoding: str) -> bytes:
    """Return `content` encoded to bytes with `encoding`.

    Characters that cannot be represented in `encoding` are replaced
    instead of raising.

    """
    encoded = content.encode(encoding, errors='replace')
    return encoded
31 changes: 11 additions & 20 deletions httpie/models.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,33 @@
from abc import ABCMeta, abstractmethod
from typing import Iterable, Optional
from typing import Iterable
from urllib.parse import urlsplit

from .constants import UTF8
from .utils import split_cookies
from .utils import split_cookies, parse_content_type_header
from .compat import cached_property


class HTTPMessage(metaclass=ABCMeta):
class HTTPMessage:
"""Abstract class for HTTP messages."""

def __init__(self, orig):
self._orig = orig

@abstractmethod
def iter_body(self, chunk_size: int) -> Iterable[bytes]:
"""Return an iterator over the body."""
raise NotImplementedError

@abstractmethod
def iter_lines(self, chunk_size: int) -> Iterable[bytes]:
"""Return an iterator over the body yielding (`line`, `line_feed`)."""
raise NotImplementedError

@property
@abstractmethod
def headers(self) -> str:
"""Return a `str` with the message's headers."""
raise NotImplementedError

@property
@abstractmethod
def encoding(self) -> Optional[str]:
"""Return a `str` with the message's encoding, if known."""
@cached_property
def encoding(self) -> str:
ct, params = parse_content_type_header(self.content_type)
return params.get('charset', '')

@property
def content_type(self) -> str:
Expand Down Expand Up @@ -77,10 +76,6 @@ def headers(self):
)
return '\r\n'.join(headers)

@property
def encoding(self):
return self._orig.encoding or UTF8


class HTTPRequest(HTTPMessage):
"""A :class:`requests.models.Request` wrapper."""
Expand Down Expand Up @@ -114,10 +109,6 @@ def headers(self):
headers = '\r\n'.join(headers).strip()
return headers

@property
def encoding(self):
return UTF8

@property
def body(self):
body = self._orig.body
Expand Down
2 changes: 1 addition & 1 deletion httpie/output/formatters/xml.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import sys
from typing import TYPE_CHECKING, Optional

from ...constants import UTF8
from ...encoding import UTF8
from ...plugins import FormatterPlugin

if TYPE_CHECKING:
Expand Down
Loading