httpie · BoboTiG · Oct 6, 2021 · Sep 30, 2021 · Oct 4, 2021 · Oct 4, 2021
diff --git a/docs/README.md b/docs/README.md
@@ -1179,6 +1179,8 @@ HTTPie does several things by default in order to make its terminal output easy
 
 ### Colors and formatting
 
+TODO: mention body colors/formatting are based on content-type + --response-mime (heuristics for JSON content-type)
+
 Syntax highlighting is applied to HTTP headers and bodies (where it makes sense).
 You can choose your preferred color scheme via the `--style` option if you don’t like the default one.
 There are dozens of styles available, here are just a few notable ones:
@@ -1259,26 +1261,6 @@ $ http --response-as='text/plain; charset=big5' pie.dev/get
 
 Given the encoding is not sent by the server, HTTPie will auto-detect it.
 
-### Binary data
-
-Binary data is suppressed for terminal output, which makes it safe to perform requests to URLs that send back binary data.
-Binary data is also suppressed in redirected but prettified output.
-The connection is closed as soon as we know that the response body is binary,
-
-```bash
-$ http pie.dev/bytes/2000
-```
-
-You will nearly instantly see something like this:
-
-```http
-HTTP/1.1 200 OK
-Content-Type: application/octet-stream
-
-+-----------------------------------------+
-| NOTE: binary data not shown in terminal |
-+-----------------------------------------+
-```
 
 ### Redirected output
 
@@ -1320,6 +1302,36 @@ function httpless {
     http --pretty=all --print=hb "$@" | less -R;
 }
 ```
+### Binary data
+
+Binary data is suppressed for terminal output, which makes it safe to perform requests to URLs that send back binary data.
+Binary data is also suppressed in redirected but prettified output.
+The connection is closed as soon as we know that the response body is binary. For example:
+
+```bash
+$ http pie.dev/bytes/2000
+```
+
+You will nearly instantly see something like this:
+
+```http
+HTTP/1.1 200 OK
+Content-Type: application/octet-stream
+
++-----------------------------------------+
+| NOTE: binary data not shown in terminal |
++-----------------------------------------+
+```
+
+### Display encoding
+
+TODO:
+(both request/response)
+* we look at content-type
+* else we detect
+* short texts default to utf8
+(only response)
+* --response-charset allows overwriting
 
 ## Download mode
 

diff --git a/httpie/cli/argtypes.py b/httpie/cli/argtypes.py
@@ -242,3 +242,19 @@ def parse_format_options(s: str, defaults: Optional[dict]) -> dict:
     s=','.join(DEFAULT_FORMAT_OPTIONS),
     defaults=None,
 )
+
+
+def response_charset_type(encoding: str) -> str:
+    """Argparse type: return `encoding` unchanged if text can be encoded with it, else raise `ArgumentTypeError`."""
+    try:
+        ''.encode(encoding)
+    except (LookupError, UnicodeError):  # unknown or non-text codec, or the always-failing `undefined` codec
+        raise argparse.ArgumentTypeError(f'{encoding!r} is not a supported encoding') from None
+    return encoding
+
+
+def response_mime_type(mime_type: str) -> str:
+    """Argparse type: return `mime_type` if it is `type/subtype` with both parts non-empty, else raise `ArgumentTypeError`."""
+    if mime_type.count('/') != 1 or not all(mime_type.split('/')):
+        raise argparse.ArgumentTypeError(f'{mime_type!r} doesn’t look like a mime type; use type/subtype')
+    return mime_type
diff --git a/httpie/cli/definition.py b/httpie/cli/definition.py
@@ -9,7 +9,7 @@
 from .argparser import HTTPieArgumentParser
 from .argtypes import (
     KeyValueArgType, SessionNameValidator,
-    readable_file_arg,
+    readable_file_arg, response_charset_type, response_mime_type,
 )
 from .constants import (
     DEFAULT_FORMAT_OPTIONS, OUTPUT_OPTIONS,
@@ -310,19 +310,29 @@
 )
 
 output_processing.add_argument(
-    '--response-as',
-    metavar='CONTENT_TYPE',
+    '--response-charset',
+    metavar='ENCODING',
+    type=response_charset_type,
     help='''
-    Override the response Content-Type for display purposes, e.g.:
+    Override the response encoding for terminal display purposes, e.g.:
+        --response-charset=utf8
+        --response-charset=big5
+    '''
+)
+
+output_processing.add_argument(
+    '--response-mime',
+    metavar='MIME_TYPE',
+    type=response_mime_type,
+    help='''
+    Override the response mime type for coloring and formatting for the terminal, e.g.:
 
-        --response-as=application/xml
-        --response-as=charset=utf-8
-        --response-as='application/xml; charset=utf-8'
+        --response-mime=application/json
+        --response-mime=text/xml
 
     '''
 )
 
-
 output_processing.add_argument(
     '--format-options',
     action='append',

diff --git a/httpie/client.py b/httpie/client.py
@@ -12,7 +12,7 @@
 import urllib3
 from . import __version__
 from .cli.dicts import RequestHeadersDict
-from .constants import UTF8
+from .encoding import UTF8
 from .plugins.registry import plugin_manager
 from .sessions import get_httpie_session
 from .ssl import AVAILABLE_SSL_VERSION_ARG_MAPPING, HTTPieHTTPSAdapter

diff --git a/httpie/codec.py b/httpie/codec.py
diff --git a/httpie/compat.py b/httpie/compat.py
@@ -2,3 +2,53 @@
 
 
 is_windows = 'win32' in str(sys.platform).lower()
+
+
+try:
+    from functools import cached_property
+except ImportError:
+    # Can be removed once we drop Python <3.8 support
+    # Taken from: `django.utils.functional.cached_property`
+    class cached_property:
+        """
+        Fallback used only when `functools.cached_property` cannot be
+        imported: a decorator that converts a method with a single self
+        argument into a property cached on the instance.
+
+        A cached property can be made out of an existing method:
+        (e.g. ``url = cached_property(get_absolute_url)``).
+        The optional ``name`` argument of `__init__` is accepted but unused.
+        """
+        name = None  # attribute name; filled in by __set_name__()
+
+        @staticmethod
+        def func(instance):  # placeholder until __set_name__() installs the real function
+            raise TypeError(
+                'Cannot use cached_property instance without calling '
+                '__set_name__() on it.'
+            )
+
+        def __init__(self, func, name=None):
+            self.real_func = func  # kept aside; becomes `self.func` in __set_name__()
+            self.__doc__ = getattr(func, '__doc__')
+
+        def __set_name__(self, owner, name):
+            if self.name is None:
+                self.name = name
+                self.func = self.real_func  # shadows the raising placeholder above
+            elif name != self.name:
+                raise TypeError(
+                    "Cannot assign the same cached_property to two different names "
+                    "(%r and %r)." % (self.name, name)
+                )
+
+        def __get__(self, instance, cls=None):
+            """
+            Call the function and put the return value in instance.__dict__ so that
+            subsequent attribute access on the instance returns the cached value
+            instead of calling cached_property.__get__().
+            """
+            if instance is None:  # looked up on the class itself, not on an instance
+                return self
+            res = instance.__dict__[self.name] = self.func(instance)
+            return res
diff --git a/httpie/config.py b/httpie/config.py
@@ -5,7 +5,7 @@
 
 from . import __version__
 from .compat import is_windows
-from .constants import UTF8
+from .encoding import UTF8
 
 
 ENV_XDG_CONFIG_HOME = 'XDG_CONFIG_HOME'

diff --git a/httpie/constants.py b/httpie/constants.py
diff --git a/httpie/context.py b/httpie/context.py
@@ -11,7 +11,7 @@
 
 from .compat import is_windows
 from .config import DEFAULT_CONFIG_DIR, Config, ConfigFileError
-from .constants import UTF8
+from .encoding import UTF8
 
 from .utils import repr_dict
 

diff --git a/httpie/encoding.py b/httpie/encoding.py
@@ -0,0 +1,50 @@
+from typing import Union
+
+from charset_normalizer import from_bytes
+from charset_normalizer.constant import TOO_SMALL_SEQUENCE
+
+UTF8 = 'utf-8'
+
+ContentBytes = Union[bytearray, bytes]
+
+
+def detect_encoding(content: ContentBytes) -> str:
+    """
+    Guess the encoding of `content`, falling back to UTF-8.
+
+    Inputs that are too short are not analysed at all, because the
+    detection can return a random encoding leading to confusing results:
+
+    >>> too_short = ']"foo"'
+    >>> detected = from_bytes(too_short.encode()).best().encoding
+    >>> detected
+    'utf_16_be'
+    >>> too_short.encode().decode(detected)
+    '崢景漢'
+
+    """
+    if len(content) <= TOO_SMALL_SEQUENCE:
+        return UTF8
+    best_match = from_bytes(bytes(content)).best()
+    return best_match.encoding if best_match else UTF8
+
+
+def smart_decode(content: ContentBytes, encoding: str) -> str:
+    """Return `content` decoded to text.
+
+    An empty `encoding` means it is unknown; it is then guessed from
+    `content` with `detect_encoding()`.
+    Bytes that cannot be decoded are replaced instead of raising.
+
+    """
+    codec = encoding or detect_encoding(content)
+    return content.decode(codec, errors='replace')
+
+
+def smart_encode(content: str, encoding: str) -> bytes:
+    """Return `content` encoded to bytes with `encoding`.
+
+    Characters the codec cannot represent are replaced instead of raising.
+
+    """
+    return content.encode(encoding, errors='replace')
diff --git a/httpie/models.py b/httpie/models.py
@@ -1,34 +1,33 @@
-from abc import ABCMeta, abstractmethod
-from typing import Iterable, Optional
+from typing import Iterable
 from urllib.parse import urlsplit
 
-from .constants import UTF8
-from .utils import split_cookies
+from .utils import split_cookies, parse_content_type_header
+from .compat import cached_property
 
 
-class HTTPMessage(metaclass=ABCMeta):
+class HTTPMessage:
     """Abstract class for HTTP messages."""
 
     def __init__(self, orig):
         self._orig = orig
 
-    @abstractmethod
     def iter_body(self, chunk_size: int) -> Iterable[bytes]:
         """Return an iterator over the body."""
+        raise NotImplementedError
 
-    @abstractmethod
     def iter_lines(self, chunk_size: int) -> Iterable[bytes]:
         """Return an iterator over the body yielding (`line`, `line_feed`)."""
+        raise NotImplementedError
 
     @property
-    @abstractmethod
     def headers(self) -> str:
         """Return a `str` with the message's headers."""
+        raise NotImplementedError
 
-    @property
-    @abstractmethod
-    def encoding(self) -> Optional[str]:
-        """Return a `str` with the message's encoding, if known."""
+    @cached_property
+    def encoding(self) -> str:  # charset parameter of the Content-Type header, '' if none
+        _, header_params = parse_content_type_header(self.content_type)
+        return header_params.get('charset', '')
 
     @property
     def content_type(self) -> str:
@@ -77,10 +76,6 @@ def headers(self):
         )
         return '\r\n'.join(headers)
 
-    @property
-    def encoding(self):
-        return self._orig.encoding or UTF8
-
 
 class HTTPRequest(HTTPMessage):
     """A :class:`requests.models.Request` wrapper."""
@@ -114,10 +109,6 @@ def headers(self):
         headers = '\r\n'.join(headers).strip()
         return headers
 
-    @property
-    def encoding(self):
-        return UTF8
-
     @property
     def body(self):
         body = self._orig.body

diff --git a/httpie/output/formatters/xml.py b/httpie/output/formatters/xml.py
@@ -1,7 +1,7 @@
 import sys
 from typing import TYPE_CHECKING, Optional
 
-from ...constants import UTF8
+from ...encoding import UTF8
 from ...plugins import FormatterPlugin
 
 if TYPE_CHECKING: