Skip to content

Commit c9a79a0

Browse files
authored
gh-139156: Use PyBytesWriter in _PyUnicode_EncodeCharmap() (#139251)
Replace PyBytes_FromStringAndSize() and _PyBytes_Resize() with the PyBytesWriter API. Add _PyBytesWriter_GetSize() and _PyBytesWriter_GetData() static inline functions.
1 parent 06703d6 commit c9a79a0

File tree

3 files changed: 73 additions and 64 deletions

3 files changed: 73 additions and 64 deletions

Include/internal/pycore_bytesobject.h

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,26 @@ struct PyBytesWriter {
7373
// Export for '_testcapi' shared extension
7474
PyAPI_FUNC(PyBytesWriter*) _PyBytesWriter_CreateByteArray(Py_ssize_t size);
7575

76+
static inline Py_ssize_t
77+
_PyBytesWriter_GetSize(PyBytesWriter *writer)
78+
{
79+
return writer->size;
80+
}
81+
82+
static inline char*
83+
_PyBytesWriter_GetData(PyBytesWriter *writer)
84+
{
85+
if (writer->obj == NULL) {
86+
return writer->small_buffer;
87+
}
88+
else if (writer->use_bytearray) {
89+
return PyByteArray_AS_STRING(writer->obj);
90+
}
91+
else {
92+
return PyBytes_AS_STRING(writer->obj);
93+
}
94+
}
95+
7696
#ifdef __cplusplus
7797
}
7898
#endif

Objects/bytesobject.c

Lines changed: 2 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -3480,15 +3480,7 @@ _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
34803480
static inline char*
34813481
byteswriter_data(PyBytesWriter *writer)
34823482
{
3483-
if (writer->obj == NULL) {
3484-
return writer->small_buffer;
3485-
}
3486-
else if (writer->use_bytearray) {
3487-
return PyByteArray_AS_STRING(writer->obj);
3488-
}
3489-
else {
3490-
return PyBytes_AS_STRING(writer->obj);
3491-
}
3483+
return _PyBytesWriter_GetData(writer);
34923484
}
34933485

34943486

@@ -3710,7 +3702,7 @@ PyBytesWriter_GetData(PyBytesWriter *writer)
37103702
Py_ssize_t
37113703
PyBytesWriter_GetSize(PyBytesWriter *writer)
37123704
{
3713-
return writer->size;
3705+
return _PyBytesWriter_GetSize(writer);
37143706
}
37153707

37163708

Objects/unicodeobject.c

Lines changed: 51 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -8842,15 +8842,13 @@ charmapencode_lookup(Py_UCS4 c, PyObject *mapping, unsigned char *replace)
88428842
}
88438843

88448844
static int
8845-
charmapencode_resize(PyObject **outobj, Py_ssize_t *outpos, Py_ssize_t requiredsize)
8845+
charmapencode_resize(PyBytesWriter *writer, Py_ssize_t *outpos, Py_ssize_t requiredsize)
88468846
{
8847-
Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj);
8847+
Py_ssize_t outsize = PyBytesWriter_GetSize(writer);
88488848
/* exponentially overallocate to minimize reallocations */
8849-
if (requiredsize < 2*outsize)
8850-
requiredsize = 2*outsize;
8851-
if (_PyBytes_Resize(outobj, requiredsize))
8852-
return -1;
8853-
return 0;
8849+
if (requiredsize < 2 * outsize)
8850+
requiredsize = 2 * outsize;
8851+
return PyBytesWriter_Resize(writer, requiredsize);
88548852
}
88558853

88568854
typedef enum charmapencode_result {
@@ -8864,22 +8862,22 @@ typedef enum charmapencode_result {
88648862
reallocation error occurred. The caller must decref the result */
88658863
static charmapencode_result
88668864
charmapencode_output(Py_UCS4 c, PyObject *mapping,
8867-
PyObject **outobj, Py_ssize_t *outpos)
8865+
PyBytesWriter *writer, Py_ssize_t *outpos)
88688866
{
88698867
PyObject *rep;
88708868
unsigned char replace;
88718869
char *outstart;
8872-
Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj);
8870+
Py_ssize_t outsize = _PyBytesWriter_GetSize(writer);
88738871

88748872
if (Py_IS_TYPE(mapping, &EncodingMapType)) {
88758873
int res = encoding_map_lookup(c, mapping);
88768874
Py_ssize_t requiredsize = *outpos+1;
88778875
if (res == -1)
88788876
return enc_FAILED;
88798877
if (outsize<requiredsize)
8880-
if (charmapencode_resize(outobj, outpos, requiredsize))
8878+
if (charmapencode_resize(writer, outpos, requiredsize))
88818879
return enc_EXCEPTION;
8882-
outstart = PyBytes_AS_STRING(*outobj);
8880+
outstart = _PyBytesWriter_GetData(writer);
88838881
outstart[(*outpos)++] = (char)res;
88848882
return enc_SUCCESS;
88858883
}
@@ -8894,23 +8892,23 @@ charmapencode_output(Py_UCS4 c, PyObject *mapping,
88948892
if (PyLong_Check(rep)) {
88958893
Py_ssize_t requiredsize = *outpos+1;
88968894
if (outsize<requiredsize)
8897-
if (charmapencode_resize(outobj, outpos, requiredsize)) {
8895+
if (charmapencode_resize(writer, outpos, requiredsize)) {
88988896
Py_DECREF(rep);
88998897
return enc_EXCEPTION;
89008898
}
8901-
outstart = PyBytes_AS_STRING(*outobj);
8899+
outstart = _PyBytesWriter_GetData(writer);
89028900
outstart[(*outpos)++] = (char)replace;
89038901
}
89048902
else {
89058903
const char *repchars = PyBytes_AS_STRING(rep);
89068904
Py_ssize_t repsize = PyBytes_GET_SIZE(rep);
89078905
Py_ssize_t requiredsize = *outpos+repsize;
89088906
if (outsize<requiredsize)
8909-
if (charmapencode_resize(outobj, outpos, requiredsize)) {
8907+
if (charmapencode_resize(writer, outpos, requiredsize)) {
89108908
Py_DECREF(rep);
89118909
return enc_EXCEPTION;
89128910
}
8913-
outstart = PyBytes_AS_STRING(*outobj);
8911+
outstart = _PyBytesWriter_GetData(writer);
89148912
memcpy(outstart + *outpos, repchars, repsize);
89158913
*outpos += repsize;
89168914
}
@@ -8926,7 +8924,7 @@ charmap_encoding_error(
89268924
PyObject *unicode, Py_ssize_t *inpos, PyObject *mapping,
89278925
PyObject **exceptionObject,
89288926
_Py_error_handler *error_handler, PyObject **error_handler_obj, const char *errors,
8929-
PyObject **res, Py_ssize_t *respos)
8927+
PyBytesWriter *writer, Py_ssize_t *respos)
89308928
{
89318929
PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
89328930
Py_ssize_t size, repsize;
@@ -8981,7 +8979,7 @@ charmap_encoding_error(
89818979

89828980
case _Py_ERROR_REPLACE:
89838981
for (collpos = collstartpos; collpos<collendpos; ++collpos) {
8984-
x = charmapencode_output('?', mapping, res, respos);
8982+
x = charmapencode_output('?', mapping, writer, respos);
89858983
if (x==enc_EXCEPTION) {
89868984
return -1;
89878985
}
@@ -9002,7 +9000,7 @@ charmap_encoding_error(
90029000
char *cp;
90039001
sprintf(buffer, "&#%d;", (int)PyUnicode_READ_CHAR(unicode, collpos));
90049002
for (cp = buffer; *cp; ++cp) {
9005-
x = charmapencode_output(*cp, mapping, res, respos);
9003+
x = charmapencode_output(*cp, mapping, writer, respos);
90069004
if (x==enc_EXCEPTION)
90079005
return -1;
90089006
else if (x==enc_FAILED) {
@@ -9022,17 +9020,17 @@ charmap_encoding_error(
90229020
return -1;
90239021
if (PyBytes_Check(repunicode)) {
90249022
/* Directly copy bytes result to output. */
9025-
Py_ssize_t outsize = PyBytes_Size(*res);
9023+
Py_ssize_t outsize = PyBytesWriter_GetSize(writer);
90269024
Py_ssize_t requiredsize;
90279025
repsize = PyBytes_Size(repunicode);
90289026
requiredsize = *respos + repsize;
90299027
if (requiredsize > outsize)
90309028
/* Make room for all additional bytes. */
9031-
if (charmapencode_resize(res, respos, requiredsize)) {
9029+
if (charmapencode_resize(writer, respos, requiredsize)) {
90329030
Py_DECREF(repunicode);
90339031
return -1;
90349032
}
9035-
memcpy(PyBytes_AsString(*res) + *respos,
9033+
memcpy((char*)PyBytesWriter_GetData(writer) + *respos,
90369034
PyBytes_AsString(repunicode), repsize);
90379035
*respos += repsize;
90389036
*inpos = newpos;
@@ -9045,7 +9043,7 @@ charmap_encoding_error(
90459043
kind = PyUnicode_KIND(repunicode);
90469044
for (index = 0; index < repsize; index++) {
90479045
Py_UCS4 repch = PyUnicode_READ(kind, data, index);
9048-
x = charmapencode_output(repch, mapping, res, respos);
9046+
x = charmapencode_output(repch, mapping, writer, respos);
90499047
if (x==enc_EXCEPTION) {
90509048
Py_DECREF(repunicode);
90519049
return -1;
@@ -9067,65 +9065,64 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
90679065
PyObject *mapping,
90689066
const char *errors)
90699067
{
9070-
/* output object */
9071-
PyObject *res = NULL;
9072-
/* current input position */
9073-
Py_ssize_t inpos = 0;
9074-
Py_ssize_t size;
9075-
/* current output position */
9076-
Py_ssize_t respos = 0;
9077-
PyObject *error_handler_obj = NULL;
9078-
PyObject *exc = NULL;
9079-
_Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
9080-
const void *data;
9081-
int kind;
9082-
9083-
size = PyUnicode_GET_LENGTH(unicode);
9084-
data = PyUnicode_DATA(unicode);
9085-
kind = PyUnicode_KIND(unicode);
9086-
90879068
/* Default to Latin-1 */
9088-
if (mapping == NULL)
9069+
if (mapping == NULL) {
90899070
return unicode_encode_ucs1(unicode, errors, 256);
9071+
}
9072+
9073+
Py_ssize_t size = PyUnicode_GET_LENGTH(unicode);
9074+
if (size == 0) {
9075+
return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
9076+
}
9077+
const void *data = PyUnicode_DATA(unicode);
9078+
int kind = PyUnicode_KIND(unicode);
90909079

9080+
PyObject *error_handler_obj = NULL;
9081+
PyObject *exc = NULL;
9082+
9083+
/* output object */
9084+
PyBytesWriter *writer;
90919085
/* allocate enough for a simple encoding without
90929086
replacements, if we need more, we'll resize */
9093-
res = PyBytes_FromStringAndSize(NULL, size);
9094-
if (res == NULL)
9087+
writer = PyBytesWriter_Create(size);
9088+
if (writer == NULL) {
90959089
goto onError;
9096-
if (size == 0)
9097-
return res;
9090+
}
9091+
9092+
/* current input position */
9093+
Py_ssize_t inpos = 0;
9094+
/* current output position */
9095+
Py_ssize_t respos = 0;
9096+
_Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
90989097

90999098
while (inpos<size) {
91009099
Py_UCS4 ch = PyUnicode_READ(kind, data, inpos);
91019100
/* try to encode it */
9102-
charmapencode_result x = charmapencode_output(ch, mapping, &res, &respos);
9101+
charmapencode_result x = charmapencode_output(ch, mapping, writer, &respos);
91039102
if (x==enc_EXCEPTION) /* error */
91049103
goto onError;
91059104
if (x==enc_FAILED) { /* unencodable character */
91069105
if (charmap_encoding_error(unicode, &inpos, mapping,
91079106
&exc,
91089107
&error_handler, &error_handler_obj, errors,
9109-
&res, &respos)) {
9108+
writer, &respos)) {
91109109
goto onError;
91119110
}
91129111
}
9113-
else
9112+
else {
91149113
/* done with this character => adjust input position */
91159114
++inpos;
9115+
}
91169116
}
91179117

9118-
/* Resize if we allocated to much */
9119-
if (respos<PyBytes_GET_SIZE(res))
9120-
if (_PyBytes_Resize(&res, respos) < 0)
9121-
goto onError;
9122-
91239118
Py_XDECREF(exc);
91249119
Py_XDECREF(error_handler_obj);
9125-
return res;
9120+
9121+
/* Resize if we allocated too much */
9122+
return PyBytesWriter_FinishWithSize(writer, respos);
91269123

91279124
onError:
9128-
Py_XDECREF(res);
9125+
PyBytesWriter_Discard(writer);
91299126
Py_XDECREF(exc);
91309127
Py_XDECREF(error_handler_obj);
91319128
return NULL;

0 commit comments

Comments
 (0)