Skip to content
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 55 additions & 46 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -6089,59 +6089,72 @@ _PyUnicode_EncodeUTF32(PyObject *str,
const char *errors,
int byteorder)
{
int kind;
const void *data;
Py_ssize_t len;
PyObject *v;
uint32_t *out;
if (!PyUnicode_Check(str)) {
PyErr_BadArgument();
return NULL;
}
int kind = PyUnicode_KIND(str);
const void *data = PyUnicode_DATA(str);
Py_ssize_t len = PyUnicode_GET_LENGTH(str);

if (len > PY_SSIZE_T_MAX / 4 - (byteorder == 0))
return PyErr_NoMemory();
Py_ssize_t nsize = len + (byteorder == 0);

#if PY_LITTLE_ENDIAN
int native_ordering = byteorder <= 0;
#else
int native_ordering = byteorder >= 0;
#endif
const char *encoding;
Py_ssize_t nsize, pos;
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
PyObject *rep = NULL;
if (kind == PyUnicode_1BYTE_KIND) {
// gh-139156: Don't use PyBytesWriter API here since it has an overhead
// on short strings
PyObject *v = PyBytes_FromStringAndSize(NULL, nsize * 4);
if (v == NULL) {
return NULL;
}

if (!PyUnicode_Check(str)) {
PyErr_BadArgument();
return NULL;
/* output buffer is 4-bytes aligned */
assert(_Py_IS_ALIGNED(PyBytes_AS_STRING(v), 4));
uint32_t *out = (uint32_t *)PyBytes_AS_STRING(v);
if (byteorder == 0) {
*out++ = 0xFEFF;
}
if (len > 0) {
ucs1lib_utf32_encode((const Py_UCS1 *)data, len,
&out, native_ordering);
}
return v;
}
kind = PyUnicode_KIND(str);
data = PyUnicode_DATA(str);
len = PyUnicode_GET_LENGTH(str);

if (len > PY_SSIZE_T_MAX / 4 - (byteorder == 0))
return PyErr_NoMemory();
nsize = len + (byteorder == 0);
v = PyBytes_FromStringAndSize(NULL, nsize * 4);
if (v == NULL)
PyBytesWriter *writer = PyBytesWriter_Create(nsize * 4);
if (writer == NULL) {
return NULL;
}

/* output buffer is 4-bytes aligned */
assert(_Py_IS_ALIGNED(PyBytes_AS_STRING(v), 4));
out = (uint32_t *)PyBytes_AS_STRING(v);
if (byteorder == 0)
assert(_Py_IS_ALIGNED(PyBytesWriter_GetData(writer), 4));
uint32_t *out = (uint32_t *)PyBytesWriter_GetData(writer);
if (byteorder == 0) {
*out++ = 0xFEFF;
if (len == 0)
goto done;
}
if (len == 0) {
return PyBytesWriter_Finish(writer);
}

const char *encoding;
if (byteorder == -1)
encoding = "utf-32-le";
else if (byteorder == 1)
encoding = "utf-32-be";
else
encoding = "utf-32";

if (kind == PyUnicode_1BYTE_KIND) {
ucs1lib_utf32_encode((const Py_UCS1 *)data, len, &out, native_ordering);
goto done;
}
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
PyObject *rep = NULL;

pos = 0;
while (pos < len) {
for (Py_ssize_t pos = 0; pos < len; ) {
Py_ssize_t newpos, repsize, moreunits;

if (kind == PyUnicode_2BYTE_KIND) {
Expand Down Expand Up @@ -6188,21 +6201,18 @@ _PyUnicode_EncodeUTF32(PyObject *str,

/* four bytes are reserved for each surrogate */
if (moreunits > 0) {
Py_ssize_t outpos = out - (uint32_t*) PyBytes_AS_STRING(v);
if (moreunits >= (PY_SSIZE_T_MAX - PyBytes_GET_SIZE(v)) / 4) {
/* integer overflow */
PyErr_NoMemory();
out = PyBytesWriter_GrowAndUpdatePointer(writer, 4 * moreunits, out);
if (out == NULL) {
goto error;
}
if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + 4 * moreunits) < 0)
goto error;
out = (uint32_t*) PyBytes_AS_STRING(v) + outpos;
}

if (PyBytes_Check(rep)) {
memcpy(out, PyBytes_AS_STRING(rep), repsize);
out += repsize / 4;
} else /* rep is unicode */ {
}
else {
/* rep is unicode */
assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
ucs1lib_utf32_encode(PyUnicode_1BYTE_DATA(rep), repsize,
&out, native_ordering);
Expand All @@ -6214,18 +6224,17 @@ _PyUnicode_EncodeUTF32(PyObject *str,
/* Cut back to size actually needed. This is necessary for, for example,
encoding of a string containing isolated surrogates and the 'ignore'
handler is used. */
nsize = (unsigned char*) out - (unsigned char*) PyBytes_AS_STRING(v);
if (nsize != PyBytes_GET_SIZE(v))
_PyBytes_Resize(&v, nsize);
nsize = (unsigned char*) out - (unsigned char*) PyBytesWriter_GetData(writer);
PyObject *bytes = PyBytesWriter_FinishWithSize(writer, nsize);
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
done:
return v;
return bytes;

error:
Py_XDECREF(rep);
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
Py_XDECREF(v);
PyBytesWriter_Discard(writer);
return NULL;
}

Expand Down
Loading