Skip to content

Commit

Permalink
Add PyUnicode_EqualToUTF8() function
Browse files Browse the repository at this point in the history
Add PyUnicode_EqualToUTF8() and PyUnicode_EqualToUTF8AndSize()
functions.
  • Loading branch information
vstinner committed Oct 12, 2023
1 parent f78c780 commit 50be06c
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 0 deletions.
8 changes: 8 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,14 @@ Python 3.13
Available on Python 3.5.2 and newer.
.. c:function:: int PyUnicode_EqualToUTF8(PyObject *unicode, const char *str)
See `PyUnicode_EqualToUTF8() documentation <https://docs.python.org/dev/c-api/unicode.html#c.PyUnicode_EqualToUTF8>`__.
.. c:function:: int PyUnicode_EqualToUTF8AndSize(PyObject *unicode, const char *str, Py_ssize_t size)
See `PyUnicode_EqualToUTF8AndSize() documentation <https://docs.python.org/dev/c-api/unicode.html#c.PyUnicode_EqualToUTF8AndSize>`__.
Python 3.12
-----------
Expand Down
5 changes: 5 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
Changelog
=========

* 2023-10-04: Add functions:

* ``PyUnicode_EqualToUTF8()``
* ``PyUnicode_EqualToUTF8AndSize()``

* 2023-10-03: Add functions:

* ``PyObject_VisitManagedDict()``
Expand Down
73 changes: 73 additions & 0 deletions pythoncapi_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -939,6 +939,79 @@ PyThreadState_GetUnchecked(void)
}
#endif

// gh-110289 added PyUnicode_EqualToUTF8() and PyUnicode_EqualToUTF8AndSize()
// to Python 3.13.0a1
#if PY_VERSION_HEX < 0x030D00A1
// Compare the UTF-8 encoding of 'unicode' with the buffer 'str' of 'str_len'
// bytes.
//
// Return 1 if both the length and the bytes match, 0 otherwise. The function
// has no way to report an error: if the UTF-8 encoding cannot be obtained,
// the failure is swallowed and 0 is returned. Whatever exception was set on
// entry is still set on exit.
static inline int
PyUnicode_EqualToUTF8AndSize(PyObject *unicode, const char *str, Py_ssize_t str_len)
{
    PyObject *saved_type, *saved_value, *saved_tb;
    const void *data;
    Py_ssize_t data_len;
    int equal = 0;  // stays 0 on encoding failure or length mismatch

    // Stash the caller's exception: the calls below may raise, and this API
    // must leave the error indicator exactly as it found it.
    PyErr_Fetch(&saved_type, &saved_value, &saved_tb);

#if PY_VERSION_HEX >= 0x030300A1
    // Python 3.3.0a1 added PyUnicode_AsUTF8AndSize()
    if (PyUnicode_IS_ASCII(unicode)) {
        // For an ASCII string the internal buffer is used directly,
        // so no encoding call is needed.
        data = PyUnicode_DATA(unicode);
        data_len = PyUnicode_GET_LENGTH(unicode);
    }
    else {
        // NULL here means the encoding failed (e.g. memory allocation
        // failure); the exception it set is discarded by PyErr_Restore().
        data = PyUnicode_AsUTF8AndSize(unicode, &data_len);
    }

    if (data != NULL && data_len == str_len) {
        equal = (memcmp(data, str, (size_t)data_len) == 0);
    }
#else
    // Older Python: encode to a temporary bytes object and compare with it.
    PyObject *encoded = PyUnicode_AsUTF8String(unicode);
    if (encoded != NULL) {
#if PY_VERSION_HEX >= 0x03000000
        data_len = PyBytes_GET_SIZE(encoded);
        data = PyBytes_AS_STRING(encoded);
#else
        data_len = PyString_GET_SIZE(encoded);
        data = PyString_AS_STRING(encoded);
#endif
        if (data_len == str_len) {
            equal = (memcmp(data, str, (size_t)data_len) == 0);
        }
        Py_DECREF(encoded);
    }
    // else: encoding failed. The API cannot report the error, so the
    // exception is dropped by PyErr_Restore() and 0 is returned.
#endif

    // Reinstate the caller's exception, replacing anything raised above.
    PyErr_Restore(saved_type, saved_value, saved_tb);
    return equal;
}

// Compare the UTF-8 encoding of 'unicode' with the NUL-terminated string
// 'str'. The length of 'str' is measured with strlen(), so comparison data
// stops at the first NUL byte of 'str'.
//
// Return the result of PyUnicode_EqualToUTF8AndSize() for that length.
static inline int
PyUnicode_EqualToUTF8(PyObject *unicode, const char *str)
{
    const Py_ssize_t str_len = (Py_ssize_t)strlen(str);
    return PyUnicode_EqualToUTF8AndSize(unicode, str, str_len);
}
#endif


#ifdef __cplusplus
}
Expand Down
35 changes: 35 additions & 0 deletions tests/test_pythoncapi_compat_cext.c
Original file line number Diff line number Diff line change
Expand Up @@ -1361,6 +1361,40 @@ test_managed_dict(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
#endif // PY_VERSION_HEX >= 0x030B00A3


// Test PyUnicode_EqualToUTF8() and PyUnicode_EqualToUTF8AndSize().
//
// Return None on success, or NULL with an exception set if one of the test
// strings cannot be created. Comparison results are checked with assert().
static PyObject *
test_unicode(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
{
    PyObject *abc = PyUnicode_FromString("abc");
    if (abc == NULL) {
        return NULL;
    }

    // 7-character string with an embedded null character
    PyObject *abc0def = PyUnicode_FromStringAndSize("abc\0def", 7);
    if (abc0def == NULL) {
        // Release the first string: nothing else owns it on this error path
        Py_DECREF(abc);
        return NULL;
    }

    // PyUnicode_EqualToUTF8() can be called with an exception raised
    // and it must not clear the current exception.
    PyErr_NoMemory();

    assert(PyUnicode_EqualToUTF8AndSize(abc, "abc", 3) == 1);
    assert(PyUnicode_EqualToUTF8AndSize(abc, "Python", 6) == 0);
    assert(PyUnicode_EqualToUTF8AndSize(abc0def, "abc\0def", 7) == 1);

    assert(PyUnicode_EqualToUTF8(abc, "abc") == 1);
    assert(PyUnicode_EqualToUTF8(abc, "Python") == 0);
    // strlen() stops at the embedded null character, so only "abc" is
    // compared against the 7-character string: not equal.
    assert(PyUnicode_EqualToUTF8(abc0def, "abc\0def") == 0);

    // The exception set before the calls must still be set
    assert(PyErr_ExceptionMatches(PyExc_MemoryError));
    PyErr_Clear();

    Py_DECREF(abc);
    Py_DECREF(abc0def);
    Py_RETURN_NONE;
}


static struct PyMethodDef methods[] = {
{"test_object", test_object, METH_NOARGS, _Py_NULL},
{"test_py_is", test_py_is, METH_NOARGS, _Py_NULL},
Expand Down Expand Up @@ -1390,6 +1424,7 @@ static struct PyMethodDef methods[] = {
#ifdef TEST_MANAGED_DICT
{"test_managed_dict", test_managed_dict, METH_NOARGS, _Py_NULL},
#endif
{"test_unicode", test_unicode, METH_NOARGS, _Py_NULL},
{_Py_NULL, _Py_NULL, 0, _Py_NULL}
};

Expand Down

0 comments on commit 50be06c

Please sign in to comment.