
python-checkins at python
May 2, 2012, 5:33 PM
Post #1 of 1
(23 views)
Permalink
|
|
cpython: Unicode: optimize creating of 1-character strings
|
|
http://hg.python.org/cpython/rev/3d83e2297166
changeset: 76722:3d83e2297166
user: Victor Stinner <victor.stinner [at] gmail>
date: Thu May 03 02:17:04 2012 +0200
summary:
  Unicode: optimize creating of 1-character strings

files:
  Objects/unicodeobject.c | 60 ++++++++++++++++++++++++----
  1 files changed, 51 insertions(+), 9 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1919,8 +1919,18 @@
         return unicode_empty;
     }
     assert(size > 0);
-    if (size == 1 && u[0] < 256)
-        return get_latin1_char((unsigned char)u[0]);
+    if (size == 1) {
+        Py_UCS4 ch = u[0];
+        if (ch < 256)
+            return get_latin1_char((unsigned char)ch);
+
+        res = PyUnicode_New(1, ch);
+        if (res == NULL)
+            return NULL;
+        PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
+        assert(_PyUnicode_CheckConsistency(res, 1));
+        return res;
+    }
 
     max_char = ucs2lib_find_max_char(u, u + size);
     res = PyUnicode_New(size, max_char);
@@ -1947,8 +1957,18 @@
         return unicode_empty;
     }
     assert(size > 0);
-    if (size == 1 && u[0] < 256)
-        return get_latin1_char((unsigned char)u[0]);
+    if (size == 1) {
+        Py_UCS4 ch = u[0];
+        if (ch < 256)
+            return get_latin1_char((unsigned char)ch);
+
+        res = PyUnicode_New(1, ch);
+        if (res == NULL)
+            return NULL;
+        PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
+        assert(_PyUnicode_CheckConsistency(res, 1));
+        return res;
+    }
 
     max_char = ucs4lib_find_max_char(u, u + size);
     res = PyUnicode_New(size, max_char);
@@ -11368,10 +11388,33 @@
 static PyObject *
 unicode_getitem(PyObject *self, Py_ssize_t index)
 {
-    Py_UCS4 ch = PyUnicode_ReadChar(self, index);
-    if (ch == (Py_UCS4)-1)
-        return NULL;
-    return PyUnicode_FromOrdinal(ch);
+    void *data;
+    enum PyUnicode_Kind kind;
+    Py_UCS4 ch;
+    PyObject *res;
+
+    if (!PyUnicode_Check(self) || PyUnicode_READY(self) == -1) {
+        PyErr_BadArgument();
+        return NULL;
+    }
+    if (index < 0 || index >= PyUnicode_GET_LENGTH(self)) {
+        PyErr_SetString(PyExc_IndexError, "string index out of range");
+        return NULL;
+    }
+    kind = PyUnicode_KIND(self);
+    data = PyUnicode_DATA(self);
+    ch = PyUnicode_READ(kind, data, index);
+    if (ch < 256)
+        return get_latin1_char(ch);
+
+    res = PyUnicode_New(1, ch);
+    if (res == NULL)
+        return NULL;
+    kind = PyUnicode_KIND(res);
+    data = PyUnicode_DATA(res);
+    PyUnicode_WRITE(kind, data, 0, ch);
+    assert(_PyUnicode_CheckConsistency(res, 1));
+    return res;
 }
 
 /* Believe it or not, this produces the same value for ASCII strings
@@ -12039,7 +12082,6 @@
     }
 
     if (PyUnicode_IS_ASCII(self)) {
-        kind = PyUnicode_KIND(self);
         data = PyUnicode_1BYTE_DATA(self);
         return unicode_fromascii(data + start, length);
     }

--
Repository URL: http://hg.python.org/cpython
|