
python-checkins at python
Feb 20, 2012, 10:51 AM
Post #2 of 5
(50 views)
Permalink
|
|
cpython (merge default -> default): Merged upstream changes.
[In reply to]
|
|
http://hg.python.org/cpython/rev/ae960e5ae112 changeset: 75064:ae960e5ae112 parent: 75063:2b4a553bd6ed parent: 75059:42f61304f77d user: Vinay Sajip <vinay_sajip [at] yahoo> date: Mon Feb 20 18:50:33 2012 +0000 summary: Merged upstream changes. files: Doc/library/base64.rst | 11 +- Doc/whatsnew/3.3.rst | 14 ++ Lib/base64.py | 26 ++- Lib/test/test_base64.py | 163 +++++++++++++++++---------- Misc/NEWS | 6 + Python/dynload_aix.c | 1 - Python/dynload_dl.c | 1 - Python/dynload_hpux.c | 1 - Python/dynload_next.c | 1 - Python/dynload_shlib.c | 6 - 10 files changed, 147 insertions(+), 83 deletions(-) diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -18,9 +18,14 @@ There are two interfaces provided by this module. The modern interface supports encoding and decoding ASCII byte string objects using all three -alphabets. The legacy interface provides for encoding and decoding to and from -file-like objects as well as byte strings, but only using the Base64 standard -alphabet. +alphabets. Additionally, the decoding functions of the modern interface also +accept Unicode strings containing only ASCII characters. The legacy interface +provides for encoding and decoding to and from file-like objects as well as +byte strings, but only using the Base64 standard alphabet. + +.. versionchanged:: 3.3 + ASCII-only Unicode strings are now accepted by the decoding functions of + the modern interface. The modern interface provides: diff --git a/Doc/whatsnew/3.3.rst b/Doc/whatsnew/3.3.rst --- a/Doc/whatsnew/3.3.rst +++ b/Doc/whatsnew/3.3.rst @@ -939,6 +939,20 @@ :c:func:`PyUnicode_FromFormat()`, your code will automatically take advantage of the new unicode representations. +Building C extensions +--------------------- + +* The range of possible file names for C extensions has been narrowed. + Very rarely used spellings have been suppressed: under POSIX, files + named ``xxxmodule.so``, ``xxxmodule.abi3.so`` and + ``xxxmodule.cpython-*.so`` are no longer recognized as implementing + the ``xxx`` module. If you had been generating such files, you have + to switch to the other spellings (i.e., remove the ``module`` string + from the file names). + + (implemented in :issue:`14040`.) + + Other issues ------------ diff --git a/Lib/base64.py b/Lib/base64.py --- a/Lib/base64.py +++ b/Lib/base64.py @@ -29,6 +29,16 @@ bytes_types = (bytes, bytearray) # Types acceptable as binary data +def _bytes_from_decode_data(s): + if isinstance(s, str): + try: + return s.encode('ascii') + except UnicodeEncodeError: + raise ValueError('string argument should contain only ASCII characters') + elif isinstance(s, bytes_types): + return s + else: + raise TypeError("argument should be bytes or ASCII string, not %s" % s.__class__.__name__) def _translate(s, altchars): if not isinstance(s, bytes_types): @@ -79,12 +89,9 @@ discarded prior to the padding check. If validate is True, non-base64-alphabet characters in the input result in a binascii.Error. """ - if not isinstance(s, bytes_types): - raise TypeError("expected bytes, not %s" % s.__class__.__name__) + s = _bytes_from_decode_data(s) if altchars is not None: - if not isinstance(altchars, bytes_types): - raise TypeError("expected bytes, not %s" - % altchars.__class__.__name__) + altchars = _bytes_from_decode_data(altchars) assert len(altchars) == 2, repr(altchars) s = _translate(s, {chr(altchars[0]): b'+', chr(altchars[1]): b'/'}) if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s): @@ -211,8 +218,7 @@ the input is incorrectly padded or if there are non-alphabet characters present in the input. """ - if not isinstance(s, bytes_types): - raise TypeError("expected bytes, not %s" % s.__class__.__name__) + s = _bytes_from_decode_data(s) quanta, leftover = divmod(len(s), 8) if leftover: raise binascii.Error('Incorrect padding') @@ -220,8 +226,7 @@ # False, or the character to map the digit 1 (one) to. It should be # either L (el) or I (eye). if map01 is not None: - if not isinstance(map01, bytes_types): - raise TypeError("expected bytes, not %s" % map01.__class__.__name__) + map01 = _bytes_from_decode_data(map01) assert len(map01) == 1, repr(map01) s = _translate(s, {b'0': b'O', b'1': map01}) if casefold: @@ -292,8 +297,7 @@ s were incorrectly padded or if there are non-alphabet characters present in the string. """ - if not isinstance(s, bytes_types): - raise TypeError("expected bytes, not %s" % s.__class__.__name__) + s = _bytes_from_decode_data(s) if casefold: s = s.upper() if re.search(b'[^0-9A-F]', s): diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -102,44 +102,53 @@ def test_b64decode(self): eq = self.assertEqual - eq(base64.b64decode(b"d3d3LnB5dGhvbi5vcmc="), b"www.python.org") - eq(base64.b64decode(b'AA=='), b'\x00') - eq(base64.b64decode(b"YQ=="), b"a") - eq(base64.b64decode(b"YWI="), b"ab") - eq(base64.b64decode(b"YWJj"), b"abc") - eq(base64.b64decode(b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE" - b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT" - b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ=="), - b"abcdefghijklmnopqrstuvwxyz" - b"ABCDEFGHIJKLMNOPQRSTUVWXYZ" - b"0123456789!@#0^&*();:<>,. []{}") - eq(base64.b64decode(b''), b'') + + tests = {b"d3d3LnB5dGhvbi5vcmc=": b"www.python.org", + b'AA==': b'\x00', + b"YQ==": b"a", + b"YWI=": b"ab", + b"YWJj": b"abc", + b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE" + b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT" + b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==": + + b"abcdefghijklmnopqrstuvwxyz" + b"ABCDEFGHIJKLMNOPQRSTUVWXYZ" + b"0123456789!@#0^&*();:<>,. []{}", + b'': b'', + } + for data, res in tests.items(): + eq(base64.b64decode(data), res) + eq(base64.b64decode(data.decode('ascii')), res) + # Test with arbitrary alternative characters - eq(base64.b64decode(b'01a*b$cd', altchars=b'*$'), b'\xd3V\xbeo\xf7\x1d') - # Check if passing a str object raises an error - self.assertRaises(TypeError, base64.b64decode, "") - self.assertRaises(TypeError, base64.b64decode, b"", altchars="") + tests_altchars = {(b'01a*b$cd', b'*$'): b'\xd3V\xbeo\xf7\x1d', + } + for (data, altchars), res in tests_altchars.items(): + data_str = data.decode('ascii') + altchars_str = altchars.decode('ascii') + + eq(base64.b64decode(data, altchars=altchars), res) + eq(base64.b64decode(data_str, altchars=altchars), res) + eq(base64.b64decode(data, altchars=altchars_str), res) + eq(base64.b64decode(data_str, altchars=altchars_str), res) + # Test standard alphabet - eq(base64.standard_b64decode(b"d3d3LnB5dGhvbi5vcmc="), b"www.python.org") - eq(base64.standard_b64decode(b"YQ=="), b"a") - eq(base64.standard_b64decode(b"YWI="), b"ab") - eq(base64.standard_b64decode(b"YWJj"), b"abc") - eq(base64.standard_b64decode(b""), b"") - eq(base64.standard_b64decode(b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE" - b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT" - b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ=="), - b"abcdefghijklmnopqrstuvwxyz" - b"ABCDEFGHIJKLMNOPQRSTUVWXYZ" - b"0123456789!@#0^&*();:<>,. []{}") - # Check if passing a str object raises an error - self.assertRaises(TypeError, base64.standard_b64decode, "") - self.assertRaises(TypeError, base64.standard_b64decode, b"", altchars="") + for data, res in tests.items(): + eq(base64.standard_b64decode(data), res) + eq(base64.standard_b64decode(data.decode('ascii')), res) + # Test with 'URL safe' alternative characters - eq(base64.urlsafe_b64decode(b'01a-b_cd'), b'\xd3V\xbeo\xf7\x1d') - self.assertRaises(TypeError, base64.urlsafe_b64decode, "") + tests_urlsafe = {b'01a-b_cd': b'\xd3V\xbeo\xf7\x1d', + b'': b'', + } + for data, res in tests_urlsafe.items(): + eq(base64.urlsafe_b64decode(data), res) + eq(base64.urlsafe_b64decode(data.decode('ascii')), res) def test_b64decode_padding_error(self): self.assertRaises(binascii.Error, base64.b64decode, b'abc') + self.assertRaises(binascii.Error, base64.b64decode, 'abc') def test_b64decode_invalid_chars(self): # issue 1466065: Test some invalid characters. @@ -154,8 +163,10 @@ (b'YWJj\nYWI=', b'abcab')) for bstr, res in tests: self.assertEqual(base64.b64decode(bstr), res) + self.assertEqual(base64.b64decode(bstr.decode('ascii')), res) with self.assertRaises(binascii.Error): base64.b64decode(bstr, validate=True) + base64.b64decode(bstr.decode('ascii'), validate=True) def test_b32encode(self): eq = self.assertEqual @@ -170,40 +181,62 @@ def test_b32decode(self): eq = self.assertEqual - eq(base64.b32decode(b''), b'') - eq(base64.b32decode(b'AA======'), b'\x00') - eq(base64.b32decode(b'ME======'), b'a') - eq(base64.b32decode(b'MFRA===='), b'ab') - eq(base64.b32decode(b'MFRGG==='), b'abc') - eq(base64.b32decode(b'MFRGGZA='), b'abcd') - eq(base64.b32decode(b'MFRGGZDF'), b'abcde') - self.assertRaises(TypeError, base64.b32decode, "") + tests = {b'': b'', + b'AA======': b'\x00', + b'ME======': b'a', + b'MFRA====': b'ab', + b'MFRGG===': b'abc', + b'MFRGGZA=': b'abcd', + b'MFRGGZDF': b'abcde', + } + for data, res in tests.items(): + eq(base64.b32decode(data), res) + eq(base64.b32decode(data.decode('ascii')), res) def test_b32decode_casefold(self): eq = self.assertEqual - eq(base64.b32decode(b'', True), b'') - eq(base64.b32decode(b'ME======', True), b'a') - eq(base64.b32decode(b'MFRA====', True), b'ab') - eq(base64.b32decode(b'MFRGG===', True), b'abc') - eq(base64.b32decode(b'MFRGGZA=', True), b'abcd') - eq(base64.b32decode(b'MFRGGZDF', True), b'abcde') - # Lower cases - eq(base64.b32decode(b'me======', True), b'a') - eq(base64.b32decode(b'mfra====', True), b'ab') - eq(base64.b32decode(b'mfrgg===', True), b'abc') - eq(base64.b32decode(b'mfrggza=', True), b'abcd') - eq(base64.b32decode(b'mfrggzdf', True), b'abcde') - # Expected exceptions + tests = {b'': b'', + b'ME======': b'a', + b'MFRA====': b'ab', + b'MFRGG===': b'abc', + b'MFRGGZA=': b'abcd', + b'MFRGGZDF': b'abcde', + # Lower cases + b'me======': b'a', + b'mfra====': b'ab', + b'mfrgg===': b'abc', + b'mfrggza=': b'abcd', + b'mfrggzdf': b'abcde', + } + + for data, res in tests.items(): + eq(base64.b32decode(data, True), res) + eq(base64.b32decode(data.decode('ascii'), True), res) + self.assertRaises(TypeError, base64.b32decode, b'me======') + self.assertRaises(TypeError, base64.b32decode, 'me======') + # Mapping zero and one eq(base64.b32decode(b'MLO23456'), b'b\xdd\xad\xf3\xbe') - eq(base64.b32decode(b'M1023456', map01=b'L'), b'b\xdd\xad\xf3\xbe') - eq(base64.b32decode(b'M1023456', map01=b'I'), b'b\x1d\xad\xf3\xbe') - self.assertRaises(TypeError, base64.b32decode, b"", map01="") + eq(base64.b32decode('MLO23456'), b'b\xdd\xad\xf3\xbe') + + map_tests = {(b'M1023456', b'L'): b'b\xdd\xad\xf3\xbe', + (b'M1023456', b'I'): b'b\x1d\xad\xf3\xbe', + } + for (data, map01), res in map_tests.items(): + data_str = data.decode('ascii') + map01_str = map01.decode('ascii') + + eq(base64.b32decode(data, map01=map01), res) + eq(base64.b32decode(data_str, map01=map01), res) + eq(base64.b32decode(data, map01=map01_str), res) + eq(base64.b32decode(data_str, map01=map01_str), res) def test_b32decode_error(self): - self.assertRaises(binascii.Error, base64.b32decode, b'abc') - self.assertRaises(binascii.Error, base64.b32decode, b'ABCDEF==') + for data in [b'abc', b'ABCDEF==']: + with self.assertRaises(binascii.Error): + base64.b32decode(data) + base64.b32decode(data.decode('ascii')) def test_b16encode(self): eq = self.assertEqual @@ -214,12 +247,24 @@ def test_b16decode(self): eq = self.assertEqual eq(base64.b16decode(b'0102ABCDEF'), b'\x01\x02\xab\xcd\xef') + eq(base64.b16decode('0102ABCDEF'), b'\x01\x02\xab\xcd\xef') eq(base64.b16decode(b'00'), b'\x00') + eq(base64.b16decode('00'), b'\x00') # Lower case is not allowed without a flag self.assertRaises(binascii.Error, base64.b16decode, b'0102abcdef') + self.assertRaises(binascii.Error, base64.b16decode, '0102abcdef') # Case fold eq(base64.b16decode(b'0102abcdef', True), b'\x01\x02\xab\xcd\xef') - self.assertRaises(TypeError, base64.b16decode, "") + eq(base64.b16decode('0102abcdef', True), b'\x01\x02\xab\xcd\xef') + + def test_decode_nonascii_str(self): + decode_funcs = (base64.b64decode, + base64.standard_b64decode, + base64.urlsafe_b64decode, + base64.b32decode, + base64.b16decode) + for f in decode_funcs: + self.assertRaises(ValueError, f, 'with non-ascii \xcb') def test_ErrorHeritage(self): self.assertTrue(issubclass(binascii.Error, ValueError)) diff --git a/Misc/NEWS b/Misc/NEWS --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ Core and Builtins ----------------- +- Issue #14040: Remove rarely used file name suffixes for C extensions + (under POSIX mainly). + - Issue #14051: Allow arbitrary attributes to be set of classmethod and staticmethod. @@ -469,6 +472,9 @@ Library ------- +- Issue #13641: Decoding functions in the base64 module now accept ASCII-only + unicode strings. Patch by Catalin Iacob. + - Issue #14043: Speed up importlib's _FileFinder by at least 8x, and add a new importlib.invalidate_caches() function. diff --git a/Python/dynload_aix.c b/Python/dynload_aix.c --- a/Python/dynload_aix.c +++ b/Python/dynload_aix.c @@ -28,7 +28,6 @@ const struct filedescr _PyImport_DynLoadFiletab[] = { {".so", "rb", C_EXTENSION}, - {"module.so", "rb", C_EXTENSION}, {0, 0} }; diff --git a/Python/dynload_dl.c b/Python/dynload_dl.c --- a/Python/dynload_dl.c +++ b/Python/dynload_dl.c @@ -11,7 +11,6 @@ const struct filedescr _PyImport_DynLoadFiletab[] = { {".o", "rb", C_EXTENSION}, - {"module.o", "rb", C_EXTENSION}, {0, 0} }; diff --git a/Python/dynload_hpux.c b/Python/dynload_hpux.c --- a/Python/dynload_hpux.c +++ b/Python/dynload_hpux.c @@ -15,7 +15,6 @@ const struct filedescr _PyImport_DynLoadFiletab[] = { {SHLIB_EXT, "rb", C_EXTENSION}, - {"module"SHLIB_EXT, "rb", C_EXTENSION}, {0, 0} }; diff --git a/Python/dynload_next.c b/Python/dynload_next.c --- a/Python/dynload_next.c +++ b/Python/dynload_next.c @@ -10,7 +10,6 @@ const struct filedescr _PyImport_DynLoadFiletab[] = { {".so", "rb", C_EXTENSION}, - {"module.so", "rb", C_EXTENSION}, {0, 0} }; diff --git a/Python/dynload_shlib.c b/Python/dynload_shlib.c --- a/Python/dynload_shlib.c +++ b/Python/dynload_shlib.c @@ -39,7 +39,6 @@ const struct filedescr _PyImport_DynLoadFiletab[] = { #ifdef __CYGWIN__ {".dll", "rb", C_EXTENSION}, - {"module.dll", "rb", C_EXTENSION}, #else /* !__CYGWIN__ */ #if defined(PYOS_OS2) && defined(PYCC_GCC) {".pyd", "rb", C_EXTENSION}, @@ -48,15 +47,10 @@ #ifdef __VMS {".exe", "rb", C_EXTENSION}, {".EXE", "rb", C_EXTENSION}, - {"module.exe", "rb", C_EXTENSION}, - {"MODULE.EXE", "rb", C_EXTENSION}, #else /* !__VMS */ {"." SOABI ".so", "rb", C_EXTENSION}, - {"module." SOABI ".so", "rb", C_EXTENSION}, {".abi" PYTHON_ABI_STRING ".so", "rb", C_EXTENSION}, - {"module.abi" PYTHON_ABI_STRING ".so", "rb", C_EXTENSION}, {".so", "rb", C_EXTENSION}, - {"module.so", "rb", C_EXTENSION}, #endif /* __VMS */ #endif /* defined(PYOS_OS2) && defined(PYCC_GCC) */ #endif /* __CYGWIN__ */ -- Repository URL: http://hg.python.org/cpython
|