python · serhiy-storchaka · Jul 4, 2026 · Jul 4, 2026 · Jul 4, 2026
diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst
@@ -1087,6 +1087,18 @@ On Windows, ``cpXXX`` codecs are available for all code pages.
 But only codecs listed in the following table are guaranteed to exist on
 other platforms.
 
+On platforms that provide the C library's :manpage:`iconv(3)` function
+(such as those using the GNU C Library),
+every encoding known to ``iconv`` for which Python has no built-in codec
+is available as well.
+Such an encoding is looked up by its ``iconv`` name (for example ``cp1133``).
+Prefixing the name with ``iconv:`` (for example ``iconv:latin1``) forces the
+use of the ``iconv``-based codec even when a built-in codec of the same name
+exists; this is mostly useful for testing.
+
+.. versionchanged:: next
+   Added support for encodings provided by the C library's ``iconv``.
+
 .. impl-detail::
 
    Some common encodings can bypass the codecs lookup machinery to

@@ -86,6 +86,16 @@ New modules
 Improved modules
 ================
 
+codecs
+------
+
+* On platforms that provide the C library's :manpage:`iconv(3)` function,
+  every encoding known to ``iconv`` for which Python has no built-in codec
+  is now available (for example ``cp1133``).
+  Prefixing an encoding name with ``iconv:`` forces the ``iconv``-based codec
+  even when a built-in codec of the same name exists.
+  (Contributed by Serhiy Storchaka in :gh:`152997`.)
+
 curses
 ------
 

diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h
@@ -182,6 +182,22 @@ extern int _PyUnicodeWriter_FormatV(
     const char *format,
     va_list vargs);
 
+/* --- iconv Codec -------------------------------------------------------- */
+
+#ifdef HAVE_ICONV
+/* Decode the first `length` bytes of `string` from the iconv encoding
+   `encoding`.  When `consumed` is not NULL, the number of bytes consumed
+   is reported through it (stateful decoding).
+   NOTE(review): presumably returns a new reference, or NULL with an
+   exception set -- confirm against the definition. */
+extern PyObject* _PyUnicode_DecodeIconv(
+    const char *encoding,       /* iconv encoding name */
+    const char *string,         /* encoded string */
+    Py_ssize_t length,          /* size of string */
+    const char *errors,         /* error handling */
+    Py_ssize_t *consumed);      /* bytes consumed, or NULL for non-stateful */
+
+/* Encode the Unicode object `unicode` to the iconv encoding `encoding`.
+   NOTE(review): presumably returns a new reference, or NULL with an
+   exception set -- confirm against the definition. */
+extern PyObject* _PyUnicode_EncodeIconv(
+    const char *encoding,       /* iconv encoding name */
+    PyObject *unicode,          /* Unicode object */
+    const char *errors);        /* error handling */
+#endif
+
 /* --- UTF-7 Codecs ------------------------------------------------------- */
 
 extern PyObject* _PyUnicode_EncodeUTF7(

diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
@@ -176,3 +176,30 @@ def win32_code_page_search_function(encoding):
         return create_win32_code_page_codec(cp)
 
     codecs.register(win32_code_page_search_function)
+
+try:
+    from _codecs import iconv_encode as _iconv_encode
+except ImportError:
+    pass
+else:
+    from ._iconv_codecs import create_iconv_codec
+
+    # Last-resort search function backed by the C library's iconv(): provides
+    # any encoding iconv knows that Python has no built-in codec for.  Registered
+    # last, so it never shadows a built-in; an "iconv:" prefix forces it.
+    def iconv_search_function(encoding):
+        if encoding.startswith('iconv:'):
+            name = encoding[len('iconv:'):]
+        else:
+            name = encoding
+        if not name:
+            return None
+        # Test if the encoding is supported by iconv.
+        try:
+            _iconv_encode(name, '')
+        except (LookupError, OSError):
+            return None
+
+        return create_iconv_codec(encoding, name)
+
+    codecs.register(iconv_search_function)
diff --git a/Lib/encodings/_iconv_codecs.py b/Lib/encodings/_iconv_codecs.py
@@ -0,0 +1,36 @@
+import codecs
+
+def create_iconv_codec(name, encoding):
+    from _codecs import iconv_encode, iconv_decode
+
+    def encode(input, errors='strict'):
+        return iconv_encode(encoding, input, errors)
+
+    def decode(input, errors='strict'):
+        return iconv_decode(encoding, input, errors, True)
+
+    class IncrementalEncoder(codecs.IncrementalEncoder):
+        def encode(self, input, final=False):
+            return iconv_encode(encoding, input, self.errors)[0]
+
+    class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+        def _buffer_decode(self, input, errors, final):
+            return iconv_decode(encoding, input, errors, final)
+
+    class StreamWriter(codecs.StreamWriter):
+        def encode(self, input, errors='strict'):
+            return iconv_encode(encoding, input, errors)
+
+    class StreamReader(codecs.StreamReader):
+        def decode(self, input, errors, final=False):
+            return iconv_decode(encoding, input, errors, final)
+
+    return codecs.CodecInfo(
+        name=name,
+        encode=encode,
+        decode=decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
@@ -3586,6 +3586,178 @@ def test_large_utf8_input(self, size):
         self.assertEqual(decoded[0][-11:], '56\ud1000123456\ud100')
 
 
+def iconv_encoding_available(name):
+    # The encodings iconv provides are platform-dependent, so tests must probe
+    # availability rather than assume it.
+    try:
+        codecs.iconv_encode(name, '')
+    except (LookupError, OSError):
+        return False
+    return True
+
+
+# Candidate encodings for the iconv tests.  What iconv provides differs between
+# platforms, so the tests probe the candidates in order and skip only when a
+# whole category is unavailable.
+#
+# (encoding name, sample text) pairs for single-byte encodings.
+_ICONV_SINGLE_BYTE = [
+    ('KOI8-U', 'Привіт, світ!'),
+    ('ISO-8859-7', 'Καλημέρα'),
+    ('ISO-8859-2', 'Zażółć gęślą jaźń'),
+    ('ISO-8859-1', 'Grüße'),
+]
+# Multibyte encodings; the tests exercise them with Japanese sample text.
+_ICONV_MULTIBYTE = ['EUC-JP', 'SHIFT_JIS', 'GBK', 'GB18030', 'BIG5']
+# Encodings iconv may provide but for which CPython has no built-in codec
+# (cp1047 is EBCDIC, i.e. not ASCII-compatible).
+_ICONV_ONLY = ['cp1047', 'cp1133', 'GEORGIAN-PS', 'ARMSCII-8']
+
+
+@unittest.skipUnless(hasattr(codecs, 'iconv_encode'),
+                     'the iconv codec is not available')
+class IconvTest(unittest.TestCase):
+
+    def require(self, *names):
+        for name in names:
+            if iconv_encoding_available(name):
+                return name
+        self.skipTest('no suitable iconv encoding is available')
+
+    def require_single_byte(self):
+        for enc, text in _ICONV_SINGLE_BYTE:
+            if iconv_encoding_available(enc):
+                return enc, text
+        self.skipTest('no single-byte iconv encoding is available')
+
+    def test_unknown_encoding(self):
+        self.assertRaises(LookupError, codecs.iconv_encode, 'no-such-enc-42', 'a')
+        self.assertRaises(LookupError, codecs.iconv_decode, 'no-such-enc-42', b'a')
+        self.assertRaises(LookupError, codecs.lookup, 'iconv:no-such-enc-42')
+
+    def test_roundtrip(self):
+        cases = _ICONV_SINGLE_BYTE + [(enc, '日本語') for enc in _ICONV_MULTIBYTE]
+        tested = False
+        for enc, text in cases:
+            if not iconv_encoding_available(enc):
+                continue
+            tested = True
+            with self.subTest(encoding=enc):
+                data = codecs.iconv_encode(enc, text)[0]
+                decoded, consumed = codecs.iconv_decode(enc, data, 'strict', True)
+                self.assertEqual(decoded, text)
+                self.assertEqual(consumed, len(data))
+        if not tested:
+            self.skipTest('none of the test encodings are available')
+
+    def test_encode_errors(self):
+        # A non-ASCII character is not representable in ASCII.
+        enc = self.require('ASCII')
+        with self.assertRaises(UnicodeEncodeError) as cm:
+            codecs.iconv_encode(enc, 'a€b')
+        self.assertEqual((cm.exception.encoding, cm.exception.start,
+                          cm.exception.end), (enc, 1, 2))
+        self.assertEqual(codecs.iconv_encode(enc, 'a€b', 'replace')[0], b'a?b')
+        self.assertEqual(codecs.iconv_encode(enc, 'a€b', 'ignore')[0], b'ab')
+        self.assertEqual(codecs.iconv_encode(enc, 'a€b', 'backslashreplace')[0],
+                         b'a\\u20acb')
+        self.assertEqual(codecs.iconv_encode(enc, 'a€b', 'xmlcharrefreplace')[0],
+                         b'a&#8364;b')
+
+    def test_decode_errors(self):
+        enc = self.require('ASCII')
+        bad = b'a\xffb'
+        with self.assertRaises(UnicodeDecodeError) as cm:
+            codecs.iconv_decode(enc, bad, 'strict', True)
+        self.assertEqual((cm.exception.encoding, cm.exception.start), (enc, 1))
+        self.assertEqual(codecs.iconv_decode(enc, bad, 'replace', True)[0],
+                         'a\ufffdb')
+        self.assertEqual(codecs.iconv_decode(enc, bad, 'ignore', True)[0], 'ab')
+        self.assertEqual(codecs.iconv_decode(enc, bad, 'backslashreplace', True)[0],
+                         'a\\xffb')
+
+    def test_stateful_decode(self):
+        enc = self.require(*_ICONV_MULTIBYTE)
+        full = codecs.iconv_encode(enc, '日本')[0]
+        # A trailing incomplete multibyte sequence is deferred when final is
+        # false, and reported through *consumed*.
+        text, consumed = codecs.iconv_decode(enc, full[:-1], 'strict', False)
+        self.assertEqual(text, '日')
+        self.assertEqual(consumed, len(full) - 2)
+        # With final=True the same input is an error.
+        self.assertRaises(UnicodeDecodeError,
+                          codecs.iconv_decode, enc, full[:-1], 'strict', True)
+
+    def test_empty(self):
+        enc = self.require('ASCII')
+        self.assertEqual(codecs.iconv_encode(enc, ''), (b'', 0))
+        self.assertEqual(codecs.iconv_decode(enc, b'', 'strict', True), ('', 0))
+
+    def test_lookup_bare_name(self):
+        # An encoding that iconv knows but Python has no built-in codec for.
+        for name in _ICONV_ONLY:
+            if (iconv_encoding_available(name)
+                    and encodings.search_function(name) is None):
+                break
+        else:
+            self.skipTest('no iconv-only encoding is available')
+        info = codecs.lookup(name)
+        self.assertEqual(info.name, name.lower())
+        # The encoding need not be ASCII-compatible (e.g. EBCDIC), so just
+        # check that it round-trips.
+        self.assertEqual('abc'.encode(name).decode(name), 'abc')
+
+    def test_lookup_does_not_shadow_builtin(self):
+        # Built-in codecs must win over the iconv fallback.
+        self.assertEqual(codecs.lookup('utf-8').name, 'utf-8')
+        self.assertEqual(codecs.lookup('ascii').name, 'ascii')
+
+    def test_iconv_prefix_forces_engine(self):
+        # These candidates all have a built-in codec to compare against.
+        enc, text = self.require_single_byte()
+        info = codecs.lookup('iconv:' + enc)
+        # The registry lower-cases the requested name.
+        self.assertEqual(info.name, ('iconv:' + enc).lower())
+        self.assertEqual(text.encode('iconv:' + enc), text.encode(enc))
+        self.assertEqual(text.encode('iconv:' + enc).decode('iconv:' + enc), text)
+
+    def test_incremental_decode(self):
+        enc = self.require(*_ICONV_MULTIBYTE)
+        text = '日本語'
+        data = codecs.encode(text, 'iconv:' + enc)
+        dec = codecs.getincrementaldecoder('iconv:' + enc)()
+        out = ''.join(dec.decode(data[i:i+1]) for i in range(len(data)))
+        out += dec.decode(b'', True)
+        self.assertEqual(out, text)
+
+    def test_stream(self):
+        enc = self.require(*_ICONV_MULTIBYTE)
+        text = '日本語'
+        raw = codecs.encode(text, 'iconv:' + enc)
+        reader = codecs.getreader('iconv:' + enc)(io.BytesIO(raw))
+        self.assertEqual(reader.read(), text)
+
+    def test_encode_kinds(self):
+        # The string's own buffer is fed to iconv per storage kind; check each
+        # of the 1-, 2- and 4-byte kinds against the built-in codec.
+        enc = self.require('UTF-8')
+        for text in ('Gr\xfc\xdfe', 'ĀāĂ', 'A\U0001f389B'):
+            with self.subTest(text=text):
+                self.assertEqual(text.encode('iconv:' + enc), text.encode(enc))
+
+    def test_encode_surrogateescape(self):
+        # A lone surrogate lives in the 2-byte kind and round-trips.
+        enc = self.require('ASCII')
+        data = b'ab\xff'
+        s = data.decode(enc, 'surrogateescape')
+        self.assertEqual(s.encode('iconv:' + enc, 'surrogateescape'), data)
+
+    def test_encode_surrogate_pair(self):
+        # A surrogate pair must stay two code points, never combined into an
+        # astral character (as UTF-16 would): backslashreplace escapes each
+        # surrogate separately, not the escape of a single combined character.
+        pair = '\ud83c\udf89'
+        latin1 = self.require('ISO-8859-1', 'ASCII')
+        self.assertEqual(pair.encode('iconv:' + latin1, 'backslashreplace'),
+                         rb'\ud83c\udf89')
+
+
 class ASCIITest(unittest.TestCase):
     def test_encode(self):
         self.assertEqual('abc123'.encode('ascii'), b'abc123')

@@ -1656,6 +1656,7 @@ FROZEN_FILES_IN = \
 		Lib/encodings/aliases.py \
 		Lib/encodings/utf_8.py \
 		Lib/encodings/_win_cp_codecs.py \
+		Lib/encodings/_iconv_codecs.py \
 		Lib/io.py \
 		Lib/_collections_abc.py \
 		Lib/_sitebuiltins.py \
@@ -1686,6 +1687,7 @@ FROZEN_FILES_OUT = \
 		Python/frozen_modules/encodings.aliases.h \
 		Python/frozen_modules/encodings.utf_8.h \
 		Python/frozen_modules/encodings._win_cp_codecs.h \
+		Python/frozen_modules/encodings._iconv_codecs.h \
 		Python/frozen_modules/io.h \
 		Python/frozen_modules/_collections_abc.h \
 		Python/frozen_modules/_sitebuiltins.h \
@@ -1747,6 +1749,9 @@ Python/frozen_modules/encodings.utf_8.h: Lib/encodings/utf_8.py $(FREEZE_MODULE_
 Python/frozen_modules/encodings._win_cp_codecs.h: Lib/encodings/_win_cp_codecs.py $(FREEZE_MODULE_DEPS)
 	$(FREEZE_MODULE) encodings._win_cp_codecs $(srcdir)/Lib/encodings/_win_cp_codecs.py Python/frozen_modules/encodings._win_cp_codecs.h
 
+Python/frozen_modules/encodings._iconv_codecs.h: Lib/encodings/_iconv_codecs.py $(FREEZE_MODULE_DEPS)
+	$(FREEZE_MODULE) encodings._iconv_codecs $(srcdir)/Lib/encodings/_iconv_codecs.py Python/frozen_modules/encodings._iconv_codecs.h
+
 Python/frozen_modules/io.h: Lib/io.py $(FREEZE_MODULE_DEPS)
 	$(FREEZE_MODULE) io $(srcdir)/Lib/io.py Python/frozen_modules/io.h
 

diff --git a/Misc/NEWS.d/next/Library/2026-07-03-19-51-47.gh-issue-152997.i1ErzB.rst b/Misc/NEWS.d/next/Library/2026-07-03-19-51-47.gh-issue-152997.i1ErzB.rst
@@ -0,0 +1,6 @@
+On platforms providing the C library's :manpage:`iconv(3)` function,
+the :mod:`codecs` module now exposes every encoding known to ``iconv``
+for which Python has no built-in codec.
+Such an encoding can be used by its name (for example ``"cp1133"``)
+or, to force the ``iconv``-based engine even when a built-in codec exists,
+with an ``"iconv:"`` prefix (for example ``"iconv:latin1"``).