codepoint_to_chr (regex)

codepoint_to_chr( -> chr(
2025-11-22 06:23:02 -05:00 · 2025-11-16 18:27:08 +01:00 · 2025-11-16 18:27:08 +01:00 · 47257e1262
commit 47257e1262
parent 94a51a1acc
17 changed files with 39 additions and 48 deletions
--- a/setup/translations.py
+++ b/setup/translations.py
@@ -23,7 +23,7 @@ from collections import defaultdict
 from functools import lru_cache, partial
 from locale import normalize as normalize_locale

-from polyglot.builtins import codepoint_to_chr, iteritems
+from polyglot.builtins import iteritems
 from setup import Command, __appname__, __version__, build_cache_dir, dump_json, edit_file, is_ci, require_git_master
 from setup.iso_codes import iso_data
 from setup.parallel_build import batched_parallel_jobs
@@ -128,7 +128,7 @@ class POT(Command):  # {{{
        ans = []
        for lineno, msg in msgs:
            ans.append(f'#: {path}:{lineno}')
-            slash = codepoint_to_chr(92)
+            slash = chr(92)
            msg = msg.replace(slash, slash*2).replace('"', r'\"').replace('\n',
                    r'\n').replace('\r', r'\r').replace('\t', r'\t')
            ans.append(f'msgid "{msg}"')
--- a/src/calibre/ebooks/docx/fonts.py
+++ b/src/calibre/ebooks/docx/fonts.py
@@ -13,7 +13,6 @@ from calibre.utils.filenames import ascii_filename
 from calibre.utils.fonts.scanner import NoFonts, font_scanner
 from calibre.utils.fonts.utils import is_truetype_font, panose_to_css_generic_family
 from calibre.utils.icu import ord_string
-from polyglot.builtins import codepoint_to_chr

 Embed = namedtuple('Embed', 'name key subsetted')

@@ -124,7 +123,7 @@ def do_map(m, points):
        if base < p < limit:
            yield m[p - base]
        else:
-            yield codepoint_to_chr(p)
+            yield chr(p)


 def map_symbol_text(text, font):
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@@ -20,7 +20,7 @@ from calibre.ebooks.lit.maps import HTML_MAP, OPF_MAP
 from calibre.ebooks.oeb.base import urlnormalize, xpath
 from calibre.ebooks.oeb.reader import OEBReader
 from calibre_extensions import lzx, msdes
-from polyglot.builtins import codepoint_to_chr, itervalues, string_or_bytes
+from polyglot.builtins import itervalues, string_or_bytes
 from polyglot.urllib import unquote as urlunquote
 from polyglot.urllib import urldefrag

@@ -109,7 +109,7 @@ def read_utf8_char(bytes, pos):
                raise LitError(
                    f'Invalid UTF8 character: {bytes[pos:pos+i]!r}')
            c = (c << 6) | (b & 0x3F)
-    return codepoint_to_chr(c), pos+elsize
+    return chr(c), pos+elsize


 def consume_sized_utf8_string(bytes, zpad=False):
@@ -250,9 +250,9 @@ class UnBinary:
                    else:
                        dynamic_tag += 1
                        errors += 1
-                        tag_name = '?'+codepoint_to_chr(tag)+'?'
+                        tag_name = '?'+chr(tag)+'?'
                        current_map = self.tag_to_attr_map[tag]
-                        print(f'WARNING: tag {codepoint_to_chr(tag)} unknown')
+                        print(f'WARNING: tag {chr(tag)} unknown')
                    buf.write(encode(tag_name))
                elif flags & FLAG_CLOSING:
                    if depth == 0:
--- a/src/calibre/ebooks/lit/writer.py
+++ b/src/calibre/ebooks/lit/writer.py
@@ -26,7 +26,7 @@ from calibre.ebooks.lit.reader import DirectoryEntry
 from calibre.ebooks.oeb.base import CSS_MIME, OEB_DOCS, OEB_STYLES, OPF_MIME, XHTML_MIME, XML, XML_NS, prefixname, urlnormalize
 from calibre.ebooks.oeb.stylizer import Stylizer
 from calibre_extensions import msdes
-from polyglot.builtins import codepoint_to_chr, native_string_type, string_or_bytes
+from polyglot.builtins import native_string_type, string_or_bytes
 from polyglot.urllib import unquote, urldefrag

 __all__ = ['LitWriter']
@@ -162,7 +162,7 @@ class ReBinary:
        for value in values:
            if isinstance(value, numbers.Integral):
                try:
-                    value = codepoint_to_chr(value)
+                    value = chr(value)
                except OverflowError:
                    self.logger.warn('Unicode overflow for integer:', value)
                    value = '?'
@@ -215,9 +215,9 @@ class ReBinary:
                path, frag = urldefrag(value)
                if self.item:
                    path = self.item.abshref(path)
-                prefix = codepoint_to_chr(3)
+                prefix = chr(3)
                if path in self.manifest.hrefs:
-                    prefix = codepoint_to_chr(2)
+                    prefix = chr(2)
                    value = self.manifest.hrefs[path].id
                    if frag:
                        value = '#'.join((value, frag))
@@ -280,9 +280,9 @@ class ReBinary:
            self.logger.warn(f'More than six anchors in file {self.item.href!r}. '
                'Some links may not work properly.')
        data = io.BytesIO()
-        data.write(codepoint_to_chr(len(self.anchors)).encode('utf-8'))
+        data.write(chr(len(self.anchors)).encode('utf-8'))
        for anchor, offset in self.anchors:
-            data.write(codepoint_to_chr(len(anchor)).encode('utf-8'))
+            data.write(chr(len(anchor)).encode('utf-8'))
            if isinstance(anchor, str):
                anchor = anchor.encode('utf-8')
            data.write(anchor)
@@ -521,9 +521,9 @@ class LitWriter:
                item.offset = offset \
                    if state in ('linear', 'nonlinear') else 0
                data.write(pack('<I', item.offset))
-                entry = [codepoint_to_chr(len(id)), str(id),
-                         codepoint_to_chr(len(href)), str(href),
-                         codepoint_to_chr(len(media_type)), str(media_type)]
+                entry = [chr(len(id)), str(id),
+                         chr(len(href)), str(href),
+                         chr(len(media_type)), str(media_type)]
                for value in entry:
                    data.write(value.encode('utf-8'))
                data.write(b'\0')
--- a/src/calibre/ebooks/metadata/kdl.py
+++ b/src/calibre/ebooks/metadata/kdl.py
@@ -14,12 +14,11 @@ from calibre import browser
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.metadata.book.base import Metadata
-from polyglot.builtins import codepoint_to_chr
 from polyglot.urllib import parse_qs, quote_plus

 URL = 'http://ww2.kdl.org/libcat/WhatsNext.asp?AuthorLastName={0}&AuthorFirstName=&SeriesName=&BookTitle={1}&CategoryID=0&cmdSearch=Search&Search=1&grouping='

-_ignore_starts = '\'"'+''.join(codepoint_to_chr(x) for x in list(range(0x2018, 0x201e))+[0x2032, 0x2033])
+_ignore_starts = '\'"'+''.join(chr(x) for x in list(range(0x2018, 0x201e))+[0x2032, 0x2033])


 def get_series(title, authors, timeout=60):
--- a/src/calibre/ebooks/metadata/mobi.py
+++ b/src/calibre/ebooks/metadata/mobi.py
@@ -14,7 +14,6 @@ from calibre.ebooks.mobi.utils import rescale_image
 from calibre.utils.date import now as nowf
 from calibre.utils.imghdr import what
 from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
-from polyglot.builtins import codepoint_to_chr

 '''
 Retrieve and modify in-place Mobipocket book metadata.
@@ -279,7 +278,7 @@ class MetadataUpdater:

    def hexdump(self, src, length=16):
        # Diagnostic
-        FILTER=''.join([((len(repr(codepoint_to_chr(x)))==3) and codepoint_to_chr(x)) or '.' for x in range(256)])
+        FILTER=''.join([((len(repr(chr(x)))==3) and chr(x)) or '.' for x in range(256)])
        N=0
        result=''
        while src:
--- a/src/calibre/ebooks/metadata/rtf.py
+++ b/src/calibre/ebooks/metadata/rtf.py
@@ -10,7 +10,7 @@ import re

 from calibre import force_unicode
 from calibre.ebooks.metadata import MetaInformation
-from polyglot.builtins import codepoint_to_chr, int_to_byte, string_or_bytes
+from polyglot.builtins import int_to_byte, string_or_bytes

 title_pat    = re.compile(br'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
 author_pat   = re.compile(br'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
@@ -93,7 +93,7 @@ def decode(raw, codec):

    def uni(match):
        try:
-            return codepoint_to_chr(int(match.group(1)))
+            return chr(int(match.group(1)))
        except Exception:
            return '?'

--- a/src/calibre/ebooks/metadata/topaz.py
+++ b/src/calibre/ebooks/metadata/topaz.py
@@ -10,7 +10,7 @@ from struct import pack

 from calibre import force_unicode
 from calibre.ebooks.metadata import MetaInformation
-from polyglot.builtins import codepoint_to_chr, int_to_byte
+from polyglot.builtins import int_to_byte


 def is_dkey(x):
@@ -154,7 +154,7 @@ class MetadataUpdater:

    def dump_hex(self, src, length=16):
        ''' Diagnostic '''
-        FILTER=''.join([((len(repr(codepoint_to_chr(x)))==3) and codepoint_to_chr(x)) or '.' for x in range(256)])
+        FILTER=''.join([((len(repr(chr(x)))==3) and chr(x)) or '.' for x in range(256)])
        N=0
        result=''
        while src:
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -30,7 +30,7 @@ from calibre.utils.icu import title_case as icu_title
 from calibre.utils.localization import __, is_rtl_lang
 from calibre.utils.short_uuid import uuid4
 from calibre.utils.xml_parse import safe_xml_fromstring
-from polyglot.builtins import codepoint_to_chr, iteritems, itervalues, string_or_bytes
+from polyglot.builtins import iteritems, itervalues, string_or_bytes
 from polyglot.urllib import unquote as urlunquote
 from polyglot.urllib import urldefrag, urljoin, urlparse, urlunparse

@@ -452,7 +452,7 @@ def serialize(data, media_type, pretty_print=False):
    return b'' if data is None else bytes(data)


-ASCII_CHARS   = frozenset(codepoint_to_chr(x) for x in range(128))
+ASCII_CHARS   = frozenset(chr(x) for x in range(128))
 UNIBYTE_CHARS = frozenset(x.encode('ascii') for x in ASCII_CHARS)
 USAFE         = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                 'abcdefghijklmnopqrstuvwxyz'
--- a/src/calibre/ebooks/pdb/plucker/reader.py
+++ b/src/calibre/ebooks/pdb/plucker/reader.py
@@ -12,7 +12,6 @@ from calibre.ebooks.compression.palmdoc import decompress_doc
 from calibre.ebooks.pdb.formatreader import FormatReader
 from calibre.utils.img import Canvas, image_from_data, save_cover_data_to
 from calibre.utils.imghdr import identify
-from polyglot.builtins import codepoint_to_chr

 DATATYPE_PHTML = 0
 DATATYPE_PHTML_COMPRESSED = 1
@@ -700,7 +699,7 @@ class Reader(FormatReader):
            elif c == 0xa0:
                html += '&nbsp;'
            else:
-                html += codepoint_to_chr(c)
+                html += chr(c)
            offset += 1
            if offset in paragraph_offsets:
                need_set_p_id = True
--- a/src/calibre/ebooks/pdf/render/common.py
+++ b/src/calibre/ebooks/pdf/render/common.py
@@ -14,7 +14,6 @@ from io import BytesIO
 from calibre.utils.logging import default_log
 from calibre_extensions.speedup import pdf_float
 from polyglot.binary import as_hex_bytes
-from polyglot.builtins import codepoint_to_chr

 EOL = b'\n'

@@ -91,7 +90,7 @@ class Name(str):
        raw = bytearray(raw)
        sharp = ord(b'#')
        buf = (
-            codepoint_to_chr(x).encode('ascii') if 33 < x < 126 and x != sharp else
+            chr(x).encode('ascii') if 33 < x < 126 and x != sharp else
            f'#{x:x}'.encode('ascii') for x in raw)
        stream.write(b'/'+b''.join(buf))

--- a/src/calibre/ebooks/pdf/render/fonts.py
+++ b/src/calibre/ebooks/pdf/render/fonts.py
@@ -14,7 +14,6 @@ from calibre import as_unicode
 from calibre.ebooks.pdf.render.common import Array, Dictionary, Name, Stream, String
 from calibre.utils.fonts.sfnt.subset import NoGlyphs, UnsupportedFont, pdf_subset
 from calibre.utils.short_uuid import uuid4
-from polyglot.builtins import codepoint_to_chr

 STANDARD_FONTS = {
    'Times-Roman', 'Helvetica', 'Courier', 'Symbol', 'Times-Bold',
@@ -121,7 +120,7 @@ class Font:
        self.metrics, self.compress = metrics, compress
        self.is_otf = self.metrics.is_otf
        self.subset_tag = str(
-            re.sub(r'.', lambda m: codepoint_to_chr(int(m.group())+ord('A')), oct(num).replace('o', '')
+            re.sub(r'.', lambda m: chr(int(m.group())+ord('A')), oct(num).replace('o', '')
        )).rjust(6, 'A')
        self.font_stream = FontStream(metrics.is_otf, compress=compress)
        try:
--- a/src/calibre/ebooks/rtf2xml/tokenize.py
+++ b/src/calibre/ebooks/rtf2xml/tokenize.py
@@ -16,7 +16,6 @@ import re
 from calibre.ebooks.rtf2xml import copy
 from calibre.ptempfile import better_mktemp
 from calibre.utils.mreplace import MReplace
-from polyglot.builtins import codepoint_to_chr

 from . import open_for_read, open_for_write

@@ -98,7 +97,7 @@ class Tokenize:
            uni_len = len(match_obj.group(0))
            if uni_char < 0:
                uni_char += 65536
-            uni_char = codepoint_to_chr(uni_char).encode('ascii', 'xmlcharrefreplace').decode('ascii')
+            uni_char = chr(uni_char).encode('ascii', 'xmlcharrefreplace').decode('ascii')
            self.__uc_char = self.__uc_value[-1]
            # there is only an unicode char
            if len(token)<= uni_len:
--- a/src/calibre/gui2/tweak_book/char_select.py
+++ b/src/calibre/gui2/tweak_book/char_select.py
@@ -43,7 +43,6 @@ from calibre.gui2.tweak_book.widgets import Dialog
 from calibre.gui2.widgets import BusyCursor
 from calibre.gui2.widgets2 import HistoryLineEdit2
 from calibre.startup import connect_lambda
-from calibre.utils.icu import safe_chr as codepoint_to_chr
 from calibre.utils.unicode_names import character_name_from_code, points_for_word
 from calibre_extensions.progress_indicator import set_no_activate_on_click

@@ -590,7 +589,7 @@ class CharDelegate(QStyledItemDelegate):
        f = option.font
        f.setPixelSize(option.rect.height() - 8)
        painter.setFont(f)
-        painter.drawText(option.rect, Qt.AlignmentFlag.AlignHCenter | Qt.AlignmentFlag.AlignBottom | Qt.TextFlag.TextSingleLine, codepoint_to_chr(charcode))
+        painter.drawText(option.rect, Qt.AlignmentFlag.AlignHCenter | Qt.AlignmentFlag.AlignBottom | Qt.TextFlag.TextSingleLine, chr(charcode))

    def paint_non_printing(self, painter, option, charcode):
        text = self.np_pat.sub(r'\n\1', non_printing[charcode])
@@ -630,7 +629,7 @@ class CharView(QListView):
        except (TypeError, ValueError):
            pass
        else:
-            self.char_selected.emit(codepoint_to_chr(char_code))
+            self.char_selected.emit(chr(char_code))

    def set_allow_drag_and_drop(self, enabled):
        if not enabled:
@@ -681,9 +680,9 @@ class CharView(QListView):
                pass
            else:
                m = QMenu(self)
-                m.addAction(QIcon.ic('edit-copy.png'), _('Copy %s to clipboard') % codepoint_to_chr(char_code), partial(self.copy_to_clipboard, char_code))
+                m.addAction(QIcon.ic('edit-copy.png'), _('Copy %s to clipboard') % chr(char_code), partial(self.copy_to_clipboard, char_code))
                m.addAction(QIcon.ic('rating.png'),
-                            (_('Remove %s from favorites') if self.showing_favorites else _('Add %s to favorites')) % codepoint_to_chr(char_code),
+                            (_('Remove %s from favorites') if self.showing_favorites else _('Add %s to favorites')) % chr(char_code),
                            partial(self.remove_from_favorites, char_code))
                if self.showing_favorites:
                    m.addAction(_('Restore favorites to defaults'), self.restore_defaults)
@@ -697,7 +696,7 @@ class CharView(QListView):

    def copy_to_clipboard(self, char_code):
        c = QApplication.clipboard()
-        c.setText(codepoint_to_chr(char_code))
+        c.setText(chr(char_code))

    def remove_from_favorites(self, char_code):
        existing = tprefs['charmap_favorites']
--- a/src/calibre/gui2/tweak_book/editor/snippets.py
+++ b/src/calibre/gui2/tweak_book/editor/snippets.py
@@ -42,7 +42,7 @@ from calibre.gui2.tweak_book.widgets import Dialog, PlainTextEdit
 from calibre.utils.config import JSONConfig
 from calibre.utils.icu import string_length as strlen
 from calibre.utils.localization import localize_user_manual_link
-from polyglot.builtins import codepoint_to_chr, iteritems
+from polyglot.builtins import iteritems


 def string_length(x):
@@ -118,7 +118,7 @@ escape = unescape = None
 def escape_funcs():
    global escape, unescape
    if escape is None:
-        escapem = {('\\' + x):codepoint_to_chr(i+1) for i, x in enumerate('\\${}')}
+        escapem = {('\\' + x):chr(i+1) for i, x in enumerate('\\${}')}
        escape_pat = re.compile('|'.join(map(re.escape, escapem)))
        def escape(x):
            return escape_pat.sub(lambda m: escapem[m.group()], x.replace('\\\\', '\x01'))
--- a/src/calibre/utils/cleantext.py
+++ b/src/calibre/utils/cleantext.py
@@ -80,15 +80,15 @@ def unescape(text, rm=False, rchar=''):
            # character reference
            try:
                if text[:3] == '&#x':
-                    return codepoint_to_chr(int(text[3:-1], 16))
+                    return chr(int(text[3:-1], 16))
                else:
-                    return codepoint_to_chr(int(text[2:-1]))
+                    return chr(int(text[2:-1]))
            except ValueError:
                pass
        else:
            # named entity
            try:
-                text = codepoint_to_chr(name2codepoint[text[1:-1]])
+                text = chr(name2codepoint[text[1:-1]])
            except KeyError:
                pass
        if rm:
--- a/src/calibre/utils/search_query_parser.py
+++ b/src/calibre/utils/search_query_parser.py
@@ -26,7 +26,6 @@ from calibre.utils.icu import sort_key
 from calibre.utils.icu import sort_key
 from calibre.utils.localization import _
 from polyglot.binary import as_hex_unicode, from_hex_unicode
-from polyglot.builtins import codepoint_to_chr

 '''
 This class manages access to the preference holding the saved search queries.
@@ -151,7 +150,7 @@ class Parser:
    WORD = 2
    QUOTED_WORD = 3
    EOF = 4
-    REPLACEMENTS = tuple(('\\' + x, codepoint_to_chr(i + 1)) for i, x in enumerate('\\"()'))
+    REPLACEMENTS = tuple(('\\' + x, chr(i + 1)) for i, x in enumerate('\\"()'))

    # the sep must be a printable character sequence that won't actually appear naturally
    docstring_sep = '□ༀ؆'  # Unicode white square, Tibetan Om, Arabic-Indic Cube Root