decode unicode-escape (extra-edit)

2025-07-08 10:44:09 -04:00 · 2025-01-24 11:14:16 +01:00 · 2025-01-24 11:14:16 +01:00 · 534293eabc
commit 534293eabc
parent 12cb8b2e58
17 changed files with 87 additions and 87 deletions
--- a/src/calibre/customize/profiles.py
+++ b/src/calibre/customize/profiles.py
@ -299,8 +299,8 @@ class iPadOutput(OutputProfile):
        }
    ]
-    ratings_char = '\u2605'            # filled star
+    ratings_char = '★'            # filled star
-    empty_ratings_char = '\u2606'      # hollow star
+    empty_ratings_char = '☆'      # hollow star
    touchscreen = True
    # touchscreen_news_css {{{
@ -677,8 +677,8 @@ class KindleOutput(OutputProfile):
    supports_mobi_indexing = True
    periodical_date_in_title = False
-    empty_ratings_char = '\u2606'
+    empty_ratings_char = '☆'
-    ratings_char = '\u2605'
+    ratings_char = '★'
    mobi_ems_per_blockquote = 2.0
@ -696,8 +696,8 @@ class KindleDXOutput(OutputProfile):
    # comic_screen_size         = (741, 1022)
    supports_mobi_indexing = True
    periodical_date_in_title = False
-    empty_ratings_char = '\u2606'
+    empty_ratings_char = '☆'
-    ratings_char = '\u2605'
+    ratings_char = '★'
    mobi_ems_per_blockquote = 2.0
--- a/src/calibre/ebooks/chm/metadata.py
+++ b/src/calibre/ebooks/chm/metadata.py
@ -77,7 +77,7 @@ def _get_comments(soup):
    pages = (_metadata_from_span(soup, 'pages') or _metadata_from_table(soup, 'pages'))
    try:
        # date span can have copyright symbols in it...
-        date = date.replace('\u00a9', '').strip()
+        date = date.replace('©', '').strip()
        # and pages often comes as '(\d+ pages)'
        pages = re.search(r'\d+', pages).group(0)
        return f'Published {date}, {pages} pages.'
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@ -19,20 +19,20 @@ XLINK_NS     = 'http://www.w3.org/1999/xlink'
 _span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
 LIGATURES = {
-#        '\u00c6': 'AE',
+#        'Æ': 'AE',
-#        '\u00e6': 'ae',
+#        'æ': 'ae',
-#        '\u0152': 'OE',
+#        'Œ': 'OE',
-#        '\u0153': 'oe',
+#        'œ': 'oe',
-#        '\u0132': 'IJ',
+#        'Ĳ': 'IJ',
-#        '\u0133': 'ij',
+#        'ĳ': 'ij',
-#        '\u1D6B': 'ue',
+#        'ᵫ': 'ue',
-        '\uFB00': 'ff',
+        'ﬀ': 'ff',
-        '\uFB01': 'fi',
+        'ﬁ': 'fi',
-        '\uFB02': 'fl',
+        'ﬂ': 'fl',
-        '\uFB03': 'ffi',
+        'ﬃ': 'ffi',
-        '\uFB04': 'ffl',
+        'ﬄ': 'ffl',
-        '\uFB05': 'ft',
+        'ﬅ': 'ft',
-        '\uFB06': 'st',
+        'ﬆ': 'st',
        }
 _ligpat = re.compile('|'.join(LIGATURES))
@ -240,7 +240,7 @@ class Dehyphenator:
            else:
                if self.verbose > 2:
                    self.log('            Cleanup:returning original text '+firsthalf+' + linefeed '+secondhalf)
-                return firsthalf+'\u2014'+wraptags+secondhalf
+                return firsthalf+'—'+wraptags+secondhalf
        else:
            if self.format == 'individual_words' and len(firsthalf) + len(secondhalf) <= 6:
@ -274,7 +274,7 @@ class Dehyphenator:
                r'</[iub]>\s*<p>\s*<[iub]>)\s*(?P<secondpart>[\w\d]+)')% length)
        elif format == 'txt':
            intextmatch = re.compile(
-                '(?<=.{%i})(?P<firstpart>[^\\W\\-]+)(-|‐)(\u0020|\u0009)*(?P<wraptags>(\n(\u0020|\u0009)*)+)(?P<secondpart>[\\w\\d]+)'% length)
+                '(?<=.{%i})(?P<firstpart>[^\\W\\-]+)(-|‐)( |\t)*(?P<wraptags>(\n( |\t)*)+)(?P<secondpart>[\\w\\d]+)'% length)
        elif format == 'individual_words':
            intextmatch = re.compile(
                r'(?!<)(?P<firstpart>[^\W\-]+)(-|‐)\s*(?P<secondpart>\w+)(?![^<]*?>)', re.UNICODE)
--- a/src/calibre/ebooks/html_entities.py
+++ b/src/calibre/ebooks/html_entities.py
@ -2194,8 +2194,8 @@ def find_tests():
            t('a&amp;b&lt;c', 'a&b<c')
            t('a&acE;b', 'a∾̳b')
            t('a&#1234;b', 'aӒb')
-            t('a&#X1234;b', 'a\u1234b')
+            t('a&#X1234;b', 'aሴb')
-            t('a&#x1034fA;b', 'a\U001034fAb')
+            t('a&#x1034fA;b', 'a\U001034FAb')
            t('a&#0;b&#x000;c', 'abc')
            x('&amp;&lt;&gt;&apos;&quot;', '&amp;&lt;&gt;&apos;&quot;')
--- a/src/calibre/ebooks/lrf/html/convert_from.py
+++ b/src/calibre/ebooks/lrf/html/convert_from.py
@ -803,7 +803,7 @@ class HTMLConverter:
            src = src.lstrip()
            f = src[0]
            next = 1
-            if f in ("'", '"', '\u201c', '\u2018', '\u201d', '\u2019'):
+            if f in ("'", '"', '“', '‘', '”', '’'):
                if len(src) >= 2:
                    next = 2
                    f = src[:2]
@ -819,7 +819,7 @@ class HTMLConverter:
        def append_text(src):
            fp, key, variant = self.font_properties(css)
-            for x, y in [('\xad', ''), ('\xa0', ' '), ('\ufb00', 'ff'), ('\ufb01', 'fi'), ('\ufb02', 'fl'), ('\ufb03', 'ffi'), ('\ufb04', 'ffl')]:
+            for x, y in [('\xad', ''), ('\xa0', ' '), ('ﬀ', 'ff'), ('ﬁ', 'fi'), ('ﬂ', 'fl'), ('ﬃ', 'ffi'), ('ﬄ', 'ffl')]:
                src = src.replace(x, y)
            def valigner(x):
@ -1624,7 +1624,7 @@ class HTMLConverter:
                            in_ol = parent.name.lower() == 'ol'
                            break
                        parent = parent.parent
-                    prepend = str(self.list_counter)+'. ' if in_ol else '\u2022' + ' '
+                    prepend = str(self.list_counter)+'. ' if in_ol else '•' + ' '
                    self.current_para.append(Span(prepend))
                    self.process_children(tag, tag_css, tag_pseudo_css)
                    if in_ol:
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@ -529,11 +529,11 @@ class MobiMLizer:
            t = elem.text
            if not t:
                t = ''
-            elem.text = '\u201c' + t
+            elem.text = '“' + t
            t = elem.tail
            if not t:
                t = ''
-            elem.tail = '\u201d' + t
+            elem.tail = '”' + t
        text = None
        if elem.text:
            if istate.preserve or istate.pre_wrap:
--- a/src/calibre/ebooks/mobi/writer2/serializer.py
+++ b/src/calibre/ebooks/mobi/writer2/serializer.py
@ -362,7 +362,7 @@ class Serializer:
        text = text.replace('&', '&amp;')
        text = text.replace('<', '&lt;')
        text = text.replace('>', '&gt;')
-        text = text.replace('\u00AD', '')  # Soft-hyphen
+        text = text.replace('\u00ad', '')  # Soft-hyphen
        if quot:
            text = text.replace('"', '&quot;')
        if isinstance(text, str):
--- a/src/calibre/ebooks/pdf/reflow.py
+++ b/src/calibre/ebooks/pdf/reflow.py
@ -100,9 +100,9 @@ def adjacent_quotes(first_string, second_string):
    if fchar is not None:
        first_char = fchar.group(1)  # First non-space char
-    return bool((last_char == '\u0022' and first_char == '\u0022') \
+    return bool((last_char == '"' and first_char == '"') \
-             or (last_char == '\u2019' and first_char == '\u2018') \
+             or (last_char == '’' and first_char == '‘') \
-             or (last_char == '\u201d' and first_char == '\u201c'))
+             or (last_char == '”' and first_char == '“'))
 class Font:
--- a/src/calibre/ebooks/readability/htmls.py
+++ b/src/calibre/ebooks/readability/htmls.py
@ -24,9 +24,9 @@ def normalize_entities(cur_title):
        '\u2013':'-',
        '&mdash;': '-',
        '&ndash;': '-',
-        '\u00A0': ' ',
+        '\u00a0': ' ',
-        '\u00AB': '"',
+        '\u00ab': '"',
-        '\u00BB': '"',
+        '\u00bb': '"',
        '&quot;': '"',
    }
    for c, r in iteritems(entities):
--- a/src/calibre/gui2/init.py
+++ b/src/calibre/gui2/init.py
@ -1693,7 +1693,7 @@ def elided_text(text, font=None, width=300, pos='middle'):
        font = QApplication.instance().font()
    fm = (font if isinstance(font, QFontMetrics) else QFontMetrics(font))
    delta = 4
-    ellipsis = '\u2026'
+    ellipsis = '…'
    def remove_middle(x):
        mid = len(x) // 2
--- a/src/calibre/gui2/catalog/catalog_epub_mobi.py
+++ b/src/calibre/gui2/catalog/catalog_epub_mobi.py
@ -121,13 +121,13 @@ class PluginWidget(QWidget,Ui_Form):
                               'name':_('Read book'),
                               'field':_('Tags'),
                               'pattern':'+',
-                               'prefix':'\u2713'},
+                               'prefix':'✓'},
                              {'ordinal':1,
                               'enabled':True,
                               'name':_('Wishlist item'),
                               'field':_('Tags'),
                               'pattern':'Wishlist',
-                               'prefix':'\u00d7'},],
+                               'prefix':'×'},],
                             ['table_widget','table_widget']))
        self.OPTION_FIELDS = option_fields
@ -1351,52 +1351,52 @@ class PrefixRules(GenericRulesTable):
        # Create a list of prefixes for user selection
        raw_prefix_list = [
            ('Ampersand', '&'),
-            ('Angle left double', '\u00ab'),
+            ('Angle left double', '«'),
-            ('Angle left', '\u2039'),
+            ('Angle left', '‹'),
-            ('Angle right double', '\u00bb'),
+            ('Angle right double', '»'),
-            ('Angle right', '\u203a'),
+            ('Angle right', '›'),
-            ('Arrow carriage return', '\u21b5'),
+            ('Arrow carriage return', '↵'),
-            ('Arrow double', '\u2194'),
+            ('Arrow double', '↔'),
-            ('Arrow down', '\u2193'),
+            ('Arrow down', '↓'),
-            ('Arrow left', '\u2190'),
+            ('Arrow left', '←'),
-            ('Arrow right', '\u2192'),
+            ('Arrow right', '→'),
-            ('Arrow up', '\u2191'),
+            ('Arrow up', '↑'),
            ('Asterisk', '*'),
            ('At sign', '@'),
-            ('Bullet smallest', '\u22c5'),
+            ('Bullet smallest', '⋅'),
-            ('Bullet small', '\u00b7'),
+            ('Bullet small', '·'),
-            ('Bullet', '\u2022'),
+            ('Bullet', '•'),
-            ('Cards clubs', '\u2663'),
+            ('Cards clubs', '♣'),
-            ('Cards diamonds', '\u2666'),
+            ('Cards diamonds', '♦'),
-            ('Cards hearts', '\u2665'),
+            ('Cards hearts', '♥'),
-            ('Cards spades', '\u2660'),
+            ('Cards spades', '♠'),
            ('Caret', '^'),
-            ('Checkmark', '\u2713'),
+            ('Checkmark', '✓'),
-            ('Copyright circle c', '\u00a9'),
+            ('Copyright circle c', '©'),
-            ('Copyright circle r', '\u00ae'),
+            ('Copyright circle r', '®'),
-            ('Copyright trademark', '\u2122'),
+            ('Copyright trademark', '™'),
-            ('Currency cent', '\u00a2'),
+            ('Currency cent', '¢'),
            ('Currency dollar', '$'),
-            ('Currency euro', '\u20ac'),
+            ('Currency euro', '€'),
-            ('Currency pound', '\u00a3'),
+            ('Currency pound', '£'),
-            ('Currency yen', '\u00a5'),
+            ('Currency yen', '¥'),
-            ('Dagger double', '\u2021'),
+            ('Dagger double', '‡'),
-            ('Dagger', '\u2020'),
+            ('Dagger', '†'),
-            ('Degree', '\u00b0'),
+            ('Degree', '°'),
-            ('Dots3', '\u2234'),
+            ('Dots3', '∴'),
            ('Hash', '#'),
-            ('Infinity', '\u221e'),
+            ('Infinity', '∞'),
-            ('Lozenge', '\u25ca'),
+            ('Lozenge', '◊'),
-            ('Math divide', '\u00f7'),
+            ('Math divide', '÷'),
-            ('Math empty', '\u2205'),
+            ('Math empty', '∅'),
            ('Math equals', '='),
-            ('Math minus', '\u2212'),
+            ('Math minus', '−'),
-            ('Math plus circled', '\u2295'),
+            ('Math plus circled', '⊕'),
-            ('Math times circled', '\u2297'),
+            ('Math times circled', '⊗'),
-            ('Math times', '\u00d7'),
+            ('Math times', '×'),
-            ('Paragraph', '\u00b6'),
+            ('Paragraph', '¶'),
            ('Percent', '%'),
-            ('Plus-or-minus', '\u00b1'),
+            ('Plus-or-minus', '±'),
            ('Plus', '+'),
            ('Punctuation colon', ':'),
            ('Punctuation colon-semi', ';'),
@ -1405,10 +1405,10 @@ class PrefixRules(GenericRulesTable):
            ('Punctuation period', '.'),
            ('Punctuation slash back', '\\'),
            ('Punctuation slash forward', '/'),
-            ('Section', '\u00a7'),
+            ('Section', '§'),
            ('Tilde', '~'),
            ('Vertical bar', '|'),
-            ('Vertical bar broken', '\u00a6'),
+            ('Vertical bar broken', '¦'),
            ('_0', '0'),
            ('_1', '1'),
            ('_2', '2'),
--- a/src/calibre/gui2/metadata/single.py
+++ b/src/calibre/gui2/metadata/single.py
@ -465,7 +465,7 @@ class MetadataSingleDialogBase(QDialog):
    def update_window_title(self, *args):
        title = self.title.current_val
        if len(title) > 50:
-            title = title[:50] + '\u2026'
+            title = title[:50] + '…'
        self.setWindowTitle(BASE_TITLE + ' - ' +
                title + ' -' +
                _(' [%(num)d of %(tot)d]')%dict(num=self.current_row+1,
--- a/src/calibre/gui2/store/stores/litres_plugin.py
+++ b/src/calibre/gui2/store/stores/litres_plugin.py
@ -113,7 +113,7 @@ def format_price_in_RUR(price):
    '''
    if price and re.match(r'^\d*?\.\d*?$', price):
        try:
-            price = u'{:,.2F} \u20BD'.format(float(price))  # \u20BD => руб.
+            price = u'{:,.2F} \u20bd'.format(float(price))  # \u20bd => руб.
            price = price.replace(',', ' ').replace('.', ',', 1)
        except:
            pass
--- a/src/calibre/library/catalogs/epub_mobi.py
+++ b/src/calibre/library/catalogs/epub_mobi.py
@ -156,7 +156,7 @@ class EPUB_MOBI(CatalogPlugin):
                                 "Default: '%default'\n"
                                 "Applies to: AZW3, EPUB, MOBI output formats")),
                   Option('--prefix-rules',
-                          default="(('Read books','tags','+','\u2713'),('Wishlist item','tags','Wishlist','\u00d7'))",
+                          default="(('Read books','tags','+','✓'),('Wishlist item','tags','Wishlist','×'))",
                          dest='prefix_rules',
                          action=None,
                          help=_("Specifies the rules used to include prefixes indicating read books, wishlist items and other user-specified prefixes.\n"
--- a/src/calibre/utils/search_query_parser_test.py
+++ b/src/calibre/utils/search_query_parser_test.py
@ -227,7 +227,7 @@ class Tester(SearchQueryParser):
       'London : Jonathan Cape, 2005.',
       'lrf,txt'],
 259: ['My name is Red',
-       'Orhan Pamuk; translated from the Turkish by Erda\u011f G\xf6knar',
+       'Orhan Pamuk; translated from the Turkish by Erdağ G\xf6knar',
       'New York : Alfred A. Knopf, 2001.',
       'lit,lrf'],
 265: ['Harbinger', 'David Mack', 'Star Trek', 'lit,lrf'],
--- a/src/calibre/utils/terminal.py
+++ b/src/calibre/utils/terminal.py
@ -286,7 +286,7 @@ def test():
    text = [colored(t, fg=t)+'. '+colored(t, fg=t, bold=True)+'.' for t in
            ('red', 'yellow', 'green', 'white', 'cyan', 'magenta', 'blue',)]
    s.write('\n'.join(text))
-    u = '\u041c\u0438\u0445\u0430\u0438\u043b fällen'
+    u = 'Михаил fällen'
    print()
    s.write(u)
    print()
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@ -1183,7 +1183,7 @@ class BasicNewsRecipe(Recipe):
            from calibre.utils.cleantext import clean_xml_chars
            # Truncating the string could cause a dangling UTF-16 half-surrogate, which will cause lxml to barf, clean it
-            ans = clean_xml_chars(ans) + '\u2026'
+            ans = clean_xml_chars(ans) + '…'
        return ans
    def feed2index(self, f, feeds):