mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
decode unicode-escape (extra-edit)
This commit is contained in:
parent
12cb8b2e58
commit
534293eabc
@ -299,8 +299,8 @@ class iPadOutput(OutputProfile):
|
||||
}
|
||||
]
|
||||
|
||||
ratings_char = '\u2605' # filled star
|
||||
empty_ratings_char = '\u2606' # hollow star
|
||||
ratings_char = '★' # filled star
|
||||
empty_ratings_char = '☆' # hollow star
|
||||
|
||||
touchscreen = True
|
||||
# touchscreen_news_css {{{
|
||||
@ -677,8 +677,8 @@ class KindleOutput(OutputProfile):
|
||||
supports_mobi_indexing = True
|
||||
periodical_date_in_title = False
|
||||
|
||||
empty_ratings_char = '\u2606'
|
||||
ratings_char = '\u2605'
|
||||
empty_ratings_char = '☆'
|
||||
ratings_char = '★'
|
||||
|
||||
mobi_ems_per_blockquote = 2.0
|
||||
|
||||
@ -696,8 +696,8 @@ class KindleDXOutput(OutputProfile):
|
||||
# comic_screen_size = (741, 1022)
|
||||
supports_mobi_indexing = True
|
||||
periodical_date_in_title = False
|
||||
empty_ratings_char = '\u2606'
|
||||
ratings_char = '\u2605'
|
||||
empty_ratings_char = '☆'
|
||||
ratings_char = '★'
|
||||
mobi_ems_per_blockquote = 2.0
|
||||
|
||||
|
||||
|
@ -77,7 +77,7 @@ def _get_comments(soup):
|
||||
pages = (_metadata_from_span(soup, 'pages') or _metadata_from_table(soup, 'pages'))
|
||||
try:
|
||||
# date span can have copyright symbols in it...
|
||||
date = date.replace('\u00a9', '').strip()
|
||||
date = date.replace('©', '').strip()
|
||||
# and pages often comes as '(\d+ pages)'
|
||||
pages = re.search(r'\d+', pages).group(0)
|
||||
return f'Published {date}, {pages} pages.'
|
||||
|
@ -19,20 +19,20 @@ XLINK_NS = 'http://www.w3.org/1999/xlink'
|
||||
_span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
|
||||
|
||||
LIGATURES = {
|
||||
# '\u00c6': 'AE',
|
||||
# '\u00e6': 'ae',
|
||||
# '\u0152': 'OE',
|
||||
# '\u0153': 'oe',
|
||||
# '\u0132': 'IJ',
|
||||
# '\u0133': 'ij',
|
||||
# '\u1D6B': 'ue',
|
||||
'\uFB00': 'ff',
|
||||
'\uFB01': 'fi',
|
||||
'\uFB02': 'fl',
|
||||
'\uFB03': 'ffi',
|
||||
'\uFB04': 'ffl',
|
||||
'\uFB05': 'ft',
|
||||
'\uFB06': 'st',
|
||||
# 'Æ': 'AE',
|
||||
# 'æ': 'ae',
|
||||
# 'Œ': 'OE',
|
||||
# 'œ': 'oe',
|
||||
# 'Ĳ': 'IJ',
|
||||
# 'ĳ': 'ij',
|
||||
# 'ᵫ': 'ue',
|
||||
'ﬀ': 'ff',
|
||||
'ﬁ': 'fi',
|
||||
'ﬂ': 'fl',
|
||||
'ﬃ': 'ffi',
|
||||
'ﬄ': 'ffl',
|
||||
'ﬅ': 'ft',
|
||||
'ﬆ': 'st',
|
||||
}
|
||||
|
||||
_ligpat = re.compile('|'.join(LIGATURES))
|
||||
@ -240,7 +240,7 @@ class Dehyphenator:
|
||||
else:
|
||||
if self.verbose > 2:
|
||||
self.log(' Cleanup:returning original text '+firsthalf+' + linefeed '+secondhalf)
|
||||
return firsthalf+'\u2014'+wraptags+secondhalf
|
||||
return firsthalf+'—'+wraptags+secondhalf
|
||||
|
||||
else:
|
||||
if self.format == 'individual_words' and len(firsthalf) + len(secondhalf) <= 6:
|
||||
@ -274,7 +274,7 @@ class Dehyphenator:
|
||||
r'</[iub]>\s*<p>\s*<[iub]>)\s*(?P<secondpart>[\w\d]+)')% length)
|
||||
elif format == 'txt':
|
||||
intextmatch = re.compile(
|
||||
'(?<=.{%i})(?P<firstpart>[^\\W\\-]+)(-|‐)(\u0020|\u0009)*(?P<wraptags>(\n(\u0020|\u0009)*)+)(?P<secondpart>[\\w\\d]+)'% length)
|
||||
'(?<=.{%i})(?P<firstpart>[^\\W\\-]+)(-|‐)( |\t)*(?P<wraptags>(\n( |\t)*)+)(?P<secondpart>[\\w\\d]+)'% length)
|
||||
elif format == 'individual_words':
|
||||
intextmatch = re.compile(
|
||||
r'(?!<)(?P<firstpart>[^\W\-]+)(-|‐)\s*(?P<secondpart>\w+)(?![^<]*?>)', re.UNICODE)
|
||||
|
@ -2194,8 +2194,8 @@ def find_tests():
|
||||
t('a&amp;b&lt;c', 'a&b<c')
|
||||
t('a&acE;b', 'a∾̳b')
|
||||
t('a&#1234;b', 'aӒb')
|
||||
t('aሴb', 'a\u1234b')
|
||||
t('a􃓺b', 'a\U001034fAb')
|
||||
t('aሴb', 'aሴb')
|
||||
t('a􃓺b', 'a\U001034FAb')
|
||||
t('a�b�c', 'abc')
|
||||
x('&amp;&lt;&gt;&apos;&quot;', '&amp;&lt;&gt;&apos;&quot;')
|
||||
|
||||
|
@ -803,7 +803,7 @@ class HTMLConverter:
|
||||
src = src.lstrip()
|
||||
f = src[0]
|
||||
next = 1
|
||||
if f in ("'", '"', '\u201c', '\u2018', '\u201d', '\u2019'):
|
||||
if f in ("'", '"', '“', '‘', '”', '’'):
|
||||
if len(src) >= 2:
|
||||
next = 2
|
||||
f = src[:2]
|
||||
@ -819,7 +819,7 @@ class HTMLConverter:
|
||||
|
||||
def append_text(src):
|
||||
fp, key, variant = self.font_properties(css)
|
||||
for x, y in [('\xad', ''), ('\xa0', ' '), ('\ufb00', 'ff'), ('\ufb01', 'fi'), ('\ufb02', 'fl'), ('\ufb03', 'ffi'), ('\ufb04', 'ffl')]:
|
||||
for x, y in [('\xad', ''), ('\xa0', ' '), ('ﬀ', 'ff'), ('ﬁ', 'fi'), ('ﬂ', 'fl'), ('ﬃ', 'ffi'), ('ﬄ', 'ffl')]:
|
||||
src = src.replace(x, y)
|
||||
|
||||
def valigner(x):
|
||||
@ -1624,7 +1624,7 @@ class HTMLConverter:
|
||||
in_ol = parent.name.lower() == 'ol'
|
||||
break
|
||||
parent = parent.parent
|
||||
prepend = str(self.list_counter)+'. ' if in_ol else '\u2022' + ' '
|
||||
prepend = str(self.list_counter)+'. ' if in_ol else '•' + ' '
|
||||
self.current_para.append(Span(prepend))
|
||||
self.process_children(tag, tag_css, tag_pseudo_css)
|
||||
if in_ol:
|
||||
|
@ -529,11 +529,11 @@ class MobiMLizer:
|
||||
t = elem.text
|
||||
if not t:
|
||||
t = ''
|
||||
elem.text = '\u201c' + t
|
||||
elem.text = '“' + t
|
||||
t = elem.tail
|
||||
if not t:
|
||||
t = ''
|
||||
elem.tail = '\u201d' + t
|
||||
elem.tail = '”' + t
|
||||
text = None
|
||||
if elem.text:
|
||||
if istate.preserve or istate.pre_wrap:
|
||||
|
@ -362,7 +362,7 @@ class Serializer:
|
||||
text = text.replace('&', '&amp;')
|
||||
text = text.replace('<', '&lt;')
|
||||
text = text.replace('>', '&gt;')
|
||||
text = text.replace('\u00AD', '') # Soft-hyphen
|
||||
text = text.replace('\u00ad', '') # Soft-hyphen
|
||||
if quot:
|
||||
text = text.replace('"', '&quot;')
|
||||
if isinstance(text, str):
|
||||
|
@ -100,9 +100,9 @@ def adjacent_quotes(first_string, second_string):
|
||||
if fchar is not None:
|
||||
first_char = fchar.group(1) # First non-space char
|
||||
|
||||
return bool((last_char == '\u0022' and first_char == '\u0022') \
|
||||
or (last_char == '\u2019' and first_char == '\u2018') \
|
||||
or (last_char == '\u201d' and first_char == '\u201c'))
|
||||
return bool((last_char == '"' and first_char == '"') \
|
||||
or (last_char == '’' and first_char == '‘') \
|
||||
or (last_char == '”' and first_char == '“'))
|
||||
|
||||
class Font:
|
||||
|
||||
|
@ -24,9 +24,9 @@ def normalize_entities(cur_title):
|
||||
'\u2013':'-',
|
||||
'&mdash;': '-',
|
||||
'&ndash;': '-',
|
||||
'\u00A0': ' ',
|
||||
'\u00AB': '"',
|
||||
'\u00BB': '"',
|
||||
'\u00a0': ' ',
|
||||
'\u00ab': '"',
|
||||
'\u00bb': '"',
|
||||
'&quot;': '"',
|
||||
}
|
||||
for c, r in iteritems(entities):
|
||||
|
@ -1693,7 +1693,7 @@ def elided_text(text, font=None, width=300, pos='middle'):
|
||||
font = QApplication.instance().font()
|
||||
fm = (font if isinstance(font, QFontMetrics) else QFontMetrics(font))
|
||||
delta = 4
|
||||
ellipsis = '\u2026'
|
||||
ellipsis = '…'
|
||||
|
||||
def remove_middle(x):
|
||||
mid = len(x) // 2
|
||||
|
@ -121,13 +121,13 @@ class PluginWidget(QWidget,Ui_Form):
|
||||
'name':_('Read book'),
|
||||
'field':_('Tags'),
|
||||
'pattern':'+',
|
||||
'prefix':'\u2713'},
|
||||
'prefix':'✓'},
|
||||
{'ordinal':1,
|
||||
'enabled':True,
|
||||
'name':_('Wishlist item'),
|
||||
'field':_('Tags'),
|
||||
'pattern':'Wishlist',
|
||||
'prefix':'\u00d7'},],
|
||||
'prefix':'×'},],
|
||||
['table_widget','table_widget']))
|
||||
|
||||
self.OPTION_FIELDS = option_fields
|
||||
@ -1351,52 +1351,52 @@ class PrefixRules(GenericRulesTable):
|
||||
# Create a list of prefixes for user selection
|
||||
raw_prefix_list = [
|
||||
('Ampersand', '&'),
|
||||
('Angle left double', '\u00ab'),
|
||||
('Angle left', '\u2039'),
|
||||
('Angle right double', '\u00bb'),
|
||||
('Angle right', '\u203a'),
|
||||
('Arrow carriage return', '\u21b5'),
|
||||
('Arrow double', '\u2194'),
|
||||
('Arrow down', '\u2193'),
|
||||
('Arrow left', '\u2190'),
|
||||
('Arrow right', '\u2192'),
|
||||
('Arrow up', '\u2191'),
|
||||
('Angle left double', '«'),
|
||||
('Angle left', '‹'),
|
||||
('Angle right double', '»'),
|
||||
('Angle right', '›'),
|
||||
('Arrow carriage return', '↵'),
|
||||
('Arrow double', '↔'),
|
||||
('Arrow down', '↓'),
|
||||
('Arrow left', '←'),
|
||||
('Arrow right', '→'),
|
||||
('Arrow up', '↑'),
|
||||
('Asterisk', '*'),
|
||||
('At sign', '@'),
|
||||
('Bullet smallest', '\u22c5'),
|
||||
('Bullet small', '\u00b7'),
|
||||
('Bullet', '\u2022'),
|
||||
('Cards clubs', '\u2663'),
|
||||
('Cards diamonds', '\u2666'),
|
||||
('Cards hearts', '\u2665'),
|
||||
('Cards spades', '\u2660'),
|
||||
('Bullet smallest', '⋅'),
|
||||
('Bullet small', '·'),
|
||||
('Bullet', '•'),
|
||||
('Cards clubs', '♣'),
|
||||
('Cards diamonds', '♦'),
|
||||
('Cards hearts', '♥'),
|
||||
('Cards spades', '♠'),
|
||||
('Caret', '^'),
|
||||
('Checkmark', '\u2713'),
|
||||
('Copyright circle c', '\u00a9'),
|
||||
('Copyright circle r', '\u00ae'),
|
||||
('Copyright trademark', '\u2122'),
|
||||
('Currency cent', '\u00a2'),
|
||||
('Checkmark', '✓'),
|
||||
('Copyright circle c', '©'),
|
||||
('Copyright circle r', '®'),
|
||||
('Copyright trademark', '™'),
|
||||
('Currency cent', '¢'),
|
||||
('Currency dollar', '$'),
|
||||
('Currency euro', '\u20ac'),
|
||||
('Currency pound', '\u00a3'),
|
||||
('Currency yen', '\u00a5'),
|
||||
('Dagger double', '\u2021'),
|
||||
('Dagger', '\u2020'),
|
||||
('Degree', '\u00b0'),
|
||||
('Dots3', '\u2234'),
|
||||
('Currency euro', '€'),
|
||||
('Currency pound', '£'),
|
||||
('Currency yen', '¥'),
|
||||
('Dagger double', '‡'),
|
||||
('Dagger', '†'),
|
||||
('Degree', '°'),
|
||||
('Dots3', '∴'),
|
||||
('Hash', '#'),
|
||||
('Infinity', '\u221e'),
|
||||
('Lozenge', '\u25ca'),
|
||||
('Math divide', '\u00f7'),
|
||||
('Math empty', '\u2205'),
|
||||
('Infinity', '∞'),
|
||||
('Lozenge', '◊'),
|
||||
('Math divide', '÷'),
|
||||
('Math empty', '∅'),
|
||||
('Math equals', '='),
|
||||
('Math minus', '\u2212'),
|
||||
('Math plus circled', '\u2295'),
|
||||
('Math times circled', '\u2297'),
|
||||
('Math times', '\u00d7'),
|
||||
('Paragraph', '\u00b6'),
|
||||
('Math minus', '−'),
|
||||
('Math plus circled', '⊕'),
|
||||
('Math times circled', '⊗'),
|
||||
('Math times', '×'),
|
||||
('Paragraph', '¶'),
|
||||
('Percent', '%'),
|
||||
('Plus-or-minus', '\u00b1'),
|
||||
('Plus-or-minus', '±'),
|
||||
('Plus', '+'),
|
||||
('Punctuation colon', ':'),
|
||||
('Punctuation colon-semi', ';'),
|
||||
@ -1405,10 +1405,10 @@ class PrefixRules(GenericRulesTable):
|
||||
('Punctuation period', '.'),
|
||||
('Punctuation slash back', '\\'),
|
||||
('Punctuation slash forward', '/'),
|
||||
('Section', '\u00a7'),
|
||||
('Section', '§'),
|
||||
('Tilde', '~'),
|
||||
('Vertical bar', '|'),
|
||||
('Vertical bar broken', '\u00a6'),
|
||||
('Vertical bar broken', '¦'),
|
||||
('_0', '0'),
|
||||
('_1', '1'),
|
||||
('_2', '2'),
|
||||
|
@ -465,7 +465,7 @@ class MetadataSingleDialogBase(QDialog):
|
||||
def update_window_title(self, *args):
|
||||
title = self.title.current_val
|
||||
if len(title) > 50:
|
||||
title = title[:50] + '\u2026'
|
||||
title = title[:50] + '…'
|
||||
self.setWindowTitle(BASE_TITLE + ' - ' +
|
||||
title + ' -' +
|
||||
_(' [%(num)d of %(tot)d]')%dict(num=self.current_row+1,
|
||||
|
@ -113,7 +113,7 @@ def format_price_in_RUR(price):
|
||||
'''
|
||||
if price and re.match(r'^\d*?\.\d*?$', price):
|
||||
try:
|
||||
price = u'{:,.2F} \u20BD'.format(float(price)) # \u20BD => руб.
|
||||
price = u'{:,.2F} \u20bd'.format(float(price)) # \u20bd => руб.
|
||||
price = price.replace(',', ' ').replace('.', ',', 1)
|
||||
except:
|
||||
pass
|
||||
|
@ -156,7 +156,7 @@ class EPUB_MOBI(CatalogPlugin):
|
||||
"Default: '%default'\n"
|
||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||
Option('--prefix-rules',
|
||||
default="(('Read books','tags','+','\u2713'),('Wishlist item','tags','Wishlist','\u00d7'))",
|
||||
default="(('Read books','tags','+','✓'),('Wishlist item','tags','Wishlist','×'))",
|
||||
dest='prefix_rules',
|
||||
action=None,
|
||||
help=_("Specifies the rules used to include prefixes indicating read books, wishlist items and other user-specified prefixes.\n"
|
||||
|
@ -227,7 +227,7 @@ class Tester(SearchQueryParser):
|
||||
'London : Jonathan Cape, 2005.',
|
||||
'lrf,txt'],
|
||||
259: ['My name is Red',
|
||||
'Orhan Pamuk; translated from the Turkish by Erda\u011f G\xf6knar',
|
||||
'Orhan Pamuk; translated from the Turkish by Erdağ G\xf6knar',
|
||||
'New York : Alfred A. Knopf, 2001.',
|
||||
'lit,lrf'],
|
||||
265: ['Harbinger', 'David Mack', 'Star Trek', 'lit,lrf'],
|
||||
|
@ -286,7 +286,7 @@ def test():
|
||||
text = [colored(t, fg=t)+'. '+colored(t, fg=t, bold=True)+'.' for t in
|
||||
('red', 'yellow', 'green', 'white', 'cyan', 'magenta', 'blue',)]
|
||||
s.write('\n'.join(text))
|
||||
u = '\u041c\u0438\u0445\u0430\u0438\u043b fällen'
|
||||
u = 'Михаил fällen'
|
||||
print()
|
||||
s.write(u)
|
||||
print()
|
||||
|
@ -1183,7 +1183,7 @@ class BasicNewsRecipe(Recipe):
|
||||
from calibre.utils.cleantext import clean_xml_chars
|
||||
|
||||
# Truncating the string could cause a dangling UTF-16 half-surrogate, which will cause lxml to barf, clean it
|
||||
ans = clean_xml_chars(ans) + '\u2026'
|
||||
ans = clean_xml_chars(ans) + '…'
|
||||
return ans
|
||||
|
||||
def feed2index(self, f, feeds):
|
||||
|
Loading…
x
Reference in New Issue
Block a user