mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
TXT Output: When using preserve spaces, output tab characters as a sequence of four non-breaking spaces, as some readers don't handle the \x09 char code. Fix #6770 (Problem converting pmlz to epub). PDF Input: More unicode character matching.
This commit is contained in:
commit
1e77e6538f
@ -166,6 +166,17 @@ class HTMLPreProcessor(object):
|
||||
(re.compile(u'`\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ò'),
|
||||
(re.compile(u'`\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ù'),
|
||||
(re.compile(u'`\s*(<br.*?>)*\s*U', re.UNICODE), lambda match: u'Ù'),
|
||||
# ` with letter before
|
||||
(re.compile(u'a\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'à'),
|
||||
(re.compile(u'A\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'À'),
|
||||
(re.compile(u'e\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'è'),
|
||||
(re.compile(u'E\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'È'),
|
||||
(re.compile(u'i\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'ì'),
|
||||
(re.compile(u'I\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'Ì'),
|
||||
(re.compile(u'o\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'ò'),
|
||||
(re.compile(u'O\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'Ò'),
|
||||
(re.compile(u'u\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'ù'),
|
||||
(re.compile(u'U\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'Ù'),
|
||||
|
||||
# ´
|
||||
(re.compile(u'´\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'á'),
|
||||
|
@ -207,6 +207,7 @@ class PML_HTMLizer(object):
|
||||
while html != old:
|
||||
old = html
|
||||
html = self.cleanup_html_remove_redundant(html)
|
||||
html = re.sub(r'(?imu)^\s*', '', html)
|
||||
return html
|
||||
|
||||
def cleanup_html_remove_redundant(self, html):
|
||||
@ -216,7 +217,7 @@ class PML_HTMLizer(object):
|
||||
html = re.sub(r'(?u)%s\s*%s' % (open % '.*?', close), '', html)
|
||||
else:
|
||||
html = re.sub(r'(?u)%s\s*%s' % (open, close), '', html)
|
||||
html = re.sub(r'<p>\s*</p>', '', html)
|
||||
html = re.sub(r'(?imu)<p>\s*</p>', '', html)
|
||||
return html
|
||||
|
||||
def start_line(self):
|
||||
@ -556,7 +557,7 @@ class PML_HTMLizer(object):
|
||||
text = t
|
||||
else:
|
||||
self.toc.add_item(os.path.basename(self.file_name), id, value)
|
||||
text = '<span id="%s"></span>%s' % (id, t)
|
||||
text = '%s<span id="%s"></span>' % (t, id)
|
||||
elif c == 'm':
|
||||
empty = False
|
||||
src = self.code_value(line)
|
||||
|
@ -77,7 +77,7 @@ def separate_paragraphs_print_formatted(txt):
|
||||
|
||||
def preserve_spaces(txt):
    """Return *txt* with spaces and tabs made non-collapsible.

    Every space becomes one non-breaking-space entity and every tab becomes
    a run of four of them. Tabs are expanded rather than emitted as a tab
    character reference because some readers do not handle the \\x09 char
    code.

    NOTE(review): the entity spelling (``&nbsp;``) is reconstructed from the
    commit description; presumably the result is embedded in HTML markup --
    confirm against the caller.
    """
    # Spaces first: the tab expansion below contains no literal spaces, so
    # the two replacements cannot interfere with each other.
    txt = txt.replace(' ', '&nbsp;')
    txt = txt.replace('\t', '&nbsp;' * 4)
    return txt
|
||||
|
||||
def opf_writer(path, opf_name, manifest, spine, mi):
|
||||
|
Loading…
x
Reference in New Issue
Block a user