TXT Output: When using preserve spaces, output tab characters as a sequence of four non-breaking spaces, as some readers don't handle the \x09 char code. Fix #6770 (Problem converting pmlz to epub). PDF Input: More unicode character matching.

This commit is contained in:
Kovid Goyal 2010-09-11 11:58:32 -06:00
commit 1e77e6538f
3 changed files with 15 additions and 3 deletions

View File

@ -166,6 +166,17 @@ class HTMLPreProcessor(object):
(re.compile(u'`\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ò'),
(re.compile(u'`\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ù'),
(re.compile(u'`\s*(<br.*?>)*\s*U', re.UNICODE), lambda match: u'Ù'),
# ` with letter before
(re.compile(u'a\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'à'),
(re.compile(u'A\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'À'),
(re.compile(u'e\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'è'),
(re.compile(u'E\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'È'),
(re.compile(u'i\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'ì'),
(re.compile(u'I\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'Ì'),
(re.compile(u'o\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'ò'),
(re.compile(u'O\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'Ò'),
(re.compile(u'u\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'ù'),
(re.compile(u'U\s*(<br.*?>)*\s*`', re.UNICODE), lambda match: u'Ù'),
# ´
(re.compile(u'´\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'á'),

View File

@ -207,6 +207,7 @@ class PML_HTMLizer(object):
while html != old:
old = html
html = self.cleanup_html_remove_redundant(html)
html = re.sub(r'(?imu)^\s*', '', html)
return html
def cleanup_html_remove_redundant(self, html):
@ -216,7 +217,7 @@ class PML_HTMLizer(object):
html = re.sub(r'(?u)%s\s*%s' % (open % '.*?', close), '', html)
else:
html = re.sub(r'(?u)%s\s*%s' % (open, close), '', html)
html = re.sub(r'<p>\s*</p>', '', html)
html = re.sub(r'(?imu)<p>\s*</p>', '', html)
return html
def start_line(self):
@ -556,7 +557,7 @@ class PML_HTMLizer(object):
text = t
else:
self.toc.add_item(os.path.basename(self.file_name), id, value)
text = '<span id="%s"></span>%s' % (id, t)
text = '%s<span id="%s"></span>' % (t, id)
elif c == 'm':
empty = False
src = self.code_value(line)

View File

@ -77,7 +77,7 @@ def separate_paragraphs_print_formatted(txt):
def preserve_spaces(txt):
    """Return *txt* with spaces and tabs turned into non-breaking spaces.

    Every space becomes one ``&nbsp;`` entity and every tab character
    becomes a run of four ``&nbsp;`` entities.
    """
    txt = txt.replace(' ', '&nbsp;')
    # Tabs are expanded to four non-breaking spaces instead of being
    # emitted as the '&#09;' entity, because some readers do not handle
    # the tab character code. The tab must not be rewritten to '&#09;'
    # first, or this replacement would never find anything to match.
    txt = txt.replace('\t', '&nbsp;' * 4)
    return txt
def opf_writer(path, opf_name, manifest, spine, mi):