diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index f7b803974f..256bcce6fc 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -166,6 +166,17 @@ class HTMLPreProcessor(object): (re.compile(u'`\s*()*\s*O', re.UNICODE), lambda match: u'Ò'), (re.compile(u'`\s*()*\s*u', re.UNICODE), lambda match: u'ù'), (re.compile(u'`\s*()*\s*U', re.UNICODE), lambda match: u'Ù'), + # ` with letter before + (re.compile(u'a\s*()*\s*`', re.UNICODE), lambda match: u'à'), + (re.compile(u'A\s*()*\s*`', re.UNICODE), lambda match: u'À'), + (re.compile(u'e\s*()*\s*`', re.UNICODE), lambda match: u'è'), + (re.compile(u'E\s*()*\s*`', re.UNICODE), lambda match: u'È'), + (re.compile(u'i\s*()*\s*`', re.UNICODE), lambda match: u'ì'), + (re.compile(u'I\s*()*\s*`', re.UNICODE), lambda match: u'Ì'), + (re.compile(u'o\s*()*\s*`', re.UNICODE), lambda match: u'ò'), + (re.compile(u'O\s*()*\s*`', re.UNICODE), lambda match: u'Ò'), + (re.compile(u'u\s*()*\s*`', re.UNICODE), lambda match: u'ù'), + (re.compile(u'U\s*()*\s*`', re.UNICODE), lambda match: u'Ù'), # ´ (re.compile(u'´\s*()*\s*a', re.UNICODE), lambda match: u'á'), diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py index 166695ff5c..b0fc15197a 100644 --- a/src/calibre/ebooks/pml/pmlconverter.py +++ b/src/calibre/ebooks/pml/pmlconverter.py @@ -207,6 +207,7 @@ class PML_HTMLizer(object): while html != old: old = html html = self.cleanup_html_remove_redundant(html) + html = re.sub(r'(?imu)^\s*', '', html) return html def cleanup_html_remove_redundant(self, html): @@ -216,7 +217,7 @@ class PML_HTMLizer(object): html = re.sub(r'(?u)%s\s*%s' % (open % '.*?', close), '', html) else: html = re.sub(r'(?u)%s\s*%s' % (open, close), '', html) - html = re.sub(r'

\s*

', '', html) + html = re.sub(r'(?imu)

\s*

', '', html) return html def start_line(self): @@ -556,7 +557,7 @@ class PML_HTMLizer(object): text = t else: self.toc.add_item(os.path.basename(self.file_name), id, value) - text = '%s' % (id, t) + text = '%s' % (t, id) elif c == 'm': empty = False src = self.code_value(line) diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index a12e8a0761..dac1e34df7 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -77,7 +77,7 @@ def separate_paragraphs_print_formatted(txt): def preserve_spaces(txt): txt = txt.replace(' ', ' ') - txt = txt.replace('\t', ' ') + txt = txt.replace('\t', '    ') return txt def opf_writer(path, opf_name, manifest, spine, mi):