From 86e68579f32972a2424771a7f3e84d046d630283 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 11 Sep 2010 08:39:40 -0400 Subject: [PATCH] PDF Input: Fix bug #6734, add additional matching for unicode characters. --- src/calibre/ebooks/conversion/preprocess.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index f7b803974f..256bcce6fc 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -166,6 +166,17 @@ class HTMLPreProcessor(object): (re.compile(u'`\s*()*\s*O', re.UNICODE), lambda match: u'Ò'), (re.compile(u'`\s*()*\s*u', re.UNICODE), lambda match: u'ù'), (re.compile(u'`\s*()*\s*U', re.UNICODE), lambda match: u'Ù'), + # ` with letter before + (re.compile(u'a\s*()*\s*`', re.UNICODE), lambda match: u'à'), + (re.compile(u'A\s*()*\s*`', re.UNICODE), lambda match: u'À'), + (re.compile(u'e\s*()*\s*`', re.UNICODE), lambda match: u'è'), + (re.compile(u'E\s*()*\s*`', re.UNICODE), lambda match: u'È'), + (re.compile(u'i\s*()*\s*`', re.UNICODE), lambda match: u'ì'), + (re.compile(u'I\s*()*\s*`', re.UNICODE), lambda match: u'Ì'), + (re.compile(u'o\s*()*\s*`', re.UNICODE), lambda match: u'ò'), + (re.compile(u'O\s*()*\s*`', re.UNICODE), lambda match: u'Ò'), + (re.compile(u'u\s*()*\s*`', re.UNICODE), lambda match: u'ù'), + (re.compile(u'U\s*()*\s*`', re.UNICODE), lambda match: u'Ù'), # ´ (re.compile(u'´\s*()*\s*a', re.UNICODE), lambda match: u'á'),