diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py
index 2b5eb5011e..ba0cd187e4 100644
--- a/src/calibre/customize/profiles.py
+++ b/src/calibre/customize/profiles.py
@@ -61,6 +61,7 @@ class SonyReaderInput(InputProfile):
dpi = 168.451
fbase = 12
fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
+ #unsupported_unicode_chars = [u'\u2018', u'\u2019', u'\u201a', u'\u201b', u'\u201c', u'\u201d', u'\u201e', u'\u201f']
class SonyReader300Input(SonyReaderInput):
@@ -250,6 +251,9 @@ class OutputProfile(Plugin):
#: The character used to represent a star in ratings
ratings_char = u'*'
+
+ #: Unsupported unicode characters to be replaced during preprocessing
+ unsupported_unicode_chars = []
@classmethod
def tags_to_string(cls, tags):
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 0b981cf6f7..b4815cb35e 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -182,8 +182,10 @@ class Dehyphenator(object):
lookupword = self.removesuffixes.sub('', dehyphenated)
if self.prefixes.match(firsthalf) is None:
lookupword = self.removeprefix.sub('', lookupword)
- booklookup = re.compile(u'%s' % lookupword, re.IGNORECASE)
+ # escape any meta-characters which may be in the lookup word
+ lookupword = re.sub(r'(?P<meta>[\[\]\\\^\$\.\|\?\*\+\(\)])', r'\\\g<meta>', lookupword)
#print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated)
+ booklookup = re.compile(u'%s' % lookupword, re.IGNORECASE)
if self.format == 'html_cleanup':
match = booklookup.search(self.html)
hyphenmatch = re.search(u'%s' % hyphenated, self.html)