diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index 2b5eb5011e..ba0cd187e4 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -61,6 +61,7 @@ class SonyReaderInput(InputProfile): dpi = 168.451 fbase = 12 fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24] + #unsupported_unicode_chars = [\u2018, \u2019, \u201a, \u201b, \u201c, \u201d, \u201e, \u201f] class SonyReader300Input(SonyReaderInput): @@ -250,6 +251,9 @@ class OutputProfile(Plugin): #: The character used to represent a star in ratings ratings_char = u'*' + + #: Unsupported unicode characters to be replaced during preprocessing + unsupported_unicode_chars = [] @classmethod def tags_to_string(cls, tags): diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 0b981cf6f7..b4815cb35e 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -182,8 +182,10 @@ class Dehyphenator(object): lookupword = self.removesuffixes.sub('', dehyphenated) if self.prefixes.match(firsthalf) is None: lookupword = self.removeprefix.sub('', lookupword) - booklookup = re.compile(u'%s' % lookupword, re.IGNORECASE) + # escape any meta-characters which may be in the lookup word + lookupword = re.sub(r'(?P<metachar>[\[\]\\\^\$\.\|\?\*\+\(\)])', r'\\\g<metachar>', lookupword) #print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated) + booklookup = re.compile(u'%s' % lookupword, re.IGNORECASE) if self.format == 'html_cleanup': match = booklookup.search(self.html) hyphenmatch = re.search(u'%s' % hyphenated, self.html)