mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Escaping meta-characters before compiling words as a regex for removing hyphens
This commit is contained in:
parent
a0a984c5b0
commit
394f09e7f4
@ -61,6 +61,7 @@ class SonyReaderInput(InputProfile):
|
|||||||
dpi = 168.451
|
dpi = 168.451
|
||||||
fbase = 12
|
fbase = 12
|
||||||
fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
|
fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
|
||||||
|
#unsupported_unicode_chars = [\u2018, \u2019, \u201a, \u201b, \u201c, \u201d, \u201e, \u201f]
|
||||||
|
|
||||||
class SonyReader300Input(SonyReaderInput):
|
class SonyReader300Input(SonyReaderInput):
|
||||||
|
|
||||||
@ -251,6 +252,9 @@ class OutputProfile(Plugin):
|
|||||||
#: The character used to represent a star in ratings
|
#: The character used to represent a star in ratings
|
||||||
ratings_char = u'*'
|
ratings_char = u'*'
|
||||||
|
|
||||||
|
#: Unsupported unicode characters to be replaced during preprocessing
|
||||||
|
unsupported_unicode_chars = []
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def tags_to_string(cls, tags):
|
def tags_to_string(cls, tags):
|
||||||
return escape(', '.join(tags))
|
return escape(', '.join(tags))
|
||||||
|
@ -182,8 +182,10 @@ class Dehyphenator(object):
|
|||||||
lookupword = self.removesuffixes.sub('', dehyphenated)
|
lookupword = self.removesuffixes.sub('', dehyphenated)
|
||||||
if self.prefixes.match(firsthalf) is None:
|
if self.prefixes.match(firsthalf) is None:
|
||||||
lookupword = self.removeprefix.sub('', lookupword)
|
lookupword = self.removeprefix.sub('', lookupword)
|
||||||
booklookup = re.compile(u'%s' % lookupword, re.IGNORECASE)
|
# escape any meta-characters which may be in the lookup word
|
||||||
|
lookupword = re.sub(r'(?P<meta>[\[\]\\\^\$\.\|\?\*\+\(\)])', r'\\\g<meta>', lookupword)
|
||||||
#print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated)
|
#print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated)
|
||||||
|
booklookup = re.compile(u'%s' % lookupword, re.IGNORECASE)
|
||||||
if self.format == 'html_cleanup':
|
if self.format == 'html_cleanup':
|
||||||
match = booklookup.search(self.html)
|
match = booklookup.search(self.html)
|
||||||
hyphenmatch = re.search(u'%s' % hyphenated, self.html)
|
hyphenmatch = re.search(u'%s' % hyphenated, self.html)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user