diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index a2ec2912cb..e72e15c3d9 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -17,6 +17,8 @@ convert_entities = functools.partial(entity_to_unicode, result_exceptions = { u'<' : '<', u'>' : '>', + u"'" : ''', + u'"' : '"', u'&' : '&', }) _span_pat = re.compile('', re.DOTALL|re.IGNORECASE) @@ -349,7 +351,7 @@ class HTMLPreProcessor(object): # print "The pdf line length returned is " + str(length) end_rules.append( # Un wrap using punctuation - (re.compile(r'(?<=.{%i}[a-z,;:)\-IA])\s*(?P)?\s*(\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines), + (re.compile(r'(?<=.{%i}([a-z,:)\-IA]|(?)?\s*(\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines), ) for rule in self.PREPROCESS + start_rules: