From 569b84e1cb940326f90ddeadde48b066c19ad5bd Mon Sep 17 00:00:00 2001 From: ldolse Date: Thu, 16 Sep 2010 16:44:28 +0800 Subject: [PATCH] Revert previous changes, now looking for entities in unwrapping rule --- src/calibre/ebooks/conversion/preprocess.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index a2ec2912cb..e72e15c3d9 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -17,6 +17,8 @@ convert_entities = functools.partial(entity_to_unicode, result_exceptions = { u'<' : '&lt;', u'>' : '&gt;', + u"'" : '&apos;', + u'"' : '&quot;', u'&' : '&amp;', }) _span_pat = re.compile('', re.DOTALL|re.IGNORECASE) @@ -349,7 +351,7 @@ class HTMLPreProcessor(object): # print "The pdf line length returned is " + str(length) end_rules.append( # Un wrap using punctuation - (re.compile(r'(?<=.{%i}[a-z,;:)\-IA])\s*(?P)?\s*(\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines), + (re.compile(r'(?<=.{%i}([a-z,:)\-IA]|(?)?\s*(\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines), ) for rule in self.PREPROCESS + start_rules: