mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Revert previous changes, now looking for entities in unwrapping rule
This commit is contained in:
parent
66b443adc5
commit
569b84e1cb
@@ -17,6 +17,8 @@ convert_entities = functools.partial(entity_to_unicode,
|
|||||||
result_exceptions = {
|
result_exceptions = {
|
||||||
u'<' : '&lt;',
|
u'<' : '&lt;',
|
||||||
u'>' : '&gt;',
|
u'>' : '&gt;',
|
||||||
|
u"'" : '&apos;',
|
||||||
|
u'"' : '&quot;',
|
||||||
u'&' : '&amp;',
|
u'&' : '&amp;',
|
||||||
})
|
})
|
||||||
_span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
|
_span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
|
||||||
@@ -349,7 +351,7 @@ class HTMLPreProcessor(object):
|
|||||||
# print "The pdf line length returned is " + str(length)
|
# print "The pdf line length returned is " + str(length)
|
||||||
end_rules.append(
|
end_rules.append(
|
||||||
# Un wrap using punctuation
|
# Un wrap using punctuation
|
||||||
(re.compile(r'(?<=.{%i}[a-z,;:)\-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
|
(re.compile(r'(?<=.{%i}([a-z,:)\-IA]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
|
||||||
)
|
)
|
||||||
|
|
||||||
for rule in self.PREPROCESS + start_rules:
|
for rule in self.PREPROCESS + start_rules:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user