mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix bug 2587: Use WayneD's solution because it's cleaner.
This commit is contained in:
parent
fb92bbbf50
commit
d5bf14f1d8
@ -21,9 +21,7 @@ _span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
|
|||||||
def sanitize_head(match):
|
def sanitize_head(match):
|
||||||
x = match.group(1)
|
x = match.group(1)
|
||||||
x = _span_pat.sub('', x)
|
x = _span_pat.sub('', x)
|
||||||
x = ('\n%s' % x) if not x.startswith('\n') else x
|
return '<head>\n%s\n</head>' % x
|
||||||
x += '\n' if not x.endswith('\n') else ''
|
|
||||||
return '<head>%s</head>' % x
|
|
||||||
|
|
||||||
def chap_head(match):
|
def chap_head(match):
|
||||||
chap = match.group('chap')
|
chap = match.group('chap')
|
||||||
@ -86,7 +84,7 @@ class HTMLPreProcessor(object):
|
|||||||
PREPROCESS = [
|
PREPROCESS = [
|
||||||
# Some idiotic HTML generators (Frontpage I'm looking at you)
|
# Some idiotic HTML generators (Frontpage I'm looking at you)
|
||||||
# Put all sorts of crap into <head>. This messes up lxml
|
# Put all sorts of crap into <head>. This messes up lxml
|
||||||
(re.compile(r'<head[^>]*>(.*?)</head>', re.IGNORECASE|re.DOTALL),
|
(re.compile(r'<head[^>]*>\n*(.*?)\n*</head>', re.IGNORECASE|re.DOTALL),
|
||||||
sanitize_head),
|
sanitize_head),
|
||||||
# Convert all entities, since lxml doesn't handle them well
|
# Convert all entities, since lxml doesn't handle them well
|
||||||
(re.compile(r'&(\S+?);'), convert_entities),
|
(re.compile(r'&(\S+?);'), convert_entities),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user