mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix inefficient regex that slows down a lot with certain input. Fixes #1951979 [Private bug](https://bugs.launchpad.net/calibre/+bug/1951979)
This commit is contained in:
parent
1e12201376
commit
235b7e38c1
@@ -46,7 +46,7 @@ _ligpat = re.compile('|'.join(LIGATURES))
|
||||
|
||||
|
||||
def sanitize_head(match):
|
||||
- x = match.group(1)
|
||||
+ x = match.group(1).strip()
|
||||
x = _span_pat.sub('', x)
|
||||
return '<head>\n%s\n</head>' % x
|
||||
|
||||
@@ -380,8 +380,7 @@ def html_preprocess_rules():
|
||||
(re.compile(r'\s{10000,}'), ''),
|
||||
# Some idiotic HTML generators (Frontpage I'm looking at you)
|
||||
# Put all sorts of crap into <head>. This messes up lxml
|
||||
- (re.compile(r'<head[^>]*>\n*(.*?)\n*</head>', re.IGNORECASE|re.DOTALL),
|
||||
- sanitize_head),
|
||||
+ (re.compile(r'<head[^>]*>(.*?)</head>', re.IGNORECASE|re.DOTALL), sanitize_head),
|
||||
# Convert all entities, since lxml doesn't handle them well
|
||||
(re.compile(r'&(\S+?);'), convert_entities),
|
||||
# Remove the <![if/endif tags inserted by everybody's darling, MS Word
|
||||
|
Loading…
x
Reference in New Issue
Block a user