mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
...
This commit is contained in:
parent
1b0efe04d9
commit
2014e6520e
@ -347,19 +347,19 @@ class CSSPreProcessor(object):
|
|||||||
class HTMLPreProcessor(object):
|
class HTMLPreProcessor(object):
|
||||||
|
|
||||||
PREPROCESS = [
|
PREPROCESS = [
|
||||||
# Remove huge block of contiguous spaces as they slow down
|
# Remove huge block of contiguous spaces as they slow down
|
||||||
# the following regexes pretty badly
|
# the following regexes pretty badly
|
||||||
(re.compile(r'\s{10000,}'), lambda m: ''),
|
(re.compile(r'\s{10000,}'), lambda m: ''),
|
||||||
# Some idiotic HTML generators (Frontpage I'm looking at you)
|
# Some idiotic HTML generators (Frontpage I'm looking at you)
|
||||||
# Put all sorts of crap into <head>. This messes up lxml
|
# Put all sorts of crap into <head>. This messes up lxml
|
||||||
(re.compile(r'<head[^>]*>\n*(.*?)\n*</head>', re.IGNORECASE|re.DOTALL),
|
(re.compile(r'<head[^>]*>\n*(.*?)\n*</head>', re.IGNORECASE|re.DOTALL),
|
||||||
sanitize_head),
|
sanitize_head),
|
||||||
# Convert all entities, since lxml doesn't handle them well
|
# Convert all entities, since lxml doesn't handle them well
|
||||||
(re.compile(r'&(\S+?);'), convert_entities),
|
(re.compile(r'&(\S+?);'), convert_entities),
|
||||||
# Remove the <![if/endif tags inserted by everybody's darling, MS Word
|
# Remove the <![if/endif tags inserted by everybody's darling, MS Word
|
||||||
(re.compile(r'</{0,1}!\[(end){0,1}if\]{0,1}>', re.IGNORECASE),
|
(re.compile(r'</{0,1}!\[(end){0,1}if\]{0,1}>', re.IGNORECASE),
|
||||||
lambda match: ''),
|
lambda match: ''),
|
||||||
]
|
]
|
||||||
|
|
||||||
# Fix pdftohtml markup
|
# Fix pdftohtml markup
|
||||||
PDFTOHTML = [
|
PDFTOHTML = [
|
||||||
@ -636,7 +636,9 @@ class HTMLPreProcessor(object):
|
|||||||
|
|
||||||
for rule in rules + end_rules:
|
for rule in rules + end_rules:
|
||||||
try:
|
try:
|
||||||
|
print(rule[0].pattern)
|
||||||
html = rule[0].sub(rule[1], html)
|
html = rule[0].sub(rule[1], html)
|
||||||
|
print(222222222222)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if rule in user_sr_rules:
|
if rule in user_sr_rules:
|
||||||
self.log.error(
|
self.log.error(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user