mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Conversion pipeline: Discard broken CSS generated by Microsoft products before parsing. This should improve performance, particularly when converting HTML produced by Word.
This commit is contained in:
parent
b43c7d45a3
commit
7ec5194d9b
@ -264,10 +264,16 @@ class Dehyphenator(object):
|
||||
class CSSPreProcessor(object):
|
||||
|
||||
PAGE_PAT = re.compile(r'@page[^{]*?{[^}]*?}')
|
||||
# Remove some of the broken CSS Microsoft products
|
||||
# create
|
||||
MS_PAT = re.compile(r'^\s*(mso-|panose-).+?$',
|
||||
re.MULTILINE|re.IGNORECASE)
|
||||
|
||||
def __call__(self, data, add_namespace=False):
|
||||
from calibre.ebooks.oeb.base import XHTML_CSS_NAMESPACE
|
||||
data = self.PAGE_PAT.sub('', data)
|
||||
if '\n' in data:
|
||||
data = self.MS_PAT.sub('', data)
|
||||
if not add_namespace:
|
||||
return data
|
||||
ans, namespaced = [], False
|
||||
|
Loading…
x
Reference in New Issue
Block a user