mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
EPUB Output: Fix regression in handling of comments that would occassionally cause content in the comments to leak into the text
This commit is contained in:
parent
29d842db3e
commit
8e248482aa
@ -205,9 +205,8 @@ class HTMLProcessor(Processor, Rationalizer):
|
|||||||
def save(self):
|
def save(self):
|
||||||
for meta in list(self.root.xpath('//meta')):
|
for meta in list(self.root.xpath('//meta')):
|
||||||
meta.getparent().remove(meta)
|
meta.getparent().remove(meta)
|
||||||
#for img in self.root.xpath('//img[@src]'):
|
# Strip all comments since Adobe DE is petrified of them
|
||||||
# self.convert_image(img)
|
Processor.save(self, strip_comments=True)
|
||||||
Processor.save(self)
|
|
||||||
|
|
||||||
def remove_first_image(self):
|
def remove_first_image(self):
|
||||||
images = self.root.xpath('//img')
|
images = self.root.xpath('//img')
|
||||||
|
@ -332,8 +332,6 @@ class PreProcessor(object):
|
|||||||
(re.compile(r'&(\S+?);'), convert_entities),
|
(re.compile(r'&(\S+?);'), convert_entities),
|
||||||
# Remove the <![if/endif tags inserted by everybody's darling, MS Word
|
# Remove the <![if/endif tags inserted by everybody's darling, MS Word
|
||||||
(re.compile(r'(?i)<{0,1}!\[(end){0,1}if[^>]*>'), lambda match: ''),
|
(re.compile(r'(?i)<{0,1}!\[(end){0,1}if[^>]*>'), lambda match: ''),
|
||||||
# Strip all comments since Adobe DE is petrified of them
|
|
||||||
(re.compile(r'<!--[^>]*>'), lambda match : ''),
|
|
||||||
]
|
]
|
||||||
|
|
||||||
# Fix pdftohtml markup
|
# Fix pdftohtml markup
|
||||||
@ -447,7 +445,7 @@ class Parser(PreProcessor, LoggingInterface):
|
|||||||
def save_path(self):
|
def save_path(self):
|
||||||
return os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path])
|
return os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path])
|
||||||
|
|
||||||
def save(self):
|
def save(self, strip_comments=False):
|
||||||
'''
|
'''
|
||||||
Save processed HTML into the content directory.
|
Save processed HTML into the content directory.
|
||||||
Should be called after all HTML processing is finished.
|
Should be called after all HTML processing is finished.
|
||||||
@ -458,7 +456,11 @@ class Parser(PreProcessor, LoggingInterface):
|
|||||||
svg.set('xmlns', 'http://www.w3.org/2000/svg')
|
svg.set('xmlns', 'http://www.w3.org/2000/svg')
|
||||||
|
|
||||||
ans = tostring(self.root, pretty_print=self.opts.pretty_print)
|
ans = tostring(self.root, pretty_print=self.opts.pretty_print)
|
||||||
ans = re.compile(r'<head>', re.IGNORECASE).sub('<head>\n\t<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n', ans[:1000])+ans[1000:]
|
ans = re.compile(r'<head>', re.IGNORECASE).sub(
|
||||||
|
'<head>\n\t<meta http-equiv="Content-Type" '
|
||||||
|
'content="text/html; charset=utf-8" />\n', ans[:1000])+ans[1000:]
|
||||||
|
if strip_comments:
|
||||||
|
ans = re.compile(r'<!--.*?-->', re.DOTALL).sub('', ans)
|
||||||
with open(self.save_path(), 'wb') as f:
|
with open(self.save_path(), 'wb') as f:
|
||||||
f.write(ans)
|
f.write(ans)
|
||||||
return f.name
|
return f.name
|
||||||
@ -594,7 +596,7 @@ class Processor(Parser):
|
|||||||
mark = etree.Element('hr', style=page_break_before)
|
mark = etree.Element('hr', style=page_break_before)
|
||||||
elem.addprevious(mark)
|
elem.addprevious(mark)
|
||||||
|
|
||||||
def save(self):
|
def save(self, strip_comments=False):
|
||||||
style_path = os.path.splitext(os.path.basename(self.save_path()))[0]
|
style_path = os.path.splitext(os.path.basename(self.save_path()))[0]
|
||||||
for i, sheet in enumerate([self.stylesheet, self.font_css, self.override_css]):
|
for i, sheet in enumerate([self.stylesheet, self.font_css, self.override_css]):
|
||||||
if sheet is not None:
|
if sheet is not None:
|
||||||
@ -608,7 +610,7 @@ class Processor(Parser):
|
|||||||
if isinstance(raw, unicode):
|
if isinstance(raw, unicode):
|
||||||
raw = raw.encode('utf-8')
|
raw = raw.encode('utf-8')
|
||||||
open(path, 'wb').write(raw)
|
open(path, 'wb').write(raw)
|
||||||
return Parser.save(self)
|
return Parser.save(self, strip_comments=strip_comments)
|
||||||
|
|
||||||
def populate_toc(self, toc):
|
def populate_toc(self, toc):
|
||||||
'''
|
'''
|
||||||
|
Loading…
x
Reference in New Issue
Block a user