mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Do not use BeautifulSoup to postprocess the jacket
Avoids an extra parse
This commit is contained in:
parent
8813a31a38
commit
a3c0ce3b24
@ -15,7 +15,6 @@ from lxml import etree
|
|||||||
|
|
||||||
from calibre import guess_type, strftime
|
from calibre import guess_type, strftime
|
||||||
from calibre.constants import iswindows
|
from calibre.constants import iswindows
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
|
||||||
from calibre.ebooks.oeb.base import XPath, XHTML_NS, XHTML, xml2text, urldefrag, urlnormalize
|
from calibre.ebooks.oeb.base import XPath, XHTML_NS, XHTML, xml2text, urldefrag, urlnormalize
|
||||||
from calibre.library.comments import comments_to_html, markdown
|
from calibre.library.comments import comments_to_html, markdown
|
||||||
from calibre.utils.date import is_date_undefined, as_local_time
|
from calibre.utils.date import is_date_undefined, as_local_time
|
||||||
@ -205,6 +204,34 @@ class Tags(unicode_type):
|
|||||||
return t
|
return t
|
||||||
|
|
||||||
|
|
||||||
|
def postprocess_jacket(root, output_profile, has_data):
    '''
    Post-process the generated jacket tree to strip out empty header items.

    Every element whose ``class`` attribute is exactly ``cbj_series``,
    ``cbj_rating`` or ``cbj_tags`` is removed when the matching entry in
    ``has_data`` is false, ``cbj_pubdata`` elements are removed when
    ``has_data['pubdate']`` is false, and ``cbj_kindle_banner_hr`` elements
    are removed unless ``output_profile.short_name`` is ``'kindle'``.

    :param root: Root of the parsed jacket tree; modified in place. It must
        provide ``xpath()`` and its elements must provide ``getparent()``,
        ``index()``, ``remove()``, ``text`` and ``tail``
        (presumably an lxml tree -- confirm against the caller).
    :param output_profile: Object whose ``short_name`` attribute is read.
    :param has_data: Mapping with the keys ``series``, ``rating``, ``tags``
        and ``pubdate``; a missing key raises ``KeyError``.
    '''

    def extract(tag):
        # Remove tag from its parent while keeping the text that followed it
        # (its tail) at the same position in the document.
        parent = tag.getparent()
        idx = parent.index(tag)
        parent.remove(tag)
        tail = tag.tail
        if not tail:
            return
        if idx == 0:
            # No preceding sibling: the tail becomes part of the parent's
            # leading text.
            parent.text = (parent.text or '') + tail
        else:
            # After the removal the preceding sibling still sits at idx - 1.
            # The tail must be appended to that sibling, not to the last
            # child, otherwise the text would jump to the end of the parent.
            previous = parent[idx - 1]
            previous.tail = (previous.tail or '') + tail

    def extract_class(cls):
        # Matches only elements whose class attribute equals cls exactly.
        for tag in root.xpath('//*[@class="_"]'.replace('_', cls)):
            extract(tag)

    for key in 'series rating tags'.split():
        if not has_data[key]:
            extract_class('cbj_' + key)
    if not has_data['pubdate']:
        # The class name really is "pubdata", not "pubdate".
        extract_class('cbj_pubdata')
    if output_profile.short_name != 'kindle':
        extract_class('cbj_kindle_banner_hr')
|
||||||
|
|
||||||
|
|
||||||
def render_jacket(mi, output_profile,
|
def render_jacket(mi, output_profile,
|
||||||
alt_title=_('Unknown'), alt_tags=[], alt_comments='',
|
alt_title=_('Unknown'), alt_tags=[], alt_comments='',
|
||||||
alt_publisher='', rescale_fonts=False, alt_authors=None):
|
alt_publisher='', rescale_fonts=False, alt_authors=None):
|
||||||
@ -256,6 +283,7 @@ def render_jacket(mi, output_profile,
|
|||||||
author = ''
|
author = ''
|
||||||
mi.authors = orig
|
mi.authors = orig
|
||||||
author = escape(author)
|
author = escape(author)
|
||||||
|
has_data = {}
|
||||||
|
|
||||||
def generate_html(comments):
|
def generate_html(comments):
|
||||||
args = dict(xmlns=XHTML_NS,
|
args = dict(xmlns=XHTML_NS,
|
||||||
@ -315,32 +343,12 @@ def render_jacket(mi, output_profile,
|
|||||||
|
|
||||||
formatter = SafeFormatter()
|
formatter = SafeFormatter()
|
||||||
generated_html = formatter.format(template, **args)
|
generated_html = formatter.format(template, **args)
|
||||||
|
has_data['series'] = bool(series)
|
||||||
|
has_data['tags'] = bool(tags)
|
||||||
|
has_data['rating'] = bool(rating)
|
||||||
|
has_data['pubdate'] = bool(pubdate)
|
||||||
|
|
||||||
# Post-process the generated html to strip out empty header items
|
return strip_encoding_declarations(generated_html)
|
||||||
|
|
||||||
soup = BeautifulSoup(generated_html)
|
|
||||||
if not series:
|
|
||||||
series_tag = soup.find(attrs={'class':'cbj_series'})
|
|
||||||
if series_tag is not None:
|
|
||||||
series_tag.extract()
|
|
||||||
if not rating:
|
|
||||||
rating_tag = soup.find(attrs={'class':'cbj_rating'})
|
|
||||||
if rating_tag is not None:
|
|
||||||
rating_tag.extract()
|
|
||||||
if not tags:
|
|
||||||
tags_tag = soup.find(attrs={'class':'cbj_tags'})
|
|
||||||
if tags_tag is not None:
|
|
||||||
tags_tag.extract()
|
|
||||||
if not pubdate:
|
|
||||||
pubdate_tag = soup.find(attrs={'class':'cbj_pubdata'})
|
|
||||||
if pubdate_tag is not None:
|
|
||||||
pubdate_tag.extract()
|
|
||||||
if output_profile.short_name != 'kindle':
|
|
||||||
hr_tag = soup.find('hr', attrs={'class':'cbj_kindle_banner_hr'})
|
|
||||||
if hr_tag is not None:
|
|
||||||
hr_tag.extract()
|
|
||||||
|
|
||||||
return strip_encoding_declarations(soup.decode_contents())
|
|
||||||
|
|
||||||
from calibre.ebooks.oeb.base import RECOVER_PARSER
|
from calibre.ebooks.oeb.base import RECOVER_PARSER
|
||||||
|
|
||||||
@ -369,6 +377,7 @@ def render_jacket(mi, output_profile,
|
|||||||
for child in body:
|
for child in body:
|
||||||
fw.append(child)
|
fw.append(child)
|
||||||
body.append(fw)
|
body.append(fw)
|
||||||
|
postprocess_jacket(root, output_profile, has_data)
|
||||||
from calibre.ebooks.oeb.polish.pretty import pretty_html_tree
|
from calibre.ebooks.oeb.polish.pretty import pretty_html_tree
|
||||||
pretty_html_tree(None, root)
|
pretty_html_tree(None, root)
|
||||||
return root
|
return root
|
||||||
|
Loading…
x
Reference in New Issue
Block a user