mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix handling of reflowable covers in EPUB input. Also try to extract svg embedded raster covers. Misc. minor fixes
This commit is contained in:
parent
a20d9fb169
commit
d0e1fa2d90
@ -361,6 +361,8 @@ def entity_to_unicode(match, exceptions=[], encoding='cp1252'):
|
||||
return '&'+ent+';'
|
||||
if ent == 'apos':
|
||||
return "'"
|
||||
if ent == 'hellips':
|
||||
ent = 'hellip'
|
||||
if ent.startswith(u'#x'):
|
||||
num = int(ent[2:], 16)
|
||||
if encoding is None or num > 255:
|
||||
|
@ -57,6 +57,35 @@ class HTMLRenderer(object):
|
||||
self.loop.exit(0)
|
||||
|
||||
|
||||
def extract_cover_from_embedded_svg(html, base, log):
    '''
    Return the raw bytes of a raster image wrapped in an SVG cover page.

    Only the simple wrapper case is handled: the markup must contain exactly
    one ``svg:svg`` element whose single child is an ``svg:image``. The
    image's ``xlink:href`` is split on ``/`` and resolved relative to `base`.

    :param html: Markup of the cover page, in a form accepted by
                 ``lxml.etree.fromstring``.
    :param base: Directory against which the image href is resolved.
    :param log:  Logger supplied by the caller. Not used here; kept so the
                 signature matches what callers pass.
    :return: Contents of the referenced image file, or ``None`` when the
             markup does not match the wrapper pattern, the image has no
             href, or the resolved file is not readable.
    '''
    from lxml import etree
    from calibre.ebooks.oeb.base import XPath, SVG, XLINK
    root = etree.fromstring(html)

    svg = XPath('//svg:svg')(root)
    if len(svg) != 1 or len(svg[0]) != 1 or svg[0][0].tag != SVG('image'):
        return None

    href = svg[0][0].get(XLINK('href'), None)
    if not href:
        # Check the href *before* splitting it: an <image> without an
        # xlink:href used to raise AttributeError on None.split().
        return None

    # NOTE(review): the href is used verbatim; percent-encoded hrefs would
    # presumably need unquoting before they match a file on disk -- confirm.
    path = os.path.join(base, *href.split('/'))
    if not os.access(path, os.R_OK):
        return None

    # Close the handle deterministically instead of relying on the GC.
    with open(path, 'rb') as f:
        return f.read()
|
||||
|
||||
def render_html_svg_workaround(path_to_html, log, width=590, height=750):
    '''
    Produce cover image data for an HTML cover page.

    If the page mentions the SVG namespace, first try to pull the wrapped
    raster image straight out of the SVG via
    :func:`extract_cover_from_embedded_svg`. If that yields nothing (or
    fails), fall back to rendering the page with :func:`render_html`.

    :param path_to_html: Path to the HTML cover page on disk.
    :param log:          Logger, forwarded to the SVG extraction helper.
    :param width:        Width passed to :func:`render_html`.
    :param height:       Height passed to :func:`render_html`.
    :return: The extracted image bytes, otherwise the ``data`` attribute of
             the object returned by :func:`render_html`, or ``None`` if
             that object has no such attribute.
    '''
    from calibre.ebooks.oeb.base import SVG_NS
    # Read via a context manager so the file handle is always released.
    with open(path_to_html, 'rb') as f:
        raw = f.read()

    data = None
    if SVG_NS in raw:
        try:
            data = extract_cover_from_embedded_svg(raw,
                    os.path.dirname(path_to_html), log)
        except Exception:
            # Extraction is best effort: any ordinary failure simply means
            # we render the page instead. Unlike a bare ``except:``, this
            # lets KeyboardInterrupt and SystemExit propagate.
            data = None

    if data is None:
        renderer = render_html(path_to_html, width, height)
        data = getattr(renderer, 'data', None)
    return data
|
||||
|
||||
|
||||
def render_html(path_to_html, width=590, height=750):
|
||||
from PyQt4.QtWebKit import QWebPage
|
||||
from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize
|
||||
|
@ -54,7 +54,7 @@ class EPUBInput(InputFormatPlugin):
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def rationalize_cover(self, opf):
|
||||
def rationalize_cover(self, opf, log):
|
||||
guide_cover, guide_elem = None, None
|
||||
for guide_elem in opf.iterguide():
|
||||
if guide_elem.get('type', '').lower() == 'cover':
|
||||
@ -65,28 +65,37 @@ class EPUBInput(InputFormatPlugin):
|
||||
spine = list(opf.iterspine())
|
||||
if not spine:
|
||||
return
|
||||
# Check if the cover specified in the guide is also
|
||||
# the first element in spine
|
||||
idref = spine[0].get('idref', '')
|
||||
manifest = list(opf.itermanifest())
|
||||
if not manifest:
|
||||
return
|
||||
if manifest[0].get('id', False) != idref:
|
||||
elem = [x for x in manifest if x.get('id', '') == idref]
|
||||
if not elem or elem[0].get('href', None) != guide_cover:
|
||||
return
|
||||
log('Found HTML cover', guide_cover)
|
||||
|
||||
# Remove from spine as covers must be treated
|
||||
# specially
|
||||
spine[0].getparent().remove(spine[0])
|
||||
guide_elem.set('href', 'calibre_raster_cover.jpg')
|
||||
from calibre.ebooks.oeb.base import OPF
|
||||
t = etree.SubElement(elem[0].getparent(), OPF('item'),
|
||||
href=guide_elem.get('href'), id='calibre_raster_cover')
|
||||
t.set('media-type', 'image/jpeg')
|
||||
for elem in list(opf.iterguide()):
|
||||
if elem.get('type', '').lower() == 'titlepage':
|
||||
elem.getparent().remove(elem)
|
||||
from calibre.ebooks.oeb.base import OPF
|
||||
t = etree.SubElement(guide_elem.getparent(), OPF('reference'))
|
||||
t.set('type', 'titlepage')
|
||||
t.set('href', guide_cover)
|
||||
t.set('title', 'Title Page')
|
||||
from calibre.ebooks import render_html
|
||||
renderer = render_html(guide_cover)
|
||||
from calibre.ebooks import render_html_svg_workaround
|
||||
renderer = render_html_svg_workaround(guide_cover, log)
|
||||
if renderer is not None:
|
||||
open('calibre_raster_cover.jpg', 'wb').write(
|
||||
renderer.data)
|
||||
|
||||
renderer)
|
||||
|
||||
def convert(self, stream, options, file_ext, log, accelerators):
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
@ -121,7 +130,7 @@ class EPUBInput(InputFormatPlugin):
|
||||
for elem in opf.iterguide():
|
||||
elem.set('href', delta+elem.get('href'))
|
||||
|
||||
self.rationalize_cover(opf)
|
||||
self.rationalize_cover(opf, log)
|
||||
|
||||
with open('content.opf', 'wb') as nopf:
|
||||
nopf.write(opf.render())
|
||||
|
@ -5,14 +5,10 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
'''Read meta information from epub files'''
|
||||
|
||||
import os, time
|
||||
import os
|
||||
from cStringIO import StringIO
|
||||
from contextlib import closing
|
||||
|
||||
from PyQt4.Qt import QUrl, QEventLoop, QSize, QByteArray, QBuffer, \
|
||||
SIGNAL, QPainter, QImage, QObject, QApplication, Qt, QPalette
|
||||
from PyQt4.QtWebKit import QWebPage
|
||||
|
||||
from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
@ -102,64 +98,9 @@ class OCFDirReader(OCFReader):
|
||||
def open(self, path, *args, **kwargs):
|
||||
return open(os.path.join(self.root, path), *args, **kwargs)
|
||||
|
||||
class CoverRenderer(QObject):
|
||||
WIDTH = 600
|
||||
HEIGHT = 800
|
||||
|
||||
def __init__(self, path):
|
||||
if QApplication.instance() is None:
|
||||
QApplication([])
|
||||
QObject.__init__(self)
|
||||
self.loop = QEventLoop()
|
||||
self.page = QWebPage()
|
||||
pal = self.page.palette()
|
||||
pal.setBrush(QPalette.Background, Qt.white)
|
||||
self.page.setPalette(pal)
|
||||
self.page.setViewportSize(QSize(self.WIDTH, self.HEIGHT))
|
||||
self.page.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)
|
||||
self.page.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
|
||||
QObject.connect(self.page, SIGNAL('loadFinished(bool)'), self.render_html)
|
||||
self._image_data = None
|
||||
self.rendered = False
|
||||
url = QUrl.fromLocalFile(os.path.normpath(path))
|
||||
self.page.mainFrame().load(url)
|
||||
|
||||
def render_html(self, ok):
|
||||
try:
|
||||
if not ok:
|
||||
self.rendered = True
|
||||
return
|
||||
image = QImage(self.page.viewportSize(), QImage.Format_ARGB32)
|
||||
image.setDotsPerMeterX(96*(100/2.54))
|
||||
image.setDotsPerMeterY(96*(100/2.54))
|
||||
painter = QPainter(image)
|
||||
self.page.mainFrame().render(painter)
|
||||
painter.end()
|
||||
ba = QByteArray()
|
||||
buf = QBuffer(ba)
|
||||
buf.open(QBuffer.WriteOnly)
|
||||
image.save(buf, 'JPEG')
|
||||
self._image_data = str(ba.data())
|
||||
finally:
|
||||
self.loop.exit(0)
|
||||
self.rendered = True
|
||||
|
||||
def image_data():
|
||||
def fget(self):
|
||||
if not self.rendered:
|
||||
self.loop.exec_()
|
||||
count = 0
|
||||
while count < 50 and not self.rendered:
|
||||
time.sleep(0.1)
|
||||
count += 1
|
||||
return self._image_data
|
||||
return property(fget=fget)
|
||||
image_data = image_data()
|
||||
|
||||
|
||||
def get_cover(opf, opf_path, stream):
|
||||
from calibre.gui2 import is_ok_to_use_qt
|
||||
if not is_ok_to_use_qt(): return None
|
||||
from calibre.ebooks import render_html_svg_workaround
|
||||
from calibre.utils.logging import default_log
|
||||
spine = list(opf.spine_items())
|
||||
if not spine:
|
||||
return
|
||||
@ -172,8 +113,7 @@ def get_cover(opf, opf_path, stream):
|
||||
cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage)
|
||||
if not os.path.exists(cpage):
|
||||
return
|
||||
cr = CoverRenderer(cpage)
|
||||
return cr.image_data
|
||||
return render_html_svg_workaround(cpage, default_log)
|
||||
|
||||
def get_metadata(stream, extract_cover=True):
|
||||
""" Return metadata as a :class:`MetaInformation` object """
|
||||
|
@ -1556,7 +1556,8 @@ class MobiWriter(object):
|
||||
else:
|
||||
raise NotImplementedError("missing date or timestamp needed for mobi_periodical")
|
||||
|
||||
if oeb.metadata.cover:
|
||||
if oeb.metadata.cover and \
|
||||
unicode(oeb.metadata.cover[0]) in oeb.manifest.ids:
|
||||
id = unicode(oeb.metadata.cover[0])
|
||||
item = oeb.manifest.ids[id]
|
||||
href = item.href
|
||||
|
@ -27,7 +27,6 @@ from calibre.ebooks.oeb.base import namespace, barename, XPath, xpath, \
|
||||
OEBError, OEBBook, DirContainer
|
||||
from calibre.ebooks.oeb.writer import OEBWriter
|
||||
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
|
||||
from calibre.ebooks.metadata.epub import CoverRenderer
|
||||
from calibre.startup import get_lang
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.constants import __appname__, __version__
|
||||
@ -346,6 +345,8 @@ class OEBReader(object):
|
||||
if descriptionElement:
|
||||
description = etree.tostring(descriptionElement[0],
|
||||
method='text', encoding=unicode).strip()
|
||||
if not description:
|
||||
description = None
|
||||
else :
|
||||
description = None
|
||||
|
||||
@ -525,12 +526,14 @@ class OEBReader(object):
|
||||
return
|
||||
|
||||
def _cover_from_html(self, hcover):
|
||||
from calibre.ebooks import render_html_svg_workaround
|
||||
with TemporaryDirectory('_html_cover') as tdir:
|
||||
writer = OEBWriter()
|
||||
writer(self.oeb, tdir)
|
||||
path = os.path.join(tdir, urlunquote(hcover.href))
|
||||
renderer = CoverRenderer(path)
|
||||
data = renderer.image_data
|
||||
data = render_html_svg_workaround(path, self.logger)
|
||||
if not data:
|
||||
data = ''
|
||||
id, href = self.oeb.manifest.generate('cover', 'cover.jpeg')
|
||||
item = self.oeb.manifest.add(id, href, JPEG_MIME, data=data)
|
||||
return item
|
||||
|
@ -102,3 +102,5 @@ class Log(object):
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
self.prints(INFO, *args, **kwargs)
|
||||
|
||||
default_log = Log()
|
||||
|
@ -17,7 +17,7 @@ class Publico(BasicNewsRecipe):
|
||||
max_articles_per_feed = 30
|
||||
encoding='utf-8'
|
||||
no_stylesheets = True
|
||||
language = _('Portuguese')
|
||||
language = _('Portugese')
|
||||
preprocess_regexps = [(re.compile(u"\uFFFD", re.DOTALL|re.IGNORECASE), lambda match: ''),]
|
||||
|
||||
feeds = [
|
||||
|
Loading…
x
Reference in New Issue
Block a user