mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix handling of reflowable covers in EPUB input. Also try to extract svg embedded raster covers. Misc. minor fixes
This commit is contained in:
parent
a20d9fb169
commit
d0e1fa2d90
@ -361,6 +361,8 @@ def entity_to_unicode(match, exceptions=[], encoding='cp1252'):
|
|||||||
return '&'+ent+';'
|
return '&'+ent+';'
|
||||||
if ent == 'apos':
|
if ent == 'apos':
|
||||||
return "'"
|
return "'"
|
||||||
|
if ent == 'hellips':
|
||||||
|
ent = 'hellip'
|
||||||
if ent.startswith(u'#x'):
|
if ent.startswith(u'#x'):
|
||||||
num = int(ent[2:], 16)
|
num = int(ent[2:], 16)
|
||||||
if encoding is None or num > 255:
|
if encoding is None or num > 255:
|
||||||
|
@ -57,6 +57,35 @@ class HTMLRenderer(object):
|
|||||||
self.loop.exit(0)
|
self.loop.exit(0)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_cover_from_embedded_svg(html, base, log):
    """Return the bytes of a raster image that is wrapped in an SVG cover.

    The markup in ``html`` is parsed as XML. If it contains exactly one
    ``svg`` element whose only child is an ``image`` element, the image's
    ``xlink:href`` is resolved against ``base`` (splitting on ``/``) and the
    file it points to is read.

    :param html: Markup of the cover page, handed to ``etree.fromstring``.
    :param base: Directory against which the image href is resolved.
    :param log: Logger supplied by the caller; not used by this function.
    :return: The contents of the referenced image file, or ``None`` when the
             document does not match the single-svg/single-image pattern,
             the image has no href, or the file is not readable.
    """
    from lxml import etree
    from calibre.ebooks.oeb.base import XPath, SVG, XLINK
    root = etree.fromstring(html)

    svg = XPath('//svg:svg')(root)
    if len(svg) != 1 or len(svg[0]) != 1 or svg[0][0].tag != SVG('image'):
        return None

    image = svg[0][0]
    href = image.get(XLINK('href'), None)
    if not href:
        # Must be checked before building the path: splitting a missing
        # (None) href would raise AttributeError instead of returning None.
        return None

    path = os.path.join(base, *href.split('/'))
    if not os.access(path, os.R_OK):
        return None

    # Context manager so the file handle is closed deterministically.
    with open(path, 'rb') as f:
        return f.read()
|
||||||
|
|
||||||
|
def render_html_svg_workaround(path_to_html, log, width=590, height=750):
    """Return cover image data for an HTML page, preferring an embedded image.

    If the file contains the SVG namespace, first try to pull the raster
    image straight out of the SVG wrapper with
    ``extract_cover_from_embedded_svg``. If that yields nothing (or fails),
    fall back to rendering the page with ``render_html``.

    :param path_to_html: Path of the HTML file to turn into a cover.
    :param log: Logger forwarded to ``extract_cover_from_embedded_svg``.
    :param width: Width passed to ``render_html`` for the fallback render.
    :param height: Height passed to ``render_html`` for the fallback render.
    :return: The extracted image bytes, or the ``data`` attribute of the
             object returned by ``render_html``; ``None`` if neither
             approach produced data.
    """
    from calibre.ebooks.oeb.base import SVG_NS
    # Context manager so the file handle is closed deterministically.
    with open(path_to_html, 'rb') as f:
        raw = f.read()

    data = None
    if SVG_NS in raw:
        try:
            data = extract_cover_from_embedded_svg(raw,
                    os.path.dirname(path_to_html), log)
        except Exception:
            # Extraction is a best-effort shortcut: any failure simply means
            # we fall through to full rendering below. Catching Exception
            # rather than using a bare except keeps KeyboardInterrupt and
            # SystemExit propagating.
            pass

    if data is None:
        renderer = render_html(path_to_html, width, height)
        # render_html may return an object without usable data (or None).
        data = getattr(renderer, 'data', None)
    return data
|
||||||
|
|
||||||
|
|
||||||
def render_html(path_to_html, width=590, height=750):
|
def render_html(path_to_html, width=590, height=750):
|
||||||
from PyQt4.QtWebKit import QWebPage
|
from PyQt4.QtWebKit import QWebPage
|
||||||
from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize
|
from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize
|
||||||
|
@ -54,7 +54,7 @@ class EPUBInput(InputFormatPlugin):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def rationalize_cover(self, opf):
|
def rationalize_cover(self, opf, log):
|
||||||
guide_cover, guide_elem = None, None
|
guide_cover, guide_elem = None, None
|
||||||
for guide_elem in opf.iterguide():
|
for guide_elem in opf.iterguide():
|
||||||
if guide_elem.get('type', '').lower() == 'cover':
|
if guide_elem.get('type', '').lower() == 'cover':
|
||||||
@ -65,28 +65,37 @@ class EPUBInput(InputFormatPlugin):
|
|||||||
spine = list(opf.iterspine())
|
spine = list(opf.iterspine())
|
||||||
if not spine:
|
if not spine:
|
||||||
return
|
return
|
||||||
|
# Check if the cover specified in the guide is also
|
||||||
|
# the first element in spine
|
||||||
idref = spine[0].get('idref', '')
|
idref = spine[0].get('idref', '')
|
||||||
manifest = list(opf.itermanifest())
|
manifest = list(opf.itermanifest())
|
||||||
if not manifest:
|
if not manifest:
|
||||||
return
|
return
|
||||||
if manifest[0].get('id', False) != idref:
|
elem = [x for x in manifest if x.get('id', '') == idref]
|
||||||
|
if not elem or elem[0].get('href', None) != guide_cover:
|
||||||
return
|
return
|
||||||
|
log('Found HTML cover', guide_cover)
|
||||||
|
|
||||||
|
# Remove from spine as covers must be treated
|
||||||
|
# specially
|
||||||
spine[0].getparent().remove(spine[0])
|
spine[0].getparent().remove(spine[0])
|
||||||
guide_elem.set('href', 'calibre_raster_cover.jpg')
|
guide_elem.set('href', 'calibre_raster_cover.jpg')
|
||||||
|
from calibre.ebooks.oeb.base import OPF
|
||||||
|
t = etree.SubElement(elem[0].getparent(), OPF('item'),
|
||||||
|
href=guide_elem.get('href'), id='calibre_raster_cover')
|
||||||
|
t.set('media-type', 'image/jpeg')
|
||||||
for elem in list(opf.iterguide()):
|
for elem in list(opf.iterguide()):
|
||||||
if elem.get('type', '').lower() == 'titlepage':
|
if elem.get('type', '').lower() == 'titlepage':
|
||||||
elem.getparent().remove(elem)
|
elem.getparent().remove(elem)
|
||||||
from calibre.ebooks.oeb.base import OPF
|
|
||||||
t = etree.SubElement(guide_elem.getparent(), OPF('reference'))
|
t = etree.SubElement(guide_elem.getparent(), OPF('reference'))
|
||||||
t.set('type', 'titlepage')
|
t.set('type', 'titlepage')
|
||||||
t.set('href', guide_cover)
|
t.set('href', guide_cover)
|
||||||
t.set('title', 'Title Page')
|
t.set('title', 'Title Page')
|
||||||
from calibre.ebooks import render_html
|
from calibre.ebooks import render_html_svg_workaround
|
||||||
renderer = render_html(guide_cover)
|
renderer = render_html_svg_workaround(guide_cover, log)
|
||||||
if renderer is not None:
|
if renderer is not None:
|
||||||
open('calibre_raster_cover.jpg', 'wb').write(
|
open('calibre_raster_cover.jpg', 'wb').write(
|
||||||
renderer.data)
|
renderer)
|
||||||
|
|
||||||
|
|
||||||
def convert(self, stream, options, file_ext, log, accelerators):
|
def convert(self, stream, options, file_ext, log, accelerators):
|
||||||
from calibre.utils.zipfile import ZipFile
|
from calibre.utils.zipfile import ZipFile
|
||||||
@ -121,7 +130,7 @@ class EPUBInput(InputFormatPlugin):
|
|||||||
for elem in opf.iterguide():
|
for elem in opf.iterguide():
|
||||||
elem.set('href', delta+elem.get('href'))
|
elem.set('href', delta+elem.get('href'))
|
||||||
|
|
||||||
self.rationalize_cover(opf)
|
self.rationalize_cover(opf, log)
|
||||||
|
|
||||||
with open('content.opf', 'wb') as nopf:
|
with open('content.opf', 'wb') as nopf:
|
||||||
nopf.write(opf.render())
|
nopf.write(opf.render())
|
||||||
|
@ -5,14 +5,10 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
|
|
||||||
'''Read meta information from epub files'''
|
'''Read meta information from epub files'''
|
||||||
|
|
||||||
import os, time
|
import os
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
|
|
||||||
from PyQt4.Qt import QUrl, QEventLoop, QSize, QByteArray, QBuffer, \
|
|
||||||
SIGNAL, QPainter, QImage, QObject, QApplication, Qt, QPalette
|
|
||||||
from PyQt4.QtWebKit import QWebPage
|
|
||||||
|
|
||||||
from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace
|
from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
@ -102,64 +98,9 @@ class OCFDirReader(OCFReader):
|
|||||||
def open(self, path, *args, **kwargs):
|
def open(self, path, *args, **kwargs):
|
||||||
return open(os.path.join(self.root, path), *args, **kwargs)
|
return open(os.path.join(self.root, path), *args, **kwargs)
|
||||||
|
|
||||||
class CoverRenderer(QObject):
|
|
||||||
WIDTH = 600
|
|
||||||
HEIGHT = 800
|
|
||||||
|
|
||||||
def __init__(self, path):
|
|
||||||
if QApplication.instance() is None:
|
|
||||||
QApplication([])
|
|
||||||
QObject.__init__(self)
|
|
||||||
self.loop = QEventLoop()
|
|
||||||
self.page = QWebPage()
|
|
||||||
pal = self.page.palette()
|
|
||||||
pal.setBrush(QPalette.Background, Qt.white)
|
|
||||||
self.page.setPalette(pal)
|
|
||||||
self.page.setViewportSize(QSize(self.WIDTH, self.HEIGHT))
|
|
||||||
self.page.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)
|
|
||||||
self.page.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
|
|
||||||
QObject.connect(self.page, SIGNAL('loadFinished(bool)'), self.render_html)
|
|
||||||
self._image_data = None
|
|
||||||
self.rendered = False
|
|
||||||
url = QUrl.fromLocalFile(os.path.normpath(path))
|
|
||||||
self.page.mainFrame().load(url)
|
|
||||||
|
|
||||||
def render_html(self, ok):
|
|
||||||
try:
|
|
||||||
if not ok:
|
|
||||||
self.rendered = True
|
|
||||||
return
|
|
||||||
image = QImage(self.page.viewportSize(), QImage.Format_ARGB32)
|
|
||||||
image.setDotsPerMeterX(96*(100/2.54))
|
|
||||||
image.setDotsPerMeterY(96*(100/2.54))
|
|
||||||
painter = QPainter(image)
|
|
||||||
self.page.mainFrame().render(painter)
|
|
||||||
painter.end()
|
|
||||||
ba = QByteArray()
|
|
||||||
buf = QBuffer(ba)
|
|
||||||
buf.open(QBuffer.WriteOnly)
|
|
||||||
image.save(buf, 'JPEG')
|
|
||||||
self._image_data = str(ba.data())
|
|
||||||
finally:
|
|
||||||
self.loop.exit(0)
|
|
||||||
self.rendered = True
|
|
||||||
|
|
||||||
def image_data():
|
|
||||||
def fget(self):
|
|
||||||
if not self.rendered:
|
|
||||||
self.loop.exec_()
|
|
||||||
count = 0
|
|
||||||
while count < 50 and not self.rendered:
|
|
||||||
time.sleep(0.1)
|
|
||||||
count += 1
|
|
||||||
return self._image_data
|
|
||||||
return property(fget=fget)
|
|
||||||
image_data = image_data()
|
|
||||||
|
|
||||||
|
|
||||||
def get_cover(opf, opf_path, stream):
|
def get_cover(opf, opf_path, stream):
|
||||||
from calibre.gui2 import is_ok_to_use_qt
|
from calibre.ebooks import render_html_svg_workaround
|
||||||
if not is_ok_to_use_qt(): return None
|
from calibre.utils.logging import default_log
|
||||||
spine = list(opf.spine_items())
|
spine = list(opf.spine_items())
|
||||||
if not spine:
|
if not spine:
|
||||||
return
|
return
|
||||||
@ -172,8 +113,7 @@ def get_cover(opf, opf_path, stream):
|
|||||||
cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage)
|
cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage)
|
||||||
if not os.path.exists(cpage):
|
if not os.path.exists(cpage):
|
||||||
return
|
return
|
||||||
cr = CoverRenderer(cpage)
|
return render_html_svg_workaround(cpage, default_log)
|
||||||
return cr.image_data
|
|
||||||
|
|
||||||
def get_metadata(stream, extract_cover=True):
|
def get_metadata(stream, extract_cover=True):
|
||||||
""" Return metadata as a :class:`MetaInformation` object """
|
""" Return metadata as a :class:`MetaInformation` object """
|
||||||
|
@ -1556,7 +1556,8 @@ class MobiWriter(object):
|
|||||||
else:
|
else:
|
||||||
raise NotImplementedError("missing date or timestamp needed for mobi_periodical")
|
raise NotImplementedError("missing date or timestamp needed for mobi_periodical")
|
||||||
|
|
||||||
if oeb.metadata.cover:
|
if oeb.metadata.cover and \
|
||||||
|
unicode(oeb.metadata.cover[0]) in oeb.manifest.ids:
|
||||||
id = unicode(oeb.metadata.cover[0])
|
id = unicode(oeb.metadata.cover[0])
|
||||||
item = oeb.manifest.ids[id]
|
item = oeb.manifest.ids[id]
|
||||||
href = item.href
|
href = item.href
|
||||||
|
@ -27,7 +27,6 @@ from calibre.ebooks.oeb.base import namespace, barename, XPath, xpath, \
|
|||||||
OEBError, OEBBook, DirContainer
|
OEBError, OEBBook, DirContainer
|
||||||
from calibre.ebooks.oeb.writer import OEBWriter
|
from calibre.ebooks.oeb.writer import OEBWriter
|
||||||
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
|
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
|
||||||
from calibre.ebooks.metadata.epub import CoverRenderer
|
|
||||||
from calibre.startup import get_lang
|
from calibre.startup import get_lang
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre.constants import __appname__, __version__
|
from calibre.constants import __appname__, __version__
|
||||||
@ -346,6 +345,8 @@ class OEBReader(object):
|
|||||||
if descriptionElement:
|
if descriptionElement:
|
||||||
description = etree.tostring(descriptionElement[0],
|
description = etree.tostring(descriptionElement[0],
|
||||||
method='text', encoding=unicode).strip()
|
method='text', encoding=unicode).strip()
|
||||||
|
if not description:
|
||||||
|
description = None
|
||||||
else :
|
else :
|
||||||
description = None
|
description = None
|
||||||
|
|
||||||
@ -525,12 +526,14 @@ class OEBReader(object):
|
|||||||
return
|
return
|
||||||
|
|
||||||
def _cover_from_html(self, hcover):
|
def _cover_from_html(self, hcover):
|
||||||
|
from calibre.ebooks import render_html_svg_workaround
|
||||||
with TemporaryDirectory('_html_cover') as tdir:
|
with TemporaryDirectory('_html_cover') as tdir:
|
||||||
writer = OEBWriter()
|
writer = OEBWriter()
|
||||||
writer(self.oeb, tdir)
|
writer(self.oeb, tdir)
|
||||||
path = os.path.join(tdir, urlunquote(hcover.href))
|
path = os.path.join(tdir, urlunquote(hcover.href))
|
||||||
renderer = CoverRenderer(path)
|
data = render_html_svg_workaround(path, self.logger)
|
||||||
data = renderer.image_data
|
if not data:
|
||||||
|
data = ''
|
||||||
id, href = self.oeb.manifest.generate('cover', 'cover.jpeg')
|
id, href = self.oeb.manifest.generate('cover', 'cover.jpeg')
|
||||||
item = self.oeb.manifest.add(id, href, JPEG_MIME, data=data)
|
item = self.oeb.manifest.add(id, href, JPEG_MIME, data=data)
|
||||||
return item
|
return item
|
||||||
|
@ -102,3 +102,5 @@ class Log(object):
|
|||||||
|
|
||||||
def __call__(self, *args, **kwargs):
|
def __call__(self, *args, **kwargs):
|
||||||
self.prints(INFO, *args, **kwargs)
|
self.prints(INFO, *args, **kwargs)
|
||||||
|
|
||||||
|
default_log = Log()
|
||||||
|
@ -17,7 +17,7 @@ class Publico(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 30
|
max_articles_per_feed = 30
|
||||||
encoding='utf-8'
|
encoding='utf-8'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
language = _('Portuguese')
|
language = _('Portugese')
|
||||||
preprocess_regexps = [(re.compile(u"\uFFFD", re.DOTALL|re.IGNORECASE), lambda match: ''),]
|
preprocess_regexps = [(re.compile(u"\uFFFD", re.DOTALL|re.IGNORECASE), lambda match: ''),]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
|
Loading…
x
Reference in New Issue
Block a user