mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
ebook-polish: Update covers in epub
This commit is contained in:
parent
9a0164059a
commit
c91c1aeba2
@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, logging, sys, hashlib, uuid
|
||||
from urllib import unquote as urlunquote, quote as urlquote
|
||||
from urlparse import urlparse
|
||||
|
||||
from lxml import etree
|
||||
|
||||
@ -96,16 +97,22 @@ class Container(object):
|
||||
def name_to_abspath(self, name):
    '''
    Return the absolute filesystem path for the container name ``name``.

    ``name`` uses '/' as its separator; its components are joined onto
    self.root and the result is normalised with os.path.abspath.
    '''
    components = name.split('/')
    return os.path.abspath(join(self.root, *components))
|
||||
|
||||
def exists(self, name):
    '''
    Return True if the file that the container name ``name`` maps to
    (via name_to_abspath) is present on disk.
    '''
    abspath = self.name_to_abspath(name)
    return os.path.exists(abspath)
|
||||
|
||||
def href_to_name(self, href, base=None):
    '''
    Convert an href (relative to base) to a name. base must be a name or
    None, in which case self.root is used.

    Returns None when the href cannot refer to a file in the container:
    it has a URL scheme, it has no path component (e.g. it is only a
    fragment), or its path is absolute (starts with '/').
    '''
    if base is None:
        base = self.root
    else:
        base = os.path.dirname(self.name_to_abspath(base))
    # Parse first, unquote second: a percent-encoded '#' or '?' in the path
    # must not be mistaken for a fragment/query delimiter, and the path must
    # be unquoted exactly once.
    purl = urlparse(href)
    if purl.scheme or not purl.path or purl.path.startswith('/'):
        return None
    href = urlunquote(purl.path)
    fullpath = os.path.join(base, *href.split('/'))
    return self.abspath_to_name(fullpath)
|
||||
|
||||
@ -208,10 +215,19 @@ class Container(object):
|
||||
return self.parsed(self.opf_name)
|
||||
|
||||
@property
def manifest_id_map(self):
    '''
    Mapping of manifest item id -> name, built from every
    <item> in the OPF manifest that has both an id and an href.

    hrefs are resolved relative to the OPF file; the value is whatever
    href_to_name returns for that href.
    '''
    return {item.get('id'):self.href_to_name(item.get('href'), self.opf_name)
            for item in self.opf_xpath('//opf:manifest/opf:item[@href and @id]')}
|
||||
|
||||
@property
def guide_type_map(self):
    '''
    Mapping of guide reference type -> name, built from every
    <reference> in the OPF guide that has both an href and a type.

    If several references share a type, the last one in document order
    is the one kept.
    '''
    type_map = {}
    references = self.opf_xpath('//opf:guide/opf:reference[@href and @type]')
    for ref in references:
        type_map[ref.get('type', '')] = self.href_to_name(ref.get('href'), self.opf_name)
    return type_map
|
||||
|
||||
@property
|
||||
def spine_items(self):
|
||||
manifest_id_map = self.manifest_id_map
|
||||
|
||||
linear, non_linear = [], []
|
||||
for item in self.opf_xpath('//opf:spine/opf:itemref[@idref]'):
|
||||
idref = item.get('idref')
|
||||
@ -251,8 +267,8 @@ class Container(object):
|
||||
self.remove_from_xml(item)
|
||||
self.dirty(self.opf_name)
|
||||
|
||||
path = self.name_path_map.pop(name)
|
||||
if os.path.exists(path):
|
||||
path = self.name_path_map.pop(name, None)
|
||||
if path and os.path.exists(path):
|
||||
os.remove(path)
|
||||
self.mime_map.pop(name, None)
|
||||
self.parsed_cache.pop(name, None)
|
||||
@ -301,15 +317,24 @@ class Container(object):
|
||||
if idx == len(parent)-1:
|
||||
parent[idx-1].tail = parent.text
|
||||
|
||||
def opf_get_or_create(self, name):
    '''
    Return the first OPF element with tag ``name`` (e.g. 'guide'),
    creating it if the OPF has none.

    A newly created element is appended as the last child of <package>
    and the OPF is marked dirty before it is modified.
    '''
    matches = self.opf_xpath('//opf:'+name)
    if not matches:
        self.dirty(self.opf_name)
        package = self.opf_xpath('//opf:package')[0]
        created = package.makeelement(OPF(name))
        created.tail = '\n'
        package.append(created)
        return created
    return matches[0]
|
||||
|
||||
def generate_item(self, name, id_prefix=None, media_type=None):
|
||||
'''Add an item to the manifest with href derived from the given
|
||||
name. Ensures uniqueness of href and id automatically. Returns
|
||||
generated item.'''
|
||||
id_prefix = id_prefix or 'id'
|
||||
media_type = media_type or guess_type(name)[0]
|
||||
path = self.name_to_abspath(name)
|
||||
relpath = self.relpath(path, base=self.opf_dir)
|
||||
href = urlquote(relpath)
|
||||
href = self.name_to_href(name, self.opf_name)
|
||||
base, ext = href.rpartition('.')[0::2]
|
||||
all_ids = {x.get('id') for x in self.opf_xpath('//*[@id]')}
|
||||
c = 0
|
||||
@ -319,8 +344,12 @@ class Container(object):
|
||||
item_id = id_prefix + '%d'%c
|
||||
all_names = {x.get('href') for x in self.opf_xpath(
|
||||
'//opf:manifest/opf:item[@href]')}
|
||||
|
||||
def exists(h):
|
||||
return self.exists(self.href_to_name(h, self.opf_name))
|
||||
|
||||
c = 0
|
||||
while href in all_names:
|
||||
while href in all_names or exists(href):
|
||||
c += 1
|
||||
href = '%s_%d.%s'%(base, c, ext)
|
||||
manifest = self.opf_xpath('//opf:manifest')[0]
|
||||
@ -329,15 +358,26 @@ class Container(object):
|
||||
item.set('media-type', media_type)
|
||||
self.insert_into_xml(manifest, item)
|
||||
self.dirty(self.opf_name)
|
||||
name = self.href_to_name(href, self.opf_name)
|
||||
self.name_path_map[name] = self.name_to_abspath(name)
|
||||
self.mime_map[name] = media_type
|
||||
return item
|
||||
|
||||
def commit_item(self, name):
    '''
    Write the parsed (in-memory) version of ``name`` back to its file.

    ``name`` is removed from the dirtied set and from the parsed cache,
    serialized according to its media type and written in binary mode.
    Raises KeyError if ``name`` is not dirty or not in the parsed cache.
    '''
    self.dirtied.remove(name)
    parsed = self.parsed_cache.pop(name)
    raw = serialize(parsed, self.mime_map[name])
    with open(self.name_path_map[name], 'wb') as dest:
        dest.write(raw)
|
||||
|
||||
def open(self, name, mode='rb'):
    '''
    Open the file for the container name ``name`` and return the file
    object. Pending in-memory changes to ``name`` are committed to disk
    first, so the caller sees (or overwrites) up-to-date contents.
    '''
    if name in self.dirtied:
        self.commit_item(name)
    path = self.name_to_abspath(name)
    return open(path, mode)
|
||||
|
||||
def commit(self, outpath=None):
    '''
    Write every dirty item back to disk, one commit_item call per item.

    ``outpath`` is accepted for interface compatibility; nothing in this
    method reads it.
    '''
    # Iterate over a snapshot: commit_item is expected to shrink
    # self.dirtied while we loop.
    for name in tuple(self.dirtied):
        self.commit_item(name)
|
||||
|
||||
def compare_to(self, other):
|
||||
if set(self.name_path_map) != set(other.name_path_map):
|
||||
|
@ -7,9 +7,10 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import shutil
|
||||
import shutil, re, os
|
||||
|
||||
from calibre.ebooks.oeb.base import OPF
|
||||
from calibre.ebooks.oeb.base import OPF, OEB_DOCS, XPath, XLINK, xml2text
|
||||
from calibre.ebooks.oeb.polish.replace import replace_links
|
||||
|
||||
def set_azw3_cover(container, cover_path, report):
|
||||
name = None
|
||||
@ -33,4 +34,197 @@ def set_azw3_cover(container, cover_path, report):
|
||||
def set_cover(container, cover_path, report):
    '''
    Set the cover of the book in ``container`` to the image at
    ``cover_path``, dispatching on the book type: AZW3 books are handled
    by set_azw3_cover, everything else by set_epub_cover.
    '''
    setter = set_azw3_cover if container.book_type == 'azw3' else set_epub_cover
    setter(container, cover_path, report)
|
||||
|
||||
###############################################################################
|
||||
# The delightful EPUB cover processing
|
||||
|
||||
def is_raster_image(media_type):
    '''
    Test whether ``media_type`` is a PNG, JPEG or GIF MIME type
    (case-insensitive). A falsy ``media_type`` (None, '') is returned
    unchanged, which is itself falsy.
    '''
    if not media_type:
        return media_type
    return media_type.lower() in {
        'image/png', 'image/jpeg', 'image/jpg', 'image/gif'}
|
||||
|
||||
# OPF guide reference types (all lower case) that are treated as pointing at
# a cover.
COVER_TYPES = {
    'cover',
    'coverimagestandard',
    'thumbimagestandard',
    'other.ms-coverimage',
    'other.ms-coverimage-standard',
    'other.ms-thumbimage',
    'other.ms-thumbimage-standard',
    'other.ms-titleimage',
    'other.ms-titleimage-standard',
}
|
||||
|
||||
def find_cover_image(container):
    '''
    Find a raster image marked as a cover in the OPF.

    Candidates are tried in this order and the first hit is returned:

      1. the manifest item referenced by a <meta name="cover"> element
      2. a guide reference whose type is 'cover'
      3. the largest file (by size on disk) among all guide references
         whose type is in COVER_TYPES

    Only items whose media type is a raster image count. Returns the name
    of the image, or None if nothing suitable is found.
    '''
    id_to_name = container.manifest_id_map
    mime_types = container.mime_map

    def is_raster(name):
        return is_raster_image(mime_types.get(name, None))

    for meta in container.opf_xpath('//opf:meta[@name="cover" and @content]'):
        name = id_to_name.get(meta.get('content'), None)
        if is_raster(name):
            return name

    # Next, look for a guide item with type == 'cover'
    guide_refs = container.guide_type_map
    for ref_type, name in guide_refs.iteritems():
        if ref_type.lower() == 'cover' and is_raster(name):
            return name

    # Finally, pick the largest image from all possible guide cover items
    best_name, best_size = None, 0
    for ref_type, name in guide_refs.iteritems():
        if ref_type.lower() not in COVER_TYPES or not is_raster(name):
            continue
        path = container.name_path_map.get(name, None)
        if not path:
            continue
        size = os.path.getsize(path)
        if size > best_size:
            best_name, best_size = name, size

    if best_name:
        return best_name
|
||||
|
||||
def find_cover_page(container):
    '''
    Find a document marked as a cover in the OPF: the target of a guide
    reference of type 'cover' whose media type is one of OEB_DOCS.
    Returns its name, or None if there is no such reference.
    '''
    mime_types = container.mime_map
    for ref_type, name in container.guide_type_map.iteritems():
        if ref_type.lower() != 'cover':
            continue
        if mime_types.get(name, '').lower() in OEB_DOCS:
            return name
|
||||
|
||||
def find_cover_image_in_page(container, cover_page):
    '''
    If the document ``cover_page`` is nothing more than a wrapper around a
    single image, return what href_to_name gives for that image's href
    (resolved relative to ``cover_page``). Otherwise return None.

    A page qualifies when it has exactly one <body>, no non-whitespace
    text, and at most one <img src> / SVG <image> element.
    '''
    root = container.parsed(cover_page)
    bodies = XPath('//h:body')(root)
    if len(bodies) != 1:
        return
    body = bodies[0]

    find_images = XPath('descendant::h:img[@src]|descendant::svg:svg/descendant::svg:image')
    hrefs = (img.get('src') or img.get(XLINK('href')) for img in find_images(body))
    images = [container.href_to_name(href, base=cover_page) for href in hrefs if href]

    text = re.sub(r'\s+', '', xml2text(body))
    if text or len(images) > 1:
        # Document has more content than a single image
        return
    if images:
        return images[0]
|
||||
|
||||
def clean_opf(container):
    '''
    Remove all references to covers from the OPF.

    This is a generator: it yields the name of every file that a removed
    <meta name="cover"> or cover-type guide reference pointed to (only
    names present in container.name_path_map). The OPF is marked dirty
    after the last reference has been processed, so the generator must be
    exhausted for the changes to be registered.
    '''
    id_to_name = container.manifest_id_map
    for meta in container.opf_xpath('//opf:meta[@name="cover" and @content]'):
        name = id_to_name.get(meta.get('content', None), None)
        container.remove_from_xml(meta)
        if name and name in container.name_path_map:
            yield name

    type_to_name = container.guide_type_map
    for ref in container.opf_xpath('//opf:guide/opf:reference[@type]'):
        typ = ref.get('type', '')
        if typ.lower() not in COVER_TYPES:
            continue
        container.remove_from_xml(ref)
        name = type_to_name.get(typ, None)
        if name and name in container.name_path_map:
            yield name

    container.dirty(container.opf_name)
|
||||
|
||||
def create_epub_cover(container, cover_path):
    '''
    Add the image at ``cover_path`` to the book as its cover.

    Copies the image into the container as a new manifest item, generates
    a title page document that displays it (SVG or plain <img>, depending
    on the saved 'epub_output' conversion defaults), puts the title page
    first in the spine and registers both in the OPF guide and metadata.

    Returns a tuple (raster_cover, titlepage) of the names of the new
    image and the new title page.
    '''
    from calibre.ebooks.conversion.config import load_defaults
    from calibre.ebooks.oeb.transforms.cover import CoverManager

    ext = cover_path.rpartition('.')[-1].lower()
    raster_cover_item = container.generate_item('cover.'+ext, id_prefix='cover')
    raster_cover = container.href_to_name(raster_cover_item.get('href'),
            container.opf_name)
    with open(cover_path, 'rb') as src, container.open(raster_cover, 'wb') as dest:
        shutil.copyfileobj(src, dest)

    opts = load_defaults('epub_output')
    keep_aspect = opts.get('preserve_cover_aspect_ratio', False)
    no_svg = opts.get('no_svg_cover', False)
    if no_svg:
        # The doubled % is deliberate: the template goes through
        # %-formatting below, which turns it back into a single %.
        style = 'style="height: 100%%"'
        templ = CoverManager.NONSVG_TEMPLATE.replace('__style__', style)
    else:
        width, height = 600, 800
        ar = 'xMidYMid meet' if keep_aspect else 'none'
        templ = CoverManager.SVG_TEMPLATE.replace('__ar__', ar)
        templ = templ.replace('__viewbox__', '0 0 %d %d'%(width, height))
        templ = templ.replace('__width__', str(width))
        templ = templ.replace('__height__', str(height))

    titlepage_item = container.generate_item('titlepage.xhtml',
                                             id_prefix='titlepage')
    titlepage = container.href_to_name(titlepage_item.get('href'),
                                       container.opf_name)
    # The image link lives inside the title page, so it is made relative to
    # the title page. Encode the *rendered* page: attribute access binds
    # tighter than %, so without the parentheses only the href is encoded.
    raw = (templ % container.name_to_href(raster_cover, titlepage)).encode('utf-8')
    with container.open(titlepage, 'wb') as f:
        f.write(raw)

    spine = container.opf_xpath('//opf:spine')[0]
    ref = spine.makeelement(OPF('itemref'), idref=titlepage_item.get('id'))
    container.insert_into_xml(spine, ref, index=0)
    guide = container.opf_get_or_create('guide')
    # hrefs in the guide are resolved relative to the OPF file (this is how
    # guide_type_map reads them back), so generate them the same way.
    container.insert_into_xml(guide, guide.makeelement(
        OPF('reference'), type='cover', title=_('Cover'),
        href=container.name_to_href(titlepage, container.opf_name)))
    metadata = container.opf_get_or_create('metadata')
    meta = metadata.makeelement(OPF('meta'), name='cover')
    meta.set('content', raster_cover_item.get('id'))
    container.insert_into_xml(metadata, meta)
    # The OPF was modified after the last generate_item() call; make sure
    # those changes are written out on commit.
    container.dirty(container.opf_name)

    return raster_cover, titlepage
|
||||
|
||||
def set_epub_cover(container, cover_path, report):
    '''
    Replace the cover of an EPUB with the image at ``cover_path``, or
    insert a cover if the book has none that can be identified.

    Steps:
      1. Locate the existing cover image and cover page via the OPF, then
         strip all cover references from the OPF.
      2. If the OPF names no cover page, treat the first spine item as the
         cover page when it is only a wrapper around a single image.
      3. Remove a second spine item that merely repeats the cover image.
      4. Remove the old wrapper page and old cover image(s).
      5. Create the new cover and redirect links from the old cover
         files to the new ones.

    ``report`` is called with a one-line message describing the result.
    '''
    # The lookups must happen before clean_opf() removes the references
    # they rely on.
    cover_image = find_cover_image(container)
    cover_page = find_cover_page(container)
    wrapped_image = extra_cover_page = None
    updated = False

    possible_removals = set(clean_opf(container))
    possible_removals
    # TODO: Handle possible_removals and also iterate over links in the removed
    # pages and handle possibly removing stylesheets referred to by them.

    spine_items = tuple(container.spine_items)
    if cover_page is None:
        # Check if the first item in the spine is a simple cover wrapper.
        # A book with an empty spine has no such candidate.
        if spine_items:
            candidate = container.abspath_to_name(spine_items[0])
            wrapped_image = find_cover_image_in_page(container, candidate)
            if wrapped_image is not None:
                cover_page = candidate
    else:
        wrapped_image = find_cover_image_in_page(container, cover_page)

    if len(spine_items) > 1:
        # Look for an extra cover page
        c = container.abspath_to_name(spine_items[1])
        if c != cover_page:
            candidate = find_cover_image_in_page(container, c)
            if candidate and candidate in {wrapped_image, cover_image}:
                # This page has only a single image and that image is the
                # cover image, remove it.
                container.remove_item(c)
                extra_cover_page = c

    if wrapped_image is not None:
        # The cover page is a simple wrapper around a single cover image,
        # we can remove it safely.
        container.remove_item(cover_page)
        container.remove_item(wrapped_image)
        updated = True

    if cover_image and cover_image != wrapped_image:
        # Remove the old cover image
        container.remove_item(cover_image)

    # Insert the new cover
    raster_cover, titlepage = create_epub_cover(container, cover_path)

    report('Cover updated' if updated else 'Cover inserted')

    # Replace links to the old cover image/cover page
    link_sub = {s:d for s, d in {
        cover_page:titlepage, wrapped_image:raster_cover,
        cover_image:raster_cover, extra_cover_page:titlepage}.iteritems()
        if s is not None}
    if link_sub:
        replace_links(container, link_sub, frag_map=lambda x, y:None)
|
||||
|
||||
|
||||
|
@ -118,9 +118,9 @@ def option_parser():
|
||||
a = parser.add_option
|
||||
o = partial(a, default=False, action='store_true')
|
||||
o('--subset-fonts', '-f', dest='subset', help=CLI_HELP['subset'])
|
||||
a('--cover', help=_(
|
||||
a('--cover', '-c', help=_(
|
||||
'Path to a cover image. Changes the cover specified in the ebook. '
|
||||
'If no cover is present, inserts a new cover.'))
|
||||
'If no cover is present, or the cover is not properly identified, inserts a new cover.'))
|
||||
o('--verbose', help=_('Produce more verbose output, useful for debugging.'))
|
||||
|
||||
return parser
|
||||
|
61
src/calibre/ebooks/oeb/polish/replace.py
Normal file
61
src/calibre/ebooks/oeb/polish/replace.py
Normal file
@ -0,0 +1,61 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from urlparse import urlparse
|
||||
|
||||
from cssutils import replaceUrls
|
||||
|
||||
from calibre import guess_type
|
||||
from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, rewrite_links)
|
||||
|
||||
class LinkReplacer(object):
    '''
    Callable that rewrites a single URL found in the file ``base``.

    URLs that resolve to a name present in ``link_map`` are replaced by an
    href (relative to ``base``) pointing at the mapped name; every other
    URL is returned unchanged. ``frag_map(name, fragment)`` supplies the
    fragment for the rewritten URL; a falsy result drops the fragment.

    ``replaced`` becomes True as soon as a call returns an href that
    differs from its input.
    '''

    def __init__(self, base, container, link_map, frag_map):
        self.base, self.container = base, container
        self.link_map, self.frag_map = link_map, frag_map
        self.replaced = False

    def __call__(self, url):
        name = self.container.href_to_name(url, self.base)
        new_name = self.link_map.get(name, None) if name else None
        if not new_name:
            # Not a link into the container, or not one we were asked to change
            return url
        fragment = urlparse(url).fragment
        href = self.container.name_to_href(new_name, self.base)
        if fragment:
            new_fragment = self.frag_map(name, fragment)
            if new_fragment:
                href += '#%s'%new_fragment
        if href != url:
            self.replaced = True
        return href
|
||||
|
||||
def replace_links(container, link_map, frag_map=lambda name, frag:frag):
    '''
    Rewrite links throughout the book.

    ``link_map`` maps old names to new names. Every document, stylesheet
    and NCX file in the container is scanned and each link that resolves
    to a key of ``link_map`` is pointed at the corresponding value.
    ``frag_map(name, frag)`` returns the fragment to use on a rewritten
    link; by default the fragment is kept as is.

    Files in which at least one link actually changed are marked dirty.
    '''
    ncx_type = guess_type('toc.ncx')[0]
    for name, media_type in container.mime_map.iteritems():
        replacer = LinkReplacer(name, container, link_map, frag_map)
        mt = media_type.lower()
        if mt in OEB_DOCS:
            rewrite_links(container.parsed(name), replacer)
        elif mt in OEB_STYLES:
            replaceUrls(container.parsed(name), replacer)
        elif mt == ncx_type:
            for elem in container.parsed(name).xpath('//*[@src]'):
                old_src = elem.get('src')
                new_src = replacer(old_src)
                if new_src != old_src:
                    elem.set('src', new_src)

        if replacer.replaced:
            container.dirty(name)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user