Ebook container: Allow using an unzipped epub (folder) as a container

This commit is contained in:
Kovid Goyal 2015-10-15 13:01:18 +05:30
parent e0dfef216f
commit dce2207202
2 changed files with 111 additions and 22 deletions

View File

@ -7,7 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, logging, sys, hashlib, uuid, re, shutil, unicodedata
import os, logging, sys, hashlib, uuid, re, shutil, unicodedata, errno
from collections import defaultdict
from io import BytesIO
from urlparse import urlparse
@ -125,6 +125,8 @@ class Container(object): # {{{
#: The type of book (epub for EPUB files and azw3 for AZW3 files)
book_type = 'oeb'
#: True if this container represents an unzipped book (a directory)
is_dir = False
SUPPORTS_TITLEPAGES = True
SUPPORTS_FILENAMES = True
@ -912,6 +914,21 @@ class ObfuscationKeyMissing(InvalidEpub):
pass
OCF_NS = 'urn:oasis:names:tc:opendocument:xmlns:container'
# File names that are never yielded by walk_dir()
VCS_IGNORE_FILES = frozenset('.gitignore .hgignore .agignore .bzrignore'.split())
# Directory names that walk_dir() never descends into
VCS_DIRS = frozenset(('.git', '.hg', '.svn', '.bzr'))


def walk_dir(basedir):
    '''
    Recursively walk ``basedir``, skipping the directories named in
    VCS_DIRS and the files named in VCS_IGNORE_FILES.

    Yields ``(is_root, dirpath, fname)`` tuples. For every directory visited
    (including ``basedir`` itself) one tuple with ``fname`` set to None is
    yielded first, followed by one tuple per file in that directory.
    ``is_root`` is True only for entries whose ``dirpath`` is ``basedir``.
    '''
    # The root never changes during the walk, so normalise it only once
    root = os.path.abspath(os.path.normcase(basedir))
    for dirpath, dirnames, filenames in os.walk(basedir):
        # Prune in place: os.walk only skips a sub-directory if it is removed
        # from the very list object it handed out
        dirnames[:] = [d for d in dirnames if d not in VCS_DIRS]
        is_root = os.path.abspath(os.path.normcase(dirpath)) == root
        yield is_root, dirpath, None
        for fname in filenames:
            if fname not in VCS_IGNORE_FILES:
                yield is_root, dirpath, fname
class EpubContainer(Container):
@ -929,7 +946,7 @@ class EpubContainer(Container):
def __init__(self, pathtoepub, log, clone_data=None, tdir=None):
if clone_data is not None:
super(EpubContainer, self).__init__(None, None, log, clone_data=clone_data)
for x in ('pathtoepub', 'obfuscated_fonts'):
for x in ('pathtoepub', 'obfuscated_fonts', 'is_dir'):
setattr(self, x, clone_data[x])
return
@ -938,16 +955,28 @@ class EpubContainer(Container):
tdir = PersistentTemporaryDirectory('_epub_container')
tdir = os.path.abspath(os.path.realpath(tdir))
self.root = tdir
with open(self.pathtoepub, 'rb') as stream:
try:
zf = ZipFile(stream)
zf.extractall(tdir)
except:
log.exception('EPUB appears to be invalid ZIP file, trying a'
' more forgiving ZIP parser')
from calibre.utils.localunzip import extractall
stream.seek(0)
extractall(stream, path=tdir)
self.is_dir = os.path.isdir(pathtoepub)
if self.is_dir:
for is_root, dirpath, fname in walk_dir(self.pathtoepub):
if is_root:
base = tdir
else:
base = os.path.join(tdir, os.path.relpath(dirpath, self.pathtoepub))
if fname is None:
os.mkdir(base)
if fname is not None:
shutil.copy(os.path.join(dirpath, fname), os.path.join(base, fname))
else:
with open(self.pathtoepub, 'rb') as stream:
try:
zf = ZipFile(stream)
zf.extractall(tdir)
except:
log.exception('EPUB appears to be invalid ZIP file, trying a'
' more forgiving ZIP parser')
from calibre.utils.localunzip import extractall
stream.seek(0)
extractall(stream, path=tdir)
try:
os.remove(join(tdir, 'mimetype'))
except EnvironmentError:
@ -980,6 +1009,7 @@ class EpubContainer(Container):
ans = super(EpubContainer, self).clone_data(dest_dir)
ans['pathtoepub'] = self.pathtoepub
ans['obfuscated_fonts'] = self.obfuscated_fonts.copy()
ans['is_dir'] = self.is_dir
return ans
def rename(self, old_name, new_name):
@ -1109,13 +1139,42 @@ class EpubContainer(Container):
f.write(decrypt_font_data(key, data, alg))
if outpath is None:
outpath = self.pathtoepub
from calibre.ebooks.tweak import zip_rebuilder
with open(join(self.root, 'mimetype'), 'wb') as f:
f.write(guess_type('a.epub'))
zip_rebuilder(self.root, outpath)
for name, data in restore_fonts.iteritems():
with self.open(name, 'wb') as f:
f.write(data)
if self.is_dir:
# First remove items from the source dir that do not exist any more
for is_root, dirpath, fname in walk_dir(self.pathtoepub):
if fname is not None:
if is_root and fname == 'mimetype':
continue
base = self.root if is_root else os.path.join(self.root, os.path.relpath(dirpath, self.pathtoepub))
fpath = os.path.join(base, fname)
if not os.path.exists(fpath):
os.remove(os.path.join(dirpath, fname))
try:
os.rmdir(dirpath)
except EnvironmentError as err:
if err.errno != errno.ENOTEMPTY:
raise
# Now copy over everything from root to source dir
for dirpath, dirnames, filenames in os.walk(self.root):
is_root = os.path.abspath(os.path.normcase(dirpath)) == os.path.abspath(os.path.normcase(self.root))
base = self.pathtoepub if is_root else os.path.join(self.pathtoepub, os.path.relpath(dirpath, self.root))
try:
os.mkdir(base)
except EnvironmentError as err:
if err.errno != errno.EEXIST:
raise
for fname in filenames:
with open(os.path.join(dirpath, fname), 'rb') as src, open(os.path.join(base, fname), 'wb') as dest:
shutil.copyfileobj(src, dest)
else:
from calibre.ebooks.tweak import zip_rebuilder
with open(join(self.root, 'mimetype'), 'wb') as f:
f.write(guess_type('a.epub'))
zip_rebuilder(self.root, outpath)
for name, data in restore_fonts.iteritems():
with self.open(name, 'wb') as f:
f.write(data)
@dynamic_property
def path_to_ebook(self):
@ -1254,7 +1313,11 @@ class AZW3Container(Container):
def get_container(path, log=None, tdir=None, tweak_mode=False):
    '''
    Create the container object appropriate for the book at ``path``.

    :param path: Path to a book file, or to a directory, which is always
        opened with EpubContainer regardless of its name.
    :param log: Log object handed to the container; ``default_log`` is used
        when None.
    :param tdir: Passed through to the container class as ``tdir``.
    :param tweak_mode: Stored on the returned container as ``tweak_mode``.
    :return: An AZW3Container when ``path`` is not a directory and has one of
        the extensions azw3, mobi, original_azw3 or original_mobi; an
        EpubContainer otherwise.
    '''
    if log is None:
        log = default_log
    # Best effort: if the check itself fails, treat path as a regular file
    try:
        isdir = os.path.isdir(path)
    except Exception:
        isdir = False
    is_kindle = path.rpartition('.')[-1].lower() in {'azw3', 'mobi', 'original_azw3', 'original_mobi'}
    # A directory is never a Kindle book, even if its name ends in such an extension
    container_class = AZW3Container if (is_kindle and not isdir) else EpubContainer
    ebook = container_class(path, log, tdir=tdir)
    ebook.tweak_mode = tweak_mode
    return ebook

View File

@ -7,14 +7,15 @@ __license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import os, subprocess
from zipfile import ZipFile
from calibre import CurrentDir
from calibre.ebooks.oeb.polish.tests.base import BaseTest, get_simple_book, get_split_book
from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, OCF_NS
from calibre.ebooks.oeb.polish.replace import rename_files
from calibre.ebooks.oeb.polish.split import split, merge
from calibre.utils.filenames import nlinks_file
from calibre.ptempfile import TemporaryFile
from calibre.ptempfile import TemporaryFile, TemporaryDirectory
def get_container(*args, **kwargs):
kwargs['tweak_mode'] = True
@ -235,3 +236,28 @@ class ContainerTests(BaseTest):
c = get_container(book)
merge(c, 'styles', ('stylesheet.css', 'page_styles.css'), 'stylesheet.css')
self.check_links(c)
def test_dir_container(self):
    '''
    Open a directory holding an unzipped book as a container, modify it and
    commit, then check that the changes were written back to the directory
    and that the version control files in it are still present.
    '''
    def populate(dest):
        # Unpack the bundled sample book and drop VCS artifacts next to it
        with ZipFile(P('quick_start/eng.epub', allow_user_override=False)) as zf:
            zf.extractall(dest)
        with CurrentDir(dest):
            self.assertTrue(os.path.exists('images/cover.jpg'))
            with open('.gitignore', 'wb') as f:
                f.write(b'nothing')
            os.mkdir('.git')
            with open('.git/xxx', 'wb') as f:
                f.write(b'xxx')

    with TemporaryDirectory('-polish-dir-container') as source:
        populate(source)

        # Delete one file and add another, then write back to the directory
        c = get_container(source)
        c.remove_item('images/cover.jpg')
        with c.open('images/test-container.xyz', 'wb') as f:
            f.write(b'xyz')
        c.commit()

        with CurrentDir(source):
            for survivor in ('.gitignore', '.git/xxx', 'images/test-container.xyz'):
                self.assertTrue(os.path.exists(survivor))
            self.assertFalse(os.path.exists('images/cover.jpg'))