From 18d0f6a6ef79bc80b23a3a58a1c7c607169fe662 Mon Sep 17 00:00:00 2001 From: John Schember Date: Tue, 12 Apr 2011 18:58:54 -0400 Subject: [PATCH 1/6] Add HTMLZ as a book extension. Use HTML icon for HTMLZ. --- src/calibre/ebooks/__init__.py | 2 +- src/calibre/gui2/__init__.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py index 7776be5e28..a56abb907e 100644 --- a/src/calibre/ebooks/__init__.py +++ b/src/calibre/ebooks/__init__.py @@ -26,7 +26,7 @@ class ParserError(ValueError): pass BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm', - 'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc', + 'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc', 'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip', 'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb'] diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index 22aaabf592..e39427021e 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -357,6 +357,7 @@ class FileIconProvider(QFileIconProvider): 'bmp' : 'bmp', 'svg' : 'svg', 'html' : 'html', + 'htmlz' : 'html', 'htm' : 'html', 'xhtml' : 'html', 'xhtm' : 'html', From d5119f0c2f0bad0220122e7771cbb6388d22a21a Mon Sep 17 00:00:00 2001 From: John Schember Date: Tue, 12 Apr 2011 19:11:52 -0400 Subject: [PATCH 2/6] HTMLZ Output: Handle SVG data returned as lxml.etree._Element properly. 
--- src/calibre/ebooks/htmlz/output.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/htmlz/output.py b/src/calibre/ebooks/htmlz/output.py index 7cdf04bcdb..03fe12c89e 100644 --- a/src/calibre/ebooks/htmlz/output.py +++ b/src/calibre/ebooks/htmlz/output.py @@ -12,7 +12,7 @@ from lxml import etree from calibre.customize.conversion import OutputFormatPlugin, \ OptionRecommendation -from calibre.ebooks.oeb.base import OEB_IMAGES +from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME from calibre.ptempfile import TemporaryDirectory from calibre.utils.zipfile import ZipFile @@ -71,9 +71,13 @@ class HTMLZOutput(OutputFormatPlugin): os.makedirs(os.path.join(tdir, 'images')) for item in oeb_book.manifest: if item.media_type in OEB_IMAGES and item.href in images: + if item.media_type == SVG_MIME: + data = unicode(etree.tostring(item.data, encoding=unicode)) + else: + data = item.data fname = os.path.join(tdir, 'images', images[item.href]) with open(fname, 'wb') as img: - img.write(item.data) + img.write(data) # Metadata with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf: From 1d6521aa5e34fc04902130680b0e73a1979ae0c7 Mon Sep 17 00:00:00 2001 From: John Schember Date: Tue, 12 Apr 2011 19:53:04 -0400 Subject: [PATCH 3/6] extZ metadata: Read and write first opf file found in archive. 
--- src/calibre/ebooks/metadata/extz.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/metadata/extz.py b/src/calibre/ebooks/metadata/extz.py index 0ecdbe9ea6..b49f3f6ddd 100644 --- a/src/calibre/ebooks/metadata/extz.py +++ b/src/calibre/ebooks/metadata/extz.py @@ -25,14 +25,30 @@ def get_metadata(stream, extract_cover=True): with TemporaryDirectory('_untxtz_mdata') as tdir: try: - zf = ZipFile(stream) - zf.extract('metadata.opf', tdir) - with open(os.path.join(tdir, 'metadata.opf'), 'rb') as opff: - mi = OPF(opff).to_book_metadata() + with ZipFile(stream) as zf: + opf_name = get_first_opf_name(stream) + opf_stream = StringIO(zf.read(opf_name)) + mi = OPF(opf_stream).to_book_metadata() except: return mi return mi def set_metadata(stream, mi): opf = StringIO(metadata_to_opf(mi)) - safe_replace(stream, 'metadata.opf', opf) + try: + opf_name = get_first_opf_name(stream) + except: + opf_name = 'metadata.opf' + safe_replace(stream, opf_name, opf) + +def get_first_opf_name(stream): + with ZipFile(stream) as zf: + names = zf.namelist() + opfs = [] + for n in names: + if n.endswith('.opf') and '/' not in n: + opfs.append(n) + if not opfs: + raise Exception('No OPF found') + opfs.sort() + return opfs[0] From 184692b587e67d79ef35edc04ff5b97c0c27654d Mon Sep 17 00:00:00 2001 From: John Schember Date: Tue, 12 Apr 2011 20:39:01 -0400 Subject: [PATCH 4/6] extZ metadata: Get cover, update OPF without losing other data such as spine and guide. --- src/calibre/ebooks/metadata/extz.py | 34 +++++++++++++++++------------ 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/src/calibre/ebooks/metadata/extz.py b/src/calibre/ebooks/metadata/extz.py index b49f3f6ddd..338c4dd91d 100644 --- a/src/calibre/ebooks/metadata/extz.py +++ b/src/calibre/ebooks/metadata/extz.py @@ -7,13 +7,10 @@ __copyright__ = '2011, John Schember ' Read meta information from extZ (TXTZ, HTMLZ...) files. 
''' -import os - from cStringIO import StringIO from calibre.ebooks.metadata import MetaInformation -from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf -from calibre.ptempfile import TemporaryDirectory +from calibre.ebooks.metadata.opf2 import OPF from calibre.utils.zipfile import ZipFile, safe_replace def get_metadata(stream, extract_cover=True): @@ -23,23 +20,32 @@ def get_metadata(stream, extract_cover=True): mi = MetaInformation(_('Unknown'), [_('Unknown')]) stream.seek(0) - with TemporaryDirectory('_untxtz_mdata') as tdir: - try: - with ZipFile(stream) as zf: - opf_name = get_first_opf_name(stream) - opf_stream = StringIO(zf.read(opf_name)) - mi = OPF(opf_stream).to_book_metadata() - except: - return mi + try: + with ZipFile(stream) as zf: + opf_name = get_first_opf_name(stream) + opf_stream = StringIO(zf.read(opf_name)) + opf = OPF(opf_stream) + mi = opf.to_book_metadata() + if extract_cover: + cover_name = opf.raster_cover + if cover_name: + mi.cover_data = ('jpg', zf.read(cover_name)) + except: + return mi return mi def set_metadata(stream, mi): - opf = StringIO(metadata_to_opf(mi)) try: opf_name = get_first_opf_name(stream) + with ZipFile(stream) as zf: + opf_stream = StringIO(zf.read(opf_name)) + opf = OPF(opf_stream) except: opf_name = 'metadata.opf' - safe_replace(stream, opf_name, opf) + opf = OPF(StringIO()) + opf.smart_update(mi, replace_metadata=True) + newopf = StringIO(opf.render()) + safe_replace(stream, opf_name, newopf) def get_first_opf_name(stream): with ZipFile(stream) as zf: From fbde96b7a1b947f349fa3c71d1c5b6e090418fd9 Mon Sep 17 00:00:00 2001 From: John Schember Date: Tue, 12 Apr 2011 20:54:14 -0400 Subject: [PATCH 5/6] extZ metadata: Set cover. 
--- src/calibre/ebooks/metadata/extz.py | 52 ++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/metadata/extz.py b/src/calibre/ebooks/metadata/extz.py index 338c4dd91d..18c5a25671 100644 --- a/src/calibre/ebooks/metadata/extz.py +++ b/src/calibre/ebooks/metadata/extz.py @@ -7,10 +7,14 @@ __copyright__ = '2011, John Schember ' Read meta information from extZ (TXTZ, HTMLZ...) files. ''' +import os +import posixpath + from cStringIO import StringIO from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata.opf2 import OPF +from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.zipfile import ZipFile, safe_replace def get_metadata(stream, extract_cover=True): @@ -35,17 +39,50 @@ def get_metadata(stream, extract_cover=True): return mi def set_metadata(stream, mi): + replacements = {} + + # Get the OPF in the archive. try: - opf_name = get_first_opf_name(stream) + opf_path = get_first_opf_name(stream) with ZipFile(stream) as zf: - opf_stream = StringIO(zf.read(opf_name)) + opf_stream = StringIO(zf.read(opf_path)) opf = OPF(opf_stream) except: - opf_name = 'metadata.opf' + opf_path = 'metadata.opf' opf = OPF(StringIO()) + + # Cover. + new_cdata = None + try: + new_cdata = mi.cover_data[1] + if not new_cdata: + raise Exception('no cover') + except: + try: + new_cdata = open(mi.cover, 'rb').read() + except: + pass + if new_cdata: + raster_cover = opf.raster_cover + if not raster_cover: + raster_cover = 'cover.jpg' + cpath = posixpath.join(posixpath.dirname(opf_path), raster_cover) + new_cover = _write_new_cover(new_cdata, cpath) + replacements[cpath] = open(new_cover.name, 'rb') + + # Update the metadata. opf.smart_update(mi, replace_metadata=True) newopf = StringIO(opf.render()) - safe_replace(stream, opf_name, newopf) + safe_replace(stream, opf_path, newopf, extra_replacements=replacements) + + # Cleanup temporary files. 
+ try: + if cpath is not None: + replacements[cpath].close() + os.remove(replacements[cpath].name) + except: + pass + + def get_first_opf_name(stream): with ZipFile(stream) as zf: @@ -58,3 +95,10 @@ def get_first_opf_name(stream): raise Exception('No OPF found') opfs.sort() return opfs[0] + +def _write_new_cover(new_cdata, cpath): + from calibre.utils.magick.draw import save_cover_data_to + new_cover = PersistentTemporaryFile(suffix=os.path.splitext(cpath)[1]) + new_cover.close() + save_cover_data_to(new_cdata, new_cover.name) + return new_cover From 5b82c42e4bc5b96ee242f61bc30d0be3d8ecf703 Mon Sep 17 00:00:00 2001 From: John Schember Date: Tue, 12 Apr 2011 20:55:06 -0400 Subject: [PATCH 6/6] extZ metadata: Remove extra blank line before get_first_opf_name. --- src/calibre/ebooks/metadata/extz.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/extz.py b/src/calibre/ebooks/metadata/extz.py index 18c5a25671..6d41f7819d 100644 --- a/src/calibre/ebooks/metadata/extz.py +++ b/src/calibre/ebooks/metadata/extz.py @@ -83,7 +83,6 @@ def set_metadata(stream, mi): except: pass - def get_first_opf_name(stream): with ZipFile(stream) as zf: names = zf.namelist()