diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py index 047051f61c..8e66ab38a7 100644 --- a/src/calibre/ebooks/metadata/epub.py +++ b/src/calibre/ebooks/metadata/epub.py @@ -8,18 +8,15 @@ __copyright__ = '2008, Kovid Goyal ' import os, re, posixpath from cStringIO import StringIO from contextlib import closing -from future_builtins import map from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace from calibre.utils.localunzip import LocalZipFile from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup -from calibre.ebooks.metadata import MetaInformation -from calibre.ebooks.metadata.opf import get_metadata as get_metadata_from_opf +from calibre.ebooks.metadata.opf import get_metadata as get_metadata_from_opf, set_metadata as set_metadata_opf from calibre.ebooks.metadata.opf2 import OPF from calibre.ptempfile import TemporaryDirectory from calibre import CurrentDir, walk from calibre.constants import isosx -from calibre.utils.localization import lang_as_iso639_1 class EPubException(Exception): pass @@ -263,61 +260,26 @@ def serialize_cover_data(new_cdata, cpath): from calibre.utils.img import save_cover_data_to return save_cover_data_to(new_cdata, data_fmt=os.path.splitext(cpath)[1][1:]) -def normalize_languages(opf_languages, mi_languages): - ' Preserve original country codes and use 2-letter lang codes where possible ' - from calibre.spell import parse_lang_code - def parse(x): - try: - return parse_lang_code(x) - except ValueError: - return None - opf_languages = filter(None, map(parse, opf_languages)) - cc_map = {c.langcode:c.countrycode for c in opf_languages} - mi_languages = filter(None, map(parse, mi_languages)) - def norm(x): - lc = x.langcode - cc = x.countrycode or cc_map.get(lc, None) - lc = lang_as_iso639_1(lc) or lc - if cc: - lc += '-' + cc - return lc - return list(map(norm, mi_languages)) - -def update_metadata(opf, mi, apply_null=False, update_timestamp=False, force_identifiers=False): - for x in ('guide', 'toc', 'manifest', 'spine'): - setattr(mi, x, None) - if mi.languages: - mi.languages = normalize_languages(list(opf.raw_languages) or [], mi.languages) - - opf.smart_update(mi, apply_null=apply_null) - if getattr(mi, 'uuid', None): - opf.application_id = mi.uuid - if apply_null or force_identifiers: - opf.set_identifiers(mi.get_identifiers()) - else: - orig = opf.get_identifiers() - orig.update(mi.get_identifiers()) - opf.set_identifiers({k:v for k, v in orig.iteritems() if k and v}) - if update_timestamp and mi.timestamp is not None: - opf.timestamp = mi.timestamp - def set_metadata(stream, mi, apply_null=False, update_timestamp=False, force_identifiers=False): stream.seek(0) reader = get_zip_reader(stream, root=os.getcwdu()) - raster_cover = reader.opf.raster_cover - mi = MetaInformation(mi) new_cdata = None - replacements = {} try: new_cdata = mi.cover_data[1] if not new_cdata: raise Exception('no cover') - except: + except Exception: try: - new_cdata = open(mi.cover, 'rb').read() - except: + with lopen(mi.cover, 'rb') as f: + new_cdata = f.read() + except Exception: pass + + opfbytes, ver, raster_cover = set_metadata_opf( + reader.read_bytes(reader.opf_path), mi, + cover_data=new_cdata, apply_null=apply_null, update_timestamp=update_timestamp, force_identifiers=force_identifiers) cpath = None + replacements = {} if new_cdata and raster_cover: try: cpath = posixpath.join(posixpath.dirname(reader.opf_path), @@ -330,15 +292,11 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False, force_ide import traceback traceback.print_exc() - update_metadata(reader.opf, mi, apply_null=apply_null, - update_timestamp=update_timestamp, force_identifiers=force_identifiers) - - newopf = reader.opf.render() if isinstance(reader.archive, LocalZipFile): - reader.archive.safe_replace(reader.container[OPF.MIMETYPE], newopf, + reader.archive.safe_replace(reader.container[OPF.MIMETYPE], opfbytes, extra_replacements=replacements) else: - safe_replace(stream, reader.container[OPF.MIMETYPE], newopf, + safe_replace(stream, reader.container[OPF.MIMETYPE], opfbytes, extra_replacements=replacements) try: if cpath is not None: diff --git a/src/calibre/ebooks/metadata/opf.py b/src/calibre/ebooks/metadata/opf.py index 8854de914e..038e3fef89 100644 --- a/src/calibre/ebooks/metadata/opf.py +++ b/src/calibre/ebooks/metadata/opf.py @@ -7,7 +7,8 @@ from __future__ import (unicode_literals, division, absolute_import, from calibre.ebooks.metadata import parse_opf_version from calibre.ebooks.metadata.opf2 import OPF -from calibre.ebooks.metadata.utils import parse_opf +from calibre.ebooks.metadata.utils import parse_opf, normalize_languages +from calibre.ebooks.metadata import MetaInformation class DummyFile(object): @@ -24,3 +25,33 @@ def get_metadata(stream): ver = parse_opf_version(root.get('version')) opf = OPF(None, preparsed_opf=root, read_toc=False) return opf.to_book_metadata(), ver, opf.raster_cover, opf.first_spine_item() + +def set_metadata_opf2(root, mi, cover_data=None, apply_null=False, update_timestamp=False, force_identifiers=False): + mi = MetaInformation(mi) + for x in ('guide', 'toc', 'manifest', 'spine'): + setattr(mi, x, None) + opf = OPF(None, preparsed_opf=root, read_toc=False) + if mi.languages: + mi.languages = normalize_languages(list(opf.raw_languages) or [], mi.languages) + + opf.smart_update(mi, apply_null=apply_null) + if getattr(mi, 'uuid', None): + opf.application_id = mi.uuid + if apply_null or force_identifiers: + opf.set_identifiers(mi.get_identifiers()) + else: + orig = opf.get_identifiers() + orig.update(mi.get_identifiers()) + opf.set_identifiers({k:v for k, v in orig.iteritems() if k and v}) + if update_timestamp and mi.timestamp is not None: + opf.timestamp = mi.timestamp + return opf.render(), opf.raster_cover + +def set_metadata(stream, mi, cover_data=None, apply_null=False, update_timestamp=False, force_identifiers=False): + if isinstance(stream, bytes): + stream = DummyFile(stream) + root = parse_opf(stream) + ver = parse_opf_version(root.get('version')) + opfbytes, raster_cover = set_metadata_opf2( + root, mi, cover_data=cover_data, apply_null=apply_null, update_timestamp=update_timestamp, force_identifiers=force_identifiers) + return opfbytes, ver, raster_cover diff --git a/src/calibre/ebooks/metadata/utils.py b/src/calibre/ebooks/metadata/utils.py index 7c2d9d1c3d..238b9088d2 100644 --- a/src/calibre/ebooks/metadata/utils.py +++ b/src/calibre/ebooks/metadata/utils.py @@ -4,9 +4,13 @@ from __future__ import (unicode_literals, division, absolute_import, print_function) +from future_builtins import map + +from lxml import etree from calibre.ebooks.chardet import xml_to_unicode -from lxml import etree +from calibre.spell import parse_lang_code +from calibre.utils.localization import lang_as_iso639_1 PARSER = etree.XMLParser(recover=True, no_network=True) @@ -25,3 +29,22 @@ def parse_opf(stream_or_path): return root +def normalize_languages(opf_languages, mi_languages): + ' Preserve original country codes and use 2-letter lang codes where possible ' + def parse(x): + try: + return parse_lang_code(x) + except ValueError: + return None + opf_languages = filter(None, map(parse, opf_languages)) + cc_map = {c.langcode:c.countrycode for c in opf_languages} + mi_languages = filter(None, map(parse, mi_languages)) + def norm(x): + lc = x.langcode + cc = x.countrycode or cc_map.get(lc, None) + lc = lang_as_iso639_1(lc) or lc + if cc: + lc += '-' + cc + return lc + return list(map(norm, mi_languages)) +