Refactor setting of EPUB metadata

Move it into the new opf module and also avoid several unnecessary
memory copies.
This commit is contained in:
Kovid Goyal 2016-06-16 10:07:23 +05:30
parent e214e3c86c
commit ec2671b809
3 changed files with 68 additions and 56 deletions

View File

@ -8,18 +8,15 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, re, posixpath
from cStringIO import StringIO
from contextlib import closing
from future_builtins import map
from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace
from calibre.utils.localunzip import LocalZipFile
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf import get_metadata as get_metadata_from_opf
from calibre.ebooks.metadata.opf import get_metadata as get_metadata_from_opf, set_metadata as set_metadata_opf
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir, walk
from calibre.constants import isosx
from calibre.utils.localization import lang_as_iso639_1
class EPubException(Exception):
    """Exception type for EPUB-related errors."""
@ -263,61 +260,26 @@ def serialize_cover_data(new_cdata, cpath):
from calibre.utils.img import save_cover_data_to
return save_cover_data_to(new_cdata, data_fmt=os.path.splitext(cpath)[1][1:])
def normalize_languages(opf_languages, mi_languages):
    """Return the codes in ``mi_languages`` normalized for writing to an OPF.

    Each code that parses is converted to its ISO 639-1 form when
    ``lang_as_iso639_1`` provides one (otherwise the parsed language code is
    kept). A country code is appended as ``-CC``: the code's own country if it
    has one, else the country recorded for the same language in
    ``opf_languages``. Codes for which ``parse_lang_code`` raises
    ``ValueError``, or returns a falsy value, are dropped.
    """
    from calibre.spell import parse_lang_code

    def safe_parse(code):
        try:
            return parse_lang_code(code)
        except ValueError:
            return None

    existing = [p for p in (safe_parse(code) for code in opf_languages) if p]
    # Later entries win when the OPF lists the same language more than once.
    country_for = {p.langcode: p.countrycode for p in existing}

    normalized = []
    for code in mi_languages:
        parsed = safe_parse(code)
        if not parsed:
            continue
        langcode = parsed.langcode
        country = parsed.countrycode or country_for.get(langcode, None)
        result = lang_as_iso639_1(langcode) or langcode
        if country:
            result += '-' + country
        normalized.append(result)
    return normalized
def update_metadata(opf, mi, apply_null=False, update_timestamp=False, force_identifiers=False):
    """Copy metadata from ``mi`` into ``opf``.

    Note that ``mi`` is modified in place: its ``guide``, ``toc``,
    ``manifest`` and ``spine`` attributes are set to None and, when it has
    languages, they are replaced by their normalized form.

    :param apply_null: forwarded to ``opf.smart_update``; when true the
        identifiers in ``opf`` are also replaced outright by those of ``mi``.
    :param update_timestamp: when true and ``mi.timestamp`` is not None, the
        timestamp is copied to ``opf``.
    :param force_identifiers: when true, replace the identifiers in ``opf``
        with those of ``mi`` instead of merging the two sets.
    """
    # Cleared before smart_update -- presumably so that these structural
    # parts of the OPF are not touched by it (confirm against smart_update).
    for attr in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, attr, None)

    if mi.languages:
        mi.languages = normalize_languages(list(opf.raw_languages), mi.languages)

    opf.smart_update(mi, apply_null=apply_null)

    if getattr(mi, 'uuid', None):
        opf.application_id = mi.uuid

    if apply_null or force_identifiers:
        identifiers = mi.get_identifiers()
    else:
        # Merge: values from mi override existing ones; entries with an empty
        # key or value are discarded.
        merged = opf.get_identifiers()
        merged.update(mi.get_identifiers())
        identifiers = {k: v for k, v in merged.iteritems() if k and v}
    opf.set_identifiers(identifiers)

    if update_timestamp and mi.timestamp is not None:
        opf.timestamp = mi.timestamp
def set_metadata(stream, mi, apply_null=False, update_timestamp=False, force_identifiers=False):
stream.seek(0)
reader = get_zip_reader(stream, root=os.getcwdu())
raster_cover = reader.opf.raster_cover
mi = MetaInformation(mi)
new_cdata = None
replacements = {}
try:
new_cdata = mi.cover_data[1]
if not new_cdata:
raise Exception('no cover')
except:
except Exception:
try:
new_cdata = open(mi.cover, 'rb').read()
except:
with lopen(mi.cover, 'rb') as f:
new_cdata = f.read()
except Exception:
pass
opfbytes, ver, raster_cover = set_metadata_opf(
reader.read_bytes(reader.opf_path), mi,
cover_data=new_cdata, apply_null=apply_null, update_timestamp=update_timestamp, force_identifiers=force_identifiers)
cpath = None
replacements = {}
if new_cdata and raster_cover:
try:
cpath = posixpath.join(posixpath.dirname(reader.opf_path),
@ -330,15 +292,11 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False, force_ide
import traceback
traceback.print_exc()
update_metadata(reader.opf, mi, apply_null=apply_null,
update_timestamp=update_timestamp, force_identifiers=force_identifiers)
newopf = reader.opf.render()
if isinstance(reader.archive, LocalZipFile):
reader.archive.safe_replace(reader.container[OPF.MIMETYPE], newopf,
reader.archive.safe_replace(reader.container[OPF.MIMETYPE], opfbytes,
extra_replacements=replacements)
else:
safe_replace(stream, reader.container[OPF.MIMETYPE], newopf,
safe_replace(stream, reader.container[OPF.MIMETYPE], opfbytes,
extra_replacements=replacements)
try:
if cpath is not None:

View File

@ -7,7 +7,8 @@ from __future__ import (unicode_literals, division, absolute_import,
from calibre.ebooks.metadata import parse_opf_version
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.metadata.utils import parse_opf
from calibre.ebooks.metadata.utils import parse_opf, normalize_languages
from calibre.ebooks.metadata import MetaInformation
class DummyFile(object):
@ -24,3 +25,33 @@ def get_metadata(stream):
ver = parse_opf_version(root.get('version'))
opf = OPF(None, preparsed_opf=root, read_toc=False)
return opf.to_book_metadata(), ver, opf.raster_cover, opf.first_spine_item()
def set_metadata_opf2(root, mi, cover_data=None, apply_null=False, update_timestamp=False, force_identifiers=False):
    """Apply the metadata in ``mi`` to the already parsed OPF tree ``root``.

    Works on a ``MetaInformation`` copy of ``mi``; the copy has its
    ``guide``, ``toc``, ``manifest`` and ``spine`` attributes set to None and
    its languages normalized before being merged into the OPF.

    :param cover_data: accepted for interface compatibility; it is not used
        anywhere in this function.
    :param apply_null: forwarded to ``OPF.smart_update``; when true the OPF
        identifiers are also replaced outright by those of ``mi``.
    :param update_timestamp: when true and ``mi.timestamp`` is not None, the
        timestamp is copied into the OPF.
    :param force_identifiers: when true, replace the OPF identifiers with
        those of ``mi`` instead of merging the two sets.
    :return: ``(rendered_opf, raster_cover)`` as produced by ``OPF.render()``
        and ``OPF.raster_cover``.
    """
    mi = MetaInformation(mi)
    for attr in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, attr, None)

    opf = OPF(None, preparsed_opf=root, read_toc=False)

    if mi.languages:
        mi.languages = normalize_languages(list(opf.raw_languages), mi.languages)

    opf.smart_update(mi, apply_null=apply_null)

    if getattr(mi, 'uuid', None):
        opf.application_id = mi.uuid

    if apply_null or force_identifiers:
        identifiers = mi.get_identifiers()
    else:
        # Merge: values from mi override existing ones; entries with an empty
        # key or value are discarded.
        merged = opf.get_identifiers()
        merged.update(mi.get_identifiers())
        identifiers = {k: v for k, v in merged.iteritems() if k and v}
    opf.set_identifiers(identifiers)

    if update_timestamp and mi.timestamp is not None:
        opf.timestamp = mi.timestamp

    rendered = opf.render()
    return rendered, opf.raster_cover
def set_metadata(stream, mi, cover_data=None, apply_null=False, update_timestamp=False, force_identifiers=False):
    """Update the OPF in ``stream`` with the metadata from ``mi``.

    ``stream`` may be a bytes object, in which case it is wrapped in a
    ``DummyFile`` before parsing; any other value is handed to ``parse_opf``
    unchanged.

    :return: ``(opfbytes, ver, raster_cover)`` -- the output of
        ``set_metadata_opf2`` plus the OPF version parsed from the root
        element's ``version`` attribute.
    """
    source = DummyFile(stream) if isinstance(stream, bytes) else stream
    root = parse_opf(source)
    ver = parse_opf_version(root.get('version'))
    options = dict(
        cover_data=cover_data,
        apply_null=apply_null,
        update_timestamp=update_timestamp,
        force_identifiers=force_identifiers,
    )
    opfbytes, raster_cover = set_metadata_opf2(root, mi, **options)
    return opfbytes, ver, raster_cover

View File

@ -4,9 +4,13 @@
from __future__ import (unicode_literals, division, absolute_import,
print_function)
from future_builtins import map
from lxml import etree
from calibre.ebooks.chardet import xml_to_unicode
from lxml import etree
from calibre.spell import parse_lang_code
from calibre.utils.localization import lang_as_iso639_1
PARSER = etree.XMLParser(recover=True, no_network=True)
@ -25,3 +29,22 @@ def parse_opf(stream_or_path):
return root
def normalize_languages(opf_languages, mi_languages):
    """Return the codes in ``mi_languages`` normalized for writing to an OPF.

    Each code that parses is converted to its ISO 639-1 form when
    ``lang_as_iso639_1`` provides one (otherwise the parsed language code is
    kept). A country code is appended as ``-CC``: the code's own country if it
    has one, else the country recorded for the same language in
    ``opf_languages``. Codes for which ``parse_lang_code`` raises
    ``ValueError``, or returns a falsy value, are dropped.
    """
    def safe_parse(code):
        try:
            return parse_lang_code(code)
        except ValueError:
            return None

    existing = [p for p in (safe_parse(code) for code in opf_languages) if p]
    # Later entries win when the OPF lists the same language more than once.
    country_for = {p.langcode: p.countrycode for p in existing}

    normalized = []
    for code in mi_languages:
        parsed = safe_parse(code)
        if not parsed:
            continue
        langcode = parsed.langcode
        country = parsed.countrycode or country_for.get(langcode, None)
        result = lang_as_iso639_1(langcode) or langcode
        if country:
            result += '-' + country
        normalized.append(result)
    return normalized