From caab42221b232334c969e77f0a25fa04e7915f94 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Thu, 25 Jun 2009 11:38:40 -0700
Subject: [PATCH] Create clean metadata sections when outputting OEB/EPUB. Also fix #2671 (.6.0b8 - Not transferring author to PRS-505)

---
 src/calibre/ebooks/conversion/plumber.py      |   3 +-
 src/calibre/ebooks/oeb/reader.py              |  71 ++++--------
 src/calibre/ebooks/oeb/transforms/metadata.py | 107 +++++++++---------
 3 files changed, 76 insertions(+), 105 deletions(-)

diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index a53e6050d1..11975094e3 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -616,8 +616,7 @@ OptionRecommendation(name='list_recipes',
             self.opts.dest = self.opts.output_profile
 
         from calibre.ebooks.oeb.transforms.metadata import MergeMetadata
-        MergeMetadata()(self.oeb, self.user_metadata,
-                self.opts.prefer_metadata_cover)
+        MergeMetadata()(self.oeb, self.user_metadata, self.opts)
         pr(0.2)
         self.flush()
 
diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py
index 75d92f1815..9a637c1a24 100644
--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@@ -6,7 +6,7 @@ from __future__ import with_statement
 __license__ = 'GPL v3'
 __copyright__ = '2008, Marshall T. Vandegrift '
 
-import sys, os, uuid, copy, re
+import sys, os, uuid, copy, re, cStringIO
 from itertools import izip
 from urlparse import urldefrag, urlparse
 from urllib import unquote as urlunquote
@@ -22,7 +22,7 @@ from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_IMAGES, \
     PAGE_MAP_MIME, JPEG_MIME, NCX_MIME, SVG_MIME
 from calibre.ebooks.oeb.base import XMLDECL_RE, COLLAPSE_RE, \
     ENTITY_RE, MS_COVER_TYPE, iterlinks
-from calibre.ebooks.oeb.base import namespace, barename, qname, XPath, xpath, \
+from calibre.ebooks.oeb.base import namespace, barename, XPath, xpath, \
     urlnormalize, BINARY_MIME, \
     OEBError, OEBBook, DirContainer
 from calibre.ebooks.oeb.writer import OEBWriter
@@ -30,6 +30,7 @@ from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
 from calibre.ebooks.metadata.epub import CoverRenderer
 from calibre.startup import get_lang
 from calibre.ptempfile import TemporaryDirectory
+from calibre.constants import __appname__, __version__
 
 __all__ = ['OEBReader']
 
@@ -123,53 +124,25 @@ class OEBReader(object):
         return opf
 
     def _metadata_from_opf(self, opf):
-        uid = opf.get('unique-identifier', None)
-        self.oeb.uid = None
-        metadata = self.oeb.metadata
-        for elem in xpath(opf, '/o2:package/o2:metadata//*'):
-            term = elem.tag
-            value = elem.text
-            attrib = dict(elem.attrib)
-            nsmap = elem.nsmap
-            if term == OPF('meta'):
-                term = qname(attrib.pop('name', None), nsmap)
-                value = attrib.pop('content', None)
-            if value:
-                value = COLLAPSE_RE.sub(' ', value.strip())
-            if term and (value or attrib):
-                metadata.add(term, value, attrib, nsmap=nsmap)
-        haveuuid = haveid = False
-        for ident in metadata.identifier:
-            if unicode(ident).startswith('urn:uuid:'):
-                haveuuid = True
-            if 'id' in ident.attrib:
-                haveid = True
-        if not (haveuuid and haveid):
-            bookid = "urn:uuid:%s" % str(uuid.uuid4())
-            metadata.add('identifier', bookid, id='calibre-uuid')
-        if uid is None:
-            self.logger.warn(u'Unique-identifier not specified')
-        for item in metadata.identifier:
-            if not item.id:
-                continue
-            if uid is None or item.id == uid:
-                self.oeb.uid = item
-                break
-        else:
-            self.logger.warn(u'Unique-identifier %r not found' % uid)
-            for ident in metadata.identifier:
-                if 'id' in ident.attrib:
-                    self.oeb.uid = metadata.identifier[0]
-                    break
-        if not metadata.language:
-            self.logger.warn(u'Language not specified')
-            metadata.add('language', get_lang())
-        if not metadata.creator:
-            self.logger.warn('Creator not specified')
-            metadata.add('creator', self.oeb.translate(__('Unknown')))
-        if not metadata.title:
-            self.logger.warn('Title not specified')
-            metadata.add('title', self.oeb.translate(__('Unknown')))
+        from calibre.ebooks.metadata.opf2 import OPF
+        from calibre.ebooks.metadata import MetaInformation
+        from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
+        stream = cStringIO.StringIO(etree.tostring(opf))
+        mi = MetaInformation(OPF(stream))
+        if not mi.title:
+            mi.title = self.oeb.translate(__('Unknown'))
+        if not mi.authors:
+            mi.authors = [self.oeb.translate(__('Unknown'))]
+        if not mi.book_producer:
+            mi.book_producer = '%(a)s (%(v)s) [http://%(a)s.kovidgoyal.net]'%\
+                dict(a=__appname__, v=__version__)
+        if not mi.language:
+            mi.language = get_lang()
+        meta_info_to_oeb_metadata(mi, self.oeb.metadata, self.logger)
+        bookid = "urn:uuid:%s" % str(uuid.uuid4()) if mi.application_id is None \
+                else mi.application_id
+        self.oeb.metadata.add('identifier', bookid, id='calibre-uuid')
+        self.oeb.uid = self.oeb.metadata.identifier[0]
 
     def _manifest_prune_invalid(self):
         '''
diff --git a/src/calibre/ebooks/oeb/transforms/metadata.py b/src/calibre/ebooks/oeb/transforms/metadata.py
index 894cb4fb08..b2d254f76c 100644
--- a/src/calibre/ebooks/oeb/transforms/metadata.py
+++ b/src/calibre/ebooks/oeb/transforms/metadata.py
@@ -8,66 +8,65 @@ __docformat__ = 'restructuredtext en'
 
 import os
 
+def meta_info_to_oeb_metadata(mi, m, log):
+    if mi.title:
+        m.clear('title')
+        m.add('title', mi.title)
+    if mi.title_sort:
+        if not m.title:
+            m.add('title', mi.title_sort)
+        m.title[0].file_as = mi.title_sort
+    if mi.authors:
+        m.filter('creator', lambda x : x.role.lower() == 'aut')
+        for a in mi.authors:
+            attrib = {'role':'aut'}
+            if mi.author_sort:
+                attrib['file_as'] = mi.author_sort
+            m.add('creator', a, attrib=attrib)
+    if mi.book_producer:
+        m.filter('contributor', lambda x : x.role.lower() == 'bkp')
+        m.add('contributor', mi.book_producer, role='bkp')
+    if mi.comments:
+        m.clear('description')
+        m.add('description', mi.comments)
+    if mi.publisher:
+        m.clear('publisher')
+        m.add('publisher', mi.publisher)
+    if mi.series:
+        m.clear('series')
+        m.add('series', mi.series)
+    if mi.isbn:
+        has = False
+        for x in m.identifier:
+            if x.scheme.lower() == 'isbn':
+                x.content = mi.isbn
+                has = True
+        if not has:
+            m.add('identifier', mi.isbn, scheme='ISBN')
+    if mi.language:
+        m.clear('language')
+        m.add('language', mi.language)
+    if mi.series_index is not None:
+        m.clear('series_index')
+        m.add('series_index', mi.format_series_index())
+    if mi.rating is not None:
+        m.clear('rating')
+        m.add('rating', '%.2f'%mi.rating)
+    if mi.tags:
+        m.clear('subject')
+        for t in mi.tags:
+            m.add('subject', t)
+
+
 class MergeMetadata(object):
     'Merge in user metadata, including cover'
 
-    def __call__(self, oeb, mi, prefer_metadata_cover=False,
-            prefer_author_sort=False):
-        from calibre.ebooks.oeb.base import DC
+    def __call__(self, oeb, mi, opts):
         self.oeb, self.log = oeb, oeb.log
         m = self.oeb.metadata
+        meta_info_to_oeb_metadata(mi, m, oeb.log)
         self.log('Merging user specified metadata...')
-        if mi.title:
-            m.clear('title')
-            m.add('title', mi.title)
-        if mi.title_sort:
-            if not m.title:
-                m.add(DC('title'), mi.title_sort)
-            m.title[0].file_as = mi.title_sort
-        if prefer_author_sort and mi.author_sort:
-            mi.authors = [mi.author_sort]
-        if mi.authors:
-            m.filter('creator', lambda x : x.role.lower() == 'aut')
-            for a in mi.authors:
-                attrib = {'role':'aut'}
-                if mi.author_sort:
-                    attrib['file_as'] = mi.author_sort
-                m.add('creator', a, attrib=attrib)
-        if mi.comments:
-            m.clear('description')
-            m.add('description', mi.comments)
-        if mi.publisher:
-            m.clear('publisher')
-            m.add('publisher', mi.publisher)
-        if mi.series:
-            m.clear('series')
-            m.add('series', mi.series)
-        if mi.isbn:
-            has = False
-            for x in m.identifier:
-                if x.scheme.lower() == 'isbn':
-                    x.content = mi.isbn
-                    has = True
-            if not has:
-                m.add('identifier', mi.isbn, scheme='ISBN')
-        if mi.language:
-            m.clear('language')
-            m.add('language', mi.language)
-        if mi.book_producer:
-            m.filter('creator', lambda x : x.role.lower() == 'bkp')
-            m.add('creator', mi.book_producer, role='bkp')
-        if mi.series_index is not None:
-            m.clear('series_index')
-            m.add('series_index', mi.format_series_index())
-        if mi.rating is not None:
-            m.clear('rating')
-            m.add('rating', '%.2f'%mi.rating)
-        if mi.tags:
-            m.clear('subject')
-            for t in mi.tags:
-                m.add('subject', t)
-
-        cover_id = self.set_cover(mi, prefer_metadata_cover)
+        cover_id = self.set_cover(mi, opts.prefer_metadata_cover)
         m.clear('cover')
         if cover_id is not None:
             m.add('cover', cover_id)