diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index fdafd2e08b..39c77eace5 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -452,6 +452,13 @@ class MobiWriter(object): code = EXTH_CODES[term] for item in oeb.metadata[term]: data = self.COLLAPSE_RE.sub(' ', unicode(item)) + if term == 'identifier': + if data.lower().startswith('urn:isbn:'): + data = data[9:] + elif item.get('scheme', '').lower() == 'isbn': + pass + else: + continue data = data.encode('utf-8') exth.write(pack('>II', code, len(data) + 8)) exth.write(data) @@ -468,7 +475,7 @@ class MobiWriter(object): nrecs += 3 exth = exth.getvalue() trail = len(exth) % 4 - pad = '' if not trail else '\0' * (4 - trail) + pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte exth = ['EXTH', pack('>II', len(exth) + 12, nrecs), exth, pad] return ''.join(exth) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 7f90183324..3336391a38 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -21,6 +21,7 @@ from lxml import etree from lxml import html from calibre import LoggingInterface from calibre.translations.dynamic import translate +from calibre.startup import get_lang XML_PARSER = etree.XMLParser(recover=True) XML_NS = 'http://www.w3.org/XML/1998/namespace' @@ -30,6 +31,7 @@ OPF2_NS = 'http://www.idpf.org/2007/opf' DC09_NS = 'http://purl.org/metadata/dublin_core' DC10_NS = 'http://purl.org/dc/elements/1.0/' DC11_NS = 'http://purl.org/dc/elements/1.1/' +DC_NSES = set([DC09_NS, DC10_NS, DC11_NS]) XSI_NS = 'http://www.w3.org/2001/XMLSchema-instance' DCTERMS_NS = 'http://purl.org/dc/terms/' NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/' @@ -194,8 +196,12 @@ class Metadata(object): if term == OPF('meta') and not value: term = self.fq_attrib.pop('name') value = self.fq_attrib.pop('content') - elif term in Metadata.TERMS and not namespace(term): - term = DC(term) + elif barename(term).lower() in Metadata.TERMS and \ + (not namespace(term) or namespace(term) in DC_NSES): + # Anything looking like Dublin Core is coerced + term = DC(barename(term).lower()) + elif namespace(term) == OPF2_NS: + term = barename(term) self.term = term self.value = value self.attrib = attrib = {} @@ -216,7 +222,16 @@ class Metadata(object): raise AttributeError( '%r object has no attribute %r' \ % (self.__class__.__name__, name)) - + + def __getitem__(self, key): + return self.attrib[key] + + def __contains__(self, key): + return key in self.attrib + + def get(self, key, default=None): + return self.attrib.get(key, default) + def __repr__(self): return 'Item(term=%r, value=%r, attrib=%r)' \ % (barename(self.term), self.value, self.attrib) @@ -814,7 +829,7 @@ class OEBBook(object): break if not metadata.language: self.logger.warn(u'Language not specified.') - metadata.add('language', 'en') + metadata.add('language', get_lang()) if not metadata.creator: self.logger.warn(u'Creator not specified.') metadata.add('creator', _('Unknown')) @@ -857,6 +872,8 @@ class OEBBook(object): extras.sort() for item in extras: spine.add(item, False) + if len(spine) == 0: + raise OEBError("Spine is empty") def _guide_from_opf(self, opf): self.guide = guide = Guide(self) @@ -886,8 +903,11 @@ class OEBBook(object): if len(result) != 1: return False id = result[0] - ncx = self.manifest[id].data - self.manifest.remove(id) + if id not in self.manifest.ids: + return False + item = self.manifest.ids[id] + ncx = item.data + self.manifest.remove(item) title = xpath(ncx, 'ncx:docTitle/ncx:text/text()')[0] self.toc = toc = TOC(title) navmaps = xpath(ncx, 'ncx:navMap')