From 52d968838cacc556659ecea3cabac6a77f6bac26 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 25 Mar 2009 15:13:30 -0700 Subject: [PATCH] MOBI Output: Fix Time recipe --> MOBI --- src/calibre/ebooks/oeb/base.py | 204 +++++++++++++++++---------------- 1 file changed, 104 insertions(+), 100 deletions(-) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 8e0fa3fbb2..940b124d88 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -282,17 +282,17 @@ class Metadata(object): OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS} OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS, 'xsi': XSI_NS, 'calibre': CALIBRE_NS} - + class Item(object): - + class Attribute(object): - + def __init__(self, attr, allowed=None): if not callable(attr): attr_, attr = attr, lambda term: attr_ self.attr = attr self.allowed = allowed - + def term_attr(self, obj): term = obj.term if namespace(term) != DC11_NS: @@ -303,14 +303,14 @@ class Metadata(object): 'attribute %r not valid for metadata term %r' \ % (self.attr(term), barename(obj.term))) return self.attr(term) - + def __get__(self, obj, cls): if obj is None: return None return obj.attrib.get(self.term_attr(obj), '') - + def __set__(self, obj, value): obj.attrib[self.term_attr(obj)] = value - + def __init__(self, term, value, attrib={}, nsmap={}, **kwargs): self.attrib = attrib = dict(attrib) self.nsmap = nsmap = dict(nsmap) @@ -336,8 +336,8 @@ class Metadata(object): nsattr = 'scheme' if attr != nsattr: attrib[nsattr] = attrib.pop(attr) - - scheme = Attribute(lambda term : 'scheme' if term == OPF('meta') else OPF('scheme'), + + scheme = Attribute(lambda term : 'scheme' if term == OPF('meta') else OPF('scheme'), [DC('identifier'), OPF('meta')]) file_as = Attribute(OPF('file-as'), [DC('creator'), DC('contributor')]) role = Attribute(OPF('role'), [DC('creator'), DC('contributor')]) @@ -349,19 +349,19 @@ class Metadata(object): DC('relation'), DC('rights'), DC('source'), DC('subject'), OPF('meta')]) - + def __getitem__(self, key): return self.attrib[key] - + def __setitem__(self, key, value): self.attrib[key] = value - + def __contains__(self, key): return key in self.attrib - + def get(self, key, default=None): return self.attrib.get(key, default) - + def __repr__(self): return 'Item(term=%r, value=%r, attrib=%r)' \ % (barename(self.term), self.value, self.attrib) @@ -387,7 +387,7 @@ class Metadata(object): elem.attrib['name'] = prefixname(self.term, nsrmap) elem.attrib['content'] = prefixname(self.value, nsrmap) return elem - + def to_opf2(self, parent=None, nsrmap={}): attrib = {} for key, value in self.attrib.items(): @@ -400,7 +400,7 @@ class Metadata(object): elem.attrib['name'] = prefixname(self.term, nsrmap) elem.attrib['content'] = prefixname(self.value, nsrmap) return elem - + def __init__(self, oeb): self.oeb = oeb self.items = defaultdict(list) @@ -434,7 +434,7 @@ class Metadata(object): nsmap.update(item.nsmap) return nsmap return property(fget=fget) - + @apply def _opf1_nsmap(): def fget(self): @@ -444,8 +444,8 @@ class Metadata(object): del nsmap[key] return nsmap return property(fget=fget) - - + + @apply def _opf2_nsmap(): def fget(self): @@ -453,8 +453,8 @@ class Metadata(object): nsmap.update(self.OPF2_NSMAP) return nsmap return property(fget=fget) - - + + def to_opf1(self, parent=None): nsmap = self._opf1_nsmap nsrmap = dict((value, key) for key, value in nsmap.items()) @@ -468,7 +468,7 @@ class Metadata(object): chaptertour = self.Item('ms-chaptertour', 'chaptertour') chaptertour.to_opf1(dcmeta, xmeta, nsrmap=nsrmap) return elem - + def to_opf2(self, parent=None): nsmap = self._opf2_nsmap nsrmap = dict((value, key) for key, value in nsmap.items()) @@ -480,12 +480,12 @@ class Metadata(object): class Manifest(object): - + class Item(object): - + NUM_RE = re.compile('^(.*)([0-9][0-9.]*)(?=[.]|$)') META_XP = XPath('/h:html/h:head/h:meta[@http-equiv="Content-Type"]') - + def __init__(self, oeb, id, href, media_type, fallback=None, loader=str, data=None): self.oeb = oeb @@ -543,7 +543,11 @@ class Manifest(object): elif not namespace(data.tag): data.attrib['xmlns'] = XHTML_NS data = etree.tostring(data, encoding=unicode) - data = etree.fromstring(data) + try: + data = etree.fromstring(data) + except: + data=data.replace(':=', '=').replace(':>', '>') + data = etree.fromstring(data) elif namespace(data.tag) != XHTML_NS: # OEB_DOC_NS, but possibly others ns = namespace(data.tag) @@ -584,7 +588,7 @@ class Manifest(object): 'File %r missing element' % self.href) etree.SubElement(data, XHTML('body')) return data - + @apply def data(): def fget(self): @@ -604,7 +608,7 @@ class Manifest(object): def fdel(self): self._data = None return property(fget, fset, fdel) - + def __str__(self): data = self.data if isinstance(data, etree._Element): @@ -612,13 +616,13 @@ class Manifest(object): if isinstance(data, unicode): return data.encode('utf-8') return str(data) - + def __eq__(self, other): return id(self) == id(other) - + def __ne__(self, other): return not self.__eq__(other) - + def __cmp__(self, other): result = cmp(self.spine_position, other.spine_position) if result != 0: @@ -632,7 +636,7 @@ class Manifest(object): onum = float(omatch.group(2)) if omatch else 0.0 okey = (oref, onum, other.id) return cmp(skey, okey) - + def relhref(self, href): if urlparse(href).scheme: return href @@ -663,7 +667,7 @@ class Manifest(object): href = os.path.join(dirname, href) href = os.path.normpath(href).replace('\\', '/') return href - + def __init__(self, oeb): self.oeb = oeb self.ids = {} @@ -714,7 +718,7 @@ class Manifest(object): def items(self): for id, item in self.ids.items(): yield id, item - + def __contains__(self, key): return key in self.ids @@ -732,7 +736,7 @@ class Manifest(object): attrib['fallback'] = item.fallback element(elem, 'item', attrib=attrib) return elem - + def to_opf2(self, parent=None): elem = element(parent, OPF('manifest')) for item in self.ids.values(): @@ -750,7 +754,7 @@ class Manifest(object): class Spine(object): - + def __init__(self, oeb): self.oeb = oeb self.items = [] @@ -763,13 +767,13 @@ class Spine(object): elif linear in ('no', 'false'): linear = False return linear - + def add(self, item, linear=None): item.linear = self._linear(linear) item.spine_position = len(self.items) self.items.append(item) return item - + def insert(self, index, item, linear): item.linear = self._linear(linear) item.spine_position = index @@ -777,14 +781,14 @@ class Spine(object): for i in xrange(index, len(self.items)): self.items[i].spine_position = i return item - + def remove(self, item): index = item.spine_position self.items.pop(index) for i in xrange(index, len(self.items)): self.items[i].spine_position = i item.spine_position = None - + def __iter__(self): for item in self.items: yield item @@ -816,9 +820,9 @@ class Spine(object): class Guide(object): - + class Reference(object): - + _TYPES_TITLES = [('cover', __('Cover')), ('title-page', __('Title Page')), ('toc', __('Table of Contents')), @@ -839,7 +843,7 @@ class Guide(object): TYPES = set(t for t, _ in _TYPES_TITLES) TITLES = dict(_TYPES_TITLES) ORDER = dict((t, i) for (t, _), i in izip(_TYPES_TITLES, count(0))) - + def __init__(self, oeb, type, title, href): self.oeb = oeb if type.lower() in self.TYPES: @@ -852,22 +856,22 @@ class Guide(object): self.type = type self.title = title self.href = urlnormalize(href) - + def __repr__(self): return 'Reference(type=%r, title=%r, href=%r)' \ % (self.type, self.title, self.href) - + @apply def _order(): def fget(self): return self.ORDER.get(self.type, self.type) return property(fget=fget) - + def __cmp__(self, other): if not isinstance(other, Guide.Reference): return NotImplemented return cmp(self._order, other._order) - + @apply def item(): def fget(self): @@ -875,40 +879,40 @@ class Guide(object): hrefs = self.oeb.manifest.hrefs return hrefs.get(path, None) return property(fget=fget) - + def __init__(self, oeb): self.oeb = oeb self.refs = {} - + def add(self, type, title, href): ref = self.Reference(self.oeb, type, title, href) self.refs[type] = ref return ref - + def iterkeys(self): for type in self.refs: yield type __iter__ = iterkeys - + def values(self): return sorted(self.refs.values()) - + def items(self): for type, ref in self.refs.items(): yield type, ref - + def __getitem__(self, key): return self.refs[key] - + def __delitem__(self, key): del self.refs[key] - + def __contains__(self, key): return key in self.refs - + def __len__(self): return len(self.refs) - + def to_opf1(self, parent=None): elem = element(parent, 'guide') for ref in self.refs.values(): @@ -917,7 +921,7 @@ class Guide(object): attrib['title'] = ref.title element(elem, 'reference', attrib=attrib) return elem - + def to_opf2(self, parent=None): elem = element(parent, OPF('guide')) for ref in self.refs.values(): @@ -936,7 +940,7 @@ class TOC(object): self.klass = klass self.id = id self.nodes = [] - + def add(self, title, href, klass=None, id=None): node = TOC(title, href, klass, id) self.nodes.append(node) @@ -947,11 +951,11 @@ class TOC(object): yield node for child in node.iterdescendants(): yield child - + def __iter__(self): for node in self.nodes: yield node - + def __getitem__(self, index): return self.nodes[index] @@ -963,7 +967,7 @@ class TOC(object): prev.nodes.append(node) else: prev = node - + def depth(self, level=0): if self.nodes: return self.nodes[0].depth(level+1) @@ -975,7 +979,7 @@ class TOC(object): 'title': node.title, 'href': node.href}) node.to_opf1(tour) return tour - + def to_ncx(self, parent, depth=1): for node in self.nodes: id = node.id or unicode(uuid.uuid4()) @@ -992,7 +996,7 @@ class TOC(object): class PageList(object): - + class Page(object): def __init__(self, name, href, type='normal', klass=None, id=None): self.name = name @@ -1000,10 +1004,10 @@ class PageList(object): self.type = type self.id = id self.klass = klass - + def __init__(self): self.pages = [] - + def add(self, name, href, type='normal', klass=None, id=None): page = self.Page(name, href, type, klass, id) self.pages.append(page) @@ -1011,14 +1015,14 @@ class PageList(object): def __len__(self): return len(self.pages) - + def __iter__(self): for page in self.pages: yield page - + def __getitem__(self, index): return self.pages[index] - + def to_ncx(self, parent=None): plist = element(parent, NCX('pageList'), id=str(uuid.uuid4())) values = dict((t, count(1)) for t in ('front', 'normal', 'special')) @@ -1034,7 +1038,7 @@ class PageList(object): element(label, NCX('text')).text = page.name element(ptarget, NCX('content'), src=page.href) return plist - + def to_page_map(self): pmap = etree.Element(OPF('page-map'), nsmap={None: OPF2_NS}) for page in self.pages: @@ -1043,7 +1047,7 @@ class PageList(object): class OEBBook(object): - + COVER_SVG_XP = XPath('h:body//svg:svg[position() = 1]') COVER_OBJECT_XP = XPath('h:body//h:object[@data][position() = 1]') @@ -1058,7 +1062,7 @@ class OEBBook(object): if opfpath or container: opf = self._read_opf(opfpath) self._all_from_opf(opf) - + def _clean_opf(self, opf): nsmap = {} for elem in opf.iter(tag=etree.Element): @@ -1085,7 +1089,7 @@ class OEBBook(object): for element in xpath(opf, tag): nroot.append(element) return nroot - + def _read_opf(self, opfpath): data = self.container.read(opfpath) data = self.decode(data) @@ -1102,7 +1106,7 @@ class OEBBook(object): raise OEBError('Invalid namespace %r for OPF document' % ns) opf = self._clean_opf(opf) return opf - + def _metadata_from_opf(self, opf): uid = opf.get('unique-identifier', None) self.uid = None @@ -1190,7 +1194,7 @@ class OEBBook(object): media_type = guessed or BINARY_MIME added = manifest.add(id, href, media_type) unchecked.add(added) - + def _manifest_from_opf(self, opf): self.manifest = manifest = Manifest(self) for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'): @@ -1214,7 +1218,7 @@ class OEBBook(object): id, href = manifest.generate(id, href) manifest.add(id, href, media_type, fallback) self._manifest_add_missing() - + def _spine_add_extra(self): manifest = self.manifest spine = self.spine @@ -1247,7 +1251,7 @@ class OEBBook(object): self.logger.warn( 'Spine-referenced file %r not in spine' % item.href) spine.add(item, linear=False) - + def _spine_from_opf(self, opf): self.spine = spine = Spine(self) for elem in xpath(opf, '/o2:package/o2:spine/o2:itemref'): @@ -1260,7 +1264,7 @@ class OEBBook(object): if len(spine) == 0: raise OEBError("Spine is empty") self._spine_add_extra() - + def _guide_from_opf(self, opf): self.guide = guide = Guide(self) for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'): @@ -1270,7 +1274,7 @@ class OEBBook(object): self.logger.warn(u'Guide reference %r not found' % href) continue guide.add(elem.get('type'), elem.get('title'), href) - + def _find_ncx(self, opf): result = xpath(opf, '/o2:package/o2:spine/@toc') if result: @@ -1283,9 +1287,9 @@ class OEBBook(object): for item in self.manifest.values(): if item.media_type == NCX_MIME: self.manifest.remove(item) - return item + return item return None - + def _toc_from_navpoint(self, item, toc, navpoint): children = xpath(navpoint, 'ncx:navPoint') for child in children: @@ -1303,7 +1307,7 @@ class OEBBook(object): klass = child.get('class') node = toc.add(title, href, id=id, klass=klass) self._toc_from_navpoint(item, node, child) - + def _toc_from_ncx(self, item): if item is None: return False @@ -1316,7 +1320,7 @@ class OEBBook(object): for navmap in navmaps: self._toc_from_navpoint(item, toc, navmap) return True - + def _toc_from_tour(self, opf): result = xpath(opf, 'o2:tours/o2:tour') if not result: @@ -1332,11 +1336,11 @@ class OEBBook(object): path, _ = urldefrag(urlnormalize(href)) if path not in self.manifest.hrefs: self.logger.warn('TOC reference %r not found' % href) - continue + continue id = site.get('id') toc.add(title, href, id=id) return True - + def _toc_from_html(self, opf): if 'toc' not in self.guide: return False @@ -1368,7 +1372,7 @@ class OEBBook(object): for href in order: toc.add(' '.join(titles[href]), href) return True - + def _toc_from_spine(self, opf): self.toc = toc = TOC() titles = [] @@ -1395,14 +1399,14 @@ class OEBBook(object): if not item.linear: continue toc.add(title, item.href) return True - + def _toc_from_opf(self, opf, item): if self._toc_from_ncx(item): return if self._toc_from_tour(opf): return self.logger.warn('No metadata table of contents found') if self._toc_from_html(opf): return self._toc_from_spine(opf) - + def _pages_from_ncx(self, opf, item): if item is None: return False @@ -1423,7 +1427,7 @@ class OEBBook(object): klass = ptarget.get('class') pages.add(name, href, type=type, id=id, klass=klass) return True - + def _find_page_map(self, opf): result = xpath(opf, '/o2:package/o2:spine/@page-map') if result: @@ -1438,7 +1442,7 @@ class OEBBook(object): self.manifest.remove(item) return item return None - + def _pages_from_page_map(self, opf): item = self._find_page_map(opf) if item is None: @@ -1459,13 +1463,13 @@ class OEBBook(object): type = 'front' pages.add(name, href, type=type) return True - + def _pages_from_opf(self, opf, item): if self._pages_from_ncx(opf, item): return if self._pages_from_page_map(opf): return self.pages = PageList() return - + def _cover_from_html(self, hcover): with TemporaryDirectory('_html_cover') as tdir: writer = DirWriter() @@ -1476,7 +1480,7 @@ class OEBBook(object): id, href = self.manifest.generate('cover', 'cover.jpeg') item = self.manifest.add(id, href, JPEG_MIME, data=data) return item - + def _locate_cover_image(self): if self.metadata.cover: id = str(self.metadata.cover[0]) @@ -1513,14 +1517,14 @@ class OEBBook(object): if item is not None and item.media_type in OEB_IMAGES: return item return self._cover_from_html(hcover) - + def _ensure_cover_image(self): cover = self._locate_cover_image() if self.metadata.cover: self.metadata.cover[0].value = cover.id return self.metadata.add('cover', cover.id) - + def _all_from_opf(self, opf): self.version = opf.get('version', '1.2') self._metadata_from_opf(opf) @@ -1531,12 +1535,12 @@ class OEBBook(object): self._toc_from_opf(opf, item) self._pages_from_opf(opf, item) self._ensure_cover_image() - + def translate(self, text): lang = str(self.metadata.language[0]) lang = lang.split('-', 1)[0].lower() return translate(lang, text) - + def decode(self, data): if isinstance(data, unicode): return data @@ -1558,7 +1562,7 @@ class OEBBook(object): data = data.replace('\r\n', '\n') data = data.replace('\r', '\n') return data - + def to_opf1(self): package = etree.Element('package', attrib={'unique-identifier': self.uid.id}) @@ -1600,7 +1604,7 @@ class OEBBook(object): order = playorder.get(href, 0) elem.attrib['playOrder'] = str(order) return - + def _to_ncx(self): lang = unicode(self.metadata.language[0]) ncx = etree.Element(NCX('ncx'), @@ -1629,7 +1633,7 @@ class OEBBook(object): maxpnum.attrib['content'] = str(value) self._update_playorder(ncx) return ncx - + def to_opf2(self, page_map=False): results = {} package = etree.Element(OPF('package'),