diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py
index b61ed3fad1..08ced86af9 100644
--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@@ -8,10 +8,196 @@ lxml based OPF parser.
'''
import sys, unittest, functools, os
+from urllib import unquote, quote
from lxml import etree
from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.metadata import Resource, ResourceCollection
+
+class ManifestItem(Resource):
+
+ @staticmethod
+ def from_opf_manifest_item(item, basedir):
+ href = item.get('href', None)
+ if href:
+ if unquote(href) == href:
+ href = quote(href)
+ res = ManifestItem(href, basedir=basedir, is_path=False)
+ mt = item.get('media-type', '').strip()
+ if mt:
+ res.mime_type = mt
+ return res
+
+ @apply
+ def media_type():
+ def fget(self):
+ return self.mime_type
+ def fset(self, val):
+ self.mime_type = val
+ return property(fget=fget, fset=fset)
+
+
+ def __unicode__(self):
+ return u' '%(self.id, self.href(), self.media_type)
+
+ def __str__(self):
+ return unicode(self).encode('utf-8')
+
+ def __repr__(self):
+ return unicode(self)
+
+
+ def __getitem__(self, index):
+ if index == 0:
+ return self.href()
+ if index == 1:
+ return self.media_type
+ raise IndexError('%d out of bounds.'%index)
+
+
+class Manifest(ResourceCollection):
+
+ @staticmethod
+ def from_opf_manifest_element(items, dir):
+ m = Manifest()
+ for item in items:
+ try:
+ m.append(ManifestItem.from_opf_manifest_item(item, dir))
+ id = item.get('id', '')
+ if not id:
+ id = 'id%d'%m.next_id
+ m[-1].id = id
+ m.next_id += 1
+ except ValueError:
+ continue
+ return m
+
+ @staticmethod
+ def from_paths(entries):
+ '''
+ `entries`: List of (path, mime-type) If mime-type is None it is autodetected
+ '''
+ m = Manifest()
+ for path, mt in entries:
+ mi = ManifestItem(path, is_path=True)
+ if mt:
+ mi.mime_type = mt
+ mi.id = 'id%d'%m.next_id
+ m.next_id += 1
+ m.append(mi)
+ return m
+
+ def __init__(self):
+ ResourceCollection.__init__(self)
+ self.next_id = 1
+
+
+ def item(self, id):
+ for i in self:
+ if i.id == id:
+ return i
+
+ def id_for_path(self, path):
+ path = os.path.normpath(os.path.abspath(path))
+ for i in self:
+ if i.path and os.path.normpath(i.path) == path:
+ return i.id
+
+ def path_for_id(self, id):
+ for i in self:
+ if i.id == id:
+ return i.path
+
+class Spine(ResourceCollection):
+
+ class Item(Resource):
+
+ def __init__(self, idfunc, *args, **kwargs):
+ Resource.__init__(self, *args, **kwargs)
+ self.is_linear = True
+ self.id = idfunc(self.path)
+
+ @staticmethod
+ def from_opf_spine_element(itemrefs, manifest):
+ s = Spine(manifest)
+ for itemref in itemrefs:
+ idref = itemref.get('idref', None)
+ if idref is not None:
+ r = Spine.Item(s.manifest.id_for_path,
+ s.manifest.path_for_id(idref), is_path=True)
+ r.is_linear = itemref.get('linear', 'yes') == 'yes'
+ s.append(r)
+ return s
+
+ @staticmethod
+ def from_paths(paths, manifest):
+ s = Spine(manifest)
+ for path in paths:
+ try:
+ s.append(Spine.Item(s.manifest.id_for_path, path, is_path=True))
+ except:
+ continue
+ return s
+
+
+
+ def __init__(self, manifest):
+ ResourceCollection.__init__(self)
+ self.manifest = manifest
+
+
+ def linear_items(self):
+ for r in self:
+ if r.is_linear:
+ yield r.path
+
+ def nonlinear_items(self):
+ for r in self:
+ if not r.is_linear:
+ yield r.path
+
+ def items(self):
+ for i in self:
+ yield i.path
+
+class Guide(ResourceCollection):
+
+ class Reference(Resource):
+
+ @staticmethod
+ def from_opf_resource_item(ref, basedir):
+ title, href, type = ref.get('title', ''), ref.get('href'), ref.get('type')
+ res = Guide.Reference(href, basedir, is_path=False)
+ res.title = title
+ res.type = type
+ return res
+
+ def __repr__(self):
+ ans = ''
+
+
+ @staticmethod
+ def from_opf_guide(references, base_dir=os.getcwdu()):
+ coll = Guide()
+ for ref in references:
+ try:
+ ref = Guide.Reference.from_opf_resource_item(ref, base_dir)
+ coll.append(ref)
+ except:
+ continue
+ return coll
+
+ def set_cover(self, path):
+ map(self.remove, [i for i in self if 'cover' in i.type.lower()])
+ for type in ('cover', 'other.ms-coverimage-standard', 'other.ms-coverimage'):
+ self.append(Guide.Reference(path, is_path=True))
+ self[-1].type = type
+ self[-1].title = ''
+
class MetadataField(object):
@@ -61,7 +247,10 @@ class OPF(object):
'[re:match(name(), "creator", "i") and (@role="aut" or @opf:role="aut")]')
tags_path = XPath('/opf:package/opf:metadata/*[re:match(name(), "subject", "i")]')
isbn_path = XPath('/opf:package/opf:metadata/*[re:match(name(), "identifier", "i") and '+
- '(re:match(@scheme, "isbn", "i") or re:match(@opf:scheme, "isbn", "i"))]')
+ '(re:match(@scheme, "isbn", "i") or re:match(@opf:scheme, "isbn", "i"))]')
+ manifest_path = XPath('/opf:package/*[re:match(name(), "manifest", "i")]/*[re:match(name(), "item", "i")]')
+ spine_path = XPath('/opf:package/*[re:match(name(), "spine", "i")]/*[re:match(name(), "itemref", "i")]')
+ guide_path = XPath('/opf:package/*[re:match(name(), "guide", "i")]/*[re:match(name(), "reference", "i")]')
title = MetadataField('title')
publisher = MetadataField('publisher')
@@ -73,8 +262,8 @@ class OPF(object):
rating = MetadataField('rating', is_dc=False, formatter=int)
- def __init__(self, stream, basedir):
- self.basedir = basedir
+ def __init__(self, stream, basedir=os.getcwdu()):
+ self.basedir = self.base_dir = basedir
raw, self.encoding = xml_to_unicode(stream.read(), strip_encoding_pats=True, resolve_entities=True)
self.tree = etree.fromstring(raw, self.PARSER)
@@ -82,6 +271,19 @@ class OPF(object):
if not self.metadata:
raise ValueError('Malformed OPF file: No element')
self.metadata = self.metadata[0]
+ self.manifest = Manifest()
+ m = self.manifest_path(self.tree)
+ if m:
+ self.manifest = Manifest.from_opf_manifest_element(m, basedir)
+ self.spine = None
+ s = self.spine_path(self.tree)
+ if s:
+ self.spine = Spine.from_opf_spine_element(s, self.manifest)
+ self.guide = None
+ guide = self.guide_path(self.tree)
+ if guide:
+ self.guide = Guide.from_opf_guide(guide, basedir)
+ self.cover_data = (None, None)
def get_text(self, elem):
return u''.join(self.TEXT(elem))
diff --git a/src/calibre/parallel.py b/src/calibre/parallel.py
index 2fb206af8b..2cf1f50630 100644
--- a/src/calibre/parallel.py
+++ b/src/calibre/parallel.py
@@ -24,7 +24,7 @@ In the second mode, the controller can also send the worker STOP messages, in wh
the worker interrupts the job and dies. The sending of progress and console output messages
is buffered and asynchronous to prevent the job from being IO bound.
'''
-import sys, os, gc, cPickle, traceback, atexit, cStringIO, time, signal, \
+import sys, os, gc, cPickle, traceback, cStringIO, time, signal, \
subprocess, socket, collections, binascii, re, thread, tempfile, atexit
from select import select
from threading import RLock, Thread, Event
@@ -919,9 +919,9 @@ def worker(host, port):
tb = unicode(traceback.format_exc(), 'utf-8', 'replace')
msg = 'ERROR:'+cPickle.dumps((exception, tb),-1)
write(client_socket, msg)
- res = read(client_socket, 10)
- if res != 'OK':
- break
+ res = read(client_socket, 10)
+ if res != 'OK':
+ break
gc.collect()
elif msg == 'PING:':
write(client_socket, 'OK')