diff --git a/src/calibre/ebooks/metadata/opf.py b/src/calibre/ebooks/metadata/opf.py deleted file mode 100644 index 9f1d12d6d1..0000000000 --- a/src/calibre/ebooks/metadata/opf.py +++ /dev/null @@ -1,538 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' -'''Read/Write metadata from Open Packaging Format (.opf) files.''' - -import re, os -import uuid -from urllib import unquote, quote - -from calibre.constants import __appname__, __version__ -from calibre.ebooks.metadata import MetaInformation, string_to_authors -from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup -from calibre.ebooks.lrf import entity_to_unicode -from calibre.ebooks.metadata import Resource, ResourceCollection -from calibre.ebooks.metadata.toc import TOC - -class OPFSoup(BeautifulStoneSoup): - - def __init__(self, raw): - BeautifulStoneSoup.__init__(self, raw, - convertEntities=BeautifulSoup.HTML_ENTITIES, - selfClosingTags=['item', 'itemref', 'reference']) - -class ManifestItem(Resource): - - @staticmethod - def from_opf_manifest_item(item, basedir): - if item.has_key('href'): - href = item['href'] - if unquote(href) == href: - try: - href = quote(href) - except KeyError: - pass - res = ManifestItem(href, basedir=basedir, is_path=False) - mt = item.get('media-type', '').strip() - if mt: - res.mime_type = mt - return res - - @dynamic_property - def media_type(self): - def fget(self): - return self.mime_type - def fset(self, val): - self.mime_type = val - return property(fget=fget, fset=fset) - - - def __unicode__(self): - return u''%(self.id, self.href(), self.media_type) - - def __str__(self): - return unicode(self).encode('utf-8') - - def __repr__(self): - return unicode(self) - - - def __getitem__(self, index): - if index == 0: - return self.href() - if index == 1: - return self.media_type - raise IndexError('%d out of bounds.'%index) - - -class Manifest(ResourceCollection): - - @staticmethod - def from_opf_manifest_element(manifest, dir): - m = Manifest() - for item in manifest.findAll(re.compile('item')): - try: - m.append(ManifestItem.from_opf_manifest_item(item, dir)) - id = item.get('id', '') - if not id: - id = 'id%d'%m.next_id - m[-1].id = id - m.next_id += 1 - except ValueError: - continue - return m - - @staticmethod - def from_paths(entries): - ''' - `entries`: List of (path, mime-type) If mime-type is None it is autodetected - ''' - m = Manifest() - for path, mt in entries: - mi = ManifestItem(path, is_path=True) - if mt: - mi.mime_type = mt - mi.id = 'id%d'%m.next_id - m.next_id += 1 - m.append(mi) - return m - - def __init__(self): - ResourceCollection.__init__(self) - self.next_id = 1 - - - def item(self, id): - for i in self: - if i.id == id: - return i - - def id_for_path(self, path): - path = os.path.normpath(os.path.abspath(path)) - for i in self: - if i.path and os.path.normpath(i.path) == path: - return i.id - - def path_for_id(self, id): - for i in self: - if i.id == id: - return i.path - -class Spine(ResourceCollection): - - class Item(Resource): - - def __init__(self, idfunc, *args, **kwargs): - Resource.__init__(self, *args, **kwargs) - self.is_linear = True - self.id = idfunc(self.path) - - @staticmethod - def from_opf_spine_element(spine, manifest): - s = Spine(manifest) - for itemref in spine.findAll(re.compile('itemref')): - if itemref.has_key('idref'): - r = Spine.Item(s.manifest.id_for_path, - s.manifest.path_for_id(itemref['idref']), is_path=True) - r.is_linear = itemref.get('linear', 'yes') == 'yes' - s.append(r) - return s - - @staticmethod - def from_paths(paths, manifest): - s = Spine(manifest) - for path in paths: - try: - s.append(Spine.Item(s.manifest.id_for_path, path, is_path=True)) - except: - continue - return s - - - - def __init__(self, manifest): - ResourceCollection.__init__(self) - self.manifest = manifest - - - def linear_items(self): - for r in self: - if r.is_linear: - yield r.path - - def nonlinear_items(self): - for r in self: - if not r.is_linear: - yield r.path - - def items(self): - for i in self: - yield i.path - - -class Guide(ResourceCollection): - - class Reference(Resource): - - @staticmethod - def from_opf_resource_item(ref, basedir): - title, href, type = ref.get('title', ''), ref['href'], ref['type'] - res = Guide.Reference(href, basedir, is_path=False) - res.title = title - res.type = type - return res - - def __repr__(self): - ans = '' - - - @staticmethod - def from_opf_guide(guide_elem, base_dir=os.getcwdu()): - coll = Guide() - for ref in guide_elem.findAll('reference'): - try: - ref = Guide.Reference.from_opf_resource_item(ref, base_dir) - coll.append(ref) - except: - continue - return coll - - def set_cover(self, path): - map(self.remove, [i for i in self if 'cover' in i.type.lower()]) - for type in ('cover', 'other.ms-coverimage-standard', 'other.ms-coverimage'): - self.append(Guide.Reference(path, is_path=True)) - self[-1].type = type - self[-1].title = '' - - -class standard_field(object): - - def __init__(self, name): - self.name = name - - def __get__(self, obj, typ=None): - return getattr(obj, 'get_'+self.name)() - - -class OPF(MetaInformation): - - MIMETYPE = 'application/oebps-package+xml' - ENTITY_PATTERN = re.compile(r'&(\S+?);') - - uid = standard_field('uid') - application_id = standard_field('application_id') - title = standard_field('title') - authors = standard_field('authors') - language = standard_field('language') - title_sort = standard_field('title_sort') - author_sort = standard_field('author_sort') - comments = standard_field('comments') - category = standard_field('category') - publisher = standard_field('publisher') - isbn = standard_field('isbn') - cover = standard_field('cover') - series = standard_field('series') - series_index = standard_field('series_index') - rating = standard_field('rating') - tags = standard_field('tags') - - def __init__(self): - raise NotImplementedError('Abstract base class') - - @dynamic_property - def package(self): - def fget(self): - return self.soup.find(re.compile('package')) - return property(fget=fget) - - @dynamic_property - def metadata(self): - def fget(self): - return self.package.find(re.compile('metadata')) - return property(fget=fget) - - - def get_title(self): - title = self.metadata.find('dc:title') - if title and title.string: - return self.ENTITY_PATTERN.sub(entity_to_unicode, title.string).strip() - return self.default_title.strip() - - def get_authors(self): - creators = self.metadata.findAll('dc:creator') - for elem in creators: - role = elem.get('role') - if not role: - role = elem.get('opf:role') - if not role: - role = 'aut' - if role == 'aut' and elem.string: - raw = self.ENTITY_PATTERN.sub(entity_to_unicode, elem.string) - return string_to_authors(raw) - return [] - - def get_author_sort(self): - creators = self.metadata.findAll('dc:creator') - for elem in creators: - role = elem.get('role') - if not role: - role = elem.get('opf:role') - if role == 'aut': - fa = elem.get('file-as') - return self.ENTITY_PATTERN.sub(entity_to_unicode, fa).strip() if fa else None - return None - - def get_title_sort(self): - title = self.package.find('dc:title') - if title: - if title.has_key('file-as'): - return title['file-as'].strip() - return None - - def get_comments(self): - comments = self.soup.find('dc:description') - if comments and comments.string: - return self.ENTITY_PATTERN.sub(entity_to_unicode, comments.string).strip() - return None - - def get_uid(self): - package = self.package - if package.has_key('unique-identifier'): - return package['unique-identifier'] - - def get_category(self): - category = self.soup.find('dc:type') - if category and category.string: - return self.ENTITY_PATTERN.sub(entity_to_unicode, category.string).strip() - return None - - def get_publisher(self): - publisher = self.soup.find('dc:publisher') - if publisher and publisher.string: - return self.ENTITY_PATTERN.sub(entity_to_unicode, publisher.string).strip() - return None - - def get_isbn(self): - for item in self.metadata.findAll('dc:identifier'): - scheme = item.get('scheme') - if not scheme: - scheme = item.get('opf:scheme') - if scheme is not None and scheme.lower() == 'isbn' and item.string: - return str(item.string).strip() - return None - - def get_language(self): - item = self.metadata.find('dc:language') - if not item: - return _('Unknown') - return ''.join(item.findAll(text=True)).strip() - - def get_application_id(self): - for item in self.metadata.findAll('dc:identifier'): - scheme = item.get('scheme', None) - if scheme is None: - scheme = item.get('opf:scheme', None) - if scheme in ['libprs500', 'calibre']: - return str(item.string).strip() - return None - - def get_cover(self): - guide = getattr(self, 'guide', []) - if not guide: - guide = [] - references = [ref for ref in guide if 'cover' in ref.type.lower()] - for candidate in ('cover', 'other.ms-coverimage-standard', 'other.ms-coverimage'): - matches = [r for r in references if r.type.lower() == candidate and r.path] - if matches: - return matches[0].path - - def possible_cover_prefixes(self): - isbn, ans = [], [] - for item in self.metadata.findAll('dc:identifier'): - scheme = item.get('scheme') - if not scheme: - scheme = item.get('opf:scheme') - isbn.append((scheme, item.string)) - for item in isbn: - ans.append(item[1].replace('-', '')) - return ans - - def get_series(self): - s = self.metadata.find('series') - if s is not None: - return str(s.string).strip() - return None - - def get_series_index(self): - s = self.metadata.find('series-index') - if s and s.string: - try: - return float(str(s.string).strip()) - except: - return None - return None - - def get_rating(self): - s = self.metadata.find('rating') - if s and s.string: - try: - return int(str(s.string).strip()) - except: - return None - return None - - def get_tags(self): - ans = [] - subs = self.soup.findAll('dc:subject') - for sub in subs: - val = sub.string - if val: - ans.append(val) - return [unicode(a).strip() for a in ans] - - -class OPFReader(OPF): - - def __init__(self, stream, dir=os.getcwdu()): - manage = False - if not hasattr(stream, 'read'): - manage = True - dir = os.path.dirname(stream) - stream = open(stream, 'rb') - self.default_title = stream.name if hasattr(stream, 'name') else 'Unknown' - if hasattr(stream, 'seek'): - stream.seek(0) - self.soup = OPFSoup(stream.read()) - if manage: - stream.close() - self.manifest = Manifest() - m = self.soup.find(re.compile('manifest')) - if m is not None: - self.manifest = Manifest.from_opf_manifest_element(m, dir) - self.spine = None - spine = self.soup.find(re.compile('spine')) - if spine is not None: - self.spine = Spine.from_opf_spine_element(spine, self.manifest) - - self.toc = TOC(base_path=dir) - self.toc.read_from_opf(self) - guide = self.soup.find(re.compile('guide')) - if guide is not None: - self.guide = Guide.from_opf_guide(guide, dir) - self.base_dir = dir - self.cover_data = (None, None) - - -class OPFCreator(MetaInformation): - - def __init__(self, base_path, *args, **kwargs): - ''' - Initialize. - @param base_path: An absolute path to the directory in which this OPF file - will eventually be. This is used by the L{create_manifest} method - to convert paths to files into relative paths. - ''' - MetaInformation.__init__(self, *args, **kwargs) - self.base_path = os.path.abspath(base_path) - if self.application_id is None: - self.application_id = str(uuid.uuid4()) - if not isinstance(self.toc, TOC): - self.toc = None - if not self.authors: - self.authors = [_('Unknown')] - if self.guide is None: - self.guide = Guide() - if self.cover: - self.guide.set_cover(self.cover) - - - def create_manifest(self, entries): - ''' - Create - - `entries`: List of (path, mime-type) If mime-type is None it is autodetected - ''' - entries = map(lambda x: x if os.path.isabs(x[0]) else - (os.path.abspath(os.path.join(self.base_path, x[0])), x[1]), - entries) - self.manifest = Manifest.from_paths(entries) - self.manifest.set_basedir(self.base_path) - - def create_manifest_from_files_in(self, files_and_dirs): - entries = [] - - def dodir(dir): - for spec in os.walk(dir): - root, files = spec[0], spec[-1] - for name in files: - path = os.path.join(root, name) - if os.path.isfile(path): - entries.append((path, None)) - - for i in files_and_dirs: - if os.path.isdir(i): - dodir(i) - else: - entries.append((i, None)) - - self.create_manifest(entries) - - def create_spine(self, entries): - ''' - Create the element. Must first call :method:`create_manifest`. - - `entries`: List of paths - ''' - entries = map(lambda x: x if os.path.isabs(x) else - os.path.abspath(os.path.join(self.base_path, x)), entries) - self.spine = Spine.from_paths(entries, self.manifest) - - def set_toc(self, toc): - ''' - Set the toc. You must call :method:`create_spine` before calling this - method. - - :param toc: A :class:`TOC` object - ''' - self.toc = toc - - def create_guide(self, guide_element): - self.guide = Guide.from_opf_guide(guide_element, self.base_path) - self.guide.set_basedir(self.base_path) - - def render(self, opf_stream, ncx_stream=None, ncx_manifest_entry=None): - from calibre.utils.genshi.template import MarkupTemplate - opf_template = open(P('templates/opf.xml'), 'rb').read() - template = MarkupTemplate(opf_template) - if self.manifest: - self.manifest.set_basedir(self.base_path) - if ncx_manifest_entry is not None: - if not os.path.isabs(ncx_manifest_entry): - ncx_manifest_entry = os.path.join(self.base_path, ncx_manifest_entry) - remove = [i for i in self.manifest if i.id == 'ncx'] - for item in remove: - self.manifest.remove(item) - self.manifest.append(ManifestItem(ncx_manifest_entry, self.base_path)) - self.manifest[-1].id = 'ncx' - self.manifest[-1].mime_type = 'application/x-dtbncx+xml' - if not self.guide: - self.guide = Guide() - if self.cover: - cover = self.cover - if not os.path.isabs(cover): - cover = os.path.abspath(os.path.join(self.base_path, cover)) - self.guide.set_cover(cover) - self.guide.set_basedir(self.base_path) - - opf = template.generate(__appname__=__appname__, mi=self, __version__=__version__).render('xml') - if not opf.startswith('\n'+opf - opf_stream.write(opf) - opf_stream.flush() - toc = getattr(self, 'toc', None) - if toc is not None and ncx_stream is not None: - toc.render(ncx_stream, self.application_id) - ncx_stream.flush() -