mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Remove old OPF classes
This commit is contained in:
parent
73753b67d8
commit
c96e77fe4c
@ -1,538 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
'''Read/Write metadata from Open Packaging Format (.opf) files.'''
|
||||
|
||||
import re, os
|
||||
import uuid
|
||||
from urllib import unquote, quote
|
||||
|
||||
from calibre.constants import __appname__, __version__
|
||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
|
||||
from calibre.ebooks.lrf import entity_to_unicode
|
||||
from calibre.ebooks.metadata import Resource, ResourceCollection
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
|
||||
class OPFSoup(BeautifulStoneSoup):
|
||||
|
||||
def __init__(self, raw):
|
||||
BeautifulStoneSoup.__init__(self, raw,
|
||||
convertEntities=BeautifulSoup.HTML_ENTITIES,
|
||||
selfClosingTags=['item', 'itemref', 'reference'])
|
||||
|
||||
class ManifestItem(Resource):
|
||||
|
||||
@staticmethod
|
||||
def from_opf_manifest_item(item, basedir):
|
||||
if item.has_key('href'):
|
||||
href = item['href']
|
||||
if unquote(href) == href:
|
||||
try:
|
||||
href = quote(href)
|
||||
except KeyError:
|
||||
pass
|
||||
res = ManifestItem(href, basedir=basedir, is_path=False)
|
||||
mt = item.get('media-type', '').strip()
|
||||
if mt:
|
||||
res.mime_type = mt
|
||||
return res
|
||||
|
||||
@dynamic_property
|
||||
def media_type(self):
|
||||
def fget(self):
|
||||
return self.mime_type
|
||||
def fset(self, val):
|
||||
self.mime_type = val
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
|
||||
def __unicode__(self):
|
||||
return u'<item id="%s" href="%s" media-type="%s" />'%(self.id, self.href(), self.media_type)
|
||||
|
||||
def __str__(self):
|
||||
return unicode(self).encode('utf-8')
|
||||
|
||||
def __repr__(self):
|
||||
return unicode(self)
|
||||
|
||||
|
||||
def __getitem__(self, index):
|
||||
if index == 0:
|
||||
return self.href()
|
||||
if index == 1:
|
||||
return self.media_type
|
||||
raise IndexError('%d out of bounds.'%index)
|
||||
|
||||
|
||||
class Manifest(ResourceCollection):
|
||||
|
||||
@staticmethod
|
||||
def from_opf_manifest_element(manifest, dir):
|
||||
m = Manifest()
|
||||
for item in manifest.findAll(re.compile('item')):
|
||||
try:
|
||||
m.append(ManifestItem.from_opf_manifest_item(item, dir))
|
||||
id = item.get('id', '')
|
||||
if not id:
|
||||
id = 'id%d'%m.next_id
|
||||
m[-1].id = id
|
||||
m.next_id += 1
|
||||
except ValueError:
|
||||
continue
|
||||
return m
|
||||
|
||||
@staticmethod
|
||||
def from_paths(entries):
|
||||
'''
|
||||
`entries`: List of (path, mime-type) If mime-type is None it is autodetected
|
||||
'''
|
||||
m = Manifest()
|
||||
for path, mt in entries:
|
||||
mi = ManifestItem(path, is_path=True)
|
||||
if mt:
|
||||
mi.mime_type = mt
|
||||
mi.id = 'id%d'%m.next_id
|
||||
m.next_id += 1
|
||||
m.append(mi)
|
||||
return m
|
||||
|
||||
def __init__(self):
|
||||
ResourceCollection.__init__(self)
|
||||
self.next_id = 1
|
||||
|
||||
|
||||
def item(self, id):
|
||||
for i in self:
|
||||
if i.id == id:
|
||||
return i
|
||||
|
||||
def id_for_path(self, path):
|
||||
path = os.path.normpath(os.path.abspath(path))
|
||||
for i in self:
|
||||
if i.path and os.path.normpath(i.path) == path:
|
||||
return i.id
|
||||
|
||||
def path_for_id(self, id):
|
||||
for i in self:
|
||||
if i.id == id:
|
||||
return i.path
|
||||
|
||||
class Spine(ResourceCollection):
|
||||
|
||||
class Item(Resource):
|
||||
|
||||
def __init__(self, idfunc, *args, **kwargs):
|
||||
Resource.__init__(self, *args, **kwargs)
|
||||
self.is_linear = True
|
||||
self.id = idfunc(self.path)
|
||||
|
||||
@staticmethod
|
||||
def from_opf_spine_element(spine, manifest):
|
||||
s = Spine(manifest)
|
||||
for itemref in spine.findAll(re.compile('itemref')):
|
||||
if itemref.has_key('idref'):
|
||||
r = Spine.Item(s.manifest.id_for_path,
|
||||
s.manifest.path_for_id(itemref['idref']), is_path=True)
|
||||
r.is_linear = itemref.get('linear', 'yes') == 'yes'
|
||||
s.append(r)
|
||||
return s
|
||||
|
||||
@staticmethod
|
||||
def from_paths(paths, manifest):
|
||||
s = Spine(manifest)
|
||||
for path in paths:
|
||||
try:
|
||||
s.append(Spine.Item(s.manifest.id_for_path, path, is_path=True))
|
||||
except:
|
||||
continue
|
||||
return s
|
||||
|
||||
|
||||
|
||||
def __init__(self, manifest):
|
||||
ResourceCollection.__init__(self)
|
||||
self.manifest = manifest
|
||||
|
||||
|
||||
def linear_items(self):
|
||||
for r in self:
|
||||
if r.is_linear:
|
||||
yield r.path
|
||||
|
||||
def nonlinear_items(self):
|
||||
for r in self:
|
||||
if not r.is_linear:
|
||||
yield r.path
|
||||
|
||||
def items(self):
|
||||
for i in self:
|
||||
yield i.path
|
||||
|
||||
|
||||
class Guide(ResourceCollection):
|
||||
|
||||
class Reference(Resource):
|
||||
|
||||
@staticmethod
|
||||
def from_opf_resource_item(ref, basedir):
|
||||
title, href, type = ref.get('title', ''), ref['href'], ref['type']
|
||||
res = Guide.Reference(href, basedir, is_path=False)
|
||||
res.title = title
|
||||
res.type = type
|
||||
return res
|
||||
|
||||
def __repr__(self):
|
||||
ans = '<reference type="%s" href="%s" '%(self.type, self.href())
|
||||
if self.title:
|
||||
ans += 'title="%s" '%self.title
|
||||
return ans + '/>'
|
||||
|
||||
|
||||
@staticmethod
|
||||
def from_opf_guide(guide_elem, base_dir=os.getcwdu()):
|
||||
coll = Guide()
|
||||
for ref in guide_elem.findAll('reference'):
|
||||
try:
|
||||
ref = Guide.Reference.from_opf_resource_item(ref, base_dir)
|
||||
coll.append(ref)
|
||||
except:
|
||||
continue
|
||||
return coll
|
||||
|
||||
def set_cover(self, path):
|
||||
map(self.remove, [i for i in self if 'cover' in i.type.lower()])
|
||||
for type in ('cover', 'other.ms-coverimage-standard', 'other.ms-coverimage'):
|
||||
self.append(Guide.Reference(path, is_path=True))
|
||||
self[-1].type = type
|
||||
self[-1].title = ''
|
||||
|
||||
|
||||
class standard_field(object):
|
||||
|
||||
def __init__(self, name):
|
||||
self.name = name
|
||||
|
||||
def __get__(self, obj, typ=None):
|
||||
return getattr(obj, 'get_'+self.name)()
|
||||
|
||||
|
||||
class OPF(MetaInformation):
|
||||
|
||||
MIMETYPE = 'application/oebps-package+xml'
|
||||
ENTITY_PATTERN = re.compile(r'&(\S+?);')
|
||||
|
||||
uid = standard_field('uid')
|
||||
application_id = standard_field('application_id')
|
||||
title = standard_field('title')
|
||||
authors = standard_field('authors')
|
||||
language = standard_field('language')
|
||||
title_sort = standard_field('title_sort')
|
||||
author_sort = standard_field('author_sort')
|
||||
comments = standard_field('comments')
|
||||
category = standard_field('category')
|
||||
publisher = standard_field('publisher')
|
||||
isbn = standard_field('isbn')
|
||||
cover = standard_field('cover')
|
||||
series = standard_field('series')
|
||||
series_index = standard_field('series_index')
|
||||
rating = standard_field('rating')
|
||||
tags = standard_field('tags')
|
||||
|
||||
def __init__(self):
|
||||
raise NotImplementedError('Abstract base class')
|
||||
|
||||
@dynamic_property
|
||||
def package(self):
|
||||
def fget(self):
|
||||
return self.soup.find(re.compile('package'))
|
||||
return property(fget=fget)
|
||||
|
||||
@dynamic_property
|
||||
def metadata(self):
|
||||
def fget(self):
|
||||
return self.package.find(re.compile('metadata'))
|
||||
return property(fget=fget)
|
||||
|
||||
|
||||
def get_title(self):
|
||||
title = self.metadata.find('dc:title')
|
||||
if title and title.string:
|
||||
return self.ENTITY_PATTERN.sub(entity_to_unicode, title.string).strip()
|
||||
return self.default_title.strip()
|
||||
|
||||
def get_authors(self):
|
||||
creators = self.metadata.findAll('dc:creator')
|
||||
for elem in creators:
|
||||
role = elem.get('role')
|
||||
if not role:
|
||||
role = elem.get('opf:role')
|
||||
if not role:
|
||||
role = 'aut'
|
||||
if role == 'aut' and elem.string:
|
||||
raw = self.ENTITY_PATTERN.sub(entity_to_unicode, elem.string)
|
||||
return string_to_authors(raw)
|
||||
return []
|
||||
|
||||
def get_author_sort(self):
|
||||
creators = self.metadata.findAll('dc:creator')
|
||||
for elem in creators:
|
||||
role = elem.get('role')
|
||||
if not role:
|
||||
role = elem.get('opf:role')
|
||||
if role == 'aut':
|
||||
fa = elem.get('file-as')
|
||||
return self.ENTITY_PATTERN.sub(entity_to_unicode, fa).strip() if fa else None
|
||||
return None
|
||||
|
||||
def get_title_sort(self):
|
||||
title = self.package.find('dc:title')
|
||||
if title:
|
||||
if title.has_key('file-as'):
|
||||
return title['file-as'].strip()
|
||||
return None
|
||||
|
||||
def get_comments(self):
|
||||
comments = self.soup.find('dc:description')
|
||||
if comments and comments.string:
|
||||
return self.ENTITY_PATTERN.sub(entity_to_unicode, comments.string).strip()
|
||||
return None
|
||||
|
||||
def get_uid(self):
|
||||
package = self.package
|
||||
if package.has_key('unique-identifier'):
|
||||
return package['unique-identifier']
|
||||
|
||||
def get_category(self):
|
||||
category = self.soup.find('dc:type')
|
||||
if category and category.string:
|
||||
return self.ENTITY_PATTERN.sub(entity_to_unicode, category.string).strip()
|
||||
return None
|
||||
|
||||
def get_publisher(self):
|
||||
publisher = self.soup.find('dc:publisher')
|
||||
if publisher and publisher.string:
|
||||
return self.ENTITY_PATTERN.sub(entity_to_unicode, publisher.string).strip()
|
||||
return None
|
||||
|
||||
def get_isbn(self):
|
||||
for item in self.metadata.findAll('dc:identifier'):
|
||||
scheme = item.get('scheme')
|
||||
if not scheme:
|
||||
scheme = item.get('opf:scheme')
|
||||
if scheme is not None and scheme.lower() == 'isbn' and item.string:
|
||||
return str(item.string).strip()
|
||||
return None
|
||||
|
||||
def get_language(self):
|
||||
item = self.metadata.find('dc:language')
|
||||
if not item:
|
||||
return _('Unknown')
|
||||
return ''.join(item.findAll(text=True)).strip()
|
||||
|
||||
def get_application_id(self):
|
||||
for item in self.metadata.findAll('dc:identifier'):
|
||||
scheme = item.get('scheme', None)
|
||||
if scheme is None:
|
||||
scheme = item.get('opf:scheme', None)
|
||||
if scheme in ['libprs500', 'calibre']:
|
||||
return str(item.string).strip()
|
||||
return None
|
||||
|
||||
def get_cover(self):
|
||||
guide = getattr(self, 'guide', [])
|
||||
if not guide:
|
||||
guide = []
|
||||
references = [ref for ref in guide if 'cover' in ref.type.lower()]
|
||||
for candidate in ('cover', 'other.ms-coverimage-standard', 'other.ms-coverimage'):
|
||||
matches = [r for r in references if r.type.lower() == candidate and r.path]
|
||||
if matches:
|
||||
return matches[0].path
|
||||
|
||||
def possible_cover_prefixes(self):
|
||||
isbn, ans = [], []
|
||||
for item in self.metadata.findAll('dc:identifier'):
|
||||
scheme = item.get('scheme')
|
||||
if not scheme:
|
||||
scheme = item.get('opf:scheme')
|
||||
isbn.append((scheme, item.string))
|
||||
for item in isbn:
|
||||
ans.append(item[1].replace('-', ''))
|
||||
return ans
|
||||
|
||||
def get_series(self):
|
||||
s = self.metadata.find('series')
|
||||
if s is not None:
|
||||
return str(s.string).strip()
|
||||
return None
|
||||
|
||||
def get_series_index(self):
|
||||
s = self.metadata.find('series-index')
|
||||
if s and s.string:
|
||||
try:
|
||||
return float(str(s.string).strip())
|
||||
except:
|
||||
return None
|
||||
return None
|
||||
|
||||
def get_rating(self):
|
||||
s = self.metadata.find('rating')
|
||||
if s and s.string:
|
||||
try:
|
||||
return int(str(s.string).strip())
|
||||
except:
|
||||
return None
|
||||
return None
|
||||
|
||||
def get_tags(self):
|
||||
ans = []
|
||||
subs = self.soup.findAll('dc:subject')
|
||||
for sub in subs:
|
||||
val = sub.string
|
||||
if val:
|
||||
ans.append(val)
|
||||
return [unicode(a).strip() for a in ans]
|
||||
|
||||
|
||||
class OPFReader(OPF):
|
||||
|
||||
def __init__(self, stream, dir=os.getcwdu()):
|
||||
manage = False
|
||||
if not hasattr(stream, 'read'):
|
||||
manage = True
|
||||
dir = os.path.dirname(stream)
|
||||
stream = open(stream, 'rb')
|
||||
self.default_title = stream.name if hasattr(stream, 'name') else 'Unknown'
|
||||
if hasattr(stream, 'seek'):
|
||||
stream.seek(0)
|
||||
self.soup = OPFSoup(stream.read())
|
||||
if manage:
|
||||
stream.close()
|
||||
self.manifest = Manifest()
|
||||
m = self.soup.find(re.compile('manifest'))
|
||||
if m is not None:
|
||||
self.manifest = Manifest.from_opf_manifest_element(m, dir)
|
||||
self.spine = None
|
||||
spine = self.soup.find(re.compile('spine'))
|
||||
if spine is not None:
|
||||
self.spine = Spine.from_opf_spine_element(spine, self.manifest)
|
||||
|
||||
self.toc = TOC(base_path=dir)
|
||||
self.toc.read_from_opf(self)
|
||||
guide = self.soup.find(re.compile('guide'))
|
||||
if guide is not None:
|
||||
self.guide = Guide.from_opf_guide(guide, dir)
|
||||
self.base_dir = dir
|
||||
self.cover_data = (None, None)
|
||||
|
||||
|
||||
class OPFCreator(MetaInformation):
|
||||
|
||||
def __init__(self, base_path, *args, **kwargs):
|
||||
'''
|
||||
Initialize.
|
||||
@param base_path: An absolute path to the directory in which this OPF file
|
||||
will eventually be. This is used by the L{create_manifest} method
|
||||
to convert paths to files into relative paths.
|
||||
'''
|
||||
MetaInformation.__init__(self, *args, **kwargs)
|
||||
self.base_path = os.path.abspath(base_path)
|
||||
if self.application_id is None:
|
||||
self.application_id = str(uuid.uuid4())
|
||||
if not isinstance(self.toc, TOC):
|
||||
self.toc = None
|
||||
if not self.authors:
|
||||
self.authors = [_('Unknown')]
|
||||
if self.guide is None:
|
||||
self.guide = Guide()
|
||||
if self.cover:
|
||||
self.guide.set_cover(self.cover)
|
||||
|
||||
|
||||
def create_manifest(self, entries):
|
||||
'''
|
||||
Create <manifest>
|
||||
|
||||
`entries`: List of (path, mime-type) If mime-type is None it is autodetected
|
||||
'''
|
||||
entries = map(lambda x: x if os.path.isabs(x[0]) else
|
||||
(os.path.abspath(os.path.join(self.base_path, x[0])), x[1]),
|
||||
entries)
|
||||
self.manifest = Manifest.from_paths(entries)
|
||||
self.manifest.set_basedir(self.base_path)
|
||||
|
||||
def create_manifest_from_files_in(self, files_and_dirs):
|
||||
entries = []
|
||||
|
||||
def dodir(dir):
|
||||
for spec in os.walk(dir):
|
||||
root, files = spec[0], spec[-1]
|
||||
for name in files:
|
||||
path = os.path.join(root, name)
|
||||
if os.path.isfile(path):
|
||||
entries.append((path, None))
|
||||
|
||||
for i in files_and_dirs:
|
||||
if os.path.isdir(i):
|
||||
dodir(i)
|
||||
else:
|
||||
entries.append((i, None))
|
||||
|
||||
self.create_manifest(entries)
|
||||
|
||||
def create_spine(self, entries):
|
||||
'''
|
||||
Create the <spine> element. Must first call :method:`create_manifest`.
|
||||
|
||||
`entries`: List of paths
|
||||
'''
|
||||
entries = map(lambda x: x if os.path.isabs(x) else
|
||||
os.path.abspath(os.path.join(self.base_path, x)), entries)
|
||||
self.spine = Spine.from_paths(entries, self.manifest)
|
||||
|
||||
def set_toc(self, toc):
|
||||
'''
|
||||
Set the toc. You must call :method:`create_spine` before calling this
|
||||
method.
|
||||
|
||||
:param toc: A :class:`TOC` object
|
||||
'''
|
||||
self.toc = toc
|
||||
|
||||
def create_guide(self, guide_element):
|
||||
self.guide = Guide.from_opf_guide(guide_element, self.base_path)
|
||||
self.guide.set_basedir(self.base_path)
|
||||
|
||||
def render(self, opf_stream, ncx_stream=None, ncx_manifest_entry=None):
|
||||
from calibre.utils.genshi.template import MarkupTemplate
|
||||
opf_template = open(P('templates/opf.xml'), 'rb').read()
|
||||
template = MarkupTemplate(opf_template)
|
||||
if self.manifest:
|
||||
self.manifest.set_basedir(self.base_path)
|
||||
if ncx_manifest_entry is not None:
|
||||
if not os.path.isabs(ncx_manifest_entry):
|
||||
ncx_manifest_entry = os.path.join(self.base_path, ncx_manifest_entry)
|
||||
remove = [i for i in self.manifest if i.id == 'ncx']
|
||||
for item in remove:
|
||||
self.manifest.remove(item)
|
||||
self.manifest.append(ManifestItem(ncx_manifest_entry, self.base_path))
|
||||
self.manifest[-1].id = 'ncx'
|
||||
self.manifest[-1].mime_type = 'application/x-dtbncx+xml'
|
||||
if not self.guide:
|
||||
self.guide = Guide()
|
||||
if self.cover:
|
||||
cover = self.cover
|
||||
if not os.path.isabs(cover):
|
||||
cover = os.path.abspath(os.path.join(self.base_path, cover))
|
||||
self.guide.set_cover(cover)
|
||||
self.guide.set_basedir(self.base_path)
|
||||
|
||||
opf = template.generate(__appname__=__appname__, mi=self, __version__=__version__).render('xml')
|
||||
if not opf.startswith('<?xml '):
|
||||
opf = '<?xml version="1.0" encoding="UTF-8"?>\n'+opf
|
||||
opf_stream.write(opf)
|
||||
opf_stream.flush()
|
||||
toc = getattr(self, 'toc', None)
|
||||
if toc is not None and ncx_stream is not None:
|
||||
toc.render(ncx_stream, self.application_id)
|
||||
ncx_stream.flush()
|
||||
|
Loading…
x
Reference in New Issue
Block a user