mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Document OEBBook.
This commit is contained in:
parent
31f96140ad
commit
e5984c02c7
@ -312,7 +312,7 @@ class LitWriter(object):
|
|||||||
cover = None
|
cover = None
|
||||||
if oeb.metadata.cover:
|
if oeb.metadata.cover:
|
||||||
id = str(oeb.metadata.cover[0])
|
id = str(oeb.metadata.cover[0])
|
||||||
cover = oeb.manifest[id]
|
cover = oeb.manifest.ids[id]
|
||||||
for type, title in ALL_MS_COVER_TYPES:
|
for type, title in ALL_MS_COVER_TYPES:
|
||||||
if type not in oeb.guide:
|
if type not in oeb.guide:
|
||||||
oeb.guide.add(type, title, cover.href)
|
oeb.guide.add(type, title, cover.href)
|
||||||
|
@ -5,6 +5,7 @@ from __future__ import with_statement
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os, sys, re, uuid
|
import os, sys, re, uuid
|
||||||
from mimetypes import types_map
|
from mimetypes import types_map
|
||||||
@ -175,6 +176,7 @@ URL_SAFE = set('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
|||||||
URL_UNSAFE = [ASCII_CHARS - URL_SAFE, UNIBYTE_CHARS - URL_SAFE]
|
URL_UNSAFE = [ASCII_CHARS - URL_SAFE, UNIBYTE_CHARS - URL_SAFE]
|
||||||
|
|
||||||
def urlquote(href):
|
def urlquote(href):
|
||||||
|
"""Quote URL-unsafe characters, allowing IRI-safe characters."""
|
||||||
result = []
|
result = []
|
||||||
unsafe = 0 if isinstance(href, unicode) else 1
|
unsafe = 0 if isinstance(href, unicode) else 1
|
||||||
unsafe = URL_UNSAFE[unsafe]
|
unsafe = URL_UNSAFE[unsafe]
|
||||||
@ -185,6 +187,9 @@ def urlquote(href):
|
|||||||
return ''.join(result)
|
return ''.join(result)
|
||||||
|
|
||||||
def urlnormalize(href):
|
def urlnormalize(href):
|
||||||
|
"""Convert a URL into normalized form, with all and only URL-unsafe
|
||||||
|
characters URL quoted.
|
||||||
|
"""
|
||||||
parts = urlparse(href)
|
parts = urlparse(href)
|
||||||
if not parts.scheme:
|
if not parts.scheme:
|
||||||
path, frag = urldefrag(href)
|
path, frag = urldefrag(href)
|
||||||
@ -196,21 +201,30 @@ def urlnormalize(href):
|
|||||||
|
|
||||||
|
|
||||||
class OEBError(Exception):
|
class OEBError(Exception):
|
||||||
|
"""Generic OEB-processing error."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class FauxLogger(object):
|
class FauxLogger(object):
|
||||||
|
"""Fake logging interface."""
|
||||||
def __getattr__(self, name):
|
def __getattr__(self, name):
|
||||||
return self
|
return self
|
||||||
def __call__(self, message):
|
def __call__(self, message):
|
||||||
print message
|
print message
|
||||||
|
|
||||||
class Logger(LoggingInterface, object):
|
class Logger(LoggingInterface, object):
|
||||||
|
"""A logging object which provides both the standard `logging.Logger` and
|
||||||
|
calibre-specific interfaces.
|
||||||
|
"""
|
||||||
def __getattr__(self, name):
|
def __getattr__(self, name):
|
||||||
return object.__getattribute__(self, 'log_' + name)
|
return object.__getattribute__(self, 'log_' + name)
|
||||||
|
|
||||||
|
|
||||||
class NullContainer(object):
|
class NullContainer(object):
|
||||||
|
"""An empty container.
|
||||||
|
|
||||||
|
For use with book formats which do not support container-like access.
|
||||||
|
"""
|
||||||
def read(self, path):
|
def read(self, path):
|
||||||
raise OEBError('Attempt to read from NullContainer')
|
raise OEBError('Attempt to read from NullContainer')
|
||||||
|
|
||||||
@ -224,6 +238,8 @@ class NullContainer(object):
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
class DirContainer(object):
|
class DirContainer(object):
|
||||||
|
"""Filesystem directory container."""
|
||||||
|
|
||||||
def __init__(self, path):
|
def __init__(self, path):
|
||||||
path = unicode(path)
|
path = unicode(path)
|
||||||
ext = os.path.splitext(path)[1].lower()
|
ext = os.path.splitext(path)[1].lower()
|
||||||
@ -269,20 +285,38 @@ class DirContainer(object):
|
|||||||
|
|
||||||
|
|
||||||
class Metadata(object):
|
class Metadata(object):
|
||||||
DC_TERMS = set([
|
"""A collection of OEB data model metadata.
|
||||||
'contributor', 'coverage', 'creator', 'date',
|
|
||||||
'description', 'format', 'identifier', 'language',
|
Provides access to the list of items associated with a particular metadata
|
||||||
'publisher', 'relation', 'rights', 'source', 'subject',
|
term via the term's local name using either Python container or attribute
|
||||||
'title', 'type'
|
syntax. Return an empty list for any terms with no currently associated
|
||||||
])
|
metadata items.
|
||||||
|
"""
|
||||||
|
|
||||||
|
DC_TERMS = set(['contributor', 'coverage', 'creator', 'date',
|
||||||
|
'description', 'format', 'identifier', 'language',
|
||||||
|
'publisher', 'relation', 'rights', 'source',
|
||||||
|
'subject', 'title', 'type'])
|
||||||
CALIBRE_TERMS = set(['series', 'series_index', 'rating'])
|
CALIBRE_TERMS = set(['series', 'series_index', 'rating'])
|
||||||
OPF_ATTRS = {'role': OPF('role'), 'file-as': OPF('file-as'),
|
OPF_ATTRS = {'role': OPF('role'), 'file-as': OPF('file-as'),
|
||||||
'scheme': OPF('scheme'), 'event': OPF('event'),
|
'scheme': OPF('scheme'), 'event': OPF('event'),
|
||||||
'type': XSI('type'), 'lang': XML('lang'), 'id': 'id'}
|
'type': XSI('type'), 'lang': XML('lang'), 'id': 'id'}
|
||||||
|
|
||||||
class Item(object):
|
class Item(object):
|
||||||
|
"""An item of OEB data model metadata.
|
||||||
|
|
||||||
|
The metadata term or name may be accessed via the :attr:`term` or
|
||||||
|
:attr:`name` attributes. The metadata value or content may be accessed
|
||||||
|
via the :attr:`value` or :attr:`content` attributes, or via Unicode or
|
||||||
|
string representations of the object.
|
||||||
|
|
||||||
|
OEB data model metadata attributes may be accessed either via their
|
||||||
|
fully-qualified names using the Python container access syntax, or via
|
||||||
|
their local names using Python attribute syntax. Only attributes
|
||||||
|
allowed by the OPF 2.0 specification are supported.
|
||||||
|
"""
|
||||||
class Attribute(object):
|
class Attribute(object):
|
||||||
|
"""Smart accessor for allowed OEB metadata item attributes."""
|
||||||
|
|
||||||
def __init__(self, attr, allowed=None):
|
def __init__(self, attr, allowed=None):
|
||||||
if not callable(attr):
|
if not callable(attr):
|
||||||
@ -333,10 +367,24 @@ class Metadata(object):
|
|||||||
nsattr = 'scheme'
|
nsattr = 'scheme'
|
||||||
if attr != nsattr:
|
if attr != nsattr:
|
||||||
attrib[nsattr] = attrib.pop(attr)
|
attrib[nsattr] = attrib.pop(attr)
|
||||||
|
|
||||||
|
@dynamic_property
|
||||||
|
def name(self):
|
||||||
|
def fget(self):
|
||||||
|
return self.term
|
||||||
|
return property(fget=fget)
|
||||||
|
|
||||||
|
@dynamic_property
|
||||||
|
def content(self):
|
||||||
|
def fget(self):
|
||||||
|
return self.value
|
||||||
|
def fset(self, value):
|
||||||
|
self.value = value
|
||||||
|
return property(fget=fget, fset=fset)
|
||||||
|
|
||||||
scheme = Attribute(lambda term: 'scheme' if \
|
scheme = Attribute(lambda term: 'scheme' if \
|
||||||
term == OPF('meta') else OPF('scheme'),
|
term == OPF('meta') else OPF('scheme'),
|
||||||
[DC('identifier'), OPF('meta')])
|
[DC('identifier'), OPF('meta')])
|
||||||
file_as = Attribute(OPF('file-as'), [DC('creator'), DC('contributor')])
|
file_as = Attribute(OPF('file-as'), [DC('creator'), DC('contributor')])
|
||||||
role = Attribute(OPF('role'), [DC('creator'), DC('contributor')])
|
role = Attribute(OPF('role'), [DC('creator'), DC('contributor')])
|
||||||
event = Attribute(OPF('event'), [DC('date')])
|
event = Attribute(OPF('event'), [DC('date')])
|
||||||
@ -405,6 +453,7 @@ class Metadata(object):
|
|||||||
self.items = defaultdict(list)
|
self.items = defaultdict(list)
|
||||||
|
|
||||||
def add(self, term, value, attrib={}, nsmap={}, **kwargs):
|
def add(self, term, value, attrib={}, nsmap={}, **kwargs):
|
||||||
|
"""Add a new metadata item."""
|
||||||
item = self.Item(term, value, attrib, nsmap, **kwargs)
|
item = self.Item(term, value, attrib, nsmap, **kwargs)
|
||||||
items = self.items[barename(item.term)]
|
items = self.items[barename(item.term)]
|
||||||
items.append(item)
|
items.append(item)
|
||||||
@ -477,8 +526,40 @@ class Metadata(object):
|
|||||||
|
|
||||||
|
|
||||||
class Manifest(object):
|
class Manifest(object):
|
||||||
|
"""Collection of files composing an OEB data model book.
|
||||||
|
|
||||||
|
Provides access to the content of the files composing the book and
|
||||||
|
attributes associated with those files, including their internal paths,
|
||||||
|
unique identifiers, and MIME types.
|
||||||
|
|
||||||
|
Itself acts as a :class:`set` of manifest items, and provides the following
|
||||||
|
instance data member for dictionary-like access:
|
||||||
|
|
||||||
|
:attr:`ids`: A dictionary in which the keys are the unique identifiers of
|
||||||
|
the manifest items and the values are the items themselves.
|
||||||
|
:attr:`hrefs`: A dictionary in which the keys are the internal paths of the
|
||||||
|
manifest items and the values are the items themselves.
|
||||||
|
"""
|
||||||
|
|
||||||
class Item(object):
|
class Item(object):
|
||||||
|
"""An OEB data model book content file.
|
||||||
|
|
||||||
|
Provides the following data members for accessing the file content and
|
||||||
|
metadata associated with this particular file.
|
||||||
|
|
||||||
|
:attr:`id`: Unique identifier.
|
||||||
|
:attr:`href`: Book-internal path.
|
||||||
|
:attr:`media_type`: MIME type of the file content.
|
||||||
|
:attr:`fallback`: Unique id of any fallback manifest item associated
|
||||||
|
with this manifest item.
|
||||||
|
:attr:`spine_position`: Display/reading order index for book textual
|
||||||
|
content. `None` for manifest items which are not part of the
|
||||||
|
book's textual content.
|
||||||
|
:attr:`linear`: `True` for textual content items which are part of the
|
||||||
|
primary linear reading order and `False` for textual content items
|
||||||
|
which are not (such as footnotes). Meaningless for items which
|
||||||
|
have a :attr:`spine_position` of `None`.
|
||||||
|
"""
|
||||||
|
|
||||||
NUM_RE = re.compile('^(.*)([0-9][0-9.]*)(?=[.]|$)')
|
NUM_RE = re.compile('^(.*)([0-9][0-9.]*)(?=[.]|$)')
|
||||||
META_XP = XPath('/h:html/h:head/h:meta[@http-equiv="Content-Type"]')
|
META_XP = XPath('/h:html/h:head/h:meta[@http-equiv="Content-Type"]')
|
||||||
@ -584,6 +665,18 @@ class Manifest(object):
|
|||||||
|
|
||||||
@dynamic_property
|
@dynamic_property
|
||||||
def data(self):
|
def data(self):
|
||||||
|
doc = """Provides MIME type sensitive access to the manifest
|
||||||
|
entry's associated content.
|
||||||
|
|
||||||
|
- XHTML, HTML, and variant content is parsed as necessary to
|
||||||
|
convert and and return as an lxml.etree element in the XHTML
|
||||||
|
namespace.
|
||||||
|
- XML content is parsed and returned as an lxml.etree element.
|
||||||
|
- CSS and CSS-variant content is parsed and returned as a cssutils
|
||||||
|
CSS DOM stylesheet.
|
||||||
|
- All other content is returned as a :class:`str` object with no
|
||||||
|
special parsing.
|
||||||
|
"""
|
||||||
def fget(self):
|
def fget(self):
|
||||||
if self._data is not None:
|
if self._data is not None:
|
||||||
return self._data
|
return self._data
|
||||||
@ -600,7 +693,7 @@ class Manifest(object):
|
|||||||
self._data = value
|
self._data = value
|
||||||
def fdel(self):
|
def fdel(self):
|
||||||
self._data = None
|
self._data = None
|
||||||
return property(fget, fset, fdel)
|
return property(fget, fset, fdel, doc=doc)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
data = self.data
|
data = self.data
|
||||||
@ -631,6 +724,9 @@ class Manifest(object):
|
|||||||
return cmp(skey, okey)
|
return cmp(skey, okey)
|
||||||
|
|
||||||
def relhref(self, href):
|
def relhref(self, href):
|
||||||
|
"""Convert the URL provided in :param:`href` from a book-absolute
|
||||||
|
reference to a reference relative to this manifest item.
|
||||||
|
"""
|
||||||
if urlparse(href).scheme:
|
if urlparse(href).scheme:
|
||||||
return href
|
return href
|
||||||
if '/' not in self.href:
|
if '/' not in self.href:
|
||||||
@ -649,6 +745,9 @@ class Manifest(object):
|
|||||||
return relhref
|
return relhref
|
||||||
|
|
||||||
def abshref(self, href):
|
def abshref(self, href):
|
||||||
|
"""Convert the URL provided in :param:`href` from a reference
|
||||||
|
relative to this manifest item to a book-absolute reference.
|
||||||
|
"""
|
||||||
if urlparse(href).scheme:
|
if urlparse(href).scheme:
|
||||||
return href
|
return href
|
||||||
path, frag = urldefrag(href)
|
path, frag = urldefrag(href)
|
||||||
@ -663,25 +762,46 @@ class Manifest(object):
|
|||||||
|
|
||||||
def __init__(self, oeb):
|
def __init__(self, oeb):
|
||||||
self.oeb = oeb
|
self.oeb = oeb
|
||||||
|
self.items = set()
|
||||||
self.ids = {}
|
self.ids = {}
|
||||||
self.hrefs = {}
|
self.hrefs = {}
|
||||||
|
|
||||||
def add(self, id, href, media_type, fallback=None, loader=None, data=None):
|
def add(self, id, href, media_type, fallback=None, loader=None, data=None):
|
||||||
|
"""Add a new item to the book manifest.
|
||||||
|
|
||||||
|
The item's :param:`id`, :param:`href`, and :param:`media_type` are all
|
||||||
|
required. A :param:`fallback` item-id is required for any items with a
|
||||||
|
MIME type which is not one of the OPS core media types. Either the
|
||||||
|
item's data itself may be provided with :param:`data`, or a loader
|
||||||
|
function for the data may be provided with :param:`loader`, or the
|
||||||
|
item's data may latter be set manually via the :attr:`data` attribute.
|
||||||
|
"""
|
||||||
item = self.Item(
|
item = self.Item(
|
||||||
self.oeb, id, href, media_type, fallback, loader, data)
|
self.oeb, id, href, media_type, fallback, loader, data)
|
||||||
|
self.items.add(item)
|
||||||
self.ids[item.id] = item
|
self.ids[item.id] = item
|
||||||
self.hrefs[item.href] = item
|
self.hrefs[item.href] = item
|
||||||
return item
|
return item
|
||||||
|
|
||||||
def remove(self, item):
|
def remove(self, item):
|
||||||
|
"""Removes :param:`item` from the manifest."""
|
||||||
if item in self.ids:
|
if item in self.ids:
|
||||||
item = self.ids[item]
|
item = self.ids[item]
|
||||||
del self.ids[item.id]
|
del self.ids[item.id]
|
||||||
del self.hrefs[item.href]
|
del self.hrefs[item.href]
|
||||||
|
self.items.remove(item)
|
||||||
if item in self.oeb.spine:
|
if item in self.oeb.spine:
|
||||||
self.oeb.spine.remove(item)
|
self.oeb.spine.remove(item)
|
||||||
|
|
||||||
def generate(self, id=None, href=None):
|
def generate(self, id=None, href=None):
|
||||||
|
"""Generate a new unique identifier and/or internal path for use in
|
||||||
|
creating a new manifest item, using the provided :param:`id` and/or
|
||||||
|
:param:`href` as bases.
|
||||||
|
|
||||||
|
Returns an two-tuple of the new id and path. If either :param:`id` or
|
||||||
|
:param:`href` are `None` then the corresponding item in the return
|
||||||
|
tuple will also be `None`.
|
||||||
|
"""
|
||||||
if id is not None:
|
if id is not None:
|
||||||
base = id
|
base = id
|
||||||
index = 1
|
index = 1
|
||||||
@ -698,26 +818,16 @@ class Manifest(object):
|
|||||||
return id, href
|
return id, href
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
for id in self.ids:
|
for item in self.items:
|
||||||
yield id
|
|
||||||
|
|
||||||
def __getitem__(self, id):
|
|
||||||
return self.ids[id]
|
|
||||||
|
|
||||||
def values(self):
|
|
||||||
for item in self.ids.values():
|
|
||||||
yield item
|
yield item
|
||||||
|
values = __iter__
|
||||||
|
|
||||||
def items(self):
|
def __contains__(self, item):
|
||||||
for id, item in self.ids.items():
|
return item in self.items
|
||||||
yield id, item
|
|
||||||
|
|
||||||
def __contains__(self, key):
|
|
||||||
return key in self.ids
|
|
||||||
|
|
||||||
def to_opf1(self, parent=None):
|
def to_opf1(self, parent=None):
|
||||||
elem = element(parent, 'manifest')
|
elem = element(parent, 'manifest')
|
||||||
for item in self.ids.values():
|
for item in self.items:
|
||||||
media_type = item.media_type
|
media_type = item.media_type
|
||||||
if media_type in OEB_DOCS:
|
if media_type in OEB_DOCS:
|
||||||
media_type = OEB_DOC_MIME
|
media_type = OEB_DOC_MIME
|
||||||
@ -732,7 +842,7 @@ class Manifest(object):
|
|||||||
|
|
||||||
def to_opf2(self, parent=None):
|
def to_opf2(self, parent=None):
|
||||||
elem = element(parent, OPF('manifest'))
|
elem = element(parent, OPF('manifest'))
|
||||||
for item in self.ids.values():
|
for item in self.items:
|
||||||
media_type = item.media_type
|
media_type = item.media_type
|
||||||
if media_type in OEB_DOCS:
|
if media_type in OEB_DOCS:
|
||||||
media_type = XHTML_MIME
|
media_type = XHTML_MIME
|
||||||
@ -747,7 +857,13 @@ class Manifest(object):
|
|||||||
|
|
||||||
|
|
||||||
class Spine(object):
|
class Spine(object):
|
||||||
|
"""Collection of manifest items composing an OEB data model book's main
|
||||||
|
textual content.
|
||||||
|
|
||||||
|
The spine manages which manifest items compose the book's main textual
|
||||||
|
content and the sequence in which they appear. Provides Python container
|
||||||
|
access as a list-like object.
|
||||||
|
"""
|
||||||
def __init__(self, oeb):
|
def __init__(self, oeb):
|
||||||
self.oeb = oeb
|
self.oeb = oeb
|
||||||
self.items = []
|
self.items = []
|
||||||
@ -762,12 +878,14 @@ class Spine(object):
|
|||||||
return linear
|
return linear
|
||||||
|
|
||||||
def add(self, item, linear=None):
|
def add(self, item, linear=None):
|
||||||
|
"""Append :param:`item` to the end of the `Spine`."""
|
||||||
item.linear = self._linear(linear)
|
item.linear = self._linear(linear)
|
||||||
item.spine_position = len(self.items)
|
item.spine_position = len(self.items)
|
||||||
self.items.append(item)
|
self.items.append(item)
|
||||||
return item
|
return item
|
||||||
|
|
||||||
def insert(self, index, item, linear):
|
def insert(self, index, item, linear):
|
||||||
|
"""Insert :param:`item` at position :param:`index` in the `Spine`."""
|
||||||
item.linear = self._linear(linear)
|
item.linear = self._linear(linear)
|
||||||
item.spine_position = index
|
item.spine_position = index
|
||||||
self.items.insert(index, item)
|
self.items.insert(index, item)
|
||||||
@ -776,6 +894,7 @@ class Spine(object):
|
|||||||
return item
|
return item
|
||||||
|
|
||||||
def remove(self, item):
|
def remove(self, item):
|
||||||
|
"""Remove :param:`item` from the `Spine`."""
|
||||||
index = item.spine_position
|
index = item.spine_position
|
||||||
self.items.pop(index)
|
self.items.pop(index)
|
||||||
for i in xrange(index, len(self.items)):
|
for i in xrange(index, len(self.items)):
|
||||||
@ -813,9 +932,24 @@ class Spine(object):
|
|||||||
|
|
||||||
|
|
||||||
class Guide(object):
|
class Guide(object):
|
||||||
|
"""Collection of references to standard frequently-occurring sections
|
||||||
|
within an OEB data model book.
|
||||||
|
|
||||||
|
Provides dictionary-like access, in which the keys are the OEB reference
|
||||||
|
type identifiers and the values are `Reference` objects.
|
||||||
|
"""
|
||||||
|
|
||||||
class Reference(object):
|
class Reference(object):
|
||||||
|
"""Reference to a standard book section.
|
||||||
|
|
||||||
|
Provides the following instance data members:
|
||||||
|
|
||||||
|
:attr:`type`: Reference type identifier, as chosen from the list
|
||||||
|
allowed in the OPF 2.0 specification.
|
||||||
|
:attr:`title`: Human-readable section title.
|
||||||
|
:attr:`href`: Book-internal URL of the referenced section. May include
|
||||||
|
a fragment identifier.
|
||||||
|
"""
|
||||||
_TYPES_TITLES = [('cover', __('Cover')),
|
_TYPES_TITLES = [('cover', __('Cover')),
|
||||||
('title-page', __('Title Page')),
|
('title-page', __('Title Page')),
|
||||||
('toc', __('Table of Contents')),
|
('toc', __('Table of Contents')),
|
||||||
@ -867,17 +1001,19 @@ class Guide(object):
|
|||||||
|
|
||||||
@dynamic_property
|
@dynamic_property
|
||||||
def item(self):
|
def item(self):
|
||||||
|
doc = """The manifest item associated with this reference."""
|
||||||
def fget(self):
|
def fget(self):
|
||||||
path = urldefrag(self.href)[0]
|
path = urldefrag(self.href)[0]
|
||||||
hrefs = self.oeb.manifest.hrefs
|
hrefs = self.oeb.manifest.hrefs
|
||||||
return hrefs.get(path, None)
|
return hrefs.get(path, None)
|
||||||
return property(fget=fget)
|
return property(fget=fget, doc=doc)
|
||||||
|
|
||||||
def __init__(self, oeb):
|
def __init__(self, oeb):
|
||||||
self.oeb = oeb
|
self.oeb = oeb
|
||||||
self.refs = {}
|
self.refs = {}
|
||||||
|
|
||||||
def add(self, type, title, href):
|
def add(self, type, title, href):
|
||||||
|
"""Add a new reference to the `Guide`."""
|
||||||
ref = self.Reference(self.oeb, type, title, href)
|
ref = self.Reference(self.oeb, type, title, href)
|
||||||
self.refs[type] = ref
|
self.refs[type] = ref
|
||||||
return ref
|
return ref
|
||||||
@ -925,8 +1061,19 @@ class Guide(object):
|
|||||||
return elem
|
return elem
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: This needs beefing up to support the interface of toc.TOC
|
||||||
class TOC(object):
|
class TOC(object):
|
||||||
# This needs beefing up to support the interface of toc.TOC
|
"""Represents a hierarchical table of contents or navigation tree for
|
||||||
|
accessing arbitrary semantic sections within an OEB data model book.
|
||||||
|
|
||||||
|
Acts as a node within the navigation tree. Provides list-like access to
|
||||||
|
sub-nodes. Provides the follow node instance data attributes:
|
||||||
|
|
||||||
|
:attr:`title`: The title of this navigation node.
|
||||||
|
:attr:`href`: Book-internal URL referenced by this node.
|
||||||
|
:attr:`klass`: Optional semantic class referenced by this node.
|
||||||
|
:attr:`id`: Option unique identifier for this node.
|
||||||
|
"""
|
||||||
def __init__(self, title=None, href=None, klass=None, id=None):
|
def __init__(self, title=None, href=None, klass=None, id=None):
|
||||||
self.title = title
|
self.title = title
|
||||||
self.href = urlnormalize(href) if href else href
|
self.href = urlnormalize(href) if href else href
|
||||||
@ -935,17 +1082,26 @@ class TOC(object):
|
|||||||
self.nodes = []
|
self.nodes = []
|
||||||
|
|
||||||
def add(self, title, href, klass=None, id=None):
|
def add(self, title, href, klass=None, id=None):
|
||||||
|
"""Create and return a new sub-node of this node."""
|
||||||
node = TOC(title, href, klass, id)
|
node = TOC(title, href, klass, id)
|
||||||
self.nodes.append(node)
|
self.nodes.append(node)
|
||||||
return node
|
return node
|
||||||
|
|
||||||
|
def iter(self):
|
||||||
|
"""Iterate over this node and all descendants in depth-first order."""
|
||||||
|
yield self
|
||||||
|
for child in self.nodes:
|
||||||
|
for node in child.iter():
|
||||||
|
yield node
|
||||||
|
|
||||||
def iterdescendants(self):
|
def iterdescendants(self):
|
||||||
for node in self.nodes:
|
"""Iterate over all descendant nodes in depth-first order."""
|
||||||
yield node
|
for child in self.nodes:
|
||||||
for child in node.iterdescendants():
|
for node in child.iter():
|
||||||
yield child
|
yield node
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
|
"""Iterate over all immediate child nodes."""
|
||||||
for node in self.nodes:
|
for node in self.nodes:
|
||||||
yield node
|
yield node
|
||||||
|
|
||||||
@ -953,6 +1109,9 @@ class TOC(object):
|
|||||||
return self.nodes[index]
|
return self.nodes[index]
|
||||||
|
|
||||||
def autolayer(self):
|
def autolayer(self):
|
||||||
|
"""Make sequences of children pointing to the same content file into
|
||||||
|
children of the first node referencing that file.
|
||||||
|
"""
|
||||||
prev = None
|
prev = None
|
||||||
for node in list(self.nodes):
|
for node in list(self.nodes):
|
||||||
if prev and urldefrag(prev.href)[0] == urldefrag(node.href)[0]:
|
if prev and urldefrag(prev.href)[0] == urldefrag(node.href)[0]:
|
||||||
@ -961,10 +1120,12 @@ class TOC(object):
|
|||||||
else:
|
else:
|
||||||
prev = node
|
prev = node
|
||||||
|
|
||||||
def depth(self, level=0):
|
def depth(self):
|
||||||
if self.nodes:
|
"""The maximum depth of the navigation tree rooted at this node."""
|
||||||
return self.nodes[0].depth(level+1)
|
try:
|
||||||
return level
|
return max(node.depth() for node in self.nodes) + 1
|
||||||
|
except ValueError:
|
||||||
|
return 1
|
||||||
|
|
||||||
def to_opf1(self, tour):
|
def to_opf1(self, tour):
|
||||||
for node in self.nodes:
|
for node in self.nodes:
|
||||||
@ -989,12 +1150,34 @@ class TOC(object):
|
|||||||
|
|
||||||
|
|
||||||
class PageList(object):
|
class PageList(object):
|
||||||
|
"""Collection of named "pages" to mapped positions within an OEB data model
|
||||||
|
book's textual content.
|
||||||
|
|
||||||
|
Provides list-like access to the pages.
|
||||||
|
"""
|
||||||
|
|
||||||
class Page(object):
|
class Page(object):
|
||||||
|
"""Represents a mapping between a page name and a position within
|
||||||
|
the book content.
|
||||||
|
|
||||||
|
Provides the following instance data attributes:
|
||||||
|
|
||||||
|
:attr:`name`: The name of this page. Generally a number.
|
||||||
|
:attr:`href`: Book-internal URL at which point this page begins.
|
||||||
|
:attr:`type`: Must be one of 'front' (for prefatory pages, as commonly
|
||||||
|
labeled in print with small-case Roman numerals), 'normal' (for
|
||||||
|
standard pages, as commonly labeled in print with Arabic numerals),
|
||||||
|
or 'special' (for other pages, as commonly not labeled in any
|
||||||
|
fashion in print, such as the cover and title pages).
|
||||||
|
:attr:`klass`: Optional semantic class of this page.
|
||||||
|
:attr:`id`: Optional unique identifier for this page.
|
||||||
|
"""
|
||||||
|
TYPES = set(['front', 'normal', 'special'])
|
||||||
|
|
||||||
def __init__(self, name, href, type='normal', klass=None, id=None):
|
def __init__(self, name, href, type='normal', klass=None, id=None):
|
||||||
self.name = name
|
self.name = unicode(name)
|
||||||
self.href = urlnormalize(href)
|
self.href = urlnormalize(href)
|
||||||
self.type = type
|
self.type = type if type in self.TYPES else 'normal'
|
||||||
self.id = id
|
self.id = id
|
||||||
self.klass = klass
|
self.klass = klass
|
||||||
|
|
||||||
@ -1002,6 +1185,7 @@ class PageList(object):
|
|||||||
self.pages = []
|
self.pages = []
|
||||||
|
|
||||||
def add(self, name, href, type='normal', klass=None, id=None):
|
def add(self, name, href, type='normal', klass=None, id=None):
|
||||||
|
"""Create a new page and add it to the `PageList`."""
|
||||||
page = self.Page(name, href, type, klass, id)
|
page = self.Page(name, href, type, klass, id)
|
||||||
self.pages.append(page)
|
self.pages.append(page)
|
||||||
return page
|
return page
|
||||||
@ -1015,6 +1199,12 @@ class PageList(object):
|
|||||||
|
|
||||||
def __getitem__(self, index):
|
def __getitem__(self, index):
|
||||||
return self.pages[index]
|
return self.pages[index]
|
||||||
|
|
||||||
|
def pop(self, index=-1):
|
||||||
|
return self.pages.pop(index)
|
||||||
|
|
||||||
|
def remove(self, page):
|
||||||
|
return self.pages.remove(page)
|
||||||
|
|
||||||
def to_ncx(self, parent=None):
|
def to_ncx(self, parent=None):
|
||||||
plist = element(parent, NCX('pageList'), id=str(uuid.uuid4()))
|
plist = element(parent, NCX('pageList'), id=str(uuid.uuid4()))
|
||||||
@ -1040,8 +1230,33 @@ class PageList(object):
|
|||||||
|
|
||||||
|
|
||||||
class OEBBook(object):
|
class OEBBook(object):
|
||||||
|
"""Representation of a book in the IDPF OEB data model."""
|
||||||
|
|
||||||
def __init__(self, encoding=None, pretty_print=False, logger=FauxLogger()):
|
def __init__(self, encoding=None, pretty_print=False, logger=FauxLogger()):
|
||||||
|
"""Create empty book. Optional arguments:
|
||||||
|
|
||||||
|
:param:`encoding`: Default encoding for textual content read
|
||||||
|
from an external container.
|
||||||
|
:param:`pretty_print`: Whether or not the canonical string form
|
||||||
|
of XML markup is pretty-printed.
|
||||||
|
:prama:`logger`: A Logger object to use for logging all messages
|
||||||
|
related to the processing of this book. It is accessible
|
||||||
|
via the instance data member :attr:`logger`.
|
||||||
|
|
||||||
|
It provides the following public instance data members for
|
||||||
|
accessing various parts of the OEB data model:
|
||||||
|
|
||||||
|
:attr:`metadata`: Metadata such as title, author name(s), etc.
|
||||||
|
:attr:`manifest`: Manifest of all files included in the book,
|
||||||
|
including MIME types and fallback information.
|
||||||
|
:attr:`spine`: In-order list of manifest items which compose
|
||||||
|
the textual content of the book.
|
||||||
|
:attr:`guide`: Collection of references to standard positions
|
||||||
|
within the text, such as the cover, preface, etc.
|
||||||
|
:attr:`toc`: Hierarchical table of contents.
|
||||||
|
:attr:`pages`: List of "pages," such as indexed to a print edition of
|
||||||
|
the same text.
|
||||||
|
"""
|
||||||
self.encoding = encoding
|
self.encoding = encoding
|
||||||
self.pretty_print = pretty_print
|
self.pretty_print = pretty_print
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
@ -1057,16 +1272,19 @@ class OEBBook(object):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def generate(cls, opts):
|
def generate(cls, opts):
|
||||||
|
"""Generate an OEBBook instance from command-line options."""
|
||||||
encoding = opts.encoding
|
encoding = opts.encoding
|
||||||
pretty_print = opts.pretty_print
|
pretty_print = opts.pretty_print
|
||||||
return cls(encoding=encoding, pretty_print=pretty_print)
|
return cls(encoding=encoding, pretty_print=pretty_print)
|
||||||
|
|
||||||
def translate(self, text):
|
def translate(self, text):
|
||||||
|
"""Translate :param:`text` into the book's primary language."""
|
||||||
lang = str(self.metadata.language[0])
|
lang = str(self.metadata.language[0])
|
||||||
lang = lang.split('-', 1)[0].lower()
|
lang = lang.split('-', 1)[0].lower()
|
||||||
return translate(lang, text)
|
return translate(lang, text)
|
||||||
|
|
||||||
def decode(self, data):
|
def decode(self, data):
|
||||||
|
"""Automatically decode :param:`data` into a `unicode` object."""
|
||||||
if isinstance(data, unicode):
|
if isinstance(data, unicode):
|
||||||
return data
|
return data
|
||||||
if data[:2] in ('\xff\xfe', '\xfe\xff'):
|
if data[:2] in ('\xff\xfe', '\xfe\xff'):
|
||||||
@ -1089,6 +1307,11 @@ class OEBBook(object):
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
def to_opf1(self):
|
def to_opf1(self):
|
||||||
|
"""Produce OPF 1.2 representing the book's metadata and structure.
|
||||||
|
|
||||||
|
Returns a dictionary in which the keys are MIME types and the values
|
||||||
|
are tuples of (default) filenames and lxml.etree element structures.
|
||||||
|
"""
|
||||||
package = etree.Element('package',
|
package = etree.Element('package',
|
||||||
attrib={'unique-identifier': self.uid.id})
|
attrib={'unique-identifier': self.uid.id})
|
||||||
self.metadata.to_opf1(package)
|
self.metadata.to_opf1(package)
|
||||||
@ -1160,6 +1383,11 @@ class OEBBook(object):
|
|||||||
return ncx
|
return ncx
|
||||||
|
|
||||||
def to_opf2(self, page_map=False):
|
def to_opf2(self, page_map=False):
|
||||||
|
"""Produce OPF 2.0 representing the book's metadata and structure.
|
||||||
|
|
||||||
|
Returns a dictionary in which the keys are MIME types and the values
|
||||||
|
are tuples of (default) filenames and lxml.etree element structures.
|
||||||
|
"""
|
||||||
results = {}
|
results = {}
|
||||||
package = etree.Element(OPF('package'),
|
package = etree.Element(OPF('package'),
|
||||||
attrib={'version': '2.0', 'unique-identifier': self.uid.id},
|
attrib={'version': '2.0', 'unique-identifier': self.uid.id},
|
||||||
|
Loading…
x
Reference in New Issue
Block a user