mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
API documentation for the Container class
This commit is contained in:
parent
c4a195e761
commit
22b08349ba
@ -57,9 +57,8 @@ All the |app| python code is in the ``calibre`` package. This package contains t
|
|||||||
various transformations that are applied to the book during
|
various transformations that are applied to the book during
|
||||||
conversions live in :file:`oeb/transforms/*.py`. And the input and output
|
conversions live in :file:`oeb/transforms/*.py`. And the input and output
|
||||||
plugins live in :file:`conversion/plugins/*.py`.
|
plugins live in :file:`conversion/plugins/*.py`.
|
||||||
* Ebook editing happens using a different container object. All the
|
* Ebook editing happens using a different container object. It is
|
||||||
code for editing is in ``ebooks.oeb.polish`` in particular the
|
documented in :ref:`polish_api`.
|
||||||
container object is in ``ebooks.oeb.polish.container``.
|
|
||||||
|
|
||||||
* db - The database back-end. See :ref:`db_api` for the interface to the |app| library.
|
* db - The database back-end. See :ref:`db_api` for the interface to the |app| library.
|
||||||
|
|
||||||
@ -402,4 +401,5 @@ it sets up the interpreter to run |app| code.
|
|||||||
:hidden:
|
:hidden:
|
||||||
|
|
||||||
db_api
|
db_api
|
||||||
|
polish
|
||||||
|
|
||||||
|
@ -84,7 +84,13 @@ class Container(object): # {{{
|
|||||||
* Names: These are paths to the books' files relative to the root
|
* Names: These are paths to the books' files relative to the root
|
||||||
directory. They always contain POSIX separators and are unquoted. They
|
directory. They always contain POSIX separators and are unquoted. They
|
||||||
can be thought of as canonical identifiers for files in the book.
|
can be thought of as canonical identifiers for files in the book.
|
||||||
Most methods on the container object work with names.
|
Most methods on the container object work with names. Names are always
|
||||||
|
in the NFC unicode normal form.
|
||||||
|
|
||||||
|
* Clones: the container object supports efficient on-disk cloning, which is used to
|
||||||
|
implement checkpoints in the ebook editor. In order to make this work, you should
|
||||||
|
never access files on the filesystem directly. Instead, use :meth:`raw_data` or
|
||||||
|
:meth:`open` to read/write to component files in the book.
|
||||||
|
|
||||||
When converting between hrefs and names use the methods provided by this
|
When converting between hrefs and names use the methods provided by this
|
||||||
class, they assume all hrefs are quoted.
|
class, they assume all hrefs are quoted.
|
||||||
@ -172,6 +178,7 @@ class Container(object): # {{{
|
|||||||
}
|
}
|
||||||
|
|
||||||
def guess_type(self, name):
|
def guess_type(self, name):
|
||||||
|
' Return the expected mimetype for the specified file name based on its extension. '
|
||||||
# epubcheck complains if the mimetype for text documents is set to
|
# epubcheck complains if the mimetype for text documents is set to
|
||||||
# text/html in EPUB 2 books. Sigh.
|
# text/html in EPUB 2 books. Sigh.
|
||||||
ans = guess_type(name)
|
ans = guess_type(name)
|
||||||
@ -180,6 +187,7 @@ class Container(object): # {{{
|
|||||||
return ans
|
return ans
|
||||||
|
|
||||||
def add_name_to_manifest(self, name):
|
def add_name_to_manifest(self, name):
|
||||||
|
' Add an entry to the manifest for a file with the specified name. Returns the manifest id. '
|
||||||
all_ids = {x.get('id') for x in self.opf_xpath('//*[@id]')}
|
all_ids = {x.get('id') for x in self.opf_xpath('//*[@id]')}
|
||||||
c = 0
|
c = 0
|
||||||
item_id = 'id'
|
item_id = 'id'
|
||||||
@ -325,12 +333,23 @@ class Container(object): # {{{
|
|||||||
yield (elem.get('src'), elem.sourceline, 0) if get_line_numbers else elem.get('src')
|
yield (elem.get('src'), elem.sourceline, 0) if get_line_numbers else elem.get('src')
|
||||||
|
|
||||||
def abspath_to_name(self, fullpath, root=None):
|
def abspath_to_name(self, fullpath, root=None):
|
||||||
|
'''
|
||||||
|
Convert an absolute path to a canonical name relative to :attr:`root`
|
||||||
|
|
||||||
|
:param root: The base directory. By default the root for this container object is used.
|
||||||
|
'''
|
||||||
return self.relpath(os.path.abspath(fullpath), base=root).replace(os.sep, '/')
|
return self.relpath(os.path.abspath(fullpath), base=root).replace(os.sep, '/')
|
||||||
|
|
||||||
def name_to_abspath(self, name):
|
def name_to_abspath(self, name):
|
||||||
|
' Convert a canonical name to an absolute OS dependent path '
|
||||||
return os.path.abspath(join(self.root, *name.split('/')))
|
return os.path.abspath(join(self.root, *name.split('/')))
|
||||||
|
|
||||||
def exists(self, name):
|
def exists(self, name):
|
||||||
|
''' True iff a file corresponding to the canonical name exists. Note
|
||||||
|
that this function suffers from the limitations of the underlying OS
|
||||||
|
filesystem, in particular case (in)sensitivity. So on a case
|
||||||
|
insensitive filesystem this will return True even if the case of name
|
||||||
|
is different from the case of the underlying filesystem file. See also :meth:`has_name`'''
|
||||||
return os.path.exists(self.name_to_abspath(name))
|
return os.path.exists(self.name_to_abspath(name))
|
||||||
|
|
||||||
def href_to_name(self, href, base=None):
|
def href_to_name(self, href, base=None):
|
||||||
@ -358,19 +377,25 @@ class Container(object): # {{{
|
|||||||
return urlquote(path)
|
return urlquote(path)
|
||||||
|
|
||||||
def opf_xpath(self, expr):
|
def opf_xpath(self, expr):
|
||||||
|
' Convenience method to evaluate an XPath expression on the OPF file; the opf: and dc: namespace prefixes are pre-defined. '
|
||||||
return self.opf.xpath(expr, namespaces=OPF_NAMESPACES)
|
return self.opf.xpath(expr, namespaces=OPF_NAMESPACES)
|
||||||
|
|
||||||
def has_name(self, name):
|
def has_name(self, name):
|
||||||
|
''' Return True iff a file with the same canonical name as that specified exists. Unlike :meth:`exists` this method is always case-sensitive. '''
|
||||||
return name and name in self.name_path_map
|
return name and name in self.name_path_map
|
||||||
|
|
||||||
def relpath(self, path, base=None):
|
def relpath(self, path, base=None):
|
||||||
'''Convert an absolute path (with os separators) to a path relative to
|
'''Convert an absolute path (with os separators) to a path relative to
|
||||||
base (defaults to self.root). The relative path is *not* a name. Use
|
base (defaults to self.root). The relative path is *not* a name. Use
|
||||||
abspath_to_name() for that.'''
|
:meth:`abspath_to_name` for that.'''
|
||||||
return relpath(path, base or self.root)
|
return relpath(path, base or self.root)
|
||||||
|
|
||||||
def decode(self, data, normalize_to_nfc=True):
|
def decode(self, data, normalize_to_nfc=True):
|
||||||
"""Automatically decode :param:`data` into a `unicode` object."""
|
"""
|
||||||
|
Automatically decode ``data`` into a ``unicode`` object.
|
||||||
|
|
||||||
|
:param normalize_to_nfc: Normalize returned unicode to the NFC normal form as is required by both the EPUB and AZW3 formats.
|
||||||
|
"""
|
||||||
def fix_data(d):
|
def fix_data(d):
|
||||||
return d.replace('\r\n', '\n').replace('\r', '\n')
|
return d.replace('\r\n', '\n').replace('\r', '\n')
|
||||||
if isinstance(data, unicode):
|
if isinstance(data, unicode):
|
||||||
@ -407,14 +432,17 @@ class Container(object): # {{{
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def names_that_need_not_be_manifested(self):
|
def names_that_need_not_be_manifested(self):
|
||||||
|
' Set of names that are allowed to be missing from the manifest. Depends on the ebook file format. '
|
||||||
return {self.opf_name}
|
return {self.opf_name}
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def names_that_must_not_be_removed(self):
|
def names_that_must_not_be_removed(self):
|
||||||
|
' Set of names that must never be deleted from the container. Depends on the ebook file format. '
|
||||||
return {self.opf_name}
|
return {self.opf_name}
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def names_that_must_not_be_changed(self):
|
def names_that_must_not_be_changed(self):
|
||||||
|
' Set of names that must never be renamed. Depends on the ebook file format. '
|
||||||
return set()
|
return set()
|
||||||
|
|
||||||
def parse_xml(self, data):
|
def parse_xml(self, data):
|
||||||
@ -447,6 +475,12 @@ class Container(object): # {{{
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
def raw_data(self, name, decode=True, normalize_to_nfc=True):
|
def raw_data(self, name, decode=True, normalize_to_nfc=True):
|
||||||
|
'''
|
||||||
|
Return the raw data corresponding to the file specified by name
|
||||||
|
|
||||||
|
:param decode: If True and the file has a text based mimetype, decode it and return a unicode object instead of raw bytes.
|
||||||
|
:param normalize_to_nfc: If True the returned unicode object is normalized to the NFC normal form as is required for the EPUB and AZW3 file formats.
|
||||||
|
'''
|
||||||
ans = self.open(name).read()
|
ans = self.open(name).read()
|
||||||
mime = self.mime_map.get(name, guess_type(name))
|
mime = self.mime_map.get(name, guess_type(name))
|
||||||
if decode and (mime in OEB_STYLES or mime in OEB_DOCS or mime == 'text/plain' or mime[-4:] in {'+xml', '/xml'}):
|
if decode and (mime in OEB_STYLES or mime in OEB_DOCS or mime == 'text/plain' or mime[-4:] in {'+xml', '/xml'}):
|
||||||
@ -471,6 +505,11 @@ class Container(object): # {{{
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
def parsed(self, name):
|
def parsed(self, name):
|
||||||
|
''' Return a parsed representation of the file specified by name. For
|
||||||
|
HTML and XML files an lxml tree is returned. For CSS files a cssutils
|
||||||
|
stylesheet is returned. Note that parsed objects are cached for
|
||||||
|
performance. If you make any changes to the parsed object, you must
|
||||||
|
call :meth:`dirty` so that the container knows to update the cache. See also :meth:`replace`.'''
|
||||||
ans = self.parsed_cache.get(name, None)
|
ans = self.parsed_cache.get(name, None)
|
||||||
if ans is None:
|
if ans is None:
|
||||||
self.used_encoding = None
|
self.used_encoding = None
|
||||||
@ -481,15 +520,24 @@ class Container(object): # {{{
|
|||||||
return ans
|
return ans
|
||||||
|
|
||||||
def replace(self, name, obj):
|
def replace(self, name, obj):
|
||||||
|
'''
|
||||||
|
Replace the parsed object corresponding to name with obj, which must be
|
||||||
|
a similar object, i.e. an lxml tree for HTML/XML or a cssutils
|
||||||
|
stylesheet for a CSS file.
|
||||||
|
'''
|
||||||
self.parsed_cache[name] = obj
|
self.parsed_cache[name] = obj
|
||||||
self.dirty(name)
|
self.dirty(name)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def opf(self):
|
def opf(self):
|
||||||
|
' The parsed OPF file '
|
||||||
return self.parsed(self.opf_name)
|
return self.parsed(self.opf_name)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def mi(self):
|
def mi(self):
|
||||||
|
''' The metadata of this book as a Metadata object. Note that this
|
||||||
|
object is constructed on the fly every time this property is requested,
|
||||||
|
so use it sparingly. '''
|
||||||
from calibre.ebooks.metadata.opf2 import OPF as O
|
from calibre.ebooks.metadata.opf2 import OPF as O
|
||||||
mi = self.serialize_item(self.opf_name)
|
mi = self.serialize_item(self.opf_name)
|
||||||
return O(BytesIO(mi), basedir=self.opf_dir, unquote_urls=False,
|
return O(BytesIO(mi), basedir=self.opf_dir, unquote_urls=False,
|
||||||
@ -497,11 +545,13 @@ class Container(object): # {{{
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def manifest_id_map(self):
|
def manifest_id_map(self):
|
||||||
|
' Mapping of manifest id to canonical names '
|
||||||
return {item.get('id'):self.href_to_name(item.get('href'), self.opf_name)
|
return {item.get('id'):self.href_to_name(item.get('href'), self.opf_name)
|
||||||
for item in self.opf_xpath('//opf:manifest/opf:item[@href and @id]')}
|
for item in self.opf_xpath('//opf:manifest/opf:item[@href and @id]')}
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def manifest_type_map(self):
|
def manifest_type_map(self):
|
||||||
|
' Mapping of manifest media-type to list of canonical names of that media-type '
|
||||||
ans = defaultdict(list)
|
ans = defaultdict(list)
|
||||||
for item in self.opf_xpath('//opf:manifest/opf:item[@href and @media-type]'):
|
for item in self.opf_xpath('//opf:manifest/opf:item[@href and @media-type]'):
|
||||||
ans[item.get('media-type').lower()].append(self.href_to_name(
|
ans[item.get('media-type').lower()].append(self.href_to_name(
|
||||||
@ -510,11 +560,15 @@ class Container(object): # {{{
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def guide_type_map(self):
|
def guide_type_map(self):
|
||||||
|
' Mapping of guide type to canonical name '
|
||||||
return {item.get('type', ''):self.href_to_name(item.get('href'), self.opf_name)
|
return {item.get('type', ''):self.href_to_name(item.get('href'), self.opf_name)
|
||||||
for item in self.opf_xpath('//opf:guide/opf:reference[@href and @type]')}
|
for item in self.opf_xpath('//opf:guide/opf:reference[@href and @type]')}
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def spine_iter(self):
|
def spine_iter(self):
|
||||||
|
''' An iterator that yields item, name, is_linear for every item in the
|
||||||
|
book's spine. item is the lxml element, name is the canonical file name
|
||||||
|
and is_linear is True if the item is linear. See also: :attr:`spine_names` and :attr:`spine_items`. '''
|
||||||
manifest_id_map = self.manifest_id_map
|
manifest_id_map = self.manifest_id_map
|
||||||
non_linear = []
|
non_linear = []
|
||||||
for item in self.opf_xpath('//opf:spine/opf:itemref[@idref]'):
|
for item in self.opf_xpath('//opf:spine/opf:itemref[@idref]'):
|
||||||
@ -531,15 +585,23 @@ class Container(object): # {{{
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def spine_names(self):
|
def spine_names(self):
|
||||||
|
''' An iterator yielding name and is_linear for every item in the
|
||||||
|
book's spine. See also: :attr:`spine_iter` and :attr:`spine_items`. '''
|
||||||
for item, name, linear in self.spine_iter:
|
for item, name, linear in self.spine_iter:
|
||||||
yield name, linear
|
yield name, linear
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def spine_items(self):
|
def spine_items(self):
|
||||||
|
''' An iterator yielding the path (as mapped from the canonical name) for every item in the
|
||||||
|
book's spine. See also: :attr:`spine_iter` and :attr:`spine_names`. '''
|
||||||
for name, linear in self.spine_names:
|
for name, linear in self.spine_names:
|
||||||
yield self.name_path_map[name]
|
yield self.name_path_map[name]
|
||||||
|
|
||||||
def remove_from_spine(self, spine_items, remove_if_no_longer_in_spine=True):
|
def remove_from_spine(self, spine_items, remove_if_no_longer_in_spine=True):
|
||||||
|
'''
|
||||||
|
Remove the specified items (by canonical name) from the spine. If ``remove_if_no_longer_in_spine``
|
||||||
|
is True, the items are also deleted from the book, not just from the spine.
|
||||||
|
'''
|
||||||
nixed = set()
|
nixed = set()
|
||||||
for (name, remove), (item, xname, linear) in zip(spine_items, self.spine_iter):
|
for (name, remove), (item, xname, linear) in zip(spine_items, self.spine_iter):
|
||||||
if remove and name == xname:
|
if remove and name == xname:
|
||||||
@ -619,6 +681,7 @@ class Container(object): # {{{
|
|||||||
self.dirtied.discard(name)
|
self.dirtied.discard(name)
|
||||||
|
|
||||||
def dirty(self, name):
|
def dirty(self, name):
|
||||||
|
''' Mark the parsed object corresponding to name as dirty. See also: :meth:`parsed`. '''
|
||||||
self.dirtied.add(name)
|
self.dirtied.add(name)
|
||||||
|
|
||||||
def remove_from_xml(self, item):
|
def remove_from_xml(self, item):
|
||||||
@ -662,6 +725,9 @@ class Container(object): # {{{
|
|||||||
parent[idx-1].tail = parent.text
|
parent[idx-1].tail = parent.text
|
||||||
|
|
||||||
def opf_get_or_create(self, name):
|
def opf_get_or_create(self, name):
|
||||||
|
''' Convenience method to either return the first XML element with the
|
||||||
|
specified name or create it under the opf:package element and then
|
||||||
|
return it, if it does not already exist. '''
|
||||||
ans = self.opf_xpath('//opf:'+name)
|
ans = self.opf_xpath('//opf:'+name)
|
||||||
if ans:
|
if ans:
|
||||||
return ans[0]
|
return ans[0]
|
||||||
@ -741,6 +807,7 @@ class Container(object): # {{{
|
|||||||
meta.set('content', meta.attrib.pop('content'))
|
meta.set('content', meta.attrib.pop('content'))
|
||||||
|
|
||||||
def serialize_item(self, name):
|
def serialize_item(self, name):
|
||||||
|
''' Convert a parsed object (identified by canonical name) into a bytestring. See :meth:`parsed`. '''
|
||||||
data = self.parsed(name)
|
data = self.parsed(name)
|
||||||
if name == self.opf_name:
|
if name == self.opf_name:
|
||||||
self.format_opf()
|
self.format_opf()
|
||||||
@ -753,6 +820,9 @@ class Container(object): # {{{
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
def commit_item(self, name, keep_parsed=False):
|
def commit_item(self, name, keep_parsed=False):
|
||||||
|
''' Commit a parsed object to disk (it is serialized and written to the
|
||||||
|
underlying file). If ``keep_parsed`` is True the parsed representation
|
||||||
|
is retained in the cache. See also: :meth:`parsed` '''
|
||||||
if name not in self.parsed_cache:
|
if name not in self.parsed_cache:
|
||||||
return
|
return
|
||||||
data = self.serialize_item(name)
|
data = self.serialize_item(name)
|
||||||
@ -767,6 +837,9 @@ class Container(object): # {{{
|
|||||||
f.write(data)
|
f.write(data)
|
||||||
|
|
||||||
def filesize(self, name):
|
def filesize(self, name):
|
||||||
|
''' Return the size in bytes of the file represented by the specified
|
||||||
|
canonical name. Automatically handles dirtied parsed objects. See also:
|
||||||
|
:meth:`parsed` '''
|
||||||
if name in self.dirtied:
|
if name in self.dirtied:
|
||||||
self.commit_item(name, keep_parsed=True)
|
self.commit_item(name, keep_parsed=True)
|
||||||
path = self.name_to_abspath(name)
|
path = self.name_to_abspath(name)
|
||||||
@ -794,6 +867,11 @@ class Container(object): # {{{
|
|||||||
return open(path, mode)
|
return open(path, mode)
|
||||||
|
|
||||||
def commit(self, outpath=None, keep_parsed=False):
|
def commit(self, outpath=None, keep_parsed=False):
|
||||||
|
'''
|
||||||
|
Commit all dirtied parsed objects to the filesystem and write out the ebook file at outpath.
|
||||||
|
:param outpath: The path to write the saved ebook file to. If None, the path of the original book file is used.
|
||||||
|
:param keep_parsed: If True the parsed representations of committed items are kept in the cache.
|
||||||
|
'''
|
||||||
for name in tuple(self.dirtied):
|
for name in tuple(self.dirtied):
|
||||||
self.commit_item(name, keep_parsed=keep_parsed)
|
self.commit_item(name, keep_parsed=keep_parsed)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user