Finish API documentation for the container and associated tools

This commit is contained in:
Kovid Goyal 2014-07-05 11:50:12 +05:30
parent 23d31b1815
commit 45780c4719
9 changed files with 175 additions and 6 deletions

View File

@ -35,7 +35,7 @@ The Container object
.. autoclass:: Container
:members:
Tools for dealing with component files in a container
Managing component files in a container
--------------------------------------------------------
.. module:: calibre.ebooks.oeb.polish.replace
@ -46,3 +46,77 @@ Tools for dealing with component files in a container
.. autofunction:: get_recommended_folders
Pretty printing and auto fixing parse errors
--------------------------------------------------------
.. module:: calibre.ebooks.oeb.polish.pretty
.. autofunction:: fix_html
.. autofunction:: fix_all_html
.. autofunction:: pretty_html
.. autofunction:: pretty_css
.. autofunction:: pretty_xml
.. autofunction:: pretty_all
Managing book jackets
-----------------------
.. module:: calibre.ebooks.oeb.polish.jacket
.. autofunction:: remove_jacket
.. autofunction:: add_or_replace_jacket
Splitting and merging of files
---------------------------------
.. module:: calibre.ebooks.oeb.polish.split
.. autofunction:: split
.. autofunction:: multisplit
.. autofunction:: merge
Managing covers
-------------------
.. module:: calibre.ebooks.oeb.polish.cover
.. autofunction:: set_cover
.. autofunction:: mark_as_cover
.. autofunction:: mark_as_titlepage
Working with CSS
-------------------
.. autofunction:: calibre.ebooks.oeb.polish.fonts.change_font
.. module:: calibre.ebooks.oeb.polish.css
.. autofunction:: remove_unused_css
.. autofunction:: filter_css
Working with the Table of Contents
-----------------------------------
.. module:: calibre.ebooks.oeb.polish.toc
.. autofunction:: from_xpaths
.. autofunction:: from_links
.. autofunction:: from_files
.. autofunction:: create_inline_toc

View File

@ -96,7 +96,9 @@ class Container(object): # {{{
class, they assume all hrefs are quoted.
'''
#: The type of book (epub for EPUB files and azw3 for AZW3 files)
book_type = 'oeb'
SUPPORTS_TITLEPAGES = True
SUPPORTS_FILENAMES = True

View File

@ -37,7 +37,7 @@ def set_azw3_cover(container, cover_path, report, options=None):
with open(cover_path, 'rb') as src, container.open(name, 'wb') as dest:
shutil.copyfileobj(src, dest)
container.dirty(container.opf_name)
report('Cover updated' if found else 'Cover inserted')
report(_('Cover updated') if found else _('Cover inserted'))
def get_azw3_raster_cover_name(container):
items = container.opf_xpath('//opf:guide/opf:reference[@href and contains(@type, "cover")]')
@ -66,13 +66,30 @@ def get_cover_page_name(container):
return
return find_cover_page(container)
def set_cover(container, cover_path, report, options=None):
def set_cover(container, cover_path, report=None, options=None):
'''
Set the cover of the book to the image pointed to by cover_path.
:param cover_path: Either the absolute path to an image file or the
canonical name of an image in the book. When using an image in the book,
you must also set options, see below.
:param report: An optional callable that takes a single argument. It will
be called with information about the tasks being processed.
:param options: None or a dictionary that controls how the cover is set. The dictionary can have entries:
**keep_aspect**: True or False (Preserve aspect ratio of covers in EPUB)
**no_svg**: True or False (Use an SVG cover wrapper in the EPUB titlepage)
**existing**: True or False (``cover_path`` refers to an existing image in the book)
'''
report = report or (lambda x:x)
if container.book_type == 'azw3':
set_azw3_cover(container, cover_path, report, options=options)
else:
set_epub_cover(container, cover_path, report, options=options)
def mark_as_cover(container, name):
'''
Mark the specified image as the cover image.
'''
if name not in container.mime_map:
raise ValueError('Cannot mark %s as cover as it does not exist' % name)
mt = container.mime_map[name]
@ -169,6 +186,11 @@ def mark_as_cover_epub(container, name):
container.dirty(container.opf_name)
def mark_as_titlepage(container, name, move_to_start=True):
'''
Mark the specified HTML file as the titlepage of the EPUB.
:param move_to_start: If True the HTML file is moved to the start of the spine
'''
if move_to_start:
for item, q, linear in container.spine_iter:
if name == q:
@ -381,7 +403,7 @@ def set_epub_cover(container, cover_path, report, options=None):
# Insert the new cover
raster_cover, titlepage = create_epub_cover(container, cover_path, existing_image, options=options)
report('Cover updated' if updated else 'Cover inserted')
report(_('Cover updated') if updated else _('Cover inserted'))
# Replace links to the old cover image/cover page
link_sub = {s:d for s, d in {

View File

@ -134,7 +134,15 @@ def get_imported_sheets(name, container, sheets, recursion_level=10, sheet=None)
ans.discard(name)
return ans
def remove_unused_css(container, report, remove_unused_classes=False):
def remove_unused_css(container, report=None, remove_unused_classes=False):
'''
Remove all unused CSS rules from the book. An unused CSS rule is one that does not match any actual content.
:param report: An optional callable that takes a single argument. It is called with information about the operations being performed.
:param remove_unused_classes: If True, class attributes in the HTML that do not match any CSS rules are also removed.
'''
report = report or (lambda x:x)
def safe_parse(name):
try:
return container.parsed(name)
@ -263,6 +271,12 @@ def filter_sheet(sheet, properties):
def filter_css(container, properties, names=()):
'''
Remove the specified CSS properties from all CSS rules in the book.
:param properties: Set of properties to remove. For example: :code:`{'font-family', 'color'}`.
:param names: The files from which to remove the properties. Defaults to all HTML and CSS files in the book.
'''
if not names:
types = OEB_STYLES | OEB_DOCS
names = []

View File

@ -127,6 +127,12 @@ def change_font_in_sheet(container, sheet, old_name, new_name, sheet_name):
return changed
def change_font(container, old_name, new_name=None):
'''
Change a font family from old_name to new_name. Changes all occurrences of
the font family in stylesheets, style tags and style attributes.
If the old_name refers to an embedded font, it is removed. You can set
new_name to None to remove the font family instead of changing it.
'''
changed = False
for name, mt in tuple(container.mime_map.iteritems()):
if mt in OEB_STYLES:

View File

@ -59,6 +59,7 @@ def replace_jacket(container, name):
container.dirty(name)
def remove_jacket(container):
' Remove an existing jacket, if any. Returns False if no existing jacket was found. '
name = find_existing_jacket(container)
if name is not None:
remove_jacket_images(container, name)
@ -74,6 +75,8 @@ def remove_jacket_images(container, name):
container.remove_item(iname)
def add_or_replace_jacket(container):
''' Either create a new jacket from the book's metadata or replace an
existing jacket. Returns True if an existing jacket was replaced. '''
name = find_existing_jacket(container)
found = True
if name is None:

View File

@ -185,19 +185,23 @@ def pretty_html_tree(container, root):
pretty_script_or_style(container, child)
def fix_html(container, raw):
' Fix any parsing errors in the HTML represented as a string in raw. Fixing is done using the HTML 5 parsing algorithm. '
root = container.parse_xhtml(raw)
return serialize(root, 'text/html')
def pretty_html(container, name, raw):
' Pretty print the HTML represented as a string in raw '
root = container.parse_xhtml(raw)
pretty_html_tree(container, root)
return serialize(root, 'text/html')
def pretty_css(container, name, raw):
' Pretty print the CSS represented as a string in raw '
sheet = container.parse_css(raw)
return serialize(sheet, 'text/css')
def pretty_xml(container, name, raw):
' Pretty print the XML represented as a string in raw. If ``name`` is the name of the OPF, extra OPF-specific prettying is performed. '
root = container.parse_xml(raw)
if name == container.opf_name:
pretty_opf(root)
@ -205,12 +209,14 @@ def pretty_xml(container, name, raw):
return serialize(root, 'text/xml')
def fix_all_html(container):
' Fix any parsing errors in all HTML files in the container. Fixing is done using the HTML 5 parsing algorithm. '
for name, mt in container.mime_map.iteritems():
if mt in OEB_DOCS:
container.parsed(name)
container.dirty(name)
def pretty_all(container):
' Pretty print all HTML/CSS/XML files in the container '
for name, mt in container.mime_map.iteritems():
prettied = False
if mt in OEB_DOCS:

View File

@ -164,7 +164,17 @@ class SplitLinkReplacer(object):
return url
def split(container, name, loc_or_xpath, before=True, totals=None):
''' Split the file specified by name at the position specified by loc_or_xpath. '''
'''
Split the file specified by name at the position specified by loc_or_xpath.
Splitting automatically migrates all links and references to the affected
files.
:param loc_or_xpath: Should be an XPath expression such as
//h:div[@id="split_here"]. Can also be a *loc* which is used internally to
implement splitting in the preview panel.
:param before: If True the split occurs before the identified element otherwise after it.
:param totals: Used internally
'''
root = container.parsed(name)
if isinstance(loc_or_xpath, type('')):
@ -238,6 +248,13 @@ def split(container, name, loc_or_xpath, before=True, totals=None):
return bottom_name
def multisplit(container, name, xpath, before=True):
'''
Split the specified file at multiple locations (all tags that match the specified XPath expression). See also: :func:`split`.
Splitting automatically migrates all links and references to the affected
files.
:param before: If True the splits occur before the identified element otherwise after it.
'''
root = container.parsed(name)
nodes = root.xpath(xpath, namespaces=XPNSMAP)
if not nodes:
@ -447,6 +464,14 @@ def merge_css(container, names, master):
def merge(container, category, names, master):
'''
Merge the specified files into a single file, automatically migrating all
links and references to the affected files. The files must all be either HTML or CSS files.
:param category: Must be either ``'text'`` for HTML files or ``'styles'`` for CSS files
:param names: The list of files to be merged
:param master: Which of the merged files is the *master* file, that is, the file that will remain after merging.
'''
if category not in {'text', 'styles'}:
raise AbortError('Cannot merge files of type: %s' % category)
if len(names) < 2:

View File

@ -240,6 +240,12 @@ def item_at_top(elem):
return True
def from_xpaths(container, xpaths):
'''
Generate a Table of Contents from a list of XPath expressions. Each
expression in the list corresponds to a level of the generated ToC. For
example: :code:`['//h:h1', '//h:h2', '//h:h3']` will generate a three level
table of contents from the ``<h1>``, ``<h2>`` and ``<h3>`` tags.
'''
tocroot = TOC()
xpaths = [XPath(xp) for xp in xpaths]
level_prev = {i+1:None for i in xrange(len(xpaths))}
@ -295,6 +301,9 @@ def from_xpaths(container, xpaths):
return tocroot
def from_links(container):
'''
Generate a Table of Contents from links in the book.
'''
toc = TOC()
link_path = XPath('//h:a[@href]')
seen_titles, seen_dests = set(), set()
@ -338,6 +347,9 @@ def find_text(node):
return text
def from_files(container):
'''
Generate a Table of Contents from files in the book.
'''
toc = TOC()
for i, spinepath in enumerate(container.spine_items):
name = container.abspath_to_name(spinepath)
@ -484,6 +496,11 @@ def find_inline_toc(container):
return name
def create_inline_toc(container, title=None):
'''
Create an inline (HTML) Table of Contents from an existing NCX table of contents.
:param title: The title for this table of contents.
'''
lang = get_book_language(container)
default_title = 'Table of Contents'
if lang: