Finish API documentation for the container and associated tools

2025-06-23 15:30:45 -04:00 · 2014-07-05 11:50:12 +05:30 · 2014-07-05 11:50:12 +05:30 · 45780c4719
commit 45780c4719
parent 23d31b1815
9 changed files with 175 additions and 6 deletions
--- a/manual/polish.rst
+++ b/manual/polish.rst
@ -35,7 +35,7 @@ The Container object
 .. autoclass:: Container
   :members:

-Tools for dealing with component files in a container
+Managing component files in a container
 --------------------------------------------------------

 .. module:: calibre.ebooks.oeb.polish.replace
@ -46,3 +46,77 @@ Tools for dealing with component files in a container

 .. autofunction:: get_recommended_folders

+Pretty printing and auto fixing parse errors
+--------------------------------------------------------
+
+.. module:: calibre.ebooks.oeb.polish.pretty
+
+.. autofunction:: fix_html
+
+.. autofunction:: fix_all_html
+
+.. autofunction:: pretty_html
+
+.. autofunction:: pretty_css
+
+.. autofunction:: pretty_xml
+
+.. autofunction:: pretty_all
+
+
+Managing book jackets
+-----------------------
+
+.. module:: calibre.ebooks.oeb.polish.jacket
+
+.. autofunction:: remove_jacket
+
+.. autofunction:: add_or_replace_jacket
+
+Splitting and merging of files
+---------------------------------
+
+.. module:: calibre.ebooks.oeb.polish.split
+
+.. autofunction:: split
+
+.. autofunction:: multisplit
+
+.. autofunction:: merge
+
+Managing covers
+-------------------
+
+.. module:: calibre.ebooks.oeb.polish.cover
+
+.. autofunction:: set_cover
+
+.. autofunction:: mark_as_cover
+
+.. autofunction:: mark_as_titlepage
+
+Working with CSS
+-------------------
+
+.. autofunction:: calibre.ebooks.oeb.polish.fonts.change_font
+
+.. module:: calibre.ebooks.oeb.polish.css
+
+.. autofunction:: remove_unused_css
+
+.. autofunction:: filter_css
+
+
+Working with the Table of Contents
+-----------------------------------
+
+.. module:: calibre.ebooks.oeb.polish.toc
+
+.. autofunction:: from_xpaths
+
+.. autofunction:: from_links
+
+.. autofunction:: from_files
+
+.. autofunction:: create_inline_toc
+
--- a/src/calibre/ebooks/oeb/polish/container.py
+++ b/src/calibre/ebooks/oeb/polish/container.py
@ -96,7 +96,9 @@ class Container(object):  # {{{
    class, they assume all hrefs are quoted.
    '''

+    #: The type of book (epub for EPUB files and azw3 for AZW3 files)
    book_type = 'oeb'
+
    SUPPORTS_TITLEPAGES = True
    SUPPORTS_FILENAMES = True

--- a/src/calibre/ebooks/oeb/polish/cover.py
+++ b/src/calibre/ebooks/oeb/polish/cover.py
@ -37,7 +37,7 @@ def set_azw3_cover(container, cover_path, report, options=None):
        with open(cover_path, 'rb') as src, container.open(name, 'wb') as dest:
            shutil.copyfileobj(src, dest)
    container.dirty(container.opf_name)
-    report('Cover updated' if found else 'Cover inserted')
+    report(_('Cover updated') if found else _('Cover inserted'))

 def get_azw3_raster_cover_name(container):
    items = container.opf_xpath('//opf:guide/opf:reference[@href and contains(@type, "cover")]')
@ -66,13 +66,30 @@ def get_cover_page_name(container):
        return
    return find_cover_page(container)

-def set_cover(container, cover_path, report, options=None):
+def set_cover(container, cover_path, report=None, options=None):
+    '''
+    Set the cover of the book to the image pointed to by cover_path.
+
+    :param cover_path: Either the absolute path to an image file or the
+        canonical name of an image in the book. When using an image in the book,
+        you must also set options, see below.
+    :param report: An optional callable that takes a single argument. It will
+        be called with information about the tasks being processed.
+    :param options: None or a dictionary that controls how the cover is set. The dictionary can have entries:
+        **keep_aspect**: True or False  (Preserve aspect ratio of covers in EPUB)
+        **no_svg**: True or False  (Do not use an SVG cover wrapper in the EPUB titlepage)
+        **existing**: True or False  (``cover_path`` refers to an existing image in the book)
+    '''
+    report = report or (lambda x:x)
    if container.book_type == 'azw3':
        set_azw3_cover(container, cover_path, report, options=options)
    else:
        set_epub_cover(container, cover_path, report, options=options)

 def mark_as_cover(container, name):
+    '''
+    Mark the specified image as the cover image.
+    '''
    if name not in container.mime_map:
        raise ValueError('Cannot mark %s as cover as it does not exist' % name)
    mt = container.mime_map[name]
@ -169,6 +186,11 @@ def mark_as_cover_epub(container, name):
    container.dirty(container.opf_name)

 def mark_as_titlepage(container, name, move_to_start=True):
+    '''
+    Mark the specified HTML file as the titlepage of the EPUB.
+
+    :param move_to_start: If True the HTML file is moved to the start of the spine
+    '''
    if move_to_start:
        for item, q, linear in container.spine_iter:
            if name == q:
@ -381,7 +403,7 @@ def set_epub_cover(container, cover_path, report, options=None):
    # Insert the new cover
    raster_cover, titlepage = create_epub_cover(container, cover_path, existing_image, options=options)

-    report('Cover updated' if updated else 'Cover inserted')
+    report(_('Cover updated') if updated else _('Cover inserted'))

    # Replace links to the old cover image/cover page
    link_sub = {s:d for s, d in {
--- a/src/calibre/ebooks/oeb/polish/css.py
+++ b/src/calibre/ebooks/oeb/polish/css.py
@ -134,7 +134,15 @@ def get_imported_sheets(name, container, sheets, recursion_level=10, sheet=None)
    ans.discard(name)
    return ans

-def remove_unused_css(container, report, remove_unused_classes=False):
+def remove_unused_css(container, report=None, remove_unused_classes=False):
+    '''
+    Remove all unused CSS rules from the book. An unused CSS rule is one that does not match any actual content.
+
+    :param report: An optional callable that takes a single argument. It is called with information about the operations being performed.
+    :param remove_unused_classes: If True, class attributes in the HTML that do not match any CSS rules are also removed.
+    '''
+    report = report or (lambda x:x)
+
    def safe_parse(name):
        try:
            return container.parsed(name)
@ -263,6 +271,12 @@ def filter_sheet(sheet, properties):


 def filter_css(container, properties, names=()):
+    '''
+    Remove the specified CSS properties from all CSS rules in the book.
+
+    :param properties: Set of properties to remove. For example: :code:`{'font-family', 'color'}`.
+    :param names: The files from which to remove the properties. Defaults to all HTML and CSS files in the book.
+    '''
    if not names:
        types = OEB_STYLES | OEB_DOCS
        names = []
--- a/src/calibre/ebooks/oeb/polish/fonts.py
+++ b/src/calibre/ebooks/oeb/polish/fonts.py
@ -127,6 +127,12 @@ def change_font_in_sheet(container, sheet, old_name, new_name, sheet_name):
    return changed

 def change_font(container, old_name, new_name=None):
+    '''
+    Change a font family from old_name to new_name. Changes all occurrences of
+    the font family in stylesheets, style tags and style attributes.
+    If the old_name refers to an embedded font, it is removed. You can set
+    new_name to None to remove the font family instead of changing it.
+    '''
    changed = False
    for name, mt in tuple(container.mime_map.iteritems()):
        if mt in OEB_STYLES:
--- a/src/calibre/ebooks/oeb/polish/jacket.py
+++ b/src/calibre/ebooks/oeb/polish/jacket.py
@ -59,6 +59,7 @@ def replace_jacket(container, name):
    container.dirty(name)

 def remove_jacket(container):
+    ' Remove an existing jacket, if any. Returns False if no existing jacket was found. '
    name = find_existing_jacket(container)
    if name is not None:
        remove_jacket_images(container, name)
@ -74,6 +75,8 @@ def remove_jacket_images(container, name):
            container.remove_item(iname)

 def add_or_replace_jacket(container):
+    ''' Either create a new jacket from the book's metadata or replace an
+    existing jacket. Returns True if an existing jacket was replaced. '''
    name = find_existing_jacket(container)
    found = True
    if name is None:
--- a/src/calibre/ebooks/oeb/polish/pretty.py
+++ b/src/calibre/ebooks/oeb/polish/pretty.py
@ -185,19 +185,23 @@ def pretty_html_tree(container, root):
            pretty_script_or_style(container, child)

 def fix_html(container, raw):
+    ' Fix any parsing errors in the HTML represented as a string in raw. Fixing is done using the HTML 5 parsing algorithm. '
    root = container.parse_xhtml(raw)
    return serialize(root, 'text/html')

 def pretty_html(container, name, raw):
+    ' Pretty print the HTML represented as a string in raw '
    root = container.parse_xhtml(raw)
    pretty_html_tree(container, root)
    return serialize(root, 'text/html')

 def pretty_css(container, name, raw):
+    ' Pretty print the CSS represented as a string in raw '
    sheet = container.parse_css(raw)
    return serialize(sheet, 'text/css')

 def pretty_xml(container, name, raw):
+    ' Pretty print the XML represented as a string in raw. If ``name`` is the name of the OPF, extra OPF-specific prettying is performed. '
    root = container.parse_xml(raw)
    if name == container.opf_name:
        pretty_opf(root)
@ -205,12 +209,14 @@ def pretty_xml(container, name, raw):
    return serialize(root, 'text/xml')

 def fix_all_html(container):
+    ' Fix any parsing errors in all HTML files in the container. Fixing is done using the HTML 5 parsing algorithm. '
    for name, mt in container.mime_map.iteritems():
        if mt in OEB_DOCS:
            container.parsed(name)
            container.dirty(name)

 def pretty_all(container):
+    ' Pretty print all HTML/CSS/XML files in the container '
    for name, mt in container.mime_map.iteritems():
        prettied = False
        if mt in OEB_DOCS:
--- a/src/calibre/ebooks/oeb/polish/split.py
+++ b/src/calibre/ebooks/oeb/polish/split.py
@ -164,7 +164,17 @@ class SplitLinkReplacer(object):
        return url

 def split(container, name, loc_or_xpath, before=True, totals=None):
-    ''' Split the file specified by name at the position specified by loc_or_xpath. '''
+    '''
+    Split the file specified by name at the position specified by loc_or_xpath.
+    Splitting automatically migrates all links and references to the affected
+    files.
+
+    :param loc_or_xpath: Should be an XPath expression such as
+        //h:div[@id="split_here"]. Can also be a *loc* which is used internally to
+        implement splitting in the preview panel.
+    :param before: If True the split occurs before the identified element otherwise after it.
+    :param totals: Used internally
+    '''

    root = container.parsed(name)
    if isinstance(loc_or_xpath, type('')):
@ -238,6 +248,13 @@ def split(container, name, loc_or_xpath, before=True, totals=None):
    return bottom_name

 def multisplit(container, name, xpath, before=True):
+    '''
+    Split the specified file at multiple locations (all tags that match the specified XPath expression). See also: :func:`split`.
+    Splitting automatically migrates all links and references to the affected
+    files.
+
+    :param before: If True the splits occur before the identified element otherwise after it.
+    '''
    root = container.parsed(name)
    nodes = root.xpath(xpath, namespaces=XPNSMAP)
    if not nodes:
@ -447,6 +464,14 @@ def merge_css(container, names, master):


 def merge(container, category, names, master):
+    '''
+    Merge the specified files into a single file, automatically migrating all
+    links and references to the affected files. The files must all be either HTML or CSS files.
+
+    :param category: Must be either ``'text'`` for HTML files or ``'styles'`` for CSS files
+    :param names: The list of files to be merged
+    :param master: Which of the merged files is the *master* file, that is, the file that will remain after merging.
+    '''
    if category not in {'text', 'styles'}:
        raise AbortError('Cannot merge files of type: %s' % category)
    if len(names) < 2:
--- a/src/calibre/ebooks/oeb/polish/toc.py
+++ b/src/calibre/ebooks/oeb/polish/toc.py
@ -240,6 +240,12 @@ def item_at_top(elem):
    return True

 def from_xpaths(container, xpaths):
+    '''
+    Generate a Table of Contents from a list of XPath expressions. Each
+    expression in the list corresponds to a level of the generated ToC. For
+    example: :code:`['//h:h1', '//h:h2', '//h:h3']` will generate a three level
+    table of contents from the ``<h1>``, ``<h2>`` and ``<h3>`` tags.
+    '''
    tocroot = TOC()
    xpaths = [XPath(xp) for xp in xpaths]
    level_prev = {i+1:None for i in xrange(len(xpaths))}
@ -295,6 +301,9 @@ def from_xpaths(container, xpaths):
    return tocroot

 def from_links(container):
+    '''
+    Generate a Table of Contents from links in the book.
+    '''
    toc = TOC()
    link_path = XPath('//h:a[@href]')
    seen_titles, seen_dests = set(), set()
@ -338,6 +347,9 @@ def find_text(node):
                return text

 def from_files(container):
+    '''
+    Generate a Table of Contents from files in the book.
+    '''
    toc = TOC()
    for i, spinepath in enumerate(container.spine_items):
        name = container.abspath_to_name(spinepath)
@ -484,6 +496,11 @@ def find_inline_toc(container):
            return name

 def create_inline_toc(container, title=None):
+    '''
+    Create an inline (HTML) Table of Contents from an existing NCX table of contents.
+
+    :param title: The title for this table of contents.
+    '''
    lang = get_book_language(container)
    default_title = 'Table of Contents'
    if lang: