From 45780c471958a7f6979f69e809daa54aa40e60d1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 5 Jul 2014 11:50:12 +0530 Subject: [PATCH] Finish API documentation for the container and associated tools --- manual/polish.rst | 76 +++++++++++++++++++++- src/calibre/ebooks/oeb/polish/container.py | 2 + src/calibre/ebooks/oeb/polish/cover.py | 28 +++++++- src/calibre/ebooks/oeb/polish/css.py | 16 ++++- src/calibre/ebooks/oeb/polish/fonts.py | 6 ++ src/calibre/ebooks/oeb/polish/jacket.py | 3 + src/calibre/ebooks/oeb/polish/pretty.py | 6 ++ src/calibre/ebooks/oeb/polish/split.py | 27 +++++++- src/calibre/ebooks/oeb/polish/toc.py | 17 +++++ 9 files changed, 175 insertions(+), 6 deletions(-) diff --git a/manual/polish.rst b/manual/polish.rst index 641e84536b..d268f39070 100644 --- a/manual/polish.rst +++ b/manual/polish.rst @@ -35,7 +35,7 @@ The Container object .. autoclass:: Container :members: -Tools for dealing with component files in a container +Managing component files in a container -------------------------------------------------------- .. module:: calibre.ebooks.oeb.polish.replace @@ -46,3 +46,77 @@ Tools for dealing with component files in a container .. autofunction:: get_recommended_folders +Pretty printing and auto fixing parse errors +-------------------------------------------------------- + +.. module:: calibre.ebooks.oeb.polish.pretty + +.. autofunction:: fix_html + +.. autofunction:: fix_all_html + +.. autofunction:: pretty_html + +.. autofunction:: pretty_css + +.. autofunction:: pretty_xml + +.. autofunction:: pretty_all + + +Managing book jackets +----------------------- + +.. module:: calibre.ebooks.oeb.polish.jacket + +.. autofunction:: remove_jacket + +.. autofunction:: add_or_replace_jacket + +Splitting and merging of files +--------------------------------- + +.. module:: calibre.ebooks.oeb.polish.split + +.. autofunction:: split + +.. autofunction:: multisplit + +.. autofunction:: merge + +Managing covers +------------------- + +.. 
module:: calibre.ebooks.oeb.polish.cover + +.. autofunction:: set_cover + +.. autofunction:: mark_as_cover + +.. autofunction:: mark_as_titlepage + +Working with CSS +------------------- + +.. autofunction:: calibre.ebooks.oeb.polish.fonts.change_font + +.. module:: calibre.ebooks.oeb.polish.css + +.. autofunction:: remove_unused_css + +.. autofunction:: filter_css + + +Working with the Table of Contents +----------------------------------- + +.. module:: calibre.ebooks.oeb.polish.toc + +.. autofunction:: from_xpaths + +.. autofunction:: from_links + +.. autofunction:: from_files + +.. autofunction:: create_inline_toc + diff --git a/src/calibre/ebooks/oeb/polish/container.py b/src/calibre/ebooks/oeb/polish/container.py index 62038f173d..d9175b6bd2 100644 --- a/src/calibre/ebooks/oeb/polish/container.py +++ b/src/calibre/ebooks/oeb/polish/container.py @@ -96,7 +96,9 @@ class Container(object): # {{{ class, they assume all hrefs are quoted. ''' + #: The type of book (epub for EPUB files and azw3 for AZW3 files) book_type = 'oeb' + SUPPORTS_TITLEPAGES = True SUPPORTS_FILENAMES = True diff --git a/src/calibre/ebooks/oeb/polish/cover.py b/src/calibre/ebooks/oeb/polish/cover.py index 353229d8d2..bf76caa47f 100644 --- a/src/calibre/ebooks/oeb/polish/cover.py +++ b/src/calibre/ebooks/oeb/polish/cover.py @@ -37,7 +37,7 @@ def set_azw3_cover(container, cover_path, report, options=None): with open(cover_path, 'rb') as src, container.open(name, 'wb') as dest: shutil.copyfileobj(src, dest) container.dirty(container.opf_name) - report('Cover updated' if found else 'Cover inserted') + report(_('Cover updated') if found else _('Cover inserted')) def get_azw3_raster_cover_name(container): items = container.opf_xpath('//opf:guide/opf:reference[@href and contains(@type, "cover")]') @@ -66,13 +66,30 @@ def get_cover_page_name(container): return return find_cover_page(container) -def set_cover(container, cover_path, report, options=None): +def set_cover(container, cover_path, 
report=None, options=None): + ''' + Set the cover of the book to the image pointed to by cover_path. + + :param cover_path: Either the absolute path to an image file or the + canonical name of an image in the book. When using an image in the book, + you must also set options, see below. + :param report: An optional callable that takes a single argument. It will + be called with information about the tasks being processed. + :param options: None or a dictionary that controls how the cover is set. The dictionary can have entries: + **keep_aspect**: True or False (Preserve aspect ratio of covers in EPUB) + **no_svg**: True or False (Use an SVG cover wrapper in the EPUB titlepage) + **existing**: True or False (``cover_path`` refers to an existing image in the book) + ''' + report = report or (lambda x:x) if container.book_type == 'azw3': set_azw3_cover(container, cover_path, report, options=options) else: set_epub_cover(container, cover_path, report, options=options) def mark_as_cover(container, name): + ''' + Mark the specified image as the cover image. + ''' if name not in container.mime_map: raise ValueError('Cannot mark %s as cover as it does not exist' % name) mt = container.mime_map[name] @@ -169,6 +186,11 @@ def mark_as_cover_epub(container, name): container.dirty(container.opf_name) def mark_as_titlepage(container, name, move_to_start=True): + ''' + Mark the specified HTML file as the titlepage of the EPUB.
+ + :param move_to_start: If True the HTML file is moved to the start of the spine + ''' if move_to_start: for item, q, linear in container.spine_iter: if name == q: @@ -381,7 +403,7 @@ def set_epub_cover(container, cover_path, report, options=None): # Insert the new cover raster_cover, titlepage = create_epub_cover(container, cover_path, existing_image, options=options) - report('Cover updated' if updated else 'Cover inserted') + report(_('Cover updated') if updated else _('Cover inserted')) # Replace links to the old cover image/cover page link_sub = {s:d for s, d in { diff --git a/src/calibre/ebooks/oeb/polish/css.py b/src/calibre/ebooks/oeb/polish/css.py index 1b087785f7..0e8d2f40cc 100644 --- a/src/calibre/ebooks/oeb/polish/css.py +++ b/src/calibre/ebooks/oeb/polish/css.py @@ -134,7 +134,15 @@ def get_imported_sheets(name, container, sheets, recursion_level=10, sheet=None) ans.discard(name) return ans -def remove_unused_css(container, report, remove_unused_classes=False): +def remove_unused_css(container, report=None, remove_unused_classes=False): + ''' + Remove all unused CSS rules from the book. An unused CSS rule is one that does not match any actual content. + + :param report: An optional callable that takes a single argument. It is called with information about the operations being performed. + :param remove_unused_classes: If True, class attributes in the HTML that do not match any CSS rules are also removed. + ''' + report = report or (lambda x:x) + def safe_parse(name): try: return container.parsed(name) @@ -263,6 +271,12 @@ def filter_sheet(sheet, properties): def filter_css(container, properties, names=()): + ''' + Remove the specified CSS properties from all CSS rules in the book. + + :param properties: Set of properties to remove. For example: :code:`{'font-family', 'color'}`. + :param names: The files from which to remove the properties. Defaults to all HTML and CSS files in the book. 
+ ''' if not names: types = OEB_STYLES | OEB_DOCS names = [] diff --git a/src/calibre/ebooks/oeb/polish/fonts.py b/src/calibre/ebooks/oeb/polish/fonts.py index 3f789394bd..9b385445a6 100644 --- a/src/calibre/ebooks/oeb/polish/fonts.py +++ b/src/calibre/ebooks/oeb/polish/fonts.py @@ -127,6 +127,12 @@ def change_font_in_sheet(container, sheet, old_name, new_name, sheet_name): return changed def change_font(container, old_name, new_name=None): + ''' + Change a font family from old_name to new_name. Changes all occurrences of + the font family in stylesheets, style tags and style attributes. + If the old_name refers to an embedded font, it is removed. You can set + new_name to None to remove the font family instead of changing it. + ''' changed = False for name, mt in tuple(container.mime_map.iteritems()): if mt in OEB_STYLES: diff --git a/src/calibre/ebooks/oeb/polish/jacket.py b/src/calibre/ebooks/oeb/polish/jacket.py index ee795c0011..b96695a965 100644 --- a/src/calibre/ebooks/oeb/polish/jacket.py +++ b/src/calibre/ebooks/oeb/polish/jacket.py @@ -59,6 +59,7 @@ def replace_jacket(container, name): container.dirty(name) def remove_jacket(container): + ' Remove an existing jacket, if any. Returns False if no existing jacket was found. ' name = find_existing_jacket(container) if name is not None: remove_jacket_images(container, name) @@ -74,6 +75,8 @@ def remove_jacket_images(container, name): container.remove_item(iname) def add_or_replace_jacket(container): + ''' Either create a new jacket from the book's metadata or replace an + existing jacket. Returns True if an existing jacket was replaced.
''' name = find_existing_jacket(container) found = True if name is None: diff --git a/src/calibre/ebooks/oeb/polish/pretty.py b/src/calibre/ebooks/oeb/polish/pretty.py index 6f8032e8f3..0f62fc6b2b 100644 --- a/src/calibre/ebooks/oeb/polish/pretty.py +++ b/src/calibre/ebooks/oeb/polish/pretty.py @@ -185,19 +185,23 @@ def pretty_html_tree(container, root): pretty_script_or_style(container, child) def fix_html(container, raw): + ' Fix any parsing errors in the HTML represented as a string in raw. Fixing is done using the HTML 5 parsing algorithm. ' root = container.parse_xhtml(raw) return serialize(root, 'text/html') def pretty_html(container, name, raw): + ' Pretty print the HTML represented as a string in raw ' root = container.parse_xhtml(raw) pretty_html_tree(container, root) return serialize(root, 'text/html') def pretty_css(container, name, raw): + ' Pretty print the CSS represented as a string in raw ' sheet = container.parse_css(raw) return serialize(sheet, 'text/css') def pretty_xml(container, name, raw): + ' Pretty print the XML represented as a string in raw. If ``name`` is the name of the OPF, extra OPF-specific prettying is performed. ' root = container.parse_xml(raw) if name == container.opf_name: pretty_opf(root) @@ -205,12 +209,14 @@ def pretty_xml(container, name, raw): return serialize(root, 'text/xml') def fix_all_html(container): + ' Fix any parsing errors in all HTML files in the container. Fixing is done using the HTML 5 parsing algorithm. 
' for name, mt in container.mime_map.iteritems(): if mt in OEB_DOCS: container.parsed(name) container.dirty(name) def pretty_all(container): + ' Pretty print all HTML/CSS/XML files in the container ' for name, mt in container.mime_map.iteritems(): prettied = False if mt in OEB_DOCS: diff --git a/src/calibre/ebooks/oeb/polish/split.py b/src/calibre/ebooks/oeb/polish/split.py index dac0c9b4d7..f8eec193bf 100644 --- a/src/calibre/ebooks/oeb/polish/split.py +++ b/src/calibre/ebooks/oeb/polish/split.py @@ -164,7 +164,17 @@ class SplitLinkReplacer(object): return url def split(container, name, loc_or_xpath, before=True, totals=None): - ''' Split the file specified by name at the position specified by loc_or_xpath. ''' + ''' + Split the file specified by name at the position specified by loc_or_xpath. + Splitting automatically migrates all links and references to the affected + files. + + :param loc_or_xpath: Should be an XPath expression such as + //h:div[@id="split_here"]. Can also be a *loc* which is used internally to + implement splitting in the preview panel. + :param before: If True the split occurs before the identified element otherwise after it. + :param totals: Used internally + ''' root = container.parsed(name) if isinstance(loc_or_xpath, type('')): @@ -238,6 +248,13 @@ def split(container, name, loc_or_xpath, before=True, totals=None): return bottom_name def multisplit(container, name, xpath, before=True): + ''' + Split the specified file at multiple locations (all tags that match the specified XPath expression). See also: :func:`split`. + Splitting automatically migrates all links and references to the affected + files. + + :param before: If True the splits occur before the identified element otherwise after it.
+ ''' root = container.parsed(name) nodes = root.xpath(xpath, namespaces=XPNSMAP) if not nodes: @@ -447,6 +464,14 @@ def merge_css(container, names, master): def merge(container, category, names, master): + ''' + Merge the specified files into a single file, automatically migrating all + links and references to the affected files. The files must all be either HTML or CSS files. + + :param category: Must be either ``'text'`` for HTML files or ``'styles'`` for CSS files + :param names: The list of files to be merged + :param master: Which of the merged files is the *master* file, that is, the file that will remain after merging. + ''' if category not in {'text', 'styles'}: raise AbortError('Cannot merge files of type: %s' % category) if len(names) < 2: diff --git a/src/calibre/ebooks/oeb/polish/toc.py b/src/calibre/ebooks/oeb/polish/toc.py index c614bd489b..bcca0b9991 100644 --- a/src/calibre/ebooks/oeb/polish/toc.py +++ b/src/calibre/ebooks/oeb/polish/toc.py @@ -240,6 +240,12 @@ def item_at_top(elem): return True def from_xpaths(container, xpaths): + ''' + Generate a Table of Contents from a list of XPath expressions. Each + expression in the list corresponds to a level of the generated ToC. For + example: :code:`['//h:h1', '//h:h2', '//h:h3']` will generate a three level + table of contents from the ``<h1>``, ``<h2>`` and ``<h3>`` tags. + ''' tocroot = TOC() xpaths = [XPath(xp) for xp in xpaths] level_prev = {i+1:None for i in xrange(len(xpaths))} @@ -295,6 +301,9 @@ def from_xpaths(container, xpaths): return tocroot def from_links(container): + ''' + Generate a Table of Contents from links in the book. + ''' toc = TOC() link_path = XPath('//h:a[@href]') seen_titles, seen_dests = set(), set() @@ -338,6 +347,9 @@ def find_text(node): return text def from_files(container): + ''' + Generate a Table of Contents from files in the book. + ''' toc = TOC() for i, spinepath in enumerate(container.spine_items): name = container.abspath_to_name(spinepath) @@ -484,6 +496,11 @@ def find_inline_toc(container): return name def create_inline_toc(container, title=None): + ''' + Create an inline (HTML) Table of Contents from an existing NCX table of contents. + + :param title: The title for this table of contents. + ''' lang = get_book_language(container) default_title = 'Table of Contents' if lang: