MOBI Output: Fix bug that caused MOBI conversion to sometimes fail when linking to an external stylesheet

2026-02-06 11:03:30 -05:00 · 2009-04-21 14:10:00 -07:00 · 2009-04-21 14:10:00 -07:00 · 241a2fc099
commit 241a2fc099
parent 8dd974ff42
2 changed files with 130 additions and 131 deletions
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@ -30,10 +30,10 @@ from calibre.utils.zipfile import ZipFile
 from cssutils import CSSParser

 class HTMLElement(HtmlElement):
-    
+
    @apply
    def specified_font_size():
-        
+
        def fget(self):
            ans = self.get('specified_font_size', '')
            if not ans:
@ -41,12 +41,12 @@ class HTMLElement(HtmlElement):
            if ans.startswith('f'):
                return functools.partial(operator.mul, float(ans[1:]))
            return float(ans)
-        
+
        def fset(self, val):
            self.set('specified_font_size', ('f'+repr(val(1))) if callable(val) else repr(val))
-                     
+
        return property(fget=fget, fset=fset)
-    
+
    @apply
    def computed_font_size():
        def fget(self):
@ -54,48 +54,48 @@ class HTMLElement(HtmlElement):
            if ans == '':
                return None
            return float(ans)
-        
+
        def fset(self, val):
            self.set('computed_font_size', repr(val))
-        
+
        return property(fget=fget, fset=fset)
-    
+
    def remove_font_size_information(self):
        for elem in self.iter():
            for p in ('computed', 'specified'):
                elem.attrib.pop(p+'_font_size', None)
-                
+
    def getpath(self):
        return self.getroottree().getpath(self)

 class Lookup(HtmlElementClassLookup):
-    
+
    def lookup(self, node_type, document, namespace, name):
        if node_type == 'element':
            return HTMLElement
        return HtmlElementClassLookup.lookup(self, node_type, document, namespace, name)

 class HTMLParser(_HTMLParser):
-    
+
    def __init__(self, **kwargs):
        super(HTMLParser, self).__init__(**kwargs)
        self.set_element_class_lookup(Lookup())
-        
+
 parser = HTMLParser()

 def fromstring(raw, **kw):
    return _fromstring(raw, parser=parser, **kw)

 def tostring(root, pretty_print=False):
-    return _tostring(root, encoding='utf-8', method='xml', 
-                         include_meta_content_type=True, 
+    return _tostring(root, encoding='utf-8', method='xml',
+                         include_meta_content_type=True,
                         pretty_print=pretty_print)
-    
+
 class Link(object):
    '''
    Represents a link in a HTML file.
    '''
-    
+
    @classmethod
    def url_to_local_path(cls, url, base):
        path = urlunparse(('', '', url.path, url.params, url.query, ''))
@ -103,7 +103,7 @@ class Link(object):
        if os.path.isabs(path):
            return path
        return os.path.abspath(os.path.join(base, path))
-    
+
    def __init__(self, url, base):
        '''
        :param url:  The url this link points to. Must be an unquoted unicode string.
@ -127,13 +127,13 @@ class Link(object):

    def __eq__(self, other):
        return self.path == getattr(other, 'path', other)
-    
+
    def __str__(self):
-        return u'Link: %s --> %s'%(self.url, self.path) 
-        
+        return u'Link: %s --> %s'%(self.url, self.path)
+

 class IgnoreFile(Exception):
-    
+
    def __init__(self, msg, errno):
        Exception.__init__(self, msg)
        self.doesnt_exist = errno == 2
@ -148,13 +148,13 @@ class HTMLFile(object):

    The encoding of the file is available as :member:`encoding`.
    '''
-    
+
    HTML_PAT  = re.compile(r'<\s*html', re.IGNORECASE)
    TITLE_PAT = re.compile('<title>([^<>]+)</title>', re.IGNORECASE)
    LINK_PAT  = re.compile(
    r'<\s*a\s+.*?href\s*=\s*(?:(?:"(?P<url1>[^"]+)")|(?:\'(?P<url2>[^\']+)\')|(?P<url3>[^\s>]+))',
    re.DOTALL|re.IGNORECASE)
-    
+
    def __init__(self, path_to_html_file, level, encoding, verbose, referrer=None):
        '''
        :param level: The level of this file. Should be 0 for the root file.
@ -167,7 +167,7 @@ class HTMLFile(object):
        self.level    = level
        self.referrer = referrer
        self.links    = []
-        
+
        try:
            with open(self.path, 'rb') as f:
                src = f.read()
@ -176,7 +176,7 @@ class HTMLFile(object):
            if level == 0:
                raise IOError(msg)
            raise IgnoreFile(msg, err.errno)
-        
+
        self.is_binary = not bool(self.HTML_PAT.search(src[:1024]))
        if not self.is_binary:
            if encoding is None:
@ -189,19 +189,19 @@ class HTMLFile(object):
            match = self.TITLE_PAT.search(src)
            self.title = match.group(1) if match is not None else self.title
            self.find_links(src)
-                
-        
-                    
+
+
+
    def __eq__(self, other):
        return self.path == getattr(other, 'path', other)
-    
+
    def __str__(self):
        return u'HTMLFile:%d:%s:%s'%(self.level, 'b' if self.is_binary else 'a', self.path)
-    
+
    def __repr__(self):
        return str(self)
-                    
-        
+
+
    def find_links(self, src):
        for match in self.LINK_PAT.finditer(src):
            url = None
@ -212,7 +212,7 @@ class HTMLFile(object):
            link = self.resolve(url)
            if link not in self.links:
                self.links.append(link)
-                
+
    def resolve(self, url):
        return Link(url, self.base)

@ -234,13 +234,13 @@ def depth_first(root, flat, visited=set([])):
                    if hf not in visited:
                        yield hf
                        visited.add(hf)
-        
-                                
+
+
 def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None):
    '''
    Recursively traverse all links in the HTML file.
-    
-    :param max_levels: Maximum levels of recursion. Must be non-negative. 0 
+
+    :param max_levels: Maximum levels of recursion. Must be non-negative. 0
                       implies that no links in the root HTML file are followed.
    :param encoding:   Specify character encoding of HTML files. If `None` it is
                       auto-detected.
@ -271,7 +271,7 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None)
                        print repr(err)
            for link in rejects:
                hf.links.remove(link)
-                
+
        next_level = list(nl)
    orec = sys.getrecursionlimit()
    sys.setrecursionlimit(500000)
@ -279,14 +279,14 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None)
        return flat, list(depth_first(flat[0], flat))
    finally:
        sys.setrecursionlimit(orec)
-    
-    
+
+
 def opf_traverse(opf_reader, verbose=0, encoding=None):
    '''
    Return a list of :class:`HTMLFile` objects in the order specified by the
    `<spine>` element of the OPF.
-    
-    :param opf_reader: An :class:`calibre.ebooks.metadata.opf.OPFReader` instance.  
+
+    :param opf_reader: An :class:`calibre.ebooks.metadata.opf.OPFReader` instance.
    :param encoding:   Specify character encoding of HTML files. If `None` it is
                       auto-detected.
    '''
@ -317,7 +317,7 @@ def opf_traverse(opf_reader, verbose=0, encoding=None):
            print 'WARNING: OPF spine item %s does not exist'%path
    ans = [f for f in ans if not f.is_binary]
    return ans
-            
+

 convert_entities = functools.partial(entity_to_unicode, exceptions=['quot', 'apos', 'lt', 'gt', 'amp'])
 _span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
@ -326,20 +326,20 @@ def sanitize_head(match):
    x = match.group(1)
    x = _span_pat.sub('', x)
    return '<head>\n'+x+'\n</head>'
-    
+
 class PreProcessor(object):
    PREPROCESS = [
                  # Some idiotic HTML generators (Frontpage I'm looking at you)
                  # Put all sorts of crap into <head>. This messes up lxml
-                  (re.compile(r'<head[^>]*>(.*?)</head>', re.IGNORECASE|re.DOTALL), 
+                  (re.compile(r'<head[^>]*>(.*?)</head>', re.IGNORECASE|re.DOTALL),
                   sanitize_head),
                  # Convert all entities, since lxml doesn't handle them well
                  (re.compile(r'&(\S+?);'), convert_entities),
                  # Remove the <![if/endif tags inserted by everybody's darling, MS Word
-                  (re.compile(r'</{0,1}!\[(end){0,1}if\]{0,1}>', re.IGNORECASE), 
+                  (re.compile(r'</{0,1}!\[(end){0,1}if\]{0,1}>', re.IGNORECASE),
                   lambda match: ''),
                  ]
-                     
+
    # Fix pdftohtml markup
    PDFTOHTML  = [
                  # Remove <hr> tags
@ -348,20 +348,20 @@ class PreProcessor(object):
                  (re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
                  # Remove <br> and replace <br><br> with <p>
                  (re.compile(r'<br.*?>\s*<br.*?>', re.IGNORECASE), lambda match: '<p>'),
-                  (re.compile(r'(.*)<br.*?>', re.IGNORECASE), 
-                   lambda match: match.group() if re.match('<', match.group(1).lstrip()) or len(match.group(1)) < 40 
+                  (re.compile(r'(.*)<br.*?>', re.IGNORECASE),
+                   lambda match: match.group() if re.match('<', match.group(1).lstrip()) or len(match.group(1)) < 40
                                else match.group(1)),
                  # Remove hyphenation
                  (re.compile(r'-\n\r?'), lambda match: ''),
-                  
+
                  # Remove gray background
                  (re.compile(r'<BODY[^<>]+>'), lambda match : '<BODY>'),
-                  
+
                  # Remove non breaking spaces
                  (re.compile(ur'\u00a0'), lambda match : ' '),
-                  
+
                  ]
-    
+
    # Fix Book Designer markup
    BOOK_DESIGNER = [
                     # HR
@ -377,17 +377,17 @@ class PreProcessor(object):
                     (re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
                      lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
                     ]
-    
+
    def is_baen(self, src):
-        return re.compile(r'<meta\s+name="Publisher"\s+content=".*?Baen.*?"', 
+        return re.compile(r'<meta\s+name="Publisher"\s+content=".*?Baen.*?"',
                          re.IGNORECASE).search(src) is not None
-                          
+
    def is_book_designer(self, raw):
        return re.search('<H2[^><]*id=BookTitle', raw) is not None
-    
+
    def is_pdftohtml(self, src):
        return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
-                          
+
    def preprocess(self, html):
        opts = getattr(self, 'opts', False)
        if opts and hasattr(opts, 'profile') and getattr(opts.profile, 'remove_special_chars', False):
@ -403,17 +403,17 @@ class PreProcessor(object):
        for rule in self.PREPROCESS + rules:
            html = rule[0].sub(rule[1], html)
        return html
-    
+
 class Parser(PreProcessor, LoggingInterface):
 #    SELF_CLOSING_TAGS = 'hr|br|link|img|meta|input|area|base|basefont'
-#    SELF_CLOSING_RULES = [re.compile(p[0]%SELF_CLOSING_TAGS, re.IGNORECASE) for p in 
+#    SELF_CLOSING_RULES = [re.compile(p[0]%SELF_CLOSING_TAGS, re.IGNORECASE) for p in
 #                          [
 #                           (r'<(?P<tag>%s)(?P<attrs>(\s+[^<>]*){0,1})(?<!/)>',
 #                            '<\g<tag>\g<attrs> />'),
 #                           (),
 #                           ]
 #                          ]
-    
+
    def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, name='htmlparser'):
        LoggingInterface.__init__(self, logging.getLogger(name))
        self.setup_cli_handler(opts.verbose)
@ -433,27 +433,27 @@ class Parser(PreProcessor, LoggingInterface):
                name = os.path.splitext(name)[0] + '_cr_%d'%save_counter + os.path.splitext(name)[1]
                save_counter += 1
            self.htmlfile_map[f.path] = name
-        
+
        self.parse_html()
        # Handle <image> tags inside embedded <svg>
        # At least one source of EPUB files (Penguin) uses xlink:href
        # without declaring the xlink namespace
-        for image in self.root.xpath('//image'): 
+        for image in self.root.xpath('//image'):
            for attr in image.attrib.keys():
                if attr.endswith(':href'):
                    nhref = self.rewrite_links(image.get(attr))
                    image.set(attr, nhref)
-        
+
        self.root.rewrite_links(self.rewrite_links, resolve_base_href=False)
        for bad in ('xmlns', 'lang', 'xml:lang'): # lxml also adds these attributes for XHTML documents, leading to duplicates
            if self.root.get(bad, None) is not None:
                self.root.attrib.pop(bad)
-        
-        
-        
+
+
+
    def save_path(self):
        return os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path])
-    
+
    def save(self, strip_comments=False):
        '''
        Save processed HTML into the content directory.
@ -463,7 +463,7 @@ class Parser(PreProcessor, LoggingInterface):
        self.root.set('xmlns:xlink', 'http://www.w3.org/1999/xlink')
        for svg in self.root.xpath('//svg'):
            svg.set('xmlns', 'http://www.w3.org/2000/svg')
-        
+
        ans = tostring(self.root, pretty_print=self.opts.pretty_print)
        ans = re.compile(r'<head>', re.IGNORECASE).sub(
            '<head>\n\t<meta http-equiv="Content-Type" '
@ -503,7 +503,7 @@ class Parser(PreProcessor, LoggingInterface):
            self.root.remove(head)
            self.root.insert(0, head)

-        self.head = head 
+        self.head = head
        try:
            self.body = self.root.body
        except:
@ -526,7 +526,7 @@ class Parser(PreProcessor, LoggingInterface):
        if not self.head.xpath('./title'):
            title = etree.SubElement(self.head, 'title')
            title.text = _('Unknown')
-    
+
    def debug_tree(self, name):
        '''
        Dump source tree for later debugging.
@ -538,8 +538,8 @@ class Parser(PreProcessor, LoggingInterface):
                    (os.path.basename(self.htmlfile.path), name)), 'wb') as f:
            f.write(tostring(self.root))
            self.log_debug(_('Written processed HTML to ')+f.name)
-    
-            
+
+
    def rewrite_links(self, olink):
        '''
        Make all links in document relative so that they work in the EPUB container.
@ -555,7 +555,7 @@ class Parser(PreProcessor, LoggingInterface):
        if not link.path or not os.path.exists(link.path) or not os.path.isfile(link.path):
            return olink
        if link.path in self.htmlfiles:
-            return self.htmlfile_map[link.path] + frag 
+            return self.htmlfile_map[link.path] + frag
        if re.match(r'\.(x){0,1}htm(l){0,1}', os.path.splitext(link.path)[1]) is not None:
            return olink # This happens when --max-levels is used
        if link.path in self.resource_map.keys():
@ -567,26 +567,26 @@ class Parser(PreProcessor, LoggingInterface):
        name = 'resources/' + name
        self.resource_map[link.path] = name
        return name + frag
-    
-        
+
+

 class Processor(Parser):
    '''
    This class builds on :class:`Parser` to provide additional methods
    to perform various processing/modification tasks on HTML files.
    '''
-    
+
    LINKS_PATH = XPath('//a[@href]')
    PIXEL_PAT  = re.compile(r'([-]?\d+|[-]?\d*\.\d+)px')
    PAGE_PAT   = re.compile(r'@page[^{]*?{[^}]*?}')
-    
+
    def __init__(self, *args, **kwargs):
        Parser.__init__(self, *args, **kwargs)
        temp = LoggingInterface(logging.getLogger('cssutils'))
        temp.setup_cli_handler(self.opts.verbose)
        self.css_parser = CSSParser(log=temp.logger, loglevel=logging.ERROR)
        self.stylesheet = self.font_css = self.override_css = None
-    
+
    def detect_chapters(self):
        self.detected_chapters = self.opts.chapter(self.root)
        chapter_mark = self.opts.chapter_mark
@ -604,12 +604,12 @@ class Processor(Parser):
            else: # chapter_mark == 'both':
                mark = etree.Element('hr', style=page_break_before)
            elem.addprevious(mark)
-    
+
    def save(self, strip_comments=False):
-        style_path = os.path.splitext(os.path.basename(self.save_path()))[0]
+        style_path = os.path.splitext(os.path.basename(self.save_path()))[0]+'_calibre'
        for i, sheet in enumerate([self.stylesheet, self.font_css, self.override_css]):
            if sheet is not None:
-                style = etree.SubElement(self.head, 'link', attrib={'type':'text/css', 'rel':'stylesheet', 
+                style = etree.SubElement(self.head, 'link', attrib={'type':'text/css', 'rel':'stylesheet',
                                                           'href':'resources/%s_%d.css'%(style_path, i),
                                                           'charset':'UTF-8'})
                style.tail = '\n'
@ -620,16 +620,16 @@ class Processor(Parser):
                    raw = raw.encode('utf-8')
                open(path, 'wb').write(raw)
        return Parser.save(self, strip_comments=strip_comments)
-    
+
    def populate_toc(self, toc):
        '''
        Populate the Table of Contents from detected chapters and links.
        '''
        class Adder(object):
-            
+
            def __init__(self, toc):
                self.next_play_order = max([x.play_order for x in toc.flat()])
-                
+
            def __call__(self, href, fragment, text, target, type='link'):
                for entry in toc.flat():
                    if entry.href == href and entry.fragment == fragment:
@ -637,15 +637,15 @@ class Processor(Parser):
                if len(text) > 50:
                    text = text[:50] + u'\u2026'
                self.next_play_order += 1
-                return target.add_item(href, fragment, text, type=type, 
+                return target.add_item(href, fragment, text, type=type,
                                       play_order=self.next_play_order)
        add_item = Adder(toc)
        name = self.htmlfile_map[self.htmlfile.path]
        href = 'content/'+name
-        
+
        # Add level* TOC items
        counter = 0
-        
+
        def elem_to_link(elem, href, counter):
            text = (u''.join(elem.xpath('string()'))).strip()
            if not text:
@ -662,8 +662,8 @@ class Processor(Parser):
                elem.set('id', id)
                frag = id
            return text, _href, frag
-                
-        
+
+
        if self.opts.level1_toc is not None:
            level1 = self.opts.level1_toc(self.root)
            level1_order = []
@ -702,17 +702,17 @@ class Processor(Parser):
                                    counter += 1
                                    if text:
                                        add_item(_href, frag, text, level2, type='chapter')
-                
-            
+
+
            if level1_order: # Fix play order
                next_play_order = level1_order[0].play_order
                for x in level1_order:
                    for y in x.flat():
                        y.play_order = next_play_order
                        next_play_order += 1
-                    
-                        
-                    
+
+
+
            if len(toc) > 0:
                # Detected TOC entries using --level* options
                # so aborting all other toc processing
@ -726,7 +726,7 @@ class Processor(Parser):
                    id = elem.get('id', 'calibre_chapter_%d'%counter)
                    elem.set('id', id)
                    add_item(href, id, text, toc, type='chapter')
-        
+
        if len(list(toc.flat())) >= self.opts.toc_threshold:
            return
        referrer = toc
@ -745,7 +745,7 @@ class Processor(Parser):
                name = self.htmlfile_map[self.htmlfile.referrer.path]
                href = 'content/'+name
                referrer = add_item(href, None, text, toc)
-            
+
        # Add links to TOC
        if int(self.opts.max_toc_links) > 0:
            for link in list(self.LINKS_PATH(self.root))[:self.opts.max_toc_links]:
@ -762,7 +762,7 @@ class Processor(Parser):
                        if len(parts) > 1:
                            fragment = parts[1]
                        add_item(href, fragment, text, referrer)
-                    
+
    @classmethod
    def preprocess_css(cls, css, dpi=96):
        def rescale(match):
@ -772,17 +772,17 @@ class Processor(Parser):
            except ValueError:
                return ''
            return '%fpt'%(72 * val/dpi)
-        
+
        css = cls.PIXEL_PAT.sub(rescale, css)
        css = cls.PAGE_PAT.sub('', css)
        return css
-        
+
    def extract_css(self, parsed_sheets):
        '''
-        Remove all CSS information from the document and store it as 
+        Remove all CSS information from the document and store it as
        :class:`StyleSheet` objects.
        '''
-        
+
        def get_id(chapter, counter, prefix='calibre_css_'):
            new_id = '%s_%d'%(prefix, counter)
            if chapter.tag.lower() == 'a' and  'name' in chapter.keys():
@ -796,7 +796,7 @@ class Processor(Parser):
                id = new_id
                chapter.set('id', id)
            return id
-    
+
        self.external_stylesheets, self.stylesheet = [], self.css_parser.parseString('')
        self.specified_override_css = []
        for link in self.root.xpath('//link'):
@ -825,8 +825,7 @@ class Processor(Parser):
                                    self.log_exception('')
                    if parsed_sheets.has_key(file):
                        self.external_stylesheets.append(parsed_sheets[file])
-                
-        
+
        for style in self.root.xpath('//style'):
            if 'css' in style.get('type', 'text/css').lower():
                override_css = style.get('title', '') == 'override_css'
@ -889,7 +888,7 @@ class Processor(Parser):
            cn += classname
            font.set('class', cn)
            font.tag = 'span'
-            
+
        id_css, id_css_counter = {}, 0
        for elem in self.root.xpath('//*[@style]'):
            setting = elem.get('style')
@ -906,7 +905,7 @@ class Processor(Parser):
                cn = elem.get('class', classname)
                elem.set('class', cn)
            elem.attrib.pop('style')
-        
+
        css = '\n'.join(['.%s {%s;}'%(cn, setting) for \
                         setting, cn in cache.items()])
        css += '\n\n'
@ -930,28 +929,28 @@ class Processor(Parser):
        self.override_css = self.css_parser.parseString(self.preprocess_css(css))
        for rule in reversed(self.specified_override_css):
            self.override_css.insertRule(rule, index=0)
-        
-        
+
+
 def config(defaults=None, config_name='html',
           desc=_('Options to control the traversal of HTML')):
    if defaults is None:
        c = Config(config_name, desc)
    else:
        c = StringConfig(defaults, desc)
-        
+
    c.add_opt('output', ['-o', '--output'], default=None,
             help=_('The output directory. Default is the current directory.'))
    c.add_opt('encoding', ['--encoding'], default=None,
              help=_('Character encoding for HTML files. Default is to auto detect.'))
    c.add_opt('zip', ['--zip'], default=False,
              help=_('Create the output in a zip file. If this option is specified, the --output should be the name of a file not a directory.'))
-    
+
    traversal = c.add_group('traversal', _('Control the following of links in HTML files.'))
    traversal('breadth_first', ['--breadth-first'], default=False,
              help=_('Traverse links in HTML files breadth first. Normally, they are traversed depth first'))
    traversal('max_levels', ['--max-levels'], default=sys.getrecursionlimit(), group='traversal',
              help=_('Maximum levels of recursion when following links in HTML files. Must be non-negative. 0 implies that no links in the root HTML file are followed.'))
-    
+
    metadata = c.add_group('metadata', _('Set metadata of the generated ebook'))
    metadata('title', ['-t', '--title'], default=None,
             help=_('Set the title. Default is to autodetect.'))
@ -965,13 +964,13 @@ def config(defaults=None, config_name='html',
             help=_('A summary of this book.'))
    metadata('from_opf', ['--metadata-from'], default=None,
              help=_('Load metadata from the specified OPF file'))
-        
+
    debug = c.add_group('debug', _('Options useful for debugging'))
    debug('verbose', ['-v', '--verbose'], default=0, action='count',
          help=_('Be more verbose while processing. Can be specified multiple times to increase verbosity.'))
    debug('pretty_print', ['--pretty-print'], default=False,
          help=_('Output HTML is "pretty printed" for easier parsing by humans'))
-    
+
    return c

 def option_parser():
@ -980,7 +979,7 @@ def option_parser():
 %prog [options] file.html|opf

 Follow all links in an HTML file and collect them into the specified directory.
-Also collects any resources like images, stylesheets, scripts, etc. 
+Also collects any resources like images, stylesheets, scripts, etc.
 If an OPF file is specified instead, the list of files in its <spine> element
 is used.
 '''))
@ -1056,11 +1055,11 @@ def merge_metadata(htmlfile, opf, opts):
        elif attr == 'tags':
            val = [i.strip() for i in val.split(',') if i.strip()]
        setattr(mi, attr, val)
-        
+
    cover = getattr(opts, 'cover', False)
    if cover and os.path.exists(cover):
        mi.cover = os.path.abspath(cover)
-        
+
    if not mi.title:
        if htmlfile:
            mi.title = os.path.splitext(os.path.basename(htmlfile))[0]
@ -1092,13 +1091,13 @@ def rebase_toc(toc, htmlfile_map, basepath, root=True):
    def fix_entry(entry):
        if entry.abspath in htmlfile_map.keys():
            entry.href = 'content/' +  htmlfile_map[entry.abspath]
-            
+
    for entry in toc:
        rebase_toc(entry, htmlfile_map, basepath, root=False)
        fix_entry(entry)
    if root:
        toc.base_path = basepath
-    
+
 def create_dir(htmlfile, opts):
    '''
    Create a directory that contains the open ebook
@ -1110,16 +1109,16 @@ def create_dir(htmlfile, opts):
    else:
        opf, filelist = get_filelist(htmlfile, opts)
        mi = merge_metadata(htmlfile, opf, opts)
-    
+
    resource_map, htmlfile_map = parse_content(filelist, opts)
    resources = [os.path.join(opts.output, 'content', f) for f in resource_map.values()]
-    
+
    if opf and opf.cover and os.access(opf.cover, os.R_OK):
        cpath = os.path.join(opts.output, 'content', 'resources', '_cover_'+os.path.splitext(opf.cover)[-1])
        shutil.copyfile(opf.cover, cpath)
        resources.append(cpath)
        mi.cover = cpath
-    
+
    spine = [htmlfile_map[f.path] for f in filelist]
    mi = create_metadata(opts.output, mi, spine, resources)
    buf = cStringIO.StringIO()
@ -1132,7 +1131,7 @@ def create_dir(htmlfile, opts):
        with open(os.path.join(opts.output, 'toc.ncx'), 'wb') as f:
            f.write(toc)
    print 'Open ebook created in', opts.output
-    
+
 def create_oebzip(htmlfile, opts):
    '''
    Create a zip file that contains the Open ebook.
@ -1154,13 +1153,13 @@ def main(args=sys.argv):
        parser.print_help()
        print _('You must specify an input HTML file')
        return 1
-    
+
    htmlfile = args[1]
    if opts.zip:
        create_oebzip(htmlfile, opts)
    else:
        create_dir(htmlfile, opts)
-        
+
    return 0

 def gui_main(htmlfile, pt=None):
@ -1183,7 +1182,7 @@ output  = %s
    if len(nontrivial) < 2:
        return None
    return pt.name
-    
+

 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/mobi/from_any.py
+++ b/src/calibre/ebooks/mobi/from_any.py
@ -21,7 +21,7 @@ def config(defaults=None):
    c.remove_opt('profile')
    mobic = mobi_config(defaults=defaults)
    c.update(mobic)
-    return c 
+    return c

 def option_parser(usage=USAGE):
    usage = usage % ('Mobipocket', formats())
@ -33,13 +33,13 @@ def any2mobi(opts, path, notification=None):
    if not ext:
        raise ValueError('Unknown file type: '+path)
    ext = ext.lower()[1:]
-    
+
    if opts.output is None:
        opts.output = os.path.splitext(os.path.basename(path))[0]+'.mobi'
-    
+
    opts.output = os.path.abspath(opts.output)
    orig_output = opts.output
-    
+
    with TemporaryDirectory('_any2mobi') as tdir:
        oebdir = os.path.join(tdir, 'oeb')
        os.mkdir(oebdir)
@ -54,7 +54,7 @@ def any2mobi(opts, path, notification=None):
        opts.output = orig_output
        logging.getLogger('html2epub').info(_('Creating Mobipocket file from EPUB...'))
        oeb2mobi(opts, opf)
-    
+

 def main(args=sys.argv):
    parser = option_parser()