From 241a2fc09911709065c8ffe42cf84e1803ad10c2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 21 Apr 2009 14:10:00 -0700 Subject: [PATCH] MOBI Output:Fix bug that cause MOBI conversion to sometimes fail when linking to an external stylesheet --- src/calibre/ebooks/html.py | 251 ++++++++++++++-------------- src/calibre/ebooks/mobi/from_any.py | 10 +- 2 files changed, 130 insertions(+), 131 deletions(-) diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py index f405040c77..d515c606d3 100644 --- a/src/calibre/ebooks/html.py +++ b/src/calibre/ebooks/html.py @@ -30,10 +30,10 @@ from calibre.utils.zipfile import ZipFile from cssutils import CSSParser class HTMLElement(HtmlElement): - + @apply def specified_font_size(): - + def fget(self): ans = self.get('specified_font_size', '') if not ans: @@ -41,12 +41,12 @@ class HTMLElement(HtmlElement): if ans.startswith('f'): return functools.partial(operator.mul, float(ans[1:])) return float(ans) - + def fset(self, val): self.set('specified_font_size', ('f'+repr(val(1))) if callable(val) else repr(val)) - + return property(fget=fget, fset=fset) - + @apply def computed_font_size(): def fget(self): @@ -54,48 +54,48 @@ class HTMLElement(HtmlElement): if ans == '': return None return float(ans) - + def fset(self, val): self.set('computed_font_size', repr(val)) - + return property(fget=fget, fset=fset) - + def remove_font_size_information(self): for elem in self.iter(): for p in ('computed', 'specified'): elem.attrib.pop(p+'_font_size', None) - + def getpath(self): return self.getroottree().getpath(self) class Lookup(HtmlElementClassLookup): - + def lookup(self, node_type, document, namespace, name): if node_type == 'element': return HTMLElement return HtmlElementClassLookup.lookup(self, node_type, document, namespace, name) class HTMLParser(_HTMLParser): - + def __init__(self, **kwargs): super(HTMLParser, self).__init__(**kwargs) self.set_element_class_lookup(Lookup()) - + parser = HTMLParser() def fromstring(raw, **kw): return _fromstring(raw, parser=parser, **kw) def tostring(root, pretty_print=False): - return _tostring(root, encoding='utf-8', method='xml', - include_meta_content_type=True, + return _tostring(root, encoding='utf-8', method='xml', + include_meta_content_type=True, pretty_print=pretty_print) - + class Link(object): ''' Represents a link in a HTML file. ''' - + @classmethod def url_to_local_path(cls, url, base): path = urlunparse(('', '', url.path, url.params, url.query, '')) @@ -103,7 +103,7 @@ class Link(object): if os.path.isabs(path): return path return os.path.abspath(os.path.join(base, path)) - + def __init__(self, url, base): ''' :param url: The url this link points to. Must be an unquoted unicode string. @@ -127,13 +127,13 @@ class Link(object): def __eq__(self, other): return self.path == getattr(other, 'path', other) - + def __str__(self): - return u'Link: %s --> %s'%(self.url, self.path) - + return u'Link: %s --> %s'%(self.url, self.path) + class IgnoreFile(Exception): - + def __init__(self, msg, errno): Exception.__init__(self, msg) self.doesnt_exist = errno == 2 @@ -148,13 +148,13 @@ class HTMLFile(object): The encoding of the file is available as :member:`encoding`. 
''' - + HTML_PAT = re.compile(r'<\s*html', re.IGNORECASE) TITLE_PAT = re.compile('([^<>]+)', re.IGNORECASE) LINK_PAT = re.compile( r'<\s*a\s+.*?href\s*=\s*(?:(?:"(?P[^"]+)")|(?:\'(?P[^\']+)\')|(?P[^\s>]+))', re.DOTALL|re.IGNORECASE) - + def __init__(self, path_to_html_file, level, encoding, verbose, referrer=None): ''' :param level: The level of this file. Should be 0 for the root file. @@ -167,7 +167,7 @@ class HTMLFile(object): self.level = level self.referrer = referrer self.links = [] - + try: with open(self.path, 'rb') as f: src = f.read() @@ -176,7 +176,7 @@ class HTMLFile(object): if level == 0: raise IOError(msg) raise IgnoreFile(msg, err.errno) - + self.is_binary = not bool(self.HTML_PAT.search(src[:1024])) if not self.is_binary: if encoding is None: @@ -189,19 +189,19 @@ class HTMLFile(object): match = self.TITLE_PAT.search(src) self.title = match.group(1) if match is not None else self.title self.find_links(src) - - - + + + def __eq__(self, other): return self.path == getattr(other, 'path', other) - + def __str__(self): return u'HTMLFile:%d:%s:%s'%(self.level, 'b' if self.is_binary else 'a', self.path) - + def __repr__(self): return str(self) - - + + def find_links(self, src): for match in self.LINK_PAT.finditer(src): url = None @@ -212,7 +212,7 @@ class HTMLFile(object): link = self.resolve(url) if link not in self.links: self.links.append(link) - + def resolve(self, url): return Link(url, self.base) @@ -234,13 +234,13 @@ def depth_first(root, flat, visited=set([])): if hf not in visited: yield hf visited.add(hf) - - + + def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None): ''' Recursively traverse all links in the HTML file. - - :param max_levels: Maximum levels of recursion. Must be non-negative. 0 + + :param max_levels: Maximum levels of recursion. Must be non-negative. 0 implies that no links in the root HTML file are followed. :param encoding: Specify character encoding of HTML files. If `None` it is auto-detected. @@ -271,7 +271,7 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None) print repr(err) for link in rejects: hf.links.remove(link) - + next_level = list(nl) orec = sys.getrecursionlimit() sys.setrecursionlimit(500000) @@ -279,14 +279,14 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None) return flat, list(depth_first(flat[0], flat)) finally: sys.setrecursionlimit(orec) - - + + def opf_traverse(opf_reader, verbose=0, encoding=None): ''' Return a list of :class:`HTMLFile` objects in the order specified by the `` element of the OPF. - - :param opf_reader: An :class:`calibre.ebooks.metadata.opf.OPFReader` instance. + + :param opf_reader: An :class:`calibre.ebooks.metadata.opf.OPFReader` instance. :param encoding: Specify character encoding of HTML files. If `None` it is auto-detected. ''' @@ -317,7 +317,7 @@ def opf_traverse(opf_reader, verbose=0, encoding=None): print 'WARNING: OPF spine item %s does not exist'%path ans = [f for f in ans if not f.is_binary] return ans - + convert_entities = functools.partial(entity_to_unicode, exceptions=['quot', 'apos', 'lt', 'gt', 'amp']) _span_pat = re.compile('', re.DOTALL|re.IGNORECASE) @@ -326,20 +326,20 @@ def sanitize_head(match): x = match.group(1) x = _span_pat.sub('', x) return '\n'+x+'\n' - + class PreProcessor(object): PREPROCESS = [ # Some idiotic HTML generators (Frontpage I'm looking at you) # Put all sorts of crap into . 
This messes up lxml
-        (re.compile(r'<head[^>]*>(.*?)</head>', re.IGNORECASE|re.DOTALL),
+        (re.compile(r'<head[^>]*>(.*?)</head>', re.IGNORECASE|re.DOTALL),
          sanitize_head),
         # Convert all entities, since lxml doesn't handle them well
         (re.compile(r'&(\S+?);'), convert_entities),
         # Remove the
-        (re.compile(r'', re.IGNORECASE),
+        (re.compile(r'', re.IGNORECASE),
          lambda match: ''),
         ]
-
+
     # Fix pdftohtml markup
     PDFTOHTML = [
                  # Remove <hr> tags
@@ -348,20 +348,20 @@ class PreProcessor(object):
                  (re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
                  # Remove <br> and replace <br><br> with <p>
                  (re.compile(r'<br.*?>\s*<br.*?>', re.IGNORECASE), lambda match: '<p>'),
-                 (re.compile(r'<i>(.*)</i>', re.IGNORECASE),
-                  lambda match: match.group() if re.match('<', match.group(1).lstrip()) or len(match.group(1)) < 40
+                 (re.compile(r'<i>(.*)</i>', re.IGNORECASE),
+                  lambda match: match.group() if re.match('<', match.group(1).lstrip()) or len(match.group(1)) < 40
                                else match.group(1)),
                  # Remove hyphenation
                  (re.compile(r'-\n\r?'), lambda match: ''),
-
+
                  # Remove gray background
                  (re.compile(r'<BODY[^<>]+>'), lambda match : '<BODY>'),
-
+
                  # Remove non breaking spaces
                  (re.compile(ur'\u00a0'), lambda match : ' '),
-
+
                  ]
-
+
     # Fix Book Designer markup
     BOOK_DESIGNER = [
                     # HR
@@ -377,17 +377,17 @@ class PreProcessor(object):
                     (re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
                      lambda match : '<h3 class="subtitle">%s</h3>
'%(match.group(1),)), ] - + def is_baen(self, src): - return re.compile(r'<]*id=BookTitle', raw) is not None - + def is_pdftohtml(self, src): return '' in src[:1000] - + def preprocess(self, html): opts = getattr(self, 'opts', False) if opts and hasattr(opts, 'profile') and getattr(opts.profile, 'remove_special_chars', False): @@ -403,17 +403,17 @@ class PreProcessor(object): for rule in self.PREPROCESS + rules: html = rule[0].sub(rule[1], html) return html - + class Parser(PreProcessor, LoggingInterface): # SELF_CLOSING_TAGS = 'hr|br|link|img|meta|input|area|base|basefont' -# SELF_CLOSING_RULES = [re.compile(p[0]%SELF_CLOSING_TAGS, re.IGNORECASE) for p in +# SELF_CLOSING_RULES = [re.compile(p[0]%SELF_CLOSING_TAGS, re.IGNORECASE) for p in # [ # (r'<(?P%s)(?P(\s+[^<>]*){0,1})(?', # '<\g\g />'), # (), # ] # ] - + def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, name='htmlparser'): LoggingInterface.__init__(self, logging.getLogger(name)) self.setup_cli_handler(opts.verbose) @@ -433,27 +433,27 @@ class Parser(PreProcessor, LoggingInterface): name = os.path.splitext(name)[0] + '_cr_%d'%save_counter + os.path.splitext(name)[1] save_counter += 1 self.htmlfile_map[f.path] = name - + self.parse_html() # Handle tags inside embedded # At least one source of EPUB files (Penguin) uses xlink:href # without declaring the xlink namespace - for image in self.root.xpath('//image'): + for image in self.root.xpath('//image'): for attr in image.attrib.keys(): if attr.endswith(':href'): nhref = self.rewrite_links(image.get(attr)) image.set(attr, nhref) - + self.root.rewrite_links(self.rewrite_links, resolve_base_href=False) for bad in ('xmlns', 'lang', 'xml:lang'): # lxml also adds these attributes for XHTML documents, leading to duplicates if self.root.get(bad, None) is not None: self.root.attrib.pop(bad) - - - + + + def save_path(self): return os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path]) - + def save(self, strip_comments=False): ''' Save processed HTML into the content directory. 
@@ -463,7 +463,7 @@ class Parser(PreProcessor, LoggingInterface): self.root.set('xmlns:xlink', 'http://www.w3.org/1999/xlink') for svg in self.root.xpath('//svg'): svg.set('xmlns', 'http://www.w3.org/2000/svg') - + ans = tostring(self.root, pretty_print=self.opts.pretty_print) ans = re.compile(r'', re.IGNORECASE).sub( '\n\t 50: text = text[:50] + u'\u2026' self.next_play_order += 1 - return target.add_item(href, fragment, text, type=type, + return target.add_item(href, fragment, text, type=type, play_order=self.next_play_order) add_item = Adder(toc) name = self.htmlfile_map[self.htmlfile.path] href = 'content/'+name - + # Add level* TOC items counter = 0 - + def elem_to_link(elem, href, counter): text = (u''.join(elem.xpath('string()'))).strip() if not text: @@ -662,8 +662,8 @@ class Processor(Parser): elem.set('id', id) frag = id return text, _href, frag - - + + if self.opts.level1_toc is not None: level1 = self.opts.level1_toc(self.root) level1_order = [] @@ -702,17 +702,17 @@ class Processor(Parser): counter += 1 if text: add_item(_href, frag, text, level2, type='chapter') - - + + if level1_order: # Fix play order next_play_order = level1_order[0].play_order for x in level1_order: for y in x.flat(): y.play_order = next_play_order next_play_order += 1 - - - + + + if len(toc) > 0: # Detected TOC entries using --level* options # so aborting all other toc processing @@ -726,7 +726,7 @@ class Processor(Parser): id = elem.get('id', 'calibre_chapter_%d'%counter) elem.set('id', id) add_item(href, id, text, toc, type='chapter') - + if len(list(toc.flat())) >= self.opts.toc_threshold: return referrer = toc @@ -745,7 +745,7 @@ class Processor(Parser): name = self.htmlfile_map[self.htmlfile.referrer.path] href = 'content/'+name referrer = add_item(href, None, text, toc) - + # Add links to TOC if int(self.opts.max_toc_links) > 0: for link in list(self.LINKS_PATH(self.root))[:self.opts.max_toc_links]: @@ -762,7 +762,7 @@ class Processor(Parser): if len(parts) > 1: fragment = parts[1] add_item(href, fragment, text, referrer) - + @classmethod def preprocess_css(cls, css, dpi=96): def rescale(match): @@ -772,17 +772,17 @@ class Processor(Parser): except ValueError: return '' return '%fpt'%(72 * val/dpi) - + css = cls.PIXEL_PAT.sub(rescale, css) css = cls.PAGE_PAT.sub('', css) return css - + def extract_css(self, parsed_sheets): ''' - Remove all CSS information from the document and store it as + Remove all CSS information from the document and store it as :class:`StyleSheet` objects. 
''' - + def get_id(chapter, counter, prefix='calibre_css_'): new_id = '%s_%d'%(prefix, counter) if chapter.tag.lower() == 'a' and 'name' in chapter.keys(): @@ -796,7 +796,7 @@ class Processor(Parser): id = new_id chapter.set('id', id) return id - + self.external_stylesheets, self.stylesheet = [], self.css_parser.parseString('') self.specified_override_css = [] for link in self.root.xpath('//link'): @@ -825,8 +825,7 @@ class Processor(Parser): self.log_exception('') if parsed_sheets.has_key(file): self.external_stylesheets.append(parsed_sheets[file]) - - + for style in self.root.xpath('//style'): if 'css' in style.get('type', 'text/css').lower(): override_css = style.get('title', '') == 'override_css' @@ -889,7 +888,7 @@ class Processor(Parser): cn += classname font.set('class', cn) font.tag = 'span' - + id_css, id_css_counter = {}, 0 for elem in self.root.xpath('//*[@style]'): setting = elem.get('style') @@ -906,7 +905,7 @@ class Processor(Parser): cn = elem.get('class', classname) elem.set('class', cn) elem.attrib.pop('style') - + css = '\n'.join(['.%s {%s;}'%(cn, setting) for \ setting, cn in cache.items()]) css += '\n\n' @@ -930,28 +929,28 @@ class Processor(Parser): self.override_css = self.css_parser.parseString(self.preprocess_css(css)) for rule in reversed(self.specified_override_css): self.override_css.insertRule(rule, index=0) - - + + def config(defaults=None, config_name='html', desc=_('Options to control the traversal of HTML')): if defaults is None: c = Config(config_name, desc) else: c = StringConfig(defaults, desc) - + c.add_opt('output', ['-o', '--output'], default=None, help=_('The output directory. Default is the current directory.')) c.add_opt('encoding', ['--encoding'], default=None, help=_('Character encoding for HTML files. Default is to auto detect.')) c.add_opt('zip', ['--zip'], default=False, help=_('Create the output in a zip file. If this option is specified, the --output should be the name of a file not a directory.')) - + traversal = c.add_group('traversal', _('Control the following of links in HTML files.')) traversal('breadth_first', ['--breadth-first'], default=False, help=_('Traverse links in HTML files breadth first. Normally, they are traversed depth first')) traversal('max_levels', ['--max-levels'], default=sys.getrecursionlimit(), group='traversal', help=_('Maximum levels of recursion when following links in HTML files. Must be non-negative. 0 implies that no links in the root HTML file are followed.')) - + metadata = c.add_group('metadata', _('Set metadata of the generated ebook')) metadata('title', ['-t', '--title'], default=None, help=_('Set the title. Default is to autodetect.')) @@ -965,13 +964,13 @@ def config(defaults=None, config_name='html', help=_('A summary of this book.')) metadata('from_opf', ['--metadata-from'], default=None, help=_('Load metadata from the specified OPF file')) - + debug = c.add_group('debug', _('Options useful for debugging')) debug('verbose', ['-v', '--verbose'], default=0, action='count', help=_('Be more verbose while processing. Can be specified multiple times to increase verbosity.')) debug('pretty_print', ['--pretty-print'], default=False, help=_('Output HTML is "pretty printed" for easier parsing by humans')) - + return c def option_parser(): @@ -980,7 +979,7 @@ def option_parser(): %prog [options] file.html|opf Follow all links in an HTML file and collect them into the specified directory. -Also collects any resources like images, stylesheets, scripts, etc. 
+Also collects any resources like images, stylesheets, scripts, etc. If an OPF file is specified instead, the list of files in its element is used. ''')) @@ -1056,11 +1055,11 @@ def merge_metadata(htmlfile, opf, opts): elif attr == 'tags': val = [i.strip() for i in val.split(',') if i.strip()] setattr(mi, attr, val) - + cover = getattr(opts, 'cover', False) if cover and os.path.exists(cover): mi.cover = os.path.abspath(cover) - + if not mi.title: if htmlfile: mi.title = os.path.splitext(os.path.basename(htmlfile))[0] @@ -1092,13 +1091,13 @@ def rebase_toc(toc, htmlfile_map, basepath, root=True): def fix_entry(entry): if entry.abspath in htmlfile_map.keys(): entry.href = 'content/' + htmlfile_map[entry.abspath] - + for entry in toc: rebase_toc(entry, htmlfile_map, basepath, root=False) fix_entry(entry) if root: toc.base_path = basepath - + def create_dir(htmlfile, opts): ''' Create a directory that contains the open ebook @@ -1110,16 +1109,16 @@ def create_dir(htmlfile, opts): else: opf, filelist = get_filelist(htmlfile, opts) mi = merge_metadata(htmlfile, opf, opts) - + resource_map, htmlfile_map = parse_content(filelist, opts) resources = [os.path.join(opts.output, 'content', f) for f in resource_map.values()] - + if opf and opf.cover and os.access(opf.cover, os.R_OK): cpath = os.path.join(opts.output, 'content', 'resources', '_cover_'+os.path.splitext(opf.cover)[-1]) shutil.copyfile(opf.cover, cpath) resources.append(cpath) mi.cover = cpath - + spine = [htmlfile_map[f.path] for f in filelist] mi = create_metadata(opts.output, mi, spine, resources) buf = cStringIO.StringIO() @@ -1132,7 +1131,7 @@ def create_dir(htmlfile, opts): with open(os.path.join(opts.output, 'toc.ncx'), 'wb') as f: f.write(toc) print 'Open ebook created in', opts.output - + def create_oebzip(htmlfile, opts): ''' Create a zip file that contains the Open ebook. @@ -1154,13 +1153,13 @@ def main(args=sys.argv): parser.print_help() print _('You must specify an input HTML file') return 1 - + htmlfile = args[1] if opts.zip: create_oebzip(htmlfile, opts) else: create_dir(htmlfile, opts) - + return 0 def gui_main(htmlfile, pt=None): @@ -1183,7 +1182,7 @@ output = %s if len(nontrivial) < 2: return None return pt.name - + if __name__ == '__main__': sys.exit(main()) diff --git a/src/calibre/ebooks/mobi/from_any.py b/src/calibre/ebooks/mobi/from_any.py index 5607690e21..fc9e94dafb 100644 --- a/src/calibre/ebooks/mobi/from_any.py +++ b/src/calibre/ebooks/mobi/from_any.py @@ -21,7 +21,7 @@ def config(defaults=None): c.remove_opt('profile') mobic = mobi_config(defaults=defaults) c.update(mobic) - return c + return c def option_parser(usage=USAGE): usage = usage % ('Mobipocket', formats()) @@ -33,13 +33,13 @@ def any2mobi(opts, path, notification=None): if not ext: raise ValueError('Unknown file type: '+path) ext = ext.lower()[1:] - + if opts.output is None: opts.output = os.path.splitext(os.path.basename(path))[0]+'.mobi' - + opts.output = os.path.abspath(opts.output) orig_output = opts.output - + with TemporaryDirectory('_any2mobi') as tdir: oebdir = os.path.join(tdir, 'oeb') os.mkdir(oebdir) @@ -54,7 +54,7 @@ def any2mobi(opts, path, notification=None): opts.output = orig_output logging.getLogger('html2epub').info(_('Creating Mobipocket file from EPUB...')) oeb2mobi(opts, opf) - + def main(args=sys.argv): parser = option_parser()