Working HTML/OPF input plugin. Also fixed feeds download and removed cover processing from OEBBook

2025-08-11 09:13:57 -04:00 · 2009-04-10 21:12:27 -07:00 · 2009-04-10 21:12:27 -07:00 · 95d1b58ae3
commit 95d1b58ae3
parent 296853cd43
10 changed files with 295 additions and 337 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -189,6 +189,7 @@ class ComicMetadataReader(MetadataReaderPlugin):
    def get_metadata(self, stream, ftype):
        if ftype == 'cbr':
            from calibre.libunrar import extract_member as extract_first
+            extract_first
        else:
            from calibre.libunzip import extract_member as extract_first
        from calibre.ebooks.metadata import MetaInformation
@ -267,12 +268,14 @@ from calibre.ebooks.epub.input import EPUBInput
 from calibre.ebooks.mobi.input import MOBIInput
 from calibre.ebooks.pdf.input import PDFInput
 from calibre.ebooks.txt.input import TXTInput
+from calibre.ebooks.html.input import HTMLInput
 from calibre.ebooks.oeb.output import OEBOutput
 from calibre.ebooks.txt.output import TXTOutput
 from calibre.ebooks.pdf.output import PDFOutput
 from calibre.customize.profiles import input_profiles, output_profiles

-plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, TXTInput, OEBOutput, TXTOutput, PDFOutput]
+plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
+        TXTInput, OEBOutput, TXTOutput, PDFOutput]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                        x.__name__.endswith('MetadataReader')]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
--- a/src/calibre/customize/conversion.py
+++ b/src/calibre/customize/conversion.py
@ -163,9 +163,9 @@ class InputFormatPlugin(Plugin):
            for x in os.listdir('.'):
                shutil.rmtree(x) if os.path.isdir(x) else os.remove(x)

-
            ret = self.convert(stream, options, file_ext,
                               log, accelerators)
+
        if options.debug_input is not None:
            options.debug_input = os.path.abspath(options.debug_input)
            if not os.path.exists(options.debug_input):
--- a/src/calibre/ebooks/html/__init__.py
+++ b/src/calibre/ebooks/html/__init__.py
@ -17,7 +17,7 @@ def tostring(root, strip_comments=False, pretty_print=False):
    root.set('xmlns', 'http://www.w3.org/1999/xhtml')
    root.set('{http://www.w3.org/1999/xhtml}xlink', 'http://www.w3.org/1999/xlink')
    for x in root.iter():
-        if x.tag.rpartition('}')[-1].lower() == 'svg':
+        if hasattr(x.tag, 'rpartition') and x.tag.rpartition('}')[-1].lower() == 'svg':
            x.set('xmlns', 'http://www.w3.org/2000/svg')

    ans = _tostring(root, encoding='utf-8', pretty_print=pretty_print)
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@ -11,14 +11,12 @@ __docformat__ = 'restructuredtext en'
 Input plugin for HTML or OPF ebooks.
 '''

-import os, re, sys, cStringIO
+import os, re, sys
 from urlparse import urlparse, urlunparse
 from urllib import unquote

 from calibre.customize.conversion import InputFormatPlugin
-from calibre.ebooks.metadata.meta import get_metadata
-from calibre.ebooks.metadata.opf2 import OPF, OPFCreator
-from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.customize.conversion import OptionRecommendation
 from calibre import unicode_path
@ -213,72 +211,21 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None)
        sys.setrecursionlimit(orec)


-def opf_traverse(opf_reader, verbose=0, encoding=None):
-    '''
-    Return a list of :class:`HTMLFile` objects in the order specified by the
-    `<spine>` element of the OPF.
-
-    :param opf_reader: An :class:`calibre.ebooks.metadata.opf2.OPF` instance.
-    :param encoding:   Specify character encoding of HTML files. If `None` it is
-                       auto-detected.
-    '''
-    if not opf_reader.spine:
-        raise ValueError('OPF does not have a spine')
-    flat = []
-    for path in opf_reader.spine.items():
-        path = os.path.abspath(path)
-        if path not in flat:
-            flat.append(os.path.abspath(path))
-    for item in opf_reader.manifest:
-        if 'html' in item.mime_type:
-            path = os.path.abspath(item.path)
-            if path not in flat:
-                flat.append(path)
-    for i, path in enumerate(flat):
-        if not os.path.exists(path):
-            path = path.replace('&', '%26')
-            if os.path.exists(path):
-                flat[i] = path
-                for item in opf_reader.itermanifest():
-                    item.set('href', item.get('href').replace('&', '%26'))
-    ans = []
-    for path in flat:
-        if os.path.exists(path):
-            ans.append(HTMLFile(path, 0, encoding, verbose))
-        else:
-            print 'WARNING: OPF spine item %s does not exist'%path
-    ans = [f for f in ans if not f.is_binary]
-    return ans
-
-def search_for_opf(dir):
-    for f in os.listdir(dir):
-        if f.lower().endswith('.opf'):
-            return OPF(open(os.path.join(dir, f), 'rb'), dir)
-
 def get_filelist(htmlfile, dir, opts, log):
    '''
-    Build list of files referenced by html file or try to detect and use an
-    OPF file instead.
+    Build the list of files referenced by the html file by following its
+    links with :func:`traverse`. OPF files are no longer detected here.
    '''
-    print 'Building file list...'
-    opf = search_for_opf(dir)
-    filelist = None
-    if opf is not None:
-        try:
-            filelist = opf_traverse(opf, verbose=opts.verbose,
-                    encoding=opts.input_encoding)
-        except:
-            pass
-    if not filelist:
-        filelist = traverse(htmlfile, max_levels=int(opts.max_levels),
-                            verbose=opts.verbose,
-                            encoding=opts.input_encoding)\
-                    [0 if opts.breadth_first else 1]
+    log.info('Building file list...')
+    filelist = traverse(htmlfile, max_levels=int(opts.max_levels),
+                        verbose=opts.verbose,
+                        encoding=opts.input_encoding)\
+                [0 if opts.breadth_first else 1]
    if opts.verbose:
        log.debug('\tFound files...')
        for f in filelist:
            log.debug('\t\t', f)
-    return opf, filelist
+    return filelist


 class HTMLInput(InputFormatPlugin):
@ -309,34 +256,32 @@ class HTMLInput(InputFormatPlugin):

    def convert(self, stream, opts, file_ext, log,
                accelerators):
+        from calibre.ebooks.metadata.meta import get_metadata
+
        basedir = os.getcwd()
+
        if hasattr(stream, 'name'):
            basedir = os.path.dirname(stream.name)
        if file_ext == 'opf':
-            opf = OPF(stream, basedir)
-            filelist = opf_traverse(opf, verbose=opts.verbose,
-                    encoding=opts.input_encoding)
-            mi = MetaInformation(opf)
+            opfpath = stream.name
        else:
-            opf, filelist = get_filelist(stream.name, basedir, opts, log)
-            mi = MetaInformation(opf)
-            mi.smart_update(get_metadata(stream, 'html'))
+            filelist = get_filelist(stream.name, basedir, opts, log)
+            mi = get_metadata(stream, 'html')
+            mi = OPFCreator(os.getcwdu(), mi)
+            mi.guide = None
+            entries = [(f.path, 'application/xhtml+xml') for f in filelist]
+            mi.create_manifest(entries)
+            mi.create_spine([f.path for f in filelist])

-        mi = OPFCreator(os.getcwdu(), mi)
-        mi.guide = None
-        entries = [(f.path, 'application/xhtml+xml') for f in filelist]
-        mi.create_manifest(entries)
-        mi.create_spine([f.path for f in filelist])
-
-        tocbuf = cStringIO.StringIO()
-        mi.render(open('metadata.opf', 'wb'), tocbuf, 'toc.ncx')
-        toc = tocbuf.getvalue()
-        if toc:
-            open('toc.ncx', 'wb').write(toc)
+            mi.render(open('metadata.opf', 'wb'))
+            opfpath = os.path.abspath('metadata.opf')

        from calibre.ebooks.conversion.plumber import create_oebbook
-        return create_oebbook(log, os.path.abspath('metadata.opf'))
-
-
+        oeb = create_oebbook(log, opfpath)
+
+        from calibre.ebooks.oeb.transforms.package import Package
+        Package(os.getcwdu())(oeb, opts)
+
+        return oeb


--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@ -573,7 +573,7 @@ class OEBReader(object):
        item = self._find_ncx(opf)
        self._toc_from_opf(opf, item)
        self._pages_from_opf(opf, item)
-        self._ensure_cover_image()
+        #self._ensure_cover_image()


 def main(argv=sys.argv):
--- a/src/calibre/ebooks/oeb/transforms/package.py
+++ b/src/calibre/ebooks/oeb/transforms/package.py
@ -6,13 +6,14 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os
+import os, re
 from urllib import unquote as urlunquote
 from functools import partial

 from lxml import etree
 import cssutils

+from calibre import sanitize_file_name
 from calibre.constants import islinux
 from calibre.ebooks.oeb.base import OEB_DOCS, urlnormalize, urldefrag, \
                                    rewrite_links
@ -36,15 +37,21 @@ class Package(object):
        self.new_base_path = os.path.abspath(base)

    def rewrite_links_in(self, item):
-        base = os.path.join(self.new_base_path, *item.href.split('/'))
+        old_href = item.old_href.split('#')[0]
+        new_href = item.href.split('#')[0]
+        base = os.path.join(self.old_base_path, *old_href.split('/'))
        base = os.path.dirname(base)
+        self.log.debug('\tRewriting links in', base+'/'+
+                item.href.rpartition('/')[-1])
+        new_base = os.path.join(self.new_base_path, *new_href.split('/'))
+        new_base = os.path.dirname(new_base)

        if etree.iselement(item.data):
-            self.rewrite_links_in_xml(item.data, base)
+            self.rewrite_links_in_xml(item.data, base, new_base)
        elif hasattr(item.data, 'cssText'):
-            self.rewrite_links_in_css(item.data, base)
+            self.rewrite_links_in_css(item.data, base, new_base)

-    def link_replacer(self, link_, base=''):
+    def link_replacer(self, link_, base='', new_base=''):
        link = urlnormalize(link_)
        link, frag = urldefrag(link)
        link = urlunquote(link).replace('/', os.sep)
@ -55,20 +62,33 @@ class Package(object):
            link = link.lower()
        if link not in self.map:
            return link_
-        nlink = os.path.relpath(self.map[link], base)
+        nlink = os.path.relpath(self.map[link], new_base)
        if frag:
-            nlink = '#'.join(nlink, frag)
+            nlink = '#'.join((nlink, frag))
        return nlink.replace(os.sep, '/')

-    def rewrite_links_in_css(self, sheet, base):
-        repl = partial(self.link_replacer, base=base)
+    def rewrite_links_in_css(self, sheet, base, new_base):
+        repl = partial(self.link_replacer, base=base, new_base=new_base)
        cssutils.replaceUrls(sheet, repl)

-    def rewrite_links_in_xml(self, root, base):
-        repl = partial(self.link_replacer, base=base)
+    def rewrite_links_in_xml(self, root, base, new_base):
+        repl = partial(self.link_replacer, base=base, new_base=new_base)
        rewrite_links(root, repl)

-    def move_manifest_item(self, item):
+    def uniqify_name(self, new_href, hrefs):
+        '''Return `new_href`, or a `_N` suffixed variant of it not in `hrefs`.'''
+        if new_href not in hrefs:
+            return new_href
+        # Only the counter varies between candidates, so split the name once
+        folder, slash, fname = new_href.rpartition('/')
+        name, ext = os.path.splitext(fname)
+        stem = folder + slash + re.sub(r'_\d+$', '', name)
+        c = 1
+        while '%s_%d%s'%(stem, c, ext) in hrefs:
+            c += 1
+        return '%s_%d%s'%(stem, c, ext)
+
+    def move_manifest_item(self, item, hrefs):
        item.data # Make sure the data has been loaded and cached
        old_abspath = os.path.join(self.old_base_path,
                *(urldefrag(item.href)[0].split('/')))
@ -79,11 +99,17 @@ class Package(object):
            new_href = 'content/'
        elif item.href.lower().endswith('.ncx'):
            new_href = ''
-        new_href += bname
+        new_href += sanitize_file_name(bname)
+
+        if new_href in hrefs:
+            new_href = self.uniqify_name(new_href, hrefs)
+        hrefs.add(new_href)

        new_abspath = os.path.join(self.new_base_path, *new_href.split('/'))
        new_abspath = os.path.abspath(new_abspath)
+        item.old_href = self.oeb.manifest.hrefs.pop(item.href).href
        item.href   = new_href
+        self.oeb.manifest.hrefs[item.href] = item
        if not islinux:
            old_abspath, new_abspath = old_abspath.lower(), new_abspath.lower()
        if old_abspath != new_abspath:
@ -91,25 +117,33 @@ class Package(object):

    def rewrite_links_in_toc(self, toc):
        if toc.href:
-            toc.href = self.link_replacer(toc.href, base=self.new_base_path)
+            toc.href = self.link_replacer(toc.href, base=self.old_base_path,
+                    new_base=self.new_base_path)

        for x in toc:
            self.rewrite_links_in_toc(x)

    def __call__(self, oeb, context):
        self.map = {}
-        self.log = self.oeb.log
+        self.log = oeb.log
+        self.oeb = oeb
        self.old_base_path = os.path.abspath(oeb.container.rootdir)

+        hrefs = set([])
        for item in self.oeb.manifest:
-            self.move_manifest_item(item)
+            self.move_manifest_item(item, hrefs)

+        self.log.debug('Rewriting links in OEB documents...')
        for item in self.oeb.manifest:
            self.rewrite_links_in(item)

        if getattr(oeb.toc, 'nodes', False):
+            self.log.debug('Rewriting links in TOC...')
            self.rewrite_links_in_toc(oeb.toc)

        if hasattr(oeb, 'guide'):
+            self.log.debug('Rewriting links in guide...')
            for ref in oeb.guide.values():
-                ref.href = self.link_replacer(ref.href, base=self.new_base_path)
+                ref.href = self.link_replacer(ref.href,
+                        base=self.old_base_path,
+                        new_base=self.new_base_path)
--- a/src/calibre/ebooks/oeb/writer.py
+++ b/src/calibre/ebooks/oeb/writer.py
@ -48,7 +48,8 @@ class OEBWriter(object):
                   pretty_print=pretty_print)

    def __call__(self, oeb, path):
-        """Read the book in the :class:`OEBBook` object :param:`oeb` to a file
+        """
+        Write the book in the :class:`OEBBook` object :param:`oeb` to a file
        at :param:`path`.
        """
        version = int(self.version[0])
--- a/src/calibre/web/feeds/main.py
+++ b/src/calibre/web/feeds/main.py
@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 CLI for downloading feeds.
 '''

-import sys, os, logging
+import sys, os
 from calibre.web.feeds.recipes import get_builtin_recipe, compile_recipe, titles
 from calibre.web.fetch.simple import option_parser as _option_parser
 from calibre.web.feeds.news import BasicNewsRecipe
@ -113,7 +113,7 @@ If you specify this option, any argument to %prog is ignored and a default recip
 class RecipeError(Exception):
    pass

-def run_recipe(opts, recipe_arg, parser, notification=None, handler=None):
+def run_recipe(opts, recipe_arg, parser, notification=None):
    if notification is None:
        from calibre.utils.terminfo import TerminalController, ProgressBar
        term = TerminalController(sys.stdout)
@ -137,14 +137,6 @@ def run_recipe(opts, recipe_arg, parser, notification=None, handler=None):
    if recipe is None:
        raise RecipeError(recipe_arg+ ' is an invalid recipe')

-    
-    if handler is None:
-        from calibre import ColoredFormatter
-        handler = logging.StreamHandler(sys.stdout)
-        handler.setLevel(logging.DEBUG if opts.debug else logging.INFO if opts.verbose else logging.WARN)
-        handler.setFormatter(ColoredFormatter('%(levelname)s: %(message)s\n')) # The trailing newline is need because of the progress bar
-        logging.getLogger('feeds2disk').addHandler(handler)
-    
    recipe = recipe(opts, parser, notification)

    if not os.path.exists(recipe.output_dir):
@ -153,7 +145,7 @@ def run_recipe(opts, recipe_arg, parser, notification=None, handler=None):

    return recipe

-def main(args=sys.argv, notification=None, handler=None):
+def main(args=sys.argv, notification=None):
    p = option_parser()
    opts, args = p.parse_args(args=args[1:])

@ -161,7 +153,7 @@ def main(args=sys.argv, notification=None, handler=None):
        p.print_help()
        return 1
    recipe_arg = args[0] if len(args) > 0 else None
-    run_recipe(opts, recipe_arg, p, notification=notification, handler=handler)    
+    run_recipe(opts, recipe_arg, p, notification=notification)

    return 0

--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@ -7,7 +7,7 @@ Defines various abstract base classes that can be subclassed to create powerful
 __docformat__ = "restructuredtext en"


-import logging, os, cStringIO, time, traceback, re, urlparse, sys
+import os, time, traceback, re, urlparse, sys
 from collections import defaultdict
 from functools import partial
 from contextlib import nested, closing
@ -27,6 +27,7 @@ from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
 from calibre.web.fetch.simple import option_parser as web2disk_option_parser
 from calibre.web.fetch.simple import RecursiveFetcher
 from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
+from calibre.utils.logging import Log
 from calibre.ptempfile import PersistentTemporaryFile, \
                              PersistentTemporaryDirectory

@ -423,7 +424,7 @@ class BasicNewsRecipe(object):
        '''
        raise NotImplementedError

-    def get_obfuscated_article(self, url, logger):
+    def get_obfuscated_article(self, url):
        '''
        If you set :member:`articles_are_obfuscated` this method is called with
        every article URL. It should return the path to a file on the filesystem
@ -443,6 +444,7 @@ class BasicNewsRecipe(object):
        :param parser:  Command line option parser. Used to intelligently merge options.
        :param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional.
        '''
+        self.log = Log()
        if not isinstance(self.title, unicode):
            self.title = unicode(self.title, 'utf-8', 'replace')

@ -455,7 +457,6 @@ class BasicNewsRecipe(object):


        if self.debug:
-            logging.getLogger('feeds2disk').setLevel(logging.DEBUG)
            self.verbose = True
        self.report_progress = progress_reporter

@ -560,20 +561,20 @@ class BasicNewsRecipe(object):
            res = self.build_index()
            self.report_progress(1, _('Download finished'))
            if self.failed_downloads:
-                self.log_warning(_('Failed to download the following articles:'))
+                self.log.warning(_('Failed to download the following articles:'))
                for feed, article, debug in self.failed_downloads:
-                    self.log_warning(article.title+_(' from ')+feed.title)
-                    self.log_debug(article.url)
-                    self.log_debug(debug)
+                    self.log.warning(article.title+_(' from ')+feed.title)
+                    self.log.debug(article.url)
+                    self.log.debug(debug)
            if self.partial_failures:
-                self.log_warning(_('Failed to download parts of the following articles:'))
+                self.log.warning(_('Failed to download parts of the following articles:'))
                for feed, atitle, aurl, debug in self.partial_failures:
-                    self.log_warning(atitle + _(' from ') + feed)
-                    self.log_debug(aurl)
-                    self.log_warning(_('\tFailed links:'))
+                    self.log.warning(atitle + _(' from ') + feed)
+                    self.log.debug(aurl)
+                    self.log.warning(_('\tFailed links:'))
                    for l, tb in debug:
-                        self.log_warning(l)
-                        self.log_debug(tb)
+                        self.log.warning(l)
+                        self.log.debug(tb)
            return res
        finally:
            self.cleanup()
@ -636,20 +637,11 @@ class BasicNewsRecipe(object):
                              extra_css=self.extra_css).render(doctype='xhtml')


-    def create_logger(self, feed_number, article_number):
-        logger = logging.getLogger('feeds2disk.article_%d_%d'%(feed_number, article_number))
-        out = cStringIO.StringIO()
-        handler = logging.StreamHandler(out)
-        handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))
-        handler.setLevel(logging.INFO if self.verbose else logging.WARNING)
-        if self.debug:
-            handler.setLevel(logging.DEBUG)
-        logger.addHandler(handler)
-        return logger, out
-
-    def _fetch_article(self, url, dir, logger, f, a, num_of_feeds):
+    def _fetch_article(self, url, dir, f, a, num_of_feeds):
        self.web2disk_options.browser = self.get_browser() if self.multithreaded_fetch else self.browser
-        fetcher = RecursiveFetcher(self.web2disk_options, logger, self.image_map, self.css_map, (url, f, a, num_of_feeds))
+        fetcher = RecursiveFetcher(self.web2disk_options, self.log,
+                self.image_map, self.css_map,
+                (url, f, a, num_of_feeds))
        fetcher.base_dir = dir
        fetcher.current_dir = dir
        fetcher.show_progress = False
@ -661,21 +653,21 @@ class BasicNewsRecipe(object):
            raise Exception(_('Could not fetch article. Run with --debug to see the reason'))
        return res, path, failures

-    def fetch_article(self, url, dir, logger, f, a, num_of_feeds):
-        return self._fetch_article(url, dir, logger, f, a, num_of_feeds)
+    def fetch_article(self, url, dir, f, a, num_of_feeds):
+        return self._fetch_article(url, dir, f, a, num_of_feeds)

-    def fetch_obfuscated_article(self, url, dir, logger, f, a, num_of_feeds):
-        path = os.path.abspath(self.get_obfuscated_article(url, logger))
+    def fetch_obfuscated_article(self, url, dir, f, a, num_of_feeds):
+        path = os.path.abspath(self.get_obfuscated_article(url))
        url = ('file:'+path) if iswindows else ('file://'+path)
-        return self._fetch_article(url, dir, logger, f, a, num_of_feeds)
+        return self._fetch_article(url, dir, f, a, num_of_feeds)

-    def fetch_embedded_article(self, article, dir, logger, f, a, num_of_feeds):
+    def fetch_embedded_article(self, article, dir, f, a, num_of_feeds):
        templ = templates.EmbeddedContent()
        raw = templ.generate(article).render('html')
        with PersistentTemporaryFile('_feeds2disk.html') as pt:
            pt.write(raw)
            url = ('file:'+pt.name) if iswindows else ('file://'+pt.name)
-        return self._fetch_article(url, dir, logger, f, a, num_of_feeds)
+        return self._fetch_article(url, dir,  f, a, num_of_feeds)


    def build_index(self):
@ -716,7 +708,6 @@ class BasicNewsRecipe(object):
                art_dir = os.path.join(feed_dir, 'article_%d'%a)
                if not os.path.isdir(art_dir):
                    os.makedirs(art_dir)
-                logger, stream = self.create_logger(f, a)
                try:
                    url = self.print_version(article.url)
                except NotImplementedError:
@ -726,10 +717,9 @@ class BasicNewsRecipe(object):
                func, arg = (self.fetch_embedded_article, article) if self.use_embedded_content else \
                            ((self.fetch_obfuscated_article if self.articles_are_obfuscated \
                              else self.fetch_article), url)
-                req = WorkRequest(func, (arg, art_dir, logger, f, a, len(feed)),
+                req = WorkRequest(func, (arg, art_dir, f, a, len(feed)),
                                      {}, (f, a), self.article_downloaded,
                                      self.error_in_article_download)
-                req.stream = stream
                req.feed = feed
                req.article = article
                req.feed_dir = feed_dir
@ -768,8 +758,8 @@ class BasicNewsRecipe(object):
            cu = self.get_cover_url()
        except Exception, err:
            cu = None
-            self.log_error(_('Could not download cover: %s')%str(err))
-            self.log_debug(traceback.format_exc())
+            self.log.error(_('Could not download cover: %s')%str(err))
+            self.log.debug(traceback.format_exc())
        if cu is not None:
            ext = cu.rpartition('.')[-1]
            if '?' in ext:
@ -841,8 +831,8 @@ class BasicNewsRecipe(object):
            f.write(html.encode('utf-8'))
        renderer = render_html(hf)
        if renderer.tb is not None:
-            self.logger.warning('Failed to render default cover')
-            self.logger.debug(renderer.tb)
+            self.log.warning('Failed to render default cover')
+            self.log.debug(renderer.tb)
        else:
            cover_file.write(renderer.data)
            cover_file.flush()
@ -863,7 +853,7 @@ class BasicNewsRecipe(object):
        manifest.append(os.path.join(dir, 'index.ncx'))
        cpath = getattr(self, 'cover_path', None)
        if cpath is None:
-            pf = PersistentTemporaryFile('_recipe_cover.jpg')
+            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
            self.default_cover(pf)
            cpath =  pf.name
        if cpath is not None and os.access(cpath, os.R_OK):
@ -944,7 +934,7 @@ class BasicNewsRecipe(object):
        a = request.requestID[1]

        article = request.article
-        self.log_debug(_('\nDownloaded article %s from %s\n%s')%(article.title, article.url, request.stream.getvalue().decode('utf-8', 'ignore')))
+        self.log.debug(_('\nDownloaded article %s from %s')%(article.title, article.url))
        article.orig_url = article.url
        article.url = 'article_%d/index.html'%a
        article.downloaded = True
@ -956,11 +946,11 @@ class BasicNewsRecipe(object):

    def error_in_article_download(self, request, traceback):
        self.jobs_done += 1
-        self.log_error(_('Failed to download article: %s from %s\n')%(request.article.title, request.article.url))
+        self.log.error(_('Failed to download article: %s from %s\n')%(request.article.title, request.article.url))
-        debug = request.stream.getvalue().decode('utf-8', 'ignore')
-        self.log_debug(debug)
-        self.log_debug(traceback)
-        self.log_debug('\n')
+        # Requests no longer carry a per-article log stream; keep the traceback as the debug record
+        debug = traceback
+        self.log.debug(traceback)
+        self.log.debug('\n')
        self.report_progress(float(self.jobs_done)/len(self.jobs), _('Article download failed: %s')%request.article.title)
        self.failed_downloads.append((request.feed, request.article, debug))

@ -990,7 +980,7 @@ class BasicNewsRecipe(object):
                feed.populate_from_preparsed_feed(msg, [])
                feed.description = unicode(err)
                parsed_feeds.append(feed)
-                self.log_exception(msg)
+                self.log.exception(msg)


        return parsed_feeds
@ -1057,7 +1047,7 @@ class CustomIndexRecipe(BasicNewsRecipe):
        index = os.path.abspath(self.custom_index())
        url = 'file:'+index if iswindows else 'file://'+index
        self.web2disk_options.browser = self.browser
-        fetcher = RecursiveFetcher(self.web2disk_options, self.logger)
+        fetcher = RecursiveFetcher(self.web2disk_options, self.log)
        fetcher.base_dir = self.output_dir
        fetcher.current_dir = self.output_dir
        fetcher.show_progress = False
@ -1069,7 +1059,7 @@ class AutomaticNewsRecipe(BasicNewsRecipe):

    keep_only_tags = [dict(name=['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'])]

-    def fetch_embedded_article(self, article, dir, logger, f, a, num_of_feeds):
+    def fetch_embedded_article(self, article, dir, f, a, num_of_feeds):
        if self.use_embedded_content:
            self.web2disk_options.keep_only_tags = []
-        return BasicNewsRecipe.fetch_embedded_article(self, article, dir, logger, f, a, num_of_feeds)
+        return BasicNewsRecipe.fetch_embedded_article(self, article, dir, f, a, num_of_feeds)
--- a/src/calibre/web/fetch/simple.py
+++ b/src/calibre/web/fetch/simple.py
@ -7,18 +7,19 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 Fetch a webpage and its links recursively. The webpages are saved to disk in
 UTF-8 encoding with any charset declarations removed.
 '''
-import sys, socket, os, urlparse, logging, re, time, copy, urllib2, threading, traceback
+import sys, socket, os, urlparse, re, time, copy, urllib2, threading, traceback
 from urllib import url2pathname, quote
 from threading import RLock
 from httplib import responses
 from PIL import Image
 from cStringIO import StringIO

-from calibre import setup_cli_handlers, browser, sanitize_file_name, \
+from calibre import browser, sanitize_file_name, \
                    relpath, unicode_path
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.utils.config import OptionParser
+from calibre.utils.logging import Log

 class FetchError(Exception):
    pass
@ -92,10 +93,11 @@ class RecursiveFetcher(object):
    default_timeout = socket.getdefaulttimeout() # Needed here as it is used in __del__
    DUMMY_LOCK = DummyLock()

-    def __init__(self, options, logger, image_map={}, css_map={}, job_info=None):
+    def __init__(self, options, log, image_map={}, css_map={}, job_info=None):
        self.base_dir = os.path.abspath(os.path.expanduser(options.dir))
        if not os.path.exists(self.base_dir):
            os.makedirs(self.base_dir)
+        self.log = log
        self.default_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(options.timeout)
        self.verbose = options.verbose
@ -174,7 +176,7 @@ class RecursiveFetcher(object):

    def fetch_url(self, url):
        data = None
-        self.log_debug('Fetching %s', url)
+        self.log.debug('Fetching', url)
        delta = time.time() - self.last_fetch_at
        if  delta < self.delay:
            time.sleep(delta)
@ -190,7 +192,7 @@ class RecursiveFetcher(object):
                    raise FetchError, responses[err.code]
                if getattr(err, 'reason', [0])[0] == 104 or \
                    getattr(getattr(err, 'args', [None])[0], 'errno', None) == -2: # Connection reset by peer or Name or service not know
-                    self.log_debug('Temporary error, retrying in 1 second')
+                    self.log.debug('Temporary error, retrying in 1 second')
                    time.sleep(1)
                    with closing(self.browser.open(url)) as f:
                        data = response(f.read()+f.read())
@ -204,9 +206,9 @@ class RecursiveFetcher(object):

    def start_fetch(self, url):
        soup = BeautifulSoup(u'<a href="'+url+'" />')
-        self.log_info('Downloading')
+        self.log.debug('Downloading')
        res = self.process_links(soup, url, 0, into_dir='')
-        self.log_info('%s saved to %s', url, res)
+        self.log.debug('%s saved to %s'%( url, res))
        return res

    def is_link_ok(self, url):
@ -243,8 +245,7 @@ class RecursiveFetcher(object):
                try:
                    data = self.fetch_url(iurl)
                except Exception, err:
-                    self.log_debug('Could not fetch stylesheet %s', iurl)
-                    self.log_debug('Error: %s', str(err), exc_info=True)
+                    self.log.exception('Could not fetch stylesheet %s'% iurl)
                    continue
                stylepath = os.path.join(diskpath, 'style'+str(c)+'.css')
                with self.stylemap_lock:
@ -267,8 +268,7 @@ class RecursiveFetcher(object):
                        try:
                            data = self.fetch_url(iurl)
                        except Exception, err:
-                            self.log_warning('Could not fetch stylesheet %s', iurl)
-                            self.log_debug('Error: %s', str(err), exc_info=True)
+                            self.log.exception('Could not fetch stylesheet %s'% iurl)
                            continue
                        c += 1
                        stylepath = os.path.join(diskpath, 'style'+str(c)+'.css')
@ -291,9 +291,6 @@ class RecursiveFetcher(object):
                iurl = self.image_url_processor(baseurl, iurl)
            ext  = os.path.splitext(iurl)[1]
            ext  = ext[:5]
-            #if not ext:
-            #    self.log_debug('Skipping extensionless image %s', iurl)
-            #    continue
            if not urlparse.urlsplit(iurl).scheme:
                iurl = urlparse.urljoin(baseurl, iurl, False)
            with self.imagemap_lock:
@ -303,8 +300,7 @@ class RecursiveFetcher(object):
            try:
                data = self.fetch_url(iurl)
            except Exception, err:
-                self.log_warning('Could not fetch image %s', iurl)
-                self.log_debug('Error: %s', str(err), exc_info=True)
+                self.log.exception('Could not fetch image %s'% iurl)
                continue
            c += 1
            fname = sanitize_file_name('img'+str(c)+ext)
@ -330,10 +326,10 @@ class RecursiveFetcher(object):
        if not parts.scheme:
            iurl = urlparse.urljoin(baseurl, iurl, False)
        if not self.is_link_ok(iurl):
-            self.log_debug('Skipping invalid link: %s', iurl)
+            self.log.debug('Skipping invalid link:', iurl)
            return None
        if filter and not self.is_link_wanted(iurl):
-            self.log_debug('Filtered link: '+iurl)
+            self.log.debug('Filtered link: '+iurl)
            return None
        return iurl

@ -401,7 +397,7 @@ class RecursiveFetcher(object):
                    base = soup.find('base', href=True)
                    if base is not None:
                        newbaseurl = base['href']
-                    self.log_debug('Processing images...')
+                    self.log.debug('Processing images...')
                    self.process_images(soup, newbaseurl)
                    if self.download_stylesheets:
                        self.process_stylesheets(soup, newbaseurl)
@ -416,11 +412,11 @@ class RecursiveFetcher(object):
                    self.downloaded_paths.append(res)
                    self.filemap[nurl] = res
                    if recursion_level < self.max_recursions:
-                        self.log_debug('Processing links...')
+                        self.log.debug('Processing links...')
                        self.process_links(soup, newbaseurl, recursion_level+1)
                    else:
                        self.process_return_links(soup, newbaseurl)
-                        self.log_debug('Recursion limit reached. Skipping links in %s', iurl)
+                        self.log.debug('Recursion limit reached. Skipping links in', iurl)

                    if callable(self.postprocess_html_ext):
                        soup = self.postprocess_html_ext(soup,
@ -434,8 +430,7 @@ class RecursiveFetcher(object):
                    self.localize_link(tag, 'href', res)
                except Exception, err:
                    self.failed_links.append((iurl, traceback.format_exc()))
-                    self.log_warning('Could not fetch link %s', iurl)
-                    self.log_debug('Error: %s', str(err), exc_info=True)
+                    self.log.exception('Could not fetch link', iurl)
                finally:
                    self.current_dir = diskpath
                    self.files += 1
@ -478,12 +473,10 @@ def option_parser(usage=_('%prog URL\n\nWhere URL is for example http://google.c
    return parser


-def create_fetcher(options, logger=None, image_map={}):
-    if logger is None:
-        level = logging.DEBUG if options.verbose else logging.INFO
-        logger = logging.getLogger('web2disk')
-        setup_cli_handlers(logger, level)
-    return RecursiveFetcher(options, logger, image_map={})
+def create_fetcher(options, image_map={}, log=None):
+    if log is None:
+        log = Log()
+    return RecursiveFetcher(options, log, image_map={})

 def main(args=sys.argv):
    parser = option_parser()