Pull from trunk

2025-08-11 09:13:57 -04:00 · 2009-03-19 19:08:27 -07:00 · 2009-03-19 19:08:27 -07:00 · fe918ab068
commit fe918ab068
parent c372526ba9 b759c79501
56 changed files with 13203 additions and 5724 deletions
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = 'calibre'
-__version__   = '0.5.1'
+__version__   = '0.5.2'
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"
 '''
 Various run time constants.
--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@ -108,7 +108,7 @@ def find_html_index(files):
    html_files = [f[0] for f in html_files]
    for q in ('toc', 'index'):
        for f in html_files:
-            if os.path.splitext(f)[0].lower() == q:
+            if os.path.splitext(os.path.basename(f))[0].lower() == q:
                return f, os.path.splitext(f)[1].lower()[1:]
    return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]

@ -199,6 +199,10 @@ class HTMLProcessor(Processor, Rationalizer):
                
        for tag in self.root.xpath('//form'):
            tag.getparent().remove(tag)
+            
+        for tag in self.root.xpath('//center'):
+            tag.tag = 'div'
+            tag.set('style', 'text-align:center')
                
        if self.opts.linearize_tables:
            for tag in self.root.xpath('//table | //tr | //th | //td'):
--- a/src/calibre/ebooks/epub/split.py
+++ b/src/calibre/ebooks/epub/split.py
@ -138,7 +138,9 @@ class Splitter(object):
        split_point, before = self.find_split_point(root)
        if split_point is None or self.split_size > 6*self.orig_size:
            if not self.always_remove:
-                self.log_warn(_('\t\tToo much markup. Re-splitting without structure preservation. This may cause incorrect rendering.'))
+                self.log_warn(_('\t\tToo much markup. Re-splitting without '
+                                'structure preservation. This may cause '
+                                'incorrect rendering.'))
            raise SplitError(self.path, root)
        
        for t in self.do_split(tree, split_point, before):
@ -149,7 +151,8 @@ class Splitter(object):
            if size <= self.opts.profile.flow_size:
                self.trees.append(t)
                #print tostring(t.getroot(), pretty_print=True)
-                self.log_debug('\t\t\tCommitted sub-tree #%d (%d KB)', len(self.trees), size/1024.)
+                self.log_debug('\t\t\tCommitted sub-tree #%d (%d KB)', 
+                               len(self.trees), size/1024.)
                self.split_size += size
            else:
                self.split_to_size(t)
@ -329,10 +332,12 @@ class Splitter(object):
                     '//pre',
                     '//hr', 
                     '//p',
+                     '//div',
                     '//br',
                     '//li',
                     ):
-            elems = root.xpath(path, namespaces={'re':'http://exslt.org/regular-expressions'})
+            elems = root.xpath(path, 
+                    namespaces={'re':'http://exslt.org/regular-expressions'})
            elem = pick_elem(elems)
            if elem is not None:
                try:
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@ -267,7 +267,7 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None)
                except IgnoreFile, err:
                    rejects.append(link)
                    if not err.doesnt_exist or verbose > 1:
-                        print str(err)
+                        print repr(err)
            for link in rejects:
                hf.links.remove(link)
                
--- a/src/calibre/ebooks/metadata/init.py
+++ b/src/calibre/ebooks/metadata/init.py
@ -229,7 +229,8 @@ class MetaInformation(object):
        if mi.authors and mi.authors[0] != _('Unknown'):
            self.authors = mi.authors

-        for attr in ('author_sort', 'title_sort', 'comments', 'category',
+            
+        for attr in ('author_sort', 'title_sort', 'category',
                     'publisher', 'series', 'series_index', 'rating',
                     'isbn', 'application_id', 'manifest', 'spine', 'toc',
                     'cover', 'language', 'guide', 'book_producer',
@ -244,7 +245,16 @@ class MetaInformation(object):

        if getattr(mi, 'cover_data', None) and mi.cover_data[0] is not None:
            self.cover_data = mi.cover_data
-
+            
+        my_comments = getattr(self, 'comments', '')
+        other_comments = getattr(mi, 'comments', '')
+        if not my_comments:
+            my_comments = ''
+        if not other_comments:
+            other_comments = ''
+        if len(other_comments.strip()) > len(my_comments.strip()):
+            self.comments = other_comments
+            
    def format_series_index(self):
        try:
            x = float(self.series_index)
--- a/src/calibre/ebooks/metadata/fetch.py
+++ b/src/calibre/ebooks/metadata/fetch.py
@ -0,0 +1,153 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import traceback, sys, textwrap
+from threading import Thread
+
+from calibre import preferred_encoding
+from calibre.utils.config import OptionParser
+
+class FetchGoogle(Thread):
+    name = 'Google Books'
+    # Worker thread: run() performs one Google Books search with these fields.
+    def __init__(self, title, author, publisher, isbn, verbose):
+        self.title = title
+        self.verbose = verbose
+        self.author = author
+        self.publisher = publisher
+        self.isbn = isbn
+        Thread.__init__(self, None)
+        self.daemon = True
+        self.exception, self.tb = None, None # filled in by run() on failure
+        
+    def run(self): # sets self.results; it does not exist before run() executes
+        from calibre.ebooks.metadata.google_books import search
+        try:
+            self.results = search(self.title, self.author, self.publisher, 
+                                  self.isbn, max_results=10, 
+                                  verbose=self.verbose)
+        except Exception, e: # keep the error for the caller instead of raising
+            self.results = []
+            self.exception = e
+            self.tb = traceback.format_exc() 
+
+
+class FetchISBNDB(Thread): # worker thread that queries isbndb with access key `key`
+    name = 'IsbnDB'
+    def __init__(self, title, author, publisher, isbn, verbose, key):
+        self.title = title
+        self.author = author
+        self.publisher = publisher
+        self.isbn = isbn
+        self.verbose = verbose
+        Thread.__init__(self, None)
+        self.daemon = True
+        self.exception, self.tb = None, None # filled in by run() on failure
+        self.key = key
+        # run() builds an isbndb-style argument list and hands it to create_books.
+    def run(self):
+        from calibre.ebooks.metadata.isbndb import option_parser, create_books
+        args = ['isbndb']
+        if self.isbn: # an ISBN takes precedence; the other fields are not sent
+            args.extend(['--isbn', self.isbn])
+        else: 
+            if self.title:
+                args.extend(['--title', self.title])
+            if self.author:
+                args.extend(['--author', self.author])
+            if self.publisher:
+                args.extend(['--publisher', self.publisher])
+        args.append(self.key) # the key goes last, as a positional argument
+        try:
+            opts, args = option_parser().parse_args(args)
+            self.results = create_books(opts, args)
+        except Exception, e: # keep the error for the caller instead of raising
+            self.results = []
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+def result_index(source, result): # index in source of the entry sharing result.isbn
+    if not result.isbn:
+        return -1 # without an ISBN there is nothing to match on
+    for i, x in enumerate(source):
+        if x.isbn == result.isbn:
+            return i # first match wins
+    return -1 # no entry in source carries this ISBN
+    
+def merge_results(one, two): # folds two into one in place; returns nothing
+    for x in two:
+        idx = result_index(one, x)
+        if idx < 0:
+            one.append(x) # no ISBN match in one (or x has no ISBN): add it
+        else:
+            one[idx].smart_update(x) # same ISBN: existing record absorbs x
+
+def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
+           verbose=0):
+    # Query all sources in parallel (isbndb only when a key is given), merge
+    # the hits and return (results, [(source name, exception, traceback)]).
+    assert not(title is None and author is None and publisher is None and \
+                   isbn is None)
+    fetchers = [FetchGoogle(title, author, publisher, isbn, verbose)]
+    if isbndb_key:
+        fetchers.append(FetchISBNDB(title, author, publisher, isbn, verbose, 
+                                        isbndb_key))
+    
+    for fetcher in fetchers:
+        fetcher.start()
+    for fetcher in fetchers:
+        fetcher.join()
+    for fetcher in fetchers[1:]:
+        merge_results(fetchers[0].results, fetcher.results)
+        
+    # Longest comments first. Comparing the comment strings themselves, as
+    # before, merely ordered the results alphabetically by their comments.
+    results = sorted(fetchers[0].results, reverse=True,
+            key=lambda x: len(x.comments.strip() if x.comments else ''))
+    return results, [(x.name, x.exception, x.tb) for x in fetchers]
+
+        
+def option_parser(): # command line definition for the combined metadata fetcher
+    parser = OptionParser(textwrap.dedent(
+        '''\
+        %prog [options]
+        
+        Fetch book metadata from online sources. You must specify at least one 
+        of title, author, publisher or ISBN. If you specify ISBN, the others 
+        are ignored.  
+        '''
+    ))
+    parser.add_option('-t', '--title', help='Book title')
+    parser.add_option('-a', '--author', help='Book author(s)')
+    parser.add_option('-p', '--publisher', help='Book publisher')
+    parser.add_option('-i', '--isbn', help='Book ISBN')
+    parser.add_option('-m', '--max-results', default=10, # NOTE(review): no type=,
+                      help='Maximum number of results to fetch') # and main() here never reads it
+    parser.add_option('-k', '--isbndb-key', 
+                      help=('The access key for your ISBNDB.com account. '
+                      'Only needed if you want to search isbndb.com'))
+    parser.add_option('-v', '--verbose', default=0, action='count',
+                      help='Be more verbose about errors')
+    return parser
+
+def main(args=sys.argv): # CLI entry point: print each result, then any failures
+    parser = option_parser()
+    opts, args = parser.parse_args(args)
+    results, exceptions = search(opts.title, opts.author, opts.publisher, 
+                                 opts.isbn, opts.isbndb_key, opts.verbose)
+    for result in results:
+        print unicode(result).encode(preferred_encoding)
+        print
+        
+    for name, exception, tb in exceptions: # one tuple per source queried
+        if exception is not None: # None means that source succeeded
+            print 'WARNING: Fetching from', name, 'failed with error:'
+            print exception
+            print tb
+            
+    return 0 # always 0, even when a source failed
+    
+if __name__ == '__main__':
+    sys.exit(main())
--- a/src/calibre/ebooks/metadata/google_books.py
+++ b/src/calibre/ebooks/metadata/google_books.py
@ -0,0 +1,241 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import sys, textwrap
+from urllib import urlencode
+from functools import partial
+
+from lxml import etree
+from dateutil import parser
+
+from calibre import browser, preferred_encoding
+from calibre.ebooks.metadata import MetaInformation
+from calibre.utils.config import OptionParser
+
+NAMESPACES = {
+              'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
+              'atom' : 'http://www.w3.org/2005/Atom',
+              'dc': 'http://purl.org/dc/terms'
+            }
+XPath = partial(etree.XPath, namespaces=NAMESPACES)
+
+total_results  = XPath('//openSearch:totalResults')
+start_index    = XPath('//openSearch:startIndex')
+items_per_page = XPath('//openSearch:itemsPerPage')
+entry          = XPath('//atom:entry')
+entry_id       = XPath('descendant::atom:id')
+creator        = XPath('descendant::dc:creator')
+identifier     = XPath('descendant::dc:identifier')
+title          = XPath('descendant::dc:title')
+date           = XPath('descendant::dc:date')
+publisher      = XPath('descendant::dc:publisher')
+subject        = XPath('descendant::dc:subject')
+description    = XPath('descendant::dc:description')
+language       = XPath('descendant::dc:language')
+
+def report(verbose): # print the current exception's traceback when verbose is truthy
+    if verbose:
+        import traceback
+        traceback.print_exc()
+
+
+class Query(object):
+    '''One request against the Google Books volumes feed; call it to run it.'''
+    BASE_URL = 'http://books.google.com/books/feeds/volumes?'
+    
+    def __init__(self, title=None, author=None, publisher=None, isbn=None,
+                 max_results=20, min_viewability='none', start_index=1):
+        assert not(title is None and author is None and publisher is None and \
+                   isbn is None)
+        assert (max_results < 21)
+        assert (min_viewability in ('none', 'partial', 'full'))
+        # When an ISBN is given it is the whole query; other fields are ignored.
+        if isbn is not None:
+            q = 'isbn:'+isbn
+        else:
+            def build_term(prefix, parts):
+                return ' '.join('in'+prefix + ':' + x for x in parts)
+            # Separate the per-field terms with a space; plain concatenation
+            # fused neighbouring fields (e.g. 'intitle:fooinauthor:bar').
+            fields = (('title', title), ('author', author),
+                      ('publisher', publisher))
+            q = ' '.join(build_term(name, val.split())
+                         for name, val in fields if val and val.strip())
+        
+        self.url = self.BASE_URL+urlencode({
+            'q':q,
+            'max-results':max_results,
+            'start-index':start_index,
+            'min-viewability':min_viewability,
+            })
+        
+    def __call__(self, browser, verbose): # returns (entries, next start index)
+        if verbose:
+            print 'Query:', self.url
+        feed = etree.fromstring(browser.open(self.url).read())
+        total = int(total_results(feed)[0].text)
+        start = int(start_index(feed)[0].text)
+        entries = entry(feed)
+        new_start = start + len(entries)
+        if new_start > total:
+            new_start = 0 # 0 signals the caller that there are no more pages
+        return entries, new_start
+
+
+class ResultList(list):
+    '''List of MetaInformation objects built from Google Books feed entries.'''
+    def get_description(self, entry, verbose):
+        try:
+            desc = description(entry)
+            if desc:
+                return 'SUMMARY:\n'+desc[0].text
+        except Exception:
+            report(verbose)
+    
+    def get_language(self, entry, verbose):
+        try:
+            l = language(entry)
+            if l:
+                return l[0].text
+        except Exception:
+            report(verbose)
+    
+    
+    def get_title(self, entry):
+        # The longest non-empty dc:title wins; IndexError when there is none.
+        candidates = [x.text for x in title(entry) if x.text]
+        return sorted(candidates, key=len, reverse=True)[0]
+    
+    def get_authors(self, entry):
+        m = creator(entry)
+        if not m:
+            m = []
+        m = [x.text for x in m]
+        return m
+    
+    def get_author_sort(self, entry, verbose):
+        for x in creator(entry):
+            for key, val in x.attrib.items():
+                if key.endswith('file-as'): # key may carry a namespace prefix
+                    return val
+    
+    def get_identifiers(self, entry, mi):
+        isbns = []
+        for x in identifier(entry):
+            t = str(x.text).strip()
+            if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
+                if t[:5].upper() == 'ISBN:':
+                    isbns.append(t[5:])
+        if isbns: # keep the longest ISBN found
+            mi.isbn = sorted(isbns, cmp=lambda x,y:cmp(len(x), len(y)))[-1]
+    
+    def get_tags(self, entry, verbose):
+        try:
+            tags = [x.text for x in subject(entry)]
+        except Exception:
+            report(verbose)
+            tags = []
+        return tags
+    
+    def get_publisher(self, entry, verbose):
+        try:
+            pub = publisher(entry)[0].text
+        except Exception:
+            pub = None
+        return pub
+    
+    def get_date(self, entry, verbose):
+        try:
+            d = date(entry)
+            if d:
+                d = parser.parse(d[0].text)
+            else:
+                d = None
+        except Exception:
+            report(verbose)
+            d = None
+        return d
+    
+    def populate(self, entries, browser, verbose=False):
+        for x in entries:
+            try:
+                id_url = entry_id(x)[0].text
+                title = self.get_title(x)
+            except Exception:
+                report(verbose)
+                continue # no id/title: skip it rather than use unbound/stale names
+            mi = MetaInformation(title, self.get_authors(x))
+            try:
+                raw = browser.open(id_url).read()
+                feed = etree.fromstring(raw)
+                x = entry(feed)[0] # detailed entry; on failure x stays the summary
+            except Exception, e:
+                if verbose:
+                    print 'Failed to get all details for an entry'
+                    print e
+            mi.author_sort = self.get_author_sort(x, verbose) 
+            mi.comments = self.get_description(x, verbose)
+            self.get_identifiers(x, mi)
+            mi.tags = self.get_tags(x, verbose)
+            mi.publisher = self.get_publisher(x, verbose)
+            mi.timestamp = self.get_date(x, verbose)
+            mi.language = self.get_language(x, verbose)
+            self.append(mi)
+
+
+def search(title=None, author=None, publisher=None, isbn=None,
+           min_viewability='none', verbose=False, max_results=40):
+    # Page through the feed; every request must carry the current start index.
+    br, start, entries = browser(), 1, []
+    while start > 0 and len(entries) < max_results:
+        new, start = Query(title=title, author=author, publisher=publisher, 
+                       isbn=isbn, min_viewability=min_viewability,
+                       start_index=start)(br, verbose)
+        if not new:
+            break
+        entries.extend(new)
+    
+    entries = entries[:max_results] # the last page may overshoot the limit
+    ans = ResultList() # returned list holds one MetaInformation per entry
+    ans.populate(entries, br, verbose)
+    return ans
+
+def option_parser(): # command line definition for the Google Books fetcher
+    parser = OptionParser(textwrap.dedent(
+        '''\
+        %prog [options]
+        
+        Fetch book metadata from Google. You must specify one of title, author,
+        publisher or ISBN. If you specify ISBN the others are ignored. Will 
+        fetch a maximum of 100 matches, so you should make your query as 
+        specific as possible. 
+        '''
+    ))
+    parser.add_option('-t', '--title', help='Book title')
+    parser.add_option('-a', '--author', help='Book author(s)')
+    parser.add_option('-p', '--publisher', help='Book publisher')
+    parser.add_option('-i', '--isbn', help='Book ISBN')
+    parser.add_option('-m', '--max-results', default=10, type='int', # search()
+                      help='Maximum number of results to fetch') # needs an int
+    parser.add_option('-v', '--verbose', default=0, action='count',
+                      help='Be more verbose about errors')
+    return parser
+
+def main(args=sys.argv): # CLI entry point: search Google Books, print each result
+    parser = option_parser()
+    opts, args = parser.parse_args(args)
+    try:
+        results = search(opts.title, opts.author, opts.publisher, opts.isbn,
+                         verbose=opts.verbose, max_results=opts.max_results)
+    except AssertionError: # raised by the argument checks in Query.__init__
+        report(True)
+        parser.print_help()
+        return 1
+    for result in results:
+        print unicode(result).encode(preferred_encoding)
+        print # blank line between results; falls off the end, returning None
+    
+if __name__ == '__main__':
+    sys.exit(main())
--- a/src/calibre/ebooks/metadata/isbndb.py
+++ b/src/calibre/ebooks/metadata/isbndb.py
@ -4,10 +4,9 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 Interface to isbndb.com. My key HLLXQX2A.
 '''

-import sys, logging, re, socket
+import sys, re, socket
 from urllib import urlopen, quote

-from calibre import setup_cli_handlers
 from calibre.utils.config import OptionParser
 from calibre.ebooks.metadata import MetaInformation, authors_to_sort_string
 from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
@ -63,9 +62,10 @@ class ISBNDBMetadata(MetaInformation):
            
        try:
            self.author_sort = book.find('authors').find('person').string
+            if self.authors and self.author_sort == self.authors[0]:
+                self.author_sort = None
        except:
-            if self.authors:
-                self.author_sort = authors_to_sort_string(self.authors)
+            pass
        self.publisher = book.find('publishertext').string
        
        summ = book.find('summary')
@ -118,19 +118,15 @@ key is the account key you generate after signing up for a free account from isb
    return parser
    

-def create_books(opts, args, logger=None, timeout=5.):
-    if logger is None:
-        level = logging.DEBUG if opts.verbose else logging.INFO
-        logger = logging.getLogger('isbndb')
-        setup_cli_handlers(logger, level)
-    
+def create_books(opts, args, timeout=5.):
    base_url = BASE_URL%dict(key=args[1])
    if opts.isbn is not None:
        url = build_isbn(base_url, opts)
    else:
        url = build_combined(base_url, opts)
-        
-    logger.info('ISBNDB query: '+url)
+    
+    if opts.verbose:
+        print ('ISBNDB query: '+url)
    
    return [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]

@ -139,7 +135,7 @@ def main(args=sys.argv):
    opts, args = parser.parse_args(args)
    if len(args) != 2:
        parser.print_help()
-        print('You must supply the isbndb.com key')
+        print ('You must supply the isbndb.com key')
        return 1
    
    for book in create_books(opts, args):
--- a/src/calibre/gui2/dialogs/fetch_metadata.py
+++ b/src/calibre/gui2/dialogs/fetch_metadata.py
@ -4,22 +4,74 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 GUI for fetching metadata from servers.
 '''

-import logging, cStringIO
+import time

-from PyQt4.QtCore import Qt, QObject, SIGNAL, QVariant, \
+from PyQt4.QtCore import Qt, QObject, SIGNAL, QVariant, QThread, \
                         QAbstractTableModel, QCoreApplication, QTimer
-from PyQt4.QtGui import QDialog, QItemSelectionModel
+from PyQt4.QtGui import QDialog, QItemSelectionModel, QWidget, QLabel, QMovie

 from calibre.gui2.dialogs.fetch_metadata_ui import Ui_FetchMetadata
-from calibre.gui2 import error_dialog, NONE, info_dialog
-from calibre.ebooks.metadata.isbndb import create_books, option_parser, ISBNDBError
+from calibre.gui2 import error_dialog, NONE, info_dialog, warning_dialog
 from calibre.utils.config import prefs

+class Fetcher(QThread):
+    # Runs calibre.ebooks.metadata.fetch.search on a separate QThread.
+    def __init__(self, title, author, publisher, isbn, key):
+        QThread.__init__(self)
+        self.title = title
+        self.author = author
+        self.publisher = publisher
+        self.isbn = isbn
+        self.key = key
+        
+    def run(self): # results/exceptions exist only once run() has completed
+        from calibre.ebooks.metadata.fetch import search
+        self.results, self.exceptions = search(self.title, self.author,
+                                               self.publisher, self.isbn, 
+                                               self.key if self.key else None) # falsy key -> None
+
+class ProgressIndicator(QWidget): # busy overlay: animated icon above a status text
+    def __init__(self, *args):
+        QWidget.__init__(self, *args)
+        self.setGeometry(0, 0, 300, 350)
+        self.movie = QMovie(':/images/jobs-animated.mng')
+        self.ml = QLabel(self)
+        self.ml.setMovie(self.movie)
+        self.movie.start()
+        self.movie.setPaused(True)
+        self.status = QLabel(self)
+        self.status.setWordWrap(True)
+        self.status.setAlignment(Qt.AlignHCenter|Qt.AlignTop)
+        font = self.status.font() # font() hands back a copy, so change the
+        font.setBold(True)        # copy and install it again with setFont()
+        font.setPointSize(self.font().pointSize()+6)
+        self.status.setFont(font)
+        self.setVisible(False)
+        
+    def start(self, msg=''): # centre over the parent, show msg, run the animation
+        psize = self.parent().size()
+        self.resize(psize.width(), min(psize.height(), 250))
+        self.move(0, int((psize.height()-self.size().height())/2.)) # int, as below
+        self.ml.resize(self.ml.sizeHint())
+        self.ml.move(int((self.size().width()-self.ml.size().width())/2.), 0)
+        self.status.resize(self.size().width(), self.size().height()-self.ml.size().height()-10)
+        self.status.move(0, self.ml.size().height()+10)
+        self.status.setText(msg)
+        self.setVisible(True)
+        self.movie.setPaused(False)
+        
+    def stop(self): # pause and hide; does nothing unless the movie is running
+        if self.movie.state() == self.movie.Running:
+            self.movie.setPaused(True)
+            self.setVisible(False)
+            
 class Matches(QAbstractTableModel):
    
    def __init__(self, matches):
        self.matches = matches
-        self.matches.sort(cmp=lambda b, a: cmp(len(a.comments if a.comments else ''), len(b.comments if b.comments else '')))
+        self.matches.sort(cmp=lambda b, a: \
+                        cmp(len(a.comments if a.comments else ''), 
+                            len(b.comments if b.comments else '')))
        QAbstractTableModel.__init__(self)
        
    def rowCount(self, *args):
@ -73,22 +125,23 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
        Ui_FetchMetadata.__init__(self)
        self.setupUi(self)
        
+        self.pi = ProgressIndicator(self)
        self.timeout = timeout
        QObject.connect(self.fetch, SIGNAL('clicked()'), self.fetch_metadata)
        
        self.key.setText(prefs['isbndb_com_key'])
        
-        self.setWindowTitle(title if title else 'Unknown')
-        self.tlabel.setText(self.tlabel.text().arg(title if title else 'Unknown'))
+        self.setWindowTitle(title if title else _('Unknown'))
        self.isbn = isbn
        self.title = title
        self.author = author.strip()
        self.publisher = publisher
        self.previous_row = None
        self.connect(self.matches, SIGNAL('activated(QModelIndex)'), self.chosen)
-        key = str(self.key.text())
-        if key:
-            QTimer.singleShot(100, self.fetch_metadata)
+        self.connect(self.matches, SIGNAL('entered(QModelIndex)'), 
+                     lambda index:self.matches.setCurrentIndex(index))
+        self.matches.setMouseTracking(True)
+        self.fetch_metadata()
        
        
    def show_summary(self, current, previous):
@ -100,53 +153,58 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
        
    def fetch_metadata(self):
        key = str(self.key.text())
-        if not key:
-            error_dialog(self, _('Cannot connect'), 
-                         _('You must specify a valid access key for isbndb.com'))
-            return
-        else:
+        if key:
            prefs['isbndb_com_key'] =  key
-            
-        args = ['isbndb']
+        else:
+            key = None
+        title = author = publisher = isbn = None
        if self.isbn:
-            args.extend(('--isbn', self.isbn))
+            isbn = self.isbn
        if self.title:
-            args.extend(('--title', self.title))
-        if self.author and not self.author == 'Unknown':
-            args.extend(('--author', self.author))
-        #if self.publisher:
-        #    args.extend(('--publisher', self.publisher))
-        
+            title = self.title
+        if self.author and not self.author == _('Unknown'):
+            author = self.author
        self.fetch.setEnabled(False)
        self.setCursor(Qt.WaitCursor)
        QCoreApplication.instance().processEvents()
+        self.fetcher = Fetcher(title, author, publisher, isbn, key)
+        self.fetcher.start()
+        self.pi.start(_('Finding metadata...'))
+        self._hangcheck = QTimer(self)
+        self.connect(self._hangcheck, SIGNAL('timeout()'), self.hangcheck)
+        self.start_time = time.time()
+        self._hangcheck.start()
+        
+    def hangcheck(self):
+        if not (self.fetcher.isFinished() or time.time() - self.start_time > 75):
+            return
+        self._hangcheck.stop()
        try:
-            args.append(key)
-            parser = option_parser()
-            opts, args = parser.parse_args(args)
-            
-            self.logger = logging.getLogger('Job #'+str(id))
-            self.logger.setLevel(logging.DEBUG)
-            self.log_dest = cStringIO.StringIO()
-            handler = logging.StreamHandler(self.log_dest)
-            handler.setLevel(logging.DEBUG)
-            handler.setFormatter(logging.Formatter('[%(levelname)s] %(filename)s:%(lineno)s: %(message)s'))
-            self.logger.addHandler(handler)
-            
-            try:
-                books = create_books(opts, args, self.logger, self.timeout)
-            except ISBNDBError, err:
-                error_dialog(self, _('Error fetching metadata'), str(err)).exec_()
+            if self.fetcher.isRunning():
+                error_dialog(self, _('Could not find metadata'),
+                             _('The metadata download seems to have stalled. '
+                               'Try again later.')).exec_()
+                self.fetcher.terminate()
+                return
+            self.model = Matches(self.fetcher.results)
+            warnings = [(x[0], unicode(x[1])) for x in \
+                            self.fetcher.exceptions if x[1] is not None]
+            if warnings:
+                warnings='<br>'.join(['<b>%s</b>: %s'%(name, exc) for name,exc in warnings])
+                warning_dialog(self, _('Warning'),
+                               '<p>'+_('Could not fetch metadata from:')+\
+                               '<br><br>'+warnings+'</p>').exec_()
+            if self.model.rowCount() < 1:
+                info_dialog(self, _('No metadata found'),
+                     _('No metadata found, try adjusting the title and author '
+                       'or the ISBN key.')).exec_()
+                self.reject()
                return
            
-            self.model = Matches(books)
-            if self.model.rowCount() < 1:
-                info_dialog(self, _('No metadata found'), _('No metadata found, try adjusting the title and author or the ISBN key.')).exec_()
-                self.reject()
-            
            self.matches.setModel(self.model)
-            QObject.connect(self.matches.selectionModel(), SIGNAL('currentRowChanged(QModelIndex, QModelIndex)'),
-                         self.show_summary)
+            QObject.connect(self.matches.selectionModel(), 
+                        SIGNAL('currentRowChanged(QModelIndex, QModelIndex)'),
+                        self.show_summary)
            self.model.reset()
            self.matches.selectionModel().select(self.model.index(0, 0), 
                                  QItemSelectionModel.Select | QItemSelectionModel.Rows)
@ -155,9 +213,9 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
            self.fetch.setEnabled(True)
            self.unsetCursor()
            self.matches.resizeColumnsToContents()
+            self.pi.stop()
+            
        
-
-
    def selected_book(self):
        try:
            return self.matches.model().matches[self.matches.currentIndex().row()]
--- a/src/calibre/gui2/dialogs/fetch_metadata.ui
+++ b/src/calibre/gui2/dialogs/fetch_metadata.ui
@ -23,20 +23,13 @@
   <item>
    <widget class="QLabel" name="tlabel" >
     <property name="text" >
-      <string>Fetching metadata for &lt;b>%1&lt;/b></string>
+      <string>&lt;p>calibre can find metadata for your books from two locations: &lt;b>Google Books&lt;/b> and &lt;b>isbndb.com&lt;/b>. &lt;p>To use isbndb.com you must sign up for a &lt;a href="http://www.isbndb.com">free account&lt;/a> and enter your access key below.</string>
     </property>
     <property name="alignment" >
      <set>Qt::AlignCenter</set>
     </property>
-    </widget>
-   </item>
-   <item>
-    <widget class="QLabel" name="label" >
-     <property name="text" >
-      <string>Sign up for a free account from &lt;a href="http://www.isbndb.com">ISBNdb.com&lt;/a> to get an access key.</string>
-     </property>
-     <property name="alignment" >
-      <set>Qt::AlignCenter</set>
+     <property name="wordWrap" >
+      <bool>true</bool>
     </property>
     <property name="openExternalLinks" >
      <bool>true</bool>
--- a/src/calibre/gui2/dialogs/scheduler.py
+++ b/src/calibre/gui2/dialogs/scheduler.py
@ -219,6 +219,8 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser):
                return QVariant(category + ' [%d]'%num)
            elif role == Qt.FontRole:
                return self.bold_font
+            elif role == Qt.ForegroundRole and category == _('Scheduled'):
+                return QVariant(QColor(0, 255, 0))
        return NONE
    
    def update_recipe_schedule(self, recipe):
--- a/src/calibre/gui2/images/news/24sata.png
+++ b/src/calibre/gui2/images/news/24sata.png
--- a/src/calibre/gui2/images/news/adventuregamers.png
+++ b/src/calibre/gui2/images/news/adventuregamers.png
--- a/src/calibre/gui2/images/news/instapaper.png
+++ b/src/calibre/gui2/images/news/instapaper.png
--- a/src/calibre/gui2/images/news/nacional_cro.png
+++ b/src/calibre/gui2/images/news/nacional_cro.png
--- a/src/calibre/gui2/images/news/vecernji_list.png
+++ b/src/calibre/gui2/images/news/vecernji_list.png
--- a/src/calibre/gui2/library.py
+++ b/src/calibre/gui2/library.py
@ -94,7 +94,8 @@ class DateDelegate(QStyledItemDelegate):
    def createEditor(self, parent, option, index):
        qde = QStyledItemDelegate.createEditor(self, parent, option, index)
        qde.setDisplayFormat('MM/dd/yyyy')
-        qde.setMinimumDate(QDate(100,1,1))
+        qde.setMinimumDate(QDate(-4000,1,1))
+        qde.setCalendarPopup(True)
        return qde

 class BooksModel(QAbstractTableModel):
@ -824,7 +825,7 @@ class DeviceBooksModel(BooksModel):


    def search(self, text, refinement, reset=True):
-        if not text:
+        if not text or not text.strip():
            self.map = list(range(len(self.db)))
        else:
            matches = self.search_engine.parse(text)
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@ -94,6 +94,7 @@ class Main(MainWindow, Ui_MainWindow):
        self.viewers = collections.deque()
        self.content_server = None
        self.system_tray_icon = QSystemTrayIcon(QIcon(':/library'), self)
+        self.system_tray_icon.setObjectName('calibre')
        if not config['systray_icon']:
            self.system_tray_icon.hide()
        else:
--- a/src/calibre/gui2/viewer/main.py
+++ b/src/calibre/gui2/viewer/main.py
@ -92,7 +92,6 @@ class ProgressIndicator(QWidget):
        
    def stop(self):
        if self.movie.state() == self.movie.Running:
-            #self.movie.jumpToFrame(0)
            self.movie.setPaused(True)
            self.setVisible(False)

--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@ -40,6 +40,7 @@ entry_points = {
             'calibre-parallel   = calibre.parallel:main',
             'calibre-customize  = calibre.customize.ui:main',
             'pdftrim            = calibre.ebooks.pdf.pdftrim:main' ,
+             'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
        ],
        'gui_scripts'    : [
            __appname__+' = calibre.gui2.main:main',
@ -157,6 +158,7 @@ def setup_completion(fatal_errors):
        from calibre.ebooks.epub.from_feeds import option_parser as feeds2epub
        from calibre.ebooks.mobi.from_feeds import option_parser as feeds2mobi
        from calibre.ebooks.epub.from_comic import option_parser as comic2epub
+        from calibre.ebooks.metadata.fetch import option_parser as fem_op
        from calibre.gui2.main import option_parser as guiop
        any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
             'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt']
@ -191,6 +193,7 @@ def setup_completion(fatal_errors):
        f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles))
        f.write(opts_and_words('feeds2epub', feeds2epub, feed_titles))
        f.write(opts_and_words('feeds2mobi', feeds2mobi, feed_titles))
+        f.write(opts_and_words('fetch-ebook-metadata', fem_op, []))
        f.write('''
 _prs500_ls()
 {
--- a/src/calibre/trac/donations/server.py
+++ b/src/calibre/trac/donations/server.py
@ -205,6 +205,9 @@ class Server(object):
        ax.set_xlabel('Days ago')
        ax.set_ylabel('Income ($)')
        ax.hlines([stats.daily_average], 0, days-1)
+        ax.hlines([stats.daily_average+stats.daily_deviation,
+                   stats.daily_average-stats.daily_deviation], 0, days-1,
+                   linestyle=':',color='r')
        ax.set_xlim([0, days-1])
        text = u'''\
 Total: $%(total).2f
--- a/src/calibre/trac/plugins/download.py
+++ b/src/calibre/trac/plugins/download.py
@ -358,12 +358,9 @@ else:
    def extract_tarball(tar, destdir):
        print 'Extracting application files...'
        if hasattr(tar, 'read'):
-            try:
-                tarfile.open(fileobj=tar, mode='r').extractall(destdir)
-            except: # tarfile.py on Fedora 9 is buggy
-                subprocess.check_call(['tar', 'xjf', tar.name, '-C', destdir])
+            subprocess.check_call(['tar', 'xjf', tar.name, '-C', destdir])
        else:
-            tarfile.open(tar, 'r').extractall(destdir)
+            subprocess.check_call(['tar', 'xjf', tar, '-C', destdir])
    
    def main():
        defdir = '/opt/calibre'
@ -382,5 +379,4 @@ else:
        pi = os.path.join(destdir, 'calibre_postinstall')
        subprocess.call(pi, shell=True)
        return 0
-    ''')
-    
+    ''')
--- a/src/calibre/translations/ar.po
+++ b/src/calibre/translations/ar.po
--- a/src/calibre/translations/bg.po
+++ b/src/calibre/translations/bg.po
--- a/src/calibre/translations/ca.po
+++ b/src/calibre/translations/ca.po
--- a/src/calibre/translations/cs.po
+++ b/src/calibre/translations/cs.po
--- a/src/calibre/translations/de.po
+++ b/src/calibre/translations/de.po
--- a/src/calibre/translations/el.po
+++ b/src/calibre/translations/el.po
--- a/src/calibre/translations/es.po
+++ b/src/calibre/translations/es.po
--- a/src/calibre/translations/fr.po
+++ b/src/calibre/translations/fr.po
--- a/src/calibre/translations/gl.po
+++ b/src/calibre/translations/gl.po
--- a/src/calibre/translations/he.po
+++ b/src/calibre/translations/he.po
--- a/src/calibre/translations/hr.po
+++ b/src/calibre/translations/hr.po
--- a/src/calibre/translations/hu.po
+++ b/src/calibre/translations/hu.po
--- a/src/calibre/translations/it.po
+++ b/src/calibre/translations/it.po
--- a/src/calibre/translations/nb.po
+++ b/src/calibre/translations/nb.po
--- a/src/calibre/translations/nds.po
+++ b/src/calibre/translations/nds.po
--- a/src/calibre/translations/nl.po
+++ b/src/calibre/translations/nl.po
--- a/src/calibre/translations/pl.po
+++ b/src/calibre/translations/pl.po
--- a/src/calibre/translations/pt.po
+++ b/src/calibre/translations/pt.po
--- a/src/calibre/translations/ro.po
+++ b/src/calibre/translations/ro.po
--- a/src/calibre/translations/ru.po
+++ b/src/calibre/translations/ru.po
--- a/src/calibre/translations/sk.po
+++ b/src/calibre/translations/sk.po
--- a/src/calibre/translations/sl.po
+++ b/src/calibre/translations/sl.po
--- a/src/calibre/translations/sv.po
+++ b/src/calibre/translations/sv.po
--- a/src/calibre/translations/te.po
+++ b/src/calibre/translations/te.po
--- a/src/calibre/web/feeds/recipes/init.py
+++ b/src/calibre/web/feeds/recipes/init.py
@ -34,7 +34,9 @@ recipe_modules = ['recipe_' + r for r in (
           'al_jazeera', 'winsupersite', 'borba', 'courrierinternational',
           'lamujerdemivida', 'soldiers', 'theonion', 'news_times',
           'el_universal', 'mediapart', 'wikinews_en', 'ecogeek', 'daily_mail',
-           'new_york_review_of_books_no_sub', 'politico',
+           'new_york_review_of_books_no_sub', 'politico', 'adventuregamers',
+           'mondedurable', 'instapaper', 'dnevnik_cro', 'vecernji_list',
+           'nacional_cro', '24sata',
          )]

 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_24sata.py
+++ b/src/calibre/web/feeds/recipes/recipe_24sata.py
@ -0,0 +1,57 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+
+'''
+24sata.hr
+'''
+
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class Cro24Sata(BasicNewsRecipe):
+    title                 = '24 Sata - Hr'
+    __author__            = 'Darko Miletic'
+    description           = "News Portal from Croatia"
+    publisher             = '24sata.hr'
+    category              = 'news, politics, Croatia'    
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    delay                 = 4
+    no_stylesheets        = True
+    encoding              = 'utf-8'
+    use_embedded_content  = False
+    remove_javascript     = True    
+    language              = _('Croatian')
+
+    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
+    
+    html2lrf_options = [
+                          '--comment', description
+                        , '--category', category
+                        , '--publisher', publisher
+                        ]
+    
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
+     
+    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+
+    remove_tags = [
+                    dict(name=['object','link','embed'])
+                   ,dict(name='table', attrs={'class':'enumbox'})
+                  ]
+    
+    feeds = [(u'Najnovije Vijesti', u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')]
+
+    def preprocess_html(self, soup):
+        soup.html['lang']     = 'hr-HR'
+        mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
+        soup.head.insert(0,mtag)
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
+
+    def print_version(self, url):
+        return url + '&action=ispis'
+        
--- a/src/calibre/web/feeds/recipes/recipe_adventuregamers.py
+++ b/src/calibre/web/feeds/recipes/recipe_adventuregamers.py
@ -0,0 +1,75 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.adventuregamers.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdventureGamers(BasicNewsRecipe):
+    title                 = u'Adventure Gamers'
+    language              = _('English')
+    __author__            = 'Darko Miletic'
+    description           = 'Adventure games portal'    
+    publisher             = 'Adventure Gamers'
+    category              = 'news, games, adventure, technology'    
+    language              = _('English')
+    oldest_article        = 10
+    delay                 = 10
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    encoding              = 'cp1252'
+    remove_javascript     = True
+    use_embedded_content  = False
+    INDEX                 = u'http://www.adventuregamers.com'
+    
+    html2lrf_options = [
+                          '--comment', description
+                        , '--category', category
+                        , '--publisher', publisher
+                        ]
+    
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
+
+    keep_only_tags = [
+                       dict(name='div', attrs={'class':'content_middle'})
+                     ]
+
+    remove_tags = [
+                     dict(name=['object','link','embed','form'])
+                    ,dict(name='div', attrs={'class':['related-stories','article_leadout','prev','next','both']})
+                  ]
+                  
+    remove_tags_after = [dict(name='div', attrs={'class':'toolbar_fat'})]
+    
+    feeds = [(u'Articles', u'http://feeds2.feedburner.com/AdventureGamers')]
+    
+    def get_article_url(self, article):
+        return article.get('guid',  None)
+    
+    def append_page(self, soup, appendtag, position):
+        pager = soup.find('div',attrs={'class':'toolbar_fat_next'})
+        if pager:
+           nexturl = self.INDEX + pager.a['href']
+           soup2 = self.index_to_soup(nexturl)
+           texttag = soup2.find('div', attrs={'class':'bodytext'})
+           for it in texttag.findAll(style=True):
+               del it['style']
+           newpos = len(texttag.contents)          
+           self.append_page(soup2,texttag,newpos)
+           texttag.extract()
+           appendtag.insert(position,texttag)
+        
+    
+    def preprocess_html(self, soup):
+        mtag = '<meta http-equiv="Content-Language" content="en-US"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
+        soup.head.insert(0,mtag)    
+        for item in soup.findAll(style=True):
+            del item['style']
+        self.append_page(soup, soup.body, 3)
+        pager = soup.find('div',attrs={'class':'toolbar_fat'})
+        if pager:
+           pager.extract()        
+        return soup
--- a/src/calibre/web/feeds/recipes/recipe_dnevnik_cro.py
+++ b/src/calibre/web/feeds/recipes/recipe_dnevnik_cro.py
@ -0,0 +1,60 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+
+'''
+dnevnik.hr
+'''
+
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class DnevnikCro(BasicNewsRecipe):
+    title                 = 'Dnevnik - Hr'
+    __author__            = 'Darko Miletic'
+    description           = "Vijesti iz Hrvatske"
+    publisher             = 'Dnevnik.hr'
+    category              = 'news, politics, Croatia'    
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    delay                 = 4
+    no_stylesheets        = True
+    encoding              = 'utf-8'
+    use_embedded_content  = False
+    remove_javascript     = True    
+    language              = _('Croatian')
+
+    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
+    
+    html2lrf_options = [
+                          '--comment', description
+                        , '--category', category
+                        , '--publisher', publisher
+                        , '--ignore-tables'
+                        ]
+    
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' 
+     
+    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+
+    keep_only_tags     = [dict(name='div', attrs={'id':'article'})]
+        
+    remove_tags = [
+                    dict(name=['object','link','embed'])
+                   ,dict(name='div', attrs={'class':'menu'})
+                   ,dict(name='div', attrs={'id':'video'})
+                  ]
+
+    remove_tags_after  = dict(name='div', attrs={'id':'content'})
+
+    feeds = [(u'Vijesti', u'http://rss.dnevnik.hr/index.rss')]
+
+    def preprocess_html(self, soup):
+        soup.html['lang']     = 'hr-HR'
+        mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
+        soup.head.insert(0,mtag)
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
+
--- a/src/calibre/web/feeds/recipes/recipe_instapaper.py
+++ b/src/calibre/web/feeds/recipes/recipe_instapaper.py
@ -0,0 +1,74 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.instapaper.com
+'''
+
+from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Instapaper(BasicNewsRecipe):
+    title                 = 'Instapaper.com'
+    __author__            = 'Darko Miletic'
+    description           = 'Personalized news feeds. Go to instapaper.com to setup up your news.'    
+    publisher             = 'Instapaper.com'
+    category              = 'news, custom' 
+    oldest_article        = 7
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    remove_javascript     = True
+    needs_subscription    = True 
+    INDEX                 = u'http://www.instapaper.com'
+    LOGIN                 = INDEX + u'/user/login'
+    
+    html2lrf_options = [
+                          '--comment', description
+                        , '--category', category
+                        , '--publisher', publisher
+                        ]
+    
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"' 
+
+    feeds = [
+              (u'Unread articles' , INDEX + u'/u'      )
+             ,(u'Starred articles', INDEX + u'/starred')
+            ]
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+        if self.username is not None:
+            br.open(self.LOGIN)
+            br.select_form(nr=0)
+            br['username'] = self.username
+            if self.password is not None:
+               br['password'] = self.password
+            br.submit()
+        return br
+    
+    def parse_index(self):
+        totalfeeds = []
+        lfeeds = self.get_feeds()
+        for feedobj in lfeeds:
+            feedtitle, feedurl = feedobj
+            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
+            articles = []
+            soup = self.index_to_soup(feedurl)
+            for item in soup.findAll('div', attrs={'class':'titleRow'}):
+                description = self.tag_to_string(item.div)
+                atag = item.a
+                if atag and atag.has_key('href'):
+                    url         = self.INDEX + atag['href'] + '/text'
+                    title       = self.tag_to_string(atag)
+                    date        = strftime(self.timefmt)
+                    articles.append({
+                                      'title'      :title
+                                     ,'date'       :date
+                                     ,'url'        :url
+                                     ,'description':description
+                                    })
+            totalfeeds.append((feedtitle, articles))
+        return totalfeeds
+        
--- a/src/calibre/web/feeds/recipes/recipe_mondedurable.py
+++ b/src/calibre/web/feeds/recipes/recipe_mondedurable.py
@ -0,0 +1,45 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+mondedurable.science-et-vie.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdventureGamers(BasicNewsRecipe):
+    title                 = 'Monde durable'
+    language              = _('French')
+    __author__            = 'Darko Miletic'
+    description           = 'science news'    
+    publisher             = 'Monde durable'
+    category              = 'environnement, developpement durable, science & vie, science et vie'    
+    oldest_article        = 30
+    delay                 = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    encoding              = 'utf-8'
+    remove_javascript     = True
+    use_embedded_content  = False
+    
+    html2lrf_options = [
+                          '--comment', description
+                        , '--category', category
+                        , '--publisher', publisher
+                        ]
+    
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
+
+    keep_only_tags = [dict(name='div', attrs={'class':'post'})]
+
+    remove_tags = [dict(name=['object','link','embed','form','img'])]
+                  
+    feeds = [(u'Articles', u'http://mondedurable.science-et-vie.com/feed/')]
+    
+    def preprocess_html(self, soup):
+        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
+        soup.head.insert(0,mtag)    
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
--- a/src/calibre/web/feeds/recipes/recipe_nacional_cro.py
+++ b/src/calibre/web/feeds/recipes/recipe_nacional_cro.py
@ -0,0 +1,55 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+
+'''
+nacional.hr
+'''
+
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class NacionalCro(BasicNewsRecipe):
+    title                 = 'Nacional - Hr'
+    __author__            = 'Darko Miletic'
+    description           = "news from Croatia"
+    publisher             = 'Nacional.hr'
+    category              = 'news, politics, Croatia'    
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    delay                 = 4
+    no_stylesheets        = True
+    encoding              = 'utf-8'
+    use_embedded_content  = False
+    remove_javascript     = True    
+    language              = _('Croatian')
+
+    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
+    
+    html2lrf_options = [
+                          '--comment', description
+                        , '--category', category
+                        , '--publisher', publisher
+                        ]
+    
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
+     
+    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+
+    remove_tags = [dict(name=['object','link','embed'])]
+    
+    feeds = [(u'Najnovije Vijesti', u'http://www.nacional.hr/rss')]
+
+    def preprocess_html(self, soup):
+        soup.html['lang']     = 'hr-HR'
+        mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
+        soup.head.insert(0,mtag)
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
+
+    def print_version(self, url):
+        rest, sep, disc = url.rpartition('/')
+        return rest.replace('/clanak/','/clanak/print/')
+        
--- a/src/calibre/web/feeds/recipes/recipe_sueddeutsche.py
+++ b/src/calibre/web/feeds/recipes/recipe_sueddeutsche.py
@ -14,9 +14,10 @@ class Sueddeutsche(BasicNewsRecipe):
    description = 'News from Germany'
    __author__ = 'Oliver Niesner'
    use_embedded_content   = False
-    timefmt = ' [%d %b %Y]'
-    max_articles_per_feed = 40
    language = _('German')
+    timefmt = ' [%d %b %Y]'
+    oldest_article = 7
+    max_articles_per_feed = 50
    no_stylesheets = True
    encoding = 'latin1'
    remove_tags_after = [dict(name='div', attrs={'class':'artikelBox navigatorBox'})]
@ -27,6 +28,7 @@ class Sueddeutsche(BasicNewsRecipe):
                   dict(name='div', attrs={'class':'footerLinks'}),
                   dict(name='div', attrs={'class':'seitenanfang'}),
                   dict(name='td', attrs={'class':'mar5'}),
+                   dict(name='a', attrs={'class':'top'}),
                   dict(name='table', attrs={'class':'pageAktiv'}),
                   dict(name='table', attrs={'class':'xartable'}),
                   dict(name='table', attrs={'class':'wpnavi'}),
@ -39,8 +41,20 @@ class Sueddeutsche(BasicNewsRecipe):
                   dict(name='div', attrs={'class':'similar-article-box'}),
                   dict(name='div', attrs={'class':'videoBigHack'}),
                   dict(name='td', attrs={'class':'artikelDruckenRight'}),
+                   dict(name='td', attrs={'class':'stoerBSbgUnten'}),
+                   dict(name='li', attrs={'class':'first'}),
+                   dict(name='li', attrs={'class':'bookmark closed'}),
+                   dict(name='li', attrs={'class':'print'}),
+                   dict(name='li', attrs={'class':'mail'}),
+                   dict(name='li', attrs={'class':'last'}),
+                   dict(name='li', attrs={'class':'tiefethemen'}),
+                   dict(name='li', attrs={'class':'prev'}),
+                   dict(name='ul', attrs={'class':'activities'}),
+                   dict(name='li', attrs={'class':'next'}),
                   dict(name='span', attrs={'class':'hidePrint'}),
                   dict(id='headerLBox'),
+                   dict(id='bookmarklist1'),
+                   dict(id='bookmarklist2'),
                   dict(id='rechteSpalte'),
                   dict(id='newsticker-list-small'),
                   dict(id='ntop5'),
@ -57,7 +71,4 @@ class Sueddeutsche(BasicNewsRecipe):
    
    feeds =  [ (u'Sueddeutsche', u'http://www.sueddeutsche.de/app/service/rss/alles/rss.xml') ] 

-    def postprocess_html(self, soup, first_fetch):
-        for t in soup.findAll(['table', 'tr', 'td']):
-            t.name = 'div'
-        return soup
+
--- a/src/calibre/web/feeds/recipes/recipe_vecernji_list.py
+++ b/src/calibre/web/feeds/recipes/recipe_vecernji_list.py
@ -0,0 +1,58 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+
+'''
+www.vecernji.hr
+'''
+
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class VecernjiList(BasicNewsRecipe):
+    title                 = 'Vecernji List'
+    __author__            = 'Darko Miletic'
+    description           = "Vecernji.hr je vodeci hrvatski news portal. Cilj je biti prvi u objavljivanju svih vijesti iz Hrvatske, svijeta, sporta, gospodarstva, showbiza i jos mnogo vise. Uz cjelodnevni rad, novinari objavljuju preko 300 raznih vijesti svakoga dana. Vecernji.hr prati sve vaznije dogadaje specijalnim izvjestajima, video specijalima i foto galerijama."
+    publisher             = 'Vecernji.hr'
+    category              = 'news, politics, Croatia'    
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    delay                 = 4
+    no_stylesheets        = True
+    encoding              = 'utf-8'
+    use_embedded_content  = False
+    remove_javascript     = True    
+    language              = _('Croatian')
+
+    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
+    
+    html2lrf_options = [
+                          '--comment', description
+                        , '--category', category
+                        , '--publisher', publisher
+                        , '--ignore-tables'
+                        ]
+    
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' 
+     
+    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+
+    remove_tags = [
+                    dict(name=['object','link','embed'])
+                   ,dict(name='table', attrs={'class':'enumbox'})
+                  ]
+    
+    feeds = [(u'Vijesti', u'http://www.vecernji.hr/rss/')]
+
+    def preprocess_html(self, soup):
+        soup.html['lang']     = 'hr-HR'
+        mtag = '<meta http-equiv="Content-Language" content="hr-HR"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
+        soup.head.insert(0,mtag)
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
+
+    def print_version(self, url):
+        return url.replace('/index.do','/print.do')
+        
--- a/4
+++ b/4
@ -8,3 +8,7 @@
 * Rationalize books table. Add a pubdate column, remove the uri column (and associated support in add_books) and  convert series_index to a float.

 * Replace single application stuff with Listener from multiprocessing
+
+* Refactor add books to use a separate process named calibre-worker-add
+  - Don't use the process for adding a single book
+  - Use a process pool for speed