Merge from trunk

Commit a6f2aa1ef8 by Charles Haley, 2011-07-07 18:48:51 +01:00
28 changed files with 993 additions and 367 deletions


@@ -1,83 +1,63 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Cracked(BasicNewsRecipe):
title = u'Cracked.com'
__author__ = u'Nudgenudge'
__author__ = 'UnWeave'
language = 'en'
description = 'America''s Only Humor and Video Site, since 1958'
description = "America's Only HumorSite since 1958"
publisher = 'Cracked'
category = 'comedy, lists'
oldest_article = 2
delay = 10
max_articles_per_feed = 2
oldest_article = 3 #days
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'cp1252'
encoding = 'ascii'
remove_javascript = True
use_embedded_content = False
INDEX = u'http://www.cracked.com'
extra_css = """
.pageheader_type{font-size: x-large; font-weight: bold; color: #828D74}
.pageheader_title{font-size: xx-large; color: #394128}
.pageheader_byline{font-size: small; font-weight: bold; color: #394128}
.score_bg {display: inline; width: 100%; margin-bottom: 2em}
.score_column_1{ padding-left: 10px; font-size: small; width: 50%}
.score_column_2{ padding-left: 10px; font-size: small; width: 50%}
.score_column_3{ padding-left: 10px; font-size: small; width: 50%}
.score_header{font-size: large; color: #50544A}
.bodytext{display: block}
body{font-family: Helvetica,Arial,sans-serif}
"""
feeds = [ (u'Articles', u'http://feeds.feedburner.com/CrackedRSS/') ]
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}
keep_only_tags = [
dict(name='div', attrs={'class':['Column1']})
remove_tags_before = dict(id='PrimaryContent')
remove_tags_after = dict(name='div', attrs={'class':'shareBar'})
remove_tags = [ dict(name='div', attrs={'class':['social',
'FacebookLike',
'shareBar'
]}),
dict(name='div', attrs={'id':['inline-share-buttons',
]}),
dict(name='span', attrs={'class':['views',
'KonaFilter'
]}),
#dict(name='img'),
]
feeds = [(u'Articles', u'http://feeds.feedburner.com/CrackedRSS')]
def get_article_url(self, article):
return article.get('guid', None)
def cleanup_page(self, soup):
for item in soup.findAll(style=True):
del item['style']
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
for div_to_remove in soup.findAll('div', attrs={'id':['googlead_1','fb-like-article','comments_section']}):
div_to_remove.extract()
for div_to_remove in soup.findAll('div', attrs={'class':['share_buttons_col_1','GenericModule1']}):
div_to_remove.extract()
for div_to_remove in soup.findAll('div', attrs={'class':re.compile("prev_next")}):
div_to_remove.extract()
for ul_to_remove in soup.findAll('ul', attrs={'class':['Nav6']}):
ul_to_remove.extract()
for image in soup.findAll('img', attrs={'alt': 'article image'}):
image.extract()
def append_page(self, soup, appendtag, position):
pager = soup.find('a',attrs={'class':'next_arrow_active'})
if pager:
nexturl = self.INDEX + pager['href']
soup2 = self.index_to_soup(nexturl)
texttag = soup2.find('div', attrs={'class':re.compile("userStyled")})
newpos = len(texttag.contents)
self.append_page(soup2,texttag,newpos)
texttag.extract()
self.cleanup_page(appendtag)
appendtag.insert(position,texttag)
else:
self.cleanup_page(appendtag)
def appendPage(self, soup, appendTag, position):
# Check if article has multiple pages
pageNav = soup.find('nav', attrs={'class':'PaginationContent'})
if pageNav:
# Check not at last page
nextPage = pageNav.find('a', attrs={'class':'next'})
if nextPage:
nextPageURL = nextPage['href']
nextPageSoup = self.index_to_soup(nextPageURL)
# 8th <section> tag contains article content
nextPageContent = nextPageSoup.findAll('section')[7]
newPosition = len(nextPageContent.contents)
self.appendPage(nextPageSoup,nextPageContent,newPosition)
nextPageContent.extract()
pageNav.extract()
appendTag.insert(position,nextPageContent)
def preprocess_html(self, soup):
self.append_page(soup, soup.body, 3)
return self.adeify_images(soup)
self.appendPage(soup, soup.body, 3)
return soup


@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds import Feed
class GC_gl(BasicNewsRecipe):
title = u'Galicia Confidencial (RSS)'
__author__ = u'Susana Sotelo Docío'
description = u'Unha fiestra de información aberta a todos'
publisher = u'Galicia Confidencial'
category = u'news, society, politics, Galicia'
encoding = 'utf-8'
language = 'gl'
direction = 'ltr'
cover_url = 'http://galiciaconfidencial.com/imagenes/header/logo_gc.gif'
oldest_article = 5
max_articles_per_feed = 100
center_navbar = False
feeds = [(u'Novas no RSS', u'http://galiciaconfidencial.com/rss2/xeral.rss')]
extra_css = u' p{text-align:left} '
def print_version(self, url):
return url.replace('http://galiciaconfidencial.com/nova/', 'http://galiciaconfidencial.com/imprimir/')
def parse_index(self):
feeds = []
self.gc_parse_feeds(feeds)
return feeds
def gc_parse_feeds(self, feeds):
rssFeeds = Feed()
rssFeeds = BasicNewsRecipe.parse_feeds(self)
self.feed_to_index_append(rssFeeds[:], feeds)
def feed_to_index_append(self, feedObject, masterFeed):
for feed in feedObject:
newArticles = []
for article in feed.articles:
newArt = {
'title' : article.title,
'url' : article.url,
'date' : article.date
}
newArticles.append(newArt)
masterFeed.append((feed.title,newArticles))
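
For context: feed_to_index_append flattens calibre's parsed Feed objects into the plain list-of-tuples structure that parse_index must return. A minimal sketch of that structure (title, URL and date values are hypothetical):

    index = [
        (u'Novas no RSS', [
            {'title': u'Example article',
             'url': 'http://galiciaconfidencial.com/nova/12345',
             'date': u'7.7.2011'},
        ]),
    ]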

recipes/menorca.recipe (new file, 138 lines)

@@ -0,0 +1,138 @@
# -*- coding: utf-8 -*-
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds import Feed
class Menorca(BasicNewsRecipe):
title = 'Menorca'
publisher = 'Editorial Menorca S.A. '
__author__ = 'M. Sintes'
description = u'Peri\xf3dico con informaci\xf3n de Menorca, Espa\xf1a'
category = 'news, politics, economy, culture, Menorca, Spain '
language = 'es'
encoding = 'cp1252'
no_stylesheets = True
oldest_article = 5
max_articles_per_feed = 25
feeds = [ (u'Principal',u'http://www.menorca.info/rss'),
(u'Opini\xf3n',u'http://www.menorca.info/rss?seccion=opinion'),
(u'Menorca',u'http://www.menorca.info/rss?seccion=menorca'),
(u'Alaior',u'http://www.menorca.info/rss?seccion=pueblos/alaior'),
(u'Ciutadella', u'http://www.menorca.info/rss?seccion=pueblos/ciutadella'),
(u'Es Castell', u'http://www.menorca.info/rss?seccion=pueblos/escastell'),
(u'Es Mercadal', u'http://www.menorca.info/rss?seccion=pueblos/esmercadal'),
(u'Es Migjorn', u'http://www.menorca.info/rss?seccion=pueblos/esmigjorn'),
(u'Ferreries', u'http://www.menorca.info/rss?seccion=pueblos/ferreries'),
(u'Fornells', u'http://www.menorca.info/rss?seccion=pueblos/fornells'),
(u'Llucma\xe7anes', u'http://www.menorca.info/rss?seccion=pueblos/llucmaanes'),
(u'Ma\xf3', u'http://www.menorca.info/rss?seccion=pueblos/mao'),
(u'Sant Climent', u'http://www.menorca.info/rss?seccion=pueblos/santcliment'),
(u'Sant Llu\xeds', u'http://www.menorca.info/rss?seccion=pueblos/santlluis'),
(u'Deportes',u'http://www.menorca.info/rss?seccion=deportes'),
(u'Balears', u'http://www.menorca.info/rss?seccion=balears')]
# Sections whose RSS link is broken; these are fetched directly from the web page
seccions_web = [(u'Mundo',u'http://www.menorca.info/actualidad/mundo'),
(u'Econom\xeda',u'http://www.menorca.info/actualidad/economia'),
(u'Espa\xf1a',u'http://www.menorca.info/actualidad/espana')]
remove_tags_before = dict(name='div', attrs={'class':'bloqueTitulosNoticia'})
remove_tags_after = dict(name='div', attrs={'class':'compartir'})
remove_tags = [dict(id = 'utilidades'),
dict(name='div', attrs={'class': 'totalComentarios'}),
dict(name='div', attrs={'class': 'compartir'}),
dict(name='div', attrs={'class': re.compile("img_noticia*")})
]
def print_version(self, url):
return url + '?d=print'
def feed_to_index_append(self, feedObject, masterFeed):
# Loop through the feed object and build the correct type of article list
for feed in feedObject:
newArticles = []
for article in feed.articles:
newArt = {
'title' : article.title,
'url' : article.url,
'date' : article.date,
'description' : article.text_summary
}
newArticles.append(newArt)
# Append the newly-built list object to the index object passed in as masterFeed.
masterFeed.append((feed.title,newArticles))
def parse_index(self):
rssFeeds = Feed()
rssFeeds = BasicNewsRecipe.parse_feeds(self)
articles = []
feeds = []
self.feed_to_index_append(rssFeeds,feeds)
for (nom_seccio, url_seccio) in self.seccions_web:
articles = []
soup = self.index_to_soup(url_seccio)
for article in soup.findAll('div', attrs={'class':re.compile("articulo noticia|cajaNoticiaPortada")}):
h = article.find(['h2','h3'])
titol = self.tag_to_string(h)
a = article.find('a', href=True)
url = 'http://www.menorca.info' + a['href']
desc = None
autor = ''
dt = ''
soup_art = self.index_to_soup(url)
aut = soup_art.find('div', attrs={'class':'autor'})
tx = self.tag_to_string(aut)
ls = re.split('[,;]',tx)
t = len(ls)
if t >= 1:
autor = ls[0]
if t > 1:
d = ls[t-1]
if len(d) >= 10:
lt = len(d) - 10
dt = d[lt:]
self.log('\tTrobat article: ', titol, 'a', url, 'Seccio: ', nom_seccio, 'Autor: ', autor, 'Data: ', dt)
articles.append({'title': titol, 'url': url, 'description': desc, 'date':dt, 'author': autor})
if articles:
feeds.append((nom_seccio, articles))
return feeds


@@ -1,94 +1,67 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
spiegel.de
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Spiegel_int(BasicNewsRecipe):
title = 'Spiegel Online International'
__author__ = 'Darko Miletic and Sujata Raman'
description = "News and POV from Europe's largest newsmagazine"
description = "Daily news, analysis and opinion from Europe's leading newsmagazine and Germany's top news Web site"
oldest_article = 7
max_articles_per_feed = 100
language = 'en'
language = 'en_DE'
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
publisher = 'SPIEGEL ONLINE GmbH'
category = 'news, politics, Germany'
lang = 'en'
recursions = 1
match_regexps = [r'http://www.spiegel.de/.*-[1-9],00.html']
masthead_url = 'http://www.spiegel.de/static/sys/v9/spiegelonline_logo.png'
publication_type = 'magazine'
conversion_options = {
'comments' : description
,'tags' : category
,'language' : lang
,'publisher' : publisher
,'pretty_print': True
,'language' : language
,'publisher': publisher
}
extra_css = '''
#spArticleColumn{font-family:verdana,arial,helvetica,geneva,sans-serif ; }
#spArticleContent{font-family: Verdana,Arial,Helvetica,Geneva,sans-serif}
h1{color:#666666; font-weight:bold;}
h2{color:#990000;}
h3{color:#990000;}
h4 {color:#990000;}
a{color:#990000;}
.spAuthor{font-style:italic;}
#spIntroTeaser{font-weight:bold;}
#spIntroTeaser{font-weight:bold}
.spCredit{color:#666666; font-size:x-small;}
.spShortDate{font-size:x-small;}
.spArticleImageBox {font-size:x-small;}
.spPhotoGallery{font-size:x-small; color:#990000 ;}
'''
keep_only_tags = [
dict(name ='div', attrs={'id': ['spArticleImageBox spAssetAlignleft','spArticleColumn']}),
]
remove_tags = [
dict(name='div', attrs={'id':['spSocialBookmark','spArticleFunctions','spMultiPagerHeadlines',]}),
dict(name='div', attrs={'class':['spCommercial spM520','spArticleCredit','spPicZoom']}),
]
feeds = [(u'Spiegel Online', u'http://www.spiegel.de/schlagzeilen/rss/0,5291,676,00.xml')]
def postprocess_html(self, soup,first):
for tag in soup.findAll(name='div',attrs={'id':"spMultiPagerControl"}):
tag.extract()
p = soup.find(name = 'p', attrs={'id':'spIntroTeaser'})
if p.string is not None:
t = p.string.rpartition(':')[0]
if 'Part'in t:
if soup.h1 is not None:
soup.h1.extract()
if soup.h2 is not None:
soup.h2.extract()
functag = soup.find(name= 'div', attrs={'id':"spArticleFunctions"})
if functag is not None:
functag.extract()
auttag = soup.find(name= 'p', attrs={'class':"spAuthor"})
if auttag is not None:
auttag.extract()
pictag = soup.find(name= 'div', attrs={'id':"spArticleTopAsset"})
if pictag is not None:
pictag.extract()
keep_only_tags = [dict(attrs={'id':'spArticleContent'})]
remove_tags_after = dict(attrs={'id':'spArticleBody'})
remove_tags = [dict(name=['meta','base','iframe','embed','object'])]
remove_attributes = ['clear']
feeds = [(u'Spiegel Online', u'http://www.spiegel.de/international/index.rss')]
def print_version(self, url):
main, sep, rest = url.rpartition(',')
rmain, rsep, rrest = main.rpartition(',')
return rmain + ',druck-' + rrest + ',' + rest
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('a'):
if item.string is not None:
tstr = item.string
item.replaceWith(tstr)
else:
tstr = self.tag_to_string(item)
item.replaceWith(tstr)
return soup
# def print_version(self, url):
# main, sep, rest = url.rpartition(',')
# rmain, rsep, rrest = main.rpartition(',')
# return rmain + ',druck-' + rrest + ',' + rest
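
For illustration, the now-disabled print_version rewrote the comma-separated Spiegel URLs to their print ("druck") edition; a hypothetical example of the two rpartition calls:

    # url   = 'http://www.spiegel.de/international/0,1518,766410,00.html'
    # main  = 'http://www.spiegel.de/international/0,1518,766410'; rest = '00.html'
    # rmain = 'http://www.spiegel.de/international/0,1518'; rrest = '766410'
    # result: 'http://www.spiegel.de/international/0,1518,druck-766410,00.html'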


@@ -3,57 +3,16 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import textwrap, os, glob, functools, re
import os, glob, functools, re
from calibre import guess_type
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase, StoreBase
from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ebooks.html.to_zip import HTML2ZIP
# To archive plugins {{{
class HTML2ZIP(FileTypePlugin):
name = 'HTML to ZIP'
author = 'Kovid Goyal'
description = textwrap.dedent(_('''\
Follow all local links in an HTML file and create a ZIP \
file containing all linked files. This plugin is run \
every time you add an HTML file to the library.\
'''))
version = numeric_version
file_types = set(['html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml'])
supported_platforms = ['windows', 'osx', 'linux']
on_import = True
def run(self, htmlfile):
from calibre.ptempfile import TemporaryDirectory
from calibre.gui2.convert.gui_conversion import gui_convert
from calibre.customize.conversion import OptionRecommendation
from calibre.ebooks.epub import initialize_container
with TemporaryDirectory('_plugin_html2zip') as tdir:
recs =[('debug_pipeline', tdir, OptionRecommendation.HIGH)]
recs.append(['keep_ligatures', True, OptionRecommendation.HIGH])
if self.site_customization and self.site_customization.strip():
recs.append(['input_encoding', self.site_customization.strip(),
OptionRecommendation.HIGH])
gui_convert(htmlfile, tdir, recs, abort_after_input_dump=True)
of = self.temporary_file('_plugin_html2zip.zip')
tdir = os.path.join(tdir, 'input')
opf = glob.glob(os.path.join(tdir, '*.opf'))[0]
ncx = glob.glob(os.path.join(tdir, '*.ncx'))
if ncx:
os.remove(ncx[0])
epub = initialize_container(of.name, os.path.basename(opf))
epub.add_dir(tdir)
epub.close()
return of.name
def customization_help(self, gui=False):
return _('Character encoding for the input HTML files. Common choices '
'include: cp1252, latin1, iso-8859-1 and utf-8.')
class PML2PMLZ(FileTypePlugin):
name = 'PML to PMLZ'

src/calibre/db/cache.py (new file, 11 lines)

@@ -0,0 +1,11 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

src/calibre/db/locking.py (new file, 331 lines)

@@ -0,0 +1,331 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from threading import Lock, Condition, current_thread
class LockingError(RuntimeError):
pass
def create_locks():
'''
Return a pair of locks: (read_lock, write_lock)
The read_lock can be acquired by multiple threads simultaneously, it can
also be acquired multiple times by the same thread.
Only one thread can hold write_lock at a time, and only if there are no
current read_locks. While the write_lock is held no
other threads can acquire read locks. The write_lock can also be acquired
multiple times by the same thread.
Both read_lock and write_lock are meant to be used in with statements (they
operate on a single underlying lock).
WARNING: Be very careful to not try to acquire a read lock while the same
thread holds a write lock and vice versa. That is, a given thread should
always release *all* locks of type A before trying to acquire a lock of type
B. Bad things will happen if you violate this rule, the most benign of
which is the raising of a LockingError (I haven't been able to eliminate
the possibility of deadlocking in this scenario).
'''
l = SHLock()
return RWLockWrapper(l), RWLockWrapper(l, is_shared=False)
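
A minimal usage sketch for the pair returned by create_locks() (the accessor functions are hypothetical; both wrappers share the one SHLock, so the with blocks serialize exactly as the docstring describes):

    read_lock, write_lock = create_locks()

    def read_title(db):  # hypothetical reader
        with read_lock:  # many threads may hold this simultaneously
            return db.title

    def set_title(db, title):  # hypothetical writer
        with write_lock:  # exclusive: blocks until all readers release
            db.title = title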
class SHLock(object):
'''
Shareable lock class. Used to implement the Multiple readers-single writer
paradigm. As best as I can tell, neither writer nor reader starvation
should be possible.
Based on code from: https://github.com/rfk/threading2
'''
def __init__(self):
self._lock = Lock()
# When a shared lock is held, is_shared will give the cumulative
# number of locks and _shared_owners maps each owning thread to
# the number of locks it holds.
self.is_shared = 0
self._shared_owners = {}
# When an exclusive lock is held, is_exclusive will give the number
# of locks held and _exclusive_owner will give the owning thread
self.is_exclusive = 0
self._exclusive_owner = None
# When someone is forced to wait for a lock, they add themselves
# to one of these queues along with a "waiter" condition that
# is used to wake them up.
self._shared_queue = []
self._exclusive_queue = []
# This is for recycling waiter objects.
self._free_waiters = []
def acquire(self, blocking=True, shared=False):
'''
Acquire the lock in shared or exclusive mode.
If blocking is False this method will return False if acquiring the
lock failed.
'''
with self._lock:
if shared:
return self._acquire_shared(blocking)
else:
return self._acquire_exclusive(blocking)
assert not (self.is_shared and self.is_exclusive)
def release(self):
''' Release the lock. '''
# This decrements the appropriate lock counters, and if the lock
# becomes free, it looks for a queued thread to hand it off to.
# By doing the handoff here we ensure fairness.
me = current_thread()
with self._lock:
if self.is_exclusive:
if self._exclusive_owner is not me:
raise LockingError("release() called on unheld lock")
self.is_exclusive -= 1
if not self.is_exclusive:
self._exclusive_owner = None
# If there are waiting shared locks, issue them
# all and then wake everyone up.
if self._shared_queue:
for (thread, waiter) in self._shared_queue:
self.is_shared += 1
self._shared_owners[thread] = 1
waiter.notify()
del self._shared_queue[:]
# Otherwise, if there are waiting exclusive locks,
# they get first dibs on the lock.
elif self._exclusive_queue:
(thread, waiter) = self._exclusive_queue.pop(0)
self._exclusive_owner = thread
self.is_exclusive += 1
waiter.notify()
elif self.is_shared:
try:
self._shared_owners[me] -= 1
if self._shared_owners[me] == 0:
del self._shared_owners[me]
except KeyError:
raise LockingError("release() called on unheld lock")
self.is_shared -= 1
if not self.is_shared:
# If there are waiting exclusive locks,
# they get first dibs on the lock.
if self._exclusive_queue:
(thread, waiter) = self._exclusive_queue.pop(0)
self._exclusive_owner = thread
self.is_exclusive += 1
waiter.notify()
else:
assert not self._shared_queue
else:
raise LockingError("release() called on unheld lock")
def _acquire_shared(self, blocking=True):
me = current_thread()
# Easy case: acquiring a lock we already hold.
if self.is_shared and me in self._shared_owners:
self.is_shared += 1
self._shared_owners[me] += 1
return True
# If the lock is already spoken for by an exclusive, add us
# to the shared queue and it will give us the lock eventually.
if self.is_exclusive or self._exclusive_queue:
if self._exclusive_owner is me:
raise LockingError("can't downgrade SHLock object")
if not blocking:
return False
waiter = self._take_waiter()
try:
self._shared_queue.append((me, waiter))
waiter.wait()
assert not self.is_exclusive
finally:
self._return_waiter(waiter)
else:
self.is_shared += 1
self._shared_owners[me] = 1
return True
def _acquire_exclusive(self, blocking=True):
me = current_thread()
# Easy case: acquiring a lock we already hold.
if self._exclusive_owner is me:
assert self.is_exclusive
self.is_exclusive += 1
return True
# Do not allow upgrade of lock
if self.is_shared and me in self._shared_owners:
raise LockingError("can't upgrade SHLock object")
# If the lock is already spoken for, add us to the exclusive queue.
# This will eventually give us the lock when it's our turn.
if self.is_shared or self.is_exclusive:
if not blocking:
return False
waiter = self._take_waiter()
try:
self._exclusive_queue.append((me, waiter))
waiter.wait()
finally:
self._return_waiter(waiter)
else:
self._exclusive_owner = me
self.is_exclusive += 1
return True
def _take_waiter(self):
try:
return self._free_waiters.pop()
except IndexError:
return Condition(self._lock)#, verbose=True)
def _return_waiter(self, waiter):
self._free_waiters.append(waiter)
class RWLockWrapper(object):
def __init__(self, shlock, is_shared=True):
self._shlock = shlock
self._is_shared = is_shared
def __enter__(self):
self._shlock.acquire(shared=self._is_shared)
return self
def __exit__(self, *args):
self._shlock.release()
# Tests {{{
if __name__ == '__main__':
import time, random, unittest
from threading import Thread
class TestSHLock(unittest.TestCase):
"""Testcases for SHLock class."""
def test_upgrade(self):
lock = SHLock()
lock.acquire(shared=True)
self.assertRaises(LockingError, lock.acquire, shared=False)
lock.release()
def test_downgrade(self):
lock = SHLock()
lock.acquire(shared=False)
self.assertRaises(LockingError, lock.acquire, shared=True)
lock.release()
def test_recursive(self):
lock = SHLock()
lock.acquire(shared=True)
lock.acquire(shared=True)
self.assertEqual(lock.is_shared, 2)
lock.release()
lock.release()
self.assertFalse(lock.is_shared)
lock.acquire(shared=False)
lock.acquire(shared=False)
self.assertEqual(lock.is_exclusive, 2)
lock.release()
lock.release()
self.assertFalse(lock.is_exclusive)
def test_release(self):
lock = SHLock()
self.assertRaises(LockingError, lock.release)
def get_lock(shared):
lock.acquire(shared=shared)
time.sleep(1)
lock.release()
threads = [Thread(target=get_lock, args=(x,)) for x in (True,
False)]
for t in threads:
t.daemon = True
t.start()
self.assertRaises(LockingError, lock.release)
t.join(2)
self.assertFalse(t.is_alive())
self.assertFalse(lock.is_shared)
self.assertFalse(lock.is_exclusive)
def test_acquire(self):
lock = SHLock()
def get_lock(shared):
lock.acquire(shared=shared)
time.sleep(1)
lock.release()
shared = Thread(target=get_lock, args=(True,))
shared.daemon = True
shared.start()
time.sleep(0.1)
self.assertTrue(lock.acquire(shared=True, blocking=False))
lock.release()
self.assertFalse(lock.acquire(shared=False, blocking=False))
lock.acquire(shared=False)
self.assertFalse(shared.is_alive())
lock.release()
self.assertTrue(lock.acquire(shared=False, blocking=False))
lock.release()
exclusive = Thread(target=get_lock, args=(False,))
exclusive.daemon = True
exclusive.start()
time.sleep(0.1)
self.assertFalse(lock.acquire(shared=False, blocking=False))
self.assertFalse(lock.acquire(shared=True, blocking=False))
lock.acquire(shared=True)
self.assertFalse(exclusive.is_alive())
lock.release()
lock.acquire(shared=False)
lock.release()
lock.acquire(shared=True)
lock.release()
self.assertFalse(lock.is_shared)
self.assertFalse(lock.is_exclusive)
def test_contention(self):
lock = SHLock()
done = []
def lots_of_acquires():
for _ in xrange(1000):
shared = random.choice([True,False])
lock.acquire(shared=shared)
lock.acquire(shared=shared)
time.sleep(random.random() * 0.0001)
lock.release()
time.sleep(random.random() * 0.0001)
lock.acquire(shared=shared)
time.sleep(random.random() * 0.0001)
lock.release()
lock.release()
done.append(True)
threads = [Thread(target=lots_of_acquires) for _ in xrange(10)]
for t in threads:
t.daemon = True
t.start()
for t in threads:
t.join(20)
live = [t for t in threads if t.is_alive()]
self.assertListEqual(live, [], 'ShLock hung')
self.assertEqual(len(done), len(threads), 'SHLock locking failed')
self.assertFalse(lock.is_shared)
self.assertFalse(lock.is_exclusive)
suite = unittest.TestLoader().loadTestsFromTestCase(TestSHLock)
unittest.TextTestRunner(verbosity=2).run(suite)
# }}}

View File

@@ -24,12 +24,12 @@ class ANDROID(USBMS):
0xff9 : [0x0100, 0x0227, 0x0226],
0xc87 : [0x0100, 0x0227, 0x0226],
0xc91 : [0x0100, 0x0227, 0x0226],
0xc92 : [0x100],
0xc97 : [0x226],
0xc99 : [0x0100],
0xca2 : [0x226],
0xca3 : [0x100],
0xca4 : [0x226],
0xc92 : [0x100, 0x0227, 0x0226, 0x222],
0xc97 : [0x100, 0x0227, 0x0226, 0x222],
0xc99 : [0x100, 0x0227, 0x0226, 0x222],
0xca2 : [0x100, 0x0227, 0x0226, 0x222],
0xca3 : [0x100, 0x0227, 0x0226, 0x222],
0xca4 : [0x100, 0x0227, 0x0226, 0x222],
},
# Eken


@@ -203,12 +203,13 @@ class KOBO(USBMS):
result = cursor.fetchone()
self.dbversion = result[0]
debug_print("Database Version: ", self.dbversion)
if self.dbversion >= 14:
query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex from content where BookID is Null'
'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex from content where BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)'
elif self.dbversion < 14 and self.dbversion >= 8:
query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
'ImageID, ReadStatus, ___ExpirationStatus, "-1" as FavouritesIndex from content where BookID is Null'
'ImageID, ReadStatus, ___ExpirationStatus, "-1" as FavouritesIndex from content where BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)'
else:
query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex from content where BookID is Null'
@@ -542,7 +543,72 @@ class KOBO(USBMS):
paths[source_id] = os.path.join(prefix, *(path.split('/')))
return paths
def reset_readstatus(self, connection, oncard):
cursor = connection.cursor()
# Reset Im_Reading list in the database
if oncard == 'carda':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID like \'file:///mnt/sd/%\''
elif oncard != 'carda' and oncard != 'cardb':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID not like \'file:///mnt/sd/%\''
try:
cursor.execute (query)
except:
debug_print(' Database Exception: Unable to reset ReadStatus list')
raise
else:
connection.commit()
debug_print(' Commit: Reset ReadStatus list')
cursor.close()
def set_readstatus(self, connection, ContentID, ReadStatus):
cursor = connection.cursor()
t = (ContentID,)
cursor.execute('select DateLastRead from Content where BookID is Null and ContentID = ?', t)
result = cursor.fetchone()
if result is None:
datelastread = '1970-01-01T00:00:00'
else:
datelastread = result[0] if result[0] is not None else '1970-01-01T00:00:00'
t = (ReadStatus,datelastread,ContentID,)
try:
cursor.execute('update content set ReadStatus=?,FirstTimeReading=\'false\',DateLastRead=? where BookID is Null and ContentID = ?', t)
except:
debug_print(' Database Exception: Unable to update ReadStatus')
raise
else:
connection.commit()
debug_print(' Commit: Setting ReadStatus List')
cursor.close()
def reset_favouritesindex(self, connection, oncard):
# Reset FavouritesIndex list in the database
if oncard == 'carda':
query= 'update content set FavouritesIndex=-1 where BookID is Null and ContentID like \'file:///mnt/sd/%\''
elif oncard != 'carda' and oncard != 'cardb':
query= 'update content set FavouritesIndex=-1 where BookID is Null and ContentID not like \'file:///mnt/sd/%\''
cursor = connection.cursor()
try:
cursor.execute (query)
except:
debug_print('Database Exception: Unable to reset Shortlist list')
raise
else:
connection.commit()
debug_print(' Commit: Reset FavouritesIndex list')
def update_device_database_collections(self, booklists, collections_attributes, oncard):
# Define lists for the ReadStatus
readstatuslist = {
"Im_Reading":1,
"Read":2,
"Closed":3,
}
# debug_print('Starting update_device_database_collections', collections_attributes)
# Force collections_attributes to be 'tags' as no other is currently supported
@@ -561,149 +627,35 @@ class KOBO(USBMS):
# return bytestrings if the content cannot be decoded as unicode
connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")
cursor = connection.cursor()
if collections:
# Need to reset the collections outside the particular loops
# otherwise the last item will not be removed
self.reset_readstatus(connection, oncard)
self.reset_favouritesindex(connection, oncard)
# Process any collections that exist
for category, books in collections.items():
# debug_print (category)
if category == 'Im_Reading':
# Reset Im_Reading list in the database
if oncard == 'carda':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 1 and ContentID like \'file:///mnt/sd/%\''
elif oncard != 'carda' and oncard != 'cardb':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 1 and ContentID not like \'file:///mnt/sd/%\''
try:
cursor.execute (query)
except:
debug_print('Database Exception: Unable to reset Im_Reading list')
raise
else:
# debug_print('Commit: Reset Im_Reading list')
connection.commit()
# This is used to reset the Im_Reading, Read and Closed list
# in the ReadStatus column of the Content table
if category in readstatuslist.keys():
debug_print("Category: ", category, " id = ", readstatuslist.get(category))
for book in books:
# debug_print('Title:', book.title, 'lpath:', book.path)
if 'Im_Reading' not in book.device_collections:
book.device_collections.append('Im_Reading')
debug_print(' Title:', book.title, 'category: ', category)
if category not in book.device_collections:
book.device_collections.append(category)
extension = os.path.splitext(book.path)[1]
ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path)
ContentID = self.contentid_from_path(book.path, ContentType)
t = (ContentID,)
cursor.execute('select DateLastRead from Content where BookID is Null and ContentID = ?', t)
result = cursor.fetchone()
if result is None:
datelastread = '1970-01-01T00:00:00'
else:
datelastread = result[0] if result[0] is not None else '1970-01-01T00:00:00'
t = (datelastread,ContentID,)
try:
cursor.execute('update content set ReadStatus=1,FirstTimeReading=\'false\',DateLastRead=? where BookID is Null and ContentID = ?', t)
except:
debug_print('Database Exception: Unable create Im_Reading list')
raise
else:
connection.commit()
# debug_print('Database: Commit create Im_Reading list')
if category == 'Read':
# Reset Im_Reading list in the database
if oncard == 'carda':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 2 and ContentID like \'file:///mnt/sd/%\''
elif oncard != 'carda' and oncard != 'cardb':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 2 and ContentID not like \'file:///mnt/sd/%\''
try:
cursor.execute (query)
except:
debug_print('Database Exception: Unable to reset Im_Reading list')
raise
else:
# debug_print('Commit: Reset Im_Reading list')
connection.commit()
for book in books:
# debug_print('Title:', book.title, 'lpath:', book.path)
if 'Read' not in book.device_collections:
book.device_collections.append('Read')
extension = os.path.splitext(book.path)[1]
ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path)
ContentID = self.contentid_from_path(book.path, ContentType)
# datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())
t = (ContentID,)
try:
cursor.execute('update content set ReadStatus=2,FirstTimeReading=\'true\' where BookID is Null and ContentID = ?', t)
except:
debug_print('Database Exception: Unable set book as Finished')
raise
else:
connection.commit()
# debug_print('Database: Commit set ReadStatus as Finished')
if category == 'Closed':
# Reset Im_Reading list in the database
if oncard == 'carda':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 3 and ContentID like \'file:///mnt/sd/%\''
elif oncard != 'carda' and oncard != 'cardb':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 3 and ContentID not like \'file:///mnt/sd/%\''
try:
cursor.execute (query)
except:
debug_print('Database Exception: Unable to reset Closed list')
raise
else:
# debug_print('Commit: Reset Closed list')
connection.commit()
for book in books:
# debug_print('Title:', book.title, 'lpath:', book.path)
if 'Closed' not in book.device_collections:
book.device_collections.append('Closed')
extension = os.path.splitext(book.path)[1]
ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path)
ContentID = self.contentid_from_path(book.path, ContentType)
# datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())
t = (ContentID,)
try:
cursor.execute('update content set ReadStatus=3,FirstTimeReading=\'true\' where BookID is Null and ContentID = ?', t)
except:
debug_print('Database Exception: Unable set book as Closed')
raise
else:
connection.commit()
# debug_print('Database: Commit set ReadStatus as Closed')
self.set_readstatus(connection, ContentID, readstatuslist.get(category))
if category == 'Shortlist':
# Reset FavouritesIndex list in the database
if oncard == 'carda':
query= 'update content set FavouritesIndex=-1 where BookID is Null and ContentID like \'file:///mnt/sd/%\''
elif oncard != 'carda' and oncard != 'cardb':
query= 'update content set FavouritesIndex=-1 where BookID is Null and ContentID not like \'file:///mnt/sd/%\''
try:
cursor.execute (query)
except:
debug_print('Database Exception: Unable to reset Shortlist list')
raise
else:
# debug_print('Commit: Reset Shortlist list')
connection.commit()
debug_print("Category: ", category)
cursor = connection.cursor()
for book in books:
# debug_print('Title:', book.title, 'lpath:', book.path)
debug_print(' Title:', book.title, 'category: ', category)
if 'Shortlist' not in book.device_collections:
book.device_collections.append('Shortlist')
# debug_print ("Shortlist found for: ", book.title)
@@ -726,23 +678,11 @@ class KOBO(USBMS):
else: # No collections
# Since no collections exist the ReadStatus needs to be reset to 0 (Unread)
print "Reseting ReadStatus to 0"
# Reset Im_Reading list in the database
if oncard == 'carda':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID like \'file:///mnt/sd/%\''
elif oncard != 'carda' and oncard != 'cardb':
query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ContentID not like \'file:///mnt/sd/%\''
debug_print("No Collections - reseting ReadStatus to 0")
self.reset_readstatus(connection, oncard)
debug_print("No Collections - reseting FavouritesIndex")
self.reset_favouritesindex(connection, oncard)
try:
cursor.execute (query)
except:
debug_print('Database Exception: Unable to reset Im_Reading list')
raise
else:
# debug_print('Commit: Reset Im_Reading list')
connection.commit()
cursor.close()
connection.close()
# debug_print('Finished update_device_database_collections', collections_attributes)


@@ -176,7 +176,7 @@ def add_pipeline_options(parser, plumber):
[
'level1_toc', 'level2_toc', 'level3_toc',
'toc_threshold', 'max_toc_links', 'no_chapters_in_toc',
'use_auto_toc', 'toc_filter',
'use_auto_toc', 'toc_filter', 'duplicate_links_in_toc',
]
),


@@ -265,6 +265,14 @@ OptionRecommendation(name='toc_filter',
)
),
OptionRecommendation(name='duplicate_links_in_toc',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('When creating a TOC from links in the input document, '
'allow duplicate entries, i.e. allow more than one entry '
'with the same text, provided that they point to a '
'different location.')
),
OptionRecommendation(name='chapter',
recommended_value="//*[((name()='h1' or name()='h2') and "


@@ -0,0 +1,117 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import textwrap, os, glob
from calibre.customize import FileTypePlugin
from calibre.constants import numeric_version
class HTML2ZIP(FileTypePlugin):
name = 'HTML to ZIP'
author = 'Kovid Goyal'
description = textwrap.dedent(_('''\
Follow all local links in an HTML file and create a ZIP \
file containing all linked files. This plugin is run \
every time you add an HTML file to the library.\
'''))
version = numeric_version
file_types = set(['html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml'])
supported_platforms = ['windows', 'osx', 'linux']
on_import = True
def run(self, htmlfile):
from calibre.ptempfile import TemporaryDirectory
from calibre.gui2.convert.gui_conversion import gui_convert
from calibre.customize.conversion import OptionRecommendation
from calibre.ebooks.epub import initialize_container
with TemporaryDirectory('_plugin_html2zip') as tdir:
recs =[('debug_pipeline', tdir, OptionRecommendation.HIGH)]
recs.append(['keep_ligatures', True, OptionRecommendation.HIGH])
if self.site_customization and self.site_customization.strip():
sc = self.site_customization.strip()
enc, _, bf = sc.partition('|')
if enc:
recs.append(['input_encoding', enc,
OptionRecommendation.HIGH])
if bf == 'bf':
recs.append(['breadth_first', True,
OptionRecommendation.HIGH])
gui_convert(htmlfile, tdir, recs, abort_after_input_dump=True)
of = self.temporary_file('_plugin_html2zip.zip')
tdir = os.path.join(tdir, 'input')
opf = glob.glob(os.path.join(tdir, '*.opf'))[0]
ncx = glob.glob(os.path.join(tdir, '*.ncx'))
if ncx:
os.remove(ncx[0])
epub = initialize_container(of.name, os.path.basename(opf))
epub.add_dir(tdir)
epub.close()
return of.name
def customization_help(self, gui=False):
return _('Character encoding for the input HTML files. Common choices '
'include: cp1252, cp1251, latin1 and utf-8.')
def do_user_config(self, parent=None):
'''
This method shows a configuration dialog for this plugin. It returns
True if the user clicks OK, False otherwise. The changes are
automatically applied.
'''
from PyQt4.Qt import (QDialog, QDialogButtonBox, QVBoxLayout,
QLabel, Qt, QLineEdit, QCheckBox)
config_dialog = QDialog(parent)
button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel)
v = QVBoxLayout(config_dialog)
def size_dialog():
config_dialog.resize(config_dialog.sizeHint())
button_box.accepted.connect(config_dialog.accept)
button_box.rejected.connect(config_dialog.reject)
config_dialog.setWindowTitle(_('Customize') + ' ' + self.name)
from calibre.customize.ui import (plugin_customization,
customize_plugin)
help_text = self.customization_help(gui=True)
help_text = QLabel(help_text, config_dialog)
help_text.setWordWrap(True)
help_text.setTextInteractionFlags(Qt.LinksAccessibleByMouse
| Qt.LinksAccessibleByKeyboard)
help_text.setOpenExternalLinks(True)
v.addWidget(help_text)
bf = QCheckBox(_('Add linked files in breadth first order'))
bf.setToolTip(_('Normally, when following links in HTML files'
' calibre does it depth first, i.e. if file A links to B and '
' C, but B links to D, the files are added in the order A, B, D, C. '
' With this option, they will instead be added as A, B, C, D'))
sc = plugin_customization(self)
if not sc:
sc = ''
sc = sc.strip()
enc = sc.partition('|')[0]
bfs = sc.partition('|')[-1]
bf.setChecked(bfs == 'bf')
sc = QLineEdit(enc, config_dialog)
v.addWidget(sc)
v.addWidget(bf)
v.addWidget(button_box)
size_dialog()
config_dialog.exec_()
if config_dialog.result() == QDialog.Accepted:
sc = unicode(sc.text()).strip()
if bf.isChecked():
sc += '|bf'
customize_plugin(self, sc)
return config_dialog.result()
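
The customization string saved by this dialog is simply the encoding plus an optional '|bf' suffix; a sketch of how run() unpacks it (the value shown is hypothetical):

    sc = 'cp1252|bf'  # as stored by do_user_config
    enc, _, bf = sc.partition('|')
    # enc == 'cp1252' -> passed on as input_encoding
    # bf  == 'bf'     -> enables breadth_first link traversal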


@@ -153,7 +153,8 @@ class Douban(Source):
author = 'Li Fanxi'
version = (2, 0, 0)
description = _('Downloads metadata and covers from Douban.com')
description = _('Downloads metadata and covers from Douban.com. '
'Useful only for Chinese language books.')
capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title', 'authors', 'tags',


@@ -19,7 +19,7 @@ from calibre.customize.ui import metadata_plugins, all_metadata_plugins
from calibre.ebooks.metadata.sources.base import create_log, msprefs
from calibre.ebooks.metadata.xisbn import xisbn
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import utc_tz
from calibre.utils.date import utc_tz, as_utc
from calibre.utils.html2text import html2text
from calibre.utils.icu import lower
@@ -57,11 +57,34 @@ def is_worker_alive(workers):
# Merge results from different sources {{{
class xISBN(Thread):
def __init__(self, isbn):
Thread.__init__(self)
self.isbn = isbn
self.isbns = frozenset()
self.min_year = None
self.daemon = True
self.exception = self.tb = None
def run(self):
try:
self.isbns, self.min_year = xisbn.get_isbn_pool(self.isbn)
except Exception as e:
import traceback
self.exception = e
self.tb = traceback.format_exc()
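
The point of wrapping the xisbn call in a daemon thread is that the caller can abandon it: as the ISBNMerge code below shows, it joins with a timeout and checks is_alive(). A sketch of that pattern (the ISBN value is hypothetical):

    xw = xISBN('9780061474095')
    xw.start()
    xw.join(10)           # wait at most 10 seconds
    if xw.is_alive():
        pass              # timed out: stop querying xisbn this session
    elif xw.exception is None:
        isbns, min_year = xw.isbns, xw.min_year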
class ISBNMerge(object):
def __init__(self):
def __init__(self, log):
self.pools = {}
self.isbnless_results = []
self.results = []
self.log = log
self.use_xisbn = True
def isbn_in_pool(self, isbn):
if isbn:
@@ -82,7 +105,20 @@ class ISBNMerge(object):
if isbn:
pool = self.isbn_in_pool(isbn)
if pool is None:
isbns, min_year = xisbn.get_isbn_pool(isbn)
isbns = min_year = None
if self.use_xisbn:
xw = xISBN(isbn)
xw.start()
xw.join(10)
if xw.is_alive():
self.log.error('Query to xISBN timed out')
self.use_xisbn = False
else:
if xw.exception:
self.log.error('Query to xISBN failed:')
self.log.debug(xw.tb)
else:
isbns, min_year = xw.isbns, xw.min_year
if not isbns:
isbns = frozenset([isbn])
if isbns in self.pools:
@@ -102,15 +138,19 @@ class ISBNMerge(object):
if results:
has_isbn_result = True
break
self.has_isbn_result = has_isbn_result
isbn_sources = frozenset()
if has_isbn_result:
self.merge_isbn_results()
else:
isbn_sources = self.merge_isbn_results()
# Now handle results that have no ISBNs
results = sorted(self.isbnless_results,
key=attrgetter('relevance_in_source'))
# Only use results that are from sources that have not also returned a
# result with an ISBN
results = [r for r in results if r.identify_plugin not in isbn_sources]
if results:
# Pick only the most relevant result from each source
self.results = []
seen = set()
for result in results:
if result.identify_plugin not in seen:
@@ -190,11 +230,15 @@ class ISBNMerge(object):
def merge_isbn_results(self):
self.results = []
sources = set()
for min_year, results in self.pools.itervalues():
if results:
for r in results:
sources.add(r.identify_plugin)
self.results.append(self.merge(results, min_year))
self.results.sort(key=attrgetter('average_source_relevance'))
return sources
def length_merge(self, attr, results, null_value=None, shortest=True):
values = [getattr(x, attr) for x in results if not x.is_null(attr)]
@@ -254,13 +298,23 @@ class ISBNMerge(object):
# Published date
if min_year:
for r in results:
year = getattr(r.pubdate, 'year', None)
if year == min_year:
ans.pubdate = r.pubdate
break
if getattr(ans.pubdate, 'year', None) == min_year:
min_date = datetime(min_year, ans.pubdate.month, ans.pubdate.day, tzinfo=utc_tz)
else:
min_date = datetime(min_year, 1, 2, tzinfo=utc_tz)
ans.pubdate = min_date
else:
min_date = datetime(3001, 1, 1, tzinfo=utc_tz)
for r in results:
if r.pubdate is not None and r.pubdate < min_date:
min_date = r.pubdate
if r.pubdate is not None:
candidate = as_utc(r.pubdate)
if candidate < min_date:
min_date = candidate
if min_date.year < 3000:
ans.pubdate = min_date
@@ -293,7 +347,7 @@ class ISBNMerge(object):
def merge_identify_results(result_map, log):
isbn_merge = ISBNMerge()
isbn_merge = ISBNMerge(log)
for plugin, results in result_map.iteritems():
for result in results:
isbn_merge.add_result(result)


@@ -121,7 +121,9 @@ class DetectStructure(object):
if not self.oeb.toc.has_href(href):
text = xml2text(a)
text = text[:100].strip()
if not self.oeb.toc.has_text(text):
if (not self.opts.duplicate_links_in_toc and
self.oeb.toc.has_text(text)):
continue
num += 1
self.oeb.toc.add(text, href,
play_order=self.oeb.toc.next_play_order())
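
With duplicate_links_in_toc enabled, two links sharing the same text but pointing at different locations both make it into the TOC; a sketch of the changed decision (the hrefs are hypothetical):

    # text='Notes', href='ch1.html#notes' -> added (first occurrence)
    # text='Notes', href='ch2.html#notes' -> formerly skipped by has_text(),
    #                                        now added when the option is set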


@@ -22,7 +22,7 @@ class TOCWidget(Widget, Ui_Form):
Widget.__init__(self, parent,
['level1_toc', 'level2_toc', 'level3_toc',
'toc_threshold', 'max_toc_links', 'no_chapters_in_toc',
'use_auto_toc', 'toc_filter',
'use_auto_toc', 'toc_filter', 'duplicate_links_in_toc',
]
)
self.db, self.book_id = db, book_id


@@ -21,7 +21,7 @@
</property>
</widget>
</item>
<item row="2" column="0">
<item row="3" column="0">
<widget class="QLabel" name="label_10">
<property name="text">
<string>Number of &amp;links to add to Table of Contents</string>
@@ -31,14 +31,14 @@
</property>
</widget>
</item>
<item row="2" column="1">
<item row="3" column="1">
<widget class="QSpinBox" name="opt_max_toc_links">
<property name="maximum">
<number>10000</number>
</property>
</widget>
</item>
<item row="3" column="0">
<item row="4" column="0">
<widget class="QLabel" name="label_16">
<property name="text">
<string>Chapter &amp;threshold</string>
@@ -48,7 +48,7 @@
</property>
</widget>
</item>
<item row="3" column="1">
<item row="4" column="1">
<widget class="QSpinBox" name="opt_toc_threshold"/>
</item>
<item row="0" column="0" colspan="2">
@@ -58,7 +58,7 @@
</property>
</widget>
</item>
<item row="4" column="0">
<item row="5" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>TOC &amp;Filter:</string>
@@ -68,19 +68,19 @@
</property>
</widget>
</item>
<item row="4" column="1">
<item row="5" column="1">
<widget class="QLineEdit" name="opt_toc_filter"/>
</item>
<item row="5" column="0" colspan="2">
<item row="6" column="0" colspan="2">
<widget class="XPathEdit" name="opt_level1_toc" native="true"/>
</item>
<item row="6" column="0" colspan="2">
<item row="7" column="0" colspan="2">
<widget class="XPathEdit" name="opt_level2_toc" native="true"/>
</item>
<item row="7" column="0" colspan="2">
<item row="8" column="0" colspan="2">
<widget class="XPathEdit" name="opt_level3_toc" native="true"/>
</item>
<item row="8" column="0">
<item row="9" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@@ -93,6 +93,13 @@
</property>
</spacer>
</item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="opt_duplicate_links_in_toc">
<property name="text">
<string>Allow &amp;duplicate links when creating the Table of Contents</string>
</property>
</widget>
</item>
</layout>
</widget>
<customwidgets>


@@ -336,7 +336,12 @@ class SchedulerDialog(QDialog, Ui_Dialog):
self.download_button.setVisible(True)
self.detail_box.setCurrentIndex(0)
recipe = self.recipe_model.recipe_from_urn(urn)
try:
schedule_info = self.recipe_model.schedule_info_from_urn(urn)
except:
# Happens if user does something stupid like unchecking all the
# days of the week
schedule_info = None
account_info = self.recipe_model.account_info_from_urn(urn)
customize_info = self.recipe_model.get_customize_info(urn)


@@ -20,6 +20,7 @@ from calibre.ebooks.metadata.sources.covers import download_cover
from calibre.ebooks.metadata.book.base import Metadata
from calibre.customize.ui import metadata_plugins
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.date import as_utc
# Start download {{{
def show_config(gui, parent):
@@ -124,10 +125,18 @@ def merge_result(oldmi, newmi):
for plugin in metadata_plugins(['identify']):
fields |= plugin.touched_fields
def is_equal(x, y):
if hasattr(x, 'tzinfo'):
x = as_utc(x)
if hasattr(y, 'tzinfo'):
y = as_utc(y)
return x == y
for f in fields:
# Optimize so that set_metadata does not have to do extra work later
if not f.startswith('identifier:'):
if (not newmi.is_null(f) and getattr(newmi, f) == getattr(oldmi, f)):
if (not newmi.is_null(f) and is_equal(getattr(newmi, f),
getattr(oldmi, f))):
setattr(newmi, f, getattr(dummy, f))
newmi.last_modified = oldmi.last_modified


@@ -254,6 +254,10 @@ class ResultsView(QTableView): # {{{
'<h2>%s</h2>'%book.title,
'<div><i>%s</i></div>'%authors_to_string(book.authors),
]
if not book.is_null('series'):
series = book.format_field('series')
if series[1]:
parts.append('<div>%s: %s</div>'%series)
if not book.is_null('rating'):
parts.append('<div>%s</div>'%(u'\u2605'*int(book.rating)))
parts.append('</center>')


@@ -17,12 +17,13 @@ from calibre.gui2.preferences.metadata_sources_ui import Ui_Form
from calibre.ebooks.metadata.sources.base import msprefs
from calibre.customize.ui import (all_metadata_plugins, is_disabled,
enable_plugin, disable_plugin, default_disabled_plugins)
from calibre.gui2 import NONE, error_dialog
from calibre.gui2 import NONE, error_dialog, question_dialog
class SourcesModel(QAbstractTableModel): # {{{
def __init__(self, parent=None):
QAbstractTableModel.__init__(self, parent)
self.gui_parent = parent
self.plugins = []
self.enabled_overrides = {}
@@ -87,6 +88,15 @@ class SourcesModel(QAbstractTableModel): # {{{
if col == 0 and role == Qt.CheckStateRole:
val, ok = val.toInt()
if ok:
if val == Qt.Checked and 'Douban' in plugin.name:
if not question_dialog(self.gui_parent,
_('Are you sure?'), '<p>'+
_('This plugin is useful only for <b>Chinese</b>'
' language books. It can return incorrect'
' results for books in English. Are you'
' sure you want to enable it?'),
show_copy_button=False):
return ret
self.enabled_overrides[plugin] = val
ret = True
if col == 1 and role == Qt.EditRole:


@@ -241,7 +241,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
if self.current_format != plugboard_any_format_value and \
self.current_device in self.device_to_formats_map:
allowable_formats = self.device_to_formats_map[self.current_device]
if self.current_format not in allowable_formats:
if (self.current_format not in allowable_formats and
self.current_format != 'device_db'):
error_dialog(self, '',
_('The {0} device does not support the {1} format.').
format(self.current_device, self.current_format),
@@ -358,5 +359,5 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
if __name__ == '__main__':
from PyQt4.Qt import QApplication
app = QApplication([])
test_widget('Import/Export', 'plugboards')
test_widget('Import/Export', 'Plugboard')


@@ -22,6 +22,7 @@ from calibre.utils.icu import sort_key
from calibre.utils.search_query_parser import SearchQueryParser
def comparable_price(text):
text = re.sub(r'[^0-9.,]', '', text)
if len(text) < 3 or text[-3] not in ('.', ','):
text += '00'
text = re.sub(r'\D', '', text)
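
A worked example of the padding rule added here (the price strings are hypothetical):

    # '$12.99' -> '12.99' -> text[-3] is '.' -> no padding -> '1299'
    # '£5'     -> '5'     -> shorter than 3 chars -> '500'  (i.e. 5.00)
    # Prices thus normalize to comparable integer-cent digit strings.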
@@ -293,6 +294,7 @@ class SearchFilter(SearchQueryParser):
return self.srs
def get_matches(self, location, query):
query = query.strip()
location = location.lower().strip()
if location == 'authors':
location = 'author'


@@ -22,6 +22,7 @@ from calibre.gui2.store.search.adv_search_builder import AdvSearchBuilderDialog
from calibre.gui2.store.search.download_thread import SearchThreadPool, \
CacheUpdateThreadPool
from calibre.gui2.store.search.search_ui import Ui_Dialog
from calibre.utils.filenames import ascii_filename
class SearchDialog(QDialog, Ui_Dialog):
@@ -350,6 +351,7 @@ class SearchDialog(QDialog, Ui_Dialog):
if d.exec_() == d.Accepted:
ext = d.format()
fname = result.title + '.' + ext.lower()
fname = ascii_filename(fname)
self.gui.download_ebook(result.downloads[ext], filename=fname)
def open_store(self, result):


@@ -77,9 +77,12 @@ class SmashwordsStore(BasicStoreConfig, StorePlugin):
title = ''.join(data.xpath('//a[@class="bookTitle"]/text()'))
subnote = ''.join(data.xpath('//span[@class="subnote"]/text()'))
author = ''.join(data.xpath('//span[@class="subnote"]/a/text()'))
if '$' in subnote:
price = subnote.partition('$')[2]
price = price.split(u'\xa0')[0]
price = '$' + price
else:
price = '$0.00'
counter -= 1


@@ -9,7 +9,7 @@ Setting up a |app| development environment
This means that you are free to download and modify the program to your heart's content. In this section,
you will learn how to get a |app| development environment setup on the operating system of your choice.
|app| is written primarily in `Python <http://www.python.org>`_ with some C/C++ code for speed and system interfacing.
Note that |app| is not compatible with Python 3 and requires at least Python 2.6.
Note that |app| is not compatible with Python 3 and requires at least Python 2.7.
.. contents:: Contents
:depth: 2
@@ -122,7 +122,7 @@ Setting this environment variable means that |app| will now load all its Python
That's it! You are now ready to start hacking on the |app| code. For example, open the file :file:`src\\calibre\\__init__.py`
in your favorite editor and add the line::
print "Hello, world!"
print ("Hello, world!")
near the top of the file. Now run the command :command:`calibredb`. The very first line of output should be ``Hello, world!``.
@@ -151,7 +151,7 @@ Setting this environment variable means that |app| will now load all its Python
That's it! You are now ready to start hacking on the |app| code. For example, open the file :file:`src/calibre/__init__.py`
in your favorite editor and add the line::
print "Hello, world!"
print ("Hello, world!")
near the top of the file. Now run the command :command:`calibredb`. The very first line of output should be ``Hello, world!``.
@@ -161,7 +161,8 @@ Linux development environment
|app| is primarily developed on linux. You have two choices in setting up the development environment. You can install the
|app| binary as normal and use that as a runtime environment to do your development. This approach is similar to that
used in windows and OS X. Alternatively, you can install |app| from source. Instructions for setting up a development
environment from source are in the INSTALL file in the source tree. Here we will address using the binary a runtime.
environment from source are in the INSTALL file in the source tree. Here we will address using the binary as a runtime, which is the
recommended method.
Install the |app| using the binary installer. Then open a terminal and change to the previously checked out |app| code directory, for example::
@@ -183,7 +184,7 @@ Setting this environment variable means that |app| will now load all its Python
That's it! You are now ready to start hacking on the |app| code. For example, open the file :file:`src/calibre/__init__.py`
in your favorite editor and add the line::
print "Hello, world!"
print ("Hello, world!")
near the top of the file. Now run the command :command:`calibredb`. The very first line of output should be ``Hello, world!``.
@ -193,9 +194,9 @@ Having separate "normal" and "development" |app| installs on the same computer
The calibre source tree is very stable, it rarely breaks, but if you feel the need to run from source on a separate
test library and run the released calibre version with your everyday library, you can achieve this easily using
.bat files or shell scripts to launch |app|. The example below shows how to do this on windows using .bat files (the
instructions for other platforms are the same, just use a BASh script instead of a .bat file)
instructions for other platforms are the same, just use a BASH script instead of a .bat file)
To launch the relase version of |app| with your everyday library:
To launch the release version of |app| with your everyday library:
calibre-normal.bat::


@@ -72,8 +72,19 @@ class PersistentTemporaryFile(object):
prefix = ""
if dir is None:
dir = base_dir()
try:
fd, name = tempfile.mkstemp(suffix, __appname__+"_"+ __version__+"_" + prefix,
dir=dir)
except UnicodeDecodeError:
global _base_dir
from calibre.constants import filesystem_encoding
base_dir()
if not isinstance(_base_dir, unicode):
_base_dir = _base_dir.decode(filesystem_encoding)
dir = dir.decode(filesystem_encoding)
fd, name = tempfile.mkstemp(suffix, __appname__+"_"+ __version__+"_" + prefix,
dir=dir)
self._file = os.fdopen(fd, mode)
self._name = name
self._fd = fd
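
The retry added here covers a bytestring base directory on a non-ASCII path under Python 2; the idea in brief (the path is hypothetical):

    # dir = b'/home/usu\xc3\xa1rio/tmp' -> tempfile.mkstemp raises UnicodeDecodeError
    # dir.decode(filesystem_encoding)   -> u'/home/usu\xe1rio/tmp'
    # mkstemp is then retried with the unicode path and succeeds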


@@ -132,6 +132,14 @@ def as_local_time(date_time, assume_utc=True):
_local_tz)
return date_time.astimezone(_local_tz)
def as_utc(date_time, assume_utc=True):
if not hasattr(date_time, 'tzinfo'):
return date_time
if date_time.tzinfo is None:
date_time = date_time.replace(tzinfo=_utc_tz if assume_utc else
_local_tz)
return date_time.astimezone(_utc_tz)
def now():
return datetime.now().replace(tzinfo=_local_tz)
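
A quick sketch of what as_utc guarantees for the pubdate comparisons above (the datetimes are hypothetical):

    naive = datetime(2011, 7, 7, 18, 48)   # no tzinfo
    a = as_utc(naive)                      # assumed UTC -> 18:48+00:00
    b = as_utc(naive, assume_utc=False)    # interpreted as local time first
    # a == b only when the local zone is UTC; either way both results are
    # timezone-aware, so ordering them can no longer raise TypeError.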