Merge from trunk

Commit 20ec5de3f4 by Charles Haley, 2012-04-04 11:20:25 +02:00
28 changed files with 989 additions and 312 deletions

recipes/ba_herald.recipe (new file, 82 lines)

@ -0,0 +1,82 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.buenosairesherald.com
'''
import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class BuenosAiresHerald(BasicNewsRecipe):
title = 'Buenos Aires Herald'
__author__ = 'Darko Miletic'
description = 'A world of information in a few words'
publisher = 'Editorial Nefir S.A.'
category = 'news, politics, Argentina'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'en_AR'
remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://www.buenosairesherald.com/img/logo.jpg'
INDEX = 'http://www.buenosairesherald.com'
extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
h1{font-family: Georgia,serif}
#fecha{text-align: right; font-size: small}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags = [dict(name=['meta','link','iframe'])]
keep_only_tags = [dict(attrs={'class':'nota_texto p'})]
feeds = [
(u'Argentina' , u'http://www.buenosairesherald.com/argentina' )
,(u'World' , u'http://www.buenosairesherald.com/world' )
,(u'Latin America' , u'http://www.buenosairesherald.com/latin-america' )
,(u'Entertainment' , u'http://www.buenosairesherald.com/entertainment' )
,(u'Sports' , u'http://www.buenosairesherald.com/sports' )
]
def print_version(self, url):
artidraw = url.rpartition('/article/')[2]
artid = artidraw.partition('/')[0]
return 'http://www.buenosairesherald.com/articles/print.aspx?ix=' + artid
def parse_index(self):
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
for item in soup.findAll('div', attrs={'class':'nota_texto_seccion'}):
description = self.tag_to_string(item.h2)
atag = item.h2.find('a')
if atag and atag.has_key('href'):
url = self.INDEX + atag['href']
title = description
date = strftime(self.timefmt)
articles.append({
'title' :title
,'date' :date
,'url' :url
,'description':description
})
totalfeeds.append((feedtitle, articles))
return totalfeeds
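
A minimal, standalone sketch of the print_version() rewrite above: it only needs the numeric id that follows /article/ in a story URL. The sample URL below is made up for illustration.

def print_version(url):
    # Take everything after the last '/article/', then the id before the next '/'.
    artidraw = url.rpartition('/article/')[2]
    artid = artidraw.partition('/')[0]
    return 'http://www.buenosairesherald.com/articles/print.aspx?ix=' + artid

sample = 'http://www.buenosairesherald.com/article/12345/some-headline'  # hypothetical
print(print_version(sample))
# -> http://www.buenosairesherald.com/articles/print.aspx?ix=12345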

recipes/icons/ba_herald.png (new binary file, 978 B; not shown)

@ -1,45 +1,73 @@
# Talking Points is not grabbing everything.
# The look is right, but only the last one added?
import re
import string, re
import time
import traceback
# above for debugging via stack
from calibre.web.feeds.recipes import BasicNewsRecipe
# Allows the Python soup converter, which makes parsing easier.
from calibre.ebooks.BeautifulSoup import BeautifulSoup
# strip ads and graphics
# Current Column lacks a title.
# Talking Points Memo - shorten title - Remove year and Bill's name
# The News letter archive https://www.billoreilly.com/newsletterarchive is covered by other entries.
# Newsletters: Talking Points Memos covered by cat12
import os, time, traceback, re, urlparse, sys, cStringIO
from collections import defaultdict
from functools import partial
from contextlib import nested, closing
from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
# To Do: strip ads and graphics, Current Column lacks a title.
# The News letter archive https://www.billoreilly.com/newsletterarchive is covered by other entries.
# Newsletters: Talking Points Memos covered by cat12
# ./ebook-convert --username xxx --password xxx
# This recipe is derived from BasicNewsRecipe, so it can only override methods defined there.
# Some of what we need is only available in the article pages themselves, so there is more copying to do than usual.
class OReillyPremium(BasicNewsRecipe):
title = u'OReilly Premium'
__author__ = 'TMcN'
language = 'en'
description = 'Retrieves Premium and News Letter content from BillOReilly.com. Requires a Bill OReilly Premium Membership.'
cover_url = 'http://images.billoreilly.com/images/headers/billgray_header.png'
custom_title = 'Bill O\'Reilly Premium - '+ time.strftime('%d %b %Y')
title = 'Bill O\'Reilly Premium'
auto_cleanup = True
conversion_options = {'linearize_tables': True}
encoding = 'utf8'
needs_subscription = True
language = 'en'
no_stylesheets = True
oldest_article = 20
needs_subscription = True
oldest_article = 31
remove_javascript = True
remove_tags = [dict(name='img', attrs={})]
# Don't go down into linked pages (no recursion)
recursions = 0
max_articles_per_feed = 2000
max_articles_per_feed = 20
debugMessages = True
# Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
catList = [ ["TV Archives", 'https://www.billoreilly.com/show?action=tvShowArchive', 'a', {'class':['showLinks','homeLinks']}, []],
["No Spin Archives", 'https://www.billoreilly.com/blog?categoryID=7', True, {'class':['blogBody'], 'style':['padding-top:10px;']}, []],
["Daily Briefings", 'http://www.billoreilly.com/blog?categoryID=11', True, {'class':['defaultHeaderSmallLinks']}, []],
["Stratfor", 'http://www.billoreilly.com/blog?categoryID=5', 'a', {'class':['blogLinks']}, []],
["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12', 'td', {}, []],
# ["No Spin Archives", 'https://www.billoreilly.com/blog?categoryID=7', True, {'class':['blogBody'], 'style':['padding-top:10px;']}, []],
# ["Daily Briefings", 'http://www.billoreilly.com/blog?categoryID=11', True, {'class':['defaultHeaderSmallLinks']}, []],
# ["Stratfor", 'http://www.billoreilly.com/blog?categoryID=5', 'a', {'class':['blogLinks']}, []],
# ["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12', 'td', {}, []],
["Current Column", 'https://www.billoreilly.com/currentcolumn', 'span', {'class':['defaultHeader']}, []]
]
feeds = [
(u'No Spin', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=7'),
(u'Daily Briefing', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=11'),
(u'Talking Points', u'https://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=12'),
(u'Blog', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=0'),
(u'StratFor', u'http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=5')
]
# http://www.billoreilly.com/blog?action=blogArchive&rss=true&categoryID=8 is word for the day.
# Note: Talking Points is broken in the above model; the site changed to more Ajax-y.
# Now using RSS
def get_browser(self):
print("In get_browser")
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('https://www.billoreilly.com/pg/jsp/member/membersignin.jsp')
@ -48,7 +76,7 @@ class OReillyPremium(BasicNewsRecipe):
br['formPasswordField'] = self.password
br.submit()
return br
# Returns the best-guess print url.
# The second parameter (pageURL) is returned if nothing is found.
def extractPrintURL(self, baseURL, pageURL, printString):
@ -62,17 +90,19 @@ class OReillyPremium(BasicNewsRecipe):
tag = printText.parent
tagURL = baseURL+tag['href']
return tagURL
def stripBadChars(self, inString) :
return inString.replace("\'", "")
def parseGeneric(self, baseURL):
# Does a generic parsing of the articles. There are six categories (0-5)
# Does a generic parsing of the articles. There are six categories (0-5)
# Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
# NoSpin and TV are generic
fullReturn = []
for i in range(len(self.catList)) :
for i in range(len(self.catList)) :
articleList = []
print("In "+self.catList[i][0]+", index: "+ str(i))
soup = self.index_to_soup(self.catList[i][1])
# Set defaults
description = 'None'
@ -80,15 +110,13 @@ class OReillyPremium(BasicNewsRecipe):
# Problem: 0-2 create many in an array
# 3-5 create one.
# So no for-div for 3-5
if i < 3 :
if i == 0 :
print("Starting TV Archives")
for div in soup.findAll(self.catList[i][2], self.catList[i][3]):
print("Next DIV:")
print(div)
if i == 1:
a = div.find('a', href=True)
else :
a = div
print(a)
a = div
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
@ -96,82 +124,63 @@ class OReillyPremium(BasicNewsRecipe):
continue
# url = baseURL+re.sub(r'\?.*', '', a['href'])
url = baseURL+a['href']
if i < 2 :
url = self.extractPrintURL(baseURL, url, "Print this entry")
title = self.tag_to_string(a, use_alt=True).strip()
elif i == 2 :
# Daily Briefs
url = self.extractPrintURL(baseURL, url, "Print this entry")
title = div.contents[0]
if self.debugMessages :
print(title+" @ "+url)
url = self.extractPrintURL(baseURL, url, "Print this entry")
title = self.tag_to_string(a, use_alt=True).strip()
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
elif i == 3 : # Stratfor
a = soup.find('a', self.catList[i][3])
if a is None :
continue
url = baseURL+a['href']
title = self.tag_to_string(a, use_alt=True).strip()
# Get Stratfor contents so we can get the real title.
stratSoup = self.index_to_soup(url)
title = stratSoup.html.head.title.string
stratIndex = title.find('Stratfor.com:', 0)
if (stratIndex > -1) :
title = title[stratIndex+14:-1]
# Look for first blogBody <td class="blogBody"
# Changed 12 Jan 2012 - new page format
#stratBlogTable = stratSoup.find('td', {'class':['blogBody']}).findParent('table')
#stratBody = stratSoup.find('td', {'class':['blogBody']})
elif i == 4 : # Talking Points
topDate = soup.find("td", "blogBody")
if not topDate :
print("Failed to find date in Talking Points")
# This page has the contents in double-wrapped tables!
myTable = topDate.findParents('table')[0]
if myTable is not None:
upOneTable = myTable.findParents('table')[0]
if upOneTable is not None:
upTwo = upOneTable.findParents('table')[0]
if upTwo is None:
continue
# Now navigate rows of upTwo
if self.debugMessages :
print("Entering rows")
for rows in upTwo.findChildren("tr", recursive=False):
# Inside top level table, each row is an article
rowTable = rows.find("table")
articleTable = rowTable.find("table")
# This looks wrong.
articleTable = rows.find("tr")
# The middle table is just for formatting the article buffer... but this means we can skip the inner table.
blogDate = articleTable.find("a","blogDate").contents[0]
# Skip to second blogBody for this.
blogTitle = articleTable.findAll("td", "blogBody")[1].contents[0]
blogURL = articleTable.find("a", "homeBlogReadMore bold")['href']
url = baseURL+re.sub(r'\?.*', '', blogURL)
title = blogDate+": "+self.stripBadChars(blogTitle.replace("Bill O'Reilly: ", ""))
if self.debugMessages :
print("Talking Points Memo title "+title+" at url: "+url)
pubdate = time.strftime('%a, %d %b')
articleList.append(dict(title=title, url=url, date=pubdate, description='None', content=''))
else : # Current Column
titleSpan = soup.find(self.catList[i][2], self.catList[i][3])
if titleSpan is None :
print("No Current Column Title Span")
print(soup)
continue
title = titleSpan.contents[0]
url = self.extractPrintURL(baseURL, self.catList[i][1], "Print This Article")
if i == 3 or i == 5 :
if i == 1 :
if self.debugMessages :
print(self.catList[i][0]+" Title:"+title+" at url: "+url)
summary = div.find(True, attrs={'class':'summary'})
if summary:
print("At Summary")
print(summary)
if summary is not None:
description = self.tag_to_string(summary, use_alt=False)
print("At append")
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
self.catList[i][3] = articleList
fullReturn.append((self.catList[i][0], articleList))
print("Returning")
# print fullReturn
return fullReturn
# build_index() starts with:
# try:
# feeds = feeds_from_index(self.parse_index(), oldest_article=self.oldest_article,
# max_articles_per_feed=self.max_articles_per_feed,
# log=self.log)
# self.report_progress(0, _('Got feeds from index page'))
# except NotImplementedError:
# feeds = self.parse_feeds()
# which in turn is from __init__.py
#def feeds_from_index(index, oldest_article=7, max_articles_per_feed=100,
# log=default_log):
#'''
#@param index: A parsed index as returned by L{BasicNewsRecipe.parse_index}.
#@return: A list of L{Feed} objects.
#@rtype: list
#'''
#feeds = []
#for title, articles in index:
# pfeed = Feed(log=log)
# pfeed.populate_from_preparsed_feed(title, articles, oldest_article=oldest_article,
# max_articles_per_feed=max_articles_per_feed)
# feeds.append(pfeed)
# return feeds
# use_embedded_content defaults to None, at which point if the content is > 2K, it is used as the article.
# calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles.
# returns a list of tuple ('feed title', list of articles)
# {
@ -182,16 +191,148 @@ class OReillyPremium(BasicNewsRecipe):
# 'content' : The full article (can be an empty string). This is used by FullContentProfile
# }
# this is used instead of BasicNewsRecipe.parse_feeds().
# it is called by download
def parse_index(self):
# Parse the page into Python Soup
print("Entering recipe print_index from:")
traceback.print_stack()
print("web")
baseURL = "https://www.billoreilly.com"
return self.parseGeneric(baseURL)
masterList = self.parseGeneric(baseURL)
#print(masterList)
return masterList
def preprocess_html(self, soup):
print("In preprocess_html")
refresh = soup.find('meta', {'http-equiv':'refresh'})
if refresh is None:
return soup
content = refresh.get('content').partition('=')[2]
raw = self.browser.open('https://www.billoreilly.com'+content).read()
return BeautifulSoup(raw.decode('cp1252', 'replace'))
def build_index(self):
print("In OReilly build_index()\n\n")
feedsRSS = []
self.report_progress(0, _('Fetching feeds...'))
#try:
feeds = feeds_from_index(self.parse_index(), oldest_article=self.oldest_article,
max_articles_per_feed=self.max_articles_per_feed,
log=self.log)
self.report_progress(0, _('Got feeds from index page'))
#except NotImplementedError:
# feeds = self.parse_feeds()
# Now add regular feeds.
feedsRSS = self.parse_feeds()
print ("feedsRSS is type "+feedsRSS.__class__.__name__)
for articles in feedsRSS:
print("articles is type "+articles.__class__.__name__)
print("Title:" + articles.title)
feeds.append(articles)
if not feeds:
raise ValueError('No articles found, aborting')
#feeds = FeedCollection(feeds)
self.report_progress(0, _('Trying to download cover...'))
self.download_cover()
self.report_progress(0, _('Generating masthead...'))
self.masthead_path = None
try:
murl = self.get_masthead_url()
except:
self.log.exception('Failed to get masthead url')
murl = None
if murl is not None:
# Try downloading the user-supplied masthead_url
# Failure sets self.masthead_path to None
self.download_masthead(murl)
if self.masthead_path is None:
self.log.info("Synthesizing mastheadImage")
self.masthead_path = os.path.join(self.output_dir, 'mastheadImage.jpg')
try:
self.default_masthead_image(self.masthead_path)
except:
self.log.exception('Failed to generate default masthead image')
self.masthead_path = None
if self.test:
feeds = feeds[:2]
self.has_single_feed = len(feeds) == 1
index = os.path.join(self.output_dir, 'index.html')
html = self.feeds2index(feeds)
with open(index, 'wb') as fi:
fi.write(html)
self.jobs = []
if self.reverse_article_order:
for feed in feeds:
if hasattr(feed, 'reverse'):
feed.reverse()
self.feed_objects = feeds
for f, feed in enumerate(feeds):
feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
if not os.path.isdir(feed_dir):
os.makedirs(feed_dir)
for a, article in enumerate(feed):
if a >= self.max_articles_per_feed:
break
art_dir = os.path.join(feed_dir, 'article_%d'%a)
if not os.path.isdir(art_dir):
os.makedirs(art_dir)
try:
url = self.print_version(article.url)
except NotImplementedError:
url = article.url
except:
self.log.exception('Failed to find print version for: '+article.url)
url = None
if not url:
continue
func, arg = (self.fetch_embedded_article, article) \
if self.use_embedded_content or (self.use_embedded_content == None and feed.has_embedded_content()) \
else \
((self.fetch_obfuscated_article if self.articles_are_obfuscated \
else self.fetch_article), url)
req = WorkRequest(func, (arg, art_dir, f, a, len(feed)),
{}, (f, a), self.article_downloaded,
self.error_in_article_download)
req.feed = feed
req.article = article
req.feed_dir = feed_dir
self.jobs.append(req)
self.jobs_done = 0
tp = ThreadPool(self.simultaneous_downloads)
for req in self.jobs:
tp.putRequest(req, block=True, timeout=0)
self.report_progress(0, _('Starting download [%d thread(s)]...')%self.simultaneous_downloads)
while True:
try:
tp.poll()
time.sleep(0.1)
except NoResultsPending:
break
for f, feed in enumerate(feeds):
print("Writing feeds for "+feed.title)
html = self.feed2index(f,feeds)
feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi:
fi.write(html)
self.create_opf(feeds)
self.report_progress(1, _('Feeds downloaded to %s')%index)
return index
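
The commented-out notes earlier in this recipe spell out the contract that build_index() relies on: parse_index() returns a list of (feed title, article list) tuples, each article being a dict with title, url, date, description and content keys, which feeds_from_index() then turns into Feed objects. A minimal, calibre-free sketch of that data shape (titles and URLs are placeholders):

# Shape of the value parse_index() is expected to return; everything here is
# illustrative dummy data, not real feed content.
def parse_index_stub():
    articles = [{
        'title'      : 'Example headline',
        'url'        : 'http://example.com/article/1',
        'date'       : 'Wed, 04 Apr',
        'description': 'One-line summary',
        'content'    : '',   # empty unless the full article is embedded
    }]
    return [('Example feed', articles)]

for feed_title, articles in parse_index_stub():
    for art in articles:
        print('%s: %s (%s)' % (feed_title, art['title'], art['url']))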


@ -1,7 +1,9 @@
# Test with "\Program Files\Calibre2\ebook-convert.exe" RealClear.recipe .epub --test -vv --debug-pipeline debug
import string, re
import time
from urlparse import urlparse
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import NavigableString
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
class RealClear(BasicNewsRecipe):
title = u'Real Clear'
@ -20,12 +22,13 @@ class RealClear(BasicNewsRecipe):
# Don't go down into linked pages (no recursion)
recursions = 0
max_articles_per_feed = 400
debugMessages = False
# Numeric parameter is type, controls whether we look for
debugMessages = True
# Numeric parameter is type, controls whether we look for
feedsets = [
["Politics", "http://www.realclearpolitics.com/index.xml", 0],
["Science", "http://www.realclearscience.com/index.xml", 0],
["Politics", "http://www.realclearpolitics.com/index.xml", 0],
["Policy", "http://www.realclearpolicy.com/index.xml", 0],
["Science", "http://www.realclearscience.com/index.xml", 0],
["Tech", "http://www.realcleartechnology.com/index.xml", 0],
# The feedburner is essentially the same as the top feed, politics.
# ["Politics Burner", "http://feeds.feedburner.com/realclearpolitics/qlMj", 1],
@ -37,22 +40,37 @@ class RealClear(BasicNewsRecipe):
]
# Hints to extractPrintURL.
# First column is the URL snippet. Then the string to search for as text, and the attributes to look for above it. Start with attributes and drill down.
printhints = [
phUrlSnip, phLinkText, phMainSearch, phHrefSearch = range(4)
printhints = [ ["realclear", "", '' , 'printpage'],
["billoreilly.com", "Print this entry", 'a', ''],
["billoreilly.com", "Print This Article", 'a', ''],
["politico.com", "Print", 'a', 'share-print'],
["politico.com", "Print", 'a', 'share-print'],
["nationalreview.com", ">Print<", 'a', ''],
["reason.com", "", 'a', 'printer']
# The following are not supported due to JavaScripting, and would require obfuscated_article to handle
# forbes,
# forbes,
# usatoday - just prints with all current crap anyhow
]
# RCP - look for a strange compound. See http://www.realclearpolitics.com/articles/2012/01/24/in_speech_obama_to_call_for_fairness_--_and_four_more_years_112879.html
# The print link isn't obvious, and only the end is needed (the -full append), so maybe try that first.
# http://www.realclearpolitics.com/printpage/?url=http://www.realclearpolitics.com/articles/2012/01/24/in_speech_obama_to_call_for_fairness_--_and_four_more_years_112879-full.html
# Single page articles don't have a _full; e.g. http://www.realclearpolitics.com/articles/2012/01/25/obamas_green_robber_barons_112897.html
# Use the FULL PRINTPAGE URL; it formats it better too!
#
# NYT - try single page...
# Need special code - is it one page or several? Which URL?
# from http://www.nytimes.com/2012/01/22/business/apple-america-and-a-squeezed-middle-class.html?_r=1
# to http://www.nytimes.com/2012/01/22/business/apple-america-and-a-squeezed-middle-class.html?_r=1&pagewanted=all
# which is at link rel="canonical" and at <meta property="og:url" or look for "Single Page"
# Returns the best-guess print url.
# The second parameter (pageURL) is returned if nothing is found.
def extractPrintURL(self, pageURL):
tagURL = pageURL
baseParse = urlparse(pageURL)
baseURL = baseParse[0]+"://"+baseParse[1]
hintsCount =len(self.printhints)
for x in range(0,hintsCount):
if pageURL.find(self.printhints[x][0])== -1 :
@ -62,23 +80,37 @@ class RealClear(BasicNewsRecipe):
soup = self.index_to_soup(pageURL)
if soup is None:
return pageURL
if len(self.printhints[x][3])>0 and len(self.printhints[x][1]) == 0:
if len(self.printhints[x][self.phHrefSearch])>0 and len(self.printhints[x][self.phLinkText]) == 0:
# e.g. RealClear
if self.debugMessages == True :
print("search1")
print("Search by href: "+self.printhints[x][self.phHrefSearch])
printFind = soup.find(href=re.compile(self.printhints[x][self.phHrefSearch]))
elif len(self.printhints[x][3])>0 and len(self.printhints[x][1]) == 0:
if self.debugMessages == True :
print("Search 1: "+self.printhints[x][2]+" Attributes: ")
print(self.printhints[x][3])
printFind = soup.find(self.printhints[x][2], attrs=self.printhints[x][3])
elif len(self.printhints[x][3])>0 :
if self.debugMessages == True :
print("search2")
printFind = soup.find(self.printhints[x][2], attrs=self.printhints[x][3], text=self.printhints[x][1])
else :
if self.debugMessages == True:
print("Default Search: "+self.printhints[x][2]+" Text: "+self.printhints[x][1])
printFind = soup.find(self.printhints[x][2], text=self.printhints[x][1])
if printFind is None:
if self.debugMessages == True :
print("Not Found")
# print(soup)
print("end soup\n\n");
continue
print(printFind)
if isinstance(printFind, NavigableString)==False:
if printFind['href'] is not None:
print("Check "+printFind['href']+" for base of "+baseURL)
if printFind['href'].find("http")!=0 :
return baseURL+printFind['href']
return printFind['href']
tag = printFind.parent
print(tag)
@ -98,7 +130,7 @@ class RealClear(BasicNewsRecipe):
print("In get_browser")
br = BasicNewsRecipe.get_browser()
return br
def parseRSS(self, index) :
if self.debugMessages == True :
print("\n\nStarting "+self.feedsets[index][0])
@ -128,7 +160,7 @@ class RealClear(BasicNewsRecipe):
pubDateEl = div.find("pubDate")
if pubDateEl is None :
pubDateEl = div.find("pubdate")
if pubDateEl is None :
if pubDateEl is None :
pubDate = time.strftime('%a, %d %b')
else :
pubDate = pubDateEl.contents[0]
@ -144,7 +176,7 @@ class RealClear(BasicNewsRecipe):
pubdate = time.strftime('%a, %d %b')
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
return articleList
# calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles.
# returns a list of tuple ('feed title', list of articles)
# {
@ -157,7 +189,8 @@ class RealClear(BasicNewsRecipe):
# this is used instead of BasicNewsRecipe.parse_feeds().
def parse_index(self):
# Parse the page into Python Soup
articleList = []
ans = []
feedsCount = len(self.feedsets)
for x in range(0,feedsCount): # should be ,4
@ -167,4 +200,5 @@ class RealClear(BasicNewsRecipe):
if self.debugMessages == True :
print(ans)
return ans
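
The comments near the top of this recipe describe the RealClear print trick: the print view lives at /printpage/?url=<article URL>, and multi-page articles additionally need '-full' inserted before '.html'. A standalone sketch of that rewrite (whether an article is multi-page still has to be determined from the page itself; the sample URL is the one quoted in the comments):

def rcp_print_url(article_url, multipage):
    # Multi-page articles use a '-full' variant of the file name; single-page
    # articles are passed through unchanged, per the comments above.
    if multipage and article_url.endswith('.html'):
        article_url = article_url[:-len('.html')] + '-full.html'
    return 'http://www.realclearpolitics.com/printpage/?url=' + article_url

url = ('http://www.realclearpolitics.com/articles/2012/01/24/'
       'in_speech_obama_to_call_for_fairness_--_and_four_more_years_112879.html')
print(rcp_print_url(url, multipage=True))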


@ -15,6 +15,8 @@ class Soldiers(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
auto_cleanup = True
auto_cleanup_keep = '//div[@id="mediaWrapper"]'
simultaneous_downloads = 1
delay = 4
max_connections = 1
@ -31,14 +33,14 @@ class Soldiers(BasicNewsRecipe):
, 'language' : language
}
keep_only_tags = [dict(name='div', attrs={'id':['storyHeader','textArea']})]
#keep_only_tags = [dict(name='div', attrs={'id':['storyHeader','textArea']})]
remove_tags = [
dict(name='div', attrs={'id':['addThis','comment','articleFooter']})
,dict(name=['object','link'])
]
#remove_tags = [
#dict(name='div', attrs={'id':['addThis','comment','articleFooter']})
#,dict(name=['object','link'])
#]
feeds = [(u'Frontpage', u'http://www.army.mil/rss/feeds/soldiersfrontpage.xml' )]
feeds = [(u'Frontpage', u'http://www.army.mil/rss/2/' )]
def get_cover_url(self):

recipes/southernstar.recipe (new file, 136 lines)

@ -0,0 +1,136 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2012, watou'
'''
southernstar.ie
'''
import re
import tempfile
import os
import codecs
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
class TheSouthernStar(BasicNewsRecipe):
title = 'The Southern Star'
__author__ = 'watou'
description = 'West Cork\'s leading news and information provider since 1889'
NEWS_INDEX = 'http://www.southernstar.ie/news.php'
LOCAL_NOTES = 'http://www.southernstar.ie/localnotes.php'
SPORT_INDEX = 'http://www.southernstar.ie/sport.php'
CLASSIFIEDS = 'http://www.southernstar.ie/classifieds.php'
language = 'en_IE'
encoding = 'cp1252'
publication_type = 'newspaper'
masthead_url = 'http://www.southernstar.ie/images/logo.gif'
remove_tags_before = dict(name='div', attrs={'class':'article'})
remove_tags_after = dict(name='div', attrs={'class':'article'})
remove_tags = [dict(name='div', attrs={'style':'width:300px; position:relative'}),
dict(name='form'),
dict(name='div', attrs={'class':'endpanel'})]
no_stylesheets = True
tempfiles = []
pubdate = ''
preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
def parse_index(self):
feeds = []
seen_titles = set([])
articles = self.fetch_ss_articles(self.NEWS_INDEX, seen_titles)
if articles:
feeds.append(('News', articles))
articles = self.fetch_ss_notes(self.LOCAL_NOTES)
if articles:
feeds.append(('Local Notes', articles))
articles = self.fetch_ss_articles(self.SPORT_INDEX, seen_titles)
if articles:
feeds.append(('Sport', articles))
articles = self.fetch_ss_notes(self.CLASSIFIEDS)
if articles:
feeds.append(('Classifieds', articles))
return feeds
def fetch_ss_articles(self, index, seen_titles):
articles = []
soup = self.index_to_soup(index)
ts = soup.find('div', {'class':'article'})
ds = self.tag_to_string(ts.find('strong'))
self.pubdate = ' ['+ds+']'
self.timefmt = ' [%s]'%ds
for post in ts.findAll('h1'):
a = post.find('a', href=True)
title = self.tag_to_string(a)
if title in seen_titles:
continue
seen_titles.add(title)
url = a['href']
if url.startswith('article'):
url = 'http://www.southernstar.ie/'+url
self.log('\tFound article:', title, 'at', url)
p = post.findNextSibling('p')
desc = None
if p is not None:
desc = str(p)
articles.append({'title':title, 'url':url, 'description':desc,
'date':self.pubdate})
return articles
def fetch_ss_notes(self, page):
articles = []
soup = self.index_to_soup(page)
ts = soup.find('div', {'class':'content'})
for post in ts.findAll('h1'):
title = self.tag_to_string(post)
self.log('\tFound note:', title)
f = tempfile.NamedTemporaryFile(suffix='.html',delete=False)
f.close()
f = codecs.open(f.name, 'w+b', self.encoding, 'replace')
url = "file://" + f.name
f.write(u'<html><head><meta http-equiv="Content-Type" content="text/html; charset='+
self.encoding+'"></head><body><h1>'+title+'</h1>')
f.write(str(post.findNextSibling('p')))
f.write(u'</body></html>')
self.log('\tWrote note to', f.name)
f.close()
self.tempfiles.append(f)
articles.append({'title':title, 'url':url, 'date':self.pubdate})
return articles
def postprocess_html(self, soup, first):
for table in soup.findAll('table', align='right'):
img = table.find('img')
if img is not None:
img.extract()
caption = self.tag_to_string(table).strip()
div = Tag(soup, 'div')
div['style'] = 'text-align:center'
div.insert(0, img)
div.insert(1, Tag(soup, 'br'))
if caption:
div.insert(2, NavigableString(caption))
table.replaceWith(div)
return soup
def image_url_processor(self, baseurl, url):
return url.replace(' ','%20')
def cleanup(self):
self.log('cleaning up')
for f in self.tempfiles:
os.unlink(f.name)
self.tempfiles = []
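
fetch_ss_notes() above works around the lack of per-note pages by rendering each note into a temporary HTML file and handing calibre a file:// URL, which cleanup() later deletes. The same idea in isolation (title and body content are illustrative):

import codecs, tempfile

def write_note(title, body_html, encoding='cp1252'):
    # Create a named temp file, close it, then reopen it through codecs with
    # an explicit encoding (as the recipe does) and return a file:// URL.
    f = tempfile.NamedTemporaryFile(suffix='.html', delete=False)
    f.close()
    f = codecs.open(f.name, 'w+b', encoding, 'replace')
    f.write(u'<html><head><meta http-equiv="Content-Type" '
            u'content="text/html; charset=%s"></head><body><h1>%s</h1>'
            % (encoding, title))
    f.write(body_html)
    f.write(u'</body></html>')
    f.close()
    return 'file://' + f.name

print(write_note(u'Town notes', u'<p>Example body</p>'))  # illustrative content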


@ -14,7 +14,7 @@ from setup.build_environment import msvc, MT, RC
from setup.installer.windows.wix import WixMixIn
OPENSSL_DIR = r'Q:\openssl'
QT_DIR = 'Q:\\Qt\\4.8.0'
QT_DIR = 'Q:\\Qt\\4.8.1'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
SW = r'C:\cygwin\home\kovid\sw'


@ -32,6 +32,7 @@ class MOBIInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
self.is_kf8 = False
if os.environ.get('USE_MOBIUNPACK', None) is not None:
pos = stream.tell()
@ -62,6 +63,7 @@ class MOBIInput(InputFormatPlugin):
mr = Mobi8Reader(mr, log)
opf = os.path.abspath(mr())
self.encrypted_fonts = mr.encrypted_fonts
self.is_kf8 = True
return opf
raw = parse_cache.pop('calibre_raw_mobi_markup', False)


@ -535,7 +535,7 @@ class OPF(object): # {{{
series_index = MetadataField('series_index', is_dc=False,
formatter=float, none_is=1)
title_sort = TitleSortField('title_sort', is_dc=False)
rating = MetadataField('rating', is_dc=False, formatter=int)
rating = MetadataField('rating', is_dc=False, formatter=float)
pubdate = MetadataField('date', formatter=parse_date,
renderer=isoformat)
publication_type = MetadataField('publication_type', is_dc=False)
@ -883,6 +883,8 @@ class OPF(object): # {{{
val = etree.tostring(x, with_tail=False, encoding=unicode,
method='text').strip()
if val and typ not in ('calibre', 'uuid'):
if typ == 'isbn' and val.lower().startswith('urn:isbn:'):
val = val[len('urn:isbn:'):]
identifiers[typ] = val
found_scheme = True
break
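
The two added lines normalise ISBN identifiers that arrive as URNs (urn:isbn:...) down to the bare number before they are stored. In isolation (the ISBN value is made up):

val = 'urn:isbn:9780000000000'   # hypothetical identifier value
typ = 'isbn'
if typ == 'isbn' and val.lower().startswith('urn:isbn:'):
    val = val[len('urn:isbn:'):]
print(val)   # -> 9780000000000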


@ -0,0 +1,95 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from threading import Event
from io import BytesIO
from calibre.utils.date import as_utc
from calibre.ebooks.metadata.sources.identify import identify, msprefs
from calibre.ebooks.metadata.book.base import Metadata
from calibre.customize.ui import metadata_plugins
from calibre.ebooks.metadata.sources.covers import download_cover
from calibre.utils.logging import GUILog
from calibre.ebooks.metadata.opf2 import metadata_to_opf, OPF
def merge_result(oldmi, newmi, ensure_fields=None):
dummy = Metadata(_('Unknown'))
for f in msprefs['ignore_fields']:
if ':' in f or (ensure_fields and f in ensure_fields):
continue
setattr(newmi, f, getattr(dummy, f))
fields = set()
for plugin in metadata_plugins(['identify']):
fields |= plugin.touched_fields
def is_equal(x, y):
if hasattr(x, 'tzinfo'):
x = as_utc(x)
if hasattr(y, 'tzinfo'):
y = as_utc(y)
return x == y
for f in fields:
# Optimize so that set_metadata does not have to do extra work later
if not f.startswith('identifier:'):
if (not newmi.is_null(f) and is_equal(getattr(newmi, f),
getattr(oldmi, f))):
setattr(newmi, f, getattr(dummy, f))
return newmi
def main(do_identify, covers, metadata, ensure_fields):
failed_ids = set()
failed_covers = set()
all_failed = True
log = GUILog()
for book_id, mi in metadata.iteritems():
mi = OPF(BytesIO(mi), basedir=os.getcwdu(),
populate_spine=False).to_book_metadata()
title, authors, identifiers = mi.title, mi.authors, mi.identifiers
cdata = None
log.clear()
if do_identify:
results = []
try:
results = identify(log, Event(), title=title, authors=authors,
identifiers=identifiers)
except:
pass
if results:
all_failed = False
mi = merge_result(mi, results[0], ensure_fields=ensure_fields)
identifiers = mi.identifiers
if not mi.is_null('rating'):
# set_metadata expects a rating out of 10
mi.rating *= 2
with open('%d.mi'%book_id, 'wb') as f:
f.write(metadata_to_opf(mi, default_lang='und'))
else:
log.error('Failed to download metadata for', title)
failed_ids.add(book_id)
if covers:
cdata = download_cover(log, title=title, authors=authors,
identifiers=identifiers)
if cdata is None:
failed_covers.add(book_id)
else:
with open('%d.cover'%book_id, 'wb') as f:
f.write(cdata[-1])
all_failed = False
with open('%d.log'%book_id, 'wb') as f:
f.write(log.plain_text.encode('utf-8'))
return failed_ids, failed_covers, all_failed
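
This worker communicates its results back purely through files in its working directory: for each book id it may write <id>.mi (an OPF file) and <id>.cover (raw cover data), and it always writes <id>.log. A sketch of how a caller can walk that directory afterwards, mirroring the GUI-side code further down in this commit (directory and ids are illustrative):

import os

def collect_results(tdir, book_ids):
    # Map each book id to its (opf_path, cover_path), using None for any
    # file the worker did not produce.
    id_map = {}
    for book_id in book_ids:
        opf = os.path.join(tdir, '%d.mi' % book_id)
        cov = os.path.join(tdir, '%d.cover' % book_id)
        id_map[book_id] = (opf if os.path.exists(opf) else None,
                           cov if os.path.exists(cov) else None)
    return id_map

print(collect_results('.', [1, 2, 3]))   # in an empty dir: all (None, None)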


@ -217,6 +217,10 @@ class EbookIterator(object):
if hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
if getattr(plumber.input_plugin, 'is_kf8', False):
self.book_format = 'KF8'
self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
if self.opf is None:
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))


@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
import os, shutil
from functools import partial
from PyQt4.Qt import QMenu, QModelIndex, QTimer
@ -16,6 +16,7 @@ from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2.dialogs.device_category_editor import DeviceCategoryEditor
from calibre.gui2.actions import InterfaceAction
from calibre.ebooks.metadata import authors_to_string
from calibre.ebooks.metadata.opf2 import OPF
from calibre.utils.icu import sort_key
from calibre.db.errors import NoSuchFormat
@ -79,17 +80,27 @@ class EditMetadataAction(InterfaceAction):
Dispatcher(self.metadata_downloaded),
ensure_fields=ensure_fields)
def cleanup_bulk_download(self, tdir):
try:
shutil.rmtree(tdir, ignore_errors=True)
except:
pass
def metadata_downloaded(self, job):
if job.failed:
self.gui.job_exception(job, dialog_title=_('Failed to download metadata'))
return
from calibre.gui2.metadata.bulk_download import get_job_details
id_map, failed_ids, failed_covers, all_failed, det_msg = \
get_job_details(job)
(aborted, id_map, tdir, log_file, failed_ids, failed_covers, all_failed,
det_msg, lm_map) = get_job_details(job)
if aborted:
return self.cleanup_bulk_download(tdir)
if all_failed:
num = len(failed_ids | failed_covers)
self.cleanup_bulk_download(tdir)
return error_dialog(self.gui, _('Download failed'),
_('Failed to download metadata or covers for any of the %d'
' book(s).') % len(id_map), det_msg=det_msg, show=True)
' book(s).') % num, det_msg=det_msg, show=True)
self.gui.status_bar.show_message(_('Metadata download completed'), 3000)
@ -103,28 +114,27 @@ class EditMetadataAction(InterfaceAction):
msg += '<p>'+_('Could not download metadata and/or covers for %d of the books. Click'
' "Show details" to see which books.')%num
payload = (id_map, failed_ids, failed_covers)
payload = (id_map, tdir, log_file, lm_map)
from calibre.gui2.dialogs.message_box import ProceedNotification
p = ProceedNotification(self.apply_downloaded_metadata,
payload, job.html_details,
payload, log_file,
_('Download log'), _('Download complete'), msg,
det_msg=det_msg, show_copy_button=show_copy_button,
parent=self.gui)
cancel_callback=lambda x:self.cleanup_bulk_download(tdir),
parent=self.gui, log_is_file=True)
p.show()
def apply_downloaded_metadata(self, payload):
id_map, failed_ids, failed_covers = payload
id_map = dict([(k, v) for k, v in id_map.iteritems() if k not in
failed_ids])
if not id_map:
good_ids, tdir, log_file, lm_map = payload
if not good_ids:
return
modified = set()
db = self.gui.current_db
for i, mi in id_map.iteritems():
for i in good_ids:
lm = db.metadata_last_modified(i, index_is_id=True)
if lm > mi.last_modified:
if lm > lm_map[i]:
title = db.title(i, index_is_id=True)
authors = db.authors(i, index_is_id=True)
if authors:
@ -144,7 +154,18 @@ class EditMetadataAction(InterfaceAction):
'Do you want to proceed?'), det_msg='\n'.join(modified)):
return
self.apply_metadata_changes(id_map)
id_map = {}
for bid in good_ids:
opf = os.path.join(tdir, '%d.mi'%bid)
if not os.path.exists(opf):
opf = None
cov = os.path.join(tdir, '%d.cover'%bid)
if not os.path.exists(cov):
cov = None
id_map[bid] = (opf, cov)
self.apply_metadata_changes(id_map, callback=lambda x:
self.cleanup_bulk_download(tdir))
# }}}
@ -468,13 +489,18 @@ class EditMetadataAction(InterfaceAction):
callback can be either None or a function accepting a single argument,
in which case it is called after applying is complete with the list of
changed ids.
id_map can also be a mapping of ids to 2-tuple's where each 2-tuple
contains the absolute paths to an OPF and cover file respectively. If
either of the paths is None, then the corresponding metadata is not
updated.
'''
if title is None:
title = _('Applying changed metadata')
self.apply_id_map = list(id_map.iteritems())
self.apply_current_idx = 0
self.apply_failures = []
self.applied_ids = []
self.applied_ids = set()
self.apply_pd = None
self.apply_callback = callback
if len(self.apply_id_map) > 1:
@ -492,28 +518,49 @@ class EditMetadataAction(InterfaceAction):
return self.finalize_apply()
i, mi = self.apply_id_map[self.apply_current_idx]
if isinstance(mi, tuple):
opf, cover = mi
if opf:
mi = OPF(open(opf, 'rb'), basedir=os.path.dirname(opf),
populate_spine=False).to_book_metadata()
self.apply_mi(i, mi)
if cover:
self.gui.current_db.set_cover(i, open(cover, 'rb'),
notify=False, commit=False)
self.applied_ids.add(i)
else:
self.apply_mi(i, mi)
self.apply_current_idx += 1
if self.apply_pd is not None:
self.apply_pd.value += 1
QTimer.singleShot(50, self.do_one_apply)
def apply_mi(self, book_id, mi):
db = self.gui.current_db
try:
set_title = not mi.is_null('title')
set_authors = not mi.is_null('authors')
idents = db.get_identifiers(i, index_is_id=True)
idents = db.get_identifiers(book_id, index_is_id=True)
if mi.identifiers:
idents.update(mi.identifiers)
mi.identifiers = idents
if mi.is_null('series'):
mi.series_index = None
if self._am_merge_tags:
old_tags = db.tags(i, index_is_id=True)
old_tags = db.tags(book_id, index_is_id=True)
if old_tags:
tags = [x.strip() for x in old_tags.split(',')] + (
mi.tags if mi.tags else [])
mi.tags = list(set(tags))
db.set_metadata(i, mi, commit=False, set_title=set_title,
db.set_metadata(book_id, mi, commit=False, set_title=set_title,
set_authors=set_authors, notify=False)
self.applied_ids.append(i)
self.applied_ids.add(book_id)
except:
import traceback
self.apply_failures.append((i, traceback.format_exc()))
self.apply_failures.append((book_id, traceback.format_exc()))
try:
if mi.cover:
@ -521,11 +568,6 @@ class EditMetadataAction(InterfaceAction):
except:
pass
self.apply_current_idx += 1
if self.apply_pd is not None:
self.apply_pd.value += 1
QTimer.singleShot(50, self.do_one_apply)
def finalize_apply(self):
db = self.gui.current_db
db.commit()
@ -550,7 +592,7 @@ class EditMetadataAction(InterfaceAction):
if self.applied_ids:
cr = self.gui.library_view.currentIndex().row()
self.gui.library_view.model().refresh_ids(
self.applied_ids, cr)
list(self.applied_ids), cr)
if self.gui.cover_flow:
self.gui.cover_flow.dataChanged()
self.gui.tags_view.recount()
@ -559,7 +601,7 @@ class EditMetadataAction(InterfaceAction):
self.apply_pd = None
try:
if callable(self.apply_callback):
self.apply_callback(self.applied_ids)
self.apply_callback(list(self.applied_ids))
finally:
self.apply_callback = None


@ -160,7 +160,7 @@ class ProceedNotification(MessageBox): # {{{
def __init__(self, callback, payload, html_log, log_viewer_title, title, msg,
det_msg='', show_copy_button=False, parent=None,
cancel_callback=None):
cancel_callback=None, log_is_file=False):
'''
A non modal popup that notifies the user that a background task has
been completed.
@ -175,12 +175,15 @@ class ProceedNotification(MessageBox): # {{{
:param title: The title for this popup
:param msg: The msg to display
:param det_msg: Detailed message
:param log_is_file: If True the html_log parameter is interpreted as
the path to a file on disk containing the log encoded with utf-8
'''
MessageBox.__init__(self, MessageBox.QUESTION, title, msg,
det_msg=det_msg, show_copy_button=show_copy_button,
parent=parent)
self.payload = payload
self.html_log = html_log
self.log_is_file = log_is_file
self.log_viewer_title = log_viewer_title
self.vlb = self.bb.addButton(_('View log'), self.bb.ActionRole)
@ -192,7 +195,11 @@ class ProceedNotification(MessageBox): # {{{
_proceed_memory.append(self)
def show_log(self):
self.log_viewer = ViewLog(self.log_viewer_title, self.html_log,
log = self.html_log
if self.log_is_file:
with open(log, 'rb') as f:
log = f.read().decode('utf-8')
self.log_viewer = ViewLog(self.log_viewer_title, log,
parent=self)
def do_proceed(self, result):
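
With the new log_is_file flag, html_log is either an in-memory HTML string or the path to a utf-8 encoded log file; show_log() above branches on that. The file branch in isolation (argument values are illustrative):

def load_log(html_log, log_is_file=False):
    # When log_is_file is set, html_log is the path to a utf-8 encoded file
    # on disk; otherwise it is already the log text itself.
    if log_is_file:
        with open(html_log, 'rb') as f:
            return f.read().decode('utf-8')
    return html_log

print(load_log('<p>already in memory</p>'))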


@ -402,7 +402,8 @@ class DetailView(QDialog, Ui_Dialog): # {{{
self.setupUi(self)
self.setWindowTitle(job.description)
self.job = job
self.html_view = hasattr(job, 'html_details')
self.html_view = (hasattr(job, 'html_details') and not getattr(job,
'ignore_html_details', False))
if self.html_view:
self.log.setVisible(False)
else:


@ -7,22 +7,42 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, time, shutil
from functools import partial
from itertools import izip
from threading import Event
from threading import Thread
from PyQt4.Qt import (QIcon, QDialog,
QDialogButtonBox, QLabel, QGridLayout, QPixmap, Qt)
from calibre.gui2.threaded_jobs import ThreadedJob
from calibre.ebooks.metadata.sources.identify import identify, msprefs
from calibre.ebooks.metadata.sources.covers import download_cover
from calibre.ebooks.metadata.book.base import Metadata
from calibre.customize.ui import metadata_plugins
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.date import as_utc
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
from calibre.ptempfile import (PersistentTemporaryDirectory,
PersistentTemporaryFile)
# Start download {{{
class Job(ThreadedJob):
ignore_html_details = True
def consolidate_log(self):
self.consolidated_log = self.log.plain_text
self.log = None
def read_consolidated_log(self):
return self.consolidated_log
@property
def details(self):
if self.consolidated_log is None:
return self.log.plain_text
return self.read_consolidated_log()
@property
def log_file(self):
return open(self.download_debug_log, 'rb')
def show_config(gui, parent):
from calibre.gui2.preferences import show_config_widget
show_config_widget('Sharing', 'Metadata download', parent=parent,
@ -104,19 +124,22 @@ def start_download(gui, ids, callback, ensure_fields=None):
d.b.clicked.disconnect()
if ret != d.Accepted:
return
tf = PersistentTemporaryFile('_metadata_bulk.log')
tf.close()
for batch in split_jobs(ids):
job = ThreadedJob('metadata bulk download',
_('Download metadata for %d books')%len(batch),
download, (batch, gui.current_db, d.identify, d.covers,
ensure_fields), {}, callback)
gui.job_manager.run_threaded_job(job)
job = Job('metadata bulk download',
_('Download metadata for %d books')%len(ids),
download, (ids, tf.name, gui.current_db, d.identify, d.covers,
ensure_fields), {}, callback)
job.download_debug_log = tf.name
gui.job_manager.run_threaded_job(job)
gui.status_bar.show_message(_('Metadata download started'), 3000)
# }}}
def get_job_details(job):
id_map, failed_ids, failed_covers, title_map, all_failed = job.result
(aborted, good_ids, tdir, log_file, failed_ids, failed_covers, title_map,
lm_map, all_failed) = job.result
det_msg = []
for i in failed_ids | failed_covers:
title = title_map[i]
@ -126,92 +149,118 @@ def get_job_details(job):
title += (' ' + _('(Failed cover)'))
det_msg.append(title)
det_msg = '\n'.join(det_msg)
return id_map, failed_ids, failed_covers, all_failed, det_msg
return (aborted, good_ids, tdir, log_file, failed_ids, failed_covers,
all_failed, det_msg, lm_map)
def merge_result(oldmi, newmi, ensure_fields=None):
dummy = Metadata(_('Unknown'))
for f in msprefs['ignore_fields']:
if ':' in f or (ensure_fields and f in ensure_fields):
continue
setattr(newmi, f, getattr(dummy, f))
fields = set()
for plugin in metadata_plugins(['identify']):
fields |= plugin.touched_fields
class HeartBeat(object):
CHECK_INTERVAL = 300 # seconds
''' Check that the file count in tdir changes every five minutes '''
def is_equal(x, y):
if hasattr(x, 'tzinfo'):
x = as_utc(x)
if hasattr(y, 'tzinfo'):
y = as_utc(y)
return x == y
def __init__(self, tdir):
self.tdir = tdir
self.last_count = len(os.listdir(self.tdir))
self.last_time = time.time()
for f in fields:
# Optimize so that set_metadata does not have to do extra work later
if not f.startswith('identifier:'):
if (not newmi.is_null(f) and is_equal(getattr(newmi, f),
getattr(oldmi, f))):
setattr(newmi, f, getattr(dummy, f))
def __call__(self):
if time.time() - self.last_time > self.CHECK_INTERVAL:
c = len(os.listdir(self.tdir))
if c == self.last_count:
return False
self.last_count = c
self.last_time = time.time()
return True
newmi.last_modified = oldmi.last_modified
class Notifier(Thread):
return newmi
def __init__(self, notifications, title_map, tdir, total):
Thread.__init__(self)
self.daemon = True
self.notifications, self.title_map = notifications, title_map
self.tdir, self.total = tdir, total
self.seen = set()
self.keep_going = True
def download(ids, db, do_identify, covers, ensure_fields,
def run(self):
while self.keep_going:
try:
names = os.listdir(self.tdir)
except:
pass
else:
for x in names:
if x.endswith('.log'):
try:
book_id = int(x.partition('.')[0])
except:
continue
if book_id not in self.seen and book_id in self.title_map:
self.seen.add(book_id)
self.notifications.put((
float(len(self.seen))/self.total,
_('Processed %s')%self.title_map[book_id]))
time.sleep(1)
def download(all_ids, tf, db, do_identify, covers, ensure_fields,
log=None, abort=None, notifications=None):
ids = list(ids)
metadata = [db.get_metadata(i, index_is_id=True, get_user_categories=False)
for i in ids]
batch_size = 10
batches = split_jobs(all_ids, batch_size=batch_size)
tdir = PersistentTemporaryDirectory('_metadata_bulk')
heartbeat = HeartBeat(tdir)
failed_ids = set()
failed_covers = set()
title_map = {}
ans = {}
count = 0
lm_map = {}
ans = set()
all_failed = True
'''
# Test apply dialog
all_failed = do_identify = covers = False
'''
for i, mi in izip(ids, metadata):
if abort.is_set():
log.error('Aborting...')
break
title, authors, identifiers = mi.title, mi.authors, mi.identifiers
title_map[i] = title
if do_identify:
results = []
aborted = False
count = 0
notifier = Notifier(notifications, title_map, tdir, len(all_ids))
notifier.start()
try:
for ids in batches:
if abort.is_set():
log.error('Aborting...')
break
metadata = {i:db.get_metadata(i, index_is_id=True,
get_user_categories=False) for i in ids}
for i in ids:
title_map[i] = metadata[i].title
lm_map[i] = metadata[i].last_modified
metadata = {i:metadata_to_opf(mi, default_lang='und') for i, mi in
metadata.iteritems()}
try:
results = identify(log, Event(), title=title, authors=authors,
identifiers=identifiers)
except:
pass
if results:
ret = fork_job('calibre.ebooks.metadata.sources.worker', 'main',
(do_identify, covers, metadata, ensure_fields),
cwd=tdir, abort=abort, heartbeat=heartbeat, no_output=True)
except WorkerError as e:
if e.orig_tb:
raise Exception('Failed to download metadata. Original '
'traceback: \n\n'+e.orig_tb)
raise
count += batch_size
fids, fcovs, allf = ret['result']
if not allf:
all_failed = False
mi = merge_result(mi, results[0], ensure_fields=ensure_fields)
identifiers = mi.identifiers
if not mi.is_null('rating'):
# set_metadata expects a rating out of 10
mi.rating *= 2
else:
log.error('Failed to download metadata for', title)
failed_ids.add(i)
# We don't want set_metadata operating on anything but covers
mi = merge_result(mi, mi, ensure_fields=ensure_fields)
if covers:
cdata = download_cover(log, title=title, authors=authors,
identifiers=identifiers)
if cdata is not None:
with PersistentTemporaryFile('.jpg', 'downloaded-cover-') as f:
f.write(cdata[-1])
mi.cover = f.name
all_failed = False
else:
failed_covers.add(i)
ans[i] = mi
count += 1
notifications.put((count/len(ids),
_('Downloaded %(num)d of %(tot)d')%dict(num=count, tot=len(ids))))
log('Download complete, with %d failures'%len(failed_ids))
return (ans, failed_ids, failed_covers, title_map, all_failed)
failed_ids = failed_ids.union(fids)
failed_covers = failed_covers.union(fcovs)
ans = ans.union(set(ids) - fids)
for book_id in ids:
lp = os.path.join(tdir, '%d.log'%book_id)
if os.path.exists(lp):
with open(tf, 'ab') as dest, open(lp, 'rb') as src:
dest.write(('\n'+'#'*20 + ' Log for %s '%title_map[book_id] +
'#'*20+'\n').encode('utf-8'))
shutil.copyfileobj(src, dest)
if abort.is_set():
aborted = True
log('Download complete, with %d failures'%len(failed_ids))
return (aborted, ans, tdir, tf, failed_ids, failed_covers, title_map,
lm_map, all_failed)
finally:
notifier.keep_going = False
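
The Notifier thread above derives download progress purely from the worker's side effects: it polls the temporary directory once a second and counts each newly appeared <id>.log file as one more processed book. A self-contained sketch of that polling pattern (directory, total and the report callback are all illustrative):

import os, threading, time

class DirWatcher(threading.Thread):
    # Poll a directory for new '<book_id>.log' files and report progress.
    def __init__(self, tdir, total, report):
        threading.Thread.__init__(self)
        self.daemon = True
        self.tdir, self.total, self.report = tdir, total, report
        self.seen = set()
        self.keep_going = True

    def run(self):
        while self.keep_going:
            try:
                names = os.listdir(self.tdir)
            except OSError:
                names = []
            for name in names:
                base, sep, ext = name.partition('.')
                if ext == 'log' and base.isdigit():
                    book_id = int(base)
                    if book_id not in self.seen:
                        self.seen.add(book_id)
                        self.report(len(self.seen) / float(self.total), book_id)
            time.sleep(1)

def report(frac, book_id):
    print('%3.0f%% done (book %d)' % (frac * 100, book_id))

# watcher = DirWatcher('/tmp/metadata_bulk_tdir', 10, report)  # hypothetical dir
# watcher.start()  # ...run the downloads... then: watcher.keep_going = False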


@ -161,10 +161,10 @@ class MetadataSingleDialogBase(ResizableDialog):
self.manage_authors_button.clicked.connect(self.authors.manage_authors)
self.series = SeriesEdit(self)
self.remove_unused_series_button = QToolButton(self)
self.remove_unused_series_button.setToolTip(
_('Remove unused series (Series that have no books)') )
self.remove_unused_series_button.clicked.connect(self.remove_unused_series)
self.clear_series_button = QToolButton(self)
self.clear_series_button.setToolTip(
_('Clear series') )
self.clear_series_button.clicked.connect(self.series.clear)
self.series_index = SeriesIndexEdit(self, self.series)
self.basic_metadata_widgets.extend([self.series, self.series_index])
@ -198,6 +198,7 @@ class MetadataSingleDialogBase(ResizableDialog):
self.basic_metadata_widgets.append(self.identifiers)
self.clear_identifiers_button = QToolButton(self)
self.clear_identifiers_button.setIcon(QIcon(I('trash.png')))
self.clear_identifiers_button.setToolTip(_('Clear Ids'))
self.clear_identifiers_button.clicked.connect(self.identifiers.clear)
self.paste_isbn_button = QToolButton(self)
self.paste_isbn_button.setToolTip('<p>' +
@ -303,17 +304,6 @@ class MetadataSingleDialogBase(ResizableDialog):
self.title_sort.auto_generate()
self.author_sort.auto_generate()
def remove_unused_series(self, *args):
self.db.remove_unused_series()
idx = self.series.current_val
self.series.clear()
self.series.initialize(self.db, self.book_id)
if idx:
for i in range(self.series.count()):
if unicode(self.series.itemText(i)) == idx:
self.series.setCurrentIndex(i)
break
def tags_editor(self, *args):
self.tags.edit(self.db, self.book_id)
@ -591,7 +581,7 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
sto(self.title_sort, self.authors)
create_row(1, self.authors, self.deduce_author_sort_button, self.author_sort)
sto(self.author_sort, self.series)
create_row(2, self.series, self.remove_unused_series_button,
create_row(2, self.series, self.clear_series_button,
self.series_index, icon='trash.png')
sto(self.series_index, self.swap_title_author_button)
sto(self.swap_title_author_button, self.manage_authors_button)
@ -756,7 +746,7 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{
span=2, icon='auto_author_sort.png')
create_row(3, self.author_sort, self.series)
create_row(4, self.series, self.series_index,
button=self.remove_unused_series_button, icon='trash.png')
button=self.clear_series_button, icon='trash.png')
create_row(5, self.series_index, self.tags)
create_row(6, self.tags, self.rating, button=self.tags_editor_button)
create_row(7, self.rating, self.pubdate)
@ -892,7 +882,7 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{
span=2, icon='auto_author_sort.png')
create_row(3, self.author_sort, self.series)
create_row(4, self.series, self.series_index,
button=self.remove_unused_series_button, icon='trash.png')
button=self.clear_series_button, icon='trash.png')
create_row(5, self.series_index, self.tags)
create_row(6, self.tags, self.rating, button=self.tags_editor_button)
create_row(7, self.rating, self.pubdate)


@ -35,9 +35,7 @@
<string>&lt;p&gt;If you leave the password blank, anyone will be able to
access your book collection using the web interface.
&lt;br&gt;
&lt;p&gt;Note that passwords do not work with Android devices.
Leave this blank if you intend to use the server with an
Android phone or tablet.</string>
&lt;p&gt;Some devices have browsers that do not support authentication. If you are having trouble downloading files from the content server, try removing the password.</string>
</property>
</widget>
</item>
@ -167,17 +165,13 @@ Leave this blank if you intend to use the server with an
</font>
</property>
<property name="toolTip">
<string>&lt;p&gt;Because of a bug in Google's Android, setting a password
will prevent the server from working with Android devices.
&lt;br&gt;
&lt;p&gt;Do not set a password if you plan to use the server with an
Android phone or tablet.</string>
<string>&lt;p&gt;Some devices have browsers that do not support authentication. If you are having trouble downloading files from the content server, try removing the password.</string>
</property>
<property name="styleSheet">
<string notr="true">QLabel {color:red}</string>
</property>
<property name="text">
<string>Password incompatible with Android devices</string>
<string>Password incompatible with some devices</string>
</property>
</widget>
</item>


@ -241,12 +241,6 @@ def fetch_scheduled_recipe(arg): # {{{
if 'output_profile' in ps:
recs.append(('output_profile', ps['output_profile'],
OptionRecommendation.HIGH))
# Disabled since apparently some people use
# K4PC and, surprise, surprise, it doesn't support
# indexed MOBIs.
#if ps['output_profile'] == 'kindle':
# recs.append(('no_inline_toc', True,
# OptionRecommendation.HIGH))
lf = load_defaults('look_and_feel')
if lf.get('base_font_size', 0.0) != 0.0:


@ -822,7 +822,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
as_unicode(r), det_msg=worker.traceback, show=True)
self.close_progress_indicator()
else:
self.metadata.show_opf(self.iterator.opf, os.path.splitext(pathtoebook)[1][1:])
self.metadata.show_opf(self.iterator.opf,
self.iterator.book_format)
self.view.current_language = self.iterator.language
title = self.iterator.opf.title
if not title:
@ -849,7 +850,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
self.current_book_has_toc = bool(self.iterator.toc)
self.current_title = title
self.setWindowTitle(self.base_window_title+' - '+title +
' [%s]'%os.path.splitext(pathtoebook)[1][1:].upper())
' [%s]'%self.iterator.book_format)
self.pos.setMaximum(sum(self.iterator.pages))
self.pos.setSuffix(' / %d'%sum(self.iterator.pages))
self.vertical_scrollbar.setMinimum(100)


@ -15,7 +15,7 @@ from cherrypy.process.plugins import SimplePlugin
from calibre.constants import __appname__, __version__
from calibre.utils.date import fromtimestamp
from calibre.library.server import listen_on, log_access_file, log_error_file
from calibre.library.server.utils import expose
from calibre.library.server.utils import expose, AuthController
from calibre.utils.mdns import publish as publish_zeroconf, \
stop_server as stop_zeroconf, get_external_ip
from calibre.library.server.content import ContentServer
@ -31,10 +31,11 @@ from calibre import prints, as_unicode
class DispatchController(object): # {{{
def __init__(self, prefix, wsgi=False):
def __init__(self, prefix, wsgi=False, auth_controller=None):
self.dispatcher = cherrypy.dispatch.RoutesDispatcher()
self.funcs = []
self.seen = set()
self.auth_controller = auth_controller
self.prefix = prefix if prefix else ''
if wsgi:
self.prefix = ''
@ -44,6 +45,7 @@ class DispatchController(object): # {{{
raise NameError('Route name: '+ repr(name) + ' already used')
self.seen.add(name)
kwargs['action'] = 'f_%d'%len(self.funcs)
aw = kwargs.pop('android_workaround', False)
if route != '/':
route = self.prefix + route
elif self.prefix:
@ -52,6 +54,8 @@ class DispatchController(object): # {{{
self.dispatcher.connect(name+'prefix_extra_trailing',
self.prefix+'/', self, **kwargs)
self.dispatcher.connect(name, route, self, **kwargs)
if self.auth_controller is not None:
func = self.auth_controller(func, aw)
self.funcs.append(expose(func))
def __getattr__(self, attr):
@ -156,6 +160,8 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,
self.config = {}
self.is_running = False
self.exception = None
auth_controller = None
self.users_dict = {}
#self.config['/'] = {
# 'tools.sessions.on' : True,
# 'tools.sessions.timeout': 60, # Session times out after 60 minutes
@ -171,15 +177,12 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,
}
if opts.password:
self.config['/'] = {
'tools.digest_auth.on' : True,
'tools.digest_auth.realm' : (
'Your calibre library. Username: '
+ opts.username.strip()),
'tools.digest_auth.users' : {opts.username.strip():opts.password.strip()},
}
self.users_dict[opts.username.strip()] = opts.password.strip()
auth_controller = AuthController('Your calibre library',
self.users_dict)
self.__dispatcher__ = DispatchController(self.opts.url_prefix, wsgi)
self.__dispatcher__ = DispatchController(self.opts.url_prefix,
wsgi=wsgi, auth_controller=auth_controller)
for x in self.__class__.__bases__:
if hasattr(x, 'add_routes'):
x.__init__(self)
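The two hunks above route every handler through an optional auth_controller at registration time; a per-route android_workaround flag (set on the /get route further down) decides whether cookie authentication may stand in for digest authentication. A minimal sketch of that registration-time wrapping pattern, using simplified, hypothetical names rather than the actual calibre classes:

    class SimpleDispatcher(object):
        def __init__(self, auth_controller=None):
            self.auth_controller = auth_controller
            self.funcs = []

        def connect(self, name, route, func, **kwargs):
            # Pop the per-route flag so it is not passed on to the router
            allow_cookie_auth = kwargs.pop('android_workaround', False)
            if self.auth_controller is not None:
                # Wrap the handler so credentials are checked on every request
                func = self.auth_controller(func, allow_cookie_auth)
            self.funcs.append(func)

    class DummyAuth(object):
        # Stand-in for AuthController: returns a wrapper that would verify
        # the cookie or digest credentials before calling the handler.
        def __call__(self, func, allow_cookie_auth):
            def wrapper(*args, **kwargs):
                return func(*args, **kwargs)
            return wrapper

    d = SimpleDispatcher(auth_controller=DummyAuth())
    d.connect('get', '/get/{what}/{id}', lambda what, id: 'book',
              android_workaround=True)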


@ -41,7 +41,8 @@ class ContentServer(object):
connect('root', '/', self.index)
connect('old', '/old', self.old)
connect('get', '/get/{what}/{id}', self.get,
conditions=dict(method=["GET", "HEAD"]))
conditions=dict(method=["GET", "HEAD"]),
android_workaround=True)
connect('static', '/static/{name:.*?}', self.static,
conditions=dict(method=["GET", "HEAD"]))
connect('favicon', '/favicon.png', self.favicon,


@ -5,10 +5,12 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import time, sys
import time, sys, uuid, hashlib
from urllib import quote as quote_, unquote as unquote_
from functools import wraps
import cherrypy
from cherrypy.lib.auth_digest import digest_auth, get_ha1_dict_plain
from calibre import strftime as _strftime, prints, isbytestring
from calibre.utils.date import now as nowf
@ -40,6 +42,7 @@ class Offsets(object):
def expose(func):
@wraps(func)
def do(*args, **kwargs):
self = func.im_self
if self.opts.develop:
@ -54,10 +57,87 @@ def expose(func):
prints('\tTime:', func.__name__, time.time()-start)
return ans
do.__name__ = func.__name__
return do
class AuthController(object):
'''
Implement Digest authentication for the content server. Android browsers
cannot handle HTTP AUTH when downloading files, as the download is handed
off to a separate process. So we use a cookie based authentication scheme
for some endpoints (/get) to allow downloads to work on android. Apparently,
cookies are passed to the download process. The cookie expires after
MAX_AGE seconds.
The android browser appears to send a GET request to the server and only if
that request succeeds is the download handed off to the download process.
Therefore, even if the user clicks Get after MAX_AGE, it should still work.
In fact, we could reduce MAX_AGE, but we leave it high as the download
process might have downloads queued and therefore not start the download
immediately.
Note that this makes the server vulnerable to session-hijacking (i.e. someone
can sniff the traffic and create their own requests to /get with the
appropriate cookie, for an hour). The fix is to use https, but since this
is usually run as a private server, that cannot be done. If you care about
this vulnerability, run the server behind a reverse proxy that uses HTTPS.
'''
MAX_AGE = 3600 # Number of seconds after a successful digest auth for which
# the cookie auth will be allowed
def __init__(self, realm, users_dict):
self.realm = realm
self.users_dict = users_dict
self.secret = bytes(uuid.uuid4().hex)
self.cookie_name = 'android_workaround'
def hashit(self, raw):
return hashlib.sha1(raw).hexdigest()
def __call__(self, func, allow_cookie_auth):
@wraps(func)
def authenticate(*args, **kwargs):
cookie = cherrypy.request.cookie.get(self.cookie_name, None)
if not (allow_cookie_auth and self.is_valid(cookie)):
digest_auth(self.realm, get_ha1_dict_plain(self.users_dict),
self.secret)
cookie = cherrypy.response.cookie
cookie[self.cookie_name] = self.generate_cookie()
cookie[self.cookie_name]['path'] = '/'
cookie[self.cookie_name]['version'] = '1'
return func(*args, **kwargs)
authenticate.im_self = func.im_self
return authenticate
def generate_cookie(self, timestamp=None):
'''
Generate a cookie. The cookie contains a plain text timestamp and a
hash of the timestamp and the server secret.
'''
timestamp = int(time.time()) if timestamp is None else timestamp
key = self.hashit('%d:%s'%(timestamp, self.secret))
return '%d:%s'%(timestamp, key)
def is_valid(self, cookie):
'''
Check that cookie has not been spoofed (i.e. verify the declared
timestamp against the hashed timestamp). If the timestamps match, check
that the cookie has not expired. Return True iff the cookie has not
been spoofed and has not expired.
'''
try:
timestamp, hashpart = cookie.value.split(':', 1)
timestamp = int(timestamp)
except:
return False
s_timestamp, s_hashpart = self.generate_cookie(timestamp).split(':', 1)
is_valid = s_hashpart == hashpart
return (is_valid and (time.time() - timestamp) < self.MAX_AGE)
def strftime(fmt='%Y/%m/%d %H:%M:%S', dt=None):
if not hasattr(dt, 'timetuple'):
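Since the utils.py hunk above is dense, here is a standalone sketch of the timestamped-cookie scheme that generate_cookie() and is_valid() implement: a plain-text timestamp paired with a SHA-1 of the timestamp and a per-run server secret. The names below are illustrative, not the calibre API:

    import hashlib, time, uuid

    SECRET = uuid.uuid4().hex   # regenerated on every server start
    MAX_AGE = 3600              # seconds the cookie stays valid after digest auth

    def make_cookie(timestamp=None):
        timestamp = int(time.time()) if timestamp is None else timestamp
        digest = hashlib.sha1(('%d:%s' % (timestamp, SECRET)).encode('utf-8')).hexdigest()
        return '%d:%s' % (timestamp, digest)

    def cookie_is_valid(value):
        try:
            timestamp, digest = value.split(':', 1)
            timestamp = int(timestamp)
        except Exception:
            return False
        # Recompute the hash for the declared timestamp: a mismatch means the
        # cookie was spoofed; a match older than MAX_AGE means it has expired.
        expected = make_cookie(timestamp).split(':', 1)[1]
        return digest == expected and (time.time() - timestamp) < MAX_AGE

    assert cookie_is_valid(make_cookie())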


@ -381,6 +381,18 @@ that allows you to create collections on your Kindle from the |app| metadata. It
.. note:: Amazon have removed the ability to manipulate collections completely in their newer models, like the Kindle Touch and Kindle Fire, making even the above plugin useless. If you really want the ability to manage collections on your Kindle via a USB connection, we encourage you to complain to Amazon about it, or get a reader where this is supported, like the SONY Readers.
I am getting an error when I try to use |app| with my Kobo Touch?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The Kobo Touch has very buggy firmware. Connecting to it has been known to fail at random. Certain combinations of motherboard, USB ports/cables/hubs can exacerbate this tendency to fail. If you are getting an error when connecting to your Kobo Touch with |app|, try the following, each of which has solved the problem for *some* |app| users.
* Connect the Kobo directly to your computer, not via USB Hub
* Try a different USB cable and a different USB port on your computer
* Try a different computer (preferably an older model)
* Try upgrading the firmware on your Kobo Touch to the latest
* Try resetting the Kobo (sometimes this cures the problem for a little while, but then it re-appears, in which case you have to reset again and again)
* Try only putting one or two books onto the Kobo at a time and do not keep large collections on the Kobo
Library Management
------------------


@ -73,7 +73,7 @@ Edit metadata
|emii| The :guilabel:`Edit metadata` action has four variations which can be accessed by doing a right-click on the button.
1. **Edit metadata individually**: Allows you to edit the metadata of books one-by-one with the option of fetching metadata, including covers, from the Internet. It also allows you to add or remove particular ebook formats from a book.
1. **Edit metadata individually**: Allows you to edit the metadata of books one-by-one with the option of fetching metadata, including covers, from the Internet. It also allows you to add or remove particular ebook formats from a book.
2. **Edit metadata in bulk**: Allows you to edit common metadata fields for large numbers of books simultaneously. It operates on all the books you have selected in the :ref:`Library view <search_sort>`.
3. **Download metadata and covers**: Downloads metadata and covers (if available) for the books that are selected in the book list.
4. **Merge book records**: Gives you the capability of merging the metadata and formats of two or more book records. You can choose to either delete or keep the records that were not clicked first.
@ -117,7 +117,7 @@ View
|vi| The :guilabel:`View` action displays the book in an ebook viewer program. |app| has a built-in viewer for many ebook formats.
For other formats it uses the default operating system application. You can configure which formats should open with the internal viewer via
Preferences->Behavior. If a book has more than one format, you can view a particular format by doing a right-click on the button.
Preferences->Behavior. If a book has more than one format, you can view a particular format by doing a right-click on the button.
.. _send_to_device:
@ -175,7 +175,7 @@ Library
5. **<library name>**: Actions 5, 6 etc... give you immediate switch access between multiple libraries that you have created or attached to. This list contains only the 5 most frequently used libraries. For the complete list, use the Quick Switch menu.
6. **Library maintenance**: Allows you to check the current library for data consistency issues and restore the current library's database from backups.
.. note:: Metadata about your ebooks, e.g. title, author, and tags, is stored in a single file in your |app| library folder called metadata.db. If this file gets corrupted (a very rare event), you can lose the metadata. Fortunately, |app| automatically backs up the metadata for every individual book in the book's folder as an OPF file. By using the Restore Library action under Library Maintenance described above, you can have |app| rebuild the metadata.db file from the individual OPF files for you.
.. note:: Metadata about your ebooks, e.g. title, author, and tags, is stored in a single file in your |app| library folder called metadata.db. If this file gets corrupted (a very rare event), you can lose the metadata. Fortunately, |app| automatically backs up the metadata for every individual book in the book's folder as an OPF file. By using the Restore Library action under Library Maintenance described above, you can have |app| rebuild the metadata.db file from the individual OPF files for you.
You can copy or move books between different libraries (once you have more than one library setup) by right clicking on the book and selecting the action :guilabel:`Copy to library`.
@ -235,7 +235,7 @@ Connect/Share
1. **Connect to folder**: Allows you to connect to any folder on your computer as though it were a device and use all the facilities |app| has for devices with that folder. Useful if your device cannot be supported by |app| but is available as a USB disk.
2. **Connect to iTunes**: Allows you to connect to your iTunes books database as though it were a device. Once the books are sent to iTunes, you can use iTunes to make them available to your various iDevices. This is useful if you would rather not have |app| send books to your iDevice directly.
2. **Connect to iTunes**: Allows you to connect to your iTunes books database as though it were a device. Once the books are sent to iTunes, you can use iTunes to make them available to your various iDevices.
3. **Start Content Server**: Starts |app|'s built-in web server. When started, your |app| library will be accessible via a web browser from the Internet (if you choose). You can configure how the web server is accessed by setting preferences at :guilabel:`Preferences->Sharing->Sharing over the net`
@ -338,9 +338,9 @@ Two other kinds of searches are available: equality search and search using `reg
Equality searches are indicated by prefixing the search string with an equals sign (=). For example, the query
``tag:"=science"`` will match "science", but not "science fiction" or "hard science". Regular expression searches are
indicated by prefixing the search string with a tilde (~). Any `python-compatible regular expression <http://docs.python.org/library/re.html>`_ can
be used. Note that backslashes used to escape special characters in regular expressions must be doubled because single backslashes will be removed during query parsing. For example, to match a literal parenthesis you must enter ``\\(``. Regular expression searches are 'contains' searches unless the expression contains anchors.
be used. Note that backslashes used to escape special characters in regular expressions must be doubled because single backslashes will be removed during query parsing. For example, to match a literal parenthesis you must enter ``\\(``. Regular expression searches are 'contains' searches unless the expression contains anchors.
Should you need to search for a string with a leading equals or tilde, prefix the string with a backslash.
Should you need to search for a string with a leading equals or tilde, prefix the string with a backslash.
Enclose search strings with quotes (") if the string contains parenthesis or spaces. For example, to search
for the tag ``Science Fiction`` you would need to search for ``tag:"=science fiction"``. If you search for
@ -362,7 +362,7 @@ The syntax for searching for dates is::
If the date is ambiguous, the current locale is used for date comparison. For example, in an mm/dd/yyyy
locale 2/1/2009 is interpreted as 1 Feb 2009. In a dd/mm/yyyy locale it is interpreted as 2 Jan 2009. Some
special date strings are available. The string ``today`` translates to today's date, whatever it is. The
strings ``yesterday`` and ``thismonth`` (or the translated equivalent in the current language) also work.
strings ``yesterday`` and ``thismonth`` (or the translated equivalent in the current language) also work.
In addition, the string ``daysago`` (also translated) can be used to compare to a date some number of days ago.
For example::


@ -167,7 +167,8 @@ class Worker(object):
'''
exe = self.gui_executable if self.gui else self.executable
env = self.env
env['ORIGWD'] = cwd or os.path.abspath(os.getcwd())
env[b'ORIGWD'] = binascii.hexlify(cPickle.dumps(cwd or
os.path.abspath(os.getcwdu())))
_cwd = cwd
if priority is None:
priority = prefs['worker_process_priority']


@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, cPickle, os
import sys, cPickle, os, binascii
from code import InteractiveInterpreter
from Queue import Queue, Empty
from threading import Thread
@ -130,7 +130,7 @@ class Interpreter(InteractiveInterpreter): # {{{
# }}}
def connect():
os.chdir(os.environ['ORIGWD'])
os.chdir(cPickle.loads(binascii.unhexlify(os.environ['ORIGWD'])))
address = cPickle.loads(unhexlify(os.environ['CALIBRE_WORKER_ADDRESS']))
key = unhexlify(os.environ['CALIBRE_WORKER_KEY'])
return Client(address, authkey=key)
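The ORIGWD change in the two hunks above swaps a raw path in the environment for a hex-encoded pickle, apparently so that the (possibly non-ASCII, os.getcwdu()) working directory survives the trip through environment variables. A standard-library round-trip sketch of that encoding:

    import binascii, os
    try:
        import cPickle as pickle   # Python 2, as used in the diff
    except ImportError:
        import pickle              # Python 3 fallback, just for running the sketch

    cwd = os.path.abspath(os.getcwd())
    encoded = binascii.hexlify(pickle.dumps(cwd))        # what goes into ORIGWD
    decoded = pickle.loads(binascii.unhexlify(encoded))  # what connect() recovers
    assert decoded == cwd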


@ -648,7 +648,10 @@ class BasicNewsRecipe(Recipe):
'url' : URL of print version,
'date' : The publication date of the article as a string,
'description' : A summary of the article
'content' : The full article (can be an empty string). This is used by FullContentProfile
'content' : The full article (can be an empty string). Obsolete,
do not use; instead save the content to a temporary
file and pass a file:///path/to/temp/file.html as
the URL.
}
For an example, see the recipe for downloading `The Atlantic`.
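A hedged sketch of the replacement pattern the new wording describes: write the article HTML to a temporary file and point the article's 'url' at it. PersistentTemporaryFile is calibre's helper for temporary files that outlive the function; the helper name article_entry and the file:/// URL construction below are illustrative, and any NamedTemporaryFile(delete=False) would serve as well:

    import os
    from calibre.ptempfile import PersistentTemporaryFile

    def article_entry(title, date, description, html):
        tf = PersistentTemporaryFile('_article.html')
        tf.write(html)   # html is assumed to be a byte string here
        tf.close()
        return {
            'title'       : title,
            'date'        : date,
            'description' : description,
            # Convert OS path separators so the result is a usable file:/// URL
            'url'         : 'file:///' + tf.name.replace(os.sep, '/'),
        }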


@ -33,7 +33,8 @@ qop_auth = 'auth'
qop_auth_int = 'auth-int'
valid_qops = (qop_auth, qop_auth_int)
valid_algorithms = ('MD5', 'MD5-sess')
valid_algorithms = ('MD5', 'MD5-sess', 'md5', 'md5-sess') # Changed by Kovid to
# add lowercase
def TRACE(msg):
@ -67,7 +68,7 @@ def get_ha1_dict(user_ha1_dict):
argument to digest_auth().
"""
def get_ha1(realm, username):
return user_ha1_dict.get(user)
return user_ha1_dict.get(username) # Changed by Kovid to fix typo
return get_ha1
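For reference, with a plain-text credentials store the HA1 value that get_ha1 returns is defined by RFC 2617, section 3.2.2.2, as MD5("username:realm:password"). A one-function sketch:

    import hashlib

    def ha1_plain(realm, username, password):
        # HA1 = MD5(username ":" realm ":" password), per RFC 2617
        raw = '%s:%s:%s' % (username, realm, password)
        return hashlib.md5(raw.encode('utf-8')).hexdigest()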
@ -107,10 +108,10 @@ def synthesize_nonce(s, key, timestamp=None):
key
A secret string known only to the server.
timestamp
An integer seconds-since-the-epoch timestamp
"""
if timestamp is None:
timestamp = int(time.time())
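The nonce format this docstring describes can be sketched in a few lines; this mirrors what synthesize_nonce() is documented to do (a timestamp plus a hash binding it to the resource string and server key), though the real function's exact hashing details may differ:

    import hashlib, time

    def make_nonce(s, key, timestamp=None):
        if timestamp is None:
            timestamp = int(time.time())
        # The hash lets validate_nonce() later verify both origin and age
        h = hashlib.md5(('%s:%s:%s' % (timestamp, s, key)).encode('utf-8')).hexdigest()
        return '%s:%s' % (timestamp, h)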
@ -190,10 +191,10 @@ class HttpDigestAuthorization (object):
s
A string related to the resource, such as the hostname of the server.
key
A secret string known only to the server.
Both s and key must be the same values which were used to synthesize the nonce
we are trying to validate.
"""
@ -256,7 +257,7 @@ class HttpDigestAuthorization (object):
4.3. This refers to the entity the user agent sent in the request which
has the Authorization header. Typically GET requests don't have an entity,
and POST requests do.
"""
ha2 = self.HA2(entity_body)
# Request-Digest -- RFC 2617 3.2.2.1
@ -302,16 +303,16 @@ def www_authenticate(realm, key, algorithm='MD5', nonce=None, qop=qop_auth, stal
def digest_auth(realm, get_ha1, key, debug=False):
"""A CherryPy tool which hooks at before_handler to perform
HTTP Digest Access Authentication, as specified in :rfc:`2617`.
If the request has an 'authorization' header with a 'Digest' scheme, this
tool authenticates the credentials supplied in that header. If
the request has no 'authorization' header, or if it does but the scheme is
not "Digest", or if authentication fails, the tool sends a 401 response with
a 'WWW-Authenticate' Digest header.
realm
A string containing the authentication realm.
get_ha1
A callable which looks up a username in a credentials store
and returns the HA1 string, which is defined in the RFC to be
@ -320,13 +321,13 @@ def digest_auth(realm, get_ha1, key, debug=False):
where username is obtained from the request's 'authorization' header.
If username is not found in the credentials store, get_ha1() returns
None.
key
A secret string known only to the server, used in the synthesis of nonces.
"""
request = cherrypy.serving.request
auth_header = request.headers.get('authorization')
nonce_is_stale = False
if auth_header is not None:
@ -334,10 +335,10 @@ def digest_auth(realm, get_ha1, key, debug=False):
auth = HttpDigestAuthorization(auth_header, request.method, debug=debug)
except ValueError:
raise cherrypy.HTTPError(400, "The Authorization header could not be parsed.")
if debug:
TRACE(str(auth))
if auth.validate_nonce(realm, key):
ha1 = get_ha1(realm, auth.username)
if ha1 is not None:
@ -355,7 +356,7 @@ def digest_auth(realm, get_ha1, key, debug=False):
if debug:
TRACE("authentication of %s successful" % auth.username)
return
# Respond with 401 status and a WWW-Authenticate header
header = www_authenticate(realm, key, stale=nonce_is_stale)
if debug: