GRiker 2012-01-20 16:13:14 -07:00
commit 18e83abe93
114 changed files with 36541 additions and 32829 deletions

View File

@ -5,7 +5,7 @@
# Also, each release can have new and improved recipes.
# - version: ?.?.?
# date: 2011-??-??
# date: 2012-??-??
#
# new features:
# - title:
@ -19,8 +19,68 @@
# new recipes:
# - title:
- version: 0.8.36
date: 2012-01-20
new features:
- title: "Decrease startup time for large libraries with at least one composite custom column by reading format info on demand"
- title: "When automatically deleting news older than x days, from the calibre library, only delete the book if it both has the tag News and the author calibre. This prevents accidental deletion of books tagged with News by the user."
- title: "Driver for Infibeam Pi 2"
- title: "Add a Tag Editor for tags like custom columns to the edit metadata dialog"
bug fixes:
- title: "E-book viewer: Fix regression in 0.8.35 that caused viewer to raise an error on books that did not define a language"
- title: "Content server: Fix grouping for categories based on custom columns."
tickets: [919011]
- title: "Edit metadata dialog: When setting the series from a format or via metadata download, ensure that the series index is not automatically changed, when closing the dialog."
tickets: [918751]
- title: "When reading metadata from Topaz (azw1) files, handle non ascii metadata correctly."
tickets: [917419]
- title: "CHM Input: Do not choke on CHM files with non ascii internal filenames on windows."
tickets: [917696]
- title: "Fix reading metadata from CHM files with non-ascii titles"
- title: "Fix HTML 5 parser choking on comments"
- title: "If calibre is started from a directory that does not exist, automatically use the home directory as the working directory, instead of crashing"
- title: "Fix iriver story HD Wi-Fi device and external SD card swapped"
tickets: [916364]
- title: "Content server: Fix ugly URLs for specific format download in the book details and permalink panels"
- title: "When adding FB2 files do not set the date field from the metadata in the file"
improved recipes:
- OReilly Premium
- Variety
- Blic
- New Journal of Physics
- Der Tagesspiegel
new recipes:
- title: Tweakers.net
author: Roedi06
- title: Village Voice
author: Barty
- title: Edge.org Conversations
author: levien
- title: Novi list - printed edition
author: Darko Miletic
- version: 0.8.35
date: 2011-01-13
date: 2012-01-13
new features:
- title: "Metadata plugboards: Allow creation of plugboards for email delivery."

View File

@ -0,0 +1,50 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
'''
almasryalyoum.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class AlMasryAlYoum(BasicNewsRecipe):
title = u'al-masry al-youm'
__author__ = 'Omm Mishmishah'
description = 'Independent News from Egypt'
masthead_url = 'http://www.almasryalyoum.com/sites/default/files/img/english_logo.png'
cover_url = 'http://www.almasryalyoum.com/sites/default/files/img/english_logo.png'
auto_cleanup = True
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = False
#delay = 1
use_embedded_content = False
encoding = 'utf8'
publisher = 'Independent News Egypt'
category = 'News, Egypt, World'
language = 'en_EG'
publication_type = 'newsportal'
# preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
#Remove annoying map links (inline-caption class is also used for some image captions! hence regex to match maps.google)
preprocess_regexps = [(re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
,'linearize_tables': False
}
keep_only_tags = [dict(attrs={'class':['article section']})]
remove_tags = [dict(attrs={'class':['related', 'tags', 'tools', 'attached-content ready',
'inline-content story left', 'inline-content map left contracted', 'published',
'story-map', 'statepromo', 'topics', ]})]
remove_attributes = ['width','height']
feeds = [(u'English News', u'http://www.almasryalyoum.com/en/rss_feed_term/113/rss.xml'),
(u'News Features', u'http://www.almasryalyoum.com/en/rss_feed_term/115/rss.xml'),
(u'Culture', u'http://www.almasryalyoum.com/en/rss_feed_term/133/rss.xml'),
(u'Cinema', u'http://www.almasryalyoum.com/en/rss_feed_term/134/rss.xml')
]

View File

@ -1,6 +1,6 @@
__license__ = 'GPL v3'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
'''
blic.rs
'''
@ -73,7 +73,10 @@ class Blic(BasicNewsRecipe):
def print_version(self, url):
return url + '/print'
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
def get_cover_url(self):
soup = self.index_to_soup('http://www.blic.rs/')
alink = soup.find('a', attrs={'id':'blic_naslovna_print'})
if alink:
return 'http://www.blic.rs' + alink['href']
return None

View File

@ -20,7 +20,7 @@ class ESPN(BasicNewsRecipe):
use_embedded_content = False
remove_javascript = True
needs_subscription = True
needs_subscription = 'optional'
encoding= 'ISO-8859-1'
remove_tags_before = dict(name='font', attrs={'class':'date'})
@ -75,32 +75,30 @@ class ESPN(BasicNewsRecipe):
return soup
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.set_handle_refresh(False)
url = ('https://r.espn.go.com/members/v3_1/login')
raw = br.open(url).read()
raw = re.sub(r'(?s)<form>.*?id="regsigninbtn".*?</form>', '', raw)
with TemporaryFile(suffix='.htm') as fname:
with open(fname, 'wb') as f:
f.write(raw)
br.open_local_file(fname)
if self.username and self.password:
br.set_handle_refresh(False)
url = ('https://r.espn.go.com/members/v3_1/login')
raw = br.open(url).read()
raw = re.sub(r'(?s)<form>.*?id="regsigninbtn".*?</form>', '', raw)
with TemporaryFile(suffix='.htm') as fname:
with open(fname, 'wb') as f:
f.write(raw)
br.open_local_file(fname)
br.form = br.forms().next()
br.form.find_control(name='username', type='text').value = self.username
br.form['password'] = self.password
br.submit().read()
br.open('http://espn.go.com').read()
br.set_handle_refresh(True)
br.form = br.forms().next()
br.form.find_control(name='username', type='text').value = self.username
br.form['password'] = self.password
br.submit().read()
br.open('http://espn.go.com').read()
br.set_handle_refresh(True)
return br
def get_article_url(self, article):
return article.get('guid', None)
def print_version(self, url):
if 'eticket' in url:
return url.partition('&')[0].replace('story?', 'print?')
match = re.search(r'story\?(id=\d+)', url)
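
The pattern the hunk above applies is worth isolating: with needs_subscription = 'optional', the recipe must still work when no credentials are given, so the whole login sequence is guarded. A minimal sketch of that shape (the login URL and form field names here are placeholders, not ESPN's):

from calibre.web.feeds.news import BasicNewsRecipe

class OptionalLoginRecipe(BasicNewsRecipe):
    title = 'Example with optional login'
    needs_subscription = 'optional'  # calibre asks for, but does not require, credentials

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username and self.password:
            # Only attempt login when the user supplied credentials;
            # anonymous fetching proceeds with the plain browser.
            br.open('https://example.com/login')  # placeholder URL
            br.select_form(nr=0)
            br['user'] = self.username            # placeholder field names
            br['pass'] = self.password
            br.submit()
        return br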

recipes/klip_me.recipe (new file, 72 lines)
View File

@ -0,0 +1,72 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1299694372(BasicNewsRecipe):
title = u'Klipme'
__author__ = 'Ken Sun'
publisher = 'Klip.me'
category = 'info, custom, Klip.me'
oldest_article = 365
max_articles_per_feed = 100
no_stylesheets = True
remove_javascript = True
remove_tags = [
dict(name='div', attrs={'id':'text_controls_toggle'})
,dict(name='script')
,dict(name='div', attrs={'id':'text_controls'})
,dict(name='div', attrs={'id':'editing_controls'})
,dict(name='div', attrs={'class':'bar bottom'})
]
use_embedded_content = False
needs_subscription = True
INDEX = u'http://www.klip.me'
LOGIN = INDEX + u'/fav/signin?callback=/fav'
feeds = [
(u'Klip.me unread', u'http://www.klip.me/fav'),
(u'Klip.me started', u'http://www.klip.me/fav?s=starred')
]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None:
br.open(self.LOGIN)
br.select_form(nr=0)
br['Email'] = self.username
if self.password is not None:
br['Passwd'] = self.password
br.submit()
return br
def parse_index(self):
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, 'Fetching feed'+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
for item in soup.findAll('table',attrs={'class':['item','item new']}):
atag = item.a
if atag and atag.has_key('href'):
url = atag['href']
articles.append({
'url' :url
})
totalfeeds.append((feedtitle, articles))
return totalfeeds
def print_version(self, url):
return 'http://www.klip.me' + url
def populate_article_metadata(self, article, soup, first):
article.title = soup.find('title').contents[0].strip()
def postprocess_html(self, soup, first_fetch):
for link_tag in soup.findAll(attrs={"id" : "story"}):
link_tag.insert(0,'<h1>'+soup.find('title').contents[0].strip()+'</h1>')
print link_tag
return soup

View File

@ -1,16 +1,35 @@
__license__ = 'GPL v3'
__copyright__ = '2008-2010, AprilHare, Darko Miletic <darko.miletic at gmail.com>'
##
## Title: Microwave Journal RSS recipe
## Contact: AprilHare, Darko Miletic <darko.miletic at gmail.com>
##
## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
## Copyright: 2008-2010, AprilHare, Darko Miletic <darko.miletic at gmail.com>
##
## Written: 2008
## Last Edited: Jan 2012
##
'''
01-19-2012: Added GrayScale Image conversion and Duplicate article removal
'''
__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
__copyright__ = '2008-2012, AprilHare, Darko Miletic <darko.miletic at gmail.com>'
__version__ = 'v0.5.0'
__date__ = '2012-01-19'
__author__ = 'Darko Miletic'
'''
newscientist.com
'''
import re
import urllib
from calibre.utils.magick import Image
from calibre.web.feeds.news import BasicNewsRecipe
class NewScientist(BasicNewsRecipe):
title = 'New Scientist - Online News w. subscription'
__author__ = 'Darko Miletic'
description = 'Science news and science articles from New Scientist.'
language = 'en'
publisher = 'Reed Business Information Ltd.'
@ -39,10 +58,19 @@ class NewScientist(BasicNewsRecipe):
keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','blgmaincol','nsblgposts','hldgalcols']})]
# Whether to omit duplicates of articles (typically arising when articles are indexed in
# more than one section). If True, only the first occurrence will be downloaded.
filterDuplicates = True
# Whether to convert images to grayscale for eInk readers.
Convert_Grayscale = False
url_list = [] # This list is used to check if an article had already been included.
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.open('http://www.newscientist.com/')
if self.username is not None and self.password is not None:
if self.username is not None and self.password is not None:
br.open('https://www.newscientist.com/user/login')
data = urllib.urlencode({ 'source':'form'
,'redirectURL':''
@ -80,6 +108,10 @@ class NewScientist(BasicNewsRecipe):
return article.get('guid', None)
def print_version(self, url):
if self.filterDuplicates:
if url in self.url_list:
return
self.url_list.append(url)
return url + '?full=true&print=true'
def preprocess_html(self, soup):
@ -91,7 +123,7 @@ class NewScientist(BasicNewsRecipe):
item.name='p'
for item in soup.findAll(['xref','figref']):
tstr = item.string
item.replaceWith(tstr)
item.replaceWith(tstr)
for tg in soup.findAll('a'):
if tg.string == 'Home':
tg.parent.extract()
@ -101,3 +133,16 @@ class NewScientist(BasicNewsRecipe):
tg.replaceWith(tstr)
return soup
# Converts images to grayscale
def postprocess_html(self, soup, first):
if self.Convert_Grayscale:
#process all the images
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl = tag['src']
img = Image()
img.open(iurl)
if img < 0:
raise RuntimeError('Out of memory')
img.type = "GrayscaleType"
img.save(iurl)
return soup

View File

@ -1,8 +1,15 @@
# Talking Points is not grabbing everything.
# The look is right, but only the last one added?
import re
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
# Import the Python soup converter (BeautifulSoup), which makes parsing easier.
from calibre.ebooks.BeautifulSoup import BeautifulSoup
# strip ads and graphics
# Current Column lacks a title.
# Talking Points Memo - shorten title - Remove year and Bill's name
# The newsletter archive https://www.billoreilly.com/newsletterarchive is covered by other entries.
# Newsletters: Talking Points Memos covered by cat12
class OReillyPremium(BasicNewsRecipe):
title = u'OReilly Premium'
@ -19,7 +26,17 @@ class OReillyPremium(BasicNewsRecipe):
# Don't go down
recursions = 0
max_articles_per_feed = 2000
language = 'en'
debugMessages = True
# Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
catList = [ ["TV Archives", 'https://www.billoreilly.com/show?action=tvShowArchive', 'a', {'class':['showLinks','homeLinks']}, []],
["No Spin Archives", 'https://www.billoreilly.com/blog?categoryID=7', True, {'class':['blogBody'], 'style':['padding-top:10px;']}, []],
["Daily Briefings", 'http://www.billoreilly.com/blog?categoryID=11', True, {'class':['defaultHeaderSmallLinks']}, []],
["Stratfor", 'http://www.billoreilly.com/blog?categoryID=5', 'a', {'class':['blogLinks']}, []],
["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12', 'td', {}, []],
["Current Column", 'https://www.billoreilly.com/currentcolumn', 'span', {'class':['defaultHeader']}, []]
]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
@ -31,6 +48,8 @@ class OReillyPremium(BasicNewsRecipe):
br.submit()
return br
# Returns the best-guess print url.
# The second parameter (pageURL) is returned if nothing is found.
def extractPrintURL(self, baseURL, pageURL, printString):
tagURL = pageURL
soup = self.index_to_soup(pageURL)
@ -38,7 +57,6 @@ class OReillyPremium(BasicNewsRecipe):
printText = soup.find('a', text=printString)
else :
print("Failed to find Print string "+printString+ " in "+pageURL)
if printText:
tag = printText.parent
tagURL = baseURL+tag['href']
@ -47,177 +65,111 @@ class OReillyPremium(BasicNewsRecipe):
def stripBadChars(self, inString) :
return inString.replace("\'", "")
# returns a qualifying article list
def parseNoSpinArchives(self, baseURL, soupURL, debugMessages):
articleList = []
soup = self.index_to_soup(soupURL)
for div in soup.findAll(True, attrs={'class':['blogBody'], 'style':['padding-top:10px;']}):
a = div.find('a', href=True)
if not a:
continue
# re == regex. [href] is the link
url = baseURL
url +=re.sub(r'\?.*', '', a['href'])
# Get print version
printURL = self.extractPrintURL(baseURL, url, "Print this entry")
if printURL:
url = printURL
title = self.tag_to_string(a, use_alt=True).strip()
if debugMessages :
print("No Spin Archive Title:"+title+" at url: "+url)
description = 'None'
pubdate = time.strftime('%a, %d %b')
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
return articleList
def parseTVArchives(self, baseURL, soupURL, debugMessages):
# TV Archives page has some Ajax, so look for the static only.
articleList = []
soup = self.index_to_soup(soupURL)
if debugMessages :
print("In parseTVArchives")
for div in soup.findAll('a', {'class':['showLinks','homeLinks']}):
a = div
url = baseURL
url +=a['href']
printURL = self.extractPrintURL(baseURL, url, "Print this entry")
if printURL:
url = printURL
title = self.tag_to_string(a, use_alt=True).strip()
title = self.stripBadChars(title)
if debugMessages :
print("TV Archive "+title+" at url: "+url)
description = 'None'
pubdate = time.strftime('%a, %d %b')
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
if debugMessages :
print("Leaving TV Parse ")
return articleList
# Get Daily Briefing Archives
def parseDailyBriefs(self, baseURL, soupURL, debugMessages) :
print("Starting daily briefs")
articleList = []
soup = self.index_to_soup(soupURL)
for div in soup.findAll(True, attrs={'class':['defaultHeaderSmallLinks']}):
# re == regex. [href] is the link
url = baseURL
url +=re.sub(r'\?.*', '', div['href'])
printURL = self.extractPrintURL(baseURL, url, "Print this entry")
if printURL:
url = printURL
title = div.contents[0]
if debugMessages :
print("Daily Brief - title:"+title+" at url: "+url)
description = 'None'
pubdate = time.strftime('%a, %d %b')
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
print("Leaving daily briefs")
return articleList
# Get the weekly Stratfor intelligence report
def parseStratfor(self, baseURL, soupURL, debugMessages):
# http://www.billoreilly.com/blog?categoryID=5
articleList = []
soup = self.index_to_soup(soupURL)
if debugMessages :
print("In parseStratfor")
a = soup.find('a', {'class':['blogLinks']})
url = baseURL
url +=a['href']
title = self.tag_to_string(a, use_alt=True).strip()
if debugMessages :
print("url: "+url)
print("title:"+title)
# Get Stratfor contents so we can get the real title.
stratSoup = self.index_to_soup(url)
title = stratSoup.html.head.title.string
stratIndex = title.find('Stratfor.com:', 0)
if (stratIndex > -1) :
title = title[stratIndex+14:-1]
# Look for first blogBody <td class="blogBody"
stratBody = stratSoup.find('td', {'class':['blogBody']})
if debugMessages :
print("Strat content title:"+title)
print("Strat body: "+ stratBody.contents[0])
description = 'None'
pubdate = time.strftime('%a, %d %b')
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
if debugMessages :
print("Leaving Stratfor Parse ")
return articleList
def parseTalkingPoints(self, baseURL, soupURL, debugMessages) :
# Look for blogDate, which holds the date; the next blogBody has the title, and an anchor with class "homeBlogReadMore bold" has the URL.
articleList = []
soup = self.index_to_soup(soupURL)
if debugMessages :
print("Starting Talking Points")
topDate = soup.find("td", "blogBody")
if not topDate :
print("Failed to find date in Talking Points")
# This page has the contents in double-wrapped tables!
# tableParent = topDate.parent.parent
myTable = topDate.findParents('table')[0]
upOneTable = myTable.findParents('table')[0]
upTwo = upOneTable.findParents('table')[0]
# Now navigate rows of upTwo
if debugMessages :
print("Entering rows")
for rows in upTwo.findChildren("tr", recursive=False):
# Inside top level table, each row is an article
rowTable = rows.find("table")
articleTable = rowTable.find("table")
articleTable = rows.find("tr")
# The middle table is just for formatting the article buffer... but this means we can skip the inner table.
blogDate = articleTable.find("a","blogDate").contents[0]
# Skip to second blogBody for this.
blogTitle = articleTable.findAll("td", "blogBody")[1].contents[0]
blogURL = articleTable.find("a", "homeBlogReadMore bold")['href']
# re == regex. [href] is the link
url = baseURL
url +=re.sub(r'\?.*', '', blogURL)
title = blogDate+": "+self.stripBadChars(blogTitle.replace("Bill O'Reilly: ", ""))
if debugMessages :
print("Talking Points Memo title "+title+" at url: "+url)
def parseGeneric(self, baseURL):
# Does a generic parsing of the articles. There are six categories (0-5)
# Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
# NoSpin and TV are generic
fullReturn = []
for i in range(len(self.catList)) :
articleList = []
soup = self.index_to_soup(self.catList[i][1])
# Set defaults
description = 'None'
pubdate = time.strftime('%a, %d %b')
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
print("Exiting parseTalkingPoints\n")
return articleList
# Problem: 0-2 create many in an array
# 3-5 create one.
# So no for-div for 3-5
def parseCurrentColumn(self, baseURL, soupURL, debugMessages) :
# Only needed to get the column title. Otherwise it's all good already; there's only one column
articleList = []
soup = self.index_to_soup(soupURL)
titleSpan = soup.find('span', {'class':['defaultHeader']})
title = titleSpan.contents[0]
# Get Print URL since it's available
printURL = self.extractPrintURL(baseURL, soupURL, "Print This Article")
if printURL:
print("Found print URL")
url = printURL
if debugMessages :
print("url: "+url)
print("title:"+title)
description = 'None'
pubdate = time.strftime('%a, %d %b')
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
if debugMessages :
print("Leaving Stratfor Parse ")
return articleList
if i < 3 :
for div in soup.findAll(self.catList[i][2], self.catList[i][3]):
print(div)
if i == 1:
a = div.find('a', href=True)
else :
a = div
print(a)
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
if not a:
continue
# url = baseURL+re.sub(r'\?.*', '', a['href'])
url = baseURL+a['href']
if i < 2 :
url = self.extractPrintURL(baseURL, url, "Print this entry")
title = self.tag_to_string(a, use_alt=True).strip()
elif i == 2 :
# Daily Briefs
url = self.extractPrintURL(baseURL, url, "Print this entry")
title = div.contents[0]
if self.debugMessages :
print(title+" @ "+url)
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
elif i == 3 : # Stratfor
a = soup.find('a', self.catList[i][3])
if a is None :
continue
url = baseURL+a['href']
title = self.tag_to_string(a, use_alt=True).strip()
# Get Stratfor contents so we can get the real title.
stratSoup = self.index_to_soup(url)
title = stratSoup.html.head.title.string
stratIndex = title.find('Stratfor.com:', 0)
if (stratIndex > -1) :
title = title[stratIndex+14:-1]
# Look for first blogBody <td class="blogBody"
# Changed 12 Jan 2012 - new page format
#stratBlogTable = stratSoup.find('td', {'class':['blogBody']}).findParent('table')
#stratBody = stratSoup.find('td', {'class':['blogBody']})
elif i == 4 : # Talking Points
topDate = soup.find("td", "blogBody")
if not topDate :
print("Failed to find date in Talking Points")
# This page has the contents in double-wrapped tables!
myTable = topDate.findParents('table')[0]
if myTable is not None:
upOneTable = myTable.findParents('table')[0]
if upOneTable is not None:
upTwo = upOneTable.findParents('table')[0]
if upTwo is None:
continue
# Now navigate rows of upTwo
if self.debugMessages :
print("Entering rows")
for rows in upTwo.findChildren("tr", recursive=False):
# Inside top level table, each row is an article
rowTable = rows.find("table")
articleTable = rowTable.find("table")
# This looks wrong.
articleTable = rows.find("tr")
# The middle table is just for formatting the article buffer... but this means we can skip the inner table.
blogDate = articleTable.find("a","blogDate").contents[0]
# Skip to second blogBody for this.
blogTitle = articleTable.findAll("td", "blogBody")[1].contents[0]
blogURL = articleTable.find("a", "homeBlogReadMore bold")['href']
url = baseURL+re.sub(r'\?.*', '', blogURL)
title = blogDate+": "+self.stripBadChars(blogTitle.replace("Bill O'Reilly: ", ""))
if self.debugMessages :
print("Talking Points Memo title "+title+" at url: "+url)
pubdate = time.strftime('%a, %d %b')
articleList.append(dict(title=title, url=url, date=pubdate, description='None', content=''))
else : # Current Column
titleSpan = soup.find(self.catList[i][2], self.catList[i][3])
if titleSpan is None :
continue
title = titleSpan.contents[0]
url = self.extractPrintURL(baseURL, self.catList[i][1], "Print This Article")
if i == 3 or i == 5 :
if self.debugMessages :
print(self.catList[i][0]+" Title:"+title+" at url: "+url)
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
self.catList[i][3] = articleList
fullReturn.append((self.catList[i][0], articleList))
return fullReturn
# calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles.
# Returns a list of tuples: ('feed title', list of articles)
@ -231,27 +183,8 @@ class OReillyPremium(BasicNewsRecipe):
# this is used instead of BasicNewsRecipe.parse_feeds().
def parse_index(self):
# Parse the page into Python Soup
debugMessages = True
baseURL = "https://www.billoreilly.com"
def feed_title(div):
return ''.join(div.findAll(text=True, recursive=False)).strip()
# [] is list, {} is empty mapping.
articleList = []
ans = []
showList = self.parseTVArchives(baseURL, 'https://www.billoreilly.com/show?action=tvShowArchive', debugMessages)
articleList = self.parseNoSpinArchives(baseURL, 'https://www.billoreilly.com/blog?categoryID=7', debugMessages)
stratList = self.parseStratfor(baseURL, 'http://www.billoreilly.com/blog?categoryID=5', debugMessages)
dailyBriefs = self.parseDailyBriefs(baseURL, 'http://www.billoreilly.com/blog?categoryID=11', debugMessages)
talkingPoints = self.parseTalkingPoints(baseURL, 'https://www.billoreilly.com/blog?categoryID=12', debugMessages)
currentColumn = self.parseCurrentColumn(baseURL, 'https://www.billoreilly.com/currentcolumn', debugMessages)
# Below, { x:y, a:b } creates a dictionary. We return a tuple of a title and list of dict...
# Lists are constructed with square brackets, separating items with commas: [a, b, c]. Tuples are constructed by the comma operator (not within square brackets), with or without enclosing parentheses, but an empty tuple must have the enclosing parentheses, such as a, b, c or (). A single item tuple must have a trailing comma, such as (d,).
# The TOC shows only the first two feeds, whether they are Talking Points and No Spin News, TV Shows and Stratfor Weekly, or Daily Briefing and Current Column.
# So all work individually; no idea why only the first two appear in the TOC now.
ans = [("Talking Points Memos", talkingPoints),("No Spin News", articleList),("TV Shows", showList),("Stratfor Weekly",stratList), ("Daily Briefing", dailyBriefs),("Current Column", currentColumn)]
if debugMessages :
print ans
return ans
return self.parseGeneric(baseURL)
def preprocess_html(self, soup):
refresh = soup.find('meta', {'http-equiv':'refresh'})
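
The refactor above replaces six copy-paste parse methods with one table-driven loop over catList. A miniature of the idea, with illustrative placeholder categories and markup rather than the real billoreilly.com structure:

CATS = [
    # (feed name, index URL, tag to find, attrs to match)
    ('News',   'http://example.com/news',   'a',    {'class': 'story'}),
    ('Column', 'http://example.com/column', 'span', {'class': 'header'}),
]

def parse_generic(index_to_soup):
    feeds = []
    for name, url, tag, attrs in CATS:
        soup = index_to_soup(url)
        articles = []
        for el in soup.findAll(tag, attrs=attrs):
            articles.append(dict(title=el.string or '', url=url,
                                 date='', description='', content=''))
        feeds.append((name, articles))
    return feeds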

View File

@ -0,0 +1,46 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Pat Stapleton <pat.stapleton at gmail.com>'
'''
thedailynewsegypt.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class TheDailyNewsEG(BasicNewsRecipe):
title = u'The Daily News Egypt'
__author__ = 'Omm Mishmishah'
description = 'News from Egypt'
masthead_url = 'http://www.thedailynewsegypt.com/images/DailyNews-03_05.gif'
cover_url = 'http://www.thedailynewsegypt.com/images/DailyNews-03_05.gif'
auto_cleanup = True
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = False
#delay = 1
use_embedded_content = False
encoding = 'utf8'
publisher = 'The Daily News Egypt'
category = 'News, Egypt, World'
language = 'en_EG'
publication_type = 'newsportal'
# preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
#Remove annoying map links (inline-caption class is also used for some image captions! hence regex to match maps.google)
preprocess_regexps = [(re.compile(r'<a class="inline-caption" href="http://maps\.google\.com.*?/a>', re.DOTALL), lambda m: '')]
conversion_options = {
'comments' : description
,'tags' : category
,'language' : language
,'publisher' : publisher
,'linearize_tables': False
}
keep_only_tags = [dict(attrs={'class':['article section']})]
remove_tags = [dict(attrs={'class':['related', 'tags', 'tools', 'attached-content ready',
'inline-content story left', 'inline-content map left contracted', 'published',
'story-map', 'statepromo', 'topics', ]})]
remove_attributes = ['width','height']
feeds = [(u'The Daily News Egypt', u'http://www.thedailynewsegypt.com/rss.php?sectionid=all')]

View File

@ -0,0 +1,66 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__docformat__ = 'restructuredtext en'
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Tweakers(BasicNewsRecipe):
title = u'Tweakers.net - with Reactions'
__author__ = 'Roedi06'
language = 'nl'
oldest_article = 7
max_articles_per_feed = 100
cover_url = 'http://img51.imageshack.us/img51/7470/tweakersnetebook.gif'
keep_only_tags = [dict(name='div', attrs={'class':'columnwrapper news'}),
{'id':'reacties'},
]
remove_tags = [dict(name='div', attrs={'id' : ['utracker']}),
{'id' : ['channelNav']},
{'id' : ['contentArea']},
{'class' : ['breadCrumb']},
{'class' : ['nextPrevious ellipsis']},
{'class' : ['advertorial']},
{'class' : ['sidebar']},
{'class' : ['filterBox']},
{'id' : ['toggleButtonTxt']},
{'id' : ['socialButtons']},
{'class' : ['button']},
{'class' : ['textadTop']},
{'class' : ['commentLink']},
{'title' : ['Reageer op deze reactie']},
{'class' : ['pageIndex']},
{'class' : ['reactieHeader collapsed']},
]
no_stylesheets=True
preprocess_regexps = [
(re.compile(r'<hr*?>', re.IGNORECASE | re.DOTALL), lambda match : ''),
(re.compile(r'<p>', re.IGNORECASE | re.DOTALL), lambda match : ''),
(re.compile(r'</p>', re.IGNORECASE | re.DOTALL), lambda match : ''),
(re.compile(r'<a.*?>'), lambda h1: '<b><u>'),
(re.compile(r'</a>'), lambda h2: '</u></b>'),
(re.compile(r'<span class="new">', re.IGNORECASE | re.DOTALL), lambda match : ''),
(re.compile(r'</span>', re.IGNORECASE | re.DOTALL), lambda match : ''),
(re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_0'), lambda match : ' - moderated 0<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_0'),
(re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_1'), lambda match : ' - moderated +1<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_1'),
(re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_2'), lambda match : ' - moderated +2<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_2'),
(re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_3'), lambda match : ' - moderated +3<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_3'),
(re.compile(r'<div class="moderation">.*?</div>'), lambda h1: ''),
]
extra_css = '.reactieHeader { color: #333333; font-size: 6px; border-bottom:solid 2px #333333; border-top:solid 1px #333333; } \
.reactieContent { font-family:"Times New Roman",Georgia,Serif; color: #000000; font-size: 8px; } \
.quote { font-family:"Times New Roman",Georgia,Serif; padding-left:2px; border-left:solid 3px #666666; color: #666666; }'
feeds = [(u'Tweakers.net', u'http://feeds.feedburner.com/tweakers/nieuws')]
def print_version(self, url):
return url + '?max=200'

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
'''
www.variety.com
'''
@ -14,11 +14,11 @@ class Variety(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
encoding = 'utf8'
publisher = 'Red Business Information'
category = 'Entertainment Industry News, Daily Variety, Movie Reviews, TV, Awards, Oscars, Cannes, Box Office, Hollywood'
language = 'en'
masthead_url = 'http://a330.g.akamai.net/7/330/23382/20090528190853/www.variety.com/graphics/variety/Variety_logo_green_tm.gif'
masthead_url = 'http://images1.variety.com/graphics/variety/Variety_logo_green_tm.gif'
extra_css = ' body{font-family: Georgia,"Times New Roman",Times,Courier,serif } img{margin-bottom: 1em} '
conversion_options = {
@ -30,17 +30,10 @@ class Variety(BasicNewsRecipe):
remove_tags = [dict(name=['object','link','map'])]
keep_only_tags = [dict(name='div', attrs={'id':'article'})]
keep_only_tags = [dict(name='div', attrs={'class':'art control'})]
feeds = [(u'News & Articles', u'http://feeds.feedburner.com/variety/headlines' )]
def print_version(self, url):
rpt = url.rpartition('?')[0]
artid = rpt.rpartition('/')[2]
catidr = url.rpartition('categoryid=')[2]
catid = catidr.partition('&')[0]
return 'http://www.variety.com/index.asp?layout=print_story&articleid=' + artid + '&categoryid=' + catid
def preprocess_html(self, soup):
return self.adeify_images(soup)
rpt = url.rpartition('.html')[0]
return rpt + '?printerfriendly=true'

View File

@ -0,0 +1,46 @@
#!/usr/bin/env python
import re
from calibre.web.feeds.news import BasicNewsRecipe
class VillageVoice(BasicNewsRecipe):
title = 'Village Voice'
feeds = [
("Complete Issue", "http://villagevoice.com/syndication/issue"),
("News", "http://villagevoice.com/syndication/section/news"),
("Music", "http://villagevoice.com/syndication/section/music"),
("Movies", "http://villagevoice.com/syndication/section/film"),
#("Restaurants", "http://villagevoice.com/syndication/section/dining"),
#("Music Events", "http://villagevoice.com/syndication/events?type=music"),
#("Calendar Events", "http://villagevoice.com/syndication/events"),
#("Promotional Events", "http://villagevoice.com/syndication/promoEvents"),
#("Restaurant Guide", "http://villagevoice.com/syndication/restaurants/search")
]
auto_cleanup = True
max_articles_per_feed = 50
masthead_url = "http://assets.villagevoice.com/img/citylogo.png"
language = 'en'
__author__ = 'Barty'
seen_urls = []
# Village Voice breaks the article up into multiple pages, so
# parse page and grab the print url
url_regex = re.compile(r'\/content\/printVersion\/\d+',re.I)
def print_version(self, url):
if url in self.seen_urls:
return None
self.seen_urls.append( url)
soup = self.index_to_soup(url)
atag = soup.find('a',attrs={'href':self.url_regex})
if atag is None:
self.log('Warning: no print url found for '+url)
else:
m = self.url_regex.search(atag['href'])
if m:
url = 'http://www.villagevoice.com'+m.group(0)
return url
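
The recipe above locates the print link by matching an anchor's href against a compiled regex, which calibre's bundled BeautifulSoup accepts directly as an attribute value. A self-contained sketch of that lookup:

import re
from calibre.ebooks.BeautifulSoup import BeautifulSoup

soup = BeautifulSoup('<a href="/content/printVersion/123">Print</a>')
rx = re.compile(r'/content/printVersion/\d+', re.I)
atag = soup.find('a', attrs={'href': rx})
print(atag['href'])  # /content/printVersion/123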

View File

@ -3,7 +3,7 @@
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>..:: calibre {library} ::.. {title}</title>
<meta http-equiv="X-UA-Compatible" content="IE=100" />
<link rel="icon" type="image/x-icon" href="http://calibre-ebook.com/favicon.ico" />
@ -58,7 +58,7 @@
method="post" title="Donate to support the development of calibre">
<div>
<input type="hidden" name="cmd" value="_s-xclick"></input>
<input type="hidden" name="hosted_button_id" value="3028915"></input>
<input type="hidden" name="hosted_button_id" value="MZQCP8EESW4H4"></input>
<input type="image"
src="{prefix}/static/button-donate.png"
name="submit"></input>

View File

@ -26,7 +26,11 @@ def login_to_google(username, password):
br.form['Email'] = username
br.form['Passwd'] = password
raw = br.submit().read()
if b'<title>Account overview - Account Settings</title>' not in raw:
if re.search(br'<title>.*?Account Settings</title>', raw) is None:
x = re.search(br'(?is)<title>.*?</title>', raw)
if x is not None:
print ('Title of post login page: %s'%x.group())
#open('/tmp/goog.html', 'wb').write(raw)
raise ValueError(('Failed to login to google with credentials: %s %s'
'\nGoogle sometimes requires verification when logging in from a '
'new IP address. Use lynx to login and supply the verification, '
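
The change above loosens the post-login check from one exact <title> string to a byte regex, since Google varies the page title. A tiny standalone check of the new pattern, on a hypothetical response body:

import re
raw = b'<title>Overview - Account Settings</title>'  # hypothetical response body
print(re.search(br'<title>.*?Account Settings</title>', raw) is not None)  # True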

View File

@ -18,14 +18,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2012-01-08 20:03+0000\n"
"Last-Translator: Simeon <Unknown>\n"
"PO-Revision-Date: 2012-01-14 02:30+0000\n"
"Last-Translator: Wolfgang Rohdewald <wolfgang@rohdewald.de>\n"
"Language-Team: German <debian-l10n-german@lists.debian.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2012-01-09 04:49+0000\n"
"X-Generator: Launchpad (build 14640)\n"
"X-Launchpad-Export-Date: 2012-01-15 05:18+0000\n"
"X-Generator: Launchpad (build 14664)\n"
"Language: de\n"
#. name for aaa

File diff suppressed because it is too large.

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 8, 35)
numeric_version = (0, 8, 36)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -14,6 +14,7 @@ from functools import wraps, partial
from calibre.db.locking import create_locks, RecordLock
from calibre.db.fields import create_field
from calibre.db.tables import VirtualTable
from calibre.db.lazy import FormatMetadata, FormatsList
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import now
@ -127,14 +128,8 @@ class Cache(object):
if not formats:
good_formats = None
else:
good_formats = []
for f in formats:
try:
mi.format_metadata[f] = self._format_metadata(book_id, f)
except:
pass
else:
good_formats.append(f)
mi.format_metadata = FormatMetadata(self, id, formats)
good_formats = FormatsList(formats, mi.format_metadata)
mi.formats = good_formats
mi.has_cover = _('Yes') if self._field_for('cover', book_id,
default_value=False) else ''

src/calibre/db/lazy.py (new file, 99 lines)
View File

@ -0,0 +1,99 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import weakref
from functools import wraps
from collections import MutableMapping, MutableSequence
'''
Avoid doing stats on all files in a book when getting metadata for that book.
Speeds up calibre startup with large libraries/libraries on a network share,
with a composite custom column.
'''
# Lazy format metadata retrieval {{{
def resolved(f):
@wraps(f)
def wrapper(self, *args, **kwargs):
if getattr(self, '_must_resolve', True):
self._resolve()
self._must_resolve = False
return f(self, *args, **kwargs)
return wrapper
class MutableBase(object):
@resolved
def __str__(self):
return str(self._values)
@resolved
def __repr__(self):
return repr(self._values)
@resolved
def __unicode__(self):
return unicode(self._values)
@resolved
def __len__(self):
return len(self._values)
@resolved
def __iter__(self):
return iter(self._values)
@resolved
def __contains__(self, key):
return key in self._values
@resolved
def __getitem__(self, fmt):
return self._values[fmt]
@resolved
def __setitem__(self, key, val):
self._values[key] = val
@resolved
def __delitem__(self, key):
del self._values[key]
class FormatMetadata(MutableBase, MutableMapping):
def __init__(self, db, id_, formats):
self._dbwref = weakref.ref(db)
self._id = id_
self._formats = formats
def _resolve(self):
db = self._dbwref()
self._values = {}
for f in self._formats:
try:
self._values[f] = db.format_metadata(self._id, f)
except:
pass
class FormatsList(MutableBase, MutableSequence):
def __init__(self, formats, format_metadata):
self._formats = formats
self._format_metadata = format_metadata
def _resolve(self):
self._values = [f for f in self._formats if f in self._format_metadata]
@resolved
def insert(self, idx, val):
self._values.insert(idx, val)
# }}}
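
A sketch of how the classes above behave, using a hypothetical stand-in for the database object (only the one method FormatMetadata calls is stubbed); nothing is stat-ed until the mapping is first touched:

from calibre.db.lazy import FormatMetadata

class FakeDB(object):  # hypothetical stub, not a real calibre database
    calls = 0
    def format_metadata(self, book_id, fmt):
        FakeDB.calls += 1  # in real use this stats a file on disk
        return {'size': 1024}

db = FakeDB()
fm = FormatMetadata(db, 1, ['EPUB', 'MOBI'])
print(FakeDB.calls)    # 0 -- construction does no work
print(fm['EPUB'])      # first access runs _resolve() once for all formats
print(FakeDB.calls)    # 2 -- one call per format, cached from now on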

View File

@ -162,7 +162,7 @@ class ANDROID(USBMS):
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO']
'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
@ -175,7 +175,7 @@ class ANDROID(USBMS):
'GT-S5830_CARD', 'GT-S5570_CARD', 'MB870', 'MID7015A',
'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI',
'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107',
'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET']
'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',

View File

@ -11,6 +11,7 @@ from calibre.customize.conversion import InputFormatPlugin
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename
from calibre.constants import filesystem_encoding
class CHMInput(InputFormatPlugin):
@ -36,6 +37,8 @@ class CHMInput(InputFormatPlugin):
log.debug('Processing CHM...')
with TemporaryDirectory('_chm2oeb') as tdir:
if not isinstance(tdir, unicode):
tdir = tdir.decode(filesystem_encoding)
html_input = plugin_for_input_format('html')
for opt in html_input.options:
setattr(options, opt.option.name, opt.recommended_value)

View File

@ -6,13 +6,14 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
import re, codecs
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import string_to_authors, MetaInformation
from calibre.utils.logging import default_log
from calibre.ptempfile import TemporaryFile
from calibre import force_unicode
def _clean(s):
return s.replace(u'\u00a0', u' ')
@ -138,6 +139,13 @@ def get_metadata_from_reader(rdr):
resolve_entities=True)[0])
title = rdr.title
try:
x = rdr.GetEncoding()
codecs.lookup(x)
enc = x
except:
enc = 'cp1252'
title = force_unicode(title, enc)
authors = _get_authors(home)
mi = MetaInformation(title, authors)
publisher = _get_publisher(home)
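
The try/except above is a small encoding-validation idiom: trust the encoding the CHM file declares only if Python's codec registry knows it, otherwise fall back to cp1252. Isolated, it looks like this:

import codecs

def safe_encoding(declared, fallback='cp1252'):
    try:
        codecs.lookup(declared)  # raises LookupError for unknown names
        return declared
    except (LookupError, TypeError):  # TypeError covers declared=None
        return fallback

print(safe_encoding('utf-8'))        # utf-8
print(safe_encoding('no-such-enc'))  # cp1252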

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
' and Alex Bramley <a.bramley at gmail.com>.'
import os, re
import os, re, codecs
from calibre import guess_type as guess_mimetype
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
@ -99,8 +99,17 @@ class CHMReader(CHMFile):
def ExtractFiles(self, output_dir=os.getcwdu(), debug_dump=False):
html_files = set([])
try:
x = self.GetEncoding()
codecs.lookup(x)
enc = x
except:
enc = 'cp1252'
for path in self.Contents():
lpath = os.path.join(output_dir, path)
fpath = path
if not isinstance(path, unicode):
fpath = path.decode(enc)
lpath = os.path.join(output_dir, fpath)
self._ensure_dir(lpath)
try:
data = self.GetFile(path)
@ -123,6 +132,7 @@ class CHMReader(CHMFile):
self.log.warn('%r filename too long, skipping'%path)
continue
raise
if debug_dump:
import shutil
shutil.copytree(output_dir, os.path.join(debug_dump, 'debug_dump'))

View File

@ -8,6 +8,7 @@ import StringIO, sys
from struct import pack
from calibre.ebooks.metadata import MetaInformation
from calibre import force_unicode
class StreamSlicer(object):
@ -245,7 +246,9 @@ class MetadataUpdater(object):
def get_metadata(self):
''' Return MetaInformation with title, author'''
self.get_original_metadata()
return MetaInformation(self.metadata['Title'], [self.metadata['Authors']])
title = force_unicode(self.metadata['Title'], 'utf-8')
authors = force_unicode(self.metadata['Authors'], 'utf-8').split(';')
return MetaInformation(title, authors)
def get_original_metadata(self):
offset = self.base + self.topaz_headers['metadata']['blocks'][0]['offset']
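
The fix above decodes the raw Topaz header fields before handing them to MetaInformation; force_unicode is calibre's decode-with-fallback helper. A one-line illustration with hypothetical header bytes:

from calibre import force_unicode
raw_title = b'Caf\xc3\xa9'                # hypothetical bytes from an azw1 header
print(force_unicode(raw_title, 'utf-8'))  # Café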

View File

@ -13,6 +13,7 @@
3. Much more comprehensive testing/error handling
4. Properly encodes/decodes assertions
5. Handles points in the padding of elements consistently
6. Has a utility method to calculate the CFI for the current viewport position robustly
To check if this script is compatible with the current browser, call
window.cfi.is_compatible(); it will throw an exception if not compatible.
@ -72,7 +73,7 @@ get_current_time = (target) -> # {{{
fstr(ans)
# }}}
window_scroll_pos = (win) -> # {{{
window_scroll_pos = (win=window) -> # {{{
if typeof(win.pageXOffset) == 'number'
x = win.pageXOffset
y = win.pageYOffset
@ -86,7 +87,7 @@ window_scroll_pos = (win) -> # {{{
return [x, y]
# }}}
viewport_to_document = (x, y, doc) -> # {{{
viewport_to_document = (x, y, doc=window?.document) -> # {{{
until doc == window.document
# We are in a frame
frame = doc.defaultView.frameElement
@ -101,7 +102,7 @@ viewport_to_document = (x, y, doc) -> # {{{
return [x, y]
# }}}
# Equivalent for caretRangeFromPoint for non WebKit browsers {{{
# Convert point to character offset {{{
range_has_point = (range, x, y) ->
for rect in range.getClientRects()
if (rect.left <= x <= rect.right) and (rect.top <= y <= rect.bottom)
@ -157,7 +158,8 @@ class CanonicalFragmentIdentifier
is_compatible(): Throws an error if the browser is not compatible with
this script
at(x, y): which maps a point to a CFI, if possible
at(x, y): Maps a point to a CFI, if possible
at_current(): Returns the CFI corresponding to the current viewport scroll location
scroll_to(cfi): which scrolls the browser to a point corresponding to the
given cfi, and returns the x and y co-ordinates of the point.
@ -559,11 +561,73 @@ class CanonicalFragmentIdentifier
null
# }}}
current_cfi: () -> # {{{
at_current: () -> # {{{
[winx, winy] = window_scroll_pos()
[winw, winh] = [window.innerWidth, window.innerHeight]
max = Math.max
winw = max(winw, 400)
winh = max(winh, 600)
deltay = Math.floor(winh/50)
deltax = Math.floor(winw/25)
miny = max(-winy, -winh)
maxy = winh
minx = max(-winx, -winw)
maxx = winw
dist = (p1, p2) ->
Math.sqrt(Math.pow(p1[0]-p2[0], 2), Math.pow(p1[1]-p2[1], 2))
get_cfi = (ox, oy) ->
try
cfi = this.at(ox, oy)
point = this.point(cfi)
catch err
cfi = null
if point.range != null
r = point.range
rect = r.getClientRects()[0]
x = (point.a*rect.left + (1-point.a)*rect.right)
y = (rect.top + rect.bottom)/2
[x, y] = viewport_to_document(x, y, r.startContainer.ownerDocument)
else
node = point.node
r = node.getBoundingClientRect()
[x, y] = viewport_to_document(r.left, r.top, node.ownerDocument)
if typeof(point.x) == 'number' and node.offsetWidth
x += (point.x*node.offsetWidth)/100
if typeof(point.y) == 'number' and node.offsetHeight
y += (point.y*node.offsetHeight)/100
if dist(viewport_to_document(ox, oy), [x, y]) > 50
cfi = null
return cfi
x_loop = (cury) ->
for direction in [-1, 1]
delta = deltax * direction
curx = 0
until (direction < 0 and curx < minx) or (direction > 0 and curx > maxx)
cfi = get_cfi(curx, cury)
if cfi
return cfi
curx += delta
null
for direction in [-1, 1]
delta = deltay * direction
cury = 0
until (direction < 0 and cury < miny) or (direction > 0 and cury > maxy)
cfi = x_loop(cury, -1)
if cfi
return cfi
cury += delta
# TODO: Return the CFI corresponding to the <body> tag
null
# }}}
if window?

View File

@ -23,6 +23,7 @@
indignation and dislike men who are so beguiled and demoralized by
the charms of pleasure of the moment, so blinded by desire, that
they cannot foresee</p>
<p><img src="marker.png" width="300" height="300" alt="Test image"/></p>
</body>
</html>

View File

@ -1,7 +1,7 @@
<!DOCTYPE html>
<html>
<head>
<title>Testing EPUB CFI</title>
<title>Testing cfi.coffee</title>
<script type="text/javascript" src="cfi.coffee"></script>
<script type="text/javascript" src="cfi-test.coffee"></script>
<style type="text/css">
@ -46,7 +46,8 @@
</head>
<body>
<div id="container">
<h1 id="first-h1">Testing EPUB CFI</h1>
<h1 id="first-h1">Testing cfi.coffee</h1>
<p>Click anywhere and the location will be marked with a marker, whose position is set via a CFI.</p>
<p><a id="reset" href="/">Reset CFI to None</a></p>
<h2>A div with scrollbars</h2>
<p>Scroll down and click on some elements. Make sure to hit both

View File

@ -103,7 +103,7 @@ def html5_parse(data, max_nesting_depth=100):
xmlns_declaration = '{%s}'%XMLNS_NS
non_html5_namespaces = {}
seen_namespaces = set()
for elem in tuple(data.iter()):
for elem in tuple(data.iter(tag=etree.Element)):
elem.attrib.pop('xmlns', None)
namespaces = {}
for x in tuple(elem.attrib):
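
The one-line change above matters because lxml's iter() yields every node, including comments and processing instructions, which are not regular elements and choke on attribute manipulation; passing tag=etree.Element restricts iteration to real elements. A standalone demonstration:

from lxml import etree

root = etree.fromstring('<html><!-- a comment --><body></body></html>')
print(len(list(root.iter())))                    # 3 -- includes the comment node
print(len(list(root.iter(tag=etree.Element))))   # 2 -- only html and body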

View File

@ -462,7 +462,7 @@ class Scheduler(QObject):
delta = timedelta(days=self.oldest)
try:
ids = list(self.db.tags_older_than(_('News'),
delta))
delta, must_have_authors=['calibre']))
except:
# Happens if library is being switched
ids = []

View File

@ -362,7 +362,7 @@
<item>
<widget class="QLabel" name="label_7">
<property name="text">
<string>&amp;Delete downloaded news older than:</string>
<string>Delete downloaded news &amp;older than:</string>
</property>
<property name="buddy">
<cstring>old_news</cstring>

View File

@ -573,6 +573,9 @@ class SeriesIndexEdit(QDoubleSpinBox):
import traceback
traceback.print_exc()
def reset_original(self):
self.original_series_name = self.series_edit.current_val
def break_cycles(self):
try:
self.series_edit.currentIndexChanged.disconnect()

View File

@ -376,6 +376,7 @@ class MetadataSingleDialogBase(ResizableDialog):
if not mi.is_null('series') and mi.series.strip():
self.series.current_val = mi.series
if mi.series_index is not None:
self.series_index.reset_original()
self.series_index.current_val = float(mi.series_index)
if not mi.is_null('languages'):
langs = [canonicalize_lang(x) for x in mi.languages]

View File

@ -325,6 +325,7 @@ class Preferences(QMainWindow):
return
rc = self.showing_widget.restart_critical
self.committed = True
do_restart = False
if must_restart:
self.must_restart = True
msg = _('Some of the changes you made require a restart.'
@ -335,12 +336,24 @@ class Preferences(QMainWindow):
'set any more preferences, until you restart.')
warning_dialog(self, _('Restart needed'), msg, show=True,
d = warning_dialog(self, _('Restart needed'), msg,
show_copy_button=False)
b = d.bb.addButton(_('Restart calibre now'), d.bb.AcceptRole)
b.setIcon(QIcon(I('lt.png')))
d.do_restart = False
def rf():
d.do_restart = True
b.clicked.connect(rf)
d.set_details('')
d.exec_()
b.clicked.disconnect()
do_restart = d.do_restart
self.showing_widget.refresh_gui(self.gui)
self.hide_plugin()
if self.close_after_initial or (must_restart and rc):
if self.close_after_initial or (must_restart and rc) or do_restart:
self.close()
if do_restart:
self.gui.quit(restart=True)
def cancel(self, *args):

View File

@ -73,6 +73,9 @@ class JavaScriptLoader(object):
src = self.get(x)
evaljs(src)
if not lang:
lang = 'en'
def lang_name(l):
l = l.lower()
l = lang_as_iso639_1(l)

View File

@ -40,6 +40,7 @@ from calibre.utils.magick.draw import save_cover_data_to
from calibre.utils.recycle_bin import delete_file, delete_tree
from calibre.utils.formatter_functions import load_user_template_functions
from calibre.db.errors import NoSuchFormat
from calibre.db.lazy import FormatMetadata, FormatsList
from calibre.utils.localization import (canonicalize_lang,
calibre_langcode_to_name)
@ -81,7 +82,6 @@ class Tag(object):
def __repr__(self):
return str(self)
class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
'''
An ebook metadata database that stores references to ebook files on disk.
@ -170,6 +170,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
except:
traceback.print_exc()
self.field_metadata = FieldMetadata()
self.format_filename_cache = defaultdict(dict)
self._library_id_ = None
# Create the lock to be used to guard access to the metadata writer
# queues. This must be an RLock, not a Lock
@ -310,6 +311,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
if not self.is_second_db:
load_user_template_functions(self.prefs.get('user_template_functions', []))
# Load the format filename cache
self.format_filename_cache = defaultdict(dict)
for book_id, fmt, name in self.conn.get(
'SELECT book,format,name FROM data'):
self.format_filename_cache[book_id][fmt.upper() if fmt else ''] = name
self.conn.executescript('''
DROP TRIGGER IF EXISTS author_insert_trg;
CREATE TEMP TRIGGER author_insert_trg
@ -599,7 +606,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
fname = self.construct_file_name(id)
changed = False
for format in formats:
name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
name = self.format_filename_cache[id].get(format.upper(), None)
if name and name != fname:
changed = True
break
@ -944,14 +951,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
good_formats = None
else:
formats = sorted(formats.split(','))
good_formats = []
for f in formats:
try:
mi.format_metadata[f] = self.format_metadata(id, f)
except:
pass
else:
good_formats.append(f)
mi.format_metadata = FormatMetadata(self, id, formats)
good_formats = FormatsList(formats, mi.format_metadata)
mi.formats = good_formats
tags = row[fm['tags']]
if tags:
@ -1145,12 +1146,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
def format_files(self, index, index_is_id=False):
id = index if index_is_id else self.id(index)
try:
formats = self.conn.get('SELECT name,format FROM data WHERE book=?', (id,))
formats = map(lambda x:(x[0], x[1]), formats)
return formats
except:
return []
return [(v, k) for k, v in self.format_filename_cache[id].iteritems()]
def formats(self, index, index_is_id=False, verify_formats=True):
''' Return available formats as a comma separated list or None if there are no available formats '''
@ -1236,7 +1232,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
'''
id = index if index_is_id else self.id(index)
try:
name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
name = self.format_filename_cache[id][format.upper()]
except:
return None
if name:
@ -1333,11 +1329,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
def add_format(self, index, format, stream, index_is_id=False, path=None,
notify=True, replace=True):
id = index if index_is_id else self.id(index)
if format:
self.format_metadata_cache[id].pop(format.upper(), None)
if not format: format = ''
self.format_metadata_cache[id].pop(format.upper(), None)
name = self.format_filename_cache[id].get(format.upper(), None)
if path is None:
path = os.path.join(self.library_path, self.path(id, index_is_id=True))
name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
if name and not replace:
return False
name = self.construct_file_name(id)
@ -1355,6 +1351,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.conn.execute('INSERT OR REPLACE INTO data (book,format,uncompressed_size,name) VALUES (?,?,?,?)',
(id, format.upper(), size, name))
self.conn.commit()
self.format_filename_cache[id][format.upper()] = name
self.refresh_ids([id])
if notify:
self.notify('metadata', [id])
@ -1402,9 +1399,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
def remove_format(self, index, format, index_is_id=False, notify=True,
commit=True, db_only=False):
id = index if index_is_id else self.id(index)
if format:
self.format_metadata_cache[id].pop(format.upper(), None)
name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
if not format: format = ''
self.format_metadata_cache[id].pop(format.upper(), None)
name = self.format_filename_cache[id].pop(format.upper(), None)
if name:
if not db_only:
try:
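
The hunks above replace repeated 'SELECT name FROM data' queries with a dict cache keyed on book id and upper-cased format. The construction logic, isolated with hypothetical rows:

from collections import defaultdict

# Hypothetical rows in the shape of 'SELECT book,format,name FROM data'
rows = [(1, 'epub', 'One'), (1, 'MOBI', 'One'), (2, None, 'Two')]

format_filename_cache = defaultdict(dict)
for book_id, fmt, name in rows:
    # Formats are normalized to upper case; a missing format maps to ''
    format_filename_cache[book_id][fmt.upper() if fmt else ''] = name

print(format_filename_cache[1].get('EPUB'))  # One
print(format_filename_cache[2][''])          # Two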
@ -1925,7 +1922,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
############# End get_categories
def tags_older_than(self, tag, delta, must_have_tag=None):
def tags_older_than(self, tag, delta, must_have_tag=None,
must_have_authors=None):
'''
Return the ids of all books having the tag ``tag`` that are older
than the specified time. Tag comparison is case insensitive.
@ -1934,6 +1932,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
the tag are returned.
:param must_have_tag: If not None the list of matches will be
restricted to books that have this tag
:param must_have_authors: A list of authors. If not None the list of
matches will be restricted to books that have these authors (case
insensitive).
'''
tag = tag.lower().strip()
mht = must_have_tag.lower().strip() if must_have_tag else None
@ -1941,9 +1942,18 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
tindex = self.FIELD_MAP['timestamp']
gindex = self.FIELD_MAP['tags']
iindex = self.FIELD_MAP['id']
aindex = self.FIELD_MAP['authors']
mah = must_have_authors
if mah is not None:
mah = [x.replace(',', '|').lower() for x in mah]
mah = ','.join(mah)
for r in self.data._data:
if r is not None:
if delta is None or (now - r[tindex]) > delta:
if mah:
authors = r[aindex] or ''
if authors.lower() != mah:
continue
tags = r[gindex]
if tags:
tags = [x.strip() for x in tags.lower().split(',')]
@ -3128,6 +3138,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
stream.seek(0)
mi = get_metadata(stream, format, use_libprs_metadata=False,
force_read_metadata=True)
# Force the author to calibre, as the auto-delete of old news checks for
# both author == calibre and the tag News
mi.authors = ['calibre']
stream.seek(0)
if mi.series_index is None:
mi.series_index = self.get_next_series_num_for(mi.series)
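
The new must_have_authors filter in tags_older_than normalizes the requested authors the same way the field cache stores them (commas become pipes, lower case, joined with commas) before comparing against the row. Just that transform, with hypothetical input:

mah = ['calibre', 'Doe, Jane']  # hypothetical must_have_authors argument
mah = [x.replace(',', '|').lower() for x in mah]
mah = ','.join(mah)
print(mah)  # calibre,doe| jane -- the form compared against the authors field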

View File

@ -497,7 +497,8 @@ class BrowseServer(object):
xml(s, True),
xml(_('Loading, please wait'))+'&hellip;',
unicode(c),
xml(u'/browse/category_group/%s/%s'%(category,
xml(u'/browse/category_group/%s/%s'%(
hexlify(category.encode('utf-8')),
hexlify(s.encode('utf-8'))), True),
self.opts.url_prefix)
for s, c in category_groups.items()]
@ -531,6 +532,13 @@ class BrowseServer(object):
sort = None
if sort not in ('rating', 'name', 'popularity'):
sort = 'name'
try:
category = unhexlify(category)
if isbytestring(category):
category = category.decode('utf-8')
except:
raise cherrypy.HTTPError(404, 'invalid category')
categories = self.categories_cache()
if category not in categories:
raise cherrypy.HTTPError(404, 'category not found')
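
The fix above hex-encodes category names so arbitrary Unicode survives in URL path segments, then decodes on the way back in. The round trip in isolation, with a hypothetical non-ASCII category:

from binascii import hexlify, unhexlify

category = u'S\xfc\xdfe B\xfccher'  # hypothetical non-ASCII category name
token = hexlify(category.encode('utf-8'))
print(token)                                         # hex digits only: URL-safe
print(unhexlify(token).decode('utf-8') == category)  # True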

Binary image file changed (not shown): 1.6 KiB → 1.5 KiB
Binary image file changed (not shown): 1.3 KiB → 1.7 KiB
Binary image file changed (not shown): 733 B → 2.3 KiB

62 more file diffs suppressed because they are too large.

Some files were not shown because too many files have changed in this diff.