KG updates

GRiker 2011-03-25 03:56:47 -07:00
commit 9af79ba225
205 changed files with 34277 additions and 27140 deletions

recipes/caijing.recipe (new file, 79 lines)
View File

@ -0,0 +1,79 @@
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class Caijing(BasicNewsRecipe):
title = 'Caijing Magazine'
__author__ = 'Eric Chen'
description = '''Bi-weekly Finance and Economics Review. Founded in 1998, the fortnightly CAIJING
Magazine has firmly established itself as a news authority and leading voice for
business and financial issues in China.
CAIJING Magazine closely tracks the most important aspects of China's economic reforms,
developments and policy changes, as well as major events in the capital markets. It also
offers a broad international perspective through first-hand reporting on international
political and economic issues.
CAIJING Magazine is China's most widely read business and finance magazine, with a
circulation of 225,000 per issue. It boasts top-level readers from government, business
and academic circles. '''
language = 'zh'
category = 'news, China'
encoding = 'UTF-8'
timefmt = ' [%a, %d %b, %Y]'
needs_subscription = True
remove_tags = [dict(attrs={'class':['topad', 'nav', 'searchbox', 'connav',
'mbx', 'bianji', 'bianji bj', 'lnewlist', 'rdtj', 'loadComment',
'conr', 'bottom', 'bottomcopyr', 'emaildy', 'rcom', 'allcontent']}),
dict(name=['script', 'noscript', 'style'])]
no_stylesheets = True
remove_javascript = True
current_issue_url = ""
current_issue_cover = ""
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://service.caijing.com.cn/usermanage/login')
br.select_form(name='mainLoginForm')
br['username'] = self.username
br['password'] = self.password
br.submit()
return br
def parse_index(self):
articles = []
soup0 = self.index_to_soup('http://magazine.caijing.com.cn/2011/cjindex2011/')
div = soup0.find('div', attrs={'class':'fmcon'})
link = div.find('a', href=True)
current_issue_url = link['href']
soup = self.index_to_soup(current_issue_url)
for div_cover in soup.findAll('img', {'src' : re.compile('.')}):
if re.search('\d{4}-\d{2}-\d{2}', div_cover['src']):
self.current_issue_cover = div_cover['src']
feeds = []
for section in soup.findAll('div', attrs={'class':'cebd'}):
section_title = self.tag_to_string(section.find('div', attrs={'class':'ceti'}))
articles = []
for post in section.findAll('a', href=True):
if re.search('\d{4}-\d{2}-\d{2}', post['href']):
date = re.search('\d{4}-\d{2}-\d{2}', post['href']).group(0)
id = re.search('\d{9}', post['href']).group(0)
url = re.sub(r'\d.*', 'templates/inc/chargecontent2.jsp?id=', post['href'])
url = url + id + '&time=' + date + '&cl=106&page=all'
title = self.tag_to_string(post)
articles.append({'title':title, 'url':url, 'date':date})
if articles:
feeds.append((section_title, articles))
return feeds
def get_cover_url(self):
return self.current_issue_cover
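The parse_index logic above rewrites each article link on the issue page into the magazine's full-text endpoint. A minimal sketch of that rewrite, using a hypothetical article href (assumed to carry the YYYY-MM-DD date and nine-digit id the regexes expect):

import re
# Hypothetical href; the date and id values are made up for illustration.
href = 'http://magazine.caijing.com.cn/2011-03-14/110123456.html'
date = re.search(r'\d{4}-\d{2}-\d{2}', href).group(0)   # '2011-03-14'
art_id = re.search(r'\d{9}', href).group(0)             # '110123456'
url = re.sub(r'\d.*', 'templates/inc/chargecontent2.jsp?id=', href)
url = url + art_id + '&time=' + date + '&cl=106&page=all'
# url -> 'http://magazine.caijing.com.cn/templates/inc/chargecontent2.jsp?id=110123456&time=2011-03-14&cl=106&page=all'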

View File

@ -8,13 +8,13 @@ __description__ = 'Providing context and clarity on national and international n
'''csmonitor.com'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class ChristianScienceMonitor(BasicNewsRecipe):
author = 'Kovid Goyal, Sujata Raman and Lorenzo Vigentini'
__author__ = 'Kovid Goyal'
description = 'Providing context and clarity on national and international news, peoples and cultures'
cover_url = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif'
@ -34,6 +34,49 @@ class ChristianScienceMonitor(BasicNewsRecipe):
remove_javascript = True
no_stylesheets = True
def append_page(self, soup, appendtag, position):
nav = soup.find('div',attrs={'class':'navigation'})
if nav:
pager = nav.findAll('a')
for part in pager:
if 'Next' in part:
nexturl = ('http://www.csmonitor.com' +
re.findall(r'href="(.*?)"', str(part))[0])
soup2 = self.index_to_soup(nexturl)
texttag = soup2.find('div',
attrs={'class': re.compile('list-article-.*')})
trash_c = soup2.findAll(attrs={'class': 'list-description'})
trash_h = soup2.h1
for tc in trash_c: tc.extract()
trash_h.extract()
newpos = len(texttag.contents)
self.append_page(soup2, texttag, newpos)
texttag.extract()
appendtag.insert(position, texttag)
def preprocess_html(self, soup):
PRINT_RE = re.compile(r'/layout/set/print/content/view/print/[0-9]*')
html = str(soup)
try:
print_found = PRINT_RE.findall(html)
except Exception:
pass
if print_found:
print_url = 'http://www.csmonitor.com' + print_found[0]
print_soup = self.index_to_soup(print_url)
else:
self.append_page(soup, soup.body, 3)
trash_a = soup.findAll(attrs={'class': re.compile('navigation.*')})
trash_b = soup.findAll(attrs={'style': re.compile('.*')})
trash_d = soup.findAll(attrs={'class': 'sByline'})
for ta in trash_a: ta.extract()
for tb in trash_b: tb.extract()
for td in trash_d: td.extract()
print_soup = soup
return print_soup
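The print-version branch above only triggers when PRINT_RE finds a print URL in the page; a small illustration with a hypothetical article id:

import re
PRINT_RE = re.compile(r'/layout/set/print/content/view/print/[0-9]*')
# Hypothetical page fragment; the numeric id is made up.
html = '<a href="/layout/set/print/content/view/print/248494">Print</a>'
print_found = PRINT_RE.findall(html)
# -> ['/layout/set/print/content/view/print/248494'], so the recipe loads
# 'http://www.csmonitor.com/layout/set/print/content/view/print/248494'
# instead of stitching the paginated article together with append_page().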
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[
@ -43,7 +86,6 @@ class ChristianScienceMonitor(BasicNewsRecipe):
(r'Full HTML version of this story which may include photos, graphics, and related links.*</body>',
lambda match : '</body>'),
]]
extra_css = '''
h1{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: large}
.sub{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: small;}
@ -56,10 +98,9 @@ class ChristianScienceMonitor(BasicNewsRecipe):
#main{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: small; }
#photo-details{ font-family:Arial,Helvetica,sans-serif ; color:#999999; font-size: x-small;}
span.name{color:#205B87;font-family: Georgia,Times,"Times New Roman",serif; font-size: x-small}
p#dateline{color:#444444 ; font-family:Arial,Helvetica,sans-serif ; font-style:italic;}
'''
feeds = [
(u'Top Stories' , u'http://rss.csmonitor.com/feeds/top'),
p#dateline{color:#444444 ; font-family:Arial,Helvetica,sans-serif ; font-style:italic;} '''
feeds = [(u'Top Stories', u'http://rss.csmonitor.com/feeds/top'),
(u'World' , u'http://rss.csmonitor.com/feeds/world'),
(u'USA' , u'http://rss.csmonitor.com/feeds/usa'),
(u'Commentary' , u'http://rss.csmonitor.com/feeds/commentary'),
@ -74,9 +115,7 @@ class ChristianScienceMonitor(BasicNewsRecipe):
(u'Home Forum' , u'http://rss.csmonitor.com/feeds/homeforum')
]
keep_only_tags = [
dict(name='div', attrs={'id':'mainColumn'}),
]
keep_only_tags = [dict(name='div', attrs={'id':'mainColumn'}), ]
remove_tags = [
dict(name='div', attrs={'id':['story-tools','videoPlayer','storyRelatedBottom','enlarge-photo','photo-paginate']}),
@ -86,7 +125,10 @@ class ChristianScienceMonitor(BasicNewsRecipe):
'hide', 'podBrdr']}),
dict(name='ul', attrs={'class':[ 'centerliststories']}) ,
dict(name='form', attrs={'id':[ 'commentform']}) ,
dict(name='div', attrs={'class': ['ui-comments']})
]
remove_tags_after = [ dict(name='div', attrs={'class':[ 'ad csmAd']})]
remove_tags_after = [ dict(name='div', attrs={'class':[ 'ad csmAd']}),
dict(name='div', attrs={'class': [re.compile('navigation.*')]}),
dict(name='div', attrs={'style': [re.compile('.*')]})
]

View File

@ -1,33 +1,51 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
# -*- coding: utf-8 -*-
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.web.feeds.news import BasicNewsRecipe
class H168(BasicNewsRecipe):
title = u'168\xf3ra'
oldest_article = 4
max_articles_per_feed = 50
class hu168ora(BasicNewsRecipe):
title = u'168 óra'
__author__ = u'István Papp'
description = u'A 168 óra friss hírei'
timefmt = ' [%Y. %b. %d., %a.]'
oldest_article = 7
language = 'hu'
__author__ = 'Ezmegaz'
feeds = [(u'Itthon',
u'http://www.168ora.hu/static/rss/cikkek_itthon.xml'), (u'Gl\xf3busz',
u'http://www.168ora.hu/static/rss/cikkek_globusz.xml'), (u'Punch',
u'http://www.168ora.hu/static/rss/cikkek_punch.xml'), (u'Arte',
u'http://www.168ora.hu/static/rss/cikkek_arte.xml'), (u'Buxa',
u'http://www.168ora.hu/static/rss/cikkek_buxa.xml'), (u'Sebess\xe9g',
u'http://www.168ora.hu/static/rss/cikkek_sebesseg.xml'), (u'Tud\xe1s',
u'http://www.168ora.hu/static/rss/cikkek_tudas.xml'), (u'Sport',
u'http://www.168ora.hu/static/rss/cikkek_sport.xml'), (u'V\xe9lem\xe9ny',
u'http://www.168ora.hu/static/rss/cikkek_velemeny.xml'), (u'Dolce Vita',
u'http://www.168ora.hu/static/rss/cikkek_dolcevita.xml'), (u'R\xe1di\xf3',
u'http://www.168ora.hu/static/rss/radio.xml')]
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf8'
publisher = u'Telegráf Kiadó'
category = u'news, hírek, 168'
extra_css = 'body{ font-family: Verdana,Helvetica,Arial,sans-serif }'
preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
keep_only_tags = [
dict(id='cikk_fejlec')
,dict(id='cikk_torzs')
]
# remove_tags_before = dict(id='cikk_fejlec')
# remove_tags_after = dict(id='szoveg')
remove_tags = [
dict(id='box_toolbar')
,dict(id='text')
]
remove_javascript = True
remove_empty_feeds = True
feeds = [
(u'Itthon', u'http://www.168ora.hu/static/rss/cikkek_itthon.xml')
,(u'Glóbusz', u'http://www.168ora.hu/static/rss/cikkek_globusz.xml')
,(u'Punch', u'http://www.168ora.hu/static/rss/cikkek_punch.xml')
,(u'Arte', u'http://www.168ora.hu/static/rss/cikkek_arte.xml')
,(u'Buxa', u'http://www.168ora.hu/static/rss/cikkek_buxa.xml')
,(u'Sebesség', u'http://www.168ora.hu/static/rss/cikkek_sebesseg.xml')
,(u'Tudás', u'http://www.168ora.hu/static/rss/cikkek_tudas.xml')
,(u'Sport', u'http://www.168ora.hu/static/rss/cikkek_sport.xml')
,(u'Vélemény', u'http://www.168ora.hu/static/rss/cikkek_velemeny.xml')
,(u'Dolce Vita', u'http://www.168ora.hu/static/rss/cikkek_dolcevita.xml')
# ,(u'Rádió', u'http://www.168ora.hu/static/rss/radio.xml')
]
def print_version(self, url):
url += '?print=1'
return url

View File

@ -1,4 +1,3 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
@ -10,10 +9,12 @@ class Handelsblatt(BasicNewsRecipe):
no_stylesheets = True
cover_url = 'http://www.handelsblatt.com/images/logo/logo_handelsblatt.com.png'
language = 'de'
keep_only_tags = []
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'structOneCol'}))
keep_only_tags.append(dict(name = 'div', attrs = {'id': 'fullText'}))
remove_tags = [dict(name='img', attrs = {'src': 'http://www.handelsblatt.com/images/icon/loading.gif'})]
# keep_only_tags = []
keep_only_tags = (dict(name = 'div', attrs = {'class': ['hcf-detail-abstract hcf-teaser ajaxify','hcf-detail','hcf-author-wrapper']}))
# keep_only_tags.append(dict(name = 'div', attrs = {'id': 'fullText'}))
remove_tags = [dict(name='img', attrs = {'src': 'http://www.handelsblatt.com/images/icon/loading.gif'})
,dict(name='ul' , attrs={'class':['hcf-detail-tools']})
]
feeds = [
(u'Handelsblatt Exklusiv',u'http://www.handelsblatt.com/rss/exklusiv'),
@ -28,14 +29,16 @@ class Handelsblatt(BasicNewsRecipe):
(u'Handelsblatt Weblogs',u'http://www.handelsblatt.com/rss/blogs')
]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
.hcf-headline {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:x-large;}
.hcf-overline {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:x-large;}
.hcf-exclusive {font-family:Arial,Helvetica,sans-serif; font-style:italic;font-weight:bold; margin-right:5pt;}
p{font-family:Arial,Helvetica,sans-serif;}
.hcf-location-mark{font-weight:bold; margin-right:5pt;}
.MsoNormal{font-family:Helvetica,Arial,sans-serif;}
.hcf-author-wrapper{font-style:italic;}
.hcf-article-date{font-size:x-small;}
.hcf-caption {font-style:italic;font-size:small;}
img {align:left;}
'''
def print_version(self, url):
m = re.search('(?<=;)[0-9]*', url)
return u'http://www.handelsblatt.com/_b=' + str(m.group(0)) + ',_p=21,_t=ftprint,doc_page=0;printpage'

recipes/hvg.recipe (new file, 44 lines)
View File

@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class HVG(BasicNewsRecipe):
title = 'HVG.HU'
__author__ = u'István Papp'
description = u'Friss hírek a HVG-től'
timefmt = ' [%Y. %b. %d., %a.]'
oldest_article = 4
language = 'hu'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf8'
publisher = 'HVG Online'
category = u'news, hírek, hvg'
extra_css = 'body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
remove_tags_before = dict(id='pg-content')
remove_javascript = True
remove_empty_feeds = True
feeds = [
(u'Itthon', u'http://hvg.hu/rss/itthon')
,(u'Világ', u'http://hvg.hu/rss/vilag')
,(u'Gazdaság', u'http://hvg.hu/rss/gazdasag')
,(u'IT | Tudomány', u'http://hvg.hu/rss/tudomany')
,(u'Panoráma', u'http://hvg.hu/rss/Panorama')
,(u'Karrier', u'http://hvg.hu/rss/karrier')
,(u'Gasztronómia', u'http://hvg.hu/rss/gasztronomia')
,(u'Helyi érték', u'http://hvg.hu/rss/helyiertek')
,(u'Kultúra', u'http://hvg.hu/rss/kultura')
,(u'Cégautó', u'http://hvg.hu/rss/cegauto')
,(u'Vállalkozó szellem', u'http://hvg.hu/rss/kkv')
,(u'Egészség', u'http://hvg.hu/rss/egeszseg')
,(u'Vélemény', u'http://hvg.hu/rss/velemeny')
,(u'Sport', u'http://hvg.hu/rss/sport')
]
def print_version(self, url):
return url.replace ('#rss', '/print')

View File

@ -23,6 +23,11 @@ class WeeklyLWN(BasicNewsRecipe):
remove_tags_after = dict(attrs={'class':'ArticleText'})
remove_tags = [dict(name=['h2', 'form'])]
preprocess_regexps = [
# Remove the <hr> and "Log in to post comments"
(re.compile(r'<hr.*?comments[)]', re.DOTALL), lambda m: ''),
]
conversion_options = { 'linearize_tables' : True }
oldest_article = 7.0
@ -40,15 +45,15 @@ class WeeklyLWN(BasicNewsRecipe):
def parse_index(self):
if self.username is not None and self.password is not None:
index_url = 'http://lwn.net/current/bigpage'
index_url = 'http://lwn.net/current/bigpage?format=printable'
else:
index_url = 'http://lwn.net/free/bigpage'
index_url = 'http://lwn.net/free/bigpage?format=printable'
soup = self.index_to_soup(index_url)
body = soup.body
articles = {}
ans = []
url_re = re.compile('^http://lwn.net/Articles/')
url_re = re.compile('^/Articles/')
while True:
tag_title = body.findNext(name='p', attrs={'class':'SummaryHL'})
@ -91,7 +96,7 @@ class WeeklyLWN(BasicNewsRecipe):
article = dict(
title=tag_title.string,
url=tag_url['href'].split('#')[0],
url= 'http://lwn.net' + tag_url['href'].split('#')[0] + '?format=printable',
description='', content='', date='')
articles[section].append(article)
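With these changes the index is fetched via the printable bigpage and the now-relative article links are rebuilt into absolute printable URLs. A sketch with a hypothetical article number:

# Hypothetical href as it would appear on the bigpage index.
href = '/Articles/435043/#Comments'
url = 'http://lwn.net' + href.split('#')[0] + '?format=printable'
# url -> 'http://lwn.net/Articles/435043/?format=printable'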

recipes/planet_kde.recipe (new file, 11 lines)
View File

@ -0,0 +1,11 @@
from calibre.web.feeds.news import AutomaticNewsRecipe
class BasicUserRecipe1300864518(AutomaticNewsRecipe):
title = u'KDE News'
language = 'en'
__author__ = 'Riccardo Iaconelli'
oldest_article = 10
max_articles_per_feed = 100
feeds = [(u'Planet KDE', u'http://planetkde.org/rss20.xml'), (u'Got the Dot?', u'http://dot.kde.org/rss.xml')]

View File

@ -1,4 +1,3 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
@ -6,55 +5,48 @@ class WashingtonPost(BasicNewsRecipe):
title = 'Washington Post'
description = 'US political news'
__author__ = 'Kovid Goyal and Sujata Raman'
__author__ = 'Kovid Goyal'
use_embedded_content = False
max_articles_per_feed = 20
language = 'en'
encoding = 'utf-8'
remove_javascript = True
no_stylesheets = True
extra_css = '''
#articleCopyright { font-family:Arial,helvetica,sans-serif ; font-weight:bold ; font-size:x-small ;}
p { font-family:"Times New Roman",times,serif ; font-weight:normal ; font-size:small ;}
body{font-family:arial,helvetica,sans-serif}
'''
feeds = [ ('Today\'s Highlights', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/03/24/LI2005032400102.xml'),
('Politics', 'http://www.washingtonpost.com/wp-dyn/rss/politics/index.xml'),
('Nation', 'http://www.washingtonpost.com/wp-dyn/rss/nation/index.xml'),
('World', 'http://www.washingtonpost.com/wp-dyn/rss/world/index.xml'),
('Business', 'http://www.washingtonpost.com/wp-dyn/rss/business/index.xml'),
('Technology', 'http://www.washingtonpost.com/wp-dyn/rss/technology/index.xml'),
('Health', 'http://www.washingtonpost.com/wp-dyn/rss/health/index.xml'),
('Education', 'http://www.washingtonpost.com/wp-dyn/rss/education/index.xml'),
('Style',
'http://www.washingtonpost.com/wp-dyn/rss/print/style/index.xml'),
('NFL Sports',
'http://www.washingtonpost.com/wp-dyn/rss/sports/index/nfl/index.xml'),
('Redskins', 'http://www.washingtonpost.com/wp-dyn/rss/sports/redskins/index.xml'),
('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'),
feeds = [
('Politics', 'http://www.washingtonpost.com/rss/politics'),
('Nation', 'http://www.washingtonpost.com/rss/national'),
('World', 'http://www.washingtonpost.com/rss/world'),
('Business', 'http://www.washingtonpost.com/rss/business'),
('Lifestyle', 'http://www.washingtonpost.com/rss/lifestyle'),
('Sports', 'http://www.washingtonpost.com/rss/sports'),
('Redskins', 'http://www.washingtonpost.com/rss/sports/redskins'),
('Opinions', 'http://www.washingtonpost.com/rss/opinions'),
('Entertainment', 'http://www.washingtonpost.com/rss/entertainment'),
('Local', 'http://www.washingtonpost.com/rss/local'),
('Investigations',
'http://www.washingtonpost.com/rss/investigations'),
]
remove_tags = [{'id':['pfmnav', 'ArticleCommentsWrapper']}]
remove_tags = [
{'class':lambda x: x and 'article-toolbar' in x},
{'class':lambda x: x and 'quick-comments' in x},
{'class':lambda x: x and 'tweet' in x},
{'class':lambda x: x and 'article-related' in x},
{'class':lambda x: x and 'hidden' in x.split()},
{'class':lambda x: x and 'also-read' in x.split()},
{'class':lambda x: x and 'partners-content' in x.split()},
{'class':['module share', 'module ads', 'comment-vars', 'hidden',
'share-icons-wrap', 'comments']},
{'id':['right-rail']},
]
keep_only_tags = dict(id=['content', 'article'])
def get_article_url(self, article):
return article.get('guid', article.get('link', None))
def print_version(self, url):
return url.rpartition('.')[0] + '_pf.html'
url = url.rpartition('?')[0]
return url.replace('_story.html', '_singlePage.html')
def postprocess_html(self, soup, first):
for div in soup.findAll(name='div', style=re.compile('margin')):
div['style'] = ''
return soup
def preprocess_html(self, soup):
for tag in soup.findAll('font'):
if tag.has_key('size'):
if tag['size'] == '+2':
if tag.b:
return soup
return None
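The new print_version drops the query string and swaps the story page for the single-page variant. A sketch using a hypothetical article URL (the _story.html/_singlePage.html pattern is taken from the code above):

url = 'http://www.washingtonpost.com/politics/example-article/2011/03/24/ABcdEFgh_story.html?hpid=z1'
url = url.rpartition('?')[0]                          # drop the query string
url = url.replace('_story.html', '_singlePage.html')
# url -> 'http://www.washingtonpost.com/politics/example-article/2011/03/24/ABcdEFgh_singlePage.html'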

View File

@ -18,6 +18,6 @@ def recipe_title_callback(raw):
return eval(raw.decode('utf-8'))
vipy.session.add_content_browser('.r', ',r', 'Recipe',
vipy.session.glob_based_iterator(os.path.join(project_dir, 'resources', 'recipes', '*.recipe')),
vipy.session.glob_based_iterator(os.path.join(project_dir, 'recipes', '*.recipe')),
vipy.session.regexp_based_matcher(r'title\s*=\s*(?P<title>.+)', 'title', recipe_title_callback))
EOFPY

View File

@ -12,7 +12,7 @@ from setup import Command, islinux, isfreebsd, basenames, modules, functions, \
__appname__, __version__
HEADER = '''\
#!/usr/bin/env python
#!/usr/bin/env python2
"""
This is the standard runscript for all of calibre's tools.

View File

@ -99,7 +99,7 @@ def sanitize_file_name_unicode(name, substitute='_'):
**WARNING:** This function also replaces path separators, so only pass file names
and not full paths to it.
'''
if not isinstance(name, unicode):
if isbytestring(name):
return sanitize_file_name(name, substitute=substitute, as_unicode=True)
chars = [substitute if c in _filename_sanitize_unicode else c for c in
name]
@ -115,6 +115,14 @@ def sanitize_file_name_unicode(name, substitute='_'):
one = '_' + one[1:]
return one
def sanitize_file_name2(name, substitute='_'):
'''
Sanitize filenames removing invalid chars. Keeps unicode names as unicode
and bytestrings as bytestrings
'''
if isbytestring(name):
return sanitize_file_name(name, substitute=substitute)
return sanitize_file_name_unicode(name, substitute=substitute)
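A minimal usage sketch of the new helper (filenames are hypothetical; the exact substitutions depend on the sanitize tables defined earlier in this file):

sanitize_file_name2(u'some:file/name.txt')   # unicode in, sanitized unicode out
sanitize_file_name2(b'some:file/name.txt')   # bytestring in, sanitized bytestring out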
def prints(*args, **kwargs):
'''
@ -162,8 +170,8 @@ def prints(*args, **kwargs):
except:
file.write(repr(arg))
if i != len(args)-1:
file.write(sep)
file.write(end)
file.write(bytes(sep))
file.write(bytes(end))
class CommandLineError(Exception):
pass
@ -270,12 +278,15 @@ def get_parsed_proxy(typ='http', debug=True):
def random_user_agent():
choices = [
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11'
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)'
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)'
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1)'
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.1 Safari/525.19'
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11'
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.1 Safari/525.19',
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11',
'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en; rv:1.8.1.14) Gecko/20080409 Camino/1.6 (like Firefox/2.0.0.14)',
'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.0.1) Gecko/20060118 Camino/1.0b2+',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.78 Safari/532.5',
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)',
]
return choices[random.randint(0, len(choices)-1)]
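The commas added to this list matter: Python concatenates adjacent string literals at compile time, so entries written without separating commas collapse into a single list element, which is what the removed version of this list appears to have done. A quick illustration:

choices = [
    'Mozilla/5.0 (Windows ...)'      # no trailing comma here
    'Mozilla/4.0 (compatible ...)'   # so these two literals form ONE element
]
len(choices)  # -> 1, not 2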

View File

@ -5,7 +5,7 @@ __appname__ = 'calibre'
__version__ = '0.7.50'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re
import re, importlib
_ver = __version__.split('.')
_ver = [int(re.search(r'(\d+)', x).group(1)) for x in _ver]
numeric_version = tuple(_ver)
@ -33,10 +33,10 @@ try:
except:
preferred_encoding = 'utf-8'
win32event = __import__('win32event') if iswindows else None
winerror = __import__('winerror') if iswindows else None
win32api = __import__('win32api') if iswindows else None
fcntl = None if iswindows else __import__('fcntl')
win32event = importlib.import_module('win32event') if iswindows else None
winerror = importlib.import_module('winerror') if iswindows else None
win32api = importlib.import_module('win32api') if iswindows else None
fcntl = None if iswindows else importlib.import_module('fcntl')
filesystem_encoding = sys.getfilesystemencoding()
if filesystem_encoding is None: filesystem_encoding = 'utf-8'
@ -74,8 +74,8 @@ if plugins is None:
(['winutil'] if iswindows else []) + \
(['usbobserver'] if isosx else []):
try:
p, err = __import__(plugin), ''
except Exception, err:
p, err = importlib.import_module(plugin), ''
except Exception as err:
p = None
err = str(err)
plugins[plugin] = (p, err)

View File

@ -2,7 +2,7 @@ from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, sys, zipfile
import os, sys, zipfile, importlib
from calibre.constants import numeric_version
from calibre.ptempfile import PersistentTemporaryFile
@ -517,7 +517,7 @@ class InterfaceActionBase(Plugin): # {{{
This method must return the actual interface action plugin object.
'''
mod, cls = self.actual_plugin.split(':')
return getattr(__import__(mod, fromlist=['1'], level=0), cls)(gui,
return getattr(importlib.import_module(mod), cls)(gui,
self.site_customization)
# }}}
@ -575,7 +575,7 @@ class PreferencesPlugin(Plugin): # {{{
base, _, wc = self.config_widget.partition(':')
if not wc:
wc = 'ConfigWidget'
base = __import__(base, fromlist=[1])
base = importlib.import_module(base)
widget = getattr(base, wc)
return widget(parent)
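importlib.import_module returns the named module itself, even for dotted names, whereas __import__ returns the top-level package unless coaxed with a fromlist, which is the workaround being removed here. A neutral example using the standard library:

import importlib
importlib.import_module('os.path')    # -> the os.path module itself
__import__('os.path')                 # -> the top-level 'os' package
__import__('os.path', fromlist=['1']) # the old-style workaround for submodules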

View File

@ -1032,7 +1032,8 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
# New metadata download plugins {{{
from calibre.ebooks.metadata.sources.google import GoogleBooks
from calibre.ebooks.metadata.sources.amazon import Amazon
from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
plugins += [GoogleBooks, Amazon]
plugins += [GoogleBooks, Amazon, OpenLibrary]
# }}}

View File

@ -58,12 +58,13 @@ class ANDROID(USBMS):
0x413c : { 0xb007 : [0x0100, 0x0224]},
# LG
0x1004 : { 0x61cc : [0x100], 0x61ce : [0x100] },
0x1004 : { 0x61cc : [0x100], 0x61ce : [0x100], 0x618e : [0x226] },
# Archos
0x0e79 : {
0x1400 : [0x0222, 0x0216],
0x1408 : [0x0222, 0x0216],
0x1411 : [0x216],
0x1417 : [0x0216],
0x1419 : [0x0216],
0x1420 : [0x0216],
@ -92,14 +93,14 @@ class ANDROID(USBMS):
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC']
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
'7', 'A956', 'A955', 'A43']
'7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7']

View File

@ -748,11 +748,13 @@ class ITUNES(DriverBase):
# Display a dialog recommending using 'Connect to iTunes'
if not self.settings().extra_customization[self.SKIP_CONNECT_TO_ITUNES_DIALOG]:
raise OpenFeedback("The recommended connection method for Apple iDevices " +\
"is to use the 'Connect to iTunes' method described in the <br />" +\
'<a href="http://www.mobileread.com/forums/showthread.php?t=118559">Calibre + Apple iDevices FAQ</a>.<br />' +\
'After following the Quick Start steps outlined in the FAQ, restart calibre.')
raise OpenFeedback('<p>' + _('Click the "Connect/Share" button and choose'
' "Connect to iTunes" to send books from your calibre library'
' to your Apple iDevice.<p>For more information, see'
'<a href="http://www.mobileread.com/forums/showthread.php?t=118559">'
'Calibre + Apple iDevices FAQ</a>.<p>'
'After following the Quick Start steps outlined in the FAQ, '
'restart calibre.'))
# Confirm/create thumbs archive
if not os.path.exists(self.cache_dir):

View File

@ -19,7 +19,7 @@ class BLACKBERRY(USBMS):
VENDOR_ID = [0x0fca]
PRODUCT_ID = [0x8004, 0x0004]
BCD = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211]
BCD = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211, 0x0220]
VENDOR_NAME = 'RIM'
WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'

View File

@ -282,7 +282,7 @@ def main():
outfile = os.path.join(outfile, path[path.rfind("/")+1:])
try:
outfile = open(outfile, "wb")
except IOError, e:
except IOError as e:
print >> sys.stderr, e
parser.print_help()
return 1
@ -291,13 +291,13 @@ def main():
elif args[1].startswith("prs500:"):
try:
infile = open(args[0], "rb")
except IOError, e:
except IOError as e:
print >> sys.stderr, e
parser.print_help()
return 1
try:
dev.put_file(infile, args[1][7:])
except PathError, err:
except PathError as err:
if options.force and 'exists' in str(err):
dev.del_file(err.path, False)
dev.put_file(infile, args[1][7:])
@ -355,7 +355,7 @@ def main():
return 1
except DeviceLocked:
print >> sys.stderr, "The device is locked. Use the --unlock option"
except (ArgumentError, DeviceError), e:
except (ArgumentError, DeviceError) as e:
print >>sys.stderr, e
return 1
return 0
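The exception-handling change repeated across these files swaps the Python 2-only comma syntax for the form accepted by both Python 2.6+ and Python 3:

# Old form (Python 2 only):        except IOError, e:
# New form (Python 2.6+ and 3):    except IOError as e:
try:
    open('/nonexistent')
except IOError as e:
    print e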

View File

@ -177,7 +177,7 @@ class PRS500(DeviceConfig, DevicePlugin):
dev.send_validated_command(BeginEndSession(end=True))
dev.in_session = False
raise
except USBError, err:
except USBError as err:
if "No such device" in str(err):
raise DeviceError()
elif "Connection timed out" in str(err):
@ -272,7 +272,7 @@ class PRS500(DeviceConfig, DevicePlugin):
self.bulk_read_max_packet_size = red.MaxPacketSize
self.bulk_write_max_packet_size = wed.MaxPacketSize
self.handle.claim_interface(self.INTERFACE_ID)
except USBError, err:
except USBError as err:
raise DeviceBusy(str(err))
# Large timeout as device may still be initializing
res = self.send_validated_command(GetUSBProtocolVersion(), timeout=20000)
@ -303,7 +303,7 @@ class PRS500(DeviceConfig, DevicePlugin):
try:
self.handle.reset()
self.handle.release_interface(self.INTERFACE_ID)
except Exception, err:
except Exception as err:
print >> sys.stderr, err
self.handle, self.device = None, None
self.in_session = False
@ -509,7 +509,7 @@ class PRS500(DeviceConfig, DevicePlugin):
outfile.write("".join(map(chr, packets[0][16:])))
for i in range(1, len(packets)):
outfile.write("".join(map(chr, packets[i])))
except IOError, err:
except IOError as err:
self.send_validated_command(FileClose(_id))
raise ArgumentError("File get operation failed. " + \
"Could not write to local location: " + str(err))
@ -656,7 +656,7 @@ class PRS500(DeviceConfig, DevicePlugin):
dest = None
try:
dest = self.path_properties(path, end_session=False)
except PathError, err:
except PathError as err:
if "does not exist" in str(err) or "not mounted" in str(err):
return (False, None)
else: raise

View File

@ -124,11 +124,11 @@ class Device(DeviceConfig, DevicePlugin):
if not prefix:
return 0, 0
prefix = prefix[:-1]
win32file = __import__('win32file', globals(), locals(), [], -1)
import win32file
try:
sectors_per_cluster, bytes_per_sector, free_clusters, total_clusters = \
win32file.GetDiskFreeSpace(prefix)
except Exception, err:
except Exception as err:
if getattr(err, 'args', [None])[0] == 21: # Disk not ready
time.sleep(3)
sectors_per_cluster, bytes_per_sector, free_clusters, total_clusters = \
@ -771,7 +771,7 @@ class Device(DeviceConfig, DevicePlugin):
for d in drives:
try:
eject(d)
except Exception, e:
except Exception as e:
print 'Udisks eject call for:', d, 'failed:'
print '\t', e
failures = True

View File

@ -57,7 +57,7 @@ class HTMLRenderer(object):
buf.open(QBuffer.WriteOnly)
image.save(buf, 'JPEG')
self.data = str(ba.data())
except Exception, e:
except Exception as e:
self.exception = e
self.traceback = traceback.format_exc()
finally:

View File

@ -49,6 +49,8 @@ HEURISTIC_OPTIONS = ['markup_chapter_headings',
'dehyphenate', 'renumber_headings',
'replace_scene_breaks']
DEFAULT_TRUE_OPTIONS = HEURISTIC_OPTIONS + ['remove_fake_margins']
def print_help(parser, log):
help = parser.format_help().encode(preferred_encoding, 'replace')
log(help)
@ -90,7 +92,7 @@ def option_recommendation_to_cli_option(add_option, rec):
if opt.long_switch == 'verbose':
attrs['action'] = 'count'
attrs.pop('type', '')
if opt.name in HEURISTIC_OPTIONS and rec.recommended_value is True:
if opt.name in DEFAULT_TRUE_OPTIONS and rec.recommended_value is True:
switches = ['--disable-'+opt.long_switch]
add_option(Option(*switches, **attrs))
@ -162,6 +164,7 @@ def add_pipeline_options(parser, plumber):
'chapter', 'chapter_mark',
'prefer_metadata_cover', 'remove_first_image',
'insert_metadata', 'page_breaks_before',
'remove_fake_margins',
]
),
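Because remove_fake_margins now sits in DEFAULT_TRUE_OPTIONS with a recommended value of True, the switch-generation logic above surfaces it on the command line as a disable flag. A simplified sketch of that logic (variable names assumed for illustration):

default_true = ['remove_fake_margins']         # stands in for DEFAULT_TRUE_OPTIONS
long_switch, recommended_value = 'remove-fake-margins', True
if 'remove_fake_margins' in default_true and recommended_value is True:
    switch = '--disable-' + long_switch        # '--disable-remove-fake-margins'
# so conversions that want the old behaviour pass --disable-remove-fake-margins
# on the conversion command line.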

View File

@ -304,6 +304,17 @@ OptionRecommendation(name='page_breaks_before',
'before the specified elements.')
),
OptionRecommendation(name='remove_fake_margins',
recommended_value=True, level=OptionRecommendation.LOW,
help=_('Some documents specify page margins by '
'specifying a left and right margin on each individual '
'paragraph. calibre will try to detect and remove these '
'margins. Sometimes, this can cause the removal of '
'margins that should not have been removed. In this '
'case you can disable the removal.')
),
OptionRecommendation(name='margin_top',
recommended_value=5.0, level=OptionRecommendation.LOW,
help=_('Set the top margin in pts. Default is %default. '
@ -988,9 +999,13 @@ OptionRecommendation(name='sr3_replace',
page_break_on_body=self.output_plugin.file_type in ('mobi',
'lit'))
flattener(self.oeb, self.opts)
self.opts.insert_blank_line = oibl
self.opts.remove_paragraph_spacing = orps
from calibre.ebooks.oeb.transforms.page_margin import RemoveFakeMargins
RemoveFakeMargins()(self.oeb, self.log, self.opts)
pr(0.9)
self.flush()

View File

@ -151,7 +151,7 @@ class Container(object):
if name in self.mime_map:
try:
raw = self._parse(raw, self.mime_map[name])
except XMLSyntaxError, err:
except XMLSyntaxError as err:
raise ParseError(name, unicode(err))
self.cache[name] = raw
return raw

View File

@ -54,7 +54,7 @@ def main(args=sys.argv):
epub = os.path.abspath(args[1])
try:
run(epub, opts, default_log)
except ParseError, err:
except ParseError as err:
default_log.error(unicode(err))
raise SystemExit(1)

View File

@ -110,7 +110,7 @@ class HTMLFile(object):
try:
with open(self.path, 'rb') as f:
src = f.read()
except IOError, err:
except IOError as err:
msg = 'Could not read from file: %s with error: %s'%(self.path, as_unicode(err))
if level == 0:
raise IOError(msg)
@ -202,7 +202,7 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None)
raise IgnoreFile('%s is a binary file'%nf.path, -1)
nl.append(nf)
flat.append(nf)
except IgnoreFile, err:
except IgnoreFile as err:
rejects.append(link)
if not err.doesnt_exist or verbose > 1:
print repr(err)

View File

@ -332,7 +332,7 @@ class HTMLConverter(object):
soup = BeautifulSoup(raw,
convertEntities=BeautifulSoup.XHTML_ENTITIES,
markupMassage=nmassage)
except ConversionError, err:
except ConversionError as err:
if 'Failed to coerce to unicode' in str(err):
raw = unicode(raw, 'utf8', 'replace')
soup = BeautifulSoup(raw,
@ -935,7 +935,7 @@ class HTMLConverter(object):
try:
im = PILImage.open(path)
except IOError, err:
except IOError as err:
self.log.warning('Unable to process image: %s\n%s'%( original_path, err))
return
encoding = detect_encoding(im)
@ -953,7 +953,7 @@ class HTMLConverter(object):
pt.close()
self.scaled_images[path] = pt
return pt.name
except (IOError, SystemError), err: # PIL chokes on interlaced PNG images as well as some GIF images
except (IOError, SystemError) as err: # PIL chokes on interlaced PNG images as well as some GIF images
self.log.warning(_('Unable to process image %s. Error: %s')%(path, err))
if width == None or height == None:
@ -1013,7 +1013,7 @@ class HTMLConverter(object):
if not self.images.has_key(path):
try:
self.images[path] = ImageStream(path, encoding=encoding)
except LrsError, err:
except LrsError as err:
self.log.warning(_('Could not process image: %s\n%s')%(
original_path, err))
return
@ -1768,7 +1768,7 @@ class HTMLConverter(object):
tag_css = self.tag_css(tag)[0] # Table should not inherit CSS
try:
self.process_table(tag, tag_css)
except Exception, err:
except Exception as err:
self.log.warning(_('An error occurred while processing a table: %s. Ignoring table markup.')%repr(err))
self.log.exception('')
self.log.debug(_('Bad table:\n%s')%unicode(tag)[:300])
@ -1858,7 +1858,7 @@ def process_file(path, options, logger):
tf.close()
tim.save(tf.name)
tpath = tf.name
except IOError, err: # PIL sometimes fails, for example on interlaced PNG files
except IOError as err: # PIL sometimes fails, for example on interlaced PNG files
logger.warn(_('Could not read cover image: %s'), err)
options.cover = None
else:

View File

@ -34,7 +34,7 @@ License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD
import re, sys, codecs
from logging import getLogger, StreamHandler, Formatter, \
DEBUG, INFO, WARN, ERROR, CRITICAL
DEBUG, INFO, WARN, CRITICAL
MESSAGE_THRESHOLD = CRITICAL
@ -242,8 +242,6 @@ class Element:
if bidi:
orig_bidi = self.bidi
if not self.bidi or self.isDocumentElement:
# Once the bidi is set don't change it (except for doc element)
self.bidi = bidi
@ -775,7 +773,6 @@ class HtmlPattern (Pattern):
def handleMatch (self, m, doc):
rawhtml = m.group(2)
inline = True
place_holder = self.stash.store(rawhtml)
return doc.createTextNode(place_holder)
@ -1031,7 +1028,6 @@ class BlockGuru:
remainder of the original list"""
items = []
item = -1
i = 0 # to keep track of where we are
@ -1849,22 +1845,7 @@ For lower versions of Python use:
""" % EXECUTABLE_NAME_FOR_USAGE
def parse_options():
try:
optparse = __import__("optparse")
except:
if len(sys.argv) == 2:
return {'input': sys.argv[1],
'output': None,
'message_threshold': CRITICAL,
'safe': False,
'extensions': [],
'encoding': None }
else:
print OPTPARSE_WARNING
return None
import optparse
parser = optparse.OptionParser(usage="%prog INPUTFILE [options]")
parser.add_option("-f", "--file", dest="filename",

View File

@ -108,7 +108,7 @@ def _get_cover_url(br, asin):
q = 'http://amzn.com/'+asin
try:
raw = br.open_novisit(q).read()
except Exception, e:
except Exception as e:
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return None
@ -139,7 +139,7 @@ def get_metadata(br, asin, mi):
q = 'http://amzn.com/'+asin
try:
raw = br.open_novisit(q).read()
except Exception, e:
except Exception as e:
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return False

View File

@ -33,7 +33,7 @@ class AmazonFr(MetadataSource):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose, lang='fr')
except Exception, e:
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
@ -50,7 +50,7 @@ class AmazonEs(MetadataSource):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose, lang='es')
except Exception, e:
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
@ -67,7 +67,7 @@ class AmazonEn(MetadataSource):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose, lang='en')
except Exception, e:
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
@ -84,7 +84,7 @@ class AmazonDe(MetadataSource):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose, lang='de')
except Exception, e:
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
@ -103,7 +103,7 @@ class Amazon(MetadataSource):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose, lang='all')
except Exception, e:
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
@ -193,7 +193,7 @@ class Query(object):
try:
raw = browser.open_novisit(self.urldata, timeout=timeout).read()
except Exception, e:
except Exception as e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
@ -226,7 +226,7 @@ class Query(object):
try:
urldata = self.urldata + '&page=' + str(i)
raw = browser.open_novisit(urldata, timeout=timeout).read()
except Exception, e:
except Exception as e:
continue
if '<title>404 - ' in raw:
continue
@ -413,7 +413,7 @@ class ResultList(list):
def get_individual_metadata(self, browser, linkdata, verbose):
try:
raw = browser.open_novisit(linkdata).read()
except Exception, e:
except Exception as e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
@ -445,7 +445,7 @@ class ResultList(list):
# self.clean_entry(entry, invalid_id=inv_ids)
title = self.get_title(entry)
authors = self.get_authors(entry)
except Exception, e:
except Exception as e:
if verbose:
print 'Failed to get all details for an entry'
print e

View File

@ -575,6 +575,9 @@ class Metadata(object):
orig_res = res
datatype = cmeta['datatype']
if datatype == 'text' and cmeta['is_multiple']:
if cmeta['display'].get('is_names', False):
res = u' & '.join(res)
else:
res = u', '.join(sorted(res, key=sort_key))
elif datatype == 'series' and series_with_index:
if self.get_extra(key) is not None:

View File

@ -91,7 +91,7 @@ class OpenLibraryCovers(CoverDownload): # {{{
br.open_novisit(HeadRequest(self.OPENLIBRARY%mi.isbn), timeout=timeout)
self.debug('cover for', mi.isbn, 'found')
ans.set()
except Exception, e:
except Exception as e:
if callable(getattr(e, 'getcode', None)) and e.getcode() == 302:
self.debug('cover for', mi.isbn, 'found')
ans.set()
@ -106,7 +106,7 @@ class OpenLibraryCovers(CoverDownload): # {{{
try:
ans = br.open(self.OPENLIBRARY%mi.isbn, timeout=timeout).read()
result_queue.put((True, ans, 'jpg', self.name))
except Exception, e:
except Exception as e:
if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
result_queue.put((False, _('ISBN: %s not found')%mi.isbn, '', self.name))
else:
@ -131,7 +131,7 @@ class AmazonCovers(CoverDownload): # {{{
get_cover_url(mi.isbn, br)
self.debug('cover for', mi.isbn, 'found')
ans.set()
except Exception, e:
except Exception as e:
self.debug(e)
def get_covers(self, mi, result_queue, abort, timeout=5.):
@ -145,7 +145,7 @@ class AmazonCovers(CoverDownload): # {{{
raise ValueError('No cover found for ISBN: %s'%mi.isbn)
cover_data = br.open_novisit(url).read()
result_queue.put((True, cover_data, 'jpg', self.name))
except Exception, e:
except Exception as e:
result_queue.put((False, self.exception_to_string(e),
traceback.format_exc(), self.name))
@ -215,7 +215,7 @@ class DoubanCovers(CoverDownload): # {{{
try:
url = self.DOUBAN_ISBN_URL + isbn + "?apikey=" + self.CALIBRE_DOUBAN_API_KEY
src = br.open(url, timeout=timeout).read()
except Exception, err:
except Exception as err:
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
err = Exception(_('Douban.com API timed out. Try again later.'))
raise err
@ -248,7 +248,7 @@ class DoubanCovers(CoverDownload): # {{{
if self.get_cover_url(mi.isbn, br, timeout=timeout) != None:
self.debug('cover for', mi.isbn, 'found')
ans.set()
except Exception, e:
except Exception as e:
self.debug(e)
def get_covers(self, mi, result_queue, abort, timeout=5.):
@ -259,7 +259,7 @@ class DoubanCovers(CoverDownload): # {{{
url = self.get_cover_url(mi.isbn, br, timeout=timeout)
cover_data = br.open_novisit(url).read()
result_queue.put((True, cover_data, 'jpg', self.name))
except Exception, e:
except Exception as e:
result_queue.put((False, self.exception_to_string(e),
traceback.format_exc(), self.name))
# }}}
@ -302,4 +302,16 @@ def test(isbns): # {{{
if __name__ == '__main__':
isbns = sys.argv[1:] + ['9781591025412', '9780307272119']
test(isbns)
#test(isbns)
from calibre.ebooks.metadata import MetaInformation
oc = OpenLibraryCovers(None)
for isbn in isbns:
mi = MetaInformation('xx', ['yy'])
mi.isbn = isbn
rq = Queue()
oc.get_covers(mi, rq, Event())
result = rq.get_nowait()
if not result[0]:
print 'Failed for ISBN:', isbn
print result

View File

@ -49,7 +49,7 @@ class DoubanBooks(MetadataSource):
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10,
verbose=self.verbose)
except Exception, e:
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
@ -192,7 +192,7 @@ class ResultList(list):
raw = browser.open(id_url).read()
feed = etree.fromstring(raw)
x = entry(feed)[0]
except Exception, e:
except Exception as e:
if verbose:
print 'Failed to get all details for an entry'
print e

View File

@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''Read meta information from epub files'''
import os, re, posixpath, shutil
import os, re, posixpath
from cStringIO import StringIO
from contextlib import closing
@ -192,6 +192,13 @@ def get_metadata(stream, extract_cover=True):
def get_quick_metadata(stream):
return get_metadata(stream, False)
def _write_new_cover(new_cdata, cpath):
from calibre.utils.magick.draw import save_cover_data_to
new_cover = PersistentTemporaryFile(suffix=os.path.splitext(cpath)[1])
new_cover.close()
save_cover_data_to(new_cdata, new_cover.name)
return new_cover
def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
stream.seek(0)
reader = OCFZipReader(stream, root=os.getcwdu())
@ -208,6 +215,7 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
new_cdata = open(mi.cover, 'rb').read()
except:
pass
new_cover = cpath = None
if new_cdata and raster_cover:
try:
cpath = posixpath.join(posixpath.dirname(reader.opf_path),
@ -215,19 +223,7 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
cover_replacable = not reader.encryption_meta.is_encrypted(cpath) and \
os.path.splitext(cpath)[1].lower() in ('.png', '.jpg', '.jpeg')
if cover_replacable:
from calibre.utils.magick.draw import save_cover_data_to, \
identify
new_cover = PersistentTemporaryFile(suffix=os.path.splitext(cpath)[1])
resize_to = None
if False: # Resize new cover to same size as old cover
shutil.copyfileobj(reader.open(cpath), new_cover)
new_cover.close()
width, height, fmt = identify(new_cover.name)
resize_to = (width, height)
else:
new_cover.close()
save_cover_data_to(new_cdata, new_cover.name,
resize_to=resize_to)
new_cover = _write_new_cover(new_cdata, cpath)
replacements[cpath] = open(new_cover.name, 'rb')
except:
import traceback
@ -249,4 +245,11 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
newopf = StringIO(reader.opf.render())
safe_replace(stream, reader.container[OPF.MIMETYPE], newopf,
extra_replacements=replacements)
try:
if cpath is not None:
replacements[cpath].close()
os.remove(replacements[cpath].name)
except:
pass

View File

@ -93,7 +93,7 @@ class MetadataSource(Plugin): # {{{
traceback.print_exc()
mi.comments = None
except Exception, e:
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
@ -186,7 +186,7 @@ class GoogleBooks(MetadataSource): # {{{
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10,
verbose=self.verbose)
except Exception, e:
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
@ -217,7 +217,7 @@ class ISBNDB(MetadataSource): # {{{
try:
opts, args = option_parser().parse_args(args)
self.results = create_books(opts, args)
except Exception, e:
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
@ -244,7 +244,7 @@ class Amazon(MetadataSource): # {{{
try:
self.results = get_social_metadata(self.title, self.book_author,
self.publisher, self.isbn)
except Exception, e:
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
@ -264,7 +264,7 @@ class KentDistrictLibrary(MetadataSource): # {{{
from calibre.ebooks.metadata.kdl import get_series
try:
self.results = get_series(self.title, self.book_author)
except Exception, e:
except Exception as e:
import traceback
traceback.print_exc()
self.exception = e

View File

@ -30,7 +30,7 @@ class Fictionwise(MetadataSource): # {{{
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose)
except Exception, e:
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
@ -91,7 +91,7 @@ class Query(object):
try:
raw = browser.open_novisit(self.BASE_URL, self.urldata, timeout=timeout).read()
except Exception, e:
except Exception as e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
@ -276,7 +276,7 @@ class ResultList(list):
def get_individual_metadata(self, browser, linkdata, verbose):
try:
raw = browser.open_novisit(self.BASE_URL + linkdata).read()
except Exception, e:
except Exception as e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
@ -311,7 +311,7 @@ class ResultList(list):
#maybe strengthen the search
ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
authors = self.get_authors(entry)
except Exception, e:
except Exception as e:
if verbose:
print _('Failed to get all details for an entry')
print e
@ -328,7 +328,7 @@ class ResultList(list):
#maybe strengthen the search
ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
authors = self.get_authors(entry)
except Exception, e:
except Exception as e:
if verbose:
print _('Failed to get all details for an entry')
print e

View File

@ -176,7 +176,7 @@ class ResultList(list):
raw = browser.open(id_url).read()
feed = etree.fromstring(raw)
x = entry(feed)[0]
except Exception, e:
except Exception as e:
if verbose:
print 'Failed to get all details for an entry'
print e

View File

@ -38,7 +38,7 @@ def get_metadata(stream):
mi.author = author
if category:
mi.category = category
except Exception, err:
except Exception as err:
msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode(err))
print >>sys.stderr, msg.encode('utf8')
return mi

View File

@ -25,7 +25,7 @@ def fetch_metadata(url, max=3, timeout=5.):
while len(books) < total_results and max > 0:
try:
raw = br.open(url, timeout=timeout).read()
except Exception, err:
except Exception as err:
raise ISBNDBError('Could not fetch ISBNDB metadata. Error: '+str(err))
soup = BeautifulStoneSoup(raw,
convertEntities=BeautifulStoneSoup.XML_ENTITIES)

View File

@ -43,7 +43,7 @@ def get_series(title, authors, timeout=60):
br = browser()
try:
raw = br.open_novisit(url, timeout=timeout).read()
except URLError, e:
except URLError as e:
if isinstance(e.reason, socket.timeout):
raise Exception('KDL Server busy, try again later')
raise

View File

@ -4,34 +4,23 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Fetch cover from LibraryThing.com based on ISBN number.
'''
import sys, re, random
import sys, re
from lxml import html
import mechanize
from calibre import browser, prints
from calibre import browser, prints, random_user_agent
from calibre.utils.config import OptionParser
from calibre.ebooks.chardet import strip_encoding_declarations
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
def get_ua():
choices = [
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11'
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)'
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)'
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1)'
'Mozilla/5.0 (iPhone; U; CPU iPhone OS 3_0 like Mac OS X; en-us) AppleWebKit/528.18 (KHTML, like Gecko) Version/4.0 Mobile/7A341 Safari/528.16'
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.1 Safari/525.19'
'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11'
]
return choices[random.randint(0, len(choices)-1)]
_lt_br = None
def get_browser():
global _lt_br
if _lt_br is None:
_lt_br = browser(user_agent=get_ua())
_lt_br = browser(user_agent=random_user_agent())
return _lt_br.clone_browser()
class HeadRequest(mechanize.Request):
@ -45,7 +34,7 @@ def check_for_cover(isbn, timeout=5.):
try:
br.open_novisit(HeadRequest(OPENLIBRARY%isbn), timeout=timeout)
return True
except Exception, e:
except Exception as e:
if callable(getattr(e, 'getcode', None)) and e.getcode() == 302:
return True
return False

View File

@ -32,7 +32,7 @@ class NiceBooks(MetadataSource):
try:
self.results = search(self.title, self.book_author, self.publisher,
self.isbn, max_results=10, verbose=self.verbose)
except Exception, e:
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()
@ -54,7 +54,7 @@ class NiceBooksCovers(CoverDownload):
if Covers(mi.isbn)(entry).check_cover():
self.debug('cover for', mi.isbn, 'found')
ans.set()
except Exception, e:
except Exception as e:
self.debug(e)
def get_covers(self, mi, result_queue, abort, timeout=5.):
@ -67,7 +67,7 @@ class NiceBooksCovers(CoverDownload):
if not ext:
ext = 'jpg'
result_queue.put((True, cover_data, ext, self.name))
except Exception, e:
except Exception as e:
result_queue.put((False, self.exception_to_string(e),
traceback.format_exc(), self.name))
@ -109,7 +109,7 @@ class Query(object):
try:
raw = browser.open_novisit(self.BASE_URL+self.urldata, timeout=timeout).read()
except Exception, e:
except Exception as e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
@ -144,7 +144,7 @@ class Query(object):
try:
urldata = self.urldata + '&p=' + str(i)
raw = browser.open_novisit(self.BASE_URL+urldata, timeout=timeout).read()
except Exception, e:
except Exception as e:
continue
if '<title>404 - ' in raw:
continue
@ -233,7 +233,7 @@ class ResultList(list):
def get_individual_metadata(self, browser, linkdata, verbose):
try:
raw = browser.open_novisit(self.BASE_URL + linkdata).read()
except Exception, e:
except Exception as e:
report(verbose)
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
@ -266,7 +266,7 @@ class ResultList(list):
entry = entry.find("div[@id='book-info']")
title = self.get_title(entry)
authors = self.get_authors(entry)
except Exception, e:
except Exception as e:
if verbose:
print 'Failed to get all details for an entry'
print e
@ -280,7 +280,7 @@ class ResultList(list):
entry = entry.find("div[@id='book-info']")
title = self.get_title(entry)
authors = self.get_authors(entry)
except Exception, e:
except Exception as e:
if verbose:
print 'Failed to get all details for an entry'
print e
@ -315,7 +315,7 @@ class Covers(object):
cover, ext = browser.open_novisit(self.urlimg, timeout=timeout).read(), \
self.urlimg.rpartition('.')[-1]
return cover, ext if ext else 'jpg'
except Exception, err:
except Exception as err:
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
if not len(self.urlimg):

View File

@ -43,7 +43,7 @@ def get_metadata(stream):
elif key.strip() == 'AUTHOR':
mi.author = value
mi.authors = string_to_authors(value)
except Exception, err:
except Exception as err:
msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode(err))
print >>sys.stderr, msg.encode('utf8')
raise

View File

@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
import socket, time, re
from urllib import urlencode
from threading import Thread
from Queue import Queue, Empty
from lxml.html import soupparser, tostring
@ -28,11 +29,12 @@ class Worker(Thread): # {{{
Get book details from amazons book page in a separate thread
'''
def __init__(self, url, result_queue, browser, log, timeout=20):
def __init__(self, url, result_queue, browser, log, relevance, plugin, timeout=20):
Thread.__init__(self)
self.daemon = True
self.url, self.result_queue = url, result_queue
self.log, self.timeout = log, timeout
self.relevance, self.plugin = relevance, plugin
self.browser = browser.clone_browser()
self.cover_url = self.amazon_id = self.isbn = None
@ -40,12 +42,12 @@ class Worker(Thread): # {{{
try:
self.get_details()
except:
self.log.error('get_details failed for url: %r'%self.url)
self.log.exception('get_details failed for url: %r'%self.url)
def get_details(self):
try:
raw = self.browser.open_novisit(self.url, timeout=self.timeout).read().strip()
except Exception, e:
except Exception as e:
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
self.log.error('URL malformed: %r'%self.url)
@ -161,6 +163,17 @@ class Worker(Thread): # {{{
else:
self.log.warning('Failed to find product description for url: %r'%self.url)
mi.source_relevance = self.relevance
if self.amazon_id:
if self.isbn:
self.plugin.cache_isbn_to_identifier(self.isbn, self.amazon_id)
if self.cover_url:
self.plugin.cache_identifier_to_cover_url(self.amazon_id,
self.cover_url)
self.plugin.clean_downloaded_metadata(mi)
self.result_queue.put(mi)
def parse_asin(self, root):
@ -266,7 +279,7 @@ class Amazon(Source):
name = 'Amazon'
description = _('Downloads metadata from Amazon')
capabilities = frozenset(['identify'])
capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title', 'authors', 'identifier:amazon',
'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate'])
@ -274,6 +287,7 @@ class Amazon(Source):
'com': _('US'),
'fr' : _('France'),
'de' : _('Germany'),
'uk' : _('UK'),
}
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
@ -321,6 +335,21 @@ class Amazon(Source):
# }}}
def get_cached_cover_url(self, identifiers): # {{{
url = None
asin = identifiers.get('amazon', None)
if asin is None:
asin = identifiers.get('asin', None)
if asin is None:
isbn = identifiers.get('isbn', None)
if isbn is not None:
asin = self.cached_isbn_to_identifier(isbn)
if asin is not None:
url = self.cached_identifier_to_cover_url(asin)
return url
# }}}
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
identifiers={}, timeout=30):
'''
@ -335,7 +364,7 @@ class Amazon(Source):
br = self.browser
try:
raw = br.open_novisit(query, timeout=timeout).read().strip()
except Exception, e:
except Exception as e:
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
log.error('Query malformed: %r'%query)
@ -396,7 +425,8 @@ class Amazon(Source):
log.error('No matches found with query: %r'%query)
return
workers = [Worker(url, result_queue, br, log) for url in matches]
workers = [Worker(url, result_queue, br, log, i, self) for i, url in
enumerate(matches)]
for w in workers:
w.start()
@ -414,19 +444,47 @@ class Amazon(Source):
if not a_worker_is_alive:
break
for w in workers:
if w.amazon_id:
if w.isbn:
self.cache_isbn_to_identifier(w.isbn, w.amazon_id)
if w.cover_url:
self.cache_identifier_to_cover_url(w.amazon_id,
w.cover_url)
return None
# }}}
def download_cover(self, log, result_queue, abort, # {{{
title=None, authors=None, identifiers={}, timeout=30):
cached_url = self.get_cached_cover_url(identifiers)
if cached_url is None:
log.info('No cached cover found, running identify')
rq = Queue()
self.identify(log, rq, abort, title=title, authors=authors,
identifiers=identifiers)
if abort.is_set():
return
results = []
while True:
try:
results.append(rq.get_nowait())
except Empty:
break
results.sort(key=self.identify_results_keygen(
title=title, authors=authors, identifiers=identifiers))
for mi in results:
cached_url = self.get_cached_cover_url(mi.identifiers)
if cached_url is not None:
break
if cached_url is None:
log.info('No cover found')
return
if __name__ == '__main__':
if abort.is_set():
return
br = self.browser
try:
cdata = br.open_novisit(cached_url, timeout=timeout).read()
result_queue.put(cdata)
except:
log.exception('Failed to download cover from:', cached_url)
# }}}
if __name__ == '__main__': # tests {{{
# To run these test use: calibre-debug -e
# src/calibre/ebooks/metadata/sources/amazon.py
from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
@ -472,5 +530,5 @@ if __name__ == '__main__':
),
])
# }}}


@ -8,11 +8,13 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, threading
from future_builtins import map
from calibre import browser, random_user_agent
from calibre.customize import Plugin
from calibre.utils.logging import ThreadSafeLog, FileStream
from calibre.utils.config import JSONConfig
from calibre.utils.titlecase import titlecase
msprefs = JSONConfig('metadata_sources.json')
@ -21,6 +23,71 @@ def create_log(ostream=None):
log.outputs = [FileStream(ostream)]
return log
# Comparing Metadata objects for relevance {{{
words = ("the", "a", "an", "of", "and")
prefix_pat = re.compile(r'^(%s)\s+'%("|".join(words)))
trailing_paren_pat = re.compile(r'\(.*\)$')
whitespace_pat = re.compile(r'\s+')
def cleanup_title(s):
if not s:
s = _('Unknown')
s = s.strip().lower()
s = prefix_pat.sub(' ', s)
s = trailing_paren_pat.sub('', s)
s = whitespace_pat.sub(' ', s)
return s.strip()
class InternalMetadataCompareKeyGen(object):
'''
Generate a sort key for comparison of the relevance of Metadata objects,
given a search query.
The sort key ensures that an ascending order sort is a sort by order of
decreasing relevance.
The algorithm is:
* Prefer results that have the same ISBN as specified in the query
* Prefer results with a cached cover URL
* Prefer results with all available fields filled in
* Prefer results that are an exact title match to the query
* Prefer results with longer comments (greater than 10% longer)
* Use the relevance of the result as reported by the metadata source's search
engine
'''
def __init__(self, mi, source_plugin, title, authors, identifiers):
isbn = 1 if mi.isbn and mi.isbn == identifiers.get('isbn', None) else 2
all_fields = 1 if source_plugin.test_fields(mi) is None else 2
exact_title = 1 if title and \
cleanup_title(title) == cleanup_title(mi.title) else 2
has_cover = 2 if source_plugin.get_cached_cover_url(mi.identifiers)\
is None else 1
self.base = (isbn, has_cover, all_fields, exact_title)
self.comments_len = len(mi.comments.strip() if mi.comments else '')
self.extra = (getattr(mi, 'source_relevance', 0), )
def __cmp__(self, other):
result = cmp(self.base, other.base)
if result == 0:
# Now prefer results with the longer comments, within 10%
cx, cy = self.comments_len, other.comments_len
t = (cx + cy) / 20
delta = cy - cx
if abs(delta) > t:
result = delta
else:
result = cmp(self.extra, other.extra)
return result
# }}}
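Since the base key is a plain tuple, ordinary tuple comparison already produces the ordering described in the docstring above; a minimal illustration with made-up values:

    # 1 beats 2 for each criterion: (isbn_match, has_cover, all_fields, exact_title)
    key_isbn_match = (1, 1, 1, 1)
    key_no_isbn = (2, 1, 1, 1)
    assert key_isbn_match < key_no_isbn  # an ascending sort puts the ISBN match first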
class Source(Plugin):
type = _('Metadata source')
@ -28,8 +95,12 @@ class Source(Plugin):
supported_platforms = ['windows', 'osx', 'linux']
#: Set of capabilities supported by this plugin.
#: Useful capabilities are: 'identify', 'cover'
capabilities = frozenset()
#: List of metadata fields that can potentially be downloaded by this plugin
#: during the identify phase
touched_fields = frozenset()
def __init__(self, *args, **kwargs):
@ -55,11 +126,17 @@ class Source(Plugin):
def browser(self):
if self._browser is None:
self._browser = browser(user_agent=random_user_agent())
return self._browser
return self._browser.clone_browser()
# }}}
# Utility functions {{{
# Caching {{{
def get_related_isbns(self, id_):
with self.cache_lock:
for isbn, q in self._isbn_to_identifier_cache.iteritems():
if q == id_:
yield isbn
def cache_isbn_to_identifier(self, isbn, identifier):
with self.cache_lock:
@ -77,6 +154,10 @@ class Source(Plugin):
with self.cache_lock:
return self._identifier_to_cover_url_cache.get(id_, None)
# }}}
# Utility functions {{{
def get_author_tokens(self, authors, only_first_author=True):
'''
Take a list of authors and return a list of tokens useful for an
@ -128,12 +209,68 @@ class Source(Plugin):
gr.append(job)
return [g for g in groups if g]
def test_fields(self, mi):
'''
Return the first field from self.touched_fields that is null on the
mi object
'''
for key in self.touched_fields:
if key.startswith('identifier:'):
key = key.partition(':')[-1]
if not mi.has_identifier(key):
return 'identifier: ' + key
elif mi.is_null(key):
return key
def clean_downloaded_metadata(self, mi):
'''
Call this method in your plugin's identify method to normalize metadata
before putting the Metadata object into result_queue. You can, of
course, use a custom algorithm suited to your metadata source.
'''
def fixcase(x):
if x:
x = titlecase(x)
return x
if mi.title:
mi.title = fixcase(mi.title)
mi.authors = list(map(fixcase, mi.authors))
mi.tags = list(map(fixcase, mi.tags))
# }}}
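As the docstring above notes, a plugin is expected to normalize case before queueing a result; a minimal sketch of that call sequence (the Metadata import path is assumed, and plugin/result_queue stand in for the objects identify() receives):

    from calibre.ebooks.metadata.book.base import Metadata

    mi = Metadata('the great gatsby', ['francis scott fitzgerald'])
    plugin.clean_downloaded_metadata(mi)  # titlecases title, authors and tags in place
    result_queue.put(mi)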
# Metadata API {{{
def get_cached_cover_url(self, identifiers):
'''
Return cached cover URL for the book identified by
the identifiers dict or None if no such URL exists.
Note that this method must only return validated URLs, i.e. not URLs
that could result in a generic cover image or a not found error.
'''
return None
def identify_results_keygen(self, title=None, authors=None,
identifiers={}):
'''
Return a function that is used to generate a key that can sort Metadata
objects by their relevance given a search query (title, authors,
identifiers).
These keys are used to sort the results of a call to :meth:`identify`.
For details on the default algorithm see
:class:`InternalMetadataCompareKeyGen`. Re-implement this function in
your plugin if the default algorithm is not suitable.
'''
def keygen(mi):
return InternalMetadataCompareKeyGen(mi, self, title, authors,
identifiers)
return keygen
def identify(self, log, result_queue, abort, title=None, authors=None,
identifiers={}, timeout=5):
identifiers={}, timeout=30):
'''
Identify a book by its title/author/isbn/etc.
@ -147,6 +284,15 @@ class Source(Plugin):
the same ISBN/special identifier does not need to get the cover URL
again. Use the caching API for this.
Every Metadata object put into result_queue by this method must have a
`source_relevance` attribute that is an integer indicating the order in
which the results were returned by the metadata source for this query.
This integer will be used by :meth:`compare_identify_results`. If the
order is unimportant, set it to zero for every result.
Make sure that any cover/isbn mapping information is cached before the
Metadata object is put into result_queue.
:param log: A log object, use it to output debugging information/errors
:param result_queue: A result Queue, results should be put into it.
Each result is a Metadata object
@ -164,5 +310,17 @@ class Source(Plugin):
'''
return None
def download_cover(self, log, result_queue, abort,
title=None, authors=None, identifiers={}, timeout=30):
'''
Download a cover and put it into result_queue. The parameters all have
the same meaning as for :meth:`identify`.
This method should use cached cover URLs for efficiency whenever
possible. When cached data is not present, most plugins simply call
identify and use its results.
'''
pass
# }}}
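The caching contract that ties identify() to download_cover() is the same pattern the Amazon worker above follows; roughly (the identifier names are illustrative):

    # inside identify(), once an identifier and cover URL are known for a result:
    if amazon_id:
        if isbn:
            self.cache_isbn_to_identifier(isbn, amazon_id)
        if cover_url:
            self.cache_identifier_to_cover_url(amazon_id, cover_url)

    # later, download_cover() can resolve the cover without re-running the query:
    cached = self.get_cached_cover_url({'isbn': isbn})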


@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
import time
from urllib import urlencode
from functools import partial
from Queue import Queue, Empty
from lxml import etree
@ -24,7 +25,8 @@ from calibre import as_unicode
NAMESPACES = {
'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
'atom' : 'http://www.w3.org/2005/Atom',
'dc': 'http://purl.org/dc/terms'
'dc' : 'http://purl.org/dc/terms',
'gd' : 'http://schemas.google.com/g/2005'
}
XPath = partial(etree.XPath, namespaces=NAMESPACES)
@ -41,6 +43,7 @@ publisher = XPath('descendant::dc:publisher')
subject = XPath('descendant::dc:subject')
description = XPath('descendant::dc:description')
language = XPath('descendant::dc:language')
rating = XPath('descendant::gd:rating[@average]')
def get_details(browser, url, timeout): # {{{
try:
@ -113,8 +116,10 @@ def to_metadata(browser, log, entry_, timeout): # {{{
btags = [x.text for x in subject(extra) if x.text]
tags = []
for t in btags:
tags.extend([y.strip() for y in t.split('/')])
tags = list(sorted(list(set(tags))))
atags = [y.strip() for y in t.split('/')]
for tag in atags:
if tag not in tags:
tags.append(tag)
except:
log.exception('Failed to parse tags:')
tags = []
@ -130,6 +135,18 @@ def to_metadata(browser, log, entry_, timeout): # {{{
except:
log.exception('Failed to parse pubdate')
# Ratings
for x in rating(extra):
try:
mi.rating = float(x.get('average'))
if mi.rating > 5:
mi.rating /= 2
except:
log.exception('Failed to parse rating')
# Cover
mi.has_google_cover = len(extra.xpath(
'//*[@rel="http://schemas.google.com/books/2008/thumbnail"]')) > 0
return mi
# }}}
@ -139,11 +156,13 @@ class GoogleBooks(Source):
name = 'Google Books'
description = _('Downloads metadata from Google Books')
capabilities = frozenset(['identify'])
capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
'comments', 'publisher', 'identifier:isbn',
'comments', 'publisher', 'identifier:isbn', 'rating',
'identifier:google']) # language currently disabled
GOOGLE_COVER = 'http://books.google.com/books?id=%s&printsec=frontcover&img=1'
def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
BASE_URL = 'http://books.google.com/books/feeds/volumes?'
isbn = check_isbn(identifiers.get('isbn', None))
@ -174,36 +193,78 @@ class GoogleBooks(Source):
})
# }}}
def cover_url_from_identifiers(self, identifiers):
def download_cover(self, log, result_queue, abort, # {{{
title=None, authors=None, identifiers={}, timeout=30):
cached_url = self.get_cached_cover_url(identifiers)
if cached_url is None:
log.info('No cached cover found, running identify')
rq = Queue()
self.identify(log, rq, abort, title=title, authors=authors,
identifiers=identifiers)
if abort.is_set():
return
results = []
while True:
try:
results.append(rq.get_nowait())
except Empty:
break
results.sort(key=self.identify_results_keygen(
title=title, authors=authors, identifiers=identifiers))
for mi in results:
cached_url = self.cover_url_from_identifiers(mi.identifiers)
if cached_url is not None:
break
if cached_url is None:
log.info('No cover found')
return
if abort.is_set():
return
br = self.browser
try:
cdata = br.open_novisit(cached_url, timeout=timeout).read()
result_queue.put(cdata)
except:
log.exception('Failed to download cover from:', cached_url)
# }}}
def get_cached_cover_url(self, identifiers): # {{{
url = None
goog = identifiers.get('google', None)
if goog is None:
isbn = identifiers.get('isbn', None)
if isbn is not None:
goog = self.cached_isbn_to_identifier(isbn)
if goog is not None:
return ('http://books.google.com/books?id=%s&printsec=frontcover&img=1' %
goog)
url = self.cached_identifier_to_cover_url(goog)
def is_cover_image_valid(self, raw):
# When no cover is present, returns a PNG saying image not available
# Try for example google identifier llNqPwAACAAJ
# I have yet to see an actual cover in PNG format
return raw and len(raw) > 17000 and raw[1:4] != 'PNG'
return url
# }}}
def get_all_details(self, br, log, entries, abort, result_queue, timeout):
for i in entries:
def get_all_details(self, br, log, entries, abort, # {{{
result_queue, timeout):
for relevance, i in enumerate(entries):
try:
ans = to_metadata(br, log, i, timeout)
if isinstance(ans, Metadata):
result_queue.put(ans)
ans.source_relevance = relevance
goog = ans.identifiers['google']
for isbn in getattr(ans, 'all_isbns', []):
self.cache_isbn_to_identifier(isbn,
ans.identifiers['google'])
self.cache_isbn_to_identifier(isbn, goog)
if ans.has_google_cover:
self.cache_identifier_to_cover_url(goog,
self.GOOGLE_COVER%goog)
self.clean_downloaded_metadata(ans)
result_queue.put(ans)
except:
log.exception(
'Failed to get metadata for identify entry:',
etree.tostring(i))
if abort.is_set():
break
# }}}
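get_cached_cover_url() above only needs the ISBN-to-volume-id mapping that get_all_details() populates; a sketch of the lookup chain with placeholder identifiers:

    identifiers = {'isbn': '9781234567897'}  # placeholder
    goog = plugin.cached_isbn_to_identifier(identifiers['isbn'])
    if goog is not None:
        # resolves to GOOGLE_COVER % goog when a cover was seen during identify
        url = plugin.cached_identifier_to_cover_url(goog)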
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
identifiers={}, timeout=30):
@ -212,7 +273,7 @@ class GoogleBooks(Source):
br = self.browser
try:
raw = br.open_novisit(query, timeout=timeout).read()
except Exception, e:
except Exception as e:
log.exception('Failed to make identify query: %r'%query)
return as_unicode(e)
@ -221,7 +282,7 @@ class GoogleBooks(Source):
feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
strip_encoding_pats=True)[0], parser=parser)
entries = entry(feed)
except Exception, e:
except Exception as e:
log.exception('Failed to parse identify results')
return as_unicode(e)
@ -237,7 +298,7 @@ class GoogleBooks(Source):
return None
# }}}
if __name__ == '__main__':
if __name__ == '__main__': # tests {{{
# To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py
from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
title_test, authors_test)
@ -252,8 +313,10 @@ if __name__ == '__main__':
authors_test(['Francis Scott Fitzgerald'])]
),
#(
# {'title': 'Great Expectations', 'authors':['Charles Dickens']},
# [title_test('Great Expectations', exact=True)]
#),
(
{'title': 'Flatland', 'authors':['Abbott']},
[title_test('Flatland', exact=False)]
),
])
# }}}


@ -0,0 +1,107 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import time
from Queue import Queue, Empty
from threading import Thread
from io import BytesIO
from calibre.customize.ui import metadata_plugins
from calibre.ebooks.metadata.sources.base import create_log
# How long to wait for more results after first result is found
WAIT_AFTER_FIRST_RESULT = 30 # seconds
class Worker(Thread):
def __init__(self, plugin, kwargs, abort):
Thread.__init__(self)
self.daemon = True
self.plugin, self.kwargs, self.rq = plugin, kwargs, Queue()
self.abort = abort
self.buf = BytesIO()
self.log = create_log(self.buf)
def run(self):
try:
self.plugin.identify(self.log, self.rq, self.abort, **self.kwargs)
except:
self.log.exception('Plugin', self.plugin.name, 'failed')
def is_worker_alive(workers):
for w in workers:
if w.is_alive():
return True
return False
def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
plugins = list(metadata_plugins['identify'])
kwargs = {
'title': title,
'authors': authors,
'identifiers': identifiers,
'timeout': timeout,
}
log('Running identify query with parameters:')
log(kwargs)
log('Using plugins:', ', '.join([p.name for p in plugins]))
log('The log (if any) from individual plugins is below')
workers = [Worker(p, kwargs, abort) for p in plugins]
for w in workers:
w.start()
first_result_at = None
results = dict((p, []) for p in plugins)  # a separate list per plugin; dict.fromkeys would share one list
def get_results():
found = False
for w in workers:
try:
result = w.rq.get_nowait()
except Empty:
pass
else:
results[w.plugin].append(result)
found = True
return found
while True:
time.sleep(0.2)
if get_results() and first_result_at is None:
first_result_at = time.time()
if not is_worker_alive(workers):
break
if (first_result_at is not None and time.time() - first_result_at >
WAIT_AFTER_FIRST_RESULT):
log('Not waiting any longer for more results')
abort.set()
break
get_results()
sort_kwargs = dict(kwargs)
for k in list(sort_kwargs.iterkeys()):
if k not in ('title', 'authors', 'identifiers'):
sort_kwargs.pop(k)
for plugin, results in results.iteritems():
results.sort(key=plugin.identify_results_keygen(**sort_kwargs))
plog = plugin.buf.getvalue().strip()
if plog:
log('\n'+'*'*35, plugin.name, '*'*35)
log('Found %d results'%len(results))
log(plog)
log('\n'+'*'*80)
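A minimal driver for the merging identify() above, mirroring the BytesIO logging used by the per-plugin workers (title/authors are placeholders):

    from io import BytesIO
    from threading import Event

    from calibre.ebooks.metadata.sources.base import create_log

    abort = Event()
    buf = BytesIO()
    log = create_log(buf)
    identify(log, abort, title='The Great Gatsby',
             authors=['Francis Scott Fitzgerald'], timeout=30)
    print(buf.getvalue())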


@ -0,0 +1,35 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.metadata.sources.base import Source
class OpenLibrary(Source):
name = 'Open Library'
description = _('Downloads metadata from The Open Library')
capabilities = frozenset(['cover'])
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
def download_cover(self, log, result_queue, abort,
title=None, authors=None, identifiers={}, timeout=30):
if 'isbn' not in identifiers:
return
isbn = identifiers['isbn']
br = self.browser
try:
ans = br.open_novisit(self.OPENLIBRARY%isbn, timeout=timeout).read()
result_queue.put(ans)
except Exception as e:
if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
log.error('No cover for ISBN: %r found'%isbn)
else:
log.exception('Failed to download cover for ISBN:', isbn)
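The OPENLIBRARY template above is the whole protocol: one GET per ISBN, with default=false so a missing cover comes back as a 404 rather than a placeholder image. A sketch of the URL it requests (the ISBN is a placeholder):

    isbn = '9780316769488'  # placeholder
    url = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false' % isbn
    # br.open_novisit(url, timeout=30).read() yields the JPEG bytes, or raises
    # with getcode() == 404 when Open Library has no cover for that ISBN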


@ -11,9 +11,8 @@ import os, tempfile, time
from Queue import Queue, Empty
from threading import Event
from calibre.customize.ui import metadata_plugins
from calibre import prints
from calibre import prints, sanitize_file_name2
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import create_log
@ -46,15 +45,6 @@ def authors_test(authors):
return test
def _test_fields(touched_fields, mi):
for key in touched_fields:
if key.startswith('identifier:'):
key = key.partition(':')[-1]
if not mi.has_identifier(key):
return 'identifier: ' + key
elif mi.is_null(key):
return key
def test_identify_plugin(name, tests):
'''
@ -99,11 +89,19 @@ def test_identify_plugin(name, tests):
except Empty:
break
prints('Found', len(results), 'matches:')
prints('Found', len(results), 'matches:', end=' ')
prints('Smaller relevance means better match')
for mi in results:
results.sort(key=plugin.identify_results_keygen(
title=kwargs.get('title', None), authors=kwargs.get('authors',
None), identifiers=kwargs.get('identifiers', {})))
for i, mi in enumerate(results):
prints('*'*30, 'Relevance:', i, '*'*30)
prints(mi)
prints('\n\n')
prints('\nCached cover URL :',
plugin.get_cached_cover_url(mi.identifiers))
prints('*'*75, '\n\n')
possibles = []
for mi in results:
@ -120,13 +118,42 @@ def test_identify_plugin(name, tests):
prints('Log saved to', lf)
raise SystemExit(1)
good = [x for x in possibles if _test_fields(plugin.touched_fields, x) is
good = [x for x in possibles if plugin.test_fields(x) is
None]
if not good:
prints('Failed to find', _test_fields(plugin.touched_fields,
possibles[0]))
prints('Failed to find', plugin.test_fields(possibles[0]))
raise SystemExit(1)
if results[0] is not possibles[0]:
prints('Most relevant result failed the tests')
raise SystemExit(1)
if 'cover' in plugin.capabilities:
rq = Queue()
mi = results[0]
plugin.download_cover(log, rq, abort, title=mi.title,
authors=mi.authors, identifiers=mi.identifiers)
results = []
while True:
try:
results.append(rq.get_nowait())
except Empty:
break
if not results:
prints('Cover download failed')
raise SystemExit(1)
cdata = results[0]
cover = os.path.join(tdir, plugin.name.replace(' ',
'')+'-%s-cover.jpg'%sanitize_file_name2(mi.title.replace(' ',
'_')))
with open(cover, 'wb') as f:
f.write(cdata)
prints('Cover downloaded to:', cover)
if len(cdata) < 10240:
prints('Downloaded cover too small')
raise SystemExit(1)
prints('Average time per query', sum(times)/len(times))


@ -147,7 +147,7 @@ class TOC(list):
if path and os.access(path, os.R_OK):
try:
self.read_ncx_toc(path)
except Exception, err:
except Exception as err:
print 'WARNING: Invalid NCX file:', err
return
cwd = os.path.abspath(self.base_path)


@ -769,7 +769,8 @@ class MobiReader(object):
def extract_text(self):
self.log.debug('Extracting text...')
text_sections = [self.text_section(i) for i in range(1, self.book_header.records + 1)]
text_sections = [self.text_section(i) for i in range(1,
min(self.book_header.records + 1, len(self.sections)))]
processed_records = list(range(0, self.book_header.records + 1))
self.mobi_html = ''


@ -884,13 +884,13 @@ class Manifest(object):
def first_pass(data):
try:
data = etree.fromstring(data, parser=parser)
except etree.XMLSyntaxError, err:
except etree.XMLSyntaxError as err:
self.oeb.log.exception('Initial parse failed:')
repl = lambda m: ENTITYDEFS.get(m.group(1), m.group(0))
data = ENTITY_RE.sub(repl, data)
try:
data = etree.fromstring(data, parser=parser)
except etree.XMLSyntaxError, err:
except etree.XMLSyntaxError as err:
self.oeb.logger.warn('Parsing file %r as HTML' % self.href)
if err.args and err.args[0].startswith('Excessive depth'):
from lxml.html import soupparser


@ -0,0 +1,153 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from collections import Counter
from calibre.ebooks.oeb.base import OEB_STYLES, barename, XPath
class RemoveFakeMargins(object):
'''
Remove left and right margins from paragraphs/divs if the same margin is specified
on almost all the elements at that level.
Must be called only after CSS flattening
'''
def __call__(self, oeb, log, opts):
if not opts.remove_fake_margins:
return
self.oeb, self.log, self.opts = oeb, log, opts
stylesheet = None
self.levels = {}
self.stats = {}
self.selector_map = {}
for item in self.oeb.manifest:
if item.media_type.lower() in OEB_STYLES:
stylesheet = item
break
if stylesheet is None:
return
self.log('Removing fake margins...')
stylesheet = stylesheet.data
from cssutils.css import CSSRule
for rule in stylesheet.cssRules.rulesOfType(CSSRule.STYLE_RULE):
self.selector_map[rule.selectorList.selectorText] = rule.style
self.find_levels()
for level in self.levels:
self.process_level(level)
def get_margins(self, elem):
cls = elem.get('class', None)
if cls:
style = self.selector_map.get('.'+cls, None)
if style:
return style.marginLeft, style.marginRight, style
return '', '', None
def process_level(self, level):
elems = self.levels[level]
self.stats[level+'_left'] = Counter()
self.stats[level+'_right'] = Counter()
for elem in elems:
lm, rm = self.get_margins(elem)[:2]
self.stats[level+'_left'][lm] += 1
self.stats[level+'_right'][rm] += 1
self.log.debug(level, ' left margin stats:', self.stats[level+'_left'])
self.log.debug(level, ' right margin stats:', self.stats[level+'_right'])
remove_left = self.analyze_stats(self.stats[level+'_left'])
remove_right = self.analyze_stats(self.stats[level+'_right'])
if remove_left:
mcl = self.stats[level+'_left'].most_common(1)[0][0]
self.log('Removing level %s left margin of:'%level, mcl)
if remove_right:
mcr = self.stats[level+'_right'].most_common(1)[0][0]
self.log('Removing level %s right margin of:'%level, mcr)
if remove_left or remove_right:
for elem in elems:
lm, rm, style = self.get_margins(elem)
if remove_left and lm == mcl:
style.removeProperty('margin-left')
if remove_right and rm == mcr:
style.removeProperty('margin-right')
def find_levels(self):
def level_of(elem, body):
ans = 1
while elem.getparent() is not body:
ans += 1
elem = elem.getparent()
return ans
paras = XPath('descendant::h:p|descendant::h:div')
for item in self.oeb.spine:
body = XPath('//h:body')(item.data)
if not body:
continue
body = body[0]
for p in paras(body):
level = level_of(p, body)
level = '%s_%d'%(barename(p.tag), level)
if level not in self.levels:
self.levels[level] = []
self.levels[level].append(p)
remove = set()
for k, v in self.levels.iteritems():
num = len(v)
self.log.debug('Found %d items of level:'%num, k)
level = int(k.split('_')[-1])
tag = k.split('_')[0]
if tag == 'p' and num < 25:
remove.add(k)
if tag == 'div':
if level > 2 and num < 25:
remove.add(k)
elif level < 3:
# Check each level < 3 element and only keep those
# that have many child paras
for elem in list(v):
children = len(paras(elem))
if children < 5:
v.remove(elem)
for k in remove:
self.levels.pop(k)
self.log.debug('Ignoring level', k)
def analyze_stats(self, stats):
if not stats:
return False
mc = stats.most_common(1)
if len(mc) > 1:
return False
mc = mc[0]
most_common, most_common_count = mc
if not most_common or most_common == '0':
return False
total = sum(stats.values())
# True if greater than 95% of elements have the same margin
return most_common_count/total > 0.95
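A worked example of the 95% rule above, assuming process_level() has already filled the per-level Counter:

    from collections import Counter

    stats = Counter({'10pt': 97, '0': 2, '2em': 1})  # hypothetical left-margin counts
    most_common, count = stats.most_common(1)[0]
    fake = bool(most_common) and most_common != '0' and \
            count / float(sum(stats.values())) > 0.95
    # fake is True: 97% of paragraphs at this level share a 10pt left margin,
    # so that margin would be stripped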


@ -10,10 +10,10 @@ import re
from lxml import etree
from urlparse import urlparse
from collections import OrderedDict
from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text
from calibre.ebooks import ConversionError
from calibre.utils.ordered_dict import OrderedDict
def XPath(x):
try:


@ -103,7 +103,7 @@ def main(args=sys.argv, name=''):
try:
decrypt(args[0], opts.output, args[1])
except DecryptionError, e:
except DecryptionError as e:
print e.value
return 1


@ -50,7 +50,7 @@ def pdftohtml(output_dir, pdf_path, no_images):
try:
p = popen(cmd, stderr=logf._fd, stdout=logf._fd,
stdin=subprocess.PIPE)
except OSError, err:
except OSError as err:
if err.errno == 2:
raise ConversionError(_('Could not find pdftohtml, check it is in your PATH'))
else:
@ -60,7 +60,7 @@ def pdftohtml(output_dir, pdf_path, no_images):
try:
ret = p.wait()
break
except OSError, e:
except OSError as e:
if e.errno == errno.EINTR:
continue
else:


@ -22,6 +22,7 @@ border_style_map = {
'dot-dot-dash': 'dotted',
'outset': 'outset',
'tripple': 'double',
'triple': 'double',
'thick-thin-small': 'solid',
'thin-thick-small': 'solid',
'thin-thick-thin-small': 'solid',
@ -267,7 +268,7 @@ class RTFInput(InputFormatPlugin):
self.log('Converting RTF to XML...')
try:
xml = self.generate_xml(stream.name)
except RtfInvalidCodeException, e:
except RtfInvalidCodeException as e:
raise ValueError(_('This RTF file has a feature calibre does not '
'support. Convert it to HTML first and then try it.\n%s')%e)


@ -245,8 +245,11 @@ class Colors:
self.__token_info = line[:16]
action = self.__state_dict.get(self.__state)
if action is None:
try:
sys.stderr.write('no matching state in module colors.py\n')
sys.stderr.write(self.__state + '\n')
except:
pass
action(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:


@ -85,7 +85,7 @@ class SNBFile:
uncompressedData += bzdc.decompress(data)
else:
uncompressedData += data
except Exception, e:
except Exception as e:
print e
if len(uncompressedData) != self.plainStreamSizeUncompressed:
raise Exception()


@ -1,4 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
PyTextile
@ -202,26 +204,27 @@ class Textile(object):
(re.compile(r'{Rs}'), r'&#8360;'), # Rupee
(re.compile(r'{(C=|=C)}'), r'&#8364;'), # euro
(re.compile(r'{tm}'), r'&#8482;'), # trademark
(re.compile(r'{spade}'), r'&#9824;'), # spade
(re.compile(r'{club}'), r'&#9827;'), # club
(re.compile(r'{heart}'), r'&#9829;'), # heart
(re.compile(r'{diamond}'), r'&#9830;'), # diamond
(re.compile(r'{spades?}'), r'&#9824;'), # spade
(re.compile(r'{clubs?}'), r'&#9827;'), # club
(re.compile(r'{hearts?}'), r'&#9829;'), # heart
(re.compile(r'{diam(onds?|s)}'), r'&#9830;'), # diamond
(re.compile(r'{"}'), r'&#34;'), # double-quote
(re.compile(r"{'}"), r'&#39;'), # single-quote
(re.compile(r"{(|'/|/')}"), r'&#8217;'), # closing-single-quote - apostrophe
(re.compile(r"{(|\\'|'\\)}"), r'&#8216;'), # opening-single-quote
(re.compile(r'{(”|"/|/")}'), r'&#8221;'), # closing-double-quote
(re.compile(r'{(“|\\"|"\\)}'), r'&#8220;'), # opening-double-quote
]
glyph_defaults = [
(re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), r'\1\2&#215;\3'), # dimension sign
(re.compile(r'(\d+)\'', re.I), r'\1&#8242;'), # prime
(re.compile(r'(\d+)\"', re.I), r'\1&#8243;'), # prime-double
(re.compile(r"(\w)\'(\w)"), r'\1&#8217;\2'), # apostrophe's
(re.compile(r'(\s)\'(\d+\w?)\b(?!\')'), r'\1&#8217;\2'), # back in '88
(re.compile(r'(\S)\'(?=\s|\'|<|$)'), r'\1&#8217;'), # single closing
(re.compile(r'\'/'), r'&#8216;'), # single opening
(re.compile(r'(\")\"'), r'\1&#8221;'), # double closing - following another
(re.compile(r'(\S)\"(?=\s|&#8221;|<|$)'), r'\1&#8221;'), # double closing
(re.compile(r'"'), r'&#8220;'), # double opening
(re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), r'<acronym title="\2">\1</acronym>'), # 3+ uppercase acronym
(re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), r'<span class="caps">\1</span>'), # 3+ uppercase
(re.compile(r'\b(\s{0,1})?\.{3}'), r'\1&#8260;'), # ellipsis
(re.compile(r'(\s?)--(\s?)'), r'\1&#8212;\2'), # em dash
(re.compile(r'\b(\s{0,1})?\.{3}'), r'\1&#8230;'), # ellipsis
(re.compile(r'^[\*_-]{3,}$', re.M), r'<hr />'), # <hr> scene-break
(re.compile(r'\b--\b'), r'&#8212;'), # em dash
(re.compile(r'(\s)--(\s)'), r'\1&#8212;\2'), # em dash
(re.compile(r'\s-(?:\s|$)'), r' &#8211; '), # en dash
(re.compile(r'\b( ?)[([]TM[])]', re.I), r'\1&#8482;'), # trademark
(re.compile(r'\b( ?)[([]R[])]', re.I), r'\1&#174;'), # registered
@ -747,7 +750,7 @@ class Textile(object):
return url
def shelve(self, text):
id = str(uuid.uuid4())
id = str(uuid.uuid4()) + 'c'
self.shelf[id] = text
return id
@ -865,11 +868,11 @@ class Textile(object):
'hello <span class="bob">span <strong>strong</strong> and <b>bold</b></span> goodbye'
"""
qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__', r'_', r'%', r'\+', r'~', r'\^')
pnct = ".,\"'?!;:()"
pnct = ".,\"'?!;:"
for qtag in qtags:
pattern = re.compile(r"""
(?:^|(?<=[\s>%(pnct)s])|\[|([\]}]))
(?:^|(?<=[\s>%(pnct)s\(])|\[|([\]}]))
(%(qtag)s)(?!%(qtag)s)
(%(c)s)
(?::(\S+))?


@ -165,6 +165,7 @@ class TXTInput(InputFormatPlugin):
elif options.formatting_type == 'textile':
log.debug('Running text through textile conversion...')
html = convert_textile(txt)
#setattr(options, 'smarten_punctuation', True)
else:
log.debug('Running text through basic conversion...')
flow_size = getattr(options, 'flow_size', 0)


@ -32,7 +32,7 @@ class Worker(Thread):
def run(self):
try:
self.doit()
except Exception, err:
except Exception as err:
import traceback
try:
err = unicode(err)


@ -25,8 +25,11 @@ class PreferencesAction(InterfaceAction):
self.gui.run_wizard)
if not DEBUG:
pm.addSeparator()
pm.addAction(QIcon(I('debug.png')), _('Restart in debug mode'),
ac = pm.addAction(QIcon(I('debug.png')), _('Restart in debug mode'),
self.debug_restart)
ac.setShortcut('Ctrl+Shift+R')
self.gui.addAction(ac)
self.qaction.setMenu(pm)
self.preferences_menu = pm
for x in (self.gui.preferences_action, self.qaction):


@ -78,7 +78,7 @@ class RecursiveFind(QThread): # {{{
if isinstance(root, unicode):
root = root.encode(filesystem_encoding)
self.walk(root)
except Exception, err:
except Exception as err:
import traceback
traceback.print_exc()
try:


@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import textwrap, codecs
import textwrap, codecs, importlib
from functools import partial
from PyQt4.Qt import QWidget, QSpinBox, QDoubleSpinBox, QLineEdit, QTextEdit, \
@ -22,8 +22,8 @@ from calibre.customize.ui import plugin_for_input_format
def config_widget_for_input_plugin(plugin):
name = plugin.name.lower().replace(' ', '_')
try:
return __import__('calibre.gui2.convert.'+name,
fromlist=[1]).PluginWidget
return importlib.import_module(
'calibre.gui2.convert.'+name).PluginWidget
except ImportError:
pass


@ -4,7 +4,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import shutil
import shutil, importlib
from PyQt4.Qt import QString, SIGNAL
@ -82,8 +82,8 @@ class BulkConfig(Config):
output_widget = None
name = self.plumber.output_plugin.name.lower().replace(' ', '_')
try:
output_widget = __import__('calibre.gui2.convert.'+name,
fromlist=[1])
output_widget = importlib.import_module(
'calibre.gui2.convert.'+name)
pw = output_widget.PluginWidget
pw.ICON = I('back.png')
pw.HELP = _('Options specific to the output format.')

View File

@ -192,7 +192,7 @@ class MetadataWidget(Widget, Ui_Form):
try:
cf = open(_file, "rb")
cover = cf.read()
except IOError, e:
except IOError as e:
d = error_dialog(self.parent(), _('Error reading file'),
_("<p>There was an error reading from file: <br /><b>") + _file + "</b></p><br />"+str(e))
d.exec_()


@ -69,7 +69,7 @@ class SearchAndReplaceWidget(Widget, Ui_Form):
try:
pat = unicode(x.regex)
re.compile(pat)
except Exception, err:
except Exception as err:
error_dialog(self, _('Invalid regular expression'),
_('Invalid regular expression: %s')%err, show=True)
return False


@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, cPickle, shutil
import sys, cPickle, shutil, importlib
from PyQt4.Qt import QString, SIGNAL, QAbstractListModel, Qt, QVariant, QFont
@ -182,8 +182,8 @@ class Config(ResizableDialog, Ui_Dialog):
output_widget = None
name = self.plumber.output_plugin.name.lower().replace(' ', '_')
try:
output_widget = __import__('calibre.gui2.convert.'+name,
fromlist=[1])
output_widget = importlib.import_module(
'calibre.gui2.convert.'+name)
pw = output_widget.PluginWidget
pw.ICON = I('back.png')
pw.HELP = _('Options specific to the output format.')
@ -193,8 +193,8 @@ class Config(ResizableDialog, Ui_Dialog):
input_widget = None
name = self.plumber.input_plugin.name.lower().replace(' ', '_')
try:
input_widget = __import__('calibre.gui2.convert.'+name,
fromlist=[1])
input_widget = importlib.import_module(
'calibre.gui2.convert.'+name)
pw = input_widget.PluginWidget
pw.ICON = I('forward.png')
pw.HELP = _('Options specific to the input format.')


@ -21,7 +21,7 @@ class StructureDetectionWidget(Widget, Ui_Form):
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent,
['chapter', 'chapter_mark',
'remove_first_image',
'remove_first_image', 'remove_fake_margins',
'insert_metadata', 'page_breaks_before']
)
self.db, self.book_id = db, book_id


@ -48,10 +48,10 @@
</property>
</widget>
</item>
<item row="6" column="0" colspan="3">
<item row="7" column="0" colspan="3">
<widget class="XPathEdit" name="opt_page_breaks_before" native="true"/>
</item>
<item row="7" column="0" colspan="3">
<item row="8" column="0" colspan="3">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -77,7 +77,7 @@
</property>
</spacer>
</item>
<item row="4" column="0" colspan="3">
<item row="5" column="0" colspan="3">
<widget class="QLabel" name="label_2">
<property name="text">
<string>The header and footer removal options have been replaced by the Search &amp; Replace options. Click the Search &amp; Replace category in the bar to the left to use these options. Leave the replace field blank and enter your header/footer removal regexps into the search field.</string>
@ -87,6 +87,13 @@
</property>
</widget>
</item>
<item row="2" column="2">
<widget class="QCheckBox" name="opt_remove_fake_margins">
<property name="text">
<string>Remove &amp;fake margins</string>
</property>
</widget>
</item>
</layout>
</widget>
<customwidgets>


@ -226,10 +226,18 @@ class Comments(Base):
class Text(Base):
def setup_ui(self, parent):
if self.col_metadata['display'].get('is_names', False):
self.sep = u' & '
else:
self.sep = u', '
values = self.all_values = list(self.db.all_custom(num=self.col_id))
values.sort(key=sort_key)
if self.col_metadata['is_multiple']:
w = MultiCompleteLineEdit(parent)
w.set_separator(self.sep.strip())
if self.sep == u' & ':
w.set_space_before_sep(True)
w.set_add_separator(tweaks['authors_completer_append_separator'])
w.update_items_cache(values)
w.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Preferred)
else:
@ -261,12 +269,12 @@ class Text(Base):
if self.col_metadata['is_multiple']:
if not val:
val = []
self.widgets[1].setText(u', '.join(val))
self.widgets[1].setText(self.sep.join(val))
def getter(self):
if self.col_metadata['is_multiple']:
val = unicode(self.widgets[1].text()).strip()
ans = [x.strip() for x in val.split(',') if x.strip()]
ans = [x.strip() for x in val.split(self.sep.strip()) if x.strip()]
if not ans:
ans = None
return ans
@ -847,13 +855,20 @@ class BulkText(BulkBase):
self.main_widget.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Preferred)
self.adding_widget = self.main_widget
if not self.col_metadata['display'].get('is_names', False):
w = RemoveTags(parent, values)
self.widgets.append(QLabel('&'+self.col_metadata['name']+': ' +
_('tags to remove'), parent))
self.widgets.append(w)
self.removing_widget = w
self.main_widget.set_separator(',')
w.tags_box.textChanged.connect(self.a_c_checkbox_changed)
w.checkbox.stateChanged.connect(self.a_c_checkbox_changed)
else:
self.main_widget.set_separator('&')
self.main_widget.set_space_before_sep(True)
self.main_widget.set_add_separator(
tweaks['authors_completer_append_separator'])
else:
self.make_widgets(parent, MultiCompleteComboBox)
self.main_widget.set_separator(None)
@ -882,6 +897,11 @@ class BulkText(BulkBase):
if not self.a_c_checkbox.isChecked():
return
if self.col_metadata['is_multiple']:
if self.col_metadata['display'].get('is_names', False):
val = self.gui_val
add = [v.strip() for v in val.split('&') if v.strip()]
self.db.set_custom_bulk(book_ids, add, num=self.col_id)
else:
remove_all, adding, rtext = self.gui_val
remove = set()
if remove_all:
@ -895,8 +915,8 @@ class BulkText(BulkBase):
add = set([v.strip() for v in txt.split(',')])
else:
add = set()
self.db.set_custom_bulk_multiple(book_ids, add=add, remove=remove,
num=self.col_id)
self.db.set_custom_bulk_multiple(book_ids, add=add,
remove=remove, num=self.col_id)
else:
val = self.gui_val
val = self.normalize_ui_val(val)
@ -905,10 +925,11 @@ class BulkText(BulkBase):
def getter(self):
if self.col_metadata['is_multiple']:
if not self.col_metadata['display'].get('is_names', False):
return self.removing_widget.checkbox.isChecked(), \
unicode(self.adding_widget.text()), \
unicode(self.removing_widget.tags_box.text())
return unicode(self.adding_widget.text())
val = unicode(self.main_widget.currentText()).strip()
if not val:
val = None


@ -64,7 +64,7 @@ class DeviceJob(BaseJob): # {{{
self.result = self.func(*self.args, **self.kwargs)
if self._aborted:
return
except (Exception, SystemExit), err:
except (Exception, SystemExit) as err:
if self._aborted:
return
self.failed = True
@ -162,7 +162,7 @@ class DeviceManager(Thread): # {{{
dev.reset(detected_device=detected_device,
report_progress=self.report_progress)
dev.open(self.current_library_uuid)
except OpenFeedback, e:
except OpenFeedback as e:
if dev not in self.ejected_devices:
self.open_feedback_msg(dev.get_gui_name(), e.feedback_msg)
self.ejected_devices.add(dev)


@ -133,7 +133,7 @@ class ConfigWidget(QWidget, Ui_ConfigWidget):
try:
validation_formatter.validate(tmpl)
return True
except Exception, err:
except Exception as err:
error_dialog(self, _('Invalid template'),
'<p>'+_('The template %s is invalid:')%tmpl + \
'<br>'+unicode(err), show=True)


@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, sys
import os, sys, importlib
from calibre.customize.ui import config
from calibre.gui2.dialogs.catalog_ui import Ui_Dialog
@ -43,8 +43,7 @@ class Catalog(ResizableDialog, Ui_Dialog):
name = plugin.name.lower().replace(' ', '_')
if type(plugin) in builtin_plugins:
try:
catalog_widget = __import__('calibre.gui2.catalog.'+name,
fromlist=[1])
catalog_widget = importlib.import_module('calibre.gui2.catalog.'+name)
pw = catalog_widget.PluginWidget()
pw.initialize(name, db)
pw.ICON = I('forward.png')
@ -75,7 +74,7 @@ class Catalog(ResizableDialog, Ui_Dialog):
# Import the dynamic PluginWidget() from .py file provided in plugin.zip
try:
sys.path.insert(0, plugin.resources_path)
catalog_widget = __import__(name, fromlist=[1])
catalog_widget = importlib.import_module(name)
pw = catalog_widget.PluginWidget()
pw.initialize(name)
pw.ICON = I('forward.png')


@ -68,7 +68,7 @@ class DBCheck(QDialog): # {{{
self.start_load()
return
QTimer.singleShot(0, self.do_one_dump)
except Exception, e:
except Exception as e:
import traceback
self.error = (as_unicode(e), traceback.format_exc())
self.reject()
@ -90,7 +90,7 @@ class DBCheck(QDialog): # {{{
self.conn.commit()
QTimer.singleShot(0, self.do_one_load)
except Exception, e:
except Exception as e:
import traceback
self.error = (as_unicode(e), traceback.format_exc())
self.reject()
@ -111,7 +111,7 @@ class DBCheck(QDialog): # {{{
self.pb.setValue(self.pb.value() + 1)
self.count -= 1
QTimer.singleShot(0, self.do_one_load)
except Exception, e:
except Exception as e:
import traceback
self.error = (as_unicode(e), traceback.format_exc())
self.reject()


@ -120,7 +120,7 @@ class MyBlockingBusy(QDialog): # {{{
self.msg.setText(self.msg_text.format(self.phases[self.current_phase],
percent))
self.do_one(id)
except Exception, err:
except Exception as err:
import traceback
try:
err = unicode(err)
@ -653,7 +653,10 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
if self.destination_field_fm['is_multiple']:
if self.comma_separated.isChecked():
if dest == 'authors':
if dest == 'authors' or \
(self.destination_field_fm['is_custom'] and
self.destination_field_fm['datatype'] == 'text' and
self.destination_field_fm['display'].get('is_names', False)):
splitter = ' & '
else:
splitter = ','


@ -76,7 +76,7 @@ class CoverFetcher(Thread): # {{{
self.cover_data, self.errors = download_cover(mi,
timeout=self.timeout)
except Exception, e:
except Exception as e:
self.exception = e
self.traceback = traceback.format_exc()
print self.traceback
@ -183,7 +183,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
try:
cf = open(_file, "rb")
cover = cf.read()
except IOError, e:
except IOError as e:
d = error_dialog(self, _('Error reading file'),
_("<p>There was an error reading from file: <br /><b>") + _file + "</b></p><br />"+str(e))
d.exec_()


@ -9,6 +9,7 @@ Scheduler for automated recipe downloads
from datetime import timedelta
import calendar, textwrap
from collections import OrderedDict
from PyQt4.Qt import QDialog, Qt, QTime, QObject, QMenu, QHBoxLayout, \
QAction, QIcon, QMutex, QTimer, pyqtSignal, QWidget, QGridLayout, \
@ -20,7 +21,6 @@ from calibre.web.feeds.recipes.model import RecipeModel
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.date import utcnow
from calibre.utils.network import internet_connected
from calibre.utils.ordered_dict import OrderedDict
from calibre import force_unicode
def convert_day_time_schedule(val):


@ -122,6 +122,8 @@ class TagEditor(QDialog, Ui_TagEditor):
tags = unicode(self.add_tag_input.text()).split(',')
for tag in tags:
tag = tag.strip()
if not tag:
continue
for item in self.available_tags.findItems(tag, Qt.MatchFixedString):
self.available_tags.takeItem(self.available_tags.row(item))
if tag not in self.tags:


@ -237,7 +237,7 @@ class %(classname)s(%(base_class)s):
try:
compile_recipe(src)
except Exception, err:
except Exception as err:
error_dialog(self, _('Invalid input'),
_('<p>Could not create recipe. Error:<br>%s')%str(err)).exec_()
return
@ -246,7 +246,7 @@ class %(classname)s(%(base_class)s):
src = unicode(self.source_code.toPlainText())
try:
title = compile_recipe(src).title
except Exception, err:
except Exception as err:
error_dialog(self, _('Invalid input'),
_('<p>Could not create recipe. Error:<br>%s')%str(err)).exec_()
return
@ -333,7 +333,7 @@ class %(classname)s(%(base_class)s):
try:
profile = open(file, 'rb').read().decode('utf-8')
title = compile_recipe(profile).title
except Exception, err:
except Exception as err:
error_dialog(self, _('Invalid input'),
_('<p>Could not create recipe. Error:<br>%s')%str(err)).exec_()
return


@ -35,7 +35,7 @@ class Worker(Thread): # {{{
try:
br = browser()
br.retrieve(self.url, self.fpath, self.callback)
except Exception, e:
except Exception as e:
self.err = as_unicode(e)
import traceback
self.tb = traceback.format_exc()
@ -143,21 +143,27 @@ def dnd_has_extension(md, extensions):
urls = [unicode(u.toString()) for u in
md.urls()]
purls = [urlparse(u) for u in urls]
paths = [u2p(x) for x in purls]
if DEBUG:
prints('URLS:', urls)
prints('Paths:', [u2p(x) for x in purls])
prints('Paths:', paths)
exts = frozenset([posixpath.splitext(u.path)[1][1:].lower() for u in
purls])
exts = frozenset([posixpath.splitext(u)[1][1:].lower() for u in
paths])
return bool(exts.intersection(frozenset(extensions)))
return False
def _u2p(raw):
path = raw
if iswindows and path.startswith('/'):
path = path[1:]
return path.replace('/', os.sep)
def u2p(url):
path = url.path
if iswindows:
if path.startswith('/'):
path = path[1:]
ans = path.replace('/', os.sep)
ans = _u2p(path)
if not os.path.exists(ans):
ans = _u2p(url.path + '#' + url.fragment)
if os.path.exists(ans):
return ans
# Try unquoting the URL
@ -189,8 +195,9 @@ def dnd_get_image(md, image_exts=IMAGE_EXTENSIONS):
md.urls()]
purls = [urlparse(u) for u in urls]
# First look for a local file
images = [u2p(x) for x in purls if x.scheme in ('', 'file') and
posixpath.splitext(urllib.unquote(x.path))[1][1:].lower() in
images = [u2p(x) for x in purls if x.scheme in ('', 'file')]
images = [x for x in images if
posixpath.splitext(urllib.unquote(x))[1][1:].lower() in
image_exts]
images = [x for x in images if os.path.exists(x)]
p = QPixmap()
@ -235,8 +242,9 @@ def dnd_get_files(md, exts):
md.urls()]
purls = [urlparse(u) for u in urls]
# First look for a local file
local_files = [u2p(x) for x in purls if x.scheme in ('', 'file') and
posixpath.splitext(urllib.unquote(x.path))[1][1:].lower() in
local_files = [u2p(x) for x in purls if x.scheme in ('', 'file')]
local_files = [ p for p in local_files if
posixpath.splitext(urllib.unquote(p))[1][1:].lower() in
exts]
local_files = [x for x in local_files if os.path.exists(x)]
if local_files:


@ -116,7 +116,7 @@ class Emailer(Thread): # {{{
try:
self.sendmail(job)
break
except Exception, e:
except Exception as e:
if not self._run:
return
import traceback


@ -398,7 +398,7 @@ class CcTemplateDelegate(QStyledItemDelegate): # {{{
val = unicode(editor.textbox.toPlainText())
try:
validation_formatter.validate(val)
except Exception, err:
except Exception as err:
error_dialog(self.parent(), _('Invalid template'),
'<p>'+_('The template %s is invalid:')%val + \
'<br>'+str(err), show=True)


@ -640,18 +640,18 @@ class BooksModel(QAbstractTableModel): # {{{
return self.bool_yes_icon
return self.bool_blank_icon
def text_type(r, mult=False, idx=-1):
def text_type(r, mult=None, idx=-1):
text = self.db.data[r][idx]
if text and mult:
return QVariant(', '.join(sorted(text.split('|'),key=sort_key)))
if text and mult is not None:
if mult:
return QVariant(u' & '.join(text.split('|')))
return QVariant(u', '.join(sorted(text.split('|'),key=sort_key)))
return QVariant(text)
def decorated_text_type(r, mult=False, idx=-1):
def decorated_text_type(r, idx=-1):
text = self.db.data[r][idx]
if force_to_bool(text) is not None:
return None
if text and mult:
return QVariant(', '.join(sorted(text.split('|'),key=sort_key)))
return QVariant(text)
def number_type(r, idx=-1):
@ -659,7 +659,7 @@ class BooksModel(QAbstractTableModel): # {{{
self.dc = {
'title' : functools.partial(text_type,
idx=self.db.field_metadata['title']['rec_index'], mult=False),
idx=self.db.field_metadata['title']['rec_index'], mult=None),
'authors' : functools.partial(authors,
idx=self.db.field_metadata['authors']['rec_index']),
'size' : functools.partial(size,
@ -671,14 +671,14 @@ class BooksModel(QAbstractTableModel): # {{{
'rating' : functools.partial(rating_type,
idx=self.db.field_metadata['rating']['rec_index']),
'publisher': functools.partial(text_type,
idx=self.db.field_metadata['publisher']['rec_index'], mult=False),
idx=self.db.field_metadata['publisher']['rec_index'], mult=None),
'tags' : functools.partial(tags,
idx=self.db.field_metadata['tags']['rec_index']),
'series' : functools.partial(series_type,
idx=self.db.field_metadata['series']['rec_index'],
siix=self.db.field_metadata['series_index']['rec_index']),
'ondevice' : functools.partial(text_type,
idx=self.db.field_metadata['ondevice']['rec_index'], mult=False),
idx=self.db.field_metadata['ondevice']['rec_index'], mult=None),
}
self.dc_decorator = {
@ -692,11 +692,12 @@ class BooksModel(QAbstractTableModel): # {{{
datatype = self.custom_columns[col]['datatype']
if datatype in ('text', 'comments', 'composite', 'enumeration'):
mult=self.custom_columns[col]['is_multiple']
if mult is not None:
mult = self.custom_columns[col]['display'].get('is_names', False)
self.dc[col] = functools.partial(text_type, idx=idx, mult=mult)
if datatype in ['text', 'composite', 'enumeration'] and not mult:
if self.custom_columns[col]['display'].get('use_decorations', False):
self.dc[col] = functools.partial(decorated_text_type,
idx=idx, mult=mult)
self.dc[col] = functools.partial(decorated_text_type, idx=idx)
self.dc_decorator[col] = functools.partial(
bool_type_decorator, idx=idx,
bool_cols_are_tristate=


@ -78,6 +78,7 @@ class BooksView(QTableView): # {{{
self.pubdate_delegate = PubDateDelegate(self)
self.tags_delegate = CompleteDelegate(self, ',', 'all_tags')
self.authors_delegate = CompleteDelegate(self, '&', 'all_author_names', True)
self.cc_names_delegate = CompleteDelegate(self, '&', 'all_custom', True)
self.series_delegate = TextDelegate(self)
self.publisher_delegate = TextDelegate(self)
self.text_delegate = TextDelegate(self)
@ -410,6 +411,7 @@ class BooksView(QTableView): # {{{
self.save_state()
self._model.set_database(db)
self.tags_delegate.set_database(db)
self.cc_names_delegate.set_database(db)
self.authors_delegate.set_database(db)
self.series_delegate.set_auto_complete_function(db.all_series)
self.publisher_delegate.set_auto_complete_function(db.all_publishers)
@ -431,12 +433,17 @@ class BooksView(QTableView): # {{{
self.setItemDelegateForColumn(cm.index(colhead), delegate)
elif cc['datatype'] == 'comments':
self.setItemDelegateForColumn(cm.index(colhead), self.cc_comments_delegate)
elif cc['datatype'] in ('text', 'series'):
elif cc['datatype'] == 'text':
if cc['is_multiple']:
self.setItemDelegateForColumn(cm.index(colhead), self.tags_delegate)
if cc['display'].get('is_names', False):
self.setItemDelegateForColumn(cm.index(colhead),
self.cc_names_delegate)
else:
self.setItemDelegateForColumn(cm.index(colhead),
self.tags_delegate)
else:
self.setItemDelegateForColumn(cm.index(colhead), self.cc_text_delegate)
elif cc['datatype'] in ('int', 'float'):
elif cc['datatype'] in ('series', 'int', 'float'):
self.setItemDelegateForColumn(cm.index(colhead), self.cc_text_delegate)
elif cc['datatype'] == 'bool':
self.setItemDelegateForColumn(cm.index(colhead), self.cc_bool_delegate)


@ -35,7 +35,7 @@ class RenderWorker(QThread):
self.stream = None
if self.aborted:
self.lrf = None
except Exception, err:
except Exception as err:
self.lrf, self.stream = None, None
self.exception = err
self.formatted_traceback = traceback.format_exc()

View File

@ -399,7 +399,7 @@ def main(args=sys.argv):
if __name__ == '__main__':
try:
sys.exit(main())
except Exception, err:
except Exception as err:
if not iswindows: raise
tb = traceback.format_exc()
from PyQt4.QtGui import QErrorMessage

View File

@ -656,7 +656,7 @@ class Cover(ImageView): # {{{
try:
cf = open(_file, "rb")
cover = cf.read()
except IOError, e:
except IOError as e:
d = error_dialog(self, _('Error reading file'),
_("<p>There was an error reading from file: <br /><b>")
+ _file + "</b></p><br />"+str(e))


@ -88,7 +88,7 @@ class DownloadMetadata(Thread):
def run(self):
try:
self._run()
except Exception, e:
except Exception as e:
self.exception = e
self.tb = traceback.format_exc()


@ -303,7 +303,7 @@ class MetadataSingleDialogBase(ResizableDialog):
return False
self.books_to_refresh |= getattr(widget, 'books_to_refresh',
set([]))
except IOError, err:
except IOError as err:
if err.errno == 13: # Permission denied
import traceback
fname = err.filename if err.filename else 'file'


@ -34,7 +34,7 @@ class DBUSNotifier(Notifier):
import dbus
self.dbus = dbus
self._notify = dbus.Interface(dbus.SessionBus().get_object(server, path), interface)
except Exception, err:
except Exception as err:
self.ok = False
self.err = str(err)


@ -31,9 +31,11 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
db = gui.library_view.model().db
r = self.register
r('worker_process_priority', prefs, choices=
[(_('Low'), 'low'), (_('Normal'), 'normal'), (_('High'), 'high')])
choices = [(_('Low'), 'low'), (_('Normal'), 'normal'), (_('High'),
'high')] if iswindows else \
[(_('Normal'), 'normal'), (_('Low'), 'low'), (_('Very low'),
'high')]
r('worker_process_priority', prefs, choices=choices)
r('network_timeout', prefs)
@ -60,9 +62,6 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
signal = getattr(self.opt_internally_viewed_formats, 'item'+signal)
signal.connect(self.internally_viewed_formats_changed)
self.settings['worker_process_priority'].gui_obj.setVisible(iswindows)
self.priority_label.setVisible(iswindows)
def initialize(self):
ConfigWidgetBase.initialize(self)


@ -5,6 +5,8 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import importlib
from PyQt4.Qt import QIcon, Qt, QStringListModel, QVariant
from calibre.gui2.preferences import ConfigWidgetBase, test_widget, AbortCommit
@ -104,8 +106,8 @@ class OutputOptions(Base):
for plugin in output_format_plugins():
name = plugin.name.lower().replace(' ', '_')
try:
output_widget = __import__('calibre.gui2.convert.'+name,
fromlist=[1])
output_widget = importlib.import_module(
'calibre.gui2.convert.'+name)
pw = output_widget.PluginWidget
self.conversion_widgets.append(pw)
except ImportError:


@ -63,7 +63,7 @@ class CreateCustomColumn(QDialog, Ui_QCreateCustomColumn):
for col, name in [('isbn', _('ISBN')), ('formats', _('Formats')),
('last_modified', _('Modified Date')), ('yesno', _('Yes/No')),
('tags', _('Tags')), ('series', _('Series')), ('rating',
_('Rating'))]:
_('Rating')), ('people', _("People's names"))]:
text += ' <a href="col:%s">%s</a>,'%(col, name)
text = text[:-1]
self.shortcuts.setText(text)
@ -125,6 +125,8 @@ class CreateCustomColumn(QDialog, Ui_QCreateCustomColumn):
self.datatype_changed()
if ct in ['text', 'composite', 'enumeration']:
self.use_decorations.setChecked(c['display'].get('use_decorations', False))
elif ct == '*text':
self.is_names.setChecked(c['display'].get('is_names', False))
self.exec_()
def shortcut_activated(self, url):
@ -134,6 +136,7 @@ class CreateCustomColumn(QDialog, Ui_QCreateCustomColumn):
'tags' : 1,
'series': 3,
'rating': 8,
'people': 1,
}.get(which, 10))
self.column_name_box.setText(which)
self.column_heading_box.setText({
@ -143,7 +146,9 @@ class CreateCustomColumn(QDialog, Ui_QCreateCustomColumn):
'tags': _('My Tags'),
'series': _('My Series'),
'rating': _('My Rating'),
'last_modified':_('Modified Date')}[which])
'last_modified':_('Modified Date'),
'people': _('People')}[which])
self.is_names.setChecked(which == 'people')
if self.composite_box.isVisible():
self.composite_box.setText(
{
@ -153,7 +158,6 @@ class CreateCustomColumn(QDialog, Ui_QCreateCustomColumn):
}[which])
self.composite_sort_by.setCurrentIndex(2 if which == 'last_modified' else 0)
def datatype_changed(self, *args):
try:
col_type = self.column_types[self.column_type_box.currentIndex()]['datatype']
@ -167,6 +171,7 @@ class CreateCustomColumn(QDialog, Ui_QCreateCustomColumn):
for x in ('box', 'default_label', 'label'):
getattr(self, 'enum_'+x).setVisible(col_type == 'enumeration')
self.use_decorations.setVisible(col_type in ['text', 'composite', 'enumeration'])
self.is_names.setVisible(col_type == '*text')
def accept(self):
col = unicode(self.column_name_box.text()).strip()
@ -241,6 +246,8 @@ class CreateCustomColumn(QDialog, Ui_QCreateCustomColumn):
return self.simple_error('', _('The value "{0}" is in the '
'list more than once').format(l[i]))
display_dict = {'enum_values': l}
elif col_type == 'text' and is_multiple:
display_dict = {'is_names': self.is_names.isChecked()}
if col_type in ['text', 'composite', 'enumeration']:
display_dict['use_decorations'] = self.use_decorations.checkState()
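
The hunks above add a "Contains names" option for multiple-value text columns: a 'people' shortcut, an is_names checkbox, and an is_names key stored in the column's display dict when the column is a multiple text column. A rough sketch of the display metadata the accept() path builds, with plain variables standing in for the dialog widgets:

# Hedged sketch: is_names_checked stands in for self.is_names.isChecked();
# the real dialog also merges 'use_decorations' for text/composite/enumeration.
col_type = 'text'
is_multiple = True
is_names_checked = True

display_dict = {}
if col_type == 'text' and is_multiple:
    display_dict = {'is_names': is_names_checked}

print(display_dict)  # {'is_names': True}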

View File

@ -9,7 +9,7 @@
<rect>
<x>0</x>
<y>0</y>
<width>603</width>
<width>831</width>
<height>344</height>
</rect>
</property>
@ -110,27 +110,37 @@
</item>
<item>
<widget class="QCheckBox" name="use_decorations">
<property name="text">
<string>Show checkmarks</string>
</property>
<property name="toolTip">
<string>Show check marks in the GUI. Values of 'yes', 'checked', and 'true'
will show a green check. Values of 'no', 'unchecked', and 'false' will show a red X.
Everything else will show nothing.</string>
</property>
<property name="text">
<string>Show checkmarks</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="is_names">
<property name="toolTip">
<string>Check this box if this column contains names, like the authors column.</string>
</property>
<property name="text">
<string>Contains names</string>
</property>
</widget>
</item>
<item>
<spacer name="horizontalSpacer_27">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
<horstretch>10</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
@ -241,25 +251,25 @@ Everything else will show nothing.</string>
</item>
<item>
<widget class="QCheckBox" name="composite_make_category">
<property name="text">
<string>Show in tags browser</string>
</property>
<property name="toolTip">
<string>If checked, this column will appear in the tags browser as a category</string>
</property>
<property name="text">
<string>Show in tags browser</string>
</property>
</widget>
</item>
<item>
<spacer name="horizontalSpacer_24">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
<horstretch>10</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>

View File

@ -65,7 +65,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
choices = set([k for k in db.field_metadata.all_field_keys()
if db.field_metadata[k]['is_category'] and
db.field_metadata[k]['datatype'] in ['text', 'series', 'enumeration']])
(db.field_metadata[k]['datatype'] in ['text', 'series', 'enumeration']) and
not db.field_metadata[k]['display'].get('is_names', False)])
choices -= set(['authors', 'publisher', 'formats', 'news', 'identifiers'])
choices |= set(['search'])
self.opt_categories_using_hierarchy.update_items_cache(choices)

View File

@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'
import textwrap
from functools import partial
from collections import OrderedDict
from PyQt4.Qt import QMainWindow, Qt, QIcon, QStatusBar, QFont, QWidget, \
QScrollArea, QStackedWidget, QVBoxLayout, QLabel, QFrame, QKeySequence, \
@ -18,7 +19,6 @@ from calibre.gui2 import gprefs, min_available_height, available_width, \
warning_dialog
from calibre.gui2.preferences import init_gui, AbortCommit, get_plugin
from calibre.customize.ui import preferences_plugins
from calibre.utils.ordered_dict import OrderedDict
ICON_SIZE = 32
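
This hunk, and the matching one in the plugins preferences file further below, replaces calibre's bundled calibre.utils.ordered_dict with the standard-library collections.OrderedDict (available since Python 2.7). A minimal usage sketch with placeholder keys:

from collections import OrderedDict

# OrderedDict preserves insertion order, which the preferences UI relies on
# when laying out its entries; 'first'/'second'/'third' are placeholders.
d = OrderedDict()
d['first'] = 1
d['second'] = 2
d['third'] = 3
assert list(d.keys()) == ['first', 'second', 'third']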

View File

@ -251,7 +251,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
if d != 0:
try:
validation_formatter.validate(s)
except Exception, err:
except Exception as err:
error_dialog(self, _('Invalid template'),
'<p>'+_('The template %s is invalid:')%s + \
'<br>'+str(err), show=True)

View File

@ -6,6 +6,7 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import textwrap, os
from collections import OrderedDict
from PyQt4.Qt import Qt, QModelIndex, QAbstractItemModel, QVariant, QIcon, \
QBrush
@ -19,7 +20,6 @@ from calibre.gui2 import NONE, error_dialog, info_dialog, choose_files, \
question_dialog, gprefs
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.utils.icu import lower
from calibre.utils.ordered_dict import OrderedDict
class PluginModel(QAbstractItemModel, SearchQueryParser): # {{{

Some files were not shown because too many files have changed in this diff.