mirror of https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00

commit 7bd9cd20fe: Sync with trunk. Revision 9165
@@ -30,3 +30,4 @@ nbproject/
 .project
 .pydevproject
 .settings/
+*.DS_Store
100  Changelog.yaml
@@ -19,6 +19,106 @@
 # new recipes:
 #  - title:
 
+- version: 0.8.0
+  date: 2011-05-06
+
+  new features:
+    - title: "Go to http://calibre-ebook.com/new-in/eight to see what's new in 0.8.0"
+      type: major
+
+- version: 0.7.59
+  date: 2011-04-30
+
+  bug fixes:
+    - title: "Fixes a bug in 0.7.58 that caused too small fonts when converting to MOBI for the Kindle. Apologies."
+
+    - title: "Apple driver: Handle invalid EPUBs that do not contain an OPF file"
+
+  new recipes:
+    - title: The Big Picture and Auto industry news
+      author: welovelucy
+
+    - title: Gazeta Prawna
+      author: Vroo
+
+    - title: Various Czech news sources
+      author: Tomas Latal
+
+    - title: Diario de Ibiza
+      author: Joan Tur
+
+- version: 0.7.58
+  date: 2011-04-29
+
+  new features:
+    - title: "Support for converting and reading metadata from Plucker format PDB files"
+      type: major
+
+    - title: "The metadata that is displayed in the book details panel on the right is now completely configurable via Preferences->Look & Feel"
+
+    - title: "Add a column that shows the date when the metadata of a book record was last modified in calibre. To see the column, right click on the column headers in calibre and select Show column->Modified. Note that the dates may be incorrect for books added with older versions of calibre."
+
+    - title: "Add a command line option to shut down a running calibre"
+
+    - title: "CHM Input: Store extracted files in the input/ sub dir for easy debugging when --debug-pipeline is specified"
+
+    - title: "Add a popup menu to the 'Create saved search' button to allow easy deleting of saved searches"
+
+  bug fixes:
+    - title: "Fix regression that broke converting to LIT in 0.7.57"
+      tickets: [769334]
+
+    - title: "Conversion pipeline: Remove encoding declarations from input HTML documents to guarantee that there is only a single encoding declaration in the output HTML."
+      tickets: [773337]
+
+    - title: "Correctly parenthesize searches that are used to make search restrictions"
+
+    - title: "Fix ratings in save to disk templates not being divided by 2"
+
+    - title: "TXT to EPUB: Underlined words (following quotes?) fail to become italics"
+      tickets: [772267]
+
+    - title: "Fix template function source code unavailable when not running calibre from source"
+
+    - title: "Fix adding HTML books from the top of a deep folder hierarchy being very slow"
+
+    - title: "Only set language in MOBI metadata if it is not null"
+
+    - title: "Fix 'count-of' searches (e.g., tags:#>3)."
+      tickets: [771175]
+
+    - title: "Fix regression that broke connection to iTunes in some cases"
+      tickets: [771164]
+
+    - title: "Fix buggy regex that made converting PDFs with the string ****************** very slow"
+      tickets: [770534]
+
+    - title: "Fix Ctrl+L shortcut to lookup word not working in ebook viewer"
+      tickets: [769492]
+
+    - title: "Fix regression that broke searching on boolean columns"
+
+  improved recipes:
+    - HBR Blogs
+    - The Marker
+    - Financial Times
+    - Clarin
+    - Honolulu Star Advertiser
+
+  new recipes:
+    - title: Novi Standard
+      author: Darko Miletic
+
+    - title: Autobild.ro and Social Diva
+      author: Silviu Cotoara
+
+    - title: Novinky
+      author: Tomas Latal
+
+    - title: "De Volkskrant (subscriber version)"
+      author: Selcal
+
 - version: 0.7.57
   date: 2011-04-22
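(As an aside on the "ratings not being divided by 2" fix listed above: a worked example of the halving involved, under the assumption, from calibre's usual data model, that ratings are stored on a 0-10 scale but displayed as 0-5 stars. The value below is invented for illustration.)

    stored = 8        # hypothetical database rating value, assumed 0-10
    print(stored // 2)  # -> 4, the star value a save-to-disk template should emit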
16  recipes/auto_blog.recipe  Normal file
@@ -0,0 +1,16 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AutoBlog(BasicNewsRecipe):
+    title = u'Auto Blog'
+    __author__ = 'Welovelucy'
+    language = 'en'
+    description = 'Auto industry news'
+    oldest_article = 7
+    max_articles_per_feed = 100
+
+    feeds = [(u'AutoBlog', u'http://www.autoblog.com/rss.xml')]
+
+    def print_version(self, url):
+        return url + 'print/'
+
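(A minimal sketch of the print_version hook this new recipe defines: calibre passes each article URL through it and downloads whatever it returns, so appending 'print/' fetches the printer-friendly page. The sample URL below is invented for illustration, not taken from the site.)

    def print_version(url):
        # mirrors the hook above, outside the class for a standalone demo
        return url + 'print/'

    print(print_version('http://www.autoblog.com/2011/04/30/sample-story/'))
    # -> http://www.autoblog.com/2011/04/30/sample-story/print/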
55  recipes/autobild.recipe  Normal file
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+auto-bild.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AutoBild(BasicNewsRecipe):
+    title = u'Auto Bild'
+    __author__ = u'Silviu Cotoar\u0103'
+    description = 'Auto'
+    publisher = 'Auto Bild'
+    oldest_article = 50
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Reviste,Auto'
+    encoding = 'utf-8'
+    cover_url = 'http://www.auto-bild.ro/images/autobild.gif'
+
+    conversion_options = {
+        'comments'  : description
+        ,'tags'      : category
+        ,'language'  : language
+        ,'publisher' : publisher
+    }
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'box_2 articol clearfix'})
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['detail']})
+        , dict(name='a', attrs={'id':['zoom_link']})
+        , dict(name='div', attrs={'class':['icons clearfix']})
+        , dict(name='div', attrs={'class':['pub_articol clearfix']})
+    ]
+
+    remove_tags_after = [
+        dict(name='div', attrs={'class':['pub_articol clearfix']})
+    ]
+
+    feeds = [
+        (u'Feeds', u'http://www.auto-bild.ro/rss/toate')
+    ]
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)
12  recipes/big_picture.recipe  Normal file
@@ -0,0 +1,12 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class BigPicture(BasicNewsRecipe):
+    title = u'The Big Picture'
+    __author__ = 'Welovelucy'
+    description = ('Macro perspective on capital markets, economy, technology'
+                   ' and digital media')
+    language = 'en'
+    oldest_article = 7
+    max_articles_per_feed = 100
+
+    feeds = [(u'Big Picture', u'http://feeds.feedburner.com/TheBigPicture')]
@@ -3,7 +3,8 @@
 
 __license__ = 'GPL v3'
 __copyright__ = '2010, Constantin Hofstetter <consti at consti.de>, Steffen Siebert <calibre at steffensiebert.de>'
-__version__ = '0.98' # 2011-04-10
+__version__ = '0.98'
+
 ''' http://brandeins.de - Wirtschaftsmagazin '''
 import re
 import string
@@ -13,8 +14,8 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
 class BrandEins(BasicNewsRecipe):
 
     title = u'brand eins'
-    __author__ = 'Constantin Hofstetter; Steffen Siebert'
-    description = u'Wirtschaftsmagazin: Gets the last full issue on default. Set a integer value for the username-field to get older issues: 1 -> the newest (but not complete) issue, 2 -> the last complete issue (default), 3 -> the issue before 2 etc.'
+    __author__ = 'Constantin Hofstetter'
+    description = u'Wirtschaftsmagazin'
     publisher ='brandeins.de'
     category = 'politics, business, wirtschaft, Germany'
     use_embedded_content = False
@@ -105,10 +106,11 @@ class BrandEins(BasicNewsRecipe):
         keys = issue_map.keys()
         keys.sort()
         keys.reverse()
-        selected_issue = issue_map[keys[issue-1]]
+        selected_issue_key = keys[issue - 1]
+        selected_issue = issue_map[selected_issue_key]
         url = selected_issue.get('href', False)
         # Get the title for the magazin - build it out of the title of the cover - take the issue and year;
-        self.title = "brand eins "+ re.search(r"(?P<date>\d\d\/\d\d\d\d)", selected_issue.find('img').get('title', False)).group('date')
+        self.title = "brand eins " + selected_issue_key[4:] + "/" + selected_issue_key[0:4]
         url = 'http://brandeins.de/'+url
 
         # url = "http://www.brandeins.de/archiv/magazin/tierisch.html"
@@ -161,3 +163,4 @@ class BrandEins(BasicNewsRecipe):
                 current_articles.append({'title': title, 'url': url, 'description': description, 'date':''})
             titles_and_articles.append([chapter_title, current_articles])
         return titles_and_articles
+
55  recipes/diario_ibiza.recipe  Normal file
@@ -0,0 +1,55 @@
+__license__ = 'GPL v3'
+__author__ = 'Joan Tur, based on El Pais version by Jordi Balcells & elargentino.com version by Darko Miletic'
+description = 'Principal periodico de las islas Pitiusas, Ibiza y Formentera (Espanya) - v1.06 (29/04/2011)'
+__docformat__ = 'restructuredtext en'
+
+'''
+diariodeibiza.es
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DiarioDeIbiza(BasicNewsRecipe):
+    __author__ = 'Joan Tur, cullet'
+    description = 'Principal periodico de las islas Pitiusas, Ibiza y Formentera (Espanya) - v1.06 (29/04/2011)'
+
+    cover_url = 'http://estaticos01.diariodeibiza.es//elementosWeb/mediaweb/images/logo.jpg'
+    title = u'Diario de Ibiza digital'
+    publisher = u'Editorial Prensa Iberica'
+    category = 'News, politics, culture, economy, general interest'
+    language = 'es'
+
+    encoding = 'iso-8859-1'
+
+    timefmt = '[%a, %d %b, %Y]'
+
+    oldest_article = 2
+    max_articles_per_feed = 20
+
+    use_embedded_content = False
+    recursion = 5
+
+    remove_javascript = True
+    no_stylesheets = True
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class':['noticia_titular','epigrafe','subtitulo','actualizada','noticia_fecha','noticia_texto']}),
+        dict(name='font', attrs={'class':['actualizada']})
+    ]
+
+    feeds = [
+        (u'Portada de Ibiza', u'http://www.diariodeibiza.es/elementosInt/rss/1'),
+        (u'Pitiuses i Balears', u'http://www.diariodeibiza.es/elementosInt/rss/2'),
+        (u'Opini\xf3n', u'http://www.diariodeibiza.es/elementosInt/rss/3'),
+        (u'Nacional', u'http://www.diariodeibiza.es/elementosInt/rss/4'),
+        (u'Internacional', u'http://www.diariodeibiza.es/elementosInt/rss/5'),
+        (u'Econom\xeda', u'http://www.diariodeibiza.es/elementosInt/rss/6'),
+        (u'Deportes', u'http://www.diariodeibiza.es/elementosInt/rss/7'),
+        (u'Sociedad', u'http://www.diariodeibiza.es/elementosInt/rss/8'),
+        (u'Ciencia', u'http://www.diariodeibiza.es/elementosInt/rss/11'),
+        (u'Tecnolog\xeda', u'http://www.diariodeibiza.es/elementosInt/rss/12'),
+        (u'Gente', u'http://www.diariodeibiza.es/elementosInt/rss/13'),
+        (u'Sucesos', u'http://www.diariodeibiza.es/elementosInt/rss/15'),
+        (u'Cultura', u'http://www.diariodeibiza.es/elementosInt/rss/16Piti')
+    ]
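(For readers new to these recipes, a standalone sketch of the keep_only_tags idea used above: only markup inside the named containers survives into the e-book. bs4 stands in here for calibre's bundled BeautifulSoup 3, and the sample HTML is invented; 'noticia_texto' is one of the real classes the recipe keeps.)

    from bs4 import BeautifulSoup  # assumption: bs4 as a stand-in parser

    html = '<div class="noticia_texto">story body</div><div class="menu">nav</div>'
    soup = BeautifulSoup(html, 'html.parser')
    kept = soup.find('div', attrs={'class': 'noticia_texto'})  # what keep_only_tags matches
    print(kept.get_text())  # -> story body; the nav div is discarded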
37  recipes/digizone.recipe  Normal file
@@ -0,0 +1,37 @@
+__license__ = 'GPL v3'
+__copyright__ = '2011, Tomas Latal<latal.tomas at gmail.com>'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DigiZoneCZ(BasicNewsRecipe):
+    title = 'DigiZone'
+    __author__ = 'Tomas Latal'
+    __version__ = '1.0'
+    __date__ = '30 April 2011'
+    description = u'Aktuality a \u010dl\xe1nky z DigiZone.cz'
+    oldest_article = 1
+    max_articles_per_feed = 10
+    encoding = 'iso-8859-2'
+    publisher = 'Internet Info s.r.o.'
+    category = 'digitalni vysilani, televize, CZ'
+    language = 'cs'
+    publication_type = 'newsportal'
+    no_stylesheets = True
+    remove_javascript = True
+    extra_css = 'p.perex{font-size: 1.2em; margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} \
+                 p.perex img {display:none;} \
+                 .urs p {margin: 0 0 0.8em 0;}'
+
+    feeds = [
+        (u'Aktuality', u'http://rss.digizone.cz/aktuality'),
+        (u'\u010cl\xe1nky', u'http://rss.digizone.cz/clanky')
+    ]
+
+    remove_tags_before = dict(id=['p-article','p-actuality'])
+
+    remove_tags_after = dict(id=['p-article','p-actuality'])
+
+    remove_tags = [
+        dict(attrs={'class':['path','mth','lbtr','serial','enquiry','links','dp-n','side','op-ab','op-view','op-sub','op-list',]}),
+        dict(id=['opinions','discussionList','similarItems','sidebar','footer','opl','promo-box'])
+    ]
@@ -12,7 +12,6 @@ class AdvancedUserRecipe1301860159(BasicNewsRecipe):
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
-    language = 'en_EN'
    remove_javascript = True
     keep_only_tags = [dict(name='div', attrs={'class':'modSectionTd2'})]
     remove_tags = [dict(name='a'),dict(name='hr')]
@@ -1,5 +1,5 @@
 __license__ = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 foxnews.com
 '''
@@ -23,6 +23,7 @@ class FoxNews(BasicNewsRecipe):
     extra_css = """
         body{font-family: Arial,sans-serif }
         .caption{font-size: x-small}
+        .author,.dateline{font-size: small}
         """
 
     conversion_options = {
@@ -34,12 +35,12 @@ class FoxNews(BasicNewsRecipe):
 
     remove_attributes = ['xmlns','lang']
 
-    remove_tags = [
-        dict(name=['object','embed','link','script','iframe','meta','base'])
-        ,dict(attrs={'class':['user-control','url-description','ad-context']})
-    ]
+    remove_tags=[
+        dict(attrs={'class':['user-control','logo','ad-300x250','url-description']})
+        ,dict(name=['meta','base','link','iframe','object','embed'])
+    ]
 
-    remove_tags_before=dict(name='h1')
+    keep_only_tags=[dict(attrs={'id':'article-print'})]
     remove_tags_after =dict(attrs={'class':'url-description'})
 
     feeds = [
@@ -55,3 +56,24 @@ class FoxNews(BasicNewsRecipe):
 
     def print_version(self, url):
         return url + 'print'
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('a'):
+            limg = item.find('img')
+            if item.string is not None:
+                str = item.string
+                item.replaceWith(str)
+            else:
+                if limg:
+                    item.name = 'div'
+                    item.attrs = []
+                else:
+                    str = self.tag_to_string(item)
+                    item.replaceWith(str)
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        return soup
+
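(The preprocess_html added above flattens anchors so the converted book carries no live links: link text is kept, image-bearing links become plain divs. A minimal standalone sketch of the same idea follows; bs4 is used here purely for illustration, whereas the recipe runs against calibre's bundled BeautifulSoup 3, so method names differ slightly.)

    from bs4 import BeautifulSoup  # assumption: bs4 stand-in, sample HTML invented

    soup = BeautifulSoup('<p><a href="/x">read more</a> here</p>', 'html.parser')
    for a in soup.find_all('a'):
        a.replace_with(a.get_text())  # keep the text, drop the link wrapper
    print(soup)  # -> <p>read more here</p>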
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 __license__ = 'GPL v3'
-__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
+__copyright__ = u'2010-2011, Tomasz Dlugosz <tomek3d@gmail.com>'
 '''
 frazpc.pl
 '''
@@ -19,17 +19,20 @@ class FrazPC(BasicNewsRecipe):
     use_embedded_content = False
     no_stylesheets = True
 
-    feeds = [(u'Aktualno\u015bci', u'http://www.frazpc.pl/feed'), (u'Recenzje', u'http://www.frazpc.pl/kat/recenzje-2/feed') ]
-
-    keep_only_tags = [dict(name='div', attrs={'id':'FRAZ_CONTENT'})]
-
-    remove_tags = [dict(name='p', attrs={'class':'gray tagsP fs11'})]
-
-    preprocess_regexps = [
-        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
-        [(r'<div id="post-[0-9]*"', lambda match: '<div id="FRAZ_CONTENT"'),
-        (r'href="/f/news/', lambda match: 'href="http://www.frazpc.pl/f/news/'),
-        (r' <a href="http://www.frazpc.pl/[^>]*?">(Skomentuj|Komentarz(e)?\([0-9]*\))</a> \|', lambda match: '')]
-    ]
+    feeds = [
+        (u'Aktualno\u015bci', u'http://www.frazpc.pl/feed/aktualnosci'),
+        (u'Artyku\u0142y', u'http://www.frazpc.pl/feed/artykuly')
+    ]
+
+    keep_only_tags = [dict(name='div', attrs={'class':'article'})]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':'title-wrapper'}),
+        dict(name='p', attrs={'class':'tags'}),
+        dict(name='p', attrs={'class':'article-links'}),
+        dict(name='div', attrs={'class':'comments_box'})
+    ]
+
+    preprocess_regexps = [(re.compile(r'\| <a href="#comments">Komentarze \([0-9]*\)</a>'), lambda match: '')]
 
     remove_attributes = [ 'width', 'height' ]
53  recipes/gazeta-prawna-calibre-v1.recipe  Normal file
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Vroo <vroobelek@iq.pl>'
+__author__ = u'Vroo'
+'''
+gazetaprawna.pl
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class gazetaprawna(BasicNewsRecipe):
+    version = 1
+    title = u'Gazeta Prawna'
+    __author__ = u'Vroo'
+    publisher = u'Infor Biznes'
+    oldest_article = 7
+    max_articles_per_feed = 20
+    no_stylesheets = True
+    remove_javascript = True
+    description = 'Polski dziennik gospodarczy'
+    language = 'pl'
+    encoding = 'utf-8'
+
+    remove_tags_after = [
+        dict(name='div', attrs={'class':['data-art']})
+    ]
+    remove_tags = [
+        dict(name='div', attrs={'class':['dodatki_artykulu','data-art']})
+    ]
+
+    feeds = [
+        (u'Wiadomo\u015bci - najwa\u017cniejsze', u'http://www.gazetaprawna.pl/wiadomosci/najwazniejsze/rss.xml'),
+        (u'Biznes i prawo gospodarcze', u'http://biznes.gazetaprawna.pl/rss.xml'),
+        (u'Prawo i wymiar sprawiedliwo\u015bci', u'http://prawo.gazetaprawna.pl/rss.xml'),
+        (u'Praca i ubezpieczenia', u'http://praca.gazetaprawna.pl/rss.xml'),
+        (u'Podatki i rachunkowo\u015b\u0107', u'http://podatki.gazetaprawna.pl/rss.xml')
+    ]
+
+    def print_version(self, url):
+        url = url.replace('wiadomosci/artykuly', 'drukowanie')
+        url = url.replace('artykuly', 'drukowanie')
+        url = url.replace('porady', 'drukowanie')
+        url = url.replace('wywiady', 'drukowanie')
+        url = url.replace('orzeczenia', 'drukowanie')
+        url = url.replace('galeria', 'drukowanie')
+        url = url.replace('komentarze', 'drukowanie')
+        url = url.replace('biznes.gazetaprawna', 'www.gazetaprawna')
+        url = url.replace('podatki.gazetaprawna', 'www.gazetaprawna')
+        url = url.replace('prawo.gazetaprawna', 'www.gazetaprawna')
+        url = url.replace('praca.gazetaprawna', 'www.gazetaprawna')
+        return url
@@ -1,9 +1,6 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
 
-# Needed for BLOGs
-from calibre.web.feeds import Feed
-
 class HBR(BasicNewsRecipe):
 
     title = 'Harvard Business Review Blogs'
@@ -32,6 +29,7 @@ class HBR(BasicNewsRecipe):
         feeds = [('Blog','http://feeds.harvardbusiness.org/harvardbusiness')]
         oldest_article = 30
        max_articles_per_feed = 100
+        use_embedded_content = False
     else:
         timefmt = ' [%B %Y]'
 
@@ -59,9 +57,9 @@ class HBR(BasicNewsRecipe):
     def get_browser(self):
         br = BasicNewsRecipe.get_browser(self)
         br.open(self.LOGIN_URL)
-        br.select_form(name='signInForm')
-        br['signInForm:username'] = self.username
-        br['signInForm:password'] = self.password
+        br.select_form(name='signin-form')
+        br['signin-form:username'] = self.username
+        br['signin-form:password'] = self.password
         raw = br.submit().read()
         if 'My Account' not in raw:
             raise Exception('Failed to login, are you sure your username and password are correct?')
@@ -161,27 +159,13 @@ class HBR(BasicNewsRecipe):
         return startDate, endDate
 
    #-------------------------------------------------------------------------------------------------
-    def hbr_parse_blogs(self, feeds):
-        # Do the "official" parse_feeds first
-        rssFeeds = Feed()
-
-        # Use the PARSE_FEEDS method to get a Feeds object of the articles
-        rssFeeds = BasicNewsRecipe.parse_feeds(self)
-
-        # Create a new feed of the right configuration and append to existing afeeds
-        self.feed_to_index_append(rssFeeds[:], feeds)
-
-    #-------------------------------------------------------------------------------------------------
     def parse_index(self):
         if self.INCLUDE_ARTICLES == True:
             soup = self.hbr_get_toc()
             feeds = self.hbr_parse_toc(soup)
         else:
-            feeds = []
-
-        # blog stuff
-        if self.INCLUDE_BLOGS == True:
-            self.hbr_parse_blogs(feeds)
+            return BasicNewsRecipe.parse_index(self)
 
         return feeds
     #-------------------------------------------------------------------------------------------------
BIN  recipes/icons/autobild.png  Normal file  (binary, 614 B, not shown)
BIN  recipes/icons/novistandard.png  Normal file  (binary, 1.1 KiB, not shown)
BIN  recipes/icons/socialdiva.png  Normal file  (binary, 1.0 KiB, not shown)
@@ -16,7 +16,7 @@ class Jezebel(BasicNewsRecipe):
     max_articles_per_feed = 100
     no_stylesheets = True
     encoding = 'utf-8'
-    use_embedded_content = False
+    use_embedded_content = True
     language = 'en'
     masthead_url = 'http://cache.gawkerassets.com/assets/jezebel.com/img/logo.png'
     extra_css = '''
@@ -32,13 +32,12 @@ class Jezebel(BasicNewsRecipe):
                          , 'language' : language
                          }
 
-    remove_attributes = ['width','height']
-    keep_only_tags = [dict(attrs={'class':'content permalink'})]
-    remove_tags_before = dict(name='h1')
-    remove_tags = [dict(attrs={'class':'contactinfo'})]
-    remove_tags_after = dict(attrs={'class':'contactinfo'})
-
-    feeds = [(u'Articles', u'http://feeds.gawker.com/jezebel/full')]
+    feeds = [(u'Articles', u'http://feeds.gawker.com/jezebel/vip?format=xml')]
+
+    remove_tags = [
+        {'class': 'feedflare'},
+    ]
 
     def preprocess_html(self, soup):
         return self.adeify_images(soup)
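(The flip to use_embedded_content = True above changes where article HTML comes from: the body embedded in the feed entry is rendered instead of downloading the linked page. A sketch of the distinction follows; feedparser is used here purely to illustrate, is not part of the recipe, and the feed snippet is invented.)

    import feedparser  # assumption: illustration only

    rss = ('<rss version="2.0"><channel><item><title>t</title>'
           '<description>&lt;p&gt;Full body ships inside the feed.&lt;/p&gt;</description>'
           '</item></channel></rss>')
    entry = feedparser.parse(rss).entries[0]
    # With use_embedded_content = True, calibre renders this embedded HTML:
    print(entry.description)  # -> <p>Full body ships inside the feed.</p>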
36  recipes/korea_herald.recipe  Normal file
@@ -0,0 +1,36 @@
+__license__ = 'GPL v3'
+__copyright__ = '2011, Seongkyoun Yoo <Seongkyoun.yoo at gmail.com>'
+'''
+Profile to download KoreaHerald
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class KoreaHerald(BasicNewsRecipe):
+    title = u'KoreaHerald'
+    language = 'en'
+    description = u'Korea Herald News articles'
+    __author__ = 'Seongkyoun Yoo'
+    oldest_article = 10
+    recursions = 3
+    max_articles_per_feed = 10
+    no_stylesheets = True
+    keep_only_tags = [
+        dict(id=['contentLeft', '_article'])
+    ]
+
+    remove_tags = [
+        dict(name='iframe'),
+        dict(name='div', attrs={'class':['left','htit2', 'navigation','banner_txt','banner_img']}),
+        dict(name='ul', attrs={'class':['link_icon', 'flow_icon','detailTextAD110113']}),
+    ]
+
+    feeds = [
+        ('All News','http://www.koreaherald.com/rss/020000000000.xml'),
+        ('National','http://www.koreaherald.com/rss/020100000000.xml'),
+        ('Business','http://www.koreaherald.com/rss/020200000000.xml'),
+        ('Life&Style','http://www.koreaherald.com/rss/020300000000.xml'),
+        ('Entertainment','http://www.koreaherald.com/rss/020400000000.xml'),
+        ('Sports','http://www.koreaherald.com/rss/020500000000.xml'),
+        ('Opinion','http://www.koreaherald.com/rss/020600000000.xml'),
+        ('English Cafe','http://www.koreaherald.com/rss/021000000000.xml'),
+    ]
@@ -16,7 +16,7 @@ class Kotaku(BasicNewsRecipe):
     max_articles_per_feed = 100
     no_stylesheets = True
     encoding = 'utf-8'
-    use_embedded_content = False
+    use_embedded_content = True
     language = 'en'
     masthead_url = 'http://cache.gawkerassets.com/assets/kotaku.com/img/logo.png'
     extra_css = '''
@@ -31,13 +31,12 @@ class Kotaku(BasicNewsRecipe):
                          , 'language' : language
                          }
 
-    remove_attributes = ['width','height']
-    keep_only_tags = [dict(attrs={'class':'content permalink'})]
-    remove_tags_before = dict(name='h1')
-    remove_tags = [dict(attrs={'class':'contactinfo'})]
-    remove_tags_after = dict(attrs={'class':'contactinfo'})
-
-    feeds = [(u'Articles', u'http://feeds.gawker.com/kotaku/full')]
+    feeds = [(u'Articles', u'http://feeds.gawker.com/kotaku/vip?format=xml')]
+
+    remove_tags = [
+        {'class': 'feedflare'},
+    ]
 
     def preprocess_html(self, soup):
         return self.adeify_images(soup)
@@ -48,7 +48,7 @@ class LeMonde(BasicNewsRecipe):
             if alink.string is not None:
                 tstr = alink.string
                 alink.replaceWith(tstr)
-        return soup
+        return self.adeify_images(soup)
 
     preprocess_regexps = [
         (re.compile(r'([0-9])%'), lambda m: m.group(1) + ' %'),
37  recipes/lupa.recipe  Normal file
@@ -0,0 +1,37 @@
+__license__ = 'GPL v3'
+__copyright__ = '2011, Tomas Latal<latal.tomas at gmail.com>'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class LupaCZ(BasicNewsRecipe):
+    title = 'Lupa'
+    __author__ = 'Tomas Latal'
+    __version__ = '1.0'
+    __date__ = '30 April 2011'
+    description = u'Zpr\xe1vi\u010dky a \u010dl\xe1nky z Lupa.cz'
+    oldest_article = 2
+    max_articles_per_feed = 10
+    encoding = 'utf8'
+    publisher = 'Internet Info s.r.o.'
+    category = 'IT,news,CZ'
+    language = 'cs'
+    publication_type = 'newsportal'
+    no_stylesheets = True
+    remove_javascript = True
+    extra_css = 'p.perex{font-size: 1.2em;margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} \
+                 p.perex img {display:none;} \
+                 .urs p {margin: 0 0 0.8em 0;}'
+
+    feeds = [
+        (u'Zpr\xe1vi\u010dky', u'http://rss.lupa.cz/zpravicky'),
+        (u'\u010cl\xe1nky', u'http://rss.lupa.cz/clanky')
+    ]
+
+    remove_tags_before = dict(id='main')
+
+    remove_tags_after = [dict(id='main')]
+
+    remove_tags = [
+        dict(attrs={'class':['author clear','tags-rubrics','box border style1 links clear','enquiry clear','serial','box border style1 TitleList','breadcrumb clear','article-discussion box border style1 monitoringComponentArticle','link-more border prev-next clear']}),
+        dict(id=['discussionList','similarItems','sidebar','footer','opl','promo-box'])
+    ]
37  recipes/mesec.recipe  Normal file
@@ -0,0 +1,37 @@
+__license__ = 'GPL v3'
+__copyright__ = '2011, Tomas Latal<latal.tomas at gmail.com>'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MesecCZ(BasicNewsRecipe):
+    title = u'M\u011b\u0161ec'
+    __author__ = 'Tomas Latal'
+    __version__ = '1.0'
+    __date__ = '30 April 2011'
+    description = u'Zpr\xe1vi\u010dky a \u010dl\xe1nky z Mesec.cz'
+    oldest_article = 1
+    max_articles_per_feed = 10
+    encoding = 'utf8'
+    publisher = 'Internet Info s.r.o.'
+    category = 'finance,CZ'
+    language = 'cs'
+    publication_type = 'newsportal'
+    no_stylesheets = True
+    remove_javascript = True
+    extra_css = 'p.perex{font-size: 1.2em;margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} \
+                 p.perex img {display:none;} \
+                 .urs p {margin: 0 0 0.8em 0;}'
+
+    feeds = [
+        (u'Aktuality', u'http://www.mesec.cz/rss/aktuality/'),
+        (u'\u010cl\xe1nky', u'http://www.mesec.cz/rss/clanky/')
+    ]
+
+    remove_tags_before = dict(id='main')
+
+    remove_tags_after = [dict(id='main')]
+
+    remove_tags = [
+        dict(attrs={'class':['author clear','tags-rubrics','box border style1 links clear','enquiry clear','serial','box border style1 TitleList','breadcrumb clear','article-discussion box border style1 monitoringComponentArticle','link-more border prev-next clear']}),
+        dict(id=['discussionList','similarItems','sidebar','footer','opl','promo-box'])
+    ]
43  recipes/novinky.recipe  Normal file
@@ -0,0 +1,43 @@
+__license__ = 'GPL v3'
+__copyright__ = '2011, Tomas Latal<latal.tomas at gmail.com>'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class NovinkyCZ(BasicNewsRecipe):
+    title = 'Novinky'
+    __author__ = 'Tomas Latal'
+    __version__ = '1.1'
+    __date__ = '30 April 2011'
+    description = 'News from server Novinky.cz'
+    oldest_article = 1
+    max_articles_per_feed = 10
+    encoding = 'utf8'
+    publisher = 'Novinky'
+    category = 'news, CZ'
+    language = 'cs'
+    publication_type = 'newsportal'
+    no_stylesheets = True
+    remove_javascript = True
+    cover_url = 'http://img193.imageshack.us/img193/3039/novinkycover.jpg'
+    extra_css = 'p.acmDescription{font-style:italic;} p.acmAuthor{font-size:0.8em; color:#707070}'
+
+    feeds = [
+        (u'Dom\xe1c\xed', u'http://www.novinky.cz/rss/domaci/'),
+        (u'Zahrani\u010d\xed', u'http://www.novinky.cz/rss/zahranicni/'),
+        (u'Krimi', u'http://www.novinky.cz/rss/krimi/'),
+        (u'Ekonomika', u'http://www.novinky.cz/rss/ekonomika/'),
+        (u'Finance', u'http://www.novinky.cz/rss/finance/'),
+        (u'Kultura', u'http://www.novinky.cz/rss/kultura/'),
+        (u'Koktejl', u'http://www.novinky.cz/rss/koktejl/'),
+        (u'Internet a PC', u'http://www.novinky.cz/rss/internet-a-pc/'),
+        (u'Auto-moto', u'http://www.novinky.cz/rss/auto/'),
+    ]
+
+    remove_tags_before = dict(id='articleContent')
+
+    remove_tags_after = [dict(id='movedArticleAuthors')]
+
+    remove_tags = [
+        dict(name='div', attrs={'id':['articleColumnInfo','pictureInnerBox']}),
+        dict(name='p', attrs={'id':['articleDate']})
+    ]
100  recipes/novistandard.recipe  Normal file
@@ -0,0 +1,100 @@
+
+__license__ = 'GPL v3'
+__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.standard.rs
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class NoviStandard(BasicNewsRecipe):
+    title = 'Novi Standard'
+    __author__ = 'Darko Miletic'
+    description = 'NoviStandard - energija je neunistiva!'
+    publisher = 'Novi Standard'
+    category = 'news, politics, Serbia'
+    no_stylesheets = True
+    delay = 1
+    oldest_article = 15
+    encoding = 'utf-8'
+    publication_type = 'magazine'
+    needs_subscription = 'optional'
+    remove_empty_feeds = True
+    INDEX = 'http://www.standard.rs/'
+    use_embedded_content = False
+    language = 'sr'
+    publication_type = 'magazine'
+    masthead_url = 'http://www.standard.rs/templates/ja_opal/images/red/logo.png'
+    extra_css = """
+        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
+        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
+        body{font-family: Arial,"Segoe UI","Trebuchet MS",Helvetica,sans1,sans-serif}
+        .dropcap{font-family: Georgia,Times,serif1,serif; display:inline}
+        .dropcap:first-letter{display: inline; font-size: xx-large; font-weight: bold}
+        .contentheading{color: gray; font-size: x-large}
+        .article-meta, .createdby{color: red}
+        img{margin-top:0.5em; margin-bottom: 0.7em; display: block}
+        """
+
+    conversion_options = {
+        'comment'    : description
+        , 'tags'     : category
+        , 'publisher': publisher
+        , 'language' : language
+    }
+
+    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+        br.open(self.INDEX)
+        if self.username is not None and self.password is not None:
+            br.select_form(name='login')
+            br['username'] = self.username
+            br['passwd' ] = self.password
+            br.submit()
+        return br
+
+    keep_only_tags = [dict(attrs={'class':['contentheading','article-meta','article-content']})]
+    remove_tags_after = dict(attrs={'class':'extravote-container'})
+    remove_tags = [
+        dict(name=['object','link','iframe','meta','base'])
+        ,dict(attrs={'class':'extravote-container'})
+    ]
+    remove_attributes = ['border','background','height','width','align','valign','lang']
+    feeds = [
+        (u'Naslovna', u'http://www.standard.rs/index.php?format=feed&type=rss')
+        ,(u'Politika', u'http://www.standard.rs/vesti/36-politika.html?format=feed&type=rss')
+        ,(u'Cvijanovic preporucuje', u'http://www.standard.rs/-cvijanovi-vam-preporuuje.html?format=feed&type=rss')
+        ,(u'Kolumne', u'http://www.standard.rs/vesti/49-kolumne.html?format=feed&type=rss')
+        ,(u'Kultura', u'http://www.standard.rs/vesti/40-kultura.html?format=feed&type=rss')
+        ,(u'Lifestyle', u'http://www.standard.rs/vesti/39-lifestyle.html?format=feed&type=rss')
+        ,(u'Svet', u'http://www.standard.rs/vesti/41-svet.html?format=feed&type=rss')
+        ,(u'Ekonomija', u'http://www.standard.rs/vesti/37-ekonomija.html?format=feed&type=rss')
+        ,(u'Sport', u'http://www.standard.rs/vesti/38-sport.html?format=feed&type=rss')
+    ]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('div'):
+            if len(item.contents) == 0:
+                item.extract()
+        for item in soup.findAll('a'):
+            limg = item.find('img')
+            if item.string is not None:
+                str = item.string
+                item.replaceWith(str)
+            else:
+                if limg:
+                    item.name = 'div'
+                    item.attrs = []
+                else:
+                    str = self.tag_to_string(item)
+                    item.replaceWith(str)
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        return soup
37  recipes/podnikatel.recipe  Normal file
@@ -0,0 +1,37 @@
+__license__ = 'GPL v3'
+__copyright__ = '2011, Tomas Latal<latal.tomas at gmail.com>'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class PodnikatelCZ(BasicNewsRecipe):
+    title = 'Podnikatel'
+    __author__ = 'Tomas Latal'
+    __version__ = '1.0'
+    __date__ = '30 April 2011'
+    description = u'Aktuality a \u010dl\xe1nky z Podnikatel.cz'
+    oldest_article = 1
+    max_articles_per_feed = 10
+    encoding = 'utf8'
+    publisher = 'Internet Info s.r.o.'
+    category = 'podnikani, bussiness, CZ'
+    language = 'cs'
+    publication_type = 'newsportal'
+    no_stylesheets = True
+    remove_javascript = True
+    extra_css = 'p.perex{font-size: 1.2em; margin: 0 0 10px 0;line-height: 1.4;padding: 0 0 10px 0;font-weight: bold;} \
+                 p.perex img {display:none;} \
+                 .urs p {margin: 0 0 0.8em 0;}'
+
+    feeds = [
+        (u'Aktuality', u'http://rss.podnikatel.cz/aktuality'),
+        (u'\u010cl\xe1nky', u'http://rss.podnikatel.cz/clanky')
+    ]
+
+    remove_tags_before = dict(id='art-content')
+
+    remove_tags_after = [dict(id='art-content')]
+
+    remove_tags = [
+        dict(attrs={'class':['socialshare','box-blue','author clear','labels-terms','box diskuze','ad','page-nav right','infobox','box zpravy','s-clanky']}),
+        dict(id=['path','article-tools','discussionList','similarItems','promo-box'])
+    ]
54  recipes/socialdiva.recipe  Normal file
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011'
+'''
+socialdiva.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class SocialDiva(BasicNewsRecipe):
+    title = u'Social Diva'
+    __author__ = u'Silviu Cotoara'
+    description = u'When in doubt, wear red'
+    publisher = 'Social Diva'
+    oldest_article = 5
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Reviste,Femei'
+    encoding = 'utf-8'
+    cover_url = 'http://www.socialdiva.ro/images/logo.png'
+
+    conversion_options = {
+        'comments'  : description
+        ,'tags'      : category
+        ,'language'  : language
+        ,'publisher' : publisher
+    }
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'col-alpha mt5 content_articol'}),
+        dict(name='div', attrs={'class':'mt5'})
+    ]
+
+    remove_tags = [
+        dict(name='a', attrs={'class':['comments float-left scroll mt5']}),
+        dict(name='a', attrs={'class':['comments float-left scroll']}),
+        dict(name='div', attrs={'class':['rating-container relative float-left']}),
+        dict(name='div', attrs={'class':['float-right social_articol']})
+    ]
+
+    remove_tags_after = [
+        dict(name='a', attrs={'class':['comments float-left scroll mt5']})
+    ]
+
+    feeds = [
+        (u'Feeds', u'http://www.socialdiva.ro/rss.html')
+    ]
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)
@@ -18,7 +18,7 @@ class TelepolisNews(BasicNewsRecipe):
     recursion = 0
     no_stylesheets = True
     encoding = "utf-8"
-    language = 'de_AT'
+    language = 'de'
 
     use_embedded_content =False
     remove_empty_feeds = True
@@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 
 class AdvancedUserRecipe1283848012(BasicNewsRecipe):
     description = 'TheMarker Financial News in Hebrew'
-    __author__ = 'TonyTheBookworm, Marbs'
+    __author__ = 'Marbs'
     cover_url = 'http://static.ispot.co.il/wp-content/upload/2009/09/themarker.jpg'
     title = u'TheMarker'
     language = 'he'
@@ -11,42 +11,38 @@ class AdvancedUserRecipe1283848012(BasicNewsRecipe):
     remove_javascript = True
     timefmt = '[%a, %d %b, %Y]'
     oldest_article = 1
-    remove_tags = [dict(name='tr', attrs={'bgcolor':['#738A94']}) ]
-    max_articles_per_feed = 10
+    keep_only_tags =dict(name='div', attrs={'id':'content'})
+    remove_attributes = ['width','float','margin-left']
+    no_stylesheets = True
+    remove_tags = [dict(name='div', attrs={'class':['social-nav article-social-nav','prsnlArticleEnvelope','cb']}) ,
+                   dict(name='a', attrs={'href':['/misc/mobile']}) ,
+                   dict(name='span', attrs={'class':['post-summ']}) ]
+    max_articles_per_feed = 100
     extra_css='body{direction: rtl;} .article_description{direction: rtl; } a.article{direction: rtl; } .calibre_feed_description{direction: rtl; }'
-    feeds = [(u'Head Lines', u'http://www.themarker.com/tmc/content/xml/rss/hpfeed.xml'),
-             (u'TA Market', u'http://www.themarker.com/tmc/content/xml/rss/sections/marketfeed.xml'),
-             (u'Real Estate', u'http://www.themarker.com/tmc/content/xml/rss/sections/realEstaterfeed.xml'),
-             (u'Wall Street & Global', u'http://www.themarker.com/tmc/content/xml/rss/sections/wallsfeed.xml'),
-             (u'Law', u'http://www.themarker.com/tmc/content/xml/rss/sections/lawfeed.xml'),
-             (u'Media', u'http://www.themarker.com/tmc/content/xml/rss/sections/mediafeed.xml'),
-             (u'Consumer', u'http://www.themarker.com/tmc/content/xml/rss/sections/consumerfeed.xml'),
-             (u'Career', u'http://www.themarker.com/tmc/content/xml/rss/sections/careerfeed.xml'),
-             (u'Car', u'http://www.themarker.com/tmc/content/xml/rss/sections/carfeed.xml'),
-             (u'High Tech', u'http://www.themarker.com/tmc/content/xml/rss/sections/hightechfeed.xml'),
-             (u'Investor Guide', u'http://www.themarker.com/tmc/content/xml/rss/sections/investorGuidefeed.xml')]
+    feeds = [(u'Head Lines', u'http://www.themarker.com/cmlink/1.144'),
+             (u'TA Market', u'http://www.themarker.com/cmlink/1.243'),
+             (u'Real Estate', u'http://www.themarker.com/cmlink/1.605656'),
+             (u'Global', u'http://www.themarker.com/cmlink/1.605658'),
+             (u'Wall Street', u'http://www.themarker.com/cmlink/1.613713'),
+             (u'SmartPhone', u'http://www.themarker.com/cmlink/1.605661'),
+             (u'Law', u'http://www.themarker.com/cmlink/1.605664'),
+             (u'Media', u'http://www.themarker.com/cmlink/1.605660'),
+             (u'Consumer', u'http://www.themarker.com/cmlink/1.605662'),
+             (u'Career', u'http://www.themarker.com/cmlink/1.605665'),
+             (u'Car', u'http://www.themarker.com/cmlink/1.605663'),
+             (u'High Tech', u'http://www.themarker.com/cmlink/1.605659'),
+             (u'Small Business', u'http://www.themarker.com/cmlink/1.605666')]
 
     def print_version(self, url):
-        split1 = url.split("=")
-        weblinks = url
-
-        if weblinks is not None:
-            for link in weblinks:
-                #---------------------------------------------------------
-                #here we need some help with some regexpressions
-                #we are trying to find it.themarker.com in a url
-                #-----------------------------------------------------------
-                re1='.*?' # Non-greedy match on filler
-                re2='(it\\.themarker\\.com)' # Fully Qualified Domain Name 1
-                rg = re.compile(re1+re2,re.IGNORECASE|re.DOTALL)
-                m = rg.search(url)
-
-                if m:
-                    split2 = url.split("article/")
-                    print_url = 'http://it.themarker.com/tmit/PrintArticle/' + split2[1]
-                else:
-                    print_url = 'http://www.themarker.com/ibo/misc/printFriendly.jhtml?ElementId=%2Fibo%2Frepositories%2Fstories%2Fm1_2000%2F' + split1[1]+'.xml'
-
-        return print_url
+        #split1 = url.split("/")
+        #print_url='http://www.themarker.com/misc/article-print-page/'+split1[-1]
+        txt=url
+
+        re1='.*?' # Non-greedy match on filler
+        re2='(tv)' # Word 1
+
+        rg = re.compile(re1+re2,re.IGNORECASE|re.DOTALL)
+        m = rg.search(txt)
+        if m:
+            #print 'bad link'
+            return 1
@@ -10,6 +10,8 @@ import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class Time(BasicNewsRecipe):
+    recipe_disabled = ('This recipe has been disabled as TIME no longer'
+                       ' publish complete articles on the web.')
     title = u'Time'
     __author__ = 'Kovid Goyal and Sujata Raman'
     description = 'Weekly magazine'
@ -7,13 +7,11 @@ usatoday.com
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, NavigableString, Tag
|
|
||||||
import re
|
|
||||||
|
|
||||||
class USAToday(BasicNewsRecipe):
|
class USAToday(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'USA Today'
|
title = 'USA Today'
|
||||||
__author__ = 'GRiker'
|
__author__ = 'Kovid Goyal'
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
timefmt = ''
|
timefmt = ''
|
||||||
max_articles_per_feed = 20
|
max_articles_per_feed = 20
|
||||||
@ -31,7 +29,6 @@ class USAToday(BasicNewsRecipe):
|
|||||||
margin-bottom: 0em; \
|
margin-bottom: 0em; \
|
||||||
font-size: smaller;}\n \
|
font-size: smaller;}\n \
|
||||||
.articleBody {text-align: left;}\n '
|
.articleBody {text-align: left;}\n '
|
||||||
conversion_options = { 'linearize_tables' : True }
|
|
||||||
#simultaneous_downloads = 1
|
#simultaneous_downloads = 1
|
||||||
feeds = [
|
feeds = [
|
||||||
('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
|
('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
|
||||||
@ -47,63 +44,26 @@ class USAToday(BasicNewsRecipe):
|
|||||||
('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
|
('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
|
||||||
('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories'),
|
('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories'),
|
||||||
]
|
]
|
||||||
keep_only_tags = [dict(attrs={'class':[
|
keep_only_tags = [dict(attrs={'class':'story'})]
|
||||||
'byLine',
|
remove_tags = [
|
||||||
'inside-copy',
|
dict(attrs={'class':[
|
||||||
'inside-head',
|
'share',
|
||||||
'inside-head2',
|
'reprints',
|
||||||
'item',
|
'inline-h3',
|
||||||
'item-block',
|
'info-extras',
|
||||||
'photo-container',
|
'ppy-outer',
|
||||||
]}),
|
'ppy-caption',
|
||||||
dict(id=[
|
'comments',
|
||||||
'applyMainStoryPhoto',
|
'jump',
|
||||||
'permalink',
|
'pagetools',
|
||||||
])]
|
'post-attributes',
|
||||||
|
'tags',
|
||||||
|
'bottom-tools',
|
||||||
|
'sponsoredlinks',
|
||||||
|
]}),
|
||||||
|
dict(id=['pluck']),
|
||||||
|
]
|
||||||
|
|
||||||
remove_tags = [dict(attrs={'class':[
|
|
||||||
'comments',
|
|
||||||
'jump',
|
|
||||||
'pagetools',
|
|
||||||
'post-attributes',
|
|
||||||
'tags',
|
|
||||||
]}),
|
|
||||||
dict(id=[])]
|
|
||||||
|
|
||||||
#feeds = [('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles')]
|
|
||||||
|
|
||||||
def dump_hex(self, src, length=16):
|
|
||||||
''' Diagnostic '''
|
|
||||||
FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)])
|
|
||||||
N=0; result=''
|
|
||||||
while src:
|
|
||||||
s,src = src[:length],src[length:]
|
|
||||||
hexa = ' '.join(["%02X"%ord(x) for x in s])
|
|
||||||
s = s.translate(FILTER)
|
|
||||||
result += "%04X %-*s %s\n" % (N, length*3, hexa, s)
|
|
||||||
N+=length
|
|
||||||
print result
|
|
||||||
|
|
||||||
def fixChars(self,string):
|
|
||||||
# Replace lsquo (\x91)
|
|
||||||
fixed = re.sub("\x91","‘",string)
|
|
||||||
|
|
||||||
# Replace rsquo (\x92)
|
|
||||||
fixed = re.sub("\x92","’",fixed)
|
|
||||||
|
|
||||||
# Replace ldquo (\x93)
|
|
||||||
fixed = re.sub("\x93","“",fixed)
|
|
||||||
|
|
||||||
# Replace rdquo (\x94)
|
|
||||||
fixed = re.sub("\x94","”",fixed)
|
|
||||||
|
|
||||||
# Replace ndash (\x96)
|
|
||||||
fixed = re.sub("\x96","–",fixed)
|
|
||||||
|
|
||||||
# Replace mdash (\x97)
|
|
||||||
fixed = re.sub("\x97","—",fixed)
|
|
||||||
|
|
||||||
return fixed
|
|
||||||
|
|
||||||
def get_masthead_url(self):
|
def get_masthead_url(self):
|
||||||
masthead = 'http://i.usatoday.net/mobile/_common/_images/565x73_usat_mobile.gif'
|
masthead = 'http://i.usatoday.net/mobile/_common/_images/565x73_usat_mobile.gif'
|
||||||
@ -115,321 +75,4 @@ class USAToday(BasicNewsRecipe):
|
|||||||
masthead = None
|
masthead = None
|
||||||
return masthead
|
return masthead
|
||||||
|
|
||||||
def massageNCXText(self, description):
|
|
||||||
# Kindle TOC descriptions won't render certain characters
|
|
||||||
if description:
|
|
||||||
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
|
||||||
# Replace '&' with '&'
|
|
||||||
massaged = re.sub("&","&", massaged)
|
|
||||||
return self.fixChars(massaged)
|
|
||||||
else:
|
|
||||||
return description
|
|
||||||
|
|
||||||
def parse_feeds(self, *args, **kwargs):
|
|
||||||
parsed_feeds = BasicNewsRecipe.parse_feeds(self, *args, **kwargs)
|
|
||||||
# Count articles for progress dialog
|
|
||||||
article_count = 0
|
|
||||||
for feed in parsed_feeds:
|
|
||||||
article_count += len(feed)
|
|
||||||
self.log( "Queued %d articles" % article_count)
|
|
||||||
return parsed_feeds
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
soup = self.strip_anchors(soup)
|
|
||||||
return soup
|
|
||||||
|
|
||||||
-    def postprocess_html(self, soup, first_fetch):
-
-        # Remove navLinks <div class="inside-copy" style="padding-bottom:3px">
-        navLinks = soup.find(True,{'style':'padding-bottom:3px'})
-        if navLinks:
-            navLinks.extract()
-
-        # Remove <div class="inside-copy" style="margin-bottom:10px">
-        gibberish = soup.find(True,{'style':'margin-bottom:10px'})
-        if gibberish:
-            gibberish.extract()
-
-        # Change <inside-head> to <h2>
-        headline = soup.find(True, {'class':['inside-head','inside-head2']})
-        if not headline:
-            headline = soup.find('h3')
-        if headline:
-            tag = Tag(soup, "h2")
-            tag['class'] = "headline"
-            tag.insert(0, headline.contents[0])
-            headline.replaceWith(tag)
-        else:
-            print "unable to find headline:\n%s\n" % soup
-
-        # Change byLine to byline, change commas to middot
-        # Kindle renders commas in byline as '&'
-        byline = soup.find(True, {'class':'byLine'})
-        if byline:
-            byline['class'] = 'byline'
-            # Replace comma with middot
-            byline.contents[0].replaceWith(re.sub(","," ·", byline.renderContents()))
-
-        jumpout_punc_list = [':','?']
-        # Remove the inline jumpouts in <div class="inside-copy">
-        paras = soup.findAll(True, {'class':'inside-copy'})
-        for para in paras:
-            if re.match("<b>[\w\W]+ ",para.renderContents()):
-                p = para.find('b')
-                for punc in jumpout_punc_list:
-                    punc_offset = p.contents[0].find(punc)
-                    if punc_offset == -1:
-                        continue
-                    if punc_offset > 1:
-                        if p.contents[0][:punc_offset] == p.contents[0][:punc_offset].upper():
-                            #print "extracting \n%s\n" % para.prettify()
-                            para.extract()
-
-        # Reset class for remaining
-        paras = soup.findAll(True, {'class':'inside-copy'})
-        for para in paras:
-            para['class'] = 'articleBody'
-
-        # Remove inline jumpouts in <p>
-        paras = soup.findAll(['p'])
-        for p in paras:
-            if hasattr(p,'contents') and len(p.contents):
-                for punc in jumpout_punc_list:
-                    punc_offset = p.contents[0].find(punc)
-                    if punc_offset == -1:
-                        continue
-                    if punc_offset > 2 and hasattr(p,'a') and len(p.contents):
-                        #print "evaluating %s\n" % p.contents[0][:punc_offset+1]
-                        if p.contents[0][:punc_offset] == p.contents[0][:punc_offset].upper():
-                            #print "extracting \n%s\n" % p.prettify()
-                            p.extract()
-
-        # Capture the first img, insert after headline
-        imgs = soup.findAll('img')
-        print "postprocess_html(): %d images" % len(imgs)
-        if imgs:
-            divTag = Tag(soup, 'div')
-            divTag['class'] = 'image'
-            body = soup.find('body')
-            img = imgs[0]
-            #print "img: \n%s\n" % img.prettify()
-
-            # Table for photo and credit
-            tableTag = Tag(soup,'table')
-
-            # Photo
-            trimgTag = Tag(soup, 'tr')
-            tdimgTag = Tag(soup, 'td')
-            tdimgTag.insert(0,img)
-            trimgTag.insert(0,tdimgTag)
-            tableTag.insert(0,trimgTag)
-
-            # Credit
-            trcreditTag = Tag(soup, 'tr')
-
-            tdcreditTag = Tag(soup, 'td')
-            tdcreditTag['class'] = 'credit'
-            credit = soup.find('td',{'class':'photoCredit'})
-            if credit:
-                tdcreditTag.insert(0,NavigableString(credit.renderContents()))
-            else:
-                credit = img['credit']
-                if credit:
-                    tdcreditTag.insert(0,NavigableString(credit))
-                else:
-                    tdcreditTag.insert(0,NavigableString(''))
-
-            trcreditTag.insert(0,tdcreditTag)
-            tableTag.insert(1,trcreditTag)
-            dtc = 0
-            divTag.insert(dtc,tableTag)
-            dtc += 1
-
-            if False:
-                # Add the caption in the table
-                tableCaptionTag = Tag(soup,'caption')
-                tableCaptionTag.insert(0,soup.find('td',{'class':'photoCredit'}).renderContents())
-                tableTag.insert(1,tableCaptionTag)
-                divTag.insert(dtc,tableTag)
-                dtc += 1
-                body.insert(1,divTag)
-            else:
-                # Add the caption below the table
-                #print "Looking for caption in this soup:\n%s" % img.prettify()
-                captionTag = Tag(soup,'p')
-                captionTag['class'] = 'caption'
-                if hasattr(img,'alt') and img['alt']:
-                    captionTag.insert(0,NavigableString('<blockquote>%s</blockquote>' % img['alt']))
-                    divTag.insert(dtc, captionTag)
-                    dtc += 1
-                else:
-                    try:
-                        captionTag.insert(0,NavigableString('<blockquote>%s</blockquote>' % img['cutline']))
-                        divTag.insert(dtc, captionTag)
-                        dtc += 1
-                    except:
-                        pass
-
-                hrTag = Tag(soup, 'hr')
-                divTag.insert(dtc, hrTag)
-                dtc += 1
-
-            # Delete <div id="applyMainStoryPhoto">
-            photoJunk = soup.find('div',{'id':'applyMainStoryPhoto'})
-            if photoJunk:
-                photoJunk.extract()
-
-            # Insert img after headline
-            tag = body.find(True)
-            insertLoc = 0
-            headline_found = False
-            while True:
-                # Scan the top-level tags
-                insertLoc += 1
-                if hasattr(tag,'class') and tag['class'] == 'headline':
-                    headline_found = True
-                    body.insert(insertLoc,divTag)
-                    break
-                tag = tag.nextSibling
-                if not tag:
-                    break
-
-            if not headline_found:
-                # Monolithic <div> - restructure
-                tag = body.find(True)
-                while True:
-                    insertLoc += 1
-                    try:
-                        if hasattr(tag,'class') and tag['class'] == 'headline':
-                            headline_found = True
-                            tag.insert(insertLoc,divTag)
-                            break
-                    except:
-                        pass
-                    tag = tag.next
-                    if not tag:
-                        break
-
-            # Yank out headline, img and caption
-            headline = body.find('h2','headline')
-            img = body.find('div','image')
-            caption = body.find('p','caption')
-
-            # body(0) is calibre_navbar
-            # body(1) is <div class="item">
-
-            btc = 1
-            headline.extract()
-            body.insert(1, headline)
-            btc += 1
-            if img:
-                img.extract()
-                body.insert(btc, img)
-                btc += 1
-            if caption:
-                caption.extract()
-                body.insert(btc, caption)
-                btc += 1
-
-            if len(imgs) > 1:
-                if True:
-                    [img.extract() for img in imgs[1:]]
-                else:
-                    # Format the remaining images
-                    # This doesn't work yet
-                    for img in imgs[1:]:
-                        print "img:\n%s\n" % img.prettify()
-                        divTag = Tag(soup, 'div')
-                        divTag['class'] = 'image'
-
-                        # Table for photo and credit
-                        tableTag = Tag(soup,'table')
-
-                        # Photo
-                        trimgTag = Tag(soup, 'tr')
-                        tdimgTag = Tag(soup, 'td')
-                        tdimgTag.insert(0,img)
-                        trimgTag.insert(0,tdimgTag)
-                        tableTag.insert(0,trimgTag)
-
-                        # Credit
-                        trcreditTag = Tag(soup, 'tr')
-
-                        tdcreditTag = Tag(soup, 'td')
-                        tdcreditTag['class'] = 'credit'
-                        try:
-                            tdcreditTag.insert(0,NavigableString(img['credit']))
-                        except:
-                            tdcreditTag.insert(0,NavigableString(''))
-                        trcreditTag.insert(0,tdcreditTag)
-                        tableTag.insert(1,trcreditTag)
-                        divTag.insert(0,tableTag)
-                        soup.img.replaceWith(divTag)
-
-        return soup
-    def postprocess_book(self, oeb, opts, log) :
-
-        def extract_byline(href) :
-            # <meta name="byline" content=
-            soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
-            byline = soup.find('div',attrs={'class':'byline'})
-            if byline:
-                byline['class'] = 'byline'
-                # Replace comma with middot
-                byline.contents[0].replaceWith(re.sub(u",", u" ·",
-                    byline.renderContents(encoding=None)))
-                return byline.renderContents(encoding=None)
-            else :
-                paras = soup.findAll(text=True)
-                for para in paras:
-                    if para.startswith("Copyright"):
-                        return para[len('Copyright xxxx '):para.find('.')]
-                return None
-
-        def extract_description(href) :
-            soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
-            description = soup.find('meta',attrs={'name':'description'})
-            if description :
-                return self.massageNCXText(description['content'])
-            else:
-                # Take first paragraph of article
-                articleBody = soup.find('div',attrs={'id':['articleBody','item']})
-                if articleBody:
-                    paras = articleBody.findAll('p')
-                    for p in paras:
-                        if p.renderContents() > '' :
-                            return self.massageNCXText(self.tag_to_string(p,use_alt=False))
-                else:
-                    print "Didn't find <div id='articleBody'> in this soup:\n%s" % soup.prettify()
-                return None
-
-        # Method entry point here
-        # Single section toc looks different than multi-section tocs
-        if oeb.toc.depth() == 2 :
-            for article in oeb.toc :
-                if article.author is None :
-                    article.author = extract_byline(article.href)
-                if article.description is None :
-                    article.description = extract_description(article.href)
-        elif oeb.toc.depth() == 3 :
-            for section in oeb.toc :
-                for article in section :
-                    article.author = extract_byline(article.href)
-                    '''
-                    if article.author is None :
-                        article.author = self.massageNCXText(extract_byline(article.href))
-                    else:
-                        article.author = self.massageNCXText(article.author)
-                    '''
-                    if article.description is None :
-                        article.description = extract_description(article.href)
-    def strip_anchors(self,soup):
-        paras = soup.findAll(True)
-        for para in paras:
-            aTags = para.findAll('a')
-            for a in aTags:
-                if a.img is None:
-                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
-        return soup
39  recipes/vitalia.recipe  Normal file
@@ -0,0 +1,39 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Tomas Latal<latal.tomas at gmail.com>'

from calibre.web.feeds.news import BasicNewsRecipe

class VitaliaCZ(BasicNewsRecipe):
    title = 'Vitalia'
    __author__ = 'Tomas Latal'
    __version__ = '1.0'
    __date__ = '30 April 2011'
    description = u'Aktuality a \u010dl\xe1nky z Vitalia.cz'
    oldest_article = 1
    max_articles_per_feed = 10
    encoding = 'utf8'
    publisher = 'Internet Info s.r.o.'
    category = 'zdravi, vztahy, wellness, CZ'
    language = 'cs'
    publication_type = 'newsportal'
    no_stylesheets = True
    remove_javascript = True
    extra_css = 'p.perex{font-size: 1.2em; margin: 0 0 10px 0; line-height: 1.4; padding: 0 0 10px 0; font-weight: bold;} \
                 p.perex img {display:none;} \
                 span.author {font-size:0.8em; font-style:italic} \
                 .urs div.rs-tip-major {padding:0.5em; background: #e0e0e0 none repeat scroll 0 0;border: 1px solid #909090;} \
                 .urs p {margin: 0 0 0.8em 0;}'

    feeds = [
        (u'Aktuality', 'http://www.vitalia.cz/rss/aktuality/'),
        (u'\u010cl\xe1nky', u'http://www.vitalia.cz/rss/clanky/'),
    ]

    remove_tags_before = dict(id='main')

    remove_tags_after = [dict(id='main')]

    remove_tags = [
        dict(attrs={'class':['author clear','tags-rubrics','box border style1 links clear','enquiry clear','serial','box border style1 TitleList','breadcrumb clear','article-discussion box border style1 monitoringComponentArticle','link-more border prev-next clear']}),
        dict(id=['discussionList','similarItems','sidebar','footer','opl','promo-box'])
    ]
115  recipes/volksrant_sub.recipe  Normal file
@@ -0,0 +1,115 @@
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe

class Volkskrant_full(BasicNewsRecipe):
    # This recipe will download the Volkskrant newspaper,
    # from the subscribers site. It requires a password.
    # Known issues are: articles that are spread out over
    # multiple pages will appear multiple times. Pages
    # that contain only adverts will appear, but empty.
    # The supplement 'Volkskrant Magazine' on saturday
    # is currently not downloaded.
    # You can set a manual date, to download an archived
    # newspaper. Volkskrant stores over a month at the
    # moment of writing. To do so I suggest you unmark
    # the date on the line below, and insert it in the title. Then
    # follow the instructions marked further below.

    title = 'De Volkskrant (subscription)' # [za, 13 nov 2010]'
    __author__ = u'Selcal'
    description = u"Volkskrant"
    oldest_article = 30
    max_articles_per_feed = 100
    no_stylesheets = True
    language = 'nl'
    use_embedded_content = False
    simultaneous_downloads = 1
    delay = 1
    needs_subscription = True
    # Set RETRIEVEDATE to 'yyyymmdd' to load an older
    # edition. Otherwise keep '%Y%m%d'
    # When setting a manual date, unmark and add the date
    # to the title above, and unmark the timefmt line to stop
    # Calibre from adding today's date in addition.

    # timefmt = ''
    RETRIEVEDATE = strftime('%Y%m%d')
    INDEX_MAIN = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/#text'
    INDEX_ARTICLE = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/'
    LOGIN = 'http://www.volkskrant.nl/vk/user/loggedIn.do'
    remove_tags = [dict(name='address')]
    cover_url = 'http://www.volkskrant.nl/vk-online/VK/' + RETRIEVEDATE + '___/VKN01_001/page.jpg'

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()

        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            br.select_form(nr = 0)
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br
    def parse_index(self):
        krant = []
        def strip_title(_title):
            i = 0
            # Check the bound before indexing, so a title without a
            # colon does not run past the end of the string
            while ((i < len(_title)) and (_title[i] <> ":")):
                i = i + 1
            return(_title[0:i])
        for temp in range (5):
            try:
                soup = self.index_to_soup(self.INDEX_MAIN)
                break
            except:
                #print '(Retrying main index load)'
                continue
        mainsoup = soup.find('td', attrs={'id': 'select_page_top'})
        for option in mainsoup.findAll('option'):
            articles = []
            _INDEX = 'http://www.volkskrant.nl/vk-online/VK/' + self.RETRIEVEDATE + '___/' + option['value'] + '/#text'
            _INDEX_ARTICLE = 'http://www.volkskrant.nl/vk-online/VK/' + self.RETRIEVEDATE + '___/' + option['value'] + '/'
            #print ''
            #print '<------- Processing section: ' + _INDEX + ' ------------------------->'
            for temp in range (5):
                try:
                    soup = self.index_to_soup(_INDEX)
                    break
                except:
                    #print '(Retrying index load)'
                    continue
            for item in soup.findAll('area'):
                art_nr = item['class']
                attrname = art_nr[0:12] + '_section' + option['value'][0:5] + '_' + art_nr[26:len(art_nr)]
                #print '==> Found: ' + attrname;
                index_title = soup.find('div', attrs={'class': attrname})
                get_title = index_title['title'];
                _ARTICLE = _INDEX_ARTICLE + attrname + '.html#text'
                title = get_title;
                #print '--> Title: ' + title;
                #print '--> URL: ' + _ARTICLE;
                for temp in range (5):
                    try:
                        souparticle = self.index_to_soup(_ARTICLE);
                        break
                    except:
                        print '(Retrying URL load)'
                        continue
                headerurl = souparticle.findAll('frame')[0]['src'];
                #print '--> Read frame name for header: ' + headerurl;
                url = _INDEX_ARTICLE + headerurl[0:len(headerurl)-12] + '_text.html';
                #print '--> Corrected URL: ' + url;
                if (get_title <> ''):
                    title = strip_title(get_title)
                    date = strftime(' %B %Y')
                if (title <> ''):
                    articles.append({
                        'title'       :title
                        ,'date'       :date
                        ,'url'        :url
                        ,'description':''
                        })
            krant.append( (option.string, articles))
        return krant
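For the archived-edition mode the comments in this recipe describe, a minimal sketch of the three edits involved (the date shown is invented for illustration and is not part of the commit):

    # Hypothetical example of pinning the recipe to an archived edition:
    title = 'De Volkskrant (subscription) [di, 26 apr 2011]'
    timefmt = ''                # unmarked so calibre does not also append today's date
    RETRIEVEDATE = '20110426'   # fixed 'yyyymmdd' instead of strftime('%Y%m%d')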
@@ -118,6 +118,7 @@ sort_columns_at_startup = None
 # timestamp default if not set: dd MMM yyyy
 gui_pubdate_display_format = 'MMM yyyy'
 gui_timestamp_display_format = 'dd MMM yyyy'
+gui_last_modified_display_format = 'dd MMM yyyy'

 #: Control sorting of titles and series in the library display
 # Control title and series sorting in the library view. If set to
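The display formats above appear to follow Qt-style date tokens (dd = day, MMM = abbreviated month name, yyyy = year); a sketch of overriding the new tweak with an ISO-style date, as an illustration only:

    # Illustrative value, not from the commit: show the last-modified
    # column as e.g. 2011-04-30 instead of 30 Apr 2011.
    gui_last_modified_display_format = 'yyyy-MM-dd'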
@@ -7,17 +7,30 @@ CREATE TABLE books ( id INTEGER PRIMARY KEY AUTOINCREMENT,
                     title TEXT NOT NULL DEFAULT 'Unknown' COLLATE NOCASE,
                     sort TEXT COLLATE NOCASE,
                     timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-                    uri TEXT,
+                    pubdate TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-                    series_index INTEGER NOT NULL DEFAULT 1,
+                    series_index REAL NOT NULL DEFAULT 1.0,
                     author_sort TEXT COLLATE NOCASE,
                     isbn TEXT DEFAULT "" COLLATE NOCASE,
-                    path TEXT NOT NULL DEFAULT ""
-);
+                    lccn TEXT DEFAULT "" COLLATE NOCASE,
+                    path TEXT NOT NULL DEFAULT "",
+                    flags INTEGER NOT NULL DEFAULT 1
+                    , uuid TEXT, has_cover BOOL DEFAULT 0, last_modified TIMESTAMP NOT NULL DEFAULT "2000-01-01 00:00:00+00:00");
 CREATE TABLE books_authors_link ( id INTEGER PRIMARY KEY,
                                   book INTEGER NOT NULL,
                                   author INTEGER NOT NULL,
                                   UNIQUE(book, author)
                                 );
+CREATE TABLE books_languages_link ( id INTEGER PRIMARY KEY,
+                                    book INTEGER NOT NULL,
+                                    lang_code INTEGER NOT NULL,
+                                    item_order INTEGER NOT NULL DEFAULT 0,
+                                    UNIQUE(book, lang_code)
+                                  );
+CREATE TABLE books_plugin_data(id INTEGER PRIMARY KEY,
+                               book INTEGER NON NULL,
+                               name TEXT NON NULL,
+                               val TEXT NON NULL,
+                               UNIQUE(book,name));
 CREATE TABLE books_publishers_link ( id INTEGER PRIMARY KEY,
                                      book INTEGER NOT NULL,
                                      publisher INTEGER NOT NULL,
@@ -49,11 +62,51 @@ CREATE TABLE conversion_options ( id INTEGER PRIMARY KEY,
                                   data BLOB NOT NULL,
                                   UNIQUE(format,book)
                                 );
+CREATE TABLE custom_columns (
+                    id       INTEGER PRIMARY KEY AUTOINCREMENT,
+                    label    TEXT NOT NULL,
+                    name     TEXT NOT NULL,
+                    datatype TEXT NOT NULL,
+                    mark_for_delete BOOL DEFAULT 0 NOT NULL,
+                    editable BOOL DEFAULT 1 NOT NULL,
+                    display  TEXT DEFAULT "{}" NOT NULL,
+                    is_multiple BOOL DEFAULT 0 NOT NULL,
+                    normalized BOOL NOT NULL,
+                    UNIQUE(label)
+                );
+CREATE TABLE data ( id     INTEGER PRIMARY KEY,
+                    book   INTEGER NON NULL,
+                    format TEXT NON NULL COLLATE NOCASE,
+                    uncompressed_size INTEGER NON NULL,
+                    name TEXT NON NULL,
+                    UNIQUE(book, format)
+);
 CREATE TABLE feeds ( id INTEGER PRIMARY KEY,
                      title TEXT NOT NULL,
                      script TEXT NOT NULL,
                      UNIQUE(title)
                    );
+CREATE TABLE identifiers ( id   INTEGER PRIMARY KEY,
+                           book INTEGER NON NULL,
+                           type TEXT NON NULL DEFAULT "isbn" COLLATE NOCASE,
+                           val  TEXT NON NULL COLLATE NOCASE,
+                           UNIQUE(book, type)
+);
+CREATE TABLE languages ( id        INTEGER PRIMARY KEY,
+                         lang_code TEXT NON NULL COLLATE NOCASE,
+                         UNIQUE(lang_code)
+);
+CREATE TABLE library_id ( id   INTEGER PRIMARY KEY,
+                          uuid TEXT NOT NULL,
+                          UNIQUE(uuid)
+);
+CREATE TABLE metadata_dirtied(id INTEGER PRIMARY KEY,
+                              book INTEGER NOT NULL,
+                              UNIQUE(book));
+CREATE TABLE preferences(id INTEGER PRIMARY KEY,
+                         key TEXT NON NULL,
+                         val TEXT NON NULL,
+                         UNIQUE(key));
 CREATE TABLE publishers ( id INTEGER PRIMARY KEY,
                           name TEXT NOT NULL COLLATE NOCASE,
                           sort TEXT COLLATE NOCASE,
@@ -72,34 +125,143 @@ CREATE TABLE tags ( id INTEGER PRIMARY KEY,
                    name TEXT NOT NULL COLLATE NOCASE,
                    UNIQUE (name)
                  );
-CREATE TABLE data ( id     INTEGER PRIMARY KEY,
-                    book   INTEGER NON NULL,
-                    format TEXT NON NULL COLLATE NOCASE,
-                    uncompressed_size INTEGER NON NULL,
-                    name TEXT NON NULL,
-                    UNIQUE(book, format)
-);

 CREATE VIEW meta AS
        SELECT id, title,
-              (SELECT concat(name) FROM authors WHERE authors.id IN (SELECT author from books_authors_link WHERE book=books.id)) authors,
+              (SELECT sortconcat(bal.id, name) FROM books_authors_link AS bal JOIN authors ON(author = authors.id) WHERE book = books.id) authors,
               (SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher,
               (SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating,
               timestamp,
               (SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size,
               (SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags,
               (SELECT text FROM comments WHERE book=books.id) comments,
               (SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series,
               series_index,
               sort,
               author_sort,
               (SELECT concat(format) FROM data WHERE data.book=books.id) formats,
-              isbn
-        FROM books;
+              isbn,
+              path,
+              lccn,
+              pubdate,
+              flags,
+              uuid
+        FROM books;
+CREATE VIEW tag_browser_authors AS SELECT
+                    id,
+                    name,
+                    (SELECT COUNT(id) FROM books_authors_link WHERE author=authors.id) count,
+                    (SELECT AVG(ratings.rating)
+                     FROM books_authors_link AS tl, books_ratings_link AS bl, ratings
+                     WHERE tl.author=authors.id AND bl.book=tl.book AND
+                     ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
+                    sort AS sort
+                FROM authors;
+CREATE VIEW tag_browser_filtered_authors AS SELECT
+                    id,
+                    name,
+                    (SELECT COUNT(books_authors_link.id) FROM books_authors_link WHERE
+                        author=authors.id AND books_list_filter(book)) count,
+                    (SELECT AVG(ratings.rating)
+                     FROM books_authors_link AS tl, books_ratings_link AS bl, ratings
+                     WHERE tl.author=authors.id AND bl.book=tl.book AND
+                     ratings.id = bl.rating AND ratings.rating <> 0 AND
+                     books_list_filter(bl.book)) avg_rating,
+                    sort AS sort
+                FROM authors;
+CREATE VIEW tag_browser_filtered_publishers AS SELECT
+                    id,
+                    name,
+                    (SELECT COUNT(books_publishers_link.id) FROM books_publishers_link WHERE
+                        publisher=publishers.id AND books_list_filter(book)) count,
+                    (SELECT AVG(ratings.rating)
+                     FROM books_publishers_link AS tl, books_ratings_link AS bl, ratings
+                     WHERE tl.publisher=publishers.id AND bl.book=tl.book AND
+                     ratings.id = bl.rating AND ratings.rating <> 0 AND
+                     books_list_filter(bl.book)) avg_rating,
+                    name AS sort
+                FROM publishers;
+CREATE VIEW tag_browser_filtered_ratings AS SELECT
+                    id,
+                    rating,
+                    (SELECT COUNT(books_ratings_link.id) FROM books_ratings_link WHERE
+                        rating=ratings.id AND books_list_filter(book)) count,
+                    (SELECT AVG(ratings.rating)
+                     FROM books_ratings_link AS tl, books_ratings_link AS bl, ratings
+                     WHERE tl.rating=ratings.id AND bl.book=tl.book AND
+                     ratings.id = bl.rating AND ratings.rating <> 0 AND
+                     books_list_filter(bl.book)) avg_rating,
+                    rating AS sort
+                FROM ratings;
+CREATE VIEW tag_browser_filtered_series AS SELECT
+                    id,
+                    name,
+                    (SELECT COUNT(books_series_link.id) FROM books_series_link WHERE
+                        series=series.id AND books_list_filter(book)) count,
+                    (SELECT AVG(ratings.rating)
+                     FROM books_series_link AS tl, books_ratings_link AS bl, ratings
+                     WHERE tl.series=series.id AND bl.book=tl.book AND
+                     ratings.id = bl.rating AND ratings.rating <> 0 AND
+                     books_list_filter(bl.book)) avg_rating,
+                    (title_sort(name)) AS sort
+                FROM series;
+CREATE VIEW tag_browser_filtered_tags AS SELECT
+                    id,
+                    name,
+                    (SELECT COUNT(books_tags_link.id) FROM books_tags_link WHERE
+                        tag=tags.id AND books_list_filter(book)) count,
+                    (SELECT AVG(ratings.rating)
+                     FROM books_tags_link AS tl, books_ratings_link AS bl, ratings
+                     WHERE tl.tag=tags.id AND bl.book=tl.book AND
+                     ratings.id = bl.rating AND ratings.rating <> 0 AND
+                     books_list_filter(bl.book)) avg_rating,
+                    name AS sort
+                FROM tags;
+CREATE VIEW tag_browser_publishers AS SELECT
+                    id,
+                    name,
+                    (SELECT COUNT(id) FROM books_publishers_link WHERE publisher=publishers.id) count,
+                    (SELECT AVG(ratings.rating)
+                     FROM books_publishers_link AS tl, books_ratings_link AS bl, ratings
+                     WHERE tl.publisher=publishers.id AND bl.book=tl.book AND
+                     ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
+                    name AS sort
+                FROM publishers;
+CREATE VIEW tag_browser_ratings AS SELECT
+                    id,
+                    rating,
+                    (SELECT COUNT(id) FROM books_ratings_link WHERE rating=ratings.id) count,
+                    (SELECT AVG(ratings.rating)
+                     FROM books_ratings_link AS tl, books_ratings_link AS bl, ratings
+                     WHERE tl.rating=ratings.id AND bl.book=tl.book AND
+                     ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
+                    rating AS sort
+                FROM ratings;
+CREATE VIEW tag_browser_series AS SELECT
+                    id,
+                    name,
+                    (SELECT COUNT(id) FROM books_series_link WHERE series=series.id) count,
+                    (SELECT AVG(ratings.rating)
+                     FROM books_series_link AS tl, books_ratings_link AS bl, ratings
+                     WHERE tl.series=series.id AND bl.book=tl.book AND
+                     ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
+                    (title_sort(name)) AS sort
+                FROM series;
+CREATE VIEW tag_browser_tags AS SELECT
+                    id,
+                    name,
+                    (SELECT COUNT(id) FROM books_tags_link WHERE tag=tags.id) count,
+                    (SELECT AVG(ratings.rating)
+                     FROM books_tags_link AS tl, books_ratings_link AS bl, ratings
+                     WHERE tl.tag=tags.id AND bl.book=tl.book AND
+                     ratings.id = bl.rating AND ratings.rating <> 0) avg_rating,
+                    name AS sort
+                FROM tags;
 CREATE INDEX authors_idx ON books (author_sort COLLATE NOCASE);
 CREATE INDEX books_authors_link_aidx ON books_authors_link (author);
 CREATE INDEX books_authors_link_bidx ON books_authors_link (book);
 CREATE INDEX books_idx ON books (sort COLLATE NOCASE);
+CREATE INDEX books_languages_link_aidx ON books_languages_link (lang_code);
+CREATE INDEX books_languages_link_bidx ON books_languages_link (book);
 CREATE INDEX books_publishers_link_aidx ON books_publishers_link (publisher);
 CREATE INDEX books_publishers_link_bidx ON books_publishers_link (book);
 CREATE INDEX books_ratings_link_aidx ON books_ratings_link (rating);
@@ -111,32 +273,38 @@ CREATE INDEX books_tags_link_bidx ON books_tags_link (book);
 CREATE INDEX comments_idx ON comments (book);
 CREATE INDEX conversion_options_idx_a ON conversion_options (format COLLATE NOCASE);
 CREATE INDEX conversion_options_idx_b ON conversion_options (book);
+CREATE INDEX custom_columns_idx ON custom_columns (label);
 CREATE INDEX data_idx ON data (book);
+CREATE INDEX formats_idx ON data (format);
+CREATE INDEX languages_idx ON languages (lang_code COLLATE NOCASE);
 CREATE INDEX publishers_idx ON publishers (name COLLATE NOCASE);
-CREATE INDEX series_idx ON series (sort COLLATE NOCASE);
+CREATE INDEX series_idx ON series (name COLLATE NOCASE);
 CREATE INDEX tags_idx ON tags (name COLLATE NOCASE);
 CREATE TRIGGER books_delete_trg
     AFTER DELETE ON books
     BEGIN
         DELETE FROM books_authors_link WHERE book=OLD.id;
         DELETE FROM books_publishers_link WHERE book=OLD.id;
         DELETE FROM books_ratings_link WHERE book=OLD.id;
         DELETE FROM books_series_link WHERE book=OLD.id;
         DELETE FROM books_tags_link WHERE book=OLD.id;
+        DELETE FROM books_languages_link WHERE book=OLD.id;
         DELETE FROM data WHERE book=OLD.id;
         DELETE FROM comments WHERE book=OLD.id;
         DELETE FROM conversion_options WHERE book=OLD.id;
+        DELETE FROM books_plugin_data WHERE book=OLD.id;
+        DELETE FROM identifiers WHERE book=OLD.id;
     END;
-CREATE TRIGGER books_insert_trg
-        AFTER INSERT ON books
+CREATE TRIGGER books_insert_trg AFTER INSERT ON books
         BEGIN
-            UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id;
+            UPDATE books SET sort=title_sort(NEW.title),uuid=uuid4() WHERE id=NEW.id;
         END;
 CREATE TRIGGER books_update_trg
         AFTER UPDATE ON books
         BEGIN
-            UPDATE books SET sort=title_sort(NEW.title) WHERE id=NEW.id;
-        END;
+            UPDATE books SET sort=title_sort(NEW.title)
+                         WHERE id=NEW.id AND OLD.title <> NEW.title;
+        END;
 CREATE TRIGGER fkc_comments_insert
         BEFORE INSERT ON comments
         BEGIN
@@ -169,23 +337,41 @@ CREATE TRIGGER fkc_data_update
                THEN RAISE(ABORT, 'Foreign key violation: book not in books')
            END;
        END;
-CREATE TRIGGER fkc_delete_books_authors_link
+CREATE TRIGGER fkc_delete_on_authors
        BEFORE DELETE ON authors
        BEGIN
            SELECT CASE
-               WHEN (SELECT COUNT(id) FROM books_authors_link WHERE book=OLD.book) > 0
-               THEN RAISE(ABORT, 'Foreign key violation: author is still referenced')
+               WHEN (SELECT COUNT(id) FROM books_authors_link WHERE author=OLD.id) > 0
+               THEN RAISE(ABORT, 'Foreign key violation: authors is still referenced')
            END;
        END;
-CREATE TRIGGER fkc_delete_books_publishers_link
+CREATE TRIGGER fkc_delete_on_languages
+       BEFORE DELETE ON languages
+       BEGIN
+           SELECT CASE
+               WHEN (SELECT COUNT(id) FROM books_languages_link WHERE lang_code=OLD.id) > 0
+               THEN RAISE(ABORT, 'Foreign key violation: language is still referenced')
+           END;
+       END;
+CREATE TRIGGER fkc_delete_on_languages_link
+       BEFORE INSERT ON books_languages_link
+       BEGIN
+           SELECT CASE
+               WHEN (SELECT id from books WHERE id=NEW.book) IS NULL
+               THEN RAISE(ABORT, 'Foreign key violation: book not in books')
+               WHEN (SELECT id from languages WHERE id=NEW.lang_code) IS NULL
+               THEN RAISE(ABORT, 'Foreign key violation: lang_code not in languages')
+           END;
+       END;
+CREATE TRIGGER fkc_delete_on_publishers
        BEFORE DELETE ON publishers
        BEGIN
            SELECT CASE
-               WHEN (SELECT COUNT(id) FROM books_publishers_link WHERE book=OLD.book) > 0
-               THEN RAISE(ABORT, 'Foreign key violation: publisher is still referenced')
+               WHEN (SELECT COUNT(id) FROM books_publishers_link WHERE publisher=OLD.id) > 0
+               THEN RAISE(ABORT, 'Foreign key violation: publishers is still referenced')
            END;
        END;
-CREATE TRIGGER fkc_delete_books_series_link
+CREATE TRIGGER fkc_delete_on_series
        BEFORE DELETE ON series
        BEGIN
            SELECT CASE
@@ -193,12 +379,12 @@ CREATE TRIGGER fkc_delete_books_series_link
                THEN RAISE(ABORT, 'Foreign key violation: series is still referenced')
            END;
        END;
-CREATE TRIGGER fkc_delete_books_tags_link
+CREATE TRIGGER fkc_delete_on_tags
        BEFORE DELETE ON tags
        BEGIN
            SELECT CASE
                WHEN (SELECT COUNT(id) FROM books_tags_link WHERE tag=OLD.id) > 0
-               THEN RAISE(ABORT, 'Foreign key violation: tag is still referenced')
+               THEN RAISE(ABORT, 'Foreign key violation: tags is still referenced')
            END;
        END;
 CREATE TRIGGER fkc_insert_books_authors_link
@@ -267,6 +453,22 @@ CREATE TRIGGER fkc_update_books_authors_link_b
                THEN RAISE(ABORT, 'Foreign key violation: author not in authors')
            END;
        END;
+CREATE TRIGGER fkc_update_books_languages_link_a
+       BEFORE UPDATE OF book ON books_languages_link
+       BEGIN
+           SELECT CASE
+               WHEN (SELECT id from books WHERE id=NEW.book) IS NULL
+               THEN RAISE(ABORT, 'Foreign key violation: book not in books')
+           END;
+       END;
+CREATE TRIGGER fkc_update_books_languages_link_b
+       BEFORE UPDATE OF lang_code ON books_languages_link
+       BEGIN
+           SELECT CASE
+               WHEN (SELECT id from languages WHERE id=NEW.lang_code) IS NULL
+               THEN RAISE(ABORT, 'Foreign key violation: lang_code not in languages')
+           END;
+       END;
 CREATE TRIGGER fkc_update_books_publishers_link_a
        BEFORE UPDATE OF book ON books_publishers_link
        BEGIN
@@ -341,3 +543,4 @@ CREATE TRIGGER series_update_trg
        BEGIN
            UPDATE series SET sort=NEW.name WHERE id=NEW.id;
        END;
+pragma user_version=20;
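A small verification sketch (not part of the commit): the dump above ends by declaring schema version 20, so an upgraded library's metadata.db should report that version and contain the new tables. The database path is an example:

    import sqlite3

    conn = sqlite3.connect('metadata.db')
    # Expect 20 after the migration this schema corresponds to
    print 'user_version:', conn.execute('PRAGMA user_version').fetchone()[0]
    for (name,) in conn.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name IN "
            "('languages', 'books_languages_link', 'identifiers', 'preferences')"):
        print 'found new table:', name
    conn.close()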
Binary file not shown.
@@ -2,6 +2,11 @@ a {
     text-decoration: none;
     color: blue
 }
+
+a:hover {
+    color: red
+}
+
 .comments {
     margin-top: 0;
     padding-top: 0;
@@ -23,6 +23,9 @@ wWinMain(HINSTANCE Inst, HINSTANCE PrevInst,
     ret = execute_python_entrypoint(BASENAME, MODULE, FUNCTION,
             stdout_redirect, stderr_redirect);

+    if (stdout != NULL) fclose(stdout);
+    if (stderr != NULL) fclose(stderr);
+
     DeleteFile(stdout_redirect);
     DeleteFile(stderr_redirect);
@@ -69,7 +69,24 @@ nmake -f ms\ntdll.mak install
 Qt
 --------

-Extract Qt sourcecode to C:\Qt\4.x.x. Run configure and make::
+Extract Qt sourcecode to C:\Qt\4.x.x.
+
+Qt uses its own routine to locate and load "system libraries" including the openssl libraries needed for "Get Books". This means that we have to apply the following patch to have Qt load the openssl libraries bundled with calibre:
+
+
+--- src/corelib/plugin/qsystemlibrary.cpp	2011-02-22 05:04:00.000000000 -0700
++++ src/corelib/plugin/qsystemlibrary.cpp	2011-04-25 20:53:13.635247466 -0600
+@@ -110,7 +110,7 @@ HINSTANCE QSystemLibrary::load(const wch
+
+ #if !defined(QT_BOOTSTRAPPED)
+     if (!onlySystemDirectory)
+-        searchOrder << QFileInfo(qAppFileName()).path();
++        searchOrder << (QFileInfo(qAppFileName()).path().replace(QLatin1Char('/'), QLatin1Char('\\')) + QString::fromLatin1("\\DLLs\\"));
+ #endif
+     searchOrder << qSystemDirectory();
+
+
+Now, run configure and make::

 configure -opensource -release -qt-zlib -qt-gif -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc2008 -no-qt3support -webkit -xmlpatterns -no-phonon -no-style-plastique -no-style-cleanlooks -no-style-motif -no-style-cde -no-declarative -no-scripttools -no-audio-backend -no-multimedia -no-dbus -no-openvg -no-opengl -no-qt3support -confirm-license -nomake examples -nomake demos -nomake docs -openssl -I Q:\openssl\include -L Q:\openssl\lib && nmake
@@ -11,7 +11,10 @@
                 SummaryCodepage='1252' />

     <Media Id="1" Cabinet="{app}.cab" CompressionLevel="{compression}" EmbedCab="yes" />
+    <!-- The following line is needed because of the patch to QtCore4.dll. You can remove this line
+         after you update Qt beyond 4.7.2. 'emus' means re-install even if version is the same not just if it is older. -->
+    <Property Id='REINSTALLMODE' Value='emus'/>

     <Upgrade Id="{upgrade_code}">
         <UpgradeVersion Maximum="{version}"
                         IncludeMaximum="yes"
@@ -347,9 +347,10 @@ class UploadUserManual(Command): # {{{
         with NamedTemporaryFile(suffix='.zip') as f:
             os.fchmod(f.fileno(),
                 stat.S_IRUSR|stat.S_IRGRP|stat.S_IROTH|stat.S_IWRITE)
-            with CurrentDir(self.d(path)):
+            with CurrentDir(path):
                 with ZipFile(f, 'w') as zf:
                     for x in os.listdir('.'):
+                        if x.endswith('.swp'): continue
                         zf.write(x)
                         if os.path.isdir(x):
                             for y in os.listdir(x):
@@ -388,7 +388,11 @@ class CurrentDir(object):
         return self.cwd

     def __exit__(self, *args):
-        os.chdir(self.cwd)
+        try:
+            os.chdir(self.cwd)
+        except:
+            # The previous CWD no longer exists
+            pass


 class StreamReadWrapper(object):
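A sketch of the failure mode the new try/except guards against, assuming CurrentDir is importable from the calibre package (paths are throwaway temp directories):

    import os, shutil, tempfile
    from calibre import CurrentDir

    start = tempfile.mkdtemp()
    work = tempfile.mkdtemp()
    os.chdir(start)
    with CurrentDir(work):
        shutil.rmtree(start)   # the directory __exit__ would chdir back to is gone
    # before this change the exit raised OSError; now it passes silently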
@@ -4,7 +4,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = u'calibre'
-numeric_version = (0, 7, 57)
+numeric_version = (0, 8, 0)
 __version__   = u'.'.join(map(unicode, numeric_version))
 __author__    = u"Kovid Goyal <kovid@kovidgoyal.net>"
@@ -449,7 +449,7 @@ class CatalogPlugin(Plugin): # {{{
             ['author_sort','authors','comments','cover','formats',
              'id','isbn','ondevice','pubdate','publisher','rating',
              'series_index','series','size','tags','timestamp',
-             'title','uuid'])
+             'title_sort','title','uuid'])
         all_custom_fields = set(db.custom_field_keys())
         all_fields = all_std_fields.union(all_custom_fields)
@@ -607,6 +607,7 @@ class StoreBase(Plugin): # {{{
     supported_platforms = ['windows', 'osx', 'linux']
     author = 'John Schember'
     type = _('Store')
+    minimum_calibre_version = (0, 8, 0)

     actual_plugin = None
@@ -9,7 +9,6 @@ from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
 from calibre.constants import numeric_version
 from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
 from calibre.ebooks.metadata.opf2 import metadata_to_opf
-from calibre.utils.config import test_eight_code

 # To archive plugins {{{
 class HTML2ZIP(FileTypePlugin):
@@ -596,6 +595,7 @@ from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
 from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
 from calibre.devices.nook.driver import NOOK, NOOK_COLOR
 from calibre.devices.prs505.driver import PRS505
+from calibre.devices.user_defined.driver import USER_DEFINED
 from calibre.devices.android.driver import ANDROID, S60
 from calibre.devices.nokia.driver import N770, N810, E71X, E52
 from calibre.devices.eslick.driver import ESLICK, EBK52
@@ -613,6 +613,7 @@ from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, \
 from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
 from calibre.devices.kobo.driver import KOBO
 from calibre.devices.bambook.driver import BAMBOOK
+from calibre.devices.boeye.driver import BOEYE_BEX, BOEYE_BDX

 from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
 from calibre.ebooks.epub.fix.unmanifested import Unmanifested
@@ -621,29 +622,16 @@ from calibre.ebooks.epub.fix.epubcheck import Epubcheck
 plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
         Epubcheck, ]

-if test_eight_code:
 # New metadata download plugins {{{
 from calibre.ebooks.metadata.sources.google import GoogleBooks
 from calibre.ebooks.metadata.sources.amazon import Amazon
 from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
 from calibre.ebooks.metadata.sources.isbndb import ISBNDB
 from calibre.ebooks.metadata.sources.overdrive import OverDrive
 from calibre.ebooks.metadata.sources.douban import Douban

-    plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban]
+plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban]

 # }}}
-else:
-    from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
-            KentDistrictLibrary
-    from calibre.ebooks.metadata.douban import DoubanBooks
-    from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
-    from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
-            AmazonCovers, DoubanCovers
-
-    plugins += [GoogleBooks, ISBNDB, Amazon,
-            OpenLibraryCovers, AmazonCovers, DoubanCovers,
-            NiceBooksCovers, KentDistrictLibrary, DoubanBooks, NiceBooks]

 plugins += [
     ComicInput,
@@ -756,6 +744,9 @@ plugins += [
     EEEREADER,
     NEXTBOOK,
     ITUNES,
+    BOEYE_BEX,
+    BOEYE_BDX,
+    USER_DEFINED,
 ]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                         x.__name__.endswith('MetadataReader')]
@@ -868,10 +859,7 @@ plugins += [ActionAdd, ActionFetchAnnotations, ActionGenerateCatalog,
         ActionRestart, ActionOpenFolder, ActionConnectShare,
         ActionSendToDevice, ActionHelp, ActionPreferences, ActionSimilarBooks,
         ActionAddToLibrary, ActionEditCollections, ActionChooseLibrary,
-        ActionCopyToLibrary, ActionTweakEpub, ActionNextMatch]
+        ActionCopyToLibrary, ActionTweakEpub, ActionNextMatch, ActionStore]

-if test_eight_code:
-    plugins += [ActionStore]
-
 # }}}
@@ -1097,10 +1085,8 @@ class Misc(PreferencesPlugin):

 plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
         CommonOptions, OutputOptions, Adding, Saving, Sending, Plugboard,
-        Email, Server, Plugins, Tweaks, Misc, TemplateFunctions]
+        Email, Server, Plugins, Tweaks, Misc, TemplateFunctions,
+        MetadataSources]

-if test_eight_code:
-    plugins.append(MetadataSources)
-
 #}}}
@@ -1110,6 +1096,11 @@ class StoreAmazonKindleStore(StoreBase):
     description = _('Kindle books from Amazon')
     actual_plugin = 'calibre.gui2.store.amazon_plugin:AmazonKindleStore'

+class StoreAmazonUKKindleStore(StoreBase):
+    name = 'Amazon UK Kindle'
+    description = _('Kindle books from Amazon.uk')
+    actual_plugin = 'calibre.gui2.store.amazon_uk_plugin:AmazonUKKindleStore'
+
 class StoreBaenWebScriptionStore(StoreBase):
     name = 'Baen WebScription'
     description = _('Ebooks for readers.')
@@ -1175,10 +1166,27 @@ class StoreSmashwordsStore(StoreBase):
     description = _('Your ebook. Your way.')
     actual_plugin = 'calibre.gui2.store.smashwords_plugin:SmashwordsStore'

-plugins += [StoreAmazonKindleStore, StoreBaenWebScriptionStore, StoreBNStore,
+class StoreWaterstonesUKStore(StoreBase):
+    name = 'Waterstones UK'
+    description = _('Feel every word')
+    actual_plugin = 'calibre.gui2.store.waterstones_uk_plugin:WaterstonesUKStore'
+
+class StoreFoylesUKStore(StoreBase):
+    name = 'Foyles UK'
+    description = _('Foyles of London, online')
+    actual_plugin = 'calibre.gui2.store.foyles_uk_plugin:FoylesUKStore'
+
+class AmazonDEKindleStore(StoreBase):
+    name = 'Amazon DE Kindle'
+    description = _('Kindle eBooks')
+    actual_plugin = 'calibre.gui2.store.amazon_de_plugin:AmazonDEKindleStore'
+
+plugins += [StoreAmazonKindleStore, AmazonDEKindleStore, StoreAmazonUKKindleStore,
+        StoreBaenWebScriptionStore, StoreBNStore,
         StoreBeWriteStore, StoreDieselEbooksStore, StoreEbookscomStore,
-        StoreEHarlequinStoretore,
-        StoreFeedbooksStore, StoreGutenbergStore, StoreKoboStore, StoreManyBooksStore,
-        StoreMobileReadStore, StoreOpenLibraryStore, StoreSmashwordsStore]
+        StoreEHarlequinStoretore, StoreFeedbooksStore,
+        StoreFoylesUKStore, StoreGutenbergStore, StoreKoboStore, StoreManyBooksStore,
+        StoreMobileReadStore, StoreOpenLibraryStore, StoreSmashwordsStore,
+        StoreWaterstonesUKStore]

 # }}}
|
@ -15,12 +15,11 @@ from calibre.customize.profiles import InputProfile, OutputProfile
|
|||||||
from calibre.customize.builtins import plugins as builtin_plugins
|
from calibre.customize.builtins import plugins as builtin_plugins
|
||||||
from calibre.devices.interface import DevicePlugin
|
from calibre.devices.interface import DevicePlugin
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
from calibre.ebooks.metadata.covers import CoverDownload
|
from calibre.utils.config import (make_config_dir, Config, ConfigProxy,
|
||||||
from calibre.ebooks.metadata.fetch import MetadataSource
|
plugin_dir, OptionParser)
|
||||||
from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
|
|
||||||
plugin_dir, OptionParser, prefs
|
|
||||||
from calibre.ebooks.epub.fix import ePubFixer
|
from calibre.ebooks.epub.fix import ePubFixer
|
||||||
from calibre.ebooks.metadata.sources.base import Source
|
from calibre.ebooks.metadata.sources.base import Source
|
||||||
|
from calibre.constants import DEBUG
|
||||||
|
|
||||||
builtin_names = frozenset([p.name for p in builtin_plugins])
|
builtin_names = frozenset([p.name for p in builtin_plugins])
|
||||||
|
|
||||||
@@ -93,8 +92,7 @@ def restore_plugin_state_to_default(plugin_or_name):
     config['enabled_plugins'] = ep

 default_disabled_plugins = set([
-    'Douban Books', 'Douban.com covers', 'Nicebooks', 'Nicebooks covers',
-    'Kent District Library'
+    'Overdrive',
 ])

 def is_disabled(plugin):
@@ -190,44 +188,6 @@ def output_profiles():
         yield plugin
 # }}}

-# Metadata sources {{{
-def metadata_sources(metadata_type='basic', customize=True, isbndb_key=None):
-    for plugin in _initialized_plugins:
-        if isinstance(plugin, MetadataSource) and \
-                plugin.metadata_type == metadata_type:
-            if is_disabled(plugin):
-                continue
-            if customize:
-                customization = config['plugin_customization']
-                plugin.site_customization = customization.get(plugin.name, None)
-            if plugin.name == 'IsbnDB' and isbndb_key is not None:
-                plugin.site_customization = isbndb_key
-            yield plugin
-
-def get_isbndb_key():
-    return config['plugin_customization'].get('IsbnDB', None)
-
-def set_isbndb_key(key):
-    for plugin in _initialized_plugins:
-        if plugin.name == 'IsbnDB':
-            return customize_plugin(plugin, key)
-
-def migrate_isbndb_key():
-    key = prefs['isbndb_com_key']
-    if key:
-        prefs.set('isbndb_com_key', '')
-        set_isbndb_key(key)
-
-def cover_sources():
-    customization = config['plugin_customization']
-    for plugin in _initialized_plugins:
-        if isinstance(plugin, CoverDownload):
-            if not is_disabled(plugin):
-                plugin.site_customization = customization.get(plugin.name, '')
-                yield plugin
-
-# }}}
-
 # Interface Actions # {{{

 def interface_actions():
@@ -527,8 +487,9 @@ def initialize_plugins():
             plugin = initialize_plugin(plugin, None if isinstance(zfp, type) else zfp)
             _initialized_plugins.append(plugin)
         except:
-            print 'Failed to initialize plugin...'
-            traceback.print_exc()
+            print 'Failed to initialize plugin:', repr(zfp)
+            if DEBUG:
+                traceback.print_exc()
     _initialized_plugins.sort(cmp=lambda x,y:cmp(x.priority, y.priority), reverse=True)
     reread_filetype_plugins()
     reread_metadata_plugins()
@@ -156,3 +156,60 @@ def debug(ioreg_to_tmp=False, buf=None):
         sys.stdout = oldo
         sys.stderr = olde

+def device_info(ioreg_to_tmp=False, buf=None):
+    from calibre.devices.scanner import DeviceScanner, win_pnp_drives
+    from calibre.constants import iswindows
+    import re
+
+    res = {}
+    device_details = {}
+    device_set = set()
+    drive_details = {}
+    drive_set = set()
+    res['device_set'] = device_set
+    res['device_details'] = device_details
+    res['drive_details'] = drive_details
+    res['drive_set'] = drive_set
+
+    try:
+        s = DeviceScanner()
+        s.scan()
+        devices = (s.devices)
+        if not iswindows:
+            devices = [list(x) for x in devices]
+            for dev in devices:
+                for i in range(3):
+                    dev[i] = hex(dev[i])
+                d = dev[0] + dev[1] + dev[2]
+                device_set.add(d)
+                device_details[d] = dev[0:3]
+        else:
+            for dev in devices:
+                vid = re.search('vid_([0-9a-f]*)&', dev)
+                if vid:
+                    vid = vid.group(1)
+                pid = re.search('pid_([0-9a-f]*)&', dev)
+                if pid:
+                    pid = pid.group(1)
+                rev = re.search('rev_([0-9a-f]*)$', dev)
+                if rev:
+                    rev = rev.group(1)
+                d = vid+pid+rev
+                device_set.add(d)
+                device_details[d] = (vid, pid, rev)
+
+            drives = win_pnp_drives(debug=False)
+            for drive,details in drives.iteritems():
+                order = 'ORD_' + str(drive.order)
+                ven = re.search('VEN_([^&]*)&', details)
+                if ven:
+                    ven = ven.group(1)
+                prod = re.search('PROD_([^&]*)&', details)
+                if prod:
+                    prod = prod.group(1)
+                d = (order, ven, prod)
+                drive_details[drive] = d
+                drive_set.add(drive)
+    finally:
+        pass
+    return res
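For reference, the non-Windows branch of the new device_info() helper keys each device by concatenating the hex() forms of its vendor id, product id and BCD revision. A minimal standalone sketch of that keying scheme, with made-up example values (0x2080/0x0001/0x0100 are illustrative, not taken from the commit):

    # Illustrative values only; the real triples come from DeviceScanner().scan()
    dev = [0x2080, 0x0001, 0x0100]   # vendor id, product id, BCD revision
    dev = [hex(x) for x in dev]      # ['0x2080', '0x1', '0x100']
    key = dev[0] + dev[1] + dev[2]   # '0x20800x10x100'
    print key                        # this string is what lands in device_set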
@@ -62,7 +62,7 @@ class ANDROID(USBMS):
             0x502 : { 0x3203 : [0x0100]},

             # Dell
-            0x413c : { 0xb007 : [0x0100, 0x0224]},
+            0x413c : { 0xb007 : [0x0100, 0x0224, 0x0226]},

             # LG
             0x1004 : { 0x61cc : [0x100], 0x61ce : [0x100], 0x618e : [0x226] },
@@ -109,10 +109,10 @@ class ANDROID(USBMS):
             'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
             'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
             '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
-            'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE']
+            'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB']
     WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
             'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
-            'A70S', 'A101IT', '7', 'INCREDIBLE']
+            'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD']

     OSX_MAIN_MEM = 'Android Device Main Memory'

@@ -163,6 +163,8 @@ class ITUNES(DriverBase):
         settings()
         set_progress_reporter()
         upload_books()
+        _get_fpath()
+        _update_epub_metadata()
         add_books_to_metadata()
         use_plugboard_ext()
         set_plugboard()
|
|||||||
|
|
||||||
cached_books[this_book.path] = {
|
cached_books[this_book.path] = {
|
||||||
'title':book.Name,
|
'title':book.Name,
|
||||||
'author':book.artist().split(' & '),
|
'author':book.Artist.split(' & '),
|
||||||
'lib_book':library_books[this_book.path] if this_book.path in library_books else None,
|
'lib_book':library_books[this_book.path] if this_book.path in library_books else None,
|
||||||
'uuid': book.Composer,
|
'uuid': book.Composer,
|
||||||
'format': 'pdf' if book.KindAsString.startswith('PDF') else 'epub'
|
'format': 'pdf' if book.KindAsString.startswith('PDF') else 'epub'
|
||||||
@@ -504,7 +506,7 @@ class ITUNES(DriverBase):
         if self.iTunes:
             # Check for connected book-capable device
             self.sources = self._get_sources()
-            if 'iPod' in self.sources:
+            if 'iPod' in self.sources and not self.ejected:
                 #if DEBUG:
                     #sys.stdout.write('.')
                     #sys.stdout.flush()
@@ -2034,16 +2036,17 @@ class ITUNES(DriverBase):
         if 'iPod' in self.sources:
             connected_device = self.sources['iPod']
             device = self.iTunes.sources[connected_device]
+            dev_books = None
             for pl in device.playlists():
                 if pl.special_kind() == appscript.k.Books:
                     if DEBUG:
                         self.log.info(" Book playlist: '%s'" % (pl.name()))
-                    books = pl.file_tracks()
+                    dev_books = pl.file_tracks()
                     break
             else:
                 self.log.error(" book_playlist not found")

-            for book in books:
+            for book in dev_books:
                 # This may need additional entries for international iTunes users
                 if book.kind() in self.Audiobooks:
                     if DEBUG:
@@ -2621,42 +2624,42 @@ class ITUNES(DriverBase):
         # Touch the OPF timestamp
         try:
             zf_opf = ZipFile(fpath,'r')
+            fnames = zf_opf.namelist()
+            opf = [x for x in fnames if '.opf' in x][0]
         except:
             raise UserFeedback("'%s' is not a valid EPUB" % metadata.title,
                                None,
                                level=UserFeedback.WARN)
-        fnames = zf_opf.namelist()
-        opf = [x for x in fnames if '.opf' in x][0]
-        if opf:
-            opf_tree = etree.fromstring(zf_opf.read(opf))
-            md_els = opf_tree.xpath('.//*[local-name()="metadata"]')
-            if md_els:
-                ts = md_els[0].find('.//*[@name="calibre:timestamp"]')
-                if ts is not None:
-                    timestamp = ts.get('content')
-                    old_ts = parse_date(timestamp)
-                    metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
-                        old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
-                    if DEBUG:
-                        self.log.info(" existing timestamp: %s" % metadata.timestamp)
-                else:
-                    metadata.timestamp = now()
-                    if DEBUG:
-                        self.log.info(" add timestamp: %s" % metadata.timestamp)
-            else:
-                metadata.timestamp = now()
-                if DEBUG:
-                    self.log.warning(" missing <metadata> block in OPF file")
-                    self.log.info(" add timestamp: %s" % metadata.timestamp)
+        opf_tree = etree.fromstring(zf_opf.read(opf))
+        md_els = opf_tree.xpath('.//*[local-name()="metadata"]')
+        if md_els:
+            ts = md_els[0].find('.//*[@name="calibre:timestamp"]')
+            if ts is not None:
+                timestamp = ts.get('content')
+                old_ts = parse_date(timestamp)
+                metadata.timestamp = datetime.datetime(old_ts.year, old_ts.month, old_ts.day, old_ts.hour,
+                    old_ts.minute, old_ts.second, old_ts.microsecond+1, old_ts.tzinfo)
+                if DEBUG:
+                    self.log.info(" existing timestamp: %s" % metadata.timestamp)
+            else:
+                metadata.timestamp = now()
+                if DEBUG:
+                    self.log.info(" add timestamp: %s" % metadata.timestamp)
+        else:
+            metadata.timestamp = now()
+            if DEBUG:
+                self.log.warning(" missing <metadata> block in OPF file")
+                self.log.info(" add timestamp: %s" % metadata.timestamp)

         # Force the language declaration for iBooks 1.1
         #metadata.language = get_lang().replace('_', '-')

         # Updates from metadata plugboard (ignoring publisher)
         metadata.language = metadata_x.language

         if DEBUG:
             if metadata.language != metadata_x.language:
                 self.log.info(" rewriting language: <dc:language>%s</dc:language>" % metadata.language)

         zf_opf.close()

 src/calibre/devices/boeye/__init__.py |  0  (new file)
 src/calibre/devices/boeye/driver.py   | 56  (new file)
@@ -0,0 +1,56 @@
+__license__ = 'GPL v3'
+__copyright__ = '2011, Ken <ken at szboeye.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Device driver for BOEYE serial readers
+'''
+
+from calibre.devices.usbms.driver import USBMS
+
+class BOEYE_BEX(USBMS):
+    name = 'BOEYE BEX reader driver'
+    gui_name = 'BOEYE BEX'
+    description = _('Communicate with BOEYE BEX Serial eBook readers.')
+    author = 'szboeye'
+    supported_platforms = ['windows', 'osx', 'linux']
+
+    FORMATS = ['epub', 'mobi', 'fb2', 'lit', 'prc', 'pdf', 'rtf', 'txt', 'djvu', 'doc', 'chm', 'html', 'zip', 'pdb']
+
+    VENDOR_ID = [0x0085]
+    PRODUCT_ID = [0x600]
+
+    VENDOR_NAME = 'LINUX'
+    WINDOWS_MAIN_MEM = 'FILE-STOR_GADGET'
+    OSX_MAIN_MEM = 'Linux File-Stor Gadget Media'
+
+    MAIN_MEMORY_VOLUME_LABEL = 'BOEYE BEX Storage Card'
+
+    EBOOK_DIR_MAIN = 'Documents'
+    SUPPORTS_SUB_DIRS = True
+
+class BOEYE_BDX(USBMS):
+    name = 'BOEYE BDX reader driver'
+    gui_name = 'BOEYE BDX'
+    description = _('Communicate with BOEYE BDX serial eBook readers.')
+    author = 'szboeye'
+    supported_platforms = ['windows', 'osx', 'linux']
+
+    FORMATS = ['epub', 'mobi', 'fb2', 'lit', 'prc', 'pdf', 'rtf', 'txt', 'djvu', 'doc', 'chm', 'html', 'zip', 'pdb']
+
+    VENDOR_ID = [0x0085]
+    PRODUCT_ID = [0x800]
+
+    VENDOR_NAME = 'LINUX'
+    WINDOWS_MAIN_MEM = 'FILE-STOR_GADGET'
+    WINDOWS_CARD_A_MEM = 'FILE-STOR_GADGET'
+
+    OSX_MAIN_MEM = 'Linux File-Stor Gadget Media'
+    OSX_CARD_A_MEM = 'Linux File-Stor Gadget Media'
+
+    MAIN_MEMORY_VOLUME_LABEL = 'BOEYE BDX Internal Memory'
+    STORAGE_CARD_VOLUME_LABEL = 'BOEYE BDX Storage Card'
+
+    EBOOK_DIR_MAIN = 'Documents'
+    EBOOK_DIR_CARD_A = 'Documents'
+    SUPPORTS_SUB_DIRS = True
@@ -64,7 +64,7 @@ class HANLINV3(USBMS):
         return names

     def linux_swap_drives(self, drives):
-        if len(drives) < 2: return drives
+        if len(drives) < 2 or not drives[1] or not drives[2]: return drives
         drives = list(drives)
         t = drives[0]
         drives[0] = drives[1]
@@ -95,7 +95,6 @@ class HANLINV5(HANLINV3):
     gui_name = 'Hanlin V5'
     description = _('Communicate with Hanlin V5 eBook readers.')

-
     VENDOR_ID = [0x0492]
     PRODUCT_ID = [0x8813]
     BCD = [0x319]
@@ -164,7 +164,7 @@ class APNXBuilder(object):
                 if c == '/':
                     closing = True
                     continue
-                elif c in ('d', 'p'):
+                elif c == 'p':
                     if closing:
                         in_p = False
                     else:
@@ -187,7 +187,7 @@ class LUMIREAD(USBMS):
                 cfilepath = cfilepath.replace(os.sep+'books'+os.sep,
                         os.sep+'covers'+os.sep, 1)
                 pdir = os.path.dirname(cfilepath)
-                if not os.exists(pdir):
+                if not os.path.exists(pdir):
                     os.makedirs(pdir)
                 with open(cfilepath+'.jpg', 'wb') as f:
                     f.write(metadata.thumbnail[-1])
@@ -94,6 +94,9 @@ class DeviceConfig(object):
         if isinstance(cls.EXTRA_CUSTOMIZATION_MESSAGE, list):
             ec = []
             for i in range(0, len(cls.EXTRA_CUSTOMIZATION_MESSAGE)):
+                if config_widget.opt_extra_customization[i] is None:
+                    ec.append(None)
+                    continue
                 if hasattr(config_widget.opt_extra_customization[i], 'isChecked'):
                     ec.append(config_widget.opt_extra_customization[i].isChecked())
                 else:
 src/calibre/devices/user_defined/__init__.py |   0  (new file)
 src/calibre/devices/user_defined/driver.py   | 110  (new file)
@@ -0,0 +1,110 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.devices.usbms.driver import USBMS
+
+class USER_DEFINED(USBMS):
+
+    name = 'User Defined USB driver'
+    gui_name = 'User Defined USB Device'
+    author = 'Kovid Goyal'
+    supported_platforms = ['windows', 'osx', 'linux']
+
+    # Ordered list of supported formats
+    FORMATS = ['epub', 'mobi', 'pdf']
+
+    VENDOR_ID = 0xFFFF
+    PRODUCT_ID = 0xFFFF
+    BCD = None
+
+    EBOOK_DIR_MAIN = ''
+    EBOOK_DIR_CARD_A = ''
+
+    VENDOR_NAME = []
+    WINDOWS_MAIN_MEM = ''
+    WINDOWS_CARD_A_MEM = ''
+
+    OSX_MAIN_MEM = 'Device Main Memory'
+
+    MAIN_MEMORY_VOLUME_LABEL = 'Device Main Memory'
+
+    SUPPORTS_SUB_DIRS = True
+
+    EXTRA_CUSTOMIZATION_MESSAGE = [
+        _('USB Vendor ID (in hex)') + ':::<p>' +
+            _('Get this ID using Preferences -> Misc -> Get information to '
+              'set up the user-defined device') + '</p>',
+        _('USB Product ID (in hex)')+ ':::<p>' +
+            _('Get this ID using Preferences -> Misc -> Get information to '
+              'set up the user-defined device') + '</p>',
+        _('USB Revision ID (in hex)')+ ':::<p>' +
+            _('Get this ID using Preferences -> Misc -> Get information to '
+              'set up the user-defined device') + '</p>',
+        '',
+        _('Windows main memory vendor string') + ':::<p>' +
+            _('This field is used only on windows. '
+              'Get this ID using Preferences -> Misc -> Get information to '
+              'set up the user-defined device') + '</p>',
+        _('Windows main memory ID string') + ':::<p>' +
+            _('This field is used only on windows. '
+              'Get this ID using Preferences -> Misc -> Get information to '
+              'set up the user-defined device') + '</p>',
+        _('Windows card A vendor string') + ':::<p>' +
+            _('This field is used only on windows. '
+              'Get this ID using Preferences -> Misc -> Get information to '
+              'set up the user-defined device') + '</p>',
+        _('Windows card A ID string') + ':::<p>' +
+            _('This field is used only on windows. '
+              'Get this ID using Preferences -> Misc -> Get information to '
+              'set up the user-defined device') + '</p>',
+        _('Main memory folder') + ':::<p>' +
+            _('Enter the folder where the books are to be stored. This folder '
+              'is prepended to any send_to_device template') + '</p>',
+        _('Card A folder') + ':::<p>' +
+            _('Enter the folder where the books are to be stored. This folder '
+              'is prepended to any send_to_device template') + '</p>',
+    ]
+    EXTRA_CUSTOMIZATION_DEFAULT = [
+        '0xffff',
+        '0xffff',
+        '0xffff',
+        None,
+        '',
+        '',
+        '',
+        '',
+        '',
+        '',
+    ]
+    OPT_USB_VENDOR_ID = 0
+    OPT_USB_PRODUCT_ID = 1
+    OPT_USB_REVISION_ID = 2
+    OPT_USB_WINDOWS_MM_VEN_ID = 4
+    OPT_USB_WINDOWS_MM_ID = 5
+    OPT_USB_WINDOWS_CA_VEN_ID = 6
+    OPT_USB_WINDOWS_CA_ID = 7
+    OPT_MAIN_MEM_FOLDER = 8
+    OPT_CARD_A_FOLDER = 9
+
+    def initialize(self):
+        try:
+            e = self.settings().extra_customization
+            self.VENDOR_ID = int(e[self.OPT_USB_VENDOR_ID], 16)
+            self.PRODUCT_ID = int(e[self.OPT_USB_PRODUCT_ID], 16)
+            self.BCD = [int(e[self.OPT_USB_REVISION_ID], 16)]
+            if e[self.OPT_USB_WINDOWS_MM_VEN_ID]:
+                self.VENDOR_NAME.append(e[self.OPT_USB_WINDOWS_MM_VEN_ID])
+            if e[self.OPT_USB_WINDOWS_CA_VEN_ID] and \
+                    e[self.OPT_USB_WINDOWS_CA_VEN_ID] not in self.VENDOR_NAME:
+                self.VENDOR_NAME.append(e[self.OPT_USB_WINDOWS_CA_VEN_ID])
+            self.WINDOWS_MAIN_MEM = e[self.OPT_USB_WINDOWS_MM_ID] + '&'
+            self.WINDOWS_CARD_A_MEM = e[self.OPT_USB_WINDOWS_CA_ID] + '&'
+            self.EBOOK_DIR_MAIN = e[self.OPT_MAIN_MEM_FOLDER]
+            self.EBOOK_DIR_CARD_A = e[self.OPT_CARD_A_FOLDER]
+        except:
+            import traceback
+            traceback.print_exc()
+        USBMS.initialize(self)
@@ -19,12 +19,12 @@ class CHMInput(InputFormatPlugin):
     description = 'Convert CHM files to OEB'
     file_types = set(['chm'])

-    def _chmtohtml(self, output_dir, chm_path, no_images, log):
+    def _chmtohtml(self, output_dir, chm_path, no_images, log, debug_dump=False):
         from calibre.ebooks.chm.reader import CHMReader
         log.debug('Opening CHM file')
         rdr = CHMReader(chm_path, log, self.opts)
         log.debug('Extracting CHM to %s' % output_dir)
-        rdr.extract_content(output_dir)
+        rdr.extract_content(output_dir, debug_dump=debug_dump)
         self._chm_reader = rdr
         return rdr.hhc_path

@@ -47,7 +47,12 @@ class CHMInput(InputFormatPlugin):
         stream.close()
         log.debug('tdir=%s' % tdir)
         log.debug('stream.name=%s' % stream.name)
-        mainname = self._chmtohtml(tdir, chm_name, no_images, log)
+        debug_dump = False
+        odi = options.debug_pipeline
+        if odi:
+            debug_dump = os.path.join(odi, 'input')
+        mainname = self._chmtohtml(tdir, chm_name, no_images, log,
+                debug_dump=debug_dump)
         mainpath = os.path.join(tdir, mainname)

         metadata = get_metadata_from_reader(self._chm_reader)
@@ -56,7 +61,6 @@ class CHMInput(InputFormatPlugin):
         #from calibre import ipython
         #ipython()

-        odi = options.debug_pipeline
         options.debug_pipeline = None
         options.input_encoding = 'utf-8'
         # try a custom conversion:
@@ -97,7 +97,7 @@ class CHMReader(CHMFile):
             raise CHMError("'%s' is zero bytes in length!"%(path,))
         return data

-    def ExtractFiles(self, output_dir=os.getcwdu()):
+    def ExtractFiles(self, output_dir=os.getcwdu(), debug_dump=False):
         html_files = set([])
         for path in self.Contents():
             lpath = os.path.join(output_dir, path)
@@ -123,6 +123,9 @@ class CHMReader(CHMFile):
                     self.log.warn('%r filename too long, skipping'%path)
                     continue
                 raise
+        if debug_dump:
+            import shutil
+            shutil.copytree(output_dir, os.path.join(debug_dump, 'debug_dump'))
         for lpath in html_files:
             with open(lpath, 'r+b') as f:
                 data = f.read()
@@ -249,8 +252,8 @@ class CHMReader(CHMFile):
         if not os.path.isdir(dir):
             os.makedirs(dir)

-    def extract_content(self, output_dir=os.getcwdu()):
-        self.ExtractFiles(output_dir=output_dir)
+    def extract_content(self, output_dir=os.getcwdu(), debug_dump=False):
+        self.ExtractFiles(output_dir=output_dir, debug_dump=debug_dump)

|
@ -854,7 +854,8 @@ OptionRecommendation(name='sr3_replace',
|
|||||||
if isinstance(ret, basestring):
|
if isinstance(ret, basestring):
|
||||||
shutil.copytree(output_dir, out_dir)
|
shutil.copytree(output_dir, out_dir)
|
||||||
else:
|
else:
|
||||||
os.makedirs(out_dir)
|
if not os.path.exists(out_dir):
|
||||||
|
os.makedirs(out_dir)
|
||||||
self.dump_oeb(ret, out_dir)
|
self.dump_oeb(ret, out_dir)
|
||||||
if self.input_fmt == 'recipe':
|
if self.input_fmt == 'recipe':
|
||||||
zf = ZipFile(os.path.join(self.opts.debug_pipeline,
|
zf = ZipFile(os.path.join(self.opts.debug_pipeline,
|
||||||
|
@@ -402,7 +402,7 @@ class HTMLPreProcessor(object):
                   (re.compile(r'((?<=</a>)\s*file:/{2,4}[A-Z].*<br>|file:////?[A-Z].*<br>(?=\s*<hr>))', re.IGNORECASE), lambda match: ''),

                   # Center separator lines
-                  (re.compile(u'<br>\s*(?P<break>([*#•✦=]+\s*)+)\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group(1) + '</p>'),
+                  (re.compile(u'<br>\s*(?P<break>([*#•✦=] *){3,})\s*<br>'), lambda match: '<p>\n<p style="text-align:center">' + match.group('break') + '</p>'),

                   # Remove page links
                   (re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''),
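The rewritten separator pattern requires a run of at least three break characters between the <br> tags, and the replacement now names the group it re-emits; the old pattern matched any run, so a lone pair of asterisks in ordinary text could be centered as a scene break. A standalone check of the difference (the sample line is made up for illustration):

    # -*- coding: utf-8 -*-
    import re
    old = re.compile(u'<br>\s*(?P<break>([*#•✦=]+\s*)+)\s*<br>')
    new = re.compile(u'<br>\s*(?P<break>([*#•✦=] *){3,})\s*<br>')
    line = u'<br> * * <br>'                # only two break characters
    print old.search(line) is not None     # True: the old pattern over-matches
    print new.search(line) is not None     # False: the new one wants three or more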
@@ -156,17 +156,17 @@ class HeuristicProcessor(object):
         ]

         ITALICIZE_STYLE_PATS = [
-            r'(?msu)(?<=[\s>])_(?P<words>[^_]+)_',
-            r'(?msu)(?<=[\s>])/(?P<words>[^/\*>]+)/',
-            r'(?msu)(?<=[\s>])~~(?P<words>[^~]+)~~',
-            r'(?msu)(?<=[\s>])\*(?P<words>[^\*]+)\*',
-            r'(?msu)(?<=[\s>])~(?P<words>[^~]+)~',
-            r'(?msu)(?<=[\s>])_/(?P<words>[^/_]+)/_',
-            r'(?msu)(?<=[\s>])_\*(?P<words>[^\*_]+)\*_',
-            r'(?msu)(?<=[\s>])\*/(?P<words>[^/\*]+)/\*',
-            r'(?msu)(?<=[\s>])_\*/(?P<words>[^\*_]+)/\*_',
-            r'(?msu)(?<=[\s>])/:(?P<words>[^:/]+):/',
-            r'(?msu)(?<=[\s>])\|:(?P<words>[^:\|]+):\|',
+            ur'(?msu)(?<=[\s>"“\'‘])_(?P<words>[^_]+)_',
+            ur'(?msu)(?<=[\s>"“\'‘])/(?P<words>[^/\*>]+)/',
+            ur'(?msu)(?<=[\s>"“\'‘])~~(?P<words>[^~]+)~~',
+            ur'(?msu)(?<=[\s>"“\'‘])\*(?P<words>[^\*]+)\*',
+            ur'(?msu)(?<=[\s>"“\'‘])~(?P<words>[^~]+)~',
+            ur'(?msu)(?<=[\s>"“\'‘])_/(?P<words>[^/_]+)/_',
+            ur'(?msu)(?<=[\s>"“\'‘])_\*(?P<words>[^\*_]+)\*_',
+            ur'(?msu)(?<=[\s>"“\'‘])\*/(?P<words>[^/\*]+)/\*',
+            ur'(?msu)(?<=[\s>"“\'‘])_\*/(?P<words>[^\*_]+)/\*_',
+            ur'(?msu)(?<=[\s>"“\'‘])/:(?P<words>[^:/]+):/',
+            ur'(?msu)(?<=[\s>"“\'‘])\|:(?P<words>[^:\|]+):\|',
         ]

         for word in ITALICIZE_WORDS:
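The new patterns are unicode literals with a widened lookbehind, so an emphasis marker that directly follows an opening quote is now recognised; under the old (?<=[\s>]) lookbehind, _word_ right after a double quote never matched. A standalone sketch of the first pattern (wrapping the match in <i> is illustrative here; the processor's actual substitution is defined elsewhere):

    # -*- coding: utf-8 -*-
    import re
    pat = re.compile(ur'(?msu)(?<=[\s>"“\'‘])_(?P<words>[^_]+)_')
    print pat.sub(ur'<i>\g<words></i>', u'He said "_really_"')
    # prints: He said "<i>really</i>"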
@@ -518,13 +518,13 @@ class HeuristicProcessor(object):
         if re.findall('(<|>)', replacement_break):
             if re.match('^<hr', replacement_break):
                 if replacement_break.find('width') != -1:
                     width = int(re.sub('.*?width(:|=)(?P<wnum>\d+).*', '\g<wnum>', replacement_break))
                     replacement_break = re.sub('(?i)(width=\d+\%?|width:\s*\d+(\%|px|pt|em)?;?)', '', replacement_break)
                     divpercent = (100 - width) / 2
                     hr_open = re.sub('45', str(divpercent), hr_open)
                     scene_break = hr_open+replacement_break+'</div>'
                 else:
                     scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
             elif re.match('^<img', replacement_break):
                 scene_break = self.scene_break_open+replacement_break+'</p>'
             else:
@@ -584,10 +584,10 @@ class HeuristicProcessor(object):
             #print "styles for this line are: "+str(styles)
             split_styles = []
             for style in styles:
                 #print "style is: "+str(style)
                 newstyle = style.split(':')
                 #print "newstyle is: "+str(newstyle)
                 split_styles.append(newstyle)
             styles = split_styles
             for style, setting in styles:
                 if style == 'text-align' and setting != 'left':
@@ -309,9 +309,9 @@ class HTMLInput(InputFormatPlugin):

     def create_oebbook(self, htmlpath, basedir, opts, log, mi):
         from calibre.ebooks.conversion.plumber import create_oebbook
-        from calibre.ebooks.oeb.base import DirContainer, \
-            rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES, \
-            xpath
+        from calibre.ebooks.oeb.base import (DirContainer,
+            rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
+            xpath)
         from calibre import guess_type
         from calibre.ebooks.oeb.transforms.metadata import \
             meta_info_to_oeb_metadata
@@ -345,7 +345,8 @@ class HTMLInput(InputFormatPlugin):
         htmlfile_map = {}
         for f in filelist:
             path = f.path
-            oeb.container = DirContainer(os.path.dirname(path), log)
+            oeb.container = DirContainer(os.path.dirname(path), log,
+                    ignore_opf=True)
             bname = os.path.basename(path)
             id, href = oeb.manifest.generate(id='html',
                     href=ascii_filename(bname))
@@ -369,7 +370,7 @@ class HTMLInput(InputFormatPlugin):
         for f in filelist:
             path = f.path
             dpath = os.path.dirname(path)
-            oeb.container = DirContainer(dpath, log)
+            oeb.container = DirContainer(dpath, log, ignore_opf=True)
             item = oeb.manifest.hrefs[htmlfile_map[path]]
             rewrite_links(item.data, partial(self.resource_adder, base=dpath))

@@ -409,7 +410,7 @@ class HTMLInput(InputFormatPlugin):
             if not item.linear: continue
             toc.add(title, item.href)

-        oeb.container = DirContainer(os.getcwdu(), oeb.log)
+        oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
         return oeb

     def link_to_local_path(self, link_, base=None):
@@ -456,7 +457,7 @@ class HTMLInput(InputFormatPlugin):
                 href=bhref)
         self.oeb.log.debug('Added', link)
         self.oeb.container = self.DirContainer(os.path.dirname(link),
-                self.oeb.log)
+                self.oeb.log, ignore_opf=True)
         # Load into memory
         guessed = self.guess_type(href)[0]
         media_type = guessed or self.BINARY_MIME
@@ -7,10 +7,12 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

 import os
+import posixpath

-from calibre import walk
+from calibre import guess_type, walk
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.metadata.opf2 import OPF
 from calibre.utils.zipfile import ZipFile

 class HTMLZInput(InputFormatPlugin):
@@ -27,7 +29,7 @@ class HTMLZInput(InputFormatPlugin):

         # Extract content from zip archive.
         zf = ZipFile(stream)
-        zf.extractall('.')
+        zf.extractall()

         for x in walk('.'):
             if os.path.splitext(x)[1].lower() in ('.html', '.xhtml', '.htm'):
@@ -70,5 +72,24 @@ class HTMLZInput(InputFormatPlugin):
             from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
             mi = get_file_type_metadata(stream, file_ext)
             meta_info_to_oeb_metadata(mi, oeb.metadata, log)

+        # Get the cover path from the OPF.
+        cover_href = None
+        opf = None
+        for x in walk('.'):
+            if os.path.splitext(x)[1].lower() in ('.opf'):
+                opf = x
+                break
+        if opf:
+            opf = OPF(opf)
+            cover_href = posixpath.relpath(opf.cover, os.path.dirname(stream.name))
+        # Set the cover.
+        if cover_href:
+            cdata = None
+            with open(cover_href, 'rb') as cf:
+                cdata = cf.read()
+            id, href = oeb.manifest.generate('cover', cover_href)
+            oeb.manifest.add(id, href, guess_type(cover_href)[0], data=cdata)
+            oeb.guide.add('cover', 'Cover', href)
+
         return oeb
@@ -7,11 +7,13 @@ __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

 import os
+from cStringIO import StringIO

 from lxml import etree

 from calibre.customize.conversion import OutputFormatPlugin, \
     OptionRecommendation
+from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.zipfile import ZipFile

@@ -79,10 +81,31 @@ class HTMLZOutput(OutputFormatPlugin):
                     fname = os.path.join(tdir, 'images', images[item.href])
                     with open(fname, 'wb') as img:
                         img.write(data)

+            # Cover
+            cover_path = None
+            try:
+                cover_data = None
+                if oeb_book.metadata.cover:
+                    term = oeb_book.metadata.cover[0].term
+                    cover_data = oeb_book.guide[term].item.data
+                if cover_data:
+                    from calibre.utils.magick.draw import save_cover_data_to
+                    cover_path = os.path.join(tdir, 'cover.jpg')
+                    with open(cover_path, 'w') as cf:
+                        cf.write('')
+                    save_cover_data_to(cover_data, cover_path)
+            except:
+                import traceback
+                traceback.print_exc()
+
             # Metadata
             with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
-                mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))
+                opf = OPF(StringIO(etree.tostring(oeb_book.metadata.to_opf1())))
+                mi = opf.to_book_metadata()
+                if cover_path:
+                    mi.cover = 'cover.jpg'
+                mdataf.write(metadata_to_opf(mi))

             htmlz = ZipFile(output_path, 'w')
             htmlz.add_dir(tdir)
@@ -274,6 +274,9 @@ def check_isbn(isbn):
     if not isbn:
         return None
     isbn = re.sub(r'[^0-9X]', '', isbn.upper())
+    all_same = re.match(r'(\d)\1{9,12}$', isbn)
+    if all_same is not None:
+        return None
     if len(isbn) == 10:
         return check_isbn10(isbn)
     if len(isbn) == 13:
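The added guard rejects "ISBNs" that are a single digit repeated ten to thirteen times before any checksum is computed; a string of ten 1s, for example, would otherwise pass the ISBN-10 check (its weighted sum is 55, which is divisible by 11). A standalone sketch of just the guard:

    import re
    for candidate in ('1111111111', '9999999999999', '0306406152'):
        all_same = re.match(r'(\d)\1{9,12}$', candidate)
        print candidate, 'rejected' if all_same else 'passed on to checksum validation'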
@@ -1,224 +0,0 @@
-#!/usr/bin/env python
-__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-'''
-Fetch metadata using Amazon AWS
-'''
-import sys, re
-from threading import RLock
-
-from lxml import html
-from lxml.html import soupparser
-
-from calibre import browser
-from calibre.ebooks.metadata import check_isbn
-from calibre.ebooks.metadata.book.base import Metadata
-from calibre.ebooks.chardet import xml_to_unicode
-from calibre.library.comments import sanitize_comments_html
-
-asin_cache = {}
-cover_url_cache = {}
-cache_lock = RLock()
-
-def find_asin(br, isbn):
-    q = 'http://www.amazon.com/s/?search-alias=aps&field-keywords='+isbn
-    res = br.open_novisit(q)
-    raw = res.read()
-    raw = xml_to_unicode(raw, strip_encoding_pats=True,
-            resolve_entities=True)[0]
-    root = html.fromstring(raw)
-    revs = root.xpath('//*[@class="asinReviewsSummary" and @name]')
-    revs = [x.get('name') for x in revs]
-    if revs:
-        return revs[0]
-
-def to_asin(br, isbn):
-    with cache_lock:
-        ans = asin_cache.get(isbn, None)
-    if ans:
-        return ans
-    if ans is False:
-        return None
-    if len(isbn) == 13:
-        try:
-            asin = find_asin(br, isbn)
-        except:
-            import traceback
-            traceback.print_exc()
-            asin = None
-    else:
-        asin = isbn
-    with cache_lock:
-        asin_cache[isbn] = asin if asin else False
-    return asin
-
-
-def get_social_metadata(title, authors, publisher, isbn):
-    mi = Metadata(title, authors)
-    if not isbn:
-        return mi
-    isbn = check_isbn(isbn)
-    if not isbn:
-        return mi
-    br = browser()
-    asin = to_asin(br, isbn)
-    if asin and get_metadata(br, asin, mi):
-        return mi
-    from calibre.ebooks.metadata.xisbn import xisbn
-    for i in xisbn.get_associated_isbns(isbn):
-        asin = to_asin(br, i)
-        if asin and get_metadata(br, asin, mi):
-            return mi
-    return mi
-
-def get_cover_url(isbn, br):
-    isbn = check_isbn(isbn)
-    if not isbn:
-        return None
-    with cache_lock:
-        ans = cover_url_cache.get(isbn, None)
-    if ans:
-        return ans
-    if ans is False:
-        return None
-    asin = to_asin(br, isbn)
-    if asin:
-        ans = _get_cover_url(br, asin)
-        if ans:
-            with cache_lock:
-                cover_url_cache[isbn] = ans
-            return ans
-    from calibre.ebooks.metadata.xisbn import xisbn
-    for i in xisbn.get_associated_isbns(isbn):
-        asin = to_asin(br, i)
-        if asin:
-            ans = _get_cover_url(br, asin)
-            if ans:
-                with cache_lock:
-                    cover_url_cache[isbn] = ans
-                    cover_url_cache[i] = ans
-                return ans
-    with cache_lock:
-        cover_url_cache[isbn] = False
-    return None
-
-def _get_cover_url(br, asin):
-    q = 'http://amzn.com/'+asin
-    try:
-        raw = br.open_novisit(q).read()
-    except Exception as e:
-        if callable(getattr(e, 'getcode', None)) and \
-                e.getcode() == 404:
-            return None
-        raise
-    if '<title>404 - ' in raw:
-        return None
-    raw = xml_to_unicode(raw, strip_encoding_pats=True,
-            resolve_entities=True)[0]
-    try:
-        root = soupparser.fromstring(raw)
-    except:
-        return False
-
-    imgs = root.xpath('//img[@id="prodImage" and @src]')
-    if imgs:
-        src = imgs[0].get('src')
-        parts = src.split('/')
-        if len(parts) > 3:
-            bn = parts[-1]
-            sparts = bn.split('_')
-            if len(sparts) > 2:
-                bn = sparts[0] + sparts[-1]
-            return ('/'.join(parts[:-1]))+'/'+bn
-    return None
-
-
-def get_metadata(br, asin, mi):
-    q = 'http://amzn.com/'+asin
-    try:
-        raw = br.open_novisit(q).read()
-    except Exception as e:
-        if callable(getattr(e, 'getcode', None)) and \
-                e.getcode() == 404:
-            return False
-        raise
-    if '<title>404 - ' in raw:
-        return False
-    raw = xml_to_unicode(raw, strip_encoding_pats=True,
-            resolve_entities=True)[0]
-    try:
-        root = soupparser.fromstring(raw)
-    except:
-        return False
-    if root.xpath('//*[@id="errorMessage"]'):
-        return False
-
-    ratings = root.xpath('//div[@class="jumpBar"]/descendant::span[@class="asinReviewsSummary"]')
-    pat = re.compile(r'([0-9.]+) out of (\d+) stars')
-    if ratings:
-        for elem in ratings[0].xpath('descendant::*[@title]'):
-            t = elem.get('title').strip()
-            m = pat.match(t)
-            if m is not None:
-                try:
-                    mi.rating = float(m.group(1))/float(m.group(2)) * 5
-                except:
-                    pass
-
-    desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
-    if desc:
-        desc = desc[0]
-        for c in desc.xpath('descendant::*[@class="seeAll" or'
-                ' @class="emptyClear" or @href]'):
-            c.getparent().remove(c)
-        desc = html.tostring(desc, method='html', encoding=unicode).strip()
-        # remove all attributes from tags
-        desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
-        # Collapse whitespace
-        #desc = re.sub('\n+', '\n', desc)
-        #desc = re.sub(' +', ' ', desc)
-        # Remove the notice about text referring to out of print editions
-        desc = re.sub(r'(?s)<em>--This text ref.*?</em>', '', desc)
-        # Remove comments
-        desc = re.sub(r'(?s)<!--.*?-->', '', desc)
-        mi.comments = sanitize_comments_html(desc)
-
-    return True
-
-
-def main(args=sys.argv):
-    import tempfile, os
-    tdir = tempfile.gettempdir()
-    br = browser()
-    for title, isbn in [
-            ('The Heroes', '9780316044981'), # Test find_asin
-            ('Learning Python', '8324616489'), # Test xisbn
-            ('Angels & Demons', '9781416580829'), # Test sophisticated comment formatting
-            # Random tests
-            ('Star Trek: Destiny: Mere Mortals', '9781416551720'),
-            ('The Great Gatsby', '0743273567'),
-            ]:
-        cpath = os.path.join(tdir, title+'.jpg')
-        curl = get_cover_url(isbn, br)
-        if curl is None:
-            print 'No cover found for', title
-        else:
-            open(cpath, 'wb').write(br.open_novisit(curl).read())
-            print 'Cover for', title, 'saved to', cpath
-
-        #import time
-        #st = time.time()
-        mi = get_social_metadata(title, None, None, isbn)
-        if not mi.comments:
-            print 'Failed to download social metadata for', title
-            return 1
-        #print '\n\n', time.time() - st, '\n\n'
-        print mi
-        print '\n'
-
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
@ -1,516 +0,0 @@
|
|||||||
from __future__ import with_statement
|
|
||||||
__license__ = 'GPL 3'
|
|
||||||
__copyright__ = '2010, sengian <sengian1@gmail.com>'
|
|
||||||
|
|
||||||
import sys, textwrap, re, traceback
|
|
||||||
from urllib import urlencode
|
|
||||||
from math import ceil
|
|
||||||
|
|
||||||
from lxml import html
|
|
||||||
from lxml.html import soupparser
|
|
||||||
|
|
||||||
from calibre.utils.date import parse_date, utcnow, replace_months
|
|
||||||
from calibre.utils.cleantext import clean_ascii_chars
|
|
||||||
from calibre import browser, preferred_encoding
|
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
|
||||||
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
|
|
||||||
authors_to_sort_string
|
|
||||||
from calibre.ebooks.metadata.fetch import MetadataSource
|
|
||||||
from calibre.utils.config import OptionParser
|
|
||||||
from calibre.library.comments import sanitize_comments_html
|
|
||||||
|
|
||||||
|
|
||||||
class AmazonFr(MetadataSource):
|
|
||||||
|
|
||||||
name = 'Amazon French'
|
|
||||||
description = _('Downloads metadata from amazon.fr')
|
|
||||||
supported_platforms = ['windows', 'osx', 'linux']
|
|
||||||
author = 'Sengian'
|
|
||||||
version = (1, 0, 0)
|
|
||||||
has_html_comments = True
|
|
||||||
|
|
||||||
def fetch(self):
|
|
||||||
try:
|
|
||||||
self.results = search(self.title, self.book_author, self.publisher,
|
|
||||||
self.isbn, max_results=10, verbose=self.verbose, lang='fr')
|
|
||||||
except Exception as e:
|
|
||||||
self.exception = e
|
|
||||||
self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
class AmazonEs(MetadataSource):
|
|
||||||
|
|
||||||
name = 'Amazon Spanish'
|
|
||||||
description = _('Downloads metadata from amazon.com in spanish')
|
|
||||||
supported_platforms = ['windows', 'osx', 'linux']
|
|
||||||
author = 'Sengian'
|
|
||||||
version = (1, 0, 0)
|
|
||||||
has_html_comments = True
|
|
||||||
|
|
||||||
def fetch(self):
|
|
||||||
try:
|
|
||||||
self.results = search(self.title, self.book_author, self.publisher,
|
|
||||||
self.isbn, max_results=10, verbose=self.verbose, lang='es')
|
|
||||||
except Exception as e:
|
|
||||||
self.exception = e
|
|
||||||
self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
class AmazonEn(MetadataSource):
|
|
||||||
|
|
||||||
name = 'Amazon English'
|
|
||||||
description = _('Downloads metadata from amazon.com in english')
|
|
||||||
supported_platforms = ['windows', 'osx', 'linux']
|
|
||||||
author = 'Sengian'
|
|
||||||
version = (1, 0, 0)
|
|
||||||
has_html_comments = True
|
|
||||||
|
|
||||||
def fetch(self):
|
|
||||||
try:
|
|
||||||
self.results = search(self.title, self.book_author, self.publisher,
|
|
||||||
self.isbn, max_results=10, verbose=self.verbose, lang='en')
|
|
||||||
except Exception as e:
|
|
||||||
self.exception = e
|
|
||||||
self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
class AmazonDe(MetadataSource):
|
|
||||||
|
|
||||||
name = 'Amazon German'
|
|
||||||
description = _('Downloads metadata from amazon.de')
|
|
||||||
supported_platforms = ['windows', 'osx', 'linux']
|
|
||||||
author = 'Sengian'
|
|
||||||
version = (1, 0, 0)
|
|
||||||
has_html_comments = True
|
|
||||||
|
|
||||||
def fetch(self):
|
|
||||||
try:
|
|
||||||
self.results = search(self.title, self.book_author, self.publisher,
|
|
||||||
self.isbn, max_results=10, verbose=self.verbose, lang='de')
|
|
||||||
except Exception as e:
|
|
||||||
self.exception = e
|
|
||||||
self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
class Amazon(MetadataSource):
|
|
||||||
|
|
||||||
name = 'Amazon'
|
|
||||||
description = _('Downloads metadata from amazon.com')
|
|
||||||
supported_platforms = ['windows', 'osx', 'linux']
|
|
||||||
author = 'Kovid Goyal & Sengian'
|
|
||||||
version = (1, 1, 0)
|
|
||||||
has_html_comments = True
|
|
||||||
|
|
||||||
def fetch(self):
|
|
||||||
# if not self.site_customization:
|
|
||||||
# return
|
|
||||||
try:
|
|
||||||
self.results = search(self.title, self.book_author, self.publisher,
|
|
||||||
self.isbn, max_results=10, verbose=self.verbose, lang='all')
|
|
||||||
except Exception as e:
|
|
||||||
self.exception = e
|
|
||||||
self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
# @property
|
|
||||||
# def string_customization_help(self):
|
|
||||||
# return _('You can select here the language for metadata search with amazon.com')
|
|
||||||
|
|
||||||
|
|
||||||
def report(verbose):
|
|
||||||
if verbose:
|
|
||||||
traceback.print_exc()
|
|
||||||
|
|
||||||
|
|
||||||
class Query(object):
|
|
||||||
|
|
||||||
BASE_URL_ALL = 'http://www.amazon.com'
|
|
||||||
BASE_URL_FR = 'http://www.amazon.fr'
|
|
||||||
BASE_URL_DE = 'http://www.amazon.de'
|
|
||||||
|
|
||||||
def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None,
|
|
||||||
max_results=20, rlang='all'):
|
|
||||||
assert not(title is None and author is None and publisher is None \
|
|
||||||
and isbn is None and keywords is None)
|
|
||||||
assert (max_results < 21)
|
|
||||||
|
|
||||||
self.max_results = int(max_results)
|
|
||||||
self.renbres = re.compile(u'\s*(\d+)\s*')
|
|
||||||
|
|
||||||
q = { 'search-alias' : 'stripbooks' ,
|
|
||||||
'unfiltered' : '1',
|
|
||||||
'field-keywords' : '',
|
|
||||||
'field-author' : '',
|
|
||||||
'field-title' : '',
|
|
||||||
'field-isbn' : '',
|
|
||||||
'field-publisher' : ''
|
|
||||||
#get to amazon detailed search page to get all options
|
|
||||||
# 'node' : '',
|
|
||||||
# 'field-binding' : '',
|
|
||||||
#before, during, after
|
|
||||||
# 'field-dateop' : '',
|
|
||||||
#month as number
|
|
||||||
# 'field-datemod' : '',
|
|
||||||
# 'field-dateyear' : '',
|
|
||||||
#french only
|
|
||||||
# 'field-collection' : '',
|
|
||||||
#many options available
|
|
||||||
}
|
|
||||||
|
|
||||||
if rlang =='all':
|
|
||||||
q['sort'] = 'relevanceexprank'
|
|
||||||
self.urldata = self.BASE_URL_ALL
|
|
||||||
elif rlang =='es':
|
|
||||||
q['sort'] = 'relevanceexprank'
|
|
||||||
q['field-language'] = 'Spanish'
|
|
||||||
self.urldata = self.BASE_URL_ALL
|
|
||||||
elif rlang =='en':
|
|
||||||
q['sort'] = 'relevanceexprank'
|
|
||||||
q['field-language'] = 'English'
|
|
||||||
self.urldata = self.BASE_URL_ALL
|
|
||||||
elif rlang =='fr':
|
|
||||||
q['sort'] = 'relevancerank'
|
|
||||||
self.urldata = self.BASE_URL_FR
|
|
||||||
elif rlang =='de':
|
|
||||||
q['sort'] = 'relevancerank'
|
|
||||||
self.urldata = self.BASE_URL_DE
|
|
||||||
self.baseurl = self.urldata
|
|
||||||
|
|
||||||
if isbn is not None:
|
|
||||||
q['field-isbn'] = isbn.replace('-', '')
|
|
||||||
else:
|
|
||||||
if title is not None:
|
|
||||||
q['field-title'] = title
|
|
||||||
if author is not None:
|
|
||||||
q['field-author'] = author
|
|
||||||
if publisher is not None:
|
|
||||||
q['field-publisher'] = publisher
|
|
||||||
if keywords is not None:
|
|
||||||
q['field-keywords'] = keywords
|
|
||||||
|
|
||||||
if isinstance(q, unicode):
|
|
||||||
q = q.encode('utf-8')
|
|
||||||
self.urldata += '/gp/search/ref=sr_adv_b/?' + urlencode(q)
|
|
||||||
|
|
||||||
def __call__(self, browser, verbose, timeout = 5.):
|
|
||||||
if verbose:
|
|
||||||
print 'Query:', self.urldata
|
|
||||||
|
|
||||||
try:
|
|
||||||
raw = browser.open_novisit(self.urldata, timeout=timeout).read()
|
|
||||||
except Exception as e:
|
|
||||||
report(verbose)
|
|
||||||
if callable(getattr(e, 'getcode', None)) and \
|
|
||||||
e.getcode() == 404:
|
|
||||||
return
|
|
||||||
raise
|
|
||||||
if '<title>404 - ' in raw:
|
|
||||||
return
|
|
||||||
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
|
||||||
resolve_entities=True)[0]
|
|
||||||
|
|
||||||
try:
|
|
||||||
feed = soupparser.fromstring(raw)
|
|
||||||
except:
|
|
||||||
try:
|
|
||||||
#remove ASCII invalid chars
|
|
||||||
return soupparser.fromstring(clean_ascii_chars(raw))
|
|
||||||
except:
|
|
||||||
return None, self.urldata
|
|
||||||
|
|
||||||
#nb of page
|
|
||||||
try:
|
|
||||||
nbresults = self.renbres.findall(feed.xpath("//*[@class='resultCount']")[0].text)
|
|
||||||
except:
|
|
||||||
return None, self.urldata
|
|
||||||
|
|
||||||
pages =[feed]
|
|
||||||
if len(nbresults) > 1:
|
|
||||||
nbpagetoquery = int(ceil(float(min(int(nbresults[2]), self.max_results))/ int(nbresults[1])))
|
|
||||||
for i in xrange(2, nbpagetoquery + 1):
|
|
||||||
try:
|
|
||||||
urldata = self.urldata + '&page=' + str(i)
|
|
||||||
raw = browser.open_novisit(urldata, timeout=timeout).read()
|
|
||||||
except Exception as e:
|
|
||||||
continue
|
|
||||||
if '<title>404 - ' in raw:
|
|
||||||
continue
|
|
||||||
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
|
||||||
resolve_entities=True)[0]
|
|
||||||
try:
|
|
||||||
feed = soupparser.fromstring(raw)
|
|
||||||
except:
|
|
||||||
try:
|
|
||||||
#remove ASCII invalid chars
|
|
||||||
return soupparser.fromstring(clean_ascii_chars(raw))
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
pages.append(feed)
|
|
||||||
|
|
||||||
results = []
|
|
||||||
for x in pages:
|
|
||||||
results.extend([i.getparent().get('href') \
|
|
||||||
for i in x.xpath("//a/span[@class='srTitle']")])
|
|
||||||
return results[:self.max_results], self.baseurl
|
|
||||||
|
|
||||||
class ResultList(list):

    def __init__(self, baseurl, lang = 'all'):
        self.baseurl = baseurl
        self.lang = lang
        self.repub = re.compile(u'\((.*)\)')
        self.rerat = re.compile(u'([0-9.]+)')
        self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
        self.reoutp = re.compile(r'(?s)<em>--This text ref.*?</em>')
        self.recom = re.compile(r'(?s)<!--.*?-->')
        self.republi = re.compile(u'(Editeur|Publisher|Verlag)', re.I)
        self.reisbn = re.compile(u'(ISBN-10|ISBN-13|ASIN)', re.I)
        self.relang = re.compile(u'(Language|Langue|Sprache)', re.I)
        self.reratelt = re.compile(u'(Average\s*Customer\s*Review|Moyenne\s*des\s*commentaires\s*client|Durchschnittliche\s*Kundenbewertung)', re.I)
        self.reprod = re.compile(u'(Product\s*Details|D.tails\s*sur\s*le\s*produit|Produktinformation)', re.I)

    def strip_tags_etree(self, etreeobj, invalid_tags):
        for (itag, rmv) in invalid_tags.iteritems():
            if rmv:
                for elts in etreeobj.getiterator(itag):
                    elts.drop_tree()
            else:
                for elts in etreeobj.getiterator(itag):
                    elts.drop_tag()

    def clean_entry(self, entry, invalid_tags = {'script': True},
            invalid_id = (), invalid_class=()):
        #invalid_tags: remove tag and keep content if False else remove
        #remove tags
        if invalid_tags:
            self.strip_tags_etree(entry, invalid_tags)
        #remove id
        if invalid_id:
            for eltid in invalid_id:
                elt = entry.get_element_by_id(eltid)
                if elt is not None:
                    elt.drop_tree()
        #remove class
        if invalid_class:
            for eltclass in invalid_class:
                elts = entry.find_class(eltclass)
                if elts is not None:
                    for elt in elts:
                        elt.drop_tree()

    def get_title(self, entry):
        title = entry.get_element_by_id('btAsinTitle')
        if title is not None:
            title = title.text
        return unicode(title.replace('\n', '').strip())

    def get_authors(self, entry):
        author = entry.get_element_by_id('btAsinTitle')
        while author.getparent().tag != 'div':
            author = author.getparent()
        author = author.getparent()
        authortext = []
        for x in author.getiterator('a'):
            authortext.append(unicode(x.text_content().strip()))
        return authortext

    def get_description(self, entry, verbose):
        try:
            description = entry.get_element_by_id("productDescription").find("div[@class='content']")
            inv_class = ('seeAll', 'emptyClear')
            inv_tags = {'img': True, 'a': False}
            self.clean_entry(description, invalid_tags=inv_tags, invalid_class=inv_class)
            description = html.tostring(description, method='html', encoding=unicode).strip()
            # remove all attributes from tags
            description = self.reattr.sub(r'<\1>', description)
            # Remove the notice about text referring to out of print editions
            description = self.reoutp.sub('', description)
            # Remove comments
            description = self.recom.sub('', description)
            return unicode(sanitize_comments_html(description))
        except:
            report(verbose)
            return None

    def get_tags(self, entry, browser, verbose):
        try:
            tags = entry.get_element_by_id('tagContentHolder')
            testptag = tags.find_class('see-all')
            if testptag:
                for x in testptag:
                    alink = x.xpath('descendant-or-self::a')
                    if alink:
                        if alink[0].get('class') == 'tgJsActive':
                            continue
                        link = self.baseurl + alink[0].get('href')
                        entry = self.get_individual_metadata(browser, link, verbose)
                        tags = entry.get_element_by_id('tagContentHolder')
                        break
            tags = [a.text for a in tags.getiterator('a') if a.get('rel') == 'tag']
        except:
            report(verbose)
            tags = []
        return tags

    def get_book_info(self, entry, mi, verbose):
        try:
            entry = entry.get_element_by_id('SalesRank').getparent()
        except:
            try:
                for z in entry.getiterator('h2'):
                    if self.reprod.search(z.text_content()):
                        entry = z.getparent().find("div[@class='content']/ul")
                        break
            except:
                report(verbose)
                return mi
        elts = entry.findall('li')
        #pub & date
        elt = filter(lambda x: self.republi.search(x.find('b').text), elts)
        if elt:
            pub = elt[0].find('b').tail
            mi.publisher = unicode(self.repub.sub('', pub).strip())
            d = self.repub.search(pub)
            if d is not None:
                d = d.group(1)
                try:
                    default = utcnow().replace(day=15)
                    if self.lang != 'all':
                        d = replace_months(d, self.lang)
                    d = parse_date(d, assume_utc=True, default=default)
                    mi.pubdate = d
                except:
                    report(verbose)
        #ISBN
        elt = filter(lambda x: self.reisbn.search(x.find('b').text), elts)
        if elt:
            isbn = elt[0].find('b').tail.replace('-', '').strip()
            if check_isbn(isbn):
                mi.isbn = unicode(isbn)
            elif len(elt) > 1:
                isbn = elt[1].find('b').tail.replace('-', '').strip()
                if check_isbn(isbn):
                    mi.isbn = unicode(isbn)
        #Language
        elt = filter(lambda x: self.relang.search(x.find('b').text), elts)
        if elt:
            langue = elt[0].find('b').tail.strip()
            if langue:
                mi.language = unicode(langue)
        #ratings
        elt = filter(lambda x: self.reratelt.search(x.find('b').text), elts)
        if elt:
            ratings = elt[0].find_class('swSprite')
            if ratings:
                ratings = self.rerat.findall(ratings[0].get('title'))
                if len(ratings) == 2:
                    mi.rating = float(ratings[0])/float(ratings[1]) * 5
        return mi

    def fill_MI(self, entry, title, authors, browser, verbose):
        mi = MetaInformation(title, authors)
        mi.author_sort = authors_to_sort_string(authors)
        mi.comments = self.get_description(entry, verbose)
        mi = self.get_book_info(entry, mi, verbose)
        mi.tags = self.get_tags(entry, browser, verbose)
        return mi

    def get_individual_metadata(self, browser, linkdata, verbose):
        try:
            raw = browser.open_novisit(linkdata).read()
        except Exception as e:
            report(verbose)
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return
            raise
        if '<title>404 - ' in raw:
            report(verbose)
            return
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]
        try:
            return soupparser.fromstring(raw)
        except:
            try:
                #remove ASCII invalid chars
                return soupparser.fromstring(clean_ascii_chars(raw))
            except:
                report(verbose)
                return

    def populate(self, entries, browser, verbose=False):
        for x in entries:
            try:
                entry = self.get_individual_metadata(browser, x, verbose)
                # clean results
                # inv_ids = ('divsinglecolumnminwidth', 'sims.purchase', 'AutoBuyXGetY', 'A9AdsMiddleBoxTop')
                # inv_class = ('buyingDetailsGrid', 'productImageGrid')
                # inv_tags = {'script': True, 'style': True, 'form': False}
                # self.clean_entry(entry, invalid_id=inv_ids)
                title = self.get_title(entry)
                authors = self.get_authors(entry)
            except Exception as e:
                if verbose:
                    print 'Failed to get all details for an entry'
                    print e
                    print 'URL that failed:', x
                    report(verbose)
                continue
            self.append(self.fill_MI(entry, title, authors, browser, verbose))


def search(title=None, author=None, publisher=None, isbn=None,
           max_results=5, verbose=False, keywords=None, lang='all'):
    br = browser()
    entries, baseurl = Query(title=title, author=author, isbn=isbn, publisher=publisher,
        keywords=keywords, max_results=max_results, rlang=lang)(br, verbose)

    if entries is None or len(entries) == 0:
        return

    #List of entries
    ans = ResultList(baseurl, lang)
    ans.populate(entries, br, verbose)
    return ans

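# A minimal usage sketch of the search() helper above (the query values are
# illustrative, not from the original source):
#
#   results = search(title='Ulysses', author='James Joyce', max_results=5,
#                    verbose=True, lang='en')
#   if results is not None:
#       for mi in results:
#           print unicode(mi).encode(preferred_encoding, 'replace')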
def option_parser():
    parser = OptionParser(textwrap.dedent(\
    _('''\
        %prog [options]

        Fetch book metadata from Amazon. You must specify one of title, author,
        ISBN, publisher or keywords. Will fetch a maximum of 10 matches,
        so you should make your query as specific as possible.
        You can choose the language for metadata retrieval:
        all, english, french, german or spanish
    '''
    )))
    parser.add_option('-t', '--title', help='Book title')
    parser.add_option('-a', '--author', help='Book author(s)')
    parser.add_option('-p', '--publisher', help='Book publisher')
    parser.add_option('-i', '--isbn', help='Book ISBN')
    parser.add_option('-k', '--keywords', help='Keywords')
    parser.add_option('-m', '--max-results', default=10,
        help='Maximum number of results to fetch')
    parser.add_option('-l', '--lang', default='all',
        help='Chosen language for metadata search (all, en, fr, es, de)')
    parser.add_option('-v', '--verbose', default=0, action='count',
        help='Be more verbose about errors')
    return parser

def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    try:
        results = search(opts.title, opts.author, isbn=opts.isbn, publisher=opts.publisher,
            keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results,
            lang=opts.lang)
    except AssertionError:
        report(True)
        parser.print_help()
        return 1
    if results is None or len(results) == 0:
        print 'No result found for this search!'
        return 0
    for result in results:
        print unicode(result).encode(preferred_encoding, 'replace')
        print

if __name__ == '__main__':
    sys.exit(main())
@@ -68,7 +68,19 @@ composite_formatter = SafeFormat()
 class Metadata(object):
 
     '''
-    A class representing all the metadata for a book.
+    A class representing all the metadata for a book. The various standard metadata
+    fields are available as attributes of this object. You can also stick
+    arbitrary attributes onto this object.
+
+    Metadata from custom columns should be accessed via the get() method,
+    passing in the lookup name for the column, for example: "#mytags".
+
+    Use the :meth:`is_null` method to test if a field is null.
+
+    This object also has functions to format fields into strings.
+
+    The list of standard metadata fields grows with time and is in
+    :data:`STANDARD_METADATA_FIELDS`.
 
     Please keep the method based API of this class to a minimum. Every method
     becomes a reserved field name.
@@ -88,11 +100,19 @@ class Metadata(object):
         if title:
             self.title = title
         if authors:
-            #: List of strings or []
+            # List of strings or []
             self.author = list(authors) if authors else []# Needed for backward compatibility
             self.authors = list(authors) if authors else []
 
     def is_null(self, field):
+        '''
+        Return True if the value of field is null in this object.
+        'null' means it is unknown or evaluates to False. So a title of
+        _('Unknown') is null or a language of 'und' is null.
+
+        Be careful with numeric fields since this will return True for zero as
+        well as None.
+        '''
         null_val = NULL_VALUES.get(field, None)
         val = getattr(self, field, None)
         return not val or val == null_val
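# A quick illustration of the is_null() semantics documented above (the values
# are illustrative):
#
#   mi = Metadata(_('Unknown'))
#   mi.is_null('title')     # True: a title of _('Unknown') is treated as null
#   mi.rating = 0
#   mi.is_null('rating')    # True: zero is falsy, hence the numeric-field caveat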
@@ -120,7 +140,11 @@ class Metadata(object):
                                             _('TEMPLATE ERROR'),
                                             self).strip()
             return val
+        if field.startswith('#') and field.endswith('_index'):
+            try:
+                return self.get_extra(field[:-6])
+            except:
+                pass
         raise AttributeError(
                 'Metadata object has no attribute named: '+ repr(field))
 
@@ -170,11 +194,6 @@ class Metadata(object):
         try:
             return self.__getattribute__(field)
         except AttributeError:
-            if field.startswith('#') and field.endswith('_index'):
-                try:
-                    return self.get_extra(field[:-6])
-                except:
-                    pass
             return default
 
     def get_extra(self, field, default=None):
@@ -544,17 +563,24 @@ class Metadata(object):
     def format_tags(self):
         return u', '.join([unicode(t) for t in sorted(self.tags, key=sort_key)])
 
-    def format_rating(self):
-        return unicode(self.rating)
+    def format_rating(self, v=None, divide_by=1.0):
+        if v is None:
+            if self.rating is not None:
+                return unicode(self.rating/divide_by)
+            return u'None'
+        return unicode(v/divide_by)
 
     def format_field(self, key, series_with_index=True):
+        '''
+        Returns the tuple (display_name, formatted_value)
+        '''
         name, val, ign, ign = self.format_field_extended(key, series_with_index)
         return (name, val)
 
     def format_field_extended(self, key, series_with_index=True):
         from calibre.ebooks.metadata import authors_to_string
         '''
-        returns the tuple (field_name, formatted_value, original_value,
+        returns the tuple (display_name, formatted_value, original_value,
         field_metadata)
         '''
 
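# How the new format_rating() signature behaves (illustrative values; calibre
# stores ratings on a 0-10 scale, so callers that want the GUI's 0-5 stars
# pass divide_by=2.0):
#
#   mi.rating = 8
#   mi.format_rating()                    # u'8.0'
#   mi.format_rating(divide_by=2.0)       # u'4.0'
#   mi.format_rating(v=6, divide_by=2.0)  # u'3.0'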
@@ -631,13 +657,17 @@ class Metadata(object):
                 res = format_date(res, fmeta['display'].get('date_format','dd MMM yyyy'))
             elif datatype == 'rating':
                 res = res/2.0
-            elif key in ('book_size', 'size'):
+            elif key == 'size':
                 res = human_readable(res)
             return (name, unicode(res), orig_res, fmeta)
 
         return (None, None, None, None)
 
     def __unicode__(self):
+        '''
+        A string representation of this object, suitable for printing to
+        console
+        '''
         from calibre.ebooks.metadata import authors_to_string
         ans = []
         def fmt(x, y):
@@ -681,6 +711,9 @@ class Metadata(object):
         return u'\n'.join(ans)
 
     def to_html(self):
+        '''
+        An HTML representation of this object.
+        '''
         from calibre.ebooks.metadata import authors_to_string
         ans = [(_('Title'), unicode(self.title))]
         ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))]
@@ -1,317 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import traceback, socket, sys
from functools import partial
from threading import Thread, Event
from Queue import Queue, Empty
from lxml import etree

import mechanize

from calibre.customize import Plugin
from calibre import browser, prints
from calibre.constants import preferred_encoding, DEBUG

class CoverDownload(Plugin):
    '''
    These plugins are used to download covers for books.
    '''

    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Kovid Goyal'
    type = _('Cover download')

    def has_cover(self, mi, ans, timeout=5.):
        '''
        Check if the book described by mi has a cover. Call ans.set() if it
        does. Do nothing if it doesn't.

        :param mi: MetaInformation object
        :param timeout: timeout in seconds
        :param ans: A threading.Event object
        '''
        raise NotImplementedError()

    def get_covers(self, mi, result_queue, abort, timeout=5.):
        '''
        Download covers for books described by the mi object. Downloaded covers
        must be put into the result_queue. If more than one cover is available,
        the plugin should continue downloading them and putting them into
        result_queue until abort.is_set() returns True.

        :param mi: MetaInformation object
        :param result_queue: A multithreaded Queue
        :param abort: A threading.Event object
        :param timeout: timeout in seconds
        '''
        raise NotImplementedError()

    def exception_to_string(self, ex):
        try:
            return unicode(ex)
        except:
            try:
                return str(ex).decode(preferred_encoding, 'replace')
            except:
                return repr(ex)

    def debug(self, *args, **kwargs):
        if DEBUG:
            prints('\t'+self.name+':', *args, **kwargs)


class HeadRequest(mechanize.Request):

    def get_method(self):
        return 'HEAD'

class OpenLibraryCovers(CoverDownload): # {{{
    'Download covers from openlibrary.org'

    # See http://openlibrary.org/dev/docs/api/covers

    OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
    name = 'openlibrary.org covers'
    description = _('Download covers from openlibrary.org')
    author = 'Kovid Goyal'

    def has_cover(self, mi, ans, timeout=5.):
        if not mi.isbn:
            return False
        from calibre.ebooks.metadata.library_thing import get_browser
        br = get_browser()
        br.set_handle_redirect(False)
        try:
            br.open_novisit(HeadRequest(self.OPENLIBRARY%mi.isbn), timeout=timeout)
            self.debug('cover for', mi.isbn, 'found')
            ans.set()
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and e.getcode() == 302:
                self.debug('cover for', mi.isbn, 'found')
                ans.set()
            else:
                self.debug(e)

    def get_covers(self, mi, result_queue, abort, timeout=5.):
        if not mi.isbn:
            return
        from calibre.ebooks.metadata.library_thing import get_browser
        br = get_browser()
        try:
            ans = br.open(self.OPENLIBRARY%mi.isbn, timeout=timeout).read()
            result_queue.put((True, ans, 'jpg', self.name))
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
                result_queue.put((False, _('ISBN: %s not found')%mi.isbn, '', self.name))
            else:
                result_queue.put((False, self.exception_to_string(e),
                    traceback.format_exc(), self.name))

# }}}
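# A minimal sketch of driving one of these plugins by hand; the
# MetaInformation object mi is assumed to have its isbn set (plugins are
# instantiated with None as their path, as in the __main__ block at the
# bottom of this file):
#
#   found = Event()
#   OpenLibraryCovers(None).has_cover(mi, found, timeout=5.)
#   if found.is_set():
#       prints('openlibrary.org has a cover for', mi.isbn)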

class AmazonCovers(CoverDownload): # {{{

    name = 'amazon.com covers'
    description = _('Download covers from amazon.com')
    author = 'Kovid Goyal'

    def has_cover(self, mi, ans, timeout=5.):
        if not mi.isbn:
            return False
        from calibre.ebooks.metadata.amazon import get_cover_url
        br = browser()
        try:
            get_cover_url(mi.isbn, br)
            self.debug('cover for', mi.isbn, 'found')
            ans.set()
        except Exception as e:
            self.debug(e)

    def get_covers(self, mi, result_queue, abort, timeout=5.):
        if not mi.isbn:
            return
        from calibre.ebooks.metadata.amazon import get_cover_url
        br = browser()
        try:
            url = get_cover_url(mi.isbn, br)
            if url is None:
                raise ValueError('No cover found for ISBN: %s'%mi.isbn)
            cover_data = br.open_novisit(url).read()
            result_queue.put((True, cover_data, 'jpg', self.name))
        except Exception as e:
            result_queue.put((False, self.exception_to_string(e),
                traceback.format_exc(), self.name))

# }}}

def check_for_cover(mi, timeout=5.): # {{{
    from calibre.customize.ui import cover_sources
    ans = Event()
    checkers = [partial(p.has_cover, mi, ans, timeout=timeout) for p in
            cover_sources()]
    workers = [Thread(target=c) for c in checkers]
    for w in workers:
        w.daemon = True
        w.start()
    while not ans.is_set():
        ans.wait(0.1)
        if sum([int(w.is_alive()) for w in workers]) == 0:
            break
    return ans.is_set()

# }}}

def download_covers(mi, result_queue, max_covers=50, timeout=5.): # {{{
    from calibre.customize.ui import cover_sources
    abort = Event()
    temp = Queue()
    getters = [partial(p.get_covers, mi, temp, abort, timeout=timeout) for p in
            cover_sources()]
    workers = [Thread(target=c) for c in getters]
    for w in workers:
        w.daemon = True
        w.start()
    count = 0
    while count < max_covers:
        try:
            result = temp.get_nowait()
            if result[0]:
                count += 1
            result_queue.put(result)
        except Empty:
            pass
        if sum([int(w.is_alive()) for w in workers]) == 0:
            break

    abort.set()

    while True:
        try:
            result = temp.get_nowait()
            count += 1
            result_queue.put(result)
        except Empty:
            break

# }}}
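# A sketch of consuming download_covers() directly; download_cover() further
# down wraps the same queue dance with max_covers=1. Each queue item is the
# (ok, data, fmt, plugin_name) tuple the plugins put() above:
#
#   rq = Queue()
#   download_covers(mi, rq, max_covers=3)
#   while True:
#       try:
#           ok, data, fmt, plugin_name = rq.get_nowait()
#       except Empty:
#           break
#       if ok:
#           open('cover.' + fmt, 'wb').write(data)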

class DoubanCovers(CoverDownload): # {{{
    'Download covers from Douban.com'

    DOUBAN_ISBN_URL = 'http://api.douban.com/book/subject/isbn/'
    CALIBRE_DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'
    name = 'Douban.com covers'
    description = _('Download covers from Douban.com')
    author = 'Li Fanxi'

    def get_cover_url(self, isbn, br, timeout=5.):
        try:
            url = self.DOUBAN_ISBN_URL + isbn + "?apikey=" + self.CALIBRE_DOUBAN_API_KEY
            src = br.open(url, timeout=timeout).read()
        except Exception as err:
            if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
                err = Exception(_('Douban.com API timed out. Try again later.'))
            raise err
        else:
            feed = etree.fromstring(src)
            NAMESPACES = {
                'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
                'atom' : 'http://www.w3.org/2005/Atom',
                'db': 'http://www.douban.com/xmlns/'
            }
            XPath = partial(etree.XPath, namespaces=NAMESPACES)
            entries = XPath('//atom:entry')(feed)
            if len(entries) < 1:
                return None
            try:
                cover_url = XPath("descendant::atom:link[@rel='image']/attribute::href")
                u = cover_url(entries[0])[0].replace('/spic/', '/lpic/')
                # If URL contains "book-default", the book doesn't have a cover
                if u.find('book-default') != -1:
                    return None
            except:
                return None
            return u

    def has_cover(self, mi, ans, timeout=5.):
        if not mi.isbn:
            return False
        br = browser()
        try:
            if self.get_cover_url(mi.isbn, br, timeout=timeout) is not None:
                self.debug('cover for', mi.isbn, 'found')
                ans.set()
        except Exception as e:
            self.debug(e)

    def get_covers(self, mi, result_queue, abort, timeout=5.):
        if not mi.isbn:
            return
        br = browser()
        try:
            url = self.get_cover_url(mi.isbn, br, timeout=timeout)
            cover_data = br.open_novisit(url).read()
            result_queue.put((True, cover_data, 'jpg', self.name))
        except Exception as e:
            result_queue.put((False, self.exception_to_string(e),
                traceback.format_exc(), self.name))
# }}}

def download_cover(mi, timeout=5.): # {{{
    results = Queue()
    download_covers(mi, results, max_covers=1, timeout=timeout)
    errors, ans = [], None
    while True:
        try:
            x = results.get_nowait()
            if x[0]:
                ans = x[1]
            else:
                errors.append(x)
        except Empty:
            break
    return ans, errors

# }}}

def test(isbns): # {{{
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation('test', ['test'])
    for isbn in isbns:
        prints('Testing ISBN:', isbn)
        mi.isbn = isbn
        found = check_for_cover(mi)
        prints('Has cover:', found)
        ans, errors = download_cover(mi)
        if ans is not None:
            prints('Cover downloaded')
        else:
            prints('Download failed:')
            for err in errors:
                prints('\t', err[-1]+':', err[1])
        print '\n'

# }}}

if __name__ == '__main__':
    isbns = sys.argv[1:] + ['9781591025412', '9780307272119']
    #test(isbns)

    from calibre.ebooks.metadata import MetaInformation
    oc = OpenLibraryCovers(None)
    for isbn in isbns:
        mi = MetaInformation('xx', ['yy'])
        mi.isbn = isbn
        rq = Queue()
        oc.get_covers(mi, rq, Event())
        result = rq.get_nowait()
        if not result[0]:
            print 'Failed for ISBN:', isbn
            print result
@@ -1,263 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>; 2010, Li Fanxi <lifanxi@freemindworld.com>'
__docformat__ = 'restructuredtext en'

import sys, textwrap
import traceback
from urllib import urlencode
from functools import partial
from lxml import etree

from calibre import browser, preferred_encoding
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.date import parse_date, utcnow

NAMESPACES = {
    'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
    'atom' : 'http://www.w3.org/2005/Atom',
    'db': 'http://www.douban.com/xmlns/'
}
XPath = partial(etree.XPath, namespaces=NAMESPACES)
total_results = XPath('//openSearch:totalResults')
start_index = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')
entry = XPath('//atom:entry')
entry_id = XPath('descendant::atom:id')
title = XPath('descendant::atom:title')
description = XPath('descendant::atom:summary')
publisher = XPath("descendant::db:attribute[@name='publisher']")
isbn = XPath("descendant::db:attribute[@name='isbn13']")
date = XPath("descendant::db:attribute[@name='pubdate']")
creator = XPath("descendant::db:attribute[@name='author']")
tag = XPath("descendant::db:tag")

CALIBRE_DOUBAN_API_KEY = '0bd1672394eb1ebf2374356abec15c3d'

class DoubanBooks(MetadataSource):

    name = 'Douban Books'
    description = _('Downloads metadata from Douban.com')
    supported_platforms = ['windows', 'osx', 'linux'] # Platforms this plugin will run on
    author = 'Li Fanxi <lifanxi@freemindworld.com>' # The author of this plugin
    version = (1, 0, 1) # The version number of this plugin

    def fetch(self):
        try:
            self.results = search(self.title, self.book_author, self.publisher,
                                  self.isbn, max_results=10,
                                  verbose=self.verbose)
        except Exception as e:
            self.exception = e
            self.tb = traceback.format_exc()

def report(verbose):
    if verbose:
        import traceback
        traceback.print_exc()

class Query(object):

    SEARCH_URL = 'http://api.douban.com/book/subjects?'
    ISBN_URL = 'http://api.douban.com/book/subject/isbn/'

    type = "search"

    def __init__(self, title=None, author=None, publisher=None, isbn=None,
                 max_results=20, start_index=1, api_key=''):
        assert not(title is None and author is None and publisher is None and \
                   isbn is None)
        assert (int(max_results) < 21)
        q = ''
        if isbn is not None:
            q = isbn
            self.type = 'isbn'
        else:
            def build_term(parts):
                return ' '.join(x for x in parts)
            if title is not None:
                q += build_term(title.split())
            if author is not None:
                q += (' ' if q else '') + build_term(author.split())
            if publisher is not None:
                q += (' ' if q else '') + build_term(publisher.split())
            self.type = 'search'

        if isinstance(q, unicode):
            q = q.encode('utf-8')

        if self.type == "isbn":
            self.url = self.ISBN_URL + q
            if api_key != '':
                self.url = self.url + "?apikey=" + api_key
        else:
            self.url = self.SEARCH_URL+urlencode({
                'q':q,
                'max-results':max_results,
                'start-index':start_index,
            })
            if api_key != '':
                self.url = self.url + "&apikey=" + api_key

    def __call__(self, browser, verbose):
        if verbose:
            print 'Query:', self.url
        if self.type == "search":
            feed = etree.fromstring(browser.open(self.url).read())
            total = int(total_results(feed)[0].text)
            start = int(start_index(feed)[0].text)
            entries = entry(feed)
            new_start = start + len(entries)
            if new_start > total:
                new_start = 0
            return entries, new_start
        elif self.type == "isbn":
            feed = etree.fromstring(browser.open(self.url).read())
            entries = entry(feed)
            return entries, 0

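# The Query object only builds a URL; roughly what it produces for an ISBN
# lookup (the ISBN and the elided API key here are illustrative):
#
#   q = Query(isbn='9787020002207', api_key='...')
#   # q.url == 'http://api.douban.com/book/subject/isbn/9787020002207?apikey=...'
#   entries, next_start = q(browser(), verbose=False)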
class ResultList(list):

    def get_description(self, entry, verbose):
        try:
            desc = description(entry)
            if desc:
                return 'SUMMARY:\n'+desc[0].text
        except:
            report(verbose)

    def get_title(self, entry):
        candidates = [x.text for x in title(entry)]
        return ': '.join(candidates)

    def get_authors(self, entry):
        m = creator(entry)
        if not m:
            m = []
        m = [x.text for x in m]
        return m

    def get_tags(self, entry, verbose):
        try:
            btags = [x.attrib["name"] for x in tag(entry)]
            tags = []
            for t in btags:
                tags.extend([y.strip() for y in t.split('/')])
            tags = list(sorted(list(set(tags))))
        except:
            report(verbose)
            tags = []
        return [x.replace(',', ';') for x in tags]

    def get_publisher(self, entry, verbose):
        try:
            pub = publisher(entry)[0].text
        except:
            pub = None
        return pub

    def get_isbn(self, entry, verbose):
        try:
            isbn13 = isbn(entry)[0].text
        except Exception:
            isbn13 = None
        return isbn13

    def get_date(self, entry, verbose):
        try:
            d = date(entry)
            if d:
                default = utcnow().replace(day=15)
                d = parse_date(d[0].text, assume_utc=True, default=default)
            else:
                d = None
        except:
            report(verbose)
            d = None
        return d

    def populate(self, entries, browser, verbose=False, api_key=''):
        for x in entries:
            try:
                id_url = entry_id(x)[0].text
                title = self.get_title(x)
            except:
                report(verbose)
            mi = MetaInformation(title, self.get_authors(x))
            try:
                if api_key != '':
                    id_url = id_url + "?apikey=" + api_key
                raw = browser.open(id_url).read()
                feed = etree.fromstring(raw)
                x = entry(feed)[0]
            except Exception as e:
                if verbose:
                    print 'Failed to get all details for an entry'
                    print e
            mi.comments = self.get_description(x, verbose)
            mi.tags = self.get_tags(x, verbose)
            mi.isbn = self.get_isbn(x, verbose)
            mi.publisher = self.get_publisher(x, verbose)
            mi.pubdate = self.get_date(x, verbose)
            self.append(mi)

def search(title=None, author=None, publisher=None, isbn=None,
           verbose=False, max_results=40, api_key=None):
    br = browser()
    start, entries = 1, []

    if api_key is None:
        api_key = CALIBRE_DOUBAN_API_KEY

    while start > 0 and len(entries) <= max_results:
        new, start = Query(title=title, author=author, publisher=publisher,
                isbn=isbn, max_results=max_results, start_index=start, api_key=api_key)(br, verbose)
        if not new:
            break
        entries.extend(new)

    entries = entries[:max_results]

    ans = ResultList()
    ans.populate(entries, br, verbose, api_key)
    return ans

def option_parser():
    parser = OptionParser(textwrap.dedent(
        '''\
        %prog [options]

        Fetch book metadata from Douban. You must specify one of title, author,
        publisher or ISBN. If you specify ISBN the others are ignored. Will
        fetch a maximum of 100 matches, so you should make your query as
        specific as possible.
        '''
    ))
    parser.add_option('-t', '--title', help='Book title')
    parser.add_option('-a', '--author', help='Book author(s)')
    parser.add_option('-p', '--publisher', help='Book publisher')
    parser.add_option('-i', '--isbn', help='Book ISBN')
    parser.add_option('-m', '--max-results', default=10,
                      help='Maximum number of results to fetch')
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help='Be more verbose about errors')
    return parser

def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    try:
        results = search(opts.title, opts.author, opts.publisher, opts.isbn,
                         verbose=opts.verbose, max_results=int(opts.max_results))
    except AssertionError:
        report(True)
        parser.print_help()
        return 1
    for result in results:
        print unicode(result).encode(preferred_encoding)
        print

if __name__ == '__main__':
    sys.exit(main())
@@ -13,7 +13,7 @@ import posixpath
 from cStringIO import StringIO
 
 from calibre.ebooks.metadata import MetaInformation
-from calibre.ebooks.metadata.opf2 import OPF
+from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.utils.zipfile import ZipFile, safe_replace
 
@@ -31,9 +31,9 @@ def get_metadata(stream, extract_cover=True):
             opf = OPF(opf_stream)
             mi = opf.to_book_metadata()
             if extract_cover:
-                cover_name = opf.raster_cover
-                if cover_name:
-                    mi.cover_data = ('jpg', zf.read(cover_name))
+                cover_href = posixpath.relpath(opf.cover, os.path.dirname(stream.name))
+                if cover_href:
+                    mi.cover_data = ('jpg', zf.read(cover_href))
     except:
         return mi
     return mi
@@ -59,17 +59,20 @@ def set_metadata(stream, mi):
        except:
            pass
    if new_cdata:
-        raster_cover = opf.raster_cover
-        if not raster_cover:
-            raster_cover = 'cover.jpg'
-        cpath = posixpath.join(posixpath.dirname(opf_path), raster_cover)
+        cover = opf.cover
+        if not cover:
+            cover = 'cover.jpg'
+        cpath = posixpath.join(posixpath.dirname(opf_path), cover)
         new_cover = _write_new_cover(new_cdata, cpath)
         replacements[cpath] = open(new_cover.name, 'rb')
+        mi.cover = cover
 
     # Update the metadata.
-    opf.smart_update(mi, replace_metadata=True)
+    old_mi = opf.to_book_metadata()
+    old_mi.smart_update(mi)
+    opf.smart_update(metadata_to_opf(old_mi), replace_metadata=True)
     newopf = StringIO(opf.render())
-    safe_replace(stream, opf_path, newopf, extra_replacements=replacements)
+    safe_replace(stream, opf_path, newopf, extra_replacements=replacements, add_missing=True)
 
     # Cleanup temporary files.
     try:
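# The set_metadata() change above merges the incoming record into the existing
# OPF instead of overwriting it; the flow, condensed (names as in the hunk):
#
#   old_mi = opf.to_book_metadata()          # what the archive currently says
#   old_mi.smart_update(mi)                  # layer the incoming values on top
#   opf.smart_update(metadata_to_opf(old_mi), replace_metadata=True)
#   safe_replace(stream, opf_path, StringIO(opf.render()),
#                extra_replacements=replacements, add_missing=True)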
@@ -1,523 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import traceback, sys, textwrap, re
from threading import Thread

from calibre import prints
from calibre.utils.config import OptionParser
from calibre.utils.logging import default_log
from calibre.utils.titlecase import titlecase
from calibre.customize import Plugin
from calibre.ebooks.metadata.covers import check_for_cover
from calibre.utils.html2text import html2text

metadata_config = None

class MetadataSource(Plugin): # {{{
    '''
    Represents a source to query for metadata. Subclasses must implement
    at least the fetch method.

    When :meth:`fetch` is called, the `self` object will have the following
    useful attributes (each of which may be None)::

        title, book_author, publisher, isbn, log, verbose and extra

    Use these attributes to construct the search query. extra is reserved for
    future use.

    The fetch method must store the results in `self.results` as a list of
    :class:`Metadata` objects. If there is an error, it should be stored
    in `self.exception` and `self.tb` (for the traceback).
    '''

    author = 'Kovid Goyal'

    supported_platforms = ['windows', 'osx', 'linux']

    #: The type of metadata fetched. 'basic' means basic metadata like
    #: title/author/isbn/etc. 'social' means social metadata like
    #: tags/rating/reviews/etc.
    metadata_type = 'basic'

    #: If not None, the customization dialog will allow for string
    #: based customization as well as the default customization. The
    #: string customization will be saved in the site_customization
    #: member.
    string_customization_help = None

    #: Set this to true if your plugin returns HTML markup in comments.
    #: Then if the user disables HTML, calibre will automagically convert
    #: the HTML to Markdown.
    has_html_comments = False

    type = _('Metadata download')

    def __call__(self, title, author, publisher, isbn, verbose, log=None,
            extra=None):
        self.worker = Thread(target=self._fetch)
        self.worker.daemon = True
        self.title = title
        self.verbose = verbose
        self.book_author = author
        self.publisher = publisher
        self.isbn = isbn
        self.log = log if log is not None else default_log
        self.extra = extra
        self.exception, self.tb, self.results = None, None, []
        self.worker.start()

    def _fetch(self):
        try:
            self.fetch()
            if self.results:
                c = self.config_store().get(self.name, {})
                res = self.results
                if hasattr(res, 'authors'):
                    res = [res]
                for mi in res:
                    if not c.get('rating', True):
                        mi.rating = None
                    if not c.get('comments', True):
                        mi.comments = None
                    if not c.get('tags', True):
                        mi.tags = []
                    if self.has_html_comments and mi.comments and \
                            c.get('textcomments', False):
                        try:
                            mi.comments = html2text(mi.comments)
                        except:
                            traceback.print_exc()
                            mi.comments = None

        except Exception as e:
            self.exception = e
            self.tb = traceback.format_exc()

    def fetch(self):
        '''
        All the actual work is done here.
        '''
        raise NotImplementedError

    def join(self):
        return self.worker.join()

    def is_alive(self):
        return self.worker.is_alive()

    def is_customizable(self):
        return True

    def config_store(self):
        global metadata_config
        if metadata_config is None:
            from calibre.utils.config import XMLConfig
            metadata_config = XMLConfig('plugins/metadata_download')
        return metadata_config

    def config_widget(self):
        from PyQt4.Qt import QWidget, QVBoxLayout, QLabel, Qt, QLineEdit, \
                QCheckBox
        from calibre.customize.ui import config
        w = QWidget()
        w._layout = QVBoxLayout(w)
        w.setLayout(w._layout)
        if self.string_customization_help is not None:
            w._sc_label = QLabel(self.string_customization_help, w)
            w._layout.addWidget(w._sc_label)
            customization = config['plugin_customization']
            def_sc = customization.get(self.name, '')
            if not def_sc:
                def_sc = ''
            w._sc = QLineEdit(def_sc, w)
            w._layout.addWidget(w._sc)
            w._sc_label.setWordWrap(True)
            w._sc_label.setTextInteractionFlags(Qt.LinksAccessibleByMouse
                    | Qt.LinksAccessibleByKeyboard)
            w._sc_label.setOpenExternalLinks(True)
        c = self.config_store()
        c = c.get(self.name, {})
        for x, l in {'rating':_('ratings'), 'tags':_('tags'),
                'comments':_('description/reviews')}.items():
            cb = QCheckBox(_('Download %s from %s')%(l,
                self.name))
            setattr(w, '_'+x, cb)
            cb.setChecked(c.get(x, True))
            w._layout.addWidget(cb)

        if self.has_html_comments:
            cb = QCheckBox(_('Convert comments downloaded from %s to plain text')%(self.name))
            setattr(w, '_textcomments', cb)
            cb.setChecked(c.get('textcomments', False))
            w._layout.addWidget(cb)

        return w

    def save_settings(self, w):
        dl_settings = {}
        for x in ('rating', 'tags', 'comments'):
            dl_settings[x] = getattr(w, '_'+x).isChecked()
        if self.has_html_comments:
            dl_settings['textcomments'] = getattr(w, '_textcomments').isChecked()
        c = self.config_store()
        c.set(self.name, dl_settings)
        if hasattr(w, '_sc'):
            sc = unicode(w._sc.text()).strip()
            from calibre.customize.ui import customize_plugin
            customize_plugin(self, sc)

    def customization_help(self):
        return 'This plugin can only be customized using the GUI'

# }}}

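# GoogleBooks below is the smallest concrete subclass; the general shape of a
# MetadataSource plugin is just this (MySource and my_search are illustrative
# names, not part of calibre):
#
#   class MySource(MetadataSource):
#       name = 'My Source'
#       description = _('Downloads metadata from example.org')
#
#       def fetch(self):
#           try:
#               self.results = my_search(self.title, self.book_author)
#           except Exception as e:
#               self.exception = e
#               self.tb = traceback.format_exc()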
class GoogleBooks(MetadataSource): # {{{
|
|
||||||
|
|
||||||
name = 'Google Books'
|
|
||||||
description = _('Downloads metadata from Google Books')
|
|
||||||
|
|
||||||
def fetch(self):
|
|
||||||
from calibre.ebooks.metadata.google_books import search
|
|
||||||
try:
|
|
||||||
self.results = search(self.title, self.book_author, self.publisher,
|
|
||||||
self.isbn, max_results=10,
|
|
||||||
verbose=self.verbose)
|
|
||||||
except Exception as e:
|
|
||||||
self.exception = e
|
|
||||||
self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
# }}}
|
|
||||||
|
|
||||||
class ISBNDB(MetadataSource): # {{{
|
|
||||||
|
|
||||||
name = 'IsbnDB'
|
|
||||||
description = _('Downloads metadata from isbndb.com')
|
|
||||||
|
|
||||||
def fetch(self):
|
|
||||||
if not self.site_customization:
|
|
||||||
return
|
|
||||||
from calibre.ebooks.metadata.isbndb import option_parser, create_books
|
|
||||||
args = ['isbndb']
|
|
||||||
if self.isbn:
|
|
||||||
args.extend(['--isbn', self.isbn])
|
|
||||||
else:
|
|
||||||
if self.title:
|
|
||||||
args.extend(['--title', self.title])
|
|
||||||
if self.book_author:
|
|
||||||
args.extend(['--author', self.book_author])
|
|
||||||
if self.publisher:
|
|
||||||
args.extend(['--publisher', self.publisher])
|
|
||||||
if self.verbose:
|
|
||||||
args.extend(['--verbose'])
|
|
||||||
args.append(self.site_customization) # IsbnDb key
|
|
||||||
try:
|
|
||||||
opts, args = option_parser().parse_args(args)
|
|
||||||
self.results = create_books(opts, args)
|
|
||||||
except Exception as e:
|
|
||||||
self.exception = e
|
|
||||||
self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
@property
|
|
||||||
def string_customization_help(self):
|
|
||||||
ans = _('To use isbndb.com you must sign up for a %sfree account%s '
|
|
||||||
'and enter your access key below.')
|
|
||||||
return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
|
|
||||||
|
|
||||||
# }}}
|
|
||||||
|
|
||||||
class Amazon(MetadataSource): # {{{
|
|
||||||
|
|
||||||
name = 'Amazon'
|
|
||||||
metadata_type = 'social'
|
|
||||||
description = _('Downloads social metadata from amazon.com')
|
|
||||||
|
|
||||||
has_html_comments = True
|
|
||||||
|
|
||||||
def fetch(self):
|
|
||||||
if not self.isbn:
|
|
||||||
return
|
|
||||||
from calibre.ebooks.metadata.amazon import get_social_metadata
|
|
||||||
try:
|
|
||||||
self.results = get_social_metadata(self.title, self.book_author,
|
|
||||||
self.publisher, self.isbn)
|
|
||||||
except Exception as e:
|
|
||||||
self.exception = e
|
|
||||||
self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
# }}}
|
|
||||||
|
|
||||||
class KentDistrictLibrary(MetadataSource): # {{{
|
|
||||||
|
|
||||||
name = 'Kent District Library'
|
|
||||||
metadata_type = 'social'
|
|
||||||
description = _('Downloads series information from ww2.kdl.org. '
|
|
||||||
'This website cannot handle large numbers of queries, '
|
|
||||||
'so the plugin is disabled by default.')
|
|
||||||
|
|
||||||
def fetch(self):
|
|
||||||
if not self.title or not self.book_author:
|
|
||||||
return
|
|
||||||
from calibre.ebooks.metadata.kdl import get_series
|
|
||||||
try:
|
|
||||||
self.results = get_series(self.title, self.book_author)
|
|
||||||
except Exception as e:
|
|
||||||
import traceback
|
|
||||||
traceback.print_exc()
|
|
||||||
self.exception = e
|
|
||||||
self.tb = traceback.format_exc()
|
|
||||||
|
|
||||||
# }}}
|
|
||||||
|
|
||||||
|
|
||||||
def result_index(source, result):
|
|
||||||
if not result.isbn:
|
|
||||||
return -1
|
|
||||||
for i, x in enumerate(source):
|
|
||||||
if x.isbn == result.isbn:
|
|
||||||
return i
|
|
||||||
return -1
|
|
||||||
|
|
||||||
def merge_results(one, two):
|
|
||||||
if two is not None and one is not None:
|
|
||||||
for x in two:
|
|
||||||
idx = result_index(one, x)
|
|
||||||
if idx < 0:
|
|
||||||
one.append(x)
|
|
||||||
else:
|
|
||||||
one[idx].smart_update(x)
|
|
||||||
|
|
||||||
class MetadataSources(object):
|
|
||||||
|
|
||||||
def __init__(self, sources):
|
|
||||||
self.sources = sources
|
|
||||||
|
|
||||||
def __enter__(self):
|
|
||||||
for s in self.sources:
|
|
||||||
s.__enter__()
|
|
||||||
return self
|
|
||||||
|
|
||||||
def __exit__(self, *args):
|
|
||||||
for s in self.sources:
|
|
||||||
s.__exit__()
|
|
||||||
|
|
||||||
def __call__(self, *args, **kwargs):
|
|
||||||
for s in self.sources:
|
|
||||||
s(*args, **kwargs)
|
|
||||||
|
|
||||||
def join(self):
|
|
||||||
for s in self.sources:
|
|
||||||
s.join()
|
|
||||||
|
|
||||||
def filter_metadata_results(item):
|
|
||||||
keywords = ["audio", "tape", "cassette", "abridged", "playaway"]
|
|
||||||
for keyword in keywords:
|
|
||||||
if item.publisher and keyword in item.publisher.lower():
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
def do_cover_check(item):
|
|
||||||
item.has_cover = False
|
|
||||||
try:
|
|
||||||
item.has_cover = check_for_cover(item)
|
|
||||||
except:
|
|
||||||
pass # Cover not found
|
|
||||||
|
|
||||||
def check_for_covers(items):
|
|
||||||
threads = [Thread(target=do_cover_check, args=(item,)) for item in items]
|
|
||||||
for t in threads: t.start()
|
|
||||||
for t in threads: t.join()
|
|
||||||
|
|
||||||
def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
|
|
||||||
verbose=0):
|
|
||||||
assert not(title is None and author is None and publisher is None and \
|
|
||||||
isbn is None)
|
|
||||||
from calibre.customize.ui import metadata_sources, migrate_isbndb_key
|
|
||||||
migrate_isbndb_key()
|
|
||||||
if isbn is not None:
|
|
||||||
isbn = re.sub(r'[^a-zA-Z0-9]', '', isbn).upper()
|
|
||||||
fetchers = list(metadata_sources(isbndb_key=isbndb_key))
|
|
||||||
with MetadataSources(fetchers) as manager:
|
|
||||||
manager(title, author, publisher, isbn, verbose)
|
|
||||||
manager.join()
|
|
||||||
|
|
||||||
results = list(fetchers[0].results) if fetchers else []
|
|
||||||
for fetcher in fetchers[1:]:
|
|
||||||
merge_results(results, fetcher.results)
|
|
||||||
|
|
||||||
results = list(filter(filter_metadata_results, results))
|
|
||||||
|
|
||||||
check_for_covers(results)
|
|
||||||
|
|
||||||
words = ("the", "a", "an", "of", "and")
|
|
||||||
prefix_pat = re.compile(r'^(%s)\s+'%("|".join(words)))
|
|
||||||
trailing_paren_pat = re.compile(r'\(.*\)$')
|
|
||||||
whitespace_pat = re.compile(r'\s+')
|
|
||||||
|
|
||||||
def sort_func(x, y):
|
|
||||||
|
|
||||||
def cleanup_title(s):
|
|
||||||
if s is None:
|
|
||||||
s = _('Unknown')
|
|
||||||
s = s.strip().lower()
|
|
||||||
s = prefix_pat.sub(' ', s)
|
|
||||||
s = trailing_paren_pat.sub('', s)
|
|
||||||
s = whitespace_pat.sub(' ', s)
|
|
||||||
return s.strip()
|
|
||||||
|
|
||||||
t = cleanup_title(title)
|
|
||||||
x_title = cleanup_title(x.title)
|
|
||||||
y_title = cleanup_title(y.title)
|
|
||||||
|
|
||||||
# prefer titles that start with the search title
|
|
||||||
tx = cmp(t, x_title)
|
|
||||||
ty = cmp(t, y_title)
|
|
||||||
result = 0 if abs(tx) == abs(ty) else abs(tx) - abs(ty)
|
|
||||||
|
|
||||||
# then prefer titles that have a cover image
|
|
||||||
if result == 0:
|
|
||||||
result = -cmp(x.has_cover, y.has_cover)
|
|
||||||
|
|
||||||
# then prefer titles with the longest comment, with in 10%
|
|
||||||
if result == 0:
|
|
||||||
cx = len(x.comments.strip() if x.comments else '')
|
|
||||||
cy = len(y.comments.strip() if y.comments else '')
|
|
||||||
t = (cx + cy) / 20
|
|
||||||
result = cy - cx
|
|
||||||
if abs(result) < t:
|
|
||||||
result = 0
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
results = sorted(results, cmp=sort_func)
|
|
||||||
|
|
||||||
# if for some reason there is no comment in the top selection, go looking for one
|
|
||||||
if len(results) > 1:
|
|
||||||
if not results[0].comments or len(results[0].comments) == 0:
|
|
||||||
for r in results[1:]:
|
|
||||||
try:
|
|
||||||
if title and title.lower() == r.title[:len(title)].lower() \
|
|
||||||
and r.comments and len(r.comments):
|
|
||||||
results[0].comments = r.comments
|
|
||||||
break
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
# Find a pubdate
|
|
||||||
pubdate = None
|
|
||||||
for r in results:
|
|
||||||
if r.pubdate is not None:
|
|
||||||
pubdate = r.pubdate
|
|
||||||
break
|
|
||||||
if pubdate is not None:
|
|
||||||
for r in results:
|
|
||||||
if r.pubdate is None:
|
|
||||||
r.pubdate = pubdate
|
|
||||||
|
|
||||||
def fix_case(x):
|
|
||||||
if x:
|
|
||||||
x = titlecase(x)
|
|
||||||
return x
|
|
||||||
|
|
||||||
for r in results:
|
|
||||||
r.title = fix_case(r.title)
|
|
||||||
if r.authors:
|
|
||||||
r.authors = list(map(fix_case, r.authors))
|
|
||||||
|
|
||||||
return results, [(x.name, x.exception, x.tb) for x in fetchers]

def get_social_metadata(mi, verbose=0):
    from calibre.customize.ui import metadata_sources
    fetchers = list(metadata_sources(metadata_type='social'))
    with MetadataSources(fetchers) as manager:
        manager(mi.title, mi.authors, mi.publisher, mi.isbn, verbose)
        manager.join()
    ratings, tags, comments, series, series_index = [], set([]), set([]), None, None
    for fetcher in fetchers:
        if fetcher.results:
            dmi = fetcher.results
            if dmi.rating is not None:
                ratings.append(dmi.rating)
            if dmi.tags:
                for t in dmi.tags:
                    tags.add(t)
            if mi.pubdate is None and dmi.pubdate is not None:
                mi.pubdate = dmi.pubdate
            if dmi.comments:
                comments.add(dmi.comments)
            if dmi.series is not None:
                series = dmi.series
                if dmi.series_index is not None:
                    series_index = dmi.series_index
    if ratings:
        rating = sum(ratings)/float(len(ratings))
        if mi.rating is None or mi.rating < 0.1:
            mi.rating = rating
        else:
            mi.rating = (mi.rating + rating)/2.0
    if tags:
        if not mi.tags:
            mi.tags = []
        mi.tags += list(tags)
        mi.tags = list(sorted(list(set(mi.tags))))
    if comments:
        if not mi.comments or len(mi.comments)+20 < len(' '.join(comments)):
            mi.comments = ''
            for x in comments:
                mi.comments += x+'\n\n'
    if series and series_index is not None:
        mi.series = series
        mi.series_index = series_index

    return [(x.name, x.exception, x.tb) for x in fetchers if x.exception is not
            None]
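# Worked example (illustrative numbers) for the rating merge above: social
# ratings [3.0, 5.0] average to 4.0; with an existing mi.rating of 3.0 the
# stored value becomes (3.0 + 4.0)/2.0 = 3.5, while a missing or near-zero
# rating is simply replaced by the 4.0 average.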


def option_parser():
    parser = OptionParser(textwrap.dedent(
        '''\
        %prog [options]

        Fetch book metadata from online sources. You must specify at least one
        of title, author, publisher or ISBN. If you specify ISBN, the others
        are ignored.
        '''
    ))
    parser.add_option('-t', '--title', help='Book title')
    parser.add_option('-a', '--author', help='Book author(s)')
    parser.add_option('-p', '--publisher', help='Book publisher')
    parser.add_option('-i', '--isbn', help='Book ISBN')
    parser.add_option('-m', '--max-results', default=10,
                      help='Maximum number of results to fetch')
    parser.add_option('-k', '--isbndb-key',
                      help=('The access key for your ISBNDB.com account. '
                            'Only needed if you want to search isbndb.com '
                            'and you haven\'t customized the IsbnDB plugin.'))
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help='Be more verbose about errors')
    return parser

def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    results, exceptions = search(opts.title, opts.author, opts.publisher,
                                 opts.isbn, opts.isbndb_key, opts.verbose)
    social_exceptions = []
    for result in results:
        social_exceptions.extend(get_social_metadata(result, opts.verbose))
        prints(unicode(result))
        print

    for name, exception, tb in exceptions+social_exceptions:
        if exception is not None:
            print 'WARNING: Fetching from', name, 'failed with error:'
            print exception
            print tb

    return 0

if __name__ == '__main__':
    sys.exit(main())
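# Illustrative command line (argument values are made up):
#   python fetch.py -t "Dune" -a "Frank Herbert" -v
# prints each merged metadata record, then a warning block per failed fetcher.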
@@ -1,390 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>'
__docformat__ = 'restructuredtext en'

import sys, textwrap, re, traceback, socket
from urllib import urlencode

from lxml.html import soupparser, tostring

from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
    authors_to_sort_string
from calibre.library.comments import sanitize_comments_html
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow
from calibre.utils.cleantext import clean_ascii_chars

class Fictionwise(MetadataSource): # {{{

    author = 'Sengian'
    name = 'Fictionwise'
    description = _('Downloads metadata from Fictionwise')

    has_html_comments = True

    def fetch(self):
        try:
            self.results = search(self.title, self.book_author, self.publisher,
                                  self.isbn, max_results=10, verbose=self.verbose)
        except Exception as e:
            self.exception = e
            self.tb = traceback.format_exc()

    # }}}

class FictionwiseError(Exception):
    pass

def report(verbose):
    if verbose:
        traceback.print_exc()

class Query(object):

    BASE_URL = 'http://www.fictionwise.com/servlet/mw'

    def __init__(self, title=None, author=None, publisher=None, keywords=None, max_results=20):
        assert not(title is None and author is None and publisher is None and keywords is None)
        assert (max_results < 21)

        self.max_results = int(max_results)
        q = { 'template' : 'searchresults_adv.htm' ,
              'searchtitle' : '',
              'searchauthor' : '',
              'searchpublisher' : '',
              'searchkeyword' : '',
              #possibilities: startoflast, fullname, lastfirst
              'searchauthortype' : 'startoflast',
              'searchcategory' : '',
              'searchcategory2' : '',
              'searchprice_s' : '0',
              'searchprice_e' : 'ANY',
              'searchformat' : '',
              'searchgeo' : 'US',
              'searchfwdatetype' : '',
              #maybe use dates fields if needed?
              #'sortorder' : 'DESC',
              #many options available: b.SortTitle, a.SortName,
              #b.DateFirstPublished, b.FWPublishDate
              'sortby' : 'b.SortTitle'
            }
        if title is not None:
            q['searchtitle'] = title
        if author is not None:
            q['searchauthor'] = author
        if publisher is not None:
            q['searchpublisher'] = publisher
        if keywords is not None:
            q['searchkeyword'] = keywords

        if isinstance(q, unicode):
            q = q.encode('utf-8')
        self.urldata = urlencode(q)

    def __call__(self, browser, verbose, timeout = 5.):
        if verbose:
            print _('Query: %s') % (self.BASE_URL+self.urldata)

        try:
            raw = browser.open_novisit(self.BASE_URL, self.urldata, timeout=timeout).read()
        except Exception as e:
            report(verbose)
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return
            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
                raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
            raise FictionwiseError(_('Fictionwise encountered an error.'))
        if '<title>404 - ' in raw:
            return
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]
        try:
            feed = soupparser.fromstring(raw)
        except:
            try:
                #remove ASCII invalid chars
                feed = soupparser.fromstring(clean_ascii_chars(raw))
            except:
                return None

        # get list of results as links
        results = feed.xpath("//table[3]/tr/td[2]/table/tr/td/p/table[2]/tr[@valign]")
        results = results[:self.max_results]
        results = [i.xpath('descendant-or-self::a')[0].get('href') for i in results]
        #return feed if no links, i.e. normally a single book or nothing
        if not results:
            results = [feed]
        return results

class ResultList(list):

    BASE_URL = 'http://www.fictionwise.com'
    COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0}

    def __init__(self):
        self.retitle = re.compile(r'\[[^\[\]]+\]')
        self.rechkauth = re.compile(r'.*book\s*by', re.I)
        self.redesc = re.compile(r'book\s*description\s*:\s*(<br[^>]+>)*(?P<desc>.*)<br[^>]*>.{,15}publisher\s*:', re.I)
        self.repub = re.compile(r'.*publisher\s*:\s*', re.I)
        self.redate = re.compile(r'.*release\s*date\s*:\s*', re.I)
        self.retag = re.compile(r'.*book\s*category\s*:\s*', re.I)
        self.resplitbr = re.compile(r'<br[^>]*>', re.I)
        self.recomment = re.compile(r'(?s)<!--.*?-->')
        self.reimg = re.compile(r'<img[^>]*>', re.I)
        self.resanitize = re.compile(r'\[HTML_REMOVED\]\s*', re.I)
        self.renbcom = re.compile('(?P<nbcom>\d+)\s*Reader Ratings:')
        self.recolor = re.compile('(?P<ncolor>[^/]+).gif')
        self.resplitbrdiv = re.compile(r'(<br[^>]+>|</?div[^>]*>)', re.I)
        self.reisbn = re.compile(r'.*ISBN\s*:\s*', re.I)

    def strip_tags_etree(self, etreeobj, invalid_tags):
        for (itag, rmv) in invalid_tags.iteritems():
            if rmv:
                for elts in etreeobj.getiterator(itag):
                    elts.drop_tree()
            else:
                for elts in etreeobj.getiterator(itag):
                    elts.drop_tag()

    def clean_entry(self, entry, invalid_tags = {'script': True},
                invalid_id = (), invalid_class=(), invalid_xpath = ()):
        #invalid_tags: remove tag and keep content if False, else remove both
        #remove tags
        if invalid_tags:
            self.strip_tags_etree(entry, invalid_tags)
        #remove xpath
        if invalid_xpath:
            for eltid in invalid_xpath:
                elt = entry.xpath(eltid)
                for el in elt:
                    el.drop_tree()
        #remove id
        if invalid_id:
            for eltid in invalid_id:
                elt = entry.get_element_by_id(eltid)
                if elt is not None:
                    elt.drop_tree()
        #remove class
        if invalid_class:
            for eltclass in invalid_class:
                elts = entry.find_class(eltclass)
                if elts is not None:
                    for elt in elts:
                        elt.drop_tree()

    def output_entry(self, entry, prettyout = True, htmlrm="\d+"):
        out = tostring(entry, pretty_print=prettyout)
        #work around tostring to remove numeric entities, for example
        reclean = re.compile('(\n+|\t+|\r+|&#'+htmlrm+';)')
        return reclean.sub('', out)

    def get_title(self, entry):
        title = entry.findtext('./')
        return self.retitle.sub('', title).strip()

    def get_authors(self, entry):
        authortext = entry.find('./br').tail
        if not self.rechkauth.search(authortext):
            return []
        authortext = self.rechkauth.sub('', authortext)
        return [a.strip() for a in authortext.split('&')]

    def get_rating(self, entrytable, verbose):
        nbcomment = tostring(entrytable.getprevious())
        try:
            nbcomment = self.renbcom.search(nbcomment).group("nbcom")
        except:
            report(verbose)
            return None
        hval = dict((self.COLOR_VALUES[self.recolor.search(image.get('src', default='NA.gif')).group("ncolor")],
                     float(image.get('height', default=0))) \
                    for image in entrytable.getiterator('img'))
        #ratings as x/5
        return float(1.25*sum(k*v for (k, v) in hval.iteritems())/sum(hval.itervalues()))
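        # Worked example (illustrative bar heights): a BLUE bar of height 40
        # and a GREEN bar of height 10 give (4*40 + 3*10)/(40 + 10) = 3.8,
        # scaled by 1.25 to 4.75/5; an all-BLUE histogram maps to exactly 5.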

    def get_description(self, entry):
        description = self.output_entry(entry.xpath('./p')[1],htmlrm="")
        description = self.redesc.search(description)
        if not description or not description.group("desc"):
            return None
        #remove invalid tags
        description = self.reimg.sub('', description.group("desc"))
        description = self.recomment.sub('', description)
        description = self.resanitize.sub('', sanitize_comments_html(description))
        return _('SUMMARY:\n %s') % re.sub(r'\n\s+</p>','\n</p>', description)

    def get_publisher(self, entry):
        publisher = self.output_entry(entry.xpath('./p')[1])
        publisher = filter(lambda x: self.repub.search(x) is not None,
                           self.resplitbr.split(publisher))
        if not len(publisher):
            return None
        publisher = self.repub.sub('', publisher[0])
        return publisher.split(',')[0].strip()

    def get_tags(self, entry):
        tag = self.output_entry(entry.xpath('./p')[1])
        tag = filter(lambda x: self.retag.search(x) is not None,
                     self.resplitbr.split(tag))
        if not len(tag):
            return []
        return map(lambda x: x.strip(), self.retag.sub('', tag[0]).split('/'))

    def get_date(self, entry, verbose):
        date = self.output_entry(entry.xpath('./p')[1])
        date = filter(lambda x: self.redate.search(x) is not None,
                      self.resplitbr.split(date))
        if not len(date):
            return None
        try:
            d = self.redate.sub('', date[0])
            if d:
                default = utcnow().replace(day=15)
                d = parse_date(d, assume_utc=True, default=default)
            else:
                d = None
        except:
            report(verbose)
            d = None
        return d

    def get_ISBN(self, entry):
        isbns = self.output_entry(entry.xpath('./p')[2])
        isbns = filter(lambda x: self.reisbn.search(x) is not None,
                       self.resplitbrdiv.split(isbns))
        if not len(isbns):
            return None
        isbns = [self.reisbn.sub('', x) for x in isbns if check_isbn(self.reisbn.sub('', x))]
        return sorted(isbns, cmp=lambda x,y:cmp(len(x), len(y)))[-1]

    def fill_MI(self, entry, title, authors, ratings, verbose):
        mi = MetaInformation(title, authors)
        mi.rating = ratings
        mi.comments = self.get_description(entry)
        mi.publisher = self.get_publisher(entry)
        mi.tags = self.get_tags(entry)
        mi.pubdate = self.get_date(entry, verbose)
        mi.isbn = self.get_ISBN(entry)
        mi.author_sort = authors_to_sort_string(authors)
        return mi

    def get_individual_metadata(self, browser, linkdata, verbose):
        try:
            raw = browser.open_novisit(self.BASE_URL + linkdata).read()
        except Exception as e:
            report(verbose)
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return
            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
                raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
            raise FictionwiseError(_('Fictionwise encountered an error.'))
        if '<title>404 - ' in raw:
            report(verbose)
            return
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]
        try:
            return soupparser.fromstring(raw)
        except:
            try:
                #remove ASCII invalid chars
                return soupparser.fromstring(clean_ascii_chars(raw))
            except:
                return None

    def populate(self, entries, browser, verbose=False):
        inv_tags = {'script': True, 'a': False, 'font': False, 'strong': False, 'b': False,
                    'ul': False, 'span': False}
        inv_xpath = ('./table',)
        #single entry
        if len(entries) == 1 and not isinstance(entries[0], str):
            try:
                entry = entries.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")
                self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
                title = self.get_title(entry)
                #maybe strengthen the search
                ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
                authors = self.get_authors(entry)
            except Exception as e:
                if verbose:
                    print _('Failed to get all details for an entry')
                    print e
                return
            self.append(self.fill_MI(entry, title, authors, ratings, verbose))
        else:
            #multiple entries
            for x in entries:
                try:
                    entry = self.get_individual_metadata(browser, x, verbose)
                    entry = entry.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")[0]
                    self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
                    title = self.get_title(entry)
                    #maybe strengthen the search
                    ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
                    authors = self.get_authors(entry)
                except Exception as e:
                    if verbose:
                        print _('Failed to get all details for an entry')
                        print e
                    continue
                self.append(self.fill_MI(entry, title, authors, ratings, verbose))


def search(title=None, author=None, publisher=None, isbn=None,
           min_viewability='none', verbose=False, max_results=5,
           keywords=None):
    br = browser()
    entries = Query(title=title, author=author, publisher=publisher,
                    keywords=keywords, max_results=max_results)(br, verbose, timeout = 15.)

    #List of entry
    ans = ResultList()
    ans.populate(entries, br, verbose)
    return ans


def option_parser():
    parser = OptionParser(textwrap.dedent(\
        _('''\
        %prog [options]

        Fetch book metadata from Fictionwise. You must specify one of title, author,
        or keywords. No ISBN specification possible. Will fetch a maximum of 20 matches,
        so you should make your query as specific as possible.
        ''')
    ))
    parser.add_option('-t', '--title', help=_('Book title'))
    parser.add_option('-a', '--author', help=_('Book author(s)'))
    parser.add_option('-p', '--publisher', help=_('Book publisher'))
    parser.add_option('-k', '--keywords', help=_('Keywords'))
    parser.add_option('-m', '--max-results', default=20,
                      help=_('Maximum number of results to fetch'))
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help=_('Be more verbose about errors'))
    return parser

def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    try:
        results = search(opts.title, opts.author, publisher=opts.publisher,
                         keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
    except AssertionError:
        report(True)
        parser.print_help()
        return 1
    if results is None or len(results) == 0:
        print _('No result found for this search!')
        return 0
    for result in results:
        print unicode(result).encode(preferred_encoding, 'replace')
        print

if __name__ == '__main__':
    sys.exit(main())
@@ -1,247 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import sys, textwrap
from urllib import urlencode
from functools import partial

from lxml import etree

from calibre import browser, preferred_encoding
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.config import OptionParser
from calibre.utils.date import parse_date, utcnow

NAMESPACES = {
    'openSearch':'http://a9.com/-/spec/opensearchrss/1.0/',
    'atom' : 'http://www.w3.org/2005/Atom',
    'dc': 'http://purl.org/dc/terms'
}
XPath = partial(etree.XPath, namespaces=NAMESPACES)

total_results = XPath('//openSearch:totalResults')
start_index = XPath('//openSearch:startIndex')
items_per_page = XPath('//openSearch:itemsPerPage')
entry = XPath('//atom:entry')
entry_id = XPath('descendant::atom:id')
creator = XPath('descendant::dc:creator')
identifier = XPath('descendant::dc:identifier')
title = XPath('descendant::dc:title')
date = XPath('descendant::dc:date')
publisher = XPath('descendant::dc:publisher')
subject = XPath('descendant::dc:subject')
description = XPath('descendant::dc:description')
language = XPath('descendant::dc:language')

def report(verbose):
    if verbose:
        import traceback
        traceback.print_exc()


class Query(object):

    BASE_URL = 'http://books.google.com/books/feeds/volumes?'

    def __init__(self, title=None, author=None, publisher=None, isbn=None,
                 max_results=20, min_viewability='none', start_index=1):
        assert not(title is None and author is None and publisher is None and \
                   isbn is None)
        assert (max_results < 21)
        assert (min_viewability in ('none', 'partial', 'full'))
        q = ''
        if isbn is not None:
            q += 'isbn:'+isbn
        else:
            def build_term(prefix, parts):
                return ' '.join('in'+prefix + ':' + x for x in parts)
            if title is not None:
                q += build_term('title', title.split())
            if author is not None:
                q += ('+' if q else '')+build_term('author', author.split())
            if publisher is not None:
                q += ('+' if q else '')+build_term('publisher', publisher.split())

        if isinstance(q, unicode):
            q = q.encode('utf-8')
        self.url = self.BASE_URL+urlencode({
            'q':q,
            'max-results':max_results,
            'start-index':start_index,
            'min-viewability':min_viewability,
        })

    def __call__(self, browser, verbose):
        if verbose:
            print 'Query:', self.url
        feed = etree.fromstring(browser.open(self.url).read())
        #print etree.tostring(feed, pretty_print=True)
        total = int(total_results(feed)[0].text)
        start = int(start_index(feed)[0].text)
        entries = entry(feed)
        new_start = start + len(entries)
        if new_start > total:
            new_start = 0
        return entries, new_start
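    # Worked example (illustrative counts): with total = 45 and start = 21, a
    # feed of 20 entries gives new_start = 41; the following call returns the
    # last 5 entries and 46 > 45, so new_start = 0 and the caller stops paging.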


class ResultList(list):

    def get_description(self, entry, verbose):
        try:
            desc = description(entry)
            if desc:
                return 'SUMMARY:\n'+desc[0].text
        except:
            report(verbose)

    def get_language(self, entry, verbose):
        try:
            l = language(entry)
            if l:
                return l[0].text
        except:
            report(verbose)

    def get_title(self, entry):
        candidates = [x.text for x in title(entry)]
        return ': '.join(candidates)

    def get_authors(self, entry):
        m = creator(entry)
        if not m:
            m = []
        m = [x.text for x in m]
        return m

    def get_author_sort(self, entry, verbose):
        for x in creator(entry):
            for key, val in x.attrib.items():
                if key.endswith('file-as'):
                    return val

    def get_identifiers(self, entry, mi):
        isbns = []
        for x in identifier(entry):
            t = str(x.text).strip()
            if t[:5].upper() in ('ISBN:', 'LCCN:', 'OCLC:'):
                if t[:5].upper() == 'ISBN:':
                    isbns.append(t[5:])
        if isbns:
            mi.isbn = sorted(isbns, cmp=lambda x,y:cmp(len(x), len(y)))[-1]

    def get_tags(self, entry, verbose):
        try:
            btags = [x.text for x in subject(entry)]
            tags = []
            for t in btags:
                tags.extend([y.strip() for y in t.split('/')])
            tags = list(sorted(list(set(tags))))
        except:
            report(verbose)
            tags = []
        return [x.replace(',', ';') for x in tags]

    def get_publisher(self, entry, verbose):
        try:
            pub = publisher(entry)[0].text
        except:
            pub = None
        return pub

    def get_date(self, entry, verbose):
        try:
            d = date(entry)
            if d:
                default = utcnow().replace(day=15)
                d = parse_date(d[0].text, assume_utc=True, default=default)
            else:
                d = None
        except:
            report(verbose)
            d = None
        return d

    def populate(self, entries, browser, verbose=False):
        for x in entries:
            try:
                id_url = entry_id(x)[0].text
                title = self.get_title(x)
            except:
                report(verbose)
                continue  # cannot build a record without an id and a title
            mi = MetaInformation(title, self.get_authors(x))
            try:
                raw = browser.open(id_url).read()
                feed = etree.fromstring(raw)
                x = entry(feed)[0]
            except Exception as e:
                if verbose:
                    print 'Failed to get all details for an entry'
                    print e
            mi.author_sort = self.get_author_sort(x, verbose)
            mi.comments = self.get_description(x, verbose)
            self.get_identifiers(x, mi)
            mi.tags = self.get_tags(x, verbose)
            mi.publisher = self.get_publisher(x, verbose)
            mi.pubdate = self.get_date(x, verbose)
            mi.language = self.get_language(x, verbose)
            self.append(mi)


def search(title=None, author=None, publisher=None, isbn=None,
           min_viewability='none', verbose=False, max_results=40):
    br = browser()
    br.set_handle_gzip(True)
    start, entries = 1, []
    while start > 0 and len(entries) <= max_results:
        new, start = Query(title=title, author=author, publisher=publisher,
                           isbn=isbn, min_viewability=min_viewability,
                           # pass the updated index so each call fetches the next page
                           start_index=start)(br, verbose)
        if not new:
            break
        entries.extend(new)

    entries = entries[:max_results]

    ans = ResultList()
    ans.populate(entries, br, verbose)
    return ans

def option_parser():
    parser = OptionParser(textwrap.dedent(
        '''\
        %prog [options]

        Fetch book metadata from Google. You must specify one of title, author,
        publisher or ISBN. If you specify ISBN the others are ignored. Will
        fetch a maximum of 100 matches, so you should make your query as
        specific as possible.
        '''
    ))
    parser.add_option('-t', '--title', help='Book title')
    parser.add_option('-a', '--author', help='Book author(s)')
    parser.add_option('-p', '--publisher', help='Book publisher')
    parser.add_option('-i', '--isbn', help='Book ISBN')
    parser.add_option('-m', '--max-results', default=10,
                      help='Maximum number of results to fetch')
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help='Be more verbose about errors')
    return parser

def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    try:
        results = search(opts.title, opts.author, opts.publisher, opts.isbn,
                         verbose=opts.verbose, max_results=opts.max_results)
    except AssertionError:
        report(True)
        parser.print_help()
        return 1
    for result in results:
        print unicode(result).encode(preferred_encoding)
        print

if __name__ == '__main__':
    sys.exit(main())
@@ -1,159 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Interface to isbndb.com. My key HLLXQX2A.
'''

import sys, re
from urllib import quote

from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre import browser

BASE_URL = 'http://isbndb.com/api/books.xml?access_key=%(key)s&page_number=1&results=subjects,authors,texts&'

class ISBNDBError(Exception):
    pass

def fetch_metadata(url, max=3, timeout=5.):
    books = []
    page_number = 1
    total_results = 31
    br = browser()
    while len(books) < total_results and max > 0:
        try:
            raw = br.open(url, timeout=timeout).read()
        except Exception as err:
            raise ISBNDBError('Could not fetch ISBNDB metadata. Error: '+str(err))
        soup = BeautifulStoneSoup(raw,
                convertEntities=BeautifulStoneSoup.XML_ENTITIES)
        book_list = soup.find('booklist')
        if book_list is None:
            errmsg = soup.find('errormessage').string
            raise ISBNDBError('Error fetching metadata: '+errmsg)
        total_results = int(book_list['total_results'])
        page_number += 1
        np = '&page_number=%s&'%page_number
        url = re.sub(r'\&page_number=\d+\&', np, url)
        books.extend(book_list.findAll('bookdata'))
        max -= 1
    return books
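# Illustrative effect of the re.sub above on the query URL:
#   ...&page_number=1&results=... -> ...&page_number=2&results=...
# so each loop iteration requests the next page of results from isbndb.com.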


class ISBNDBMetadata(Metadata):

    def __init__(self, book):
        Metadata.__init__(self, None)

        def tostring(e):
            if not hasattr(e, 'string'):
                return None
            ans = e.string
            if ans is not None:
                ans = unicode(ans).strip()
            if not ans:
                ans = None
            return ans

        self.isbn = unicode(book.get('isbn13', book.get('isbn')))
        title = tostring(book.find('titlelong'))
        if not title:
            title = tostring(book.find('title'))
        self.title = title
        self.title = unicode(self.title).strip()
        authors = []
        au = tostring(book.find('authorstext'))
        if au:
            au = au.strip()
            temp = au.split(',')
            for au in temp:
                if not au: continue
                authors.extend([a.strip() for a in au.split('&')])
        if authors:
            self.authors = authors
        try:
            self.author_sort = tostring(book.find('authors').find('person'))
            if self.authors and self.author_sort == self.authors[0]:
                self.author_sort = None
        except:
            pass
        self.publisher = tostring(book.find('publishertext'))

        summ = tostring(book.find('summary'))
        if summ:
            self.comments = 'SUMMARY:\n'+summ


def build_isbn(base_url, opts):
    return base_url + 'index1=isbn&value1='+opts.isbn

def build_combined(base_url, opts):
    query = ' '.join([e for e in (opts.title, opts.author, opts.publisher) \
        if e is not None ])
    query = query.strip()
    if len(query) == 0:
        raise ISBNDBError('You must specify at least one of --author, --title or --publisher')

    query = re.sub(r'\s+', '+', query)
    if isinstance(query, unicode):
        query = query.encode('utf-8')
    return base_url+'index1=combined&value1='+quote(query, '+')


def option_parser():
    parser = OptionParser(usage=\
        _('''
        %prog [options] key

        Fetch metadata for books from isbndb.com. You can specify either the
        book's ISBN or its title and author. If you specify the title and author,
        then more than one book may be returned.

        key is the account key you generate after signing up for a free account from isbndb.com.

        '''))
    parser.add_option('-i', '--isbn', default=None, dest='isbn',
                      help=_('The ISBN ID of the book you want metadata for.'))
    parser.add_option('-a', '--author', dest='author',
                      default=None, help=_('The author whose book to search for.'))
    parser.add_option('-t', '--title', dest='title',
                      default=None, help=_('The title of the book to search for.'))
    parser.add_option('-p', '--publisher', default=None, dest='publisher',
                      help=_('The publisher of the book to search for.'))
    parser.add_option('-v', '--verbose', default=False,
                      action='store_true', help=_('Verbose processing'))

    return parser


def create_books(opts, args, timeout=5.):
    base_url = BASE_URL%dict(key=args[1])
    if opts.isbn is not None:
        url = build_isbn(base_url, opts)
    else:
        url = build_combined(base_url, opts)

    if opts.verbose:
        print ('ISBNDB query: '+url)

    tans = [ISBNDBMetadata(book) for book in fetch_metadata(url, timeout=timeout)]
    #remove duplicate ISBNs
    return list(dict((book.isbn, book) for book in tans).values())
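# Illustrative: results with ISBNs ('A', 'B', 'A') collapse to two entries,
# because the dict keyed on isbn keeps only one book per ISBN.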

def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) != 2:
        parser.print_help()
        print ('You must supply the isbndb.com key')
        return 1

    for book in create_books(opts, args):
        print unicode(book).encode('utf-8')

    return 0

if __name__ == '__main__':
    sys.exit(main())
@@ -400,7 +400,8 @@ class MetadataUpdater(object):
         if getattr(self, 'exth', None) is None:
             raise MobiError('No existing EXTH record. Cannot update metadata.')
 
-        self.record0[92:96] = iana2mobi(mi.language)
+        if not mi.is_null('language'):
+            self.record0[92:96] = iana2mobi(mi.language)
         self.create_exth(exth=exth, new_title=mi.title)
 
         # Fetch updated timestamp, cover_record, thumbnail_record

@@ -1,411 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, sengian <sengian1@gmail.com>'
__docformat__ = 'restructuredtext en'

import sys, textwrap, re, traceback, socket
from urllib import urlencode
from math import ceil
from copy import deepcopy

from lxml.html import soupparser

from calibre.utils.date import parse_date, utcnow, replace_months
from calibre.utils.cleantext import clean_ascii_chars
from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
    authors_to_sort_string
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.ebooks.metadata.covers import CoverDownload
from calibre.utils.config import OptionParser

class NiceBooks(MetadataSource):

    name = 'Nicebooks'
    description = _('Downloads metadata from french Nicebooks')
    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Sengian'
    version = (1, 0, 0)

    def fetch(self):
        try:
            self.results = search(self.title, self.book_author, self.publisher,
                                  self.isbn, max_results=10, verbose=self.verbose)
        except Exception as e:
            self.exception = e
            self.tb = traceback.format_exc()

class NiceBooksCovers(CoverDownload):

    name = 'Nicebooks covers'
    description = _('Downloads covers from french Nicebooks')
    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Sengian'
    type = _('Cover download')
    version = (1, 0, 0)

    def has_cover(self, mi, ans, timeout=5.):
        if not mi.isbn:
            return False
        br = browser()
        try:
            entry = Query(isbn=mi.isbn, max_results=1)(br, False, timeout)[0]
            if Covers(mi.isbn)(entry).check_cover():
                self.debug('cover for', mi.isbn, 'found')
                ans.set()
        except Exception as e:
            self.debug(e)

    def get_covers(self, mi, result_queue, abort, timeout=5.):
        if not mi.isbn:
            return
        br = browser()
        try:
            entry = Query(isbn=mi.isbn, max_results=1)(br, False, timeout)[0]
            cover_data, ext = Covers(mi.isbn)(entry).get_cover(br, timeout)
            if not ext:
                ext = 'jpg'
            result_queue.put((True, cover_data, ext, self.name))
        except Exception as e:
            result_queue.put((False, self.exception_to_string(e),
                traceback.format_exc(), self.name))


class NiceBooksError(Exception):
    pass

class ISBNNotFound(NiceBooksError):
    pass

def report(verbose):
    if verbose:
        traceback.print_exc()

class Query(object):

    BASE_URL = 'http://fr.nicebooks.com/'

    def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None, max_results=20):
        assert not(title is None and author is None and publisher is None \
                   and isbn is None and keywords is None)
        assert (max_results < 21)

        self.max_results = int(max_results)

        if isbn is not None:
            q = isbn
        else:
            q = ' '.join([i for i in (title, author, publisher, keywords) \
                if i is not None])

        if isinstance(q, unicode):
            q = q.encode('utf-8')
        self.urldata = 'search?' + urlencode({'q':q,'s':'Rechercher'})

    def __call__(self, browser, verbose, timeout = 5.):
        if verbose:
            print _('Query: %s') % (self.BASE_URL+self.urldata)

        try:
            raw = browser.open_novisit(self.BASE_URL+self.urldata, timeout=timeout).read()
        except Exception as e:
            report(verbose)
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return
            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
                raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
            raise NiceBooksError(_('Nicebooks encountered an error.'))
        if '<title>404 - ' in raw:
            return
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]
        try:
            feed = soupparser.fromstring(raw)
        except:
            try:
                #remove ASCII invalid chars
                feed = soupparser.fromstring(clean_ascii_chars(raw))
            except:
                return None

        #number of pages to query
        try:
            nbresults = int(feed.xpath("//div[@id='topbar']/b")[0].text)
        except:
            #direct hit
            return [feed]

        nbpagetoquery = int(ceil(float(min(nbresults, self.max_results))/10))
        pages = [feed]
        if nbpagetoquery > 1:
            for i in xrange(2, nbpagetoquery + 1):
                try:
                    urldata = self.urldata + '&p=' + str(i)
                    raw = browser.open_novisit(self.BASE_URL+urldata, timeout=timeout).read()
                except Exception as e:
                    continue
                if '<title>404 - ' in raw:
                    continue
                raw = xml_to_unicode(raw, strip_encoding_pats=True,
                        resolve_entities=True)[0]
                try:
                    feed = soupparser.fromstring(raw)
                except:
                    try:
                        #remove ASCII invalid chars
                        feed = soupparser.fromstring(clean_ascii_chars(raw))
                    except:
                        continue
                pages.append(feed)

        results = []
        for x in pages:
            results.extend([i.find_class('title')[0].get('href') \
                for i in x.xpath("//ul[@id='results']/li")])
        return results[:self.max_results]
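        # Worked example (illustrative counts): 27 hits with max_results=20
        # give ceil(20/10) = 2 pages to fetch; 8 hits give ceil(8/10) = 1, so
        # only the page already in hand is parsed.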

class ResultList(list):

    BASE_URL = 'http://fr.nicebooks.com'

    def __init__(self):
        self.repub = re.compile(u'\s*.diteur\s*', re.I)
        self.reauteur = re.compile(u'\s*auteur.*', re.I)
        self.reautclean = re.compile(u'\s*\(.*\)\s*')

    def get_title(self, entry):
        title = deepcopy(entry)
        title.remove(title.find("dl[@title='Informations sur le livre']"))
        title = ' '.join([i.text_content() for i in title.iterchildren()])
        return unicode(title.replace('\n', ''))

    def get_authors(self, entry):
        author = entry.find("dl[@title='Informations sur le livre']")
        authortext = []
        for x in author.getiterator('dt'):
            if self.reauteur.match(x.text):
                elt = x.getnext()
                while elt.tag == 'dd':
                    authortext.append(unicode(elt.text_content()))
                    elt = elt.getnext()
                break
        if len(authortext) == 1:
            authortext = [self.reautclean.sub('', authortext[0])]
        return authortext

    def get_description(self, entry, verbose):
        try:
            return u'RESUME:\n' + unicode(entry.getparent().xpath("//p[@id='book-description']")[0].text)
        except:
            report(verbose)
            return None

    def get_book_info(self, entry, mi, verbose):
        entry = entry.find("dl[@title='Informations sur le livre']")
        for x in entry.getiterator('dt'):
            if x.text == 'ISBN':
                isbntext = x.getnext().text_content().replace('-', '')
                if check_isbn(isbntext):
                    mi.isbn = unicode(isbntext)
            elif self.repub.match(x.text):
                mi.publisher = unicode(x.getnext().text_content())
            elif x.text == 'Langue':
                mi.language = unicode(x.getnext().text_content())
            elif x.text == 'Date de parution':
                d = x.getnext().text_content()
                try:
                    default = utcnow().replace(day=15)
                    d = replace_months(d, 'fr')
                    d = parse_date(d, assume_utc=True, default=default)
                    mi.pubdate = d
                except:
                    report(verbose)
        return mi

    def fill_MI(self, entry, title, authors, verbose):
        mi = MetaInformation(title, authors)
        mi.author_sort = authors_to_sort_string(authors)
        mi.comments = self.get_description(entry, verbose)
        return self.get_book_info(entry, mi, verbose)

    def get_individual_metadata(self, browser, linkdata, verbose):
        try:
            raw = browser.open_novisit(self.BASE_URL + linkdata).read()
        except Exception as e:
            report(verbose)
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return
            if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
                raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
            raise NiceBooksError(_('Nicebooks encountered an error.'))
        if '<title>404 - ' in raw:
            report(verbose)
            return
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]
        try:
            feed = soupparser.fromstring(raw)
        except:
            try:
                #remove ASCII invalid chars
                feed = soupparser.fromstring(clean_ascii_chars(raw))
            except:
                return None

        # get results
        return feed.xpath("//div[@id='container']")[0]

    def populate(self, entries, browser, verbose=False):
        #single entry
        if len(entries) == 1 and not isinstance(entries[0], str):
            try:
                entry = entries[0].xpath("//div[@id='container']")[0]
                entry = entry.find("div[@id='book-info']")
                title = self.get_title(entry)
                authors = self.get_authors(entry)
            except Exception as e:
                if verbose:
                    print 'Failed to get all details for an entry'
                    print e
                return
            self.append(self.fill_MI(entry, title, authors, verbose))
        else:
            #multiple entries
            for x in entries:
                try:
                    entry = self.get_individual_metadata(browser, x, verbose)
                    entry = entry.find("div[@id='book-info']")
                    title = self.get_title(entry)
                    authors = self.get_authors(entry)
                except Exception as e:
                    if verbose:
                        print 'Failed to get all details for an entry'
                        print e
                    continue
                self.append(self.fill_MI(entry, title, authors, verbose))

class Covers(object):

    def __init__(self, isbn = None):
        assert isbn is not None
        self.urlimg = ''
        self.isbn = isbn
        self.isbnf = False

    def __call__(self, entry = None):
        try:
            self.urlimg = entry.xpath("//div[@id='book-picture']/a")[0].get('href')
        except:
            return self
        isbno = entry.get_element_by_id('book-info').find("dl[@title='Informations sur le livre']")
        for x in isbno.getiterator('dt'):
            if x.text == 'ISBN' and check_isbn(x.getnext().text_content()):
                self.isbnf = True
                break
        return self

    def check_cover(self):
        return True if self.urlimg else False

    def get_cover(self, browser, timeout = 5.):
        try:
            cover, ext = browser.open_novisit(self.urlimg, timeout=timeout).read(), \
                self.urlimg.rpartition('.')[-1]
            return cover, ext if ext else 'jpg'
        except Exception as err:
            if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
                raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
            if not len(self.urlimg):
                if not self.isbnf:
                    raise ISBNNotFound(_('ISBN: %s not found.') % self.isbn)
            raise NiceBooksError(_('An error occurred with the Nicebooks cover fetcher'))


def search(title=None, author=None, publisher=None, isbn=None,
           max_results=5, verbose=False, keywords=None):
    br = browser()
    entries = Query(title=title, author=author, isbn=isbn, publisher=publisher,
                    keywords=keywords, max_results=max_results)(br, verbose, timeout = 10.)

    if entries is None or len(entries) == 0:
        return None

    #List of entry
    ans = ResultList()
    ans.populate(entries, br, verbose)
    return ans

def check_for_cover(isbn):
    br = browser()
    entry = Query(isbn=isbn, max_results=1)(br, False)[0]
    return Covers(isbn)(entry).check_cover()

def cover_from_isbn(isbn, timeout = 5.):
    br = browser()
    entry = Query(isbn=isbn, max_results=1)(br, False, timeout)[0]
    return Covers(isbn)(entry).get_cover(br, timeout)


def option_parser():
    parser = OptionParser(textwrap.dedent(\
        _('''\
        %prog [options]

        Fetch book metadata from Nicebooks. You must specify one of title, author,
        ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
        so you should make your query as specific as possible.
        It can also get covers if the option is activated.
        ''')
    ))
    parser.add_option('-t', '--title', help=_('Book title'))
    parser.add_option('-a', '--author', help=_('Book author(s)'))
    parser.add_option('-p', '--publisher', help=_('Book publisher'))
    parser.add_option('-i', '--isbn', help=_('Book ISBN'))
    parser.add_option('-k', '--keywords', help=_('Keywords'))
    parser.add_option('-c', '--covers', default=0,
                      help=_('Covers: 1-Check/ 2-Download'))
    # no short flag here: -p is already taken by --publisher
    parser.add_option('--coverspath', default='',
                      help=_('Covers files path'))
    parser.add_option('-m', '--max-results', default=20,
                      help=_('Maximum number of results to fetch'))
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help=_('Be more verbose about errors'))
    return parser

def main(args=sys.argv):
    import os
    parser = option_parser()
    opts, args = parser.parse_args(args)
    try:
        results = search(opts.title, opts.author, isbn=opts.isbn, publisher=opts.publisher,
                         keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
    except AssertionError:
        report(True)
        parser.print_help()
        return 1
    if results is None or len(results) == 0:
        print _('No result found for this search!')
        return 0
    for result in results:
        print unicode(result).encode(preferred_encoding, 'replace')
        covact = int(opts.covers)
        if covact == 1:
            textcover = _('No cover found!')
            if check_for_cover(result.isbn):
                textcover = _('A cover was found for this book')
            print textcover
        elif covact == 2:
            cover_data, ext = cover_from_isbn(result.isbn)
            cpath = result.isbn
            if len(opts.coverspath):
                cpath = os.path.normpath(opts.coverspath + '/' + result.isbn)
            oname = os.path.abspath(cpath+'.'+ext)
            open(oname, 'wb').write(cover_data)
            print _('Cover saved to file '), oname
        print

if __name__ == '__main__':
    sys.exit(main())
@@ -966,7 +966,9 @@ class OPF(object): # {{{
             cover_id = covers[0].get('content')
             for item in self.itermanifest():
                 if item.get('id', None) == cover_id:
-                    return item.get('href', None)
+                    mt = item.get('media-type', '')
+                    if 'xml' not in mt:
+                        return item.get('href', None)
 
     @dynamic_property
     def cover(self):
@@ -301,7 +301,7 @@ class Amazon(Source):
         if asin is None:
            asin = identifiers.get('asin', None)
         if asin:
-            return 'http://amzn.com/%s'%asin
+            return ('amazon', asin, 'http://amzn.com/%s'%asin)
     # }}}
 
     def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
@@ -56,7 +56,8 @@ class InternalMetadataCompareKeyGen(object):
 
     '''
     Generate a sort key for comparison of the relevance of Metadata objects,
-    given a search query.
+    given a search query. This is used only to compare results from the same
+    metadata source, not across different sources.
 
     The sort key ensures that an ascending order sort is a sort by order of
     decreasing relevance.
@@ -306,7 +307,7 @@ class Source(Plugin):
     title_patterns = [(re.compile(pat, re.IGNORECASE), repl) for pat, repl in
             [
                 # Remove things like: (2010) (Omnibus) etc.
-                (r'(?i)[({\[](\d{4}|omnibus|anthology|hardcover|paperback|mass\s*market|edition|ed\.)[\])}]', ''),
+                (r'(?i)[({\[](\d{4}|omnibus|anthology|hardcover|paperback|turtleback|mass\s*market|edition|ed\.)[\])}]', ''),
                 # Remove any strings that contain the substring edition inside
                 # parentheses
                 (r'(?i)[({\[].*?(edition|ed.).*?[\]})]', ''),
@@ -374,7 +375,11 @@ class Source(Plugin):
 
     def get_book_url(self, identifiers):
         '''
-        Return the URL for the book identified by identifiers at this source.
+        Return a 3-tuple or None. The 3-tuple is of the form:
+        (identifier_type, identifier_value, URL).
+        The URL is the URL for the book identified by identifiers at this
+        source. identifier_type, identifier_value specify the identifier
+        corresponding to the URL.
         This URL must be browseable by a human using a browser. It is meant
         to provide a clickable link for the user to easily visit the book's page
         at this source.
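A minimal sketch of the contract described above (hypothetical source named
'mysource'; only the shape of the return value comes from the docstring):

    def get_book_url(self, identifiers):
        # Return (identifier_type, identifier_value, URL) or None.
        val = identifiers.get('mysource', None)
        if val is not None:
            return ('mysource', val, 'http://example.com/book/%s' % val)

The Amazon and Google Books hunks in this commit follow the same pattern, and
urls_from_identifiers() below unpacks the triple into (plugin.name, id_type,
id_val, url).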
@@ -19,13 +19,8 @@ from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.ebooks.metadata.sources.base import create_log
 from calibre.ebooks.metadata.sources.identify import identify
 from calibre.ebooks.metadata.sources.covers import download_cover
-from calibre.utils.config import test_eight_code
 
 def option_parser():
-    if not test_eight_code:
-        from calibre.ebooks.metadata.fetch import option_parser
-        return option_parser()
-
     parser = OptionParser(textwrap.dedent(
         '''\
         %prog [options]
@@ -48,9 +43,6 @@ def option_parser():
     return parser
 
 def main(args=sys.argv):
-    if not test_eight_code:
-        from calibre.ebooks.metadata.fetch import main
-        return main(args)
     parser = option_parser()
     opts, args = parser.parse_args(args)
 
@@ -173,7 +173,7 @@ class GoogleBooks(Source):
     def get_book_url(self, identifiers): # {{{
         goog = identifiers.get('google', None)
         if goog is not None:
-            return 'http://books.google.com/books?id=%s'%goog
+            return ('google', goog, 'http://books.google.com/books?id=%s'%goog)
     # }}}
 
     def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
@@ -13,6 +13,7 @@ from Queue import Queue, Empty
 from threading import Thread
 from io import BytesIO
 from operator import attrgetter
+from urlparse import urlparse

 from calibre.customize.ui import metadata_plugins, all_metadata_plugins
 from calibre.ebooks.metadata.sources.base import create_log, msprefs

@@ -400,6 +401,9 @@ def identify(log, abort, # {{{
                     and plugin.get_cached_cover_url(result.identifiers) is not
                     None)
             result.identify_plugin = plugin
+            if msprefs['txt_comments']:
+                if plugin.has_html_comments and result.comments:
+                    result.comments = html2text(result.comments)

 log('The identify phase took %.2f seconds'%(time.time() - start_time))
 log('The longest time (%f) was taken by:'%longest, lp)

@@ -410,10 +414,6 @@ def identify(log, abort, # {{{
     log('We have %d merged results, merging took: %.2f seconds' %
             (len(results), time.time() - start_time))

-    if msprefs['txt_comments']:
-        for r in results:
-            if r.identify_plugin.has_html_comments and r.comments:
-                r.comments = html2text(r.comments)
-
     max_tags = msprefs['max_tags']
     for r in results:

@@ -435,18 +435,38 @@ def identify(log, abort, # {{{
 # }}}

 def urls_from_identifiers(identifiers): # {{{
+    identifiers = dict([(k.lower(), v) for k, v in identifiers.iteritems()])
     ans = []
     for plugin in all_metadata_plugins():
         try:
-            url = plugin.get_book_url(identifiers)
-            if url is not None:
-                ans.append((plugin.name, url))
+            id_type, id_val, url = plugin.get_book_url(identifiers)
+            ans.append((plugin.name, id_type, id_val, url))
         except:
             pass
     isbn = identifiers.get('isbn', None)
     if isbn:
-        ans.append((isbn,
-            'http://www.worldcat.org/search?q=bn%%3A%s&qt=advanced'%isbn))
+        ans.append((isbn, 'isbn', isbn,
+            'http://www.worldcat.org/isbn/'+isbn))
+    doi = identifiers.get('doi', None)
+    if doi:
+        ans.append(('DOI', 'doi', doi,
+            'http://dx.doi.org/'+doi))
+    arxiv = identifiers.get('arxiv', None)
+    if arxiv:
+        ans.append(('arXiv', 'arxiv', arxiv,
+            'http://arxiv.org/abs/'+arxiv))
+    oclc = identifiers.get('oclc', None)
+    if oclc:
+        ans.append(('OCLC', 'oclc', oclc,
+            'http://www.worldcat.org/oclc/'+oclc))
+    url = identifiers.get('uri', None)
+    if url is None:
+        url = identifiers.get('url', None)
+    if url and url.startswith('http'):
+        url = url[:8].replace('|', ':') + url[8:].replace('|', ',')
+        parts = urlparse(url)
+        name = parts.netloc
+        ans.append((name, 'url', url, url))
     return ans
 # }}}

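Callers of urls_from_identifiers now receive 4-tuples instead of (name, url) pairs, so they must unpack the two extra fields. A sketch of consuming code under that assumption, in the Python 2 style of the codebase (the identifier values are illustrative):

```python
from calibre.ebooks.metadata.sources.identify import urls_from_identifiers

for name, id_type, id_val, url in urls_from_identifiers(
        {'isbn': '0000000000', 'doi': '10.1000/182'}):
    # name is the display label (a plugin name, 'DOI', 'arXiv', 'OCLC', ...);
    # id_type and id_val record which identifier produced the link
    print '%s (%s:%s) -> %s' % (name, id_type, id_val, url)
```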
@@ -81,7 +81,7 @@ class ISBNDB(Source):
             author_tokens = self.get_author_tokens(authors,
                     only_first_author=True)
             tokens += author_tokens
-        tokens = [quote(t) for t in tokens]
+        tokens = [quote(t.encode('utf-8') if isinstance(t, unicode) else t) for t in tokens]
         q = '+'.join(tokens)
         q = 'index1=combined&value1='+q

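The quoting fix sidesteps a classic Python 2 pitfall: urllib.quote raises KeyError when handed a unicode string containing non-ASCII characters, so tokens are encoded to UTF-8 first. A standalone illustration (Python 2):

```python
from urllib import quote  # Python 2

token = u'caf\xe9'
# quote(token) would raise KeyError: u'\xe9' on Python 2
safe = quote(token.encode('utf-8') if isinstance(token, unicode) else token)
print safe  # caf%C3%A9
```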
@@ -41,7 +41,7 @@ class OverDrive(Source):
     cached_cover_url_is_reliable = True

     options = (
-            Option('get_full_metadata', 'bool', False,
+            Option('get_full_metadata', 'bool', True,
                 _('Download all metadata (slow)'),
                 _('Enable this option to gather all metadata available from Overdrive.')),
             )

@@ -265,7 +265,7 @@ class OverDrive(Source):
             if creators:
                 creators = creators.split(', ')
             # if an exact match in a preferred format occurs
-            if ((author and creators[0] == author[0]) or (not author and not creators)) and od_title.lower() == title.lower() and int(formatid) in [1, 50, 410, 900] and thumbimage:
+            if ((author and creators and creators[0] == author[0]) or (not author and not creators)) and od_title.lower() == title.lower() and int(formatid) in [1, 50, 410, 900] and thumbimage:
                 return self.format_results(reserveid, od_title, subtitle, series, publisher,
                         creators, thumbimage, worldcatlink, formatid)
             else:

@@ -291,7 +291,7 @@ class OverDrive(Source):
                     close_matches.insert(0, self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))
                 else:
                     close_matches.append(self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))

             elif close_title_match and close_author_match and int(formatid) in [1, 50, 410, 900]:
                 close_matches.append(self.format_results(reserveid, od_title, subtitle, series, publisher, creators, thumbimage, worldcatlink, formatid))

@@ -222,7 +222,7 @@ class SaveWorker(Thread):
                 if isbytestring(fpath):
                     fpath = fpath.decode(filesystem_encoding)
                 formats[fmt.lower()] = fpath
-            data[i] = [opf, cpath, formats]
+            data[i] = [opf, cpath, formats, mi.last_modified.isoformat()]
         return data

     def run(self):
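Consumers of the per-book rows now receive a fourth element, the last-modified timestamp serialized as an ISO-8601 string. A sketch of unpacking it on the receiving side; the row values are illustrative and parse_date is assumed to be calibre's own date helper:

```python
from calibre.utils.date import parse_date  # assumption: calibre's date helper

row = ['metadata.opf', 'cover.jpg', {'epub': '/tmp/book.epub'},
        '2011-04-29T10:00:00+00:00']  # shaped like data[i] built above
opf, cover_path, formats, last_modified = row
when = parse_date(last_modified)  # ISO-8601 string back to a datetime
```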
@@ -253,6 +253,8 @@ class MobiReader(object):

         .italic { font-style: italic }

+        .underline { text-decoration: underline }
+
         .mbp_pagebreak {
             page-break-after: always; margin: 0; display: block
         }

@@ -601,6 +603,9 @@ class MobiReader(object):
             elif tag.tag == 'i':
                 tag.tag = 'span'
                 tag.attrib['class'] = 'italic'
+            elif tag.tag == 'u':
+                tag.tag = 'span'
+                tag.attrib['class'] = 'underline'
             elif tag.tag == 'b':
                 tag.tag = 'span'
                 tag.attrib['class'] = 'bold'
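The underline handling follows the exact pattern already used for italics and bold: rewrite the presentational tag to a span and let the stylesheet above supply the styling. A self-contained illustration of that rewrite with lxml:

```python
from lxml import html

root = html.fromstring('<p><u>under</u> and <b>bold</b> text</p>')
for tag in root.iter():
    if tag.tag in ('i', 'u', 'b'):
        # Map the presentational tag to a classed span, as MobiReader does
        tag.attrib['class'] = {'i': 'italic', 'u': 'underline', 'b': 'bold'}[tag.tag]
        tag.tag = 'span'
print html.tostring(root)
# <p><span class="underline">under</span> and <span class="bold">bold</span> text</p>
```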
@@ -7,6 +7,8 @@ __docformat__ = 'restructuredtext en'
 Convert an ODT file into a Open Ebook
 '''
 import os

+from lxml import etree
+
 from odf.odf2xhtml import ODF2XHTML

 from calibre import CurrentDir, walk

@@ -23,7 +25,51 @@ class Extract(ODF2XHTML):
         with open(name, 'wb') as f:
             f.write(data)

-    def __call__(self, stream, odir):
+    def filter_css(self, html, log):
+        root = etree.fromstring(html)
+        style = root.xpath('//*[local-name() = "style" and @type="text/css"]')
+        if style:
+            style = style[0]
+            css = style.text
+            if css:
+                style.text, sel_map = self.do_filter_css(css)
+                for x in root.xpath('//*[@class]'):
+                    extra = []
+                    orig = x.get('class')
+                    for cls in orig.split():
+                        extra.extend(sel_map.get(cls, []))
+                    if extra:
+                        x.set('class', orig + ' ' + ' '.join(extra))
+            html = etree.tostring(root, encoding='utf-8',
+                    xml_declaration=True)
+        return html
+
+    def do_filter_css(self, css):
+        from cssutils import parseString
+        from cssutils.css import CSSRule
+        sheet = parseString(css)
+        rules = list(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
+        sel_map = {}
+        count = 0
+        for r in rules:
+            # Check if we have only class selectors for this rule
+            nc = [x for x in r.selectorList if not
+                    x.selectorText.startswith('.')]
+            if len(r.selectorList) > 1 and not nc:
+                # Replace all the class selectors with a single class selector
+                # This will be added to the class attribute of all elements
+                # that have one of these selectors.
+                replace_name = 'c_odt%d'%count
+                count += 1
+                for sel in r.selectorList:
+                    s = sel.selectorText[1:]
+                    if s not in sel_map:
+                        sel_map[s] = []
+                    sel_map[s].append(replace_name)
+                r.selectorText = '.'+replace_name
+        return sheet.cssText, sel_map
+
+    def __call__(self, stream, odir, log):
         from calibre.utils.zipfile import ZipFile
         from calibre.ebooks.metadata.meta import get_metadata
         from calibre.ebooks.metadata.opf2 import OPFCreator

@@ -32,13 +78,17 @@ class Extract(ODF2XHTML):
         if not os.path.exists(odir):
             os.makedirs(odir)
         with CurrentDir(odir):
-            print 'Extracting ODT file...'
+            log('Extracting ODT file...')
             html = self.odf2xhtml(stream)
             # A blanket img specification like this causes problems
-            # with EPUB output as the contaiing element often has
+            # with EPUB output as the containing element often has
             # an absolute height and width set that is larger than
             # the available screen real estate
             html = html.replace('img { width: 100%; height: 100%; }', '')
+            try:
+                html = self.filter_css(html, log)
+            except:
+                log.exception('Failed to filter CSS, conversion may be slow')
             with open('index.xhtml', 'wb') as f:
                 f.write(html.encode('utf-8'))
         zf = ZipFile(stream, 'r')

@@ -67,7 +117,7 @@ class ODTInput(InputFormatPlugin):

     def convert(self, stream, options, file_ext, log,
             accelerators):
-        return Extract()(stream, '.')
+        return Extract()(stream, '.', log)

     def postprocess_book(self, oeb, opts, log):
         # Fix <p><div> constructs as the asinine epubchecker complains
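The intent of do_filter_css is easiest to see on a concrete rule. ODF output tends to emit one class per paragraph style, so stylesheets fill up with rules like '.P1, .P2 { font-weight: bold }'; collapsing each class-only rule to a single generated class keeps the sheet small, which is why the fallback message warns that conversion may be slow if filtering fails. A hedged sketch of the transformation (values illustrative, exact cssText formatting may differ):

```python
# Sketch of what Extract.do_filter_css produces for a class-only rule:
css = '.P1, .P2 { font-weight: bold }'
new_css, sel_map = Extract().do_filter_css(css)
# new_css -> '.c_odt0 { font-weight: bold }'
# sel_map -> {'P1': ['c_odt0'], 'P2': ['c_odt0']}
# filter_css() then appends c_odt0 to the class attribute of every
# element whose class list contains P1 or P2.
```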
@@ -16,7 +16,7 @@ from urllib import unquote as urlunquote
 from lxml import etree, html
 from calibre.constants import filesystem_encoding, __version__
 from calibre.translations.dynamic import translate
-from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
 from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
 from calibre.ebooks.conversion.preprocess import CSSPreProcessor
 from calibre import isbytestring, as_unicode, get_types_map
@@ -446,22 +446,23 @@ class NullContainer(object):
 class DirContainer(object):
     """Filesystem directory container."""

-    def __init__(self, path, log):
+    def __init__(self, path, log, ignore_opf=False):
         self.log = log
         if isbytestring(path):
             path = path.decode(filesystem_encoding)
+        self.opfname = None
         ext = os.path.splitext(path)[1].lower()
         if ext == '.opf':
             self.opfname = os.path.basename(path)
             self.rootdir = os.path.dirname(path)
             return
         self.rootdir = path
-        for path in self.namelist():
-            ext = os.path.splitext(path)[1].lower()
-            if ext == '.opf':
-                self.opfname = path
-                return
-        self.opfname = None
+        if not ignore_opf:
+            for path in self.namelist():
+                ext = os.path.splitext(path)[1].lower()
+                if ext == '.opf':
+                    self.opfname = path
+                    return

     def read(self, path):
         if path is None:
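With the new flag a caller that does not care about the OPF, or already knows its location, can skip the directory walk entirely. A usage sketch; the path is hypothetical and default_log is assumed as a stand-in logger:

```python
from calibre.utils.logging import default_log as log  # assumption: stand-in logger
from calibre.ebooks.oeb.base import DirContainer

c1 = DirContainer('/path/to/book', log)                   # scans dir for an OPF
c2 = DirContainer('/path/to/book', log, ignore_opf=True)  # skips the scan;
# c2.opfname stays None thanks to the new early initialization above
```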
@@ -852,6 +853,7 @@ class Manifest(object):
             self.oeb.log.debug('Parsing', self.href, '...')
             # Convert to Unicode and normalize line endings
             data = self.oeb.decode(data)
+            data = strip_encoding_declarations(data)
             data = self.oeb.html_preprocessor(data)
             # There could be null bytes in data if it had &#0; entities in it
             data = data.replace('\0', '')
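Stripping the declaration at this point matters because the data has just been decoded to unicode: a stale XML prolog or meta charset tag no longer describes the bytes, and lxml refuses to parse unicode strings that still carry an encoding declaration. Roughly:

```python
from calibre.ebooks.chardet import strip_encoding_declarations

data = u'<?xml version="1.0" encoding="iso-8859-1"?><html/>'
# lxml: "Unicode strings with encoding declaration are not supported"
data = strip_encoding_declarations(data)  # declaration removed, parse proceeds
```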
@@ -1047,8 +1049,8 @@ class Manifest(object):

         # Remove hyperlinks with no content as they cause rendering
         # artifacts in browser based renderers
-        # Also remove empty <b> and <i> tags
-        for a in xpath(data, '//h:a[@href]|//h:i|//h:b'):
+        # Also remove empty <b>, <u> and <i> tags
+        for a in xpath(data, '//h:a[@href]|//h:i|//h:b|//h:u'):
             if a.get('id', None) is None and a.get('name', None) is None \
                     and len(a) == 0 and not a.text:
                 remove_elem(a)
@@ -125,7 +125,19 @@ class Stylizer(object):
     def __init__(self, tree, path, oeb, opts, profile=None,
             extra_css='', user_css=''):
         self.oeb, self.opts = oeb, opts
-        self.profile = opts.input_profile
+        self.profile = profile
+        if self.profile is None:
+            # Use the default profile. This should really be using
+            # opts.output_profile, but I don't want to risk changing it, as
+            # doing so might well have hard to debug font size effects.
+            from calibre.customize.ui import output_profiles
+            for x in output_profiles():
+                if x.short_name == 'default':
+                    self.profile = x
+                    break
+        if self.profile is None:
+            # Just in case the default profile is removed in the future :)
+            self.profile = opts.output_profile
         self.logger = oeb.logger
         item = oeb.manifest.hrefs[path]
         basename = os.path.basename(path)
@@ -36,7 +36,7 @@ def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
         m.clear('description')
         m.add('description', mi.comments)
     elif override_input_metadata:
         m.clear('description')
     if not mi.is_null('publisher'):
         m.clear('publisher')
         m.add('publisher', mi.publisher)
@@ -16,6 +16,7 @@ from calibre import CurrentDir
 from calibre.ebooks.pdb.formatreader import FormatReader
 from calibre.ptempfile import TemporaryFile
 from calibre.utils.magick import Image, create_canvas
+from calibre.ebooks.compression.palmdoc import decompress_doc

 DATATYPE_PHTML = 0
 DATATYPE_PHTML_COMPRESSED = 1

@@ -359,7 +360,7 @@ class Reader(FormatReader):
         # plugin assemble the order based on hyperlinks.
         with CurrentDir(output_dir):
             for uid, num in self.uid_text_secion_number.items():
-                self.log.debug(_('Writing record with uid: %s as %s.html' % (uid, uid)))
+                self.log.debug('Writing record with uid: %s as %s.html' % (uid, uid))
                 with open('%s.html' % uid, 'wb') as htmlf:
                     html = u'<html><body>'
                     section_header, section_data = self.sections[num]

@@ -465,7 +466,7 @@ class Reader(FormatReader):
             if not home_html:
                 home_html = self.uid_text_secion_number.items()[0][0]
         except:
-            raise Exception(_('Could not determine home.html'))
+            raise Exception('Could not determine home.html')
         # Generate oeb from html conversion.
         oeb = html_input.convert(open('%s.html' % home_html, 'rb'), self.options, 'html', self.log, {})
         self.options.debug_pipeline = odi
@@ -32,10 +32,11 @@ class PDFInput(InputFormatPlugin):

     def convert_new(self, stream, accelerators):
         from calibre.ebooks.pdf.reflow import PDFDocument
+        from calibre.utils.cleantext import clean_ascii_chars
         if pdfreflow_err:
             raise RuntimeError('Failed to load pdfreflow: ' + pdfreflow_err)
         pdfreflow.reflow(stream.read(), 1, -1)
-        xml = open('index.xml', 'rb').read()
+        xml = clean_ascii_chars(open('index.xml', 'rb').read())
         PDFDocument(xml, self.opts, self.log)
         return os.path.join(os.getcwd(), 'metadata.opf')

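clean_ascii_chars guards the subsequent XML parse: pdfreflow's index.xml can contain low ASCII control characters, which are not legal in XML 1.0 and would abort parsing. A small illustration of the failure it prevents (the input string is illustrative):

```python
from calibre.utils.cleantext import clean_ascii_chars

raw = '<page>text with a stray \x00 control char</page>'
xml = clean_ascii_chars(raw)  # control characters stripped before parsing
```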
@@ -15,7 +15,6 @@ import cStringIO
 from lxml import etree

 from calibre.ebooks.metadata import authors_to_string
-from calibre.utils.filenames import ascii_text
 from calibre.utils.magick.draw import save_cover_data_to, identify_data

 TAGS = {

@@ -79,8 +78,7 @@ def txt2rtf(text):
         elif val <= 127:
             buf.write(x)
         else:
-            repl = ascii_text(x)
-            c = r'\uc{2}\u{0:d}{1}'.format(val, repl, len(repl))
+            c = r'\u{0:d}?'.format(val)
             buf.write(c)
     return buf.getvalue()

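The replacement escape relies on RTF's built-in Unicode mechanism: \uN emits code point N as a decimal number, and the character that follows it is the fallback shown by readers without Unicode support, here a plain '?'. That removes the need for ascii_text() transliteration entirely. For example:

```python
val = ord(u'\u00e9')         # e-acute
c = r'\u{0:d}?'.format(val)
# c == '\\u233?'  -- RTF renders the accented character,
# or '?' on readers without Unicode support
```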
@@ -34,7 +34,7 @@ if isosx:
             )
     gprefs.defaults['action-layout-toolbar'] = (
         'Add Books', 'Edit Metadata', None, 'Convert Books', 'View', None,
-        'Choose Library', 'Donate', None, 'Fetch News', 'Save To Disk',
+        'Choose Library', 'Donate', None, 'Fetch News', 'Store', 'Save To Disk',
         'Connect Share', None, 'Remove Books',
         )
     gprefs.defaults['action-layout-toolbar-device'] = (

@@ -48,7 +48,7 @@ else:
     gprefs.defaults['action-layout-menubar-device'] = ()
     gprefs.defaults['action-layout-toolbar'] = (
         'Add Books', 'Edit Metadata', None, 'Convert Books', 'View', None,
-        'Choose Library', 'Donate', None, 'Fetch News', 'Save To Disk',
+        'Choose Library', 'Donate', None, 'Fetch News', 'Store', 'Save To Disk',
         'Connect Share', None, 'Remove Books', None, 'Help', 'Preferences',
         )
     gprefs.defaults['action-layout-toolbar-device'] = (

@@ -739,12 +739,6 @@ def build_forms(srcdir, info=None):
             dat = dat.replace('from QtWebKit.QWebView import QWebView',
                     'from PyQt4 import QtWebKit\nfrom PyQt4.QtWebKit import QWebView')

-            if form.endswith('viewer%smain.ui'%os.sep):
-                info('\t\tPromoting WebView')
-                dat = dat.replace('self.view = QtWebKit.QWebView(', 'self.view = DocumentView(')
-                dat = dat.replace('self.view = QWebView(', 'self.view = DocumentView(')
-                dat += '\n\nfrom calibre.gui2.viewer.documentview import DocumentView'
-
             open(compiled_form, 'wb').write(dat)

 _df = os.environ.get('CALIBRE_DEVELOP_FROM', None)
@@ -20,9 +20,8 @@ from calibre.ebooks import BOOK_EXTENSIONS
 from calibre.utils.filenames import ascii_filename
 from calibre.constants import preferred_encoding, filesystem_encoding
 from calibre.gui2.actions import InterfaceAction
-from calibre.gui2 import config, question_dialog
+from calibre.gui2 import question_dialog
 from calibre.ebooks.metadata import MetaInformation
-from calibre.utils.config import test_eight_code
 from calibre.ebooks.metadata.sources.base import msprefs

 def get_filters():

@@ -180,26 +179,17 @@ class AddAction(InterfaceAction):
             except IndexError:
                 self.gui.library_view.model().books_added(self.isbn_add_dialog.value)
                 self.isbn_add_dialog.accept()
-                if test_eight_code:
-                    orig = msprefs['ignore_fields']
-                    new = list(orig)
-                    for x in ('title', 'authors'):
-                        if x in new:
-                            new.remove(x)
-                    msprefs['ignore_fields'] = new
-                    try:
-                        self.gui.iactions['Edit Metadata'].download_metadata(
-                                ids=self.add_by_isbn_ids)
-                    finally:
-                        msprefs['ignore_fields'] = orig
-                else:
-                    orig = config['overwrite_author_title_metadata']
-                    config['overwrite_author_title_metadata'] = True
-                    try:
-                        self.gui.iactions['Edit Metadata'].do_download_metadata(
-                                self.add_by_isbn_ids)
-                    finally:
-                        config['overwrite_author_title_metadata'] = orig
+                orig = msprefs['ignore_fields']
+                new = list(orig)
+                for x in ('title', 'authors'):
+                    if x in new:
+                        new.remove(x)
+                msprefs['ignore_fields'] = new
+                try:
+                    self.gui.iactions['Edit Metadata'].download_metadata(
+                            ids=self.add_by_isbn_ids)
+                finally:
+                    msprefs['ignore_fields'] = orig
                 return


@@ -246,7 +246,8 @@ class ChooseLibraryAction(InterfaceAction):
     def delete_requested(self, name, location):
         loc = location.replace('/', os.sep)
         if not question_dialog(self.gui, _('Are you sure?'), '<p>'+
-                _('All files from %s will be '
+                _('<b style="color: red">All files</b> (not just ebooks) '
+                    'from <br><br><b>%s</b><br><br> will be '
                     '<b>permanently deleted</b>. Are you sure?') % loc,
                 show_copy_button=False):
             return
@@ -10,15 +10,13 @@ from functools import partial

 from PyQt4.Qt import Qt, QMenu, QModelIndex, QTimer

-from calibre.gui2 import error_dialog, config, Dispatcher, question_dialog
-from calibre.gui2.dialogs.metadata_single import MetadataSingleDialog
+from calibre.gui2 import error_dialog, Dispatcher, question_dialog
 from calibre.gui2.dialogs.metadata_bulk import MetadataBulkDialog
 from calibre.gui2.dialogs.confirm_delete import confirm
 from calibre.gui2.dialogs.tag_list_editor import TagListEditor
 from calibre.gui2.actions import InterfaceAction
 from calibre.ebooks.metadata import authors_to_string
 from calibre.utils.icu import sort_key
-from calibre.utils.config import test_eight_code

 class EditMetadataAction(InterfaceAction):

@@ -36,22 +34,8 @@ class EditMetadataAction(InterfaceAction):
         md.addAction(_('Edit metadata in bulk'),
                 partial(self.edit_metadata, False, bulk=True))
         md.addSeparator()
-        if test_eight_code:
-            dall = self.download_metadata
-        else:
-            dall = partial(self.download_metadata_old, False, covers=True)
-            dident = partial(self.download_metadata_old, False, covers=False)
-            dcovers = partial(self.download_metadata_old, False, covers=True,
-                    set_metadata=False, set_social_metadata=False)
-
-        md.addAction(_('Download metadata and covers'), dall,
+        md.addAction(_('Download metadata and covers'), self.download_metadata,
                 Qt.ControlModifier+Qt.Key_D)
-        if not test_eight_code:
-            md.addAction(_('Download only metadata'), dident)
-            md.addAction(_('Download only covers'), dcovers)
-            md.addAction(_('Download only social metadata'),
-                partial(self.download_metadata_old, False, covers=False,
-                    set_metadata=False, set_social_metadata=True))
         self.metadata_menu = md

         mb = QMenu()

@@ -88,7 +72,7 @@ class EditMetadataAction(InterfaceAction):
                     _('No books selected'), show=True)
         db = self.gui.library_view.model().db
         ids = [db.id(row.row()) for row in rows]
-        from calibre.gui2.metadata.bulk_download2 import start_download
+        from calibre.gui2.metadata.bulk_download import start_download
         start_download(self.gui, ids,
                 Dispatcher(self.metadata_downloaded))

@@ -96,7 +80,7 @@ class EditMetadataAction(InterfaceAction):
         if job.failed:
             self.gui.job_exception(job, dialog_title=_('Failed to download metadata'))
             return
-        from calibre.gui2.metadata.bulk_download2 import get_job_details
+        from calibre.gui2.metadata.bulk_download import get_job_details
         id_map, failed_ids, failed_covers, all_failed, det_msg = \
                 get_job_details(job)
         if all_failed:

@@ -112,8 +96,9 @@ class EditMetadataAction(InterfaceAction):
         show_copy_button = False
         if failed_ids or failed_covers:
             show_copy_button = True
+            num = len(failed_ids.union(failed_covers))
             msg += '<p>'+_('Could not download metadata and/or covers for %d of the books. Click'
-                    ' "Show details" to see which books.')%len(failed_ids)
+                    ' "Show details" to see which books.')%num

         payload = (id_map, failed_ids, failed_covers)
         from calibre.gui2.dialogs.message_box import ProceedNotification

@@ -158,49 +143,6 @@ class EditMetadataAction(InterfaceAction):

         self.apply_metadata_changes(id_map)

-    def download_metadata_old(self, checked, covers=True, set_metadata=True,
-            set_social_metadata=None):
-        rows = self.gui.library_view.selectionModel().selectedRows()
-        if not rows or len(rows) == 0:
-            d = error_dialog(self.gui, _('Cannot download metadata'),
-                    _('No books selected'))
-            d.exec_()
-            return
-        db = self.gui.library_view.model().db
-        ids = [db.id(row.row()) for row in rows]
-        self.do_download_metadata(ids, covers=covers,
-                set_metadata=set_metadata,
-                set_social_metadata=set_social_metadata)
-
-    def do_download_metadata(self, ids, covers=True, set_metadata=True,
-            set_social_metadata=None):
-        m = self.gui.library_view.model()
-        db = m.db
-        if set_social_metadata is None:
-            get_social_metadata = config['get_social_metadata']
-        else:
-            get_social_metadata = set_social_metadata
-        from calibre.gui2.metadata.bulk_download import DoDownload
-        if set_social_metadata is not None and set_social_metadata:
-            x = _('social metadata')
-        else:
-            x = _('covers') if covers and not set_metadata else _('metadata')
-        title = _('Downloading {0} for {1} book(s)').format(x, len(ids))
-        self._download_book_metadata = DoDownload(self.gui, title, db, ids,
-                get_covers=covers, set_metadata=set_metadata,
-                get_social_metadata=get_social_metadata)
-        m.stop_metadata_backup()
-        try:
-            self._download_book_metadata.exec_()
-        finally:
-            m.start_metadata_backup()
-        cr = self.gui.library_view.currentIndex().row()
-        x = self._download_book_metadata
-        if x.updated:
-            self.gui.library_view.model().refresh_ids(
-                x.updated, cr)
-            if self.gui.cover_flow:
-                self.gui.cover_flow.dataChanged()
     # }}}

     def edit_metadata(self, checked, bulk=None):

@@ -227,9 +169,7 @@ class EditMetadataAction(InterfaceAction):
                 list(range(self.gui.library_view.model().rowCount(QModelIndex())))
         current_row = row_list.index(cr)

-        func = (self.do_edit_metadata if test_eight_code else
-                self.do_edit_metadata_old)
-        changed, rows_to_refresh = func(row_list, current_row)
+        changed, rows_to_refresh = self.do_edit_metadata(row_list, current_row)

         m = self.gui.library_view.model()

@@ -244,36 +184,6 @@ class EditMetadataAction(InterfaceAction):
             m.current_changed(current, previous)
         self.gui.tags_view.recount()

-    def do_edit_metadata_old(self, row_list, current_row):
-        changed = set([])
-        db = self.gui.library_view.model().db
-
-        while True:
-            prev = next_ = None
-            if current_row > 0:
-                prev = db.title(row_list[current_row-1])
-            if current_row < len(row_list) - 1:
-                next_ = db.title(row_list[current_row+1])
-
-            d = MetadataSingleDialog(self.gui, row_list[current_row], db,
-                    prev=prev, next_=next_)
-            d.view_format.connect(lambda
-                    fmt:self.gui.iactions['View'].view_format(row_list[current_row],
-                        fmt))
-            ret = d.exec_()
-            d.break_cycles()
-            if ret != d.Accepted:
-                break
-
-            changed.add(d.id)
-            self.gui.library_view.model().refresh_ids(list(d.books_to_refresh))
-            if d.row_delta == 0:
-                break
-            current_row += d.row_delta
-            self.gui.library_view.set_current_row(current_row)
-            self.gui.library_view.scroll_to_row(current_row)
-        return changed, set()
-
     def do_edit_metadata(self, row_list, current_row):
         from calibre.gui2.metadata.single import edit_metadata
         db = self.gui.library_view.model().db

@@ -613,6 +523,7 @@ class EditMetadataAction(InterfaceAction):
                     self.applied_ids, cr)
             if self.gui.cover_flow:
                 self.gui.cover_flow.dataChanged()
+            self.gui.tags_view.recount()

         self.apply_id_map = []
         self.apply_pd = None
@@ -10,7 +10,7 @@ from PyQt4.Qt import QIcon, QMenu, Qt
 from calibre.gui2.actions import InterfaceAction
 from calibre.gui2.preferences.main import Preferences
 from calibre.gui2 import error_dialog
-from calibre.constants import DEBUG
+from calibre.constants import DEBUG, isosx

 class PreferencesAction(InterfaceAction):

@@ -19,7 +19,8 @@ class PreferencesAction(InterfaceAction):

     def genesis(self):
         pm = QMenu()
-        pm.addAction(QIcon(I('config.png')), _('Preferences'), self.do_config)
+        acname = _('Change calibre behavior') if isosx else _('Preferences')
+        pm.addAction(QIcon(I('config.png')), acname, self.do_config)
         pm.addAction(QIcon(I('wizard.png')), _('Run welcome wizard'),
                 self.gui.run_wizard)
         if not DEBUG:
@@ -60,7 +60,7 @@ class ViewAction(InterfaceAction):

     def build_menus(self, db):
         self.view_menu.clear()
-        self.view_menu.addAction(self.qaction)
+        self.view_menu.addAction(self.view_action)
         self.view_menu.addAction(self.view_specific_action)
         self.view_menu.addSeparator()
         self.view_menu.addAction(self.action_pick_random)
Some files were not shown because too many files have changed in this diff.